pathling 7.0.1__tar.gz → 7.2.0.dev0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46) hide show
  1. {pathling-7.0.1/pathling.egg-info → pathling-7.2.0.dev0}/PKG-INFO +2 -2
  2. pathling-7.2.0.dev0/examples/bulk.py +149 -0
  3. {pathling-7.0.1 → pathling-7.2.0.dev0}/pathling/_version.py +3 -3
  4. pathling-7.2.0.dev0/pathling/bulk.py +213 -0
  5. {pathling-7.0.1 → pathling-7.2.0.dev0}/pathling/datasink.py +4 -2
  6. {pathling-7.0.1 → pathling-7.2.0.dev0}/pathling/datasource.py +91 -3
  7. {pathling-7.0.1 → pathling-7.2.0.dev0/pathling.egg-info}/PKG-INFO +2 -2
  8. {pathling-7.0.1 → pathling-7.2.0.dev0}/pathling.egg-info/SOURCES.txt +2 -0
  9. pathling-7.2.0.dev0/pathling.egg-info/requires.txt +2 -0
  10. {pathling-7.0.1 → pathling-7.2.0.dev0}/setup.py +1 -1
  11. pathling-7.0.1/pathling.egg-info/requires.txt +0 -2
  12. {pathling-7.0.1 → pathling-7.2.0.dev0}/LICENSE +0 -0
  13. {pathling-7.0.1 → pathling-7.2.0.dev0}/MANIFEST.in +0 -0
  14. {pathling-7.0.1 → pathling-7.2.0.dev0}/README.md +0 -0
  15. {pathling-7.0.1 → pathling-7.2.0.dev0}/examples/data/bundles/Bennett146_Swaniawski813_704c9750-f6e6-473b-ee83-fbd48e07fe3f.json +0 -0
  16. {pathling-7.0.1 → pathling-7.2.0.dev0}/examples/data/bundles/Dino214_Parisian75_40d82b80-b682-cd8b-da6d-396809878641.json +0 -0
  17. {pathling-7.0.1 → pathling-7.2.0.dev0}/examples/data/resources/Condition.ndjson +0 -0
  18. {pathling-7.0.1 → pathling-7.2.0.dev0}/examples/data/resources/Patient.ndjson +0 -0
  19. {pathling-7.0.1 → pathling-7.2.0.dev0}/examples/designation.py +0 -0
  20. {pathling-7.0.1 → pathling-7.2.0.dev0}/examples/display.py +0 -0
  21. {pathling-7.0.1 → pathling-7.2.0.dev0}/examples/encode_bundles.py +0 -0
  22. {pathling-7.0.1 → pathling-7.2.0.dev0}/examples/encode_resources.py +0 -0
  23. {pathling-7.0.1 → pathling-7.2.0.dev0}/examples/member_of.py +0 -0
  24. {pathling-7.0.1 → pathling-7.2.0.dev0}/examples/member_of_old.py +0 -0
  25. {pathling-7.0.1 → pathling-7.2.0.dev0}/examples/property_of.py +0 -0
  26. {pathling-7.0.1 → pathling-7.2.0.dev0}/examples/query.py +0 -0
  27. {pathling-7.0.1 → pathling-7.2.0.dev0}/examples/subsumes.py +0 -0
  28. {pathling-7.0.1 → pathling-7.2.0.dev0}/examples/subsumes_old.py +0 -0
  29. {pathling-7.0.1 → pathling-7.2.0.dev0}/examples/translate.py +0 -0
  30. {pathling-7.0.1 → pathling-7.2.0.dev0}/examples/translate_old.py +0 -0
  31. {pathling-7.0.1 → pathling-7.2.0.dev0}/pathling/__init__.py +0 -0
  32. {pathling-7.0.1 → pathling-7.2.0.dev0}/pathling/coding.py +0 -0
  33. {pathling-7.0.1 → pathling-7.2.0.dev0}/pathling/context.py +0 -0
  34. {pathling-7.0.1 → pathling-7.2.0.dev0}/pathling/core.py +0 -0
  35. {pathling-7.0.1 → pathling-7.2.0.dev0}/pathling/fhir.py +0 -0
  36. {pathling-7.0.1 → pathling-7.2.0.dev0}/pathling/functions.py +0 -0
  37. {pathling-7.0.1 → pathling-7.2.0.dev0}/pathling/query.py +0 -0
  38. {pathling-7.0.1 → pathling-7.2.0.dev0}/pathling/udfs.py +0 -0
  39. {pathling-7.0.1 → pathling-7.2.0.dev0}/pathling.egg-info/dependency_links.txt +0 -0
  40. {pathling-7.0.1 → pathling-7.2.0.dev0}/pathling.egg-info/top_level.txt +0 -0
  41. {pathling-7.0.1 → pathling-7.2.0.dev0}/setup.cfg +0 -0
  42. {pathling-7.0.1 → pathling-7.2.0.dev0}/tests/test_datasource.py +0 -0
  43. {pathling-7.0.1 → pathling-7.2.0.dev0}/tests/test_encoders.py +0 -0
  44. {pathling-7.0.1 → pathling-7.2.0.dev0}/tests/test_functions.py +0 -0
  45. {pathling-7.0.1 → pathling-7.2.0.dev0}/tests/test_query.py +0 -0
  46. {pathling-7.0.1 → pathling-7.2.0.dev0}/tests/test_udfs.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: pathling
3
- Version: 7.0.1
3
+ Version: 7.2.0.dev0
4
4
  Summary: Python API for Pathling
5
5
  Home-page: https://github.com/aehrc/pathling
6
6
  Author: Australian e-Health Research Centre, CSIRO
@@ -15,7 +15,7 @@ Classifier: Programming Language :: Python :: 3.10
15
15
  Requires-Python: >=3.8
16
16
  Description-Content-Type: text/markdown
17
17
  License-File: LICENSE
18
- Requires-Dist: pyspark<3.6.0,>=3.5.0
18
+ Requires-Dist: pyspark<3.6.0,>=3.5.3
19
19
  Requires-Dist: deprecated>=1.2.13
20
20
 
21
21
  Python API for Pathling
@@ -0,0 +1,149 @@
1
+ # Copyright 2025 Commonwealth Scientific and Industrial Research
2
+ # Organisation (CSIRO) ABN 41 687 119 230.
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+
16
+ import os
17
+ import tempfile
18
+ from datetime import datetime, timezone
19
+
20
+ from pathling import PathlingContext
21
+
22
+ jwk = """
23
+ {
24
+ "kty": "RSA",
25
+ "alg": "RS384",
26
+ "n": "llps-ufRYIVplRdtF2FB1xn5iCuojOyHmCmkLsMLU2zsUNcpwxlFUmv9xQRHTArfGm_vRkKjtx4dsW8LMG45EvDK_a6TmLF5H5hDlCqr0aXuInpN-c3f6f9d0zRtBCc18IKHL_IBskoaHGK4LVdQypIPcLqMiKkPFXI-NRwtJLUpQt6NH_p8vW0fiIRbkdC1t2pSrPX0307et38IE_vv8_RZm3CAKef2pnbWzRUBleeQybqaR28VNNallixegt1Sh5ShQLfQvA0QmrST25Kzs5K0d_6eAKl4xPDp1Q_dC1N4mygMZAkbRXKdq49Pg9C-56pbzEmvOYiM_CtMWkzr9w",
27
+ "e": "AQAB",
28
+ "d": "By9zHdqOSwqVLSbdc8yWFO2M21Ea0QFMyZzT19hCZk5CTOq7eDNw-KtoiU3XCm9KkjzfNoBgypOJ37zqz_m0iI8xZEY_j4CLxVLFiAMyCubfJo6pw1JvbQNjPIC45QXqsf_K7iOmqRqZfNnK63_MwKGSU1TW-oD505COIIOkNKjQ7KpIOm56EfyH2_cPUfmlHsBCRGy6eQ2M8cSK-uxXchSrNqt46nD8ArCE8qtrGJn1zJTgWOkH2lS73uzkc_P6rGg3IdiAbmPl4HWU-PlJ2jwykFbbXhnzL3Tpruc8okR_cda5u7KSa8dfV5WPjnygTxPHNt5_iuszPKxa0X9nwQ",
29
+ "p": "1DcRbY_DevTMMni3WynbKGm-MXmnH7NMU-4IU1hdegZfrStoBC2DngP77JILRO_TApaMPiAkIpxIpgvovnWKtCZ3-2BXDWnd4x_Ews3BUUVzCjvxAatLTiSq_lZTAL93Htqf3FQPa86Q2x4lyvJ-rFWBfpONzMGr-5g9ut1sGbk",
30
+ "q": "tV_kQ3ggaBSYRkckrpWnKJI3-uREyZVI_-PTK8kUS43Glz12sxVpYIIRqt57XtArkpHG9_YjUxj_ROF_LjSFaGbCxmccPqu9tHr7JIsuVWQlz8ooxXNW3lURMCtKd3k2xm9FhoFmtncP7nLbCfVaBIlTLhaXZXVZSSUv-vDDSy8",
31
+ "dp": "dVk-OeeVoRhdEkvOmIq8tcxDb_hlghIT0xV9ZRkoF6IOpiOqkSTZ8zcgx-C6epRjirrVMkVzte_V_Hv5Z9h3qsba8haEDNbN7BpVI6PDkr1kr_QVgWbHbZ65L4tsuq0lodojLCMPo_3F_GTfYSpXAdUGlofhkahHAgldmUd3z4E",
32
+ "dq": "O6MdHiYombBz5V_NKu6gORHjAEcAazv_9cvGirYiSzmB3AbkubvHm2kJQCLJdAKE4Tu3rZ6sPM2SWea_d8TjPNHVJ4GN4vl7dhWd8IUnJgK5ABrbzxi-rnpQHYOOh7w-i37Y4II58LMzdNclOKAJCkbRJ-1buIueYROuNBfoTxc",
33
+ "qi": "CPlT4vGuJbV-WMLIRL4c-VW0H0fwRUljqvv-_nNDQyZ98uFlXYLtmQS2h3VX4WjK1UR8Ca3m9110JNe8Va_7Tepuk13p4CyMG0ccGojzl50fvfrINj1zN6jz0lRI4cAPWdfGwgEs0tpvtW1saVrg9y89XefEx8Iq2Z0bLrlKGrU",
34
+ "key_ops": [
35
+ "sign"
36
+ ],
37
+ "ext": true,
38
+ "kid": "b2979595c62deb396306ba3edbdfb4a0"
39
+ }
40
+ """
41
+
42
+ client_id = "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCIsImtpZCI6InJlZ2lzdHJhdGlvbi10b2tlbiJ9.eyJqd2tzIjp7ImtleXMiOlt7Imt0eSI6IlJTQSIsImFsZyI6IlJTMzg0IiwibiI6ImxscHMtdWZSWUlWcGxSZHRGMkZCMXhuNWlDdW9qT3lIbUNta0xzTUxVMnpzVU5jcHd4bEZVbXY5eFFSSFRBcmZHbV92UmtLanR4NGRzVzhMTUc0NUV2REtfYTZUbUxGNUg1aERsQ3FyMGFYdUlucE4tYzNmNmY5ZDB6UnRCQ2MxOElLSExfSUJza29hSEdLNExWZFF5cElQY0xxTWlLa1BGWEktTlJ3dEpMVXBRdDZOSF9wOHZXMGZpSVJia2RDMXQycFNyUFgwMzA3ZXQzOElFX3Z2OF9SWm0zQ0FLZWYycG5iV3pSVUJsZWVReWJxYVIyOFZOTmFsbGl4ZWd0MVNoNVNoUUxmUXZBMFFtclNUMjVLenM1SzBkXzZlQUtsNHhQRHAxUV9kQzFONG15Z01aQWtiUlhLZHE0OVBnOUMtNTZwYnpFbXZPWWlNX0N0TVdrenI5dyIsImUiOiJBUUFCIiwia2V5X29wcyI6WyJ2ZXJpZnkiXSwiZXh0Ijp0cnVlLCJraWQiOiJiMjk3OTU5NWM2MmRlYjM5NjMwNmJhM2VkYmRmYjRhMCJ9LHsia3R5IjoiUlNBIiwiYWxnIjoiUlMzODQiLCJuIjoibGxwcy11ZlJZSVZwbFJkdEYyRkIxeG41aUN1b2pPeUhtQ21rTHNNTFUyenNVTmNwd3hsRlVtdjl4UVJIVEFyZkdtX3ZSa0tqdHg0ZHNXOExNRzQ1RXZES19hNlRtTEY1SDVoRGxDcXIwYVh1SW5wTi1jM2Y2ZjlkMHpSdEJDYzE4SUtITF9JQnNrb2FIR0s0TFZkUXlwSVBjTHFNaUtrUEZYSS1OUnd0SkxVcFF0Nk5IX3A4dlcwZmlJUmJrZEMxdDJwU3JQWDAzMDdldDM4SUVfdnY4X1JabTNDQUtlZjJwbmJXelJVQmxlZVF5YnFhUjI4Vk5OYWxsaXhlZ3QxU2g1U2hRTGZRdkEwUW1yU1QyNUt6czVLMGRfNmVBS2w0eFBEcDFRX2RDMU40bXlnTVpBa2JSWEtkcTQ5UGc5Qy01NnBiekVtdk9ZaU1fQ3RNV2t6cjl3IiwiZSI6IkFRQUIiLCJkIjoiQnk5ekhkcU9Td3FWTFNiZGM4eVdGTzJNMjFFYTBRRk15WnpUMTloQ1prNUNUT3E3ZUROdy1LdG9pVTNYQ205S2tqemZOb0JneXBPSjM3enF6X20waUk4eFpFWV9qNENMeFZMRmlBTXlDdWJmSm82cHcxSnZiUU5qUElDNDVRWHFzZl9LN2lPbXFScVpmTm5LNjNfTXdLR1NVMVRXLW9ENTA1Q09JSU9rTktqUTdLcElPbTU2RWZ5SDJfY1BVZm1sSHNCQ1JHeTZlUTJNOGNTSy11eFhjaFNyTnF0NDZuRDhBckNFOHF0ckdKbjF6SlRnV09rSDJsUzczdXprY19QNnJHZzNJZGlBYm1QbDRIV1UtUGxKMmp3eWtGYmJYaG56TDNUcHJ1Yzhva1JfY2RhNXU3S1NhOGRmVjVXUGpueWdUeFBITnQ1X2l1c3pQS3hhMFg5bndRIiwicCI6IjFEY1JiWV9EZXZUTU1uaTNXeW5iS0dtLU1YbW5IN05NVS00SVUxaGRlZ1pmclN0b0JDMkRuZ1A3N0pJTFJPX1RBcGFNUGlBa0lweElwZ3Zvdm5XS3RDWjMtMkJYRFduZDR4X0V3czNCVVVWekNqdnhBYXRMVGlTcV9sWlRBTDkzSHRxZjNGUVBhODZRMng0bHl2Si1yRldCZnBPTnpNR3ItNWc5dXQxc0diayIsInEiOiJ0Vl9rUTNnZ2FCU1lSa2NrcnBXbktKSTMtdVJFeVpWSV8tUFRL
OGtVUzQzR2x6MTJzeFZwWUlJUnF0NTdYdEFya3BIRzlfWWpVeGpfUk9GX0xqU0ZhR2JDeG1jY1BxdTl0SHI3SklzdVZXUWx6OG9veFhOVzNsVVJNQ3RLZDNrMnhtOUZob0ZtdG5jUDduTGJDZlZhQklsVExoYVhaWFZaU1NVdi12RERTeTgiLCJkcCI6ImRWay1PZWVWb1JoZEVrdk9tSXE4dGN4RGJfaGxnaElUMHhWOVpSa29GNklPcGlPcWtTVFo4emNneC1DNmVwUmppcnJWTWtWenRlX1ZfSHY1WjloM3FzYmE4aGFFRE5iTjdCcFZJNlBEa3Ixa3JfUVZnV2JIYlo2NUw0dHN1cTBsb2RvakxDTVBvXzNGX0dUZllTcFhBZFVHbG9maGthaEhBZ2xkbVVkM3o0RSIsImRxIjoiTzZNZEhpWW9tYkJ6NVZfTkt1NmdPUkhqQUVjQWF6dl85Y3ZHaXJZaVN6bUIzQWJrdWJ2SG0ya0pRQ0xKZEFLRTRUdTNyWjZzUE0yU1dlYV9kOFRqUE5IVko0R040dmw3ZGhXZDhJVW5KZ0s1QUJyYnp4aS1ybnBRSFlPT2g3dy1pMzdZNElJNThMTXpkTmNsT0tBSkNrYlJKLTFidUl1ZVlST3VOQmZvVHhjIiwicWkiOiJDUGxUNHZHdUpiVi1XTUxJUkw0Yy1WVzBIMGZ3UlVsanF2di1fbk5EUXlaOTh1RmxYWUx0bVFTMmgzVlg0V2pLMVVSOENhM205MTEwSk5lOFZhXzdUZXB1azEzcDRDeU1HMGNjR29qemw1MGZ2ZnJJTmoxek42anowbFJJNGNBUFdkZkd3Z0VzMHRwdnRXMXNhVnJnOXk4OVhlZkV4OElxMlowYkxybEtHclUiLCJrZXlfb3BzIjpbInNpZ24iXSwiZXh0Ijp0cnVlLCJraWQiOiJiMjk3OTU5NWM2MmRlYjM5NjMwNmJhM2VkYmRmYjRhMCJ9XX0sImFjY2Vzc1Rva2Vuc0V4cGlyZUluIjoxNSwiaWF0IjoxNzQwMzY3MDU0fQ.avoHoKI9g_2fmoRxZB0QnscRgEqb9xHip9CU_f-2U1I"
43
+
44
+
45
def test_bulk_exports():
    """
    Run five bulk exports against the SMART Health IT demo server.

    The exports cover system, group and patient level requests, each with a
    minimal and/or fully specified set of parameters. Output is written beneath
    a ``bulk_export_test`` directory inside the system temporary directory,
    which is removed and recreated at the start of every run.
    """
    import shutil

    # Initialise the Pathling context before touching the file system.
    context = PathlingContext.create()

    # Demo server endpoint and the scratch directory used for all exports.
    server_url = "https://bulk-data.smarthealthit.org/fhir"
    output_base = os.path.join(tempfile.gettempdir(), "bulk_export_test")

    # Always start from an empty output directory.
    if os.path.exists(output_base):
        shutil.rmtree(output_base)
    os.makedirs(output_base)

    def run_export(title, done_message, **export_args):
        # Announce the scenario, run it against the demo server, report success.
        print(f"\n=== Testing {title} ===")
        context.read.bulk(fhir_endpoint_url=server_url, **export_args)
        print(done_message)

    # Test 1: System level export with all parameters.
    run_export(
        "system level export with all parameters",
        "System export completed successfully",
        output_dir=f"{output_base}/system_detailed",
        output_format="application/fhir+ndjson",
        since=datetime(2015, 1, 1, tzinfo=timezone.utc),
        types=["Patient", "Observation"],
        elements=["id", "status"],
        include_associated_data=["LatestProvenanceResources"],
        type_filters=["Patient?status=active"],
        output_extension="ndjson",
        timeout=3600,
        auth_config={
            "enabled": True,
            "client_id": client_id,
            "private_key_jwk": jwk,
            "use_smart": True,
            "use_form_for_basic_auth": False,
            "scope": "system/*.read",
            "token_expiry_tolerance": 30,
        },
    )

    # Test 2: Group level export with minimal parameters.
    run_export(
        "group level export with minimal parameters",
        "Group export completed successfully",
        output_dir=f"{output_base}/group_basic",
        group_id="BMCHealthNet",
    )

    # Test 3: Group level export with all parameters.
    run_export(
        "group level export with all parameters",
        "Group export completed successfully",
        output_dir=f"{output_base}/group_detailed",
        group_id="BMCHealthNet",
        output_format="application/fhir+ndjson",
        since=datetime(2015, 1, 1, tzinfo=timezone.utc),
        types=["Patient", "Condition", "Observation"],
        elements=["id", "status"],
        include_associated_data=["LatestProvenanceResources"],
        type_filters=["Patient?status=active"],
        output_extension="ndjson",
        timeout=1800,
        max_concurrent_downloads=8,
    )

    # Test 4: Patient level export with minimal parameters.
    run_export(
        "patient level export with minimal parameters",
        "Patient export completed successfully",
        output_dir=f"{output_base}/patient_basic",
        patients=[
            "Patient/58c297c4-d684-4677-8024-01131d93835e",
            "Patient/118616a4-f0b2-411f-8050-39d5d27c738c",
        ],
    )

    # Test 5: Patient level export with all parameters.
    run_export(
        "patient level export with all parameters",
        "Patient export completed successfully",
        output_dir=f"{output_base}/patient_detailed",
        patients=[
            "Patient/58c297c4-d684-4677-8024-01131d93835e",
            "Patient/118616a4-f0b2-411f-8050-39d5d27c738c",
            "Patient/21fba439-ca79-411f-a081-37a432a78f3a",
        ],
        output_format="application/fhir+ndjson",
        since=datetime(2020, 1, 1, tzinfo=timezone.utc),
        types=["Observation", "MedicationRequest"],
        elements=["id", "status", "code"],
        include_associated_data=["LatestProvenanceResources"],
        type_filters=["Observation?category=vital-signs"],
        output_extension="ndjson",
        timeout=2400,
        max_concurrent_downloads=3,
    )

    print("\nAll bulk exports completed successfully!")
    print(f"Output written to: {output_base}")
146
+
147
+
148
+ if __name__ == "__main__":
149
+ test_bulk_exports()
@@ -2,8 +2,8 @@
2
2
  # Auto generated from POM project version.
3
3
  # Please do not modify.
4
4
  #
5
- __version__="7.0.1"
6
- __java_version__="7.0.1"
5
+ __version__="7.2.0.dev0"
6
+ __java_version__="7.2.0-SNAPSHOT"
7
7
  __scala_version__="2.12"
8
- __delta_version__="3.2.0"
8
+ __delta_version__="3.3.0"
9
9
  __hadoop_version__="3.3.4"
@@ -0,0 +1,213 @@
1
+ # Copyright 2025 Commonwealth Scientific and Industrial Research
2
+ # Organisation (CSIRO) ABN 41 687 119 230.
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+
16
+ from datetime import datetime
17
+ from typing import List, Optional
18
+
19
+ from pathling import PathlingContext
20
+
21
+
22
class BulkExportClient:
    """
    A client for exporting data from the FHIR Bulk Data Access API.

    This is a thin Python wrapper around the Java
    ``au.csiro.fhir.export.BulkExportClient``; all work is delegated to the
    wrapped Java object through the supplied JVM gateway.
    """

    # Ordered mapping of ``auth_config`` keys to the Java ``AuthConfig`` builder
    # method that consumes each value.
    _AUTH_SETTERS = (
        ("enabled", "enabled"),
        ("use_smart", "useSMART"),
        ("token_endpoint", "tokenEndpoint"),
        ("client_id", "clientId"),
        ("client_secret", "clientSecret"),
        ("private_key_jwk", "privateKeyJWK"),
        ("use_form_for_basic_auth", "useFormForBasicAuth"),
        ("scope", "scope"),
        ("token_expiry_tolerance", "tokenExpiryTolerance"),
    )

    def __init__(self, java_client):
        """
        Create a new BulkExportClient that wraps a Java BulkExportClient.

        :param java_client: The Java BulkExportClient instance to wrap
        """
        self._java_client = java_client

    def export(self):
        """
        Export data from the FHIR server.

        :return: The result of the export operation
        """
        return self._java_client.export()

    @classmethod
    def create(cls, jvm, fhir_endpoint_url: str, output_dir: str,
               group_id: Optional[str] = None,
               patients: Optional[List[str]] = None,
               output_format: str = "application/fhir+ndjson",
               since: Optional[datetime] = None,
               types: Optional[List[str]] = None,
               elements: Optional[List[str]] = None,
               include_associated_data: Optional[List[str]] = None,
               type_filters: Optional[List[str]] = None,
               output_extension: str = "ndjson",
               timeout: Optional[int] = None,
               max_concurrent_downloads: int = 10,
               auth_config: Optional[dict] = None) -> 'BulkExportClient':
        """
        Create a BulkExportClient for the appropriate export level based on the provided arguments.

        The export level is chosen as follows: a ``group_id`` selects a
        group-level export (any ``patients`` are then ignored); otherwise a
        non-empty ``patients`` list selects a patient-level export; otherwise a
        system-level export is performed. Note that an empty ``patients`` list
        therefore results in a system-level export.

        :param jvm: The JVM instance
        :param fhir_endpoint_url: The URL of the FHIR server to export from
        :param output_dir: The directory to write the output files to
        :param group_id: Optional group ID for group-level export
        :param patients: Optional list of patient references for patient-level export
        :param output_format: The format of the output data
        :param since: Only include resources modified after this timestamp; must be
            timezone-aware
        :param types: List of FHIR resource types to include
        :param elements: List of FHIR elements to include
        :param include_associated_data: Pre-defined set of FHIR resources to include
        :param type_filters: FHIR search queries to filter resources
        :param output_extension: File extension for output files
        :param timeout: Optional timeout duration in seconds
        :param max_concurrent_downloads: Maximum number of concurrent downloads
        :param auth_config: Optional authentication configuration dictionary with the following possible keys:
            - enabled: Whether authentication is enabled (default: False)
            - client_id: The client ID to use for authentication
            - private_key_jwk: The private key in JWK format
            - client_secret: The client secret to use for authentication
            - token_endpoint: The token endpoint URL
            - use_smart: Whether to use SMART authentication (default: True)
            - use_form_for_basic_auth: Whether to use form-based basic auth (default: False)
            - scope: The scope to request
            - token_expiry_tolerance: The token expiry tolerance in seconds (default: 120)
        :return: A BulkExportClient configured for the appropriate export level
        :raises ValueError: If ``since`` has no timezone information
        """
        client_class = jvm.au.csiro.fhir.export.BulkExportClient

        # Determine the export level based on the provided arguments
        if group_id is not None:
            # Group-level export
            builder = client_class.groupBuilder(group_id)
        elif patients:
            # Patient-level export
            builder = client_class.patientBuilder()
            for patient in patients:
                builder.withPatient(jvm.au.csiro.fhir.model.Reference.of(patient))
        else:
            # System-level export
            builder = client_class.systemBuilder()

        # Configure the builder with common settings
        cls._configure_builder(jvm, builder, fhir_endpoint_url, output_dir, output_format,
                               output_extension, max_concurrent_downloads, since, types,
                               elements, include_associated_data, type_filters, timeout,
                               auth_config)

        # Build and return the client
        return cls(builder.build())

    @staticmethod
    def _format_instant(since: datetime) -> str:
        """
        Render a timezone-aware datetime as a UTC ISO-8601 instant string.

        The value is converted to UTC and always terminated with ``Z`` (for
        example ``2015-01-01T00:00:00.000Z``). Emitting a numeric offset such as
        ``+10:00`` is avoided because ``java.time.Instant.parse`` does not accept
        offsets on every supported Java version, and because ``%z`` may include
        seconds for unusual offsets. Sub-millisecond digits are truncated.

        :param since: The timestamp to format
        :return: The timestamp as a ``Z``-terminated instant with millisecond precision
        :raises ValueError: If ``since`` has no timezone information
        """
        # Imported locally so that this block does not rely on a module-level import.
        from datetime import timezone

        if since.tzinfo is None:
            raise ValueError("datetime must include timezone information")
        if since.utcoffset() is None:
            # A tzinfo that reports no offset: keep treating the wall-clock value as UTC.
            utc_value = since.replace(tzinfo=None)
        else:
            utc_value = since.astimezone(timezone.utc).replace(tzinfo=None)
        return utc_value.isoformat(timespec="milliseconds") + "Z"

    @staticmethod
    def _configure_builder(jvm, builder, fhir_endpoint_url: str, output_dir: str,
                           output_format: str, output_extension: str,
                           max_concurrent_downloads: int,
                           since: Optional[datetime], types: Optional[List[str]],
                           elements: Optional[List[str]],
                           include_associated_data: Optional[List[str]],
                           type_filters: Optional[List[str]], timeout: Optional[int],
                           auth_config: Optional[dict]):
        """
        Configure the builder with common settings.

        :param jvm: The JVM instance
        :param builder: The builder to configure
        :param fhir_endpoint_url: The URL of the FHIR server
        :param output_dir: Output directory
        :param output_format: Output format
        :param output_extension: File extension for output files
        :param max_concurrent_downloads: Maximum number of concurrent downloads
        :param since: Timestamp filter; must be timezone-aware when provided
        :param types: Resource types to include
        :param elements: Elements to include
        :param include_associated_data: Associated data to include
        :param type_filters: Resource filters
        :param timeout: Optional timeout duration in seconds
        :param auth_config: Optional authentication configuration
        :raises ValueError: If ``since`` has no timezone information
        """
        # Configure basic settings
        builder.withFhirEndpointUrl(fhir_endpoint_url)
        builder.withOutputDir(output_dir)
        builder.withOutputFormat(output_format)
        builder.withOutputExtension(output_extension)
        builder.withMaxConcurrentDownloads(max_concurrent_downloads)

        # Configure timeout if provided
        if timeout is not None:
            builder.withTimeout(jvm.java.time.Duration.ofSeconds(timeout))

        # Configure since timestamp if provided
        if since is not None:
            instant_str = BulkExportClient._format_instant(since)
            builder.withSince(jvm.java.time.Instant.parse(instant_str))

        # Configure resource types if provided
        for type_ in types or ():
            builder.withType(type_)

        # Configure elements if provided
        for element in elements or ():
            builder.withElement(element)

        # Configure associated data if provided
        for data in include_associated_data or ():
            builder.withIncludeAssociatedDatum(
                jvm.au.csiro.fhir.export.ws.AssociatedData.fromCode(data))

        # Configure type filters if provided
        for filter_ in type_filters or ():
            builder.withTypeFilter(filter_)

        # Configure authentication if provided
        if auth_config is not None:
            auth_builder = jvm.au.csiro.fhir.auth.AuthConfig.builder()

            # Set defaults to match Java class
            auth_builder.enabled(False)
            auth_builder.useSMART(True)
            auth_builder.useFormForBasicAuth(False)
            auth_builder.tokenExpiryTolerance(120)

            # Map Python config to Java builder methods; keys that are absent
            # leave the defaults above in place.
            for key, setter_name in BulkExportClient._AUTH_SETTERS:
                if key in auth_config:
                    getattr(auth_builder, setter_name)(auth_config[key])

            builder.withAuthConfig(auth_builder.build())
@@ -16,7 +16,9 @@
16
16
  from typing import Callable, Optional
17
17
 
18
18
  from pathling.core import SparkConversionsMixin, StringMapper
19
- from pathling.datasource import DataSource
19
+ from typing import TYPE_CHECKING
20
+ if TYPE_CHECKING:
21
+ from pathling.datasource import DataSource
20
22
 
21
23
 
22
24
  class ImportMode:
@@ -49,7 +51,7 @@ class DataSinks(SparkConversionsMixin):
49
51
  A class for writing FHIR data to a variety of different targets.
50
52
  """
51
53
 
52
- def __init__(self, datasource: DataSource):
54
+ def __init__(self, datasource: "DataSource"):
53
55
  SparkConversionsMixin.__init__(self, datasource.spark)
54
56
  self._datasinks = (
55
57
  self.spark._jvm.au.csiro.pathling.library.io.sink.DataSinkBuilder(
@@ -13,17 +13,25 @@
13
13
  # See the License for the specific language governing permissions and
14
14
  # limitations under the License.
15
15
 
16
-
16
+ from datetime import datetime
17
17
  from typing import Dict, Sequence, Optional, Callable
18
+ from typing import List, TYPE_CHECKING
18
19
 
19
20
  from py4j.java_collections import SetConverter
20
21
  from py4j.java_gateway import JavaObject
21
22
  from pyspark.sql import DataFrame
22
23
 
23
24
  from pathling import PathlingContext
24
- from pathling.core import ExpOrStr, StringToStringSetMapper, SparkConversionsMixin
25
+ from pathling.core import (
26
+ ExpOrStr,
27
+ StringToStringSetMapper,
28
+ SparkConversionsMixin,
29
+ )
25
30
  from pathling.fhir import MimeType
26
31
 
32
+ if TYPE_CHECKING:
33
+ from pathling.datasink import DataSinks
34
+
27
35
 
28
36
  class DataSource(SparkConversionsMixin):
29
37
  """
@@ -45,13 +53,21 @@ class DataSource(SparkConversionsMixin):
45
53
  """
46
54
  return self._wrap_df(self._jds.read(resource_code))
47
55
 
56
def resource_types(self):
    """
    List the resource types available in this data source.

    :return: A list of strings, one resource type code per available type.
    """
    codes = []
    # Convert each Java resource type object into its string code.
    for java_type in self._jds.getResourceTypes():
        codes.append(java_type.toCode())
    return codes
63
+
48
64
  @property
49
65
  def write(self) -> "DataSinks":
50
66
  """
51
67
  Provides access to a :class:`DataSinks` object that can be used to persist data.
52
68
  """
69
+ # Import here to avoid circular dependency
53
70
  from pathling.datasink import DataSinks
54
-
55
71
  return DataSinks(self)
56
72
 
57
73
  def extract(
@@ -225,3 +241,75 @@ class DataSources(SparkConversionsMixin):
225
241
  return self._wrap_ds(self._jdataSources.tables(schema))
226
242
  else:
227
243
  return self._wrap_ds(self._jdataSources.tables())
244
+
245
def bulk(
    self,
    fhir_endpoint_url: str,
    output_dir: str,
    group_id: Optional[str] = None,
    patients: Optional[List[str]] = None,
    output_format: str = "application/fhir+ndjson",
    since: Optional[datetime] = None,
    types: Optional[List[str]] = None,
    elements: Optional[List[str]] = None,
    include_associated_data: Optional[List[str]] = None,
    type_filters: Optional[List[str]] = None,
    output_extension: str = "ndjson",
    timeout: Optional[int] = None,
    max_concurrent_downloads: int = 10,
    auth_config: Optional[Dict] = None
) -> DataSource:
    """
    Creates a data source from a FHIR Bulk Data Access API endpoint.

    The export is run to completion into ``output_dir`` first; the exported
    files are then loaded from that directory as an NDJSON data source.

    :param fhir_endpoint_url: The URL of the FHIR server to export from
    :param output_dir: The directory to write the output files to
    :param group_id: Optional group ID for group-level export
    :param patients: Optional list of patient references for patient-level export
    :param output_format: The format of the output data
    :param since: Only include resources modified after this timestamp
    :param types: List of FHIR resource types to include
    :param elements: List of FHIR elements to include
    :param include_associated_data: Pre-defined set of FHIR resources to include
    :param type_filters: FHIR search queries to filter resources
    :param output_extension: File extension for output files. Defaults to "ndjson"
    :param timeout: Optional timeout duration in seconds
    :param max_concurrent_downloads: Maximum number of concurrent downloads. Defaults to 10
    :param auth_config: Optional authentication configuration dictionary with the following possible keys:
        - enabled: Whether authentication is enabled (default: False)
        - client_id: The client ID to use for authentication
        - private_key_jwk: The private key in JWK format
        - client_secret: The client secret to use for authentication
        - token_endpoint: The token endpoint URL
        - use_smart: Whether to use SMART authentication (default: True)
        - use_form_for_basic_auth: Whether to use form-based basic auth (default: False)
        - scope: The scope to request
        - token_expiry_tolerance: The token expiry tolerance in seconds (default: 120)
    :return: A DataSource object that can be used to run queries against the data
    """
    # Imported lazily, inside the method, rather than at module import time.
    from pathling.bulk import BulkExportClient

    # Gather every export setting once and hand them to the client factory.
    export_options = dict(
        fhir_endpoint_url=fhir_endpoint_url,
        output_dir=output_dir,
        group_id=group_id,
        patients=patients,
        output_format=output_format,
        since=since,
        types=types,
        elements=elements,
        include_associated_data=include_associated_data,
        type_filters=type_filters,
        output_extension=output_extension,
        timeout=timeout,
        max_concurrent_downloads=max_concurrent_downloads,
        auth_config=auth_config,
    )
    exporter = BulkExportClient.create(self.spark._jvm, **export_options)

    # Run the export before attempting to read anything back.
    exporter.export()

    # NOTE(review): output_extension is not forwarded to ndjson(); confirm that
    # files exported with a non-default extension are still picked up.
    return self.ndjson(output_dir)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: pathling
3
- Version: 7.0.1
3
+ Version: 7.2.0.dev0
4
4
  Summary: Python API for Pathling
5
5
  Home-page: https://github.com/aehrc/pathling
6
6
  Author: Australian e-Health Research Centre, CSIRO
@@ -15,7 +15,7 @@ Classifier: Programming Language :: Python :: 3.10
15
15
  Requires-Python: >=3.8
16
16
  Description-Content-Type: text/markdown
17
17
  License-File: LICENSE
18
- Requires-Dist: pyspark<3.6.0,>=3.5.0
18
+ Requires-Dist: pyspark<3.6.0,>=3.5.3
19
19
  Requires-Dist: deprecated>=1.2.13
20
20
 
21
21
  Python API for Pathling
@@ -3,6 +3,7 @@ MANIFEST.in
3
3
  README.md
4
4
  setup.cfg
5
5
  setup.py
6
+ examples/bulk.py
6
7
  examples/designation.py
7
8
  examples/display.py
8
9
  examples/encode_bundles.py
@@ -21,6 +22,7 @@ examples/data/resources/Condition.ndjson
21
22
  examples/data/resources/Patient.ndjson
22
23
  pathling/__init__.py
23
24
  pathling/_version.py
25
+ pathling/bulk.py
24
26
  pathling/coding.py
25
27
  pathling/context.py
26
28
  pathling/core.py
@@ -0,0 +1,2 @@
1
+ pyspark<3.6.0,>=3.5.3
2
+ deprecated>=1.2.13
@@ -62,7 +62,7 @@ setup(
62
62
  ],
63
63
  license="Apache License, version 2.0",
64
64
  python_requires=">=3.8",
65
- install_requires=["pyspark>=3.5.0,<3.6.0", "deprecated>=1.2.13"],
65
+ install_requires=["pyspark>=3.5.3,<3.6.0", "deprecated>=1.2.13"],
66
66
  include_package_data=True,
67
67
  data_files=[
68
68
  ("share/pathling/examples", glob.glob("examples/*.py")),
@@ -1,2 +0,0 @@
1
- pyspark<3.6.0,>=3.5.0
2
- deprecated>=1.2.13
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes