pathling 8.0.0.dev0__tar.gz → 8.0.0.dev1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48) hide show
  1. {pathling-8.0.0.dev0/pathling.egg-info → pathling-8.0.0.dev1}/PKG-INFO +4 -3
  2. pathling-8.0.0.dev1/examples/bulk.py +151 -0
  3. {pathling-8.0.0.dev0 → pathling-8.0.0.dev1}/pathling/__init__.py +0 -3
  4. {pathling-8.0.0.dev0 → pathling-8.0.0.dev1}/pathling/_version.py +2 -2
  5. pathling-8.0.0.dev1/pathling/bulk.py +234 -0
  6. {pathling-8.0.0.dev0 → pathling-8.0.0.dev1}/pathling/context.py +13 -0
  7. {pathling-8.0.0.dev0 → pathling-8.0.0.dev1}/pathling/datasink.py +4 -2
  8. {pathling-8.0.0.dev0 → pathling-8.0.0.dev1}/pathling/datasource.py +119 -65
  9. {pathling-8.0.0.dev0 → pathling-8.0.0.dev1/pathling.egg-info}/PKG-INFO +4 -3
  10. {pathling-8.0.0.dev0 → pathling-8.0.0.dev1}/pathling.egg-info/SOURCES.txt +4 -4
  11. pathling-8.0.0.dev1/pathling.egg-info/requires.txt +2 -0
  12. {pathling-8.0.0.dev0 → pathling-8.0.0.dev1}/setup.py +1 -1
  13. {pathling-8.0.0.dev0 → pathling-8.0.0.dev1}/tests/test_datasource.py +23 -13
  14. pathling-8.0.0.dev1/tests/test_view.py +47 -0
  15. pathling-8.0.0.dev0/examples/query.py +0 -148
  16. pathling-8.0.0.dev0/pathling/query.py +0 -245
  17. pathling-8.0.0.dev0/pathling.egg-info/requires.txt +0 -2
  18. pathling-8.0.0.dev0/tests/test_query.py +0 -160
  19. {pathling-8.0.0.dev0 → pathling-8.0.0.dev1}/LICENSE +0 -0
  20. {pathling-8.0.0.dev0 → pathling-8.0.0.dev1}/MANIFEST.in +0 -0
  21. {pathling-8.0.0.dev0 → pathling-8.0.0.dev1}/README.md +0 -0
  22. {pathling-8.0.0.dev0 → pathling-8.0.0.dev1}/examples/data/bundles/Bennett146_Swaniawski813_704c9750-f6e6-473b-ee83-fbd48e07fe3f.json +0 -0
  23. {pathling-8.0.0.dev0 → pathling-8.0.0.dev1}/examples/data/bundles/Dino214_Parisian75_40d82b80-b682-cd8b-da6d-396809878641.json +0 -0
  24. {pathling-8.0.0.dev0 → pathling-8.0.0.dev1}/examples/data/resources/Condition.ndjson +0 -0
  25. {pathling-8.0.0.dev0 → pathling-8.0.0.dev1}/examples/data/resources/Patient.ndjson +0 -0
  26. {pathling-8.0.0.dev0 → pathling-8.0.0.dev1}/examples/designation.py +0 -0
  27. {pathling-8.0.0.dev0 → pathling-8.0.0.dev1}/examples/display.py +0 -0
  28. {pathling-8.0.0.dev0 → pathling-8.0.0.dev1}/examples/encode_bundles.py +0 -0
  29. {pathling-8.0.0.dev0 → pathling-8.0.0.dev1}/examples/encode_resources.py +0 -0
  30. {pathling-8.0.0.dev0 → pathling-8.0.0.dev1}/examples/fhir_view.py +0 -0
  31. {pathling-8.0.0.dev0 → pathling-8.0.0.dev1}/examples/member_of.py +0 -0
  32. {pathling-8.0.0.dev0 → pathling-8.0.0.dev1}/examples/member_of_old.py +0 -0
  33. {pathling-8.0.0.dev0 → pathling-8.0.0.dev1}/examples/property_of.py +0 -0
  34. {pathling-8.0.0.dev0 → pathling-8.0.0.dev1}/examples/subsumes.py +0 -0
  35. {pathling-8.0.0.dev0 → pathling-8.0.0.dev1}/examples/subsumes_old.py +0 -0
  36. {pathling-8.0.0.dev0 → pathling-8.0.0.dev1}/examples/translate.py +0 -0
  37. {pathling-8.0.0.dev0 → pathling-8.0.0.dev1}/examples/translate_old.py +0 -0
  38. {pathling-8.0.0.dev0 → pathling-8.0.0.dev1}/pathling/coding.py +0 -0
  39. {pathling-8.0.0.dev0 → pathling-8.0.0.dev1}/pathling/core.py +0 -0
  40. {pathling-8.0.0.dev0 → pathling-8.0.0.dev1}/pathling/fhir.py +0 -0
  41. {pathling-8.0.0.dev0 → pathling-8.0.0.dev1}/pathling/functions.py +0 -0
  42. {pathling-8.0.0.dev0 → pathling-8.0.0.dev1}/pathling/udfs.py +0 -0
  43. {pathling-8.0.0.dev0 → pathling-8.0.0.dev1}/pathling.egg-info/dependency_links.txt +0 -0
  44. {pathling-8.0.0.dev0 → pathling-8.0.0.dev1}/pathling.egg-info/top_level.txt +0 -0
  45. {pathling-8.0.0.dev0 → pathling-8.0.0.dev1}/setup.cfg +0 -0
  46. {pathling-8.0.0.dev0 → pathling-8.0.0.dev1}/tests/test_encoders.py +0 -0
  47. {pathling-8.0.0.dev0 → pathling-8.0.0.dev1}/tests/test_functions.py +0 -0
  48. {pathling-8.0.0.dev0 → pathling-8.0.0.dev1}/tests/test_udfs.py +0 -0
@@ -1,6 +1,6 @@
1
- Metadata-Version: 2.2
1
+ Metadata-Version: 2.4
2
2
  Name: pathling
3
- Version: 8.0.0.dev0
3
+ Version: 8.0.0.dev1
4
4
  Summary: Python API for Pathling
5
5
  Home-page: https://github.com/aehrc/pathling
6
6
  Author: Australian e-Health Research Centre, CSIRO
@@ -15,7 +15,7 @@ Classifier: Programming Language :: Python :: 3.10
15
15
  Requires-Python: >=3.8
16
16
  Description-Content-Type: text/markdown
17
17
  License-File: LICENSE
18
- Requires-Dist: pyspark<3.6.0,>=3.5.0
18
+ Requires-Dist: pyspark<3.6.0,>=3.5.2
19
19
  Requires-Dist: deprecated>=1.2.13
20
20
  Dynamic: author
21
21
  Dynamic: author-email
@@ -25,6 +25,7 @@ Dynamic: description-content-type
25
25
  Dynamic: home-page
26
26
  Dynamic: keywords
27
27
  Dynamic: license
28
+ Dynamic: license-file
28
29
  Dynamic: requires-dist
29
30
  Dynamic: requires-python
30
31
  Dynamic: summary
@@ -0,0 +1,151 @@
1
+ # Copyright 2025 Commonwealth Scientific and Industrial Research
2
+ # Organisation (CSIRO) ABN 41 687 119 230.
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+
16
+ import os
17
+ import tempfile
18
+ from datetime import datetime, timezone
19
+
20
+ from pathling import PathlingContext
21
+
22
+ jwk = """
23
+ {
24
+ "kty": "RSA",
25
+ "alg": "RS384",
26
+ "n": "jcrw7Jio4RVAMlo2clxqkmT9nmg_w1pXhpChg0jp41fKKfDXAtlIRhL_Ij8_N71l5KVxNQWeNeGsO0op73Rj28HR885fxJ2jimYFyD0fsftjjHvYkV_GskFubhcURbHAvx3lVrwLFyILq8sydF2G48A-XSfVAHPE6yEimusRRNihPmbM-MDlBuQkLBtwnT0bDXUEIlpDvlPB30Im2QOgvYTsMAnI-MzemOAtF5Xe5wCsj27nityK5AlnAJLFXfeeFqySoIyR7FaaQ1eay40MV-ZyDULSPtV4C-58eh3V2SL-qkQEsfQSuu3rqb-lgOz1-gl4FqTIz2JGtpEsTM7Uww",
27
+ "e": "AQAB",
28
+ "d": "kwNFEgpaxeAeHTtrypSZoXjLM7u-YM3czV9w8huCrjSg1SSXgFykAJX6zT40BHJbMv8xhcgEQZBMub69vBqoAOirWPky5KiNMPG7VirlRDaGJSJDH-UJQzaUJCM3c-bYzQXpDE3rxBBkCXHJcJQabAkwDa8-4F26YFjWGqUMsFOE1sxTXPnJG8qBTYxTSFxnWNf6U_kbGOQlWtHd1TgxPjXzmU6H472igte6SZEATh9eyYgPJrAqnw4qRNGy5pnAHkuIrCHIMaktR34LKFHHl3_xsLSHo9QmPfEdR5soKIKQIph2KRYArx4U03larr7vbZMSOypLqBtoRVlzvx0h",
29
+ "p": "wmc-aV7SbViyP50B0s_6wrDlOjGid9kO7QePuohFLmJWuC8TP8VYeSBscCPf6gX40O8agiCrsBuz2ZUTZDlYBXPHRiYprdV11SgCXkfTw6-G5CD2Xjq43gcTzFOy2q2FlU5YtBkPVTrsYMH8p6F09sZRu-4rnCOpgoahbAawGXs",
30
+ "q": "urhDpiHoZj0SBjmfn8GTHNh3FoUE8xiG3s0e64xSIBE3PzXCmskZpJuKGqgPX-wSXer2_WtmJUzOCucajcd4HQp222PWMKhc1HVZMj4073XQKDGqe_M7ZH29RbS9x93zhNgvFFiSdubQTg9SHJXL3Ja0f3IYxReha13G9YDSG1k",
31
+ "dp": "thWN15QA9HpHOl4M_y_eZ8zYZ5Fl42tjF5Alh0lrwu5I22r8VJa7L3i3GLIBYGkHjGroIUoIhYLtCbcf2pf7Yd_3njTQhQmSvHwk-7m7F2aoqbRWDhxiW1O1r4QV2cz9ecNQQh_WxLXUASyxQTFxJFLM64FBR5X_h0oil9QLzVE",
32
+ "dq": "W_t8L_JSR1Ncdr6aWRwGOdaVS_25g3wYrNeFnOoiZvO0MKpuNMxOmp2Y-irCcDGelq-yfwMSbduZQRu6JBAYps3J4agcExpNqMgqaarlbvWt1q8o2ijnoEilHhq8xyIa3d2Vy8MaXAK2qU242KYeqIuBXas6cpWCip7G7ZhJaPk",
33
+ "qi": "bZvyduEpMUYyGXhd-MnHyKOiJtCUF_kbM0hUGr8AfJ6_bi8MEjBNMt5qZKGYYT6bXFJWiTTUFq6nZLmQJ7cY5lv57gAQOTlLy6hp-nqkNrH1P-5UAzEbUMhIdnPQcDEkBEjpfObHlwtrmaFFVKgpm5vqFFD-szMHPuZ43o0vS98",
34
+ "key_ops": [
35
+ "sign"
36
+ ],
37
+ "ext": true,
38
+ "kid": "b31ab1cd8db2c39287b3267a2914600c"
39
+ }
40
+ """
41
+
42
+ client_id = "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCIsImtpZCI6InJlZ2lzdHJhdGlvbi10b2tlbiJ9.eyJqd2tzIjp7ImtleXMiOlt7Imt0eSI6IlJTQSIsImFsZyI6IlJTMzg0IiwibiI6Impjcnc3SmlvNFJWQU1sbzJjbHhxa21UOW5tZ193MXBYaHBDaGcwanA0MWZLS2ZEWEF0bElSaExfSWo4X043MWw1S1Z4TlFXZU5lR3NPMG9wNzNSajI4SFI4ODVmeEoyamltWUZ5RDBmc2Z0ampIdllrVl9Hc2tGdWJoY1VSYkhBdngzbFZyd0xGeUlMcThzeWRGMkc0OEEtWFNmVkFIUEU2eUVpbXVzUlJOaWhQbWJNLU1EbEJ1UWtMQnR3blQwYkRYVUVJbHBEdmxQQjMwSW0yUU9ndllUc01BbkktTXplbU9BdEY1WGU1d0NzajI3bml0eUs1QWxuQUpMRlhmZWVGcXlTb0l5UjdGYWFRMWVheTQwTVYtWnlEVUxTUHRWNEMtNThlaDNWMlNMLXFrUUVzZlFTdXUzcnFiLWxnT3oxLWdsNEZxVEl6MkpHdHBFc1RNN1V3dyIsImUiOiJBUUFCIiwia2V5X29wcyI6WyJ2ZXJpZnkiXSwiZXh0Ijp0cnVlLCJraWQiOiJiMzFhYjFjZDhkYjJjMzkyODdiMzI2N2EyOTE0NjAwYyJ9LHsia3R5IjoiUlNBIiwiYWxnIjoiUlMzODQiLCJuIjoiamNydzdKaW80UlZBTWxvMmNseHFrbVQ5bm1nX3cxcFhocENoZzBqcDQxZktLZkRYQXRsSVJoTF9JajhfTjcxbDVLVnhOUVdlTmVHc08wb3A3M1JqMjhIUjg4NWZ4SjJqaW1ZRnlEMGZzZnRqakh2WWtWX0dza0Z1YmhjVVJiSEF2eDNsVnJ3TEZ5SUxxOHN5ZEYyRzQ4QS1YU2ZWQUhQRTZ5RWltdXNSUk5paFBtYk0tTURsQnVRa0xCdHduVDBiRFhVRUlscER2bFBCMzBJbTJRT2d2WVRzTUFuSS1NemVtT0F0RjVYZTV3Q3NqMjduaXR5SzVBbG5BSkxGWGZlZUZxeVNvSXlSN0ZhYVExZWF5NDBNVi1aeURVTFNQdFY0Qy01OGVoM1YyU0wtcWtRRXNmUVN1dTNycWItbGdPejEtZ2w0RnFUSXoySkd0cEVzVE03VXd3IiwiZSI6IkFRQUIiLCJkIjoia3dORkVncGF4ZUFlSFR0cnlwU1pvWGpMTTd1LVlNM2N6Vjl3OGh1Q3JqU2cxU1NYZ0Z5a0FKWDZ6VDQwQkhKYk12OHhoY2dFUVpCTXViNjl2QnFvQU9pcldQa3k1S2lOTVBHN1ZpcmxSRGFHSlNKREgtVUpRemFVSkNNM2MtYll6UVhwREUzcnhCQmtDWEhKY0pRYWJBa3dEYTgtNEYyNllGaldHcVVNc0ZPRTFzeFRYUG5KRzhxQlRZeFRTRnhuV05mNlVfa2JHT1FsV3RIZDFUZ3hQalh6bVU2SDQ3MmlndGU2U1pFQVRoOWV5WWdQSnJBcW53NHFSTkd5NXBuQUhrdUlyQ0hJTWFrdFIzNExLRkhIbDNfeHNMU0hvOVFtUGZFZFI1c29LSUtRSXBoMktSWUFyeDRVMDNsYXJyN3ZiWk1TT3lwTHFCdG9SVmx6dngwaCIsInAiOiJ3bWMtYVY3U2JWaXlQNTBCMHNfNndyRGxPakdpZDlrTzdRZVB1b2hGTG1KV3VDOFRQOFZZZVNCc2NDUGY2Z1g0ME84YWdpQ3JzQnV6MlpVVFpEbFlCWFBIUmlZcHJkVjExU2dDWGtmVHc2LUc1Q0QyWGpxNDNnY1R6Rk95MnEyRmxVNVl0QmtQVlRyc1lNSDhwNkYwOXNaUnUtNHJuQ09wZ29haGJBYXdHWHMiLCJxIjoidXJoRHBpSG9aajBTQmptZm44R1RITmgzRm9VRTh4aUczczBlNjR4
U0lCRTNQelhDbXNrWnBKdUtHcWdQWC13U1hlcjJfV3RtSlV6T0N1Y2FqY2Q0SFFwMjIyUFdNS2hjMUhWWk1qNDA3M1hRS0RHcWVfTTdaSDI5UmJTOXg5M3poTmd2RkZpU2R1YlFUZzlTSEpYTDNKYTBmM0lZeFJlaGExM0c5WURTRzFrIiwiZHAiOiJ0aFdOMTVRQTlIcEhPbDRNX3lfZVo4ellaNUZsNDJ0akY1QWxoMGxyd3U1STIycjhWSmE3TDNpM0dMSUJZR2tIakdyb0lVb0loWUx0Q2JjZjJwZjdZZF8zbmpUUWhRbVN2SHdrLTdtN0YyYW9xYlJXRGh4aVcxTzFyNFFWMmN6OWVjTlFRaF9XeExYVUFTeXhRVEZ4SkZMTTY0RkJSNVhfaDBvaWw5UUx6VkUiLCJkcSI6IldfdDhMX0pTUjFOY2RyNmFXUndHT2RhVlNfMjVnM3dZck5lRm5Pb2ladk8wTUtwdU5NeE9tcDJZLWlyQ2NER2VscS15ZndNU2JkdVpRUnU2SkJBWXBzM0o0YWdjRXhwTnFNZ3FhYXJsYnZXdDFxOG8yaWpub0VpbEhocTh4eUlhM2QyVnk4TWFYQUsycVUyNDJLWWVxSXVCWGFzNmNwV0NpcDdHN1poSmFQayIsInFpIjoiYlp2eWR1RXBNVVl5R1hoZC1Nbkh5S09pSnRDVUZfa2JNMGhVR3I4QWZKNl9iaThNRWpCTk10NXFaS0dZWVQ2YlhGSldpVFRVRnE2blpMbVFKN2NZNWx2NTdnQVFPVGxMeTZocC1ucWtOckgxUC01VUF6RWJVTWhJZG5QUWNERWtCRWpwZk9iSGx3dHJtYUZGVktncG01dnFGRkQtc3pNSFB1WjQzbzB2Uzk4Iiwia2V5X29wcyI6WyJzaWduIl0sImV4dCI6dHJ1ZSwia2lkIjoiYjMxYWIxY2Q4ZGIyYzM5Mjg3YjMyNjdhMjkxNDYwMGMifV19LCJhY2Nlc3NUb2tlbnNFeHBpcmVJbiI6MTUsImlhdCI6MTc0MDMwNTQ3OH0.qI-820847HN1S37IGMVMKJRGeXQBrgbx91UZ7Av9djs"
43
+
44
+
45
def test_bulk_exports():
    """
    Run five bulk exports against the SMART Health IT demo server.

    The exports cover system, group and patient level requests, each written to its own
    sub-directory of a freshly created ``bulk_export_test`` directory under the system
    temporary directory. Progress is reported on standard output.
    """
    import shutil

    # Initialize PathlingContext.
    context = PathlingContext.create()

    # Base parameters from the demo server
    fhir_server = "https://bulk-data.smarthealthit.org/eyJlcnIiOiIiLCJwYWdlIjoxMDAwMCwidGx0IjoxNSwibSI6MSwiZGVsIjowLCJzZWN1cmUiOjEsIm9wcCI6MTB9/fhir"
    output_base = os.path.join(tempfile.gettempdir(), "bulk_export_test")

    # Start from an empty output directory so earlier runs cannot interfere.
    if os.path.exists(output_base):
        shutil.rmtree(output_base)
    os.makedirs(output_base)

    def run_export(banner, completion_message, **options):
        # Announce the scenario, run it against the demo server, then report completion.
        print(banner)
        context.read.bulk(fhir_endpoint_url=fhir_server, **options)
        print(completion_message)

    # Test 1: System level export with all parameters.
    smart_auth = {
        "enabled": True,
        "client_id": client_id,
        "private_key_jwk": jwk,
        "token_endpoint": "https://bulk-data.smarthealthit.org/auth/token",
        "use_smart": True,
        "use_form_for_basic_auth": False,
        "scope": "system/Patient.r system/Observation.r",
        "token_expiry_tolerance": 120,
    }
    run_export(
        "\n=== Testing system level export with all parameters ===",
        "System export completed successfully",
        output_dir=f"{output_base}/system_detailed",
        output_format="application/fhir+ndjson",
        since=datetime(2015, 1, 1, tzinfo=timezone.utc),
        types=["Patient", "Observation"],
        elements=["id", "status"],
        include_associated_data=["LatestProvenanceResources"],
        type_filters=["Patient?status=active"],
        output_extension="ndjson",
        timeout=3600,
        max_concurrent_downloads=5,
        auth_config=smart_auth,
    )

    # Test 2: Group level export with minimal parameters.
    run_export(
        "\n=== Testing group level export with minimal parameters ===",
        "Group export completed successfully",
        output_dir=f"{output_base}/group_basic",
        group_id="BMCHealthNet",
    )

    # Test 3: Group level export with all parameters.
    run_export(
        "\n=== Testing group level export with all parameters ===",
        "Group export completed successfully",
        output_dir=f"{output_base}/group_detailed",
        group_id="BMCHealthNet",
        output_format="application/fhir+ndjson",
        since=datetime(2015, 1, 1, tzinfo=timezone.utc),
        types=["Patient", "Condition", "Observation"],
        elements=["id", "status"],
        include_associated_data=["LatestProvenanceResources"],
        type_filters=["Patient?status=active"],
        output_extension="ndjson",
        timeout=1800,
        max_concurrent_downloads=8,
    )

    # Test 4: Patient level export with minimal parameters.
    run_export(
        "\n=== Testing patient level export with minimal parameters ===",
        "Patient export completed successfully",
        output_dir=f"{output_base}/patient_basic",
        patients=[
            "Patient/58c297c4-d684-4677-8024-01131d93835e",
            "Patient/118616a4-f0b2-411f-8050-39d5d27c738c",
        ],
    )

    # Test 5: Patient level export with all parameters.
    run_export(
        "\n=== Testing patient level export with all parameters ===",
        "Patient export completed successfully",
        output_dir=f"{output_base}/patient_detailed",
        patients=[
            "Patient/58c297c4-d684-4677-8024-01131d93835e",
            "Patient/118616a4-f0b2-411f-8050-39d5d27c738c",
            "Patient/21fba439-ca79-411f-a081-37a432a78f3a",
        ],
        output_format="application/fhir+ndjson",
        since=datetime(2020, 1, 1, tzinfo=timezone.utc),
        types=["Observation", "MedicationRequest"],
        elements=["id", "status", "code"],
        include_associated_data=["LatestProvenanceResources"],
        type_filters=["Observation?category=vital-signs"],
        output_extension="ndjson",
        timeout=2400,
        max_concurrent_downloads=3,
    )

    print("\nAll bulk exports completed successfully!")
    print(f"Output written to: {output_base}")


if __name__ == "__main__":
    test_bulk_exports()
@@ -19,7 +19,6 @@ from .core import Expression, VariableExpression
19
19
  from .datasource import DataSources, DataSource
20
20
  from .fhir import MimeType, Version
21
21
  from .functions import to_coding, to_snomed_coding, to_ecl_value_set
22
- from .query import ExtractQuery, AggregateQuery
23
22
  from .udfs import (
24
23
  member_of,
25
24
  translate,
@@ -52,8 +51,6 @@ __all__ = [
52
51
  "to_ecl_value_set",
53
52
  "Expression",
54
53
  "VariableExpression",
55
- "ExtractQuery",
56
- "AggregateQuery",
57
54
  "DataSources",
58
55
  "DataSource",
59
56
  ]
@@ -2,8 +2,8 @@
2
2
  # Auto generated from POM project version.
3
3
  # Please do not modify.
4
4
  #
5
- __version__="8.0.0.dev0"
5
+ __version__="8.0.0.dev1"
6
6
  __java_version__="8.0.0-SNAPSHOT"
7
7
  __scala_version__="2.12"
8
- __delta_version__="3.2.0"
8
+ __delta_version__="3.3.2"
9
9
  __hadoop_version__="3.3.4"
@@ -0,0 +1,234 @@
1
+ # Copyright 2025 Commonwealth Scientific and Industrial Research
2
+ # Organisation (CSIRO) ABN 41 687 119 230.
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+
16
+ from datetime import datetime
17
+ from typing import List, Optional
18
+
19
+ from pathling import PathlingContext
20
+
21
+
22
class BulkExportClient:
    """
    A client for exporting data from the FHIR Bulk Data Access API.

    This is a thin Python wrapper around the Java ``au.csiro.fhir.export.BulkExportClient``.
    Instances are created through :meth:`for_system`, :meth:`for_group` or
    :meth:`for_patient`, each of which configures the matching Java builder.
    """

    def __init__(self, java_client):
        """
        Create a new BulkExportClient that wraps a Java BulkExportClient.

        :param java_client: The Java BulkExportClient instance to wrap
        """
        self._java_client = java_client

    def export(self):
        """
        Export data from the FHIR server.

        :return: The result of the export operation
        """
        return self._java_client.export()

    @staticmethod
    def _format_instant(since: datetime) -> str:
        """
        Render a timezone-aware datetime as a UTC ISO-8601 instant with millisecond precision.

        The value is normalised to UTC and always rendered with a ``Z`` suffix, for example
        ``2015-01-01T00:00:00.000Z``. A numeric offset such as ``+10:00`` is deliberately not
        emitted: ``java.time.Instant.parse`` only accepts offsets on newer JVMs, and offsets
        with a seconds component cannot be expressed as ``+HH:MM`` at all.

        :param since: The timestamp to render (must include timezone information)
        :return: The timestamp as a string suitable for ``java.time.Instant.parse``
        :raises ValueError: If ``since`` has no ``tzinfo``
        """
        # Imported locally because the module only imports ``datetime`` at the top level.
        from datetime import timezone

        if since.tzinfo is None:
            raise ValueError("datetime must include timezone information")
        # A tzinfo that reports no offset keeps its wall-clock fields and is labelled as UTC.
        if since.utcoffset() is not None:
            since = since.astimezone(timezone.utc)
        # %f yields six digits (microseconds); drop the last three to keep milliseconds.
        return since.strftime('%Y-%m-%dT%H:%M:%S.%f')[:-3] + 'Z'

    @staticmethod
    def _build_auth_config(jvm, auth_config: dict):
        """
        Translate a Python authentication dictionary into a Java ``AuthConfig`` object.

        :param jvm: The JVM instance
        :param auth_config: Authentication configuration dictionary (see
            :meth:`_configure_builder` for the recognised keys)
        :return: The built Java ``AuthConfig`` instance
        """
        auth_builder = jvm.au.csiro.fhir.auth.AuthConfig.builder()

        # Set defaults to match Java class
        auth_builder.enabled(False)
        auth_builder.useSMART(True)
        auth_builder.useFormForBasicAuth(False)
        auth_builder.tokenExpiryTolerance(120)

        # Map Python config keys to Java builder methods; only keys that are present are applied.
        setters = (
            ('enabled', 'enabled'),
            ('use_smart', 'useSMART'),
            ('token_endpoint', 'tokenEndpoint'),
            ('client_id', 'clientId'),
            ('client_secret', 'clientSecret'),
            ('private_key_jwk', 'privateKeyJWK'),
            ('use_form_for_basic_auth', 'useFormForBasicAuth'),
            ('scope', 'scope'),
            ('token_expiry_tolerance', 'tokenExpiryTolerance'),
        )
        for key, method_name in setters:
            if key in auth_config:
                getattr(auth_builder, method_name)(auth_config[key])

        return auth_builder.build()

    @classmethod
    def _configure_builder(cls, jvm, builder, fhir_endpoint_url: str, output_dir: str,
                           output_format: str = "application/fhir+ndjson",
                           since: Optional[datetime] = None,
                           types: Optional[List[str]] = None,
                           elements: Optional[List[str]] = None,
                           include_associated_data: Optional[List[str]] = None,
                           type_filters: Optional[List[str]] = None,
                           output_extension: str = "ndjson",
                           timeout: Optional[int] = None,
                           max_concurrent_downloads: int = 10,
                           auth_config: Optional[dict] = None):
        """
        Configure common builder parameters.

        :param jvm: The JVM instance
        :param builder: The builder instance to configure
        :param fhir_endpoint_url: The URL of the FHIR server
        :param output_dir: Output directory
        :param output_format: Output format
        :param since: Timestamp filter (must include timezone information)
        :param types: Resource types to include
        :param elements: Elements to include
        :param include_associated_data: Associated data to include
        :param type_filters: Resource filters
        :param output_extension: File extension for output files
        :param timeout: Optional timeout duration in seconds
        :param max_concurrent_downloads: Maximum number of concurrent downloads
        :param auth_config: Optional authentication configuration dictionary with the following possible keys:
            - enabled: Whether authentication is enabled (default: False)
            - client_id: The client ID to use for authentication
            - private_key_jwk: The private key in JWK format
            - client_secret: The client secret to use for authentication
            - token_endpoint: The token endpoint URL
            - use_smart: Whether to use SMART authentication (default: True)
            - use_form_for_basic_auth: Whether to use form-based basic auth (default: False)
            - scope: The scope to request
            - token_expiry_tolerance: The token expiry tolerance in seconds (default: 120)
        :raises ValueError: If ``since`` is given without timezone information
        """
        builder.withFhirEndpointUrl(fhir_endpoint_url)
        builder.withOutputDir(output_dir)
        builder.withOutputFormat(output_format)
        builder.withOutputExtension(output_extension)
        builder.withMaxConcurrentDownloads(max_concurrent_downloads)

        if timeout is not None:
            builder.withTimeout(jvm.java.time.Duration.ofSeconds(timeout))

        if since is not None:
            java_instant = jvm.java.time.Instant.parse(cls._format_instant(since))
            builder.withSince(java_instant)
        if types is not None:
            for type_ in types:
                builder.withType(type_)
        if elements is not None:
            for element in elements:
                builder.withElement(element)
        if include_associated_data is not None:
            # Convert Python list to Java List<String>
            java_list = jvm.java.util.ArrayList()
            for data in include_associated_data:
                java_list.add(data)
            builder.withIncludeAssociatedData(java_list)
        if type_filters is not None:
            for filter_ in type_filters:
                builder.withTypeFilter(filter_)

        if auth_config is not None:
            builder.withAuthConfig(cls._build_auth_config(jvm, auth_config))

    @classmethod
    def for_system(cls, jvm, *args, **kwargs) -> 'BulkExportClient':
        """
        Create a builder for a system-level export.

        :param jvm: The JVM instance
        :param fhir_endpoint_url: The URL of the FHIR server to export from
        :param output_dir: The directory to write the output files to
        :param output_format: The format of the output data
        :param since: Only include resources modified after this timestamp
        :param types: List of FHIR resource types to include
        :param elements: List of FHIR elements to include
        :param include_associated_data: Pre-defined set of FHIR resources to include
        :param type_filters: FHIR search queries to filter resources
        :param output_extension: File extension for output files
        :param timeout: Optional timeout duration in seconds
        :param max_concurrent_downloads: Maximum number of concurrent downloads
        :param auth_config: Optional authentication configuration dictionary
        :return: A BulkExportClient configured for system-level export
        """
        client_class = jvm.au.csiro.fhir.export.BulkExportClient
        builder = client_class.systemBuilder()  # Returns a builder directly
        cls._configure_builder(jvm, builder, *args, **kwargs)
        return cls(builder.build())

    @classmethod
    def for_group(cls, jvm, fhir_endpoint_url: str, output_dir: str,
                  group_id: str, *args, **kwargs) -> 'BulkExportClient':
        """
        Create a builder for a group-level export.

        :param jvm: The JVM instance
        :param fhir_endpoint_url: The URL of the FHIR server to export from
        :param output_dir: The directory to write the output files to
        :param group_id: The ID of the group to export
        :param output_format: The format of the output data
        :param since: Only include resources modified after this timestamp
        :param types: List of FHIR resource types to include
        :param elements: List of FHIR elements to include
        :param include_associated_data: Pre-defined set of FHIR resources to include
        :param type_filters: FHIR search queries to filter resources
        :param output_extension: File extension for output files
        :param timeout: Optional timeout duration in seconds
        :param max_concurrent_downloads: Maximum number of concurrent downloads
        :param auth_config: Optional authentication configuration dictionary
        :return: A BulkExportClient configured for group-level export
        """
        client_class = jvm.au.csiro.fhir.export.BulkExportClient
        # Pass group_id directly to groupBuilder
        builder = client_class.groupBuilder(group_id)
        cls._configure_builder(jvm, builder, fhir_endpoint_url, output_dir, *args, **kwargs)
        return cls(builder.build())

    @classmethod
    def for_patient(cls, jvm, fhir_endpoint_url: str, output_dir: str,
                    patients: Optional[List[str]] = None, *args, **kwargs) -> 'BulkExportClient':
        """
        Create a builder for a patient-level export.

        :param jvm: The JVM instance
        :param fhir_endpoint_url: The URL of the FHIR server to export from
        :param output_dir: The directory to write the output files to
        :param patients: List of patient references to include
        :param output_format: The format of the output data
        :param since: Only include resources modified after this timestamp
        :param types: List of FHIR resource types to include
        :param elements: List of FHIR elements to include
        :param include_associated_data: Pre-defined set of FHIR resources to include
        :param type_filters: FHIR search queries to filter resources
        :param output_extension: File extension for output files
        :param timeout: Optional timeout duration in seconds
        :param max_concurrent_downloads: Maximum number of concurrent downloads
        :param auth_config: Optional authentication configuration dictionary
        :return: A BulkExportClient configured for patient-level export
        """
        client_class = jvm.au.csiro.fhir.export.BulkExportClient
        builder = client_class.patientBuilder()  # Returns a builder directly
        if patients is not None:
            for patient in patients:
                ref = jvm.au.csiro.fhir.model.Reference.of(patient)
                builder.withPatient(ref)
        cls._configure_builder(jvm, builder, fhir_endpoint_url, output_dir, *args, **kwargs)
        return cls(builder.build())
@@ -104,6 +104,9 @@ class PathlingContext:
104
104
  token_expiry_tolerance: Optional[int] = 120,
105
105
  accept_language: Optional[str] = None,
106
106
  enable_delta=False,
107
+ enable_remote_debugging: Optional[bool] = False,
108
+ debug_port: Optional[int] = 5005,
109
+ debug_suspend: Optional[bool] = True,
107
110
  ) -> "PathlingContext":
108
111
  """
109
112
  Creates a :class:`PathlingContext` with the given configuration options. This should only
@@ -176,6 +179,9 @@ class PathlingContext:
176
179
  implementation and the code systems used.
177
180
  :param enable_delta: enables the use of Delta for storage of FHIR data.
178
181
  Only supported when no SparkSession is provided.
182
+ :param enable_remote_debugging: enables remote debugging for the JVM process.
183
+ :param debug_port: the port for the debugger to listen on (default: 5005)
184
+ :param debug_suspend: if true, the JVM will suspend until a debugger is attached
179
185
  :return: a :class:`PathlingContext` instance initialized with the specified configuration
180
186
  """
181
187
 
@@ -195,6 +201,13 @@ class PathlingContext:
195
201
  "org.apache.spark.sql.delta.catalog.DeltaCatalog",
196
202
  )
197
203
  )
204
+
205
+ # Add remote debugging configuration if enabled
206
+ if enable_remote_debugging:
207
+ suspend_option = "y" if debug_suspend else "n"
208
+ debug_options = f"-agentlib:jdwp=transport=dt_socket,server=y,suspend={suspend_option},address={debug_port}"
209
+ spark_builder = spark_builder.config("spark.driver.extraJavaOptions", debug_options)
210
+
198
211
  return spark_builder.getOrCreate()
199
212
 
200
213
  spark = spark or SparkSession.getActiveSession() or _new_spark_session()
@@ -16,7 +16,9 @@
16
16
  from typing import Callable, Optional
17
17
 
18
18
  from pathling.core import SparkConversionsMixin, StringMapper
19
- from pathling.datasource import DataSource
19
+ from typing import TYPE_CHECKING
20
+ if TYPE_CHECKING:
21
+ from pathling.datasource import DataSource
20
22
 
21
23
 
22
24
  class ImportMode:
@@ -49,7 +51,7 @@ class DataSinks(SparkConversionsMixin):
49
51
  A class for writing FHIR data to a variety of different targets.
50
52
  """
51
53
 
52
- def __init__(self, datasource: DataSource):
54
+ def __init__(self, datasource: "DataSource"):
53
55
  SparkConversionsMixin.__init__(self, datasource.spark)
54
56
  self._datasinks = (
55
57
  self.spark._jvm.au.csiro.pathling.library.io.sink.DataSinkBuilder(