fhir-pyrate 0.2.0b9__py3-none-any.whl → 0.2.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- fhir_pyrate/__init__.py +1 -1
- fhir_pyrate/ahoy.py +14 -7
- fhir_pyrate/dicom_downloader.py +140 -68
- fhir_pyrate/miner.py +17 -18
- fhir_pyrate/pirate.py +106 -80
- fhir_pyrate/util/__init__.py +2 -6
- fhir_pyrate/util/bundle_processing_templates.py +7 -4
- fhir_pyrate/util/fhirobj.py +2 -2
- fhir_pyrate/util/imports.py +3 -3
- fhir_pyrate/util/token_auth.py +27 -23
- fhir_pyrate/util/util.py +9 -5
- {fhir_pyrate-0.2.0b9.dist-info → fhir_pyrate-0.2.2.dist-info}/METADATA +79 -24
- fhir_pyrate-0.2.2.dist-info/RECORD +15 -0
- {fhir_pyrate-0.2.0b9.dist-info → fhir_pyrate-0.2.2.dist-info}/WHEEL +1 -1
- fhir_pyrate-0.2.0b9.dist-info/RECORD +0 -15
- {fhir_pyrate-0.2.0b9.dist-info → fhir_pyrate-0.2.2.dist-info}/LICENSE +0 -0
fhir_pyrate/pirate.py
CHANGED
|
@@ -9,7 +9,21 @@ import warnings
|
|
|
9
9
|
from functools import partial
|
|
10
10
|
from pathlib import Path
|
|
11
11
|
from types import TracebackType
|
|
12
|
-
from typing import
|
|
12
|
+
from typing import (
|
|
13
|
+
Any,
|
|
14
|
+
Callable,
|
|
15
|
+
ClassVar,
|
|
16
|
+
Dict,
|
|
17
|
+
Generator,
|
|
18
|
+
Iterable,
|
|
19
|
+
List,
|
|
20
|
+
Optional,
|
|
21
|
+
Tuple,
|
|
22
|
+
Type,
|
|
23
|
+
TypeAlias,
|
|
24
|
+
TypeVar,
|
|
25
|
+
Union,
|
|
26
|
+
)
|
|
13
27
|
|
|
14
28
|
import pandas as pd
|
|
15
29
|
import requests
|
|
@@ -24,6 +38,11 @@ from fhir_pyrate.util import FHIRObj, string_from_column
|
|
|
24
38
|
from fhir_pyrate.util.bundle_processing_templates import flatten_data, parse_fhir_path
|
|
25
39
|
from fhir_pyrate.util.imports import optional_import
|
|
26
40
|
|
|
41
|
+
TParam = TypeVar("TParam")
|
|
42
|
+
TFHIRObj = TypeVar("TFHIRObj", bound=FHIRObj)
|
|
43
|
+
ProcessFunc: TypeAlias = Callable[[FHIRObj], Any]
|
|
44
|
+
|
|
45
|
+
# Note to people from the future. This actually should be an optional import to avoid that, if people want to use another version of antlr, this creates crazy errors that the version does not match. In such cases, it is not possible to use fhirpathpy, but the processing functions can still be used.
|
|
27
46
|
fhirpathpy, _ = optional_import(module="fhirpathpy")
|
|
28
47
|
|
|
29
48
|
logger = logging.getLogger(__name__)
|
|
@@ -31,7 +50,7 @@ logger = logging.getLogger(__name__)
|
|
|
31
50
|
|
|
32
51
|
def create_key(request: requests.PreparedRequest, **kwargs: Any) -> str:
|
|
33
52
|
"""
|
|
34
|
-
|
|
53
|
+
Create a unique key for each request URL.
|
|
35
54
|
|
|
36
55
|
:param request: The request to create a key for
|
|
37
56
|
:param kwargs: Unused, needed for compatibility with the library
|
|
@@ -75,7 +94,7 @@ class Pirate:
|
|
|
75
94
|
:param optional_get_params: Optional parameters that will be passed to the session's get calls
|
|
76
95
|
"""
|
|
77
96
|
|
|
78
|
-
FHIRPATH_INVALID_TOKENS =
|
|
97
|
+
FHIRPATH_INVALID_TOKENS: ClassVar[Tuple[str, ...]] = (
|
|
79
98
|
"div",
|
|
80
99
|
"mod",
|
|
81
100
|
"in",
|
|
@@ -83,7 +102,7 @@ class Pirate:
|
|
|
83
102
|
"or",
|
|
84
103
|
"xor",
|
|
85
104
|
"implies",
|
|
86
|
-
|
|
105
|
+
)
|
|
87
106
|
|
|
88
107
|
def __init__(
|
|
89
108
|
self,
|
|
@@ -92,14 +111,14 @@ class Pirate:
|
|
|
92
111
|
num_processes: int = 1,
|
|
93
112
|
print_request_url: bool = False,
|
|
94
113
|
time_format: str = "%Y-%m-%dT%H:%M",
|
|
95
|
-
default_count: int = None,
|
|
96
|
-
cache_folder: Union[str, Path] = None,
|
|
114
|
+
default_count: Optional[int] = None,
|
|
115
|
+
cache_folder: Optional[Union[str, Path]] = None,
|
|
97
116
|
cache_expiry_time: Union[datetime.datetime, int] = -1, # -1 = does not expire
|
|
98
|
-
retry_requests: Retry = None,
|
|
117
|
+
retry_requests: Optional[Retry] = None,
|
|
99
118
|
disable_multiprocessing_requests: bool = False,
|
|
100
119
|
disable_multiprocessing_build: bool = False,
|
|
101
120
|
silence_fhirpath_warning: bool = False,
|
|
102
|
-
optional_get_params: Dict[Any, Any] = None,
|
|
121
|
+
optional_get_params: Optional[Dict[Any, Any]] = None,
|
|
103
122
|
):
|
|
104
123
|
# Remove the last character if they added it
|
|
105
124
|
url_search = re.search(
|
|
@@ -127,6 +146,9 @@ class Pirate:
|
|
|
127
146
|
self.session = requests.session()
|
|
128
147
|
self.disable_multiprocessing_requests = disable_multiprocessing_requests
|
|
129
148
|
self.disable_multiprocessing_build = disable_multiprocessing_build
|
|
149
|
+
if num_processes == 1:
|
|
150
|
+
self.disable_multiprocessing_requests = True
|
|
151
|
+
self.disable_multiprocessing_build = True
|
|
130
152
|
self.caching = False
|
|
131
153
|
if cache_folder is not None:
|
|
132
154
|
# TODO: Change this to work with context managers
|
|
@@ -178,7 +200,7 @@ class Pirate:
|
|
|
178
200
|
def get_bundle_total(
|
|
179
201
|
self,
|
|
180
202
|
resource_type: str,
|
|
181
|
-
request_params: Dict[str, Any] = None,
|
|
203
|
+
request_params: Optional[Dict[str, Any]] = None,
|
|
182
204
|
count_entries: bool = False,
|
|
183
205
|
) -> Optional[int]:
|
|
184
206
|
"""
|
|
@@ -202,11 +224,11 @@ class Pirate:
|
|
|
202
224
|
def steal_bundles(
|
|
203
225
|
self,
|
|
204
226
|
resource_type: str,
|
|
205
|
-
request_params: Dict[str, Any] = None,
|
|
227
|
+
request_params: Optional[Dict[str, Any]] = None,
|
|
206
228
|
num_pages: int = -1,
|
|
207
229
|
) -> Generator[FHIRObj, None, int]:
|
|
208
230
|
"""
|
|
209
|
-
|
|
231
|
+
Execute a request, iterates through the result pages and returns all the bundles as a
|
|
210
232
|
generator.
|
|
211
233
|
|
|
212
234
|
:param resource_type: The resource to be queried, e.g. DiagnosticReport
|
|
@@ -229,14 +251,14 @@ class Pirate:
|
|
|
229
251
|
def steal_bundles_to_dataframe(
|
|
230
252
|
self,
|
|
231
253
|
resource_type: str,
|
|
232
|
-
request_params: Dict[str, Any] = None,
|
|
254
|
+
request_params: Optional[Dict[str, Any]] = None,
|
|
233
255
|
num_pages: int = -1,
|
|
234
|
-
process_function:
|
|
235
|
-
fhir_paths: List[Union[str, Tuple[str, str]]] = None,
|
|
256
|
+
process_function: ProcessFunc = flatten_data,
|
|
257
|
+
fhir_paths: Optional[List[Union[str, Tuple[str, str]]]] = None,
|
|
236
258
|
build_df_after_query: bool = False,
|
|
237
259
|
) -> Union[pd.DataFrame, Dict[str, pd.DataFrame]]:
|
|
238
260
|
"""
|
|
239
|
-
|
|
261
|
+
Execute a request, iterates through the result pages, and builds a DataFrame with their
|
|
240
262
|
information. The DataFrames are either built after each
|
|
241
263
|
bundle is retrieved, or after we collected all bundles.
|
|
242
264
|
|
|
@@ -275,10 +297,10 @@ class Pirate:
|
|
|
275
297
|
time_attribute_name: str,
|
|
276
298
|
date_init: Union[str, datetime.date],
|
|
277
299
|
date_end: Union[str, datetime.date],
|
|
278
|
-
request_params: Dict[str, Any] = None,
|
|
300
|
+
request_params: Optional[Dict[str, Any]] = None,
|
|
279
301
|
) -> Generator[FHIRObj, None, int]:
|
|
280
302
|
"""
|
|
281
|
-
|
|
303
|
+
Use the multiprocessing module to speed up some queries. The time frame is
|
|
282
304
|
divided into multiple time spans (as many as there are processes) and each smaller
|
|
283
305
|
time frame is investigated simultaneously.
|
|
284
306
|
|
|
@@ -306,13 +328,13 @@ class Pirate:
|
|
|
306
328
|
time_attribute_name: str,
|
|
307
329
|
date_init: Union[str, datetime.date],
|
|
308
330
|
date_end: Union[str, datetime.date],
|
|
309
|
-
request_params: Dict[str, Any] = None,
|
|
310
|
-
process_function:
|
|
311
|
-
fhir_paths: List[Union[str, Tuple[str, str]]] = None,
|
|
331
|
+
request_params: Optional[Dict[str, Any]] = None,
|
|
332
|
+
process_function: ProcessFunc = flatten_data,
|
|
333
|
+
fhir_paths: Optional[List[Union[str, Tuple[str, str]]]] = None,
|
|
312
334
|
build_df_after_query: bool = False,
|
|
313
335
|
) -> Union[pd.DataFrame, Dict[str, pd.DataFrame]]:
|
|
314
336
|
"""
|
|
315
|
-
|
|
337
|
+
Use the multiprocessing module to speed up some queries. The time frame is
|
|
316
338
|
divided into multiple time spans (as many as there are processes) and each smaller
|
|
317
339
|
time frame is investigated simultaneously. Finally, it builds a DataFrame with the
|
|
318
340
|
information from all timespans. The DataFrames are either built after each
|
|
@@ -356,7 +378,7 @@ class Pirate:
|
|
|
356
378
|
df_constraints: Dict[
|
|
357
379
|
str, Union[Union[str, Tuple[str, str]], List[Union[str, Tuple[str, str]]]]
|
|
358
380
|
],
|
|
359
|
-
request_params: Dict[str, Any] = None,
|
|
381
|
+
request_params: Optional[Dict[str, Any]] = None,
|
|
360
382
|
num_pages: int = -1,
|
|
361
383
|
) -> Generator[FHIRObj, None, int]:
|
|
362
384
|
"""
|
|
@@ -397,12 +419,12 @@ class Pirate:
|
|
|
397
419
|
df_constraints: Dict[
|
|
398
420
|
str, Union[Union[str, Tuple[str, str]], List[Union[str, Tuple[str, str]]]]
|
|
399
421
|
],
|
|
400
|
-
process_function:
|
|
401
|
-
fhir_paths: List[Union[str, Tuple[str, str]]] = None,
|
|
402
|
-
request_params: Dict[str, Any] = None,
|
|
422
|
+
process_function: ProcessFunc = flatten_data,
|
|
423
|
+
fhir_paths: Optional[List[Union[str, Tuple[str, str]]]] = None,
|
|
424
|
+
request_params: Optional[Dict[str, Any]] = None,
|
|
403
425
|
num_pages: int = -1,
|
|
404
426
|
with_ref: bool = True,
|
|
405
|
-
with_columns: List[Union[str, Tuple[str, str]]] = None,
|
|
427
|
+
with_columns: Optional[List[Union[str, Tuple[str, str]]]] = None,
|
|
406
428
|
build_df_after_query: bool = False,
|
|
407
429
|
) -> Union[pd.DataFrame, Dict[str, pd.DataFrame]]:
|
|
408
430
|
"""
|
|
@@ -548,7 +570,7 @@ class Pirate:
|
|
|
548
570
|
):
|
|
549
571
|
# If we don't want multiprocessing
|
|
550
572
|
for param, input_param in tqdm(
|
|
551
|
-
zip(params_per_sample, input_params_per_sample),
|
|
573
|
+
zip(params_per_sample, input_params_per_sample, strict=False),
|
|
552
574
|
total=len(params_per_sample),
|
|
553
575
|
desc=tqdm_text,
|
|
554
576
|
):
|
|
@@ -572,7 +594,7 @@ class Pirate:
|
|
|
572
594
|
pool = multiprocessing.Pool(self.num_processes)
|
|
573
595
|
results = []
|
|
574
596
|
for param, input_param in tqdm(
|
|
575
|
-
zip(params_per_sample, input_params_per_sample),
|
|
597
|
+
zip(params_per_sample, input_params_per_sample, strict=False),
|
|
576
598
|
total=len(params_per_sample),
|
|
577
599
|
desc=tqdm_text,
|
|
578
600
|
):
|
|
@@ -581,13 +603,13 @@ class Pirate:
|
|
|
581
603
|
(
|
|
582
604
|
pool.apply_async(
|
|
583
605
|
self._bundles_to_dataframe,
|
|
584
|
-
kwds=
|
|
585
|
-
bundles
|
|
586
|
-
process_function
|
|
587
|
-
build_df_after_query
|
|
588
|
-
disable_multiprocessing
|
|
589
|
-
always_return_dict
|
|
590
|
-
|
|
606
|
+
kwds={
|
|
607
|
+
"bundles": list(self._get_bundles(**param)),
|
|
608
|
+
"process_function": process_function,
|
|
609
|
+
"build_df_after_query": False,
|
|
610
|
+
"disable_multiprocessing": True,
|
|
611
|
+
"always_return_dict": True,
|
|
612
|
+
},
|
|
591
613
|
),
|
|
592
614
|
input_param,
|
|
593
615
|
)
|
|
@@ -609,13 +631,13 @@ class Pirate:
|
|
|
609
631
|
resource_type: pd.concat(final_dfs[resource_type], ignore_index=True)
|
|
610
632
|
for resource_type in final_dfs
|
|
611
633
|
}
|
|
612
|
-
return
|
|
634
|
+
return next(iter(dfs.values())) if len(dfs) == 1 else dfs
|
|
613
635
|
|
|
614
636
|
def bundles_to_dataframe(
|
|
615
637
|
self,
|
|
616
638
|
bundles: Union[List[FHIRObj], Generator[FHIRObj, None, int]],
|
|
617
|
-
process_function:
|
|
618
|
-
fhir_paths: List[Union[str, Tuple[str, str]]] = None,
|
|
639
|
+
process_function: ProcessFunc = flatten_data,
|
|
640
|
+
fhir_paths: Optional[List[Union[str, Tuple[str, str]]]] = None,
|
|
619
641
|
) -> Union[pd.DataFrame, Dict[str, pd.DataFrame]]:
|
|
620
642
|
"""
|
|
621
643
|
Convert a bundle into a DataFrame using either the `flatten_data` function (default),
|
|
@@ -782,7 +804,7 @@ class Pirate:
|
|
|
782
804
|
def _adjust_df_constraints(
|
|
783
805
|
df_constraints: Dict[
|
|
784
806
|
str, Union[Union[str, Tuple[str, str]], List[Union[str, Tuple[str, str]]]]
|
|
785
|
-
]
|
|
807
|
+
],
|
|
786
808
|
) -> Dict[str, List[Tuple[str, str]]]:
|
|
787
809
|
"""
|
|
788
810
|
Adjust the constraint dictionary to always have the same structure, which makes it easier
|
|
@@ -834,7 +856,7 @@ class Pirate:
|
|
|
834
856
|
df_constraints: Dict[str, List[Tuple[str, str]]],
|
|
835
857
|
) -> List[Dict[str, List[str]]]:
|
|
836
858
|
"""
|
|
837
|
-
|
|
859
|
+
Build the request parameters for each sample by checking the constraint set on each row.
|
|
838
860
|
The resulting request parameters are given by the general `request_params` and by the
|
|
839
861
|
constraint given by each row. E.g. if df_constraints = {"subject": "patient_id"}, then
|
|
840
862
|
the resulting list will contain {"subject": row.patient_id} for each row of the DataFrame.
|
|
@@ -910,7 +932,7 @@ class Pirate:
|
|
|
910
932
|
|
|
911
933
|
def _get_response(self, request_url: str) -> Optional[FHIRObj]:
|
|
912
934
|
"""
|
|
913
|
-
|
|
935
|
+
Perform the API request and returns the response as a dictionary.
|
|
914
936
|
|
|
915
937
|
:param request_url: The request string
|
|
916
938
|
:return: A FHIR bundle
|
|
@@ -984,13 +1006,13 @@ class Pirate:
|
|
|
984
1006
|
def _get_bundles(
|
|
985
1007
|
self,
|
|
986
1008
|
resource_type: str,
|
|
987
|
-
request_params: Dict[str, Any] = None,
|
|
1009
|
+
request_params: Optional[Dict[str, Any]] = None,
|
|
988
1010
|
num_pages: int = -1,
|
|
989
1011
|
silence_tqdm: bool = False,
|
|
990
1012
|
tqdm_df_build: bool = False,
|
|
991
1013
|
) -> Generator[FHIRObj, None, int]:
|
|
992
1014
|
"""
|
|
993
|
-
|
|
1015
|
+
Execute a request, iterates through the result pages and returns all the bundles as a
|
|
994
1016
|
generator.
|
|
995
1017
|
Additionally, some checks are performed, and the corresponding warnings are returned:
|
|
996
1018
|
- Whether a sorting has been defined
|
|
@@ -1030,7 +1052,8 @@ class Pirate:
|
|
|
1030
1052
|
warnings.warn(
|
|
1031
1053
|
f"We detected multiple pages (approximately {bundle_total} pages) but "
|
|
1032
1054
|
f"no sorting method has been defined, which may yield incorrect results. "
|
|
1033
|
-
f"We will set the sorting parameter to _id."
|
|
1055
|
+
f"We will set the sorting parameter to _id.",
|
|
1056
|
+
stacklevel=2,
|
|
1034
1057
|
)
|
|
1035
1058
|
current_params["_sort"] = "_id"
|
|
1036
1059
|
request_params_string = self._concat_request_params(current_params)
|
|
@@ -1079,7 +1102,7 @@ class Pirate:
|
|
|
1079
1102
|
tqdm_df_build: bool,
|
|
1080
1103
|
) -> Generator[FHIRObj, None, int]:
|
|
1081
1104
|
"""
|
|
1082
|
-
|
|
1105
|
+
Set the `time_attribute_name` date parameters for the
|
|
1083
1106
|
`sail_through_search_space function`.
|
|
1084
1107
|
"""
|
|
1085
1108
|
request_params[time_attribute_name] = (
|
|
@@ -1095,9 +1118,11 @@ class Pirate:
|
|
|
1095
1118
|
)
|
|
1096
1119
|
|
|
1097
1120
|
@staticmethod
|
|
1098
|
-
def _generator_to_list(
|
|
1121
|
+
def _generator_to_list(
|
|
1122
|
+
f: Callable[..., Iterable[TFHIRObj]], *args: Any, **kwargs: Any
|
|
1123
|
+
) -> List[FHIRObj]:
|
|
1099
1124
|
"""
|
|
1100
|
-
|
|
1125
|
+
Convert the result of a function returning a generator to a list.
|
|
1101
1126
|
"""
|
|
1102
1127
|
return list(f(*args, **kwargs))
|
|
1103
1128
|
|
|
@@ -1107,7 +1132,7 @@ class Pirate:
|
|
|
1107
1132
|
|
|
1108
1133
|
def _run_multiquery(
|
|
1109
1134
|
self,
|
|
1110
|
-
func: Callable,
|
|
1135
|
+
func: Callable[[TParam], Iterable[TFHIRObj]],
|
|
1111
1136
|
query_params: List[Any],
|
|
1112
1137
|
tqdm_text: str,
|
|
1113
1138
|
) -> Generator[FHIRObj, None, int]:
|
|
@@ -1138,11 +1163,11 @@ class Pirate:
|
|
|
1138
1163
|
time_attribute_name: str,
|
|
1139
1164
|
date_init: Union[str, datetime.date],
|
|
1140
1165
|
date_end: Union[str, datetime.date],
|
|
1141
|
-
request_params: Dict[str, Any] = None,
|
|
1166
|
+
request_params: Optional[Dict[str, Any]] = None,
|
|
1142
1167
|
tqdm_df_build: bool = False,
|
|
1143
1168
|
) -> Generator[FHIRObj, None, int]:
|
|
1144
1169
|
"""
|
|
1145
|
-
|
|
1170
|
+
Use the multiprocessing module to speed up some queries. The time frame is
|
|
1146
1171
|
divided into multiple time spans (as many as there are processes) and each smaller
|
|
1147
1172
|
time frame is investigated simultaneously.
|
|
1148
1173
|
|
|
@@ -1168,7 +1193,8 @@ class Pirate:
|
|
|
1168
1193
|
warnings.warn(
|
|
1169
1194
|
f"Detected use of parameter {time_attribute_name} "
|
|
1170
1195
|
f"in the request parameters. Please use the date_init (inclusive) and "
|
|
1171
|
-
f"date_end (exclusive) parameters instead."
|
|
1196
|
+
f"date_end (exclusive) parameters instead.",
|
|
1197
|
+
stacklevel=2,
|
|
1172
1198
|
)
|
|
1173
1199
|
# Remove all elements that refer to a date
|
|
1174
1200
|
request_params = {
|
|
@@ -1188,7 +1214,7 @@ class Pirate:
|
|
|
1188
1214
|
)
|
|
1189
1215
|
# Divide the current time period into smaller spans
|
|
1190
1216
|
timespans = self._get_timespan_list(date_init, date_end)
|
|
1191
|
-
func
|
|
1217
|
+
func = partial(
|
|
1192
1218
|
self._get_bundles_for_timespan,
|
|
1193
1219
|
resource_type,
|
|
1194
1220
|
request_params,
|
|
@@ -1212,7 +1238,7 @@ class Pirate:
|
|
|
1212
1238
|
df_constraints: Dict[
|
|
1213
1239
|
str, Union[Union[str, Tuple[str, str]], List[Union[str, Tuple[str, str]]]]
|
|
1214
1240
|
],
|
|
1215
|
-
request_params: Dict[str, Any] = None,
|
|
1241
|
+
request_params: Optional[Dict[str, Any]] = None,
|
|
1216
1242
|
num_pages: int = -1,
|
|
1217
1243
|
tqdm_df_build: bool = False,
|
|
1218
1244
|
) -> Generator[FHIRObj, None, int]:
|
|
@@ -1245,7 +1271,7 @@ class Pirate:
|
|
|
1245
1271
|
request_params=request_params,
|
|
1246
1272
|
df_constraints=self._adjust_df_constraints(df_constraints),
|
|
1247
1273
|
)
|
|
1248
|
-
func
|
|
1274
|
+
func = partial(
|
|
1249
1275
|
self._get_bundles,
|
|
1250
1276
|
resource_type,
|
|
1251
1277
|
num_pages=num_pages,
|
|
@@ -1277,16 +1303,17 @@ class Pirate:
|
|
|
1277
1303
|
if key_mapping[key] in df.columns:
|
|
1278
1304
|
warnings.warn(
|
|
1279
1305
|
f"A column with name {key_mapping[key]} already exists in the output"
|
|
1280
|
-
f"DataFrame, and the column {key} will not be copied."
|
|
1306
|
+
f"DataFrame, and the column {key} will not be copied.",
|
|
1307
|
+
stacklevel=2,
|
|
1281
1308
|
)
|
|
1282
1309
|
else:
|
|
1283
1310
|
df[key_mapping[key]] = value
|
|
1284
1311
|
|
|
1285
1312
|
def _set_up_fhirpath_function(
|
|
1286
1313
|
self, fhir_paths: List[Union[str, Tuple[str, str]]]
|
|
1287
|
-
) ->
|
|
1314
|
+
) -> ProcessFunc:
|
|
1288
1315
|
"""
|
|
1289
|
-
|
|
1316
|
+
Prepare and compile the FHIRPath and sets them as the processing function for building
|
|
1290
1317
|
the DataFrames.
|
|
1291
1318
|
:param fhir_paths: A list of FHIR paths (https://hl7.org/fhirpath/) to be used to build the
|
|
1292
1319
|
DataFrame, alternatively, a list of tuples can be used to specify the column name of the
|
|
@@ -1315,17 +1342,18 @@ class Pirate:
|
|
|
1315
1342
|
f"If you really want to do this, please use processing functions "
|
|
1316
1343
|
f"instead. If you are using the FHIRPath expressions correctly as "
|
|
1317
1344
|
f"they are intended, you can silence the warning when "
|
|
1318
|
-
f"initializing the class."
|
|
1345
|
+
f"initializing the class.",
|
|
1346
|
+
stacklevel=2,
|
|
1319
1347
|
)
|
|
1320
|
-
|
|
1348
|
+
compiled_paths = [
|
|
1321
1349
|
(name, fhirpathpy.compile(path=path)) for name, path in fhir_paths_with_name
|
|
1322
1350
|
]
|
|
1323
|
-
return partial(parse_fhir_path, compiled_fhir_paths=
|
|
1351
|
+
return partial(parse_fhir_path, compiled_fhir_paths=compiled_paths)
|
|
1324
1352
|
|
|
1325
1353
|
def _bundles_to_dataframe(
|
|
1326
1354
|
self,
|
|
1327
|
-
bundles:
|
|
1328
|
-
process_function:
|
|
1355
|
+
bundles: Iterable[FHIRObj],
|
|
1356
|
+
process_function: ProcessFunc = flatten_data,
|
|
1329
1357
|
build_df_after_query: bool = False,
|
|
1330
1358
|
disable_multiprocessing: bool = False,
|
|
1331
1359
|
always_return_dict: bool = False,
|
|
@@ -1353,19 +1381,15 @@ class Pirate:
|
|
|
1353
1381
|
pool = multiprocessing.Pool(self.num_processes)
|
|
1354
1382
|
if build_df_after_query or isinstance(bundles, List):
|
|
1355
1383
|
bundles = list(bundles)
|
|
1356
|
-
processed_bundles =
|
|
1357
|
-
|
|
1358
|
-
for bundle_output in tqdm(
|
|
1384
|
+
processed_bundles = list(
|
|
1385
|
+
tqdm(
|
|
1359
1386
|
pool.imap(process_function, bundles),
|
|
1360
1387
|
total=len(bundles),
|
|
1361
1388
|
desc="Build DF",
|
|
1362
1389
|
)
|
|
1363
|
-
|
|
1390
|
+
)
|
|
1364
1391
|
else:
|
|
1365
|
-
processed_bundles =
|
|
1366
|
-
bundle_output
|
|
1367
|
-
for bundle_output in pool.imap(process_function, bundles)
|
|
1368
|
-
]
|
|
1392
|
+
processed_bundles = list(pool.imap(process_function, bundles))
|
|
1369
1393
|
pool.close()
|
|
1370
1394
|
pool.join()
|
|
1371
1395
|
results: Dict[str, List[Dict[str, Any]]] = {}
|
|
@@ -1384,14 +1408,14 @@ class Pirate:
|
|
|
1384
1408
|
if always_return_dict:
|
|
1385
1409
|
return dfs
|
|
1386
1410
|
else:
|
|
1387
|
-
return
|
|
1411
|
+
return next(iter(dfs.values())) if len(dfs) == 1 else dfs
|
|
1388
1412
|
|
|
1389
1413
|
def _query_to_dataframe(
|
|
1390
1414
|
self,
|
|
1391
|
-
bundles_function: Callable,
|
|
1392
|
-
) -> Callable:
|
|
1415
|
+
bundles_function: Callable[..., Iterable[FHIRObj]],
|
|
1416
|
+
) -> Callable[..., Union[pd.DataFrame, Dict[str, pd.DataFrame]]]:
|
|
1393
1417
|
"""
|
|
1394
|
-
|
|
1418
|
+
Transform any function return Lists/Generators of
|
|
1395
1419
|
bundles into DataFrames.
|
|
1396
1420
|
|
|
1397
1421
|
:param bundles_function: The function that returns a Generator/List of bundles and that
|
|
@@ -1400,8 +1424,8 @@ class Pirate:
|
|
|
1400
1424
|
"""
|
|
1401
1425
|
|
|
1402
1426
|
def wrap(
|
|
1403
|
-
process_function:
|
|
1404
|
-
fhir_paths: List[Union[str, Tuple[str, str]]] = None,
|
|
1427
|
+
process_function: ProcessFunc = flatten_data,
|
|
1428
|
+
fhir_paths: Optional[List[Union[str, Tuple[str, str]]]] = None,
|
|
1405
1429
|
build_df_after_query: bool = False,
|
|
1406
1430
|
disable_multiprocessing_build: bool = False,
|
|
1407
1431
|
always_return_dict: bool = False,
|
|
@@ -1429,14 +1453,16 @@ class Pirate:
|
|
|
1429
1453
|
|
|
1430
1454
|
def query_to_dataframe(
|
|
1431
1455
|
self,
|
|
1432
|
-
bundles_function: Callable
|
|
1433
|
-
|
|
1434
|
-
|
|
1456
|
+
bundles_function: Callable[
|
|
1457
|
+
..., Union[List[FHIRObj], Generator[FHIRObj, None, int]]
|
|
1458
|
+
],
|
|
1459
|
+
process_function: ProcessFunc = flatten_data,
|
|
1460
|
+
fhir_paths: Optional[List[Union[str, Tuple[str, str]]]] = None,
|
|
1435
1461
|
build_df_after_query: bool = False,
|
|
1436
1462
|
**kwargs: Any,
|
|
1437
1463
|
) -> Union[pd.DataFrame, Dict[str, pd.DataFrame]]:
|
|
1438
1464
|
"""
|
|
1439
|
-
|
|
1465
|
+
Given any of the functions that return bundles, builds the
|
|
1440
1466
|
DataFrame straight away.
|
|
1441
1467
|
:param bundles_function: The function that should be used to get the bundles,
|
|
1442
1468
|
e.g. self.sail_through_search_space, trade_rows_for_bundles
|
fhir_pyrate/util/__init__.py
CHANGED
|
@@ -1,8 +1,4 @@
|
|
|
1
1
|
from fhir_pyrate.util.fhirobj import FHIRObj
|
|
2
|
-
from fhir_pyrate.util.util import get_datetime, string_from_column
|
|
2
|
+
from fhir_pyrate.util.util import get_datetime, now_utc, string_from_column
|
|
3
3
|
|
|
4
|
-
__all__ = [
|
|
5
|
-
"string_from_column",
|
|
6
|
-
"get_datetime",
|
|
7
|
-
"FHIRObj",
|
|
8
|
-
]
|
|
4
|
+
__all__ = ["FHIRObj", "get_datetime", "now_utc", "string_from_column"]
|
|
@@ -7,7 +7,9 @@ from fhir_pyrate.util import FHIRObj
|
|
|
7
7
|
logger = logging.getLogger(__name__)
|
|
8
8
|
|
|
9
9
|
|
|
10
|
-
def flatten_data(
|
|
10
|
+
def flatten_data(
|
|
11
|
+
bundle: FHIRObj, col_sep: str = "_"
|
|
12
|
+
) -> Dict[str, List[Dict[str, Any]]]:
|
|
11
13
|
"""
|
|
12
14
|
Preprocessing function that goes through the JSON bundle and returns lists of dictionaries
|
|
13
15
|
for all possible attributes
|
|
@@ -68,8 +70,8 @@ def recurse_resource(
|
|
|
68
70
|
|
|
69
71
|
|
|
70
72
|
def parse_fhir_path(
|
|
71
|
-
bundle: FHIRObj, compiled_fhir_paths: List[Tuple[str, Callable]]
|
|
72
|
-
) -> Dict[str, List[Dict]]:
|
|
73
|
+
bundle: FHIRObj, compiled_fhir_paths: List[Tuple[str, Callable[..., Any]]]
|
|
74
|
+
) -> Dict[str, List[Dict[str, Any]]]:
|
|
73
75
|
"""
|
|
74
76
|
Preprocessing function that goes through the JSON bundle and returns lists of dictionaries
|
|
75
77
|
for all possible attributes, which have been specified using a list of compiled FHIRPath
|
|
@@ -93,7 +95,8 @@ def parse_fhir_path(
|
|
|
93
95
|
if name in base_dict and base_dict[name] is not None and len(result) > 0:
|
|
94
96
|
warnings.warn(
|
|
95
97
|
f"The field {name} has already been filled with {base_dict[name]}, "
|
|
96
|
-
f"so it will not be overwritten."
|
|
98
|
+
f"so it will not be overwritten.",
|
|
99
|
+
stacklevel=2,
|
|
97
100
|
)
|
|
98
101
|
if name not in base_dict or base_dict[name] is None:
|
|
99
102
|
base_dict[name] = result
|
fhir_pyrate/util/fhirobj.py
CHANGED
|
@@ -17,10 +17,10 @@ class FHIRObj(SimpleNamespace):
|
|
|
17
17
|
def __getattr__(self, item: str) -> None:
|
|
18
18
|
return None
|
|
19
19
|
|
|
20
|
-
def __getstate__(self) -> Dict:
|
|
20
|
+
def __getstate__(self) -> Dict[str, Any]:
|
|
21
21
|
return self.__dict__
|
|
22
22
|
|
|
23
|
-
def __setstate__(self, state: Dict) -> None:
|
|
23
|
+
def __setstate__(self, state: Dict[str, Any]) -> None:
|
|
24
24
|
self.__dict__.update(state)
|
|
25
25
|
|
|
26
26
|
def to_json(self) -> str:
|
fhir_pyrate/util/imports.py
CHANGED
|
@@ -40,7 +40,7 @@ def optional_import(
|
|
|
40
40
|
allow_namespace_pkg: bool = False,
|
|
41
41
|
) -> Tuple[Any, bool]:
|
|
42
42
|
"""
|
|
43
|
-
|
|
43
|
+
Import an optional module specified by `module` string.
|
|
44
44
|
Any importing related exceptions will be stored, and exceptions raise lazily
|
|
45
45
|
when attempting to use the failed-to-import module.
|
|
46
46
|
|
|
@@ -100,14 +100,14 @@ def optional_import(
|
|
|
100
100
|
|
|
101
101
|
def __getattr__(self, name: str) -> str:
|
|
102
102
|
"""
|
|
103
|
-
|
|
103
|
+
Raise:
|
|
104
104
|
OptionalImportError: When you call this method.
|
|
105
105
|
"""
|
|
106
106
|
raise self._exception
|
|
107
107
|
|
|
108
108
|
def __call__(self, *_args: Any, **_kwargs: Any) -> str:
|
|
109
109
|
"""
|
|
110
|
-
|
|
110
|
+
Raise:
|
|
111
111
|
OptionalImportError: When you call this method.
|
|
112
112
|
"""
|
|
113
113
|
raise self._exception
|