fhir-pyrate 0.2.1__py3-none-any.whl → 0.2.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- fhir_pyrate/__init__.py +1 -1
- fhir_pyrate/ahoy.py +7 -7
- fhir_pyrate/dicom_downloader.py +53 -37
- fhir_pyrate/miner.py +17 -18
- fhir_pyrate/pirate.py +106 -81
- fhir_pyrate/util/__init__.py +1 -1
- fhir_pyrate/util/bundle_processing_templates.py +7 -4
- fhir_pyrate/util/fhirobj.py +2 -2
- fhir_pyrate/util/imports.py +3 -3
- fhir_pyrate/util/token_auth.py +16 -18
- fhir_pyrate/util/util.py +3 -3
- {fhir_pyrate-0.2.1.dist-info → fhir_pyrate-0.2.2.dist-info}/METADATA +9 -13
- fhir_pyrate-0.2.2.dist-info/RECORD +15 -0
- {fhir_pyrate-0.2.1.dist-info → fhir_pyrate-0.2.2.dist-info}/WHEEL +1 -1
- fhir_pyrate-0.2.1.dist-info/RECORD +0 -15
- {fhir_pyrate-0.2.1.dist-info → fhir_pyrate-0.2.2.dist-info}/LICENSE +0 -0
fhir_pyrate/__init__.py
CHANGED
fhir_pyrate/ahoy.py
CHANGED
@@ -40,15 +40,15 @@ class Ahoy:

     def __init__(
         self,
-        auth_url: str = None,
+        auth_url: Optional[str] = None,
         auth_type: Optional[str] = "token",
-        refresh_url: str = None,
-        username: str = None,
+        refresh_url: Optional[str] = None,
+        username: Optional[str] = None,
         auth_method: Optional[str] = "password",
-        token: str = None,
+        token: Optional[str] = None,
         max_login_attempts: int = 5,
-        token_refresh_delta: Union[int, timedelta] = None,
-        session: requests.Session = None,
+        token_refresh_delta: Optional[Union[int, timedelta]] = None,
+        session: Optional[requests.Session] = None,
     ) -> None:
         self.auth_type = auth_type
         self.auth_method = auth_method
@@ -82,7 +82,7 @@ class Ahoy:
         self.close()

     def change_environment_variable_name(
-        self, user_env: str = None, pass_env: str = None
+        self, user_env: Optional[str] = None, pass_env: Optional[str] = None
     ) -> None:
         """
         Change the name of the variables used to retrieve username and password.
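The only substantive change in ahoy.py is typing hygiene: every `arg: str = None` default becomes an explicit `Optional[...]`. A minimal sketch of why this matters; `connect` is a hypothetical stand-in, not part of the package:

```python
from typing import Optional

# `auth_url: str = None` relies on implicit Optional, which PEP 484 now
# disallows and strict mypy rejects; the explicit spelling is equivalent
# at runtime but type-checks cleanly.
def connect(auth_url: Optional[str] = None) -> str:
    return "anonymous" if auth_url is None else f"auth via {auth_url}"

print(connect())                                 # anonymous
print(connect("https://fhir.example.com/auth"))  # auth via ...
```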
fhir_pyrate/dicom_downloader.py
CHANGED
@@ -14,7 +14,18 @@ import warnings
 from contextlib import contextmanager
 from functools import partial
 from types import TracebackType
-from typing import
+from typing import (
+    ClassVar,
+    Dict,
+    FrozenSet,
+    Generator,
+    List,
+    Optional,
+    TextIO,
+    Tuple,
+    Type,
+    Union,
+)

 import pandas as pd
 import pydicom
@@ -67,7 +78,7 @@ def fileno(file_or_fd: TextIO) -> Optional[int]:
 @contextmanager
 def stdout_redirected(
     to: Union[str, TextIO] = os.devnull, stdout: Optional[TextIO] = None
-) -> Generator:
+) -> Generator[Optional[TextIO], None, None]:
     if platform.system() == "Windows":
         yield None
         return
@@ -136,23 +147,25 @@ class DicomDownloader:
     :param num_processes: The number of processes to run for downloading
     """

-    ACCEPTED_FORMATS =
- …
+    ACCEPTED_FORMATS: ClassVar[FrozenSet[str]] = frozenset(
+        {
+            ".dcm",
+            ".nia",
+            ".nii",
+            ".nii.gz",
+            ".hdr",
+            ".img",
+            ".img.gz",
+            ".tif",
+            ".TIF",
+            ".tiff",
+            ".TIFF",
+            ".mha",
+            ".mhd",
+            ".nrrd",
+            ".nhdr",
+        }
+    )

     def __init__(
         self,
@@ -239,7 +252,7 @@ class DicomDownloader:
     @staticmethod
     def get_download_id(
         study_uid: str,
-        series_uid: str = None,
+        series_uid: Optional[str] = None,
         always_download_in_study_folder: bool = False,
     ) -> str:
         """
@@ -259,7 +272,7 @@ class DicomDownloader:

     def get_download_path(self, download_id: str) -> pathlib.Path:
         """
- …
+        Build the folder hierarchy where the data will be stored. The hierarchy depends on the
         `hierarchical_storage` parameter. Given a download ID
         263a1dad02916f5eca3c4eec51dc9d281735b47b8eb8bc2343c56e6ccd and `hierarchical_storage` = 2,
         the data will be stored in 26/3a/1dad02916f5eca3c4eec51dc9d281735b47b8eb8bc2343c56e6ccd.
@@ -277,13 +290,13 @@ class DicomDownloader:
     def download_data(
         self,
         study_uid: str,
-        series_uid: str = None,
+        series_uid: Optional[str] = None,
         output_dir: Union[str, pathlib.Path] = "out",
         save_metadata: bool = True,
         existing_ids: Optional[List[str]] = None,
     ) -> Tuple[List[Dict[str, str]], List[Dict[str, str]]]:
         """
- …
+        Download the data related to the StudyInstanceUID and SeriesInstanceUID (if given,
         otherwise the entire study will be downloaded).

         :param study_uid: The StudyInstanceUID
@@ -333,7 +346,7 @@ class DicomDownloader:
         base_dict[self.series_instance_uid_field] = series_uid

         # Init the readers/writers
-        series_reader = sitk.ImageSeriesReader()
+        series_reader = sitk.ImageSeriesReader()
         with tempfile.TemporaryDirectory() as tmp_dir:
             # Create the download dir
             current_tmp_dir = pathlib.Path(tmp_dir)
@@ -361,11 +374,11 @@ class DicomDownloader:
             progress_bar.close()

             # Get Series ID names from folder
-            series_uids = sitk.ImageSeriesReader.GetGDCMSeriesIDs(str(current_tmp_dir))
+            series_uids = sitk.ImageSeriesReader.GetGDCMSeriesIDs(str(current_tmp_dir))
             logger.info(f"Study ID has {len(series_uids)} series.")
             for series in series_uids:
                 # Get the DICOMs corresponding to the series
-                files = series_reader.GetGDCMSeriesFileNames(
+                files = series_reader.GetGDCMSeriesFileNames(
                     str(current_tmp_dir), series
                 )
                 current_dict = base_dict.copy()
@@ -374,11 +387,12 @@ class DicomDownloader:
                 )
                 try:
                     # Read the series
-                    with
-                        f,
+                    with (
+                        simpleitk_warning_file.open("w") as f,
+                        stdout_redirected(f, stdout=sys.stderr),
                     ):
-                        series_reader.SetFileNames(files)
-                        image = series_reader.Execute()
+                        series_reader.SetFileNames(files)
+                        image = series_reader.Execute()
                     with simpleitk_warning_file.open("r") as f:
                         content = f.read()
                         if "warning" in content.lower():
@@ -431,9 +445,9 @@ class DicomDownloader:
                             series_download_dir / f"{series}_meta.dcm",
                         )
                     dcm_info = pydicom.dcmread(str(files[0]), stop_before_pixels=True)
-                    current_dict[
- …
+                    current_dict[self.deid_study_instance_uid_field] = (
+                        dcm_info.StudyInstanceUID
+                    )
                     current_dict[self.deid_series_instance_uid_field] = series
                     downloaded_series_info.append(current_dict)

@@ -442,7 +456,7 @@ class DicomDownloader:
     def fix_mapping_dataframe(
         self,
         df: pd.DataFrame,
-        mapping_df: pd.DataFrame = None,
+        mapping_df: Optional[pd.DataFrame] = None,
         output_dir: Union[str, pathlib.Path] = "out",
         study_uid_col: str = "study_instance_uid",
         series_uid_col: str = "series_instance_uid",
@@ -464,7 +478,8 @@ class DicomDownloader:
         output_dir = pathlib.Path(output_dir)
         if not output_dir.exists() or not len(list(output_dir.glob("*"))):
             warnings.warn(
-                "Cannot fix the mapping file if the output directory does not exist."
+                "Cannot fix the mapping file if the output directory does not exist.",
+                stacklevel=2,
             )
             return None
         if mapping_df is None:
@@ -547,7 +562,7 @@ class DicomDownloader:
         output_dir: Union[str, pathlib.Path] = "out",
         study_uid_col: str = "study_instance_uid",
         series_uid_col: Optional[str] = "series_instance_uid",
-        mapping_df: pd.DataFrame = None,
+        mapping_df: Optional[pd.DataFrame] = None,
         download_full_study: bool = False,
         save_metadata: bool = True,
     ) -> Tuple[pd.DataFrame, pd.DataFrame]:
@@ -593,7 +608,8 @@ class DicomDownloader:
             warnings.warn(
                 "download_full_study = False will only download a specified series but "
                 "have not provided a valid Series UID column of the DataFrame, "
-                "as a result the full study will be downloaded."
+                "as a result the full study will be downloaded.",
+                stacklevel=2,
             )

         # Create list of rows
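Two patterns recur in dicom_downloader.py: class-level constants are now annotated with `ClassVar` and stored as a `frozenset`, and every `warnings.warn` call gains `stacklevel=2`. A small sketch of the first pattern; `FormatChecker` and `is_accepted` are illustrative names, and the suffix-matching logic is an assumption rather than the package's actual lookup code:

```python
import pathlib
from typing import ClassVar, FrozenSet

class FormatChecker:
    # ClassVar marks the constant as class-level state (shared, never an
    # instance attribute), and frozenset gives O(1) immutable lookups.
    ACCEPTED_FORMATS: ClassVar[FrozenSet[str]] = frozenset({".dcm", ".nii", ".nii.gz"})

    @classmethod
    def is_accepted(cls, filename: str) -> bool:
        # "".join(suffixes) keeps compound extensions such as ".nii.gz".
        return "".join(pathlib.PurePath(filename).suffixes) in cls.ACCEPTED_FORMATS

print(FormatChecker.is_accepted("scan.nii.gz"))  # True
print(FormatChecker.is_accepted("scan.jpeg"))    # False
```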
fhir_pyrate/miner.py
CHANGED
@@ -29,9 +29,9 @@ class Miner:
     def __init__(
         self,
         target_regex: str,
-        negation_regex: str = None,
-        regex_flags: Union[int, re.RegexFlag] = None,
-        decode_text: Callable = None,
+        negation_regex: Optional[str] = None,
+        regex_flags: Optional[Union[int, re.RegexFlag]] = None,
+        decode_text: Optional[Callable[[str], str]] = None,
         nlp_lib: str = "de_core_news_sm",
         num_processes: int = 1,
     ) -> None:
@@ -49,6 +49,7 @@ class Miner:
                 "this will probably not work, because it needs access to your home "
                 "directory. Please run python -m spacy download {nlp_lib} in your "
                 "docker file.",
+                stacklevel=2,
             )
             subprocess.run(
                 f"python3 -m spacy download {nlp_lib}".split(" "),
@@ -66,7 +67,7 @@ class Miner:
     @staticmethod
     def _remove_header(sentences: List[Span], main_document_keyword: str) -> List[Span]:
         """
- …
+        Remove all sentences that come before a sentence that contains the `main_document_keyword`.
         This is useful when a document has a header, and we know what the first viable word of a
         document is, or we know that we are interested in some particular part of the
         document that comes after a certain keyword.
@@ -86,10 +87,10 @@ class Miner:
     def _check_diagnostic_report(
         self,
         report_text: str,
-        main_document_keyword: str = "",
+        main_document_keyword: Optional[str] = "",
     ) -> Optional[List[Span]]:
         """
- …
+        Check whether a report contains the relevant RegEx and does not contain the negation
         RegEx (if specified).

         :param report_text: The text to be searched
@@ -103,7 +104,7 @@ class Miner:
         contains_target = re.search(self.target_regex, report_text, self.regex_flags)
         relevant_sentences = []
         if contains_target:
-            sentences =
+            sentences = list(self.nlp(report_text).sents)
             if main_document_keyword is not None:
                 sentences = self._remove_header(sentences, main_document_keyword)

@@ -129,10 +130,10 @@ class Miner:
         df: pd.DataFrame,
         text_column_name: str,
         new_column_name: str = "text_found",
-        main_document_keyword: str = None,
+        main_document_keyword: Optional[str] = None,
     ) -> pd.DataFrame:
         """
- …
+        Search the strings contained in `text_column_name` for the selected RegEx, and adds two
         columns to the DataFrame with the output of the NLP search. The negation RegEx can be
         used to exclude sentences. Additionally, it is possible to define a `main_document_keyword`,
         which is a string that can be used to filter out the header of the document.
@@ -151,31 +152,29 @@ class Miner:
             self._check_diagnostic_report,
             main_document_keyword=main_document_keyword,
         )
-        texts =
+        texts = list(df[text_column_name].values)
         tqdm_text = f"Searching for Sentences with {self.target_regex}"
         if self.negation_regex is not None:
             tqdm_text += f" and without {self.negation_regex}"
        if self.num_processes > 1:
            pool = multiprocessing.Pool(self.num_processes)
-            results =
- …
-            for result in tqdm(
+            results = list(
+                tqdm(
                    pool.imap(func, texts),
                    total=len(df),
                    desc=tqdm_text,
                )
- …
+            )
            pool.close()
            pool.join()
        else:
-            results =
- …
-            for result in tqdm(
+            results = list(
+                tqdm(
                    [func(text) for text in texts],
                    total=len(df),
                    desc=tqdm_text,
                )
- …
+            )

        df[new_column_name + "_sentences"] = results
        df[new_column_name] = ~df[new_column_name + "_sentences"].isna()
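The main mechanical change in miner.py replaces list comprehensions over `tqdm`-wrapped iterators with `list(tqdm(...))`, which behaves identically but reads more directly. A runnable sketch of the multiprocessing branch, with a toy `score` function standing in for `_check_diagnostic_report`:

```python
import multiprocessing
from tqdm import tqdm

def score(text: str) -> int:  # hypothetical stand-in for the real check
    return len(text)

if __name__ == "__main__":
    texts = ["short", "a longer report", "x"]
    with multiprocessing.Pool(2) as pool:
        # tqdm wraps the lazy imap iterator, so the bar advances as results
        # arrive; list() then drains it in order.
        results = list(tqdm(pool.imap(score, texts), total=len(texts), desc="Mining"))
    print(results)  # [5, 15, 1]
```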
fhir_pyrate/pirate.py
CHANGED
@@ -9,9 +9,22 @@ import warnings
 from functools import partial
 from pathlib import Path
 from types import TracebackType
-from typing import
+from typing import (
+    Any,
+    Callable,
+    ClassVar,
+    Dict,
+    Generator,
+    Iterable,
+    List,
+    Optional,
+    Tuple,
+    Type,
+    TypeAlias,
+    TypeVar,
+    Union,
+)

-import fhirpathpy
 import pandas as pd
 import requests
 from dateutil.parser import parse
@@ -23,13 +36,21 @@ from tqdm.contrib.logging import logging_redirect_tqdm
 from fhir_pyrate import Ahoy
 from fhir_pyrate.util import FHIRObj, string_from_column
 from fhir_pyrate.util.bundle_processing_templates import flatten_data, parse_fhir_path
+from fhir_pyrate.util.imports import optional_import
+
+TParam = TypeVar("TParam")
+TFHIRObj = TypeVar("TFHIRObj", bound=FHIRObj)
+ProcessFunc: TypeAlias = Callable[[FHIRObj], Any]
+
+# Note to people from the future. This actually should be an optional import to avoid that, if people want to use another version of antlr, this creates crazy errors that the version does not match. In such cases, it is not possible to use fhirpathpy, but the processing functions can still be used.
+fhirpathpy, _ = optional_import(module="fhirpathpy")

 logger = logging.getLogger(__name__)


 def create_key(request: requests.PreparedRequest, **kwargs: Any) -> str:
     """
- …
+    Create a unique key for each request URL.

     :param request: The request to create a key for
     :param kwargs: Unused, needed for compatibility with the library
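The biggest structural change at the top of pirate.py is that `fhirpathpy` is no longer imported eagerly: `optional_import` returns the module (or a lazily failing placeholder) plus a success flag, so importing pirate.py no longer breaks when a conflicting antlr version is installed. A sketch based on the signatures visible in this diff:

```python
from fhir_pyrate.util.imports import optional_import

# Returns (module, True) on success, or (placeholder, False) whose
# attribute access raises only when the placeholder is actually used.
fhirpathpy, has_fhirpathpy = optional_import(module="fhirpathpy")

if has_fhirpathpy:
    # compile(path=...) is the call pirate.py itself makes further down.
    family = fhirpathpy.compile(path="Patient.name.family")
```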
@@ -73,7 +94,7 @@ class Pirate:
     :param optional_get_params: Optional parameters that will be passed to the session's get calls
     """

-    FHIRPATH_INVALID_TOKENS =
+    FHIRPATH_INVALID_TOKENS: ClassVar[Tuple[str, ...]] = (
         "div",
         "mod",
         "in",
@@ -81,7 +102,7 @@ class Pirate:
         "or",
         "xor",
         "implies",
- …
+    )

     def __init__(
         self,
@@ -90,14 +111,14 @@ class Pirate:
         num_processes: int = 1,
         print_request_url: bool = False,
         time_format: str = "%Y-%m-%dT%H:%M",
-        default_count: int = None,
-        cache_folder: Union[str, Path] = None,
+        default_count: Optional[int] = None,
+        cache_folder: Optional[Union[str, Path]] = None,
         cache_expiry_time: Union[datetime.datetime, int] = -1,  # -1 = does not expire
-        retry_requests: Retry = None,
+        retry_requests: Optional[Retry] = None,
         disable_multiprocessing_requests: bool = False,
         disable_multiprocessing_build: bool = False,
         silence_fhirpath_warning: bool = False,
-        optional_get_params: Dict[Any, Any] = None,
+        optional_get_params: Optional[Dict[Any, Any]] = None,
     ):
         # Remove the last character if they added it
         url_search = re.search(
@@ -179,7 +200,7 @@ class Pirate:
     def get_bundle_total(
         self,
         resource_type: str,
-        request_params: Dict[str, Any] = None,
+        request_params: Optional[Dict[str, Any]] = None,
         count_entries: bool = False,
     ) -> Optional[int]:
         """
@@ -203,11 +224,11 @@ class Pirate:
     def steal_bundles(
         self,
         resource_type: str,
-        request_params: Dict[str, Any] = None,
+        request_params: Optional[Dict[str, Any]] = None,
         num_pages: int = -1,
     ) -> Generator[FHIRObj, None, int]:
         """
- …
+        Execute a request, iterates through the result pages and returns all the bundles as a
         generator.

         :param resource_type: The resource to be queried, e.g. DiagnosticReport
@@ -230,14 +251,14 @@ class Pirate:
     def steal_bundles_to_dataframe(
         self,
         resource_type: str,
-        request_params: Dict[str, Any] = None,
+        request_params: Optional[Dict[str, Any]] = None,
         num_pages: int = -1,
-        process_function:
-        fhir_paths: List[Union[str, Tuple[str, str]]] = None,
+        process_function: ProcessFunc = flatten_data,
+        fhir_paths: Optional[List[Union[str, Tuple[str, str]]]] = None,
         build_df_after_query: bool = False,
     ) -> Union[pd.DataFrame, Dict[str, pd.DataFrame]]:
         """
- …
+        Execute a request, iterates through the result pages, and builds a DataFrame with their
         information. The DataFrames are either built after each
         bundle is retrieved, or after we collected all bundles.

@@ -276,10 +297,10 @@ class Pirate:
         time_attribute_name: str,
         date_init: Union[str, datetime.date],
         date_end: Union[str, datetime.date],
-        request_params: Dict[str, Any] = None,
+        request_params: Optional[Dict[str, Any]] = None,
     ) -> Generator[FHIRObj, None, int]:
         """
- …
+        Use the multiprocessing module to speed up some queries. The time frame is
         divided into multiple time spans (as many as there are processes) and each smaller
         time frame is investigated simultaneously.

@@ -307,13 +328,13 @@ class Pirate:
         time_attribute_name: str,
         date_init: Union[str, datetime.date],
         date_end: Union[str, datetime.date],
-        request_params: Dict[str, Any] = None,
-        process_function:
-        fhir_paths: List[Union[str, Tuple[str, str]]] = None,
+        request_params: Optional[Dict[str, Any]] = None,
+        process_function: ProcessFunc = flatten_data,
+        fhir_paths: Optional[List[Union[str, Tuple[str, str]]]] = None,
         build_df_after_query: bool = False,
     ) -> Union[pd.DataFrame, Dict[str, pd.DataFrame]]:
         """
- …
+        Use the multiprocessing module to speed up some queries. The time frame is
         divided into multiple time spans (as many as there are processes) and each smaller
         time frame is investigated simultaneously. Finally, it builds a DataFrame with the
         information from all timespans. The DataFrames are either built after each
@@ -357,7 +378,7 @@ class Pirate:
         df_constraints: Dict[
             str, Union[Union[str, Tuple[str, str]], List[Union[str, Tuple[str, str]]]]
         ],
-        request_params: Dict[str, Any] = None,
+        request_params: Optional[Dict[str, Any]] = None,
         num_pages: int = -1,
     ) -> Generator[FHIRObj, None, int]:
         """
@@ -398,12 +419,12 @@ class Pirate:
         df_constraints: Dict[
             str, Union[Union[str, Tuple[str, str]], List[Union[str, Tuple[str, str]]]]
         ],
-        process_function:
-        fhir_paths: List[Union[str, Tuple[str, str]]] = None,
-        request_params: Dict[str, Any] = None,
+        process_function: ProcessFunc = flatten_data,
+        fhir_paths: Optional[List[Union[str, Tuple[str, str]]]] = None,
+        request_params: Optional[Dict[str, Any]] = None,
         num_pages: int = -1,
         with_ref: bool = True,
-        with_columns: List[Union[str, Tuple[str, str]]] = None,
+        with_columns: Optional[List[Union[str, Tuple[str, str]]]] = None,
         build_df_after_query: bool = False,
     ) -> Union[pd.DataFrame, Dict[str, pd.DataFrame]]:
         """
@@ -549,7 +570,7 @@ class Pirate:
         ):
             # If we don't want multiprocessing
             for param, input_param in tqdm(
-                zip(params_per_sample, input_params_per_sample),
+                zip(params_per_sample, input_params_per_sample, strict=False),
                 total=len(params_per_sample),
                 desc=tqdm_text,
             ):
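`zip(..., strict=False)` appears twice in this region. The keyword only exists on Python 3.10+ (matching the new `Requires-Python` bound below) and simply spells out the default truncating behaviour, typically to satisfy lint rules such as flake8-bugbear's B905:

```python
params = ["a", "b", "c"]
inputs = [1, 2]

# strict=False keeps the classic behaviour of silently truncating to the
# shorter iterable; strict=True would raise ValueError here instead.
print(list(zip(params, inputs, strict=False)))  # [('a', 1), ('b', 2)]
```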
@@ -573,7 +594,7 @@ class Pirate:
             pool = multiprocessing.Pool(self.num_processes)
             results = []
             for param, input_param in tqdm(
-                zip(params_per_sample, input_params_per_sample),
+                zip(params_per_sample, input_params_per_sample, strict=False),
                 total=len(params_per_sample),
                 desc=tqdm_text,
             ):
@@ -582,13 +603,13 @@ class Pirate:
                 (
                     pool.apply_async(
                         self._bundles_to_dataframe,
-                        kwds=
-                        bundles
-                        process_function
-                        build_df_after_query
-                        disable_multiprocessing
-                        always_return_dict
- …
+                        kwds={
+                            "bundles": list(self._get_bundles(**param)),
+                            "process_function": process_function,
+                            "build_df_after_query": False,
+                            "disable_multiprocessing": True,
+                            "always_return_dict": True,
+                        },
                     ),
                     input_param,
                 )
@@ -610,13 +631,13 @@ class Pirate:
             resource_type: pd.concat(final_dfs[resource_type], ignore_index=True)
             for resource_type in final_dfs
         }
-        return
+        return next(iter(dfs.values())) if len(dfs) == 1 else dfs

     def bundles_to_dataframe(
         self,
         bundles: Union[List[FHIRObj], Generator[FHIRObj, None, int]],
-        process_function:
-        fhir_paths: List[Union[str, Tuple[str, str]]] = None,
+        process_function: ProcessFunc = flatten_data,
+        fhir_paths: Optional[List[Union[str, Tuple[str, str]]]] = None,
     ) -> Union[pd.DataFrame, Dict[str, pd.DataFrame]]:
         """
         Convert a bundle into a DataFrame using either the `flatten_data` function (default),
@@ -783,7 +804,7 @@ class Pirate:
     def _adjust_df_constraints(
         df_constraints: Dict[
             str, Union[Union[str, Tuple[str, str]], List[Union[str, Tuple[str, str]]]]
-        ]
+        ],
     ) -> Dict[str, List[Tuple[str, str]]]:
         """
         Adjust the constraint dictionary to always have the same structure, which makes it easier
@@ -835,7 +856,7 @@ class Pirate:
         df_constraints: Dict[str, List[Tuple[str, str]]],
     ) -> List[Dict[str, List[str]]]:
         """
- …
+        Build the request parameters for each sample by checking the constraint set on each row.
         The resulting request parameters are given by the general `request_params` and by the
         constraint given by each row. E.g. if df_constraints = {"subject": "patient_id"}, then
         the resulting list will contain {"subject": row.patient_id} for each row of the DataFrame.
@@ -911,7 +932,7 @@ class Pirate:

     def _get_response(self, request_url: str) -> Optional[FHIRObj]:
         """
- …
+        Perform the API request and returns the response as a dictionary.

         :param request_url: The request string
         :return: A FHIR bundle
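The previously bare `return` statements are completed into a small convention used in several places: when only one resource type was queried, the caller gets a plain DataFrame instead of a one-entry dict. A sketch of that convention in isolation (`collapse` is a hypothetical name):

```python
from typing import Dict, Union

import pandas as pd

def collapse(dfs: Dict[str, pd.DataFrame]) -> Union[pd.DataFrame, Dict[str, pd.DataFrame]]:
    # Same expression as the new return statements: unwrap a single-entry
    # dict, otherwise hand back the whole mapping.
    return next(iter(dfs.values())) if len(dfs) == 1 else dfs

print(type(collapse({"Patient": pd.DataFrame({"id": [1]})})).__name__)  # DataFrame
```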
@@ -985,13 +1006,13 @@ class Pirate:
     def _get_bundles(
         self,
         resource_type: str,
-        request_params: Dict[str, Any] = None,
+        request_params: Optional[Dict[str, Any]] = None,
         num_pages: int = -1,
         silence_tqdm: bool = False,
         tqdm_df_build: bool = False,
     ) -> Generator[FHIRObj, None, int]:
         """
- …
+        Execute a request, iterates through the result pages and returns all the bundles as a
         generator.
         Additionally, some checks are performed, and the corresponding warnings are returned:
         - Whether a sorting has been defined
@@ -1031,7 +1052,8 @@ class Pirate:
             warnings.warn(
                 f"We detected multiple pages (approximately {bundle_total} pages) but "
                 f"no sorting method has been defined, which may yield incorrect results. "
-                f"We will set the sorting parameter to _id."
+                f"We will set the sorting parameter to _id.",
+                stacklevel=2,
             )
             current_params["_sort"] = "_id"
         request_params_string = self._concat_request_params(current_params)
@@ -1080,7 +1102,7 @@ class Pirate:
         tqdm_df_build: bool,
     ) -> Generator[FHIRObj, None, int]:
         """
- …
+        Set the `time_attribute_name` date parameters for the
         `sail_through_search_space function`.
         """
         request_params[time_attribute_name] = (
@@ -1096,9 +1118,11 @@ class Pirate:
         )

     @staticmethod
-    def _generator_to_list(
+    def _generator_to_list(
+        f: Callable[..., Iterable[TFHIRObj]], *args: Any, **kwargs: Any
+    ) -> List[FHIRObj]:
         """
- …
+        Convert the result of a function returning a generator to a list.
         """
         return list(f(*args, **kwargs))

@@ -1108,7 +1132,7 @@ class Pirate:

     def _run_multiquery(
         self,
-        func: Callable,
+        func: Callable[[TParam], Iterable[TFHIRObj]],
         query_params: List[Any],
         tqdm_text: str,
     ) -> Generator[FHIRObj, None, int]:
@@ -1139,11 +1163,11 @@ class Pirate:
         time_attribute_name: str,
         date_init: Union[str, datetime.date],
         date_end: Union[str, datetime.date],
-        request_params: Dict[str, Any] = None,
+        request_params: Optional[Dict[str, Any]] = None,
         tqdm_df_build: bool = False,
     ) -> Generator[FHIRObj, None, int]:
         """
- …
+        Use the multiprocessing module to speed up some queries. The time frame is
         divided into multiple time spans (as many as there are processes) and each smaller
         time frame is investigated simultaneously.

@@ -1169,7 +1193,8 @@ class Pirate:
             warnings.warn(
                 f"Detected use of parameter {time_attribute_name} "
                 f"in the request parameters. Please use the date_init (inclusive) and "
-                f"date_end (exclusive) parameters instead."
+                f"date_end (exclusive) parameters instead.",
+                stacklevel=2,
             )
             # Remove all elements that refer to a date
             request_params = {
@@ -1189,7 +1214,7 @@ class Pirate:
         )
         # Divide the current time period into smaller spans
         timespans = self._get_timespan_list(date_init, date_end)
-        func
+        func = partial(
             self._get_bundles_for_timespan,
             resource_type,
             request_params,
@@ -1213,7 +1238,7 @@ class Pirate:
         df_constraints: Dict[
             str, Union[Union[str, Tuple[str, str]], List[Union[str, Tuple[str, str]]]]
         ],
-        request_params: Dict[str, Any] = None,
+        request_params: Optional[Dict[str, Any]] = None,
         num_pages: int = -1,
         tqdm_df_build: bool = False,
     ) -> Generator[FHIRObj, None, int]:
@@ -1246,7 +1271,7 @@ class Pirate:
             request_params=request_params,
             df_constraints=self._adjust_df_constraints(df_constraints),
         )
-        func
+        func = partial(
             self._get_bundles,
             resource_type,
             num_pages=num_pages,
@@ -1278,16 +1303,17 @@ class Pirate:
             if key_mapping[key] in df.columns:
                 warnings.warn(
                     f"A column with name {key_mapping[key]} already exists in the output"
-                    f"DataFrame, and the column {key} will not be copied."
+                    f"DataFrame, and the column {key} will not be copied.",
+                    stacklevel=2,
                 )
             else:
                 df[key_mapping[key]] = value

     def _set_up_fhirpath_function(
         self, fhir_paths: List[Union[str, Tuple[str, str]]]
-    ) ->
+    ) -> ProcessFunc:
         """
- …
+        Prepare and compile the FHIRPath and sets them as the processing function for building
         the DataFrames.
         :param fhir_paths: A list of FHIR paths (https://hl7.org/fhirpath/) to be used to build the
         DataFrame, alternatively, a list of tuples can be used to specify the column name of the
@@ -1316,17 +1342,18 @@ class Pirate:
                 f"If you really want to do this, please use processing functions "
                 f"instead. If you are using the FHIRPath expressions correctly as "
                 f"they are intended, you can silence the warning when "
-                f"initializing the class."
+                f"initializing the class.",
+                stacklevel=2,
             )
- …
+        compiled_paths = [
             (name, fhirpathpy.compile(path=path)) for name, path in fhir_paths_with_name
         ]
-        return partial(parse_fhir_path, compiled_fhir_paths=
+        return partial(parse_fhir_path, compiled_fhir_paths=compiled_paths)

     def _bundles_to_dataframe(
         self,
-        bundles:
-        process_function:
+        bundles: Iterable[FHIRObj],
+        process_function: ProcessFunc = flatten_data,
         build_df_after_query: bool = False,
         disable_multiprocessing: bool = False,
         always_return_dict: bool = False,
@@ -1354,19 +1381,15 @@ class Pirate:
             pool = multiprocessing.Pool(self.num_processes)
             if build_df_after_query or isinstance(bundles, List):
                 bundles = list(bundles)
-                processed_bundles =
- …
-                for bundle_output in tqdm(
+                processed_bundles = list(
+                    tqdm(
                         pool.imap(process_function, bundles),
                         total=len(bundles),
                         desc="Build DF",
                     )
- …
+                )
             else:
-                processed_bundles =
-                    bundle_output
-                    for bundle_output in pool.imap(process_function, bundles)
-                ]
+                processed_bundles = list(pool.imap(process_function, bundles))
             pool.close()
             pool.join()
         results: Dict[str, List[Dict[str, Any]]] = {}
@@ -1385,14 +1408,14 @@ class Pirate:
         if always_return_dict:
             return dfs
         else:
-            return
+            return next(iter(dfs.values())) if len(dfs) == 1 else dfs

     def _query_to_dataframe(
         self,
-        bundles_function: Callable,
-    ) -> Callable:
+        bundles_function: Callable[..., Iterable[FHIRObj]],
+    ) -> Callable[..., Union[pd.DataFrame, Dict[str, pd.DataFrame]]]:
         """
- …
+        Transform any function return Lists/Generators of
         bundles into DataFrames.

         :param bundles_function: The function that returns a Generator/List of bundles and that
@@ -1401,8 +1424,8 @@ class Pirate:
         """

         def wrap(
-            process_function:
-            fhir_paths: List[Union[str, Tuple[str, str]]] = None,
+            process_function: ProcessFunc = flatten_data,
+            fhir_paths: Optional[List[Union[str, Tuple[str, str]]]] = None,
             build_df_after_query: bool = False,
             disable_multiprocessing_build: bool = False,
             always_return_dict: bool = False,
@@ -1430,14 +1453,16 @@ class Pirate:

     def query_to_dataframe(
         self,
-        bundles_function: Callable
- …
+        bundles_function: Callable[
+            ..., Union[List[FHIRObj], Generator[FHIRObj, None, int]]
+        ],
+        process_function: ProcessFunc = flatten_data,
+        fhir_paths: Optional[List[Union[str, Tuple[str, str]]]] = None,
         build_df_after_query: bool = False,
         **kwargs: Any,
     ) -> Union[pd.DataFrame, Dict[str, pd.DataFrame]]:
         """
- …
+        Given any of the functions that return bundles, builds the
         DataFrame straight away.
         :param bundles_function: The function that should be used to get the bundles,
         e.g. self.sail_through_search_space, trade_rows_for_bundles
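Several signatures above now take `process_function: ProcessFunc = flatten_data`, where `ProcessFunc` is the new `Callable[[FHIRObj], Any]` alias. Any function of that shape can be plugged in; the field selection below is purely illustrative:

```python
from typing import Any, Dict, List

from fhir_pyrate.util import FHIRObj

def patient_ids(bundle: FHIRObj) -> List[Dict[str, Any]]:
    # FHIRObj's catch-all __getattr__ returns None for missing fields,
    # hence the `or []` guard on bundle.entry.
    return [{"id": entry.resource.id} for entry in bundle.entry or []]
```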
fhir_pyrate/util/__init__.py
CHANGED
fhir_pyrate/util/bundle_processing_templates.py
CHANGED
@@ -7,7 +7,9 @@ from fhir_pyrate.util import FHIRObj
 logger = logging.getLogger(__name__)


-def flatten_data(
+def flatten_data(
+    bundle: FHIRObj, col_sep: str = "_"
+) -> Dict[str, List[Dict[str, Any]]]:
     """
     Preprocessing function that goes through the JSON bundle and returns lists of dictionaries
     for all possible attributes
@@ -68,8 +70,8 @@ def recurse_resource(


 def parse_fhir_path(
-    bundle: FHIRObj, compiled_fhir_paths: List[Tuple[str, Callable]]
-) -> Dict[str, List[Dict]]:
+    bundle: FHIRObj, compiled_fhir_paths: List[Tuple[str, Callable[..., Any]]]
+) -> Dict[str, List[Dict[str, Any]]]:
     """
     Preprocessing function that goes through the JSON bundle and returns lists of dictionaries
     for all possible attributes, which have been specified using a list of compiled FHIRPath
@@ -93,7 +95,8 @@ def parse_fhir_path(
     if name in base_dict and base_dict[name] is not None and len(result) > 0:
         warnings.warn(
             f"The field {name} has already been filled with {base_dict[name]}, "
-            f"so it will not be overwritten."
+            f"so it will not be overwritten.",
+            stacklevel=2,
         )
     if name not in base_dict or base_dict[name] is None:
         base_dict[name] = result
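`stacklevel=2` is added to every `warnings.warn` call in this release. It shifts the reported location one frame up, so the warning points at the caller's line instead of the library internals. A self-contained demonstration (`library_internal` is a hypothetical name):

```python
import warnings

def library_internal(already_filled: bool) -> None:
    if already_filled:
        # stacklevel=2: the warning is attributed to the *caller* of
        # library_internal, not to this warnings.warn line.
        warnings.warn("field already filled, not overwritten", stacklevel=2)

library_internal(already_filled=True)  # warning reported against this line
```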
fhir_pyrate/util/fhirobj.py
CHANGED
@@ -17,10 +17,10 @@ class FHIRObj(SimpleNamespace):
     def __getattr__(self, item: str) -> None:
         return None

-    def __getstate__(self) -> Dict:
+    def __getstate__(self) -> Dict[str, Any]:
         return self.__dict__

-    def __setstate__(self, state: Dict) -> None:
+    def __setstate__(self, state: Dict[str, Any]) -> None:
         self.__dict__.update(state)

     def to_json(self) -> str:
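For context on why `FHIRObj` defines `__getstate__`/`__setstate__` at all: its catch-all `__getattr__` would otherwise make pickle believe a `__setstate__` attribute exists (with value `None`) and crash when calling it. A minimal reproduction of the pattern, with `Obj` standing in for `FHIRObj`:

```python
import pickle
from types import SimpleNamespace
from typing import Any, Dict

class Obj(SimpleNamespace):
    def __getattr__(self, item: str) -> None:
        return None  # missing FHIR fields read as None instead of raising

    def __getstate__(self) -> Dict[str, Any]:
        return self.__dict__

    def __setstate__(self, state: Dict[str, Any]) -> None:
        self.__dict__.update(state)

o = pickle.loads(pickle.dumps(Obj(id="123")))
print(o.id, o.missing_field)  # 123 None
```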
fhir_pyrate/util/imports.py
CHANGED
@@ -40,7 +40,7 @@ def optional_import(
     allow_namespace_pkg: bool = False,
 ) -> Tuple[Any, bool]:
     """
- …
+    Import an optional module specified by `module` string.
     Any importing related exceptions will be stored, and exceptions raise lazily
     when attempting to use the failed-to-import module.

@@ -100,14 +100,14 @@ def optional_import(

     def __getattr__(self, name: str) -> str:
         """
- …
+        Raise:
             OptionalImportError: When you call this method.
         """
         raise self._exception

     def __call__(self, *_args: Any, **_kwargs: Any) -> str:
         """
- …
+        Raise:
             OptionalImportError: When you call this method.
         """
         raise self._exception
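The failure path of `optional_import` is the interesting half: a missing module does not raise at import time, only when the placeholder is actually used. A sketch based on the docstrings shown above (the module name is deliberately nonexistent):

```python
from fhir_pyrate.util.imports import optional_import

mod, ok = optional_import(module="definitely_not_installed_xyz")
print(ok)  # False
try:
    mod.anything  # first real use raises the stored import error
except Exception as exc:
    print(type(exc).__name__)  # OptionalImportError
```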
fhir_pyrate/util/token_auth.py
CHANGED
@@ -37,10 +37,10 @@ class TokenAuth(requests.auth.AuthBase):
         username: str,
         password: str,
         auth_url: str,
-        refresh_url: str = None,
-        session: requests.Session = None,
+        refresh_url: Optional[str] = None,
+        session: Optional[requests.Session] = None,
         max_login_attempts: int = 5,
-        token_refresh_delta: Union[int, timedelta] = None,
+        token_refresh_delta: Optional[Union[int, timedelta]] = None,
     ) -> None:
         self._username = username
         self._password = password
@@ -69,7 +69,7 @@ class TokenAuth(requests.auth.AuthBase):

     def _authenticate(self) -> None:
         """
- …
+        Authenticate the user using the authentication URL and sets the token.
         """
         # Authentication to get the token
         response = self._token_session.get(
@@ -80,7 +80,7 @@ class TokenAuth(requests.auth.AuthBase):

     def __call__(self, r: requests.PreparedRequest) -> requests.PreparedRequest:
         """
- …
+        Set the necessary authentication header of the current request.

         :param r: The prepared request that should be sent
         :return: The prepared request
@@ -90,7 +90,7 @@ class TokenAuth(requests.auth.AuthBase):

     def is_refresh_required(self) -> bool:
         """
- …
+        Compute whether the token should be refreshed according to the given token and to the
         _token_refresh_delta variable.

         :return: Whether the token is about to expire and should thus be refreshed
@@ -123,7 +123,7 @@ class TokenAuth(requests.auth.AuthBase):
             and (now_utc() - self.auth_time) > self._token_refresh_delta
         )

-    def refresh_token(self, token: str = None) -> None:
+    def refresh_token(self, token: Optional[str] = None) -> None:
         """
         Refresh the current session either by logging in again or by refreshing the token.

@@ -150,7 +150,7 @@ class TokenAuth(requests.auth.AuthBase):
         self, response: requests.Response, *args: Any, **kwargs: Any
     ) -> Optional[requests.Response]:
         """
- …
+        Check whether the login was successful and
         if it was not, it either refreshes the token or authenticates the user again.

         :param response: The received response
@@ -166,16 +166,14 @@ class TokenAuth(requests.auth.AuthBase):
         # If the state is unauthorized,
         # then we should set how many times we have tried logging in
         if response.status_code == requests.codes.unauthorized:
- …
-            )
- …
-            else:
-                response.request.login_reattempted_times = 1  # type: ignore
+            login_attempts: int = getattr(
+                response.request, "login_reattempted_times", 0
+            )
+            logger.info("Refreshing token because of unauthorized status.")
+            login_attempts += 1
+            if login_attempts >= self._max_login_attempts:
+                response.raise_for_status()
+            setattr(response.request, "login_reattempted_times", login_attempts)  # noqa
         else:
             logger.info("Refreshing token refresh is required.")
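The rewritten unauthorized-response branch replaces an if/else on a possibly missing attribute with `getattr(..., default)`/`setattr`, counting login retries directly on the request object. The logic in isolation (`Req` and `count_attempt` are stand-ins, not package code):

```python
from typing import Any

class Req:  # hypothetical stand-in for requests.PreparedRequest
    pass

def count_attempt(request: Any, max_attempts: int = 5) -> int:
    # getattr with a default reads the counter as 0 the first time the
    # request is seen, so no separate "first attempt" branch is needed.
    attempts = getattr(request, "login_reattempted_times", 0) + 1
    if attempts >= max_attempts:
        raise RuntimeError("too many login attempts")
    setattr(request, "login_reattempted_times", attempts)
    return attempts

r = Req()
print(count_attempt(r), count_attempt(r))  # 1 2
```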
fhir_pyrate/util/util.py
CHANGED
@@ -16,7 +16,7 @@ def string_from_column(
     sort_reverse: bool = False,
 ) -> Any:
     """
- …
+    Transform the values contained in a pandas Series into a string of (if desired unique) values.

     :param col:
     :param separator: The separator for the values
@@ -25,7 +25,7 @@ def string_from_column(
     :param sort_reverse: Whether the values should sorted in reverse order
     :return: A string containing the values of the Series.
     """
-    existing_values =
+    existing_values = []
     for el in col.values:
         if not pd.isnull(el) and el != "":
             existing_values.append(el)
@@ -44,7 +44,7 @@ def string_from_column(

 def get_datetime(dt_format: str = "%Y-%m-%d %H:%M:%S") -> str:
     """
- …
+    Create a datetime string according to the given format

     :param dt_format: The format to use for the printing
     :return: The formatted string
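`string_from_column` now starts from an explicit empty list. Its core idea, condensed; the `dict.fromkeys` deduplication is illustrative, since the real function also exposes sorting and uniqueness flags:

```python
import pandas as pd

col = pd.Series(["CT", None, "MR", "CT", ""])
# Drop null/empty cells, deduplicate while keeping order, then join.
existing_values = [el for el in col.values if not pd.isnull(el) and el != ""]
print(", ".join(dict.fromkeys(existing_values)))  # CT, MR
```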
{fhir_pyrate-0.2.1.dist-info → fhir_pyrate-0.2.2.dist-info}/METADATA
CHANGED

@@ -1,19 +1,19 @@
 Metadata-Version: 2.1
 Name: fhir-pyrate
-Version: 0.2.1
+Version: 0.2.2
 Summary: FHIR-PYrate is a package that provides a high-level API to query FHIR Servers for bundles of resources and return the structured information as pandas DataFrames. It can also be used to filter resources using RegEx and SpaCy and download DICOM studies and series.
 Home-page: https://github.com/UMEssen/FHIR-PYrate
 License: MIT
 Keywords: python,fhir,data-science,fhirpath,healthcare
 Author: Rene Hosch
 Author-email: rene.hosch@uk-essen.de
-Requires-Python: >=3.
+Requires-Python: >=3.10,<4.0
 Classifier: License :: OSI Approved :: MIT License
 Classifier: Programming Language :: Python :: 3
-Classifier: Programming Language :: Python :: 3.8
-Classifier: Programming Language :: Python :: 3.9
 Classifier: Programming Language :: Python :: 3.10
 Classifier: Programming Language :: Python :: 3.11
+Classifier: Programming Language :: Python :: 3.12
+Classifier: Programming Language :: Python :: 3.13
 Provides-Extra: all
 Provides-Extra: downloader
 Provides-Extra: miner
@@ -21,8 +21,8 @@ Requires-Dist: PyJWT (>=2.4.0,<3.0.0)
 Requires-Dist: SimpleITK (>=2.0.2,<3.0.0) ; extra == "downloader" or extra == "all"
 Requires-Dist: dicomweb-client (>=0.52.0,<0.53.0) ; extra == "downloader" or extra == "all"
 Requires-Dist: fhirpathpy (>=0.2.2,<0.3.0)
-Requires-Dist: numpy (>=
-Requires-Dist: pandas (>=
+Requires-Dist: numpy (>=2.0.0,<3.0.0)
+Requires-Dist: pandas (>=2.0.0,<3.0.0)
 Requires-Dist: pydicom (>=2.1.2,<3.0.0) ; extra == "downloader" or extra == "all"
 Requires-Dist: requests (>=2.28.0,<3.0.0)
 Requires-Dist: requests-cache (>=0.9.7,<0.10.0)
@@ -32,18 +32,14 @@ Project-URL: Repository, https://github.com/UMEssen/FHIR-PYrate
 Description-Content-Type: text/markdown

 [](https://opensource.org/licenses/MIT)
-[](https://www.python.org/downloads/release/python-31011/)
 [](https://pypi.org/project/fhir-pyrate/)
 [](https://pypi.org/project/fhir-pyrate/#history)
 [](https://zenodo.org/badge/latestdoi/456893108)
+[](https://wispermed.org/)

 <!-- PROJECT LOGO -->
-
-<div align="center">
-    <a href="https://github.com/UMEssen/FHIR-PYrate">
-        <img src="https://raw.githubusercontent.com/UMEssen/FHIR-PYrate/main/images/logo.svg" alt="Logo" width="600">
-    </a>
-</div>
+ …

 This package is meant to provide a simple abstraction to query and structure FHIR resources as
 pandas DataFrames. Want to use R instead? Try out [fhircrackr](https://github.com/POLAR-fhiR/fhircrackr)!
fhir_pyrate-0.2.2.dist-info/RECORD
ADDED

@@ -0,0 +1,15 @@
+fhir_pyrate/__init__.py,sha256=GrVFlghs2Q8pGCVLdNjD40R0Xt8ptvF5NhCtT1FUazk,864
+fhir_pyrate/ahoy.py,sha256=dOw94Khg3_4fXnJHAnZFlazxy_Tbb32Yi0HGOYVaHLU,5611
+fhir_pyrate/dicom_downloader.py,sha256=GNoOSlyt_pdykdcfvHS287iXMrVBHeyY1WIeegHnbvk,28215
+fhir_pyrate/miner.py,sha256=wydCL6zmVHSf4ctwz2760mZAfe9byVQciUWUQyRHVtQ,7331
+fhir_pyrate/pirate.py,sha256=xNqmz5g5EnB3-RLZM46JYXkoGISFVmYEQUg2M-bTcKA,70318
+fhir_pyrate/util/__init__.py,sha256=-P4jBpsH6XzQELrLAod2_P9oBocfaYUOWb_LQ-QjyKI,193
+fhir_pyrate/util/bundle_processing_templates.py,sha256=EERL26gbv2hkYCoxWj1ZguJK5TKJ3e03zMAJuKSkczY,4519
+fhir_pyrate/util/fhirobj.py,sha256=nTkSUbsmOisgDDgD_cEggF8hlPVjm7CWVSrkA8dOq3E,903
+fhir_pyrate/util/imports.py,sha256=3s0hvuonX_susm5anw4fZORh7V3JMJ4hoLPBVSoj7Lw,4333
+fhir_pyrate/util/token_auth.py,sha256=mfLzlZxCnWbiOOMZh_IDYgREp_9fhI3FshB8mDULtaQ,7862
+fhir_pyrate/util/util.py,sha256=1qIdoriWNksD3DRYm7qSRCli5t9524wa3mVf21fETgs,1507
+fhir_pyrate-0.2.2.dist-info/LICENSE,sha256=1IS7y51_JRtpCVVbUpYyztTMj3A6FskEZJBQkAQ9w9o,1084
+fhir_pyrate-0.2.2.dist-info/METADATA,sha256=V5L-hgjfdmrikX1nmZNsBHbf1utdOj-zSsQfKeV6ybQ,29679
+fhir_pyrate-0.2.2.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
+fhir_pyrate-0.2.2.dist-info/RECORD,,
fhir_pyrate-0.2.1.dist-info/RECORD
DELETED

@@ -1,15 +0,0 @@
-fhir_pyrate/__init__.py,sha256=-25N2f8w3P0YXynSIqZcp2naR5lq0Q4_Xag2TZ4Ln7g,864
-fhir_pyrate/ahoy.py,sha256=pjzqDTadeLLIozygOKA-h1762uNN5mRWiiTDVLiiFQU,5531
-fhir_pyrate/dicom_downloader.py,sha256=kmguN9-qmwFMwBlz9veMEiXX7mwTHsfqYtlKAS3sN3A,27955
-fhir_pyrate/miner.py,sha256=sCiRhvBRUB548g2M9PcV8AptODdTQzd_KXJ8YccZKNU,7327
-fhir_pyrate/pirate.py,sha256=Ay2XnfjW0HNseQ-yW45EtooZXiUtQNNb-9gIUU7BhgY,69474
-fhir_pyrate/util/__init__.py,sha256=vlDDL-4G8gvv544TBmYZtAuNpSc23APjJs_1R9vv5ag,193
-fhir_pyrate/util/bundle_processing_templates.py,sha256=rxeUCRKiMMblT_-c55kYm1JofdAAbmDgu5spfBy99uE,4448
-fhir_pyrate/util/fhirobj.py,sha256=GX6iwbXtBYpe_DiRag0fYF3qenaLD2bQh31WYPDke44,883
-fhir_pyrate/util/imports.py,sha256=jKxiMYTDjbmstqbGCGYjr6EAdbTsvOrZ7GSZo1W6y2g,4336
-fhir_pyrate/util/token_auth.py,sha256=JcEOYCjTP3DXNsMP-Szv3m0y1TO4-SEWAYl10PkPBks,7993
-fhir_pyrate/util/util.py,sha256=DIr5McePaG0m_Uu2B6BTUd2MExXH2PtN-cKOT7gtLRY,1513
-fhir_pyrate-0.2.1.dist-info/LICENSE,sha256=1IS7y51_JRtpCVVbUpYyztTMj3A6FskEZJBQkAQ9w9o,1084
-fhir_pyrate-0.2.1.dist-info/METADATA,sha256=BdfZnzPMip6fI_0WIy3q8_YIz6k6N9VH4kq9kC1rU1c,29707
-fhir_pyrate-0.2.1.dist-info/WHEEL,sha256=d2fvjOD7sXsVzChCqf0Ty0JbHKBaLYwDbGQDwQTnJ50,88
-fhir_pyrate-0.2.1.dist-info/RECORD,,
{fhir_pyrate-0.2.1.dist-info → fhir_pyrate-0.2.2.dist-info}/WHEEL
CHANGED

{fhir_pyrate-0.2.1.dist-info → fhir_pyrate-0.2.2.dist-info}/LICENSE
File without changes