datamint 1.7.3__tar.gz → 1.7.5__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of datamint might be problematic. Click here for more details.
- {datamint-1.7.3 → datamint-1.7.5}/PKG-INFO +2 -2
- {datamint-1.7.3 → datamint-1.7.5}/datamint/apihandler/base_api_handler.py +21 -13
- {datamint-1.7.3 → datamint-1.7.5}/datamint/apihandler/root_api_handler.py +103 -70
- {datamint-1.7.3 → datamint-1.7.5}/datamint/dataset/base_dataset.py +20 -14
- {datamint-1.7.3 → datamint-1.7.5}/datamint/dataset/dataset.py +1 -1
- {datamint-1.7.3 → datamint-1.7.5}/pyproject.toml +2 -2
- {datamint-1.7.3 → datamint-1.7.5}/README.md +0 -0
- {datamint-1.7.3 → datamint-1.7.5}/datamint/__init__.py +0 -0
- {datamint-1.7.3 → datamint-1.7.5}/datamint/apihandler/annotation_api_handler.py +0 -0
- {datamint-1.7.3 → datamint-1.7.5}/datamint/apihandler/api_handler.py +0 -0
- {datamint-1.7.3 → datamint-1.7.5}/datamint/apihandler/dto/annotation_dto.py +0 -0
- {datamint-1.7.3 → datamint-1.7.5}/datamint/apihandler/exp_api_handler.py +0 -0
- {datamint-1.7.3 → datamint-1.7.5}/datamint/client_cmd_tools/__init__.py +0 -0
- {datamint-1.7.3 → datamint-1.7.5}/datamint/client_cmd_tools/datamint_config.py +0 -0
- {datamint-1.7.3 → datamint-1.7.5}/datamint/client_cmd_tools/datamint_upload.py +0 -0
- {datamint-1.7.3 → datamint-1.7.5}/datamint/configs.py +0 -0
- {datamint-1.7.3 → datamint-1.7.5}/datamint/dataset/__init__.py +0 -0
- {datamint-1.7.3 → datamint-1.7.5}/datamint/dataset/annotation.py +0 -0
- {datamint-1.7.3 → datamint-1.7.5}/datamint/examples/__init__.py +0 -0
- {datamint-1.7.3 → datamint-1.7.5}/datamint/examples/example_projects.py +0 -0
- {datamint-1.7.3 → datamint-1.7.5}/datamint/experiment/__init__.py +0 -0
- {datamint-1.7.3 → datamint-1.7.5}/datamint/experiment/_patcher.py +0 -0
- {datamint-1.7.3 → datamint-1.7.5}/datamint/experiment/experiment.py +0 -0
- {datamint-1.7.3 → datamint-1.7.5}/datamint/logging.yaml +0 -0
- {datamint-1.7.3 → datamint-1.7.5}/datamint/utils/logging_utils.py +0 -0
- {datamint-1.7.3 → datamint-1.7.5}/datamint/utils/torchmetrics.py +0 -0
- {datamint-1.7.3 → datamint-1.7.5}/datamint/utils/visualization.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.3
|
|
2
2
|
Name: datamint
|
|
3
|
-
Version: 1.7.3
|
|
3
|
+
Version: 1.7.5
|
|
4
4
|
Summary: A library for interacting with the Datamint API, designed for efficient data management, processing and Deep Learning workflows.
|
|
5
5
|
Requires-Python: >=3.10
|
|
6
6
|
Classifier: Programming Language :: Python :: 3
|
|
@@ -19,7 +19,7 @@ Requires-Dist: humanize (>=4.0.0,<5.0.0)
|
|
|
19
19
|
Requires-Dist: lazy-loader (>=0.3.0)
|
|
20
20
|
Requires-Dist: lightning
|
|
21
21
|
Requires-Dist: matplotlib
|
|
22
|
-
Requires-Dist: medimgkit
|
|
22
|
+
Requires-Dist: medimgkit (>=0.2.1)
|
|
23
23
|
Requires-Dist: nest-asyncio (>=1.0.0,<2.0.0)
|
|
24
24
|
Requires-Dist: nibabel (>=4.0.0)
|
|
25
25
|
Requires-Dist: numpy
|
|
@@ -14,7 +14,7 @@ import cv2
|
|
|
14
14
|
import nibabel as nib
|
|
15
15
|
from nibabel.filebasedimages import FileBasedImage as nib_FileBasedImage
|
|
16
16
|
from datamint import configs
|
|
17
|
-
|
|
17
|
+
import gzip
|
|
18
18
|
|
|
19
19
|
_LOGGER = logging.getLogger(__name__)
|
|
20
20
|
|
|
@@ -138,7 +138,7 @@ class BaseAPIHandler:
|
|
|
138
138
|
if isinstance(data, aiohttp.FormData): # Check if it's aiohttp.FormData
|
|
139
139
|
# Handle FormData by extracting fields
|
|
140
140
|
form_parts = []
|
|
141
|
-
for options,headers,value in data._fields:
|
|
141
|
+
for options, headers, value in data._fields:
|
|
142
142
|
# get the name from options
|
|
143
143
|
name = options.get('name', 'file')
|
|
144
144
|
if hasattr(value, 'read'): # File-like object
|
|
@@ -161,7 +161,7 @@ class BaseAPIHandler:
|
|
|
161
161
|
if session is None:
|
|
162
162
|
async with aiohttp.ClientSession() as s:
|
|
163
163
|
return await self._run_request_async(request_args, s, data_to_get)
|
|
164
|
-
|
|
164
|
+
|
|
165
165
|
async with self.semaphore:
|
|
166
166
|
try:
|
|
167
167
|
_LOGGER.debug(f"Running request to {request_args['url']}")
|
|
@@ -222,7 +222,7 @@ class BaseAPIHandler:
|
|
|
222
222
|
|
|
223
223
|
def _run_request(self,
|
|
224
224
|
request_args: dict,
|
|
225
|
-
session: Session = None):
|
|
225
|
+
session: Session | None = None):
|
|
226
226
|
if session is None:
|
|
227
227
|
with Session() as s:
|
|
228
228
|
return self._run_request(request_args, s)
|
|
@@ -281,24 +281,32 @@ class BaseAPIHandler:
|
|
|
281
281
|
@staticmethod
|
|
282
282
|
def convert_format(bytes_array: bytes,
|
|
283
283
|
mimetype: str,
|
|
284
|
-
file_path: str = None
|
|
284
|
+
file_path: str | None = None
|
|
285
285
|
) -> pydicom.dataset.Dataset | Image.Image | cv2.VideoCapture | bytes | nib_FileBasedImage:
|
|
286
|
+
""" Convert the bytes array to the appropriate format based on the mimetype."""
|
|
286
287
|
content_io = BytesIO(bytes_array)
|
|
287
|
-
if mimetype
|
|
288
|
+
if mimetype.endswith('/dicom'):
|
|
288
289
|
return pydicom.dcmread(content_io)
|
|
289
|
-
elif mimetype
|
|
290
|
+
elif mimetype.startswith('image/'):
|
|
290
291
|
return Image.open(content_io)
|
|
291
|
-
elif mimetype
|
|
292
|
+
elif mimetype.startswith('video/'):
|
|
292
293
|
if file_path is None:
|
|
293
|
-
raise NotImplementedError("file_path=None is not implemented yet for video
|
|
294
|
+
raise NotImplementedError("file_path=None is not implemented yet for video/* mimetypes.")
|
|
294
295
|
return cv2.VideoCapture(file_path)
|
|
295
296
|
elif mimetype == 'application/json':
|
|
296
297
|
return json.loads(bytes_array)
|
|
297
298
|
elif mimetype == 'application/octet-stream':
|
|
298
299
|
return bytes_array
|
|
299
|
-
elif mimetype
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
|
|
300
|
+
elif mimetype.endswith('nifti'):
|
|
301
|
+
try:
|
|
302
|
+
return nib.Nifti1Image.from_stream(content_io)
|
|
303
|
+
except Exception as e:
|
|
304
|
+
if file_path is not None:
|
|
305
|
+
return nib.load(file_path)
|
|
306
|
+
raise e
|
|
307
|
+
elif mimetype == 'application/gzip':
|
|
308
|
+
# let's hope it's a .nii.gz
|
|
309
|
+
with gzip.open(content_io, 'rb') as f:
|
|
310
|
+
return nib.Nifti1Image.from_stream(f)
|
|
303
311
|
|
|
304
312
|
raise ValueError(f"Unsupported mimetype: {mimetype}")
|
|
@@ -6,12 +6,13 @@ from requests.exceptions import HTTPError
|
|
|
6
6
|
import logging
|
|
7
7
|
import asyncio
|
|
8
8
|
import aiohttp
|
|
9
|
-
from medimgkit.dicom_utils import anonymize_dicom, to_bytesio, is_dicom
|
|
9
|
+
from medimgkit.dicom_utils import anonymize_dicom, to_bytesio, is_dicom, is_dicom_report
|
|
10
10
|
from medimgkit import dicom_utils
|
|
11
|
+
from medimgkit.io_utils import is_io_object
|
|
12
|
+
from medimgkit.format_detection import guess_typez, guess_extension, DEFAULT_MIME_TYPE
|
|
11
13
|
import pydicom
|
|
12
14
|
from pathlib import Path
|
|
13
15
|
from datetime import date
|
|
14
|
-
import mimetypes
|
|
15
16
|
from PIL import Image
|
|
16
17
|
import cv2
|
|
17
18
|
from nibabel.filebasedimages import FileBasedImage as nib_FileBasedImage
|
|
@@ -26,13 +27,6 @@ _LOGGER = logging.getLogger(__name__)
|
|
|
26
27
|
_USER_LOGGER = logging.getLogger('user_logger')
|
|
27
28
|
|
|
28
29
|
|
|
29
|
-
def _is_io_object(obj):
|
|
30
|
-
"""
|
|
31
|
-
Check if an object is a file-like object.
|
|
32
|
-
"""
|
|
33
|
-
return callable(getattr(obj, "read", None))
|
|
34
|
-
|
|
35
|
-
|
|
36
30
|
def _infinite_gen(x):
|
|
37
31
|
while True:
|
|
38
32
|
yield x
|
|
@@ -65,7 +59,7 @@ class RootAPIHandler(BaseAPIHandler):
|
|
|
65
59
|
publish: bool = False,
|
|
66
60
|
metadata_file: Optional[str | dict] = None,
|
|
67
61
|
) -> str:
|
|
68
|
-
if _is_io_object(file_path):
|
|
62
|
+
if is_io_object(file_path):
|
|
69
63
|
name = file_path.name
|
|
70
64
|
else:
|
|
71
65
|
name = file_path
|
|
@@ -91,15 +85,14 @@ class RootAPIHandler(BaseAPIHandler):
|
|
|
91
85
|
name = new_file_path
|
|
92
86
|
_LOGGER.debug(f"New file path: {name}")
|
|
93
87
|
|
|
94
|
-
if mimetype is None:
|
|
95
|
-
mimetype = mimetypes.guess_type(name)[0]
|
|
96
88
|
is_a_dicom_file = None
|
|
97
89
|
if mimetype is None:
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
90
|
+
mimetype_list, ext = guess_typez(file_path, use_magic=True)
|
|
91
|
+
mimetype = mimetype_list[-1]
|
|
92
|
+
if mimetype == 'application/gzip':
|
|
93
|
+
# Special case for gzipped NIfTI files
|
|
94
|
+
if ext == '.nii.gz' or name.lower().endswith('nii.gz'):
|
|
95
|
+
mimetype = 'image/x.nifti'
|
|
103
96
|
|
|
104
97
|
filename = os.path.basename(name)
|
|
105
98
|
_LOGGER.debug(f"File name '{filename}' mimetype: {mimetype}")
|
|
@@ -364,35 +357,6 @@ class RootAPIHandler(BaseAPIHandler):
|
|
|
364
357
|
|
|
365
358
|
return result[0]
|
|
366
359
|
|
|
367
|
-
@staticmethod
|
|
368
|
-
def _is_dicom_report(file_path: str | IO) -> bool:
|
|
369
|
-
"""
|
|
370
|
-
Check if a DICOM file is a report (e.g., Structured Report).
|
|
371
|
-
|
|
372
|
-
Args:
|
|
373
|
-
file_path: Path to the DICOM file or file-like object.
|
|
374
|
-
|
|
375
|
-
Returns:
|
|
376
|
-
bool: True if the DICOM file is a report, False otherwise.
|
|
377
|
-
"""
|
|
378
|
-
try:
|
|
379
|
-
if not is_dicom(file_path):
|
|
380
|
-
return False
|
|
381
|
-
|
|
382
|
-
ds = pydicom.dcmread(file_path, stop_before_pixels=True)
|
|
383
|
-
if hasattr(file_path, 'seek'):
|
|
384
|
-
file_path.seek(0)
|
|
385
|
-
modality = getattr(ds, 'Modality', None)
|
|
386
|
-
|
|
387
|
-
# Common report modalities
|
|
388
|
-
# SR=Structured Report, DOC=Document, KO=Key Object, PR=Presentation State
|
|
389
|
-
report_modalities = {'SR', 'DOC', 'KO', 'PR', 'ESR'}
|
|
390
|
-
|
|
391
|
-
return modality in report_modalities
|
|
392
|
-
except Exception as e:
|
|
393
|
-
_LOGGER.warning(f"Error checking if DICOM is a report: {e}")
|
|
394
|
-
return False
|
|
395
|
-
|
|
396
360
|
def upload_resources(self,
|
|
397
361
|
files_path: str | IO | Sequence[str | IO] | pydicom.dataset.Dataset,
|
|
398
362
|
mimetype: Optional[str] = None,
|
|
@@ -451,9 +415,9 @@ class RootAPIHandler(BaseAPIHandler):
|
|
|
451
415
|
|
|
452
416
|
files_path, is_multiple_resources = RootAPIHandler.__process_files_parameter(files_path)
|
|
453
417
|
|
|
454
|
-
|
|
418
|
+
# Discard DICOM reports
|
|
455
419
|
if discard_dicom_reports:
|
|
456
|
-
files_path = [f for f in files_path if not
|
|
420
|
+
files_path = [f for f in files_path if not is_dicom_report(f)]
|
|
457
421
|
old_size = len(files_path)
|
|
458
422
|
if old_size is not None and old_size != len(files_path):
|
|
459
423
|
_LOGGER.info(f"Discarded {old_size - len(files_path)} DICOM report files from upload.")
|
|
@@ -678,7 +642,7 @@ class RootAPIHandler(BaseAPIHandler):
|
|
|
678
642
|
is_list = False
|
|
679
643
|
new_file_path = [file_path]
|
|
680
644
|
# Check if is an IO object
|
|
681
|
-
elif _is_io_object(file_path):
|
|
645
|
+
elif is_io_object(file_path):
|
|
682
646
|
is_list = False
|
|
683
647
|
new_file_path = [file_path]
|
|
684
648
|
elif not hasattr(file_path, '__len__'):
|
|
@@ -728,8 +692,8 @@ class RootAPIHandler(BaseAPIHandler):
|
|
|
728
692
|
|
|
729
693
|
def get_resources(self,
|
|
730
694
|
status: Optional[ResourceStatus] = None,
|
|
731
|
-
from_date:
|
|
732
|
-
to_date:
|
|
695
|
+
from_date: date | str | None = None,
|
|
696
|
+
to_date: date | str | None = None,
|
|
733
697
|
tags: Optional[Sequence[str]] = None,
|
|
734
698
|
modality: Optional[str] = None,
|
|
735
699
|
mimetype: Optional[str] = None,
|
|
@@ -747,8 +711,8 @@ class RootAPIHandler(BaseAPIHandler):
|
|
|
747
711
|
|
|
748
712
|
Args:
|
|
749
713
|
status (ResourceStatus): The resource status. Possible values: 'inbox', 'published', 'archived' or None. If None, it will return all resources.
|
|
750
|
-
from_date (
|
|
751
|
-
to_date (
|
|
714
|
+
from_date (date | str | None): The start date.
|
|
715
|
+
to_date (date | str | None): The end date.
|
|
752
716
|
tags (Optional[list[str]]): The tags to filter the resources.
|
|
753
717
|
modality (Optional[str]): The modality of the resources.
|
|
754
718
|
mimetype (Optional[str]): The mimetype of the resources.
|
|
@@ -767,9 +731,15 @@ class RootAPIHandler(BaseAPIHandler):
|
|
|
767
731
|
"""
|
|
768
732
|
# Convert datetime objects to ISO format
|
|
769
733
|
if from_date:
|
|
770
|
-
from_date
|
|
734
|
+
if isinstance(from_date, str):
|
|
735
|
+
date.fromisoformat(from_date)
|
|
736
|
+
else:
|
|
737
|
+
from_date = from_date.isoformat()
|
|
771
738
|
if to_date:
|
|
772
|
-
to_date
|
|
739
|
+
if isinstance(to_date, str):
|
|
740
|
+
date.fromisoformat(to_date)
|
|
741
|
+
else:
|
|
742
|
+
to_date = to_date.isoformat()
|
|
773
743
|
|
|
774
744
|
# Prepare the payload
|
|
775
745
|
payload = {
|
|
@@ -869,9 +839,10 @@ class RootAPIHandler(BaseAPIHandler):
|
|
|
869
839
|
|
|
870
840
|
async def _async_download_file(self,
|
|
871
841
|
resource_id: str,
|
|
872
|
-
save_path: str,
|
|
842
|
+
save_path: str | Path,
|
|
873
843
|
session: aiohttp.ClientSession | None = None,
|
|
874
|
-
progress_bar: tqdm | None = None
|
|
844
|
+
progress_bar: tqdm | None = None,
|
|
845
|
+
add_extension: bool = False) -> str:
|
|
875
846
|
"""
|
|
876
847
|
Asynchronously download a file from the server.
|
|
877
848
|
|
|
@@ -880,6 +851,10 @@ class RootAPIHandler(BaseAPIHandler):
|
|
|
880
851
|
save_path (str): The path to save the file.
|
|
881
852
|
session (aiohttp.ClientSession): The aiohttp session to use for the request.
|
|
882
853
|
progress_bar (tqdm | None): Optional progress bar to update after download completion.
|
|
854
|
+
add_extension (bool): Whether to add the appropriate file extension based on content type.
|
|
855
|
+
|
|
856
|
+
Returns:
|
|
857
|
+
str: The actual path where the file was saved (important when add_extension=True).
|
|
883
858
|
"""
|
|
884
859
|
url = f"{self._get_endpoint_url(RootAPIHandler.ENDPOINT_RESOURCES)}/{resource_id}/file"
|
|
885
860
|
request_params = {
|
|
@@ -887,41 +862,90 @@ class RootAPIHandler(BaseAPIHandler):
|
|
|
887
862
|
'headers': {'accept': 'application/octet-stream'},
|
|
888
863
|
'url': url
|
|
889
864
|
}
|
|
865
|
+
save_path = str(save_path) # Ensure save_path is a string for file operations
|
|
890
866
|
try:
|
|
891
867
|
data_bytes = await self._run_request_async(request_params, session, 'content')
|
|
892
|
-
|
|
893
|
-
|
|
868
|
+
|
|
869
|
+
final_save_path = save_path
|
|
870
|
+
if add_extension:
|
|
871
|
+
# Save to temporary file first to determine mimetype from content
|
|
872
|
+
temp_path = f"{save_path}.tmp"
|
|
873
|
+
with open(temp_path, 'wb') as f:
|
|
874
|
+
f.write(data_bytes)
|
|
875
|
+
|
|
876
|
+
# Determine mimetype from file content
|
|
877
|
+
mimetype_list, ext = guess_typez(temp_path, use_magic=True)
|
|
878
|
+
mimetype = mimetype_list[-1]
|
|
879
|
+
|
|
880
|
+
# get mimetype from resource info if not detected
|
|
881
|
+
if mimetype is None or mimetype == DEFAULT_MIME_TYPE:
|
|
882
|
+
resource_info = self.get_resources_by_ids(resource_id)
|
|
883
|
+
mimetype = resource_info.get('mimetype', mimetype)
|
|
884
|
+
|
|
885
|
+
# Generate final path with extension if needed
|
|
886
|
+
if mimetype is not None and mimetype != DEFAULT_MIME_TYPE:
|
|
887
|
+
if ext is None:
|
|
888
|
+
ext = guess_extension(mimetype)
|
|
889
|
+
if ext is not None and not save_path.endswith(ext):
|
|
890
|
+
final_save_path = save_path + ext
|
|
891
|
+
|
|
892
|
+
# Move file to final location
|
|
893
|
+
os.rename(temp_path, final_save_path)
|
|
894
|
+
else:
|
|
895
|
+
# Standard save without extension detection
|
|
896
|
+
with open(final_save_path, 'wb') as f:
|
|
897
|
+
f.write(data_bytes)
|
|
898
|
+
|
|
894
899
|
if progress_bar:
|
|
895
900
|
progress_bar.update(1)
|
|
901
|
+
|
|
902
|
+
return final_save_path
|
|
903
|
+
|
|
896
904
|
except ResourceNotFoundError as e:
|
|
897
905
|
e.set_params('resource', {'resource_id': resource_id})
|
|
898
906
|
raise e
|
|
899
907
|
|
|
900
908
|
def download_multiple_resources(self,
|
|
901
909
|
resource_ids: list[str],
|
|
902
|
-
save_path: list[str] | str
|
|
903
|
-
|
|
910
|
+
save_path: list[str] | str,
|
|
911
|
+
add_extension: bool = False,
|
|
912
|
+
) -> list[str]:
|
|
904
913
|
"""
|
|
905
914
|
Download multiple resources and save them to the specified paths.
|
|
906
915
|
|
|
907
916
|
Args:
|
|
908
917
|
resource_ids (list[str]): A list of resource unique ids.
|
|
909
918
|
save_path (list[str] | str): A list of paths to save the files or a directory path.
|
|
919
|
+
add_extension (bool): Whether to add the appropriate file extension to the save_path based on the content type.
|
|
920
|
+
|
|
921
|
+
Returns:
|
|
922
|
+
list[str]: A list of paths where the files were saved. Important if `add_extension=True`.
|
|
910
923
|
"""
|
|
924
|
+
if isinstance(resource_ids, str):
|
|
925
|
+
raise ValueError("resource_ids must be a list of strings.")
|
|
926
|
+
|
|
911
927
|
async def _download_all_async():
|
|
912
928
|
async with aiohttp.ClientSession() as session:
|
|
913
929
|
tasks = [
|
|
914
|
-
self._async_download_file(
|
|
930
|
+
self._async_download_file(
|
|
931
|
+
resource_id=resource_id,
|
|
932
|
+
save_path=path,
|
|
933
|
+
session=session,
|
|
934
|
+
progress_bar=progress_bar,
|
|
935
|
+
add_extension=add_extension
|
|
936
|
+
)
|
|
915
937
|
for resource_id, path in zip(resource_ids, save_path)
|
|
916
938
|
]
|
|
917
|
-
await asyncio.gather(*tasks)
|
|
939
|
+
return await asyncio.gather(*tasks)
|
|
918
940
|
|
|
919
941
|
if isinstance(save_path, str):
|
|
920
942
|
save_path = [os.path.join(save_path, r) for r in resource_ids]
|
|
921
943
|
|
|
922
944
|
with tqdm(total=len(resource_ids), desc="Downloading resources", unit="file") as progress_bar:
|
|
923
945
|
loop = asyncio.get_event_loop()
|
|
924
|
-
loop.run_until_complete(_download_all_async())
|
|
946
|
+
final_save_paths = loop.run_until_complete(_download_all_async())
|
|
947
|
+
|
|
948
|
+
return final_save_paths
|
|
925
949
|
|
|
926
950
|
def download_resource_file(self,
|
|
927
951
|
resource_id: str,
|
|
@@ -953,6 +977,9 @@ class RootAPIHandler(BaseAPIHandler):
|
|
|
953
977
|
>>> api_handler.download_resource_file('resource_id', save_path='path/to/dicomfile.dcm')
|
|
954
978
|
saves the file in the specified path.
|
|
955
979
|
"""
|
|
980
|
+
if save_path is None and add_extension:
|
|
981
|
+
raise ValueError("If add_extension is True, save_path must be provided.")
|
|
982
|
+
|
|
956
983
|
url = f"{self._get_endpoint_url(RootAPIHandler.ENDPOINT_RESOURCES)}/{resource_id}/file"
|
|
957
984
|
request_params = {'method': 'GET',
|
|
958
985
|
'headers': {'accept': 'application/octet-stream'},
|
|
@@ -962,9 +989,14 @@ class RootAPIHandler(BaseAPIHandler):
|
|
|
962
989
|
|
|
963
990
|
# Get mimetype if needed for auto_convert or add_extension
|
|
964
991
|
mimetype = None
|
|
992
|
+
mimetype_list = []
|
|
993
|
+
ext = None
|
|
965
994
|
if auto_convert or add_extension:
|
|
966
|
-
|
|
967
|
-
mimetype =
|
|
995
|
+
mimetype_list, ext = guess_typez(response.content)
|
|
996
|
+
mimetype = mimetype_list[-1]
|
|
997
|
+
if mimetype is None or mimetype == DEFAULT_MIME_TYPE:
|
|
998
|
+
resource_info = self.get_resources_by_ids(resource_id)
|
|
999
|
+
mimetype = resource_info.get('mimetype', None)
|
|
968
1000
|
|
|
969
1001
|
if auto_convert:
|
|
970
1002
|
try:
|
|
@@ -985,15 +1017,16 @@ class RootAPIHandler(BaseAPIHandler):
|
|
|
985
1017
|
raise e
|
|
986
1018
|
|
|
987
1019
|
if save_path is not None:
|
|
988
|
-
if add_extension:
|
|
989
|
-
ext
|
|
1020
|
+
if add_extension and mimetype is not None:
|
|
1021
|
+
if ext is None:
|
|
1022
|
+
ext = guess_extension(mimetype)
|
|
990
1023
|
if ext is not None and not save_path.endswith(ext):
|
|
991
1024
|
save_path += ext
|
|
992
1025
|
with open(save_path, 'wb') as f:
|
|
993
1026
|
f.write(response.content)
|
|
994
1027
|
|
|
995
|
-
|
|
996
|
-
|
|
1028
|
+
if add_extension:
|
|
1029
|
+
return resource_file, save_path
|
|
997
1030
|
return resource_file
|
|
998
1031
|
|
|
999
1032
|
def download_resource_frame(self,
|
|
@@ -15,10 +15,10 @@ import torch
|
|
|
15
15
|
from torch import Tensor
|
|
16
16
|
from datamint.apihandler.base_api_handler import DatamintException
|
|
17
17
|
from medimgkit.dicom_utils import is_dicom
|
|
18
|
-
from medimgkit.
|
|
18
|
+
from medimgkit.readers import read_array_normalized
|
|
19
|
+
from medimgkit.format_detection import guess_extension
|
|
19
20
|
from datetime import datetime
|
|
20
21
|
from pathlib import Path
|
|
21
|
-
from mimetypes import guess_extension
|
|
22
22
|
from datamint.dataset.annotation import Annotation
|
|
23
23
|
import cv2
|
|
24
24
|
|
|
@@ -283,10 +283,12 @@ class DatamintBaseDataset:
|
|
|
283
283
|
"""Post-process data after loading metadata."""
|
|
284
284
|
self._check_integrity()
|
|
285
285
|
self._calculate_dataset_length()
|
|
286
|
-
self.
|
|
286
|
+
if self.return_frame_by_frame:
|
|
287
|
+
self._precompute_frame_data()
|
|
288
|
+
self.subset_indices = list(range(self.dataset_length))
|
|
287
289
|
self._setup_labels()
|
|
288
290
|
|
|
289
|
-
if self.discard_without_annotations
|
|
291
|
+
if self.discard_without_annotations:
|
|
290
292
|
self._filter_unannotated()
|
|
291
293
|
|
|
292
294
|
def _calculate_dataset_length(self) -> None:
|
|
@@ -301,9 +303,8 @@ class DatamintBaseDataset:
|
|
|
301
303
|
|
|
302
304
|
def _precompute_frame_data(self) -> None:
|
|
303
305
|
"""Precompute frame-related data for efficient indexing."""
|
|
304
|
-
|
|
305
|
-
self._cumulative_frames = np.cumsum([0] +
|
|
306
|
-
self.subset_indices = list(range(self.dataset_length))
|
|
306
|
+
num_frames_per_resource = self.__compute_num_frames_per_resource()
|
|
307
|
+
self._cumulative_frames = np.cumsum([0] + num_frames_per_resource)
|
|
307
308
|
|
|
308
309
|
def _setup_labels(self) -> None:
|
|
309
310
|
"""Setup label sets and mappings."""
|
|
@@ -897,8 +898,11 @@ class DatamintBaseDataset:
|
|
|
897
898
|
new_resources_path = [Path(self.dataset_dir) / r['file'] for r in new_resources]
|
|
898
899
|
new_resources_ids = [r['id'] for r in new_resources]
|
|
899
900
|
_LOGGER.info(f"Downloading {len(new_resources)} new resources...")
|
|
900
|
-
self.api_handler.download_multiple_resources(new_resources_ids,
|
|
901
|
-
|
|
901
|
+
new_res_paths = self.api_handler.download_multiple_resources(new_resources_ids,
|
|
902
|
+
save_path=new_resources_path,
|
|
903
|
+
add_extension=True)
|
|
904
|
+
for new_rpath, r in zip(new_res_paths, new_resources):
|
|
905
|
+
r['file'] = str(Path(new_rpath).relative_to(self.dataset_dir))
|
|
902
906
|
_LOGGER.info(f"Downloaded {len(new_resources)} new resources.")
|
|
903
907
|
|
|
904
908
|
for r in deleted_resources:
|
|
@@ -988,11 +992,13 @@ class DatamintBaseDataset:
|
|
|
988
992
|
if 'file' in resource and resource['file'] is not None:
|
|
989
993
|
return Path(resource['file'])
|
|
990
994
|
else:
|
|
991
|
-
ext = guess_extension(resource['mimetype']
|
|
992
|
-
|
|
993
|
-
|
|
994
|
-
|
|
995
|
-
|
|
995
|
+
# ext = guess_extension(resource['mimetype'])
|
|
996
|
+
# _LOGGER.debug(f"Guessed extension for resource {resource['id']}|{resource['mimetype']}: {ext}")
|
|
997
|
+
# if ext is None:
|
|
998
|
+
# _LOGGER.warning(f"Could not guess extension for resource {resource['id']}.")
|
|
999
|
+
# ext = ''
|
|
1000
|
+
# return Path('images', f"{resource['id']}{ext}")
|
|
1001
|
+
return Path('images', resource['id'])
|
|
996
1002
|
|
|
997
1003
|
def _get_annotation_file_path(self, annotation: dict | Annotation) -> Path | None:
|
|
998
1004
|
"""Get the local file path for an annotation."""
|
|
@@ -478,7 +478,7 @@ class DatamintDataset(DatamintBaseDataset):
|
|
|
478
478
|
|
|
479
479
|
def _convert_labels_annotations(self,
|
|
480
480
|
annotations: list[Annotation],
|
|
481
|
-
num_frames: int = None) -> dict[str, torch.Tensor]:
|
|
481
|
+
num_frames: int | None = None) -> dict[str, torch.Tensor]:
|
|
482
482
|
"""
|
|
483
483
|
Converts the annotations, of the same type and scope, to tensor of shape (num_frames, num_labels)
|
|
484
484
|
for each annotator.
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
[project]
|
|
2
2
|
name = "datamint"
|
|
3
3
|
description = "A library for interacting with the Datamint API, designed for efficient data management, processing and Deep Learning workflows."
|
|
4
|
-
version = "1.7.3"
|
|
4
|
+
version = "1.7.5"
|
|
5
5
|
dynamic = ["dependencies"]
|
|
6
6
|
requires-python = ">=3.10"
|
|
7
7
|
readme = "README.md"
|
|
@@ -40,7 +40,7 @@ matplotlib = "*"
|
|
|
40
40
|
lightning = "*"
|
|
41
41
|
albumentations = ">=2.0.0"
|
|
42
42
|
lazy-loader = ">=0.3.0"
|
|
43
|
-
medimgkit = "*"
|
|
43
|
+
medimgkit = ">=0.2.1"
|
|
44
44
|
# For compatibility with the datamintapi package
|
|
45
45
|
datamintapi = "0.0.*"
|
|
46
46
|
# Extra dependencies for docs
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|