datamint 1.7.4__py3-none-any.whl → 1.7.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of datamint has been flagged as potentially problematic.

--- a/datamint/apihandler/base_api_handler.py
+++ b/datamint/apihandler/base_api_handler.py
@@ -14,7 +14,7 @@ import cv2
 import nibabel as nib
 from nibabel.filebasedimages import FileBasedImage as nib_FileBasedImage
 from datamint import configs
-from functools import wraps
+import gzip
 
 _LOGGER = logging.getLogger(__name__)
 
@@ -298,8 +298,15 @@ class BaseAPIHandler:
         elif mimetype == 'application/octet-stream':
             return bytes_array
         elif mimetype.endswith('nifti'):
-            if file_path is None:
-                raise NotImplementedError(f"file_path=None is not implemented yet for {mimetype}.")
-            return nib.load(file_path)
+            try:
+                return nib.Nifti1Image.from_stream(content_io)
+            except Exception as e:
+                if file_path is not None:
+                    return nib.load(file_path)
+                raise e
+        elif mimetype == 'application/gzip':
+            # let's hope it's a .nii.gz
+            with gzip.open(content_io, 'rb') as f:
+                return nib.Nifti1Image.from_stream(f)
 
         raise ValueError(f"Unsupported mimetype: {mimetype}")
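
The change above removes the hard requirement of having the NIfTI on disk: the handler now parses the downloaded bytes directly from the in-memory stream and only falls back to nib.load(file_path) when a path is available, and gzip payloads are decompressed on the fly on the assumption that they hold a .nii.gz. A minimal standalone sketch of the same idea (the helper name is illustrative, not datamint's API, and it assumes a nibabel version that provides Nifti1Image.from_stream, which the new code relies on):

    import gzip
    import io

    import nibabel as nib

    def decode_nifti_bytes(raw: bytes) -> nib.Nifti1Image:
        """Parse a NIfTI payload entirely in memory, gzipped or not."""
        content_io = io.BytesIO(raw)
        if raw[:2] == b"\x1f\x8b":  # gzip magic number -> treat as .nii.gz
            with gzip.open(content_io, "rb") as f:
                return nib.Nifti1Image.from_stream(f)
        return nib.Nifti1Image.from_stream(content_io)
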
--- a/datamint/apihandler/root_api_handler.py
+++ b/datamint/apihandler/root_api_handler.py
@@ -6,10 +6,10 @@ from requests.exceptions import HTTPError
 import logging
 import asyncio
 import aiohttp
-from medimgkit.dicom_utils import anonymize_dicom, to_bytesio, is_dicom
+from medimgkit.dicom_utils import anonymize_dicom, to_bytesio, is_dicom, is_dicom_report
 from medimgkit import dicom_utils
 from medimgkit.io_utils import is_io_object
-from medimgkit.format_detection import guess_type, guess_extension
+from medimgkit.format_detection import guess_typez, guess_extension, DEFAULT_MIME_TYPE
 import pydicom
 from pathlib import Path
 from datetime import date
@@ -87,10 +87,12 @@ class RootAPIHandler(BaseAPIHandler):
 
         is_a_dicom_file = None
         if mimetype is None:
-            mimetype, _ = guess_type(file_path, use_magic=True)
-            if mimetype == 'application/gzip' and name.lower().endswith('nii.gz'):
+            mimetype_list, ext = guess_typez(file_path, use_magic=True)
+            mimetype = mimetype_list[-1]
+            if mimetype == 'application/gzip':
                 # Special case for gzipped NIfTI files
-                mimetype = 'image/x.nifti'
+                if ext == '.nii.gz' or name.lower().endswith('nii.gz'):
+                    mimetype = 'image/x.nifti'
 
         filename = os.path.basename(name)
         _LOGGER.debug(f"File name '{filename}' mimetype: {mimetype}")
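
The upload path switches from guess_type to guess_typez. From the way the diff uses it, guess_typez appears to return a list of candidate mimetypes plus a detected extension rather than a single guess; the handler takes the last candidate and only promotes application/gzip to image/x.nifti when the detected extension or the file name indicates .nii.gz. A short sketch of that assumed contract, inferred from the hunk above rather than from medimgkit's documentation, with a placeholder file name:

    from medimgkit.format_detection import guess_typez  # medimgkit >= 0.2.1

    mimetype_list, ext = guess_typez('scan_001.nii.gz', use_magic=True)
    # e.g. mimetype_list == ['application/gzip'] and ext == '.nii.gz';
    # the handler takes mimetype_list[-1] as the most specific candidate and,
    # because the extension agrees, maps it to 'image/x.nifti'.
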
@@ -355,35 +357,6 @@ class RootAPIHandler(BaseAPIHandler):
 
         return result[0]
 
-    @staticmethod
-    def _is_dicom_report(file_path: str | IO) -> bool:
-        """
-        Check if a DICOM file is a report (e.g., Structured Report).
-
-        Args:
-            file_path: Path to the DICOM file or file-like object.
-
-        Returns:
-            bool: True if the DICOM file is a report, False otherwise.
-        """
-        try:
-            if not is_dicom(file_path):
-                return False
-
-            ds = pydicom.dcmread(file_path, stop_before_pixels=True)
-            if hasattr(file_path, 'seek'):
-                file_path.seek(0)
-            modality = getattr(ds, 'Modality', None)
-
-            # Common report modalities
-            # SR=Structured Report, DOC=Document, KO=Key Object, PR=Presentation State
-            report_modalities = {'SR', 'DOC', 'KO', 'PR', 'ESR'}
-
-            return modality in report_modalities
-        except Exception as e:
-            _LOGGER.warning(f"Error checking if DICOM is a report: {e}")
-            return False
-
     def upload_resources(self,
                          files_path: str | IO | Sequence[str | IO] | pydicom.dataset.Dataset,
                          mimetype: Optional[str] = None,
@@ -444,7 +417,7 @@ class RootAPIHandler(BaseAPIHandler):
 
         # Discard DICOM reports
         if discard_dicom_reports:
-            files_path = [f for f in files_path if not RootAPIHandler._is_dicom_report(f)]
+            files_path = [f for f in files_path if not is_dicom_report(f)]
             old_size = len(files_path)
             if old_size is not None and old_size != len(files_path):
                 _LOGGER.info(f"Discarded {old_size - len(files_path)} DICOM report files from upload.")
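
The private RootAPIHandler._is_dicom_report helper is dropped in favour of is_dicom_report from medimgkit.dicom_utils (added to the imports in the first root_api_handler hunk), so the report check now lives in the shared library. The logic the deleted helper implemented, reconstructed from the removed lines, reads only the DICOM header and compares Modality against report-type values; the function name below is illustrative, not medimgkit's exact signature:

    import pydicom

    # SR=Structured Report, DOC=Document, KO=Key Object, PR=Presentation State
    REPORT_MODALITIES = {'SR', 'DOC', 'KO', 'PR', 'ESR'}

    def looks_like_dicom_report(path: str) -> bool:
        """Return True if the DICOM header's Modality marks it as a report-type object."""
        try:
            ds = pydicom.dcmread(path, stop_before_pixels=True)  # header only, skip pixel data
        except Exception:
            return False
        return getattr(ds, 'Modality', None) in REPORT_MODALITIES
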
@@ -866,9 +839,10 @@ class RootAPIHandler(BaseAPIHandler):
 
     async def _async_download_file(self,
                                    resource_id: str,
-                                   save_path: str,
+                                   save_path: str | Path,
                                    session: aiohttp.ClientSession | None = None,
-                                   progress_bar: tqdm | None = None):
+                                   progress_bar: tqdm | None = None,
+                                   add_extension: bool = False) -> str:
         """
         Asynchronously download a file from the server.
 
@@ -877,6 +851,10 @@ class RootAPIHandler(BaseAPIHandler):
             save_path (str): The path to save the file.
             session (aiohttp.ClientSession): The aiohttp session to use for the request.
             progress_bar (tqdm | None): Optional progress bar to update after download completion.
+            add_extension (bool): Whether to add the appropriate file extension based on content type.
+
+        Returns:
+            str: The actual path where the file was saved (important when add_extension=True).
         """
         url = f"{self._get_endpoint_url(RootAPIHandler.ENDPOINT_RESOURCES)}/{resource_id}/file"
         request_params = {
@@ -884,41 +862,90 @@ class RootAPIHandler(BaseAPIHandler):
             'headers': {'accept': 'application/octet-stream'},
             'url': url
         }
+        save_path = str(save_path)  # Ensure save_path is a string for file operations
         try:
             data_bytes = await self._run_request_async(request_params, session, 'content')
-            with open(save_path, 'wb') as f:
-                f.write(data_bytes)
+
+            final_save_path = save_path
+            if add_extension:
+                # Save to temporary file first to determine mimetype from content
+                temp_path = f"{save_path}.tmp"
+                with open(temp_path, 'wb') as f:
+                    f.write(data_bytes)
+
+                # Determine mimetype from file content
+                mimetype_list, ext = guess_typez(temp_path, use_magic=True)
+                mimetype = mimetype_list[-1]
+
+                # get mimetype from resource info if not detected
+                if mimetype is None or mimetype == DEFAULT_MIME_TYPE:
+                    resource_info = self.get_resources_by_ids(resource_id)
+                    mimetype = resource_info.get('mimetype', mimetype)
+
+                # Generate final path with extension if needed
+                if mimetype is not None and mimetype != DEFAULT_MIME_TYPE:
+                    if ext is None:
+                        ext = guess_extension(mimetype)
+                    if ext is not None and not save_path.endswith(ext):
+                        final_save_path = save_path + ext
+
+                # Move file to final location
+                os.rename(temp_path, final_save_path)
+            else:
+                # Standard save without extension detection
+                with open(final_save_path, 'wb') as f:
+                    f.write(data_bytes)
+
             if progress_bar:
                 progress_bar.update(1)
+
+            return final_save_path
+
         except ResourceNotFoundError as e:
             e.set_params('resource', {'resource_id': resource_id})
             raise e
 
     def download_multiple_resources(self,
                                     resource_ids: list[str],
-                                    save_path: list[str] | str
-                                    ) -> None:
+                                    save_path: list[str] | str,
+                                    add_extension: bool = False,
+                                    ) -> list[str]:
         """
         Download multiple resources and save them to the specified paths.
 
         Args:
             resource_ids (list[str]): A list of resource unique ids.
             save_path (list[str] | str): A list of paths to save the files or a directory path.
+            add_extension (bool): Whether to add the appropriate file extension to the save_path based on the content type.
+
+        Returns:
+            list[str]: A list of paths where the files were saved. Important if `add_extension=True`.
         """
+        if isinstance(resource_ids, str):
+            raise ValueError("resource_ids must be a list of strings.")
+
         async def _download_all_async():
             async with aiohttp.ClientSession() as session:
                 tasks = [
-                    self._async_download_file(resource_id, save_path=path, session=session, progress_bar=progress_bar)
+                    self._async_download_file(
+                        resource_id=resource_id,
+                        save_path=path,
+                        session=session,
+                        progress_bar=progress_bar,
+                        add_extension=add_extension
+                    )
                     for resource_id, path in zip(resource_ids, save_path)
                 ]
-                await asyncio.gather(*tasks)
+                return await asyncio.gather(*tasks)
 
         if isinstance(save_path, str):
             save_path = [os.path.join(save_path, r) for r in resource_ids]
 
         with tqdm(total=len(resource_ids), desc="Downloading resources", unit="file") as progress_bar:
             loop = asyncio.get_event_loop()
-            loop.run_until_complete(_download_all_async())
+            final_save_paths = loop.run_until_complete(_download_all_async())
+
+        return final_save_paths
 
     def download_resource_file(self,
                                resource_id: str,
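
Besides accepting add_extension, download_multiple_resources now returns the list of paths actually written, which matters because extension detection can rename files relative to what the caller asked for. A hypothetical usage sketch (`api` stands for an already constructed RootAPIHandler and the resource ids are placeholders):

    resource_ids = ['res-001', 'res-002']
    saved_paths = api.download_multiple_resources(
        resource_ids,
        save_path='downloads/',   # a directory: one file per resource id
        add_extension=True,       # sniff the content and append e.g. '.dcm' or '.nii.gz'
    )
    # The returned list is the authoritative location of each file, since the
    # extension-augmented names can differ from 'downloads/<resource_id>'.
    for rid, path in zip(resource_ids, saved_paths):
        print(rid, '->', path)
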
@@ -962,10 +989,14 @@ class RootAPIHandler(BaseAPIHandler):
 
         # Get mimetype if needed for auto_convert or add_extension
         mimetype = None
+        mimetype_list = []
+        ext = None
         if auto_convert or add_extension:
-            resource_info = self.get_resources_by_ids(resource_id)
-            mimetype = resource_info.get('mimetype', guess_type(response.content)[0])
-
+            mimetype_list, ext = guess_typez(response.content)
+            mimetype = mimetype_list[-1]
+            if mimetype is None or mimetype == DEFAULT_MIME_TYPE:
+                resource_info = self.get_resources_by_ids(resource_id)
+                mimetype = resource_info.get('mimetype', None)
 
         if auto_convert:
             try:
@@ -987,7 +1018,8 @@ class RootAPIHandler(BaseAPIHandler):
 
         if save_path is not None:
             if add_extension and mimetype is not None:
-                ext = guess_extension(mimetype)
+                if ext is None:
+                    ext = guess_extension(mimetype)
                 if ext is not None and not save_path.endswith(ext):
                     save_path += ext
             with open(save_path, 'wb') as f:
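
download_resource_file now derives the mimetype from the downloaded bytes first and only falls back to the resource's stored metadata when content sniffing yields nothing (or only the generic default type), and an extension already detected from the content is reused before guess_extension is consulted. A sketch of that fallback order (the helper name and wiring are illustrative, not datamint's API):

    from medimgkit.format_detection import DEFAULT_MIME_TYPE, guess_extension, guess_typez

    def pick_extension(content: bytes, stored_mimetype: str | None) -> str | None:
        mimetype_list, ext = guess_typez(content)   # 1) sniff the bytes themselves
        mimetype = mimetype_list[-1]
        if mimetype is None or mimetype == DEFAULT_MIME_TYPE:
            mimetype = stored_mimetype              # 2) fall back to server-side metadata
        if ext is None and mimetype is not None:
            ext = guess_extension(mimetype)         # 3) derive an extension as a last resort
        return ext
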
--- a/datamint/dataset/base_dataset.py
+++ b/datamint/dataset/base_dataset.py
@@ -15,10 +15,10 @@ import torch
 from torch import Tensor
 from datamint.apihandler.base_api_handler import DatamintException
 from medimgkit.dicom_utils import is_dicom
-from medimgkit.io_utils import read_array_normalized
+from medimgkit.readers import read_array_normalized
+from medimgkit.format_detection import guess_extension
 from datetime import datetime
 from pathlib import Path
-from mimetypes import guess_extension
 from datamint.dataset.annotation import Annotation
 import cv2
 
@@ -898,8 +898,11 @@ class DatamintBaseDataset:
         new_resources_path = [Path(self.dataset_dir) / r['file'] for r in new_resources]
         new_resources_ids = [r['id'] for r in new_resources]
         _LOGGER.info(f"Downloading {len(new_resources)} new resources...")
-        self.api_handler.download_multiple_resources(new_resources_ids,
-                                                     save_path=new_resources_path)
+        new_res_paths = self.api_handler.download_multiple_resources(new_resources_ids,
+                                                                     save_path=new_resources_path,
+                                                                     add_extension=True)
+        for new_rpath, r in zip(new_res_paths, new_resources):
+            r['file'] = str(Path(new_rpath).relative_to(self.dataset_dir))
         _LOGGER.info(f"Downloaded {len(new_resources)} new resources.")
 
         for r in deleted_resources:
@@ -989,13 +992,13 @@ class DatamintBaseDataset:
         if 'file' in resource and resource['file'] is not None:
             return Path(resource['file'])
         else:
-            ext = guess_extension(resource['mimetype'], strict=False)
-            _LOGGER.debug(f"Guessed extension for resource {resource['id']}|{resource['mimetype']}: {ext}")
-            if ext is None:
-                _LOGGER.warning(f"Could not guess extension for resource {resource['id']}.")
-                ext = ''
-                raise Exception
-            return Path('images', f"{resource['id']}{ext}")
+            # ext = guess_extension(resource['mimetype'])
+            # _LOGGER.debug(f"Guessed extension for resource {resource['id']}|{resource['mimetype']}: {ext}")
+            # if ext is None:
+            #     _LOGGER.warning(f"Could not guess extension for resource {resource['id']}.")
+            #     ext = ''
+            # return Path('images', f"{resource['id']}{ext}")
+            return Path('images', resource['id'])
 
     def _get_annotation_file_path(self, annotation: dict | Annotation) -> Path | None:
         """Get the local file path for an annotation."""
--- a/datamint/dataset/dataset.py
+++ b/datamint/dataset/dataset.py
@@ -478,7 +478,7 @@ class DatamintDataset(DatamintBaseDataset):
 
     def _convert_labels_annotations(self,
                                     annotations: list[Annotation],
-                                    num_frames: int = None) -> dict[str, torch.Tensor]:
+                                    num_frames: int | None = None) -> dict[str, torch.Tensor]:
         """
         Converts the annotations, of the same type and scope, to tensor of shape (num_frames, num_labels)
         for each annotator.
--- a/datamint-1.7.4.dist-info/METADATA
+++ b/datamint-1.7.5.dist-info/METADATA
@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: datamint
-Version: 1.7.4
+Version: 1.7.5
 Summary: A library for interacting with the Datamint API, designed for efficient data management, processing and Deep Learning workflows.
 Requires-Python: >=3.10
 Classifier: Programming Language :: Python :: 3
@@ -19,7 +19,7 @@ Requires-Dist: humanize (>=4.0.0,<5.0.0)
 Requires-Dist: lazy-loader (>=0.3.0)
 Requires-Dist: lightning
 Requires-Dist: matplotlib
-Requires-Dist: medimgkit (>=0.2.0)
+Requires-Dist: medimgkit (>=0.2.1)
 Requires-Dist: nest-asyncio (>=1.0.0,<2.0.0)
 Requires-Dist: nibabel (>=4.0.0)
 Requires-Dist: numpy
--- a/datamint-1.7.4.dist-info/RECORD
+++ b/datamint-1.7.5.dist-info/RECORD
@@ -1,18 +1,18 @@
 datamint/__init__.py,sha256=7rKCCsaa4RBRTIfuHB708rai1xwDHLtkFNFJGKYG5D4,757
 datamint/apihandler/annotation_api_handler.py,sha256=HnWiG2ebq08mdaazTXVbkuwvh6fmKIKt8uqAOf3Y1jU,57013
 datamint/apihandler/api_handler.py,sha256=cdVSddrFCKlF_BJ81LO1aJ0OP49rssjpNEFzJ6Q7YyY,384
-datamint/apihandler/base_api_handler.py,sha256=t2no7gTIdPFfR_TXlZmh3rsncaL9p1G8eIS2m9Q7ALE,11978
+datamint/apihandler/base_api_handler.py,sha256=HSMi8vN-xU0LLUEh3fgcuO7mXpMsX0wrA5nO9FVvrA4,12207
 datamint/apihandler/dto/annotation_dto.py,sha256=qId1RK1VO7dXrvGJ7dqJ31jBQB7Z8yy5x0tLSiMxTB4,7105
 datamint/apihandler/exp_api_handler.py,sha256=hFUgUgBc5rL7odK7gTW3MnrvMY1pVfJUpUdzRNobMQE,6226
-datamint/apihandler/root_api_handler.py,sha256=IHpr2Bki1WPTKzM133U68c7p_50Fq5t-JEkqGFlSl8k,57152
+datamint/apihandler/root_api_handler.py,sha256=8VanqWbcLLkwQ0y2Z_a5Mqr5knplRCUSi5iRVQeHFzU,58901
 datamint/client_cmd_tools/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 datamint/client_cmd_tools/datamint_config.py,sha256=md7dnWrbl10lPtXKbmD9yo6onLJsajeG8Vz0ZWH1v4M,8181
 datamint/client_cmd_tools/datamint_upload.py,sha256=890USkrtaH23mNjTRYVHWce2q9jSmkUNHIP_e8fnJRM,29502
 datamint/configs.py,sha256=Bdp6NydYwyCJ2dk19_gf_o3M2ZyQOmMHpLi8wEWNHUk,1426
 datamint/dataset/__init__.py,sha256=4PlUKSvVhdfQvvuq8jQXrkdqnot-iTTizM3aM1vgSwg,47
 datamint/dataset/annotation.py,sha256=qN1IMjdfLD2ceQ6va3l76jOXA8Vb_c-eBk1oWQu6hW0,7994
-datamint/dataset/base_dataset.py,sha256=vYM1Q7uVdtF6DfkbJjTjdleF-kG612rSBa2c2ynohX4,49393
-datamint/dataset/dataset.py,sha256=fdcgxB9NKvPEdr9S6TOeAIqFW38PdhmCiYsit6u5Wxc,27314
+datamint/dataset/base_dataset.py,sha256=S0pboog2yB2LCBGOocBIlOU8to7Wgov3gXTOJ9gbvz0,49697
+datamint/dataset/dataset.py,sha256=8e0MFgINgbw6_UJh7pNQIREp2XxstIVCupyduW05Nfw,27321
 datamint/examples/__init__.py,sha256=zcYnd5nLVme9GCTPYH-1JpGo8xXK2WEYvhzcy_2alZc,39
 datamint/examples/example_projects.py,sha256=7Nb_EaIdzJTQa9zopqc-WhTBQWQJSoQZ_KjRS4PB4FI,2931
 datamint/experiment/__init__.py,sha256=5qQOMzoG17DEd1YnTF-vS0qiM-DGdbNh42EUo91CRhQ,34
@@ -22,7 +22,7 @@ datamint/logging.yaml,sha256=a5dsATpul7QHeUHB2TjABFjWaPXBMbO--dgn8GlRqwk,483
 datamint/utils/logging_utils.py,sha256=DvoA35ATYG3JTwfXEXYawDyKRfHeCrH0a9czfkmz8kM,1851
 datamint/utils/torchmetrics.py,sha256=lwU0nOtsSWfebyp7dvjlAggaqXtj5ohSEUXOg3L0hJE,2837
 datamint/utils/visualization.py,sha256=yaUVAOHar59VrGUjpAWv5eVvQSfztFG0eP9p5Vt3l-M,4470
-datamint-1.7.4.dist-info/METADATA,sha256=Fej5PDwzs3NQPuPf9ijJt0x57sY5f4Y9JGx_IEAwkX4,4100
-datamint-1.7.4.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
-datamint-1.7.4.dist-info/entry_points.txt,sha256=mn5H6jPjO-rY0W0CAZ6Z_KKWhMLvyVaSpoqk77jlTI4,145
-datamint-1.7.4.dist-info/RECORD,,
+datamint-1.7.5.dist-info/METADATA,sha256=vWoLCzDqK33Lzb0DRTLLRAYpYJRX6xmv0p30Lo46Hwg,4100
+datamint-1.7.5.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
+datamint-1.7.5.dist-info/entry_points.txt,sha256=mn5H6jPjO-rY0W0CAZ6Z_KKWhMLvyVaSpoqk77jlTI4,145
+datamint-1.7.5.dist-info/RECORD,,