datamint 1.7.4__tar.gz → 1.7.6__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of datamint might be problematic. Click here for more details.

Files changed (27)
  1. {datamint-1.7.4 → datamint-1.7.6}/PKG-INFO +2 -2
  2. {datamint-1.7.4 → datamint-1.7.6}/datamint/apihandler/annotation_api_handler.py +2 -1
  3. {datamint-1.7.4 → datamint-1.7.6}/datamint/apihandler/base_api_handler.py +11 -4
  4. {datamint-1.7.4 → datamint-1.7.6}/datamint/apihandler/root_api_handler.py +88 -50
  5. {datamint-1.7.4 → datamint-1.7.6}/datamint/dataset/base_dataset.py +14 -11
  6. {datamint-1.7.4 → datamint-1.7.6}/datamint/dataset/dataset.py +1 -1
  7. {datamint-1.7.4 → datamint-1.7.6}/pyproject.toml +2 -2
  8. {datamint-1.7.4 → datamint-1.7.6}/README.md +0 -0
  9. {datamint-1.7.4 → datamint-1.7.6}/datamint/__init__.py +0 -0
  10. {datamint-1.7.4 → datamint-1.7.6}/datamint/apihandler/api_handler.py +0 -0
  11. {datamint-1.7.4 → datamint-1.7.6}/datamint/apihandler/dto/annotation_dto.py +0 -0
  12. {datamint-1.7.4 → datamint-1.7.6}/datamint/apihandler/exp_api_handler.py +0 -0
  13. {datamint-1.7.4 → datamint-1.7.6}/datamint/client_cmd_tools/__init__.py +0 -0
  14. {datamint-1.7.4 → datamint-1.7.6}/datamint/client_cmd_tools/datamint_config.py +0 -0
  15. {datamint-1.7.4 → datamint-1.7.6}/datamint/client_cmd_tools/datamint_upload.py +0 -0
  16. {datamint-1.7.4 → datamint-1.7.6}/datamint/configs.py +0 -0
  17. {datamint-1.7.4 → datamint-1.7.6}/datamint/dataset/__init__.py +0 -0
  18. {datamint-1.7.4 → datamint-1.7.6}/datamint/dataset/annotation.py +0 -0
  19. {datamint-1.7.4 → datamint-1.7.6}/datamint/examples/__init__.py +0 -0
  20. {datamint-1.7.4 → datamint-1.7.6}/datamint/examples/example_projects.py +0 -0
  21. {datamint-1.7.4 → datamint-1.7.6}/datamint/experiment/__init__.py +0 -0
  22. {datamint-1.7.4 → datamint-1.7.6}/datamint/experiment/_patcher.py +0 -0
  23. {datamint-1.7.4 → datamint-1.7.6}/datamint/experiment/experiment.py +0 -0
  24. {datamint-1.7.4 → datamint-1.7.6}/datamint/logging.yaml +0 -0
  25. {datamint-1.7.4 → datamint-1.7.6}/datamint/utils/logging_utils.py +0 -0
  26. {datamint-1.7.4 → datamint-1.7.6}/datamint/utils/torchmetrics.py +0 -0
  27. {datamint-1.7.4 → datamint-1.7.6}/datamint/utils/visualization.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: datamint
3
- Version: 1.7.4
3
+ Version: 1.7.6
4
4
  Summary: A library for interacting with the Datamint API, designed for efficient data management, processing and Deep Learning workflows.
5
5
  Requires-Python: >=3.10
6
6
  Classifier: Programming Language :: Python :: 3
@@ -19,7 +19,7 @@ Requires-Dist: humanize (>=4.0.0,<5.0.0)
19
19
  Requires-Dist: lazy-loader (>=0.3.0)
20
20
  Requires-Dist: lightning
21
21
  Requires-Dist: matplotlib
22
- Requires-Dist: medimgkit (>=0.2.0)
22
+ Requires-Dist: medimgkit (>=0.2.2)
23
23
  Requires-Dist: nest-asyncio (>=1.0.0,<2.0.0)
24
24
  Requires-Dist: nibabel (>=4.0.0)
25
25
  Requires-Dist: numpy
@@ -16,6 +16,7 @@ import json
16
16
  from deprecated import deprecated
17
17
  from pathlib import Path
18
18
  from tqdm.auto import tqdm
19
+ from medimgkit.nifti_utils import DEFAULT_NIFTI_MIME
19
20
 
20
21
  _LOGGER = logging.getLogger(__name__)
21
22
  _USER_LOGGER = logging.getLogger('user_logger')
@@ -280,7 +281,7 @@ class AnnotationAPIHandler(BaseAPIHandler):
280
281
  with open(file_path, 'rb') as f:
281
282
  filename = os.path.basename(file_path)
282
283
  form = aiohttp.FormData()
283
- form.add_field('file', f, filename=filename, content_type='application/x-nifti')
284
+ form.add_field('file', f, filename=filename, content_type=DEFAULT_NIFTI_MIME)
284
285
  if model_id is not None:
285
286
  form.add_field('model_id', model_id) # Add model_id if provided
286
287
  if worklist_id is not None:
@@ -14,7 +14,7 @@ import cv2
14
14
  import nibabel as nib
15
15
  from nibabel.filebasedimages import FileBasedImage as nib_FileBasedImage
16
16
  from datamint import configs
17
- from functools import wraps
17
+ import gzip
18
18
 
19
19
  _LOGGER = logging.getLogger(__name__)
20
20
 
@@ -298,8 +298,15 @@ class BaseAPIHandler:
298
298
  elif mimetype == 'application/octet-stream':
299
299
  return bytes_array
300
300
  elif mimetype.endswith('nifti'):
301
- if file_path is None:
302
- raise NotImplementedError(f"file_path=None is not implemented yet for {mimetype}.")
303
- return nib.load(file_path)
301
+ try:
302
+ return nib.Nifti1Image.from_stream(content_io)
303
+ except Exception as e:
304
+ if file_path is not None:
305
+ return nib.load(file_path)
306
+ raise e
307
+ elif mimetype == 'application/gzip':
308
+ # let's hope it's a .nii.gz
309
+ with gzip.open(content_io, 'rb') as f:
310
+ return nib.Nifti1Image.from_stream(f)
304
311
 
305
312
  raise ValueError(f"Unsupported mimetype: {mimetype}")
@@ -6,10 +6,11 @@ from requests.exceptions import HTTPError
6
6
  import logging
7
7
  import asyncio
8
8
  import aiohttp
9
- from medimgkit.dicom_utils import anonymize_dicom, to_bytesio, is_dicom
10
- from medimgkit import dicom_utils
9
+ from medimgkit.dicom_utils import anonymize_dicom, to_bytesio, is_dicom, is_dicom_report
10
+ from medimgkit import dicom_utils, standardize_mimetype
11
11
  from medimgkit.io_utils import is_io_object
12
- from medimgkit.format_detection import guess_type, guess_extension
12
+ from medimgkit.format_detection import guess_typez, guess_extension, DEFAULT_MIME_TYPE
13
+ from medimgkit.nifti_utils import DEFAULT_NIFTI_MIME, NIFTI_MIMES
13
14
  import pydicom
14
15
  from pathlib import Path
15
16
  from datetime import date
@@ -87,11 +88,18 @@ class RootAPIHandler(BaseAPIHandler):
87
88
 
88
89
  is_a_dicom_file = None
89
90
  if mimetype is None:
90
- mimetype, _ = guess_type(file_path, use_magic=True)
91
- if mimetype == 'application/gzip' and name.lower().endswith('nii.gz'):
92
- # Special case for gzipped NIfTI files
93
- mimetype = 'image/x.nifti'
91
+ mimetype_list, ext = guess_typez(file_path, use_magic=True)
92
+ for mime in mimetype_list:
93
+ if mime in NIFTI_MIMES:
94
+ mimetype = DEFAULT_NIFTI_MIME
95
+ break
96
+ else:
97
+ if ext == '.nii.gz' or name.lower().endswith('nii.gz'):
98
+ mimetype = DEFAULT_NIFTI_MIME
99
+ else:
100
+ mimetype = mimetype_list[-1] if mimetype_list else DEFAULT_MIME_TYPE
94
101
 
102
+ mimetype = standardize_mimetype(mimetype)
95
103
  filename = os.path.basename(name)
96
104
  _LOGGER.debug(f"File name '{filename}' mimetype: {mimetype}")
97
105
 
@@ -355,35 +363,6 @@ class RootAPIHandler(BaseAPIHandler):
355
363
 
356
364
  return result[0]
357
365
 
358
- @staticmethod
359
- def _is_dicom_report(file_path: str | IO) -> bool:
360
- """
361
- Check if a DICOM file is a report (e.g., Structured Report).
362
-
363
- Args:
364
- file_path: Path to the DICOM file or file-like object.
365
-
366
- Returns:
367
- bool: True if the DICOM file is a report, False otherwise.
368
- """
369
- try:
370
- if not is_dicom(file_path):
371
- return False
372
-
373
- ds = pydicom.dcmread(file_path, stop_before_pixels=True)
374
- if hasattr(file_path, 'seek'):
375
- file_path.seek(0)
376
- modality = getattr(ds, 'Modality', None)
377
-
378
- # Common report modalities
379
- # SR=Structured Report, DOC=Document, KO=Key Object, PR=Presentation State
380
- report_modalities = {'SR', 'DOC', 'KO', 'PR', 'ESR'}
381
-
382
- return modality in report_modalities
383
- except Exception as e:
384
- _LOGGER.warning(f"Error checking if DICOM is a report: {e}")
385
- return False
386
-
387
366
  def upload_resources(self,
388
367
  files_path: str | IO | Sequence[str | IO] | pydicom.dataset.Dataset,
389
368
  mimetype: Optional[str] = None,
@@ -444,7 +423,7 @@ class RootAPIHandler(BaseAPIHandler):
444
423
 
445
424
  # Discard DICOM reports
446
425
  if discard_dicom_reports:
447
- files_path = [f for f in files_path if not RootAPIHandler._is_dicom_report(f)]
426
+ files_path = [f for f in files_path if not is_dicom_report(f)]
448
427
  old_size = len(files_path)
449
428
  if old_size is not None and old_size != len(files_path):
450
429
  _LOGGER.info(f"Discarded {old_size - len(files_path)} DICOM report files from upload.")
@@ -866,9 +845,10 @@ class RootAPIHandler(BaseAPIHandler):
866
845
 
867
846
  async def _async_download_file(self,
868
847
  resource_id: str,
869
- save_path: str,
848
+ save_path: str | Path,
870
849
  session: aiohttp.ClientSession | None = None,
871
- progress_bar: tqdm | None = None):
850
+ progress_bar: tqdm | None = None,
851
+ add_extension: bool = False) -> str:
872
852
  """
873
853
  Asynchronously download a file from the server.
874
854
 
@@ -877,6 +857,10 @@ class RootAPIHandler(BaseAPIHandler):
877
857
  save_path (str): The path to save the file.
878
858
  session (aiohttp.ClientSession): The aiohttp session to use for the request.
879
859
  progress_bar (tqdm | None): Optional progress bar to update after download completion.
860
+ add_extension (bool): Whether to add the appropriate file extension based on content type.
861
+
862
+ Returns:
863
+ str: The actual path where the file was saved (important when add_extension=True).
880
864
  """
881
865
  url = f"{self._get_endpoint_url(RootAPIHandler.ENDPOINT_RESOURCES)}/{resource_id}/file"
882
866
  request_params = {
@@ -884,41 +868,90 @@ class RootAPIHandler(BaseAPIHandler):
884
868
  'headers': {'accept': 'application/octet-stream'},
885
869
  'url': url
886
870
  }
871
+ save_path = str(save_path) # Ensure save_path is a string for file operations
887
872
  try:
888
873
  data_bytes = await self._run_request_async(request_params, session, 'content')
889
- with open(save_path, 'wb') as f:
890
- f.write(data_bytes)
874
+
875
+ final_save_path = save_path
876
+ if add_extension:
877
+ # Save to temporary file first to determine mimetype from content
878
+ temp_path = f"{save_path}.tmp"
879
+ with open(temp_path, 'wb') as f:
880
+ f.write(data_bytes)
881
+
882
+ # Determine mimetype from file content
883
+ mimetype_list, ext = guess_typez(temp_path, use_magic=True)
884
+ mimetype = mimetype_list[-1]
885
+
886
+ # get mimetype from resource info if not detected
887
+ if mimetype is None or mimetype == DEFAULT_MIME_TYPE:
888
+ resource_info = self.get_resources_by_ids(resource_id)
889
+ mimetype = resource_info.get('mimetype', mimetype)
890
+
891
+ # Generate final path with extension if needed
892
+ if mimetype is not None and mimetype != DEFAULT_MIME_TYPE:
893
+ if ext is None:
894
+ ext = guess_extension(mimetype)
895
+ if ext is not None and not save_path.endswith(ext):
896
+ final_save_path = save_path + ext
897
+
898
+ # Move file to final location
899
+ os.rename(temp_path, final_save_path)
900
+ else:
901
+ # Standard save without extension detection
902
+ with open(final_save_path, 'wb') as f:
903
+ f.write(data_bytes)
904
+
891
905
  if progress_bar:
892
906
  progress_bar.update(1)
907
+
908
+ return final_save_path
909
+
893
910
  except ResourceNotFoundError as e:
894
911
  e.set_params('resource', {'resource_id': resource_id})
895
912
  raise e
896
913
 
897
914
  def download_multiple_resources(self,
898
915
  resource_ids: list[str],
899
- save_path: list[str] | str
900
- ) -> None:
916
+ save_path: list[str] | str,
917
+ add_extension: bool = False,
918
+ ) -> list[str]:
901
919
  """
902
920
  Download multiple resources and save them to the specified paths.
903
921
 
904
922
  Args:
905
923
  resource_ids (list[str]): A list of resource unique ids.
906
924
  save_path (list[str] | str): A list of paths to save the files or a directory path.
925
+ add_extension (bool): Whether to add the appropriate file extension to the save_path based on the content type.
926
+
927
+ Returns:
928
+ list[str]: A list of paths where the files were saved. Important if `add_extension=True`.
907
929
  """
930
+ if isinstance(resource_ids, str):
931
+ raise ValueError("resource_ids must be a list of strings.")
932
+
908
933
  async def _download_all_async():
909
934
  async with aiohttp.ClientSession() as session:
910
935
  tasks = [
911
- self._async_download_file(resource_id, save_path=path, session=session, progress_bar=progress_bar)
936
+ self._async_download_file(
937
+ resource_id=resource_id,
938
+ save_path=path,
939
+ session=session,
940
+ progress_bar=progress_bar,
941
+ add_extension=add_extension
942
+ )
912
943
  for resource_id, path in zip(resource_ids, save_path)
913
944
  ]
914
- await asyncio.gather(*tasks)
945
+ return await asyncio.gather(*tasks)
915
946
 
916
947
  if isinstance(save_path, str):
917
948
  save_path = [os.path.join(save_path, r) for r in resource_ids]
918
949
 
919
950
  with tqdm(total=len(resource_ids), desc="Downloading resources", unit="file") as progress_bar:
920
951
  loop = asyncio.get_event_loop()
921
- loop.run_until_complete(_download_all_async())
952
+ final_save_paths = loop.run_until_complete(_download_all_async())
953
+
954
+ return final_save_paths
922
955
 
923
956
  def download_resource_file(self,
924
957
  resource_id: str,
@@ -962,10 +995,14 @@ class RootAPIHandler(BaseAPIHandler):
962
995
 
963
996
  # Get mimetype if needed for auto_convert or add_extension
964
997
  mimetype = None
998
+ mimetype_list = []
999
+ ext = None
965
1000
  if auto_convert or add_extension:
966
- resource_info = self.get_resources_by_ids(resource_id)
967
- mimetype = resource_info.get('mimetype', guess_type(response.content)[0])
968
-
1001
+ mimetype_list, ext = guess_typez(response.content)
1002
+ mimetype = mimetype_list[-1]
1003
+ if mimetype is None or mimetype == DEFAULT_MIME_TYPE:
1004
+ resource_info = self.get_resources_by_ids(resource_id)
1005
+ mimetype = resource_info.get('mimetype', None)
969
1006
 
970
1007
  if auto_convert:
971
1008
  try:
@@ -987,7 +1024,8 @@ class RootAPIHandler(BaseAPIHandler):
987
1024
 
988
1025
  if save_path is not None:
989
1026
  if add_extension and mimetype is not None:
990
- ext = guess_extension(mimetype)
1027
+ if ext is None:
1028
+ ext = guess_extension(mimetype)
991
1029
  if ext is not None and not save_path.endswith(ext):
992
1030
  save_path += ext
993
1031
  with open(save_path, 'wb') as f:
@@ -15,10 +15,10 @@ import torch
15
15
  from torch import Tensor
16
16
  from datamint.apihandler.base_api_handler import DatamintException
17
17
  from medimgkit.dicom_utils import is_dicom
18
- from medimgkit.io_utils import read_array_normalized
18
+ from medimgkit.readers import read_array_normalized
19
+ from medimgkit.format_detection import guess_extension
19
20
  from datetime import datetime
20
21
  from pathlib import Path
21
- from mimetypes import guess_extension
22
22
  from datamint.dataset.annotation import Annotation
23
23
  import cv2
24
24
 
@@ -898,8 +898,11 @@ class DatamintBaseDataset:
898
898
  new_resources_path = [Path(self.dataset_dir) / r['file'] for r in new_resources]
899
899
  new_resources_ids = [r['id'] for r in new_resources]
900
900
  _LOGGER.info(f"Downloading {len(new_resources)} new resources...")
901
- self.api_handler.download_multiple_resources(new_resources_ids,
902
- save_path=new_resources_path)
901
+ new_res_paths = self.api_handler.download_multiple_resources(new_resources_ids,
902
+ save_path=new_resources_path,
903
+ add_extension=True)
904
+ for new_rpath, r in zip(new_res_paths, new_resources):
905
+ r['file'] = str(Path(new_rpath).relative_to(self.dataset_dir))
903
906
  _LOGGER.info(f"Downloaded {len(new_resources)} new resources.")
904
907
 
905
908
  for r in deleted_resources:
@@ -989,13 +992,13 @@ class DatamintBaseDataset:
989
992
  if 'file' in resource and resource['file'] is not None:
990
993
  return Path(resource['file'])
991
994
  else:
992
- ext = guess_extension(resource['mimetype'], strict=False)
993
- _LOGGER.debug(f"Guessed extension for resource {resource['id']}|{resource['mimetype']}: {ext}")
994
- if ext is None:
995
- _LOGGER.warning(f"Could not guess extension for resource {resource['id']}.")
996
- ext = ''
997
- raise Exception
998
- return Path('images', f"{resource['id']}{ext}")
995
+ # ext = guess_extension(resource['mimetype'])
996
+ # _LOGGER.debug(f"Guessed extension for resource {resource['id']}|{resource['mimetype']}: {ext}")
997
+ # if ext is None:
998
+ # _LOGGER.warning(f"Could not guess extension for resource {resource['id']}.")
999
+ # ext = ''
1000
+ # return Path('images', f"{resource['id']}{ext}")
1001
+ return Path('images', resource['id'])
999
1002
 
1000
1003
  def _get_annotation_file_path(self, annotation: dict | Annotation) -> Path | None:
1001
1004
  """Get the local file path for an annotation."""
@@ -478,7 +478,7 @@ class DatamintDataset(DatamintBaseDataset):
478
478
 
479
479
  def _convert_labels_annotations(self,
480
480
  annotations: list[Annotation],
481
- num_frames: int = None) -> dict[str, torch.Tensor]:
481
+ num_frames: int | None = None) -> dict[str, torch.Tensor]:
482
482
  """
483
483
  Converts the annotations, of the same type and scope, to tensor of shape (num_frames, num_labels)
484
484
  for each annotator.
@@ -1,7 +1,7 @@
1
1
  [project]
2
2
  name = "datamint"
3
3
  description = "A library for interacting with the Datamint API, designed for efficient data management, processing and Deep Learning workflows."
4
- version = "1.7.4"
4
+ version = "1.7.6"
5
5
  dynamic = ["dependencies"]
6
6
  requires-python = ">=3.10"
7
7
  readme = "README.md"
@@ -40,7 +40,7 @@ matplotlib = "*"
40
40
  lightning = "*"
41
41
  albumentations = ">=2.0.0"
42
42
  lazy-loader = ">=0.3.0"
43
- medimgkit = ">=0.2.0"
43
+ medimgkit = ">=0.2.2"
44
44
  # For compatibility with the datamintapi package
45
45
  datamintapi = "0.0.*"
46
46
  # Extra dependencies for docs
File without changes
File without changes
File without changes
File without changes