datamint 1.7.2__tar.gz → 1.7.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of datamint might be problematic. Click here for more details.

Files changed (27)
  1. {datamint-1.7.2 → datamint-1.7.4}/PKG-INFO +2 -2
  2. {datamint-1.7.2 → datamint-1.7.4}/datamint/apihandler/base_api_handler.py +11 -10
  3. {datamint-1.7.2 → datamint-1.7.4}/datamint/apihandler/root_api_handler.py +51 -52
  4. {datamint-1.7.2 → datamint-1.7.4}/datamint/dataset/base_dataset.py +8 -5
  5. {datamint-1.7.2 → datamint-1.7.4}/pyproject.toml +2 -2
  6. {datamint-1.7.2 → datamint-1.7.4}/README.md +0 -0
  7. {datamint-1.7.2 → datamint-1.7.4}/datamint/__init__.py +0 -0
  8. {datamint-1.7.2 → datamint-1.7.4}/datamint/apihandler/annotation_api_handler.py +0 -0
  9. {datamint-1.7.2 → datamint-1.7.4}/datamint/apihandler/api_handler.py +0 -0
  10. {datamint-1.7.2 → datamint-1.7.4}/datamint/apihandler/dto/annotation_dto.py +0 -0
  11. {datamint-1.7.2 → datamint-1.7.4}/datamint/apihandler/exp_api_handler.py +0 -0
  12. {datamint-1.7.2 → datamint-1.7.4}/datamint/client_cmd_tools/__init__.py +0 -0
  13. {datamint-1.7.2 → datamint-1.7.4}/datamint/client_cmd_tools/datamint_config.py +0 -0
  14. {datamint-1.7.2 → datamint-1.7.4}/datamint/client_cmd_tools/datamint_upload.py +0 -0
  15. {datamint-1.7.2 → datamint-1.7.4}/datamint/configs.py +0 -0
  16. {datamint-1.7.2 → datamint-1.7.4}/datamint/dataset/__init__.py +0 -0
  17. {datamint-1.7.2 → datamint-1.7.4}/datamint/dataset/annotation.py +0 -0
  18. {datamint-1.7.2 → datamint-1.7.4}/datamint/dataset/dataset.py +0 -0
  19. {datamint-1.7.2 → datamint-1.7.4}/datamint/examples/__init__.py +0 -0
  20. {datamint-1.7.2 → datamint-1.7.4}/datamint/examples/example_projects.py +0 -0
  21. {datamint-1.7.2 → datamint-1.7.4}/datamint/experiment/__init__.py +0 -0
  22. {datamint-1.7.2 → datamint-1.7.4}/datamint/experiment/_patcher.py +0 -0
  23. {datamint-1.7.2 → datamint-1.7.4}/datamint/experiment/experiment.py +0 -0
  24. {datamint-1.7.2 → datamint-1.7.4}/datamint/logging.yaml +0 -0
  25. {datamint-1.7.2 → datamint-1.7.4}/datamint/utils/logging_utils.py +0 -0
  26. {datamint-1.7.2 → datamint-1.7.4}/datamint/utils/torchmetrics.py +0 -0
  27. {datamint-1.7.2 → datamint-1.7.4}/datamint/utils/visualization.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: datamint
3
- Version: 1.7.2
3
+ Version: 1.7.4
4
4
  Summary: A library for interacting with the Datamint API, designed for efficient data management, processing and Deep Learning workflows.
5
5
  Requires-Python: >=3.10
6
6
  Classifier: Programming Language :: Python :: 3
@@ -19,7 +19,7 @@ Requires-Dist: humanize (>=4.0.0,<5.0.0)
19
19
  Requires-Dist: lazy-loader (>=0.3.0)
20
20
  Requires-Dist: lightning
21
21
  Requires-Dist: matplotlib
22
- Requires-Dist: medimgkit
22
+ Requires-Dist: medimgkit (>=0.2.0)
23
23
  Requires-Dist: nest-asyncio (>=1.0.0,<2.0.0)
24
24
  Requires-Dist: nibabel (>=4.0.0)
25
25
  Requires-Dist: numpy
@@ -138,7 +138,7 @@ class BaseAPIHandler:
138
138
  if isinstance(data, aiohttp.FormData): # Check if it's aiohttp.FormData
139
139
  # Handle FormData by extracting fields
140
140
  form_parts = []
141
- for options,headers,value in data._fields:
141
+ for options, headers, value in data._fields:
142
142
  # get the name from options
143
143
  name = options.get('name', 'file')
144
144
  if hasattr(value, 'read'): # File-like object
@@ -161,7 +161,7 @@ class BaseAPIHandler:
161
161
  if session is None:
162
162
  async with aiohttp.ClientSession() as s:
163
163
  return await self._run_request_async(request_args, s, data_to_get)
164
-
164
+
165
165
  async with self.semaphore:
166
166
  try:
167
167
  _LOGGER.debug(f"Running request to {request_args['url']}")
@@ -222,7 +222,7 @@ class BaseAPIHandler:
222
222
 
223
223
  def _run_request(self,
224
224
  request_args: dict,
225
- session: Session = None):
225
+ session: Session | None = None):
226
226
  if session is None:
227
227
  with Session() as s:
228
228
  return self._run_request(request_args, s)
@@ -281,24 +281,25 @@ class BaseAPIHandler:
281
281
  @staticmethod
282
282
  def convert_format(bytes_array: bytes,
283
283
  mimetype: str,
284
- file_path: str = None
284
+ file_path: str | None = None
285
285
  ) -> pydicom.dataset.Dataset | Image.Image | cv2.VideoCapture | bytes | nib_FileBasedImage:
286
+ """ Convert the bytes array to the appropriate format based on the mimetype."""
286
287
  content_io = BytesIO(bytes_array)
287
- if mimetype == 'application/dicom':
288
+ if mimetype.endswith('/dicom'):
288
289
  return pydicom.dcmread(content_io)
289
- elif mimetype in ('image/jpeg', 'image/png', 'image/tiff'):
290
+ elif mimetype.startswith('image/'):
290
291
  return Image.open(content_io)
291
- elif mimetype == 'video/mp4':
292
+ elif mimetype.startswith('video/'):
292
293
  if file_path is None:
293
- raise NotImplementedError("file_path=None is not implemented yet for video/mp4.")
294
+ raise NotImplementedError("file_path=None is not implemented yet for video/* mimetypes.")
294
295
  return cv2.VideoCapture(file_path)
295
296
  elif mimetype == 'application/json':
296
297
  return json.loads(bytes_array)
297
298
  elif mimetype == 'application/octet-stream':
298
299
  return bytes_array
299
- elif mimetype == 'application/nifti':
300
+ elif mimetype.endswith('nifti'):
300
301
  if file_path is None:
301
- raise NotImplementedError("file_path=None is not implemented yet for application/nifti.")
302
+ raise NotImplementedError(f"file_path=None is not implemented yet for {mimetype}.")
302
303
  return nib.load(file_path)
303
304
 
304
305
  raise ValueError(f"Unsupported mimetype: {mimetype}")
@@ -8,10 +8,11 @@ import asyncio
8
8
  import aiohttp
9
9
  from medimgkit.dicom_utils import anonymize_dicom, to_bytesio, is_dicom
10
10
  from medimgkit import dicom_utils
11
+ from medimgkit.io_utils import is_io_object
12
+ from medimgkit.format_detection import guess_type, guess_extension
11
13
  import pydicom
12
14
  from pathlib import Path
13
15
  from datetime import date
14
- import mimetypes
15
16
  from PIL import Image
16
17
  import cv2
17
18
  from nibabel.filebasedimages import FileBasedImage as nib_FileBasedImage
@@ -26,13 +27,6 @@ _LOGGER = logging.getLogger(__name__)
26
27
  _USER_LOGGER = logging.getLogger('user_logger')
27
28
 
28
29
 
29
- def _is_io_object(obj):
30
- """
31
- Check if an object is a file-like object.
32
- """
33
- return callable(getattr(obj, "read", None))
34
-
35
-
36
30
  def _infinite_gen(x):
37
31
  while True:
38
32
  yield x
@@ -65,7 +59,7 @@ class RootAPIHandler(BaseAPIHandler):
65
59
  publish: bool = False,
66
60
  metadata_file: Optional[str | dict] = None,
67
61
  ) -> str:
68
- if _is_io_object(file_path):
62
+ if is_io_object(file_path):
69
63
  name = file_path.name
70
64
  else:
71
65
  name = file_path
@@ -91,15 +85,12 @@ class RootAPIHandler(BaseAPIHandler):
91
85
  name = new_file_path
92
86
  _LOGGER.debug(f"New file path: {name}")
93
87
 
94
- if mimetype is None:
95
- mimetype = mimetypes.guess_type(name)[0]
96
88
  is_a_dicom_file = None
97
89
  if mimetype is None:
98
- is_a_dicom_file = is_dicom(name) or is_dicom(file_path)
99
- if is_a_dicom_file:
100
- mimetype = 'application/dicom'
101
- elif name.endswith('.nii') or name.endswith('.nii.gz'):
102
- mimetype = 'application/x-nifti'
90
+ mimetype, _ = guess_type(file_path, use_magic=True)
91
+ if mimetype == 'application/gzip' and name.lower().endswith('nii.gz'):
92
+ # Special case for gzipped NIfTI files
93
+ mimetype = 'image/x.nifti'
103
94
 
104
95
  filename = os.path.basename(name)
105
96
  _LOGGER.debug(f"File name '{filename}' mimetype: {mimetype}")
@@ -239,14 +230,14 @@ class RootAPIHandler(BaseAPIHandler):
239
230
  names = _infinite_gen(names)
240
231
  frame_indices = segfiles.get('frame_index', _infinite_gen(None))
241
232
  for f, name, frame_index in tqdm(zip(fpaths, names, frame_indices),
242
- desc=f"Uploading segmentations for {file_path}",
243
- total=len(fpaths)):
233
+ desc=f"Uploading segmentations for {file_path}",
234
+ total=len(fpaths)):
244
235
  if f is not None:
245
236
  await self._upload_segmentations_async(rid,
246
- file_path=f,
247
- name=name,
248
- frame_index=frame_index,
249
- transpose_segmentation=transpose_segmentation)
237
+ file_path=f,
238
+ name=name,
239
+ frame_index=frame_index,
240
+ transpose_segmentation=transpose_segmentation)
250
241
  return rid
251
242
 
252
243
  tasks = [__upload_single_resource(f, segfiles, metadata_file)
@@ -368,25 +359,26 @@ class RootAPIHandler(BaseAPIHandler):
368
359
  def _is_dicom_report(file_path: str | IO) -> bool:
369
360
  """
370
361
  Check if a DICOM file is a report (e.g., Structured Report).
371
-
362
+
372
363
  Args:
373
364
  file_path: Path to the DICOM file or file-like object.
374
-
365
+
375
366
  Returns:
376
367
  bool: True if the DICOM file is a report, False otherwise.
377
368
  """
378
369
  try:
379
370
  if not is_dicom(file_path):
380
371
  return False
381
-
372
+
382
373
  ds = pydicom.dcmread(file_path, stop_before_pixels=True)
383
374
  if hasattr(file_path, 'seek'):
384
375
  file_path.seek(0)
385
376
  modality = getattr(ds, 'Modality', None)
386
-
377
+
387
378
  # Common report modalities
388
- report_modalities = {'SR', 'DOC', 'KO', 'PR', 'ESR'} # SR=Structured Report, DOC=Document, KO=Key Object, PR=Presentation State
389
-
379
+ # SR=Structured Report, DOC=Document, KO=Key Object, PR=Presentation State
380
+ report_modalities = {'SR', 'DOC', 'KO', 'PR', 'ESR'}
381
+
390
382
  return modality in report_modalities
391
383
  except Exception as e:
392
384
  _LOGGER.warning(f"Error checking if DICOM is a report: {e}")
@@ -445,21 +437,18 @@ class RootAPIHandler(BaseAPIHandler):
445
437
  list[str | Exception]: A list of resource IDs or errors.
446
438
  """
447
439
 
448
- if discard_dicom_reports:
449
- if isinstance(files_path, (str, Path)):
450
- files_path = [files_path]
451
- elif isinstance(files_path, pydicom.dataset.Dataset):
452
- files_path = [files_path]
453
-
454
- old_size = len(files_path)
455
- files_path = [f for f in files_path if not RootAPIHandler._is_dicom_report(f)]
456
- if old_size != len(files_path):
457
- _LOGGER.info(f"Discarded {old_size - len(files_path)} DICOM report files from upload.")
458
-
459
440
  if on_error not in ['raise', 'skip']:
460
441
  raise ValueError("on_error must be either 'raise' or 'skip'")
461
442
 
462
443
  files_path, is_multiple_resources = RootAPIHandler.__process_files_parameter(files_path)
444
+
445
+ # Discard DICOM reports
446
+ if discard_dicom_reports:
447
+ files_path = [f for f in files_path if not RootAPIHandler._is_dicom_report(f)]
448
+ old_size = len(files_path)
449
+ if old_size is not None and old_size != len(files_path):
450
+ _LOGGER.info(f"Discarded {old_size - len(files_path)} DICOM report files from upload.")
451
+
463
452
  if isinstance(metadata, (str, dict)):
464
453
  _LOGGER.debug("Converting metadatas to a list")
465
454
  metadata = [metadata]
@@ -680,7 +669,7 @@ class RootAPIHandler(BaseAPIHandler):
680
669
  is_list = False
681
670
  new_file_path = [file_path]
682
671
  # Check if is an IO object
683
- elif _is_io_object(file_path):
672
+ elif is_io_object(file_path):
684
673
  is_list = False
685
674
  new_file_path = [file_path]
686
675
  elif not hasattr(file_path, '__len__'):
@@ -730,8 +719,8 @@ class RootAPIHandler(BaseAPIHandler):
730
719
 
731
720
  def get_resources(self,
732
721
  status: Optional[ResourceStatus] = None,
733
- from_date: Optional[date] = None,
734
- to_date: Optional[date] = None,
722
+ from_date: date | str | None = None,
723
+ to_date: date | str | None = None,
735
724
  tags: Optional[Sequence[str]] = None,
736
725
  modality: Optional[str] = None,
737
726
  mimetype: Optional[str] = None,
@@ -749,8 +738,8 @@ class RootAPIHandler(BaseAPIHandler):
749
738
 
750
739
  Args:
751
740
  status (ResourceStatus): The resource status. Possible values: 'inbox', 'published', 'archived' or None. If None, it will return all resources.
752
- from_date (Optional[date]): The start date.
753
- to_date (Optional[date]): The end date.
741
+ from_date (date | str | None): The start date.
742
+ to_date (date | str | None): The end date.
754
743
  tags (Optional[list[str]]): The tags to filter the resources.
755
744
  modality (Optional[str]): The modality of the resources.
756
745
  mimetype (Optional[str]): The mimetype of the resources.
@@ -769,9 +758,15 @@ class RootAPIHandler(BaseAPIHandler):
769
758
  """
770
759
  # Convert datetime objects to ISO format
771
760
  if from_date:
772
- from_date = from_date.isoformat()
761
+ if isinstance(from_date, str):
762
+ date.fromisoformat(from_date)
763
+ else:
764
+ from_date = from_date.isoformat()
773
765
  if to_date:
774
- to_date = to_date.isoformat()
766
+ if isinstance(to_date, str):
767
+ date.fromisoformat(to_date)
768
+ else:
769
+ to_date = to_date.isoformat()
775
770
 
776
771
  # Prepare the payload
777
772
  payload = {
@@ -905,7 +900,7 @@ class RootAPIHandler(BaseAPIHandler):
905
900
  ) -> None:
906
901
  """
907
902
  Download multiple resources and save them to the specified paths.
908
-
903
+
909
904
  Args:
910
905
  resource_ids (list[str]): A list of resource unique ids.
911
906
  save_path (list[str] | str): A list of paths to save the files or a directory path.
@@ -955,6 +950,9 @@ class RootAPIHandler(BaseAPIHandler):
955
950
  >>> api_handler.download_resource_file('resource_id', save_path='path/to/dicomfile.dcm')
956
951
  saves the file in the specified path.
957
952
  """
953
+ if save_path is None and add_extension:
954
+ raise ValueError("If add_extension is True, save_path must be provided.")
955
+
958
956
  url = f"{self._get_endpoint_url(RootAPIHandler.ENDPOINT_RESOURCES)}/{resource_id}/file"
959
957
  request_params = {'method': 'GET',
960
958
  'headers': {'accept': 'application/octet-stream'},
@@ -966,7 +964,8 @@ class RootAPIHandler(BaseAPIHandler):
966
964
  mimetype = None
967
965
  if auto_convert or add_extension:
968
966
  resource_info = self.get_resources_by_ids(resource_id)
969
- mimetype = resource_info['mimetype']
967
+ mimetype = resource_info.get('mimetype', guess_type(response.content)[0])
968
+
970
969
 
971
970
  if auto_convert:
972
971
  try:
@@ -987,15 +986,15 @@ class RootAPIHandler(BaseAPIHandler):
987
986
  raise e
988
987
 
989
988
  if save_path is not None:
990
- if add_extension:
991
- ext = mimetypes.guess_extension(mimetype)
989
+ if add_extension and mimetype is not None:
990
+ ext = guess_extension(mimetype)
992
991
  if ext is not None and not save_path.endswith(ext):
993
992
  save_path += ext
994
993
  with open(save_path, 'wb') as f:
995
994
  f.write(response.content)
996
995
 
997
- if add_extension:
998
- return resource_file, save_path
996
+ if add_extension:
997
+ return resource_file, save_path
999
998
  return resource_file
1000
999
 
1001
1000
  def download_resource_frame(self,
@@ -283,10 +283,12 @@ class DatamintBaseDataset:
283
283
  """Post-process data after loading metadata."""
284
284
  self._check_integrity()
285
285
  self._calculate_dataset_length()
286
- self._precompute_frame_data()
286
+ if self.return_frame_by_frame:
287
+ self._precompute_frame_data()
288
+ self.subset_indices = list(range(self.dataset_length))
287
289
  self._setup_labels()
288
290
 
289
- if self.discard_without_annotations and self.return_frame_by_frame:
291
+ if self.discard_without_annotations:
290
292
  self._filter_unannotated()
291
293
 
292
294
  def _calculate_dataset_length(self) -> None:
@@ -301,9 +303,8 @@ class DatamintBaseDataset:
301
303
 
302
304
  def _precompute_frame_data(self) -> None:
303
305
  """Precompute frame-related data for efficient indexing."""
304
- self.num_frames_per_resource = self.__compute_num_frames_per_resource()
305
- self._cumulative_frames = np.cumsum([0] + self.num_frames_per_resource)
306
- self.subset_indices = list(range(self.dataset_length))
306
+ num_frames_per_resource = self.__compute_num_frames_per_resource()
307
+ self._cumulative_frames = np.cumsum([0] + num_frames_per_resource)
307
308
 
308
309
  def _setup_labels(self) -> None:
309
310
  """Setup label sets and mappings."""
@@ -989,9 +990,11 @@ class DatamintBaseDataset:
989
990
  return Path(resource['file'])
990
991
  else:
991
992
  ext = guess_extension(resource['mimetype'], strict=False)
993
+ _LOGGER.debug(f"Guessed extension for resource {resource['id']}|{resource['mimetype']}: {ext}")
992
994
  if ext is None:
993
995
  _LOGGER.warning(f"Could not guess extension for resource {resource['id']}.")
994
996
  ext = ''
997
+ raise Exception
995
998
  return Path('images', f"{resource['id']}{ext}")
996
999
 
997
1000
  def _get_annotation_file_path(self, annotation: dict | Annotation) -> Path | None:
@@ -1,7 +1,7 @@
1
1
  [project]
2
2
  name = "datamint"
3
3
  description = "A library for interacting with the Datamint API, designed for efficient data management, processing and Deep Learning workflows."
4
- version = "1.7.2"
4
+ version = "1.7.4"
5
5
  dynamic = ["dependencies"]
6
6
  requires-python = ">=3.10"
7
7
  readme = "README.md"
@@ -40,7 +40,7 @@ matplotlib = "*"
40
40
  lightning = "*"
41
41
  albumentations = ">=2.0.0"
42
42
  lazy-loader = ">=0.3.0"
43
- medimgkit = "*"
43
+ medimgkit = ">=0.2.0"
44
44
  # For compatibility with the datamintapi package
45
45
  datamintapi = "0.0.*"
46
46
  # Extra dependencies for docs
File without changes
File without changes
File without changes
File without changes