PyPI - datamint - Versions diffs - 1.7.3__tar.gz → 1.7.5__tar.gz - Mend

datamint 1.7.3__tar.gz → 1.7.5__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of datamint might be problematic. Click here for more details.

Files changed (27)

{datamint-1.7.3 → datamint-1.7.5}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: datamint
-Version: 1.7.3
+Version: 1.7.5
 Summary: A library for interacting with the Datamint API, designed for efficient data management, processing and Deep Learning workflows.
 Requires-Python: >=3.10
 Classifier: Programming Language :: Python :: 3
@@ -19,7 +19,7 @@ Requires-Dist: humanize (>=4.0.0,<5.0.0)
 Requires-Dist: lazy-loader (>=0.3.0)
 Requires-Dist: lightning
 Requires-Dist: matplotlib
-Requires-Dist: medimgkit
+Requires-Dist: medimgkit (>=0.2.1)
 Requires-Dist: nest-asyncio (>=1.0.0,<2.0.0)
 Requires-Dist: nibabel (>=4.0.0)
 Requires-Dist: numpy

{datamint-1.7.3 → datamint-1.7.5}/datamint/apihandler/base_api_handler.py RENAMED Viewed

@@ -14,7 +14,7 @@ import cv2
 import nibabel as nib
 from nibabel.filebasedimages import FileBasedImage as nib_FileBasedImage
 from datamint import configs
-from functools import wraps
+import gzip
 _LOGGER = logging.getLogger(__name__)
@@ -138,7 +138,7 @@ class BaseAPIHandler:
             if isinstance(data, aiohttp.FormData):  # Check if it's aiohttp.FormData
                 # Handle FormData by extracting fields
                 form_parts = []
-                for options,headers,value in data._fields:
+                for options, headers, value in data._fields:
                     # get the name from options
                     name = options.get('name', 'file')
                     if hasattr(value, 'read'):  # File-like object
@@ -161,7 +161,7 @@ class BaseAPIHandler:
         if session is None:
             async with aiohttp.ClientSession() as s:
                 return await self._run_request_async(request_args, s, data_to_get)
         async with self.semaphore:
             try:
                 _LOGGER.debug(f"Running request to {request_args['url']}")
@@ -222,7 +222,7 @@ class BaseAPIHandler:
     def _run_request(self,
                      request_args: dict,
-                     session: Session = None):
+                     session: Session | None = None):
         if session is None:
             with Session() as s:
                 return self._run_request(request_args, s)
@@ -281,24 +281,32 @@ class BaseAPIHandler:
     @staticmethod
     def convert_format(bytes_array: bytes,
                        mimetype: str,
-                       file_path: str = None
+                       file_path: str | None = None
                        ) -> pydicom.dataset.Dataset | Image.Image | cv2.VideoCapture | bytes | nib_FileBasedImage:
+        """ Convert the bytes array to the appropriate format based on the mimetype."""
         content_io = BytesIO(bytes_array)
-        if mimetype == 'application/dicom':
+        if mimetype.endswith('/dicom'):
             return pydicom.dcmread(content_io)
-        elif mimetype in ('image/jpeg', 'image/png', 'image/tiff'):
+        elif mimetype.startswith('image/'):
             return Image.open(content_io)
-        elif mimetype == 'video/mp4':
+        elif mimetype.startswith('video/'):
             if file_path is None:
-                raise NotImplementedError("file_path=None is not implemented yet for video/mp4.")
+                raise NotImplementedError("file_path=None is not implemented yet for video/* mimetypes.")
             return cv2.VideoCapture(file_path)
         elif mimetype == 'application/json':
             return json.loads(bytes_array)
         elif mimetype == 'application/octet-stream':
             return bytes_array
-        elif mimetype == 'application/nifti':
-            if file_path is None:
-                raise NotImplementedError("file_path=None is not implemented yet for application/nifti.")
-            return nib.load(file_path)
+        elif mimetype.endswith('nifti'):
+            try:
+                return nib.Nifti1Image.from_stream(content_io)
+            except Exception as e:
+                if file_path is not None:
+                    return nib.load(file_path)
+                raise e
+        elif mimetype == 'application/gzip':
+            # let's hope it's a .nii.gz
+            with gzip.open(content_io, 'rb') as f:
+                return nib.Nifti1Image.from_stream(f)
         raise ValueError(f"Unsupported mimetype: {mimetype}")

{datamint-1.7.3 → datamint-1.7.5}/datamint/apihandler/root_api_handler.py RENAMED Viewed

@@ -6,12 +6,13 @@ from requests.exceptions import HTTPError
 import logging
 import asyncio
 import aiohttp
-from medimgkit.dicom_utils import anonymize_dicom, to_bytesio, is_dicom
+from medimgkit.dicom_utils import anonymize_dicom, to_bytesio, is_dicom, is_dicom_report
 from medimgkit import dicom_utils
+from medimgkit.io_utils import is_io_object
+from medimgkit.format_detection import guess_typez, guess_extension, DEFAULT_MIME_TYPE
 import pydicom
 from pathlib import Path
 from datetime import date
-import mimetypes
 from PIL import Image
 import cv2
 from nibabel.filebasedimages import FileBasedImage as nib_FileBasedImage
@@ -26,13 +27,6 @@ _LOGGER = logging.getLogger(__name__)
 _USER_LOGGER = logging.getLogger('user_logger')
-def _is_io_object(obj):
-    """
-    Check if an object is a file-like object.
-    """
-    return callable(getattr(obj, "read", None))
 def _infinite_gen(x):
     while True:
         yield x
@@ -65,7 +59,7 @@ class RootAPIHandler(BaseAPIHandler):
                                             publish: bool = False,
                                             metadata_file: Optional[str | dict] = None,
                                             ) -> str:
-        if _is_io_object(file_path):
+        if is_io_object(file_path):
             name = file_path.name
         else:
             name = file_path
@@ -91,15 +85,14 @@ class RootAPIHandler(BaseAPIHandler):
             name = new_file_path
             _LOGGER.debug(f"New file path: {name}")
-        if mimetype is None:
-            mimetype = mimetypes.guess_type(name)[0]
         is_a_dicom_file = None
         if mimetype is None:
-            is_a_dicom_file = is_dicom(name) or is_dicom(file_path)
-            if is_a_dicom_file:
-                mimetype = 'application/dicom'
-            elif name.endswith('.nii') or name.endswith('.nii.gz'):
-                mimetype = 'application/x-nifti'
+            mimetype_list, ext = guess_typez(file_path, use_magic=True)
+            mimetype = mimetype_list[-1]
+            if mimetype == 'application/gzip':
+                # Special case for gzipped NIfTI files
+                if ext == '.nii.gz' or name.lower().endswith('nii.gz'):
+                    mimetype = 'image/x.nifti'
         filename = os.path.basename(name)
         _LOGGER.debug(f"File name '{filename}' mimetype: {mimetype}")
@@ -364,35 +357,6 @@ class RootAPIHandler(BaseAPIHandler):
         return result[0]
-    @staticmethod
-    def _is_dicom_report(file_path: str | IO) -> bool:
-        """
-        Check if a DICOM file is a report (e.g., Structured Report).
-        Args:
-            file_path: Path to the DICOM file or file-like object.
-        Returns:
-            bool: True if the DICOM file is a report, False otherwise.
-        """
-        try:
-            if not is_dicom(file_path):
-                return False
-            ds = pydicom.dcmread(file_path, stop_before_pixels=True)
-            if hasattr(file_path, 'seek'):
-                file_path.seek(0)
-            modality = getattr(ds, 'Modality', None)
-            # Common report modalities
-            # SR=Structured Report, DOC=Document, KO=Key Object, PR=Presentation State
-            report_modalities = {'SR', 'DOC', 'KO', 'PR', 'ESR'}
-            return modality in report_modalities
-        except Exception as e:
-            _LOGGER.warning(f"Error checking if DICOM is a report: {e}")
-            return False
     def upload_resources(self,
                          files_path: str | IO | Sequence[str | IO] | pydicom.dataset.Dataset,
                          mimetype: Optional[str] = None,
@@ -451,9 +415,9 @@ class RootAPIHandler(BaseAPIHandler):
         files_path, is_multiple_resources = RootAPIHandler.__process_files_parameter(files_path)
-        ### Discard DICOM reports
+        # Discard DICOM reports
         if discard_dicom_reports:
-            files_path = [f for f in files_path if not RootAPIHandler._is_dicom_report(f)]
+            files_path = [f for f in files_path if not is_dicom_report(f)]
             old_size = len(files_path)
             if old_size is not None and old_size != len(files_path):
                 _LOGGER.info(f"Discarded {old_size - len(files_path)} DICOM report files from upload.")
@@ -678,7 +642,7 @@ class RootAPIHandler(BaseAPIHandler):
                 is_list = False
                 new_file_path = [file_path]
         # Check if is an IO object
-        elif _is_io_object(file_path):
+        elif is_io_object(file_path):
             is_list = False
             new_file_path = [file_path]
         elif not hasattr(file_path, '__len__'):
@@ -728,8 +692,8 @@ class RootAPIHandler(BaseAPIHandler):
     def get_resources(self,
                       status: Optional[ResourceStatus] = None,
-                      from_date: Optional[date] = None,
-                      to_date: Optional[date] = None,
+                      from_date: date | str | None = None,
+                      to_date: date | str | None = None,
                       tags: Optional[Sequence[str]] = None,
                       modality: Optional[str] = None,
                       mimetype: Optional[str] = None,
@@ -747,8 +711,8 @@ class RootAPIHandler(BaseAPIHandler):
         Args:
             status (ResourceStatus): The resource status. Possible values: 'inbox', 'published', 'archived' or None. If None, it will return all resources.
-            from_date (Optional[date]): The start date.
-            to_date (Optional[date]): The end date.
+            from_date (date | str | None): The start date.
+            to_date (date | str | None): The end date.
             tags (Optional[list[str]]): The tags to filter the resources.
             modality (Optional[str]): The modality of the resources.
             mimetype (Optional[str]): The mimetype of the resources.
@@ -767,9 +731,15 @@ class RootAPIHandler(BaseAPIHandler):
         """
         # Convert datetime objects to ISO format
         if from_date:
-            from_date = from_date.isoformat()
+            if isinstance(from_date, str):
+                date.fromisoformat(from_date)
+            else:
+                from_date = from_date.isoformat()
         if to_date:
-            to_date = to_date.isoformat()
+            if isinstance(to_date, str):
+                date.fromisoformat(to_date)
+            else:
+                to_date = to_date.isoformat()
         # Prepare the payload
         payload = {
@@ -869,9 +839,10 @@ class RootAPIHandler(BaseAPIHandler):
     async def _async_download_file(self,
                                    resource_id: str,
-                                   save_path: str,
+                                   save_path: str | Path,
                                    session: aiohttp.ClientSession | None = None,
-                                   progress_bar: tqdm | None = None):
+                                   progress_bar: tqdm | None = None,
+                                   add_extension: bool = False) -> str:
         """
         Asynchronously download a file from the server.
@@ -880,6 +851,10 @@ class RootAPIHandler(BaseAPIHandler):
             save_path (str): The path to save the file.
             session (aiohttp.ClientSession): The aiohttp session to use for the request.
             progress_bar (tqdm | None): Optional progress bar to update after download completion.
+            add_extension (bool): Whether to add the appropriate file extension based on content type.
+        Returns:
+            str: The actual path where the file was saved (important when add_extension=True).
         """
         url = f"{self._get_endpoint_url(RootAPIHandler.ENDPOINT_RESOURCES)}/{resource_id}/file"
         request_params = {
@@ -887,41 +862,90 @@ class RootAPIHandler(BaseAPIHandler):
             'headers': {'accept': 'application/octet-stream'},
             'url': url
         }
+        save_path = str(save_path)  # Ensure save_path is a string for file operations
         try:
             data_bytes = await self._run_request_async(request_params, session, 'content')
-            with open(save_path, 'wb') as f:
-                f.write(data_bytes)
+            final_save_path = save_path
+            if add_extension:
+                # Save to temporary file first to determine mimetype from content
+                temp_path = f"{save_path}.tmp"
+                with open(temp_path, 'wb') as f:
+                    f.write(data_bytes)
+                # Determine mimetype from file content
+                mimetype_list, ext = guess_typez(temp_path, use_magic=True)
+                mimetype = mimetype_list[-1]
+                # get mimetype from resource info if not detected
+                if mimetype is None or mimetype == DEFAULT_MIME_TYPE:
+                    resource_info = self.get_resources_by_ids(resource_id)
+                    mimetype = resource_info.get('mimetype', mimetype)
+                # Generate final path with extension if needed
+                if mimetype is not None and mimetype != DEFAULT_MIME_TYPE:
+                    if ext is None:
+                        ext = guess_extension(mimetype)
+                    if ext is not None and not save_path.endswith(ext):
+                        final_save_path = save_path + ext
+                # Move file to final location
+                os.rename(temp_path, final_save_path)
+            else:
+                # Standard save without extension detection
+                with open(final_save_path, 'wb') as f:
+                    f.write(data_bytes)
             if progress_bar:
                 progress_bar.update(1)
+            return final_save_path
         except ResourceNotFoundError as e:
             e.set_params('resource', {'resource_id': resource_id})
             raise e
     def download_multiple_resources(self,
                                     resource_ids: list[str],
-                                    save_path: list[str] | str
-                                    ) -> None:
+                                    save_path: list[str] | str,
+                                    add_extension: bool = False,
+                                    ) -> list[str]:
         """
         Download multiple resources and save them to the specified paths.
         Args:
             resource_ids (list[str]): A list of resource unique ids.
             save_path (list[str] | str): A list of paths to save the files or a directory path.
+            add_extension (bool): Whether to add the appropriate file extension to the save_path based on the content type.
+        Returns:
+            list[str]: A list of paths where the files were saved. Important if `add_extension=True`.
         """
+        if isinstance(resource_ids, str):
+            raise ValueError("resource_ids must be a list of strings.")
         async def _download_all_async():
             async with aiohttp.ClientSession() as session:
                 tasks = [
-                    self._async_download_file(resource_id, save_path=path, session=session, progress_bar=progress_bar)
+                    self._async_download_file(
+                        resource_id=resource_id,
+                        save_path=path,
+                        session=session,
+                        progress_bar=progress_bar,
+                        add_extension=add_extension
+                    )
                     for resource_id, path in zip(resource_ids, save_path)
                 ]
-                await asyncio.gather(*tasks)
+                return await asyncio.gather(*tasks)
         if isinstance(save_path, str):
             save_path = [os.path.join(save_path, r) for r in resource_ids]
         with tqdm(total=len(resource_ids), desc="Downloading resources", unit="file") as progress_bar:
             loop = asyncio.get_event_loop()
-            loop.run_until_complete(_download_all_async())
+            final_save_paths = loop.run_until_complete(_download_all_async())
+        return final_save_paths
     def download_resource_file(self,
                                resource_id: str,
@@ -953,6 +977,9 @@ class RootAPIHandler(BaseAPIHandler):
             >>> api_handler.download_resource_file('resource_id', save_path='path/to/dicomfile.dcm')
                 saves the file in the specified path.
         """
+        if save_path is None and add_extension:
+            raise ValueError("If add_extension is True, save_path must be provided.")
         url = f"{self._get_endpoint_url(RootAPIHandler.ENDPOINT_RESOURCES)}/{resource_id}/file"
         request_params = {'method': 'GET',
                           'headers': {'accept': 'application/octet-stream'},
@@ -962,9 +989,14 @@ class RootAPIHandler(BaseAPIHandler):
             # Get mimetype if needed for auto_convert or add_extension
             mimetype = None
+            mimetype_list = []
+            ext = None
             if auto_convert or add_extension:
-                resource_info = self.get_resources_by_ids(resource_id)
-                mimetype = resource_info['mimetype']
+                mimetype_list, ext = guess_typez(response.content)
+                mimetype = mimetype_list[-1]
+                if mimetype is None or mimetype == DEFAULT_MIME_TYPE:
+                    resource_info = self.get_resources_by_ids(resource_id)
+                    mimetype = resource_info.get('mimetype', None)
             if auto_convert:
                 try:
@@ -985,15 +1017,16 @@ class RootAPIHandler(BaseAPIHandler):
             raise e
         if save_path is not None:
-            if add_extension:
-                ext = mimetypes.guess_extension(mimetype)
+            if add_extension and mimetype is not None:
+                if ext is None:
+                    ext = guess_extension(mimetype)
                 if ext is not None and not save_path.endswith(ext):
                     save_path += ext
             with open(save_path, 'wb') as f:
                 f.write(response.content)
-        if add_extension:
-            return resource_file, save_path
+            if add_extension:
+                return resource_file, save_path
         return resource_file
     def download_resource_frame(self,

{datamint-1.7.3 → datamint-1.7.5}/datamint/dataset/base_dataset.py RENAMED Viewed

@@ -15,10 +15,10 @@ import torch
 from torch import Tensor
 from datamint.apihandler.base_api_handler import DatamintException
 from medimgkit.dicom_utils import is_dicom
-from medimgkit.io_utils import read_array_normalized
+from medimgkit.readers import read_array_normalized
+from medimgkit.format_detection import guess_extension
 from datetime import datetime
 from pathlib import Path
-from mimetypes import guess_extension
 from datamint.dataset.annotation import Annotation
 import cv2
@@ -283,10 +283,12 @@ class DatamintBaseDataset:
         """Post-process data after loading metadata."""
         self._check_integrity()
         self._calculate_dataset_length()
-        self._precompute_frame_data()
+        if self.return_frame_by_frame:
+            self._precompute_frame_data()
+        self.subset_indices = list(range(self.dataset_length))
         self._setup_labels()
-        if self.discard_without_annotations and self.return_frame_by_frame:
+        if self.discard_without_annotations:
             self._filter_unannotated()
     def _calculate_dataset_length(self) -> None:
@@ -301,9 +303,8 @@ class DatamintBaseDataset:
     def _precompute_frame_data(self) -> None:
         """Precompute frame-related data for efficient indexing."""
-        self.num_frames_per_resource = self.__compute_num_frames_per_resource()
-        self._cumulative_frames = np.cumsum([0] + self.num_frames_per_resource)
-        self.subset_indices = list(range(self.dataset_length))
+        num_frames_per_resource = self.__compute_num_frames_per_resource()
+        self._cumulative_frames = np.cumsum([0] + num_frames_per_resource)
     def _setup_labels(self) -> None:
         """Setup label sets and mappings."""
@@ -897,8 +898,11 @@ class DatamintBaseDataset:
             new_resources_path = [Path(self.dataset_dir) / r['file'] for r in new_resources]
             new_resources_ids = [r['id'] for r in new_resources]
             _LOGGER.info(f"Downloading {len(new_resources)} new resources...")
-            self.api_handler.download_multiple_resources(new_resources_ids,
-                                                        save_path=new_resources_path)
+            new_res_paths = self.api_handler.download_multiple_resources(new_resources_ids,
+                                                                         save_path=new_resources_path,
+                                                                         add_extension=True)
+            for new_rpath, r in zip(new_res_paths, new_resources):
+                r['file'] = str(Path(new_rpath).relative_to(self.dataset_dir))
             _LOGGER.info(f"Downloaded {len(new_resources)} new resources.")
         for r in deleted_resources:
@@ -988,11 +992,13 @@ class DatamintBaseDataset:
         if 'file' in resource and resource['file'] is not None:
             return Path(resource['file'])
         else:
-            ext = guess_extension(resource['mimetype'], strict=False)
-            if ext is None:
-                _LOGGER.warning(f"Could not guess extension for resource {resource['id']}.")
-                ext = ''
-            return Path('images', f"{resource['id']}{ext}")
+            # ext = guess_extension(resource['mimetype'])
+            # _LOGGER.debug(f"Guessed extension for resource {resource['id']}|{resource['mimetype']}: {ext}")
+            # if ext is None:
+            #     _LOGGER.warning(f"Could not guess extension for resource {resource['id']}.")
+            #     ext = ''
+            # return Path('images', f"{resource['id']}{ext}")
+            return Path('images', resource['id'])
     def _get_annotation_file_path(self, annotation: dict | Annotation) -> Path | None:
         """Get the local file path for an annotation."""

{datamint-1.7.3 → datamint-1.7.5}/datamint/dataset/dataset.py RENAMED Viewed

@@ -478,7 +478,7 @@ class DatamintDataset(DatamintBaseDataset):
     def _convert_labels_annotations(self,
                                     annotations: list[Annotation],
-                                    num_frames: int = None) -> dict[str, torch.Tensor]:
+                                    num_frames: int | None = None) -> dict[str, torch.Tensor]:
         """
         Converts the annotations, of the same type and scope, to tensor of shape (num_frames, num_labels)
         for each annotator.

{datamint-1.7.3 → datamint-1.7.5}/pyproject.toml RENAMED Viewed

@@ -1,7 +1,7 @@
 [project]
 name = "datamint"
 description = "A library for interacting with the Datamint API, designed for efficient data management, processing and Deep Learning workflows."
-version = "1.7.3"
+version = "1.7.5"
 dynamic = ["dependencies"]
 requires-python = ">=3.10"
 readme = "README.md"
@@ -40,7 +40,7 @@ matplotlib = "*"
 lightning = "*"
 albumentations = ">=2.0.0"
 lazy-loader = ">=0.3.0"
-medimgkit = "*"
+medimgkit = ">=0.2.1"
 # For compatibility with the datamintapi package
 datamintapi = "0.0.*"
 # Extra dependencies for docs

{datamint-1.7.3 → datamint-1.7.5}/README.md RENAMED Viewed

File without changes

{datamint-1.7.3 → datamint-1.7.5}/datamint/__init__.py RENAMED Viewed

File without changes

{datamint-1.7.3 → datamint-1.7.5}/datamint/apihandler/annotation_api_handler.py RENAMED Viewed

File without changes

{datamint-1.7.3 → datamint-1.7.5}/datamint/apihandler/api_handler.py RENAMED Viewed

File without changes

{datamint-1.7.3 → datamint-1.7.5}/datamint/apihandler/dto/annotation_dto.py RENAMED Viewed

File without changes

{datamint-1.7.3 → datamint-1.7.5}/datamint/apihandler/exp_api_handler.py RENAMED Viewed

File without changes

{datamint-1.7.3 → datamint-1.7.5}/datamint/client_cmd_tools/__init__.py RENAMED Viewed

File without changes

{datamint-1.7.3 → datamint-1.7.5}/datamint/client_cmd_tools/datamint_config.py RENAMED Viewed

File without changes

{datamint-1.7.3 → datamint-1.7.5}/datamint/client_cmd_tools/datamint_upload.py RENAMED Viewed

File without changes

{datamint-1.7.3 → datamint-1.7.5}/datamint/configs.py RENAMED Viewed

File without changes

{datamint-1.7.3 → datamint-1.7.5}/datamint/dataset/__init__.py RENAMED Viewed

File without changes

{datamint-1.7.3 → datamint-1.7.5}/datamint/dataset/annotation.py RENAMED Viewed

File without changes

{datamint-1.7.3 → datamint-1.7.5}/datamint/examples/__init__.py RENAMED Viewed

File without changes

{datamint-1.7.3 → datamint-1.7.5}/datamint/examples/example_projects.py RENAMED Viewed

File without changes

{datamint-1.7.3 → datamint-1.7.5}/datamint/experiment/__init__.py RENAMED Viewed

File without changes

{datamint-1.7.3 → datamint-1.7.5}/datamint/experiment/_patcher.py RENAMED Viewed

File without changes

{datamint-1.7.3 → datamint-1.7.5}/datamint/experiment/experiment.py RENAMED Viewed

File without changes

{datamint-1.7.3 → datamint-1.7.5}/datamint/logging.yaml RENAMED Viewed

File without changes

{datamint-1.7.3 → datamint-1.7.5}/datamint/utils/logging_utils.py RENAMED Viewed

File without changes

{datamint-1.7.3 → datamint-1.7.5}/datamint/utils/torchmetrics.py RENAMED Viewed

File without changes

{datamint-1.7.3 → datamint-1.7.5}/datamint/utils/visualization.py RENAMED Viewed

File without changes

datamint 1.7.3__tar.gz → 1.7.5__tar.gz

Potentially problematic release.

datamint 1.7.3__tar.gz → 1.7.5__tar.gz