datamint 1.7.4__py3-none-any.whl → 1.7.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of datamint has been flagged as potentially problematic.

--- a/datamint/apihandler/base_api_handler.py
+++ b/datamint/apihandler/base_api_handler.py
@@ -14,7 +14,7 @@ import cv2
 import nibabel as nib
 from nibabel.filebasedimages import FileBasedImage as nib_FileBasedImage
 from datamint import configs
-from functools import wraps
+import gzip
 
 _LOGGER = logging.getLogger(__name__)
 
@@ -298,8 +298,15 @@ class BaseAPIHandler:
         elif mimetype == 'application/octet-stream':
             return bytes_array
         elif mimetype.endswith('nifti'):
-            if file_path is None:
-                raise NotImplementedError(f"file_path=None is not implemented yet for {mimetype}.")
-            return nib.load(file_path)
+            try:
+                return nib.Nifti1Image.from_stream(content_io)
+            except Exception as e:
+                if file_path is not None:
+                    return nib.load(file_path)
+                raise e
+        elif mimetype == 'application/gzip':
+            # let's hope it's a .nii.gz
+            with gzip.open(content_io, 'rb') as f:
+                return nib.Nifti1Image.from_stream(f)
 
         raise ValueError(f"Unsupported mimetype: {mimetype}")
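
The change above removes the hard requirement of having the NIfTI on disk: the handler now parses the downloaded bytes directly from the in-memory stream and only falls back to nib.load(file_path) when a path is available, and gzip payloads are decompressed on the fly on the assumption that they hold a .nii.gz. A minimal standalone sketch of the same idea (the helper name is illustrative, not datamint's API, and it assumes a nibabel version that provides Nifti1Image.from_stream, which the new code relies on):

    import gzip
    import io

    import nibabel as nib

    def decode_nifti_bytes(raw: bytes) -> nib.Nifti1Image:
        """Parse a NIfTI payload entirely in memory, gzipped or not."""
        content_io = io.BytesIO(raw)
        if raw[:2] == b"\x1f\x8b":  # gzip magic number -> treat as .nii.gz
            with gzip.open(content_io, "rb") as f:
                return nib.Nifti1Image.from_stream(f)
        return nib.Nifti1Image.from_stream(content_io)
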
--- a/datamint/apihandler/root_api_handler.py
+++ b/datamint/apihandler/root_api_handler.py
@@ -6,10 +6,10 @@ from requests.exceptions import HTTPError
 import logging
 import asyncio
 import aiohttp
-from medimgkit.dicom_utils import anonymize_dicom, to_bytesio, is_dicom
+from medimgkit.dicom_utils import anonymize_dicom, to_bytesio, is_dicom, is_dicom_report
 from medimgkit import dicom_utils
 from medimgkit.io_utils import is_io_object
-from medimgkit.format_detection import guess_type, guess_extension
+from medimgkit.format_detection import guess_typez, guess_extension, DEFAULT_MIME_TYPE
 import pydicom
 from pathlib import Path
 from datetime import date
@@ -87,10 +87,12 @@ class RootAPIHandler(BaseAPIHandler):
 
         is_a_dicom_file = None
         if mimetype is None:
-            mimetype, _ = guess_type(file_path, use_magic=True)
-            if mimetype == 'application/gzip' and name.lower().endswith('nii.gz'):
+            mimetype_list, ext = guess_typez(file_path, use_magic=True)
+            mimetype = mimetype_list[-1]
+            if mimetype == 'application/gzip':
                 # Special case for gzipped NIfTI files
-                mimetype = 'image/x.nifti'
+                if ext == '.nii.gz' or name.lower().endswith('nii.gz'):
+                    mimetype = 'image/x.nifti'
 
         filename = os.path.basename(name)
         _LOGGER.debug(f"File name '{filename}' mimetype: {mimetype}")
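
The upload path switches from guess_type to guess_typez. From the way the diff uses it, guess_typez appears to return a list of candidate mimetypes plus a detected extension rather than a single guess; the handler takes the last candidate and only promotes application/gzip to image/x.nifti when the detected extension or the file name indicates .nii.gz. A short sketch of that assumed contract, inferred from the hunk above rather than from medimgkit's documentation, with a placeholder file name:

    from medimgkit.format_detection import guess_typez  # medimgkit >= 0.2.1

    mimetype_list, ext = guess_typez('scan_001.nii.gz', use_magic=True)
    # e.g. mimetype_list == ['application/gzip'] and ext == '.nii.gz';
    # the handler takes mimetype_list[-1] as the most specific candidate and,
    # because the extension agrees, maps it to 'image/x.nifti'.
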
@@ -355,35 +357,6 @@ class RootAPIHandler(BaseAPIHandler):
 
         return result[0]
 
-    @staticmethod
-    def _is_dicom_report(file_path: str | IO) -> bool:
-        """
-        Check if a DICOM file is a report (e.g., Structured Report).
-
-        Args:
-            file_path: Path to the DICOM file or file-like object.
-
-        Returns:
-            bool: True if the DICOM file is a report, False otherwise.
-        """
-        try:
-            if not is_dicom(file_path):
-                return False
-
-            ds = pydicom.dcmread(file_path, stop_before_pixels=True)
-            if hasattr(file_path, 'seek'):
-                file_path.seek(0)
-            modality = getattr(ds, 'Modality', None)
-
-            # Common report modalities
-            # SR=Structured Report, DOC=Document, KO=Key Object, PR=Presentation State
-            report_modalities = {'SR', 'DOC', 'KO', 'PR', 'ESR'}
-
-            return modality in report_modalities
-        except Exception as e:
-            _LOGGER.warning(f"Error checking if DICOM is a report: {e}")
-            return False
-
     def upload_resources(self,
                          files_path: str | IO | Sequence[str | IO] | pydicom.dataset.Dataset,
                          mimetype: Optional[str] = None,
@@ -444,7 +417,7 @@ class RootAPIHandler(BaseAPIHandler):
 
         # Discard DICOM reports
         if discard_dicom_reports:
-            files_path = [f for f in files_path if not RootAPIHandler._is_dicom_report(f)]
+            files_path = [f for f in files_path if not is_dicom_report(f)]
             old_size = len(files_path)
             if old_size is not None and old_size != len(files_path):
                 _LOGGER.info(f"Discarded {old_size - len(files_path)} DICOM report files from upload.")
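
The private RootAPIHandler._is_dicom_report helper is dropped in favour of is_dicom_report from medimgkit.dicom_utils (added to the imports in the first root_api_handler hunk), so the report check now lives in the shared library. The logic the deleted helper implemented, reconstructed from the removed lines, reads only the DICOM header and compares Modality against report-type values; the function name below is illustrative, not medimgkit's exact signature:

    import pydicom

    # SR=Structured Report, DOC=Document, KO=Key Object, PR=Presentation State
    REPORT_MODALITIES = {'SR', 'DOC', 'KO', 'PR', 'ESR'}

    def looks_like_dicom_report(path: str) -> bool:
        """Return True if the DICOM header's Modality marks it as a report-type object."""
        try:
            ds = pydicom.dcmread(path, stop_before_pixels=True)  # header only, skip pixel data
        except Exception:
            return False
        return getattr(ds, 'Modality', None) in REPORT_MODALITIES
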
@@ -866,9 +839,10 @@ class RootAPIHandler(BaseAPIHandler):
 
     async def _async_download_file(self,
                                    resource_id: str,
-                                   save_path: str,
+                                   save_path: str | Path,
                                    session: aiohttp.ClientSession | None = None,
-                                   progress_bar: tqdm | None = None):
+                                   progress_bar: tqdm | None = None,
+                                   add_extension: bool = False) -> str:
         """
         Asynchronously download a file from the server.
 
@@ -877,6 +851,10 @@ class RootAPIHandler(BaseAPIHandler):
             save_path (str): The path to save the file.
             session (aiohttp.ClientSession): The aiohttp session to use for the request.
             progress_bar (tqdm | None): Optional progress bar to update after download completion.
+            add_extension (bool): Whether to add the appropriate file extension based on content type.
+
+        Returns:
+            str: The actual path where the file was saved (important when add_extension=True).
         """
         url = f"{self._get_endpoint_url(RootAPIHandler.ENDPOINT_RESOURCES)}/{resource_id}/file"
         request_params = {
@@ -884,41 +862,90 @@ class RootAPIHandler(BaseAPIHandler):
             'headers': {'accept': 'application/octet-stream'},
             'url': url
         }
+        save_path = str(save_path)  # Ensure save_path is a string for file operations
         try:
             data_bytes = await self._run_request_async(request_params, session, 'content')
-            with open(save_path, 'wb') as f:
-                f.write(data_bytes)
+
+            final_save_path = save_path
+            if add_extension:
+                # Save to temporary file first to determine mimetype from content
+                temp_path = f"{save_path}.tmp"
+                with open(temp_path, 'wb') as f:
+                    f.write(data_bytes)
+
+                # Determine mimetype from file content
+                mimetype_list, ext = guess_typez(temp_path, use_magic=True)
+                mimetype = mimetype_list[-1]
+
+                # get mimetype from resource info if not detected
+                if mimetype is None or mimetype == DEFAULT_MIME_TYPE:
+                    resource_info = self.get_resources_by_ids(resource_id)
+                    mimetype = resource_info.get('mimetype', mimetype)
+
+                # Generate final path with extension if needed
+                if mimetype is not None and mimetype != DEFAULT_MIME_TYPE:
+                    if ext is None:
+                        ext = guess_extension(mimetype)
+                    if ext is not None and not save_path.endswith(ext):
+                        final_save_path = save_path + ext
+
+                # Move file to final location
+                os.rename(temp_path, final_save_path)
+            else:
+                # Standard save without extension detection
+                with open(final_save_path, 'wb') as f:
+                    f.write(data_bytes)
+
             if progress_bar:
                 progress_bar.update(1)
+
+            return final_save_path
+
         except ResourceNotFoundError as e:
             e.set_params('resource', {'resource_id': resource_id})
             raise e
 
     def download_multiple_resources(self,
                                     resource_ids: list[str],
-                                    save_path: list[str] | str
-                                    ) -> None:
+                                    save_path: list[str] | str,
+                                    add_extension: bool = False,
+                                    ) -> list[str]:
         """
         Download multiple resources and save them to the specified paths.
 
         Args:
             resource_ids (list[str]): A list of resource unique ids.
             save_path (list[str] | str): A list of paths to save the files or a directory path.
+            add_extension (bool): Whether to add the appropriate file extension to the save_path based on the content type.
+
+        Returns:
+            list[str]: A list of paths where the files were saved. Important if `add_extension=True`.
         """
+        if isinstance(resource_ids, str):
+            raise ValueError("resource_ids must be a list of strings.")
+
         async def _download_all_async():
             async with aiohttp.ClientSession() as session:
                 tasks = [
-                    self._async_download_file(resource_id, save_path=path, session=session, progress_bar=progress_bar)
+                    self._async_download_file(
+                        resource_id=resource_id,
+                        save_path=path,
+                        session=session,
+                        progress_bar=progress_bar,
+                        add_extension=add_extension
+                    )
                     for resource_id, path in zip(resource_ids, save_path)
                 ]
-                await asyncio.gather(*tasks)
+                return await asyncio.gather(*tasks)
 
         if isinstance(save_path, str):
             save_path = [os.path.join(save_path, r) for r in resource_ids]
 
         with tqdm(total=len(resource_ids), desc="Downloading resources", unit="file") as progress_bar:
             loop = asyncio.get_event_loop()
-            loop.run_until_complete(_download_all_async())
+            final_save_paths = loop.run_until_complete(_download_all_async())
+
+        return final_save_paths
 
     def download_resource_file(self,
                                resource_id: str,
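
Besides accepting add_extension, download_multiple_resources now returns the list of paths actually written, which matters because extension detection can rename files relative to what the caller asked for. A hypothetical usage sketch (`api` stands for an already constructed RootAPIHandler and the resource ids are placeholders):

    resource_ids = ['res-001', 'res-002']
    saved_paths = api.download_multiple_resources(
        resource_ids,
        save_path='downloads/',   # a directory: one file per resource id
        add_extension=True,       # sniff the content and append e.g. '.dcm' or '.nii.gz'
    )
    # The returned list is the authoritative location of each file, since the
    # extension-augmented names can differ from 'downloads/<resource_id>'.
    for rid, path in zip(resource_ids, saved_paths):
        print(rid, '->', path)
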
@@ -962,10 +989,14 @@ class RootAPIHandler(BaseAPIHandler):
 
         # Get mimetype if needed for auto_convert or add_extension
         mimetype = None
+        mimetype_list = []
+        ext = None
         if auto_convert or add_extension:
-            resource_info = self.get_resources_by_ids(resource_id)
-            mimetype = resource_info.get('mimetype', guess_type(response.content)[0])
-
+            mimetype_list, ext = guess_typez(response.content)
+            mimetype = mimetype_list[-1]
+            if mimetype is None or mimetype == DEFAULT_MIME_TYPE:
+                resource_info = self.get_resources_by_ids(resource_id)
+                mimetype = resource_info.get('mimetype', None)
 
         if auto_convert:
             try:
@@ -987,7 +1018,8 @@ class RootAPIHandler(BaseAPIHandler):
 
         if save_path is not None:
             if add_extension and mimetype is not None:
-                ext = guess_extension(mimetype)
+                if ext is None:
+                    ext = guess_extension(mimetype)
                 if ext is not None and not save_path.endswith(ext):
                     save_path += ext
             with open(save_path, 'wb') as f:
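
download_resource_file now derives the mimetype from the downloaded bytes first and only falls back to the resource's stored metadata when content sniffing yields nothing (or only the generic default type), and an extension already detected from the content is reused before guess_extension is consulted. A sketch of that fallback order (the helper name and wiring are illustrative, not datamint's API):

    from medimgkit.format_detection import DEFAULT_MIME_TYPE, guess_extension, guess_typez

    def pick_extension(content: bytes, stored_mimetype: str | None) -> str | None:
        mimetype_list, ext = guess_typez(content)   # 1) sniff the bytes themselves
        mimetype = mimetype_list[-1]
        if mimetype is None or mimetype == DEFAULT_MIME_TYPE:
            mimetype = stored_mimetype              # 2) fall back to server-side metadata
        if ext is None and mimetype is not None:
            ext = guess_extension(mimetype)         # 3) derive an extension as a last resort
        return ext
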
--- a/datamint/dataset/base_dataset.py
+++ b/datamint/dataset/base_dataset.py
@@ -15,10 +15,10 @@ import torch
 from torch import Tensor
 from datamint.apihandler.base_api_handler import DatamintException
 from medimgkit.dicom_utils import is_dicom
-from medimgkit.io_utils import read_array_normalized
+from medimgkit.readers import read_array_normalized
+from medimgkit.format_detection import guess_extension
 from datetime import datetime
 from pathlib import Path
-from mimetypes import guess_extension
 from datamint.dataset.annotation import Annotation
 import cv2
 
@@ -898,8 +898,11 @@ class DatamintBaseDataset:
         new_resources_path = [Path(self.dataset_dir) / r['file'] for r in new_resources]
         new_resources_ids = [r['id'] for r in new_resources]
         _LOGGER.info(f"Downloading {len(new_resources)} new resources...")
-        self.api_handler.download_multiple_resources(new_resources_ids,
-                                                     save_path=new_resources_path)
+        new_res_paths = self.api_handler.download_multiple_resources(new_resources_ids,
+                                                                     save_path=new_resources_path,
+                                                                     add_extension=True)
+        for new_rpath, r in zip(new_res_paths, new_resources):
+            r['file'] = str(Path(new_rpath).relative_to(self.dataset_dir))
         _LOGGER.info(f"Downloaded {len(new_resources)} new resources.")
 
         for r in deleted_resources:
@@ -989,13 +992,13 @@ class DatamintBaseDataset:
         if 'file' in resource and resource['file'] is not None:
             return Path(resource['file'])
         else:
-            ext = guess_extension(resource['mimetype'], strict=False)
-            _LOGGER.debug(f"Guessed extension for resource {resource['id']}|{resource['mimetype']}: {ext}")
-            if ext is None:
-                _LOGGER.warning(f"Could not guess extension for resource {resource['id']}.")
-                ext = ''
-                raise Exception
-            return Path('images', f"{resource['id']}{ext}")
+            # ext = guess_extension(resource['mimetype'])
+            # _LOGGER.debug(f"Guessed extension for resource {resource['id']}|{resource['mimetype']}: {ext}")
+            # if ext is None:
+            #     _LOGGER.warning(f"Could not guess extension for resource {resource['id']}.")
+            #     ext = ''
+            # return Path('images', f"{resource['id']}{ext}")
+            return Path('images', resource['id'])
 
     def _get_annotation_file_path(self, annotation: dict | Annotation) -> Path | None:
         """Get the local file path for an annotation."""
--- a/datamint/dataset/dataset.py
+++ b/datamint/dataset/dataset.py
@@ -478,7 +478,7 @@ class DatamintDataset(DatamintBaseDataset):
 
     def _convert_labels_annotations(self,
                                     annotations: list[Annotation],
-                                    num_frames: int = None) -> dict[str, torch.Tensor]:
+                                    num_frames: int | None = None) -> dict[str, torch.Tensor]:
         """
         Converts the annotations, of the same type and scope, to tensor of shape (num_frames, num_labels)
         for each annotator.
--- a/datamint-1.7.4.dist-info/METADATA
+++ b/datamint-1.7.5.dist-info/METADATA
@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: datamint
-Version: 1.7.4
+Version: 1.7.5
 Summary: A library for interacting with the Datamint API, designed for efficient data management, processing and Deep Learning workflows.
 Requires-Python: >=3.10
 Classifier: Programming Language :: Python :: 3
@@ -19,7 +19,7 @@ Requires-Dist: humanize (>=4.0.0,<5.0.0)
 Requires-Dist: lazy-loader (>=0.3.0)
 Requires-Dist: lightning
 Requires-Dist: matplotlib
-Requires-Dist: medimgkit (>=0.2.0)
+Requires-Dist: medimgkit (>=0.2.1)
 Requires-Dist: nest-asyncio (>=1.0.0,<2.0.0)
 Requires-Dist: nibabel (>=4.0.0)
 Requires-Dist: numpy
--- a/datamint-1.7.4.dist-info/RECORD
+++ b/datamint-1.7.5.dist-info/RECORD
@@ -1,18 +1,18 @@
 datamint/__init__.py,sha256=7rKCCsaa4RBRTIfuHB708rai1xwDHLtkFNFJGKYG5D4,757
 datamint/apihandler/annotation_api_handler.py,sha256=HnWiG2ebq08mdaazTXVbkuwvh6fmKIKt8uqAOf3Y1jU,57013
 datamint/apihandler/api_handler.py,sha256=cdVSddrFCKlF_BJ81LO1aJ0OP49rssjpNEFzJ6Q7YyY,384
-datamint/apihandler/base_api_handler.py,sha256=t2no7gTIdPFfR_TXlZmh3rsncaL9p1G8eIS2m9Q7ALE,11978
+datamint/apihandler/base_api_handler.py,sha256=HSMi8vN-xU0LLUEh3fgcuO7mXpMsX0wrA5nO9FVvrA4,12207
 datamint/apihandler/dto/annotation_dto.py,sha256=qId1RK1VO7dXrvGJ7dqJ31jBQB7Z8yy5x0tLSiMxTB4,7105
 datamint/apihandler/exp_api_handler.py,sha256=hFUgUgBc5rL7odK7gTW3MnrvMY1pVfJUpUdzRNobMQE,6226
-datamint/apihandler/root_api_handler.py,sha256=IHpr2Bki1WPTKzM133U68c7p_50Fq5t-JEkqGFlSl8k,57152
+datamint/apihandler/root_api_handler.py,sha256=8VanqWbcLLkwQ0y2Z_a5Mqr5knplRCUSi5iRVQeHFzU,58901
 datamint/client_cmd_tools/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 datamint/client_cmd_tools/datamint_config.py,sha256=md7dnWrbl10lPtXKbmD9yo6onLJsajeG8Vz0ZWH1v4M,8181
 datamint/client_cmd_tools/datamint_upload.py,sha256=890USkrtaH23mNjTRYVHWce2q9jSmkUNHIP_e8fnJRM,29502
 datamint/configs.py,sha256=Bdp6NydYwyCJ2dk19_gf_o3M2ZyQOmMHpLi8wEWNHUk,1426
 datamint/dataset/__init__.py,sha256=4PlUKSvVhdfQvvuq8jQXrkdqnot-iTTizM3aM1vgSwg,47
 datamint/dataset/annotation.py,sha256=qN1IMjdfLD2ceQ6va3l76jOXA8Vb_c-eBk1oWQu6hW0,7994
-datamint/dataset/base_dataset.py,sha256=vYM1Q7uVdtF6DfkbJjTjdleF-kG612rSBa2c2ynohX4,49393
-datamint/dataset/dataset.py,sha256=fdcgxB9NKvPEdr9S6TOeAIqFW38PdhmCiYsit6u5Wxc,27314
+datamint/dataset/base_dataset.py,sha256=S0pboog2yB2LCBGOocBIlOU8to7Wgov3gXTOJ9gbvz0,49697
+datamint/dataset/dataset.py,sha256=8e0MFgINgbw6_UJh7pNQIREp2XxstIVCupyduW05Nfw,27321
 datamint/examples/__init__.py,sha256=zcYnd5nLVme9GCTPYH-1JpGo8xXK2WEYvhzcy_2alZc,39
 datamint/examples/example_projects.py,sha256=7Nb_EaIdzJTQa9zopqc-WhTBQWQJSoQZ_KjRS4PB4FI,2931
 datamint/experiment/__init__.py,sha256=5qQOMzoG17DEd1YnTF-vS0qiM-DGdbNh42EUo91CRhQ,34
@@ -22,7 +22,7 @@ datamint/logging.yaml,sha256=a5dsATpul7QHeUHB2TjABFjWaPXBMbO--dgn8GlRqwk,483
 datamint/utils/logging_utils.py,sha256=DvoA35ATYG3JTwfXEXYawDyKRfHeCrH0a9czfkmz8kM,1851
 datamint/utils/torchmetrics.py,sha256=lwU0nOtsSWfebyp7dvjlAggaqXtj5ohSEUXOg3L0hJE,2837
 datamint/utils/visualization.py,sha256=yaUVAOHar59VrGUjpAWv5eVvQSfztFG0eP9p5Vt3l-M,4470
-datamint-1.7.4.dist-info/METADATA,sha256=Fej5PDwzs3NQPuPf9ijJt0x57sY5f4Y9JGx_IEAwkX4,4100
-datamint-1.7.4.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
-datamint-1.7.4.dist-info/entry_points.txt,sha256=mn5H6jPjO-rY0W0CAZ6Z_KKWhMLvyVaSpoqk77jlTI4,145
-datamint-1.7.4.dist-info/RECORD,,
+datamint-1.7.5.dist-info/METADATA,sha256=vWoLCzDqK33Lzb0DRTLLRAYpYJRX6xmv0p30Lo46Hwg,4100
+datamint-1.7.5.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
+datamint-1.7.5.dist-info/entry_points.txt,sha256=mn5H6jPjO-rY0W0CAZ6Z_KKWhMLvyVaSpoqk77jlTI4,145
+datamint-1.7.5.dist-info/RECORD,,