datamint 1.6.3.post1__py3-none-any.whl → 1.7.1__py3-none-any.whl

This diff shows the content of two publicly released versions of the package, as they appear in their public registry. It is provided for informational purposes only.

Potentially problematic release: this version of datamint might be problematic.

@@ -13,6 +13,9 @@ from requests.exceptions import HTTPError
 from .dto.annotation_dto import CreateAnnotationDto, LineGeometry, BoxGeometry, CoordinateSystem, AnnotationType
 import pydicom
 import json
+from deprecated import deprecated
+from pathlib import Path
+from tqdm.auto import tqdm
 
 _LOGGER = logging.getLogger(__name__)
 _USER_LOGGER = logging.getLogger('user_logger')
@@ -267,8 +270,9 @@ class AnnotationAPIHandler(BaseAPIHandler):
             raise NotImplementedError("`name=string` is not supported yet for volume segmentation.")
         if isinstance(name, dict):
             if any(isinstance(k, tuple) for k in name.keys()):
-                raise NotImplementedError("For volume segmentations, `name` must be a dictionary with integer keys only.")
-
+                raise NotImplementedError(
+                    "For volume segmentations, `name` must be a dictionary with integer keys only.")
+
         # Prepare file for upload
         if isinstance(file_path, str):
             if file_path.endswith('.nii') or file_path.endswith('.nii.gz'):
@@ -892,7 +896,7 @@ class AnnotationAPIHandler(BaseAPIHandler):
                         dataset_id: Optional[str] = None,
                         worklist_id: Optional[str] = None,
                         status: Optional[Literal['new', 'published']] = None,
-                        load_ai_segmentations: bool = None,
+                        load_ai_segmentations: bool | None = None,
                         ) -> Generator[dict, None, None]:
         """
         Get annotations for a resource.
@@ -1098,6 +1102,29 @@ class AnnotationAPIHandler(BaseAPIHandler):
         resp = self._run_request(request_params)
         self._check_errors_response_json(resp)
 
+    def get_annotation_by_id(self, annotation_id: str) -> dict:
+        """
+        Get an annotation by its unique id.
+
+        Args:
+            annotation_id (str): The annotation unique id.
+
+        Returns:
+            dict: The annotation information.
+        """
+        request_params = {
+            'method': 'GET',
+            'url': f'{self.root_url}/annotations/{annotation_id}',
+        }
+
+        try:
+            resp = self._run_request(request_params)
+            return resp.json()
+        except HTTPError as e:
+            _LOGGER.error(f"Error getting annotation by id {annotation_id}: {e}")
+            raise
+
+    @deprecated(reason="Use download_segmentation_file instead")
     def get_segmentation_file(self, resource_id: str, annotation_id: str) -> bytes:
         request_params = {
             'method': 'GET',
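
The new get_annotation_by_id resolves a single annotation, including its resource_id, in one GET. A minimal usage sketch, not part of the diff: it assumes an already-constructed AnnotationAPIHandler named api (construction is not shown here) and uses a placeholder id:

    # `api` is an AnnotationAPIHandler; construction details are not in this diff.
    ann = api.get_annotation_by_id("<annotation-uuid>")  # GET {root_url}/annotations/{id}
    print(ann["resource_id"])  # consumed by download_segmentation_file below
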
@@ -1107,6 +1134,35 @@ class AnnotationAPIHandler(BaseAPIHandler):
         resp = self._run_request(request_params)
         return resp.content
 
+    def download_segmentation_file(self, annotation: str | dict, fpath_out: str | Path | None) -> bytes:
+        """
+        Download the segmentation file for a given resource and annotation.
+
+        Args:
+            annotation (str | dict): The annotation unique id or an annotation object.
+            fpath_out (str | Path | None): Optional file path to save the downloaded segmentation file.
+
+        Returns:
+            bytes: The content of the downloaded segmentation file.
+        """
+        if isinstance(annotation, dict):
+            annotation_id = annotation['id']
+            resource_id = annotation['resource_id']
+        else:
+            annotation_id = annotation
+            resource_id = self.get_annotation_by_id(annotation_id)['resource_id']
+
+        request_params = {
+            'method': 'GET',
+            'url': f'{self.root_url}/annotations/{resource_id}/annotations/{annotation_id}/file',
+        }
+
+        resp = self._run_request(request_params)
+        if fpath_out is not None:
+            with open(str(fpath_out), 'wb') as f:
+                f.write(resp.content)
+        return resp.content
+
     def set_annotation_status(self,
                               project_id: str,
                               resource_id: str,
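
A usage sketch for the new download_segmentation_file (editorial; `api` and the id as above). Passing the full annotation dict skips the extra get_annotation_by_id round-trip that a bare id requires:

    content = api.download_segmentation_file("<annotation-uuid>", fpath_out="seg.nii.gz")
    # The raw bytes are also returned, so fpath_out=None fetches without writing to disk.
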
@@ -1124,3 +1180,69 @@ class AnnotationAPIHandler(BaseAPIHandler):
         }
         resp = self._run_request(request_params)
         self._check_errors_response_json(resp)
+
+
+    async def _async_download_segmentation_file(self,
+                                                annotation: str | dict,
+                                                save_path: str | Path,
+                                                session: aiohttp.ClientSession | None = None,
+                                                progress_bar: tqdm | None = None):
+        """
+        Asynchronously download a segmentation file.
+
+        Args:
+            annotation (str | dict): The annotation unique id or an annotation object.
+            save_path (str | Path): The path to save the file.
+            session (aiohttp.ClientSession): The aiohttp session to use for the request.
+            progress_bar (tqdm | None): Optional progress bar to update after download completion.
+        """
+        if isinstance(annotation, dict):
+            annotation_id = annotation['id']
+            resource_id = annotation['resource_id']
+        else:
+            annotation_id = annotation
+            # TODO: This is inefficient as it requires an extra API call per annotation.
+            # Consider passing resource_id separately or caching annotation info.
+            resource_id = self.get_annotation_by_id(annotation_id)['resource_id']
+
+        url = f'{self.root_url}/annotations/{resource_id}/annotations/{annotation_id}/file'
+        request_params = {
+            'method': 'GET',
+            'url': url
+        }
+
+        try:
+            data_bytes = await self._run_request_async(request_params, session, 'content')
+            with open(save_path, 'wb') as f:
+                f.write(data_bytes)
+            if progress_bar:
+                progress_bar.update(1)
+        except ResourceNotFoundError as e:
+            e.set_params('annotation', {'annotation_id': annotation_id})
+            raise e
+
+    def download_multiple_segmentations(self,
+                                        annotations: list[str | dict],
+                                        save_paths: list[str | Path] | str
+                                        ) -> None:
+        """
+        Download multiple segmentation files and save them to the specified paths.
+
+        Args:
+            annotations (list[str | dict]): A list of annotation unique ids or annotation objects.
+            save_paths (list[str | Path] | str): A list of paths to save the files, or a directory path.
+        """
+        async def _download_all_async():
+            async with aiohttp.ClientSession() as session:
+                tasks = [
+                    self._async_download_segmentation_file(annotation, save_path=path, session=session, progress_bar=progress_bar)
+                    for annotation, path in zip(annotations, save_paths)
+                ]
+                await asyncio.gather(*tasks)
+
+        if isinstance(save_paths, str):
+            save_paths = [os.path.join(save_paths, f"{ann['id'] if isinstance(ann, dict) else ann}") for ann in annotations]
+
+        with tqdm(total=len(annotations), desc="Downloading segmentations", unit="file") as progress_bar:
+            loop = asyncio.get_event_loop()
+            loop.run_until_complete(_download_all_async())
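
A sketch of the new batch API (editorial; `api` as above). Annotation dicts that already carry 'id' and 'resource_id' avoid the per-annotation lookup flagged in the TODO; a directory string derives one file path per annotation id:

    anns = ["<annotation-uuid-1>", "<annotation-uuid-2>"]  # or annotation dicts
    api.download_multiple_segmentations(anns, save_paths="segmentations/")

Note that when a directory is given, files are named by annotation id with no extension.
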
@@ -85,7 +85,7 @@ class BaseAPIHandler:
             msg = f"API key not provided! Use the environment variable " + \
                 f"{BaseAPIHandler.DATAMINT_API_VENV_NAME} or pass it as an argument."
             raise DatamintException(msg)
-        self.semaphore = asyncio.Semaphore(10)  # Limit to 10 parallel requests
+        self.semaphore = asyncio.Semaphore(20)  # Limit to 20 parallel requests
 
         if check_connection:
             self.check_connection()
@@ -157,30 +157,34 @@ class BaseAPIHandler:
     async def _run_request_async(self,
                                  request_args: dict,
                                  session: aiohttp.ClientSession | None = None,
-                                 data_to_get: str = 'json'):
+                                 data_to_get: Literal['json', 'text', 'content'] = 'json'):
         if session is None:
             async with aiohttp.ClientSession() as s:
-                return await self._run_request_async(request_args, s)
-        try:
-            _LOGGER.debug(f"Running request to {request_args['url']}")
-            _LOGGER.debug(f'Equivalent curl command: "{self._generate_curl_command(request_args)}"')
-        except Exception as e:
-            _LOGGER.debug(f"Error generating curl command: {e}")
-
-        # add apikey to the headers
-        if 'headers' not in request_args:
-            request_args['headers'] = {}
-
-        request_args['headers']['apikey'] = self.api_key
-
-        async with session.request(**request_args) as response:
-            self._check_errors_response(response, request_args)
-            if data_to_get == 'json':
-                return await response.json()
-            elif data_to_get == 'text':
-                return await response.text()
-            else:
-                raise ValueError("data_to_get must be either 'json' or 'text'")
+                return await self._run_request_async(request_args, s, data_to_get)
+
+        async with self.semaphore:
+            try:
+                _LOGGER.debug(f"Running request to {request_args['url']}")
+                _LOGGER.debug(f'Equivalent curl command: "{self._generate_curl_command(request_args)}"')
+            except Exception as e:
+                _LOGGER.debug(f"Error generating curl command: {e}")
+
+            # add apikey to the headers
+            if 'headers' not in request_args:
+                request_args['headers'] = {}
+
+            request_args['headers']['apikey'] = self.api_key
+
+            async with session.request(**request_args) as response:
+                self._check_errors_response(response, request_args)
+                if data_to_get == 'json':
+                    return await response.json()
+                elif data_to_get == 'text':
+                    return await response.text()
+                elif data_to_get == 'content':
+                    return await response.read()
+                else:
+                    raise ValueError("data_to_get must be 'json', 'text' or 'content'")
 
     def _check_errors_response(self,
                                response,
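
Throttling now lives inside _run_request_async itself (the `async with self.semaphore` block), so callers such as the upload path below no longer wrap requests in the semaphore, and 'content' joins 'json' and 'text' as a supported payload type. A self-contained sketch of the semaphore pattern, independent of the datamint classes:

    import asyncio

    sem = asyncio.Semaphore(20)  # same bound as BaseAPIHandler

    async def fetch(i: int) -> int:
        async with sem:               # at most 20 coroutines pass this point at once
            await asyncio.sleep(0.1)  # stands in for session.request(...)
            return i

    async def main() -> None:
        results = await asyncio.gather(*(fetch(i) for i in range(100)))
        print(len(results))  # 100 completed, never more than 20 in flight

    asyncio.run(main())
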
@@ -237,9 +241,9 @@ class BaseAPIHandler:
         return f'{self.root_url}/{endpoint}'
 
     def _run_pagination_request(self,
-                                request_params: Dict,
-                                return_field: Optional[Union[str, List]] = None
-                                ) -> Generator[Dict, None, None]:
+                                request_params: dict,
+                                return_field: str | list | None = None
+                                ) -> Generator[dict | list, None, None]:
         offset = 0
         params = request_params.get('params', {})
         while True:
@@ -219,36 +219,35 @@ class RootAPIHandler(BaseAPIHandler):
 
         async with aiohttp.ClientSession() as session:
             async def __upload_single_resource(file_path, segfiles: dict[str, list | dict], metadata_file: str | dict | None):
-                async with self.semaphore:
-                    rid = await self._upload_single_resource_async(
-                        file_path=file_path,
-                        mimetype=mimetype,
-                        anonymize=anonymize,
-                        anonymize_retain_codes=anonymize_retain_codes,
-                        tags=tags,
-                        session=session,
-                        mung_filename=mung_filename,
-                        channel=channel,
-                        modality=modality,
-                        publish=publish,
-                        metadata_file=metadata_file,
-                    )
-                    if segfiles is not None:
-                        fpaths = segfiles['files']
-                        names = segfiles.get('names', _infinite_gen(None))
-                        if isinstance(names, dict):
-                            names = _infinite_gen(names)
-                        frame_indices = segfiles.get('frame_index', _infinite_gen(None))
-                        for f, name, frame_index in tqdm(zip(fpaths, names, frame_indices),
-                                                         desc=f"Uploading segmentations for {file_path}",
-                                                         total=len(fpaths)):
-                            if f is not None:
-                                await self._upload_segmentations_async(rid,
-                                                                       file_path=f,
-                                                                       name=name,
-                                                                       frame_index=frame_index,
-                                                                       transpose_segmentation=transpose_segmentation)
-                    return rid
+                rid = await self._upload_single_resource_async(
+                    file_path=file_path,
+                    mimetype=mimetype,
+                    anonymize=anonymize,
+                    anonymize_retain_codes=anonymize_retain_codes,
+                    tags=tags,
+                    session=session,
+                    mung_filename=mung_filename,
+                    channel=channel,
+                    modality=modality,
+                    publish=publish,
+                    metadata_file=metadata_file,
+                )
+                if segfiles is not None:
+                    fpaths = segfiles['files']
+                    names = segfiles.get('names', _infinite_gen(None))
+                    if isinstance(names, dict):
+                        names = _infinite_gen(names)
+                    frame_indices = segfiles.get('frame_index', _infinite_gen(None))
+                    for f, name, frame_index in tqdm(zip(fpaths, names, frame_indices),
+                                                     desc=f"Uploading segmentations for {file_path}",
+                                                     total=len(fpaths)):
+                        if f is not None:
+                            await self._upload_segmentations_async(rid,
+                                                                   file_path=f,
+                                                                   name=name,
+                                                                   frame_index=frame_index,
+                                                                   transpose_segmentation=transpose_segmentation)
+                return rid
 
             tasks = [__upload_single_resource(f, segfiles, metadata_file)
                      for f, segfiles, metadata_file in zip(files_path, segmentation_files, metadata_files)]
@@ -365,6 +364,32 @@ class RootAPIHandler(BaseAPIHandler):
 
         return result[0]
 
+    @staticmethod
+    def _is_dicom_report(file_path: str | IO) -> bool:
+        """
+        Check if a DICOM file is a report (e.g., Structured Report).
+
+        Args:
+            file_path: Path to the DICOM file or file-like object.
+
+        Returns:
+            bool: True if the DICOM file is a report, False otherwise.
+        """
+        try:
+            if not is_dicom(file_path):
+                return False
+
+            ds = pydicom.dcmread(file_path, stop_before_pixels=True)
+            modality = getattr(ds, 'Modality', None)
+
+            # Common report modalities:
+            # SR=Structured Report, DOC=Document, KO=Key Object, PR=Presentation State
+            report_modalities = {'SR', 'DOC', 'KO', 'PR', 'ESR'}
+
+            return modality in report_modalities
+        except Exception as e:
+            _LOGGER.debug(f"Error checking if DICOM is a report: {e}")
+            return False
+
     def upload_resources(self,
                          files_path: str | IO | Sequence[str | IO] | pydicom.dataset.Dataset,
                          mimetype: Optional[str] = None,
@@ -380,7 +405,8 @@ class RootAPIHandler(BaseAPIHandler):
                          transpose_segmentation: bool = False,
                          modality: Optional[str] = None,
                          assemble_dicoms: bool = True,
-                         metadata: list[str | dict | None] | dict | str | None = None
+                         metadata: list[str | dict | None] | dict | str | None = None,
+                         discard_dicom_reports: bool = True
                          ) -> list[str | Exception] | str | Exception:
         """
         Upload resources.
@@ -417,6 +443,17 @@ class RootAPIHandler(BaseAPIHandler):
             list[str | Exception]: A list of resource IDs or errors.
         """
 
+        if discard_dicom_reports:
+            if isinstance(files_path, (str, Path)):
+                files_path = [files_path]
+            elif isinstance(files_path, pydicom.dataset.Dataset):
+                files_path = [files_path]
+
+            old_size = len(files_path)
+            files_path = [f for f in files_path if not RootAPIHandler._is_dicom_report(f)]
+            if old_size != len(files_path):
+                _LOGGER.info(f"Discarded {old_size - len(files_path)} DICOM report files from upload.")
+
         if on_error not in ['raise', 'skip']:
             raise ValueError("on_error must be either 'raise' or 'skip'")
 
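
The new discard_dicom_reports flag relies on _is_dicom_report above. A standalone sketch of the same modality check using only pydicom (the is_dicom helper in the diff is assumed to be a cheap file-type probe and is omitted here):

    import pydicom

    def looks_like_report(path: str) -> bool:
        # Read the header only, then compare Modality against the report-style
        # modalities listed in the diff: SR, DOC, KO, PR, ESR.
        ds = pydicom.dcmread(path, stop_before_pixels=True)
        return getattr(ds, "Modality", None) in {"SR", "DOC", "KO", "PR", "ESR"}
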
@@ -445,7 +482,7 @@ class RootAPIHandler(BaseAPIHandler):
 
         segmentation_files = [segfiles if (isinstance(segfiles, dict) or segfiles is None) else {'files': segfiles}
                               for segfiles in segmentation_files]
-
+
         for segfiles in segmentation_files:
            if segfiles is None:
                continue
@@ -454,7 +491,8 @@ class RootAPIHandler(BaseAPIHandler):
             if 'names' in segfiles:
                 # same length as files
                 if isinstance(segfiles['names'], (list, tuple)) and len(segfiles['names']) != len(segfiles['files']):
-                    raise ValueError("segmentation_files['names'] must have the same length as segmentation_files['files'].")
+                    raise ValueError(
+                        "segmentation_files['names'] must have the same length as segmentation_files['files'].")
 
         loop = asyncio.get_event_loop()
         task = self._upload_resources_async(files_path=files_path,
@@ -699,7 +737,7 @@ class RootAPIHandler(BaseAPIHandler):
                       order_field: Optional[ResourceFields] = None,
                       order_ascending: Optional[bool] = None,
                       channel: Optional[str] = None,
-                      project_name: Optional[str] = None,
+                      project_name: str | list[str] | None = None,
                       filename: Optional[str] = None
                       ) -> Generator[dict, None, None]:
         """
@@ -717,6 +755,8 @@ class RootAPIHandler(BaseAPIHandler):
             return_ids_only (bool): Whether to return only the ids of the resources.
             order_field (Optional[ResourceFields]): The field to order the resources. See :data:`~.base_api_handler.ResourceFields`.
             order_ascending (Optional[bool]): Whether to order the resources in ascending order.
+            project_name (str | list[str] | None): A project name or a list of project names to filter resources by.
+                If multiple projects are provided, only resources belonging to ALL of the specified projects are returned.
 
         Returns:
             Generator[dict, None, None]: A generator of dictionaries with the resources information.
@@ -745,7 +785,10 @@ class RootAPIHandler(BaseAPIHandler):
             "filename": filename,
         }
         if project_name is not None:
-            payload["project"] = json.dumps({'items': [project_name], 'filterType': 'union'})
+            if isinstance(project_name, str):
+                project_name = [project_name]
+            payload["project"] = json.dumps({'items': project_name,
+                                             'filterType': 'intersection'})  # union or intersection
 
         if tags is not None:
             if isinstance(tags, str):
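
The project filter payload switches from union to intersection semantics, so a list of names now means "in ALL of these projects". Illustrative payload after the change:

    import json

    # A plain string is first wrapped in a list; 'intersection' requires membership
    # in every listed project (the old behavior was 'union').
    payload = {"project": json.dumps({"items": ["ProjA", "ProjB"],
                                      "filterType": "intersection"})}
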
@@ -802,7 +845,7 @@ class RootAPIHandler(BaseAPIHandler):
         yield from self._run_pagination_request(request_params,
                                                 return_field='data')
 
-    def set_resource_tags(self,
+    def set_resource_tags(self,
                           resource_id: str,
                           tags: Sequence[str],
                           ):
@@ -824,6 +867,62 @@ class RootAPIHandler(BaseAPIHandler):
     def _has_status_code(e, status_code: int) -> bool:
         return hasattr(e, 'response') and (e.response is not None) and e.response.status_code == status_code
 
+    async def _async_download_file(self,
+                                   resource_id: str,
+                                   save_path: str,
+                                   session: aiohttp.ClientSession | None = None,
+                                   progress_bar: tqdm | None = None):
+        """
+        Asynchronously download a file from the server.
+
+        Args:
+            resource_id (str): The resource unique id.
+            save_path (str): The path to save the file.
+            session (aiohttp.ClientSession): The aiohttp session to use for the request.
+            progress_bar (tqdm | None): Optional progress bar to update after download completion.
+        """
+        url = f"{self._get_endpoint_url(RootAPIHandler.ENDPOINT_RESOURCES)}/{resource_id}/file"
+        request_params = {
+            'method': 'GET',
+            'headers': {'accept': 'application/octet-stream'},
+            'url': url
+        }
+        try:
+            data_bytes = await self._run_request_async(request_params, session, 'content')
+            with open(save_path, 'wb') as f:
+                f.write(data_bytes)
+            if progress_bar:
+                progress_bar.update(1)
+        except ResourceNotFoundError as e:
+            e.set_params('resource', {'resource_id': resource_id})
+            raise e
+
+    def download_multiple_resources(self,
+                                    resource_ids: list[str],
+                                    save_path: list[str] | str
+                                    ) -> None:
+        """
+        Download multiple resources and save them to the specified paths.
+
+        Args:
+            resource_ids (list[str]): A list of resource unique ids.
+            save_path (list[str] | str): A list of paths to save the files, or a directory path.
+        """
+        async def _download_all_async():
+            async with aiohttp.ClientSession() as session:
+                tasks = [
+                    self._async_download_file(resource_id, save_path=path, session=session, progress_bar=progress_bar)
+                    for resource_id, path in zip(resource_ids, save_path)
+                ]
+                await asyncio.gather(*tasks)
+
+        if isinstance(save_path, str):
+            save_path = [os.path.join(save_path, r) for r in resource_ids]
+
+        with tqdm(total=len(resource_ids), desc="Downloading resources", unit="file") as progress_bar:
+            loop = asyncio.get_event_loop()
+            loop.run_until_complete(_download_all_async())
+
     def download_resource_file(self,
                                resource_id: str,
                                save_path: Optional[str] = None,
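
Usage sketch for the new batch download (editorial; `api` is a RootAPIHandler, ids are placeholders). As with segmentations, a directory string names each file after its resource id, with no extension:

    ids = ["<resource-uuid-1>", "<resource-uuid-2>"]
    api.download_multiple_resources(ids, save_path="downloads/")
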
@@ -982,6 +1081,7 @@ class RootAPIHandler(BaseAPIHandler):
         response = self._run_request(request_params)
         return response.json()['data']
 
+    @deprecated(version='1.7')
     def get_datasetsinfo_by_name(self, dataset_name: str) -> list[dict]:
         request_params = {
             'method': 'GET',
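
@deprecated(version='1.7') comes from the `deprecated` package imported at the top of this diff; calling a wrapped function emits a DeprecationWarning. A minimal illustration (the function and reason text are invented for the example):

    from deprecated import deprecated

    @deprecated(version="1.7", reason="illustrative replacement hint")
    def old_helper() -> int:
        return 42

    old_helper()  # emits a DeprecationWarning mentioning version 1.7
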
@@ -1076,6 +1176,30 @@ class RootAPIHandler(BaseAPIHandler):
         }
         return self._run_request(request_params).json()['data']
 
+    def get_project_resources(self, project_id: str) -> list[dict]:
+        """
+        Get the resources of a project by its id.
+
+        Args:
+            project_id (str): The project id.
+
+        Returns:
+            list[dict]: The list of resources in the project.
+
+        Raises:
+            ResourceNotFoundError: If the project does not exist.
+        """
+        request_params = {
+            'method': 'GET',
+            'url': f'{self.root_url}/projects/{project_id}/resources'
+        }
+        try:
+            return self._run_request(request_params).json()
+        except HTTPError as e:
+            if e.response is not None and e.response.status_code == 500:
+                raise ResourceNotFoundError('project', {'project_id': project_id})
+            raise e
+
     def create_project(self,
                        name: str,
                        description: str,
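
Finally, a usage sketch for get_project_resources (editorial; `api` as above). Note that the diff maps a backend 500 on an unknown project id to ResourceNotFoundError:

    try:
        resources = api.get_project_resources("<project-uuid>")
    except ResourceNotFoundError:
        print("project not found")  # raised when the backend answers 500
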