datamint 1.6.3.post1__py3-none-any.whl → 1.7.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of datamint might be problematic; see the package registry's advisory page for more details.

@@ -13,6 +13,9 @@ from requests.exceptions import HTTPError
13
13
  from .dto.annotation_dto import CreateAnnotationDto, LineGeometry, BoxGeometry, CoordinateSystem, AnnotationType
14
14
  import pydicom
15
15
  import json
16
+ from deprecated import deprecated
17
+ from pathlib import Path
18
+ from tqdm.auto import tqdm
16
19
 
17
20
  _LOGGER = logging.getLogger(__name__)
18
21
  _USER_LOGGER = logging.getLogger('user_logger')
@@ -267,8 +270,9 @@ class AnnotationAPIHandler(BaseAPIHandler):
267
270
  raise NotImplementedError("`name=string` is not supported yet for volume segmentation.")
268
271
  if isinstance(name, dict):
269
272
  if any(isinstance(k, tuple) for k in name.keys()):
270
- raise NotImplementedError("For volume segmentations, `name` must be a dictionary with integer keys only.")
271
-
273
+ raise NotImplementedError(
274
+ "For volume segmentations, `name` must be a dictionary with integer keys only.")
275
+
272
276
  # Prepare file for upload
273
277
  if isinstance(file_path, str):
274
278
  if file_path.endswith('.nii') or file_path.endswith('.nii.gz'):
@@ -892,7 +896,7 @@ class AnnotationAPIHandler(BaseAPIHandler):
892
896
  dataset_id: Optional[str] = None,
893
897
  worklist_id: Optional[str] = None,
894
898
  status: Optional[Literal['new', 'published']] = None,
895
- load_ai_segmentations: bool = None,
899
+ load_ai_segmentations: bool | None = None,
896
900
  ) -> Generator[dict, None, None]:
897
901
  """
898
902
  Get annotations for a resource.
@@ -1098,6 +1102,29 @@ class AnnotationAPIHandler(BaseAPIHandler):
1098
1102
  resp = self._run_request(request_params)
1099
1103
  self._check_errors_response_json(resp)
1100
1104
 
1105
def get_annotation_by_id(self, annotation_id: str) -> dict:
    """
    Fetch a single annotation using its unique id.

    Args:
        annotation_id (str): The annotation unique id.

    Returns:
        dict: The annotation information.

    Raises:
        HTTPError: If the request fails (logged before re-raising).
    """
    params = {
        'url': f'{self.root_url}/annotations/{annotation_id}',
        'method': 'GET',
    }
    try:
        response = self._run_request(params)
    except HTTPError as e:
        _LOGGER.error(f"Error getting annotation by id {annotation_id}: {e}")
        raise
    return response.json()
1126
+
1127
+ @deprecated(reason="Use download_segmentation_file instead")
1101
1128
  def get_segmentation_file(self, resource_id: str, annotation_id: str) -> bytes:
1102
1129
  request_params = {
1103
1130
  'method': 'GET',
@@ -1107,6 +1134,35 @@ class AnnotationAPIHandler(BaseAPIHandler):
1107
1134
  resp = self._run_request(request_params)
1108
1135
  return resp.content
1109
1136
 
1137
def download_segmentation_file(self, annotation: str | dict, fpath_out: str | Path | None = None) -> bytes:
    """
    Download the segmentation file for a given annotation.

    Args:
        annotation (str | dict): The annotation unique id or an annotation object
            (a dict containing at least the 'id' and 'resource_id' keys).
        fpath_out (str | Path | None): Optional file path to save the downloaded
            segmentation file to. If None (default), the content is only returned.

    Returns:
        bytes: The content of the downloaded segmentation file.
    """
    if isinstance(annotation, dict):
        annotation_id = annotation['id']
        resource_id = annotation['resource_id']
    else:
        annotation_id = annotation
        # Only the id was given: an extra API call resolves the owning resource.
        resource_id = self.get_annotation_by_id(annotation_id)['resource_id']

    request_params = {
        'method': 'GET',
        'url': f'{self.root_url}/annotations/{resource_id}/annotations/{annotation_id}/file',
    }

    resp = self._run_request(request_params)
    if fpath_out is not None:
        with open(str(fpath_out), 'wb') as f:
            f.write(resp.content)
    return resp.content
1165
+
1110
1166
  def set_annotation_status(self,
1111
1167
  project_id: str,
1112
1168
  resource_id: str,
@@ -1124,3 +1180,69 @@ class AnnotationAPIHandler(BaseAPIHandler):
1124
1180
  }
1125
1181
  resp = self._run_request(request_params)
1126
1182
  self._check_errors_response_json(resp)
1183
+
1184
+
1185
async def _async_download_segmentation_file(self,
                                            annotation: str | dict,
                                            save_path: str | Path,
                                            session: aiohttp.ClientSession | None = None,
                                            progress_bar: tqdm | None = None):
    """
    Asynchronously download one segmentation file and write it to `save_path`.

    Args:
        annotation (str | dict): The annotation unique id or an annotation object.
        save_path (str | Path): The path to save the file.
        session (aiohttp.ClientSession): The aiohttp session to use for the request.
        progress_bar (tqdm | None): Optional progress bar to update after download completion.
    """
    if not isinstance(annotation, dict):
        annotation_id = annotation
        # TODO: This is inefficient as it requires an extra API call per annotation
        # Consider passing resource_id separately or caching annotation info
        resource_id = self.get_annotation_by_id(annotation_id)['resource_id']
    else:
        annotation_id = annotation['id']
        resource_id = annotation['resource_id']

    request_params = {
        'method': 'GET',
        'url': f'{self.root_url}/annotations/{resource_id}/annotations/{annotation_id}/file',
    }

    try:
        content = await self._run_request_async(request_params, session, 'content')
    except ResourceNotFoundError as e:
        # Enrich the error with the annotation that was being fetched.
        e.set_params('annotation', {'annotation_id': annotation_id})
        raise e
    with open(save_path, 'wb') as f:
        f.write(content)
    if progress_bar:
        progress_bar.update(1)
1223
+
1224
def download_multiple_segmentations(self,
                                    annotations: list[str | dict],
                                    save_paths: list[str | Path] | str
                                    ) -> None:
    """
    Download multiple segmentation files concurrently and save them to the specified paths.

    Args:
        annotations (list[str | dict]): A list of annotation unique ids or annotation objects.
        save_paths (list[str | Path] | str): A list of paths to save the files (one per
            annotation) or a single directory path. When a directory is given, each file
            is saved under it, named after the annotation id.

    Raises:
        ValueError: If `save_paths` is a list whose length differs from `annotations`.
    """
    if isinstance(save_paths, str):
        # Directory mode: derive one output path per annotation, named by its id.
        save_paths = [os.path.join(save_paths, f"{ann['id'] if isinstance(ann, dict) else ann}")
                      for ann in annotations]
    elif len(save_paths) != len(annotations):
        # zip() would silently truncate and skip downloads on a length mismatch.
        raise ValueError("`save_paths` must have the same length as `annotations`.")

    async def _download_all_async():
        async with aiohttp.ClientSession() as session:
            tasks = [
                self._async_download_segmentation_file(annotation, save_path=path,
                                                       session=session, progress_bar=progress_bar)
                for annotation, path in zip(annotations, save_paths)
            ]
            await asyncio.gather(*tasks)

    with tqdm(total=len(annotations), desc="Downloading segmentations", unit="file") as progress_bar:
        loop = asyncio.get_event_loop()
        loop.run_until_complete(_download_all_async())
@@ -85,7 +85,7 @@ class BaseAPIHandler:
85
85
  msg = f"API key not provided! Use the environment variable " + \
86
86
  f"{BaseAPIHandler.DATAMINT_API_VENV_NAME} or pass it as an argument."
87
87
  raise DatamintException(msg)
88
- self.semaphore = asyncio.Semaphore(10) # Limit to 10 parallel requests
88
+ self.semaphore = asyncio.Semaphore(20)
89
89
 
90
90
  if check_connection:
91
91
  self.check_connection()
@@ -157,30 +157,34 @@ class BaseAPIHandler:
157
157
async def _run_request_async(self,
                             request_args: dict,
                             session: aiohttp.ClientSession | None = None,
                             data_to_get: Literal['json', 'text', 'content'] = 'json'):
    """
    Run an HTTP request asynchronously, bounded by the instance semaphore.

    Args:
        request_args (dict): Keyword arguments forwarded to `session.request`
            (must include 'method' and 'url'; 'headers' gets the api key added).
        session (aiohttp.ClientSession | None): Session to use; a temporary one
            is created when None.
        data_to_get (Literal['json', 'text', 'content']): How to decode the
            response body: parsed JSON, text, or raw bytes.

    Returns:
        The response body, decoded according to `data_to_get`.

    Raises:
        ValueError: If `data_to_get` is not one of the supported values.
    """
    if session is None:
        async with aiohttp.ClientSession() as s:
            return await self._run_request_async(request_args, s, data_to_get)

    # Limit the number of concurrent in-flight requests.
    async with self.semaphore:
        try:
            _LOGGER.debug(f"Running request to {request_args['url']}")
            _LOGGER.debug(f'Equivalent curl command: "{self._generate_curl_command(request_args)}"')
        except Exception as e:
            # Curl-command generation is best-effort debugging aid only.
            _LOGGER.debug(f"Error generating curl command: {e}")

        # add apikey to the headers
        if 'headers' not in request_args:
            request_args['headers'] = {}

        request_args['headers']['apikey'] = self.api_key

        async with session.request(**request_args) as response:
            self._check_errors_response(response, request_args)
            if data_to_get == 'json':
                return await response.json()
            elif data_to_get == 'text':
                return await response.text()
            elif data_to_get == 'content':
                return await response.read()
            else:
                # Fixed: previous message omitted the 'content' option.
                raise ValueError("data_to_get must be one of 'json', 'text' or 'content'")
184
188
 
185
189
  def _check_errors_response(self,
186
190
  response,
@@ -237,9 +241,9 @@ class BaseAPIHandler:
237
241
  return f'{self.root_url}/{endpoint}'
238
242
 
239
243
  def _run_pagination_request(self,
240
- request_params: Dict,
241
- return_field: Optional[Union[str, List]] = None
242
- ) -> Generator[Dict, None, None]:
244
+ request_params: dict,
245
+ return_field: str | list | None = None
246
+ ) -> Generator[dict | list, None, None]:
243
247
  offset = 0
244
248
  params = request_params.get('params', {})
245
249
  while True:
@@ -219,36 +219,35 @@ class RootAPIHandler(BaseAPIHandler):
219
219
 
220
220
  async with aiohttp.ClientSession() as session:
221
221
  async def __upload_single_resource(file_path, segfiles: dict[str, list | dict], metadata_file: str | dict | None):
222
- async with self.semaphore:
223
- rid = await self._upload_single_resource_async(
224
- file_path=file_path,
225
- mimetype=mimetype,
226
- anonymize=anonymize,
227
- anonymize_retain_codes=anonymize_retain_codes,
228
- tags=tags,
229
- session=session,
230
- mung_filename=mung_filename,
231
- channel=channel,
232
- modality=modality,
233
- publish=publish,
234
- metadata_file=metadata_file,
235
- )
236
- if segfiles is not None:
237
- fpaths = segfiles['files']
238
- names = segfiles.get('names', _infinite_gen(None))
239
- if isinstance(names, dict):
240
- names = _infinite_gen(names)
241
- frame_indices = segfiles.get('frame_index', _infinite_gen(None))
242
- for f, name, frame_index in tqdm(zip(fpaths, names, frame_indices),
243
- desc=f"Uploading segmentations for {file_path}",
244
- total=len(fpaths)):
245
- if f is not None:
246
- await self._upload_segmentations_async(rid,
247
- file_path=f,
248
- name=name,
249
- frame_index=frame_index,
250
- transpose_segmentation=transpose_segmentation)
251
- return rid
222
+ rid = await self._upload_single_resource_async(
223
+ file_path=file_path,
224
+ mimetype=mimetype,
225
+ anonymize=anonymize,
226
+ anonymize_retain_codes=anonymize_retain_codes,
227
+ tags=tags,
228
+ session=session,
229
+ mung_filename=mung_filename,
230
+ channel=channel,
231
+ modality=modality,
232
+ publish=publish,
233
+ metadata_file=metadata_file,
234
+ )
235
+ if segfiles is not None:
236
+ fpaths = segfiles['files']
237
+ names = segfiles.get('names', _infinite_gen(None))
238
+ if isinstance(names, dict):
239
+ names = _infinite_gen(names)
240
+ frame_indices = segfiles.get('frame_index', _infinite_gen(None))
241
+ for f, name, frame_index in tqdm(zip(fpaths, names, frame_indices),
242
+ desc=f"Uploading segmentations for {file_path}",
243
+ total=len(fpaths)):
244
+ if f is not None:
245
+ await self._upload_segmentations_async(rid,
246
+ file_path=f,
247
+ name=name,
248
+ frame_index=frame_index,
249
+ transpose_segmentation=transpose_segmentation)
250
+ return rid
252
251
 
253
252
  tasks = [__upload_single_resource(f, segfiles, metadata_file)
254
253
  for f, segfiles, metadata_file in zip(files_path, segmentation_files, metadata_files)]
@@ -445,7 +444,7 @@ class RootAPIHandler(BaseAPIHandler):
445
444
 
446
445
  segmentation_files = [segfiles if (isinstance(segfiles, dict) or segfiles is None) else {'files': segfiles}
447
446
  for segfiles in segmentation_files]
448
-
447
+
449
448
  for segfiles in segmentation_files:
450
449
  if segfiles is None:
451
450
  continue
@@ -454,7 +453,8 @@ class RootAPIHandler(BaseAPIHandler):
454
453
  if 'names' in segfiles:
455
454
  # same length as files
456
455
  if isinstance(segfiles['names'], (list, tuple)) and len(segfiles['names']) != len(segfiles['files']):
457
- raise ValueError("segmentation_files['names'] must have the same length as segmentation_files['files'].")
456
+ raise ValueError(
457
+ "segmentation_files['names'] must have the same length as segmentation_files['files'].")
458
458
 
459
459
  loop = asyncio.get_event_loop()
460
460
  task = self._upload_resources_async(files_path=files_path,
@@ -699,7 +699,7 @@ class RootAPIHandler(BaseAPIHandler):
699
699
  order_field: Optional[ResourceFields] = None,
700
700
  order_ascending: Optional[bool] = None,
701
701
  channel: Optional[str] = None,
702
- project_name: Optional[str] = None,
702
+ project_name: str | list[str] | None = None,
703
703
  filename: Optional[str] = None
704
704
  ) -> Generator[dict, None, None]:
705
705
  """
@@ -717,6 +717,8 @@ class RootAPIHandler(BaseAPIHandler):
717
717
  return_ids_only (bool): Whether to return only the ids of the resources.
718
718
  order_field (Optional[ResourceFields]): The field to order the resources. See :data:`~.base_api_handler.ResourceFields`.
719
719
  order_ascending (Optional[bool]): Whether to order the resources in ascending order.
720
+ project_name (str | list[str] | None): The project name or a list of project names to filter resources by project.
721
+ If multiple projects are provided, resources will be filtered to include only those belonging to ALL of the specified projects.
720
722
 
721
723
  Returns:
722
724
  Generator[dict, None, None]: A generator of dictionaries with the resources information.
@@ -745,7 +747,10 @@ class RootAPIHandler(BaseAPIHandler):
745
747
  "filename": filename,
746
748
  }
747
749
  if project_name is not None:
748
- payload["project"] = json.dumps({'items': [project_name], 'filterType': 'union'})
750
+ if isinstance(project_name, str):
751
+ project_name = [project_name]
752
+ payload["project"] = json.dumps({'items': project_name,
753
+ 'filterType': 'intersection'}) # union or intersection
749
754
 
750
755
  if tags is not None:
751
756
  if isinstance(tags, str):
@@ -802,7 +807,7 @@ class RootAPIHandler(BaseAPIHandler):
802
807
  yield from self._run_pagination_request(request_params,
803
808
  return_field='data')
804
809
 
805
- def set_resource_tags(self,
810
+ def set_resource_tags(self,
806
811
  resource_id: str,
807
812
  tags: Sequence[str],
808
813
  ):
@@ -824,6 +829,62 @@ class RootAPIHandler(BaseAPIHandler):
824
829
  def _has_status_code(e, status_code: int) -> bool:
825
830
  return hasattr(e, 'response') and (e.response is not None) and e.response.status_code == status_code
826
831
 
832
async def _async_download_file(self,
                               resource_id: str,
                               save_path: str,
                               session: aiohttp.ClientSession | None = None,
                               progress_bar: tqdm | None = None):
    """
    Asynchronously fetch a resource's file from the server and write it to `save_path`.

    Args:
        resource_id (str): The resource unique id.
        save_path (str): The path to save the file.
        session (aiohttp.ClientSession): The aiohttp session to use for the request.
        progress_bar (tqdm | None): Optional progress bar to update after download completion.
    """
    request_params = {
        'method': 'GET',
        'headers': {'accept': 'application/octet-stream'},
        'url': f"{self._get_endpoint_url(RootAPIHandler.ENDPOINT_RESOURCES)}/{resource_id}/file",
    }
    try:
        payload = await self._run_request_async(request_params, session, 'content')
    except ResourceNotFoundError as e:
        # Enrich the error with the resource that was being fetched.
        e.set_params('resource', {'resource_id': resource_id})
        raise e
    with open(save_path, 'wb') as f:
        f.write(payload)
    if progress_bar:
        progress_bar.update(1)
861
+
862
def download_multiple_resources(self,
                                resource_ids: list[str],
                                save_path: list[str] | str
                                ) -> None:
    """
    Download multiple resources concurrently and save them to the specified paths.

    Args:
        resource_ids (list[str]): A list of resource unique ids.
        save_path (list[str] | str): A list of paths to save the files (one per
            resource) or a single directory path. When a directory is given, each
            file is saved under it, named after the resource id.

    Raises:
        ValueError: If `save_path` is a list whose length differs from `resource_ids`.
    """
    if isinstance(save_path, str):
        # Directory mode: derive one output path per resource id.
        save_path = [os.path.join(save_path, r) for r in resource_ids]
    elif len(save_path) != len(resource_ids):
        # zip() would silently truncate and skip downloads on a length mismatch.
        raise ValueError("`save_path` must have the same length as `resource_ids`.")

    async def _download_all_async():
        async with aiohttp.ClientSession() as session:
            tasks = [
                self._async_download_file(resource_id, save_path=path,
                                          session=session, progress_bar=progress_bar)
                for resource_id, path in zip(resource_ids, save_path)
            ]
            await asyncio.gather(*tasks)

    with tqdm(total=len(resource_ids), desc="Downloading resources", unit="file") as progress_bar:
        loop = asyncio.get_event_loop()
        loop.run_until_complete(_download_all_async())
887
+
827
888
  def download_resource_file(self,
828
889
  resource_id: str,
829
890
  save_path: Optional[str] = None,
@@ -982,6 +1043,7 @@ class RootAPIHandler(BaseAPIHandler):
982
1043
  response = self._run_request(request_params)
983
1044
  return response.json()['data']
984
1045
 
1046
+ @deprecated(version='1.7')
985
1047
  def get_datasetsinfo_by_name(self, dataset_name: str) -> list[dict]:
986
1048
  request_params = {
987
1049
  'method': 'GET',
@@ -1076,6 +1138,30 @@ class RootAPIHandler(BaseAPIHandler):
1076
1138
  }
1077
1139
  return self._run_request(request_params).json()['data']
1078
1140
 
1141
def get_project_resources(self, project_id: str) -> list[dict]:
    """
    List the resources belonging to a project.

    Args:
        project_id (str): The project id.

    Returns:
        list[dict]: The list of resources in the project.

    Raises:
        ResourceNotFoundError: If the project does not exist.
    """
    request_params = {
        'url': f'{self.root_url}/projects/{project_id}/resources',
        'method': 'GET',
    }
    try:
        resp = self._run_request(request_params)
    except HTTPError as e:
        # NOTE(review): an unknown project surfaces here as HTTP 500, which is
        # deliberately mapped to ResourceNotFoundError for callers.
        if e.response is not None and e.response.status_code == 500:
            raise ResourceNotFoundError('project', {'project_id': project_id})
        raise e
    return resp.json()
1164
+
1079
1165
  def create_project(self,
1080
1166
  name: str,
1081
1167
  description: str,