datamint 1.2.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of datamint might be problematic. Click here for more details.

@@ -0,0 +1,1013 @@
1
+ from typing import Optional, IO, Sequence, Literal, Generator, Union, Any
2
+ import os
3
+ import io
4
+ import pydicom.dataset
5
+ from requests.exceptions import HTTPError
6
+ import logging
7
+ import asyncio
8
+ import aiohttp
9
+ from datamintapi.utils.dicom_utils import anonymize_dicom, to_bytesio, is_dicom
10
+ from datamintapi.utils import dicom_utils
11
+ import pydicom
12
+ from pathlib import Path
13
+ from datetime import date
14
+ import mimetypes
15
+ from PIL import Image
16
+ import cv2
17
+ from nibabel.filebasedimages import FileBasedImage as nib_FileBasedImage
18
+ from datamintapi import configs
19
+ from .base_api_handler import BaseAPIHandler, DatamintException, ResourceNotFoundError, ResourceFields, ResourceStatus
20
+ from deprecated.sphinx import deprecated
21
+ import json
22
+ import itertools
23
+ from tqdm.auto import tqdm
24
+
25
+ _LOGGER = logging.getLogger(__name__)
26
+ _USER_LOGGER = logging.getLogger('user_logger')
27
+
28
+
29
def _is_io_object(obj):
    """
    Tell whether ``obj`` behaves like a readable file object.

    An object qualifies when it exposes a callable ``read`` attribute.
    """
    reader = getattr(obj, "read", None)
    return callable(reader)
34
+
35
+
36
def _infinite_gen(x):
    """Generator that yields the very same object ``x`` forever (useful as a ``zip`` filler)."""
    yield from itertools.repeat(x)
39
+
40
+
41
+ def _open_io(file_path: str | Path | IO, mode: str = 'rb') -> IO:
42
+ if isinstance(file_path, str) or isinstance(file_path, Path):
43
+ return open(file_path, 'rb')
44
+ return file_path
45
+
46
+
47
+ class RootAPIHandler(BaseAPIHandler):
48
+ """
49
+ Class to handle the API requests to the Datamint API
50
+ """
51
+ DATAMINT_API_VENV_NAME = configs.ENV_VARS[configs.APIKEY_KEY]
52
+ ENDPOINT_RESOURCES = 'resources'
53
+ ENDPOINT_CHANNELS = f'{ENDPOINT_RESOURCES}/channels'
54
+
55
async def _upload_single_resource_async(self,
                                        file_path: str | IO,
                                        mimetype: Optional[str] = None,
                                        anonymize: bool = False,
                                        anonymize_retain_codes: Sequence[tuple] = [],
                                        tags: list[str] = None,
                                        mung_filename: Sequence[int] | Literal['all'] = None,
                                        channel: Optional[str] = None,
                                        session=None,
                                        modality: Optional[str] = None,
                                        publish: bool = False,
                                        ) -> str:
    """
    Upload one file as a resource through a multipart POST to the resources endpoint.

    Args:
        file_path: Path of the file to upload, or a file-like object. For a file-like
            object its ``name`` attribute is used as the path.
        mimetype: Mimetype to send. When None it is guessed from the (possibly munged) name
            with ``mimetypes.guess_type``; if that fails and ``is_dicom`` reports a dicom,
            'application/dicom' is used.
        anonymize: When True and the file is a dicom, it is read with ``pydicom.dcmread``,
            passed through ``anonymize_dicom`` and uploaded from memory. Non-dicom files
            are uploaded unchanged after a warning is logged.
        anonymize_retain_codes: Forwarded to ``anonymize_dicom`` as ``retain_codes``.
        tags: Tags to attach; they are stripped and sent as one comma-separated field.
        mung_filename: 'all' joins every path part with '_'. Otherwise each value ``i`` selects
            ``folder_parts[i-1]`` (values larger than the number of folders are ignored) and
            the selected folders are joined with '_' in front of the file name.
        channel: Sent as the 'channel' form field when not None.
        session: An ``aiohttp.ClientSession`` (or None), forwarded to ``_run_request_async``.
        modality: Sent as the 'modality' form field when not None.
        publish: Not used by this method; it only appears in a commented-out form field.

    Returns:
        The ``'id'`` entry of the response data.

    Raises:
        ValueError: If ``session`` is not None and not an ``aiohttp.ClientSession``, or if the
            normalized path has no parts.
        DatamintException: If the response data contains an 'error' key.
    """
    if _is_io_object(file_path):
        name = file_path.name
    else:
        name = file_path

    if session is not None and not isinstance(session, aiohttp.ClientSession):
        raise ValueError("session must be an aiohttp.ClientSession object.")

    name = os.path.expanduser(os.path.normpath(name))
    if len(Path(name).parts) == 0:
        raise ValueError(f"File path '{name}' is not valid.")
    # Replace any remaining '..' component with '_' in the path that is reported to the server.
    name = os.path.join(*[x if x != '..' else '_' for x in Path(name).parts])

    if mung_filename is not None:
        file_parts = Path(name).parts
        # Drop the leading root separator of absolute paths so it does not end up in the name.
        if file_parts[0] == os.path.sep:
            file_parts = file_parts[1:]
        if mung_filename == 'all':
            new_file_path = '_'.join(file_parts)
        else:
            folder_parts = file_parts[:-1]
            # `i-1` means the requested positions are treated as 1-based;
            # NOTE(review): i == 0 would select the last folder via index -1 - confirm intent.
            new_file_path = '_'.join([folder_parts[i-1] for i in mung_filename if i <= len(folder_parts)])
            new_file_path += '_' + file_parts[-1]
        name = new_file_path
        _LOGGER.debug(f"New file path: {name}")

    if mimetype is None:
        mimetype = mimetypes.guess_type(name)[0]
    # Stays None unless the dicom check below actually runs; reused by the anonymize branch.
    is_a_dicom_file = None
    if mimetype is None:
        is_a_dicom_file = is_dicom(name) or is_dicom(file_path)
        if is_a_dicom_file:
            mimetype = 'application/dicom'

    filename = os.path.basename(name)
    _LOGGER.debug(f"File name '{filename}' mimetype: {mimetype}")

    if anonymize:
        if is_a_dicom_file == True or is_dicom(file_path):
            ds = pydicom.dcmread(file_path)
            _LOGGER.info(f"Anonymizing {file_path}")
            ds = anonymize_dicom(ds, retain_codes=anonymize_retain_codes)
            # Keep the anonymized dicom `ds` in memory as a file-like object to avoid unnecessary disk writes.
            f = to_bytesio(ds, name)
        else:
            _LOGGER.warning(f"File {file_path} is not a dicom file. Skipping anonymization.")
            f = _open_io(file_path)
    else:
        f = _open_io(file_path)

    try:
        form = aiohttp.FormData()
        url = self._get_endpoint_url(RootAPIHandler.ENDPOINT_RESOURCES)
        file_key = 'resource'
        form.add_field('source', 'api')

        form.add_field(file_key, f, filename=filename, content_type=mimetype)
        form.add_field('source_filepath', name)  # full (sanitized / munged) path to the file
        if mimetype is not None:
            form.add_field('mimetype', mimetype)
        if channel is not None:
            form.add_field('channel', channel)
        if modality is not None:
            form.add_field('modality', modality)
        # form.add_field('bypass_inbox', 'true' if publish else 'false') # Does not work!
        if tags is not None and len(tags) > 0:
            # comma separated list of tags
            tags = ','.join([l.strip() for l in tags])
            form.add_field('tags', tags)

        request_params = {
            'method': 'POST',
            'url': url,
            'data': form
        }

        resp_data = await self._run_request_async(request_params, session)
        if 'error' in resp_data:
            raise DatamintException(resp_data['error'])
        _LOGGER.info(f"Response on uploading {name}: {resp_data}")

        _USER_LOGGER.info(f'"{name}" uploaded')
        return resp_data['id']
    except Exception as e:
        # `name` is always bound by the time this `try` is entered, so the `else`
        # branch below is effectively unreachable.
        if 'name' in locals():
            _LOGGER.error(f"Error uploading {name}: {e}")
        else:
            _LOGGER.error(f"Error uploading {file_path}: {e}")
        raise e
    finally:
        # `_open_io` returns a caller-supplied file object as-is, so that object is closed here too.
        f.close()
158
+
159
async def _upload_resources_async(self,
                                  files_path: Sequence[str | IO],
                                  mimetype: Optional[str] = None,
                                  batch_id: Optional[str] = None,
                                  anonymize: bool = False,
                                  anonymize_retain_codes: Sequence[tuple] = [],
                                  on_error: Literal['raise', 'skip'] = 'raise',
                                  tags=None,
                                  mung_filename: Sequence[int] | Literal['all'] = None,
                                  channel: Optional[str] = None,
                                  modality: Optional[str] = None,
                                  publish: bool = False,
                                  segmentation_files: Optional[list[dict]] = None,
                                  transpose_segmentation: bool = False,
                                  ) -> list[str]:
    """
    Upload several resources (and their optional segmentations) concurrently.

    One task per entry of ``files_path`` is created; each task runs under ``self.semaphore``,
    uploads the resource with ``_upload_single_resource_async`` and then uploads the
    segmentations paired with it, all sharing a single ``aiohttp.ClientSession``.

    Args:
        files_path: The files (paths or file-like objects) to upload.
        segmentation_files: One entry per resource, either None or a dict with a 'files' key
            and optional 'names' / 'frame_index' keys. None means "no segmentations at all".
        transpose_segmentation: Forwarded to ``_upload_segmentations_async``.
        on_error: 'raise' propagates the first error; 'skip' makes the returned list hold
            the exception object in place of the failed resource id.
        batch_id: Accepted but not used by this method.
        mimetype, anonymize, anonymize_retain_codes, tags, mung_filename, channel, modality,
        publish: Forwarded unchanged to ``_upload_single_resource_async``.

    Returns:
        The result of ``asyncio.gather`` over all upload tasks, in the order of ``files_path``.

    Raises:
        ValueError: If ``on_error`` is neither 'raise' nor 'skip'.
    """
    if on_error != 'raise' and on_error != 'skip':
        raise ValueError("on_error must be either 'raise' or 'skip'")

    # Without segmentations, pair every resource with None.
    seg_specs = _infinite_gen(None) if segmentation_files is None else segmentation_files
    collect_exceptions = on_error == 'skip'

    async with aiohttp.ClientSession() as session:
        async def upload_one(resource_path, seg_spec: dict):
            async with self.semaphore:
                resource_id = await self._upload_single_resource_async(
                    file_path=resource_path,
                    mimetype=mimetype,
                    anonymize=anonymize,
                    anonymize_retain_codes=anonymize_retain_codes,
                    tags=tags,
                    session=session,
                    mung_filename=mung_filename,
                    channel=channel,
                    modality=modality,
                    publish=publish,
                )
                if seg_spec is None:
                    return resource_id

                seg_paths = seg_spec['files']
                seg_names = seg_spec.get('names', _infinite_gen(None))
                # A single dict of names applies to every segmentation file.
                if isinstance(seg_names, dict):
                    seg_names = _infinite_gen(seg_names)
                seg_frames = seg_spec.get('frame_index', _infinite_gen(None))
                _LOGGER.debug(f"Segmentation files: {seg_paths}")
                for seg_path, seg_name, seg_frame in zip(seg_paths, seg_names, seg_frames):
                    if seg_path is None:
                        continue
                    await self._upload_segmentations_async(resource_id,
                                                           file_path=seg_path,
                                                           name=seg_name,
                                                           frame_index=seg_frame,
                                                           transpose_segmentation=transpose_segmentation)
                return resource_id

        pending = []
        for resource_path, seg_spec in zip(files_path, seg_specs):
            pending.append(upload_one(resource_path, seg_spec))
        return await asyncio.gather(*pending, return_exceptions=collect_exceptions)
213
+
214
def _assemble_dicoms(self, files_path: Sequence[str | IO]) -> tuple[Sequence[str | IO], bool]:
    """
    Merge the dicom files among ``files_path`` using ``dicom_utils.assemble_dicoms``.

    Args:
        files_path: The files (paths or file-like objects) about to be uploaded.

    Returns:
        A ``(files, assembled)`` tuple. When assembling changed the number of dicom files,
        ``files`` is a new list holding the assembled dicoms followed by the non-dicom files
        and ``assembled`` is True. Otherwise the original ``files_path`` is returned
        unchanged together with False.
    """
    dicom_files = []
    other_files = []
    for f in files_path:
        (dicom_files if is_dicom(f) else other_files).append(f)

    orig_len = len(dicom_files)
    assembled_dicoms = dicom_utils.assemble_dicoms(dicom_files, return_as_IO=True)
    new_len = len(assembled_dicoms)

    if new_len == orig_len:
        # Nothing was merged: keep the caller's original collection.
        return files_path, False

    _LOGGER.info(f"Assembled {new_len} dicom files out of {orig_len} files.")
    # Return a real list (not a one-shot itertools.chain) so the result honours the
    # annotated Sequence contract: it supports len() and can be iterated more than once.
    return [*assembled_dicoms, *other_files], True
235
+
236
def upload_resources(self,
                     files_path: str | IO | Sequence[str | IO] | pydicom.dataset.Dataset,
                     mimetype: Optional[str] = None,
                     anonymize: bool = False,
                     anonymize_retain_codes: Sequence[tuple] = [],
                     on_error: Literal['raise', 'skip'] = 'raise',
                     labels=None,
                     tags: Optional[Sequence[str]] = None,
                     mung_filename: Sequence[int] | Literal['all'] = None,
                     channel: Optional[str] = None,
                     publish: bool = False,
                     publish_to: Optional[str] = None,
                     segmentation_files: Optional[list[Union[list[str], dict]]] = None,
                     transpose_segmentation: bool = False,
                     modality: Optional[str] = None,
                     assemble_dicoms: bool = True
                     ) -> list[str | Exception] | str | Exception:
    """
    Upload resources.

    Args:
        files_path (str | IO | Sequence[str | IO] | pydicom.dataset.Dataset): The path to the resource file,
            a file-like object, a dicom dataset, or a list of paths/file-like objects.
            A string pointing to a directory uploads every entry of that directory.
        mimetype (str): The mimetype of the resources. If None, it will be guessed.
        anonymize (bool): Whether to anonymize the dicoms or not.
        anonymize_retain_codes (Sequence[tuple]): The tags to retain when anonymizing the dicoms.
        on_error (Literal['raise', 'skip']): Whether to raise an exception when an error occurs or to skip the error.
        labels:
            .. deprecated:: 0.11.0
                Use `tags` instead. Only used when `tags` is None.
        tags (Optional[Sequence[str]]): The tags to add to the resources.
        mung_filename (Sequence[int] | Literal['all']): The parts of the filepath to keep when renaming the resource file.
            'all' keeps all parts.
        channel (Optional[str]): The channel to upload the resources to. An arbitrary name to group the resources.
        publish (bool): Whether to directly publish the resources or not. They will have the 'published' status.
        publish_to (Optional[str]): The project name or id to publish the resources to.
            They will have the 'published' status and will be added to the project.
            If this is set, `publish` parameter is ignored.
        segmentation_files (Optional[list[Union[list[str], dict]]]): The segmentation files to upload.
            Entries that are neither a dict nor None are wrapped as ``{'files': entry}``.
        transpose_segmentation (bool): Whether to transpose the segmentation files or not.
        modality (Optional[str]): The modality of the resources.
        assemble_dicoms (bool): Whether to assemble the dicom files or not based on the SOPInstanceUID and InstanceNumber attributes.

    Raises:
        ValueError: If `on_error` is invalid, or if `segmentation_files` does not match `files_path`.
        NotImplementedError: If `segmentation_files` is given and dicoms were actually assembled.
        ResourceNotFoundError: If `publish_to` is supplied, and the project does not exists.

    Returns:
        The ids of the newly created resources: a list when `files_path` was a collection
        (or a directory), otherwise a single id. With `on_error='skip'`, failed uploads are
        represented by their exception object instead of an id.
    """

    if on_error not in ['raise', 'skip']:
        raise ValueError("on_error must be either 'raise' or 'skip'")
    # Backward compatibility: the deprecated `labels` only applies when `tags` is not given.
    if labels is not None and tags is None:
        tags = labels

    files_path, is_list = RootAPIHandler.__process_files_parameter(files_path)
    if assemble_dicoms:
        files_path, assembled = self._assemble_dicoms(files_path)
        # From here on the flag means "dicoms were actually merged", not "merging was requested".
        assemble_dicoms = assembled

    if segmentation_files is not None:
        if assemble_dicoms:
            raise NotImplementedError("Segmentation files cannot be uploaded when assembling dicoms yet.")
        if is_list:
            if len(segmentation_files) != len(files_path):
                raise ValueError("The number of segmentation files must match the number of resources.")
        else:
            if isinstance(segmentation_files, list) and isinstance(segmentation_files[0], list):
                raise ValueError("segmentation_files should not be a list of lists if files_path is not a list.")
            if isinstance(segmentation_files, dict):
                segmentation_files = [segmentation_files]

        # Normalize every entry to either None or a dict with a 'files' key.
        segmentation_files = [segfiles if (isinstance(segfiles, dict) or segfiles is None) else {'files': segfiles}
                              for segfiles in segmentation_files]
    loop = asyncio.get_event_loop()
    task = self._upload_resources_async(files_path=files_path,
                                        mimetype=mimetype,
                                        anonymize=anonymize,
                                        anonymize_retain_codes=anonymize_retain_codes,
                                        on_error=on_error,
                                        tags=tags,
                                        mung_filename=mung_filename,
                                        channel=channel,
                                        publish=publish,
                                        segmentation_files=segmentation_files,
                                        transpose_segmentation=transpose_segmentation,
                                        modality=modality,
                                        )

    # Blocks until every upload task has finished.
    resource_ids = loop.run_until_complete(task)
    _LOGGER.info(f"Resources uploaded: {resource_ids}")

    if publish_to is not None or publish:
        _USER_LOGGER.info('Publishing resources')
        # Failed uploads are exception objects (on_error='skip'); only real ids can be published.
        resource_ids_succ = [rid for rid in resource_ids if not isinstance(rid, Exception)]
        try:
            self.publish_resources(resource_ids_succ, publish_to)
        except Exception as e:
            _LOGGER.error(f"Error publishing resources: {e}")
            # With on_error='skip' a publishing failure is only logged.
            if on_error == 'raise':
                raise e

    if is_list:
        return resource_ids
    return resource_ids[0]
340
+
341
def publish_resources(self,
                      resource_ids: Union[str, Sequence[str]],
                      project_name: Optional[str] = None,
                      ) -> None:
    """
    Publish resources, changing their status to 'published', and optionally add them to a project.

    Args:
        resource_ids (str|Sequence[str]): The resource unique id or a list of resource unique ids.
        project_name (str): The project name or id to publish the resources to. When None,
            the resources are only published.

    Raises:
        ResourceNotFoundError: If the resource does not exists or the project does not exists.
        HTTPError: For any other failed request. The only tolerated failure is a 400 response
            saying the resource is not in inbox status, and only when `project_name` is None.
    """
    if isinstance(resource_ids, str):
        resource_ids = [resource_ids]

    for rid in resource_ids:
        publish_request = {
            'method': 'POST',
            'url': f'{self.root_url}/resources/{rid}/publish',
        }
        try:
            self._run_request(publish_request)
        except ResourceNotFoundError as e:
            e.set_params('resource', {'resource_id': rid})
            raise e
        except HTTPError as e:
            already_handled = (project_name is None
                               and BaseAPIHandler._has_status_code(e, 400)
                               and 'Resource must be in inbox status to be approved' in e.response.text)
            if not already_handled:
                raise e
            _LOGGER.warning(f"Resource {rid} is not in inbox status. Skipping publishing")

    if project_name is None:
        return

    # Resolve the project (by name or id) to find the dataset that receives the resources.
    project = self.get_project_by_name(project_name)
    if 'error' in project:
        raise ResourceNotFoundError('project', {'project_name': project_name})

    dataset_id = project['dataset_id']
    self._run_request({
        'method': 'POST',
        'url': f'{self.root_url}/datasets/{dataset_id}/resources',
        'json': {'resource_ids_to_add': resource_ids, 'all_files_selected': False},
    })
393
+
394
def get_project_by_id(self, project_id: str) -> dict:
    """
    Fetch a single project from ``{root_url}/projects/{project_id}``.

    Args:
        project_id (str): The project id.

    Returns:
        dict: The decoded JSON body describing the project.

    Raises:
        ResourceNotFoundError: If the request fails with HTTP status 500, which this
            method treats as "project does not exist".
        HTTPError: For any other HTTP failure.
    """
    project_request = {
        'method': 'GET',
        'url': f'{self.root_url}/projects/{project_id}',
    }
    try:
        response = self._run_request(project_request)
        return response.json()
    except HTTPError as e:
        is_server_error = e.response is not None and e.response.status_code == 500
        if is_server_error:
            raise ResourceNotFoundError('project', {'project_id': project_id})
        raise e
417
+
418
def get_project_by_name(self, project_name: str) -> dict:
    """
    Look a project up among all projects, matching either its name or its id.

    Args:
        project_name (str): The project name (a project id is accepted as well).

    Returns:
        dict: The first matching project. When nothing matches, a dict with an 'error'
        message and an 'all_projects' list holding the names of the existing projects.

    Raises:
        ResourceNotFoundError: Re-raised (with the project name attached) if listing
            the projects raises it.
    """
    try:
        projects = self.get_projects()
        found = next(
            (p for p in projects if p['name'] == project_name or p['id'] == project_name),
            None,
        )
        if found is not None:
            return found
        known_names = [p['name'] for p in projects]
        return {'error': 'No project with specified name found',
                'all_projects': known_names}
    except ResourceNotFoundError as e:
        e.set_params('project', {'project_name': project_name})
        raise e
442
+
443
@staticmethod
def __process_files_parameter(file_path: str | IO | Sequence[str | IO] | pydicom.dataset.Dataset) -> tuple[Sequence[str | IO], bool]:
    """
    Normalize the ``files_path`` argument of the upload methods into a collection of files.

    Args:
        file_path: A path (``str`` or any ``os.PathLike`` such as ``pathlib.Path``), a file-like
            object, a pydicom dataset, or a collection / iterable of paths and file-like objects.

    Returns:
        A ``(files, is_list)`` tuple. ``files`` is the collection of items to upload and
        ``is_list`` tells whether the caller passed several items (a collection, an iterable
        or a directory), as opposed to a single item.
    """
    if isinstance(file_path, pydicom.dataset.Dataset):
        # Datasets are uploaded from memory, through a file-like object.
        file_path = to_bytesio(file_path, file_path.filename)

    # Path objects are handled like strings; previously a Path pointing to a directory
    # was treated as a single file and never expanded.
    if isinstance(file_path, (str, os.PathLike)):
        if os.path.isdir(file_path):
            return [os.path.join(file_path, entry) for entry in os.listdir(file_path)], True
        return [file_path], False

    if _is_io_object(file_path):
        return [file_path], False

    if hasattr(file_path, '__len__'):
        # Already a sized collection: hand it back untouched.
        return file_path, True

    if hasattr(file_path, '__iter__'):
        # One-shot iterables (e.g. generators) are materialized.
        return list(file_path), True

    return [file_path], False
470
+
471
+ def get_resources_by_ids(self, ids: str | Sequence[str]) -> dict[str, Any] | Sequence[dict[str, Any]]:
472
+ """
473
+ Get resources by their unique ids.
474
+
475
+ Args:
476
+ ids (str | Sequence[str]): The resource unique id or a list of resource unique ids.
477
+
478
+ Returns:
479
+ dict | Sequence[dict]: The resource information or a list of resource information.
480
+
481
+ Raises:
482
+ ResourceNotFoundError: If the resource does not exists.
483
+
484
+ Example:
485
+ >>> api_handler.get_resources_by_ids('resource_id')
486
+ >>> api_handler.get_resources_by_ids(['resource_id1', 'resource_id2'])
487
+ """
488
+ input_is_a_string = isinstance(ids, str) # used later to return a single object or a list of objects
489
+ if input_is_a_string:
490
+ ids = [ids]
491
+
492
+ resources = []
493
+ try:
494
+ for i in ids:
495
+ request_params = {
496
+ 'method': 'GET',
497
+ 'url': f'{self.root_url}/resources/{i}',
498
+ }
499
+
500
+ resources.append(self._run_request(request_params).json())
501
+ except ResourceNotFoundError as e:
502
+ e.set_params('resource', {'resource_id': i})
503
+ raise e
504
+
505
+ return resources[0] if input_is_a_string else resources
506
+
507
def get_resources(self,
                  status: Optional[ResourceStatus] = None,
                  from_date: Optional[date] = None,
                  to_date: Optional[date] = None,
                  labels=None,
                  tags: Optional[Sequence[str]] = None,
                  modality: Optional[str] = None,
                  mimetype: Optional[str] = None,
                  return_ids_only: bool = False,
                  order_field: Optional[ResourceFields] = None,
                  order_ascending: Optional[bool] = None,
                  channel: Optional[str] = None,
                  project_id: Optional[str] = None,
                  project_name: Optional[str] = None,
                  filename: Optional[str] = None
                  ) -> Generator[dict, None, None]:
    """
    Iterate over the resources matching the given filters.
    Filters can be combined; filters left as None are not sent to the server.
    Full resource information is returned by default; use `return_ids_only` to get only the ids.

    Args:
        status (ResourceStatus): The resource status. Possible values: 'inbox', 'published', 'archived' or None. If None, it will return all resources.
        from_date (Optional[date]): The start date.
        to_date (Optional[date]): The end date.
        labels:
            .. deprecated:: 0.11.0
                Use `tags` instead. Only used when `tags` is None.
        tags (Optional[list[str]]): The tags to filter the resources. A single string is accepted too.
        modality (Optional[str]): The modality of the resources.
        mimetype (Optional[str]): The mimetype of the resources.
        return_ids_only (bool): Whether to return only the ids of the resources.
        order_field (Optional[ResourceFields]): The field to order the resources. See :data:`~.base_api_handler.ResourceFields`.
        order_ascending (Optional[bool]): Whether to order the resources in ascending order.
        channel (Optional[str]): Sent as the `channel_name` filter.
        project_id (Optional[str]): Sent as the `projectId` filter.
        project_name (Optional[str]): Sent as a union filter under the `project` key.
        filename (Optional[str]): Sent as the `filename` filter.

    Returns:
        Generator[dict, None, None]: A generator of dictionaries with the resources information.

    Example:
        >>> for resource in api_handler.get_resources(status='inbox'):
        >>>     print(resource)
    """
    if tags is None and labels is not None:
        tags = labels

    if not (project_id is None or project_name is None):
        _LOGGER.warning("Both project_id and project_name were provided.")

    # Dates travel as ISO formatted strings.
    query = {
        "from": from_date.isoformat() if from_date else from_date,
        "to": to_date.isoformat() if to_date else to_date,
        "status": "" if status is None else status,
        "modality": modality,
        "mimetype": mimetype,
        "ids": return_ids_only,
        "order_field": order_field,
        "order_by_asc": order_ascending,
        "channel_name": channel,
        "projectId": project_id,
        "filename": filename,
    }
    if project_name is not None:
        query["project"] = json.dumps({'items': [project_name], 'filterType': 'union'})

    if tags is not None:
        tag_items = [tags] if isinstance(tags, str) else tags
        query['tags'] = json.dumps({"items": tag_items, "filterType": "union"})

    # Drop the filters that were not provided (maybe not strictly necessary).
    payload = {key: value for key, value in query.items() if value is not None}

    yield from self._run_pagination_request(
        {'method': 'GET', 'url': f'{self.root_url}/resources', 'params': payload},
        return_field=['data', 0, 'resources'],
    )
601
+
602
def get_channels(self) -> Generator[dict, None, None]:
    """
    Iterate over all channels, following the pagination of the channels endpoint.

    Returns:
        Generator[dict, None, None]: A generator of dictionaries with the channels information.

    Example:
        >>> list(api_handler.get_channels())  # Gets all channels
        [{'channel_name': 'test_channel',
            'resource_data': [{'created_by': 'datamint-dev@mail.com',
                            'customer_id': '79113ed1-0535-4f53-9359-7fe3fa9f28a8',
                            'resource_id': 'a05fe46d-2f66-46fc-b7ef-666464ad3a28',
                            'resource_file_name': 'image.png',
                            'resource_mimetype': 'image/png'}],
            'deleted': False,
            'created_at': '2024-06-04T12:38:12.976Z',
            'updated_at': '2024-06-04T12:38:12.976Z',
            'resource_count': '1'}]
    """
    channels_url = self._get_endpoint_url(RootAPIHandler.ENDPOINT_CHANNELS)
    pages = self._run_pagination_request(
        {'method': 'GET', 'url': channels_url, 'params': {}},
        return_field='data',
    )
    yield from pages
632
+
633
def set_resource_tags(self, resource_id: str,
                      tags: Sequence[str] = None,
                      frame_labels: Sequence[dict] = None
                      ):
    """
    Send a PUT request to the ``/tags`` endpoint of a resource.

    Args:
        resource_id (str): The resource unique id.
        tags (Sequence[str]): Sent under the 'tags' key when not None.
        frame_labels (Sequence[dict]): Sent under the 'frame_labels' key when not None.

    Returns:
        The response object returned by the request.
    """
    resources_url = self._get_endpoint_url(RootAPIHandler.ENDPOINT_RESOURCES)

    # Only the fields that were actually provided are sent.
    body = {}
    for key, value in (('tags', tags), ('frame_labels', frame_labels)):
        if value is not None:
            body[key] = value

    return self._run_request({
        'method': 'PUT',
        'url': f"{resources_url}/{resource_id}/tags",
        'json': body,
    })
652
+
653
@staticmethod
def _has_status_code(e, status_code: int) -> bool:
    """Return True when ``e`` carries a non-None ``response`` whose status code equals ``status_code``."""
    response = getattr(e, 'response', None)
    if response is None:
        return False
    return response.status_code == status_code
656
+
657
def download_resource_file(self,
                           resource_id: str,
                           save_path: Optional[str] = None,
                           auto_convert: bool = True,
                           add_extension: bool = False
                           ) -> bytes | pydicom.dataset.Dataset | Image.Image | cv2.VideoCapture | nib_FileBasedImage | tuple[Any, str]:
    """
    Download a resource file.

    Args:
        resource_id (str): The resource unique id.
        save_path (Optional[str]): The path to save the file. The raw downloaded bytes are written there.
        auto_convert (bool): Whether to convert the file to a known format or not.
            If the conversion fails or is not implemented, a warning is logged and the raw bytes are returned.
        add_extension (bool): Whether to add the appropriate file extension to the save_path based on the content type.

    Returns:
        The resource content in bytes (if `auto_convert=False`) or the resource object (if `auto_convert=True`).
        if `add_extension=True`, the function will return a tuple of (resource_data, save_path).

    Raises:
        ResourceNotFoundError: If the resource does not exists.

    Example:
        >>> api_handler.download_resource_file('resource_id', auto_convert=False)
            returns the resource content in bytes.
        >>> api_handler.download_resource_file('resource_id', auto_convert=True)
            Assuming this resource is a dicom file, it will return a pydicom.dataset.Dataset object.
        >>> api_handler.download_resource_file('resource_id', save_path='path/to/dicomfile.dcm')
            saves the file in the specified path.
    """
    url = f"{self._get_endpoint_url(RootAPIHandler.ENDPOINT_RESOURCES)}/{resource_id}/file"
    request_params = {'method': 'GET',
                      'headers': {'accept': 'application/octet-stream'},
                      'url': url}
    try:
        response = self._run_request(request_params)

        # The mimetype is only looked up (one extra request) when auto_convert or add_extension needs it.
        mimetype = None
        if auto_convert or add_extension:
            resource_info = self.get_resources_by_ids(resource_id)
            mimetype = resource_info['mimetype']

        if auto_convert:
            try:
                # Note: `save_path` is passed here before any extension is appended below.
                resource_file = BaseAPIHandler.convert_format(response.content,
                                                              mimetype,
                                                              save_path)
            except ValueError as e:
                # Conversion failed: fall back to the raw bytes.
                _LOGGER.warning(f"Could not convert file to a known format: {e}")
                resource_file = response.content
            except NotImplementedError as e:
                _LOGGER.warning(f"Conversion not implemented yet for {mimetype} and save_path=None." +
                                " Returning a bytes array. If you want the conversion for this mimetype, provide a save_path.")
                resource_file = response.content
        else:
            resource_file = response.content
    except ResourceNotFoundError as e:
        e.set_params('resource', {'resource_id': resource_id})
        raise e

    if save_path is not None:
        if add_extension:
            # Append the extension guessed from the mimetype unless the path already ends with it.
            ext = mimetypes.guess_extension(mimetype)
            if ext is not None and not save_path.endswith(ext):
                save_path += ext
        # The file on disk always holds the raw downloaded bytes, not the converted object.
        with open(save_path, 'wb') as f:
            f.write(response.content)

    if add_extension:
        # `save_path` may still be None here when no path was provided.
        return resource_file, save_path
    return resource_file
729
+
730
def download_resource_frame(self,
                            resource_id: str,
                            frame_index: int) -> Image.Image:
    """
    Download a single frame of a resource.
    This is faster than downloading the whole resource and then extracting the frame.

    Args:
        resource_id: The resource unique id.
        frame_index: The index of the frame to download.

    Returns:
        Image.Image: The frame as a PIL image. For single frame images, the result of
        ``download_resource_file(resource_id, auto_convert=True)`` is returned.

    Raises:
        ResourceNotFoundError: If the resource does not exists.
        DatamintException: If the resource is a single frame image and `frame_index` is not 0,
            or if the frame request does not answer with status 200.
    """
    # Single frame images (png, jpeg, ...) have no frames endpoint to query:
    # the whole file is downloaded and returned instead.
    info = self.get_resources_by_ids(resource_id)
    is_single_frame = info['mimetype'].startswith('image/') or info.get('storage') == 'ImageResource'
    if is_single_frame:
        if frame_index != 0:
            raise DatamintException(f"Resource {resource_id} is a single frame image, "
                                    f"but frame_index is {frame_index}.")
        return self.download_resource_file(resource_id, auto_convert=True)

    resources_url = self._get_endpoint_url(RootAPIHandler.ENDPOINT_RESOURCES)
    frame_request = {'method': 'GET',
                     'headers': {'accept': 'image/png'},
                     'url': f"{resources_url}/{resource_id}/frames/{frame_index}"}
    try:
        response = self._run_request(frame_request)
        if response.status_code != 200:
            raise DatamintException(
                f"Error downloading frame {frame_index} of resource {resource_id}: {response.text}")
        return Image.open(io.BytesIO(response.content))
    except ResourceNotFoundError as e:
        e.set_params('resource', {'resource_id': resource_id})
        raise e
771
+
772
def delete_resources(self, resource_ids: Sequence[str] | str) -> None:
    """
    Delete resources, issuing one DELETE request per unique id.

    Args:
        resource_ids (Sequence[str] | str): The resource unique id or a list of resource unique ids.

    Raises:
        ResourceNotFoundError: If the resource does not exists.

    Example:
        >>> api_handler.delete_resources('e8b78358-656d-481f-8c98-d13b9ba6be1b')
        >>> api_handler.delete_resources(['e8b78358-656d-481f-8c98-d13b9ba6be1b', '6f8b506c-6ea1-4e85-8e67-254767f95a7b'])
    """
    targets = [resource_ids] if isinstance(resource_ids, str) else resource_ids
    for rid in targets:
        resource_url = f"{self._get_endpoint_url(RootAPIHandler.ENDPOINT_RESOURCES)}/{rid}"
        try:
            self._run_request({'method': 'DELETE', 'url': resource_url})
        except ResourceNotFoundError as e:
            e.set_params('resource', {'resource_id': rid})
            raise e
798
+
799
def get_datasets(self) -> list[dict]:
    """
    List every dataset.

    Returns:
        list[dict]: The content of the 'data' field of the ``{root_url}/datasets`` response.
    """
    listing = self._run_request({'method': 'GET', 'url': f'{self.root_url}/datasets'})
    return listing.json()['data']
814
+
815
def get_datasetsinfo_by_name(self, dataset_name: str) -> list[dict]:
    """
    Return every dataset whose 'name' equals ``dataset_name``.

    Args:
        dataset_name (str): The exact dataset name to look for.

    Returns:
        list[dict]: The matching datasets (possibly empty), in the order sent by the server.
    """
    # FIXME: inefficient to get all datasets and then filter by name
    listing = self._run_request({'method': 'GET', 'url': f'{self.root_url}/datasets'}).json()
    matches = []
    for dataset in listing['data']:
        if dataset['name'] == dataset_name:
            matches.append(dataset)
    return matches
824
+
825
def get_dataset_by_id(self, dataset_id: str) -> dict:
    """
    Fetch a single dataset from ``{root_url}/datasets/{dataset_id}``.

    Args:
        dataset_id (str): The dataset id.

    Returns:
        dict: The decoded JSON body describing the dataset.

    Raises:
        ResourceNotFoundError: If the request fails with HTTP status 500, which this
            method treats as "dataset does not exist".
        HTTPError: For any other HTTP failure.
    """
    dataset_request = {
        'method': 'GET',
        'url': f'{self.root_url}/datasets/{dataset_id}',
    }
    try:
        response = self._run_request(dataset_request)
        return response.json()
    except HTTPError as e:
        is_server_error = e.response is not None and e.response.status_code == 500
        if is_server_error:
            raise ResourceNotFoundError('dataset', {'dataset_id': dataset_id})
        raise e
836
+
837
def get_users(self) -> list[dict]:
    """
    List every user.

    Returns:
        list[dict]: The decoded JSON body of the ``{root_url}/users`` response.

    Example:
        >>> api_handler.get_users()
    """
    users_request = {'method': 'GET', 'url': f'{self.root_url}/users'}
    return self._run_request(users_request).json()
854
+
855
def create_user(self,
                email: str,
                password: Optional[str] = None,
                firstname: Optional[str] = None,
                lastname: Optional[str] = None,
                roles: Optional[list[str]] = None) -> dict:
    """
    Create a user.

    Args:
        email (str): The user email.
        password (Optional[str]): The user password.
        firstname (Optional[str]): The user first name.
        lastname (Optional[str]): The user last name.
        roles (Optional[list[str]]): The roles to give to the user, sent as-is under the 'roles' key.

    Returns:
        dict: The user information, as returned by the server.

    Raises:
        HTTPError: If the request fails. The error is logged before being re-raised.
    """
    request_params = {
        'method': 'POST',
        'url': f'{self.root_url}/users',
        'json': {'email': email, 'password': password, 'firstname': firstname, 'lastname': lastname, 'roles': roles}
    }

    try:
        resp = self._run_request(request_params)
        return resp.json()
    except HTTPError as e:
        # An HTTPError may carry no response at all; reading `.text` unconditionally would
        # raise an AttributeError here and hide the original error from the caller.
        details = e.response.text if e.response is not None else str(e)
        _LOGGER.error(f"Error creating user: {details}")
        raise
886
+
887
def get_projects(self) -> list[dict]:
    """
    List the projects exposed by the ``/projects`` endpoint.

    Returns:
        list[dict]: The content of the ``data`` field of the JSON response.

    Example:
        >>> api_handler.get_projects()
    """
    projects_url = f'{self.root_url}/projects'
    listing = self._run_request({'method': 'GET', 'url': projects_url}).json()
    return listing['data']
902
+
903
@deprecated(version='0.12.0', reason="Use :meth:`~get_resources` with project_id parameter instead.")
def get_resources_by_project(self, project_id: str) -> Generator[dict, None, None]:
    """
    Get the resources of a project.

    Args:
        project_id (str): The project id.

    Returns:
        Generator[dict, None, None]: Whatever ``_run_pagination_request``
            returns for the project's ``/resources`` endpoint; annotated as
            a generator of resource dictionaries.

    Example:
        >>> api_handler.get_resources_by_project('project_id')
    """
    resources_url = f'{self.root_url}/projects/{project_id}/resources'
    return self._run_pagination_request({'method': 'GET', 'url': resources_url})
922
+
923
def create_project(self,
                   name: str,
                   description: str,
                   resources_ids: list[str],
                   is_active_learning: bool = False) -> dict:
    """
    Create a new project.

    Args:
        name (str): The name of the project.
        description (str): The description of the project.
        resources_ids (list[str]): The resource ids sent both as the
            project's ``resource_ids`` and as the ``resource_ids`` of its
            initial annotation set.
        is_active_learning (bool): Value sent in the ``is_active_learning``
            field. Defaults to False.

    Returns:
        dict: The created project.

    Raises:
        DatamintException: If the project could not be created.
    """
    # The annotation set starts out empty apart from the resources it covers.
    annotation_set = {
        "annotators": [],
        "resource_ids": resources_ids,
        "annotations": [],
        "frame_labels": [],
        "image_labels": [],
    }
    project_payload = {
        'name': name,
        'is_active_learning': is_active_learning,
        'resource_ids': resources_ids,
        'annotation_set': annotation_set,
        "two_up_display": False,
        "require_review": False,
        'description': description,
    }
    response = self._run_request({
        'url': self._get_endpoint_url('projects'),
        'method': 'POST',
        'json': project_payload,
    })
    self._check_errors_response_json(response)
    return response.json()
960
+
961
def delete_project(self, project_id: str) -> None:
    """
    Delete a project by its id.

    Args:
        project_id (str): The project id.

    Raises:
        ResourceNotFoundError: If the project does not exist.
    """
    url = f"{self._get_endpoint_url('projects')}/{project_id}"
    request_params = {'method': 'DELETE',
                      'url': url
                      }
    try:
        # The response is intentionally discarded: library code must not
        # write to stdout (a leftover debugging print used to do so).
        self._run_request(request_params)
    except ResourceNotFoundError as e:
        # Attach which resource was missing before propagating the error.
        e.set_params('project', {'project_id': project_id})
        raise
980
+
981
def download_project(self, project_id: str,
                     outpath: str,
                     all_annotations: bool = False,
                     include_unannotated: bool = False,
                     ) -> None:
    """
    Download a project by its id.

    Args:
        project_id (str): The project id.
        outpath (str): The path to save the project zip file.
        all_annotations (bool): Sent as the ``all_annotations`` query
            parameter. Defaults to False.
        include_unannotated (bool): When True, ``include_unannotated`` is
            added to the query parameters. Defaults to False.

    Example:
        >>> api_handler.download_project('project_id', 'path/to/project.zip')
    """
    url = f"{self._get_endpoint_url('projects')}/{project_id}/annotated_dataset"
    request_params = {'method': 'GET',
                      'url': url,
                      'stream': True,
                      'params': {'all_annotations': all_annotations}
                      }
    if include_unannotated:
        request_params['params']['include_unannotated'] = include_unannotated

    response = self._run_request(request_params)
    try:
        # With a missing or zero Content-Length the total is unknown; tqdm
        # then receives ``total=None`` instead of a misleading 0.
        total_size = int(response.headers.get('content-length', 0)) or None
        with tqdm(total=total_size, unit='B', unit_scale=True) as progress_bar:
            with open(outpath, 'wb') as file:
                # 8 KiB chunks: fewer iterations than 1 KiB ones while
                # keeping memory usage negligible.
                for data in response.iter_content(8192):
                    file.write(data)
                    progress_bar.update(len(data))
    finally:
        # The request was made with ``stream=True``: close the response
        # explicitly so the connection is released even if writing fails.
        response.close()