datamint 1.9.3__py3-none-any.whl → 2.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


Files changed (35)
  1. datamint/__init__.py +2 -0
  2. datamint/api/__init__.py +3 -0
  3. datamint/api/base_api.py +430 -0
  4. datamint/api/client.py +91 -0
  5. datamint/api/dto/__init__.py +10 -0
  6. datamint/api/endpoints/__init__.py +17 -0
  7. datamint/api/endpoints/annotations_api.py +984 -0
  8. datamint/api/endpoints/channels_api.py +28 -0
  9. datamint/api/endpoints/datasetsinfo_api.py +16 -0
  10. datamint/api/endpoints/projects_api.py +203 -0
  11. datamint/api/endpoints/resources_api.py +1013 -0
  12. datamint/api/endpoints/users_api.py +38 -0
  13. datamint/api/entity_base_api.py +347 -0
  14. datamint/apihandler/api_handler.py +3 -6
  15. datamint/apihandler/base_api_handler.py +6 -28
  16. datamint/apihandler/dto/__init__.py +0 -0
  17. datamint/apihandler/dto/annotation_dto.py +1 -1
  18. datamint/client_cmd_tools/datamint_upload.py +19 -30
  19. datamint/dataset/base_dataset.py +83 -86
  20. datamint/dataset/dataset.py +2 -2
  21. datamint/entities/__init__.py +20 -0
  22. datamint/entities/annotation.py +178 -0
  23. datamint/entities/base_entity.py +51 -0
  24. datamint/entities/channel.py +46 -0
  25. datamint/entities/datasetinfo.py +22 -0
  26. datamint/entities/project.py +64 -0
  27. datamint/entities/resource.py +130 -0
  28. datamint/entities/user.py +21 -0
  29. datamint/examples/example_projects.py +41 -44
  30. datamint/exceptions.py +27 -1
  31. {datamint-1.9.3.dist-info → datamint-2.0.1.dist-info}/METADATA +13 -9
  32. datamint-2.0.1.dist-info/RECORD +50 -0
  33. {datamint-1.9.3.dist-info → datamint-2.0.1.dist-info}/WHEEL +1 -1
  34. datamint-1.9.3.dist-info/RECORD +0 -29
  35. {datamint-1.9.3.dist-info → datamint-2.0.1.dist-info}/entry_points.txt +0 -0
datamint/api/endpoints/resources_api.py (new file)
@@ -0,0 +1,1013 @@
+ from typing import Any, Optional, Sequence, TypeAlias, Literal, IO
+ from ..base_api import ApiConfig, BaseApi
+ from ..entity_base_api import EntityBaseApi, CreatableEntityApi, DeletableEntityApi
+ from .annotations_api import AnnotationsApi
+ from .projects_api import ProjectsApi
+ from datamint.entities.resource import Resource
+ from datamint.entities.annotation import Annotation
+ from datamint.exceptions import DatamintException, ResourceNotFoundError
+ import httpx
+ from datetime import date
+ import json
+ import logging
+ import pydicom
+ from medimgkit.dicom_utils import anonymize_dicom, to_bytesio, is_dicom, is_dicom_report, GeneratorWithLength
+ from medimgkit import dicom_utils, standardize_mimetype
+ from medimgkit.io_utils import is_io_object, peek
+ from medimgkit.format_detection import guess_typez, guess_extension, DEFAULT_MIME_TYPE
+ from medimgkit.nifti_utils import DEFAULT_NIFTI_MIME, NIFTI_MIMES
+ import os
+ import itertools
+ from tqdm.auto import tqdm
+ import asyncio
+ import aiohttp
+ from pathlib import Path
+ import nest_asyncio  # For running asyncio in jupyter notebooks
+ import cv2
+ from PIL import Image
+ from nibabel.filebasedimages import FileBasedImage as nib_FileBasedImage
+ import io
+
+
+ _LOGGER = logging.getLogger(__name__)
+ _USER_LOGGER = logging.getLogger('user_logger')
+
+ ResourceStatus: TypeAlias = Literal['new', 'inbox', 'published', 'archived']
+ """TypeAlias: The available resource statuses. Possible values: 'new', 'inbox', 'published', 'archived'."""
+ ResourceFields: TypeAlias = Literal['modality', 'created_by', 'published_by', 'published_on', 'filename', 'created_at']
+ """TypeAlias: The available fields for ordering resources. Possible values: 'modality', 'created_by', 'published_by', 'published_on', 'filename', 'created_at' (default)."""
+
+
+ def _infinite_gen(x):
+     while True:
+         yield x
+
+
+ def _open_io(file_path: str | Path | IO, mode: str = 'rb') -> IO:
+     if isinstance(file_path, (str, Path)):
+         return open(file_path, mode)  # honor the requested mode instead of hard-coding 'rb'
+     return file_path
+
+
+ class ResourcesApi(CreatableEntityApi[Resource], DeletableEntityApi[Resource]):
+     """API handler for resource-related endpoints."""
+
+     def __init__(self, config: ApiConfig, client: Optional[httpx.Client] = None) -> None:
+         """Initialize the resources API handler.
+
+         Args:
+             config: API configuration containing base URL, API key, etc.
+             client: Optional HTTP client instance. If None, a new one will be created.
+         """
+         super().__init__(config, Resource, 'resources', client)
+         nest_asyncio.apply()
+         self.annotations_api = AnnotationsApi(config, client)
+         self.projects_api = ProjectsApi(config, client)
+
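A minimal construction sketch for context. How `ApiConfig` is populated is not shown in this file, so its constructor call below is a labeled assumption; the later sketches only rely on method signatures visible in this diff.

# Illustrative setup; ApiConfig's constructor/fields are an assumption here.
from datamint.api.base_api import ApiConfig
from datamint.api.endpoints.resources_api import ResourcesApi

config = ApiConfig()  # hypothetical no-arg construction; a real config carries base URL / API key
resources_api = ResourcesApi(config)  # matches __init__(config, client=None) above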
+     def get_list(self,
+                  status: Optional[ResourceStatus] = None,
+                  from_date: date | str | None = None,
+                  to_date: date | str | None = None,
+                  tags: Optional[Sequence[str]] = None,
+                  modality: Optional[str] = None,
+                  mimetype: Optional[str] = None,
+                  # return_ids_only: bool = False,
+                  order_field: Optional[ResourceFields] = None,
+                  order_ascending: Optional[bool] = None,
+                  channel: Optional[str] = None,
+                  project_name: str | list[str] | None = None,
+                  filename: Optional[str] = None,
+                  limit: int | None = None
+                  ) -> Sequence[Resource]:
+         """Get resources with optional filtering.
+
+         Args:
+             status: The resource status. Possible values: 'new', 'inbox', 'published', 'archived', or None.
+                 If None, resources of all statuses are returned.
+             from_date: The start date.
+             to_date: The end date.
+             tags: The tags to filter the resources by.
+             modality: The modality of the resources.
+             mimetype: The mimetype of the resources.
+             order_field: The field to order the resources by. See :data:`~ResourceFields`.
+             order_ascending: Whether to order the resources in ascending order.
+             channel: The channel name to filter the resources by.
+             project_name: The project name or a list of project names to filter resources by project.
+                 If multiple projects are provided, only resources belonging to ALL of the specified projects are returned.
+             filename: The filename to filter the resources by.
+             limit: The maximum number of resources to return.
+         """
+         # Convert date objects to ISO format; validate that strings are ISO-formatted
+         if from_date:
+             if isinstance(from_date, str):
+                 date.fromisoformat(from_date)  # raises ValueError if not a valid ISO date
+             else:
+                 from_date = from_date.isoformat()
+         if to_date:
+             if isinstance(to_date, str):
+                 date.fromisoformat(to_date)
+             else:
+                 to_date = to_date.isoformat()
+
+         # Prepare the payload
+         payload = {
+             "from": from_date,
+             "to": to_date,
+             "status": status if status is not None else "",
+             "modality": modality,
+             "mimetype": mimetype,
+             # "ids": return_ids_only,
+             "order_field": order_field,
+             "order_by_asc": order_ascending,
+             "channel_name": channel,
+             "filename": filename,
+         }
+         # Remove Nones from the payload
+         payload = {k: v for k, v in payload.items() if v is not None}
+         if project_name is not None:
+             if isinstance(project_name, str):
+                 project_name = [project_name]
+             payload["project"] = json.dumps({'items': project_name,
+                                              'filterType': 'intersection'})  # 'union' or 'intersection'
+
+         if tags is not None:
+             if isinstance(tags, str):
+                 tags = [tags]
+             tags_filter = {
+                 "items": tags,
+                 "filterType": "union"
+             }
+             payload['tags'] = json.dumps(tags_filter)
+
+         return super().get_list(limit=limit, params=payload)
+
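A usage sketch for the filtering above, assuming the `resources_api` instance from the setup sketch (tag and project names are illustrative):

published_ct = resources_api.get_list(
    status='published',
    modality='CT',
    from_date='2024-01-01',      # ISO date string (validated) or a datetime.date
    tags=['tutorial'],           # tag filters are OR-combined ('union')
    project_name='MyProject',    # multiple names are AND-combined ('intersection')
    order_field='created_at',
    order_ascending=False,
    limit=50,
)
for res in published_ct:
    print(res.id, res.mimetype)  # Resource fields also used elsewhere in this file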
+     def get_annotations(self, resource: str | Resource) -> Sequence[Annotation]:
+         """Get annotations for a specific resource.
+
+         Args:
+             resource: The resource ID or Resource instance to fetch annotations for.
+
+         Returns:
+             A sequence of Annotation objects associated with the specified resource.
+         """
+         return self.annotations_api.get_list(resource=resource)
+
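Annotations can then be pulled per resource, continuing the sketch above:

for res in published_ct:
    annotations = resources_api.get_annotations(res)  # an id string works too
    print(res.id, len(annotations))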
+     @staticmethod
+     def __process_files_parameter(file_path: str | Sequence[str | IO | pydicom.Dataset]
+                                   ) -> Sequence[str | IO]:
+         """Process the file_path parameter to ensure it is a list of file paths or IO objects."""
+         if isinstance(file_path, str) and os.path.isdir(file_path):
+             return [f'{file_path}/{f}' for f in os.listdir(file_path) if os.path.isfile(f'{file_path}/{f}')]
+
+         processed_files = []
+         for item in file_path:
+             if isinstance(item, pydicom.Dataset):
+                 # In-memory datasets become file-like objects to avoid disk writes
+                 processed_files.append(to_bytesio(item, item.filename))
+             else:
+                 processed_files.append(item)
+         return processed_files
+
+     def _assemble_dicoms(self, files_path: Sequence[str | IO],
+                          progress_bar: bool = False
+                          ) -> tuple[Sequence[str | IO], bool, Sequence[int]]:
+         """
+         Assemble related DICOM files (e.g., slices of the same series) into single files.
+
+         Args:
+             files_path: The paths to the DICOM files to assemble.
+
+         Returns:
+             A tuple containing:
+             - The paths to the assembled DICOM files.
+             - A boolean indicating whether any assembly was necessary.
+             - A list of indices, the same length as the input, mapping each original
+               file to its position in the assembled output.
+         """
+         dicoms_files_path = []
+         other_files_path = []
+         dicom_original_idxs = []
+         others_original_idxs = []
+         for i, f in enumerate(files_path):
+             if is_dicom(f):
+                 dicoms_files_path.append(f)
+                 dicom_original_idxs.append(i)
+             else:
+                 other_files_path.append(f)
+                 others_original_idxs.append(i)
+
+         orig_len = len(dicoms_files_path)
+         if orig_len == 0:
+             _LOGGER.debug("No DICOM files found to assemble.")
+             return files_path, False, []
+         dicoms_files_path = dicom_utils.assemble_dicoms(dicoms_files_path,
+                                                         return_as_IO=True,
+                                                         progress_bar=progress_bar)
+
+         new_len = len(dicoms_files_path)
+         if new_len != orig_len:
+             _LOGGER.info(f"Assembled {new_len} dicom files out of {orig_len} files.")
+             mapping_idx = [None] * len(files_path)
+
+             files_path = GeneratorWithLength(itertools.chain(dicoms_files_path, other_files_path),
+                                              length=new_len + len(other_files_path))
+             assembled = True
+             for orig_idx, value in zip(dicom_original_idxs, dicoms_files_path.inverse_mapping_idx):
+                 mapping_idx[orig_idx] = value
+             for i, orig_idx in enumerate(others_original_idxs):
+                 mapping_idx[orig_idx] = new_len + i
+         else:
+             assembled = False
+             mapping_idx = [i for i in range(len(files_path))]
+
+         return files_path, assembled, mapping_idx
+
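To make the returned mapping concrete, a hypothetical illustration (file names invented):

# Suppose files_path = ['a_slice1.dcm', 'a_slice2.dcm', 'notes.txt'] and the two
# DICOM slices assemble into one series. The method would then return roughly:
#   files_path  -> [<assembled series IO>, 'notes.txt']
#   assembled   -> True
#   mapping_idx -> [0, 0, 1]  # original index -> index in the assembled output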
+     async def _upload_single_resource_async(self,
+                                             file_path: str | IO,
+                                             mimetype: Optional[str] = None,
+                                             anonymize: bool = False,
+                                             anonymize_retain_codes: Sequence[tuple] = [],
+                                             tags: list[str] = [],
+                                             mung_filename: Sequence[int] | Literal['all'] | None = None,
+                                             channel: Optional[str] = None,
+                                             session=None,
+                                             modality: Optional[str] = None,
+                                             publish: bool = False,
+                                             metadata_file: Optional[str | dict] = None,
+                                             ) -> str:
+         if is_io_object(file_path):
+             name = file_path.name
+         else:
+             name = file_path
+
+         if session is not None and not isinstance(session, aiohttp.ClientSession):
+             raise ValueError("session must be an aiohttp.ClientSession object.")
+
+         name = os.path.expanduser(os.path.normpath(name))
+         if len(Path(name).parts) == 0:
+             raise ValueError(f"File path '{name}' is not valid.")
+         name = os.path.join(*[x if x != '..' else '_' for x in Path(name).parts])
+
+         if mung_filename is not None:
+             file_parts = Path(name).parts
+             if file_parts[0] == os.path.sep:
+                 file_parts = file_parts[1:]
+             if mung_filename == 'all':
+                 new_file_path = '_'.join(file_parts)
+             else:
+                 folder_parts = file_parts[:-1]
+                 new_file_path = '_'.join([folder_parts[i-1] for i in mung_filename if i <= len(folder_parts)])
+                 new_file_path += '_' + file_parts[-1]
+             name = new_file_path
+             _LOGGER.debug(f"New file path: {name}")
+
+         if mimetype is None:
+             mimetype_list, ext = guess_typez(file_path, use_magic=True)
+             for mime in mimetype_list:
+                 if mime in NIFTI_MIMES:
+                     mimetype = DEFAULT_NIFTI_MIME
+                     break
+             else:
+                 if ext == '.nii.gz' or name.lower().endswith('nii.gz'):
+                     mimetype = DEFAULT_NIFTI_MIME
+                 else:
+                     mimetype = mimetype_list[-1] if mimetype_list else DEFAULT_MIME_TYPE
+
+         mimetype = standardize_mimetype(mimetype)
+         filename = os.path.basename(name)
+         _LOGGER.debug(f"File name '{filename}' mimetype: {mimetype}")
+
+         if is_dicom(file_path):
+             tags = [] if tags is None else list(tags)
+             ds = pydicom.dcmread(file_path)
+             if anonymize:
+                 _LOGGER.info(f"Anonymizing {file_path}")
+                 ds = anonymize_dicom(ds, retain_codes=anonymize_retain_codes)
+             lat = dicom_utils.get_dicom_laterality(ds)
+             if lat == 'L':
+                 tags.append("left")
+             elif lat == 'R':
+                 tags.append("right")
+             # Make the dicom `ds` object a file-like object in order to avoid unnecessary disk writes
+             f = to_bytesio(ds, name)
+         else:
+             f = _open_io(file_path)
+
+         try:
+             metadata_content = None
+             metadata_dict = None
+             if metadata_file is not None:
+                 if isinstance(metadata_file, dict):
+                     # Metadata is already a dictionary
+                     metadata_dict = metadata_file
+                     metadata_content = json.dumps(metadata_dict)
+                     _LOGGER.debug("Using provided metadata dictionary")
+                 else:
+                     # Metadata is a file path
+                     try:
+                         with open(metadata_file, 'r') as metadata_f:
+                             metadata_content = metadata_f.read()
+                         metadata_dict = json.loads(metadata_content)
+                     except Exception as e:
+                         _LOGGER.warning(f"Failed to read metadata file {metadata_file}: {e}")
+
+             # Extract modality from metadata if available
+             if metadata_dict is not None:
+                 try:
+                     if modality is None:
+                         metadata_dict_lower = {k.lower(): v for k, v in metadata_dict.items() if isinstance(k, str)}
+                         if 'modality' in metadata_dict_lower:
+                             modality = metadata_dict_lower['modality']
+                 except Exception as e:
+                     _LOGGER.debug(f"Failed to extract modality from metadata: {e}")
+
+             form = aiohttp.FormData()
+             file_key = 'resource'
+             form.add_field('source', 'api')
+
+             form.add_field(file_key, f, filename=filename, content_type=mimetype)
+             form.add_field('source_filepath', name)  # full path to the file
+             if mimetype is not None:
+                 form.add_field('mimetype', mimetype)
+             if channel is not None:
+                 form.add_field('channel', channel)
+             if modality is not None:
+                 form.add_field('modality', modality)
+             form.add_field('bypass_inbox', 'true' if publish else 'false')
+             if tags is not None and len(tags) > 0:
+                 # Comma-separated list of tags
+                 form.add_field('tags', ','.join([t.strip() for t in tags]))
+
+             # Add JSON metadata if provided
+             if metadata_content is not None:
+                 try:
+                     _LOGGER.debug("Adding metadata to form data")
+                     form.add_field('metadata', metadata_content, content_type='application/json')
+                 except Exception as e:
+                     _LOGGER.warning(f"Failed to add metadata to form: {e}")
+
+             resp_data = await self._make_request_async_json('POST',
+                                                             endpoint=self.endpoint_base,
+                                                             data=form)
+             if 'error' in resp_data:
+                 raise DatamintException(resp_data['error'])
+             _LOGGER.debug(f"Response on uploading {name}: {resp_data}")
+             return resp_data['id']
+         except Exception as e:
+             # `name` is always bound here, so no locals() check is needed
+             _LOGGER.error(f"Error uploading {name}: {e}")
+             raise
+         finally:
+             f.close()
+
+     async def _upload_resources_async(self,
+                                       files_path: Sequence[str | IO],
+                                       mimetype: Optional[str] = None,
+                                       anonymize: bool = False,
+                                       anonymize_retain_codes: Sequence[tuple] = [],
+                                       on_error: Literal['raise', 'skip'] = 'raise',
+                                       tags=None,
+                                       mung_filename: Sequence[int] | Literal['all'] | None = None,
+                                       channel: Optional[str] = None,
+                                       modality: Optional[str] = None,
+                                       publish: bool = False,
+                                       segmentation_files: Sequence[dict] | None = None,
+                                       transpose_segmentation: bool = False,
+                                       metadata_files: Sequence[str | dict | None] | None = None,
+                                       progress_bar: tqdm | None = None,
+                                       ) -> list[str]:
+         if on_error not in ['raise', 'skip']:
+             raise ValueError("on_error must be either 'raise' or 'skip'")
+
+         if segmentation_files is None:
+             segmentation_files = _infinite_gen(None)
+
+         if metadata_files is None:
+             metadata_files = _infinite_gen(None)
+
+         async with aiohttp.ClientSession() as session:
+             async def __upload_single_resource(file_path, segfiles: dict[str, list | dict],
+                                                metadata_file: str | dict | None):
+                 name = file_path.name if is_io_object(file_path) else file_path
+                 name = os.path.basename(name)
+                 rid = await self._upload_single_resource_async(
+                     file_path=file_path,
+                     mimetype=mimetype,
+                     anonymize=anonymize,
+                     anonymize_retain_codes=anonymize_retain_codes,
+                     tags=tags,
+                     session=session,
+                     mung_filename=mung_filename,
+                     channel=channel,
+                     modality=modality,
+                     publish=publish,
+                     metadata_file=metadata_file,
+                 )
+                 if progress_bar:
+                     progress_bar.update(1)
+                     progress_bar.set_postfix(file=name)
+                 else:
+                     _USER_LOGGER.info(f'"{name}" uploaded')
+
+                 if segfiles is not None:
+                     fpaths = segfiles['files']
+                     names = segfiles.get('names', _infinite_gen(None))
+                     if isinstance(names, dict):
+                         names = _infinite_gen(names)
+                     frame_indices = segfiles.get('frame_index', _infinite_gen(None))
+                     for f, name, frame_index in tqdm(zip(fpaths, names, frame_indices),
+                                                      desc=f"Uploading segmentations for {file_path}",
+                                                      total=len(fpaths)):
+                         if f is not None:
+                             await self.annotations_api._upload_segmentations_async(
+                                 rid,
+                                 file_path=f,
+                                 name=name,
+                                 frame_index=frame_index,
+                                 transpose_segmentation=transpose_segmentation
+                             )
+                 return rid
+
+             tasks = [__upload_single_resource(f, segfiles, metadata_file)
+                      for f, segfiles, metadata_file in zip(files_path, segmentation_files, metadata_files)]
+             return await asyncio.gather(*tasks, return_exceptions=on_error == 'skip')
+
+     def upload_resources(self,
+                          files_path: Sequence[str | IO | pydicom.Dataset],
+                          mimetype: str | None = None,
+                          anonymize: bool = False,
+                          anonymize_retain_codes: Sequence[tuple] = [],
+                          on_error: Literal['raise', 'skip'] = 'raise',
+                          tags: Sequence[str] | None = None,
+                          mung_filename: Sequence[int] | Literal['all'] | None = None,
+                          channel: str | None = None,
+                          publish: bool = False,
+                          publish_to: str | None = None,
+                          segmentation_files: Sequence[Sequence[str] | dict] | None = None,
+                          transpose_segmentation: bool = False,
+                          modality: str | None = None,
+                          assemble_dicoms: bool = True,
+                          metadata: Sequence[str | dict | None] | None = None,
+                          discard_dicom_reports: bool = True,
+                          progress_bar: bool = False
+                          ) -> Sequence[str | Exception]:
+         """
+         Upload multiple resources.
+
+         Note: For uploading a single resource, use `upload_resource()` instead.
+
+         Args:
+             files_path: A sequence of paths to resource files, IO objects, or pydicom.Dataset objects.
+                 Supports mixed types within the sequence. A directory path is also accepted.
+             mimetype: The mimetype of the resources. If None, it will be guessed.
+             anonymize: Whether to anonymize the dicoms or not.
+             anonymize_retain_codes: The tags to retain when anonymizing the dicoms.
+             on_error: Whether to raise an exception when an error occurs or to skip the failed item.
+             tags: The tags to add to the resources.
+             mung_filename: The parts of the filepath to keep when renaming the resource file.
+                 'all' keeps all parts.
+             channel: The channel to upload the resources to. An arbitrary name to group the resources.
+             publish: Whether to directly publish the resources or not. They will have the 'published' status.
+             publish_to: The project name or id to publish the resources to.
+                 They will have the 'published' status and will be added to the project.
+                 If this is set, the `publish` parameter is ignored.
+             segmentation_files: The segmentation files to upload.
+                 If an element is a dict, it should have two keys: 'files' and 'names'.
+                 - files: A list of paths to the segmentation files. Example: ['seg1.nii.gz', 'seg2.nii.gz'].
+                 - names: A list (same size as `files`) of labels for the segmentation files. Example: ['Brain', 'Lung'].
+             transpose_segmentation: Whether to transpose the segmentation files or not.
+             modality: The modality of the resources.
+             assemble_dicoms: Whether to assemble the dicom files or not, based on the SeriesInstanceUID and InstanceNumber attributes.
+             metadata: JSON metadata to include with each resource.
+                 Must have the same length as `files_path`.
+                 Elements can be file paths (str) or already loaded dictionaries (dict).
+             discard_dicom_reports: Whether to discard DICOM reports or not.
+             progress_bar: Whether to show a progress bar.
+
+         Raises:
+             ValueError: If a single resource is provided instead of a sequence of resources.
+             ResourceNotFoundError: If `publish_to` is supplied and the project does not exist.
+
+         Returns:
+             list[str | Exception]: A list of resource IDs or errors.
+         """
+         if on_error not in ['raise', 'skip']:
+             raise ValueError("on_error must be either 'raise' or 'skip'")
+
+         # Reject a single resource; a list with one item is allowed.
+         # Note: typing.IO is unreliable with isinstance, so is_io_object is used instead.
+         if (is_io_object(files_path) or isinstance(files_path, pydicom.Dataset)
+                 or (isinstance(files_path, str) and not os.path.isdir(files_path))):
+             raise ValueError(
+                 "upload_resources() only accepts multiple resources. For single resource upload, use upload_resource() instead.")
+
+         files_path = ResourcesApi.__process_files_parameter(files_path)
+
+         # Normalize scalar metadata to a list before any index-based filtering
+         if isinstance(metadata, (str, dict)):
+             _LOGGER.debug("Converting metadata to a list")
+             metadata = [metadata]
+
+         # Discard DICOM reports
+         if discard_dicom_reports:
+             old_size = len(files_path)
+             # Create filtered lists maintaining index correspondence
+             filtered_files = []
+             filtered_metadata = []
+
+             for i, f in enumerate(files_path):
+                 if not is_dicom_report(f):
+                     filtered_files.append(f)
+                     if metadata is not None:
+                         filtered_metadata.append(metadata[i])
+
+             files_path = filtered_files
+             if metadata is not None:
+                 metadata = filtered_metadata
+
+             if old_size != len(files_path):
+                 _LOGGER.info(f"Discarded {old_size - len(files_path)} DICOM report files from upload.")
+
+         if metadata is not None and len(metadata) != len(files_path):
+             raise ValueError("The number of metadata files must match the number of resources.")
+         if assemble_dicoms:
+             files_path, assembled, mapping_idx = self._assemble_dicoms(files_path, progress_bar=progress_bar)
+             assemble_dicoms = assembled
+         else:
+             mapping_idx = [i for i in range(len(files_path))]
+         n_files = len(files_path)
+
+         if n_files <= 1:
+             # Disable the progress bar for single file uploads
+             progress_bar = False
+
+         if segmentation_files is not None:
+             if assemble_dicoms:
+                 raise NotImplementedError("Segmentation files cannot be uploaded when assembling dicoms yet.")
+             if len(segmentation_files) != len(files_path):
+                 raise ValueError("The number of segmentation files must match the number of resources.")
+             if isinstance(segmentation_files, list) and isinstance(segmentation_files[0], list):
+                 raise ValueError("segmentation_files should not be a list of lists if files_path is not a list.")
+             if isinstance(segmentation_files, dict):
+                 segmentation_files = [segmentation_files]
+
+             segmentation_files = [segfiles if (isinstance(segfiles, dict) or segfiles is None) else {'files': segfiles}
+                                   for segfiles in segmentation_files]
+
+             for segfiles in segmentation_files:
+                 if segfiles is None:
+                     continue
+                 if 'files' not in segfiles:
+                     raise ValueError("segmentation_files must contain a 'files' key with a list of file paths.")
+                 if 'names' in segfiles:
+                     # Must have the same length as 'files'
+                     if isinstance(segfiles['names'], (list, tuple)) and len(segfiles['names']) != len(segfiles['files']):
+                         raise ValueError(
+                             "segmentation_files['names'] must have the same length as segmentation_files['files'].")
+
+         loop = asyncio.get_event_loop()
+         pbar = None
+         try:
+             if progress_bar:
+                 pbar = tqdm(total=n_files, desc="Uploading resources", unit="file")
+
+             task = self._upload_resources_async(files_path=files_path,
+                                                 mimetype=mimetype,
+                                                 anonymize=anonymize,
+                                                 anonymize_retain_codes=anonymize_retain_codes,
+                                                 on_error=on_error,
+                                                 tags=tags,
+                                                 mung_filename=mung_filename,
+                                                 channel=channel,
+                                                 publish=publish,
+                                                 segmentation_files=segmentation_files,
+                                                 transpose_segmentation=transpose_segmentation,
+                                                 modality=modality,
+                                                 metadata_files=metadata,
+                                                 progress_bar=pbar
+                                                 )
+
+             resource_ids = loop.run_until_complete(task)
+         finally:
+             if pbar:
+                 pbar.close()
+
+         _LOGGER.info(f"Resources uploaded: {resource_ids}")
+
+         if publish_to is not None:
+             _USER_LOGGER.info('Adding resources to project')
+             resource_ids_succ = [rid for rid in resource_ids if not isinstance(rid, Exception)]
+             try:
+                 self.projects_api.add_resources(resource_ids_succ, publish_to)
+             except Exception as e:
+                 _LOGGER.error(f"Error adding resources to project: {e}")
+                 if on_error == 'raise':
+                     raise
+
+         if mapping_idx:
+             _LOGGER.debug(f"Mapping indices for DICOM files: {mapping_idx}")
+             resource_ids = [resource_ids[idx] for idx in mapping_idx]
+
+         return resource_ids
+
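A bulk-upload sketch (paths, tags, and channel are illustrative; `resources_api` as in the setup sketch):

ids_or_errors = resources_api.upload_resources(
    files_path=['scans/case1.dcm', 'scans/case2.dcm'],
    anonymize=True,
    tags=['batch-2024'],
    channel='intake',
    on_error='skip',   # failed items come back as Exception objects
    progress_bar=True,
)
uploaded_ids = [r for r in ids_or_errors if not isinstance(r, Exception)]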
+     def upload_resource(self,
+                         file_path: str | IO | pydicom.Dataset,
+                         mimetype: str | None = None,
+                         anonymize: bool = False,
+                         anonymize_retain_codes: Sequence[tuple] = [],
+                         tags: Sequence[str] | None = None,
+                         mung_filename: Sequence[int] | Literal['all'] | None = None,
+                         channel: str | None = None,
+                         publish: bool = False,
+                         publish_to: str | None = None,
+                         segmentation_files: dict | None = None,
+                         transpose_segmentation: bool = False,
+                         modality: str | None = None,
+                         metadata: dict | str | None = None,
+                         discard_dicom_reports: bool = True
+                         ) -> str:
+         """
+         Upload a single resource.
+
+         This is a convenience method that wraps `upload_resources()` for single file uploads.
+         It provides a cleaner interface when uploading just one file.
+
+         Args:
+             file_path: The path to the resource file, an IO object, or a pydicom.Dataset.
+             mimetype: The mimetype of the resource. If None, it will be guessed.
+             anonymize: Whether to anonymize the DICOM or not.
+             anonymize_retain_codes: The tags to retain when anonymizing the DICOM.
+             tags: The tags to add to the resource.
+             mung_filename: The parts of the filepath to keep when renaming the resource file.
+                 'all' keeps all parts.
+             channel: The channel to upload the resource to. An arbitrary name to group the resources.
+             publish: Whether to directly publish the resource or not. It will have the 'published' status.
+             publish_to: The project name or id to publish the resource to.
+                 It will have the 'published' status and will be added to the project.
+                 If this is set, the `publish` parameter is ignored.
+             segmentation_files: The segmentation files to upload. Should be a dict with:
+                 - 'files': A list of paths to the segmentation files. Example: ['seg1.nii.gz', 'seg2.nii.gz'].
+                 - 'names': A dict mapping pixel values to class names. Example: {1: 'Brain', 2: 'Lung'}.
+             transpose_segmentation: Whether to transpose the segmentation files or not.
+             modality: The modality of the resource.
+             metadata: JSON metadata to include with the resource.
+                 Can be a file path (str) or an already loaded dictionary (dict).
+             discard_dicom_reports: Whether to discard DICOM reports or not.
+
+         Returns:
+             str: The resource ID of the uploaded resource.
+
+         Raises:
+             ResourceNotFoundError: If `publish_to` is supplied and the project does not exist.
+             DatamintException: If the upload fails.
+
+         Example:
+             .. code-block:: python
+
+                 # Simple upload
+                 resource_id = api.resources.upload_resource('path/to/file.dcm')
+
+                 # Upload with metadata and segmentation
+                 resource_id = api.resources.upload_resource(
+                     'path/to/file.dcm',
+                     tags=['tutorial', 'case1'],
+                     channel='study_channel',
+                     segmentation_files={
+                         'files': ['path/to/segmentation.nii.gz'],
+                         'names': {1: 'Bone', 2: 'Tissue'}
+                     },
+                     metadata={'patient_age': 45, 'modality': 'CT'}
+                 )
+         """
+         # Convert segmentation_files to the format expected by upload_resources
+         segmentation_files_list: Optional[list[list[str] | dict]] = None
+         if segmentation_files is not None:
+             segmentation_files_list = [segmentation_files]
+
+         # Call upload_resources with a single-item list
+         result = self.upload_resources(
+             files_path=[file_path],
+             mimetype=mimetype,
+             anonymize=anonymize,
+             anonymize_retain_codes=anonymize_retain_codes,
+             tags=tags,
+             mung_filename=mung_filename,
+             channel=channel,
+             publish=publish,
+             publish_to=publish_to,
+             segmentation_files=segmentation_files_list,
+             transpose_segmentation=transpose_segmentation,
+             modality=modality,
+             metadata=[metadata],
+             discard_dicom_reports=discard_dicom_reports,
+             assemble_dicoms=False,  # No need to assemble a single file
+             progress_bar=False  # Disable the progress bar for single uploads
+         )
+
+         # upload_resources returns a list, so extract the first element
+         if isinstance(result, Sequence) and len(result) == 1:
+             r = result[0]
+             if isinstance(r, Exception):
+                 raise r
+             return r
+         else:
+             # This should not happen with single file uploads, but handle it just in case
+             raise DatamintException(f"Unexpected return from upload_resources: {type(result)} | {result}")
+
+     def _determine_mimetype(self,
+                             content,
+                             resource: str | Resource) -> tuple[str | None, str | None]:
+         # Determine mimetype from file content
+         mimetype_list, ext = guess_typez(content, use_magic=True)
+         mimetype = mimetype_list[-1] if mimetype_list else None  # guard against an empty guess list
+
+         # Fall back to the mimetype from the resource info if not detected
+         if mimetype is None or mimetype == DEFAULT_MIME_TYPE:
+             if not isinstance(resource, Resource):
+                 resource = self.get_by_id(resource)
+             mimetype = resource.mimetype or mimetype
+
+         return mimetype, ext
+
+     async def _async_download_file(self,
+                                    resource: str | Resource,
+                                    save_path: str | Path,
+                                    session: aiohttp.ClientSession | None = None,
+                                    progress_bar: tqdm | None = None,
+                                    add_extension: bool = False) -> str:
+         """
+         Asynchronously download a file from the server.
+
+         Args:
+             resource: The resource unique id or Resource object.
+             save_path: The path to save the file to.
+             session: The aiohttp session to use for the request.
+             progress_bar: Optional progress bar to update after download completion.
+             add_extension: Whether to append the appropriate file extension based on the content type.
+
+         Returns:
+             str: The actual path where the file was saved (important when `add_extension=True`).
+         """
+         save_path = str(save_path)  # Ensure save_path is a string for file operations
+         resource_id = self._entid(resource)
+         try:
+             async with self._make_request_async('GET',
+                                                 f'{self.endpoint_base}/{resource_id}/file',
+                                                 session=session,
+                                                 headers={'accept': 'application/octet-stream'}) as resp:
+                 data_bytes = await resp.read()
+
+             final_save_path = save_path
+             if add_extension:
+                 # Save to a temporary file first to determine the mimetype from content
+                 temp_path = f"{save_path}.tmp"
+                 with open(temp_path, 'wb') as f:
+                     f.write(data_bytes)
+
+                 # Determine mimetype from file content
+                 mimetype, ext = self._determine_mimetype(content=data_bytes,
+                                                          resource=resource)
+
+                 # Generate the final path with an extension if needed
+                 if mimetype is not None and mimetype != DEFAULT_MIME_TYPE:
+                     if ext is None:
+                         ext = guess_extension(mimetype)
+                     if ext is not None and not save_path.endswith(ext):
+                         final_save_path = save_path + ext
+
+                 # Move the file to its final location
+                 os.rename(temp_path, final_save_path)
+             else:
+                 # Standard save without extension detection
+                 with open(final_save_path, 'wb') as f:
+                     f.write(data_bytes)
+
+             if progress_bar:
+                 progress_bar.update(1)
+
+             return final_save_path
+
+         except ResourceNotFoundError as e:
+             e.set_params('resource', {'resource_id': resource_id})
+             raise e
+
+     def download_multiple_resources(self,
+                                     resources: Sequence[str] | Sequence[Resource],
+                                     save_path: Sequence[str] | str,
+                                     add_extension: bool = False,
+                                     overwrite: bool = True
+                                     ) -> list[str]:
+         """
+         Download multiple resources and save them to the specified paths.
+         This is faster than downloading them one by one.
+
+         Args:
+             resources: A list of resource unique ids or Resource objects.
+             save_path: A list of paths to save the files to (same length as `resources`), or a directory path.
+                 If a directory path is provided, files will be saved in that directory, named by resource id.
+             add_extension: Whether to append the appropriate file extension to each save path based on the content type.
+             overwrite: Whether to re-download files whose save path already exists.
+
+         Returns:
+             list[str]: A list of paths where the files were saved. Important if `add_extension=True`.
+         """
+         if isinstance(resources, str):
+             raise ValueError("resources must be a list of resources")
+
+         async def _download_all_async():
+             async with aiohttp.ClientSession() as session:
+                 tasks = [
+                     self._async_download_file(
+                         resource=r,
+                         save_path=path,
+                         session=session,
+                         progress_bar=progress_bar,
+                         add_extension=add_extension
+                     )
+                     for r, path in zip(resources, save_path)
+                 ]
+                 return await asyncio.gather(*tasks)
+
+         if isinstance(save_path, str):
+             save_path = [os.path.join(save_path, self._entid(r)) for r in resources]
+
+         if len(save_path) != len(resources):
+             raise ValueError("The number of save paths must match the number of resources.")
+
+         if not overwrite:
+             # Skip resources whose target file already exists
+             new_resources = []
+             new_save_path = []
+             for i in range(len(resources)):
+                 if not os.path.exists(save_path[i]):
+                     new_resources.append(resources[i])
+                     new_save_path.append(save_path[i])
+             resources = new_resources
+             save_path = new_save_path
+
+         with tqdm(total=len(resources), desc="Downloading resources", unit="file") as progress_bar:
+             loop = asyncio.get_event_loop()
+             final_save_paths = loop.run_until_complete(_download_all_async())
+
+         return final_save_paths
+
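A matching download sketch, continuing from the upload example (the target directory is assumed to exist):

paths = resources_api.download_multiple_resources(
    resources=uploaded_ids,
    save_path='downloads',   # a directory: files are named by resource id
    add_extension=True,      # detected extensions are appended; actual paths returned
    overwrite=False,         # skip files that already exist
)
print(paths)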
+     def download_resource_file(self,
+                                resource: str | Resource,
+                                save_path: Optional[str] = None,
+                                auto_convert: bool = True,
+                                add_extension: bool = False
+                                ) -> bytes | pydicom.Dataset | Image.Image | cv2.VideoCapture | nib_FileBasedImage | tuple[Any, str]:
+         """
+         Download a resource file.
+
+         Args:
+             resource: The resource unique id or Resource instance.
+             save_path: The path to save the file to.
+             auto_convert: Whether to convert the file to a known format or not.
+             add_extension: Whether to append the appropriate file extension to the save_path based on the content type.
+
+         Returns:
+             The resource content in bytes (if `auto_convert=False`) or the parsed resource object (if `auto_convert=True`).
+             If `add_extension=True`, the function returns a tuple of (resource_data, save_path).
+
+         Raises:
+             ResourceNotFoundError: If the resource does not exist.
+
+         Example:
+             >>> api_handler.download_resource_file('resource_id', auto_convert=False)
+             returns the resource content in bytes.
+             >>> api_handler.download_resource_file('resource_id', auto_convert=True)
+             assuming this resource is a dicom file, returns a pydicom.Dataset object.
+             >>> api_handler.download_resource_file('resource_id', save_path='path/to/dicomfile.dcm')
+             saves the file to the specified path.
+         """
+         if save_path is None and add_extension:
+             raise ValueError("If add_extension is True, save_path must be provided.")
+
+         try:
+             response = self._make_entity_request('GET',
+                                                  resource,
+                                                  add_path='file',
+                                                  headers={'accept': 'application/octet-stream'})
+
+             # Get the mimetype if needed for auto_convert or add_extension
+             mimetype = None
+             ext = None
+             if auto_convert or add_extension:
+                 mimetype, ext = self._determine_mimetype(content=response.content,
+                                                          resource=resource)
+             if auto_convert:
+                 if mimetype is None:
+                     _LOGGER.warning("Could not determine mimetype. Returning a bytes array.")
+                     resource_file = response.content
+                 else:
+                     try:
+                         resource_file = BaseApi.convert_format(response.content,
+                                                                mimetype,
+                                                                save_path)
+                     except ValueError as e:
+                         _LOGGER.warning(f"Could not convert file to a known format: {e}")
+                         resource_file = response.content
+                     except NotImplementedError:
+                         _LOGGER.warning(f"Conversion not implemented yet for {mimetype} and save_path=None." +
+                                         " Returning a bytes array. If you want the conversion for this mimetype, provide a save_path.")
+                         resource_file = response.content
+             else:
+                 resource_file = response.content
+         except ResourceNotFoundError as e:
+             e.set_params('resource', {'resource_id': self._entid(resource)})
+             raise e
+
+         if save_path is not None:
+             if add_extension and mimetype is not None:
+                 if ext is None:
+                     ext = guess_extension(mimetype)
+                 if ext is not None and not save_path.endswith(ext):
+                     save_path += ext
+             with open(save_path, 'wb') as f:
+                 f.write(response.content)
+
+         if add_extension:
+             return resource_file, save_path
+         return resource_file
+
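Because `add_extension=True` changes the return shape to a tuple, a short sketch:

data, actual_path = resources_api.download_resource_file(
    'some-resource-id',              # illustrative id
    save_path='downloads/resource',  # extension appended from the detected mimetype
    add_extension=True,
)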
+     def download_resource_frame(self,
+                                 resource: str | Resource,
+                                 frame_index: int) -> Image.Image:
+         """
+         Download a single frame of a resource.
+         This is faster than downloading the whole resource and then extracting the frame.
+
+         Args:
+             resource: The resource unique id or Resource object.
+             frame_index: The index of the frame to download.
+
+         Returns:
+             Image.Image: The frame as a PIL image.
+
+         Raises:
+             ResourceNotFoundError: If the resource does not exist.
+             DatamintException: If the resource is not a video or dicom.
+         """
+         # Check if the resource is a single-frame image (png, jpeg, ...) first.
+         # If so, download the whole resource file and return the image.
+         if not isinstance(resource, Resource):
+             resource = self.get_by_id(resource)
+         if resource.mimetype.startswith('image/') or resource.storage == 'ImageResource':
+             if frame_index != 0:
+                 raise DatamintException(f"Resource {resource.id} is a single frame image, "
+                                         f"but frame_index is {frame_index}.")
+             return self.download_resource_file(resource, auto_convert=True)
+
+         try:
+             response = self._make_entity_request('GET',
+                                                  resource,
+                                                  add_path=f'frames/{frame_index}',
+                                                  headers={'accept': 'image/*'})
+             if response.status_code == 200:
+                 return Image.open(io.BytesIO(response.content))
+             else:
+                 raise DatamintException(
+                     f"Error downloading frame {frame_index} of resource {self._entid(resource)}: {response.text}")
+         except ResourceNotFoundError as e:
+             e.set_params('resource', {'resource_id': self._entid(resource)})
+             raise e
+
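A frame-extraction sketch (resource id illustrative):

frame = resources_api.download_resource_frame('some-resource-id', frame_index=0)
frame.save('frame0.png')  # returned as a PIL.Image.Image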
+     def publish_resources(self,
+                           resources: str | Resource | Sequence[str | Resource]) -> None:
+         """
+         Publish resources, changing their status to 'published'.
+
+         Args:
+             resources: The resources to publish. A single id or Resource object (instead of a list) is also accepted.
+
+         Raises:
+             ResourceNotFoundError: If a resource does not exist.
+         """
+         if isinstance(resources, (Resource, str)):
+             resources = [resources]
+
+         for resource in resources:
+             try:
+                 self._make_entity_request('POST', resource, add_path='publish')
+             except ResourceNotFoundError as e:
+                 e.set_params('resource', {'resource_id': self._entid(resource)})
+                 raise
+             except httpx.HTTPError as e:
+                 if BaseApi._has_status_code(e, 400) and 'Resource must be in inbox status to be approved' in e.response.text:
+                     _LOGGER.warning(f"Resource {resource} is not in inbox status. Skipping publishing")
+                 else:
+                     raise
+
+     def set_tags(self,
+                  resource: str | Resource,
+                  tags: Sequence[str],
+                  ):
+         """Set the tags of a resource.
+
+         Args:
+             resource: The resource unique id or Resource object.
+             tags: The tags to set on the resource.
+         """
+         data = {'tags': tags}
+         resource_id = self._entid(resource)
+
+         response = self._make_entity_request('PUT',
+                                              resource_id,
+                                              add_path='tags',
+                                              json=data)
+         return response
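
Closing the loop on the lifecycle, publishing and tagging sketches (ids illustrative; `set_tags` goes through the PUT <resource>/tags endpoint shown above):

resources_api.publish_resources(uploaded_ids)           # 'inbox' -> 'published'
resources_api.set_tags(uploaded_ids[0], ['reviewed'])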