datamint 1.9.2__py3-none-any.whl → 2.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- datamint/__init__.py +2 -0
- datamint/api/__init__.py +3 -0
- datamint/api/base_api.py +430 -0
- datamint/api/client.py +91 -0
- datamint/api/dto/__init__.py +10 -0
- datamint/api/endpoints/__init__.py +17 -0
- datamint/api/endpoints/annotations_api.py +984 -0
- datamint/api/endpoints/channels_api.py +28 -0
- datamint/api/endpoints/datasetsinfo_api.py +16 -0
- datamint/api/endpoints/projects_api.py +203 -0
- datamint/api/endpoints/resources_api.py +1013 -0
- datamint/api/endpoints/users_api.py +38 -0
- datamint/api/entity_base_api.py +347 -0
- datamint/apihandler/annotation_api_handler.py +5 -5
- datamint/apihandler/api_handler.py +3 -6
- datamint/apihandler/base_api_handler.py +6 -28
- datamint/apihandler/dto/__init__.py +0 -0
- datamint/apihandler/dto/annotation_dto.py +1 -1
- datamint/apihandler/root_api_handler.py +53 -28
- datamint/client_cmd_tools/datamint_config.py +6 -37
- datamint/client_cmd_tools/datamint_upload.py +84 -58
- datamint/dataset/base_dataset.py +65 -75
- datamint/dataset/dataset.py +2 -2
- datamint/entities/__init__.py +20 -0
- datamint/entities/annotation.py +178 -0
- datamint/entities/base_entity.py +51 -0
- datamint/entities/channel.py +46 -0
- datamint/entities/datasetinfo.py +22 -0
- datamint/entities/project.py +64 -0
- datamint/entities/resource.py +130 -0
- datamint/entities/user.py +21 -0
- datamint/examples/example_projects.py +41 -44
- datamint/exceptions.py +27 -1
- datamint/logging.yaml +1 -1
- datamint/utils/logging_utils.py +75 -0
- {datamint-1.9.2.dist-info → datamint-2.0.0.dist-info}/METADATA +13 -9
- datamint-2.0.0.dist-info/RECORD +50 -0
- {datamint-1.9.2.dist-info → datamint-2.0.0.dist-info}/WHEEL +1 -1
- datamint-1.9.2.dist-info/RECORD +0 -29
- {datamint-1.9.2.dist-info → datamint-2.0.0.dist-info}/entry_points.txt +0 -0
@@ -0,0 +1,1013 @@
+from typing import Any, Optional, Sequence, TypeAlias, Literal, IO
+from ..base_api import ApiConfig, BaseApi
+from ..entity_base_api import EntityBaseApi, CreatableEntityApi, DeletableEntityApi
+from .annotations_api import AnnotationsApi
+from .projects_api import ProjectsApi
+from datamint.entities.resource import Resource
+from datamint.entities.annotation import Annotation
+from datamint.exceptions import DatamintException, ResourceNotFoundError
+import httpx
+from datetime import date
+import json
+import logging
+import pydicom
+from medimgkit.dicom_utils import anonymize_dicom, to_bytesio, is_dicom, is_dicom_report, GeneratorWithLength
+from medimgkit import dicom_utils, standardize_mimetype
+from medimgkit.io_utils import is_io_object, peek
+from medimgkit.format_detection import guess_typez, guess_extension, DEFAULT_MIME_TYPE
+from medimgkit.nifti_utils import DEFAULT_NIFTI_MIME, NIFTI_MIMES
+import os
+import itertools
+from tqdm.auto import tqdm
+import asyncio
+import aiohttp
+from pathlib import Path
+import nest_asyncio  # For running asyncio in jupyter notebooks
+import cv2
+from PIL import Image
+from nibabel.filebasedimages import FileBasedImage as nib_FileBasedImage
+import io
+
+
+_LOGGER = logging.getLogger(__name__)
+_USER_LOGGER = logging.getLogger('user_logger')
+
+ResourceStatus: TypeAlias = Literal['new', 'inbox', 'published', 'archived']
+"""TypeAlias: The available resource statuses. Possible values: 'new', 'inbox', 'published', 'archived'.
+"""
+ResourceFields: TypeAlias = Literal['modality', 'created_by', 'published_by', 'published_on', 'filename', 'created_at']
+"""TypeAlias: The available fields to order resources by. Possible values: 'modality', 'created_by', 'published_by', 'published_on', 'filename', 'created_at' (default).
+"""
+
+
+def _infinite_gen(x):
+    while True:
+        yield x
+
+
+def _open_io(file_path: str | Path | IO, mode: str = 'rb') -> IO:
+    if isinstance(file_path, (str, Path)):
+        return open(file_path, mode)  # honor the requested mode instead of hardcoding 'rb'
+    return file_path
+
+
+class ResourcesApi(CreatableEntityApi[Resource], DeletableEntityApi[Resource]):
+    """API handler for resource-related endpoints."""
+
+    def __init__(self, config: ApiConfig, client: Optional[httpx.Client] = None) -> None:
+        """Initialize the resources API handler.
+
+        Args:
+            config: API configuration containing base URL, API key, etc.
+            client: Optional HTTP client instance. If None, a new one will be created.
+        """
+        super().__init__(config, Resource, 'resources', client)
+        nest_asyncio.apply()
+        self.annotations_api = AnnotationsApi(config, client)
+        self.projects_api = ProjectsApi(config, client)
+
+    def get_list(self,
+                 status: Optional[ResourceStatus] = None,
+                 from_date: date | str | None = None,
+                 to_date: date | str | None = None,
+                 tags: Optional[Sequence[str]] = None,
+                 modality: Optional[str] = None,
+                 mimetype: Optional[str] = None,
+                 # return_ids_only: bool = False,
+                 order_field: Optional[ResourceFields] = None,
+                 order_ascending: Optional[bool] = None,
+                 channel: Optional[str] = None,
+                 project_name: str | list[str] | None = None,
+                 filename: Optional[str] = None,
+                 limit: int | None = None
+                 ) -> Sequence[Resource]:
+        """Get resources with optional filtering.
+
+        Args:
+            status: The resource status. Possible values: 'new', 'inbox', 'published', 'archived' or None. If None, all resources are returned.
+            from_date: The start date.
+            to_date: The end date.
+            tags: The tags to filter the resources by.
+            modality: The modality of the resources.
+            mimetype: The mimetype of the resources.
+            order_field: The field to order the resources by. See :data:`~ResourceFields`.
+            order_ascending: Whether to order the resources in ascending order.
+            channel: The channel name to filter the resources by.
+            project_name: The project name or a list of project names to filter resources by project.
+                If multiple projects are provided, resources will be filtered to include only those belonging to ALL of the specified projects.
+            filename: The filename to filter the resources by.
+            limit: The maximum number of resources to return.
+        """
+
+        # Convert date objects to ISO format; strings are validated as ISO dates.
+        if from_date:
+            if isinstance(from_date, str):
+                date.fromisoformat(from_date)  # raises ValueError if not a valid ISO date
+            else:
+                from_date = from_date.isoformat()
+        if to_date:
+            if isinstance(to_date, str):
+                date.fromisoformat(to_date)
+            else:
+                to_date = to_date.isoformat()
+
+        # Prepare the payload
+        payload = {
+            "from": from_date,
+            "to": to_date,
+            "status": status if status is not None else "",
+            "modality": modality,
+            "mimetype": mimetype,
+            # "ids": return_ids_only,
+            "order_field": order_field,
+            "order_by_asc": order_ascending,
+            "channel_name": channel,
+            "filename": filename,
+        }
+        # remove Nones from payload
+        payload = {k: v for k, v in payload.items() if v is not None}
+        if project_name is not None:
+            if isinstance(project_name, str):
+                project_name = [project_name]
+            payload["project"] = json.dumps({'items': project_name,
+                                             'filterType': 'intersection'})  # union or intersection
+
+        if tags is not None:
+            if isinstance(tags, str):
+                tags = [tags]
+            tags_filter = {
+                "items": tags,
+                "filterType": "union"
+            }
+            payload['tags'] = json.dumps(tags_filter)
+
+        return super().get_list(limit=limit, params=payload)
+
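For orientation, a brief usage sketch of the filters above (assuming, as in the docstring examples further below, an `api` client object whose `resources` attribute is this `ResourcesApi`; all values are illustrative):

    from datetime import date

    resources = api.resources.get_list(
        status='published',
        modality='CT',
        from_date=date(2025, 1, 1),
        tags=['tutorial'],            # matches resources with ANY of these tags (union filter)
        project_name=['Project A'],   # matches resources in ALL listed projects (intersection filter)
        order_field='created_at',
        order_ascending=False,
        limit=50,
    )
    for res in resources:
        print(res.id)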
+    def get_annotations(self, resource: str | Resource) -> Sequence[Annotation]:
+        """Get annotations for a specific resource.
+
+        Args:
+            resource: The resource ID or Resource instance to fetch annotations for.
+
+        Returns:
+            A sequence of Annotation objects associated with the specified resource.
+        """
+        return self.annotations_api.get_list(resource=resource)
+
+    @staticmethod
+    def __process_files_parameter(file_path: str | Sequence[str | IO | pydicom.Dataset]
+                                  ) -> Sequence[str | IO]:
+        """
+        Process the file_path parameter to ensure it is a list of file paths or IO objects.
+        """
+        if isinstance(file_path, str) and os.path.isdir(file_path):
+            return [f'{file_path}/{f}' for f in os.listdir(file_path) if os.path.isfile(f'{file_path}/{f}')]
+
+        processed_files = []
+        for item in file_path:
+            if isinstance(item, pydicom.Dataset):
+                processed_files.append(to_bytesio(item, item.filename))
+            else:
+                processed_files.append(item)
+        return processed_files
+
+    def _assemble_dicoms(self, files_path: Sequence[str | IO],
+                         progress_bar: bool = False
+                         ) -> tuple[Sequence[str | IO], bool, Sequence[int]]:
+        """
+        Assembles DICOM files belonging to the same series into single files.
+
+        Args:
+            files_path: The paths to the DICOM files to assemble.
+
+        Returns:
+            A tuple containing:
+                - The paths to the assembled DICOM files.
+                - A boolean indicating if the assembly was necessary.
+                - A list of indices, with the same length as the input files, mapping each original file to its position in the assembled output.
+        """
+        dicoms_files_path = []
+        other_files_path = []
+        dicom_original_idxs = []
+        others_original_idxs = []
+        for i, f in enumerate(files_path):
+            if is_dicom(f):
+                dicoms_files_path.append(f)
+                dicom_original_idxs.append(i)
+            else:
+                other_files_path.append(f)
+                others_original_idxs.append(i)
+
+        orig_len = len(dicoms_files_path)
+        if orig_len == 0:
+            _LOGGER.debug("No DICOM files found to assemble.")
+            return files_path, False, []
+        dicoms_files_path = dicom_utils.assemble_dicoms(dicoms_files_path,
+                                                        return_as_IO=True,
+                                                        progress_bar=progress_bar)
+
+        new_len = len(dicoms_files_path)
+        if new_len != orig_len:
+            _LOGGER.info(f"Assembled {new_len} dicom files out of {orig_len} files.")
+            mapping_idx = [None] * len(files_path)
+
+            files_path = GeneratorWithLength(itertools.chain(dicoms_files_path, other_files_path),
+                                             length=new_len + len(other_files_path))
+            assembled = True
+            for orig_idx, value in zip(dicom_original_idxs, dicoms_files_path.inverse_mapping_idx):
+                mapping_idx[orig_idx] = value
+            for i, orig_idx in enumerate(others_original_idxs):
+                mapping_idx[orig_idx] = new_len + i
+        else:
+            assembled = False
+            mapping_idx = [i for i in range(len(files_path))]
+
+        return files_path, assembled, mapping_idx
+
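To make the returned mapping concrete, a small hypothetical illustration (the file names and indices are invented):

    # inputs:   [ct_slice1.dcm, ct_slice2.dcm, notes.png, mr_slice1.dcm]
    # output:   [ct_volume, mr_volume, notes.png]        -> assembled == True
    # mapping:  original index -> position in the assembled output
    mapping_idx = [0, 0, 2, 1]
    # upload_resources later uses this to return one resource ID per
    # original input: resource_ids = [resource_ids[i] for i in mapping_idx]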
+    async def _upload_single_resource_async(self,
+                                            file_path: str | IO,
+                                            mimetype: Optional[str] = None,
+                                            anonymize: bool = False,
+                                            anonymize_retain_codes: Sequence[tuple] = [],
+                                            tags: list[str] = [],
+                                            mung_filename: Sequence[int] | Literal['all'] | None = None,
+                                            channel: Optional[str] = None,
+                                            session=None,
+                                            modality: Optional[str] = None,
+                                            publish: bool = False,
+                                            metadata_file: Optional[str | dict] = None,
+                                            ) -> str:
+        if is_io_object(file_path):
+            name = file_path.name
+        else:
+            name = file_path
+
+        if session is not None and not isinstance(session, aiohttp.ClientSession):
+            raise ValueError("session must be an aiohttp.ClientSession object.")
+
+        name = os.path.expanduser(os.path.normpath(name))
+        if len(Path(name).parts) == 0:
+            raise ValueError(f"File path '{name}' is not valid.")
+        name = os.path.join(*[x if x != '..' else '_' for x in Path(name).parts])
+
+        if mung_filename is not None:
+            file_parts = Path(name).parts
+            if file_parts[0] == os.path.sep:
+                file_parts = file_parts[1:]
+            if mung_filename == 'all':
+                new_file_path = '_'.join(file_parts)
+            else:
+                folder_parts = file_parts[:-1]
+                new_file_path = '_'.join([folder_parts[i-1] for i in mung_filename if i <= len(folder_parts)])
+                new_file_path += '_' + file_parts[-1]
+            name = new_file_path
+            _LOGGER.debug(f"New file path: {name}")
+
+        is_a_dicom_file = None
+        if mimetype is None:
+            mimetype_list, ext = guess_typez(file_path, use_magic=True)
+            for mime in mimetype_list:
+                if mime in NIFTI_MIMES:
+                    mimetype = DEFAULT_NIFTI_MIME
+                    break
+            else:
+                if ext == '.nii.gz' or name.lower().endswith('nii.gz'):
+                    mimetype = DEFAULT_NIFTI_MIME
+                else:
+                    mimetype = mimetype_list[-1] if mimetype_list else DEFAULT_MIME_TYPE
+
+        mimetype = standardize_mimetype(mimetype)
+        filename = os.path.basename(name)
+        _LOGGER.debug(f"File name '{filename}' mimetype: {mimetype}")
+
+        if is_a_dicom_file == True or is_dicom(file_path):
+            if tags is None:
+                tags = []
+            else:
+                tags = list(tags)
+            ds = pydicom.dcmread(file_path)
+            if anonymize:
+                _LOGGER.info(f"Anonymizing {file_path}")
+                ds = anonymize_dicom(ds, retain_codes=anonymize_retain_codes)
+            lat = dicom_utils.get_dicom_laterality(ds)
+            if lat == 'L':
+                tags.append("left")
+            elif lat == 'R':
+                tags.append("right")
+            # make the dicom `ds` object a file-like object in order to avoid unnecessary disk writes
+            f = to_bytesio(ds, name)
+        else:
+            f = _open_io(file_path)
+
+        try:
+            metadata_content = None
+            metadata_dict = None
+            if metadata_file is not None:
+                if isinstance(metadata_file, dict):
+                    # Metadata is already a dictionary
+                    metadata_dict = metadata_file
+                    metadata_content = json.dumps(metadata_dict)
+                    _LOGGER.debug("Using provided metadata dictionary")
+                else:
+                    # Metadata is a file path
+                    try:
+                        with open(metadata_file, 'r') as metadata_f:
+                            metadata_content = metadata_f.read()
+                        metadata_dict = json.loads(metadata_content)
+                    except Exception as e:
+                        _LOGGER.warning(f"Failed to read metadata file {metadata_file}: {e}")
+
+            # Extract modality from metadata if available
+            if metadata_dict is not None:
+                metadata_dict_lower = {k.lower(): v for k, v in metadata_dict.items() if isinstance(k, str)}
+                try:
+                    if modality is None:
+                        if 'modality' in metadata_dict_lower:
+                            modality = metadata_dict_lower['modality']
+                except Exception as e:
+                    _LOGGER.debug(f"Failed to extract modality from metadata: {e}")
+
+            form = aiohttp.FormData()
+            file_key = 'resource'
+            form.add_field('source', 'api')
+
+            form.add_field(file_key, f, filename=filename, content_type=mimetype)
+            form.add_field('source_filepath', name)  # full path to the file
+            if mimetype is not None:
+                form.add_field('mimetype', mimetype)
+            if channel is not None:
+                form.add_field('channel', channel)
+            if modality is not None:
+                form.add_field('modality', modality)
+            form.add_field('bypass_inbox', 'true' if publish else 'false')
+            if tags is not None and len(tags) > 0:
+                # comma separated list of tags
+                form.add_field('tags', ','.join([l.strip() for l in tags]))
+
+            # Add JSON metadata if provided
+            if metadata_content is not None:
+                try:
+                    _LOGGER.debug("Adding metadata to form data")
+                    form.add_field('metadata', metadata_content, content_type='application/json')
+                except Exception as e:
+                    _LOGGER.warning(f"Failed to add metadata to form: {e}")
+
+            resp_data = await self._make_request_async_json('POST',
+                                                            endpoint=self.endpoint_base,
+                                                            data=form)
+            if 'error' in resp_data:
+                raise DatamintException(resp_data['error'])
+            _LOGGER.debug(f"Response on uploading {name}: {resp_data}")
+            return resp_data['id']
+        except Exception as e:
+            if 'name' in locals():
+                _LOGGER.error(f"Error uploading {name}: {e}")
+            else:
+                _LOGGER.error(f"Error uploading {file_path}: {e}")
+            raise
+        finally:
+            f.close()
+
+    async def _upload_resources_async(self,
+                                      files_path: Sequence[str | IO],
+                                      mimetype: Optional[str] = None,
+                                      anonymize: bool = False,
+                                      anonymize_retain_codes: Sequence[tuple] = [],
+                                      on_error: Literal['raise', 'skip'] = 'raise',
+                                      tags=None,
+                                      mung_filename: Sequence[int] | Literal['all'] | None = None,
+                                      channel: Optional[str] = None,
+                                      modality: Optional[str] = None,
+                                      publish: bool = False,
+                                      segmentation_files: Sequence[dict] | None = None,
+                                      transpose_segmentation: bool = False,
+                                      metadata_files: Sequence[str | dict | None] | None = None,
+                                      progress_bar: tqdm | None = None,
+                                      ) -> list[str]:
+        if on_error not in ['raise', 'skip']:
+            raise ValueError("on_error must be either 'raise' or 'skip'")
+
+        if segmentation_files is None:
+            segmentation_files = _infinite_gen(None)
+
+        if metadata_files is None:
+            metadata_files = _infinite_gen(None)
+
+        async with aiohttp.ClientSession() as session:
+            async def __upload_single_resource(file_path, segfiles: dict[str, list | dict],
+                                               metadata_file: str | dict | None):
+                name = file_path.name if is_io_object(file_path) else file_path
+                name = os.path.basename(name)
+                rid = await self._upload_single_resource_async(
+                    file_path=file_path,
+                    mimetype=mimetype,
+                    anonymize=anonymize,
+                    anonymize_retain_codes=anonymize_retain_codes,
+                    tags=tags,
+                    session=session,
+                    mung_filename=mung_filename,
+                    channel=channel,
+                    modality=modality,
+                    publish=publish,
+                    metadata_file=metadata_file,
+                )
+                if progress_bar:
+                    progress_bar.update(1)
+                    progress_bar.set_postfix(file=name)
+                else:
+                    _USER_LOGGER.info(f'"{name}" uploaded')
+
+                if segfiles is not None:
+                    fpaths = segfiles['files']
+                    names = segfiles.get('names', _infinite_gen(None))
+                    if isinstance(names, dict):
+                        names = _infinite_gen(names)
+                    frame_indices = segfiles.get('frame_index', _infinite_gen(None))
+                    for f, name, frame_index in tqdm(zip(fpaths, names, frame_indices),
+                                                     desc=f"Uploading segmentations for {file_path}",
+                                                     total=len(fpaths)):
+                        if f is not None:
+                            await self.annotations_api._upload_segmentations_async(
+                                rid,
+                                file_path=f,
+                                name=name,
+                                frame_index=frame_index,
+                                transpose_segmentation=transpose_segmentation
+                            )
+                return rid
+
+            tasks = [__upload_single_resource(f, segfiles, metadata_file)
+                     for f, segfiles, metadata_file in zip(files_path, segmentation_files, metadata_files)]
+            return await asyncio.gather(*tasks, return_exceptions=on_error == 'skip')
+
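The `asyncio.gather(..., return_exceptions=...)` call above is what implements the `on_error='skip'` behavior: failed uploads come back as exception objects in the result list, in order, instead of aborting the whole batch. A minimal self-contained sketch of the same pattern:

    import asyncio

    async def might_fail(i: int) -> int:
        if i == 1:
            raise RuntimeError("boom")
        return i * 10

    async def main() -> None:
        # With return_exceptions=True, failures are returned in place
        # as Exception objects, preserving order -- like on_error='skip'.
        results = await asyncio.gather(*(might_fail(i) for i in range(3)),
                                       return_exceptions=True)
        print(results)  # [0, RuntimeError('boom'), 20]

    asyncio.run(main())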
+    def upload_resources(self,
+                         files_path: Sequence[str | IO | pydicom.Dataset],
+                         mimetype: str | None = None,
+                         anonymize: bool = False,
+                         anonymize_retain_codes: Sequence[tuple] = [],
+                         on_error: Literal['raise', 'skip'] = 'raise',
+                         tags: Sequence[str] | None = None,
+                         mung_filename: Sequence[int] | Literal['all'] | None = None,
+                         channel: str | None = None,
+                         publish: bool = False,
+                         publish_to: str | None = None,
+                         segmentation_files: Sequence[Sequence[str] | dict] | None = None,
+                         transpose_segmentation: bool = False,
+                         modality: str | None = None,
+                         assemble_dicoms: bool = True,
+                         metadata: Sequence[str | dict | None] | None = None,
+                         discard_dicom_reports: bool = True,
+                         progress_bar: bool = False
+                         ) -> Sequence[str | Exception]:
+        """
+        Upload multiple resources.
+
+        Note: For uploading a single resource, use `upload_resource()` instead.
+
+        Args:
+            files_path: A sequence of paths to resource files, IO objects, or pydicom.Dataset objects.
+                Must contain at least 2 items. Supports mixed types within the sequence.
+            mimetype (str): The mimetype of the resources. If None, it will be guessed.
+            anonymize (bool): Whether to anonymize the dicoms or not.
+            anonymize_retain_codes (Sequence[tuple]): The tags to retain when anonymizing the dicoms.
+            on_error (Literal['raise', 'skip']): Whether to raise an exception when an error occurs or to skip the error.
+            tags (Optional[Sequence[str]]): The tags to add to the resources.
+            mung_filename (Sequence[int] | Literal['all']): The parts of the filepath to keep when renaming the resource file.
+                'all' keeps all parts.
+            channel (Optional[str]): The channel to upload the resources to. An arbitrary name to group the resources.
+            publish (bool): Whether to directly publish the resources or not. They will have the 'published' status.
+            publish_to (Optional[str]): The project name or id to publish the resources to.
+                They will have the 'published' status and will be added to the project.
+                If this is set, the `publish` parameter is ignored.
+            segmentation_files (Optional[list[Union[list[str], dict]]]): The segmentation files to upload.
+                If each element is a dict, it should have two keys: 'files' and 'names'.
+                - files: A list of paths to the segmentation files. Example: ['seg1.nii.gz', 'seg2.nii.gz'].
+                - names: Can be a list (same size as `files`) of labels for the segmentation files. Example: ['Brain', 'Lung'].
+            transpose_segmentation (bool): Whether to transpose the segmentation files or not.
+            modality (Optional[str]): The modality of the resources.
+            assemble_dicoms (bool): Whether to assemble the dicom files or not, based on the SeriesInstanceUID and InstanceNumber attributes.
+            metadata (Optional[list[str | dict | None]]): JSON metadata to include with each resource.
+                Must have the same length as `files_path`.
+                Can be file paths (str) or already loaded dictionaries (dict).
+            discard_dicom_reports (bool): Whether to discard DICOM report files or not.
+            progress_bar (bool): Whether to show a progress bar or not.
+
+        Raises:
+            ValueError: If a single resource is provided instead of multiple resources.
+            ResourceNotFoundError: If `publish_to` is supplied and the project does not exist.
+
+        Returns:
+            list[str | Exception]: A list of resource IDs or errors.
+        """
+
+        if on_error not in ['raise', 'skip']:
+            raise ValueError("on_error must be either 'raise' or 'skip'")
+
+        # Check if single resource provided and raise error (list of 1 item is allowed)
+        if isinstance(files_path, IO) or isinstance(files_path, pydicom.Dataset) or (isinstance(files_path, str) and not os.path.isdir(files_path)):
+            raise ValueError(
+                "upload_resources() only accepts multiple resources. For single resource upload, use upload_resource() instead.")
+
+        files_path = ResourcesApi.__process_files_parameter(files_path)
+
+        # Discard DICOM reports
+        if discard_dicom_reports:
+            old_size = len(files_path)
+            # Create filtered lists maintaining index correspondence
+            filtered_files = []
+            filtered_metadata = []
+
+            for i, f in enumerate(files_path):
+                if not is_dicom_report(f):
+                    filtered_files.append(f)
+                    if metadata is not None:
+                        filtered_metadata.append(metadata[i])
+
+            files_path = filtered_files
+            if metadata is not None:
+                metadata = filtered_metadata
+
+            if old_size is not None and old_size != len(files_path):
+                _LOGGER.info(f"Discarded {old_size - len(files_path)} DICOM report files from upload.")
+
+        if isinstance(metadata, (str, dict)):
+            _LOGGER.debug("Converting metadatas to a list")
+            metadata = [metadata]
+
+        if metadata is not None and len(metadata) != len(files_path):
+            raise ValueError("The number of metadata files must match the number of resources.")
+        if assemble_dicoms:
+            files_path, assembled, mapping_idx = self._assemble_dicoms(files_path, progress_bar=progress_bar)
+            assemble_dicoms = assembled
+        else:
+            mapping_idx = [i for i in range(len(files_path))]
+        n_files = len(files_path)
+
+        if n_files <= 1:
+            # Disable progress bar for single file uploads
+            progress_bar = False
+
+        if segmentation_files is not None:
+            if assemble_dicoms:
+                raise NotImplementedError("Segmentation files cannot be uploaded when assembling dicoms yet.")
+            if len(segmentation_files) != len(files_path):
+                raise ValueError("The number of segmentation files must match the number of resources.")
+            else:
+                if isinstance(segmentation_files, list) and isinstance(segmentation_files[0], list):
+                    raise ValueError("segmentation_files should not be a list of lists if files_path is not a list.")
+                if isinstance(segmentation_files, dict):
+                    segmentation_files = [segmentation_files]
+
+            segmentation_files = [segfiles if (isinstance(segfiles, dict) or segfiles is None) else {'files': segfiles}
+                                  for segfiles in segmentation_files]
+
+            for segfiles in segmentation_files:
+                if segfiles is None:
+                    continue
+                if 'files' not in segfiles:
+                    raise ValueError("segmentation_files must contain a 'files' key with a list of file paths.")
+                if 'names' in segfiles:
+                    # same length as files
+                    if isinstance(segfiles['names'], (list, tuple)) and len(segfiles['names']) != len(segfiles['files']):
+                        raise ValueError(
+                            "segmentation_files['names'] must have the same length as segmentation_files['files'].")
+
+        loop = asyncio.get_event_loop()
+        pbar = None
+        try:
+            if progress_bar:
+                pbar = tqdm(total=n_files, desc="Uploading resources", unit="file")
+
+            task = self._upload_resources_async(files_path=files_path,
+                                                mimetype=mimetype,
+                                                anonymize=anonymize,
+                                                anonymize_retain_codes=anonymize_retain_codes,
+                                                on_error=on_error,
+                                                tags=tags,
+                                                mung_filename=mung_filename,
+                                                channel=channel,
+                                                publish=publish,
+                                                segmentation_files=segmentation_files,
+                                                transpose_segmentation=transpose_segmentation,
+                                                modality=modality,
+                                                metadata_files=metadata,
+                                                progress_bar=pbar
+                                                )
+
+            resource_ids = loop.run_until_complete(task)
+        finally:
+            if pbar:
+                pbar.close()
+
+        _LOGGER.info(f"Resources uploaded: {resource_ids}")
+
+        if publish_to is not None:
+            _USER_LOGGER.info('Adding resources to project')
+            resource_ids_succ = [rid for rid in resource_ids if not isinstance(rid, Exception)]
+            try:
+                self.projects_api.add_resources(resource_ids_succ, publish_to)
+            except Exception as e:
+                _LOGGER.error(f"Error adding resources to project: {e}")
+                if on_error == 'raise':
+                    raise e
+
+        if mapping_idx:
+            _LOGGER.debug(f"Mapping indices for DICOM files: {mapping_idx}")
+            resource_ids = [resource_ids[idx] for idx in mapping_idx]
+
+        return resource_ids
+
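A batch-upload sketch under the same assumptions as the docstring examples (`api.resources` is this handler; the folder and project name are illustrative):

    # Upload every file in a folder, skipping failures instead of aborting.
    ids_or_errors = api.resources.upload_resources(
        files_path='path/to/study_folder',  # a directory is expanded to its files
        anonymize=True,
        tags=['tutorial'],
        publish_to='My Project',            # publishes and adds to the project
        on_error='skip',
        progress_bar=True,
    )
    for item in ids_or_errors:
        if isinstance(item, Exception):
            print('failed:', item)
        else:
            print('uploaded:', item)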
+    def upload_resource(self,
+                        file_path: str | IO | pydicom.Dataset,
+                        mimetype: str | None = None,
+                        anonymize: bool = False,
+                        anonymize_retain_codes: Sequence[tuple] = [],
+                        tags: Sequence[str] | None = None,
+                        mung_filename: Sequence[int] | Literal['all'] | None = None,
+                        channel: str | None = None,
+                        publish: bool = False,
+                        publish_to: str | None = None,
+                        segmentation_files: dict | None = None,
+                        transpose_segmentation: bool = False,
+                        modality: str | None = None,
+                        metadata: dict | str | None = None,
+                        discard_dicom_reports: bool = True
+                        ) -> str:
+        """
+        Upload a single resource.
+
+        This is a convenience method that wraps upload_resources for single file uploads.
+        It provides a cleaner interface when uploading just one file.
+
+        Args:
+            file_path: The path to the resource file or IO object.
+            mimetype: The mimetype of the resource. If None, it will be guessed.
+            anonymize: Whether to anonymize the DICOM or not.
+            anonymize_retain_codes: The tags to retain when anonymizing the DICOM.
+            tags: The tags to add to the resource.
+            mung_filename: The parts of the filepath to keep when renaming the resource file.
+                'all' keeps all parts.
+            channel: The channel to upload the resource to. An arbitrary name to group the resources.
+            publish: Whether to directly publish the resource or not. It will have the 'published' status.
+            publish_to: The project name or id to publish the resource to.
+                It will have the 'published' status and will be added to the project.
+                If this is set, the `publish` parameter is ignored.
+            segmentation_files: The segmentation files to upload. Should be a dict with:
+                - 'files': A list of paths to the segmentation files. Example: ['seg1.nii.gz', 'seg2.nii.gz'].
+                - 'names': A dict mapping pixel values to class names. Example: {1: 'Brain', 2: 'Lung'}.
+            transpose_segmentation: Whether to transpose the segmentation files or not.
+            modality: The modality of the resource.
+            metadata: JSON metadata to include with the resource.
+                Can be a file path (str) or an already loaded dictionary (dict).
+            discard_dicom_reports: Whether to discard DICOM reports or not.
+
+        Returns:
+            str: The resource ID of the uploaded resource.
+
+        Raises:
+            ResourceNotFoundError: If `publish_to` is supplied and the project does not exist.
+            DatamintException: If the upload fails.
+
+        Example:
+            .. code-block:: python
+
+                # Simple upload
+                resource_id = api.resources.upload_resource('path/to/file.dcm')
+
+                # Upload with metadata and segmentation
+                resource_id = api.resources.upload_resource(
+                    'path/to/file.dcm',
+                    tags=['tutorial', 'case1'],
+                    channel='study_channel',
+                    segmentation_files={
+                        'files': ['path/to/segmentation.nii.gz'],
+                        'names': {1: 'Bone', 2: 'Tissue'}
+                    },
+                    metadata={'patient_age': 45, 'modality': 'CT'}
+                )
+        """
+        # Convert segmentation_files to the format expected by upload_resources
+        segmentation_files_list: Optional[list[list[str] | dict]] = None
+        if segmentation_files is not None:
+            segmentation_files_list = [segmentation_files]
+
+        # Call upload_resources with a single file
+        result = self.upload_resources(
+            files_path=[file_path],
+            mimetype=mimetype,
+            anonymize=anonymize,
+            anonymize_retain_codes=anonymize_retain_codes,
+            tags=tags,
+            mung_filename=mung_filename,
+            channel=channel,
+            publish=publish,
+            publish_to=publish_to,
+            segmentation_files=segmentation_files_list,
+            transpose_segmentation=transpose_segmentation,
+            modality=modality,
+            metadata=[metadata],
+            discard_dicom_reports=discard_dicom_reports,
+            assemble_dicoms=False,  # No need to assemble for a single file
+            progress_bar=False  # Disable progress bar for single uploads
+        )
+
+        # upload_resources returns a list, so we extract the first element
+        if isinstance(result, Sequence) and len(result) == 1:
+            r = result[0]
+            if isinstance(r, Exception):
+                raise r
+            return r
+        else:
+            # This should not happen with single file uploads, but handle it just in case
+            raise DatamintException(f"Unexpected return from upload_resources: {type(result)} | {result}")
+
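Because `file_path` also accepts an in-memory `pydicom.Dataset`, a dataset can be modified and uploaded without writing it back to disk. A sketch (same illustrative `api` object and path):

    import pydicom

    ds = pydicom.dcmread('path/to/file.dcm')  # dcmread sets ds.filename, used as the upload name
    resource_id = api.resources.upload_resource(ds, anonymize=True, tags=['anonymized'])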
+    def _determine_mimetype(self,
+                            content,
+                            resource: str | Resource) -> tuple[str | None, str | None]:
+        # Determine mimetype from file content
+        mimetype_list, ext = guess_typez(content, use_magic=True)
+        mimetype = mimetype_list[-1]
+
+        # get mimetype from resource info if not detected
+        if mimetype is None or mimetype == DEFAULT_MIME_TYPE:
+            if not isinstance(resource, Resource):
+                resource = self.get_by_id(resource)
+            mimetype = resource.mimetype or mimetype
+
+        return mimetype, ext
+
+    async def _async_download_file(self,
+                                   resource: str | Resource,
+                                   save_path: str | Path,
+                                   session: aiohttp.ClientSession | None = None,
+                                   progress_bar: tqdm | None = None,
+                                   add_extension: bool = False) -> str:
+        """
+        Asynchronously download a file from the server.
+
+        Args:
+            resource: The resource unique id or Resource object.
+            save_path: The path to save the file.
+            session: The aiohttp session to use for the request.
+            progress_bar: Optional progress bar to update after download completion.
+            add_extension: Whether to add the appropriate file extension based on content type.
+
+        Returns:
+            str: The actual path where the file was saved (important when add_extension=True).
+        """
+        save_path = str(save_path)  # Ensure save_path is a string for file operations
+        resource_id = self._entid(resource)
+        try:
+            async with self._make_request_async('GET',
+                                                f'{self.endpoint_base}/{resource_id}/file',
+                                                session=session,
+                                                headers={'accept': 'application/octet-stream'}) as resp:
+                data_bytes = await resp.read()
+
+            final_save_path = save_path
+            if add_extension:
+                # Save to temporary file first to determine mimetype from content
+                temp_path = f"{save_path}.tmp"
+                with open(temp_path, 'wb') as f:
+                    f.write(data_bytes)
+
+                # Determine mimetype from file content
+                mimetype, ext = self._determine_mimetype(content=data_bytes,
+                                                         resource=resource)
+
+                # Generate final path with extension if needed
+                if mimetype is not None and mimetype != DEFAULT_MIME_TYPE:
+                    if ext is None:
+                        ext = guess_extension(mimetype)
+                    if ext is not None and not save_path.endswith(ext):
+                        final_save_path = save_path + ext
+
+                # Move file to final location
+                os.rename(temp_path, final_save_path)
+            else:
+                # Standard save without extension detection
+                with open(final_save_path, 'wb') as f:
+                    f.write(data_bytes)
+
+            if progress_bar:
+                progress_bar.update(1)
+
+            return final_save_path
+
+        except ResourceNotFoundError as e:
+            e.set_params('resource', {'resource_id': resource_id})
+            raise e
+
+    def download_multiple_resources(self,
+                                    resources: Sequence[str] | Sequence[Resource],
+                                    save_path: Sequence[str] | str,
+                                    add_extension: bool = False,
+                                    overwrite: bool = True
+                                    ) -> list[str]:
+        """
+        Download multiple resources and save them to the specified paths.
+        This is faster than downloading them one by one.
+
+        Args:
+            resources: A list of resource unique ids.
+            save_path: A list of paths to save the files (same length as `resources`) or a directory path.
+                If a directory path is provided, files will be saved in that directory.
+            add_extension: Whether to add the appropriate file extension to the save_path based on the content type.
+            overwrite: Whether to download files whose save path already exists. If False, existing files are skipped.
+
+        Returns:
+            list[str]: A list of paths where the files were saved. Important if `add_extension=True`.
+        """
+        if isinstance(resources, str):
+            raise ValueError("resources must be a list of resources")
+
+        async def _download_all_async():
+            async with aiohttp.ClientSession() as session:
+                tasks = [
+                    self._async_download_file(
+                        resource=r,
+                        save_path=path,
+                        session=session,
+                        progress_bar=progress_bar,
+                        add_extension=add_extension
+                    )
+                    for r, path in zip(resources, save_path)
+                ]
+                return await asyncio.gather(*tasks)
+
+        if isinstance(save_path, str):
+            save_path = [os.path.join(save_path, self._entid(r)) for r in resources]
+
+        if len(save_path) != len(resources):
+            raise ValueError("The number of save paths must match the number of resources.")
+
+        if not overwrite:
+            new_resources = []
+            new_save_path = []
+            for i in range(len(resources)):
+                if not os.path.exists(save_path[i]):
+                    new_resources.append(resources[i])
+                    new_save_path.append(save_path[i])
+            resources = new_resources
+            save_path = new_save_path
+
+        with tqdm(total=len(resources), desc="Downloading resources", unit="file") as progress_bar:
+            loop = asyncio.get_event_loop()
+            final_save_paths = loop.run_until_complete(_download_all_async())
+
+        return final_save_paths
+
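A batch-download sketch pairing `get_list` with this method (same illustrative `api` object and paths):

    import os

    os.makedirs('downloads', exist_ok=True)
    resources = api.resources.get_list(status='published', limit=10)
    paths = api.resources.download_multiple_resources(
        resources,
        save_path='downloads',  # each file lands in downloads/<resource_id>
        add_extension=True,     # appends an extension guessed from the content type
    )
    print(paths)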
+    def download_resource_file(self,
+                               resource: str | Resource,
+                               save_path: Optional[str] = None,
+                               auto_convert: bool = True,
+                               add_extension: bool = False
+                               ) -> bytes | pydicom.Dataset | Image.Image | cv2.VideoCapture | nib_FileBasedImage | tuple[Any, str]:
+        """
+        Download a resource file.
+
+        Args:
+            resource: The resource unique id or Resource instance.
+            save_path: The path to save the file.
+            auto_convert: Whether to convert the file to a known format or not.
+            add_extension: Whether to add the appropriate file extension to the save_path based on the content type.
+
+        Returns:
+            The resource content in bytes (if `auto_convert=False`) or the resource object (if `auto_convert=True`).
+            If `add_extension=True`, the function returns a tuple of (resource_data, save_path).
+
+        Raises:
+            ResourceNotFoundError: If the resource does not exist.
+
+        Example:
+            >>> api_handler.download_resource_file('resource_id', auto_convert=False)
+            returns the resource content in bytes.
+            >>> api_handler.download_resource_file('resource_id', auto_convert=True)
+            Assuming this resource is a dicom file, it will return a pydicom.Dataset object.
+            >>> api_handler.download_resource_file('resource_id', save_path='path/to/dicomfile.dcm')
+            saves the file in the specified path.
+        """
+        if save_path is None and add_extension:
+            raise ValueError("If add_extension is True, save_path must be provided.")
+
+        try:
+            response = self._make_entity_request('GET',
+                                                 resource,
+                                                 add_path='file',
+                                                 headers={'accept': 'application/octet-stream'})
+
+            # Get mimetype if needed for auto_convert or add_extension
+            mimetype = None
+            ext = None
+            if auto_convert or add_extension:
+                mimetype, ext = self._determine_mimetype(content=response.content,
+                                                         resource=resource)
+            if auto_convert:
+                if mimetype is None:
+                    _LOGGER.warning("Could not determine mimetype. Returning a bytes array.")
+                    resource_file = response.content
+                else:
+                    try:
+                        resource_file = BaseApi.convert_format(response.content,
+                                                               mimetype,
+                                                               save_path)
+                    except ValueError as e:
+                        _LOGGER.warning(f"Could not convert file to a known format: {e}")
+                        resource_file = response.content
+                    except NotImplementedError as e:
+                        _LOGGER.warning(f"Conversion not implemented yet for {mimetype} and save_path=None." +
+                                        " Returning a bytes array. If you want the conversion for this mimetype, provide a save_path.")
+                        resource_file = response.content
+            else:
+                resource_file = response.content
+        except ResourceNotFoundError as e:
+            e.set_params('resource', {'resource_id': self._entid(resource)})
+            raise e
+
+        if save_path is not None:
+            if add_extension and mimetype is not None:
+                if ext is None:
+                    ext = guess_extension(mimetype)
+                if ext is not None and not save_path.endswith(ext):
+                    save_path += ext
+            with open(save_path, 'wb') as f:
+                f.write(response.content)
+
+        if add_extension:
+            return resource_file, save_path
+        return resource_file
+
+    def download_resource_frame(self,
+                                resource: str | Resource,
+                                frame_index: int) -> Image.Image:
+        """
+        Download a frame of a resource.
+        This is faster than downloading the whole resource and then extracting the frame.
+
+        Args:
+            resource: The resource unique id or Resource object.
+            frame_index: The index of the frame to download.
+
+        Returns:
+            Image.Image: The frame as a PIL image.
+
+        Raises:
+            ResourceNotFoundError: If the resource does not exist.
+            DatamintException: If the resource is not a video or dicom.
+        """
+        # Check if the resource is a single-frame image (png, jpeg, ...) first.
+        # If so, download the whole resource file and return the image.
+        if not isinstance(resource, Resource):
+            resource = self.get_by_id(resource)
+        if resource.mimetype.startswith('image/') or resource.storage == 'ImageResource':
+            if frame_index != 0:
+                raise DatamintException(f"Resource {resource.id} is a single frame image, "
+                                        f"but frame_index is {frame_index}.")
+            return self.download_resource_file(resource, auto_convert=True)
+
+        try:
+            response = self._make_entity_request('GET',
+                                                 resource,
+                                                 add_path=f'frames/{frame_index}',
+                                                 headers={'accept': 'image/*'})
+            if response.status_code == 200:
+                return Image.open(io.BytesIO(response.content))
+            else:
+                raise DatamintException(
+                    f"Error downloading frame {frame_index} of resource {self._entid(resource)}: {response.text}")
+        except ResourceNotFoundError as e:
+            e.set_params('resource', {'resource_id': self._entid(resource)})
+            raise e
+
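A frame-extraction sketch (same illustrative `api` object and resource id):

    # Fetch only frame 10 of a multi-frame resource (e.g. a DICOM series or video),
    # without downloading the whole file.
    frame = api.resources.download_resource_frame('resource_id', frame_index=10)
    frame.save('frame_10.png')  # it is a PIL.Image.Image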
+    def publish_resources(self,
+                          resources: str | Resource | Sequence[str | Resource]) -> None:
+        """
+        Publish resources, changing their status to 'published'.
+
+        Args:
+            resources: The resources to publish. Can be a single Resource object or id (instead of a list).
+
+        Raises:
+            ResourceNotFoundError: If the resource does not exist or the project does not exist.
+        """
+        if isinstance(resources, (Resource, str)):
+            resources = [resources]
+
+        for resource in resources:
+            try:
+                self._make_entity_request('POST', resource, add_path='publish')
+            except ResourceNotFoundError as e:
+                e.set_params('resource', {'resource_id': self._entid(resource)})
+                raise
+            except httpx.HTTPError as e:
+                if BaseApi._has_status_code(e, 400) and 'Resource must be in inbox status to be approved' in e.response.text:
+                    _LOGGER.warning(f"Resource {resource} is not in inbox status. Skipping publishing")
+                else:
+                    raise
+
+    def set_tags(self,
+                 resource: str | Resource,
+                 tags: Sequence[str],
+                 ):
+        """Set the tags of a resource.
+
+        Args:
+            resource: The resource unique id or Resource object.
+            tags: The tags to set on the resource.
+        """
+        data = {'tags': tags}
+        resource_id = self._entid(resource)
+
+        response = self._make_entity_request('PUT',
+                                             resource_id,
+                                             add_path='tags',
+                                             json=data)
+        return response
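Putting the pieces together, a typical inbox-to-published flow might look like this sketch (same assumptions as the earlier examples; the tag is illustrative):

    # Tag everything still in the inbox, then publish it.
    inbox = api.resources.get_list(status='inbox')
    for res in inbox:
        api.resources.set_tags(res, ['reviewed'])
    api.resources.publish_resources(inbox)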