datamint 1.5.4__tar.gz → 1.6.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.


Files changed (28)
  1. {datamint-1.5.4 → datamint-1.6.0}/PKG-INFO +1 -1
  2. {datamint-1.5.4 → datamint-1.6.0}/datamint/apihandler/annotation_api_handler.py +231 -147
  3. {datamint-1.5.4 → datamint-1.6.0}/datamint/dataset/base_dataset.py +27 -21
  4. {datamint-1.5.4 → datamint-1.6.0}/datamint/dataset/dataset.py +34 -1
  5. {datamint-1.5.4 → datamint-1.6.0}/datamint/utils/dicom_utils.py +67 -0
  6. {datamint-1.5.4 → datamint-1.6.0}/pyproject.toml +1 -1
  7. {datamint-1.5.4 → datamint-1.6.0}/README.md +0 -0
  8. {datamint-1.5.4 → datamint-1.6.0}/datamint/__init__.py +0 -0
  9. {datamint-1.5.4 → datamint-1.6.0}/datamint/apihandler/api_handler.py +0 -0
  10. {datamint-1.5.4 → datamint-1.6.0}/datamint/apihandler/base_api_handler.py +0 -0
  11. {datamint-1.5.4 → datamint-1.6.0}/datamint/apihandler/dto/annotation_dto.py +0 -0
  12. {datamint-1.5.4 → datamint-1.6.0}/datamint/apihandler/exp_api_handler.py +0 -0
  13. {datamint-1.5.4 → datamint-1.6.0}/datamint/apihandler/root_api_handler.py +0 -0
  14. {datamint-1.5.4 → datamint-1.6.0}/datamint/client_cmd_tools/__init__.py +0 -0
  15. {datamint-1.5.4 → datamint-1.6.0}/datamint/client_cmd_tools/datamint_config.py +0 -0
  16. {datamint-1.5.4 → datamint-1.6.0}/datamint/client_cmd_tools/datamint_upload.py +0 -0
  17. {datamint-1.5.4 → datamint-1.6.0}/datamint/configs.py +0 -0
  18. {datamint-1.5.4 → datamint-1.6.0}/datamint/dataset/__init__.py +0 -0
  19. {datamint-1.5.4 → datamint-1.6.0}/datamint/examples/__init__.py +0 -0
  20. {datamint-1.5.4 → datamint-1.6.0}/datamint/examples/example_projects.py +0 -0
  21. {datamint-1.5.4 → datamint-1.6.0}/datamint/experiment/__init__.py +0 -0
  22. {datamint-1.5.4 → datamint-1.6.0}/datamint/experiment/_patcher.py +0 -0
  23. {datamint-1.5.4 → datamint-1.6.0}/datamint/experiment/experiment.py +0 -0
  24. {datamint-1.5.4 → datamint-1.6.0}/datamint/logging.yaml +0 -0
  25. {datamint-1.5.4 → datamint-1.6.0}/datamint/utils/io_utils.py +0 -0
  26. {datamint-1.5.4 → datamint-1.6.0}/datamint/utils/logging_utils.py +0 -0
  27. {datamint-1.5.4 → datamint-1.6.0}/datamint/utils/torchmetrics.py +0 -0
  28. {datamint-1.5.4 → datamint-1.6.0}/datamint/utils/visualization.py +0 -0
{datamint-1.5.4 → datamint-1.6.0}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: datamint
-Version: 1.5.4
+Version: 1.6.0
 Summary: A library for interacting with the Datamint API, designed for efficient data management, processing and Deep Learning workflows.
 Requires-Python: >=3.10
 Classifier: Programming Language :: Python :: 3
{datamint-1.5.4 → datamint-1.6.0}/datamint/apihandler/annotation_api_handler.py

@@ -17,25 +17,57 @@ import json
 
 _LOGGER = logging.getLogger(__name__)
 _USER_LOGGER = logging.getLogger('user_logger')
+MAX_NUMBER_DISTINCT_COLORS = 2048  # Maximum number of distinct colors in a segmentation image
 
 
 class AnnotationAPIHandler(BaseAPIHandler):
     @staticmethod
-    def _numpy_to_bytesio_png(seg_imgs: np.ndarray) -> Generator[BinaryIO, None, None]:
+    def _normalize_segmentation_array(seg_imgs: np.ndarray) -> np.ndarray:
         """
+        Normalize segmentation array to a consistent format.
+
         Args:
-            seg_img (np.ndarray): The segmentation image with dimensions (height, width, #frames).
+            seg_imgs: Input segmentation array in various formats: (height, width, #frames), (height, width), (3, height, width, #frames).
+
+        Returns:
+            np.ndarray: Shape (#channels, height, width, #frames)
         """
+        if seg_imgs.ndim == 4:
+            return seg_imgs  # .transpose(1, 2, 0, 3)
 
+        # Handle grayscale segmentations
         if seg_imgs.ndim == 2:
+            # Add frame dimension: (height, width) -> (height, width, 1)
             seg_imgs = seg_imgs[..., None]
+        if seg_imgs.ndim == 3:
+            # (height, width, #frames)
+            seg_imgs = seg_imgs[np.newaxis, ...]  # Add channel dimension: (1, height, width, #frames)
+
+        return seg_imgs
+
+    @staticmethod
+    def _numpy_to_bytesio_png(seg_imgs: np.ndarray) -> Generator[BinaryIO, None, None]:
+        """
+        Convert normalized segmentation images to PNG BytesIO objects.
+
+        Args:
+            seg_imgs: Normalized segmentation array in shape (channels, height, width, frames).
 
-        seg_imgs = seg_imgs.astype(np.uint8)
-        for i in range(seg_imgs.shape[2]):
-            img = seg_imgs[:, :, i]
-            img = Image.fromarray(img).convert('L')
+        Yields:
+            BinaryIO: PNG image data as BytesIO objects
+        """
+        # PIL RGB format is: (height, width, channels)
+        if seg_imgs.shape[0] not in [1, 3, 4]:
+            raise ValueError(f"Unsupported number of channels: {seg_imgs.shape[0]}. Expected 1 or 3")
+        nframes = seg_imgs.shape[3]
+        for i in range(nframes):
+            img = seg_imgs[:, :, :, i].astype(np.uint8)
+            if img.shape[0] == 1:
+                pil_img = Image.fromarray(img[0]).convert('RGB')
+            else:
+                pil_img = Image.fromarray(img.transpose(1, 2, 0))
             img_bytes = BytesIO()
-            img.save(img_bytes, format='PNG')
+            pil_img.save(img_bytes, format='PNG')
             img_bytes.seek(0)
             yield img_bytes
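The new _normalize_segmentation_array helper gives every input a uniform (channels, height, width, frames) layout before the PNG conversion step. A minimal sketch of the expected shape behavior, assuming the module path shown in the file list (illustrative, not part of the diff):

    import numpy as np
    from datamint.apihandler.annotation_api_handler import AnnotationAPIHandler

    # The internal helper normalizes all accepted layouts to 4-D.
    norm = AnnotationAPIHandler._normalize_segmentation_array

    print(norm(np.zeros((128, 128), dtype=np.uint8)).shape)         # (1, 128, 128, 1)
    print(norm(np.zeros((128, 128, 10), dtype=np.uint8)).shape)     # (1, 128, 128, 10)
    print(norm(np.zeros((3, 128, 128, 10), dtype=np.uint8)).shape)  # (3, 128, 128, 10), returned unchanged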
@@ -46,29 +78,42 @@ class AnnotationAPIHandler(BaseAPIHandler):
             raise ValueError(f"Unsupported file type: {type(file_path)}")
 
         if isinstance(file_path, np.ndarray):
-            segs_imgs = file_path  # (#frames, height, width) or (height, width)
+            normalized_imgs = AnnotationAPIHandler._normalize_segmentation_array(file_path)
+            # normalized_imgs shape: (3, height, width, #frames)
+
+            # Apply transpose if requested
             if transpose_segmentation:
-                segs_imgs = segs_imgs.transpose(1, 0, 2) if segs_imgs.ndim == 3 else segs_imgs.transpose(1, 0)
-            nframes = segs_imgs.shape[2] if segs_imgs.ndim == 3 else 1
-            fios = AnnotationAPIHandler._numpy_to_bytesio_png(segs_imgs)
+                # (channels, height, width, frames) -> (channels, width, height, frames)
+                normalized_imgs = normalized_imgs.transpose(0, 2, 1, 3)
+
+            nframes = normalized_imgs.shape[3]
+            fios = AnnotationAPIHandler._numpy_to_bytesio_png(normalized_imgs)
+
         elif file_path.endswith('.nii') or file_path.endswith('.nii.gz'):
             segs_imgs = nib.load(file_path).get_fdata()
             if segs_imgs.ndim != 3 and segs_imgs.ndim != 2:
                 raise ValueError(f"Invalid segmentation shape: {segs_imgs.shape}")
+
+            # Normalize and apply transpose
+            normalized_imgs = AnnotationAPIHandler._normalize_segmentation_array(segs_imgs)
             if not transpose_segmentation:
-                # The if is correct. The image is already in a different shape than nifty images.
-                segs_imgs = segs_imgs.transpose(1, 0, 2) if segs_imgs.ndim == 3 else segs_imgs.transpose(1, 0)
+                # Apply default NIfTI transpose
+                # (channels, width, height, frames) -> (channels, height, width, frames)
+                normalized_imgs = normalized_imgs.transpose(0, 2, 1, 3)
+
+            nframes = normalized_imgs.shape[3]
+            fios = AnnotationAPIHandler._numpy_to_bytesio_png(normalized_imgs)
 
-            fios = AnnotationAPIHandler._numpy_to_bytesio_png(segs_imgs)
-            nframes = segs_imgs.shape[2] if segs_imgs.ndim == 3 else 1
         elif file_path.endswith('.png'):
-            if transpose_segmentation:
-                with Image.open(file_path) as img:
-                    segs_imgs = np.array(img).transpose(1, 0)
-                fios = AnnotationAPIHandler._numpy_to_bytesio_png(segs_imgs)
-            else:
-                fios = (open(file_path, 'rb') for _ in range(1))
-            nframes = 1
+            with Image.open(file_path) as img:
+                img_array = np.array(img)
+                normalized_imgs = AnnotationAPIHandler._normalize_segmentation_array(img_array)
+
+            if transpose_segmentation:
+                normalized_imgs = normalized_imgs.transpose(0, 2, 1, 3)
+
+            fios = AnnotationAPIHandler._numpy_to_bytesio_png(normalized_imgs)
+            nframes = 1
         else:
             raise ValueError(f"Unsupported file format of '{file_path}'")
 
@@ -91,9 +136,9 @@ class AnnotationAPIHandler(BaseAPIHandler):
 
     async def _upload_single_frame_segmentation_async(self,
                                                       resource_id: str,
-                                                      frame_index: int,
+                                                      frame_index: int | None,
                                                       fio: IO,
-                                                      name: Optional[str | dict[int, str]] = None,
+                                                      name: dict[int, str] | dict[tuple, str],
                                                       imported_from: Optional[str] = None,
                                                       author_email: Optional[str] = None,
                                                       discard_empty_segmentations: bool = True,
@@ -107,7 +152,8 @@ class AnnotationAPIHandler(BaseAPIHandler):
             resource_id: The resource unique id.
             frame_index: The frame index for the segmentation.
             fio: File-like object containing the segmentation image.
-            name: The name of the segmentation or a dictionary mapping pixel values to names.
+            name: The name of the segmentation, a dictionary mapping pixel values to names,
+                or a dictionary mapping RGB tuples to names.
             imported_from: The imported from value.
             author_email: The author email.
             discard_empty_segmentations: Whether to discard empty segmentations.
@@ -119,21 +165,29 @@ class AnnotationAPIHandler(BaseAPIHandler):
        """
        try:
            try:
-                img = np.array(Image.open(fio))
+                img_pil = Image.open(fio)
+                img_array = np.array(img_pil)  # shape: (height, width, channels)
+                # Returns a list of (count, color) tuples
+                unique_vals = img_pil.getcolors(maxcolors=MAX_NUMBER_DISTINCT_COLORS)
+                # convert to list of RGB tuples
+                if unique_vals is None:
+                    raise ValueError(f'Number of unique colors exceeds {MAX_NUMBER_DISTINCT_COLORS}.')
+                unique_vals = [color for count, color in unique_vals]
+                # Remove black/transparent pixels
+                black_pixel = (0, 0, 0)
+                unique_vals = [rgb for rgb in unique_vals if rgb != black_pixel]
 
-                # Check that frame is not empty
-                uniq_vals = np.unique(img)
                if discard_empty_segmentations:
-                    if len(uniq_vals) == 1 and uniq_vals[0] == 0:
-                        msg = f"Discarding empty segmentation for frame {frame_index}"
+                    if len(unique_vals) == 0:
+                        msg = f"Discarding empty RGB segmentation for frame {frame_index}"
                        _LOGGER.debug(msg)
                        _USER_LOGGER.debug(msg)
                        return []
-                fio.seek(0)
-                # TODO: Optimize this. It is not necessary to open the image twice.
+                segnames = AnnotationAPIHandler._get_segmentation_names_rgb(unique_vals, names=name)
+                segs_generator = AnnotationAPIHandler._split_rgb_segmentations(img_array, unique_vals)
 
-                segnames = AnnotationAPIHandler._get_segmentation_names(uniq_vals, names=name)
-                segs_generator = AnnotationAPIHandler._split_segmentations(img, uniq_vals, fio)
+                fio.seek(0)
+                # TODO: Optimize this. It is not necessary to open the image twice.
 
                # Create annotations
                annotations: list[CreateAnnotationDto] = []
@@ -174,7 +228,6 @@ class AnnotationAPIHandler(BaseAPIHandler):
            resp = await self._run_request_async(request_params)
            if 'error' in resp:
                raise DatamintException(resp['error'])
-
            return annotids
        finally:
            fio.close()
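The empty-frame check above now relies on PIL's color histogram rather than np.unique. A small sketch of the getcolors contract the new code depends on: it returns (count, color) pairs, or None once the image holds more than maxcolors distinct colors, and black (0, 0, 0) is then filtered out as background:

    import numpy as np
    from PIL import Image

    arr = np.zeros((64, 64, 3), dtype=np.uint8)
    arr[:8, :8] = (255, 0, 0)  # small red region on a black background

    img = Image.fromarray(arr)
    colors = img.getcolors(maxcolors=2048)                # [(count, color), ...] or None
    colors = [color for count, color in colors]
    colors = [rgb for rgb in colors if rgb != (0, 0, 0)]  # drop background, as the handler does
    print(colors)                                         # [(255, 0, 0)]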
@@ -184,7 +237,7 @@ class AnnotationAPIHandler(BaseAPIHandler):
     async def _upload_volume_segmentation_async(self,
                                                 resource_id: str,
                                                 file_path: str | np.ndarray,
-                                                name: str | dict[int, str] | None = None,
+                                                name: dict[int, str] | dict[tuple, str],
                                                 imported_from: Optional[str] = None,
                                                 author_email: Optional[str] = None,
                                                 worklist_id: Optional[str] = None,
@@ -210,9 +263,6 @@ class AnnotationAPIHandler(BaseAPIHandler):
        Raises:
            ValueError: If name is not a string or file format is unsupported for volume upload.
        """
-        if name is None:
-            name = 'volume_segmentation'
-
        # Prepare file for upload
        if isinstance(file_path, str):
            if file_path.endswith('.nii') or file_path.endswith('.nii.gz'):
@@ -248,9 +298,8 @@ class AnnotationAPIHandler(BaseAPIHandler):
     async def _upload_segmentations_async(self,
                                           resource_id: str,
                                           frame_index: int | None,
-                                          file_path: str | np.ndarray | None = None,
-                                          fio: IO | None = None,
-                                          name: Optional[str | dict[int, str]] = None,
+                                          file_path: str | np.ndarray,
+                                          name: dict[int, str] | dict[tuple, str],
                                           imported_from: Optional[str] = None,
                                           author_email: Optional[str] = None,
                                           discard_empty_segmentations: bool = True,
@@ -266,7 +315,6 @@ class AnnotationAPIHandler(BaseAPIHandler):
            resource_id: The resource unique id.
            frame_index: The frame index or None for multiple frames.
            file_path: Path to segmentation file or numpy array.
-            fio: File-like object containing segmentation data.
            name: The name of the segmentation or mapping of pixel values to names.
            imported_from: The imported from value.
            author_email: The author email.
@@ -280,60 +328,44 @@ class AnnotationAPIHandler(BaseAPIHandler):
            List of annotation IDs created.
        """
        if upload_volume == 'auto':
-            if file_path is not None and (file_path.endswith('.nii') or file_path.endswith('.nii.gz')):
+            if isinstance(file_path, str) and (file_path.endswith('.nii') or file_path.endswith('.nii.gz')):
                upload_volume = True
            else:
                upload_volume = False
 
-        if file_path is not None:
-            # Handle volume upload
-            if upload_volume:
-                if frame_index is not None:
-                    _LOGGER.warning("frame_index parameter ignored when upload_volume=True")
-
-                return await self._upload_volume_segmentation_async(
-                    resource_id=resource_id,
-                    file_path=file_path,
-                    name=name,
-                    imported_from=imported_from,
-                    author_email=author_email,
-                    worklist_id=worklist_id,
-                    model_id=model_id,
-                    transpose_segmentation=transpose_segmentation
-                )
-
-            # Handle frame-by-frame upload (existing logic)
-            nframes, fios = AnnotationAPIHandler._generate_segmentations_ios(
-                file_path, transpose_segmentation=transpose_segmentation
+        # Handle volume upload
+        if upload_volume:
+            if frame_index is not None:
+                _LOGGER.warning("frame_index parameter ignored when upload_volume=True")
+
+            return await self._upload_volume_segmentation_async(
+                resource_id=resource_id,
+                file_path=file_path,
+                name=name,
+                imported_from=imported_from,
+                author_email=author_email,
+                worklist_id=worklist_id,
+                model_id=model_id,
+                transpose_segmentation=transpose_segmentation
            )
-            if frame_index is None:
-                frame_index = list(range(nframes))
-
-            annotids = []
-            for fidx, f in zip(frame_index, fios):
-                frame_annotids = await self._upload_single_frame_segmentation_async(
-                    resource_id=resource_id,
-                    frame_index=fidx,
-                    fio=f,
-                    name=name,
-                    imported_from=imported_from,
-                    author_email=author_email,
-                    discard_empty_segmentations=discard_empty_segmentations,
-                    worklist_id=worklist_id,
-                    model_id=model_id
-                )
-                annotids.extend(frame_annotids)
-            return annotids
-
-        # Handle single file-like object
-        if fio is not None:
-            if upload_volume:
-                raise ValueError("upload_volume=True is not supported when providing fio parameter")
-
-            return await self._upload_single_frame_segmentation_async(
+
+        # Handle frame-by-frame upload (existing logic)
+        nframes, fios = AnnotationAPIHandler._generate_segmentations_ios(
+            file_path, transpose_segmentation=transpose_segmentation
+        )
+        if frame_index is None:
+            frames_indices = list(range(nframes))
+        elif isinstance(frame_index, int):
+            frames_indices = [frame_index]
+        else:
+            raise ValueError("frame_index must be an int or None")
+
+        annotids = []
+        for fidx, f in zip(frames_indices, fios):
+            frame_annotids = await self._upload_single_frame_segmentation_async(
                resource_id=resource_id,
-                frame_index=frame_index,
-                fio=fio,
+                frame_index=fidx,
+                fio=f,
                name=name,
                imported_from=imported_from,
                author_email=author_email,
@@ -341,13 +373,30 @@ class AnnotationAPIHandler(BaseAPIHandler):
                worklist_id=worklist_id,
                model_id=model_id
            )
+            annotids.extend(frame_annotids)
+        return annotids
 
-        raise ValueError("Either file_path or fio must be provided")
+    @staticmethod
+    def standardize_segmentation_names(name: str | dict[int, str] | dict[tuple, str] | None) -> dict[tuple[int, int, int], str]:
+        if name is None:
+            return {}
+        elif isinstance(name, str):
+            return {'default': name}
+        elif isinstance(name, dict):
+            name = {
+                tuple(k) if isinstance(k, (list, tuple)) else k if isinstance(k, str) else (k, k, k): v
+                for k, v in name.items()
+            }
+            if 'default' not in name:
+                name['default'] = None
+            return name
+        else:
+            raise ValueError("Invalid name format")
 
     def upload_segmentations(self,
                              resource_id: str,
                              file_path: str | np.ndarray,
-                             name: Optional[str | dict[int, str]] = None,
+                             name: str | dict[int, str] | dict[tuple, str] | None = None,
                              frame_index: int | list[int] | None = None,
                              imported_from: Optional[str] = None,
                              author_email: Optional[str] = None,
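The new standardize_segmentation_names helper folds all accepted name formats into a single dict keyed by RGB tuples: a grayscale key k becomes (k, k, k), and a 'default' entry is added when missing. A quick sketch of the mapping, assuming the import path used earlier:

    from datamint.apihandler.annotation_api_handler import AnnotationAPIHandler

    std = AnnotationAPIHandler.standardize_segmentation_names

    print(std(None))                      # {}
    print(std('Femur'))                   # {'default': 'Femur'}
    print(std({1: 'Femur', 2: 'Tibia'}))  # {(1, 1, 1): 'Femur', (2, 2, 2): 'Tibia', 'default': None}
    print(std({(255, 0, 0): 'Red'}))      # {(255, 0, 0): 'Red', 'default': None}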
@@ -362,30 +411,46 @@ class AnnotationAPIHandler(BaseAPIHandler):
        Args:
            resource_id (str): The resource unique id.
            file_path (str|np.ndarray): The path to the segmentation file or a numpy array.
-                If a numpy array is provided, it must have the shape (height, width, #frames) or (height, width).
+                If a numpy array is provided, it can have the shape:
+                - (height, width, #frames) or (height, width) for grayscale segmentations
+                - (3, height, width, #frames) for RGB segmentations
                For NIfTI files (.nii/.nii.gz), the entire volume is uploaded as a single segmentation.
-            name (Optional[Union[str, Dict[int, str]]]): The name of the segmentation or a dictionary mapping pixel values to names.
-                example: {1: 'Femur', 2: 'Tibia'}. For NIfTI files, only string names are supported.
+            name: The name of the segmentation.
+                Can be:
+                - str: Single name for all segmentations
+                - dict[int, str]: Mapping pixel values to names for grayscale segmentations
+                - dict[tuple[int, int, int], str]: Mapping RGB tuples to names for RGB segmentations
+                Example: {(255, 0, 0): 'Red_Region', (0, 255, 0): 'Green_Region'}
            frame_index (int | list[int]): The frame index of the segmentation.
                If a list, it must have the same length as the number of frames in the segmentation.
                If None, it is assumed that the segmentations are in sequential order starting from 0.
                This parameter is ignored for NIfTI files as they are treated as volume segmentations.
            discard_empty_segmentations (bool): Whether to discard empty segmentations or not.
-                This is ignored for NIfTI files.
 
        Returns:
-            str: The segmentation unique id.
+            List[str]: List of segmentation unique ids.
 
        Raises:
            ResourceNotFoundError: If the resource does not exists or the segmentation is invalid.
 
        Example:
+            >>> # Grayscale segmentation
            >>> api_handler.upload_segmentation(resource_id, 'path/to/segmentation.png', 'SegmentationName')
+            >>>
+            >>> # RGB segmentation with numpy array
+            >>> seg_data = np.random.randint(0, 3, size=(3, 2140, 1760, 1), dtype=np.uint8)
+            >>> rgb_names = {(1, 0, 0): 'Red_Region', (0, 1, 0): 'Green_Region', (0, 0, 1): 'Blue_Region'}
+            >>> api_handler.upload_segmentation(resource_id, seg_data, rgb_names)
+            >>>
+            >>> # Volume segmentation
            >>> api_handler.upload_segmentation(resource_id, 'path/to/segmentation.nii.gz', 'VolumeSegmentation')
        """
+
        if isinstance(file_path, str) and not os.path.exists(file_path):
            raise FileNotFoundError(f"File {file_path} not found.")
 
+        name = AnnotationAPIHandler.standardize_segmentation_names(name)
+
        # Handle NIfTI files specially - upload as single volume
        if isinstance(file_path, str) and (file_path.endswith('.nii') or file_path.endswith('.nii.gz')):
            _LOGGER.info(f"Uploading NIfTI segmentation file: {file_path}")
@@ -407,33 +472,32 @@ class AnnotationAPIHandler(BaseAPIHandler):
            )
            return loop.run_until_complete(task)
        # All other file types are converted to multiple PNGs and uploaded frame by frame.
-        if isinstance(frame_index, int):
-            frame_index = [frame_index]
 
-        loop = asyncio.get_event_loop()
        to_run = []
        # Generate IOs for the segmentations.
        nframes, fios = AnnotationAPIHandler._generate_segmentations_ios(file_path,
                                                                         transpose_segmentation=transpose_segmentation)
        if frame_index is None:
            frame_index = list(range(nframes))
-        elif len(frame_index) != nframes:
-            raise ValueError("Do not provide frame_index for images of multiple frames.")
-        #######
+        elif isinstance(frame_index, int):
+            frame_index = [frame_index]
+        if len(frame_index) != nframes:
+            raise ValueError(f'Expected {nframes} frame_index values, but got {len(frame_index)}.')
 
        # For each frame, create the annotations and upload the segmentations.
        for fidx, f in zip(frame_index, fios):
-            task = self._upload_segmentations_async(resource_id,
-                                                    fio=f,
-                                                    name=name,
-                                                    frame_index=fidx,
-                                                    imported_from=imported_from,
-                                                    author_email=author_email,
-                                                    discard_empty_segmentations=discard_empty_segmentations,
-                                                    worklist_id=worklist_id,
-                                                    model_id=model_id)
+            task = self._upload_single_frame_segmentation_async(resource_id,
+                                                                fio=f,
+                                                                name=name,
+                                                                frame_index=fidx,
+                                                                imported_from=imported_from,
+                                                                author_email=author_email,
+                                                                discard_empty_segmentations=discard_empty_segmentations,
+                                                                worklist_id=worklist_id,
+                                                                model_id=model_id)
            to_run.append(task)
 
+        loop = asyncio.get_event_loop()
        ret = loop.run_until_complete(asyncio.gather(*to_run))
        # merge the results in a single list
        ret = [item for sublist in ret for item in sublist]
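Putting the pieces together, a hedged end-to-end sketch of the updated public entry point; the handler construction and resource id are placeholders, and credential configuration is omitted:

    import numpy as np
    from datamint.apihandler.api_handler import APIHandler

    api_handler = APIHandler()  # placeholder construction; API key/configuration not shown

    # Grayscale multi-frame mask: pixel value 1 = Femur, 2 = Tibia
    mask = np.zeros((512, 512, 4), dtype=np.uint8)
    mask[100:200, 100:200, :] = 1
    mask[300:400, 300:400, :] = 2

    annotation_ids = api_handler.upload_segmentations(
        resource_id='<resource-id>',  # placeholder id
        file_path=mask,
        name={1: 'Femur', 2: 'Tibia'},
        discard_empty_segmentations=True,
    )
    print(annotation_ids)  # list of created segmentation ids, one per value per frame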
@@ -831,7 +895,7 @@ class AnnotationAPIHandler(BaseAPIHandler):
 
        Args:
            resource_id (Optional[str]): The resource unique id.
-            annotation_type (Optional[str]): The annotation type. See :class:`~datamint.dto.annotation_dto.AnnotationType`.
+            annotation_type (AnnotationType | str | None): The annotation type. See :class:`~datamint.dto.annotation_dto.AnnotationType`.
            annotator_email (Optional[str]): The annotator email.
            date_from (Optional[date]): The start date.
            date_to (Optional[date]): The end date.
@@ -843,7 +907,6 @@ class AnnotationAPIHandler(BaseAPIHandler):
        Returns:
            Generator[dict, None, None]: A generator of dictionaries with the annotations information.
        """
-        # TODO: create annotation_type enum
 
        if annotation_type is not None and isinstance(annotation_type, AnnotationType):
            annotation_type = annotation_type.value
@@ -962,40 +1025,61 @@ class AnnotationAPIHandler(BaseAPIHandler):
        self._run_request(request_params)
 
    @staticmethod
-    def _get_segmentation_names(uniq_vals: np.ndarray,
-                                names: Optional[str | dict[int, str]] = None
-                                ) -> list[str]:
-        uniq_vals = uniq_vals[uniq_vals != 0]
-        if names is None:
-            names = 'seg'
-        if isinstance(names, str):
-            if len(uniq_vals) == 1:
-                return [names]
-            return [f'{names}_{v}' for v in uniq_vals]
-        if isinstance(names, dict):
-            for v in uniq_vals:
-                new_name = names.get(v, names.get('default', None))
-                if new_name is None:
-                    raise ValueError(f"Value {v} not found in names dictionary." +
-                                     f" Provide a name for {v} or use 'default' key to provide a prefix.")
-            return [names.get(v, names.get('default', '')+'_'+str(v)) for v in uniq_vals]
-        raise ValueError("names must be a string or a dictionary.")
+    def _get_segmentation_names_rgb(uniq_rgb_vals: list[tuple[int, int, int]],
+                                    names: dict[tuple[int, int, int], str]
+                                    ) -> list[str]:
+        """
+        Generate segmentation names for RGB combinations.
+
+        Args:
+            uniq_rgb_vals: List of unique RGB combinations as (R,G,B) tuples
+            names: Name mapping for RGB combinations
+
+        Returns:
+            List of segmentation names
+        """
+        result = []
+        for rgb_tuple in uniq_rgb_vals:
+            seg_name = names.get(rgb_tuple, names.get('default', f'seg_{"_".join(map(str, rgb_tuple))}'))
+            if seg_name is None:
+                if rgb_tuple[0] == rgb_tuple[1] and rgb_tuple[1] == rgb_tuple[2]:
+                    msg = f"Provide a name for {rgb_tuple} or {rgb_tuple[0]} or use 'default' key."
+                else:
+                    msg = f"Provide a name for {rgb_tuple} or use 'default' key."
+                raise ValueError(f"RGB combination {rgb_tuple} not found in names dictionary. " +
+                                 msg)
+            # If using default prefix, append RGB values
+            # if rgb_tuple not in names and 'default' in names:
+            #     seg_name = f"{seg_name}_{'_'.join(map(str, rgb_tuple))}"
+            result.append(seg_name)
+        return result
 
    @staticmethod
-    def _split_segmentations(img: np.ndarray,
-                             uniq_vals: np.ndarray,
-                             f: IO,
-                             ) -> Generator[BytesIO, None, None]:
-        # remove zero from uniq_vals
-        uniq_vals = uniq_vals[uniq_vals != 0]
-
-        for v in uniq_vals:
-            img_v = (img == v).astype(np.uint8)
-
-            f = BytesIO()
-            Image.fromarray(img_v*255).convert('RGB').save(f, format='PNG')
-            f.seek(0)
-            yield f
+    def _split_rgb_segmentations(img: np.ndarray,
+                                 uniq_rgb_vals: list[tuple[int, int, int]]
+                                 ) -> Generator[BytesIO, None, None]:
+        """
+        Split RGB segmentations into individual binary masks.
+
+        Args:
+            img: RGB image array of shape (height, width, channels)
+            uniq_rgb_vals: List of unique RGB combinations as (R,G,B) tuples
+
+        Yields:
+            BytesIO objects containing individual segmentation masks
+        """
+        for rgb_tuple in uniq_rgb_vals:
+            # Create binary mask for this RGB combination
+            rgb_array = np.array(rgb_tuple[:3])  # Ensure only R,G,B values
+            mask = np.all(img[:, :, :3] == rgb_array, axis=2)
+
+            # Convert to uint8 and create PNG
+            mask_img = (mask * 255).astype(np.uint8)
+
+            f_out = BytesIO()
+            Image.fromarray(mask_img).convert('L').save(f_out, format='PNG')
+            f_out.seek(0)
+            yield f_out
 
    def delete_annotation(self, annotation_id: str | dict):
        if isinstance(annotation_id, dict):
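The core of _split_rgb_segmentations is a per-color equality test across the channel axis; the same idea in isolation:

    import numpy as np

    img = np.zeros((4, 4, 3), dtype=np.uint8)
    img[0, 0] = (255, 0, 0)
    img[1, 1] = (0, 255, 0)

    for rgb in [(255, 0, 0), (0, 255, 0)]:
        mask = np.all(img[:, :, :3] == np.array(rgb), axis=2)  # (H, W) boolean mask
        print(rgb, int(mask.sum()))  # each color yields its own binary mask (1 pixel here)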
{datamint-1.5.4 → datamint-1.6.0}/datamint/dataset/base_dataset.py

@@ -7,15 +7,16 @@ import shutil
 import json
 import yaml
 import pydicom
+from pydicom.dataset import FileDataset
 import numpy as np
 from datamint import configs
 from torch.utils.data import DataLoader
 import torch
+from torch import Tensor
 from datamint.apihandler.base_api_handler import DatamintException
 from datamint.utils.dicom_utils import is_dicom
 import cv2
 from datamint.utils.io_utils import read_array_normalized
-from deprecated import deprecated
 from datetime import datetime
 
 _LOGGER = logging.getLogger(__name__)
@@ -80,7 +81,7 @@ class DatamintBaseDataset:
                 exclude_frame_label_names: Optional[list[str]] = None
                 ):
        from datamint.apihandler.api_handler import APIHandler
-
+
        if project_name is None:
            raise ValueError("project_name is required.")
 
@@ -204,6 +205,9 @@ class DatamintBaseDataset:
        self.dataset_length = len(self.images_metainfo)
 
        self.num_frames_per_resource = self.__compute_num_frames_per_resource()
+
+        # Precompute cumulative frame counts for faster index lookup
+        self._cumulative_frames = np.cumsum([0] + self.num_frames_per_resource)
 
        self.subset_indices = list(range(self.dataset_length))
        # self.labels_set, self.label2code, self.segmentation_labels, self.segmentation_label2code = self.get_labels_set()
@@ -309,7 +313,7 @@ class DatamintBaseDataset:
            scope (str): The scope of the annotations. It can be 'frame', 'image' or 'all'.
 
        Returns:
-            List[Dict]: The annotations of the image.
+            list[dict]: The annotations of the image.
        """
        if index >= len(self):
            raise IndexError(f"Index {index} out of bounds for dataset of length {len(self)}")
@@ -591,7 +595,8 @@ class DatamintBaseDataset:
        with open(datasetjson, 'w') as file:
            json.dump(self.metainfo, file)
 
-    def _load_image(self, filepath: str, index: int = None) -> tuple[torch.Tensor, pydicom.FileDataset]:
+    def _load_image(self, filepath: str,
+                    index: int | None = None) -> tuple[Tensor, FileDataset | None]:
        if os.path.isdir(filepath):
            raise NotImplementedError("Loading a image from a directory is not supported yet.")
 
@@ -601,14 +606,14 @@ class DatamintBaseDataset:
        img, ds = read_array_normalized(filepath, return_metainfo=True)
 
        if img.dtype == np.uint16:
-            # Pytorch doesn't support uint16
-            if self.__logged_uint16_conversion == False:
+            if not self.__logged_uint16_conversion:
                _LOGGER.info("Original image is uint16, converting to uint8")
                self.__logged_uint16_conversion = True
 
            # min-max normalization
            img = img.astype(np.float32)
-            img = (img - img.min()) / (img.max() - img.min()) * 255
+            mn = img.min()
+            img = (img - mn) / (img.max() - mn) * 255
            img = img.astype(np.uint8)
 
        img = torch.from_numpy(img).contiguous()
@@ -618,7 +623,7 @@ class DatamintBaseDataset:
        return img, ds
 
    def _get_image_metainfo(self, index: int, bypass_subset_indices=False) -> dict[str, Any]:
-        if bypass_subset_indices == False:
+        if not bypass_subset_indices:
            index = self.subset_indices[index]
        if self.return_frame_by_frame:
            # Find the correct filepath and index
@@ -635,17 +640,18 @@ class DatamintBaseDataset:
        return img_metainfo
 
    def __find_index(self, index: int) -> tuple[int, int]:
-        frame_index = index
-        for i, num_frames in enumerate(self.num_frames_per_resource):
-            if frame_index < num_frames:
-                break
-            frame_index -= num_frames
-        else:
-            raise IndexError(f"Index {index} out of bounds for dataset of length {len(self)}")
-
-        return i, frame_index
+        """
+        Find the resource index and frame index for a given global frame index.
+
+        """
+        # Use binary search to find the resource containing this frame
+        resource_index = np.searchsorted(self._cumulative_frames[1:], index, side='right')
+        frame_index = index - self._cumulative_frames[resource_index]
+
+        return resource_index, frame_index
 
-    def __getitem_internal(self, index: int, only_load_metainfo=False) -> dict[str, Any]:
+    def __getitem_internal(self, index: int,
+                           only_load_metainfo=False) -> dict[str, Tensor | FileDataset | dict | list]:
        if self.return_frame_by_frame:
            resource_index, frame_idx = self.__find_index(index)
        else:
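The linear scan in __find_index is replaced by an np.searchsorted lookup over the precomputed cumulative frame counts; a worked example of the arithmetic:

    import numpy as np

    num_frames_per_resource = [3, 5, 2]
    cum = np.cumsum([0] + num_frames_per_resource)  # [0, 3, 8, 10]

    for index in [0, 2, 3, 7, 9]:
        resource_index = np.searchsorted(cum[1:], index, side='right')
        frame_index = index - cum[resource_index]
        print(index, '->', (int(resource_index), int(frame_index)))
    # 0 -> (0, 0), 2 -> (0, 2), 3 -> (1, 0), 7 -> (1, 4), 9 -> (2, 1)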
@@ -711,7 +717,7 @@ class DatamintBaseDataset:
 
        return filtered_annotations
 
-    def __getitem__(self, index: int) -> dict[str, Any]:
+    def __getitem__(self, index: int) -> dict[str, Tensor | FileDataset | dict | list]:
        """
        Args:
            index (int): Index
@@ -725,8 +731,8 @@ class DatamintBaseDataset:
        return self.__getitem_internal(self.subset_indices[index])
 
    def __iter__(self):
-        for i in range(len(self)):
-            yield self[i]
+        for index in self.subset_indices:
+            yield self.__getitem_internal(index)
 
    def __len__(self) -> int:
        return len(self.subset_indices)
{datamint-1.5.4 → datamint-1.6.0}/datamint/dataset/dataset.py

@@ -287,7 +287,7 @@ class DatamintDataset(DatamintBaseDataset):
        if len(all_masks_list) != 0:
            all_masks_list = torch.concatenate(all_masks_list).numpy().astype(np.uint8)
        else:
-            all_masks_list = None#np.empty((0,img.shape[-2], img.shape[-1]), dtype=np.uint8)
+            all_masks_list = None  # np.empty((0,img.shape[-2], img.shape[-1]), dtype=np.uint8)
 
        augmented = self.alb_transform(image=img.numpy().transpose(1, 2, 0),
                                       masks=all_masks_list)
@@ -308,6 +308,36 @@ class DatamintDataset(DatamintBaseDataset):
 
        return augmented['image'], new_segmentations
 
+    def _seg_labels_to_names(self, seg_labels: dict | list | None) -> dict | list | None:
+        """
+        Convert segmentation label codes to label names.
+
+        Args:
+            seg_labels: Segmentation labels in various formats:
+                - dict[str, list[Tensor]]: author -> list of frame tensors with label codes
+                - dict[str, Tensor]: author -> tensor with label codes
+                - list[Tensor]: list of frame tensors with label codes
+                - Tensor: tensor with label codes
+                - None: when no segmentation labels are available
+
+        Returns:
+            Same structure as input but with label codes converted to label names.
+            Returns None if input is None.
+        """
+        if seg_labels is None:
+            return None
+
+        code_to_name = self.segmentation_labels_set
+        if isinstance(seg_labels, dict):
+            # author -> list of frame tensors
+            return {author: [code_to_name[code.item()] for code in labels] for author, labels in seg_labels.items()}
+        elif isinstance(seg_labels, list):
+            # list of frame tensors
+            return [[code_to_name[code.item()] for code in labels] for labels in seg_labels]
+
+        _LOGGER.warning(f"Unexpected segmentation labels format: {type(seg_labels)}. Returning None")
+        return None
+
    def __getitem__(self, index) -> dict[str, Any]:
        """
        Get the item at the given index.
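The conversion in _seg_labels_to_names simply indexes the dataset's label set with each tensor code. A minimal sketch of the dict case, where code_to_name stands in for self.segmentation_labels_set (assumed to map codes to names):

    import torch

    code_to_name = {1: 'Femur', 2: 'Tibia'}  # stand-in for self.segmentation_labels_set

    seg_labels = {'alice@example.com': torch.tensor([1, 2])}
    named = {author: [code_to_name[code.item()] for code in labels]
             for author, labels in seg_labels.items()}
    print(named)  # {'alice@example.com': ['Femur', 'Tibia']}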
@@ -401,6 +431,9 @@ class DatamintDataset(DatamintBaseDataset):
                    seg_labels = seg_labels[0]
                new_item['segmentations'] = segmentations
                new_item['seg_labels'] = seg_labels
+                # process seg_labels to convert from code to label names
+                new_item['seg_labels_names'] = self._seg_labels_to_names(seg_labels)
+
            except Exception:
                _LOGGER.error(f'Error in loading/processing segmentations of {metainfo}')
                raise
{datamint-1.5.4 → datamint-1.6.0}/datamint/utils/dicom_utils.py

@@ -638,3 +638,70 @@ def pixel_to_patient(ds: pydicom.Dataset,
    patient_coords = image_position + pixel_x * pixel_spacing[0] * row_vector + pixel_y * pixel_spacing[1] * col_vector
 
    return patient_coords
+
+
+def determine_anatomical_plane(ds: pydicom.Dataset,
+                               slice_axis: int,
+                               alignment_threshold: float = 0.95) -> str:
+    """
+    Determine the anatomical plane of a DICOM slice (Axial, Sagittal, Coronal, Oblique, or Unknown).
+
+    Args:
+        ds (pydicom.Dataset): The DICOM dataset containing the image metadata.
+        slice_axis (int): The axis of the slice to analyze (0, 1, or 2).
+        alignment_threshold (float): Threshold for considering alignment with anatomical axes.
+
+    Returns:
+        str: The name of the anatomical plane ('Axial', 'Sagittal', 'Coronal', 'Oblique', or 'Unknown').
+
+    Raises:
+        ValueError: If `slice_index` is not 0, 1, or 2.
+    """
+
+    if slice_axis not in [0, 1, 2]:
+        raise ValueError("slice_index must be 0, 1 or 2")
+    # Check if Image Orientation Patient exists
+    if not hasattr(ds, 'ImageOrientationPatient') or ds.ImageOrientationPatient is None:
+        return "Unknown"
+    # Get the Image Orientation Patient (IOP) - 6 values defining row and column directions
+    iop = np.array(ds.ImageOrientationPatient, dtype=float)
+    if len(iop) != 6:
+        return "Unknown"
+    # Extract row and column direction vectors
+    row_dir = iop[:3]  # First 3 values: row direction cosines
+    col_dir = iop[3:]  # Last 3 values: column direction cosines
+    # Calculate the normal vector (slice direction) using cross product
+    normal = np.cross(row_dir, col_dir)
+    normal = normal / np.linalg.norm(normal)  # Normalize
+    # Define standard anatomical axes
+    # LPS coordinate system: L = Left, P = Posterior, S = Superior
+    axes = {
+        'sagittal': np.array([1, 0, 0]),  # L-R axis (left-right)
+        'coronal': np.array([0, 1, 0]),   # A-P axis (anterior-posterior)
+        'axial': np.array([0, 0, 1])      # S-I axis (superior-inferior)
+    }
+    # For each slice_index, determine which axis we're examining
+    if slice_axis == 0:
+        # ds.pixel_array[0,:,:] - slicing along first dimension
+        # The normal vector corresponds to the direction we're slicing through
+        examine_vector = normal
+    elif slice_axis == 1:
+        # ds.pixel_array[:,0,:] - slicing along second dimension
+        # This corresponds to the row direction
+        examine_vector = row_dir
+    elif slice_axis == 2:
+        # ds.pixel_array[:,:,0] - slicing along third dimension
+        # This corresponds to the column direction
+        examine_vector = col_dir
+    # Find which anatomical axis is most aligned with our examine_vector
+    max_dot = 0
+    best_axis = "Unknown"
+    for axis_name, axis_vector in axes.items():
+        dot_product = abs(np.dot(examine_vector, axis_vector))
+        if dot_product > max_dot:
+            max_dot = dot_product
+            best_axis = axis_name
+    if max_dot >= alignment_threshold:
+        return best_axis.capitalize()
+    else:
+        return "Oblique"
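A short usage sketch for the new helper; the orientation below describes a pure axial acquisition (rows run left-right, columns run anterior-posterior):

    import pydicom
    from datamint.utils.dicom_utils import determine_anatomical_plane

    ds = pydicom.Dataset()
    ds.ImageOrientationPatient = [1, 0, 0, 0, 1, 0]

    print(determine_anatomical_plane(ds, slice_axis=0))  # 'Axial': the slice normal points S-I
    print(determine_anatomical_plane(ds, slice_axis=1))  # 'Sagittal': rows align with the L-R axis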
{datamint-1.5.4 → datamint-1.6.0}/pyproject.toml

@@ -1,7 +1,7 @@
 [project]
 name = "datamint"
 description = "A library for interacting with the Datamint API, designed for efficient data management, processing and Deep Learning workflows."
-version = "1.5.4"
+version = "1.6.0"
 dynamic = ["dependencies"]
 requires-python = ">=3.10"
 readme = "README.md"