datamint 1.6.0__py3-none-any.whl → 1.6.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.


@@ -10,7 +10,6 @@ import os
  import asyncio
  import aiohttp
  from requests.exceptions import HTTPError
- from deprecated.sphinx import deprecated
  from .dto.annotation_dto import CreateAnnotationDto, LineGeometry, BoxGeometry, CoordinateSystem, AnnotationType
  import pydicom
  import json
@@ -237,7 +236,7 @@ class AnnotationAPIHandler(BaseAPIHandler):
  async def _upload_volume_segmentation_async(self,
  resource_id: str,
  file_path: str | np.ndarray,
- name: dict[int, str] | dict[tuple, str],
+ name: str | dict[int, str] | dict[tuple, str] | None,
  imported_from: Optional[str] = None,
  author_email: Optional[str] = None,
  worklist_id: Optional[str] = None,
@@ -263,6 +262,13 @@ class AnnotationAPIHandler(BaseAPIHandler):
  Raises:
  ValueError: If name is not a string or file format is unsupported for volume upload.
  """
+
+ if isinstance(name, str):
+ raise NotImplementedError("`name=string` is not supported yet for volume segmentation.")
+ if isinstance(name, dict):
+ if any(isinstance(k, tuple) for k in name.keys()):
+ raise NotImplementedError("For volume segmentations, `name` must be a dictionary with integer keys only.")
+
  # Prepare file for upload
  if isinstance(file_path, str):
  if file_path.endswith('.nii') or file_path.endswith('.nii.gz'):
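
After this hunk, `_upload_volume_segmentation_async` only accepts `None` or an integer-keyed mapping for `name`; plain strings and tuple-keyed dicts are rejected with NotImplementedError. A minimal call sketch, where the handler instance, resource id, and label names are hypothetical and not taken from the diff:

    segmentation_map = {1: 'liver', 2: 'spleen'}   # dict[int, str]: accepted
    # segmentation_map = 'liver'                   # str: raises NotImplementedError
    # segmentation_map = {(255, 0, 0): 'liver'}    # tuple keys: raises NotImplementedError
    await handler._upload_volume_segmentation_async(
        resource_id='<resource-id>',
        file_path='mask.nii.gz',
        name=segmentation_map,
    )
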
@@ -275,7 +281,8 @@ class AnnotationAPIHandler(BaseAPIHandler):
  form.add_field('model_id', model_id) # Add model_id if provided
  if worklist_id is not None:
  form.add_field('annotation_worklist_id', worklist_id)
- form.add_field('segmentation_map', json.dumps(name), content_type='application/json')
+ if name is not None:
+ form.add_field('segmentation_map', json.dumps(name), content_type='application/json')

  request_params = dict(
  method='POST',
@@ -449,30 +456,27 @@ class AnnotationAPIHandler(BaseAPIHandler):
  if isinstance(file_path, str) and not os.path.exists(file_path):
  raise FileNotFoundError(f"File {file_path} not found.")

- name = AnnotationAPIHandler.standardize_segmentation_names(name)
-
  # Handle NIfTI files specially - upload as single volume
  if isinstance(file_path, str) and (file_path.endswith('.nii') or file_path.endswith('.nii.gz')):
  _LOGGER.info(f"Uploading NIfTI segmentation file: {file_path}")
  if frame_index is not None:
  raise ValueError("Do not provide frame_index for NIfTI segmentations.")
  loop = asyncio.get_event_loop()
- task = self._upload_segmentations_async(
+ task = self._upload_volume_segmentation_async(
  resource_id=resource_id,
- frame_index=None,
  file_path=file_path,
  name=name,
  imported_from=imported_from,
  author_email=author_email,
- discard_empty_segmentations=False,
  worklist_id=worklist_id,
  model_id=model_id,
- transpose_segmentation=transpose_segmentation,
- upload_volume=True
+ transpose_segmentation=transpose_segmentation
  )
  return loop.run_until_complete(task)
  # All other file types are converted to multiple PNGs and uploaded frame by frame.

+ name = AnnotationAPIHandler.standardize_segmentation_names(name)
+
  to_run = []
  # Generate IOs for the segmentations.
  nframes, fios = AnnotationAPIHandler._generate_segmentations_ios(file_path,
@@ -18,7 +18,7 @@ import json
  from typing import Any, TypeAlias, Literal
  import logging
  from enum import Enum
- from datamint.utils.dicom_utils import pixel_to_patient
+ from medimgkit.dicom_utils import pixel_to_patient
  import pydicom
  import numpy as np

@@ -6,8 +6,8 @@ from requests.exceptions import HTTPError
  import logging
  import asyncio
  import aiohttp
- from datamint.utils.dicom_utils import anonymize_dicom, to_bytesio, is_dicom
- from datamint.utils import dicom_utils
+ from medimgkit.dicom_utils import anonymize_dicom, to_bytesio, is_dicom
+ from medimgkit import dicom_utils
  import pydicom
  from pathlib import Path
  from datetime import date
@@ -447,6 +447,8 @@ class RootAPIHandler(BaseAPIHandler):
  for segfiles in segmentation_files]

  for segfiles in segmentation_files:
+ if segfiles is None:
+ continue
  if 'files' not in segfiles:
  raise ValueError("segmentation_files must contain a 'files' key with a list of file paths.")
  if 'names' in segfiles:
@@ -5,7 +5,7 @@ from humanize import naturalsize
  import logging
  from pathlib import Path
  import sys
- from datamint.utils.dicom_utils import is_dicom
+ from medimgkit.dicom_utils import is_dicom
  import fnmatch
  from typing import Generator, Optional, Any
  from collections import defaultdict
@@ -15,6 +15,7 @@ from datamint.client_cmd_tools.datamint_config import ask_api_key
  from datamint.utils.logging_utils import load_cmdline_logging_config
  import yaml
  from collections.abc import Iterable
+ import pandas as pd

  # Create two loggings: one for the user and one for the developer
  _LOGGER = logging.getLogger(__name__)
@@ -23,6 +24,38 @@ _USER_LOGGER = logging.getLogger('user_logger')
  MAX_RECURSION_LIMIT = 1000


+ def _read_segmentation_names(segmentation_names_path: str | Path) -> dict:
+ """
+ Read a segmentation names file (yaml or csv) and return its content as a dictionary.
+ If the file is a YAML file, it should contain two keys: "segmentation_names" and "class_names".
+ If the file is a CSV file, it should contain the following columns:
+ index, r, g, b, ..., name
+ """
+ segmentation_names_path = Path(segmentation_names_path)
+ if segmentation_names_path.suffix in ['.yaml', '.yml']:
+ with open(segmentation_names_path, 'r') as f:
+ metadata = yaml.safe_load(f)
+ elif segmentation_names_path.suffix in ['.csv', '.tsv']:
+ df = pd.read_csv(segmentation_names_path,
+ header=None,
+ index_col=0,
+ sep=None, # use sep=None to automatically detect the separator
+ engine='python'
+ )
+ df = df.rename(columns={1: 'r', 2: 'g', 3: 'b', df.columns[-1]: 'name'})
+ # df = df.set_index(['r', 'g', 'b'])
+ metadata = {'class_names': df['name'].to_dict()}
+ else:
+ raise ValueError(f"Unsupported file format: {segmentation_names_path.suffix}")
+
+ if 'segmentation_names' in metadata:
+ segnames = sorted(metadata['segmentation_names'],
+ key=lambda x: len(x))
+ metadata['segmentation_names'] = segnames
+
+ return metadata
+
+
  def _is_valid_path_argparse(x):
  """
  argparse type that checks if the path exists
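
The new `_read_segmentation_names` helper accepts either format described in its docstring. A short sketch of both inputs and the resulting dictionaries; the file names and label values below are hypothetical examples, not taken from the diff:

    # names.yaml
    #   segmentation_names: [liver, spleen]
    #   class_names: {1: tumor, 2: edema}
    metadata = _read_segmentation_names('names.yaml')
    # -> {'segmentation_names': ['liver', 'spleen'], 'class_names': {1: 'tumor', 2: 'edema'}}

    # names.csv  (index, r, g, b, ..., name; no header row)
    #   1,255,0,0,liver
    #   2,0,255,0,spleen
    metadata = _read_segmentation_names('names.csv')
    # -> {'class_names': {1: 'liver', 2: 'spleen'}}
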
@@ -101,7 +134,6 @@ def walk_to_depth(path: str | Path,
  continue
  yield from walk_to_depth(child, depth-1, exclude_pattern)
  else:
- _LOGGER.debug(f"yielding {child} from {path}")
  yield child


@@ -157,31 +189,32 @@ def handle_api_key() -> str | None:

  def _find_segmentation_files(segmentation_root_path: str,
  images_files: list[str],
- segmentation_metainfo: dict = None
- ) -> Optional[list[dict]]:
+ segmentation_metainfo: dict | None = None
+ ) -> list[dict]:
  """
  Find the segmentation files that match the images files based on the same folder structure
  """

- if segmentation_root_path is None:
- return None
-
- if len(images_files) == 1 and os.path.isfile(images_files[0]) and os.path.isfile(segmentation_root_path):
- return [{'files': [segmentation_root_path]}]
-
- segmentation_files = []
- acceptable_extensions = ['.nii.gz', '.nii', '.png']
-
+ segnames = None
+ classnames = None
  if segmentation_metainfo is not None:
  if 'segmentation_names' in segmentation_metainfo:
  segnames = sorted(segmentation_metainfo['segmentation_names'],
  key=lambda x: len(x))
- else:
- segnames = None
  classnames = segmentation_metainfo.get('class_names', None)
  if classnames is not None:
  _LOGGER.debug(f"Number of class names: {len(classnames)}")

+ if len(images_files) == 1 and os.path.isfile(images_files[0]) and os.path.isfile(segmentation_root_path):
+ ret = [{'files': [segmentation_root_path]}]
+ if classnames is not None:
+ ret[0]['names'] = classnames
+ _LOGGER.debug(f"Returning segmentation files: {ret}")
+ return ret
+
+ segmentation_files = []
+ acceptable_extensions = ['.nii.gz', '.nii', '.png']
+
  segmentation_root_path = Path(segmentation_root_path).absolute()

  for imgpath in images_files:
@@ -197,7 +230,6 @@ def _find_segmentation_files(segmentation_root_path: str,
  else:
  common_parent = Path(*common_parent)

- _LOGGER.debug(f"_find_segmentation_files::common_parent: {common_parent}")
  path_structure = imgpath_parent.relative_to(common_parent).parts[1:]

  # path_structure = imgpath_parent.relative_to(root_path).parts[1:]
@@ -230,24 +262,47 @@ def _find_segmentation_files(segmentation_root_path: str,
  if len(frame_indices) > 0:
  seginfo['frame_index'] = frame_indices

- if segmentation_metainfo is not None:
- snames_associated = []
- for segfile in seg_files:
- if segnames is None:
- snames_associated.append(classnames)
+ snames_associated = []
+ for segfile in seg_files:
+ # check if there is a metadata file associated, besides json, with the segmentation
+ for ext in ['.yaml', '.yml', '.csv']:
+ if str(segfile).endswith('nii.gz'):
+ # has two extensions, so we need to remove both
+ metadata_file = segfile.with_suffix('').with_suffix(ext)
+ if not metadata_file.exists():
+ metadata_file = segfile.with_suffix(ext)
+ else:
+ metadata_file = segfile.with_suffix(ext)
+ if metadata_file.exists():
+ _LOGGER.debug(f"Found metadata file: {metadata_file}")
+ try:
+ new_segmentation_metainfo = _read_segmentation_names(metadata_file)
+ cur_segnames = new_segmentation_metainfo.get('segmentation_names', segnames)
+ cur_classnames = new_segmentation_metainfo.get('class_names', classnames)
+ break
+ except Exception as e:
+ _LOGGER.warning(f"Error reading metadata file {metadata_file}: {e}")
+ else:
+ cur_segnames = segnames
+ cur_classnames = classnames
+
+ if cur_segnames is None:
+ _LOGGER.debug(f'adding {cur_classnames}')
+ snames_associated.append(cur_classnames)
+ else:
+ for segname in cur_segnames:
+ if segname in str(segfile):
+ if cur_classnames is not None:
+ new_segname = {cid: f'{segname}_{cname}' for cid, cname in cur_classnames.items()}
+ new_segname.update({'default': segname})
+ else:
+ new_segname = segname
+ snames_associated.append(new_segname)
+ break
  else:
- for segname in segnames:
- if segname in str(segfile):
- if classnames is not None:
- new_segname = {cid: f'{segname}_{cname}' for cid, cname in classnames.items()}
- new_segname.update({'default': segname})
- else:
- new_segname = segname
- snames_associated.append(new_segname)
- break
- else:
- _USER_LOGGER.warning(f"Segmentation file {segname} does not match any segmentation name.")
- snames_associated.append(None)
+ _USER_LOGGER.warning(f"Segmentation file {segfile} does not match any segmentation name.")
+ snames_associated.append(None)
+ if len(snames_associated) > 0:
  seginfo['names'] = snames_associated

  segmentation_files.append(seginfo)
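
The net effect of this hunk: before falling back to the globally supplied names, the uploader now looks for a per-mask sidecar metadata file next to each segmentation. A sketch of the candidate order for a hypothetical mask path, mirroring the with_suffix logic above (file names are illustrative only):

    from pathlib import Path

    segfile = Path('data/case01/seg.nii.gz')   # hypothetical mask location
    candidates = []
    for ext in ['.yaml', '.yml', '.csv']:
        # double extension stripped first, then the plain suffix swap
        candidates += [segfile.with_suffix('').with_suffix(ext), segfile.with_suffix(ext)]
    # -> seg.yaml, seg.nii.yaml, seg.yml, seg.nii.yml, seg.csv, seg.nii.csv
    # The first candidate that exists and parses overrides segnames/classnames for that mask only.
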
@@ -268,7 +323,7 @@ def _find_json_metadata(file_path: str | Path) -> Optional[str]:
  Optional[str]: Path to the JSON metadata file if found, None otherwise
  """
  file_path = Path(file_path)
-
+
  # Handle .nii.gz files specially - need to remove both extensions
  if file_path.name.endswith('.nii.gz'):
  base_name = file_path.name[:-7] # Remove .nii.gz
@@ -320,7 +375,7 @@ def _collect_metadata_files(files_path: list[str], auto_detect_json: bool) -> tu
  if used_json_files:
  _LOGGER.debug(f"Filtering out {len(used_json_files)} JSON metadata files from main upload list")
  filtered_metadata_files = []
-
+
  for original_file in files_path:
  if original_file not in used_json_files:
  original_index = files_path.index(original_file)
@@ -376,8 +431,10 @@ def _parse_args() -> tuple[Any, list[str], Optional[list[dict]], Optional[list[s
  help='Path to the segmentation file(s) or a directory')
  parser.add_argument('--segmentation_names', type=_is_valid_path_argparse, metavar="FILE",
  required=False,
- help='Path to a yaml file containing the segmentation names.' +
- ' The file may contain two keys: "segmentation_names" and "class_names".')
+ help='Path to a yaml or csv file containing the segmentation names.' +
+ ' If yaml, the file may contain two keys: "segmentation_names" and "class_names".'
+ ' If csv, the file should contain the following columns:'
+ ' index, r, g, b, ..., name')
  parser.add_argument('--yes', action='store_true',
  help='Automatically answer yes to all prompts')
  parser.add_argument('--transpose-segmentation', action='store_true', default=False,
@@ -407,6 +464,7 @@ def _parse_args() -> tuple[Any, list[str], Optional[list[dict]], Optional[list[s
  if args.verbose:
  # Get the console handler and set to debug
  logging.getLogger().handlers[0].setLevel(logging.DEBUG)
+ logging.getLogger('datamint').handlers[0].setLevel(logging.DEBUG)
  _LOGGER.setLevel(logging.DEBUG)
  _USER_LOGGER.setLevel(logging.DEBUG)

@@ -446,15 +504,17 @@ def _parse_args() -> tuple[Any, list[str], Optional[list[dict]], Optional[list[s
  raise ValueError(f"No valid non-metadata files found in {args.path}")

  if args.segmentation_names is not None:
- with open(args.segmentation_names, 'r') as f:
- segmentation_names = yaml.safe_load(f)
+ segmentation_names = _read_segmentation_names(args.segmentation_names)
  else:
  segmentation_names = None

  _LOGGER.debug(f'finding segmentations at {args.segmentation_path}')
- segmentation_files = _find_segmentation_files(args.segmentation_path,
- file_path,
- segmentation_metainfo=segmentation_names)
+ if args.segmentation_path is None:
+ segmentation_files = None
+ else:
+ segmentation_files = _find_segmentation_files(args.segmentation_path,
+ file_path,
+ segmentation_metainfo=segmentation_names)

  _LOGGER.info(f"args parsed: {args}")

@@ -14,9 +14,9 @@ from torch.utils.data import DataLoader
  import torch
  from torch import Tensor
  from datamint.apihandler.base_api_handler import DatamintException
- from datamint.utils.dicom_utils import is_dicom
+ from medimgkit.dicom_utils import is_dicom
  import cv2
- from datamint.utils.io_utils import read_array_normalized
+ from medimgkit.io_utils import read_array_normalized
  from datetime import datetime

  _LOGGER = logging.getLogger(__name__)
@@ -1,6 +1,6 @@
  Metadata-Version: 2.3
  Name: datamint
- Version: 1.6.0
+ Version: 1.6.3
  Summary: A library for interacting with the Datamint API, designed for efficient data management, processing and Deep Learning workflows.
  Requires-Python: >=3.10
  Classifier: Programming Language :: Python :: 3
@@ -19,6 +19,7 @@ Requires-Dist: humanize (>=4.0.0,<5.0.0)
  Requires-Dist: lazy-loader (>=0.3.0)
  Requires-Dist: lightning
  Requires-Dist: matplotlib
+ Requires-Dist: medimgkit
  Requires-Dist: nest-asyncio (>=1.0.0,<2.0.0)
  Requires-Dist: nibabel (>=4.0.0)
  Requires-Dist: numpy
@@ -1,16 +1,16 @@
  datamint/__init__.py,sha256=7rKCCsaa4RBRTIfuHB708rai1xwDHLtkFNFJGKYG5D4,757
- datamint/apihandler/annotation_api_handler.py,sha256=jEY0Ka5RikkD2435cDNQ59l3M4NSkOJ1NwRreWQYl4c,51616
+ datamint/apihandler/annotation_api_handler.py,sha256=ChwaSYjoOAVS7vuyP3-cfpDHaHwk_wXLf8QQaSU_oSM,51893
  datamint/apihandler/api_handler.py,sha256=cdVSddrFCKlF_BJ81LO1aJ0OP49rssjpNEFzJ6Q7YyY,384
  datamint/apihandler/base_api_handler.py,sha256=XSxZEQEkbQpuixGDu_P9jbxUQht3Z3JgxaeiFKPkVDM,11690
- datamint/apihandler/dto/annotation_dto.py,sha256=otCIesoqGBlbSOw4ErqFsXp2HwJsPNUQlkynQh_7pHg,7110
+ datamint/apihandler/dto/annotation_dto.py,sha256=qId1RK1VO7dXrvGJ7dqJ31jBQB7Z8yy5x0tLSiMxTB4,7105
  datamint/apihandler/exp_api_handler.py,sha256=hFUgUgBc5rL7odK7gTW3MnrvMY1pVfJUpUdzRNobMQE,6226
- datamint/apihandler/root_api_handler.py,sha256=OIGq6aHX64B94MmAikcFzF0rdekRH4l1S59x2Pa_DJA,51739
+ datamint/apihandler/root_api_handler.py,sha256=O8Gn1Gp3w7AYeuT_FbwH413o6P_eAYLoRiW0baGY_b4,51795
  datamint/client_cmd_tools/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  datamint/client_cmd_tools/datamint_config.py,sha256=md7dnWrbl10lPtXKbmD9yo6onLJsajeG8Vz0ZWH1v4M,8181
- datamint/client_cmd_tools/datamint_upload.py,sha256=VyLL2FgY9ibfbdp4K6HrKt0jgkQH-SVuU71D6e77074,26436
+ datamint/client_cmd_tools/datamint_upload.py,sha256=pSCZ6PYSQEOaHYW1KcVodthArsEoQhYwEzVeh_qdjTk,29470
  datamint/configs.py,sha256=Bdp6NydYwyCJ2dk19_gf_o3M2ZyQOmMHpLi8wEWNHUk,1426
  datamint/dataset/__init__.py,sha256=4PlUKSvVhdfQvvuq8jQXrkdqnot-iTTizM3aM1vgSwg,47
- datamint/dataset/base_dataset.py,sha256=MQZ_wNFex4BKBfb4fAcXV6-fQXFV_zBK1ybWrMm6_pg,39092
+ datamint/dataset/base_dataset.py,sha256=bSMuNHUzU7heN0awGemTn3e2zPLhuCsh-qSs_Qt6i9w,39082
  datamint/dataset/dataset.py,sha256=AwS92t5kdmpm9NKFfXFmDmZxEbbPfb_FOMn-FWfu3bE,26590
  datamint/examples/__init__.py,sha256=zcYnd5nLVme9GCTPYH-1JpGo8xXK2WEYvhzcy_2alZc,39
  datamint/examples/example_projects.py,sha256=7Nb_EaIdzJTQa9zopqc-WhTBQWQJSoQZ_KjRS4PB4FI,2931
@@ -18,12 +18,10 @@ datamint/experiment/__init__.py,sha256=5qQOMzoG17DEd1YnTF-vS0qiM-DGdbNh42EUo91CR
  datamint/experiment/_patcher.py,sha256=ZgbezoevAYhJsbiJTvWPALGTcUiMT371xddcTllt3H4,23296
  datamint/experiment/experiment.py,sha256=aHK9dRFdQTi569xgUg1KqlCZLHZpDmSH3g3ndPIZvXw,44546
  datamint/logging.yaml,sha256=a5dsATpul7QHeUHB2TjABFjWaPXBMbO--dgn8GlRqwk,483
- datamint/utils/dicom_utils.py,sha256=sLukP6MB_acx7t868O2HDd_RDEILa97mEe_V9m1EMCY,28991
- datamint/utils/io_utils.py,sha256=lKnUCJEip7W9Xj9wOWsTAA855HnKbjwQON1WjMGqJmM,7374
  datamint/utils/logging_utils.py,sha256=DvoA35ATYG3JTwfXEXYawDyKRfHeCrH0a9czfkmz8kM,1851
  datamint/utils/torchmetrics.py,sha256=lwU0nOtsSWfebyp7dvjlAggaqXtj5ohSEUXOg3L0hJE,2837
  datamint/utils/visualization.py,sha256=yaUVAOHar59VrGUjpAWv5eVvQSfztFG0eP9p5Vt3l-M,4470
- datamint-1.6.0.dist-info/METADATA,sha256=F73Llyz1xUSDM5luVjsjL8EZwLP8VAcMV91vpi2BVqw,4065
- datamint-1.6.0.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
- datamint-1.6.0.dist-info/entry_points.txt,sha256=mn5H6jPjO-rY0W0CAZ6Z_KKWhMLvyVaSpoqk77jlTI4,145
- datamint-1.6.0.dist-info/RECORD,,
+ datamint-1.6.3.dist-info/METADATA,sha256=lfTFnSMYn7LTxb3jmUv_bAFBBDGQc6csTw91adX_xqI,4090
+ datamint-1.6.3.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
+ datamint-1.6.3.dist-info/entry_points.txt,sha256=mn5H6jPjO-rY0W0CAZ6Z_KKWhMLvyVaSpoqk77jlTI4,145
+ datamint-1.6.3.dist-info/RECORD,,
@@ -1,707 +0,0 @@
1
- from pydicom.pixels import pixel_array
2
- import pydicom
3
- from pydicom.uid import generate_uid
4
- from typing import Sequence, Generator, IO, TypeVar, Generic
5
- import warnings
6
- from copy import deepcopy
7
- import logging
8
- from pathlib import Path
9
- from pydicom.misc import is_dicom as pydicom_is_dicom
10
- from io import BytesIO
11
- import os
12
- import numpy as np
13
- from collections import defaultdict
14
- import uuid
15
- import hashlib
16
- from tqdm import tqdm
17
-
18
- import pydicom.uid
19
-
20
- _LOGGER = logging.getLogger(__name__)
21
-
22
- CLEARED_STR = "CLEARED_BY_DATAMINT"
23
-
24
- T = TypeVar('T')
25
-
26
-
27
- class GeneratorWithLength(Generic[T]):
28
- def __init__(self, generator: Generator[T, None, None], length: int):
29
- self.generator = generator
30
- self.length = length
31
-
32
- def __len__(self):
33
- return self.length
34
-
35
- def __iter__(self):
36
- return self.generator
37
-
38
- def __next__(self) -> T:
39
- return next(self.generator)
40
-
41
- def close(self):
42
- self.generator.close()
43
-
44
- def throw(self, *args):
45
- return self.generator.throw(*args)
46
-
47
- def send(self, *args):
48
- return self.generator.send(*args)
49
-
50
-
51
- class TokenMapper:
52
- def __init__(self, seed: int = 42):
53
- self.seed = seed
54
-
55
- def get_token(self, tag: tuple, value: str, simple_id=False) -> str:
56
- """Get a consistent token for a given tag and value pair."""
57
- if value is None or value == CLEARED_STR:
58
- return CLEARED_STR
59
-
60
- # Use a hash function to generate a consistent token
61
- token = hashlib.md5(f"{tag}{value}{self.seed}".encode()).hexdigest()
62
- if simple_id:
63
- return token
64
- return generate_uid(entropy_srcs=['DATAMINT', token])
65
-
66
-
67
- _TOKEN_MAPPER = TokenMapper()
68
-
69
-
70
- def anonymize_dicom(ds: pydicom.Dataset,
71
- retain_codes: Sequence[tuple] = [],
72
- copy=False,
73
- token_mapper: TokenMapper = None) -> pydicom.Dataset:
74
- """
75
- Anonymize a DICOM file by clearing all the specified DICOM tags
76
- according to the DICOM standard https://www.dicomstandard.org/News-dir/ftsup/docs/sups/sup55.pdf.
77
- This function will generate a new UID for the new DICOM file and clear the specified DICOM tags
78
- with consistent tokens for related identifiers.
79
-
80
- Args:
81
- ds: pydicom Dataset object.
82
- retain_codes: A list of DICOM tag codes to retain the value of.
83
- copy: If True, the function will return a copy of the input Dataset object.
84
- token_mapper: TokenMapper instance to maintain consistent tokens across calls.
85
- If None, uses a global instance.
86
-
87
- Returns:
88
- pydicom Dataset object with specified DICOM tags cleared
89
- """
90
- if copy:
91
- ds = deepcopy(ds)
92
-
93
- if token_mapper is None:
94
- token_mapper = _TOKEN_MAPPER
95
-
96
- # https://www.dicomstandard.org/News-dir/ftsup/docs/sups/sup55.pdf
97
- tags_to_clear = [
98
- (0x0008, 0x0014), (0x0008, 0x0050), (0x0008, 0x0080), (0x0008, 0x0081), (0x0008, 0x0090),
99
- (0x0008, 0x0092), (0x0008, 0x0094), (0x0008, 0x1010), (0x0008, 0x1030), (0x0008, 0x103E),
100
- (0x0008, 0x1040), (0x0008, 0x1048), (0x0008, 0x1050), (0x0008, 0x1060), (0x0008, 0x1070),
101
- (0x0008, 0x1080), (0x0008, 0x1155), (0x0008, 0x2111), (0x0010, 0x0010), (0x0010, 0x0020),
102
- (0x0010, 0x0030), (0x0010, 0x0032), (0x0010, 0x0040), (0x0010, 0x1000), (0x0010, 0x1001),
103
- (0x0010, 0x1010), (0x0010, 0x1020), (0x0010, 0x1030), (0x0010, 0x1090), (0x0010, 0x2160),
104
- (0x0010, 0x2180), (0x0010, 0x21B0), (0x0010, 0x4000), (0x0018, 0x1000), (0x0018, 0x1030),
105
- (0x0020, 0x000D), (0x0020, 0x000E), # StudyInstanceUID and SeriesInstanceUID
106
- (0x0020, 0x0010), (0x0020, 0x0052), (0x0020, 0x0200), (0x0020, 0x4000), (0x0008, 0x0018),
107
- (0x0040, 0x0275), (0x0040, 0xA730), (0x0088, 0x0140), (0x3006, 0x0024), (0x3006, 0x00C2)
108
- ]
109
-
110
- # Frame of Reference UID, Series Instance UID, Concatenation UID, and Instance UID, and StudyInstanceUID are converted to new UIDs
111
- uid_tags = [(0x0020, 0x0052), (0x0020, 0x000E), (0x0020, 0x9161),
112
- (0x0010, 0x0020), (0x0008, 0x0018), (0x0020, 0x000D)]
113
- simple_id_tags = [(0x0010, 0x0020)] # Patient ID
114
-
115
- for code in retain_codes:
116
- if code in tags_to_clear:
117
- tags_to_clear.remove(code)
118
-
119
- # Clear the specified DICOM tags
120
- with warnings.catch_warnings(): # Supress UserWarning from pydicom
121
- warnings.filterwarnings("ignore", category=UserWarning, module='pydicom')
122
- for tag in tags_to_clear:
123
- if tag in ds:
124
- if tag == (0x0008, 0x0094): # Phone number
125
- ds[tag].value = "000-000-0000"
126
- # If tag is a floating point number, set it to 0.0
127
- elif ds[tag].VR in ['FL', 'FD', 'DS']:
128
- ds[tag].value = 0
129
- elif ds[tag].VR == 'SQ':
130
- del ds[tag]
131
- else:
132
- if tag in uid_tags:
133
- try:
134
- # Use consistent token mapping for identifiers
135
- original_value = ds[tag].value
136
- ds[tag].value = token_mapper.get_token(tag, original_value, simple_id=tag in simple_id_tags)
137
- tag_name = pydicom.datadict.keyword_for_tag(tag)
138
- except ValueError as e:
139
- ds[tag].value = CLEARED_STR
140
- else:
141
- ds[tag].value = CLEARED_STR
142
- if hasattr(ds, 'file_meta') and hasattr(ds, 'SOPInstanceUID'):
143
- ds.file_meta.MediaStorageSOPInstanceUID = ds.SOPInstanceUID
144
- return ds
145
-
146
-
147
- def is_dicom(f: str | Path | BytesIO) -> bool:
148
- if isinstance(f, BytesIO):
149
- fp = BytesIO(f.getbuffer()) # Avoid modifying the original BytesIO object
150
- fp.read(128) # preamble
151
-
152
- return fp.read(4) == b"DICM"
153
-
154
- if isinstance(f, Path):
155
- f = str(f)
156
- if os.path.isdir(f):
157
- return False
158
-
159
- fname = f.lower()
160
- if fname.endswith('.dcm') or fname.endswith('.dicom'):
161
- return True
162
-
163
- # Check if the file has an extension
164
- if os.path.splitext(f)[1] != '':
165
- return False
166
-
167
- try:
168
- return pydicom_is_dicom(f)
169
- except FileNotFoundError as e:
170
- return None
171
-
172
-
173
- def to_bytesio(ds: pydicom.Dataset, name: str) -> BytesIO:
174
- """
175
- Convert a pydicom Dataset object to BytesIO object.
176
- """
177
- dicom_bytes = BytesIO()
178
- pydicom.dcmwrite(dicom_bytes, ds)
179
- dicom_bytes.seek(0)
180
- dicom_bytes.name = name
181
- dicom_bytes.mode = 'rb'
182
- return dicom_bytes
183
-
184
-
185
- def load_image_normalized(dicom: pydicom.Dataset, index: int = None) -> np.ndarray:
186
- """
187
- Normalizes the shape of an array of images to (n, c, y, x)=(#slices, #channels, height, width).
188
- It uses dicom.Rows, dicom.Columns, and other information to determine the shape.
189
-
190
- Args:
191
- dicom: A dicom with images of varying shapes.
192
-
193
- Returns:
194
- A numpy array of shape (n, c, y, x)=(#slices, #channels, height, width).
195
- """
196
- n = dicom.get('NumberOfFrames', 1)
197
- if index is None:
198
- images = dicom.pixel_array
199
- else:
200
- if index is not None and index >= n:
201
- raise ValueError(f"Index {index} is out of bounds. The number of frames is {n}.")
202
- images = pixel_array(dicom, index=index)
203
- n = 1
204
- shape = images.shape
205
-
206
- c = dicom.get('SamplesPerPixel')
207
-
208
- # x=width, y=height
209
- if images.ndim == 2:
210
- # Single grayscale image (y, x)
211
- # Reshape to (1, 1, y, x)
212
- return images.reshape((1, 1) + images.shape)
213
- elif images.ndim == 3:
214
- # (n, y, x) or (y, x, c)
215
- if shape[0] == 1 or (n is not None and n > 1):
216
- # (n, y, x)
217
- return images.reshape(shape[0], 1, shape[1], shape[2])
218
- if shape[2] in (1, 3, 4) or (c is not None and c > 1):
219
- # (y, x, c)
220
- images = images.transpose(2, 0, 1)
221
- return images.reshape(1, *images.shape)
222
- elif images.ndim == 4:
223
- if shape[3] == c or shape[3] in (1, 3, 4) or (c is not None and c > 1):
224
- # (n, y, x, c) -> (n, c, y, x)
225
- return images.transpose(0, 3, 1, 2)
226
-
227
- raise ValueError(f"Unsupported DICOM normalization with shape: {shape}, SamplesPerPixel: {c}, NumberOfFrames: {n}")
228
-
229
-
230
- def assemble_dicoms(files_path: list[str | IO],
231
- return_as_IO: bool = False) -> GeneratorWithLength[pydicom.Dataset | IO]:
232
- """
233
- Assemble multiple DICOM files into a single multi-frame DICOM file.
234
- This function will merge the pixel data of the DICOM files and generate a new DICOM file with the combined pixel data.
235
-
236
- Args:
237
- files_path: A list of file paths to the DICOM files to be merged.
238
-
239
- Returns:
240
- A generator that yields the merged DICOM files.
241
- """
242
- dicoms_map = defaultdict(list)
243
-
244
- for file_path in tqdm(files_path, desc="Reading DICOMs metadata", unit="file"):
245
- dicom = pydicom.dcmread(file_path,
246
- specific_tags=['SeriesInstanceUID', 'InstanceNumber', 'Rows', 'Columns'])
247
- series_uid = dicom.get('SeriesInstanceUID', None)
248
- if series_uid is None:
249
- # generate a random uid
250
- series_uid = pydicom.uid.generate_uid()
251
- instance_number = dicom.get('InstanceNumber', 0)
252
- rows = dicom.get('Rows', None)
253
- columns = dicom.get('Columns', None)
254
- dicoms_map[series_uid].append((instance_number, file_path, rows, columns))
255
- if hasattr(file_path, "seek"):
256
- file_path.seek(0)
257
-
258
- # Validate that all DICOMs with the same SeriesInstanceUID have matching dimensions
259
- for series_uid, dicom_list in dicoms_map.items():
260
- if len(dicom_list) <= 1:
261
- continue
262
-
263
- # Get dimensions from first DICOM
264
- first_rows = dicom_list[0][2]
265
- first_columns = dicom_list[0][3]
266
-
267
- # Check all other DICOMs have the same dimensions
268
- for instance_number, file_path, rows, columns in dicom_list:
269
- if rows != first_rows or columns != first_columns:
270
- msg = (
271
- f"Dimension mismatch in SeriesInstanceUID {series_uid}: "
272
- f"Expected {first_rows}x{first_columns}, got {rows}x{columns} "
273
- f"for file {file_path} and {dicom_list[0][1]}"
274
- )
275
- _LOGGER.error(msg)
276
- raise ValueError(msg)
277
-
278
- # filter out the two last elements of the tuple (rows, columns)
279
- dicoms_map = {fr_uid: [(instance_number, file_path) for instance_number, file_path, _, _ in dicoms]
280
- for fr_uid, dicoms in dicoms_map.items()}
281
-
282
- gen = _generate_merged_dicoms(dicoms_map, return_as_IO=return_as_IO)
283
- return GeneratorWithLength(gen, len(dicoms_map))
284
-
285
-
286
- def _create_multiframe_attributes(merged_ds: pydicom.Dataset,
287
- all_dicoms: list[pydicom.Dataset]) -> pydicom.Dataset:
288
- ### Shared Functional Groups Sequence ###
289
- shared_seq_dataset = pydicom.dataset.Dataset()
290
-
291
- # check if pixel spacing or spacing between slices are equal for all dicoms
292
- pixel_spacing = merged_ds.get('PixelSpacing', None)
293
- all_pixel_spacing_equal = all(ds.get('PixelSpacing', None) == pixel_spacing
294
- for ds in all_dicoms)
295
- spacing_between_slices = merged_ds.get('SpacingBetweenSlices', None)
296
- all_spacing_b_slices_equal = all(ds.get('SpacingBetweenSlices', None) == spacing_between_slices
297
- for ds in all_dicoms)
298
-
299
- # if they are equal, add them to the shared functional groups sequence
300
- if (pixel_spacing is not None and all_pixel_spacing_equal) or (spacing_between_slices is not None and all_spacing_b_slices_equal):
301
- pixel_measure = pydicom.dataset.Dataset()
302
- if pixel_spacing is not None:
303
- pixel_measure.PixelSpacing = pixel_spacing
304
- if spacing_between_slices is not None:
305
- pixel_measure.SpacingBetweenSlices = spacing_between_slices
306
- pixel_measures_seq = pydicom.Sequence([pixel_measure])
307
- shared_seq_dataset.PixelMeasuresSequence = pixel_measures_seq
308
-
309
- if len(shared_seq_dataset) > 0:
310
- shared_seq = pydicom.Sequence([shared_seq_dataset])
311
- merged_ds.SharedFunctionalGroupsSequence = shared_seq
312
- #######
313
-
314
- ### Per-Frame Functional Groups Sequence ###
315
- perframe_seq_list = []
316
- for ds in all_dicoms:
317
- per_frame_dataset = pydicom.dataset.Dataset() # root dataset for each frame
318
- pos_dataset = pydicom.dataset.Dataset()
319
- orient_dataset = pydicom.dataset.Dataset()
320
- pixel_measure = pydicom.dataset.Dataset()
321
- framenumber_dataset = pydicom.dataset.Dataset()
322
-
323
- if 'ImagePositionPatient' in ds:
324
- pos_dataset.ImagePositionPatient = ds.ImagePositionPatient
325
- if 'ImageOrientationPatient' in ds:
326
- orient_dataset.ImageOrientationPatient = ds.ImageOrientationPatient
327
- if 'PixelSpacing' in ds and all_pixel_spacing_equal == False:
328
- pixel_measure.PixelSpacing = ds.PixelSpacing
329
- if 'SpacingBetweenSlices' in ds and all_spacing_b_slices_equal == False:
330
- pixel_measure.SpacingBetweenSlices = ds.SpacingBetweenSlices
331
-
332
- # Add datasets to the per-frame dataset
333
- per_frame_dataset.PlanePositionSequence = pydicom.Sequence([pos_dataset])
334
- per_frame_dataset.PlaneOrientationSequence = pydicom.Sequence([orient_dataset])
335
- per_frame_dataset.PixelMeasuresSequence = pydicom.Sequence([pixel_measure])
336
- per_frame_dataset.FrameContentSequence = pydicom.Sequence([framenumber_dataset])
337
-
338
- perframe_seq_list.append(per_frame_dataset)
339
- if len(perframe_seq_list[0]) > 0:
340
- perframe_seq = pydicom.Sequence(perframe_seq_list)
341
- merged_ds.PerFrameFunctionalGroupsSequence = perframe_seq
342
- merged_ds.FrameIncrementPointer = (0x5200, 0x9230)
343
-
344
- return merged_ds
345
-
346
-
347
- def _generate_dicom_name(ds: pydicom.Dataset) -> str:
348
- """
349
- Generate a meaningful name for a DICOM dataset using its attributes.
350
-
351
- Args:
352
- ds: pydicom Dataset object
353
-
354
- Returns:
355
- A string containing a descriptive name with .dcm extension
356
- """
357
- components = []
358
-
359
- # if hasattr(ds, 'filename'):
360
- # components.append(os.path.basename(ds.filename))
361
- if hasattr(ds, 'SeriesDescription'):
362
- components.append(ds.SeriesDescription)
363
- if len(components) == 0 and hasattr(ds, 'SeriesNumber'):
364
- components.append(f"ser{ds.SeriesNumber}")
365
- if hasattr(ds, 'StudyDescription'):
366
- components.append(ds.StudyDescription)
367
- elif hasattr(ds, 'StudyID'):
368
- components.append(ds.StudyID)
369
-
370
- # Join components and add extension
371
- if len(components) > 0:
372
- description = "_".join(str(x) for x in components) + ".dcm"
373
- # Clean description - remove special chars and spaces
374
- description = "".join(c if c.isalnum() else "_" for c in description)
375
- if len(description) > 0:
376
- return description
377
-
378
- if hasattr(ds, 'SeriesInstanceUID'):
379
- return ds.SeriesInstanceUID + ".dcm"
380
-
381
- # Fallback to generic name if no attributes found
382
- return ds.filename if hasattr(ds, 'filename') else f"merged_dicom_{uuid.uuid4()}.dcm"
383
-
384
-
385
- def _generate_merged_dicoms(dicoms_map: dict[str, list],
386
- return_as_IO: bool = False) -> Generator[pydicom.Dataset, None, None]:
387
- for _, dicoms in dicoms_map.items():
388
- dicoms.sort(key=lambda x: x[0])
389
- files_path = [file_path for _, file_path in dicoms]
390
-
391
- all_dicoms = [pydicom.dcmread(file_path) for file_path in files_path]
392
-
393
- # Use the first dicom as a template
394
- merged_dicom = all_dicoms[0]
395
-
396
- # Combine pixel data
397
- pixel_arrays = np.stack([ds.pixel_array for ds in all_dicoms], axis=0)
398
-
399
- # Update the merged dicom
400
- merged_dicom.PixelData = pixel_arrays.tobytes()
401
- merged_dicom.NumberOfFrames = len(pixel_arrays) # Set number of frames
402
- merged_dicom.SOPInstanceUID = pydicom.uid.generate_uid() # Generate new SOP Instance UID
403
- # Removed deprecated attributes and set Transfer Syntax UID instead:
404
- merged_dicom.file_meta.TransferSyntaxUID = pydicom.uid.ImplicitVRLittleEndian
405
-
406
- # Free up memory
407
- for ds in all_dicoms[1:]:
408
- del ds.PixelData
409
-
410
- # create multi-frame attributes
411
- # check if FramTime is equal for all dicoms
412
- frame_time = merged_dicom.get('FrameTime', None)
413
- all_frame_time_equal = all(ds.get('FrameTime', None) == frame_time for ds in all_dicoms)
414
- if frame_time is not None and all_frame_time_equal:
415
- merged_dicom.FrameTime = frame_time # (0x0018,0x1063)
416
- merged_dicom.FrameIncrementPointer = (0x0018, 0x1063) # points to 'FrameTime'
417
- else:
418
- # TODO: Sometimes FrameTime is present but not equal for all dicoms. In this case, check out 'FrameTimeVector'.
419
- merged_dicom = _create_multiframe_attributes(merged_dicom, all_dicoms)
420
-
421
- # Remove tags of single frame dicoms
422
- for attr in ['ImagePositionPatient', 'SliceLocation', 'ImageOrientationPatient',
423
- 'PixelSpacing', 'SpacingBetweenSlices', 'InstanceNumber']:
424
- if hasattr(merged_dicom, attr):
425
- delattr(merged_dicom, attr)
426
-
427
- if return_as_IO:
428
- name = _generate_dicom_name(merged_dicom)
429
- yield to_bytesio(merged_dicom, name=name)
430
- else:
431
- yield merged_dicom
432
-
433
-
434
- """
435
- - The Slice Location (0020,1041) is usually a derived attribute,
436
- typically computed from Image Position (Patient) (0020,0032)
437
- """
438
-
439
-
440
- def get_space_between_slices(ds: pydicom.Dataset) -> float:
441
- """
442
- Get the space between slices from a DICOM dataset.
443
-
444
- Parameters:
445
- ds (pydicom.Dataset): The DICOM dataset containing image metadata.
446
-
447
- Returns:
448
- float: Space between slices in millimeters.
449
- """
450
- # Get the Spacing Between Slices attribute
451
- if 'SpacingBetweenSlices' in ds:
452
- return ds.SpacingBetweenSlices
453
-
454
- if 'SharedFunctionalGroupsSequence' in ds:
455
- shared_group = ds.SharedFunctionalGroupsSequence[0]
456
- if 'PixelMeasuresSequence' in shared_group and 'SpacingBetweenSlices' in shared_group.PixelMeasuresSequence[0]:
457
- return shared_group.PixelMeasuresSequence[0].SpacingBetweenSlices
458
-
459
- if 'SliceThickness' in ds:
460
- return ds.SliceThickness
461
-
462
- return 1.0 # Default value if not found
463
-
464
-
465
- def get_image_orientation(ds: pydicom.Dataset, slice_index: int) -> np.ndarray:
466
- """
467
- Get the image orientation from a DICOM dataset.
468
-
469
- Parameters:
470
- ds (pydicom.Dataset): The DICOM dataset containing image metadata.
471
-
472
- Returns:
473
- numpy.ndarray: Image orientation (X, Y, Z) for the specified slice.
474
- """
475
- # Get the Image Orientation Patient attribute
476
- if 'ImageOrientationPatient' in ds:
477
- return ds.ImageOrientationPatient
478
-
479
- if 'PerFrameFunctionalGroupsSequence' in ds:
480
- if 'PlaneOrientationSequence' in ds.PerFrameFunctionalGroupsSequence[slice_index]:
481
- return ds.PerFrameFunctionalGroupsSequence[slice_index].PlaneOrientationSequence[0].ImageOrientationPatient
482
-
483
- if 'SharedFunctionalGroupsSequence' in ds:
484
- return ds.SharedFunctionalGroupsSequence[0].PlaneOrientationSequence[0].ImageOrientationPatient
485
-
486
- raise ValueError("ImageOrientationPatient not found in DICOM dataset.")
487
-
488
-
489
- def get_slice_orientation(ds: pydicom.Dataset, slice_index: int) -> np.ndarray:
490
- """
491
- Get the slice orientation from a DICOM dataset.
492
-
493
- Parameters:
494
- ds (pydicom.Dataset): The DICOM dataset containing image metadata.
495
- slice_index (int): 0-based index of the slice in the 3D volume. This is the `InstanceNumber-1`.
496
-
497
- Returns:
498
- numpy.ndarray: Slice orientation (X, Y, Z) for the specified slice.
499
- """
500
- # Get the Image Orientation Patient attribute
501
-
502
- x_orient, y_orient = np.array(get_image_orientation(ds, slice_index), dtype=np.float64).reshape(2, 3)
503
- # compute the normal vector of the slice
504
- slice_orient = np.cross(x_orient, y_orient)
505
- # normalize the vector to space_between_slices
506
- space_between_slices = get_space_between_slices(ds)
507
- slice_orient = slice_orient / np.linalg.norm(slice_orient) * space_between_slices
508
-
509
- return slice_orient
510
-
511
-
512
- def _get_instance_number(ds: pydicom.Dataset, slice_index: int | None = None) -> int:
513
- if slice_index is None:
514
- if 'InstanceNumber' in ds and ds.InstanceNumber is not None:
515
- return ds.InstanceNumber
516
- elif 'NumberOfFrames' in ds and ds.NumberOfFrames == 1:
517
- return 0
518
- else:
519
- raise ValueError("Slice index is required for multi-frame images.")
520
- else:
521
- if slice_index < 0:
522
- raise ValueError("Slice index must be a non-negative integer.")
523
- if 'NumberOfFrames' in ds and slice_index >= ds.NumberOfFrames:
524
- _LOGGER.warning(f"Slice index {slice_index} exceeds number of frames {ds.NumberOfFrames}.")
525
- root_instance_number = ds.get('InstanceNumber', 1)
526
- if root_instance_number is None:
527
- root_instance_number = 1
528
- return root_instance_number + slice_index
529
-
530
-
531
- def get_image_position(ds: pydicom.Dataset,
532
- slice_index: int | None = None) -> np.ndarray:
533
- """
534
- Get the image position for a specific slice in a DICOM dataset.
535
-
536
- Parameters:
537
- ds (pydicom.Dataset): The DICOM dataset containing image metadata.
538
- slice_index (int): Index of the slice in the 3D volume.
539
-
540
- Returns:
541
- numpy.ndarray: Image position (X, Y, Z) for the specified slice.
542
- """
543
-
544
- instance_number = _get_instance_number(ds, slice_index)
545
-
546
- if 'PerFrameFunctionalGroupsSequence' in ds:
547
- if slice_index is not None:
548
- frame_groups = ds.PerFrameFunctionalGroupsSequence[slice_index]
549
- if 'PlanePositionSequence' in frame_groups and 'ImagePositionPatient' in frame_groups.PlanePositionSequence[0]:
550
- return frame_groups.PlanePositionSequence[0].ImagePositionPatient
551
- else:
552
- logging.warning("PerFrameFunctionalGroupsSequence is available, but slice_index is not provided.")
553
-
554
- # Get the Image Position Patient attribute
555
- if 'ImagePositionPatient' in ds:
556
- if 'SliceLocation' in ds:
557
- _LOGGER.debug("SliceLocation attribute is available, but not accounted for in calculation.")
558
- x = np.array(ds.ImagePositionPatient, dtype=np.float64)
559
- sc_orient = get_slice_orientation(ds, slice_index)
560
- return x + sc_orient*(instance_number-ds.get('InstanceNumber', 1))
561
-
562
- raise ValueError("ImagePositionPatient not found in DICOM dataset.")
563
-
564
-
565
- def get_pixel_spacing(ds: pydicom.Dataset, slice_index: int) -> np.ndarray:
566
- """
567
- Get the pixel spacing from a DICOM dataset.
568
-
569
- Parameters:
570
- ds (pydicom.Dataset): The DICOM dataset containing image metadata.
571
- slice_index (int): Index of the slice in the 3D volume.
572
-
573
- Returns:
574
- numpy.ndarray: Pixel spacing (X, Y) for the specified slice.
575
- """
576
- # Get the Pixel Spacing attribute
577
- if 'PixelSpacing' in ds:
578
- return np.array(ds.PixelSpacing, dtype=np.float64)
579
-
580
- if 'PerFrameFunctionalGroupsSequence' in ds:
581
- if 'PixelMeasuresSequence' in ds.PerFrameFunctionalGroupsSequence[slice_index]:
582
- return ds.PerFrameFunctionalGroupsSequence[slice_index].PixelMeasuresSequence[0].PixelSpacing
583
-
584
- if 'SharedFunctionalGroupsSequence' in ds:
585
- if 'PixelMeasuresSequence' in ds.SharedFunctionalGroupsSequence[0]:
586
- return ds.SharedFunctionalGroupsSequence[0].PixelMeasuresSequence[0].PixelSpacing
587
-
588
- raise ValueError("PixelSpacing not found in DICOM dataset.")
589
-
590
-
591
- def pixel_to_patient(ds: pydicom.Dataset,
592
- pixel_x, pixel_y,
593
- slice_index: int | None = None,
594
- instance_number: int | None = None) -> np.ndarray:
595
- """
596
- Convert pixel coordinates (pixel_x, pixel_y) to patient coordinates in DICOM.
597
-
598
- Parameters:
599
- ds (pydicom.Dataset): The DICOM dataset containing image metadata.
600
- pixel_x (float): X coordinate in pixel space.
601
- pixel_y (float): Y coordinate in pixel space.
602
- slice_index (int): Index of the slice of the `ds.pixel_array`.
603
- instance_number (int): Instance number of the slice in the 3D volume.
604
-
605
-
606
- Returns:
607
- numpy.ndarray: Patient coordinates (X, Y, Z).
608
- """
609
-
610
- # - image_position is the origin of the image in patient coordinates (ImagePositionPatient)
611
- # - row_vector and col_vector are the direction cosines from ImageOrientationPatient
612
- # - pixel_spacing is the physical distance between the centers of adjacent pixels
613
-
614
- if slice_index is not None and instance_number is not None:
615
- raise ValueError("Either slice_index or instance_number should be provided, not both.")
616
-
617
- if slice_index is None:
618
- if instance_number is None:
619
- instance_number = _get_instance_number(ds)
620
- root_instance_number = ds.get('InstanceNumber', 1)
621
- if root_instance_number is None:
622
- root_instance_number = 1
623
- slice_index = instance_number - root_instance_number
624
-
625
- # Get required DICOM attributes
626
- image_position = np.array(get_image_position(ds, slice_index), dtype=np.float64)
627
- image_orientation = np.array(get_image_orientation(ds, slice_index), dtype=np.float64).reshape(2, 3)
628
- # image_position = np.array(ds.ImagePositionPatient, dtype=np.float64) # (0020,0032)
629
- # image_orientation = np.array(ds.ImageOrientationPatient, dtype=np.float64).reshape(2, 3) # (0020,0037)
630
- # pixel_spacing = np.array(ds.PixelSpacing, dtype=np.float64) # (0028,0030)
631
- pixel_spacing = np.array(get_pixel_spacing(ds, slice_index), dtype=np.float64) # (0028,0030)
632
-
633
- # Compute row and column vectors from image orientation
634
- row_vector = image_orientation[0]
635
- col_vector = image_orientation[1]
636
-
637
- # Compute patient coordinates
638
- patient_coords = image_position + pixel_x * pixel_spacing[0] * row_vector + pixel_y * pixel_spacing[1] * col_vector
639
-
640
- return patient_coords
641
-
642
-
643
- def determine_anatomical_plane(ds: pydicom.Dataset,
644
- slice_axis: int,
645
- alignment_threshold: float = 0.95) -> str:
646
- """
647
- Determine the anatomical plane of a DICOM slice (Axial, Sagittal, Coronal, Oblique, or Unknown).
648
-
649
- Args:
650
- ds (pydicom.Dataset): The DICOM dataset containing the image metadata.
651
- slice_axis (int): The axis of the slice to analyze (0, 1, or 2).
652
- alignment_threshold (float): Threshold for considering alignment with anatomical axes.
653
-
654
- Returns:
655
- str: The name of the anatomical plane ('Axial', 'Sagittal', 'Coronal', 'Oblique', or 'Unknown').
656
-
657
- Raises:
658
- ValueError: If `slice_index` is not 0, 1, or 2.
659
- """
660
-
661
- if slice_axis not in [0, 1, 2]:
662
- raise ValueError("slice_index must be 0, 1 or 2")
663
- # Check if Image Orientation Patient exists
664
- if not hasattr(ds, 'ImageOrientationPatient') or ds.ImageOrientationPatient is None:
665
- return "Unknown"
666
- # Get the Image Orientation Patient (IOP) - 6 values defining row and column directions
667
- iop = np.array(ds.ImageOrientationPatient, dtype=float)
668
- if len(iop) != 6:
669
- return "Unknown"
670
- # Extract row and column direction vectors
671
- row_dir = iop[:3] # First 3 values: row direction cosines
672
- col_dir = iop[3:] # Last 3 values: column direction cosines
673
- # Calculate the normal vector (slice direction) using cross product
674
- normal = np.cross(row_dir, col_dir)
675
- normal = normal / np.linalg.norm(normal) # Normalize
676
- # Define standard anatomical axes
677
- # LPS coordinate system: L = Left, P = Posterior, S = Superior
678
- axes = {
679
- 'sagittal': np.array([1, 0, 0]), # L-R axis (left-right)
680
- 'coronal': np.array([0, 1, 0]), # A-P axis (anterior-posterior)
681
- 'axial': np.array([0, 0, 1]) # S-I axis (superior-inferior)
682
- }
683
- # For each slice_index, determine which axis we're examining
684
- if slice_axis == 0:
685
- # ds.pixel_array[0,:,:] - slicing along first dimension
686
- # The normal vector corresponds to the direction we're slicing through
687
- examine_vector = normal
688
- elif slice_axis == 1:
689
- # ds.pixel_array[:,0,:] - slicing along second dimension
690
- # This corresponds to the row direction
691
- examine_vector = row_dir
692
- elif slice_axis == 2:
693
- # ds.pixel_array[:,:,0] - slicing along third dimension
694
- # This corresponds to the column direction
695
- examine_vector = col_dir
696
- # Find which anatomical axis is most aligned with our examine_vector
697
- max_dot = 0
698
- best_axis = "Unknown"
699
- for axis_name, axis_vector in axes.items():
700
- dot_product = abs(np.dot(examine_vector, axis_vector))
701
- if dot_product > max_dot:
702
- max_dot = dot_product
703
- best_axis = axis_name
704
- if max_dot >= alignment_threshold:
705
- return best_axis.capitalize()
706
- else:
707
- return "Oblique"
@@ -1,187 +0,0 @@
1
- import numpy as np
2
- import nibabel as nib
3
- from PIL import Image
4
- from .dicom_utils import load_image_normalized, is_dicom
5
- import pydicom
6
- import os
7
- from typing import Any
8
- import logging
9
- from PIL import ImageFile
10
- import cv2
11
-
12
- ImageFile.LOAD_TRUNCATED_IMAGES = True
13
-
14
- _LOGGER = logging.getLogger(__name__)
15
-
16
- IMAGE_EXTS = ('.png', '.jpg', '.jpeg')
17
- NII_EXTS = ('.nii', '.nii.gz')
18
- VIDEO_EXTS = ('.mp4', '.avi', '.mov', '.mkv')
19
-
20
-
21
- def read_video(file_path: str, index: int = None) -> np.ndarray:
22
- cap = cv2.VideoCapture(file_path)
23
- if not cap.isOpened():
24
- raise ValueError(f"Failed to open video file: {file_path}")
25
- try:
26
- if index is None:
27
- frames = []
28
- while True:
29
- ret, frame = cap.read()
30
- if not ret:
31
- break
32
- # Convert BGR to RGB and transpose to (C, H, W) format
33
- frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
34
- frame = frame.transpose(2, 0, 1)
35
- frames.append(frame)
36
- imgs = np.array(frames) # shape: (#frames, C, H, W)
37
- else:
38
- while index > 0:
39
- cap.grab()
40
- index -= 1
41
- ret, frame = cap.read()
42
- if not ret:
43
- raise ValueError(f"Failed to read frame {index} from video file: {file_path}")
44
- # Convert BGR to RGB and transpose to (C, H, W) format
45
- frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
46
- imgs = frame.transpose(2, 0, 1)
47
- finally:
48
- cap.release()
49
-
50
- if imgs is None or len(imgs) == 0:
51
- raise ValueError(f"No frames found in video file: {file_path}")
52
-
53
- return imgs
54
-
55
-
56
- def read_nifti(file_path: str, mimetype: str | None = None) -> np.ndarray:
57
- """
58
- Read a NIfTI file and return the image data in standardized format.
59
-
60
- Args:
61
- file_path: Path to the NIfTI file (.nii or .nii.gz)
62
- mimetype: Optional MIME type of the file. If provided, it can help in determining how to read the file.
63
-
64
- Returns:
65
- np.ndarray: Image data with shape (#frames, C, H, W)
66
- """
67
- from nibabel.filebasedimages import ImageFileError
68
- try:
69
- imgs = nib.load(file_path).get_fdata() # shape: (W, H, #frame) or (W, H)
70
- except ImageFileError as e:
71
- if mimetype is None:
72
- raise e
73
- # has_ext = os.path.splitext(file_path)[1] != ''
74
- if mimetype == 'application/gzip':
75
- with gzip.open(file_path, 'rb') as f:
76
- imgs = nib.Nifti1Image.from_stream(f).get_fdata()
77
- elif mimetype in ('image/x.nifti', 'application/x-nifti'):
78
- with open(file_path, 'rb') as f:
79
- imgs = nib.Nifti1Image.from_stream(f).get_fdata()
80
- else:
81
- raise e
82
- if imgs.ndim == 2:
83
- imgs = imgs.transpose(1, 0)
84
- imgs = imgs[np.newaxis, np.newaxis]
85
- elif imgs.ndim == 3:
86
- imgs = imgs.transpose(2, 1, 0)
87
- imgs = imgs[:, np.newaxis]
88
- else:
89
- raise ValueError(f"Unsupported number of dimensions in '{file_path}': {imgs.ndim}")
90
-
91
- return imgs
92
-
93
-
94
- def read_image(file_path: str) -> np.ndarray:
95
- with Image.open(file_path) as pilimg:
96
- imgs = np.array(pilimg)
97
- if imgs.ndim == 2: # (H, W)
98
- imgs = imgs[np.newaxis, np.newaxis]
99
- elif imgs.ndim == 3: # (H, W, C)
100
- imgs = imgs.transpose(2, 0, 1)[np.newaxis] # (H, W, C) -> (1, C, H, W)
101
-
102
- return imgs
103
-
104
-
105
- def read_array_normalized(file_path: str,
106
- index: int | None = None,
107
- return_metainfo: bool = False,
108
- use_magic=False) -> np.ndarray | tuple[np.ndarray, Any]:
109
- """
110
- Read an array from a file.
111
-
112
- Args:
113
- file_path: The path to the file.
114
- index: If specified, read only the frame at this index (0-based).
115
- If None, read all frames.
116
- Supported file formats are NIfTI (.nii, .nii.gz), PNG (.png), JPEG (.jpg, .jpeg) and npy (.npy).
117
-
118
- Returns:
119
- The array read from the file in shape (#frames, C, H, W), if `index=None`,
120
- or (C, H, W) if `index` is specified.
121
- """
122
- if not os.path.exists(file_path):
123
- raise FileNotFoundError(f"File not found: {file_path}")
124
-
125
- metainfo = None
126
-
127
- try:
128
- if is_dicom(file_path):
129
- ds = pydicom.dcmread(file_path)
130
- if index is not None:
131
- imgs = load_image_normalized(ds, index=index)[0]
132
- else:
133
- imgs = load_image_normalized(ds)
134
- # Free up memory
135
- if hasattr(ds, '_pixel_array'):
136
- ds._pixel_array = None
137
- if hasattr(ds, 'PixelData'):
138
- ds.PixelData = None
139
- metainfo = ds
140
- else:
141
- if use_magic:
142
- import magic # it is important to import here because magic has an OS lib dependency.
143
- mime_type = magic.from_file(file_path, mime=True)
144
- else:
145
- mime_type = ""
146
-
147
- if mime_type.startswith('video/') or file_path.endswith(VIDEO_EXTS):
148
- imgs = read_video(file_path, index)
149
- else:
150
- if mime_type in ('image/x.nifti', 'application/x-nifti') or mime_type == 'application/gzip' or file_path.endswith(NII_EXTS):
151
- imgs = read_nifti(file_path, mimetype=mime_type)
152
- # For NIfTI files, try to load associated JSON metadata
153
- if return_metainfo:
154
- json_path = file_path.replace('.nii.gz', '.json').replace('.nii', '.json')
155
- if os.path.exists(json_path):
156
- try:
157
- import json
158
- with open(json_path, 'r') as f:
159
- metainfo = json.load(f)
160
- _LOGGER.debug(f"Loaded JSON metadata from {json_path}")
161
- except Exception as e:
162
- _LOGGER.warning(f"Failed to load JSON metadata from {json_path}: {e}")
163
- metainfo = None
164
- elif mime_type.startswith('image/') or file_path.endswith(IMAGE_EXTS):
165
- imgs = read_image(file_path)
166
- elif file_path.endswith('.npy') or mime_type == 'application/x-numpy-data':
167
- imgs = np.load(file_path)
168
- if imgs.ndim != 4:
169
- raise ValueError(f"Unsupported number of dimensions in '{file_path}': {imgs.ndim}")
170
- else:
171
- raise ValueError(f"Unsupported file format '{mime_type}' of '{file_path}'")
172
-
173
- if index is not None:
174
- if len(imgs) > 1:
175
- _LOGGER.warning(f"It is inefficient to load all frames from '{file_path}' to access a single frame." +
176
- " Consider converting the file to a format that supports random access (DICOM), or" +
177
- " convert to png/jpeg files or" +
178
- " manually handle all frames at once instead of loading a specific frame.")
179
- imgs = imgs[index]
180
-
181
- if return_metainfo:
182
- return imgs, metainfo
183
- return imgs
184
-
185
- except Exception as e:
186
- _LOGGER.error(f"Failed to read array from '{file_path}': {e}")
187
- raise e