datamint 1.6.0__py3-none-any.whl → 1.6.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- datamint/apihandler/annotation_api_handler.py +14 -10
- datamint/apihandler/dto/annotation_dto.py +1 -1
- datamint/apihandler/root_api_handler.py +4 -2
- datamint/client_cmd_tools/datamint_upload.py +101 -42
- datamint/dataset/base_dataset.py +2 -2
- {datamint-1.6.0.dist-info → datamint-1.6.2.dist-info}/METADATA +2 -1
- {datamint-1.6.0.dist-info → datamint-1.6.2.dist-info}/RECORD +9 -11
- datamint/utils/dicom_utils.py +0 -707
- datamint/utils/io_utils.py +0 -187
- {datamint-1.6.0.dist-info → datamint-1.6.2.dist-info}/WHEEL +0 -0
- {datamint-1.6.0.dist-info → datamint-1.6.2.dist-info}/entry_points.txt +0 -0
datamint/apihandler/annotation_api_handler.py
CHANGED

@@ -10,7 +10,6 @@ import os
 import asyncio
 import aiohttp
 from requests.exceptions import HTTPError
-from deprecated.sphinx import deprecated
 from .dto.annotation_dto import CreateAnnotationDto, LineGeometry, BoxGeometry, CoordinateSystem, AnnotationType
 import pydicom
 import json
@@ -237,7 +236,7 @@ class AnnotationAPIHandler(BaseAPIHandler):
     async def _upload_volume_segmentation_async(self,
                                                 resource_id: str,
                                                 file_path: str | np.ndarray,
-                                                name: dict[int, str] | dict[tuple, str],
+                                                name: str | dict[int, str] | dict[tuple, str] | None,
                                                 imported_from: Optional[str] = None,
                                                 author_email: Optional[str] = None,
                                                 worklist_id: Optional[str] = None,
@@ -263,6 +262,13 @@ class AnnotationAPIHandler(BaseAPIHandler):
         Raises:
             ValueError: If name is not a string or file format is unsupported for volume upload.
         """
+
+        if isinstance(name, str):
+            raise NotImplementedError("`name=string` is not supported yet for volume segmentation.")
+        if isinstance(name, dict):
+            if any(isinstance(k, tuple) for k in name.keys()):
+                raise NotImplementedError("For volume segmentations, `name` must be a dictionary with integer keys only.")
+
         # Prepare file for upload
         if isinstance(file_path, str):
             if file_path.endswith('.nii') or file_path.endswith('.nii.gz'):
@@ -275,7 +281,8 @@ class AnnotationAPIHandler(BaseAPIHandler):
             form.add_field('model_id', model_id)  # Add model_id if provided
         if worklist_id is not None:
             form.add_field('annotation_worklist_id', worklist_id)
-
+        if name is not None:
+            form.add_field('segmentation_map', json.dumps(name), content_type='application/json')

         request_params = dict(
             method='POST',
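Note: the two hunks above change how segmentation names reach the volume endpoint. `name` may now be `None` or a mapping from integer pixel value to segmentation name; plain strings and tuple keys are rejected up front, and any non-`None` mapping is serialized into the new `segmentation_map` form field. A minimal sketch of the accepted shape (the label names are hypothetical, not from this diff):

```python
import json

name = {1: "liver", 2: "spleen"}   # accepted: int pixel value -> name

# Rejected by the validation added above:
#   "liver"                  -> NotImplementedError (plain string)
#   {(255, 0, 0): "liver"}   -> NotImplementedError (tuple keys)

# What gets attached to the multipart form as `segmentation_map`:
print(json.dumps(name))  # {"1": "liver", "2": "spleen"} -- JSON coerces keys to strings
```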
@@ -449,30 +456,27 @@ class AnnotationAPIHandler(BaseAPIHandler):
         if isinstance(file_path, str) and not os.path.exists(file_path):
             raise FileNotFoundError(f"File {file_path} not found.")

-        name = AnnotationAPIHandler.standardize_segmentation_names(name)
-
         # Handle NIfTI files specially - upload as single volume
         if isinstance(file_path, str) and (file_path.endswith('.nii') or file_path.endswith('.nii.gz')):
             _LOGGER.info(f"Uploading NIfTI segmentation file: {file_path}")
             if frame_index is not None:
                 raise ValueError("Do not provide frame_index for NIfTI segmentations.")
             loop = asyncio.get_event_loop()
-            task = self.
+            task = self._upload_volume_segmentation_async(
                 resource_id=resource_id,
-                frame_index=None,
                 file_path=file_path,
                 name=name,
                 imported_from=imported_from,
                 author_email=author_email,
-                discard_empty_segmentations=False,
                 worklist_id=worklist_id,
                 model_id=model_id,
-                transpose_segmentation=transpose_segmentation
-                upload_volume=True
+                transpose_segmentation=transpose_segmentation
             )
             return loop.run_until_complete(task)
         # All other file types are converted to multiple PNGs and uploaded frame by frame.

+        name = AnnotationAPIHandler.standardize_segmentation_names(name)
+
         to_run = []
         # Generate IOs for the segmentations.
         nframes, fios = AnnotationAPIHandler._generate_segmentations_ios(file_path,
datamint/apihandler/root_api_handler.py
CHANGED

@@ -6,8 +6,8 @@ from requests.exceptions import HTTPError
 import logging
 import asyncio
 import aiohttp
-from 
-from 
+from medimgkit.dicom_utils import anonymize_dicom, to_bytesio, is_dicom
+from medimgkit import dicom_utils
 import pydicom
 from pathlib import Path
 from datetime import date
@@ -447,6 +447,8 @@ class RootAPIHandler(BaseAPIHandler):
                               for segfiles in segmentation_files]

         for segfiles in segmentation_files:
+            if segfiles is None:
+                continue
             if 'files' not in segfiles:
                 raise ValueError("segmentation_files must contain a 'files' key with a list of file paths.")
             if 'names' in segfiles:
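Both rewritten imports point at `medimgkit`, the dependency added in this release (see the METADATA hunk below); the corresponding in-tree modules `datamint/utils/dicom_utils.py` and `datamint/utils/io_utils.py` are deleted at the end of this diff. A minimal sketch, assuming medimgkit keeps the signatures of the deleted helpers reproduced later in this diff ("scan.dcm" is a hypothetical path):

```python
from io import BytesIO

import pydicom
from medimgkit.dicom_utils import anonymize_dicom, is_dicom, to_bytesio

# Assumption: same behavior as the identically-named functions in the
# deleted datamint/utils/dicom_utils.py shown at the end of this diff.
if is_dicom("scan.dcm"):
    ds = pydicom.dcmread("scan.dcm")
    anonymized = anonymize_dicom(ds, copy=True)          # clears PHI tags, works on a copy
    buf: BytesIO = to_bytesio(anonymized, name="scan.dcm")  # serialized, ready for upload
```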
datamint/client_cmd_tools/datamint_upload.py
CHANGED

@@ -5,7 +5,7 @@ from humanize import naturalsize
 import logging
 from pathlib import Path
 import sys
-from 
+from medimgkit.dicom_utils import is_dicom
 import fnmatch
 from typing import Generator, Optional, Any
 from collections import defaultdict
@@ -15,6 +15,7 @@ from datamint.client_cmd_tools.datamint_config import ask_api_key
 from datamint.utils.logging_utils import load_cmdline_logging_config
 import yaml
 from collections.abc import Iterable
+import pandas as pd

 # Create two loggings: one for the user and one for the developer
 _LOGGER = logging.getLogger(__name__)
@@ -23,6 +24,38 @@ _USER_LOGGER = logging.getLogger('user_logger')
 MAX_RECURSION_LIMIT = 1000


+def _read_segmentation_names(segmentation_names_path: str | Path) -> dict:
+    """
+    Read a segmentation names file (yaml or csv) and return its content as a dictionary.
+    If the file is a YAML file, it should contain two keys: "segmentation_names" and "class_names".
+    If the file is a CSV file, it should contain the following columns:
+        index, r, g, b, ..., name
+    """
+    segmentation_names_path = Path(segmentation_names_path)
+    if segmentation_names_path.suffix in ['.yaml', '.yml']:
+        with open(segmentation_names_path, 'r') as f:
+            metadata = yaml.safe_load(f)
+    elif segmentation_names_path.suffix in ['.csv', '.tsv']:
+        df = pd.read_csv(segmentation_names_path,
+                         header=None,
+                         index_col=0,
+                         sep=None,  # use sep=None to automatically detect the separator
+                         engine='python'
+                         )
+        df = df.rename(columns={1: 'r', 2: 'g', 3: 'b', df.columns[-1]: 'name'})
+        # df = df.set_index(['r', 'g', 'b'])
+        metadata = {'class_names': df['name'].to_dict()}
+    else:
+        raise ValueError(f"Unsupported file format: {segmentation_names_path.suffix}")
+
+    if 'segmentation_names' in metadata:
+        segnames = sorted(metadata['segmentation_names'],
+                          key=lambda x: len(x))
+        metadata['segmentation_names'] = segnames
+
+    return metadata
+
+
 def _is_valid_path_argparse(x):
     """
     argparse type that checks if the path exists
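The new `_read_segmentation_names` helper accepts either format described in its docstring. A minimal sketch of both formats and of what the parser produces, using hypothetical label names and mirroring the function's own pandas/yaml logic:

```python
import io

import pandas as pd
import yaml

# YAML variant: the two optional keys documented above (hypothetical names).
yaml_text = """
segmentation_names:
  - liver
  - liver_tumor
class_names:
  1: parenchyma
  2: lesion
"""
metadata = yaml.safe_load(yaml_text)
# _read_segmentation_names() additionally sorts segmentation_names by length:
metadata['segmentation_names'] = sorted(metadata['segmentation_names'], key=len)
print(metadata['segmentation_names'])  # ['liver', 'liver_tumor']

# CSV variant: index, r, g, b, ..., name (color-table style, hypothetical rows).
csv_text = "1,255,0,0,liver\n2,0,255,0,spleen\n"
df = pd.read_csv(io.StringIO(csv_text), header=None, index_col=0, sep=None, engine='python')
df = df.rename(columns={1: 'r', 2: 'g', 3: 'b', df.columns[-1]: 'name'})
print({'class_names': df['name'].to_dict()})  # {'class_names': {1: 'liver', 2: 'spleen'}}
```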
@@ -101,7 +134,6 @@ def walk_to_depth(path: str | Path,
                 continue
             yield from walk_to_depth(child, depth-1, exclude_pattern)
         else:
-            _LOGGER.debug(f"yielding {child} from {path}")
             yield child


@@ -157,31 +189,32 @@ def handle_api_key() -> str | None:

 def _find_segmentation_files(segmentation_root_path: str,
                              images_files: list[str],
-                             segmentation_metainfo: dict = None
-                             ) -> 
+                             segmentation_metainfo: dict | None = None
+                             ) -> list[dict]:
     """
     Find the segmentation files that match the images files based on the same folder structure
     """

-
-
-
-    if len(images_files) == 1 and os.path.isfile(images_files[0]) and os.path.isfile(segmentation_root_path):
-        return [{'files': [segmentation_root_path]}]
-
-    segmentation_files = []
-    acceptable_extensions = ['.nii.gz', '.nii', '.png']
-
+    segnames = None
+    classnames = None
     if segmentation_metainfo is not None:
         if 'segmentation_names' in segmentation_metainfo:
             segnames = sorted(segmentation_metainfo['segmentation_names'],
                               key=lambda x: len(x))
-        else:
-            segnames = None
         classnames = segmentation_metainfo.get('class_names', None)
         if classnames is not None:
             _LOGGER.debug(f"Number of class names: {len(classnames)}")

+    if len(images_files) == 1 and os.path.isfile(images_files[0]) and os.path.isfile(segmentation_root_path):
+        ret = [{'files': [segmentation_root_path]}]
+        if classnames is not None:
+            ret[0]['names'] = classnames
+        _LOGGER.debug(f"Returning segmentation files: {ret}")
+        return ret
+
+    segmentation_files = []
+    acceptable_extensions = ['.nii.gz', '.nii', '.png']
+
     segmentation_root_path = Path(segmentation_root_path).absolute()

     for imgpath in images_files:
@@ -197,7 +230,6 @@ def _find_segmentation_files(segmentation_root_path: str,
         else:
             common_parent = Path(*common_parent)

-        _LOGGER.debug(f"_find_segmentation_files::common_parent: {common_parent}")
         path_structure = imgpath_parent.relative_to(common_parent).parts[1:]

         # path_structure = imgpath_parent.relative_to(root_path).parts[1:]
@@ -230,24 +262,47 @@ def _find_segmentation_files(segmentation_root_path: str,
         if len(frame_indices) > 0:
             seginfo['frame_index'] = frame_indices

-
-
-
-
-
+        snames_associated = []
+        for segfile in seg_files:
+            # check if there is a metadata file associated, besides json, with the segmentation
+            for ext in ['.yaml', '.yml', '.csv']:
+                if str(segfile).endswith('nii.gz'):
+                    # has two extensions, so we need to remove both
+                    metadata_file = segfile.with_suffix('').with_suffix(ext)
+                    if not metadata_file.exists():
+                        metadata_file = segfile.with_suffix(ext)
+                else:
+                    metadata_file = segfile.with_suffix(ext)
+                if metadata_file.exists():
+                    _LOGGER.debug(f"Found metadata file: {metadata_file}")
+                    try:
+                        new_segmentation_metainfo = _read_segmentation_names(metadata_file)
+                        cur_segnames = new_segmentation_metainfo.get('segmentation_names', segnames)
+                        cur_classnames = new_segmentation_metainfo.get('class_names', classnames)
+                        break
+                    except Exception as e:
+                        _LOGGER.warning(f"Error reading metadata file {metadata_file}: {e}")
+            else:
+                cur_segnames = segnames
+                cur_classnames = classnames
+
+            if cur_segnames is None:
+                _LOGGER.debug(f'adding {cur_classnames}')
+                snames_associated.append(cur_classnames)
+            else:
+                for segname in cur_segnames:
+                    if segname in str(segfile):
+                        if cur_classnames is not None:
+                            new_segname = {cid: f'{segname}_{cname}' for cid, cname in cur_classnames.items()}
+                            new_segname.update({'default': segname})
+                        else:
+                            new_segname = segname
+                        snames_associated.append(new_segname)
+                        break
                 else:
-
-
-
-                        new_segname = {cid: f'{segname}_{cname}' for cid, cname in classnames.items()}
-                        new_segname.update({'default': segname})
-                    else:
-                        new_segname = segname
-                    snames_associated.append(new_segname)
-                    break
-            else:
-                _USER_LOGGER.warning(f"Segmentation file {segname} does not match any segmentation name.")
-                snames_associated.append(None)
+                    _USER_LOGGER.warning(f"Segmentation file {segfile} does not match any segmentation name.")
+                    snames_associated.append(None)
+        if len(snames_associated) > 0:
             seginfo['names'] = snames_associated

         segmentation_files.append(seginfo)
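The `with_suffix` dance above exists because `.nii.gz` carries two extensions. A small sketch (hypothetical paths) of which sidecar candidates get probed:

```python
from pathlib import Path

# How the sidecar lookup above resolves candidate metadata files.
segfile = Path("masks/liver.nii.gz")
print(segfile.with_suffix('').with_suffix('.yaml'))  # masks/liver.yaml      (preferred)
print(segfile.with_suffix('.yaml'))                  # masks/liver.nii.yaml  (fallback)

# Single-extension files need only one with_suffix() call:
segfile = Path("masks/liver.png")
print(segfile.with_suffix('.yaml'))                  # masks/liver.yaml
```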
@@ -268,7 +323,7 @@ def _find_json_metadata(file_path: str | Path) -> Optional[str]:
         Optional[str]: Path to the JSON metadata file if found, None otherwise
     """
     file_path = Path(file_path)
-
+
     # Handle .nii.gz files specially - need to remove both extensions
     if file_path.name.endswith('.nii.gz'):
         base_name = file_path.name[:-7]  # Remove .nii.gz
@@ -320,7 +375,7 @@ def _collect_metadata_files(files_path: list[str], auto_detect_json: bool) -> tu
     if used_json_files:
         _LOGGER.debug(f"Filtering out {len(used_json_files)} JSON metadata files from main upload list")
         filtered_metadata_files = []
-
+
     for original_file in files_path:
         if original_file not in used_json_files:
             original_index = files_path.index(original_file)
@@ -376,8 +431,10 @@ def _parse_args() -> tuple[Any, list[str], Optional[list[dict]], Optional[list[s
                         help='Path to the segmentation file(s) or a directory')
     parser.add_argument('--segmentation_names', type=_is_valid_path_argparse, metavar="FILE",
                         required=False,
-                        help='Path to a yaml file containing the segmentation names.' +
-                        '
+                        help='Path to a yaml or csv file containing the segmentation names.' +
+                        ' If yaml, the file may contain two keys: "segmentation_names" and "class_names".'
+                        ' If csv, the file should contain the following columns:'
+                        ' index, r, g, b, ..., name')
     parser.add_argument('--yes', action='store_true',
                         help='Automatically answer yes to all prompts')
     parser.add_argument('--transpose-segmentation', action='store_true', default=False,
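The expanded help text mirrors the two formats `_read_segmentation_names` accepts. In practice this option is combined with `--segmentation_path`, e.g. something like `datamint-upload --path images/ --segmentation_path masks/ --segmentation_names labels.csv`; the exact console-script name comes from entry_points.txt, which this diff does not show, so the command name here is an assumption.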
@@ -446,15 +503,17 @@ def _parse_args() -> tuple[Any, list[str], Optional[list[dict]], Optional[list[s
         raise ValueError(f"No valid non-metadata files found in {args.path}")

     if args.segmentation_names is not None:
-
-        segmentation_names = yaml.safe_load(f)
+        segmentation_names = _read_segmentation_names(args.segmentation_names)
     else:
         segmentation_names = None

     _LOGGER.debug(f'finding segmentations at {args.segmentation_path}')
-
-
-
+    if args.segmentation_path is None:
+        segmentation_files = None
+    else:
+        segmentation_files = _find_segmentation_files(args.segmentation_path,
+                                                      file_path,
+                                                      segmentation_metainfo=segmentation_names)

     _LOGGER.info(f"args parsed: {args}")
datamint/dataset/base_dataset.py
CHANGED

@@ -14,9 +14,9 @@ from torch.utils.data import DataLoader
 import torch
 from torch import Tensor
 from datamint.apihandler.base_api_handler import DatamintException
-from 
+from medimgkit.dicom_utils import is_dicom
 import cv2
-from 
+from medimgkit.io_utils import read_array_normalized
 from datetime import datetime

 _LOGGER = logging.getLogger(__name__)
{datamint-1.6.0.dist-info → datamint-1.6.2.dist-info}/METADATA
CHANGED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: datamint
-Version: 1.6.0
+Version: 1.6.2
 Summary: A library for interacting with the Datamint API, designed for efficient data management, processing and Deep Learning workflows.
 Requires-Python: >=3.10
 Classifier: Programming Language :: Python :: 3
@@ -19,6 +19,7 @@ Requires-Dist: humanize (>=4.0.0,<5.0.0)
 Requires-Dist: lazy-loader (>=0.3.0)
 Requires-Dist: lightning
 Requires-Dist: matplotlib
+Requires-Dist: medimgkit
 Requires-Dist: nest-asyncio (>=1.0.0,<2.0.0)
 Requires-Dist: nibabel (>=4.0.0)
 Requires-Dist: numpy
{datamint-1.6.0.dist-info → datamint-1.6.2.dist-info}/RECORD
CHANGED

@@ -1,16 +1,16 @@
 datamint/__init__.py,sha256=7rKCCsaa4RBRTIfuHB708rai1xwDHLtkFNFJGKYG5D4,757
-datamint/apihandler/annotation_api_handler.py,sha256=
+datamint/apihandler/annotation_api_handler.py,sha256=ChwaSYjoOAVS7vuyP3-cfpDHaHwk_wXLf8QQaSU_oSM,51893
 datamint/apihandler/api_handler.py,sha256=cdVSddrFCKlF_BJ81LO1aJ0OP49rssjpNEFzJ6Q7YyY,384
 datamint/apihandler/base_api_handler.py,sha256=XSxZEQEkbQpuixGDu_P9jbxUQht3Z3JgxaeiFKPkVDM,11690
-datamint/apihandler/dto/annotation_dto.py,sha256=
+datamint/apihandler/dto/annotation_dto.py,sha256=qId1RK1VO7dXrvGJ7dqJ31jBQB7Z8yy5x0tLSiMxTB4,7105
 datamint/apihandler/exp_api_handler.py,sha256=hFUgUgBc5rL7odK7gTW3MnrvMY1pVfJUpUdzRNobMQE,6226
-datamint/apihandler/root_api_handler.py,sha256=
+datamint/apihandler/root_api_handler.py,sha256=O8Gn1Gp3w7AYeuT_FbwH413o6P_eAYLoRiW0baGY_b4,51795
 datamint/client_cmd_tools/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 datamint/client_cmd_tools/datamint_config.py,sha256=md7dnWrbl10lPtXKbmD9yo6onLJsajeG8Vz0ZWH1v4M,8181
-datamint/client_cmd_tools/datamint_upload.py,sha256=
+datamint/client_cmd_tools/datamint_upload.py,sha256=Q_C3HX9EFCdtIqmWY28qcWJmfUZfR1ml_pj0omnrkXw,29396
 datamint/configs.py,sha256=Bdp6NydYwyCJ2dk19_gf_o3M2ZyQOmMHpLi8wEWNHUk,1426
 datamint/dataset/__init__.py,sha256=4PlUKSvVhdfQvvuq8jQXrkdqnot-iTTizM3aM1vgSwg,47
-datamint/dataset/base_dataset.py,sha256=
+datamint/dataset/base_dataset.py,sha256=bSMuNHUzU7heN0awGemTn3e2zPLhuCsh-qSs_Qt6i9w,39082
 datamint/dataset/dataset.py,sha256=AwS92t5kdmpm9NKFfXFmDmZxEbbPfb_FOMn-FWfu3bE,26590
 datamint/examples/__init__.py,sha256=zcYnd5nLVme9GCTPYH-1JpGo8xXK2WEYvhzcy_2alZc,39
 datamint/examples/example_projects.py,sha256=7Nb_EaIdzJTQa9zopqc-WhTBQWQJSoQZ_KjRS4PB4FI,2931
@@ -18,12 +18,10 @@ datamint/experiment/__init__.py,sha256=5qQOMzoG17DEd1YnTF-vS0qiM-DGdbNh42EUo91CR
 datamint/experiment/_patcher.py,sha256=ZgbezoevAYhJsbiJTvWPALGTcUiMT371xddcTllt3H4,23296
 datamint/experiment/experiment.py,sha256=aHK9dRFdQTi569xgUg1KqlCZLHZpDmSH3g3ndPIZvXw,44546
 datamint/logging.yaml,sha256=a5dsATpul7QHeUHB2TjABFjWaPXBMbO--dgn8GlRqwk,483
-datamint/utils/dicom_utils.py,sha256=sLukP6MB_acx7t868O2HDd_RDEILa97mEe_V9m1EMCY,28991
-datamint/utils/io_utils.py,sha256=lKnUCJEip7W9Xj9wOWsTAA855HnKbjwQON1WjMGqJmM,7374
 datamint/utils/logging_utils.py,sha256=DvoA35ATYG3JTwfXEXYawDyKRfHeCrH0a9czfkmz8kM,1851
 datamint/utils/torchmetrics.py,sha256=lwU0nOtsSWfebyp7dvjlAggaqXtj5ohSEUXOg3L0hJE,2837
 datamint/utils/visualization.py,sha256=yaUVAOHar59VrGUjpAWv5eVvQSfztFG0eP9p5Vt3l-M,4470
-datamint-1.6.
-datamint-1.6.
-datamint-1.6.
-datamint-1.6.
+datamint-1.6.2.dist-info/METADATA,sha256=Raq2vLoDKmDH63F1DF6IqwIdk5dJDNr6TIvfbYpWUz4,4090
+datamint-1.6.2.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
+datamint-1.6.2.dist-info/entry_points.txt,sha256=mn5H6jPjO-rY0W0CAZ6Z_KKWhMLvyVaSpoqk77jlTI4,145
+datamint-1.6.2.dist-info/RECORD,,
datamint/utils/dicom_utils.py
DELETED

@@ -1,707 +0,0 @@
-from pydicom.pixels import pixel_array
-import pydicom
-from pydicom.uid import generate_uid
-from typing import Sequence, Generator, IO, TypeVar, Generic
-import warnings
-from copy import deepcopy
-import logging
-from pathlib import Path
-from pydicom.misc import is_dicom as pydicom_is_dicom
-from io import BytesIO
-import os
-import numpy as np
-from collections import defaultdict
-import uuid
-import hashlib
-from tqdm import tqdm
-
-import pydicom.uid
-
-_LOGGER = logging.getLogger(__name__)
-
-CLEARED_STR = "CLEARED_BY_DATAMINT"
-
-T = TypeVar('T')
-
-
-class GeneratorWithLength(Generic[T]):
-    def __init__(self, generator: Generator[T, None, None], length: int):
-        self.generator = generator
-        self.length = length
-
-    def __len__(self):
-        return self.length
-
-    def __iter__(self):
-        return self.generator
-
-    def __next__(self) -> T:
-        return next(self.generator)
-
-    def close(self):
-        self.generator.close()
-
-    def throw(self, *args):
-        return self.generator.throw(*args)
-
-    def send(self, *args):
-        return self.generator.send(*args)
-
-
-class TokenMapper:
-    def __init__(self, seed: int = 42):
-        self.seed = seed
-
-    def get_token(self, tag: tuple, value: str, simple_id=False) -> str:
-        """Get a consistent token for a given tag and value pair."""
-        if value is None or value == CLEARED_STR:
-            return CLEARED_STR
-
-        # Use a hash function to generate a consistent token
-        token = hashlib.md5(f"{tag}{value}{self.seed}".encode()).hexdigest()
-        if simple_id:
-            return token
-        return generate_uid(entropy_srcs=['DATAMINT', token])
-
-
-_TOKEN_MAPPER = TokenMapper()
-
-
-def anonymize_dicom(ds: pydicom.Dataset,
-                    retain_codes: Sequence[tuple] = [],
-                    copy=False,
-                    token_mapper: TokenMapper = None) -> pydicom.Dataset:
-    """
-    Anonymize a DICOM file by clearing all the specified DICOM tags
-    according to the DICOM standard https://www.dicomstandard.org/News-dir/ftsup/docs/sups/sup55.pdf.
-    This function will generate a new UID for the new DICOM file and clear the specified DICOM tags
-    with consistent tokens for related identifiers.
-
-    Args:
-        ds: pydicom Dataset object.
-        retain_codes: A list of DICOM tag codes to retain the value of.
-        copy: If True, the function will return a copy of the input Dataset object.
-        token_mapper: TokenMapper instance to maintain consistent tokens across calls.
-            If None, uses a global instance.
-
-    Returns:
-        pydicom Dataset object with specified DICOM tags cleared
-    """
-    if copy:
-        ds = deepcopy(ds)
-
-    if token_mapper is None:
-        token_mapper = _TOKEN_MAPPER
-
-    # https://www.dicomstandard.org/News-dir/ftsup/docs/sups/sup55.pdf
-    tags_to_clear = [
-        (0x0008, 0x0014), (0x0008, 0x0050), (0x0008, 0x0080), (0x0008, 0x0081), (0x0008, 0x0090),
-        (0x0008, 0x0092), (0x0008, 0x0094), (0x0008, 0x1010), (0x0008, 0x1030), (0x0008, 0x103E),
-        (0x0008, 0x1040), (0x0008, 0x1048), (0x0008, 0x1050), (0x0008, 0x1060), (0x0008, 0x1070),
-        (0x0008, 0x1080), (0x0008, 0x1155), (0x0008, 0x2111), (0x0010, 0x0010), (0x0010, 0x0020),
-        (0x0010, 0x0030), (0x0010, 0x0032), (0x0010, 0x0040), (0x0010, 0x1000), (0x0010, 0x1001),
-        (0x0010, 0x1010), (0x0010, 0x1020), (0x0010, 0x1030), (0x0010, 0x1090), (0x0010, 0x2160),
-        (0x0010, 0x2180), (0x0010, 0x21B0), (0x0010, 0x4000), (0x0018, 0x1000), (0x0018, 0x1030),
-        (0x0020, 0x000D), (0x0020, 0x000E),  # StudyInstanceUID and SeriesInstanceUID
-        (0x0020, 0x0010), (0x0020, 0x0052), (0x0020, 0x0200), (0x0020, 0x4000), (0x0008, 0x0018),
-        (0x0040, 0x0275), (0x0040, 0xA730), (0x0088, 0x0140), (0x3006, 0x0024), (0x3006, 0x00C2)
-    ]
-
-    # Frame of Reference UID, Series Instance UID, Concatenation UID, and Instance UID, and StudyInstanceUID are converted to new UIDs
-    uid_tags = [(0x0020, 0x0052), (0x0020, 0x000E), (0x0020, 0x9161),
-                (0x0010, 0x0020), (0x0008, 0x0018), (0x0020, 0x000D)]
-    simple_id_tags = [(0x0010, 0x0020)]  # Patient ID
-
-    for code in retain_codes:
-        if code in tags_to_clear:
-            tags_to_clear.remove(code)
-
-    # Clear the specified DICOM tags
-    with warnings.catch_warnings():  # Supress UserWarning from pydicom
-        warnings.filterwarnings("ignore", category=UserWarning, module='pydicom')
-        for tag in tags_to_clear:
-            if tag in ds:
-                if tag == (0x0008, 0x0094):  # Phone number
-                    ds[tag].value = "000-000-0000"
-                # If tag is a floating point number, set it to 0.0
-                elif ds[tag].VR in ['FL', 'FD', 'DS']:
-                    ds[tag].value = 0
-                elif ds[tag].VR == 'SQ':
-                    del ds[tag]
-                else:
-                    if tag in uid_tags:
-                        try:
-                            # Use consistent token mapping for identifiers
-                            original_value = ds[tag].value
-                            ds[tag].value = token_mapper.get_token(tag, original_value, simple_id=tag in simple_id_tags)
-                            tag_name = pydicom.datadict.keyword_for_tag(tag)
-                        except ValueError as e:
-                            ds[tag].value = CLEARED_STR
-                    else:
-                        ds[tag].value = CLEARED_STR
-    if hasattr(ds, 'file_meta') and hasattr(ds, 'SOPInstanceUID'):
-        ds.file_meta.MediaStorageSOPInstanceUID = ds.SOPInstanceUID
-    return ds
-
-
-def is_dicom(f: str | Path | BytesIO) -> bool:
-    if isinstance(f, BytesIO):
-        fp = BytesIO(f.getbuffer())  # Avoid modifying the original BytesIO object
-        fp.read(128)  # preamble
-
-        return fp.read(4) == b"DICM"
-
-    if isinstance(f, Path):
-        f = str(f)
-    if os.path.isdir(f):
-        return False
-
-    fname = f.lower()
-    if fname.endswith('.dcm') or fname.endswith('.dicom'):
-        return True
-
-    # Check if the file has an extension
-    if os.path.splitext(f)[1] != '':
-        return False
-
-    try:
-        return pydicom_is_dicom(f)
-    except FileNotFoundError as e:
-        return None
-
-
-def to_bytesio(ds: pydicom.Dataset, name: str) -> BytesIO:
-    """
-    Convert a pydicom Dataset object to BytesIO object.
-    """
-    dicom_bytes = BytesIO()
-    pydicom.dcmwrite(dicom_bytes, ds)
-    dicom_bytes.seek(0)
-    dicom_bytes.name = name
-    dicom_bytes.mode = 'rb'
-    return dicom_bytes
-
-
-def load_image_normalized(dicom: pydicom.Dataset, index: int = None) -> np.ndarray:
-    """
-    Normalizes the shape of an array of images to (n, c, y, x)=(#slices, #channels, height, width).
-    It uses dicom.Rows, dicom.Columns, and other information to determine the shape.
-
-    Args:
-        dicom: A dicom with images of varying shapes.
-
-    Returns:
-        A numpy array of shape (n, c, y, x)=(#slices, #channels, height, width).
-    """
-    n = dicom.get('NumberOfFrames', 1)
-    if index is None:
-        images = dicom.pixel_array
-    else:
-        if index is not None and index >= n:
-            raise ValueError(f"Index {index} is out of bounds. The number of frames is {n}.")
-        images = pixel_array(dicom, index=index)
-        n = 1
-    shape = images.shape
-
-    c = dicom.get('SamplesPerPixel')
-
-    # x=width, y=height
-    if images.ndim == 2:
-        # Single grayscale image (y, x)
-        # Reshape to (1, 1, y, x)
-        return images.reshape((1, 1) + images.shape)
-    elif images.ndim == 3:
-        # (n, y, x) or (y, x, c)
-        if shape[0] == 1 or (n is not None and n > 1):
-            # (n, y, x)
-            return images.reshape(shape[0], 1, shape[1], shape[2])
-        if shape[2] in (1, 3, 4) or (c is not None and c > 1):
-            # (y, x, c)
-            images = images.transpose(2, 0, 1)
-            return images.reshape(1, *images.shape)
-    elif images.ndim == 4:
-        if shape[3] == c or shape[3] in (1, 3, 4) or (c is not None and c > 1):
-            # (n, y, x, c) -> (n, c, y, x)
-            return images.transpose(0, 3, 1, 2)
-
-    raise ValueError(f"Unsupported DICOM normalization with shape: {shape}, SamplesPerPixel: {c}, NumberOfFrames: {n}")
-
-
-def assemble_dicoms(files_path: list[str | IO],
-                    return_as_IO: bool = False) -> GeneratorWithLength[pydicom.Dataset | IO]:
-    """
-    Assemble multiple DICOM files into a single multi-frame DICOM file.
-    This function will merge the pixel data of the DICOM files and generate a new DICOM file with the combined pixel data.
-
-    Args:
-        files_path: A list of file paths to the DICOM files to be merged.
-
-    Returns:
-        A generator that yields the merged DICOM files.
-    """
-    dicoms_map = defaultdict(list)
-
-    for file_path in tqdm(files_path, desc="Reading DICOMs metadata", unit="file"):
-        dicom = pydicom.dcmread(file_path,
-                                specific_tags=['SeriesInstanceUID', 'InstanceNumber', 'Rows', 'Columns'])
-        series_uid = dicom.get('SeriesInstanceUID', None)
-        if series_uid is None:
-            # generate a random uid
-            series_uid = pydicom.uid.generate_uid()
-        instance_number = dicom.get('InstanceNumber', 0)
-        rows = dicom.get('Rows', None)
-        columns = dicom.get('Columns', None)
-        dicoms_map[series_uid].append((instance_number, file_path, rows, columns))
-        if hasattr(file_path, "seek"):
-            file_path.seek(0)
-
-    # Validate that all DICOMs with the same SeriesInstanceUID have matching dimensions
-    for series_uid, dicom_list in dicoms_map.items():
-        if len(dicom_list) <= 1:
-            continue
-
-        # Get dimensions from first DICOM
-        first_rows = dicom_list[0][2]
-        first_columns = dicom_list[0][3]
-
-        # Check all other DICOMs have the same dimensions
-        for instance_number, file_path, rows, columns in dicom_list:
-            if rows != first_rows or columns != first_columns:
-                msg = (
-                    f"Dimension mismatch in SeriesInstanceUID {series_uid}: "
-                    f"Expected {first_rows}x{first_columns}, got {rows}x{columns} "
-                    f"for file {file_path} and {dicom_list[0][1]}"
-                )
-                _LOGGER.error(msg)
-                raise ValueError(msg)
-
-    # filter out the two last elements of the tuple (rows, columns)
-    dicoms_map = {fr_uid: [(instance_number, file_path) for instance_number, file_path, _, _ in dicoms]
-                  for fr_uid, dicoms in dicoms_map.items()}
-
-    gen = _generate_merged_dicoms(dicoms_map, return_as_IO=return_as_IO)
-    return GeneratorWithLength(gen, len(dicoms_map))
-
-
-def _create_multiframe_attributes(merged_ds: pydicom.Dataset,
-                                  all_dicoms: list[pydicom.Dataset]) -> pydicom.Dataset:
-    ### Shared Functional Groups Sequence ###
-    shared_seq_dataset = pydicom.dataset.Dataset()
-
-    # check if pixel spacing or spacing between slices are equal for all dicoms
-    pixel_spacing = merged_ds.get('PixelSpacing', None)
-    all_pixel_spacing_equal = all(ds.get('PixelSpacing', None) == pixel_spacing
-                                  for ds in all_dicoms)
-    spacing_between_slices = merged_ds.get('SpacingBetweenSlices', None)
-    all_spacing_b_slices_equal = all(ds.get('SpacingBetweenSlices', None) == spacing_between_slices
-                                     for ds in all_dicoms)
-
-    # if they are equal, add them to the shared functional groups sequence
-    if (pixel_spacing is not None and all_pixel_spacing_equal) or (spacing_between_slices is not None and all_spacing_b_slices_equal):
-        pixel_measure = pydicom.dataset.Dataset()
-        if pixel_spacing is not None:
-            pixel_measure.PixelSpacing = pixel_spacing
-        if spacing_between_slices is not None:
-            pixel_measure.SpacingBetweenSlices = spacing_between_slices
-        pixel_measures_seq = pydicom.Sequence([pixel_measure])
-        shared_seq_dataset.PixelMeasuresSequence = pixel_measures_seq
-
-    if len(shared_seq_dataset) > 0:
-        shared_seq = pydicom.Sequence([shared_seq_dataset])
-        merged_ds.SharedFunctionalGroupsSequence = shared_seq
-    #######
-
-    ### Per-Frame Functional Groups Sequence ###
-    perframe_seq_list = []
-    for ds in all_dicoms:
-        per_frame_dataset = pydicom.dataset.Dataset()  # root dataset for each frame
-        pos_dataset = pydicom.dataset.Dataset()
-        orient_dataset = pydicom.dataset.Dataset()
-        pixel_measure = pydicom.dataset.Dataset()
-        framenumber_dataset = pydicom.dataset.Dataset()
-
-        if 'ImagePositionPatient' in ds:
-            pos_dataset.ImagePositionPatient = ds.ImagePositionPatient
-        if 'ImageOrientationPatient' in ds:
-            orient_dataset.ImageOrientationPatient = ds.ImageOrientationPatient
-        if 'PixelSpacing' in ds and all_pixel_spacing_equal == False:
-            pixel_measure.PixelSpacing = ds.PixelSpacing
-        if 'SpacingBetweenSlices' in ds and all_spacing_b_slices_equal == False:
-            pixel_measure.SpacingBetweenSlices = ds.SpacingBetweenSlices
-
-        # Add datasets to the per-frame dataset
-        per_frame_dataset.PlanePositionSequence = pydicom.Sequence([pos_dataset])
-        per_frame_dataset.PlaneOrientationSequence = pydicom.Sequence([orient_dataset])
-        per_frame_dataset.PixelMeasuresSequence = pydicom.Sequence([pixel_measure])
-        per_frame_dataset.FrameContentSequence = pydicom.Sequence([framenumber_dataset])
-
-        perframe_seq_list.append(per_frame_dataset)
-    if len(perframe_seq_list[0]) > 0:
-        perframe_seq = pydicom.Sequence(perframe_seq_list)
-        merged_ds.PerFrameFunctionalGroupsSequence = perframe_seq
-        merged_ds.FrameIncrementPointer = (0x5200, 0x9230)
-
-    return merged_ds
-
-
-def _generate_dicom_name(ds: pydicom.Dataset) -> str:
-    """
-    Generate a meaningful name for a DICOM dataset using its attributes.
-
-    Args:
-        ds: pydicom Dataset object
-
-    Returns:
-        A string containing a descriptive name with .dcm extension
-    """
-    components = []
-
-    # if hasattr(ds, 'filename'):
-    #     components.append(os.path.basename(ds.filename))
-    if hasattr(ds, 'SeriesDescription'):
-        components.append(ds.SeriesDescription)
-    if len(components) == 0 and hasattr(ds, 'SeriesNumber'):
-        components.append(f"ser{ds.SeriesNumber}")
-    if hasattr(ds, 'StudyDescription'):
-        components.append(ds.StudyDescription)
-    elif hasattr(ds, 'StudyID'):
-        components.append(ds.StudyID)
-
-    # Join components and add extension
-    if len(components) > 0:
-        description = "_".join(str(x) for x in components) + ".dcm"
-        # Clean description - remove special chars and spaces
-        description = "".join(c if c.isalnum() else "_" for c in description)
-        if len(description) > 0:
-            return description
-
-    if hasattr(ds, 'SeriesInstanceUID'):
-        return ds.SeriesInstanceUID + ".dcm"
-
-    # Fallback to generic name if no attributes found
-    return ds.filename if hasattr(ds, 'filename') else f"merged_dicom_{uuid.uuid4()}.dcm"
-
-
-def _generate_merged_dicoms(dicoms_map: dict[str, list],
-                            return_as_IO: bool = False) -> Generator[pydicom.Dataset, None, None]:
-    for _, dicoms in dicoms_map.items():
-        dicoms.sort(key=lambda x: x[0])
-        files_path = [file_path for _, file_path in dicoms]
-
-        all_dicoms = [pydicom.dcmread(file_path) for file_path in files_path]
-
-        # Use the first dicom as a template
-        merged_dicom = all_dicoms[0]
-
-        # Combine pixel data
-        pixel_arrays = np.stack([ds.pixel_array for ds in all_dicoms], axis=0)
-
-        # Update the merged dicom
-        merged_dicom.PixelData = pixel_arrays.tobytes()
-        merged_dicom.NumberOfFrames = len(pixel_arrays)  # Set number of frames
-        merged_dicom.SOPInstanceUID = pydicom.uid.generate_uid()  # Generate new SOP Instance UID
-        # Removed deprecated attributes and set Transfer Syntax UID instead:
-        merged_dicom.file_meta.TransferSyntaxUID = pydicom.uid.ImplicitVRLittleEndian
-
-        # Free up memory
-        for ds in all_dicoms[1:]:
-            del ds.PixelData
-
-        # create multi-frame attributes
-        # check if FramTime is equal for all dicoms
-        frame_time = merged_dicom.get('FrameTime', None)
-        all_frame_time_equal = all(ds.get('FrameTime', None) == frame_time for ds in all_dicoms)
-        if frame_time is not None and all_frame_time_equal:
-            merged_dicom.FrameTime = frame_time  # (0x0018,0x1063)
-            merged_dicom.FrameIncrementPointer = (0x0018, 0x1063)  # points to 'FrameTime'
-        else:
-            # TODO: Sometimes FrameTime is present but not equal for all dicoms. In this case, check out 'FrameTimeVector'.
-            merged_dicom = _create_multiframe_attributes(merged_dicom, all_dicoms)
-
-        # Remove tags of single frame dicoms
-        for attr in ['ImagePositionPatient', 'SliceLocation', 'ImageOrientationPatient',
-                     'PixelSpacing', 'SpacingBetweenSlices', 'InstanceNumber']:
-            if hasattr(merged_dicom, attr):
-                delattr(merged_dicom, attr)
-
-        if return_as_IO:
-            name = _generate_dicom_name(merged_dicom)
-            yield to_bytesio(merged_dicom, name=name)
-        else:
-            yield merged_dicom
-
-
-"""
-- The Slice Location (0020,1041) is usually a derived attribute,
-    typically computed from Image Position (Patient) (0020,0032)
-"""
-
-
-def get_space_between_slices(ds: pydicom.Dataset) -> float:
-    """
-    Get the space between slices from a DICOM dataset.
-
-    Parameters:
-        ds (pydicom.Dataset): The DICOM dataset containing image metadata.
-
-    Returns:
-        float: Space between slices in millimeters.
-    """
-    # Get the Spacing Between Slices attribute
-    if 'SpacingBetweenSlices' in ds:
-        return ds.SpacingBetweenSlices
-
-    if 'SharedFunctionalGroupsSequence' in ds:
-        shared_group = ds.SharedFunctionalGroupsSequence[0]
-        if 'PixelMeasuresSequence' in shared_group and 'SpacingBetweenSlices' in shared_group.PixelMeasuresSequence[0]:
-            return shared_group.PixelMeasuresSequence[0].SpacingBetweenSlices
-
-    if 'SliceThickness' in ds:
-        return ds.SliceThickness
-
-    return 1.0  # Default value if not found
-
-
-def get_image_orientation(ds: pydicom.Dataset, slice_index: int) -> np.ndarray:
-    """
-    Get the image orientation from a DICOM dataset.
-
-    Parameters:
-        ds (pydicom.Dataset): The DICOM dataset containing image metadata.
-
-    Returns:
-        numpy.ndarray: Image orientation (X, Y, Z) for the specified slice.
-    """
-    # Get the Image Orientation Patient attribute
-    if 'ImageOrientationPatient' in ds:
-        return ds.ImageOrientationPatient
-
-    if 'PerFrameFunctionalGroupsSequence' in ds:
-        if 'PlaneOrientationSequence' in ds.PerFrameFunctionalGroupsSequence[slice_index]:
-            return ds.PerFrameFunctionalGroupsSequence[slice_index].PlaneOrientationSequence[0].ImageOrientationPatient
-
-    if 'SharedFunctionalGroupsSequence' in ds:
-        return ds.SharedFunctionalGroupsSequence[0].PlaneOrientationSequence[0].ImageOrientationPatient
-
-    raise ValueError("ImageOrientationPatient not found in DICOM dataset.")
-
-
-def get_slice_orientation(ds: pydicom.Dataset, slice_index: int) -> np.ndarray:
-    """
-    Get the slice orientation from a DICOM dataset.
-
-    Parameters:
-        ds (pydicom.Dataset): The DICOM dataset containing image metadata.
-        slice_index (int): 0-based index of the slice in the 3D volume. This is the `InstanceNumber-1`.
-
-    Returns:
-        numpy.ndarray: Slice orientation (X, Y, Z) for the specified slice.
-    """
-    # Get the Image Orientation Patient attribute
-
-    x_orient, y_orient = np.array(get_image_orientation(ds, slice_index), dtype=np.float64).reshape(2, 3)
-    # compute the normal vector of the slice
-    slice_orient = np.cross(x_orient, y_orient)
-    # normalize the vector to space_between_slices
-    space_between_slices = get_space_between_slices(ds)
-    slice_orient = slice_orient / np.linalg.norm(slice_orient) * space_between_slices
-
-    return slice_orient
-
-
-def _get_instance_number(ds: pydicom.Dataset, slice_index: int | None = None) -> int:
-    if slice_index is None:
-        if 'InstanceNumber' in ds and ds.InstanceNumber is not None:
-            return ds.InstanceNumber
-        elif 'NumberOfFrames' in ds and ds.NumberOfFrames == 1:
-            return 0
-        else:
-            raise ValueError("Slice index is required for multi-frame images.")
-    else:
-        if slice_index < 0:
-            raise ValueError("Slice index must be a non-negative integer.")
-        if 'NumberOfFrames' in ds and slice_index >= ds.NumberOfFrames:
-            _LOGGER.warning(f"Slice index {slice_index} exceeds number of frames {ds.NumberOfFrames}.")
-        root_instance_number = ds.get('InstanceNumber', 1)
-        if root_instance_number is None:
-            root_instance_number = 1
-        return root_instance_number + slice_index
-
-
-def get_image_position(ds: pydicom.Dataset,
-                       slice_index: int | None = None) -> np.ndarray:
-    """
-    Get the image position for a specific slice in a DICOM dataset.
-
-    Parameters:
-        ds (pydicom.Dataset): The DICOM dataset containing image metadata.
-        slice_index (int): Index of the slice in the 3D volume.
-
-    Returns:
-        numpy.ndarray: Image position (X, Y, Z) for the specified slice.
-    """
-
-    instance_number = _get_instance_number(ds, slice_index)
-
-    if 'PerFrameFunctionalGroupsSequence' in ds:
-        if slice_index is not None:
-            frame_groups = ds.PerFrameFunctionalGroupsSequence[slice_index]
-            if 'PlanePositionSequence' in frame_groups and 'ImagePositionPatient' in frame_groups.PlanePositionSequence[0]:
-                return frame_groups.PlanePositionSequence[0].ImagePositionPatient
-        else:
-            logging.warning("PerFrameFunctionalGroupsSequence is available, but slice_index is not provided.")
-
-    # Get the Image Position Patient attribute
-    if 'ImagePositionPatient' in ds:
-        if 'SliceLocation' in ds:
-            _LOGGER.debug("SliceLocation attribute is available, but not accounted for in calculation.")
-        x = np.array(ds.ImagePositionPatient, dtype=np.float64)
-        sc_orient = get_slice_orientation(ds, slice_index)
-        return x + sc_orient*(instance_number-ds.get('InstanceNumber', 1))
-
-    raise ValueError("ImagePositionPatient not found in DICOM dataset.")
-
-
-def get_pixel_spacing(ds: pydicom.Dataset, slice_index: int) -> np.ndarray:
-    """
-    Get the pixel spacing from a DICOM dataset.
-
-    Parameters:
-        ds (pydicom.Dataset): The DICOM dataset containing image metadata.
-        slice_index (int): Index of the slice in the 3D volume.
-
-    Returns:
-        numpy.ndarray: Pixel spacing (X, Y) for the specified slice.
-    """
-    # Get the Pixel Spacing attribute
-    if 'PixelSpacing' in ds:
-        return np.array(ds.PixelSpacing, dtype=np.float64)
-
-    if 'PerFrameFunctionalGroupsSequence' in ds:
-        if 'PixelMeasuresSequence' in ds.PerFrameFunctionalGroupsSequence[slice_index]:
-            return ds.PerFrameFunctionalGroupsSequence[slice_index].PixelMeasuresSequence[0].PixelSpacing
-
-    if 'SharedFunctionalGroupsSequence' in ds:
-        if 'PixelMeasuresSequence' in ds.SharedFunctionalGroupsSequence[0]:
-            return ds.SharedFunctionalGroupsSequence[0].PixelMeasuresSequence[0].PixelSpacing
-
-    raise ValueError("PixelSpacing not found in DICOM dataset.")
-
-
-def pixel_to_patient(ds: pydicom.Dataset,
-                     pixel_x, pixel_y,
-                     slice_index: int | None = None,
-                     instance_number: int | None = None) -> np.ndarray:
-    """
-    Convert pixel coordinates (pixel_x, pixel_y) to patient coordinates in DICOM.
-
-    Parameters:
-        ds (pydicom.Dataset): The DICOM dataset containing image metadata.
-        pixel_x (float): X coordinate in pixel space.
-        pixel_y (float): Y coordinate in pixel space.
-        slice_index (int): Index of the slice of the `ds.pixel_array`.
-        instance_number (int): Instance number of the slice in the 3D volume.
-
-    Returns:
-        numpy.ndarray: Patient coordinates (X, Y, Z).
-    """
-
-    # - image_position is the origin of the image in patient coordinates (ImagePositionPatient)
-    # - row_vector and col_vector are the direction cosines from ImageOrientationPatient
-    # - pixel_spacing is the physical distance between the centers of adjacent pixels
-
-    if slice_index is not None and instance_number is not None:
-        raise ValueError("Either slice_index or instance_number should be provided, not both.")
-
-    if slice_index is None:
-        if instance_number is None:
-            instance_number = _get_instance_number(ds)
-        root_instance_number = ds.get('InstanceNumber', 1)
-        if root_instance_number is None:
-            root_instance_number = 1
-        slice_index = instance_number - root_instance_number
-
-    # Get required DICOM attributes
-    image_position = np.array(get_image_position(ds, slice_index), dtype=np.float64)
-    image_orientation = np.array(get_image_orientation(ds, slice_index), dtype=np.float64).reshape(2, 3)
-    # image_position = np.array(ds.ImagePositionPatient, dtype=np.float64)  # (0020,0032)
-    # image_orientation = np.array(ds.ImageOrientationPatient, dtype=np.float64).reshape(2, 3)  # (0020,0037)
-    # pixel_spacing = np.array(ds.PixelSpacing, dtype=np.float64)  # (0028,0030)
-    pixel_spacing = np.array(get_pixel_spacing(ds, slice_index), dtype=np.float64)  # (0028,0030)
-
-    # Compute row and column vectors from image orientation
-    row_vector = image_orientation[0]
-    col_vector = image_orientation[1]
-
-    # Compute patient coordinates
-    patient_coords = image_position + pixel_x * pixel_spacing[0] * row_vector + pixel_y * pixel_spacing[1] * col_vector
-
-    return patient_coords
-
-
-def determine_anatomical_plane(ds: pydicom.Dataset,
-                               slice_axis: int,
-                               alignment_threshold: float = 0.95) -> str:
-    """
-    Determine the anatomical plane of a DICOM slice (Axial, Sagittal, Coronal, Oblique, or Unknown).
-
-    Args:
-        ds (pydicom.Dataset): The DICOM dataset containing the image metadata.
-        slice_axis (int): The axis of the slice to analyze (0, 1, or 2).
-        alignment_threshold (float): Threshold for considering alignment with anatomical axes.
-
-    Returns:
-        str: The name of the anatomical plane ('Axial', 'Sagittal', 'Coronal', 'Oblique', or 'Unknown').
-
-    Raises:
-        ValueError: If `slice_index` is not 0, 1, or 2.
-    """
-
-    if slice_axis not in [0, 1, 2]:
-        raise ValueError("slice_index must be 0, 1 or 2")
-    # Check if Image Orientation Patient exists
-    if not hasattr(ds, 'ImageOrientationPatient') or ds.ImageOrientationPatient is None:
-        return "Unknown"
-    # Get the Image Orientation Patient (IOP) - 6 values defining row and column directions
-    iop = np.array(ds.ImageOrientationPatient, dtype=float)
-    if len(iop) != 6:
-        return "Unknown"
-    # Extract row and column direction vectors
-    row_dir = iop[:3]  # First 3 values: row direction cosines
-    col_dir = iop[3:]  # Last 3 values: column direction cosines
-    # Calculate the normal vector (slice direction) using cross product
-    normal = np.cross(row_dir, col_dir)
-    normal = normal / np.linalg.norm(normal)  # Normalize
-    # Define standard anatomical axes
-    # LPS coordinate system: L = Left, P = Posterior, S = Superior
-    axes = {
-        'sagittal': np.array([1, 0, 0]),  # L-R axis (left-right)
-        'coronal': np.array([0, 1, 0]),   # A-P axis (anterior-posterior)
-        'axial': np.array([0, 0, 1])      # S-I axis (superior-inferior)
-    }
-    # For each slice_index, determine which axis we're examining
-    if slice_axis == 0:
-        # ds.pixel_array[0,:,:] - slicing along first dimension
-        # The normal vector corresponds to the direction we're slicing through
-        examine_vector = normal
-    elif slice_axis == 1:
-        # ds.pixel_array[:,0,:] - slicing along second dimension
-        # This corresponds to the row direction
-        examine_vector = row_dir
-    elif slice_axis == 2:
-        # ds.pixel_array[:,:,0] - slicing along third dimension
-        # This corresponds to the column direction
-        examine_vector = col_dir
-    # Find which anatomical axis is most aligned with our examine_vector
-    max_dot = 0
-    best_axis = "Unknown"
-    for axis_name, axis_vector in axes.items():
-        dot_product = abs(np.dot(examine_vector, axis_vector))
-        if dot_product > max_dot:
-            max_dot = dot_product
-            best_axis = axis_name
-    if max_dot >= alignment_threshold:
-        return best_axis.capitalize()
-    else:
-        return "Oblique"
datamint/utils/io_utils.py
DELETED

@@ -1,187 +0,0 @@
-import numpy as np
-import nibabel as nib
-from PIL import Image
-from .dicom_utils import load_image_normalized, is_dicom
-import pydicom
-import os
-from typing import Any
-import logging
-from PIL import ImageFile
-import cv2
-
-ImageFile.LOAD_TRUNCATED_IMAGES = True
-
-_LOGGER = logging.getLogger(__name__)
-
-IMAGE_EXTS = ('.png', '.jpg', '.jpeg')
-NII_EXTS = ('.nii', '.nii.gz')
-VIDEO_EXTS = ('.mp4', '.avi', '.mov', '.mkv')
-
-
-def read_video(file_path: str, index: int = None) -> np.ndarray:
-    cap = cv2.VideoCapture(file_path)
-    if not cap.isOpened():
-        raise ValueError(f"Failed to open video file: {file_path}")
-    try:
-        if index is None:
-            frames = []
-            while True:
-                ret, frame = cap.read()
-                if not ret:
-                    break
-                # Convert BGR to RGB and transpose to (C, H, W) format
-                frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
-                frame = frame.transpose(2, 0, 1)
-                frames.append(frame)
-            imgs = np.array(frames)  # shape: (#frames, C, H, W)
-        else:
-            while index > 0:
-                cap.grab()
-                index -= 1
-            ret, frame = cap.read()
-            if not ret:
-                raise ValueError(f"Failed to read frame {index} from video file: {file_path}")
-            # Convert BGR to RGB and transpose to (C, H, W) format
-            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
-            imgs = frame.transpose(2, 0, 1)
-    finally:
-        cap.release()
-
-    if imgs is None or len(imgs) == 0:
-        raise ValueError(f"No frames found in video file: {file_path}")
-
-    return imgs
-
-
-def read_nifti(file_path: str, mimetype: str | None = None) -> np.ndarray:
-    """
-    Read a NIfTI file and return the image data in standardized format.
-
-    Args:
-        file_path: Path to the NIfTI file (.nii or .nii.gz)
-        mimetype: Optional MIME type of the file. If provided, it can help in determining how to read the file.
-
-    Returns:
-        np.ndarray: Image data with shape (#frames, C, H, W)
-    """
-    from nibabel.filebasedimages import ImageFileError
-    try:
-        imgs = nib.load(file_path).get_fdata()  # shape: (W, H, #frame) or (W, H)
-    except ImageFileError as e:
-        if mimetype is None:
-            raise e
-        # has_ext = os.path.splitext(file_path)[1] != ''
-        if mimetype == 'application/gzip':
-            with gzip.open(file_path, 'rb') as f:
-                imgs = nib.Nifti1Image.from_stream(f).get_fdata()
-        elif mimetype in ('image/x.nifti', 'application/x-nifti'):
-            with open(file_path, 'rb') as f:
-                imgs = nib.Nifti1Image.from_stream(f).get_fdata()
-        else:
-            raise e
-    if imgs.ndim == 2:
-        imgs = imgs.transpose(1, 0)
-        imgs = imgs[np.newaxis, np.newaxis]
-    elif imgs.ndim == 3:
-        imgs = imgs.transpose(2, 1, 0)
-        imgs = imgs[:, np.newaxis]
-    else:
-        raise ValueError(f"Unsupported number of dimensions in '{file_path}': {imgs.ndim}")
-
-    return imgs
-
-
-def read_image(file_path: str) -> np.ndarray:
-    with Image.open(file_path) as pilimg:
-        imgs = np.array(pilimg)
-    if imgs.ndim == 2:  # (H, W)
-        imgs = imgs[np.newaxis, np.newaxis]
-    elif imgs.ndim == 3:  # (H, W, C)
-        imgs = imgs.transpose(2, 0, 1)[np.newaxis]  # (H, W, C) -> (1, C, H, W)
-
-    return imgs
-
-
-def read_array_normalized(file_path: str,
-                          index: int | None = None,
-                          return_metainfo: bool = False,
-                          use_magic=False) -> np.ndarray | tuple[np.ndarray, Any]:
-    """
-    Read an array from a file.
-
-    Args:
-        file_path: The path to the file.
-        index: If specified, read only the frame at this index (0-based).
-            If None, read all frames.
-            Supported file formats are NIfTI (.nii, .nii.gz), PNG (.png), JPEG (.jpg, .jpeg) and npy (.npy).
-
-    Returns:
-        The array read from the file in shape (#frames, C, H, W), if `index=None`,
-        or (C, H, W) if `index` is specified.
-    """
-    if not os.path.exists(file_path):
-        raise FileNotFoundError(f"File not found: {file_path}")
-
-    metainfo = None
-
-    try:
-        if is_dicom(file_path):
-            ds = pydicom.dcmread(file_path)
-            if index is not None:
-                imgs = load_image_normalized(ds, index=index)[0]
-            else:
-                imgs = load_image_normalized(ds)
-            # Free up memory
-            if hasattr(ds, '_pixel_array'):
-                ds._pixel_array = None
-            if hasattr(ds, 'PixelData'):
-                ds.PixelData = None
-            metainfo = ds
-        else:
-            if use_magic:
-                import magic  # it is important to import here because magic has an OS lib dependency.
-                mime_type = magic.from_file(file_path, mime=True)
-            else:
-                mime_type = ""
-
-            if mime_type.startswith('video/') or file_path.endswith(VIDEO_EXTS):
-                imgs = read_video(file_path, index)
-            else:
-                if mime_type in ('image/x.nifti', 'application/x-nifti') or mime_type == 'application/gzip' or file_path.endswith(NII_EXTS):
-                    imgs = read_nifti(file_path, mimetype=mime_type)
-                    # For NIfTI files, try to load associated JSON metadata
-                    if return_metainfo:
-                        json_path = file_path.replace('.nii.gz', '.json').replace('.nii', '.json')
-                        if os.path.exists(json_path):
-                            try:
-                                import json
-                                with open(json_path, 'r') as f:
-                                    metainfo = json.load(f)
-                                _LOGGER.debug(f"Loaded JSON metadata from {json_path}")
-                            except Exception as e:
-                                _LOGGER.warning(f"Failed to load JSON metadata from {json_path}: {e}")
-                                metainfo = None
-                elif mime_type.startswith('image/') or file_path.endswith(IMAGE_EXTS):
-                    imgs = read_image(file_path)
-                elif file_path.endswith('.npy') or mime_type == 'application/x-numpy-data':
-                    imgs = np.load(file_path)
-                    if imgs.ndim != 4:
-                        raise ValueError(f"Unsupported number of dimensions in '{file_path}': {imgs.ndim}")
-                else:
-                    raise ValueError(f"Unsupported file format '{mime_type}' of '{file_path}'")
-
-        if index is not None:
-            if len(imgs) > 1:
-                _LOGGER.warning(f"It is inefficient to load all frames from '{file_path}' to access a single frame." +
-                                " Consider converting the file to a format that supports random access (DICOM), or" +
-                                " convert to png/jpeg files or" +
-                                " manually handle all frames at once instead of loading a specific frame.")
-            imgs = imgs[index]
-
-        if return_metainfo:
-            return imgs, metainfo
-        return imgs
-
-    except Exception as e:
-        _LOGGER.error(f"Failed to read array from '{file_path}': {e}")
-        raise e
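`read_array_normalized` is the other helper that moved: `base_dataset.py` now imports it from `medimgkit.io_utils` (see the CHANGED hunk above). A usage sketch, assuming the medimgkit version keeps the signature and the `(#frames, C, H, W)` convention documented in the deleted docstring; the file names are hypothetical:

```python
from medimgkit.io_utils import read_array_normalized

volume = read_array_normalized("scan.nii.gz")           # (#frames, C, H, W)
frame = read_array_normalized("scan.nii.gz", index=3)   # (C, H, W), single frame
imgs, metainfo = read_array_normalized("scan.dcm", return_metainfo=True)
```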
{datamint-1.6.0.dist-info → datamint-1.6.2.dist-info}/WHEEL
File without changes

{datamint-1.6.0.dist-info → datamint-1.6.2.dist-info}/entry_points.txt
File without changes