dicube 0.2.2__cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl
This diff shows the content of publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between package versions as they appear in their public registries.
- dicube/__init__.py +174 -0
- dicube/codecs/__init__.py +152 -0
- dicube/codecs/jph/__init__.py +15 -0
- dicube/codecs/jph/codec.py +161 -0
- dicube/codecs/jph/ojph_complete.cpython-310-aarch64-linux-gnu.so +0 -0
- dicube/codecs/jph/ojph_complete.cpython-38-aarch64-linux-gnu.so +0 -0
- dicube/codecs/jph/ojph_complete.cpython-39-aarch64-linux-gnu.so +0 -0
- dicube/codecs/jph/ojph_decode_complete.cpython-310-aarch64-linux-gnu.so +0 -0
- dicube/codecs/jph/ojph_decode_complete.cpython-38-aarch64-linux-gnu.so +0 -0
- dicube/codecs/jph/ojph_decode_complete.cpython-39-aarch64-linux-gnu.so +0 -0
- dicube/core/__init__.py +21 -0
- dicube/core/image.py +349 -0
- dicube/core/io.py +408 -0
- dicube/core/pixel_header.py +120 -0
- dicube/dicom/__init__.py +13 -0
- dicube/dicom/dcb_streaming.py +248 -0
- dicube/dicom/dicom_io.py +153 -0
- dicube/dicom/dicom_meta.py +740 -0
- dicube/dicom/dicom_status.py +259 -0
- dicube/dicom/dicom_tags.py +121 -0
- dicube/dicom/merge_utils.py +283 -0
- dicube/dicom/space_from_meta.py +70 -0
- dicube/exceptions.py +189 -0
- dicube/storage/__init__.py +17 -0
- dicube/storage/dcb_file.py +824 -0
- dicube/storage/pixel_utils.py +259 -0
- dicube/utils/__init__.py +6 -0
- dicube/validation.py +380 -0
- dicube-0.2.2.dist-info/METADATA +272 -0
- dicube-0.2.2.dist-info/RECORD +31 -0
- dicube-0.2.2.dist-info/WHEEL +6 -0
dicube/dicom/dicom_status.py
@@ -0,0 +1,259 @@
+from enum import Enum
+
+import numpy as np
+from pydicom.tag import Tag
+
+from .dicom_tags import CommonTags
+
+
+class DicomStatus(Enum):
+    """
+    Enumeration of possible DICOM series status conditions.
+
+    Each status represents a specific condition or issue that may be present
+    in a DICOM series. The conditions are grouped into categories:
+    - Series UID Issues
+    - Instance Number Issues
+    - Spacing Issues
+    - Shape Issues
+    - Orientation Issues
+    - Data Type Issues
+    - Location Issues
+    - Consistency Status
+    """
+
+    # Series UID Issues
+    NON_UNIFORM_SERIES_UID = (
+        "non_uniform_series_uid"  # Multiple Series UIDs in one series
+    )
+    MISSING_SERIES_UID = "missing_series_uid"  # No Series UIDs present
+
+    # Instance Number Issues
+    DUPLICATE_INSTANCE_NUMBERS = (
+        "duplicate_instance_numbers"  # Duplicated instance numbers (e.g., 1,1,2,2,3,3)
+    )
+    MISSING_INSTANCE_NUMBER = "missing_instance_number"  # Missing Instance Number
+    GAP_INSTANCE_NUMBER = "gap_instance_number"  # Gaps in instance numbering
+
+    # Spacing Issues
+    MISSING_SPACING = "missing_spacing"  # Missing Pixel Spacing
+    NON_UNIFORM_SPACING = (
+        "non_uniform_spacing"  # Inconsistent Pixel Spacing (XY intervals)
+    )
+
+    # Shape Issues
+    MISSING_SHAPE = "missing_shape"  # Missing image dimensions (Columns or Rows)
+    NON_UNIFORM_SHAPE = "non_uniform_shape"  # Inconsistent image dimensions
+
+    # Orientation Issues
+    MISSING_ORIENTATION = "missing_orientation"  # Missing Image Orientation Patient
+    NON_UNIFORM_ORIENTATION = (
+        "non_uniform_orientation"  # Inconsistent Image Orientation Patient
+    )
+
+    # Data Type Issues
+    NON_UNIFORM_RESCALE_FACTOR = (
+        "non_uniform_rescale_factor"  # Inconsistent intercept or slope
+    )
+    MISSING_DTYPE = "missing_dtype"  # Missing data type information
+    NON_UNIFORM_DTYPE = "non_uniform_dtype"  # Inconsistent data types
+
+    # Location Issues
+    MISSING_LOCATION = (
+        "missing_location"  # Missing Slice Location and Image Position Patient
+    )
+    REVERSED_LOCATION = "reversed_location"  # Z-values reversed when sorted by instance (e.g., 1,2,3,2,1)
+    DWELLING_LOCATION = (
+        "dwelling_location"  # Z-values show stagnation (e.g., 1,2,3,3,4,5)
+    )
+    GAP_LOCATION = "gap_location"  # Z-values have gaps (e.g., 1,2,3,5,6)
+
+    # Consistency Status
+    CONSISTENT = "consistent"  # All checks pass, data is consistent
+    INCONSISTENT = "inconsistent"  # Other inconsistencies not covered above
+
+
+def calculate_average_z_gap(z_locations: np.ndarray) -> float:
+    """
+    Calculate the average gap between Z-axis locations.
+
+    Uses a robust method to estimate the typical Z-axis interval:
+    1. If a single interval appears in >80% of cases, use that value
+    2. Otherwise, use the larger absolute value between median and mean
+
+    Args:
+        z_locations: Sorted array of Z-axis locations
+
+    Returns:
+        float: Estimated typical Z-axis interval; 0 if it cannot be calculated
+    """
+    if len(z_locations) < 2:
+        return 0.0
+    diffs = np.diff(z_locations)
+    if len(diffs) == 0:
+        return 0.0
+
+    # If one interval appears in >80% of cases, use it
+    uniq_diffs, counts = np.unique(diffs, return_counts=True)
+    if np.max(counts) / len(diffs) > 0.8:
+        return uniq_diffs[np.argmax(counts)]
+
+    # Otherwise use the larger of median or mean
+    median_diff = np.median(diffs)
+    mean_diff = np.mean(diffs)
+    return max([median_diff, mean_diff], key=abs)
+
+
+def get_dicom_status(meta) -> DicomStatus:
+    """
+    Check DICOM metadata and return the corresponding status.
+
+    Performs a series of checks on the DICOM metadata to determine its status.
+    Checks include:
+    - Series UID consistency
+    - Instance number sequence
+    - Pixel spacing uniformity
+    - Image dimensions
+    - Patient orientation
+    - Data type consistency
+    - Z-axis location sequence
+
+    Args:
+        meta: DicomMeta instance providing access to DICOM metadata
+
+    Returns:
+        DicomStatus: The status enum value representing the check results
+    """
+    # -------------------------- Series UID --------------------------
+    if meta.is_missing(CommonTags.SeriesInstanceUID):
+        return DicomStatus.MISSING_SERIES_UID
+    if not meta.is_shared(CommonTags.SeriesInstanceUID):
+        return DicomStatus.NON_UNIFORM_SERIES_UID
+
+    # -------------------------- Instance Number --------------------------
+    if meta.is_missing(CommonTags.InstanceNumber):
+        return DicomStatus.MISSING_INSTANCE_NUMBER
+
+    # Get instance numbers (always treat as non-shared for this check)
+    instance_numbers = meta.get_values(CommonTags.InstanceNumber)
+
+    # Check for single image
+    if meta.slice_count == 1:
+        # Single image is fine, continue to next check
+        pass
+    else:
+        # Check for duplicate instance numbers
+        if len(set(instance_numbers)) < len(instance_numbers):
+            return DicomStatus.DUPLICATE_INSTANCE_NUMBERS
+
+        # Check for gaps in instance numbering
+        # First convert to integers and sort
+        try:
+            int_instances = [int(num) if num is not None else None for num in instance_numbers]
+            sorted_instances = sorted([num for num in int_instances if num is not None])
+
+            # If we have a sequence with more than one image
+            if len(sorted_instances) > 1:
+                # Check if they form a continuous sequence
+                diffs = np.diff(sorted_instances)
+                if not np.all(diffs == 1):
+                    return DicomStatus.GAP_INSTANCE_NUMBER
+        except (ValueError, TypeError):
+            # If conversion fails, we can't check for gaps
+            pass
+
+    # -------------------------- Dtype (Bits) --------------------------
+    dtype_tags = [
+        CommonTags.BitsStored,
+        CommonTags.BitsAllocated,
+        CommonTags.HighBit,
+        CommonTags.PixelRepresentation
+    ]
+
+    # Check if any are missing
+    if any(meta.is_missing(tag) for tag in dtype_tags):
+        return DicomStatus.MISSING_DTYPE
+
+    # Check if any are non-shared
+    if any(not meta.is_shared(tag) for tag in dtype_tags):
+        return DicomStatus.NON_UNIFORM_DTYPE
+
+    # -------------------------- Pixel Spacing --------------------------
+    if meta.is_missing(CommonTags.PixelSpacing):
+        return DicomStatus.MISSING_SPACING
+    if not meta.is_shared(CommonTags.PixelSpacing):
+        return DicomStatus.NON_UNIFORM_SPACING
+
+    # -------------------------- Image Shape (Columns/Rows) --------------------------
+    if meta.is_missing(CommonTags.Columns) or meta.is_missing(CommonTags.Rows):
+        return DicomStatus.MISSING_SHAPE
+    if not meta.is_shared(CommonTags.Columns) or not meta.is_shared(CommonTags.Rows):
+        return DicomStatus.NON_UNIFORM_SHAPE
+
+    # -------------------------- Orientation --------------------------
+    if meta.is_missing(CommonTags.ImageOrientationPatient):
+        return DicomStatus.MISSING_ORIENTATION
+    if not meta.is_shared(CommonTags.ImageOrientationPatient):
+        return DicomStatus.NON_UNIFORM_ORIENTATION
+
+    # -------------------------- Location (Z direction) --------------------------
+    # Need either ImagePositionPatient or SliceLocation
+    has_position = not meta.is_missing(CommonTags.ImagePositionPatient)
+    has_location = not meta.is_missing(CommonTags.SliceLocation)
+
+    # If both are missing, mark as missing location
+    if not has_position and not has_location:
+        return DicomStatus.MISSING_LOCATION
+
+    # Get Z locations and check for issues
+    # For multi-slice datasets only
+    if meta.slice_count > 1:
+        # Get Z locations from the DicomMeta helper method
+        z_locations = meta._get_projection_location()
+
+        # Get the order of instance numbers
+        instance_numbers = meta.get_values(CommonTags.InstanceNumber)
+        try:
+            # Convert to integers and get sort order
+            int_instances = [int(num) if num is not None else float('inf') for num in instance_numbers]
+            sort_idx = np.argsort(int_instances)
+
+            # Sort Z locations by instance number
+            sorted_z = np.array([z_locations[i] for i in sort_idx if i < len(z_locations)])
+
+            # Check for direction changes
+            if len(sorted_z) > 1:
+                diffs_z = np.diff(sorted_z)
+
+                # Check for direction changes (sign changes in differences)
+                if np.min(diffs_z) < 0 < np.max(diffs_z):
+                    return DicomStatus.REVERSED_LOCATION
+
+                # Check for duplicate positions (zero differences)
+                if np.any(diffs_z == 0):
+                    return DicomStatus.DWELLING_LOCATION
+
+                # Check for gaps in Z locations
+                avg_gap = calculate_average_z_gap(sorted_z)
+                if avg_gap > 0.0:
+                    # Calculate relative deviations from average gap
+                    ratio_diffs = np.abs(diffs_z - avg_gap) / (avg_gap + 1e-8)
+                    # If any gap is more than 50% different from average, mark as gap
+                    if np.any(ratio_diffs > 0.5):
+                        return DicomStatus.GAP_LOCATION
+        except (ValueError, TypeError):
+            # If conversion fails, we can't check for sequence issues
+            pass
+
+    # -------------------------- Rescale Factor (Intercept/Slope) --------------------------
+    # These may not exist, so only check if they're present
+    has_intercept = not meta.is_missing(CommonTags.RescaleIntercept)
+    has_slope = not meta.is_missing(CommonTags.RescaleSlope)
+
+    if has_intercept and has_slope:
+        # If present, check for consistency
+        if not meta.is_shared(CommonTags.RescaleIntercept) or not meta.is_shared(CommonTags.RescaleSlope):
+            return DicomStatus.NON_UNIFORM_RESCALE_FACTOR
+
+    # -------------------------- All checks passed --------------------------
+    return DicomStatus.CONSISTENT
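As a sanity check on the Z-gap heuristic above: a minimal sketch, assuming the wheel is installed so the import path matches the file list. The expected values follow directly from the function body shown in the hunk.

import numpy as np
from dicube.dicom.dicom_status import calculate_average_z_gap

# Seven slices at 1 mm spacing, with one slice missing before the last.
z = np.array([0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 7.0])
diffs = np.diff(z)                   # [1, 1, 1, 1, 1, 2]
gap = calculate_average_z_gap(z)     # 1.0: the 1 mm interval covers 5/6 (>80%) of diffs
# get_dicom_status flags any interval deviating more than 50% from this estimate:
ratio_diffs = np.abs(diffs - gap) / (gap + 1e-8)
assert np.any(ratio_diffs > 0.5)     # the 2 mm step deviates 100% -> GAP_LOCATION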
dicube/dicom/dicom_tags.py
@@ -0,0 +1,121 @@
+from pydicom.tag import Tag
+from typing import Union, Tuple, Set
+
+
+def get_tag_key(tag: Tag) -> str:
+    """Get the hexadecimal string representation of a DICOM Tag (format: 'ggggeeee').
+
+    Args:
+        tag: pydicom Tag object
+
+    Returns:
+        str: Hexadecimal string, e.g., '00100020' for PatientID
+    """
+    return f"{tag:08X}"  # or format(tag, "08X")
+
+
+class CommonTags:
+    """Common DICOM tags used throughout the library.
+
+    This class provides convenient access to frequently used DICOM tags
+    organized by category (patient, study, series, instance, etc.).
+    All tags are pydicom Tag objects.
+    """
+
+    # Patient tags
+    PatientID = Tag("PatientID")
+    PatientName = Tag("PatientName")
+    PatientBirthDate = Tag("PatientBirthDate")
+    PatientSex = Tag("PatientSex")
+    PatientAge = Tag("PatientAge")
+    PatientWeight = Tag("PatientWeight")
+
+    # Study tags
+    StudyInstanceUID = Tag("StudyInstanceUID")
+    StudyID = Tag("StudyID")
+    StudyDate = Tag("StudyDate")
+    StudyTime = Tag("StudyTime")
+    AccessionNumber = Tag("AccessionNumber")
+    StudyDescription = Tag("StudyDescription")
+
+    # Series tags
+    SeriesInstanceUID = Tag("SeriesInstanceUID")
+    SeriesNumber = Tag("SeriesNumber")
+    Modality = Tag("Modality")
+    SeriesDescription = Tag("SeriesDescription")
+
+    # Instance tags
+    SOPInstanceUID = Tag("SOPInstanceUID")
+    SOPClassUID = Tag("SOPClassUID")
+    InstanceNumber = Tag("InstanceNumber")
+
+    # Image tags
+    Rows = Tag("Rows")
+    Columns = Tag("Columns")
+    BitsAllocated = Tag("BitsAllocated")
+    BitsStored = Tag("BitsStored")
+    HighBit = Tag("HighBit")
+    SamplesPerPixel = Tag("SamplesPerPixel")
+    PhotometricInterpretation = Tag("PhotometricInterpretation")
+    PixelRepresentation = Tag("PixelRepresentation")
+
+    # Spatial tags
+    ImagePositionPatient = Tag("ImagePositionPatient")
+    ImageOrientationPatient = Tag("ImageOrientationPatient")
+    PixelSpacing = Tag("PixelSpacing")
+    SliceThickness = Tag("SliceThickness")
+    SpacingBetweenSlices = Tag("SpacingBetweenSlices")
+    SliceLocation = Tag("SliceLocation")
+
+    # Value transformations
+    RescaleIntercept = Tag("RescaleIntercept")
+    RescaleSlope = Tag("RescaleSlope")
+    WindowCenter = Tag("WindowCenter")
+    WindowWidth = Tag("WindowWidth")
+    PatientPosition = Tag("PatientPosition")
+    BodyPartExamined = Tag("BodyPartExamined")
+
+    # Pixel data
+    PixelData = Tag("PixelData")
+
+    # Enhanced MR specific tags
+    DimensionIndexSequence = Tag("DimensionIndexSequence")
+    FrameContentSequence = Tag("FrameContentSequence")
+
+    # UID tags
+    ImplementationClassUID = Tag("ImplementationClassUID")
+
+    # Other important tags
+    TransferSyntaxUID = Tag("TransferSyntaxUID")
+    MediaStorageSOPClassUID = Tag("MediaStorageSOPClassUID")
+    MediaStorageSOPInstanceUID = Tag("MediaStorageSOPInstanceUID")
+    SpecificCharacterSet = Tag("SpecificCharacterSet")
+
+    # Manufacturer Information
+    Manufacturer = Tag("Manufacturer")
+    ManufacturerModelName = Tag("ManufacturerModelName")
+    SoftwareVersions = Tag("SoftwareVersions")
+
+    # Other Common Tags
+    FrameOfReferenceUID = Tag("FrameOfReferenceUID")
+    ReferencedImageSequence = Tag("ReferencedImageSequence")
+    ReferencedSOPInstanceUID = Tag("ReferencedSOPInstanceUID")
+    AcquisitionNumber = Tag("AcquisitionNumber")
+    ContrastBolusAgent = Tag("ContrastBolusAgent")
+
+    # Tag sets for hierarchical DICOM levels
+    PATIENT_LEVEL_TAGS: Set[Tag] = {
+        PatientID, PatientName, PatientBirthDate, PatientSex
+    }
+
+    STUDY_LEVEL_TAGS: Set[Tag] = {
+        StudyInstanceUID, StudyID, StudyDate, StudyTime, AccessionNumber
+    }
+
+    SERIES_LEVEL_TAGS: Set[Tag] = {
+        SeriesInstanceUID, SeriesNumber, Modality
+    }
+
+    INSTANCE_LEVEL_TAGS: Set[Tag] = {
+        SOPInstanceUID, SOPClassUID, InstanceNumber
+    }
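Every CommonTags attribute is built from a DICOM keyword, so it compares equal to the corresponding numeric (group, element) tag, and get_tag_key produces the 'ggggeeee' keys used by pydicom's JSON representation. A short usage sketch, again assuming the wheel is installed:

from pydicom.tag import Tag
from dicube.dicom.dicom_tags import CommonTags, get_tag_key

# pydicom resolves keywords to (group, element); PatientID is (0010,0020).
assert CommonTags.PatientID == Tag(0x0010, 0x0020)
assert get_tag_key(CommonTags.PatientID) == "00100020"
# The *_LEVEL_TAGS sets group tags by DICOM hierarchy level.
assert CommonTags.SeriesInstanceUID in CommonTags.SERIES_LEVEL_TAGS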
dicube/dicom/merge_utils.py
@@ -0,0 +1,283 @@
+from typing import Any, Dict, List, Optional, Tuple
+
+
+###############################################################################
+# Helper Functions: Check Value Equality
+###############################################################################
+def _all_identical(values: List[Any]) -> bool:
+    """
+    Check if all elements in a list are identical (including None).
+
+    Args:
+        values: List of values to compare
+
+    Returns:
+        bool: True if list is empty, has one element, or all elements are identical
+    """
+    if (not values) or (len(values) <= 1):
+        return True
+    first = values[0]
+    return all(v == first for v in values[1:])
+
+
+###############################################################################
+# Recursively Merge Dataset JSONs
+###############################################################################
+def _merge_dataset_list(
+    dataset_jsons: List[Dict[str, Any]]
+) -> Dict[str, Dict[str, Any]]:
+    """
+    Merge a list of pydicom JSON representations at the top level.
+
+    Creates a merged dictionary where each tag entry contains:
+    {
+        "vr": str,  # DICOM Value Representation
+        "shared": True/False/None,  # None for sequences (SQ)
+        "Value": [single value/list/sequence structure]
+    }
+
+    Args:
+        dataset_jsons: List of pydicom JSON dictionaries to merge
+
+    Returns:
+        dict: Merged data with format {tag: merged_entry, ...}
+    """
+    # 1. Collect all unique tags
+    all_tags = set()
+    for js in dataset_jsons:
+        all_tags.update(js.keys())
+
+    merged_data = {}
+    for tag in sorted(all_tags):
+        # Collect values for this tag from all datasets
+        # Note: Each value is like {"vr": "XX", "Value": [...]} or None
+        tag_values = [ds_js.get(tag, None) for ds_js in dataset_jsons]
+
+        # 2. Get VR if present in any dataset
+        vrs = [tv["vr"] for tv in tag_values if tv is not None]
+        vr = vrs[0] if vrs else None
+
+        # 3. Merge values
+        merged_data[tag] = _merge_tag_values(vr, tag_values)
+    return merged_data
+
+
+def _get_value_and_name(tv: Optional[Dict[str, Any]]) -> Tuple[Optional[str], Any]:
+    """
+    Extract the value and its field name from a tag value dictionary.
+
+    Handles different value storage methods in DICOM:
+    - Standard Value field
+    - InlineBinary for binary data
+    - BulkDataURI for external references
+
+    Args:
+        tv: Tag value dictionary or None
+
+    Returns:
+        tuple: (field_name, actual_value) where both may be None
+    """
+    if tv is not None and "Value" in tv:
+        value_name = "Value"
+        actual_value = tv["Value"]
+    elif tv is not None and "InlineBinary" in tv:
+        actual_value = tv["InlineBinary"]
+        value_name = "InlineBinary"
+    elif tv is not None and "BulkDataURI" in tv:
+        actual_value = tv["BulkDataURI"]
+        value_name = "BulkDataURI"
+    else:
+        value_name = None
+        actual_value = None
+    return value_name, actual_value
+
+
+def _merge_tag_values(
+    vr: Optional[str], tag_values: List[Optional[Dict[str, Any]]]
+) -> Dict[str, Any]:
+    """
+    Merge values for a single tag across multiple datasets.
+
+    For sequences (VR=SQ), recursively merges nested structures.
+    For other VRs, determines if values are shared across datasets.
+
+    Args:
+        vr: DICOM Value Representation (VR) code
+        tag_values: List of value dictionaries from each dataset
+
+    Returns:
+        dict: Merged entry with format:
+            {
+                "vr": str,
+                "shared": bool/None,
+                "Value": merged_value
+            }
+    """
+    # If tag is missing from all datasets, return empty shell
+    if all(tv is None for tv in tag_values):
+        return {"vr": vr, "shared": True}
+
+    if vr == "SQ":
+        # Handle sequences recursively
+        return _merge_sequence(tag_values)
+    else:
+        # Handle standard values
+        # Extract actual values (may be list[str], list[float], or single value)
+        actual_values = []
+        value_name = "Value"
+        for tv in tag_values:
+            value_name, actual_value = _get_value_and_name(tv)
+            actual_values.append(actual_value)
+
+        # Check if all values are identical
+        if _all_identical(actual_values):
+            if actual_values[0] is None:
+                return {"vr": vr, "shared": True}
+            else:
+                return {
+                    "vr": vr,
+                    "shared": True,
+                    value_name: actual_values[0],
+                }  # Store single value
+        else:
+            for i, v in enumerate(actual_values):
+                if v is None:
+                    actual_values[i] = 'None'
+            # Flatten single-element lists
+            if all([len(v) == 1 for v in actual_values]):
+                actual_values = [v[0] for v in actual_values]
+            return {
+                "vr": vr,
+                "shared": False,
+                value_name: actual_values,  # Store list for each dataset
+            }
+
+
+def _merge_sequence(sq_values: List[Optional[Dict[str, Any]]]) -> Dict[str, Any]:
+    """
+    Merge sequence values across datasets.
+
+    Each element has format: {"vr": "SQ", "Value": [item1, item2, ...]} or None.
+    Returns merged structure:
+    {
+        "vr": "SQ",
+        "shared": None,  # Shared status determined at item level
+        "Value": [
+            # Merged items, each a dict with {tag: {vr, shared, Value}}
+        ]
+    }
+
+    Args:
+        sq_values: List of sequence value dictionaries from each dataset
+
+    Returns:
+        dict: Merged sequence structure
+    """
+    # 1. Extract actual sequence values, replacing None with empty list
+    list_of_item_lists = []
+    for sq_val in sq_values:
+        if sq_val and "Value" in sq_val:
+            list_of_item_lists.append(sq_val["Value"])
+        else:
+            list_of_item_lists.append([])
+
+    # 2. Find maximum sequence length
+    max_len = max(len(items) for items in list_of_item_lists)
+
+    # 3. Merge items at each index
+    merged_items = []
+    for i in range(max_len):
+        # Collect i-th item from each dataset (None if index out of range)
+        item_jsons = []
+        for items in list_of_item_lists:
+            if i < len(items):
+                item_jsons.append(items[i])
+            else:
+                item_jsons.append(None)
+
+        # Recursively merge items
+        merged_item = _merge_item(item_jsons)
+        merged_items.append(merged_item)
+
+    return {
+        "vr": "SQ",
+        "shared": None,  # Sequence sharing determined at item level
+        "Value": merged_items,
+    }
+
+
+def _merge_item(item_jsons: List[Optional[Dict[str, Any]]]) -> Dict[str, Any]:
+    """
+    Merge corresponding sequence items from multiple datasets.
+
+    Each item_json is a simplified dataset with format:
+    {"xxxx": {"vr": "...", "Value": ...}, "yyyy": {"vr": "...", "Value": ...}}
+
+    Args:
+        item_jsons: List of item dictionaries from each dataset
+
+    Returns:
+        dict: Merged item dictionary
+    """
+    # Replace None with empty dict
+    actual_jsons = [js if js is not None else {} for js in item_jsons]
+    return _merge_dataset_list(actual_jsons)
+
+
+###############################################################################
+# Helper Functions: Split Merged Dataset Back to Original DICOM JSON Format
+###############################################################################
+
+
+def _slice_merged_data(merged_dataset: Dict[str, Any], idx: int) -> Dict[str, Any]:
+    """
+    Extract data for a single dataset from merged data.
+
+    Args:
+        merged_dataset: Merged dataset dictionary
+        idx: Index of the dataset to extract
+
+    Returns:
+        dict: Dataset dictionary containing only the specified slice
+    """
+    json_dict = {}
+    for tag_key, tag_entry in merged_dataset.items():
+        vr = tag_entry.get("vr")
+        shared = tag_entry.get("shared")
+
+        if shared is True:
+            # Shared tags have same value across all datasets
+            tmp = tag_entry.copy()
+            tmp.pop("shared")
+            json_dict[tag_key] = tmp
+        elif shared is False:
+            if "Value" in tag_entry:
+                valuename = "Value"
+            elif "InlineBinary" in tag_entry:
+                valuename = "InlineBinary"
+            elif "BulkDataURI" in tag_entry:
+                valuename = "BulkDataURI"
+            else:
+                valuename = None
+            value = tag_entry.get(valuename)
+
+            value_idx = value[idx]
+            if value_idx is None:
+                json_dict[tag_key] = {"vr": vr}
+            elif isinstance(value_idx, list) or (valuename != "Value"):
+                json_dict[tag_key] = {"vr": vr, valuename: value_idx}
+            else:
+                json_dict[tag_key] = {"vr": vr, valuename: [value_idx]}
+        else:
+            # Handle sequences and special cases
+            if vr == "SQ":
+                value = tag_entry.get("Value")
+
+                if value == []:
+                    json_dict[tag_key] = {"vr": vr, "Value": value}
+                else:
+                    json_dict[tag_key] = {
+                        "vr": vr,
+                        "Value": [_slice_merged_data(value[0], idx)],
+                    }
    return json_dict
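_merge_dataset_list and _slice_merged_data form a round trip: tags whose values agree across datasets collapse to a single shared entry, differing tags keep one value per dataset, and slicing re-wraps scalars into the single-element lists of pydicom's JSON format. A minimal round-trip sketch that exercises the private helpers directly (illustrative only; assuming the wheel is installed):

from dicube.dicom.merge_utils import _merge_dataset_list, _slice_merged_data

# Two datasets in pydicom JSON form: Rows (00280010) is shared,
# InstanceNumber (00200013) differs per slice.
slice0 = {"00200013": {"vr": "IS", "Value": [1]},
          "00280010": {"vr": "US", "Value": [512]}}
slice1 = {"00200013": {"vr": "IS", "Value": [2]},
          "00280010": {"vr": "US", "Value": [512]}}

merged = _merge_dataset_list([slice0, slice1])
assert merged["00280010"] == {"vr": "US", "shared": True, "Value": [512]}
# Non-shared single-element values are flattened to one scalar per dataset:
assert merged["00200013"] == {"vr": "IS", "shared": False, "Value": [1, 2]}

# Slicing index 1 reconstructs the second dataset's entry, re-wrapped in a list:
assert _slice_merged_data(merged, 1)["00200013"] == {"vr": "IS", "Value": [2]}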