qmenta-anon 2.1.dev374__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- qmenta_anon-2.1.dev374/PKG-INFO +19 -0
- qmenta_anon-2.1.dev374/pyproject.toml +38 -0
- qmenta_anon-2.1.dev374/src/qmenta/__init__.py +1 -0
- qmenta_anon-2.1.dev374/src/qmenta/anon/__init__.py +1 -0
- qmenta_anon-2.1.dev374/src/qmenta/anon/auto.py +75 -0
- qmenta_anon-2.1.dev374/src/qmenta/anon/dicom.py +575 -0
- qmenta_anon-2.1.dev374/src/qmenta/anon/nifti.py +48 -0
- qmenta_anon-2.1.dev374/src/qmenta/anon/time_utils.py +127 -0
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
Metadata-Version: 2.3
|
|
2
|
+
Name: qmenta-anon
|
|
3
|
+
Version: 2.1.dev374
|
|
4
|
+
Summary: The qmenta-anon library is used to anonymize files before transferring them to the QMENTA platform.
|
|
5
|
+
Author: QMENTA
|
|
6
|
+
Author-email: dev@qmenta.com
|
|
7
|
+
Requires-Python: >=3.10,<4.0
|
|
8
|
+
Classifier: Development Status :: 4 - Beta
|
|
9
|
+
Classifier: Intended Audience :: Developers
|
|
10
|
+
Classifier: License :: Other/Proprietary License
|
|
11
|
+
Classifier: Programming Language :: Python :: 3
|
|
12
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
16
|
+
Classifier: Topic :: Scientific/Engineering :: Medical Science Apps.
|
|
17
|
+
Requires-Dist: nibabel (>=5.3.2,<6.0.0)
|
|
18
|
+
Requires-Dist: pydicom (>=2.3.0,<3.0.0)
|
|
19
|
+
Requires-Dist: setuptools (>=80.3.1,<81.0.0)
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
[tool.poetry]
|
|
2
|
+
name = "qmenta-anon"
|
|
3
|
+
version = "2.1.dev374"
|
|
4
|
+
description = "The qmenta-anon library is used to anonymize files before transferring them to the QMENTA platform."
|
|
5
|
+
authors = ["QMENTA <dev@qmenta.com>"]
|
|
6
|
+
classifiers = [
|
|
7
|
+
"Development Status :: 4 - Beta",
|
|
8
|
+
"Intended Audience :: Developers",
|
|
9
|
+
"License :: Other/Proprietary License",
|
|
10
|
+
"Topic :: Scientific/Engineering :: Medical Science Apps."
|
|
11
|
+
]
|
|
12
|
+
packages = [
|
|
13
|
+
{ include = "qmenta", from = "src" }
|
|
14
|
+
]
|
|
15
|
+
|
|
16
|
+
[tool.poetry.dependencies]
|
|
17
|
+
python = "^3.10"
|
|
18
|
+
pydicom = "^2.3.0"
|
|
19
|
+
nibabel = "^5.3.2"
|
|
20
|
+
setuptools = "^80.3.1"
|
|
21
|
+
|
|
22
|
+
[tool.poetry.group.dev.dependencies]
|
|
23
|
+
flake8 = "^6.1.0"
|
|
24
|
+
pytest = "^7.4.0"
|
|
25
|
+
coverage = "^7.2.7"
|
|
26
|
+
scriv = "^1.3.1"
|
|
27
|
+
|
|
28
|
+
[build-system]
|
|
29
|
+
requires = ["poetry-core>=1.0.0"]
|
|
30
|
+
build-backend = "poetry.core.masonry.api"
|
|
31
|
+
|
|
32
|
+
[tool.coverage.report]
|
|
33
|
+
fail_under = 80
|
|
34
|
+
|
|
35
|
+
[tool.scriv]
|
|
36
|
+
format = "md"
|
|
37
|
+
md_header_level = "2"
|
|
38
|
+
insert_marker = "scriv-insert-here"
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
__path__ = __import__('pkgutil').extend_path(__path__, __name__)
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
__path__ = __import__('pkgutil').extend_path(__path__, __name__)
|
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
import pydicom
|
|
2
|
+
from enum import Enum
|
|
3
|
+
from pydicom.errors import InvalidDicomError
|
|
4
|
+
|
|
5
|
+
from qmenta.anon.dicom import anonymise as anonymise_dicom
|
|
6
|
+
from qmenta.anon.nifti import anonymise as anonymise_nifti
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class Format(Enum):
    """
    File formats distinguished by the automatic anonymiser.

    Members
    -------
    UNKNOWN
        Neither a DICOM nor a NIfTI file.
    DICOM
        A DICOM file.
    NIFTI
        A NIfTI file.
    """

    UNKNOWN = 0
    DICOM = 1
    NIFTI = 2
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def recognise_file_format(file):
    """
    Detect whether a file is DICOM or NIfTI.

    The file is first parsed with pydicom. When that fails with
    InvalidDicomError, or yields an empty dataset, the file name extension
    decides whether it is treated as NIfTI.

    Parameters
    ----------
    file : str
        The path of the file.

    Returns
    -------
    Format
        Format.DICOM when pydicom parses a non-empty dataset,
        Format.NIFTI when the path ends with ".nii" or ".nii.gz",
        Format.UNKNOWN otherwise.
    """
    try:
        # An empty dataset is falsy and is deliberately not accepted as DICOM.
        is_dicom = bool(pydicom.read_file(file))
    except InvalidDicomError:
        is_dicom = False

    if is_dicom:
        return Format.DICOM
    if file.endswith((".nii", ".nii.gz")):
        return Format.NIFTI
    return Format.UNKNOWN
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def anonymise(file):
    """
    Anonymise a DICOM or NIfTI file in place.

    The format is detected with recognise_file_format() and the file is
    handed to the matching format-specific anonymiser, which replaces the
    given file with its anonymised version.

    Parameters
    ----------
    file : str
        The path of the file

    Returns
    -------
    dict
        The result of the format-specific anonymiser. For an unrecognised
        file, a dict with "OK" set to False and a message under "error".

    Raises
    ------
    RuntimeError
        If the detected format is a Format member without a handler here.
    """
    detected = recognise_file_format(file)

    if detected == Format.DICOM:
        return anonymise_dicom(file)

    if detected == Format.NIFTI:
        return anonymise_nifti(file)

    if detected == Format.UNKNOWN:
        return {
            "error": "Some files are not accepted or are corrupted.",
            "OK": False
        }

    # Only reachable when Format gains a member that is not handled above.
    raise RuntimeError(
        'This format is not properly handled by the anonymise function.'
    )
|
|
@@ -0,0 +1,575 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
from datetime import datetime
|
|
3
|
+
from enum import Enum, unique
|
|
4
|
+
import pydicom
|
|
5
|
+
|
|
6
|
+
from qmenta.anon.time_utils import TimeAnonymise
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def PatchedMultiString(val, valtype=str):
    """
    Convert a raw DICOM string into one value or a MultiValue.

    The string is split on the backslash delimiter. A single value that
    cannot be converted to ``valtype`` is replaced by a dummy value instead
    of raising ValueError.

    Parameters
    ----------
    val: str
        DICOM string to split up
    valtype:
        default str, but can be e.g. UID to overwrite to a specific type

    Returns
    -------
    The converted single value, the unchanged empty string for empty input,
    or a pydicom MultiValue when ``val`` contains several values. None is
    returned when the conversion fails for a ``valtype`` that has no dummy
    value defined below.
    """
    # Remove one trailing blank (or NUL) used to pad to even length.
    if val and val.endswith((" ", "\x00")):
        val = val[:-1]

    parts = val.split("\\")
    if len(parts) != 1:
        return pydicom.multival.MultiValue(valtype, parts)

    single = parts[0]
    if not single:
        # Empty values are passed through untouched.
        return single

    try:
        return valtype(single)
    except ValueError:
        text_like = [str, pydicom.valuerep.PersonName, pydicom.uid.UID]
        if valtype in text_like:
            return valtype("XXXX")
        if valtype is pydicom.valuerep.DSfloat:
            return valtype(0.0)
        # No dummy value is defined for any other type.
        return None
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
# overwriting an existing method in order to prevent exceptions when
|
|
41
|
+
# tags are anonymised with data of another type (e.g. a float tag gets a string)
|
|
42
|
+
pydicom.valuerep.MultiString = PatchedMultiString
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
@unique
class DicomAttribute(Enum):
    """
    DICOM attributes known to the anonymiser.

    The value of every member is its DICOM tag, written as
    (group number, element number).
    """

    # Group 0x0010 and 0x0038: patient
    PatientName = (0x0010, 0x0010)
    PatientID = (0x0010, 0x0020)
    IssuerOfPatientID = (0x0010, 0x0021)
    PatientBirthTime = (0x0010, 0x0032)
    PatientSex = (0x0010, 0x0040)
    PatientBirthName = (0x0010, 0x1005)
    CountryOfResidence = (0x0010, 0x2150)
    RegionOfResidence = (0x0010, 0x2152)
    PatientTelephoneNumbers = (0x0010, 0x2154)
    CurrentPatientLocation = (0x0038, 0x0300)
    PatientInstitutionResidence = (0x0038, 0x0400)

    # Group 0x0008: dates
    StudyDate = (0x0008, 0x0020)
    SeriesDate = (0x0008, 0x0021)
    AcquisitionDate = (0x0008, 0x0022)
    ContentDate = (0x0008, 0x0023)
    OverlayDate = (0x0008, 0x0024)
    CurveDate = (0x0008, 0x0025)
    AcquisitionDateTime = (0x0008, 0x002A)

    # Group 0x0008: times
    StudyTime = (0x0008, 0x0030)
    SeriesTime = (0x0008, 0x0031)
    AcquisitionTime = (0x0008, 0x0032)
    ContentTime = (0x0008, 0x0033)
    OverlayTime = (0x0008, 0x0034)
    CurveTime = (0x0008, 0x0035)

    # Group 0x0008: institution and staff
    InstitutionAddress = (0x0008, 0x0081)
    ReferringPhysicianName = (0x0008, 0x0090)
    ReferringPhysicianAddress = (0x0008, 0x0092)
    ReferringPhysicianTelephoneNumber = (0x0008, 0x0094)
    InstitutionalDepartmentName = (0x0008, 0x1040)
    OperatorsName = (0x0008, 0x1070)

    StudyID = (0x0020, 0x0010)

    # Group 0x0040
    DateTime = (0x0040, 0xA120)
    Date = (0x0040, 0xA121)
    Time = (0x0040, 0xA122)
    PersonName = (0x0040, 0xA123)

    AccessionNumber = (0x0008, 0x0050)
    InstitutionName = (0x0008, 0x0080)
    ReferringPhysicianIDSequence = (0x0008, 0x0096)
    PhysiciansOfRecord = (0x0008, 0x1048)
    PhysiciansOfRecordIDSequence = (0x0008, 0x1049)
    PerformingPhysicianName = (0x0008, 0x1050)
    PerformingPhysicianIDSequence = (0x0008, 0x1052)
    NameOfPhysicianReadingStudy = (0x0008, 0x1060)
    PhysicianReadingStudyIDSequence = (0x0008, 0x1062)

    PatientBirthDate = (0x0010, 0x0030)
    PatientInsurancePlanCodeSequence = (0x0010, 0x0050)
    PatientPrimaryLanguageCodeSeq = (0x0010, 0x0101)
    OtherPatientIDs = (0x0010, 0x1000)
    OtherPatientNames = (0x0010, 0x1001)
    OtherPatientIDsSequence = (0x0010, 0x1002)
    PatientAge = (0x0010, 0x1010)
    PatientAddress = (0x0010, 0x1040)
    PatientMotherBirthName = (0x0010, 0x1060)

    # File meta information used in _updateMetaInfo():
    ImplementationClassUID = (0x0002, 0x0012)
    MediaStorageSOPClassUID = (0x0002, 0x0002)
    MediaStorageSOPInstanceUID = (0x0002, 0x0003)

    @property
    def tag(self):
        """
        The DICOM tag, represented as (group number, element number)
        """
        return self.value
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
# Basic Application Level Confidentiality Profile Attributes
|
|
115
|
+
# ftp://medical.nema.org/medical/dicom/final/sup55_ft.pdf
|
|
116
|
+
|
|
117
|
+
|
|
118
|
+
@unique
class ActionCode(Enum):
    """
    Actions that can be applied to a DICOM attribute during anonymisation.

    The value of each member is a human-readable description of the action.

    See https://qmenta.atlassian.net/wiki/spaces/QTG/pages/1166409832
    /DICOM+de-identification for the list and references
    """

    # Delete the tag from the header.
    X = "Remove tag"

    # Blank the value, or put a dummy value in its place.
    Z = (
        "Replace with a zero length value, "
        "or a non-zero length value that may be a dummy value "
        "and consistent with the Value Representations"
    )

    # Always put a dummy value in place.
    D = (
        "Replace with a non-zero length value "
        "that may be a dummy value "
        "and consistent with the Value Representations"
    )
|
|
134
|
+
|
|
135
|
+
|
|
136
|
+
# Default confidentiality profile: maps every DicomAttribute that must be
# anonymised to the ActionCode that is applied to it. The insertion order is
# the order in which the attributes are processed.
DICOM_ANON_MIN_SUPP_55 = {
    # Patient identification
    DicomAttribute.PatientName: ActionCode.Z,
    DicomAttribute.PatientID: ActionCode.Z,
    DicomAttribute.IssuerOfPatientID: ActionCode.X,
    DicomAttribute.PatientBirthTime: ActionCode.X,
    DicomAttribute.PatientSex: ActionCode.Z,
    DicomAttribute.PatientBirthName: ActionCode.X,
    DicomAttribute.CountryOfResidence: ActionCode.X,
    DicomAttribute.RegionOfResidence: ActionCode.X,
    DicomAttribute.PatientTelephoneNumbers: ActionCode.X,
    DicomAttribute.CurrentPatientLocation: ActionCode.X,
    DicomAttribute.PatientInstitutionResidence: ActionCode.X,

    # Dates
    DicomAttribute.StudyDate: ActionCode.Z,
    DicomAttribute.SeriesDate: ActionCode.X,
    DicomAttribute.AcquisitionDate: ActionCode.X,
    DicomAttribute.ContentDate: ActionCode.Z,
    DicomAttribute.OverlayDate: ActionCode.X,
    DicomAttribute.CurveDate: ActionCode.X,
    DicomAttribute.AcquisitionDateTime: ActionCode.X,

    # Times
    DicomAttribute.StudyTime: ActionCode.Z,
    DicomAttribute.SeriesTime: ActionCode.X,
    DicomAttribute.AcquisitionTime: ActionCode.X,
    DicomAttribute.ContentTime: ActionCode.Z,
    DicomAttribute.OverlayTime: ActionCode.X,
    DicomAttribute.CurveTime: ActionCode.X,

    # Institution and staff
    DicomAttribute.InstitutionAddress: ActionCode.X,
    DicomAttribute.ReferringPhysicianName: ActionCode.Z,
    DicomAttribute.ReferringPhysicianAddress: ActionCode.X,
    DicomAttribute.ReferringPhysicianTelephoneNumber: ActionCode.X,
    DicomAttribute.InstitutionalDepartmentName: ActionCode.X,
    DicomAttribute.OperatorsName: ActionCode.X,

    DicomAttribute.StudyID: ActionCode.Z,
    DicomAttribute.DateTime: ActionCode.X,
    DicomAttribute.Date: ActionCode.X,
    DicomAttribute.Time: ActionCode.X,
    DicomAttribute.PersonName: ActionCode.D,
    DicomAttribute.AccessionNumber: ActionCode.Z,
    DicomAttribute.InstitutionName: ActionCode.X,
    DicomAttribute.ReferringPhysicianIDSequence: ActionCode.X,
    DicomAttribute.PhysiciansOfRecord: ActionCode.X,
    DicomAttribute.PhysiciansOfRecordIDSequence: ActionCode.X,
    DicomAttribute.PerformingPhysicianName: ActionCode.X,
    DicomAttribute.PerformingPhysicianIDSequence: ActionCode.X,
    DicomAttribute.NameOfPhysicianReadingStudy: ActionCode.X,
    DicomAttribute.PhysicianReadingStudyIDSequence: ActionCode.X,

    # Further patient details
    DicomAttribute.PatientBirthDate: ActionCode.Z,
    DicomAttribute.PatientInsurancePlanCodeSequence: ActionCode.X,
    DicomAttribute.PatientPrimaryLanguageCodeSeq: ActionCode.X,
    DicomAttribute.OtherPatientIDs: ActionCode.X,
    DicomAttribute.OtherPatientNames: ActionCode.X,
    DicomAttribute.OtherPatientIDsSequence: ActionCode.X,
    DicomAttribute.PatientAge: ActionCode.X,
    DicomAttribute.PatientAddress: ActionCode.X,
    DicomAttribute.PatientMotherBirthName: ActionCode.X,
}
|
|
191
|
+
|
|
192
|
+
|
|
193
|
+
def redact_dicom_attr(header, tag):
    """
    Overwrite the value of a DICOM element with a dummy value.

    Parameters
    ----------
    header:
        The DICOM header that contains the element
    tag:
        DICOM tag ID of the element to redact

    Raises
    ------
    RuntimeError
        If the current value has a type for which no dummy value is defined.
    """
    value = header[tag].value

    text_like = (str, pydicom.valuerep.PersonName, pydicom.uid.UID)
    if isinstance(value, text_like):
        dummy = "XXXX"
    elif isinstance(value, pydicom.valuerep.DSfloat):
        dummy = 0.0
    elif isinstance(value, pydicom.multival.MultiValue):
        dummy = []
    else:
        raise RuntimeError(f"Unknown type {type(value)} for tag {tag}")

    header[tag].value = dummy
|
|
209
|
+
|
|
210
|
+
|
|
211
|
+
def check_tag(header, tag):
    """
    Tell whether the value of a tag can be read from the header.

    Parameters
    ----------
    header:
        The DICOM header to check

    tag:
        DICOM tag ID to check

    Returns
    -------
    bool
        True when ``header[tag].value`` can be read, False when reading it
        raises any exception (e.g. because the tag is missing).
    """
    try:
        getattr(header[tag], "value")
    except Exception:
        return False
    return True
|
|
226
|
+
|
|
227
|
+
|
|
228
|
+
def check_meta_tag(header, tag):
    """
    Tell whether the value of a file meta tag can be read from the header.

    Parameters
    ----------
    header:
        The DICOM header to check

    tag:
        DICOM meta tag ID to check

    Returns
    -------
    bool
        True when ``header.file_meta[tag].value`` can be read, False when
        reading it raises any exception.
    """
    try:
        getattr(header.file_meta[tag], "value")
    except Exception:
        return False
    return True
|
|
243
|
+
|
|
244
|
+
|
|
245
|
+
def anonymise_header_attribute(header, attribute, action):
    """
    Apply one anonymisation action to one attribute of the header.

    Nothing happens when the attribute cannot be read from the header.
    Actions Z and D redact the value, action X deletes the element.

    Parameters
    ----------
    header:
        The DICOM header to update

    attribute: DicomAttribute
        The DICOM attribute to update

    action: ActionCode
        The type of anonymisation to apply

    Raises
    ------
    NotImplementedError
        If the action is not in [Z, D, X]
    """
    if action not in [ActionCode.Z, ActionCode.D, ActionCode.X]:
        raise NotImplementedError("Only actions Z, D and X are supported.")

    tag = attribute.tag
    if not check_tag(header, tag):
        return

    if action in [ActionCode.Z, ActionCode.D]:
        redact_dicom_attr(header, tag)
        return

    if action is ActionCode.X:
        try:
            del header[tag]
        except Exception as e:
            # Deleting by tag failed: log it and retry by attribute name.
            logging.getLogger(__name__).error(str(e))
            delattr(header, attribute.name)
|
|
279
|
+
|
|
280
|
+
|
|
281
|
+
def anonymise_dicom_dataset(dcm, actions=None):
    """
    Anonymise the given DICOM header using the specified profile.

    The profile is applied to the header itself and to every dataset found
    in its sequence elements, in breadth-first order. Elements whose value
    cannot be read are deleted.

    Parameters
    ----------
    dcm:
        The DICOM header to anonymise

    actions:
        The confidentiality profile to use when redacting the header.
        Default: DICOM_ANON_MIN_SUPP_55
    """
    profile = actions or DICOM_ANON_MIN_SUPP_55
    logger = logging.getLogger(__name__)

    pending = [dcm]
    while pending:
        dataset = pending.pop(0)

        # Apply every action of the profile to this dataset.
        for attribute, action in profile.items():
            logger.debug(
                f"Anonymizing {attribute} with action {action}."
            )
            anonymise_header_attribute(dataset, attribute, action)

        # Queue nested datasets, and collect unreadable elements. Deletion
        # is postponed so the dataset is not modified while iterating it.
        unreadable = []
        for tag in dataset.keys():
            if not check_tag(dataset, tag):
                message = (
                    "deleting key {!r} with invalid data "
                    "from header when anonymising dicom file"
                )
                logger.warning(message.format(tag))
                unreadable.append(tag)
                continue

            value = dataset[tag].value
            if isinstance(value, pydicom.sequence.Sequence):
                pending.extend(value)

        for tag in unreadable:
            try:
                del dataset[tag]
            except Exception as e:
                logger.error(str(e))
                logger.error("Using delattr to delete the tag")
                delattr(dataset, tag)
|
|
330
|
+
|
|
331
|
+
|
|
332
|
+
def anonymise(filename, actions=None):
    """
    Anonymise a DICOM file in place.

    Parameters
    ----------
    filename: str
        The DICOM file to anonymise; it is overwritten with the result.
    actions: dict, optional
        The confidentiality profile to apply.
        Default: DICOM_ANON_MIN_SUPP_55

    Returns
    -------
    dict
        Always {"OK": True, "error_tags": []}; failures surface as
        exceptions.
    """
    dataset = pydicom.read_file(filename)

    anonymise_dicom_dataset(dataset, actions or DICOM_ANON_MIN_SUPP_55)
    _updateMetaInfo(dataset)
    dataset.save_as(filename)

    return {"OK": True, "error_tags": []}
|
|
341
|
+
|
|
342
|
+
|
|
343
|
+
def _updateMetaInfo(header):
    """
    Fill in the DICOM file meta elements that cannot be read.

    Each of ImplementationClassUID, MediaStorageSOPClassUID and
    MediaStorageSOPInstanceUID is set to a hard-coded value when it cannot
    be read from ``header.file_meta``. Readable values are left unchanged.
    """
    fallbacks = (
        (DicomAttribute.ImplementationClassUID, "1.2.3.4"),
        (DicomAttribute.MediaStorageSOPClassUID, "1.2.840.10008.5.1.4.1.1.2"),
        (DicomAttribute.MediaStorageSOPInstanceUID, "1.2.3"),
    )
    for attribute, fallback in fallbacks:
        if check_meta_tag(header, attribute.tag):
            continue
        # The enum member name is also the file meta attribute name.
        setattr(header.file_meta, attribute.name, fallback)
|
|
354
|
+
|
|
355
|
+
|
|
356
|
+
def check_anonymised_file(input_file, options=None):
    """
    Check which attributes of a DICOM file are not anonymised.

    Every attribute of DICOM_ANON_MIN_SUPP_55 that is present in the file is
    checked with check_anonym_dicom_attr().

    Parameters
    ----------
    input_file: str
        The DICOM file to check
    options: dict, optional
        Overrides for the default options:
        "return_lines" (default False) adds a "lines" entry to the result,
        holding an (attribute name, value) tuple per attribute, with "!!!"
        as the value of attributes that are not found;
        "not_found_as_error" (default False) counts attributes that are not
        found in "n_errors".

    Returns
    -------
    dict
        On success: "OK" set to True, "n_errors", "not_anonymised_attr"
        (names of the attributes that are not anonymised) and optionally
        "lines". When the file cannot be processed: "OK" set to False and
        the message of the exception under "error".
    """
    _options = {"return_lines": False, "not_found_as_error": False}
    # None instead of a shared mutable default for ``options``.
    if options is not None:
        _options.update(options)

    logger = logging.getLogger(__name__)
    lines = []
    not_anonymised_attr = []

    try:
        hd = pydicom.read_file(input_file)
        n_errors = 0
        for attribute in DICOM_ANON_MIN_SUPP_55:
            if not check_tag(hd, attribute.tag):
                if _options["return_lines"]:
                    lines.append((attribute.name, "!!!"))
                if _options["not_found_as_error"]:
                    n_errors += 1
                continue

            if _options["return_lines"]:
                lines.append((attribute.name, str(hd[attribute.tag].value)))

            try:
                if not check_anonym_dicom_attr(hd, attribute.tag):
                    not_anonymised_attr.append(attribute.name)
            except Exception as e:
                # Best effort: a failing check of one attribute is logged and
                # must not abort the check of the remaining attributes.
                logger.error(str(e))
    except Exception as e:
        return {"OK": False, "error": str(e)}

    ret = {
        "OK": True,
        "n_errors": n_errors,
        "not_anonymised_attr": not_anonymised_attr
    }
    if _options["return_lines"]:
        ret["lines"] = lines

    return ret
|
|
400
|
+
|
|
401
|
+
|
|
402
|
+
def check_anonym_dicom_attr(header, tag):
    """
    Tell whether the value of a DICOM element is the anonymised dummy value.

    Parameters
    ----------
    header:
        The DICOM header that contains the element
    tag:
        DICOM tag ID of the element to check

    Returns
    -------
    bool
        For string, PersonName and UID values: whether the value equals
        "XXXX". For DSfloat values: whether the value equals 0.0.
        True for values of any other type.
    """
    value = header[tag].value

    if isinstance(value, str):
        return str(value) == "XXXX"

    name_or_uid = (pydicom.valuerep.PersonName, pydicom.uid.UID)
    if isinstance(value, name_or_uid):
        return value == "XXXX"

    if isinstance(value, pydicom.valuerep.DSfloat):
        return value == 0.0

    # Other types are not checked and count as anonymised.
    return True
|
|
414
|
+
|
|
415
|
+
|
|
416
|
+
class RelativeTimeAnonymiser:
    """
    Anonymise multiple DICOM files, while keeping the relative time/date
    differences for (AcquisitionDate, AcquisitionTime) and
    (ContentDate, ContentTime) tuples intact between all the files that
    are anonymised by the same instance of RelativeTimeAnonymiser.

    The confidentiality profile defaults to DICOM_ANON_MIN_SUPP_55. The tags
    [AcquisitionDate, AcquisitionTime, ContentDate, ContentTime] are always
    excluded from the profile that is applied to the file. For those tags,
    the action specified in DICOM_ANON_MIN_SUPP_55 is only applied if the
    anonymisation that keeps the relative times preserved is not possible.
    """

    def __init__(self):
        self._time_anonymise = TimeAnonymise()
        self._original_actions = DICOM_ANON_MIN_SUPP_55

        # Note: We currently do not support a single AcquisitionDateTime or
        # ContentDateTime tag. Two tags must be used to store date and time.
        self._datetime_attributes = [
            (DicomAttribute.AcquisitionDate, DicomAttribute.AcquisitionTime),
            (DicomAttribute.ContentDate, DicomAttribute.ContentTime),
        ]

        # The original confidentiality profile without the date/time
        # attributes listed in _datetime_attributes.
        self._restricted_actions = dict(self._original_actions)
        attributes_to_remove = [
            # Flatten the _datetime_attributes list.
            atr
            for atr_pair in self._datetime_attributes
            for atr in atr_pair
        ]
        for atr in attributes_to_remove:
            self._restricted_actions.pop(atr, None)

    @staticmethod
    def _time_to_TM(time):
        """
        Convert the time component of a Python datetime object into
        DICOM time (TM) value representation.
        """
        return "{:02}{:02}{:02}.{:06}".format(
            time.hour, time.minute, time.second, time.microsecond
        )

    @staticmethod
    def _date_to_DA(date):
        """
        Convert the date component of a Python datetime object into
        DICOM date (DA) value representation.
        """
        return "{:04}{:02}{:02}".format(date.year, date.month, date.day)

    def anonymise_datetime(self, header):
        """
        Anonymise the datetime values for AcquisitionDate, AcquisitionTime,
        ContentDate and ContentTime tags, while keeping the relative time
        differences of different dates/times for different calls of this
        function intact.

        Only the tags of ``header`` itself are handled; datasets nested in
        sequence elements are not visited.

        Parameters
        ----------
        header:
            The DICOM header containing the datetime tags to anonymise

        Raises
        ------
        time_utils.TooLargeDeltaError
            when trying to anonymise multiple DICOM headers of which the
            datetimes to anonymise span more than 24h.
        """
        # Date and time must be stored in two separate tags
        for date_atr, time_atr in self._datetime_attributes:
            date_ok = check_tag(header, date_atr.tag)
            time_ok = check_tag(header, time_atr.tag)
            if not (date_ok and time_ok):
                # A full datetime cannot be reconstructed. If one of the tags
                # exists, anonymise it as specified in the original
                # confidentiality profile to ensure proper anonymisation.
                if time_ok:
                    action = self._original_actions[time_atr]
                    anonymise_header_attribute(header, time_atr, action)
                elif date_ok:
                    action = self._original_actions[date_atr]
                    anonymise_header_attribute(header, date_atr, action)
                continue  # Go to the next (date_atr, time_atr) pair

            date_element = header[date_atr.tag]
            time_element = header[time_atr.tag]

            assert date_element.VR == "DA"
            assert time_element.VR == "TM"

            # A value that is not a valid date string is treated as missing.
            try:
                in_date = pydicom.valuerep.DA(date_element.value)
            except ValueError:
                in_date = None

            # A value that is not a valid time string is treated as missing.
            try:
                in_time = pydicom.valuerep.TM(time_element.value)
            except ValueError:
                in_time = None

            if not (in_date and in_time):
                # At least one of the values could not be used: fall back
                # to the original confidentiality profile for both tags.
                anonymise_header_attribute(
                    header, time_atr, self._original_actions[time_atr])
                anonymise_header_attribute(
                    header, date_atr, self._original_actions[date_atr])
                continue  # Go to the next (date_atr, time_atr) pair

            in_datetime = datetime.combine(in_date, in_time)

            # Compute the target datetime
            out_datetime = self._time_anonymise.anonymise_datetime(in_datetime)

            header[date_atr.tag].value = self._date_to_DA(out_datetime)
            header[time_atr.tag].value = self._time_to_TM(out_datetime)

    def anonymise(self, filename, actions=None):
        """
        Anonymise the DICOM file in place using the given profile without
        its date/time attributes, and replace the date/time elements with
        an anonymised date/time that keeps the relative date and time of
        different DICOM datasets intact.

        Parameters
        ----------
        filename: str
            The file to anonymise; it is overwritten with the result.
        actions: dict, optional
            The confidentiality profile to apply.
            Default: DICOM_ANON_MIN_SUPP_55. The profile does not need to
            contain the date/time attributes.

        Raises
        ------
        time_utils.TooLargeDeltaError
            when trying to anonymise multiple DICOM headers of which the
            datetimes to anonymise span more than 24h.
        """
        header = pydicom.read_file(filename)
        self.anonymise_datetime(header)

        actions = actions or DICOM_ANON_MIN_SUPP_55
        # Filter instead of dict.pop(key): a custom profile that lacks one
        # of the date/time attributes must not raise KeyError.
        handled_here = {
            atr
            for atr_pair in self._datetime_attributes
            for atr in atr_pair
        }
        restricted_actions = {
            atr: action
            for atr, action in actions.items()
            if atr not in handled_here
        }

        # NOTE(review): the restricted profile is also applied to nested
        # datasets, so date/time tags inside sequence elements are left
        # untouched by both steps - confirm that this is intended.
        anonymise_dicom_dataset(header, restricted_actions)
        _updateMetaInfo(header)

        header.save_as(filename)
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import shutil
|
|
3
|
+
from tempfile import TemporaryDirectory
|
|
4
|
+
|
|
5
|
+
import nibabel
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def anonymise(filename):
    """
    Anonymise a NIfTI file in place.

    The "db_name" header field, when present, is replaced by "XXXX". The
    image is then written to a temporary directory and copied over the
    original file.

    Parameters
    ----------
    filename: str
        The NIfTI file to anonymise; it is overwritten with the result.

    Returns
    -------
    dict
        {"OK": True} on success. On any exception, "OK" set to False and
        a message under "error".
    """
    try:
        image = nibabel.load(filename)

        header = image.header
        if "db_name" in header:
            header["db_name"] = "XXXX"

        with TemporaryDirectory() as scratch_dir:
            scratch_path = os.path.join(
                scratch_dir, os.path.basename(filename)
            )
            anonymised = nibabel.Nifti1Image(
                image.get_fdata(), image.affine, header
            )
            nibabel.save(anonymised, scratch_path)
            shutil.copyfile(scratch_path, filename)
    except Exception as e:
        prefix = "Some NIFTI files are not accepted or are corrupted. "
        return {"OK": False, "error": prefix + f"\n{str(e)}"}

    return {"OK": True}
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def check_anonymised_file(filename):
    """
    Check whether a NIfTI file is anonymised.

    Parameters
    ----------
    filename: str
        The NIfTI file to check

    Returns
    -------
    dict
        {"OK": True} when the header has no "db_name" field, or when that
        field is "XXXX" or empty; {"OK": False} otherwise. On any
        exception, "OK" set to False and a message under "error".
    """
    try:
        header = nibabel.load(filename).header
        if "db_name" not in header:
            return {"OK": True}

        db_name = str(header["db_name"].astype(str))
        return {"OK": db_name in ["XXXX", ""]}
    except Exception as e:
        prefix = "Some NIFTI files are not accepted or are corrupted. "
        return {"OK": False, "error": prefix + f"\n{str(e)}"}
|
|
@@ -0,0 +1,127 @@
|
|
|
1
|
+
from calendar import monthrange
|
|
2
|
+
from datetime import datetime, timedelta
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
class TimeUtilsError(Exception):
    """
    Base class for time utils errors
    """


class TooLargeDeltaError(TimeUtilsError):
    """
    Too large difference between two times.

    Parameters
    ----------
    t1, t2:
        The two times that are too far apart
    max_delta:
        The maximum difference that is allowed
    """

    def __init__(self, t1, t2, max_delta):
        message = (
            "Too large time difference between {} and {}. "
            "Maximum allowed delta: {}."
        ).format(t1, t2, max_delta)
        # Pass only the message: passing ``self`` as well would make it part
        # of ``args`` and garble ``str(error)``.
        super().__init__(message)
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def monthdelta(d1, d2):
    """
    Calculates the difference in months between two timepoints

    Starting at ``d1``, the timepoint is advanced repeatedly by the number
    of days of the month it is currently in. The result is the number of
    such steps that do not go past ``d2``, so it is 0 when ``d2`` is not
    at least one step after ``d1``.
    """
    months = 0
    cursor = d1 + timedelta(days=monthrange(d1.year, d1.month)[1])
    while cursor <= d2:
        months += 1
        cursor += timedelta(days=monthrange(cursor.year, cursor.month)[1])
    return months
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def calculate_passed_days(timestamp1, timestamp2):
    """
    Calculates the number of days between two timestamps.

    The difference ``timestamp1 - timestamp2`` is rounded down to whole
    minutes and returned as a (fractional) number of days. Returns 0 when
    ``timestamp2`` is None.
    """
    if timestamp2 is None:
        return 0

    elapsed = timestamp1 - timestamp2
    whole_minutes = (elapsed.days * 86400 + elapsed.seconds) // 60
    minutes_per_day = 24 * 60
    return whole_minutes / minutes_per_day
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
class TimeAnonymise:
    """
    Anonymise time while keeping the relative time differences between
    different image files in a single session. Each TimeAnonymise object
    should be used to anonymise exactly one session.

    Attributes
    ----------
    target_base : datetime
        The target date that is the base for the returned anonymised
        datetimes
    source_base : datetime
        The source date that will be used to compute the time difference with
        new input dates. This will be the input date of the first
        anonymise_datetime() function call
    max_delta : timedelta
        The maximum difference between two datetimes that are acceptable
        to be anonymised by a single TimeAnonymise instance. We use this
        to avoid mistakes where multiple sessions are being anonymised
        with a single TimeAnonymise instance.
    """

    def __init__(self):
        # Noon on January 1, 1900.
        self.target_base = datetime(1900, 1, 1, 12)
        # Will be set by the first input date
        self.source_base = None
        self.max_delta = timedelta(hours=24)

        # The minimum and maximum input times that were accepted so far
        self._source_min = None
        self._source_max = None

    def anonymise_datetime(self, source):
        """
        Anonymise the input datetime by changing the date to be in the range
        (target_base - 24h, target_base + 24h) without changing the relative
        time difference between any two times that were anonymised by this
        TimeAnonymise object.

        Parameters
        ----------
        source : datetime
            The datetime to anonymise

        Raises
        ------
        TooLargeDeltaError
            When the time difference between source and any of the previously
            anonymised datetime objects is more than max_delta. A rejected
            datetime leaves the state of this object unchanged.

        Returns
        -------
        datetime
            The anonymised datetime object
        """
        if self.source_base is None:
            # This is the first input to anonymise
            self.source_base = source
            self._source_min = source
            self._source_max = source
            return self.target_base

        # Validate the widened range before storing it, so that a rejected
        # input does not make every later call fail as well.
        new_min = min(self._source_min, source)
        new_max = max(self._source_max, source)
        if (new_max - new_min) > self.max_delta:
            raise TooLargeDeltaError(source, self.source_base, self.max_delta)

        self._source_min = new_min
        self._source_max = new_max

        return self.target_base + (source - self.source_base)
|