qmenta-anon 2.1.dev374__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,19 @@
1
+ Metadata-Version: 2.3
2
+ Name: qmenta-anon
3
+ Version: 2.1.dev374
4
+ Summary: The qmenta-anon library is used to anonymize files before transferring them to the QMENTA platform.
5
+ Author: QMENTA
6
+ Author-email: dev@qmenta.com
7
+ Requires-Python: >=3.10,<4.0
8
+ Classifier: Development Status :: 4 - Beta
9
+ Classifier: Intended Audience :: Developers
10
+ Classifier: License :: Other/Proprietary License
11
+ Classifier: Programming Language :: Python :: 3
12
+ Classifier: Programming Language :: Python :: 3.10
13
+ Classifier: Programming Language :: Python :: 3.11
14
+ Classifier: Programming Language :: Python :: 3.12
15
+ Classifier: Programming Language :: Python :: 3.13
16
+ Classifier: Topic :: Scientific/Engineering :: Medical Science Apps.
17
+ Requires-Dist: nibabel (>=5.3.2,<6.0.0)
18
+ Requires-Dist: pydicom (>=2.3.0,<3.0.0)
19
+ Requires-Dist: setuptools (>=80.3.1,<81.0.0)
@@ -0,0 +1,38 @@
1
+ [tool.poetry]
2
+ name = "qmenta-anon"
3
+ version = "2.1.dev374"
4
+ description = "The qmenta-anon library is used to anonymize files before transferring them to the QMENTA platform."
5
+ authors = ["QMENTA <dev@qmenta.com>"]
6
+ classifiers = [
7
+ "Development Status :: 4 - Beta",
8
+ "Intended Audience :: Developers",
9
+ "License :: Other/Proprietary License",
10
+ "Topic :: Scientific/Engineering :: Medical Science Apps."
11
+ ]
12
+ packages = [
13
+ { include = "qmenta", from = "src" }
14
+ ]
15
+
16
+ [tool.poetry.dependencies]
17
+ python = "^3.10"
18
+ pydicom = "^2.3.0"
19
+ nibabel = "^5.3.2"
20
+ setuptools = "^80.3.1"
21
+
22
+ [tool.poetry.group.dev.dependencies]
23
+ flake8 = "^6.1.0"
24
+ pytest = "^7.4.0"
25
+ coverage = "^7.2.7"
26
+ scriv = "^1.3.1"
27
+
28
+ [build-system]
29
+ requires = ["poetry-core>=1.0.0"]
30
+ build-backend = "poetry.core.masonry.api"
31
+
32
+ [tool.coverage.report]
33
+ fail_under = 80
34
+
35
+ [tool.scriv]
36
+ format = "md"
37
+ md_header_level = "2"
38
+ insert_marker = "scriv-insert-here"
@@ -0,0 +1 @@
1
# Declare this package as a pkgutil-style namespace package: extend_path()
# adds same-named package directories found on sys.path to __path__, so that
# submodules shipped by other distributions can be imported as well.
__path__ = __import__('pkgutil').extend_path(__path__, __name__)
@@ -0,0 +1 @@
1
# Declare this package as a pkgutil-style namespace package: extend_path()
# adds same-named package directories found on sys.path to __path__, so that
# submodules shipped by other distributions can be imported as well.
__path__ = __import__('pkgutil').extend_path(__path__, __name__)
@@ -0,0 +1,75 @@
1
+ import pydicom
2
+ from enum import Enum
3
+ from pydicom.errors import InvalidDicomError
4
+
5
+ from qmenta.anon.dicom import anonymise as anonymise_dicom
6
+ from qmenta.anon.nifti import anonymise as anonymise_nifti
7
+
8
+
9
class Format(Enum):
    """
    File formats distinguished by recognise_file_format().

    Enum with the following options:
    UNKNOWN, DICOM, NIFTI
    """
    # Neither a readable DICOM dataset nor a NIfTI file name extension.
    UNKNOWN = 0
    DICOM = 1
    NIFTI = 2
17
+
18
+
19
def recognise_file_format(file):
    """
    Detects whether a file is DICOM or NifTI.

    The file is first parsed with pydicom. When that raises
    InvalidDicomError, or yields an empty dataset, the file name extension
    is used to recognise NifTI files (".nii" and ".nii.gz").

    Parameters
    ----------
    file : str or path-like
        The path of the file.

    Returns
    -------
    Format
        Format.DICOM, Format.NIFTI, or Format.UNKNOWN when the file is
        neither of the two.
    """
    try:
        # dcmread() is the supported reader; read_file() is only a
        # deprecated alias for it. An empty dataset is falsy and is treated
        # the same way as an invalid DICOM file.
        is_dicom = bool(pydicom.dcmread(file))
    except InvalidDicomError:
        is_dicom = False

    if is_dicom:
        return Format.DICOM

    # str() allows path-like objects to be checked as well.
    if str(file).endswith((".nii", ".nii.gz")):
        return Format.NIFTI
    return Format.UNKNOWN
44
+
45
+
46
def anonymise(file):
    """
    Anonymise DICOM and NifTI files.
    This function will replace the given file with the anonymised version.

    Parameters
    ----------
    file : str
        The path of the file

    Returns
    -------
    dict
        For DICOM and NifTI files, the dict returned by the format-specific
        anonymiser, with the boolean return status in the value for the key
        "OK". For files of an unknown format, a dict with "OK" set to False
        and a message in the value for the key "error".

    Raises
    ------
    RuntimeError
        If the detected format is not one of the known Format members.
    """
    # Named file_format so that the built-in format() is not shadowed.
    file_format = recognise_file_format(file)
    if file_format == Format.DICOM:
        return anonymise_dicom(file)
    if file_format == Format.NIFTI:
        return anonymise_nifti(file)
    if file_format == Format.UNKNOWN:
        return {
            "error": "Some files are not accepted or are corrupted.",
            "OK": False
        }
    raise RuntimeError(
        'This format is not properly handled by the anonymise function.'
    )
@@ -0,0 +1,575 @@
1
+ import logging
2
+ from datetime import datetime
3
+ from enum import Enum, unique
4
+ import pydicom
5
+
6
+ from qmenta.anon.time_utils import TimeAnonymise
7
+
8
+
9
def PatchedMultiString(val, valtype=str):
    """
    Split a bytestring by delimiters if there are any

    Unlike a plain conversion, a single value that cannot be converted to
    `valtype` (ValueError) is replaced by a dummy value instead of raising.

    Parameters
    ----------
    val: str
        DICOM bytestring to split up
    valtype:
        default str, but can be e.g. UID to overwrite to a specific type.
        None is accepted as well and is treated as str.

    Returns
    -------
    The converted value for a single value, the unconverted (empty) input
    when there is nothing to convert, or a pydicom MultiValue when `val`
    contains backslash delimiters. When the conversion of a single value
    fails: valtype("XXXX") for str, PersonName and UID, valtype(0.0) for
    DSfloat and None for any other type.
    """
    if valtype is None:
        # NOTE(review): the replaced pydicom function is assumed to accept
        # valtype=None meaning str -- confirm against the pinned pydicom.
        valtype = str

    # Remove trailing blank used to pad to even length
    # 2005.05.25: also check for trailing 0, error made in PET files we are
    # converting
    if val and val.endswith((" ", "\x00")):
        val = val[:-1]

    splitup = val.split("\\")
    if len(splitup) > 1:
        return pydicom.multival.MultiValue(valtype, splitup)

    val = splitup[0]
    if not val:
        # Nothing to convert: hand back the empty value unchanged.
        return val

    try:
        return valtype(val)
    except ValueError:
        # The stored text does not fit the expected type (e.g. an anonymised
        # "XXXX" in a numeric tag): substitute a dummy of the right type.
        if valtype in (str, pydicom.valuerep.PersonName, pydicom.uid.UID):
            return valtype("XXXX")
        if valtype is pydicom.valuerep.DSfloat:
            return valtype(0.0)
        # No dummy is defined for other types.
        return None
38
+
39
+
40
# Overwrite pydicom's MultiString with PatchedMultiString in order to prevent
# exceptions when tags have been anonymised with data of another type
# (e.g. a float tag that now holds a string).
# NOTE(review): this rebinds the name on pydicom.valuerep only; a module that
# imported MultiString by name before this line runs keeps the original
# function -- confirm that the patch reaches pydicom's value converters.
pydicom.valuerep.MultiString = PatchedMultiString
43
+
44
+
45
@unique
class DicomAttribute(Enum):
    """
    The DICOM attributes that are referenced by the anonymisation code.

    The value of each member is its DICOM tag, written as the tuple
    (group number, element number). @unique guarantees that no tag is
    listed twice.
    """

    @property
    def tag(self):
        """
        The DICOM tag, represented as (group number, element number)
        """
        return self.value

    PatientName = (0x0010, 0x0010)
    PatientID = (0x0010, 0x0020)
    IssuerOfPatientID = (0x0010, 0x0021)
    PatientBirthTime = (0x0010, 0x0032)
    PatientSex = (0x0010, 0x0040)
    PatientBirthName = (0x0010, 0x1005)
    CountryOfResidence = (0x0010, 0x2150)
    RegionOfResidence = (0x0010, 0x2152)
    PatientTelephoneNumbers = (0x0010, 0x2154)
    CurrentPatientLocation = (0x0038, 0x0300)
    PatientInstitutionResidence = (0x0038, 0x0400)
    StudyDate = (0x0008, 0x0020)
    SeriesDate = (0x0008, 0x0021)
    AcquisitionDate = (0x0008, 0x0022)
    ContentDate = (0x0008, 0x0023)
    OverlayDate = (0x0008, 0x0024)
    CurveDate = (0x0008, 0x0025)
    AcquisitionDateTime = (0x0008, 0x002A)
    StudyTime = (0x0008, 0x0030)
    SeriesTime = (0x0008, 0x0031)
    AcquisitionTime = (0x0008, 0x0032)
    ContentTime = (0x0008, 0x0033)
    OverlayTime = (0x0008, 0x0034)
    CurveTime = (0x0008, 0x0035)
    InstitutionAddress = (0x0008, 0x0081)
    ReferringPhysicianName = (0x0008, 0x0090)
    ReferringPhysicianAddress = (0x0008, 0x0092)
    ReferringPhysicianTelephoneNumber = (0x0008, 0x0094)
    InstitutionalDepartmentName = (0x0008, 0x1040)
    OperatorsName = (0x0008, 0x1070)
    StudyID = (0x0020, 0x0010)
    DateTime = (0x0040, 0xA120)
    Date = (0x0040, 0xA121)
    Time = (0x0040, 0xA122)
    PersonName = (0x0040, 0xA123)
    AccessionNumber = (0x0008, 0x0050)
    InstitutionName = (0x0008, 0x0080)
    ReferringPhysicianIDSequence = (0x0008, 0x0096)
    PhysiciansOfRecord = (0x0008, 0x1048)
    PhysiciansOfRecordIDSequence = (0x0008, 0x1049)
    PerformingPhysicianName = (0x0008, 0x1050)
    PerformingPhysicianIDSequence = (0x0008, 0x1052)
    NameOfPhysicianReadingStudy = (0x0008, 0x1060)
    PhysicianReadingStudyIDSequence = (0x0008, 0x1062)
    PatientBirthDate = (0x0010, 0x0030)
    PatientInsurancePlanCodeSequence = (0x0010, 0x0050)
    PatientPrimaryLanguageCodeSeq = (0x0010, 0x0101)
    OtherPatientIDs = (0x0010, 0x1000)
    OtherPatientNames = (0x0010, 0x1001)
    OtherPatientIDsSequence = (0x0010, 0x1002)
    PatientAge = (0x0010, 0x1010)
    PatientAddress = (0x0010, 0x1040)
    PatientMotherBirthName = (0x0010, 0x1060)

    # File meta information used in _updateMetaInfo():
    ImplementationClassUID = (0x0002, 0x0012)
    MediaStorageSOPClassUID = (0x0002, 0x0002)
    MediaStorageSOPInstanceUID = (0x0002, 0x0003)
112
+
113
+
114
+ # Basic Application Level Confidentiality Profile Attributes
115
+ # ftp://medical.nema.org/medical/dicom/final/sup55_ft.pdf
116
+
117
+
118
@unique
class ActionCode(Enum):
    """
    The de-identification actions that can be assigned to a DICOM attribute.

    The value of each member is the human-readable description of the
    action.

    See https://qmenta.atlassian.net/wiki/spaces/QTG/pages/1166409832
    /DICOM+de-identification for the list and references
    """

    # The attribute is deleted from the header.
    X = "Remove tag"
    # Z and D are both implemented by overwriting the value with a dummy.
    Z = (
        "Replace with a zero length value, or a non-zero length value that "
        "may be a dummy value and consistent with the Value Representations"
    )
    D = (
        "Replace with a non-zero length value that may be a dummy value and "
        "consistent with the Value Representations"
    )
134
+
135
+
136
# The default confidentiality profile: maps every DicomAttribute that must be
# anonymised to the ActionCode that is applied to it. It is used whenever no
# explicit `actions` mapping is passed to the anonymisation functions.
DICOM_ANON_MIN_SUPP_55 = {
    DicomAttribute.PatientName: ActionCode.Z,
    DicomAttribute.PatientID: ActionCode.Z,
    DicomAttribute.IssuerOfPatientID: ActionCode.X,
    DicomAttribute.PatientBirthTime: ActionCode.X,
    DicomAttribute.PatientSex: ActionCode.Z,
    DicomAttribute.PatientBirthName: ActionCode.X,
    DicomAttribute.CountryOfResidence: ActionCode.X,
    DicomAttribute.RegionOfResidence: ActionCode.X,
    DicomAttribute.PatientTelephoneNumbers: ActionCode.X,
    DicomAttribute.CurrentPatientLocation: ActionCode.X,
    DicomAttribute.PatientInstitutionResidence: ActionCode.X,
    DicomAttribute.StudyDate: ActionCode.Z,
    DicomAttribute.SeriesDate: ActionCode.X,
    DicomAttribute.AcquisitionDate: ActionCode.X,
    DicomAttribute.ContentDate: ActionCode.Z,
    DicomAttribute.OverlayDate: ActionCode.X,
    DicomAttribute.CurveDate: ActionCode.X,
    DicomAttribute.AcquisitionDateTime: ActionCode.X,
    DicomAttribute.StudyTime: ActionCode.Z,
    DicomAttribute.SeriesTime: ActionCode.X,
    DicomAttribute.AcquisitionTime: ActionCode.X,
    DicomAttribute.ContentTime: ActionCode.Z,
    DicomAttribute.OverlayTime: ActionCode.X,
    DicomAttribute.CurveTime: ActionCode.X,
    DicomAttribute.InstitutionAddress: ActionCode.X,
    DicomAttribute.ReferringPhysicianName: ActionCode.Z,
    DicomAttribute.ReferringPhysicianAddress: ActionCode.X,
    DicomAttribute.ReferringPhysicianTelephoneNumber: ActionCode.X,
    DicomAttribute.InstitutionalDepartmentName: ActionCode.X,
    DicomAttribute.OperatorsName: ActionCode.X,
    DicomAttribute.StudyID: ActionCode.Z,
    DicomAttribute.DateTime: ActionCode.X,
    DicomAttribute.Date: ActionCode.X,
    DicomAttribute.Time: ActionCode.X,
    DicomAttribute.PersonName: ActionCode.D,
    DicomAttribute.AccessionNumber: ActionCode.Z,
    DicomAttribute.InstitutionName: ActionCode.X,
    DicomAttribute.ReferringPhysicianIDSequence: ActionCode.X,
    DicomAttribute.PhysiciansOfRecord: ActionCode.X,
    DicomAttribute.PhysiciansOfRecordIDSequence: ActionCode.X,
    DicomAttribute.PerformingPhysicianName: ActionCode.X,
    DicomAttribute.PerformingPhysicianIDSequence: ActionCode.X,
    DicomAttribute.NameOfPhysicianReadingStudy: ActionCode.X,
    DicomAttribute.PhysicianReadingStudyIDSequence: ActionCode.X,
    DicomAttribute.PatientBirthDate: ActionCode.Z,
    DicomAttribute.PatientInsurancePlanCodeSequence: ActionCode.X,
    DicomAttribute.PatientPrimaryLanguageCodeSeq: ActionCode.X,
    DicomAttribute.OtherPatientIDs: ActionCode.X,
    DicomAttribute.OtherPatientNames: ActionCode.X,
    DicomAttribute.OtherPatientIDsSequence: ActionCode.X,
    DicomAttribute.PatientAge: ActionCode.X,
    DicomAttribute.PatientAddress: ActionCode.X,
    DicomAttribute.PatientMotherBirthName: ActionCode.X,
}
191
+
192
+
193
def redact_dicom_attr(header, tag):
    """
    Overwrite the value of a DICOM element with a dummy of matching type.

    Strings, person names and UIDs are replaced by "XXXX", DSfloat values by
    0.0 and multi-valued elements by an empty list.

    Parameters
    ----------
    header:
        The DICOM header that contains the element

    tag:
        DICOM tag ID of the element to redact

    Raises
    ------
    RuntimeError
        If the current value has a type for which no dummy is defined.
    """
    current = header[tag].value

    # Pick the dummy first; the element is only written once the type of
    # its current value is known to be supported.
    if isinstance(current, str):
        dummy = "XXXX"
    elif isinstance(
        current, (pydicom.valuerep.PersonName, pydicom.uid.UID)
    ):
        dummy = "XXXX"
    elif isinstance(current, pydicom.valuerep.DSfloat):
        dummy = 0.0
    elif isinstance(current, pydicom.multival.MultiValue):
        dummy = []
    else:
        raise RuntimeError(
            "Unknown type {} for tag {}".format(type(current), tag)
        )

    header[tag].value = dummy
209
+
210
+
211
def check_tag(header, tag):
    """
    Tell whether the value of a tag can be read from a DICOM header.

    Parameters
    ----------
    header:
        The DICOM header to check

    tag:
        DICOM tag ID to check

    Returns
    -------
    bool
        True if header[tag].value can be evaluated, False if doing so
        raises any exception (e.g. the tag is absent or unreadable).
    """
    try:
        _ = header[tag].value
    except Exception:
        # NotImplementedError is an Exception subclass, so it is covered.
        return False
    else:
        return True
226
+
227
+
228
def check_meta_tag(header, tag):
    """
    Tell whether the value of a tag can be read from the file meta
    information of a DICOM header.

    Parameters
    ----------
    header:
        The DICOM header to check

    tag:
        DICOM meta tag ID to check

    Returns
    -------
    bool
        True if header.file_meta[tag].value can be evaluated, False if
        doing so raises any exception.
    """
    try:
        _ = header.file_meta[tag].value
    except Exception:
        # NotImplementedError is an Exception subclass, so it is covered.
        return False
    else:
        return True
243
+
244
+
245
def anonymise_header_attribute(header, attribute, action):
    """
    Apply one anonymisation action to one attribute of a DICOM header.

    Actions Z and D overwrite the value with a dummy, action X deletes the
    attribute. Nothing happens when the attribute cannot be read from the
    header.

    Parameters
    ----------
    header:
        The DICOM header to update

    attribute: DicomAttribute
        The DICOM attribute to update

    action: ActionCode
        The type of anonymisation to apply

    Raises
    ------
    NotImplementedError
        If the action is not in [Z, D, X]
    """
    if action not in (ActionCode.Z, ActionCode.D, ActionCode.X):
        raise NotImplementedError("Only actions Z, D and X are supported.")

    if not check_tag(header, attribute.tag):
        return

    if action is not ActionCode.X:
        # Z and D: keep the element, replace its value.
        redact_dicom_attr(header, attribute.tag)
        return

    # X: remove the element, falling back to deletion by attribute name
    # when deletion by tag fails.
    try:
        del header[attribute.tag]
    except Exception as e:
        logging.getLogger(__name__).error(str(e))
        delattr(header, attribute.name)
279
+
280
+
281
def anonymise_dicom_dataset(dcm, actions=None):
    """
    Anonymise the given DICOM header using the specified profile.

    The header is modified in place, together with every dataset that is
    nested in one of its sequence elements. Elements whose value cannot be
    read are deleted.

    Parameters
    ----------
    dcm:
        The DICOM header to anonymise

    actions:
        The confidentiality profile to use when redacting the header.
        Default: DICOM_ANON_MIN_SUPP_55 (also used for an empty profile).
    """
    from collections import deque

    actions = actions or DICOM_ANON_MIN_SUPP_55
    logger = logging.getLogger(__name__)

    # Breadth-first traversal: nested datasets found in sequence elements
    # are queued and anonymised with the same profile.
    node_queue = deque([dcm])
    while node_queue:
        header = node_queue.popleft()

        # anonymisation
        for attribute, action in actions.items():
            logger.debug(
                f"Anonymizing {attribute} with action {action}."
            )
            anonymise_header_attribute(header, attribute, action)

        # Queue nested datasets and collect the unreadable elements. The
        # latter are deleted afterwards, so that the header does not change
        # size while it is being iterated.
        tags_to_delete = []
        for tag in header.keys():
            if check_tag(header, tag):
                elem = header[tag]
                if isinstance(elem.value, pydicom.sequence.Sequence):
                    node_queue.extend(elem.value)
            else:
                template = (
                    "deleting key {!r} with invalid data from header "
                    "when anonymising dicom file"
                )
                logger.warning(template.format(tag))
                tags_to_delete.append(tag)

        for tag in tags_to_delete:
            try:
                del header[tag]
            except Exception as e:
                logger.error(str(e))
                logger.error("Using delattr to delete the tag")
                # delattr() needs the attribute keyword; passing the tag
                # itself always fails with a TypeError.
                keyword = pydicom.datadict.keyword_for_tag(tag)
                if not keyword:
                    # No keyword to fall back on: fail rather than silently
                    # keep unreadable data in an "anonymised" file.
                    raise
                delattr(header, keyword)
330
+
331
+
332
def anonymise(filename, actions=None):
    """
    Anonymise a DICOM file. The file is overwritten with the anonymised
    version.

    Parameters
    ----------
    filename: str
        The path of the DICOM file to anonymise

    actions: dict, optional
        The confidentiality profile to use when redacting the header.
        Default: DICOM_ANON_MIN_SUPP_55

    Returns
    -------
    dict
        {"OK": True, "error_tags": []}. Failures are not reported through
        the return value; exceptions are propagated to the caller.
    """
    # dcmread() is the supported reader; read_file() is only a deprecated
    # alias for it.
    header = pydicom.dcmread(filename)

    actions = actions or DICOM_ANON_MIN_SUPP_55
    anonymise_dicom_dataset(header, actions)
    _updateMetaInfo(header)

    header.save_as(filename)
    return {"OK": True, "error_tags": []}
341
+
342
+
343
def _updateMetaInfo(header):
    """
    Fill in the file meta elements that cannot be read from the header.

    ImplementationClassUID, MediaStorageSOPClassUID and
    MediaStorageSOPInstanceUID are each set to a fixed placeholder UID when
    missing; existing values are left untouched.
    """
    placeholders = (
        (DicomAttribute.ImplementationClassUID, "1.2.3.4"),
        (DicomAttribute.MediaStorageSOPClassUID, "1.2.840.10008.5.1.4.1.1.2"),
        (DicomAttribute.MediaStorageSOPInstanceUID, "1.2.3"),
    )
    for attribute, placeholder_uid in placeholders:
        if not check_meta_tag(header, attribute.tag):
            # The member names equal the file_meta attribute names.
            setattr(header.file_meta, attribute.name, placeholder_uid)
354
+
355
+
356
def check_anonymised_file(input_file, options=None):
    """
    Check whether the attributes of the DICOM_ANON_MIN_SUPP_55 profile have
    been anonymised in a DICOM file.

    Parameters
    ----------
    input_file: str
        The path of the DICOM file to check

    options: dict, optional
        "return_lines" (default False): add the key "lines" to the result,
        holding an (attribute name, value) tuple per attribute. Attributes
        that cannot be read are listed with the value "!!!".
        "not_found_as_error" (default False): count the attributes that
        cannot be read in "n_errors".

    Returns
    -------
    dict
        {"OK": False, "error": message} if the file could not be checked.
        Otherwise "OK" is True, "n_errors" holds the error count and
        "not_anonymised_attr" the names of the attributes whose value is
        not the expected dummy value.
    """
    _options = {"return_lines": False, "not_found_as_error": False}
    # None instead of a mutable default argument; both mean "no overrides".
    _options.update(options or {})

    logger = logging.getLogger(__name__)
    lines = []
    not_anonymised_attr = []
    n_errors = 0

    try:
        hd = pydicom.dcmread(input_file)
        for attribute in DICOM_ANON_MIN_SUPP_55:
            if not check_tag(hd, attribute.tag):
                if _options["return_lines"]:
                    lines.append((attribute.name, "!!!"))
                if _options["not_found_as_error"]:
                    n_errors += 1
                continue

            val = hd[attribute.tag].value
            if _options["return_lines"]:
                lines.append((attribute.name, str(val)))

            try:
                if not check_anonym_dicom_attr(hd, attribute.tag):
                    not_anonymised_attr.append(attribute.name)
            except Exception as e:
                # Best effort: a failing check is logged and the attribute
                # is not reported as non-anonymised.
                logger.error(str(e))
    except Exception as e:
        return {"OK": False, "error": str(e)}

    ret = {
        "OK": True,
        "n_errors": n_errors,
        "not_anonymised_attr": not_anonymised_attr
    }

    if _options["return_lines"]:
        ret["lines"] = lines

    return ret
400
+
401
+
402
def check_anonym_dicom_attr(header, tag):
    """
    Tell whether the value of a DICOM element is the expected dummy value.

    Parameters
    ----------
    header:
        The DICOM header that contains the element

    tag:
        DICOM tag ID of the element to check

    Returns
    -------
    bool
        For strings, person names and UIDs: whether the value is "XXXX".
        For DSfloat values: whether the value is 0.0. Values of any other
        type are always reported as anonymised.
    """
    current = header[tag].value

    if isinstance(current, str):
        return str(current) == "XXXX"
    if isinstance(current, (pydicom.valuerep.PersonName, pydicom.uid.UID)):
        return current == "XXXX"
    if isinstance(current, pydicom.valuerep.DSfloat):
        return current == 0.0
    return True
414
+
415
+
416
class RelativeTimeAnonymiser:
    """
    Anonymise multiple DICOM files, while keeping the relative time/date
    differences for (AcquisitionDate, AcquisitionTime) and
    (ContentDate, ContentTime) tuples intact between all the files that
    are anonymised by the same instance of RelativeTimeAnonymiser.

    For the tags [AcquisitionDate, AcquisitionTime, ContentDate,
    ContentTime], the action specified in DICOM_ANON_MIN_SUPP_55 will only
    be applied if anonymisation that keeps the original relative times
    preserved fails. All other tags are anonymised with the profile that is
    passed to anonymise(), which defaults to DICOM_ANON_MIN_SUPP_55.
    """

    def __init__(self):
        self._time_anonymise = TimeAnonymise()
        self._original_actions = DICOM_ANON_MIN_SUPP_55

        # Note: We currently do not support a single AquisitionDateTime or
        # ContentDateTime tag. Two tags must be used to store date and time.
        self._datetime_attributes = [
            (DicomAttribute.AcquisitionDate, DicomAttribute.AcquisitionTime),
            (DicomAttribute.ContentDate, DicomAttribute.ContentTime),
        ]

        # The default confidentiality profile without the date/time
        # attributes that are handled by anonymise_datetime().
        self._restricted_actions = self._without_datetime_attributes(
            self._original_actions
        )

    def _without_datetime_attributes(self, actions):
        """
        Return a copy of the profile `actions` without the attributes that
        are listed in self._datetime_attributes.
        """
        excluded = {
            atr
            for atr_pair in self._datetime_attributes
            for atr in atr_pair
        }
        return {
            attribute: action
            for attribute, action in actions.items()
            if attribute not in excluded
        }

    @staticmethod
    def _time_to_TM(time):
        """
        Convert the time component of a Python datetime object into
        DICOM time (TM) value representation.
        """
        return "{:02}{:02}{:02}.{:06}".format(
            time.hour, time.minute, time.second, time.microsecond
        )

    @staticmethod
    def _date_to_DA(date):
        """
        Convert the date component of a Python datetime object into
        DICOM date (DA) value representation.
        """
        return "{:04}{:02}{:02}".format(date.year, date.month, date.day)

    def anonymise_datetime(self, header):
        """
        Anonymise the datetime values for AcquisitionDate, AcquisitionTime,
        ContentDate and ContentTime tags, while keeping the relative time
        differences of different dates/times for different calls of this
        function intact.

        Parameters
        ----------
        header:
            The DICOM header containing the datetime tags to anonymise

        Raises
        ------
        time_utils.TooLargeDeltaError
            when trying to anonymise multiple DICOM headers of which the
            datetimes to anonymise span more than 24h.
        """
        # Date and time must be stored in two separate tags
        for date_atr, time_atr in self._datetime_attributes:
            date_ok = check_tag(header, date_atr.tag)
            time_ok = check_tag(header, time_atr.tag)
            if not (date_ok and time_ok):
                # A full datetime cannot be reconstructed. If one of the tags
                # exists, anonymise it as specified in the original
                # confidentiality profile to ensure proper anonymisation.
                if time_ok:
                    action = self._original_actions[time_atr]
                    anonymise_header_attribute(header, time_atr, action)
                elif date_ok:
                    action = self._original_actions[date_atr]
                    anonymise_header_attribute(header, date_atr, action)
                continue  # Go to the next (date_atr, time_atr) pair

            date_element = header[date_atr.tag]
            time_element = header[time_atr.tag]

            assert date_element.VR == "DA"
            assert time_element.VR == "TM"

            # If date_element is not a date string, then set it to null so
            # that it is anonymised with the original action below.
            try:
                in_date = pydicom.valuerep.DA(date_element.value)
            except ValueError:
                in_date = None

            # If time_element is not a time string, then set it to null so
            # that it is anonymised with the original action below.
            try:
                in_time = pydicom.valuerep.TM(time_element.value)
            except ValueError:
                in_time = None

            if not (in_date and in_time):
                # One of the input values was empty or could not be parsed
                anonymise_header_attribute(
                    header, time_atr, self._original_actions[time_atr])
                anonymise_header_attribute(
                    header, date_atr, self._original_actions[date_atr])
                continue  # Go to the next (date_atr, time_atr) pair

            in_datetime = datetime.combine(in_date, in_time)

            # Compute the target datetime
            out_datetime = self._time_anonymise.anonymise_datetime(in_datetime)

            header[date_atr.tag].value = self._date_to_DA(out_datetime)
            header[time_atr.tag].value = self._time_to_TM(out_datetime)

    def anonymise(self, filename, actions=None):
        """
        Anonymise the DICOM dataset using the
        restricted_actions profile, and replace
        the date/time elements with an anonymised date/time that keeps
        the relative date and time of different DICOM datasets intact.
        The file is overwritten with the anonymised version.

        Parameters
        ----------
        filename: str
            The file to anonymise
        actions: dict, optional
            The confidentiality profile to apply to all attributes except
            the date/time attributes handled by anonymise_datetime(). It
            does not need to contain those date/time attributes.
            Default: DICOM_ANON_MIN_SUPP_55

        Raises
        ------
        time_utils.TooLargeDeltaError
            when trying to anonymise multiple DICOM headers of which the
            datetimes to anonymise span more than 24h.
        """
        # dcmread() is the supported reader; read_file() is only a
        # deprecated alias for it.
        header = pydicom.dcmread(filename)
        self.anonymise_datetime(header)

        actions = actions or DICOM_ANON_MIN_SUPP_55
        # Filtering (instead of popping fixed keys) also works for custom
        # profiles that do not list every date/time attribute.
        restricted_actions = self._without_datetime_attributes(actions)

        anonymise_dicom_dataset(header, restricted_actions)
        _updateMetaInfo(header)

        header.save_as(filename)
@@ -0,0 +1,48 @@
1
+ import os
2
+ import shutil
3
+ from tempfile import TemporaryDirectory
4
+
5
+ import nibabel
6
+
7
+
8
def anonymise(filename):
    """
    Anonymise a NIfTI file. The file is overwritten with the anonymised
    version.

    If the header has a "db_name" field, it is set to "XXXX". The image is
    then written as a new Nifti1Image to a temporary directory and copied
    over the original file.

    Parameters
    ----------
    filename: str
        The path of the NIfTI file to anonymise

    Returns
    -------
    dict
        {"OK": True} on success, otherwise "OK" is False and the key
        "error" holds a message that includes the raised exception.
    """
    try:
        image = nibabel.load(filename)

        header = image.header
        if "db_name" in header:
            header["db_name"] = "XXXX"

        with TemporaryDirectory() as scratch_dir:
            # Keep the base name, so that the extension of the written
            # file is the same as that of the original.
            scratch_path = os.path.join(
                scratch_dir, os.path.basename(filename)
            )
            anonymised = nibabel.Nifti1Image(
                image.get_fdata(), image.affine, header
            )
            nibabel.save(anonymised, scratch_path)
            shutil.copyfile(scratch_path, filename)
    except Exception as e:
        return {
            "OK": False,
            "error": "Some NIFTI files are not accepted or are corrupted. "
                     f"\n{str(e)}"
        }

    return {"OK": True}
31
+
32
+
33
def check_anonymised_file(filename):
    """
    Check whether the "db_name" header field of a NIfTI file is anonymised.

    Parameters
    ----------
    filename: str
        The path of the NIfTI file to check

    Returns
    -------
    dict
        {"OK": True} if the header has no "db_name" field, or if that field
        is "XXXX" or empty; {"OK": False} otherwise. If the file cannot be
        checked, "OK" is False and the key "error" holds a message that
        includes the raised exception.
    """
    try:
        header = nibabel.load(filename).header
        if "db_name" not in header:
            return {"OK": True}

        db_name = str(header["db_name"].astype(str))
        return {"OK": db_name in ["XXXX", ""]}
    except Exception as e:
        return {
            "OK": False,
            "error": "Some NIFTI files are not accepted or are corrupted. "
                     f"\n{str(e)}"
        }
@@ -0,0 +1,127 @@
1
+ from calendar import monthrange
2
+ from datetime import datetime, timedelta
3
+
4
+
5
class TimeUtilsError(Exception):
    """
    Base class for time utils errors
    """


class TooLargeDeltaError(TimeUtilsError):
    """
    Too large difference between two times.

    Parameters
    ----------
    t1, t2:
        The two times that are too far apart
    max_delta:
        The maximum difference that is allowed
    """

    def __init__(self, t1, t2, max_delta):
        message = (
            "Too large time difference between {} and {}. "
            "Maximum allowed delta: {}."
        ).format(t1, t2, max_delta)
        # Pass the message only. Passing `self` as well would store the
        # exception in its own args, so that str(exc) would print a tuple
        # instead of the message.
        super().__init__(message)
24
+
25
+
26
def monthdelta(d1, d2):
    """
    Calculates the difference in months between two timepoints

    Starting at d1, the running date is advanced repeatedly by the number
    of days of the month it is currently in. The result is the number of
    such steps that do not go past d2 (0 if the first step already does).
    """
    months = 0
    cursor = d1
    while True:
        days_in_month = monthrange(cursor.year, cursor.month)[1]
        cursor = cursor + timedelta(days=days_in_month)
        if cursor > d2:
            return months
        months += 1
39
+
40
+
41
def calculate_passed_days(timestamp1, timestamp2):
    """
    Calculates the number of days between two timestamps.

    The difference timestamp1 - timestamp2 is rounded down to whole minutes
    and returned as a (possibly fractional) number of days. 0 is returned
    when timestamp2 is None.
    """
    if timestamp2 is None:
        return 0

    elapsed = timestamp1 - timestamp2
    whole_minutes = (elapsed.days * 86400 + elapsed.seconds) // 60
    return whole_minutes / (24 * 60)
51
+
52
+
53
class TimeAnonymise:
    """
    Anonymise time while keeping the relative time differences between
    different image files in a single session. Each TimeAnonymise object
    should be used to anonymise exactly one session.

    Attributes
    ----------
    target_base : datetime
        The target date that is the base for the returned anonymised
        datetimes
    source_base : datetime
        The source date that will be used to compute the time difference with
        new input dates. This will be the input date of the first
        anonymise_datetime() function call
    max_delta : timedelta
        The maximum difference between two datetimes that are acceptable
        to be anonymised by a single TimeAnonymise instance. We use this
        to avoid mistakes where multiple sessions are being anonymised
        with a single TimeAnonymise instance.
    """

    def __init__(self):
        # Noon on January 1, 1900.
        self.target_base = datetime(1900, 1, 1, 12)
        # Will be set by the first input date
        self.source_base = None
        self.max_delta = timedelta(hours=24)

        # The minimum and maximum input times
        self._source_min = None
        self._source_max = None

    def anonymise_datetime(self, source):
        """
        Anonymise the input datetime by changing the date to be in the range
        (targetBase - 24h, targetBase + 24h) without changing the relative
        time difference between any two times that were anonymised by this
        TimeAnonymise object.

        A datetime that is rejected with TooLargeDeltaError does not change
        the state of this object, so datetimes that are within range can
        still be anonymised afterwards.

        Parameters
        ----------
        source : datetime
            The datetime to anonymise

        Raises
        ------
        TooLargeDeltaError
            When the time difference between source and any of the previously
            anonymised datetime objects is more than max_delta.

        Returns
        -------
        datetime
            The anonymised datetime object
        """
        if self.source_base is None:
            # This is the first input to anonymise
            self.source_base = source
            self._source_min = source
            self._source_max = source
            return self.target_base

        # Validate the candidate range before committing it, so that a
        # rejected input cannot poison the range for later calls.
        new_min = min(self._source_min, source)
        new_max = max(self._source_max, source)
        if (new_max - new_min) > self.max_delta:
            raise TooLargeDeltaError(source, self.source_base, self.max_delta)

        self._source_min = new_min
        self._source_max = new_max

        delta = source - self.source_base
        return self.target_base + delta