dicom 0.9.6 → 0.9.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,654 +1,677 @@
1
- module DICOM
2
-
3
- # This is a convenience class for handling the anonymization
4
- # (de-identification) of DICOM files.
5
- #
6
- # @note
7
- # For a thorough introduction to the concept of DICOM anonymization,
8
- # please refer to The DICOM Standard, Part 15: Security and System
9
- # Management Profiles, Annex E: Attribute Confidentiality Profiles.
10
- # For guidance on settings for individual data elements, please
11
- # refer to DICOM PS 3.15, Annex E, Table E.1-1: Application Level
12
- # Confidentiality Profile Attributes.
13
- #
14
- class Anonymizer
15
- include Logging
16
-
17
- # An AuditTrail instance used for this anonymization (if specified).
18
- attr_reader :audit_trail
19
- # The file name used for the AuditTrail serialization (if specified).
20
- attr_reader :audit_trail_file
21
- # A boolean that, if set to true, causes all anonymized tags to be blanked instead of receiving a generic value.
22
- attr_accessor :blank
23
- # A hash of elements (represented by tag keys) that will be deleted from the DICOM objects on anonymization.
24
- attr_reader :delete
25
- # A boolean that if set as true, will make the anonymization delete all private tags.
26
- attr_accessor :delete_private
27
- # The cryptographic hash function to be used for encrypting DICOM values recorded in an audit trail file.
28
- attr_reader :encryption
29
- # A boolean that, if set to true, causes all anonymized tags to get enumerated values, to enable post-anonymization re-identification by the user.
30
- attr_accessor :enumeration
31
- # The logger level which is applied to DObject operations during anonymization (defaults to Logger::FATAL).
32
- attr_reader :logger_level
33
- # A boolean that if set as true will cause all anonymized files to be written with random file names (if write_path has been specified).
34
- attr_accessor :random_file_name
35
- # A boolean that if set as true, will cause the anonymization to run on all levels of the DICOM file tag hierarchy.
36
- attr_accessor :recursive
37
- # A boolean indicating whether or not UIDs shall be replaced when executing the anonymization.
38
- attr_accessor :uid
39
- # The DICOM UID root to use when generating new UIDs.
40
- attr_accessor :uid_root
41
- # The path where the anonymized files will be saved. If this value is not set, the original DICOM files will be overwritten.
42
- attr_accessor :write_path
43
-
44
# Creates an Anonymizer instance.
#
# @note To customize logging behaviour, refer to the Logging module documentation.
# @note The :encryption option only has an effect when :audit_trail is given as well.
# @param [Hash] options the options to create an anonymizer instance with
# @option options [String] :audit_trail a file name path (if the file contains old audit data, these are loaded and used in the current anonymization)
# @option options [Boolean] :blank toggles whether to set the values of anonymized elements as empty instead of some generic value
# @option options [Boolean] :delete_private toggles whether private elements are to be deleted
# @option options [TrueClass, Digest::Class] :encryption if set as true, the default hash function (MD5) will be used for representing DICOM values in an audit file. Otherwise a Digest class can be given, e.g. Digest::SHA256
# @option options [Boolean] :enumeration toggles whether (some) elements get enumerated values (to enable post-anonymization re-identification)
# @option options [Integer] :logger_level the logger level which is applied to DObject operations during anonymization (defaults to Logger::FATAL)
# @option options [Boolean] :random_file_name toggles whether anonymized files will be given random file names when rewritten (in combination with the :write_path option)
# @option options [Boolean] :recursive toggles whether to anonymize on all sub-levels of the DICOM object tag hierarchies
# @option options [Boolean] :uid toggles whether UIDs will be replaced with custom generated UIDs (beware that to preserve UID relations in studies/series, the audit_trail feature must be used)
# @option options [String] :uid_root an organization (or custom) UID root to use when replacing UIDs
# @option options [String] :write_path a directory where the anonymized files are re-written (if not specified, files are overwritten)
# @example Create an Anonymizer instance and increase the log output
#   a = Anonymizer.new
#   a.logger.level = Logger::INFO
# @example Perform anonymization using the audit trail feature
#   a = Anonymizer.new(:audit_trail => 'trail.json')
#   a.enumeration = true
#   a.write_path = '//anonymized/'
#   a.anonymize('//dicom/today/')
#
def initialize(options={})
  # Transfer options to attributes:
  @blank = options[:blank]
  @delete_private = options[:delete_private]
  @enumeration = options[:enumeration]
  @logger_level = options[:logger_level] || Logger::FATAL
  @random_file_name = options[:random_file_name]
  @recursive = options[:recursive]
  @uid = options[:uid]
  @uid_root = options[:uid_root] || UID_ROOT
  @write_path = options[:write_path]
  # Array of folders to be processed for anonymization:
  @folders = Array.new
  # Folders that will be skipped:
  @exceptions = Array.new
  # Data elements which will be anonymized (the array will hold a list of tag strings):
  @tags = Array.new
  # Default values to use on anonymized data elements:
  @values = Array.new
  # Which data elements will have enumeration applied, if requested by the user:
  @enumerations = Array.new
  # We use a Hash to store information from DICOM files if enumeration is desired:
  @enum_old_hash = Hash.new
  @enum_new_hash = Hash.new
  # All the files to be anonymized will be put in this array:
  @files = Array.new
  # Numeric prefixes handed out per UID tag (filled lazily by #prefix):
  @prefixes = Hash.new
  # Setup audit trail if requested:
  if options[:audit_trail]
    @audit_trail_file = options[:audit_trail]
    # File.exist? is used here because the File.exists? alias has been
    # removed from newer Ruby versions. Files of 2 bytes or less are
    # treated as holding no previous audit data.
    if File.exist?(@audit_trail_file) && File.size(@audit_trail_file) > 2
      # Load the pre-existing audit trail from file:
      @audit_trail = AuditTrail.read(@audit_trail_file)
    else
      # Start from scratch with an empty audit trail:
      @audit_trail = AuditTrail.new
    end
    # Set up encryption if indicated:
    if options[:encryption]
      require 'digest'
      if options[:encryption].respond_to?(:hexdigest)
        # A digest class (or compatible object) was supplied by the user:
        @encryption = options[:encryption]
      else
        # Any other truthy value selects the default hash function:
        @encryption = Digest::MD5
      end
    end
  end
  # Set the default data elements to be anonymized:
  set_defaults
end
118
-
119
# Checks for equality.
#
# Other and self are considered equivalent if they are
# of compatible types and their attributes are equivalent.
#
# @param other an object to be compared with self.
# @return [Boolean] true if self and other are considered equivalent, false otherwise
#
def ==(other)
  # An object that can not act as an anonymizer is never equal to self.
  # An explicit false is returned (instead of falling through with nil),
  # in line with the documented Boolean return value:
  return false unless other.respond_to?(:to_anonymizer)
  # The state method is private, hence the use of send:
  other.send(:state) == state
end
132
-
133
- alias_method :eql?, :==
134
-
135
# Anonymizes the given DICOM data with the settings of this Anonymizer instance.
#
# Objects that were loaded from file are written back to disk after being
# anonymized, whereas objects that were passed in as DICOM objects are only
# modified in memory. If an audit trail is in use, it is saved after processing.
#
# @param [String, DObject, Array<String, DObject>] data single or multiple DICOM data (directories, file paths, binary strings, DICOM objects)
# @return [Array<DObject>] an array of the anonymized DICOM objects
#
def anonymize(data)
  dicom = prepare(data)
  if @tags.length > 0
    dicom.each do |dcm|
      anonymize_dcm(dcm)
      # Write DICOM object to file unless it was passed to the anonymizer as an object:
      write(dcm) unless dcm.was_dcm_on_input
    end
  else
    logger.warn("No tags have been selected for anonymization. Aborting anonymization.")
  end
  # Reset the ruby-dicom log threshold to its original level
  # (done here so that the completion message below is logged with it):
  logger.level = @original_level
  # Save the audit trail (if used):
  @audit_trail.write(@audit_trail_file) if @audit_trail
  logger.info("Anonymization complete.")
  dicom
ensure
  # If loading, anonymizing or writing raised an error, the temporary log
  # threshold set by prepare would otherwise stay in effect after this call:
  logger.level = @original_level if @original_level
end
158
-
159
# Marks a tag for complete removal: the corresponding data element
# will be deleted from the DICOM objects when they are anonymized.
#
# @param [String] tag a data element tag
# @raise [ArgumentError] if the tag is not a String, or not a valid tag string
# @example Completely delete the Patient's Name tag from the DICOM files
#   a.delete_tag('0010,0010')
#
def delete_tag(tag)
  unless tag.is_a?(String)
    raise ArgumentError, "Expected String, got #{tag.class}."
  end
  unless tag.tag?
    raise ArgumentError, "Expected a valid tag of format 'GGGG,EEEE', got #{tag}."
  end
  # The hash is used as a set; only the presence of the key matters:
  @delete.store(tag, true)
end
171
-
172
# Looks up the enumeration setting of a tag.
#
# A warning is logged if the tag is not among the tags to be anonymized.
#
# @param [String] tag a data element tag
# @return [Boolean, NilClass] the enumeration status of the tag, or nil if the tag has no match
# @raise [ArgumentError] if the tag is not a String, or not a valid tag string
#
def enum(tag)
  raise ArgumentError, "Expected String, got #{tag.class}." unless tag.kind_of?(String)
  raise ArgumentError, "Expected a valid tag of format 'GGGG,EEEE', got #{tag}." unless tag.tag?
  index = @tags.index(tag)
  # The enumeration settings are stored in parallel with the tags:
  return @enumerations[index] if index
  logger.warn("The specified tag (#{tag}) was not found in the list of tags to be anonymized.")
  nil
end
188
-
189
# Computes a hash code for this object, derived from its attributes.
#
# @note Two objects with the same attributes will have the same hash code.
#
# @return [Integer] the object's hash code
#
def hash
  # The attribute array returned by state also backs the equality check:
  attributes = state
  attributes.hash
end
198
-
199
# Takes a tag off the list of tags that will be anonymized.
# Nothing happens if the tag is not on the list.
#
# @param [String] tag a data element tag
# @raise [ArgumentError] if the tag is not a String, or not a valid tag string
# @example Do not anonymize the Patient's Name tag
#   a.remove_tag('0010,0010')
#
def remove_tag(tag)
  raise ArgumentError, "Expected String, got #{tag.class}." unless tag.kind_of?(String)
  raise ArgumentError, "Expected a valid tag of format 'GGGG,EEEE', got #{tag}." unless tag.tag?
  index = @tags.index(tag)
  return unless index
  # Tags, values and enumerations are parallel arrays, so the
  # same position must be removed from all three of them:
  [@tags, @values].each { |list| list.delete_at(index) }
  @enumerations.delete_at(index)
end
215
-
216
# Sets the anonymization settings for the specified tag. A tag which is already on the list
# of tags to be anonymized gets its settings updated, otherwise a new tag entry is created.
#
# @param [String] tag a data element tag
# @param [Hash] options the anonymization settings for the specified tag
# @option options [String, Integer, Float] :value the replacement value to be used when anonymizing this data element. Defaults to the pre-existing value, and to the result of default_value(tag) for new tags.
# @option options [Boolean] :enum specifies if enumeration is to be used for this tag. Defaults to the pre-existing value and false for new tags.
# @raise [ArgumentError] if the tag is not a String, or not a valid tag string
# @example Set the anonymization settings of the Patient's Name tag
#   a.set_tag('0010,0010', :value => 'MrAnonymous', :enum => true)
#
def set_tag(tag, options={})
  raise ArgumentError, "Expected String, got #{tag.class}." unless tag.kind_of?(String)
  raise ArgumentError, "Expected a valid tag of format 'GGGG,EEEE', got #{tag}." unless tag.tag?
  replacement, enumerate = options[:value], options[:enum]
  index = @tags.index(tag)
  if index.nil?
    # Unknown tag: append a new entry to each of the parallel arrays.
    @tags.push(tag)
    @values.push(replacement || default_value(tag))
    @enumerations.push(enumerate || false)
  else
    # Known tag: only overwrite the settings which were actually given
    # (an explicit false is a valid enumeration setting).
    @values[index] = replacement if replacement
    @enumerations[index] = enumerate unless enumerate.nil?
  end
end
241
-
242
# Conversion method: an Anonymizer is already an anonymizer, so the
# receiver itself is returned. The equality check relies on objects
# responding to this method.
#
# @return [Anonymizer] self
#
def to_anonymizer
  self
end
249
-
250
# Looks up the replacement value which will be used when anonymizing a tag.
#
# A warning is logged if the tag is not among the tags to be anonymized.
#
# @note If enumeration is selected for a string type tag, a number will be
#   appended in addition to the string that is returned here.
#
# @param [String] tag a data element tag
# @return [String, Integer, Float, NilClass] the replacement value for the specified tag, or nil if the tag is not matched
# @raise [ArgumentError] if the tag is not a String, or not a valid tag string
#
def value(tag)
  raise ArgumentError, "Expected String, got #{tag.class}." unless tag.kind_of?(String)
  raise ArgumentError, "Expected a valid tag of format 'GGGG,EEEE', got #{tag}." unless tag.tag?
  index = @tags.index(tag)
  # The replacement values are stored in parallel with the tags:
  return @values[index] if index
  logger.warn("The specified tag (#{tag}) was not found in the list of tags to be anonymized.")
  nil
end
269
-
270
-
271
- private
272
-
273
-
274
# Performs anonymization on a single DICOM object.
#
# The selected tags are replaced (and the tags marked for deletion removed)
# in every parent returned by element_parents. After that the object is
# flagged as de-identified, its File Meta Information group is deleted,
# and UIDs/private tags are handled according to the instance settings.
#
# @param [DObject] dcm a DICOM object
#
def anonymize_dcm(dcm)
  # Extract the data element parents to investigate:
  parents = element_parents(dcm)
  parents.each do |parent|
    # Anonymize the desired tags:
    @tags.each_with_index do |tag, index|
      if parent.exists?(tag)
        element = parent[tag]
        # Only plain data elements carry a value to be replaced:
        if element.is_a?(Element)
          element.value =
            if @blank
              ''
            elsif @enumeration
              # Only launch enumeration logic if there is an actual value to the data element:
              current = element.value
              current ? enumerated_value(current, index) : ''
            else
              # Use the value that has been set for this tag:
              @values[index]
            end
        end
      end
    end
    # Delete elements marked for deletion:
    @delete.each_key do |doomed|
      parent.delete(doomed) if parent.exists?(doomed)
    end
  end
  # General DICOM object manipulation:
  # Add a Patient Identity Removed attribute (as per
  # DICOM PS 3.15, Annex E, E.1.1 De-Identifier, point 6):
  dcm.add(Element.new('0012,0062', 'YES'))
  # Add a De-Identification Method Code Sequence Item:
  dcm.add(Sequence.new('0012,0064')) unless dcm.exists?('0012,0064')
  method_item = dcm['0012,0064'].add_item
  method_item.add(Element.new('0012,0063', 'De-identified by the ruby-dicom Anonymizer'))
  # FIXME: At some point we should add a set of de-identification method codes, as per
  # DICOM PS 3.16 CID 7050 which corresponds to the settings chosen for the anonymizer.
  # Delete the old File Meta Information group (as per
  # DICOM PS 3.15, Annex E, E.1.1 De-Identifier, point 7):
  dcm.delete_group('0002')
  # Handle UIDs if requested:
  replace_uids(parents) if @uid
  # Delete private tags if indicated:
  dcm.delete_private if @delete_private
end
328
-
329
# Gives the value to be used for the audit trail, which is either
# the original value itself, or an encrypted string based on it.
#
# @param [String, Integer, Float] original the original value of the tag to be anonymized
# @return [String, Integer, Float] with encryption, a hash string is returned, otherwise the original value
#
def at_value(original)
  return original unless @encryption
  # Digest functions only accept strings, so numeric originals are
  # converted first (for strings, to_s gives the very same value):
  @encryption.hexdigest(original.to_s)
end
338
-
339
# Resets the enumeration bookkeeping: for every tag on the anonymization
# list, both the hash of encountered original values and the hash of the
# replacement values given to them are set to a fresh, empty array.
#
def create_enum_hash
  @enumerations.each_index do |position|
    tag = @tags[position]
    @enum_old_hash[tag] = []
    @enum_new_hash[tag] = []
  end
end
347
-
348
# Determines a default value to use for anonymizing the given tag.
# The value depends on how values of the tag's value representation
# are converted: 0 for integers, 0.0 for floats, and an empty string
# for everything else.
#
# @param [String] tag a data element tag
# @return [String, Integer, Float] the default replacement value for a given tag
#
def default_value(tag)
  _name, vr = LIBRARY.name_and_vr(tag)
  numeric_defaults = {:to_i => 0, :to_f => 0.0}
  # Anything without a numeric conversion is assumed to be of string type:
  numeric_defaults.fetch(VALUE_CONVERSION[vr], '')
end
364
-
365
# Creates a write path for the given DICOM object, based on the object's
# original file path and the write_path attribute.
#
# The leading folders which the source directory and the write path have in
# common are not repeated: the source folders, starting at the first folder
# that differs from the write path, are appended to the write path.
#
# @param [DObject] dcm a DICOM object
# @return [String] the destination directory path
#
def destination(dcm)
  source = dcm.source
  # Separate the path from the source file string:
  file_start = source.rindex(File.basename(source))
  source_dir = (file_start == 0) ? "." : source[0..(file_start-1)]
  source_folders = source_dir.split(File::SEPARATOR)
  # Get rid of any current dir symbols:
  source_folders.delete('.')
  target_folders = @write_path.split(File::SEPARATOR)
  # Look for the first differing folder name in a range limited by the shortest array:
  shared_length = [source_folders.length, target_folders.length].min
  uncommon_index = (0...shared_length).find do |i|
    target_folders[i] != source_folders[i]
  end
  # Create the output path by joining the two paths together using the determined index
  # (when no differing folder was found, the write path is used as it is):
  append_path = uncommon_index ? source_folders[uncommon_index..-1] : nil
  [target_folders, append_path].compact.join(File::SEPARATOR)
end
396
-
397
# Extracts all parents from a DObject instance which potentially
# have child (data) elements: the DObject itself, and (only when the
# @recursive attribute is set) the items found in its sequences.
#
# @param [DObject] dcm a DICOM object
# @return [Array<DObject, Item>] an array containing either just a DObject or also all parental child items within the tag hierarchy
#
def element_parents(dcm)
  parents = [dcm]
  # Without recursion only the top level of the object is processed:
  return parents unless @recursive
  dcm.sequences.inject(parents) do |found, sequence|
    found + element_parents_recursive(sequence)
  end
end
416
-
417
# Recursively extracts all item parents from a sequence instance (including
# any sub-sequences) which actually contain child (data) elements.
# An item is listed ahead of the items found in its own sub-sequences.
#
# @param [Sequence] sequence a Sequence instance
# @return [Array<Item>] an array containing items within the tag hierarchy that contains child elements
#
def element_parents_recursive(sequence)
  sequence.items.inject([]) do |parents, item|
    # Items without data elements are skipped, but may still hold sequences:
    own = item.elements? ? [item] : []
    nested = item.sequences.inject([]) do |found, sub_sequence|
      found + element_parents_recursive(sub_sequence)
    end
    parents + own + nested
  end
end
433
-
434
# Handles the enumeration for the given data element tag.
# If its value has been encountered before, its corresponding enumerated
# replacement value is retrieved, and if a new original value is encountered,
# a new enumerated replacement value is found by increasing an index by 1.
#
# For a string type replacement value the index is appended to the string,
# whereas for a numeric replacement value the index is added to the number.
# Tags which are not set up for enumeration get their plain replacement value.
#
# @param [String, Integer, Float] original the original value of the tag to be anonymized
# @param [Integer] j the index of this tag in the tag-related instance arrays
# @return [String, Integer, Float] the replacement value which is used for the anonymization of the tag
#
def enumerated_value(original, j)
  base = @values[j]
  # Is enumeration requested for this tag?
  return base unless @enumerations[j]
  tag = @tags[j]
  # Builds the replacement for a given running number. Numeric base values
  # are handled in both branches below (String + Integer would raise):
  enumerate = lambda { |index| base.is_a?(String) ? base + index.to_s : base + index }
  if @audit_trail
    # The (possibly encrypted) audit trail key is only computed once:
    recorded = at_value(original)
    # Check if the value has been encountered already:
    replacement = @audit_trail.replacement(tag, recorded)
    unless replacement
      # This original value has not been encountered yet. Determine the index to use
      # and add the new tag record to the audit trail:
      replacement = enumerate.call(@audit_trail.records(tag).length + 1)
      @audit_trail.add_record(tag, recorded, replacement)
    end
  else
    # Retrieve earlier used anonymization values:
    previous_old = @enum_old_hash[tag]
    previous_new = @enum_new_hash[tag]
    known = previous_old.index(original)
    if known
      # Current value has been observed before:
      replacement = previous_new[known]
    else
      # Current value has not been encountered before:
      replacement = enumerate.call(previous_old.length + 1)
      # Store the pair at the same position in the two parallel arrays:
      previous_old << original
      previous_new << replacement
    end
  end
  replacement
end
484
-
485
# Establishes a prefix for a given UID tag.
# This makes it somewhat easier to distinguish
# between different types of random generated UIDs.
#
# Prefixes are handed out as running numbers (starting at 1) in the
# order in which the tags are first encountered, and are then reused.
#
# @param [String] tag a data element string tag
# @return [Integer] the prefix assigned to the tag
#
def prefix(tag)
  @prefixes[tag] ||= @prefixes.length + 1
end
499
-
500
# Prepares the data for anonymization: loads the DICOM data, switches the
# log threshold to the level chosen for the anonymization (remembering the
# original level in @original_level), and sets up the features requested.
#
# @param [String, DObject, Array<String, DObject>] data single or multiple DICOM data (directories, file paths, binary strings, DICOM objects)
# @return the loaded DICOM objects, as returned by DICOM.load
#
def prepare(data)
  logger.info("Loading DICOM data.")
  # Temporarily adjust the ruby-dicom log threshold (usually to suppress messages from the DObject class):
  @original_level = logger.level
  logger.level = @logger_level
  loaded = DICOM.load(data)
  # The status message is logged with the original threshold in effect,
  # after which the anonymization threshold is put back in place:
  logger.level = @original_level
  logger.info("#{loaded.length} DICOM objects have been prepared for anonymization.")
  logger.level = @logger_level
  # Set up enumeration if requested:
  if @enumeration
    create_enum_hash
  end
  # Random file names are generated with SecureRandom:
  if @random_file_name
    require 'securerandom'
  end
  loaded
end
519
-
520
# Replaces the UIDs found among the elements of the given parents.
# Elements listed in @static_uids are left untouched.
#
# @note Empty UIDs are ignored (we don't generate new UIDs for these).
# @note If AuditTrail is set, the relationship between old and new UIDs are preserved,
#   and the relations between files in a study/series should remain valid.
# @param [Array<DObject, Item>] parents dicom parent objects whose child elements will be investigated
#
def replace_uids(parents)
  parents.each do |parent|
    parent.each_element do |element|
      if element.vr == 'UI' && !@static_uids[element.tag]
        original = element.value
        if original && original.length > 0
          # We have a UID value, go ahead and replace it.
          # With an audit trail, check if the UID has been encountered already:
          replacement = @audit_trail ? @audit_trail.replacement('uids', original) : nil
          unless replacement
            # No previously stored UID can be used. Generate a new one:
            replacement = DICOM.generate_uid(@uid_root, prefix(element.tag))
            # Add this tag record to the audit trail (if used), to preserve UID relations:
            @audit_trail.add_record('uids', original, replacement) if @audit_trail
          end
          # Replace the UID in the DICOM object:
          element.value = replacement
        end
      end
    end
  end
end
554
-
555
# Sets up the default information used by the Anonymizer: the UID tags which
# are never remapped, the default list of tags to anonymize (with replacement
# values and enumeration settings), and an empty list of tags to delete.
#
def set_defaults
  # Some UIDs should not be remapped even if uid anonymization has been requested:
  static_tags = [
    # Private related:
    '0002,0100',
    '0004,1432',
    # Coding scheme related:
    '0008,010C',
    '0008,010D',
    # Transfer syntax related:
    '0002,0010',
    '0400,0010',
    '0400,0510',
    '0004,1512',
    # SOP class related:
    '0000,0002',
    '0000,0003',
    '0002,0002',
    '0004,1510',
    '0004,151A',
    '0008,0016',
    '0008,001A',
    '0008,001B',
    '0008,0062',
    '0008,1150',
    '0008,115A'
  ]
  @static_uids = static_tags.each_with_object({}) { |tag, uids| uids[tag] = true }
  # Default tags that will be anonymized, given as rows of [tag, replacement value, enumeration].
  # The columns are stored in 3 separate (parallel) instance arrays.
  defaults = [
    ['0008,0012', '20000101', false], # Instance Creation Date
    ['0008,0013', '000000.00', false], # Instance Creation Time
    ['0008,0020', '20000101', false], # Study Date
    ['0008,0021', '20000101', false], # Series Date
    ['0008,0022', '20000101', false], # Acquisition Date
    ['0008,0023', '20000101', false], # Image Date
    ['0008,0030', '000000.00', false], # Study Time
    ['0008,0031', '000000.00', false], # Series Time
    ['0008,0032', '000000.00', false], # Acquisition Time
    ['0008,0033', '000000.00', false], # Image Time
    ['0008,0050', '', true], # Accession Number
    ['0008,0080', 'Institution', true], # Institution name
    ['0008,0081', 'Address', true], # Institution Address
    ['0008,0090', 'Physician', true], # Referring Physician's name
    ['0008,1010', 'Station', true], # Station name
    ['0008,1040', 'Department', true], # Institutional Department name
    ['0008,1070', 'Operator', true], # Operator's Name
    ['0010,0010', 'Patient', true], # Patient's name
    ['0010,0020', 'ID', true], # Patient's ID
    ['0010,0030', '20000101', false], # Patient's Birth Date
    ['0010,0040', 'O', false], # Patient's Sex
    ['0010,1010', '', false], # Patient's Age
    ['0020,4000', '', false], # Image Comments
  ]
  @tags, @values, @enumerations = defaults.transpose
  # Tags to be deleted completely during anonymization:
  @delete = {}
end
617
-
618
# Collects the attributes of this instance which define its
# anonymization settings (used for equality and hash code).
#
# @return [Array] an array of attributes
#
def state
  [
    @tags,
    @values,
    @enumerations,
    @delete,
    @blank,
    @delete_private,
    @enumeration,
    @logger_level,
    @random_file_name,
    @recursive,
    @uid,
    @uid_root,
    @write_path
  ]
end
629
-
630
# Writes a DICOM object to file: either to its original location, or
# (if a write path has been set) to a directory derived from the write path.
#
# @param [DObject] dcm a DICOM object
#
def write(dcm)
  # Without a write path, the original DICOM file is overwritten with the anonymized DICOM object:
  return dcm.write(dcm.source) unless @write_path
  # The DICOM object is to be written to a separate directory. If the
  # original and the new directories have a common root, this is taken into
  # consideration when determining the object's write path:
  directory = destination(dcm)
  file_name = @random_file_name ? "#{SecureRandom.hex(16)}.dcm" : File.basename(dcm.source)
  dcm.write(File.join(directory, file_name))
end
651
-
652
- end
653
-
654
- end
1
+ module DICOM
2
+
3
+ # This is a convenience class for handling the anonymization
4
+ # (de-identification) of DICOM files.
5
+ #
6
+ # @note
7
+ # For a thorough introduction to the concept of DICOM anonymization,
8
+ # please refer to The DICOM Standard, Part 15: Security and System
9
+ # Management Profiles, Annex E: Attribute Confidentiality Profiles.
10
+ # For guidance on settings for individual data elements, please
11
+ # refer to DICOM PS 3.15, Annex E, Table E.1-1: Application Level
12
+ # Confidentiality Profile Attributes.
13
+ #
14
+ class Anonymizer
15
+ include Logging
16
+
17
+ # An AuditTrail instance used for this anonymization (if specified).
18
+ attr_reader :audit_trail
19
+ # The file name used for the AuditTrail serialization (if specified).
20
+ attr_reader :audit_trail_file
21
+ # A boolean that, if set to true, causes all anonymized tags to be blanked instead of receiving a generic value.
22
+ attr_accessor :blank
23
+ # A hash of elements (represented by tag keys) that will be deleted from the DICOM objects on anonymization.
24
+ attr_reader :delete
25
+ # A boolean that if set as true, will make the anonymization delete all private tags.
26
+ attr_accessor :delete_private
27
+ # The cryptographic hash function to be used for encrypting DICOM values recorded in an audit trail file.
28
+ attr_reader :encryption
29
+ # A boolean that, if set to true, causes all anonymized tags to get enumerated values, to enable post-anonymization re-identification by the user.
30
+ attr_accessor :enumeration
31
+ # The logger level which is applied to DObject operations during anonymization (defaults to Logger::FATAL).
32
+ attr_reader :logger_level
33
+ # A boolean that if set as true will cause all anonymized files to be written with random file names (if write_path has been specified).
34
+ attr_accessor :random_file_name
35
+ # A boolean that if set as true, will cause the anonymization to run on all levels of the DICOM file tag hierarchy.
36
+ attr_accessor :recursive
37
+ # A boolean indicating whether or not UIDs shall be replaced when executing the anonymization.
38
+ attr_accessor :uid
39
+ # The DICOM UID root to use when generating new UIDs.
40
+ attr_accessor :uid_root
41
+ # The path where the anonymized files will be saved. If this value is not set, the original DICOM files will be overwritten.
42
+ attr_accessor :write_path
43
+
44
+ # Creates an Anonymizer instance.
45
+ #
46
+ # @note To customize logging behaviour, refer to the Logging module documentation.
47
+ # @param [Hash] options the options to create an anonymizer instance with
48
+ # @option options [String] :audit_trail a file name path (if the file contains old audit data, these are loaded and used in the current anonymization)
49
+ # @option options [Boolean] :blank toggles whether to set the values of anonymized elements as empty instead of some generic value
50
+ # @option options [Boolean] :delete_private toggles whether private elements are to be deleted
51
+ # @option options [TrueClass, Digest::Class] :encryption if set as true, the default hash function (MD5) will be used for representing DICOM values in an audit file. Otherwise a Digest class can be given, e.g. Digest::SHA256
52
+ # @option options [Boolean] :enumeration toggles whether (some) elements get enumerated values (to enable post-anonymization re-identification)
53
+ # @option options [Fixnum] :logger_level the logger level which is applied to DObject operations during anonymization (defaults to Logger::FATAL)
54
+ # @option options [Boolean] :random_file_name toggles whether anonymized files will be given random file names when rewritten (in combination with the :write_path option)
55
+ # @option options [Boolean] :recursive toggles whether to anonymize on all sub-levels of the DICOM object tag hierarchies
56
+ # @option options [Boolean] :uid toggles whether UIDs will be replaced with custom generated UIDs (beware that to preserve UID relations in studies/series, the audit_trail feature must be used)
57
+ # @option options [String] :uid_root an organization (or custom) UID root to use when replacing UIDs
58
+ # @option options [String] :write_path a directory where the anonymized files are re-written (if not specified, files are overwritten)
59
+ # @example Create an Anonymizer instance and increase the log output
60
+ # a = Anonymizer.new
61
+ # a.logger.level = Logger::INFO
62
+ # @example Perform anonymization using the audit trail feature
63
+ # a = Anonymizer.new(:audit_trail => 'trail.json')
64
+ # a.enumeration = true
65
+ # a.write_path = '//anonymized/'
66
+ # a.anonymize_path('//dicom/today/')
67
+ #
68
def initialize(options={})
  # Builds an Anonymizer from the given options hash. Options that are not
  # given are left as nil, except :logger_level and :uid_root which fall back
  # to Logger::FATAL and UID_ROOT respectively.
  #
  # Transfer options to attributes:
  @blank = options[:blank]
  @delete_private = options[:delete_private]
  @enumeration = options[:enumeration]
  @logger_level = options[:logger_level] || Logger::FATAL
  @random_file_name = options[:random_file_name]
  @recursive = options[:recursive]
  @uid = options[:uid]
  @uid_root = options[:uid_root] || UID_ROOT
  @write_path = options[:write_path]
  # Array of folders to be processed for anonymization:
  @folders = Array.new
  # Folders that will be skipped:
  @exceptions = Array.new
  # Data elements which will be anonymized (the array will hold a list of tag strings):
  @tags = Array.new
  # Default values to use on anonymized data elements:
  @values = Array.new
  # Which data elements will have enumeration applied, if requested by the user:
  @enumerations = Array.new
  # We use a Hash to store information from DICOM files if enumeration is desired:
  @enum_old_hash = Hash.new
  @enum_new_hash = Hash.new
  # All the files to be anonymized will be put in this array:
  @files = Array.new
  @prefixes = Hash.new
  # Setup audit trail if requested (encryption is only considered together with an audit trail):
  if options[:audit_trail]
    @audit_trail_file = options[:audit_trail]
    # File.exist? is used here because the File.exists? alias was removed in Ruby 3.2.
    # A file of 2 bytes or less is treated as holding no previous audit data.
    if File.exist?(@audit_trail_file) && File.size(@audit_trail_file) > 2
      # Load the pre-existing audit trail from file:
      @audit_trail = AuditTrail.read(@audit_trail_file)
    else
      # Start from scratch with an empty audit trail:
      @audit_trail = AuditTrail.new
    end
    # Set up encryption if indicated:
    if options[:encryption]
      require 'digest'
      # Anything responding to hexdigest is used as given, otherwise MD5 is the default:
      if options[:encryption].respond_to?(:hexdigest)
        @encryption = options[:encryption]
      else
        @encryption = Digest::MD5
      end
    end
  end
  # Set the default data elements to be anonymized:
  set_defaults
end
118
+
119
+ # Checks for equality.
120
+ #
121
+ # Other and self are considered equivalent if they are
122
+ # of compatible types and their attributes are equivalent.
123
+ #
124
+ # @param other an object to be compared with self.
125
+ # @return [Boolean] true if self and other are considered equivalent
126
+ #
127
def ==(other)
  # Objects that cannot be converted to an Anonymizer give nil (no match):
  return unless other.respond_to?(:to_anonymizer)
  # Otherwise the two are equal when their attribute arrays are equal:
  other.send(:state) == state
end
132
+
133
+ alias_method :eql?, :==
134
+
135
+ # Anonymizes the given DObject or array of DICOM objects with the settings
136
+ # of this Anonymizer instance.
137
+ #
138
+ # @param [DObject, Array<DObject>] dicom single or multiple DICOM objects
139
+ # @return [Array<DObject>] an array of the anonymized DICOM objects
140
+ #
141
def anonymize(dicom)
  # A single object is wrapped so that both cases can be handled alike:
  dicom = [dicom] unless dicom.respond_to?(:to_ary)
  if @tags.empty?
    logger.warn("No tags have been selected for anonymization. Aborting anonymization.")
  else
    prepare_anonymization
    dicom.each { |dcm| anonymize_dcm(dcm.to_dcm) }
  end
  # Save the audit trail (if used):
  @audit_trail.write(@audit_trail_file) if @audit_trail
  logger.info("Anonymization complete.")
  dicom
end
156
+
157
+ # Anonymizes any DICOM files found at the given path (file or directory)
158
+ # with the settings of this Anonymizer instance.
159
+ #
160
+ # @param [String] path a file or directory path
161
+ #
162
def anonymize_path(path)
  if @tags.empty?
    logger.warn("No tags have been selected for anonymization. Aborting anonymization.")
  else
    prepare_anonymization
    files = DICOM.load_files(path)
    logger.info("#{files.length} DICOM files have been prepared for anonymization.")
    # Each file is anonymized and written before the next one is read:
    files.each { |file| write(anonymize_file(file)) }
  end
  # Save the audit trail (if used):
  @audit_trail.write(@audit_trail_file) if @audit_trail
  logger.info("Anonymization complete.")
end
178
+
179
+ # Specifies that the given tag is to be completely deleted
180
+ # from the anonymized DICOM objects.
181
+ #
182
+ # @param [String] tag a data element tag
183
+ # @example Completely delete the Patient's Name tag from the DICOM files
184
+ # a.delete_tag('0010,0010')
185
+ #
186
def delete_tag(tag)
  # Only well-formed tag strings are accepted:
  unless tag.is_a?(String)
    raise ArgumentError, "Expected String, got #{tag.class}."
  end
  unless tag.tag?
    raise ArgumentError, "Expected a valid tag of format 'GGGG,EEEE', got #{tag}."
  end
  # Register the tag in the hash of tags marked for deletion:
  @delete.store(tag, true)
end
191
+
192
+ # Checks the enumeration status of this tag.
193
+ #
194
+ # @param [String] tag a data element tag
195
+ # @return [Boolean, NilClass] the enumeration status of the tag, or nil if the tag has no match
196
+ #
197
def enum(tag)
  raise ArgumentError, "Expected String, got #{tag.class}." unless tag.is_a?(String)
  raise ArgumentError, "Expected a valid tag of format 'GGGG,EEEE', got #{tag}." unless tag.tag?
  # The enumeration setting is stored at the same position as the tag itself:
  position = @tags.index(tag)
  return @enumerations[position] if position
  logger.warn("The specified tag (#{tag}) was not found in the list of tags to be anonymized.")
  nil
end
208
+
209
+ # Computes a hash code for this object.
210
+ #
211
+ # @note Two objects with the same attributes will have the same hash code.
212
+ #
213
+ # @return [Fixnum] the object's hash code
214
+ #
215
+ def hash
215
+ state.hash # Hash code of the attribute array returned by #state.
216
+ end
218
+
219
+ # Removes a tag from the list of tags that will be anonymized.
220
+ #
221
+ # @param [String] tag a data element tag
222
+ # @example Do not anonymize the Patient's Name tag
223
+ # a.remove_tag('0010,0010')
224
+ #
225
def remove_tag(tag)
  raise ArgumentError, "Expected String, got #{tag.class}." unless tag.is_a?(String)
  raise ArgumentError, "Expected a valid tag of format 'GGGG,EEEE', got #{tag}." unless tag.tag?
  position = @tags.index(tag)
  # Nothing to do for a tag which is not in the list:
  return unless position
  # The three arrays are parallel, so the same position is dropped from each:
  @tags.delete_at(position)
  @values.delete_at(position)
  @enumerations.delete_at(position)
end
235
+
236
+ # Sets the anonymization settings for the specified tag. If the tag is already present in the list
237
+ # of tags to be anonymized, its settings are updated, and if not, a new tag entry is created.
238
+ #
239
+ # @param [String] tag a data element tag
240
+ # @param [Hash] options the anonymization settings for the specified tag
241
+ # @option options [String, Integer, Float] :value the replacement value to be used when anonymizing this data element. Defaults to the pre-existing value and '' for new tags.
242
+ # @option options [Boolean] :enum specifies if enumeration is to be used for this tag. Defaults to the pre-existing value and false for new tags.
243
+ # @example Set the anonymization settings of the Patient's Name tag
244
+ # a.set_tag('0010,0010', :value => 'MrAnonymous', :enum => true)
245
+ #
246
def set_tag(tag, options={})
  raise ArgumentError, "Expected String, got #{tag.class}." unless tag.is_a?(String)
  raise ArgumentError, "Expected a valid tag of format 'GGGG,EEEE', got #{tag}." unless tag.tag?
  position = @tags.index(tag)
  if position.nil?
    # Unknown tag: append a new entry to each of the three parallel arrays.
    @tags << tag
    @values << (options[:value] || default_value(tag))
    @enumerations << (options[:enum] || false)
  else
    # Known tag: only the settings that were actually given are updated.
    @values[position] = options[:value] if options[:value]
    @enumerations[position] = options[:enum] unless options[:enum].nil?
  end
end
261
+
262
+ # Returns self.
263
+ #
264
+ # @return [Anonymizer] self
265
+ #
266
+ def to_anonymizer
267
+ self # Already an Anonymizer, so no conversion is needed.
268
+ end
269
+
270
+ # Gives the value which will be used when anonymizing this tag.
271
+ #
272
+ # @note If enumeration is selected for a string type tag, a number will be
273
+ # appended in addition to the string that is returned here.
274
+ #
275
+ # @param [String] tag a data element tag
276
+ # @return [String, Integer, Float, NilClass] the replacement value for the specified tag, or nil if the tag is not matched
277
+ #
278
def value(tag)
  raise ArgumentError, "Expected String, got #{tag.class}." unless tag.is_a?(String)
  raise ArgumentError, "Expected a valid tag of format 'GGGG,EEEE', got #{tag}." unless tag.tag?
  # The replacement value is stored at the same position as the tag itself:
  position = @tags.index(tag)
  return @values[position] if position
  logger.warn("The specified tag (#{tag}) was not found in the list of tags to be anonymized.")
  nil
end
289
+
290
+
291
+ private
292
+
293
+
294
+ # Performs anonymization on a DICOM object.
295
+ #
296
+ # @param [DObject] dcm a DICOM object
297
+ #
298
def anonymize_dcm(dcm)
  # Collect the parents whose child elements are to be investigated:
  parents = element_parents(dcm)
  parents.each do |parent|
    # Give the selected tags their replacement values:
    @tags.each_with_index do |tag, j|
      next unless parent.exists?(tag)
      element = parent[tag]
      # Only plain data elements are given a new value:
      next unless element.is_a?(Element)
      element.value =
        if @blank
          ''
        elsif @enumeration
          # The enumeration logic is only launched when the element actually has a value:
          old_value = element.value
          old_value ? enumerated_value(old_value, j) : ''
        else
          # Use the value that has been set for this tag:
          @values[j]
        end
    end
    # Remove the elements which have been marked for deletion:
    @delete.each_key do |doomed|
      parent.delete(doomed) if parent.exists?(doomed)
    end
  end
  # General DICOM object manipulation:
  # Add a Patient Identity Removed attribute (as per
  # DICOM PS 3.15, Annex E, E.1.1 De-Identifier, point 6):
  dcm.add(Element.new('0012,0062', 'YES'))
  # Add a De-Identification Method Code Sequence Item (creating the sequence if missing):
  dcm.add(Sequence.new('0012,0064')) unless dcm.exists?('0012,0064')
  method_item = dcm['0012,0064'].add_item
  method_item.add(Element.new('0012,0063', 'De-identified by the ruby-dicom Anonymizer'))
  # FIXME: At some point we should add a set of de-identification method codes, as per
  # DICOM PS 3.16 CID 7050 which corresponds to the settings chosen for the anonymizer.
  # Delete the old File Meta Information group (as per
  # DICOM PS 3.15, Annex E, E.1.1 De-Identifier, point 7):
  dcm.delete_group('0002')
  # Handle UIDs if requested:
  replace_uids(parents) if @uid
  # Delete private tags if indicated:
  dcm.delete_private if @delete_private
end
348
+
349
+ # Performs anonymization of a DICOM file.
350
+ #
351
+ # @param [String] file a DICOM file path
352
+ #
353
def anonymize_file(file)
  # Reads the given file into a DObject, anonymizes it and returns it.
  #
  # The log threshold is temporarily set to @logger_level while reading
  # (to suppress messages from the DObject class):
  @original_level = logger.level
  logger.level = @logger_level
  begin
    dcm = DObject.read(file)
  ensure
    # Restore the threshold even when reading raises, so that a failing file
    # does not leave the logger at the temporary level:
    logger.level = @original_level
  end
  anonymize_dcm(dcm)
  dcm
end
362
+
363
+ # Gives the value to be used for the audit trail, which is either
364
+ # the original value itself, or an encrypted string based on it.
365
+ #
366
+ # @param [String, Integer, Float] original the original value of the tag to be anonymized
367
+ # @return [String, Integer, Float] with encryption, a hash string is returned, otherwise the original value
368
+ #
369
def at_value(original)
  # Without encryption the original value is passed through untouched.
  # With encryption the value is stringified before hashing, since the
  # original may be an Integer or Float and Digest classes only accept strings.
  @encryption ? @encryption.hexdigest(original.to_s) : original
end
372
+
373
+ # Creates a hash that is used for storing information that is used when enumeration is selected.
374
+ #
375
def create_enum_hash
  # Every tag gets a fresh pair of empty arrays (for the original values
  # and for their replacements):
  @enumerations.each_index do |position|
    tag = @tags[position]
    @enum_old_hash[tag] = []
    @enum_new_hash[tag] = []
  end
end
381
+
382
+ # Determines a default value to use for anonymizing the given tag.
383
+ #
384
+ # @param [String] tag a data element tag
385
+ # @return [String, Integer, Float] the default replacement value for a given tag
386
+ #
387
def default_value(tag)
  # Only the value representation is needed; the name is ignored:
  _name, vr = LIBRARY.name_and_vr(tag)
  case VALUE_CONVERSION[vr]
  when :to_i
    0
  when :to_f
    0.0
  else
    # Assume type is string and return an empty string:
    ''
  end
end
398
+
399
+ # Creates a write path for the given DICOM object, based on the object's
400
+ # original file path and the write_path attribute.
401
+ #
402
+ # @param [DObject] dcm a DICOM object
403
+ # @return [String] the destination directory path
404
+ #
405
def destination(dcm)
  # Separate the directory part from the source file string:
  source = dcm.source
  file_start = source.rindex(File.basename(source))
  source_dir = file_start == 0 ? "." : source[0..(file_start-1)]
  source_folders = source_dir.split(File::SEPARATOR)
  target_folders = @write_path.split(File::SEPARATOR)
  # Get rid of any current dir symbol:
  source_folders.delete('.')
  # Locate the first folder that differs, looking no further than the shortest array:
  common_length = [source_folders.length, target_folders.length].min
  uncommon_index = (0...common_length).find { |i| target_folders[i] != source_folders[i] }
  # The source folders from the first difference onwards are appended to the target:
  append_path = source_folders[uncommon_index..-1] if uncommon_index
  [target_folders, append_path].compact.join(File::SEPARATOR)
end
430
+
431
+ # Extracts all parents from a DObject instance which potentially
432
+ # have child (data) elements. This typically means the DObject
433
+ # instance itself as well as items (i.e. not sequences).
434
+ # Note that unless the @recursive attribute has been set,
435
+ # this method will only return the DObject (placed inside an array).
436
+ #
437
+ # @param [DObject] dcm a DICOM object
438
+ # @return [Array<DObject, Item>] an array containing either just a DObject or also all parental child items within the tag hierarchy
439
+ #
440
def element_parents(dcm)
  # The DICOM object itself is always the first parent:
  parents = [dcm]
  if @recursive
    # Add the items found below each of the object's sequences:
    dcm.sequences.each do |sequence|
      parents.concat(element_parents_recursive(sequence))
    end
  end
  parents
end
450
+
451
+ # Recursively extracts all item parents from a sequence instance (including
452
+ # any sub-sequences) which actually contain child (data) elements.
453
+ #
454
+ # @param [Sequence] sequence a Sequence instance
455
+ # @return [Array<Item>] an array containing items within the tag hierarchy that contains child elements
456
+ #
457
def element_parents_recursive(sequence)
  sequence.items.each_with_object([]) do |item, parents|
    # An item only counts as a parent if it holds elements itself:
    parents << item if item.elements?
    # Its sub-sequences are searched regardless:
    item.sequences.each do |sub_sequence|
      parents.concat(element_parents_recursive(sub_sequence))
    end
  end
end
467
+
468
+ # Handles the enumeration for the given data element tag.
469
+ # If its value has been encountered before, its corresponding enumerated
470
+ # replacement value is retrieved, and if a new original value is encountered,
471
+ # a new enumerated replacement value is found by increasing an index by 1.
472
+ #
473
+ # @param [String, Integer, Float] original the original value of the tag to be anonymized
474
+ # @param [Fixnum] j the index of this tag in the tag-related instance arrays
475
+ # @return [String, Integer, Float] the replacement value which is used for the anonymization of the tag
476
+ #
477
def enumerated_value(original, j)
  # Gives the replacement value for the tag at position j. An original value
  # which has been encountered before gets the replacement it was given then,
  # whereas a new original value gets the next index in line.
  #
  # Tags without enumeration simply get their configured replacement value:
  return @values[j] unless @enumerations[j]
  tag = @tags[j]
  if @audit_trail
    # The audit trail holds the records of previously replaced values:
    recorded = at_value(original)
    replacement = @audit_trail.replacement(tag, recorded)
    unless replacement
      # This original value has not been encountered yet. Determine the index to use:
      index = @audit_trail.records(tag).length + 1
      replacement = numbered_replacement(@values[j], index)
      # Add this tag record to the audit trail:
      @audit_trail.add_record(tag, recorded, replacement)
    end
    replacement
  else
    # Retrieve earlier used anonymization values:
    previous_old = @enum_old_hash[tag]
    previous_new = @enum_new_hash[tag]
    position = previous_old.index(original)
    # Current value has been observed before:
    return previous_new[position] if position
    # Current value has not been encountered before. Numeric values are handled
    # the same way as in the audit trail branch (instead of raising a TypeError):
    replacement = numbered_replacement(@values[j], previous_old.length + 1)
    previous_old << original
    previous_new << replacement
    replacement
  end
end

# Combines a replacement value with an enumeration index: the index is
# appended to a string value and added to any other (numeric) value.
def numbered_replacement(base, index)
  base.is_a?(String) ? base + index.to_s : base + index
end
518
+
519
+ # Establishes a prefix for a given UID tag.
520
+ # This makes it somewhat easier to distinguish
521
+ # between different types of random generated UIDs.
522
+ #
523
+ # @param [String] tag a data element string tag
524
+ #
525
def prefix(tag)
  # A tag seen for the first time is given the next number in line,
  # which is then reused for every later lookup of that tag:
  @prefixes[tag] ||= @prefixes.length + 1
end
533
+
534
+ # Prepares the anonymizer for anonymization.
535
+ #
536
+ #
537
def prepare_anonymization
  # Set up enumeration if requested:
  if @enumeration
    create_enum_hash
  end
  # The random generator is only loaded when random file names are requested:
  if @random_file_name
    require 'securerandom'
  end
end
542
+
543
+ # Replaces the UIDs of the given DICOM object.
544
+ #
545
+ # @note Empty UIDs are ignored (we don't generate new UIDs for these).
546
+ # @note If AuditTrail is set, the relationships between old and new UIDs are preserved,
547
+ # and the relations between files in a study/series should remain valid.
548
+ # @param [Array<DObject, Item>] parents dicom parent objects whose child elements will be investigated
549
+ #
550
def replace_uids(parents)
  parents.each do |parent|
    parent.each_element do |element|
      # Only UID elements which are not listed as static are candidates:
      next unless element.vr == 'UI' && !@static_uids[element.tag]
      original = element.value
      # Missing or empty UIDs are left alone:
      next unless original && original.length > 0
      element.value =
        if @audit_trail
          # Reuse the replacement recorded for this UID, or generate and record a new one:
          @audit_trail.replacement('uids', original) || begin
            fresh = DICOM.generate_uid(@uid_root, prefix(element.tag))
            @audit_trail.add_record('uids', original, fresh)
            fresh
          end
        else
          # We don't care about preserving UID relations. Just insert a custom UID:
          DICOM.generate_uid(@uid_root, prefix(element.tag))
        end
    end
  end
end
577
+
578
+ # Sets up some default information variables that are used by the Anonymizer.
579
+ #
580
def set_defaults
  # Some UIDs should not be remapped even if uid anonymization has been requested.
  # The rows below list, in order: private related, coding scheme related,
  # transfer syntax related and SOP class related tags.
  static_tags = %w[
    0002,0100 0004,1432
    0008,010C 0008,010D
    0002,0010 0400,0010 0400,0510 0004,1512
    0000,0002 0000,0003 0002,0002 0004,1510 0004,151A 0008,0016
    0008,001A 0008,001B 0008,0062 0008,1150 0008,115A
  ]
  @static_uids = {}
  static_tags.each { |tag| @static_uids[tag] = true }
  # Default tags to be anonymized, each with its replacement value and enumeration setting:
  defaults = [
    ['0008,0012', '20000101', false], # Instance Creation Date
    ['0008,0013', '000000.00', false], # Instance Creation Time
    ['0008,0020', '20000101', false], # Study Date
    ['0008,0021', '20000101', false], # Series Date
    ['0008,0022', '20000101', false], # Acquisition Date
    ['0008,0023', '20000101', false], # Image Date
    ['0008,0030', '000000.00', false], # Study Time
    ['0008,0031', '000000.00', false], # Series Time
    ['0008,0032', '000000.00', false], # Acquisition Time
    ['0008,0033', '000000.00', false], # Image Time
    ['0008,0050', '', true], # Accession Number
    ['0008,0080', 'Institution', true], # Institution name
    ['0008,0081', 'Address', true], # Institution Address
    ['0008,0090', 'Physician', true], # Referring Physician's name
    ['0008,1010', 'Station', true], # Station name
    ['0008,1040', 'Department', true], # Institutional Department name
    ['0008,1070', 'Operator', true], # Operator's Name
    ['0010,0010', 'Patient', true], # Patient's name
    ['0010,0020', 'ID', true], # Patient's ID
    ['0010,0030', '20000101', false], # Patient's Birth Date
    ['0010,0040', 'O', false], # Patient's Sex
    ['0010,1010', '', false], # Patient's Age
    ['0020,4000', '', false] # Image Comments
  ]
  # The table is split column-wise into the three parallel instance arrays:
  @tags, @values, @enumerations = defaults.transpose
  # Tags to be deleted completely during anonymization:
  @delete = Hash.new
end
640
+
641
+ # Collects the attributes of this instance.
642
+ #
643
+ # @return [Array] an array of attributes
644
+ #
645
def state
  # The attributes are collected by name, in this fixed order:
  names = [
    :@tags, :@values, :@enumerations, :@delete, :@blank,
    :@delete_private, :@enumeration, :@logger_level,
    :@random_file_name, :@recursive, :@uid, :@uid_root, :@write_path
  ]
  names.map { |name| instance_variable_get(name) }
end
652
+
653
+ # Writes a DICOM object to file.
654
+ #
655
+ # @param [DObject] dcm a DICOM object
656
+ #
657
def write(dcm)
  # Without a write path, the original DICOM file is overwritten:
  return dcm.write(dcm.source) unless @write_path
  # The DICOM object is to be written to a separate directory. If the
  # original and the new directories have a common root, this is taken into
  # consideration when determining the object's write path:
  path = destination(dcm)
  file_name = @random_file_name ? "#{SecureRandom.hex(16)}.dcm" : File.basename(dcm.source)
  dcm.write(File.join(path, file_name))
end
674
+
675
+ end
676
+
677
+ end