dicom 0.9.6 → 0.9.7

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,654 +1,677 @@
1
- module DICOM
2
-
3
- # This is a convenience class for handling the anonymization
4
- # (de-identification) of DICOM files.
5
- #
6
- # @note
7
- # For a thorough introduction to the concept of DICOM anonymization,
8
- # please refer to The DICOM Standard, Part 15: Security and System
9
- # Management Profiles, Annex E: Attribute Confidentiality Profiles.
10
- # For guidance on settings for individual data elements, please
11
- # refer to DICOM PS 3.15, Annex E, Table E.1-1: Application Level
12
- # Confidentiality Profile Attributes.
13
- #
14
- class Anonymizer
15
- include Logging
16
-
17
- # An AuditTrail instance used for this anonymization (if specified).
18
- attr_reader :audit_trail
19
- # The file name used for the AuditTrail serialization (if specified).
20
- attr_reader :audit_trail_file
21
- # A boolean that, if set as true, will cause all anonymized tags to be blank instead of getting some generic value.
22
- attr_accessor :blank
23
- # A hash of elements (represented by tag keys) that will be deleted from the DICOM objects on anonymization.
24
- attr_reader :delete
25
- # A boolean that if set as true, will make the anonymization delete all private tags.
26
- attr_accessor :delete_private
27
- # The cryptographic hash function to be used for encrypting DICOM values recorded in an audit trail file.
28
- attr_reader :encryption
29
- # A boolean that, if set as true, will cause all anonymized tags to get enumerated values, to enable post-anonymization re-identification by the user.
30
- attr_accessor :enumeration
31
- # The logger level which is applied to DObject operations during anonymization (defaults to Logger::FATAL).
32
- attr_reader :logger_level
33
- # A boolean that if set as true will cause all anonymized files to be written with random file names (if write_path has been specified).
34
- attr_accessor :random_file_name
35
- # A boolean that if set as true, will cause the anonymization to run on all levels of the DICOM file tag hierarchy.
36
- attr_accessor :recursive
37
- # A boolean indicating whether or not UIDs shall be replaced when executing the anonymization.
38
- attr_accessor :uid
39
- # The DICOM UID root to use when generating new UIDs.
40
- attr_accessor :uid_root
41
- # The path where the anonymized files will be saved. If this value is not set, the original DICOM files will be overwritten.
42
- attr_accessor :write_path
43
-
44
- # Creates an Anonymizer instance.
45
- #
46
- # @note To customize logging behaviour, refer to the Logging module documentation.
47
- # @param [Hash] options the options to create an anonymizer instance with
48
- # @option options [String] :audit_trail a file name path (if the file contains old audit data, these are loaded and used in the current anonymization)
49
- # @option options [Boolean] :blank toggles whether to set the values of anonymized elements as empty instead of some generic value
50
- # @option options [Boolean] :delete_private toggles whether private elements are to be deleted
51
- # @option options [TrueClass, Digest::Class] :encryption if set as true, the default hash function (MD5) will be used for representing DICOM values in an audit file. Otherwise a Digest class can be given, e.g. Digest::SHA256
52
- # @option options [Boolean] :enumeration toggles whether (some) elements get enumerated values (to enable post-anonymization re-identification)
53
- # @option options [Fixnum] :logger_level the logger level which is applied to DObject operations during anonymization (defaults to Logger::FATAL)
54
- # @option options [Boolean] :random_file_name toggles whether anonymized files will be given random file names when rewritten (in combination with the :write_path option)
55
- # @option options [Boolean] :recursive toggles whether to anonymize on all sub-levels of the DICOM object tag hierarchies
56
- # @option options [Boolean] :uid toggles whether UIDs will be replaced with custom generated UIDs (beware that to preserve UID relations in studies/series, the audit_trail feature must be used)
57
- # @option options [String] :uid_root an organization (or custom) UID root to use when replacing UIDs
58
- # @option options [String] :write_path a directory where the anonymized files are re-written (if not specified, files are overwritten)
59
- # @example Create an Anonymizer instance and increase the log output
60
- # a = Anonymizer.new
61
- # a.logger.level = Logger::INFO
62
- # @example Perform anonymization using the audit trail feature
63
- # a = Anonymizer.new(:audit_trail => 'trail.json')
64
- # a.enumeration = true
65
- # a.write_path = '//anonymized/'
66
- # a.anonymize('//dicom/today/')
67
- #
68
- def initialize(options={})
69
- # Transfer options to attributes:
70
- @blank = options[:blank]
71
- @delete_private = options[:delete_private]
72
- @enumeration = options[:enumeration]
73
- @logger_level = options[:logger_level] || Logger::FATAL
74
- @random_file_name = options[:random_file_name]
75
- @recursive = options[:recursive]
76
- @uid = options[:uid]
77
- @uid_root = options[:uid_root] ? options[:uid_root] : UID_ROOT
78
- @write_path = options[:write_path]
79
- # Array of folders to be processed for anonymization:
80
- @folders = Array.new
81
- # Folders that will be skipped:
82
- @exceptions = Array.new
83
- # Data elements which will be anonymized (the array will hold a list of tag strings):
84
- @tags = Array.new
85
- # Default values to use on anonymized data elements:
86
- @values = Array.new
87
- # Which data elements will have enumeration applied, if requested by the user:
88
- @enumerations = Array.new
89
- # We use a Hash to store information from DICOM files if enumeration is desired:
90
- @enum_old_hash = Hash.new
91
- @enum_new_hash = Hash.new
92
- # All the files to be anonymized will be put in this array:
93
- @files = Array.new
94
- @prefixes = Hash.new
95
- # Setup audit trail if requested:
96
- if options[:audit_trail]
97
- @audit_trail_file = options[:audit_trail]
98
- if File.exists?(@audit_trail_file) && File.size(@audit_trail_file) > 2
99
- # Load the pre-existing audit trail from file:
100
- @audit_trail = AuditTrail.read(@audit_trail_file)
101
- else
102
- # Start from scratch with an empty audit trail:
103
- @audit_trail = AuditTrail.new
104
- end
105
- # Set up encryption if indicated:
106
- if options[:encryption]
107
- require 'digest'
108
- if options[:encryption].respond_to?(:hexdigest)
109
- @encryption = options[:encryption]
110
- else
111
- @encryption = Digest::MD5
112
- end
113
- end
114
- end
115
- # Set the default data elements to be anonymized:
116
- set_defaults
117
- end
118
-
119
- # Checks for equality.
120
- #
121
- # Other and self are considered equivalent if they are
122
- # of compatible types and their attributes are equivalent.
123
- #
124
- # @param other an object to be compared with self.
125
- # @return [Boolean] true if self and other are considered equivalent
126
- #
127
- def ==(other)
128
- if other.respond_to?(:to_anonymizer)
129
- other.send(:state) == state
130
- end
131
- end
132
-
133
- alias_method :eql?, :==
134
-
135
- # Anonymizes the given DICOM data with the settings of this Anonymizer instance.
136
- #
137
- # @param [String, DObject, Array<String, DObject>] data single or multiple DICOM data (directories, file paths, binary strings, DICOM objects)
138
- # @return [Array<DObject>] an array of the anonymized DICOM objects
139
- #
140
- def anonymize(data)
141
- dicom = prepare(data)
142
- if @tags.length > 0
143
- dicom.each do |dcm|
144
- anonymize_dcm(dcm)
145
- # Write DICOM object to file unless it was passed to the anonymizer as an object:
146
- write(dcm) unless dcm.was_dcm_on_input
147
- end
148
- else
149
- logger.warn("No tags have been selected for anonymization. Aborting anonymization.")
150
- end
151
- # Reset the ruby-dicom log threshold to its original level:
152
- logger.level = @original_level
153
- # Save the audit trail (if used):
154
- @audit_trail.write(@audit_trail_file) if @audit_trail
155
- logger.info("Anonymization complete.")
156
- dicom
157
- end
158
-
159
- # Specifies that the given tag is to be completely deleted
160
- # from the anonymized DICOM objects.
161
- #
162
- # @param [String] tag a data element tag
163
- # @example Completely delete the Patient's Name tag from the DICOM files
164
- # a.delete_tag('0010,0010')
165
- #
166
- def delete_tag(tag)
167
- raise ArgumentError, "Expected String, got #{tag.class}." unless tag.is_a?(String)
168
- raise ArgumentError, "Expected a valid tag of format 'GGGG,EEEE', got #{tag}." unless tag.tag?
169
- @delete[tag] = true
170
- end
171
-
172
- # Checks the enumeration status of this tag.
173
- #
174
- # @param [String] tag a data element tag
175
- # @return [Boolean, NilClass] the enumeration status of the tag, or nil if the tag has no match
176
- #
177
- def enum(tag)
178
- raise ArgumentError, "Expected String, got #{tag.class}." unless tag.is_a?(String)
179
- raise ArgumentError, "Expected a valid tag of format 'GGGG,EEEE', got #{tag}." unless tag.tag?
180
- pos = @tags.index(tag)
181
- if pos
182
- return @enumerations[pos]
183
- else
184
- logger.warn("The specified tag (#{tag}) was not found in the list of tags to be anonymized.")
185
- return nil
186
- end
187
- end
188
-
189
- # Computes a hash code for this object.
190
- #
191
- # @note Two objects with the same attributes will have the same hash code.
192
- #
193
- # @return [Fixnum] the object's hash code
194
- #
195
- def hash
196
- state.hash
197
- end
198
-
199
- # Removes a tag from the list of tags that will be anonymized.
200
- #
201
- # @param [String] tag a data element tag
202
- # @example Do not anonymize the Patient's Name tag
203
- # a.remove_tag('0010,0010')
204
- #
205
- def remove_tag(tag)
206
- raise ArgumentError, "Expected String, got #{tag.class}." unless tag.is_a?(String)
207
- raise ArgumentError, "Expected a valid tag of format 'GGGG,EEEE', got #{tag}." unless tag.tag?
208
- pos = @tags.index(tag)
209
- if pos
210
- @tags.delete_at(pos)
211
- @values.delete_at(pos)
212
- @enumerations.delete_at(pos)
213
- end
214
- end
215
-
216
- # Sets the anonymization settings for the specified tag. If the tag is already present in the list
217
- # of tags to be anonymized, its settings are updated, and if not, a new tag entry is created.
218
- #
219
- # @param [String] tag a data element tag
220
- # @param [Hash] options the anonymization settings for the specified tag
221
- # @option options [String, Integer, Float] :value the replacement value to be used when anonymizing this data element. Defaults to the pre-existing value for tags already in the list, and to 0, 0.0 or '' (depending on the value conversion of the tag's VR) for new tags.
222
- # @option options [Boolean] :enum specifies if enumeration is to be used for this tag. Defaults to the pre-existing value and false for new tags.
223
- # @example Set the anonymization settings of the Patient's Name tag
224
- # a.set_tag('0010,0010', :value => 'MrAnonymous', :enum => true)
225
- #
226
- def set_tag(tag, options={})
227
- raise ArgumentError, "Expected String, got #{tag.class}." unless tag.is_a?(String)
228
- raise ArgumentError, "Expected a valid tag of format 'GGGG,EEEE', got #{tag}." unless tag.tag?
229
- pos = @tags.index(tag)
230
- if pos
231
- # Update existing values:
232
- @values[pos] = options[:value] if options[:value]
233
- @enumerations[pos] = options[:enum] if options[:enum] != nil
234
- else
235
- # Add new elements:
236
- @tags << tag
237
- @values << (options[:value] ? options[:value] : default_value(tag))
238
- @enumerations << (options[:enum] ? options[:enum] : false)
239
- end
240
- end
241
-
242
- # Returns self.
243
- #
244
- # @return [Anonymizer] self
245
- #
246
- def to_anonymizer
247
- self
248
- end
249
-
250
- # Gives the value which will be used when anonymizing this tag.
251
- #
252
- # @note If enumeration is selected for a string type tag, a number will be
253
- # appended in addition to the string that is returned here.
254
- #
255
- # @param [String] tag a data element tag
256
- # @return [String, Integer, Float, NilClass] the replacement value for the specified tag, or nil if the tag is not matched
257
- #
258
- def value(tag)
259
- raise ArgumentError, "Expected String, got #{tag.class}." unless tag.is_a?(String)
260
- raise ArgumentError, "Expected a valid tag of format 'GGGG,EEEE', got #{tag}." unless tag.tag?
261
- pos = @tags.index(tag)
262
- if pos
263
- return @values[pos]
264
- else
265
- logger.warn("The specified tag (#{tag}) was not found in the list of tags to be anonymized.")
266
- return nil
267
- end
268
- end
269
-
270
-
271
- private
272
-
273
-
274
- # Performs anonymization on a DICOM object.
275
- #
276
- # @param [DObject] dcm a DICOM object
277
- #
278
- def anonymize_dcm(dcm)
279
- # Extract the data element parents to investigate:
280
- parents = element_parents(dcm)
281
- parents.each do |parent|
282
- # Anonymize the desired tags:
283
- @tags.each_index do |j|
284
- if parent.exists?(@tags[j])
285
- element = parent[@tags[j]]
286
- if element.is_a?(Element)
287
- if @blank
288
- value = ''
289
- elsif @enumeration
290
- old_value = element.value
291
- # Only launch enumeration logic if there is an actual value to the data element:
292
- if old_value
293
- value = enumerated_value(old_value, j)
294
- else
295
- value = ''
296
- end
297
- else
298
- # Use the value that has been set for this tag:
299
- value = @values[j]
300
- end
301
- element.value = value
302
- end
303
- end
304
- end
305
- # Delete elements marked for deletion:
306
- @delete.each_key do |tag|
307
- parent.delete(tag) if parent.exists?(tag)
308
- end
309
- end
310
- # General DICOM object manipulation:
311
- # Add a Patient Identity Removed attribute (as per
312
- # DICOM PS 3.15, Annex E, E.1.1 De-Identifier, point 6):
313
- dcm.add(Element.new('0012,0062', 'YES'))
314
- # Add a De-Identification Method Code Sequence Item:
315
- dcm.add(Sequence.new('0012,0064')) unless dcm.exists?('0012,0064')
316
- i = dcm['0012,0064'].add_item
317
- i.add(Element.new('0012,0063', 'De-identified by the ruby-dicom Anonymizer'))
318
- # FIXME: At some point we should add a set of de-identification method codes, as per
319
- # DICOM PS 3.16 CID 7050 which corresponds to the settings chosen for the anonymizer.
320
- # Delete the old File Meta Information group (as per
321
- # DICOM PS 3.15, Annex E, E.1.1 De-Identifier, point 7):
322
- dcm.delete_group('0002')
323
- # Handle UIDs if requested:
324
- replace_uids(parents) if @uid
325
- # Delete private tags if indicated:
326
- dcm.delete_private if @delete_private
327
- end
328
-
329
- # Gives the value to be used for the audit trail, which is either
330
- # the original value itself, or an encrypted string based on it.
331
- #
332
- # @param [String, Integer, Float] original the original value of the tag to be anonymized
333
- # @return [String, Integer, Float] with encryption, a hash string is returned, otherwise the original value
334
- #
335
- def at_value(original)
336
- @encryption ? @encryption.hexdigest(original) : original
337
- end
338
-
339
- # Creates a hash that is used for storing information that is used when enumeration is selected.
340
- #
341
- def create_enum_hash
342
- @enumerations.each_index do |i|
343
- @enum_old_hash[@tags[i]] = Array.new
344
- @enum_new_hash[@tags[i]] = Array.new
345
- end
346
- end
347
-
348
- # Determines a default value to use for anonymizing the given tag.
349
- #
350
- # @param [String] tag a data element tag
351
- # @return [String, Integer, Float] the default replacement value for a given tag
352
- #
353
- def default_value(tag)
354
- name, vr = LIBRARY.name_and_vr(tag)
355
- conversion = VALUE_CONVERSION[vr]
356
- case conversion
357
- when :to_i then return 0
358
- when :to_f then return 0.0
359
- else
360
- # Assume type is string and return an empty string:
361
- return ''
362
- end
363
- end
364
-
365
- # Creates a write path for the given DICOM object, based on the object's
366
- # original file path and the write_path attribute.
367
- #
368
- # @param [DObject] dcm a DICOM object
369
- # @return [String] the destination directory path
370
- #
371
- def destination(dcm)
372
- # Separate the path from the source file string:
373
- file_start = dcm.source.rindex(File.basename(dcm.source))
374
- if file_start == 0
375
- source_dir = "."
376
- else
377
- source_dir = dcm.source[0..(file_start-1)]
378
- end
379
- source_folders = source_dir.split(File::SEPARATOR)
380
- target_folders = @write_path.split(File::SEPARATOR)
381
- # If the first element is the current dir symbol, get rid of it:
382
- source_folders.delete('.')
383
- # Check for equality of folder names in a range limited by the shortest array:
384
- common_length = [source_folders.length, target_folders.length].min
385
- uncommon_index = nil
386
- common_length.times do |i|
387
- if target_folders[i] != source_folders[i]
388
- uncommon_index = i
389
- break
390
- end
391
- end
392
- # Create the output path by joining the two paths together using the determined index:
393
- append_path = uncommon_index ? source_folders[uncommon_index..-1] : nil
394
- [target_folders, append_path].compact.join(File::SEPARATOR)
395
- end
396
-
397
- # Extracts all parents from a DObject instance which potentially
398
- # have child (data) elements. This typically means the DObject
399
- # instance itself as well as items (i.e. not sequences).
400
- # Note that unless the @recursive attribute has been set,
401
- # this method will only return the DObject (placed inside an array).
402
- #
403
- # @param [DObject] dcm a DICOM object
404
- # @return [Array<DObject, Item>] an array containing either just a DObject or also all parental child items within the tag hierarchy
405
- #
406
- def element_parents(dcm)
407
- parents = Array.new
408
- parents << dcm
409
- if @recursive
410
- dcm.sequences.each do |s|
411
- parents += element_parents_recursive(s)
412
- end
413
- end
414
- parents
415
- end
416
-
417
- # Recursively extracts all item parents from a sequence instance (including
418
- # any sub-sequences) which actually contain child (data) elements.
419
- #
420
- # @param [Sequence] sequence a Sequence instance
421
- # @return [Array<Item>] an array containing items within the tag hierarchy that contains child elements
422
- #
423
- def element_parents_recursive(sequence)
424
- parents = Array.new
425
- sequence.items.each do |i|
426
- parents << i if i.elements?
427
- i.sequences.each do |s|
428
- parents += element_parents_recursive(s)
429
- end
430
- end
431
- parents
432
- end
433
-
434
- # Handles the enumeration for the given data element tag.
435
- # If its value has been encountered before, its corresponding enumerated
436
- # replacement value is retrieved, and if a new original value is encountered,
437
- # a new enumerated replacement value is found by increasing an index by 1.
438
- #
439
- # @param [String, Integer, Float] original the original value of the tag to be anonymized
440
- # @param [Fixnum] j the index of this tag in the tag-related instance arrays
441
- # @return [String, Integer, Float] the replacement value which is used for the anonymization of the tag
442
- #
443
- def enumerated_value(original, j)
444
- # Is enumeration requested for this tag?
445
- if @enumerations[j]
446
- if @audit_trail
447
- # Check if the original value has been encountered already:
448
- replacement = @audit_trail.replacement(@tags[j], at_value(original))
449
- unless replacement
450
- # This original value has not been encountered yet. Determine the index to use.
451
- index = @audit_trail.records(@tags[j]).length + 1
452
- # Create the replacement value:
453
- if @values[j].is_a?(String)
454
- replacement = @values[j] + index.to_s
455
- else
456
- replacement = @values[j] + index
457
- end
458
- # Add this tag record to the audit trail:
459
- @audit_trail.add_record(@tags[j], at_value(original), replacement)
460
- end
461
- else
462
- # Retrieve earlier used anonymization values:
463
- previous_old = @enum_old_hash[@tags[j]]
464
- previous_new = @enum_new_hash[@tags[j]]
465
- p_index = previous_old.length
466
- if previous_old.index(original) == nil
467
- # Current value has not been encountered before:
468
- replacement = @values[j]+(p_index + 1).to_s
469
- # Store value in array (and hash):
470
- previous_old << original
471
- previous_new << replacement
472
- @enum_old_hash[@tags[j]] = previous_old
473
- @enum_new_hash[@tags[j]] = previous_new
474
- else
475
- # Current value has been observed before:
476
- replacement = previous_new[previous_old.index(original)]
477
- end
478
- end
479
- else
480
- replacement = @values[j]
481
- end
482
- return replacement
483
- end
484
-
485
- # Establishes a prefix for a given UID tag.
486
- # This makes it somewhat easier to distinguish
487
- # between different types of random generated UIDs.
488
- #
489
- # @param [String] tag a data element string tag
490
- #
491
- def prefix(tag)
492
- if @prefixes[tag]
493
- @prefixes[tag]
494
- else
495
- @prefixes[tag] = @prefixes.length + 1
496
- @prefixes[tag]
497
- end
498
- end
499
-
500
- # Prepares the data for anonymization.
501
- #
502
- # @param [String, DObject, Array<String, DObject>] data single or multiple DICOM data (directories, file paths, binary strings, DICOM objects)
503
- # @return [Array<DObject>] the DObject instances loaded from the given data
504
- #
505
- def prepare(data)
506
- logger.info("Loading DICOM data.")
507
- # Temporarily adjust the ruby-dicom log threshold (usually to suppress messages from the DObject class):
508
- @original_level = logger.level
509
- logger.level = @logger_level
510
- dicom = DICOM.load(data)
511
- logger.level = @original_level
512
- logger.info("#{dicom.length} DICOM objects have been prepared for anonymization.")
513
- logger.level = @logger_level
514
- # Set up enumeration if requested:
515
- create_enum_hash if @enumeration
516
- require 'securerandom' if @random_file_name
517
- dicom
518
- end
519
-
520
- # Replaces the UIDs of the given DICOM object.
521
- #
522
- # @note Empty UIDs are ignored (we don't generate new UIDs for these).
523
- # @note If AuditTrail is set, the relationship between old and new UIDs is preserved,
524
- # and the relations between files in a study/series should remain valid.
525
- # @param [Array<DObject, Item>] parents dicom parent objects whose child elements will be investigated
526
- #
527
- def replace_uids(parents)
528
- parents.each do |parent|
529
- parent.each_element do |element|
530
- if element.vr == ('UI') and !@static_uids[element.tag]
531
- original = element.value
532
- if original && original.length > 0
533
- # We have a UID value, go ahead and replace it:
534
- if @audit_trail
535
- # Check if the UID has been encountered already:
536
- replacement = @audit_trail.replacement('uids', original)
537
- unless replacement
538
- # The UID has not been stored previously. Generate a new one:
539
- replacement = DICOM.generate_uid(@uid_root, prefix(element.tag))
540
- # Add this tag record to the audit trail:
541
- @audit_trail.add_record('uids', original, replacement)
542
- end
543
- # Replace the UID in the DICOM object:
544
- element.value = replacement
545
- else
546
- # We don't care about preserving UID relations. Just insert a custom UID:
547
- element.value = DICOM.generate_uid(@uid_root, prefix(element.tag))
548
- end
549
- end
550
- end
551
- end
552
- end
553
- end
554
-
555
- # Sets up some default information variables that are used by the Anonymizer.
556
- #
557
- def set_defaults
558
- # Some UIDs should not be remapped even if uid anonymization has been requested:
559
- @static_uids = {
560
- # Private related:
561
- '0002,0100' => true,
562
- '0004,1432' => true,
563
- # Coding scheme related:
564
- '0008,010C' => true,
565
- '0008,010D' => true,
566
- # Transfer syntax related:
567
- '0002,0010' => true,
568
- '0400,0010' => true,
569
- '0400,0510' => true,
570
- '0004,1512' => true,
571
- # SOP class related:
572
- '0000,0002' => true,
573
- '0000,0003' => true,
574
- '0002,0002' => true,
575
- '0004,1510' => true,
576
- '0004,151A' => true,
577
- '0008,0016' => true,
578
- '0008,001A' => true,
579
- '0008,001B' => true,
580
- '0008,0062' => true,
581
- '0008,1150' => true,
582
- '0008,115A' => true
583
- }
584
- # Sets up default tags that will be anonymized, along with default replacement values and enumeration settings.
585
- # This data is stored in 3 separate instance arrays for tags, values and enumeration.
586
- data = [
587
- ['0008,0012', '20000101', false], # Instance Creation Date
588
- ['0008,0013', '000000.00', false], # Instance Creation Time
589
- ['0008,0020', '20000101', false], # Study Date
590
- ['0008,0021', '20000101', false], # Series Date
591
- ['0008,0022', '20000101', false], # Acquisition Date
592
- ['0008,0023', '20000101', false], # Image Date
593
- ['0008,0030', '000000.00', false], # Study Time
594
- ['0008,0031', '000000.00', false], # Series Time
595
- ['0008,0032', '000000.00', false], # Acquisition Time
596
- ['0008,0033', '000000.00', false], # Image Time
597
- ['0008,0050', '', true], # Accession Number
598
- ['0008,0080', 'Institution', true], # Institution name
599
- ['0008,0081', 'Address', true], # Institution Address
600
- ['0008,0090', 'Physician', true], # Referring Physician's name
601
- ['0008,1010', 'Station', true], # Station name
602
- ['0008,1040', 'Department', true], # Institutional Department name
603
- ['0008,1070', 'Operator', true], # Operator's Name
604
- ['0010,0010', 'Patient', true], # Patient's name
605
- ['0010,0020', 'ID', true], # Patient's ID
606
- ['0010,0030', '20000101', false], # Patient's Birth Date
607
- ['0010,0040', 'O', false], # Patient's Sex
608
- ['0010,1010', '', false], # Patient's Age
609
- ['0020,4000', '', false], # Image Comments
610
- ].transpose
611
- @tags = data[0]
612
- @values = data[1]
613
- @enumerations = data[2]
614
- # Tags to be deleted completely during anonymization:
615
- @delete = Hash.new
616
- end
617
-
618
- # Collects the attributes of this instance.
619
- #
620
- # @return [Array] an array of attributes
621
- #
622
- def state
623
- [
624
- @tags, @values, @enumerations, @delete, @blank,
625
- @delete_private, @enumeration, @logger_level,
626
- @random_file_name, @recursive, @uid, @uid_root, @write_path
627
- ]
628
- end
629
-
630
- # Writes a DICOM object to file.
631
- #
632
- # @param [DObject] dcm a DICOM object
633
- #
634
- def write(dcm)
635
- if @write_path
636
- # The DICOM object is to be written to a separate directory. If the
637
- # original and the new directories have a common root, this is taken into
638
- # consideration when determining the object's write path:
639
- path = destination(dcm)
640
- if @random_file_name
641
- file_name = "#{SecureRandom.hex(16)}.dcm"
642
- else
643
- file_name = File.basename(dcm.source)
644
- end
645
- dcm.write(File.join(path, file_name))
646
- else
647
- # The original DICOM file is overwritten with the anonymized DICOM object:
648
- dcm.write(dcm.source)
649
- end
650
- end
651
-
652
- end
653
-
654
- end
1
+ module DICOM
2
+
3
+ # This is a convenience class for handling the anonymization
4
+ # (de-identification) of DICOM files.
5
+ #
6
+ # @note
7
+ # For a thorough introduction to the concept of DICOM anonymization,
8
+ # please refer to The DICOM Standard, Part 15: Security and System
9
+ # Management Profiles, Annex E: Attribute Confidentiality Profiles.
10
+ # For guidance on settings for individual data elements, please
11
+ # refer to DICOM PS 3.15, Annex E, Table E.1-1: Application Level
12
+ # Confidentiality Profile Attributes.
13
+ #
14
+ class Anonymizer
15
+ include Logging
16
+
17
+ # An AuditTrail instance used for this anonymization (if specified).
18
+ attr_reader :audit_trail
19
+ # The file name used for the AuditTrail serialization (if specified).
20
+ attr_reader :audit_trail_file
21
+ # A boolean that, if set as true, will cause all anonymized tags to be blank instead of getting some generic value.
22
+ attr_accessor :blank
23
+ # A hash of elements (represented by tag keys) that will be deleted from the DICOM objects on anonymization.
24
+ attr_reader :delete
25
+ # A boolean that if set as true, will make the anonymization delete all private tags.
26
+ attr_accessor :delete_private
27
+ # The cryptographic hash function to be used for encrypting DICOM values recorded in an audit trail file.
28
+ attr_reader :encryption
29
+ # A boolean that, if set as true, will cause all anonymized tags to get enumerated values, to enable post-anonymization re-identification by the user.
30
+ attr_accessor :enumeration
31
+ # The logger level which is applied to DObject operations during anonymization (defaults to Logger::FATAL).
32
+ attr_reader :logger_level
33
+ # A boolean that if set as true will cause all anonymized files to be written with random file names (if write_path has been specified).
34
+ attr_accessor :random_file_name
35
+ # A boolean that if set as true, will cause the anonymization to run on all levels of the DICOM file tag hierarchy.
36
+ attr_accessor :recursive
37
+ # A boolean indicating whether or not UIDs shall be replaced when executing the anonymization.
38
+ attr_accessor :uid
39
+ # The DICOM UID root to use when generating new UIDs.
40
+ attr_accessor :uid_root
41
+ # The path where the anonymized files will be saved. If this value is not set, the original DICOM files will be overwritten.
42
+ attr_accessor :write_path
43
+
44
+ # Creates an Anonymizer instance.
45
+ #
46
+ # @note To customize logging behaviour, refer to the Logging module documentation.
47
+ # @param [Hash] options the options to create an anonymizer instance with
48
+ # @option options [String] :audit_trail a file name path (if the file contains old audit data, these are loaded and used in the current anonymization)
49
+ # @option options [Boolean] :blank toggles whether to set the values of anonymized elements as empty instead of some generic value
50
+ # @option options [Boolean] :delete_private toggles whether private elements are to be deleted
51
+ # @option options [TrueClass, Digest::Class] :encryption if set as true, the default hash function (MD5) will be used for representing DICOM values in an audit file. Otherwise a Digest class can be given, e.g. Digest::SHA256
52
+ # @option options [Boolean] :enumeration toggles whether (some) elements get enumerated values (to enable post-anonymization re-identification)
53
+ # @option options [Fixnum] :logger_level the logger level which is applied to DObject operations during anonymization (defaults to Logger::FATAL)
54
+ # @option options [Boolean] :random_file_name toggles whether anonymized files will be given random file names when rewritten (in combination with the :write_path option)
55
+ # @option options [Boolean] :recursive toggles whether to anonymize on all sub-levels of the DICOM object tag hierarchies
56
+ # @option options [Boolean] :uid toggles whether UIDs will be replaced with custom generated UIDs (beware that to preserve UID relations in studies/series, the audit_trail feature must be used)
57
+ # @option options [String] :uid_root an organization (or custom) UID root to use when replacing UIDs
58
+ # @option options [String] :write_path a directory where the anonymized files are re-written (if not specified, files are overwritten)
59
+ # @example Create an Anonymizer instance and increase the log output
60
+ # a = Anonymizer.new
61
+ # a.logger.level = Logger::INFO
62
+ # @example Perform anonymization using the audit trail feature
63
+ # a = Anonymizer.new(:audit_trail => 'trail.json')
64
+ # a.enumeration = true
65
+ # a.write_path = '//anonymized/'
66
+ # a.anonymize('//dicom/today/')
67
+ #
68
def initialize(options={})
  # Transfer options to attributes (options that are not given stay nil, i.e. disabled):
  @blank = options[:blank]
  @delete_private = options[:delete_private]
  @enumeration = options[:enumeration]
  @logger_level = options[:logger_level] || Logger::FATAL
  @random_file_name = options[:random_file_name]
  @recursive = options[:recursive]
  @uid = options[:uid]
  @uid_root = options[:uid_root] || UID_ROOT
  @write_path = options[:write_path]
  # Array of folders to be processed for anonymization:
  @folders = []
  # Folders that will be skipped:
  @exceptions = []
  # Data elements which will be anonymized (the array will hold a list of tag strings):
  @tags = []
  # Default values to use on anonymized data elements:
  @values = []
  # Which data elements will have enumeration applied, if requested by the user:
  @enumerations = []
  # We use a Hash to store information from DICOM files if enumeration is desired:
  @enum_old_hash = {}
  @enum_new_hash = {}
  # All the files to be anonymized will be put in this array:
  @files = []
  # Numeric prefixes handed out per UID tag (see #prefix):
  @prefixes = {}
  # Setup audit trail if requested:
  if options[:audit_trail]
    @audit_trail_file = options[:audit_trail]
    # File.exist? is used because File.exists? was removed in Ruby 3.2.
    # A file of 2 bytes or less is treated as holding no audit data.
    if File.exist?(@audit_trail_file) && File.size(@audit_trail_file) > 2
      # Load the pre-existing audit trail from file:
      @audit_trail = AuditTrail.read(@audit_trail_file)
    else
      # Start from scratch with an empty audit trail:
      @audit_trail = AuditTrail.new
    end
    # Set up encryption if indicated (only meaningful together with an audit trail):
    if options[:encryption]
      require 'digest'
      # Anything that offers hexdigest is accepted as the hash function.
      # Any other truthy value (e.g. true) selects the MD5 default.
      if options[:encryption].respond_to?(:hexdigest)
        @encryption = options[:encryption]
      else
        @encryption = Digest::MD5
      end
    end
  end
  # Set the default data elements to be anonymized:
  set_defaults
end
118
+
119
+ # Checks for equality.
120
+ #
121
+ # Other and self are considered equivalent if they are
122
+ # of compatible types and their attributes are equivalent.
123
+ #
124
+ # @param other an object to be compared with self.
125
+ # @return [Boolean] true if self and other are considered equivalent
126
+ #
127
def ==(other)
  # Objects that cannot act as an anonymizer are never equal to self.
  # An explicit false (instead of an implicit nil) honours the documented
  # Boolean return type.
  return false unless other.respond_to?(:to_anonymizer)
  # state is private, hence the use of send to read the other instance's attributes.
  other.send(:state) == state
end
132
+
133
+ alias_method :eql?, :==
134
+
135
+ # Anonymizes the given DObject or array of DICOM objects with the settings
136
+ # of this Anonymizer instance.
137
+ #
138
+ # @param [DObject, Array<DObject>] dicom single or multiple DICOM objects
139
+ # @return [Array<DObject>] an array of the anonymized DICOM objects
140
+ #
141
def anonymize(dicom)
  # Accept a single object as well as a collection of objects.
  objects = dicom.respond_to?(:to_ary) ? dicom : Array[dicom]
  if @tags.empty?
    logger.warn("No tags have been selected for anonymization. Aborting anonymization.")
  else
    prepare_anonymization
    objects.each { |candidate| anonymize_dcm(candidate.to_dcm) }
  end
  # Persist the audit trail, if one is in use:
  @audit_trail.write(@audit_trail_file) if @audit_trail
  logger.info("Anonymization complete.")
  objects
end
156
+
157
+ # Anonymizes any DICOM files found at the given path (file or directory)
158
+ # with the settings of this Anonymizer instance.
159
+ #
160
+ # @param [String] path a file or directory path
161
+ #
162
def anonymize_path(path)
  if @tags.empty?
    logger.warn("No tags have been selected for anonymization. Aborting anonymization.")
  else
    prepare_anonymization
    files = DICOM.load_files(path)
    logger.info("#{files.length} DICOM files have been prepared for anonymization.")
    # Each file is read, anonymized and written back before the next one is touched.
    files.each do |file|
      write(anonymize_file(file))
    end
  end
  # Persist the audit trail, if one is in use:
  @audit_trail.write(@audit_trail_file) if @audit_trail
  logger.info("Anonymization complete.")
end
178
+
179
+ # Specifies that the given tag is to be completely deleted
180
+ # from the anonymized DICOM objects.
181
+ #
182
+ # @param [String] tag a data element tag
183
+ # @example Completely delete the Patient's Name tag from the DICOM files
184
+ # a.delete_tag('0010,0010')
185
+ #
186
def delete_tag(tag)
  unless tag.is_a?(String)
    raise ArgumentError, "Expected String, got #{tag.class}."
  end
  unless tag.tag?
    raise ArgumentError, "Expected a valid tag of format 'GGGG,EEEE', got #{tag}."
  end
  # The hash is used as a set: a tag being present as a key marks it for deletion.
  @delete[tag] = true
end
191
+
192
+ # Checks the enumeration status of this tag.
193
+ #
194
+ # @param [String] tag a data element tag
195
+ # @return [Boolean, NilClass] the enumeration status of the tag, or nil if the tag has no match
196
+ #
197
def enum(tag)
  unless tag.is_a?(String)
    raise ArgumentError, "Expected String, got #{tag.class}."
  end
  unless tag.tag?
    raise ArgumentError, "Expected a valid tag of format 'GGGG,EEEE', got #{tag}."
  end
  # The tag, value and enumeration arrays are parallel: one index addresses all three.
  position = @tags.index(tag)
  return @enumerations[position] if position
  logger.warn("The specified tag (#{tag}) was not found in the list of tags to be anonymized.")
  nil
end
208
+
209
+ # Computes a hash code for this object.
210
+ #
211
+ # @note Two objects with the same attributes will have the same hash code.
212
+ #
213
+ # @return [Fixnum] the object's hash code
214
+ #
215
def hash
  # Derive the hash code from the same attribute list that == compares, so that
  # instances with equal attributes get equal hash codes.
  attributes = state
  attributes.hash
end
218
+
219
+ # Removes a tag from the list of tags that will be anonymized.
220
+ #
221
+ # @param [String] tag a data element tag
222
+ # @example Do not anonymize the Patient's Name tag
223
+ # a.remove_tag('0010,0010')
224
+ #
225
def remove_tag(tag)
  unless tag.is_a?(String)
    raise ArgumentError, "Expected String, got #{tag.class}."
  end
  unless tag.tag?
    raise ArgumentError, "Expected a valid tag of format 'GGGG,EEEE', got #{tag}."
  end
  position = @tags.index(tag)
  # Nothing to do when the tag is not in the list:
  return unless position
  # Drop the entry from all three parallel arrays to keep them aligned:
  @tags.delete_at(position)
  @values.delete_at(position)
  @enumerations.delete_at(position)
end
235
+
236
+ # Sets the anonymization settings for the specified tag. If the tag is already present in the list
237
+ # of tags to be anonymized, its settings are updated, and if not, a new tag entry is created.
238
+ #
239
+ # @param [String] tag a data element tag
240
+ # @param [Hash] options the anonymization settings for the specified tag
241
+ # @option options [String, Integer, Float] :value the replacement value to be used when anonymizing this data element. Defaults to the pre-existing value for a tag that is already listed, and to a type dependent default (0, 0.0 or '') for new tags.
242
+ # @option options [Boolean] :enum specifies if enumeration is to be used for this tag. Defaults to the pre-existing value for a tag that is already listed, and to false for new tags.
243
+ # @example Set the anonymization settings of the Patient's Name tag
244
+ # a.set_tag('0010,0010', :value => 'MrAnonymous', :enum => true)
245
+ #
246
def set_tag(tag, options={})
  unless tag.is_a?(String)
    raise ArgumentError, "Expected String, got #{tag.class}."
  end
  unless tag.tag?
    raise ArgumentError, "Expected a valid tag of format 'GGGG,EEEE', got #{tag}."
  end
  new_value = options[:value]
  new_enum = options[:enum]
  position = @tags.index(tag)
  if position.nil?
    # Unknown tag: append an entry to each of the three parallel arrays.
    @tags << tag
    @values << (new_value || default_value(tag))
    @enumerations << (new_enum || false)
  else
    # Known tag: only overwrite the settings that were actually given.
    @values[position] = new_value if new_value
    # An explicit false must be able to switch enumeration off, so only nil is skipped.
    @enumerations[position] = new_enum unless new_enum.nil?
  end
end
261
+
262
+ # Returns self.
263
+ #
264
+ # @return [Anonymizer] self
265
+ #
266
def to_anonymizer
  # An Anonymizer needs no conversion; responding to this method is also what
  # marks an object as comparable in ==.
  self
end
269
+
270
+ # Gives the value which will be used when anonymizing this tag.
271
+ #
272
+ # @note If enumeration is selected for a string type tag, a number will be
273
+ # appended in addition to the string that is returned here.
274
+ #
275
+ # @param [String] tag a data element tag
276
+ # @return [String, Integer, Float, NilClass] the replacement value for the specified tag, or nil if the tag is not matched
277
+ #
278
def value(tag)
  unless tag.is_a?(String)
    raise ArgumentError, "Expected String, got #{tag.class}."
  end
  unless tag.tag?
    raise ArgumentError, "Expected a valid tag of format 'GGGG,EEEE', got #{tag}."
  end
  # The tag and value arrays are parallel: the tag's index addresses its value.
  position = @tags.index(tag)
  return @values[position] if position
  logger.warn("The specified tag (#{tag}) was not found in the list of tags to be anonymized.")
  nil
end
289
+
290
+
291
+ private
292
+
293
+
294
+ # Performs anonymization on a DICOM object.
295
+ #
296
+ # @param [DObject] dcm a DICOM object
297
+ #
298
def anonymize_dcm(dcm)
  # Collect the parents (the object itself, plus items when recursive) to process:
  containers = element_parents(dcm)
  containers.each do |container|
    # Replace the value of every selected tag that is present in this parent:
    @tags.each_with_index do |tag, idx|
      next unless container.exists?(tag)
      element = container[tag]
      # Only plain data elements carry a value which can be replaced:
      next unless element.is_a?(Element)
      element.value =
        if @blank
          ''
        elsif @enumeration
          # Enumeration only makes sense if the element actually has a value:
          current = element.value
          current ? enumerated_value(current, idx) : ''
        else
          # Use the replacement value which has been set for this tag:
          @values[idx]
        end
    end
    # Remove the elements which are marked for deletion:
    @delete.each_key do |doomed|
      container.delete(doomed) if container.exists?(doomed)
    end
  end
  # General DICOM object manipulation:
  # Add a Patient Identity Removed attribute (as per
  # DICOM PS 3.15, Annex E, E.1.1 De-Identifier, point 6):
  dcm.add(Element.new('0012,0062', 'YES'))
  # Add a De-Identification Method Code Sequence Item:
  dcm.add(Sequence.new('0012,0064')) unless dcm.exists?('0012,0064')
  method_item = dcm['0012,0064'].add_item
  method_item.add(Element.new('0012,0063', 'De-identified by the ruby-dicom Anonymizer'))
  # FIXME: At some point we should add a set of de-identification method codes, as per
  # DICOM PS 3.16 CID 7050 which corresponds to the settings chosen for the anonymizer.
  # Delete the old File Meta Information group (as per
  # DICOM PS 3.15, Annex E, E.1.1 De-Identifier, point 7):
  dcm.delete_group('0002')
  # Replace UIDs if requested:
  replace_uids(containers) if @uid
  # Delete private tags if requested:
  dcm.delete_private if @delete_private
end
348
+
349
+ # Performs anonymization of a DICOM file.
350
+ #
351
+ # @param [String] file a DICOM file path
352
+ #
353
def anonymize_file(file)
  # Temporarily adjust the ruby-dicom log threshold (to suppress messages from the DObject class):
  @original_level = logger.level
  logger.level = @logger_level
  begin
    dcm = DObject.read(file)
  ensure
    # Restore the threshold even if reading raises, so that a failing file
    # does not leave the logger at the temporary level.
    logger.level = @original_level
  end
  anonymize_dcm(dcm)
  # Return the anonymized object for the caller to write:
  dcm
end
362
+
363
+ # Gives the value to be used for the audit trail, which is either
364
+ # the original value itself, or an encrypted string based on it.
365
+ #
366
+ # @param [String, Integer, Float] original the original value of the tag to be anonymized
367
+ # @return [String, Integer, Float] with encryption, a hash string is returned, otherwise the original value
368
+ #
369
def at_value(original)
  # Without encryption the original value is recorded as-is:
  return original unless @encryption
  # hexdigest only accepts strings, so numeric originals (Integer/Float) are
  # converted first. For strings, to_s leaves the value unchanged.
  @encryption.hexdigest(original.to_s)
end
372
+
373
+ # Creates a hash that is used for storing information that is used when enumeration is selected.
374
+ #
375
def create_enum_hash
  # Give every tag a fresh, empty list of seen originals and of the
  # replacements that were handed out for them.
  @enumerations.each_with_index do |_flag, idx|
    tag = @tags[idx]
    @enum_old_hash[tag] = []
    @enum_new_hash[tag] = []
  end
end
381
+
382
+ # Determines a default value to use for anonymizing the given tag.
383
+ #
384
+ # @param [String] tag a data element tag
385
+ # @return [String, Integer, Float] the default replacement value for a given tag
386
+ #
387
def default_value(tag)
  # Only the value representation is of interest here, the name is discarded:
  _name, vr = LIBRARY.name_and_vr(tag)
  # Numeric conversions get a zero of the matching type. Anything else is
  # assumed to be of string type and gets an empty string.
  { :to_i => 0, :to_f => 0.0 }.fetch(VALUE_CONVERSION[vr], '')
end
398
+
399
+ # Creates a write path for the given DICOM object, based on the object's
400
+ # original file path and the write_path attribute.
401
+ #
402
+ # @param [DObject] dcm a DICOM object
403
+ # @return [String] the destination directory path
404
+ #
405
def destination(dcm)
  source = dcm.source
  # Separate the directory part from the file name of the source string:
  name_offset = source.rindex(File.basename(source))
  source_dir = name_offset == 0 ? "." : source[0..(name_offset - 1)]
  source_parts = source_dir.split(File::SEPARATOR)
  target_parts = @write_path.split(File::SEPARATOR)
  # Get rid of the current dir symbol:
  source_parts.delete('.')
  # Find the first folder level (within the shorter of the two paths) where they differ:
  shared_levels = [source_parts.length, target_parts.length].min
  split_at = (0...shared_levels).find { |level| target_parts[level] != source_parts[level] }
  # The source folders from the point of divergence are appended to the write
  # path. With no divergence, the write path is used on its own.
  remainder = split_at ? source_parts[split_at..-1] : nil
  [target_parts, remainder].compact.join(File::SEPARATOR)
end
430
+
431
+ # Extracts all parents from a DObject instance which potentially
432
+ # have child (data) elements. This typically means the DObject
433
+ # instance itself as well as items (i.e. not sequences).
434
+ # Note that unless the @recursive attribute has been set,
435
+ # this method will only return the DObject (placed inside an array).
436
+ #
437
+ # @param [DObject] dcm a DICOM object
438
+ # @return [Array<DObject, Item>] an array containing either just a DObject or also all parental child items within the tag hierarchy
439
+ #
440
def element_parents(dcm)
  # The DICOM object itself is always a parent to investigate:
  collected = [dcm]
  if @recursive
    # Add the items found below each of the object's sequences:
    dcm.sequences.each do |seq|
      collected.concat(element_parents_recursive(seq))
    end
  end
  collected
end
450
+
451
+ # Recursively extracts all item parents from a sequence instance (including
452
+ # any sub-sequences) which actually contain child (data) elements.
453
+ #
454
+ # @param [Sequence] sequence a Sequence instance
455
+ # @return [Array<Item>] an array containing items within the tag hierarchy that contains child elements
456
+ #
457
def element_parents_recursive(sequence)
  found = []
  sequence.items.each do |item|
    # Only items which hold data elements are of interest as parents:
    found.push(item) if item.elements?
    # Descend into any sequences nested below this item (depth first):
    item.sequences.each do |sub_sequence|
      found.concat(element_parents_recursive(sub_sequence))
    end
  end
  found
end
467
+
468
+ # Handles the enumeration for the given data element tag.
469
+ # If its value has been encountered before, its corresponding enumerated
470
+ # replacement value is retrieved, and if a new original value is encountered,
471
+ # a new enumerated replacement value is found by increasing an index by 1.
472
+ #
473
+ # @param [String, Integer, Float] original the original value of the tag to be anonymized
474
+ # @param [Fixnum] j the index of this tag in the tag-related instance arrays
475
+ # @return [String, Integer, Float] the replacement value which is used for the anonymization of the tag
476
+ #
477
def enumerated_value(original, j)
  # Tags without enumeration simply get their configured replacement value:
  return @values[j] unless @enumerations[j]
  tag = @tags[j]
  if @audit_trail
    # The audit trail is keyed on the (possibly encrypted) original value:
    key = at_value(original)
    replacement = @audit_trail.replacement(tag, key)
    unless replacement
      # This original value has not been encountered yet. Use the next free index:
      index = @audit_trail.records(tag).length + 1
      replacement = numbered_value(@values[j], index)
      # Add this tag record to the audit trail:
      @audit_trail.add_record(tag, key, replacement)
    end
    replacement
  else
    # Retrieve the originals seen so far and the replacements handed out for them:
    previous_old = @enum_old_hash[tag]
    previous_new = @enum_new_hash[tag]
    seen_at = previous_old.index(original)
    # The value has been observed before: reuse its replacement.
    return previous_new[seen_at] if seen_at
    # New original value: build its replacement from the next index and remember both.
    replacement = numbered_value(@values[j], previous_old.length + 1)
    previous_old << original
    previous_new << replacement
    replacement
  end
end

# Combines a base replacement value with an enumeration index.
# Strings get the index appended as text, numeric values get it added.
# Both enumeration branches use this helper, so numeric replacement values
# also work when no audit trail is used.
#
# @param [String, Integer, Float] base the configured replacement value
# @param [Integer] index the enumeration index (starting at 1)
# @return [String, Integer, Float] the enumerated replacement value
#
def numbered_value(base, index)
  base.is_a?(String) ? base + index.to_s : base + index
end
518
+
519
+ # Establishes a prefix for a given UID tag.
520
+ # This makes it somewhat easier to distinguish
521
+ # between different types of random generated UIDs.
522
+ #
523
+ # @param [String] tag a data element string tag
524
+ #
525
def prefix(tag)
  # Prefixes are handed out in order of first appearance (1, 2, 3, ...) and are
  # remembered, so a given tag keeps the same prefix throughout.
  @prefixes[tag] ||= @prefixes.length + 1
end
533
+
534
+ # Prepares the anonymizer for anonymization.
535
+ #
536
+ #
537
def prepare_anonymization
  # Reset the enumeration bookkeeping when enumeration is switched on:
  if @enumeration
    create_enum_hash
  end
  # SecureRandom is only needed (and thus only loaded) for random file names:
  if @random_file_name
    require 'securerandom'
  end
end
542
+
543
+ # Replaces the UIDs of the given DICOM object.
544
+ #
545
+ # @note Empty UIDs are ignored (we don't generate new UIDs for these).
546
+ # @note If AuditTrail is set, the relationships between old and new UIDs are preserved,
547
+ # and the relations between files in a study/series should remain valid.
548
+ # @param [Array<DObject, Item>] parents dicom parent objects whose child elements will be investigated
549
+ #
550
def replace_uids(parents)
  parents.each do |parent|
    parent.each_element do |element|
      # Only UID elements which are not on the static list are remapped:
      next unless element.vr == 'UI'
      next if @static_uids[element.tag]
      original = element.value
      # Empty UIDs are left alone:
      next unless original && original.length > 0
      element.value =
        if @audit_trail
          # Reuse the replacement if this UID has been encountered already:
          known = @audit_trail.replacement('uids', original)
          if known
            known
          else
            # The UID has not been stored previously. Generate and record a new one:
            fresh = DICOM.generate_uid(@uid_root, prefix(element.tag))
            @audit_trail.add_record('uids', original, fresh)
            fresh
          end
        else
          # We don't care about preserving UID relations. Just insert a custom UID:
          DICOM.generate_uid(@uid_root, prefix(element.tag))
        end
    end
  end
end
577
+
578
+ # Sets up some default information variables that are used by the Anonymizer.
579
+ #
580
def set_defaults
  # Some UIDs should not be remapped even if uid anonymization has been requested:
  static_tags = [
    # Private related:
    '0002,0100', '0004,1432',
    # Coding scheme related:
    '0008,010C', '0008,010D',
    # Transfer syntax related:
    '0002,0010', '0400,0010', '0400,0510', '0004,1512',
    # SOP class related:
    '0000,0002', '0000,0003', '0002,0002', '0004,1510', '0004,151A',
    '0008,0016', '0008,001A', '0008,001B', '0008,0062', '0008,1150',
    '0008,115A'
  ]
  @static_uids = {}
  static_tags.each { |tag| @static_uids[tag] = true }
  # The default tags to anonymize, each given as [tag, replacement value, enumeration].
  # The rows are distributed over 3 parallel instance arrays for tags, values and enumeration.
  @tags = []
  @values = []
  @enumerations = []
  [
    ['0008,0012', '20000101', false], # Instance Creation Date
    ['0008,0013', '000000.00', false], # Instance Creation Time
    ['0008,0020', '20000101', false], # Study Date
    ['0008,0021', '20000101', false], # Series Date
    ['0008,0022', '20000101', false], # Acquisition Date
    ['0008,0023', '20000101', false], # Image Date
    ['0008,0030', '000000.00', false], # Study Time
    ['0008,0031', '000000.00', false], # Series Time
    ['0008,0032', '000000.00', false], # Acquisition Time
    ['0008,0033', '000000.00', false], # Image Time
    ['0008,0050', '', true], # Accession Number
    ['0008,0080', 'Institution', true], # Institution name
    ['0008,0081', 'Address', true], # Institution Address
    ['0008,0090', 'Physician', true], # Referring Physician's name
    ['0008,1010', 'Station', true], # Station name
    ['0008,1040', 'Department', true], # Institutional Department name
    ['0008,1070', 'Operator', true], # Operator's Name
    ['0010,0010', 'Patient', true], # Patient's name
    ['0010,0020', 'ID', true], # Patient's ID
    ['0010,0030', '20000101', false], # Patient's Birth Date
    ['0010,0040', 'O', false], # Patient's Sex
    ['0010,1010', '', false], # Patient's Age
    ['0020,4000', '', false], # Image Comments
  ].each do |tag, replacement, enumerate|
    @tags << tag
    @values << replacement
    @enumerations << enumerate
  end
  # Tags to be deleted completely during anonymization:
  @delete = {}
end
640
+
641
+ # Collects the attributes of this instance.
642
+ #
643
+ # @return [Array] an array of attributes
644
+ #
645
def state
  # The per-tag settings come first, followed by the instance-wide options.
  # Both == and hash are based on this list.
  tag_settings = [@tags, @values, @enumerations, @delete]
  option_settings = [
    @blank, @delete_private, @enumeration, @logger_level,
    @random_file_name, @recursive, @uid, @uid_root, @write_path
  ]
  tag_settings + option_settings
end
652
+
653
+ # Writes a DICOM object to file.
654
+ #
655
+ # @param [DObject] dcm a DICOM object
656
+ #
657
def write(dcm)
  # Without a write path, the original DICOM file is overwritten with the anonymized object:
  return dcm.write(dcm.source) unless @write_path
  # The object goes to a separate directory. Any root which the source and the
  # write path have in common is taken into consideration by destination:
  target_dir = destination(dcm)
  file_name = @random_file_name ? "#{SecureRandom.hex(16)}.dcm" : File.basename(dcm.source)
  dcm.write(File.join(target_dir, file_name))
end
674
+
675
+ end
676
+
677
+ end