dicom 0.9.5 → 0.9.6

Sign up to get free protection for your applications and to get access to all the features.
Files changed (39) hide show
  1. checksums.yaml +13 -5
  2. data/{CHANGELOG.rdoc → CHANGELOG.md} +50 -30
  3. data/{CONTRIBUTING.rdoc → CONTRIBUTING.md} +16 -16
  4. data/Gemfile.lock +47 -0
  5. data/README.md +152 -0
  6. data/dicom.gemspec +11 -10
  7. data/lib/dicom.rb +30 -11
  8. data/lib/dicom/anonymizer.rb +654 -649
  9. data/lib/dicom/audit_trail.rb +0 -2
  10. data/lib/dicom/d_client.rb +1 -1
  11. data/lib/dicom/d_library.rb +45 -15
  12. data/lib/dicom/d_object.rb +18 -18
  13. data/lib/dicom/d_read.rb +28 -4
  14. data/lib/dicom/d_write.rb +49 -26
  15. data/lib/dicom/dictionary/{elements.txt → elements.tsv} +0 -0
  16. data/lib/dicom/dictionary/{uids.txt → uids.tsv} +0 -0
  17. data/lib/dicom/element.rb +6 -7
  18. data/lib/dicom/elemental.rb +1 -0
  19. data/lib/dicom/elemental_parent.rb +64 -0
  20. data/lib/dicom/extensions/array.rb +57 -0
  21. data/lib/dicom/extensions/hash.rb +31 -0
  22. data/lib/dicom/extensions/string.rb +126 -0
  23. data/lib/dicom/{constants.rb → general/constants.rb} +29 -38
  24. data/lib/dicom/{deprecated.rb → general/deprecated.rb} +0 -0
  25. data/lib/dicom/{logging.rb → general/logging.rb} +0 -0
  26. data/lib/dicom/{variables.rb → general/methods.rb} +0 -22
  27. data/lib/dicom/general/variables.rb +29 -0
  28. data/lib/dicom/{version.rb → general/version.rb} +1 -1
  29. data/lib/dicom/image_item.rb +0 -2
  30. data/lib/dicom/image_processor.rb +2 -0
  31. data/lib/dicom/item.rb +1 -13
  32. data/lib/dicom/link.rb +2 -1
  33. data/lib/dicom/parent.rb +34 -86
  34. data/lib/dicom/sequence.rb +1 -13
  35. data/lib/dicom/stream.rb +94 -114
  36. data/rakefile.rb +1 -1
  37. metadata +73 -36
  38. data/README.rdoc +0 -149
  39. data/lib/dicom/ruby_extensions.rb +0 -249
@@ -1,6 +1,6 @@
1
1
  # encoding: UTF-8
2
2
 
3
- require File.expand_path('../lib/dicom/version', __FILE__)
3
+ require File.expand_path('../lib/dicom/general/version', __FILE__)
4
4
 
5
5
  Gem::Specification.new do |s|
6
6
  s.platform = Gem::Platform::RUBY
@@ -17,14 +17,15 @@ Gem::Specification.new do |s|
17
17
  s.files = Dir["{lib}/**/*", "[A-Z]*"]
18
18
  s.rubyforge_project = 'dicom'
19
19
 
20
- s.required_ruby_version = '>= 1.9.2'
20
+ s.required_ruby_version = '>= 1.9.3'
21
21
 
22
- s.add_development_dependency('bundler', '~> 1.3')
23
- s.add_development_dependency('mocha', '~> 0.13')
24
- s.add_development_dependency('mini_magick', '~> 3.5')
25
- s.add_development_dependency('narray', '~> 0.6.0.8')
26
- s.add_development_dependency('rake', '~> 0.9.6')
27
- s.add_development_dependency('rmagick', '~> 2.13.2')
28
- s.add_development_dependency('rspec', '~> 2.13')
29
- s.add_development_dependency('yard', '~> 0.8.5')
22
+ s.add_development_dependency('bundler', '~> 1.6')
23
+ s.add_development_dependency('mini_magick', '~> 3.7')
24
+ s.add_development_dependency('mocha', '~> 1.1')
25
+ s.add_development_dependency('narray', '~> 0.6', '>= 0.6.0.8')
26
+ s.add_development_dependency('rake', '~> 10.3')
27
+ s.add_development_dependency('redcarpet', '~> 3.1')
28
+ s.add_development_dependency('rmagick', '~> 2.13', '>= 2.13.2')
29
+ s.add_development_dependency('rspec', '~> 3.0')
30
+ s.add_development_dependency('yard', '~> 0.8', '>= 0.8.7')
30
31
  end
@@ -12,14 +12,30 @@
12
12
  # The rest of the classes visible in the documentation generated by YARD are
13
13
  # in principle 'private' classes, which are mainly of interest to developers.
14
14
 
15
- # Logging:
16
- require_relative 'dicom/logging'
15
+ # Standard library dependencies:
16
+ require 'json'
17
+ require 'yaml'
18
+
19
+ # Gem specific extensions:
20
+ require_relative 'dicom/extensions/array'
21
+ require_relative 'dicom/extensions/hash'
22
+ require_relative 'dicom/extensions/string'
23
+
24
+ # General module features/settings:
25
+ require_relative 'dicom/general/version'
26
+ require_relative 'dicom/general/constants'
27
+ require_relative 'dicom/general/variables'
28
+ require_relative 'dicom/general/methods'
29
+ require_relative 'dicom/general/logging'
30
+ require_relative 'dicom/general/deprecated'
31
+
17
32
  # Core library:
18
33
  # Super classes/modules:
19
34
  require_relative 'dicom/image_processor'
20
35
  require_relative 'dicom/parent'
21
36
  require_relative 'dicom/image_item'
22
37
  require_relative 'dicom/elemental'
38
+ require_relative 'dicom/elemental_parent'
23
39
  # Subclasses and independent classes:
24
40
  require_relative 'dicom/d_client'
25
41
  require_relative 'dicom/d_object'
@@ -36,18 +52,21 @@ require_relative 'dicom/stream'
36
52
  require_relative 'dicom/d_library'
37
53
  require_relative 'dicom/dictionary_element'
38
54
  require_relative 'dicom/uid'
39
- # Extensions to the Ruby library:
40
- require_relative 'dicom/ruby_extensions'
41
- # Module settings:
42
- require_relative 'dicom/version'
43
- require_relative 'dicom/constants'
44
- require_relative 'dicom/variables'
45
55
  # Image processors:
46
56
  require_relative 'dicom/image_processor_mini_magick'
47
57
  require_relative 'dicom/image_processor_r_magick'
48
- # Deprecated methods:
49
- require_relative 'dicom/deprecated'
50
58
 
51
59
  # Extensions (non-core functionality):
52
60
  require_relative 'dicom/anonymizer'
53
- require_relative 'dicom/audit_trail'
61
+ require_relative 'dicom/audit_trail'
62
+
63
+
64
+ module DICOM
65
+
66
+ # Defines the gem root directory in the file system.
67
+ ROOT_DIR = "#{File.dirname(__FILE__)}/dicom"
68
+
69
+ # The library instance (data dictionary) of the DICOM module.
70
+ LIBRARY = DICOM::DLibrary.new
71
+
72
+ end
@@ -1,649 +1,654 @@
1
- module DICOM
2
-
3
- # This is a convenience class for handling the anonymization
4
- # (de-identification) of DICOM files.
5
- #
6
- # @note
7
- # For a thorough introduction to the concept of DICOM anonymization,
8
- # please refer to The DICOM Standard, Part 15: Security and System
9
- # Management Profiles, Annex E: Attribute Confidentiality Profiles.
10
- # For guidance on settings for individual data elements, please
11
- # refer to DICOM PS 3.15, Annex E, Table E.1-1: Application Level
12
- # Confidentiality Profile Attributes.
13
- #
14
- class Anonymizer
15
- include Logging
16
-
17
- # An AuditTrail instance used for this anonymization (if specified).
18
- attr_reader :audit_trail
19
- # The file name used for the AuditTrail serialization (if specified).
20
- attr_reader :audit_trail_file
21
- # A boolean that if set as true will cause all anonymized tags to be blank instead of get some generic value.
22
- attr_accessor :blank
23
- # An hash of elements (represented by tag keys) that will be deleted from the DICOM objects on anonymization.
24
- attr_reader :delete
25
- # A boolean that if set as true, will make the anonymization delete all private tags.
26
- attr_accessor :delete_private
27
- # The cryptographic hash function to be used for encrypting DICOM values recorded in an audit trail file.
28
- attr_reader :encryption
29
- # A boolean that if set as true will cause all anonymized tags to be get enumerated values, to enable post-anonymization re-identification by the user.
30
- attr_accessor :enumeration
31
- # The logger level which is applied to DObject operations during anonymization (defaults to Logger::FATAL).
32
- attr_reader :logger_level
33
- # A boolean that if set as true will cause all anonymized files to be written with random file names (if write_path has been specified).
34
- attr_accessor :random_file_name
35
- # A boolean that if set as true, will cause the anonymization to run on all levels of the DICOM file tag hierarchy.
36
- attr_accessor :recursive
37
- # A boolean indicating whether or not UIDs shall be replaced when executing the anonymization.
38
- attr_accessor :uid
39
- # The DICOM UID root to use when generating new UIDs.
40
- attr_accessor :uid_root
41
- # The path where the anonymized files will be saved. If this value is not set, the original DICOM files will be overwritten.
42
- attr_accessor :write_path
43
-
44
- # Creates an Anonymizer instance.
45
- #
46
- # @note To customize logging behaviour, refer to the Logging module documentation.
47
- # @param [Hash] options the options to create an anonymizer instance with
48
- # @option options [String] :audit_trail a file name path (if the file contains old audit data, these are loaded and used in the current anonymization)
49
- # @option options [Boolean] :blank toggles whether to set the values of anonymized elements as empty instead of some generic value
50
- # @option options [Boolean] :delete_private toggles whether private elements are to be deleted
51
- # @option options [TrueClass, Digest::Class] :encryption if set as true, the default hash function (MD5) will be used for representing DICOM values in an audit file. Otherwise a Digest class can be given, e.g. Digest::SHA256
52
- # @option options [Boolean] :enumeration toggles whether (some) elements get enumerated values (to enable post-anonymization re-identification)
53
- # @option options [Fixnum] :logger_level the logger level which is applied to DObject operations during anonymization (defaults to Logger::FATAL)
54
- # @option options [Boolean] :random_file_name toggles whether anonymized files will be given random file names when rewritten (in combination with the :write_path option)
55
- # @option options [Boolean] :recursive toggles whether to anonymize on all sub-levels of the DICOM object tag hierarchies
56
- # @option options [Boolean] :uid toggles whether UIDs will be replaced with custom generated UIDs (beware that to preserve UID relations in studies/series, the audit_trail feature must be used)
57
- # @option options [String] :uid_root an organization (or custom) UID root to use when replacing UIDs
58
- # @option options [String] :write_path a directory where the anonymized files are re-written (if not specified, files are overwritten)
59
- # @example Create an Anonymizer instance and increase the log output
60
- # a = Anonymizer.new
61
- # a.logger.level = Logger::INFO
62
- # @example Perform anonymization using the audit trail feature
63
- # a = Anonymizer.new(:audit_trail => 'trail.json')
64
- # a.enumeration = true
65
- # a.write_path = '//anonymized/'
66
- # a.anonymize('//dicom/today/')
67
- #
68
- def initialize(options={})
69
- # Transfer options to attributes:
70
- @blank = options[:blank]
71
- @delete_private = options[:delete_private]
72
- @enumeration = options[:enumeration]
73
- @logger_level = options[:logger_level] || Logger::FATAL
74
- @random_file_name = options[:random_file_name]
75
- @recursive = options[:recursive]
76
- @uid = options[:uid]
77
- @uid_root = options[:uid_root] ? options[:uid_root] : UID_ROOT
78
- @write_path = options[:write_path]
79
- # Array of folders to be processed for anonymization:
80
- @folders = Array.new
81
- # Folders that will be skipped:
82
- @exceptions = Array.new
83
- # Data elements which will be anonymized (the array will hold a list of tag strings):
84
- @tags = Array.new
85
- # Default values to use on anonymized data elements:
86
- @values = Array.new
87
- # Which data elements will have enumeration applied, if requested by the user:
88
- @enumerations = Array.new
89
- # We use a Hash to store information from DICOM files if enumeration is desired:
90
- @enum_old_hash = Hash.new
91
- @enum_new_hash = Hash.new
92
- # All the files to be anonymized will be put in this array:
93
- @files = Array.new
94
- @prefixes = Hash.new
95
- # Setup audit trail if requested:
96
- if options[:audit_trail]
97
- @audit_trail_file = options[:audit_trail]
98
- if File.exists?(@audit_trail_file) && File.size(@audit_trail_file) > 2
99
- # Load the pre-existing audit trail from file:
100
- @audit_trail = AuditTrail.read(@audit_trail_file)
101
- else
102
- # Start from scratch with an empty audit trail:
103
- @audit_trail = AuditTrail.new
104
- end
105
- # Set up encryption if indicated:
106
- if options[:encryption]
107
- require 'digest'
108
- if options[:encryption].respond_to?(:hexdigest)
109
- @encryption = options[:encryption]
110
- else
111
- @encryption = Digest::MD5
112
- end
113
- end
114
- end
115
- # Set the default data elements to be anonymized:
116
- set_defaults
117
- end
118
-
119
- # Checks for equality.
120
- #
121
- # Other and self are considered equivalent if they are
122
- # of compatible types and their attributes are equivalent.
123
- #
124
- # @param other an object to be compared with self.
125
- # @return [Boolean] true if self and other are considered equivalent
126
- #
127
- def ==(other)
128
- if other.respond_to?(:to_anonymizer)
129
- other.send(:state) == state
130
- end
131
- end
132
-
133
- alias_method :eql?, :==
134
-
135
- # Anonymizes the given DICOM data with the settings of this Anonymizer instance.
136
- #
137
- # @param [String, DObject, Array<String, DObject>] data single or multiple DICOM data (directories, file paths, binary strings, DICOM objects)
138
- # @return [Array<DObject>] an array of the anonymized DICOM objects
139
- #
140
- def anonymize(data)
141
- dicom = prepare(data)
142
- if @tags.length > 0
143
- dicom.each do |dcm|
144
- anonymize_dcm(dcm)
145
- # Write DICOM object to file unless it was passed to the anonymizer as an object:
146
- write(dcm) unless dcm.was_dcm_on_input
147
- end
148
- else
149
- logger.warn("No tags have been selected for anonymization. Aborting anonymization.")
150
- end
151
- # Reset the ruby-dicom log threshold to its original level:
152
- logger.level = @original_level
153
- # Save the audit trail (if used):
154
- @audit_trail.write(@audit_trail_file) if @audit_trail
155
- logger.info("Anonymization complete.")
156
- dicom
157
- end
158
-
159
- # Specifies that the given tag is to be completely deleted
160
- # from the anonymized DICOM objects.
161
- #
162
- # @param [String] tag a data element tag
163
- # @example Completely delete the Patient's Name tag from the DICOM files
164
- # a.delete_tag('0010,0010')
165
- #
166
- def delete_tag(tag)
167
- raise ArgumentError, "Expected String, got #{tag.class}." unless tag.is_a?(String)
168
- raise ArgumentError, "Expected a valid tag of format 'GGGG,EEEE', got #{tag}." unless tag.tag?
169
- @delete[tag] = true
170
- end
171
-
172
- # Checks the enumeration status of this tag.
173
- #
174
- # @param [String] tag a data element tag
175
- # @return [Boolean, NilClass] the enumeration status of the tag, or nil if the tag has no match
176
- #
177
- def enum(tag)
178
- raise ArgumentError, "Expected String, got #{tag.class}." unless tag.is_a?(String)
179
- raise ArgumentError, "Expected a valid tag of format 'GGGG,EEEE', got #{tag}." unless tag.tag?
180
- pos = @tags.index(tag)
181
- if pos
182
- return @enumerations[pos]
183
- else
184
- logger.warn("The specified tag (#{tag}) was not found in the list of tags to be anonymized.")
185
- return nil
186
- end
187
- end
188
-
189
- # Computes a hash code for this object.
190
- #
191
- # @note Two objects with the same attributes will have the same hash code.
192
- #
193
- # @return [Fixnum] the object's hash code
194
- #
195
- def hash
196
- state.hash
197
- end
198
-
199
- # Removes a tag from the list of tags that will be anonymized.
200
- #
201
- # @param [String] tag a data element tag
202
- # @example Do not anonymize the Patient's Name tag
203
- # a.remove_tag('0010,0010')
204
- #
205
- def remove_tag(tag)
206
- raise ArgumentError, "Expected String, got #{tag.class}." unless tag.is_a?(String)
207
- raise ArgumentError, "Expected a valid tag of format 'GGGG,EEEE', got #{tag}." unless tag.tag?
208
- pos = @tags.index(tag)
209
- if pos
210
- @tags.delete_at(pos)
211
- @values.delete_at(pos)
212
- @enumerations.delete_at(pos)
213
- end
214
- end
215
-
216
- # Sets the anonymization settings for the specified tag. If the tag is already present in the list
217
- # of tags to be anonymized, its settings are updated, and if not, a new tag entry is created.
218
- #
219
- # @param [String] tag a data element tag
220
- # @param [Hash] options the anonymization settings for the specified tag
221
- # @option options [String, Integer, Float] :value the replacement value to be used when anonymizing this data element. Defaults to the pre-existing value and '' for new tags.
222
- # @option options [String, Integer, Float] :enum specifies if enumeration is to be used for this tag. Defaults to the pre-existing value and false for new tags.
223
- # @example Set the anonymization settings of the Patient's Name tag
224
- # a.set_tag('0010,0010', :value => 'MrAnonymous', :enum => true)
225
- #
226
- def set_tag(tag, options={})
227
- raise ArgumentError, "Expected String, got #{tag.class}." unless tag.is_a?(String)
228
- raise ArgumentError, "Expected a valid tag of format 'GGGG,EEEE', got #{tag}." unless tag.tag?
229
- pos = @tags.index(tag)
230
- if pos
231
- # Update existing values:
232
- @values[pos] = options[:value] if options[:value]
233
- @enumerations[pos] = options[:enum] if options[:enum] != nil
234
- else
235
- # Add new elements:
236
- @tags << tag
237
- @values << (options[:value] ? options[:value] : default_value(tag))
238
- @enumerations << (options[:enum] ? options[:enum] : false)
239
- end
240
- end
241
-
242
- # Returns self.
243
- #
244
- # @return [Anonymizer] self
245
- #
246
- def to_anonymizer
247
- self
248
- end
249
-
250
- # Gives the value which will be used when anonymizing this tag.
251
- #
252
- # @note If enumeration is selected for a string type tag, a number will be
253
- # appended in addition to the string that is returned here.
254
- #
255
- # @param [String] tag a data element tag
256
- # @return [String, Integer, Float, NilClass] the replacement value for the specified tag, or nil if the tag is not matched
257
- #
258
- def value(tag)
259
- raise ArgumentError, "Expected String, got #{tag.class}." unless tag.is_a?(String)
260
- raise ArgumentError, "Expected a valid tag of format 'GGGG,EEEE', got #{tag}." unless tag.tag?
261
- pos = @tags.index(tag)
262
- if pos
263
- return @values[pos]
264
- else
265
- logger.warn("The specified tag (#{tag}) was not found in the list of tags to be anonymized.")
266
- return nil
267
- end
268
- end
269
-
270
-
271
- private
272
-
273
-
274
- # Performs anonymization on a DICOM object.
275
- #
276
- # @param [DObject] dcm a DICOM object
277
- #
278
- def anonymize_dcm(dcm)
279
- # Extract the data element parents to investigate:
280
- parents = element_parents(dcm)
281
- parents.each do |parent|
282
- # Anonymize the desired tags:
283
- @tags.each_index do |j|
284
- if parent.exists?(@tags[j])
285
- element = parent[@tags[j]]
286
- if element.is_a?(Element)
287
- if @blank
288
- value = ''
289
- elsif @enumeration
290
- old_value = element.value
291
- # Only launch enumeration logic if there is an actual value to the data element:
292
- if old_value
293
- value = enumerated_value(old_value, j)
294
- else
295
- value = ''
296
- end
297
- else
298
- # Use the value that has been set for this tag:
299
- value = @values[j]
300
- end
301
- element.value = value
302
- end
303
- end
304
- end
305
- # Delete elements marked for deletion:
306
- @delete.each_key do |tag|
307
- parent.delete(tag) if parent.exists?(tag)
308
- end
309
- end
310
- # General DICOM object manipulation:
311
- # Add a Patient Identity Removed attribute (as per
312
- # DICOM PS 3.15, Annex E, E.1.1 De-Identifier, point 6):
313
- dcm.add(Element.new('0012,0062', 'YES'))
314
- # Add a De-Identification Method Code Sequence Item:
315
- dcm.add(Sequence.new('0012,0064')) unless dcm.exists?('0012,0064')
316
- i = dcm['0012,0064'].add_item
317
- i.add(Element.new('0012,0063', 'De-identified by the ruby-dicom Anonymizer'))
318
- # FIXME: At some point we should add a set of de-indentification method codes, as per
319
- # DICOM PS 3.16 CID 7050 which corresponds to the settings chosen for the anonymizer.
320
- # Delete the old File Meta Information group (as per
321
- # DICOM PS 3.15, Annex E, E.1.1 De-Identifier, point 7):
322
- dcm.delete_group('0002')
323
- # Handle UIDs if requested:
324
- replace_uids(parents) if @uid
325
- # Delete private tags if indicated:
326
- dcm.delete_private if @delete_private
327
- end
328
-
329
- # Gives the value to be used for the audit trail, which is either
330
- # the original value itself, or an encrypted string based on it.
331
- #
332
- # @param [String, Integer, Float] original the original value of the tag to be anonymized
333
- # @return [String, Integer, Float] with encryption, a hash string is returned, otherwise the original value
334
- #
335
- def at_value(original)
336
- @encryption ? @encryption.hexdigest(original) : original
337
- end
338
-
339
- # Creates a hash that is used for storing information that is used when enumeration is selected.
340
- #
341
- def create_enum_hash
342
- @enumerations.each_index do |i|
343
- @enum_old_hash[@tags[i]] = Array.new
344
- @enum_new_hash[@tags[i]] = Array.new
345
- end
346
- end
347
-
348
- # Determines a default value to use for anonymizing the given tag.
349
- #
350
- # @param [String] tag a data element tag
351
- # @return [String, Integer, Float] the default replacement value for a given tag
352
- #
353
- def default_value(tag)
354
- name, vr = LIBRARY.name_and_vr(tag)
355
- conversion = VALUE_CONVERSION[vr] || :to_s
356
- case conversion
357
- when :to_i then return 0
358
- when :to_f then return 0.0
359
- else
360
- # Assume type is string and return an empty string:
361
- return ''
362
- end
363
- end
364
-
365
- # Creates a write path for the given DICOM object, based on the object's
366
- # original file path and the write_path attribute.
367
- #
368
- # @param [DObject] dcm a DICOM object
369
- # @return [String] the destination directory path
370
- #
371
- def destination(dcm)
372
- # Split the source path into dir and file:
373
- source_dir = File.dirname(dcm.source)
374
- source_folders = source_dir.split(File::SEPARATOR)
375
- target_folders = @write_path.split(File::SEPARATOR)
376
- # If the first element is the current dir symbol, get rid of it:
377
- source_folders.delete('.')
378
- # Check for equalness of folder names in a range limited by the shortest array:
379
- common_length = [source_folders.length, target_folders.length].min
380
- uncommon_index = nil
381
- common_length.times do |i|
382
- if target_folders[i] != source_folders[i]
383
- uncommon_index = i
384
- break
385
- end
386
- end
387
- # Create the output path by joining the two paths together using the determined index:
388
- append_path = uncommon_index ? source_folders[uncommon_index..-1] : nil
389
- [target_folders, append_path].compact.join(File::SEPARATOR)
390
- end
391
-
392
- # Extracts all parents from a DObject instance which potentially
393
- # have child (data) elements. This typically means the DObject
394
- # instance itself as well as items (i.e. not sequences).
395
- # Note that unless the @recursive attribute has been set,
396
- # this method will only return the DObject (placed inside an array).
397
- #
398
- # @param [DObject] dcm a DICOM object
399
- # @return [Array<DObject, Item>] an array containing either just a DObject or also all parental child items within the tag hierarchy
400
- #
401
- def element_parents(dcm)
402
- parents = Array.new
403
- parents << dcm
404
- if @recursive
405
- dcm.sequences.each do |s|
406
- parents += element_parents_recursive(s)
407
- end
408
- end
409
- parents
410
- end
411
-
412
- # Recursively extracts all item parents from a sequence instance (including
413
- # any sub-sequences) which actually contain child (data) elements.
414
- #
415
- # @param [Sequence] sequence a Sequence instance
416
- # @return [Array<Item>] an array containing items within the tag hierarchy that contains child elements
417
- #
418
- def element_parents_recursive(sequence)
419
- parents = Array.new
420
- sequence.items.each do |i|
421
- parents << i if i.elements?
422
- i.sequences.each do |s|
423
- parents += element_parents_recursive(s)
424
- end
425
- end
426
- parents
427
- end
428
-
429
- # Handles the enumeration for the given data element tag.
430
- # If its value has been encountered before, its corresponding enumerated
431
- # replacement value is retrieved, and if a new original value is encountered,
432
- # a new enumerated replacement value is found by increasing an index by 1.
433
- #
434
- # @param [String, Integer, Float] original the original value of the tag to be anonymized
435
- # @param [Fixnum] j the index of this tag in the tag-related instance arrays
436
- # @return [String, Integer, Float] the replacement value which is used for the anonymization of the tag
437
- #
438
- def enumerated_value(original, j)
439
- # Is enumeration requested for this tag?
440
- if @enumerations[j]
441
- if @audit_trail
442
- # Check if the UID has been encountered already:
443
- replacement = @audit_trail.replacement(@tags[j], at_value(original))
444
- unless replacement
445
- # This original value has not been encountered yet. Determine the index to use.
446
- index = @audit_trail.records(@tags[j]).length + 1
447
- # Create the replacement value:
448
- if @values[j].is_a?(String)
449
- replacement = @values[j] + index.to_s
450
- else
451
- replacement = @values[j] + index
452
- end
453
- # Add this tag record to the audit trail:
454
- @audit_trail.add_record(@tags[j], at_value(original), replacement)
455
- end
456
- else
457
- # Retrieve earlier used anonymization values:
458
- previous_old = @enum_old_hash[@tags[j]]
459
- previous_new = @enum_new_hash[@tags[j]]
460
- p_index = previous_old.length
461
- if previous_old.index(original) == nil
462
- # Current value has not been encountered before:
463
- replacement = @values[j]+(p_index + 1).to_s
464
- # Store value in array (and hash):
465
- previous_old << original
466
- previous_new << replacement
467
- @enum_old_hash[@tags[j]] = previous_old
468
- @enum_new_hash[@tags[j]] = previous_new
469
- else
470
- # Current value has been observed before:
471
- replacement = previous_new[previous_old.index(original)]
472
- end
473
- end
474
- else
475
- replacement = @values[j]
476
- end
477
- return replacement
478
- end
479
-
480
- # Establishes a prefix for a given UID tag.
481
- # This makes it somewhat easier to distinguish
482
- # between different types of random generated UIDs.
483
- #
484
- # @param [String] tag a data element string tag
485
- #
486
- def prefix(tag)
487
- if @prefixes[tag]
488
- @prefixes[tag]
489
- else
490
- @prefixes[tag] = @prefixes.length + 1
491
- @prefixes[tag]
492
- end
493
- end
494
-
495
- # Prepares the data for anonymization.
496
- #
497
- # @param [String, DObject, Array<String, DObject>] data single or multiple DICOM data (directories, file paths, binary strings, DICOM objects)
498
- # @return [Array] the original data (wrapped in an array) as well as an array of loaded DObject instances
499
- #
500
- def prepare(data)
501
- logger.info("Loading DICOM data.")
502
- # Temporarily adjust the ruby-dicom log threshold (usually to suppress messages from the DObject class):
503
- @original_level = logger.level
504
- logger.level = @logger_level
505
- dicom = DICOM.load(data)
506
- logger.level = @original_level
507
- logger.info("#{dicom.length} DICOM objects have been prepared for anonymization.")
508
- logger.level = @logger_level
509
- # Set up enumeration if requested:
510
- create_enum_hash if @enumeration
511
- require 'securerandom' if @random_file_name
512
- dicom
513
- end
514
-
515
- # Replaces the UIDs of the given DICOM object.
516
- #
517
- # @note Empty UIDs are ignored (we don't generate new UIDs for these).
518
- # @note If AuditTrail is set, the relationship between old and new UIDs are preserved,
519
- # and the relations between files in a study/series should remain valid.
520
- # @param [Array<DObject, Item>] parents dicom parent objects who's child elements will be investigated
521
- #
522
- def replace_uids(parents)
523
- parents.each do |parent|
524
- parent.each_element do |element|
525
- if element.vr == ('UI') and !@static_uids[element.tag]
526
- original = element.value
527
- if original && original.length > 0
528
- # We have a UID value, go ahead and replace it:
529
- if @audit_trail
530
- # Check if the UID has been encountered already:
531
- replacement = @audit_trail.replacement('uids', original)
532
- unless replacement
533
- # The UID has not been stored previously. Generate a new one:
534
- replacement = DICOM.generate_uid(@uid_root, prefix(element.tag))
535
- # Add this tag record to the audit trail:
536
- @audit_trail.add_record('uids', original, replacement)
537
- end
538
- # Replace the UID in the DICOM object:
539
- element.value = replacement
540
- else
541
- # We don't care about preserving UID relations. Just insert a custom UID:
542
- element.value = DICOM.generate_uid(@uid_root, prefix(element.tag))
543
- end
544
- end
545
- end
546
- end
547
- end
548
- end
549
-
550
- # Sets up some default information variables that are used by the Anonymizer.
551
- #
552
- def set_defaults
553
- # Some UIDs should not be remapped even if uid anonymization has been requested:
554
- @static_uids = {
555
- # Private related:
556
- '0002,0100' => true,
557
- '0004,1432' => true,
558
- # Coding scheme related:
559
- '0008,010C' => true,
560
- '0008,010D' => true,
561
- # Transfer syntax related:
562
- '0002,0010' => true,
563
- '0400,0010' => true,
564
- '0400,0510' => true,
565
- '0004,1512' => true,
566
- # SOP class related:
567
- '0000,0002' => true,
568
- '0000,0003' => true,
569
- '0002,0002' => true,
570
- '0004,1510' => true,
571
- '0004,151A' => true,
572
- '0008,0016' => true,
573
- '0008,001A' => true,
574
- '0008,001B' => true,
575
- '0008,0062' => true,
576
- '0008,1150' => true,
577
- '0008,115A' => true
578
- }
579
- # Sets up default tags that will be anonymized, along with default replacement values and enumeration settings.
580
- # This data is stored in 3 separate instance arrays for tags, values and enumeration.
581
- data = [
582
- ['0008,0012', '20000101', false], # Instance Creation Date
583
- ['0008,0013', '000000.00', false], # Instance Creation Time
584
- ['0008,0020', '20000101', false], # Study Date
585
- ['0008,0021', '20000101', false], # Series Date
586
- ['0008,0022', '20000101', false], # Acquisition Date
587
- ['0008,0023', '20000101', false], # Image Date
588
- ['0008,0030', '000000.00', false], # Study Time
589
- ['0008,0031', '000000.00', false], # Series Time
590
- ['0008,0032', '000000.00', false], # Acquisition Time
591
- ['0008,0033', '000000.00', false], # Image Time
592
- ['0008,0050', '', true], # Accession Number
593
- ['0008,0080', 'Institution', true], # Institution name
594
- ['0008,0081', 'Address', true], # Institution Address
595
- ['0008,0090', 'Physician', true], # Referring Physician's name
596
- ['0008,1010', 'Station', true], # Station name
597
- ['0008,1040', 'Department', true], # Institutional Department name
598
- ['0008,1070', 'Operator', true], # Operator's Name
599
- ['0010,0010', 'Patient', true], # Patient's name
600
- ['0010,0020', 'ID', true], # Patient's ID
601
- ['0010,0030', '20000101', false], # Patient's Birth Date
602
- ['0010,0040', 'O', false], # Patient's Sex
603
- ['0010,1010', '', false], # Patient's Age
604
- ['0020,4000', '', false], # Image Comments
605
- ].transpose
606
- @tags = data[0]
607
- @values = data[1]
608
- @enumerations = data[2]
609
- # Tags to be deleted completely during anonymization:
610
- @delete = Hash.new
611
- end
612
-
613
- # Collects the attributes of this instance.
614
- #
615
- # @return [Array] an array of attributes
616
- #
617
- def state
618
- [
619
- @tags, @values, @enumerations, @delete, @blank,
620
- @delete_private, @enumeration, @logger_level,
621
- @random_file_name, @recursive, @uid, @uid_root, @write_path
622
- ]
623
- end
624
-
625
- # Writes a DICOM object to file.
626
- #
627
- # @param [DObject] dcm a DICOM object
628
- #
629
- def write(dcm)
630
- if @write_path
631
- # The DICOM object is to be written to a separate directory. If the
632
- # original and the new directories have a common root, this is taken into
633
- # consideration when determining the object's write path:
634
- path = destination(dcm)
635
- if @random_file_name
636
- file_name = "#{SecureRandom.hex(16)}.dcm"
637
- else
638
- file_name = File.basename(dcm.source)
639
- end
640
- dcm.write(File.join(path, file_name))
641
- else
642
- # The original DICOM file is overwritten with the anonymized DICOM object:
643
- dcm.write(dcm.source)
644
- end
645
- end
646
-
647
- end
648
-
649
- end
1
+ module DICOM
2
+
3
+ # This is a convenience class for handling the anonymization
4
+ # (de-identification) of DICOM files.
5
+ #
6
+ # @note
7
+ # For a thorough introduction to the concept of DICOM anonymization,
8
+ # please refer to The DICOM Standard, Part 15: Security and System
9
+ # Management Profiles, Annex E: Attribute Confidentiality Profiles.
10
+ # For guidance on settings for individual data elements, please
11
+ # refer to DICOM PS 3.15, Annex E, Table E.1-1: Application Level
12
+ # Confidentiality Profile Attributes.
13
+ #
14
+ class Anonymizer
15
+ include Logging
16
+
17
+ # An AuditTrail instance used for this anonymization (if specified).
18
+ attr_reader :audit_trail
19
+ # The file name used for the AuditTrail serialization (if specified).
20
+ attr_reader :audit_trail_file
21
+ # A boolean that if set as true will cause all anonymized tags to be blank instead of getting some generic value.
22
+ attr_accessor :blank
23
+ # A hash of elements (represented by tag keys) that will be deleted from the DICOM objects on anonymization.
24
+ attr_reader :delete
25
+ # A boolean that if set as true, will make the anonymization delete all private tags.
26
+ attr_accessor :delete_private
27
+ # The cryptographic hash function to be used for encrypting DICOM values recorded in an audit trail file.
28
+ attr_reader :encryption
29
+ # A boolean that if set as true will cause all anonymized tags to get enumerated values, to enable post-anonymization re-identification by the user.
30
+ attr_accessor :enumeration
31
+ # The logger level which is applied to DObject operations during anonymization (defaults to Logger::FATAL).
32
+ attr_reader :logger_level
33
+ # A boolean that if set as true will cause all anonymized files to be written with random file names (if write_path has been specified).
34
+ attr_accessor :random_file_name
35
+ # A boolean that if set as true, will cause the anonymization to run on all levels of the DICOM file tag hierarchy.
36
+ attr_accessor :recursive
37
+ # A boolean indicating whether or not UIDs shall be replaced when executing the anonymization.
38
+ attr_accessor :uid
39
+ # The DICOM UID root to use when generating new UIDs.
40
+ attr_accessor :uid_root
41
+ # The path where the anonymized files will be saved. If this value is not set, the original DICOM files will be overwritten.
42
+ attr_accessor :write_path
43
+
44
+ # Creates an Anonymizer instance.
45
+ #
46
+ # @note To customize logging behaviour, refer to the Logging module documentation.
47
+ # @param [Hash] options the options to create an anonymizer instance with
48
+ # @option options [String] :audit_trail a file name path (if the file contains old audit data, these are loaded and used in the current anonymization)
49
+ # @option options [Boolean] :blank toggles whether to set the values of anonymized elements as empty instead of some generic value
50
+ # @option options [Boolean] :delete_private toggles whether private elements are to be deleted
51
+ # @option options [TrueClass, Digest::Class] :encryption if set as true, the default hash function (MD5) will be used for representing DICOM values in an audit file. Otherwise a Digest class can be given, e.g. Digest::SHA256
52
+ # @option options [Boolean] :enumeration toggles whether (some) elements get enumerated values (to enable post-anonymization re-identification)
53
+ # @option options [Fixnum] :logger_level the logger level which is applied to DObject operations during anonymization (defaults to Logger::FATAL)
54
+ # @option options [Boolean] :random_file_name toggles whether anonymized files will be given random file names when rewritten (in combination with the :write_path option)
55
+ # @option options [Boolean] :recursive toggles whether to anonymize on all sub-levels of the DICOM object tag hierarchies
56
+ # @option options [Boolean] :uid toggles whether UIDs will be replaced with custom generated UIDs (beware that to preserve UID relations in studies/series, the audit_trail feature must be used)
57
+ # @option options [String] :uid_root an organization (or custom) UID root to use when replacing UIDs
58
+ # @option options [String] :write_path a directory where the anonymized files are re-written (if not specified, files are overwritten)
59
+ # @example Create an Anonymizer instance and increase the log output
60
+ # a = Anonymizer.new
61
+ # a.logger.level = Logger::INFO
62
+ # @example Perform anonymization using the audit trail feature
63
+ # a = Anonymizer.new(:audit_trail => 'trail.json')
64
+ # a.enumeration = true
65
+ # a.write_path = '//anonymized/'
66
+ # a.anonymize('//dicom/today/')
67
+ #
68
+ def initialize(options={})
69
+ # Transfer options to attributes:
70
+ @blank = options[:blank]
71
+ @delete_private = options[:delete_private]
72
+ @enumeration = options[:enumeration]
73
+ @logger_level = options[:logger_level] || Logger::FATAL
74
+ @random_file_name = options[:random_file_name]
75
+ @recursive = options[:recursive]
76
+ @uid = options[:uid]
77
+ @uid_root = options[:uid_root] ? options[:uid_root] : UID_ROOT
78
+ @write_path = options[:write_path]
79
+ # Array of folders to be processed for anonymization:
80
+ @folders = Array.new
81
+ # Folders that will be skipped:
82
+ @exceptions = Array.new
83
+ # Data elements which will be anonymized (the array will hold a list of tag strings):
84
+ @tags = Array.new
85
+ # Default values to use on anonymized data elements:
86
+ @values = Array.new
87
+ # Which data elements will have enumeration applied, if requested by the user:
88
+ @enumerations = Array.new
89
+ # We use a Hash to store information from DICOM files if enumeration is desired:
90
+ @enum_old_hash = Hash.new
91
+ @enum_new_hash = Hash.new
92
+ # All the files to be anonymized will be put in this array:
93
+ @files = Array.new
94
+ @prefixes = Hash.new
95
+ # Setup audit trail if requested:
96
+ if options[:audit_trail]
97
+ @audit_trail_file = options[:audit_trail]
98
+ if File.exists?(@audit_trail_file) && File.size(@audit_trail_file) > 2
99
+ # Load the pre-existing audit trail from file:
100
+ @audit_trail = AuditTrail.read(@audit_trail_file)
101
+ else
102
+ # Start from scratch with an empty audit trail:
103
+ @audit_trail = AuditTrail.new
104
+ end
105
+ # Set up encryption if indicated:
106
+ if options[:encryption]
107
+ require 'digest'
108
+ if options[:encryption].respond_to?(:hexdigest)
109
+ @encryption = options[:encryption]
110
+ else
111
+ @encryption = Digest::MD5
112
+ end
113
+ end
114
+ end
115
+ # Set the default data elements to be anonymized:
116
+ set_defaults
117
+ end
118
+
119
+ # Checks for equality.
120
+ #
121
+ # Other and self are considered equivalent if they are
122
+ # of compatible types and their attributes are equivalent.
123
+ #
124
+ # @param other an object to be compared with self.
125
+ # @return [Boolean] true if self and other are considered equivalent
126
+ #
127
+ def ==(other)
128
+ if other.respond_to?(:to_anonymizer)
129
+ other.send(:state) == state
130
+ end
131
+ end
132
+
133
+ alias_method :eql?, :==
134
+
135
+ # Anonymizes the given DICOM data with the settings of this Anonymizer instance.
136
+ #
137
+ # @param [String, DObject, Array<String, DObject>] data single or multiple DICOM data (directories, file paths, binary strings, DICOM objects)
138
+ # @return [Array<DObject>] an array of the anonymized DICOM objects
139
+ #
140
+ def anonymize(data)
141
+ dicom = prepare(data)
142
+ if @tags.length > 0
143
+ dicom.each do |dcm|
144
+ anonymize_dcm(dcm)
145
+ # Write DICOM object to file unless it was passed to the anonymizer as an object:
146
+ write(dcm) unless dcm.was_dcm_on_input
147
+ end
148
+ else
149
+ logger.warn("No tags have been selected for anonymization. Aborting anonymization.")
150
+ end
151
+ # Reset the ruby-dicom log threshold to its original level:
152
+ logger.level = @original_level
153
+ # Save the audit trail (if used):
154
+ @audit_trail.write(@audit_trail_file) if @audit_trail
155
+ logger.info("Anonymization complete.")
156
+ dicom
157
+ end
158
+
159
+ # Specifies that the given tag is to be completely deleted
160
+ # from the anonymized DICOM objects.
161
+ #
162
+ # @param [String] tag a data element tag
163
+ # @example Completely delete the Patient's Name tag from the DICOM files
164
+ # a.delete_tag('0010,0010')
165
+ #
166
+ def delete_tag(tag)
167
+ raise ArgumentError, "Expected String, got #{tag.class}." unless tag.is_a?(String)
168
+ raise ArgumentError, "Expected a valid tag of format 'GGGG,EEEE', got #{tag}." unless tag.tag?
169
+ @delete[tag] = true
170
+ end
171
+
172
+ # Checks the enumeration status of this tag.
173
+ #
174
+ # @param [String] tag a data element tag
175
+ # @return [Boolean, NilClass] the enumeration status of the tag, or nil if the tag has no match
176
+ #
177
+ def enum(tag)
178
+ raise ArgumentError, "Expected String, got #{tag.class}." unless tag.is_a?(String)
179
+ raise ArgumentError, "Expected a valid tag of format 'GGGG,EEEE', got #{tag}." unless tag.tag?
180
+ pos = @tags.index(tag)
181
+ if pos
182
+ return @enumerations[pos]
183
+ else
184
+ logger.warn("The specified tag (#{tag}) was not found in the list of tags to be anonymized.")
185
+ return nil
186
+ end
187
+ end
188
+
189
+ # Computes a hash code for this object.
190
+ #
191
+ # @note Two objects with the same attributes will have the same hash code.
192
+ #
193
+ # @return [Fixnum] the object's hash code
194
+ #
195
+ def hash
196
+ state.hash
197
+ end
198
+
199
+ # Removes a tag from the list of tags that will be anonymized.
200
+ #
201
+ # @param [String] tag a data element tag
202
+ # @example Do not anonymize the Patient's Name tag
203
+ # a.remove_tag('0010,0010')
204
+ #
205
+ def remove_tag(tag)
206
+ raise ArgumentError, "Expected String, got #{tag.class}." unless tag.is_a?(String)
207
+ raise ArgumentError, "Expected a valid tag of format 'GGGG,EEEE', got #{tag}." unless tag.tag?
208
+ pos = @tags.index(tag)
209
+ if pos
210
+ @tags.delete_at(pos)
211
+ @values.delete_at(pos)
212
+ @enumerations.delete_at(pos)
213
+ end
214
+ end
215
+
216
+ # Sets the anonymization settings for the specified tag. If the tag is already present in the list
217
+ # of tags to be anonymized, its settings are updated, and if not, a new tag entry is created.
218
+ #
219
+ # @param [String] tag a data element tag
220
+ # @param [Hash] options the anonymization settings for the specified tag
221
+ # @option options [String, Integer, Float] :value the replacement value to be used when anonymizing this data element. Defaults to the pre-existing value, or for new tags a default matching the tag's value type (0, 0.0 or '').
222
+ # @option options [Boolean] :enum specifies if enumeration is to be used for this tag. Defaults to the pre-existing value and false for new tags.
223
+ # @example Set the anonymization settings of the Patient's Name tag
224
+ # a.set_tag('0010,0010', :value => 'MrAnonymous', :enum => true)
225
+ #
226
+ def set_tag(tag, options={})
227
+ raise ArgumentError, "Expected String, got #{tag.class}." unless tag.is_a?(String)
228
+ raise ArgumentError, "Expected a valid tag of format 'GGGG,EEEE', got #{tag}." unless tag.tag?
229
+ pos = @tags.index(tag)
230
+ if pos
231
+ # Update existing values:
232
+ @values[pos] = options[:value] if options[:value]
233
+ @enumerations[pos] = options[:enum] if options[:enum] != nil
234
+ else
235
+ # Add new elements:
236
+ @tags << tag
237
+ @values << (options[:value] ? options[:value] : default_value(tag))
238
+ @enumerations << (options[:enum] ? options[:enum] : false)
239
+ end
240
+ end
241
+
242
+ # Returns self.
243
+ #
244
+ # @return [Anonymizer] self
245
+ #
246
+ def to_anonymizer
247
+ self
248
+ end
249
+
250
+ # Gives the value which will be used when anonymizing this tag.
251
+ #
252
+ # @note If enumeration is selected for a string type tag, a number will be
253
+ # appended in addition to the string that is returned here.
254
+ #
255
+ # @param [String] tag a data element tag
256
+ # @return [String, Integer, Float, NilClass] the replacement value for the specified tag, or nil if the tag is not matched
257
+ #
258
+ def value(tag)
259
+ raise ArgumentError, "Expected String, got #{tag.class}." unless tag.is_a?(String)
260
+ raise ArgumentError, "Expected a valid tag of format 'GGGG,EEEE', got #{tag}." unless tag.tag?
261
+ pos = @tags.index(tag)
262
+ if pos
263
+ return @values[pos]
264
+ else
265
+ logger.warn("The specified tag (#{tag}) was not found in the list of tags to be anonymized.")
266
+ return nil
267
+ end
268
+ end
269
+
270
+
271
+ private
272
+
273
+
274
+ # Performs anonymization on a DICOM object.
275
+ #
276
+ # @param [DObject] dcm a DICOM object
277
+ #
278
+ def anonymize_dcm(dcm)
279
+ # Extract the data element parents to investigate:
280
+ parents = element_parents(dcm)
281
+ parents.each do |parent|
282
+ # Anonymize the desired tags:
283
+ @tags.each_index do |j|
284
+ if parent.exists?(@tags[j])
285
+ element = parent[@tags[j]]
286
+ if element.is_a?(Element)
287
+ if @blank
288
+ value = ''
289
+ elsif @enumeration
290
+ old_value = element.value
291
+ # Only launch enumeration logic if there is an actual value to the data element:
292
+ if old_value
293
+ value = enumerated_value(old_value, j)
294
+ else
295
+ value = ''
296
+ end
297
+ else
298
+ # Use the value that has been set for this tag:
299
+ value = @values[j]
300
+ end
301
+ element.value = value
302
+ end
303
+ end
304
+ end
305
+ # Delete elements marked for deletion:
306
+ @delete.each_key do |tag|
307
+ parent.delete(tag) if parent.exists?(tag)
308
+ end
309
+ end
310
+ # General DICOM object manipulation:
311
+ # Add a Patient Identity Removed attribute (as per
312
+ # DICOM PS 3.15, Annex E, E.1.1 De-Identifier, point 6):
313
+ dcm.add(Element.new('0012,0062', 'YES'))
314
+ # Add a De-Identification Method Code Sequence Item:
315
+ dcm.add(Sequence.new('0012,0064')) unless dcm.exists?('0012,0064')
316
+ i = dcm['0012,0064'].add_item
317
+ i.add(Element.new('0012,0063', 'De-identified by the ruby-dicom Anonymizer'))
318
+ # FIXME: At some point we should add a set of de-identification method codes, as per
319
+ # DICOM PS 3.16 CID 7050 which corresponds to the settings chosen for the anonymizer.
320
+ # Delete the old File Meta Information group (as per
321
+ # DICOM PS 3.15, Annex E, E.1.1 De-Identifier, point 7):
322
+ dcm.delete_group('0002')
323
+ # Handle UIDs if requested:
324
+ replace_uids(parents) if @uid
325
+ # Delete private tags if indicated:
326
+ dcm.delete_private if @delete_private
327
+ end
328
+
329
+ # Gives the value to be used for the audit trail, which is either
330
+ # the original value itself, or an encrypted string based on it.
331
+ #
332
+ # @param [String, Integer, Float] original the original value of the tag to be anonymized
333
+ # @return [String, Integer, Float] with encryption, a hash string is returned, otherwise the original value
334
+ #
335
+ def at_value(original)
336
+ @encryption ? @encryption.hexdigest(original) : original
337
+ end
338
+
339
+ # Creates a hash that is used for storing information that is used when enumeration is selected.
340
+ #
341
+ def create_enum_hash
342
+ @enumerations.each_index do |i|
343
+ @enum_old_hash[@tags[i]] = Array.new
344
+ @enum_new_hash[@tags[i]] = Array.new
345
+ end
346
+ end
347
+
348
+ # Determines a default value to use for anonymizing the given tag.
349
+ #
350
+ # @param [String] tag a data element tag
351
+ # @return [String, Integer, Float] the default replacement value for a given tag
352
+ #
353
+ def default_value(tag)
354
+ name, vr = LIBRARY.name_and_vr(tag)
355
+ conversion = VALUE_CONVERSION[vr]
356
+ case conversion
357
+ when :to_i then return 0
358
+ when :to_f then return 0.0
359
+ else
360
+ # Assume type is string and return an empty string:
361
+ return ''
362
+ end
363
+ end
364
+
365
+ # Creates a write path for the given DICOM object, based on the object's
366
+ # original file path and the write_path attribute.
367
+ #
368
+ # @param [DObject] dcm a DICOM object
369
+ # @return [String] the destination directory path
370
+ #
371
+ def destination(dcm)
372
+ # Separate the path from the source file string:
373
+ file_start = dcm.source.rindex(File.basename(dcm.source))
374
+ if file_start == 0
375
+ source_dir = "."
376
+ else
377
+ source_dir = dcm.source[0..(file_start-1)]
378
+ end
379
+ source_folders = source_dir.split(File::SEPARATOR)
380
+ target_folders = @write_path.split(File::SEPARATOR)
381
+ # If the first element is the current dir symbol, get rid of it:
382
+ source_folders.delete('.')
383
+ # Check for equality of folder names in a range limited by the shortest array:
384
+ common_length = [source_folders.length, target_folders.length].min
385
+ uncommon_index = nil
386
+ common_length.times do |i|
387
+ if target_folders[i] != source_folders[i]
388
+ uncommon_index = i
389
+ break
390
+ end
391
+ end
392
+ # Create the output path by joining the two paths together using the determined index:
393
+ append_path = uncommon_index ? source_folders[uncommon_index..-1] : nil
394
+ [target_folders, append_path].compact.join(File::SEPARATOR)
395
+ end
396
+
397
+ # Extracts all parents from a DObject instance which potentially
398
+ # have child (data) elements. This typically means the DObject
399
+ # instance itself as well as items (i.e. not sequences).
400
+ # Note that unless the @recursive attribute has been set,
401
+ # this method will only return the DObject (placed inside an array).
402
+ #
403
+ # @param [DObject] dcm a DICOM object
404
+ # @return [Array<DObject, Item>] an array containing either just a DObject or also all parental child items within the tag hierarchy
405
+ #
406
+ def element_parents(dcm)
407
+ parents = Array.new
408
+ parents << dcm
409
+ if @recursive
410
+ dcm.sequences.each do |s|
411
+ parents += element_parents_recursive(s)
412
+ end
413
+ end
414
+ parents
415
+ end
416
+
417
+ # Recursively extracts all item parents from a sequence instance (including
418
+ # any sub-sequences) which actually contain child (data) elements.
419
+ #
420
+ # @param [Sequence] sequence a Sequence instance
421
+ # @return [Array<Item>] an array containing items within the tag hierarchy that contains child elements
422
+ #
423
+ def element_parents_recursive(sequence)
424
+ parents = Array.new
425
+ sequence.items.each do |i|
426
+ parents << i if i.elements?
427
+ i.sequences.each do |s|
428
+ parents += element_parents_recursive(s)
429
+ end
430
+ end
431
+ parents
432
+ end
433
+
434
+ # Handles the enumeration for the given data element tag.
435
+ # If its value has been encountered before, its corresponding enumerated
436
+ # replacement value is retrieved, and if a new original value is encountered,
437
+ # a new enumerated replacement value is found by increasing an index by 1.
438
+ #
439
+ # @param [String, Integer, Float] original the original value of the tag to be anonymized
440
+ # @param [Fixnum] j the index of this tag in the tag-related instance arrays
441
+ # @return [String, Integer, Float] the replacement value which is used for the anonymization of the tag
442
+ #
443
+ def enumerated_value(original, j)
444
+ # Is enumeration requested for this tag?
445
+ if @enumerations[j]
446
+ if @audit_trail
447
+ # Check if the original value has been encountered already:
448
+ replacement = @audit_trail.replacement(@tags[j], at_value(original))
449
+ unless replacement
450
+ # This original value has not been encountered yet. Determine the index to use.
451
+ index = @audit_trail.records(@tags[j]).length + 1
452
+ # Create the replacement value:
453
+ if @values[j].is_a?(String)
454
+ replacement = @values[j] + index.to_s
455
+ else
456
+ replacement = @values[j] + index
457
+ end
458
+ # Add this tag record to the audit trail:
459
+ @audit_trail.add_record(@tags[j], at_value(original), replacement)
460
+ end
461
+ else
462
+ # Retrieve earlier used anonymization values:
463
+ previous_old = @enum_old_hash[@tags[j]]
464
+ previous_new = @enum_new_hash[@tags[j]]
465
+ p_index = previous_old.length
466
+ if previous_old.index(original) == nil
467
+ # Current value has not been encountered before:
468
+ replacement = @values[j]+(p_index + 1).to_s
469
+ # Store value in array (and hash):
470
+ previous_old << original
471
+ previous_new << replacement
472
+ @enum_old_hash[@tags[j]] = previous_old
473
+ @enum_new_hash[@tags[j]] = previous_new
474
+ else
475
+ # Current value has been observed before:
476
+ replacement = previous_new[previous_old.index(original)]
477
+ end
478
+ end
479
+ else
480
+ replacement = @values[j]
481
+ end
482
+ return replacement
483
+ end
484
+
485
+ # Establishes a prefix for a given UID tag.
486
+ # This makes it somewhat easier to distinguish
487
+ # between different types of random generated UIDs.
488
+ #
489
+ # @param [String] tag a data element string tag
490
+ #
491
+ def prefix(tag)
492
+ if @prefixes[tag]
493
+ @prefixes[tag]
494
+ else
495
+ @prefixes[tag] = @prefixes.length + 1
496
+ @prefixes[tag]
497
+ end
498
+ end
499
+
500
+ # Prepares the data for anonymization.
501
+ #
502
+ # @param [String, DObject, Array<String, DObject>] data single or multiple DICOM data (directories, file paths, binary strings, DICOM objects)
503
+ # @return [Array<DObject>] an array of the loaded DObject instances
504
+ #
505
+ def prepare(data)
506
+ logger.info("Loading DICOM data.")
507
+ # Temporarily adjust the ruby-dicom log threshold (usually to suppress messages from the DObject class):
508
+ @original_level = logger.level
509
+ logger.level = @logger_level
510
+ dicom = DICOM.load(data)
511
+ logger.level = @original_level
512
+ logger.info("#{dicom.length} DICOM objects have been prepared for anonymization.")
513
+ logger.level = @logger_level
514
+ # Set up enumeration if requested:
515
+ create_enum_hash if @enumeration
516
+ require 'securerandom' if @random_file_name
517
+ dicom
518
+ end
519
+
520
+ # Replaces the UIDs of the given DICOM object.
521
+ #
522
+ # @note Empty UIDs are ignored (we don't generate new UIDs for these).
523
+ # @note If AuditTrail is set, the relationship between old and new UIDs is preserved,
524
+ # and the relations between files in a study/series should remain valid.
525
+ # @param [Array<DObject, Item>] parents dicom parent objects whose child elements will be investigated
526
+ #
527
+ def replace_uids(parents)
528
+ parents.each do |parent|
529
+ parent.each_element do |element|
530
+ if element.vr == ('UI') and !@static_uids[element.tag]
531
+ original = element.value
532
+ if original && original.length > 0
533
+ # We have a UID value, go ahead and replace it:
534
+ if @audit_trail
535
+ # Check if the UID has been encountered already:
536
+ replacement = @audit_trail.replacement('uids', original)
537
+ unless replacement
538
+ # The UID has not been stored previously. Generate a new one:
539
+ replacement = DICOM.generate_uid(@uid_root, prefix(element.tag))
540
+ # Add this tag record to the audit trail:
541
+ @audit_trail.add_record('uids', original, replacement)
542
+ end
543
+ # Replace the UID in the DICOM object:
544
+ element.value = replacement
545
+ else
546
+ # We don't care about preserving UID relations. Just insert a custom UID:
547
+ element.value = DICOM.generate_uid(@uid_root, prefix(element.tag))
548
+ end
549
+ end
550
+ end
551
+ end
552
+ end
553
+ end
554
+
555
+ # Sets up some default information variables that are used by the Anonymizer.
556
+ #
557
+ def set_defaults
558
+ # Some UIDs should not be remapped even if uid anonymization has been requested:
559
+ @static_uids = {
560
+ # Private related:
561
+ '0002,0100' => true,
562
+ '0004,1432' => true,
563
+ # Coding scheme related:
564
+ '0008,010C' => true,
565
+ '0008,010D' => true,
566
+ # Transfer syntax related:
567
+ '0002,0010' => true,
568
+ '0400,0010' => true,
569
+ '0400,0510' => true,
570
+ '0004,1512' => true,
571
+ # SOP class related:
572
+ '0000,0002' => true,
573
+ '0000,0003' => true,
574
+ '0002,0002' => true,
575
+ '0004,1510' => true,
576
+ '0004,151A' => true,
577
+ '0008,0016' => true,
578
+ '0008,001A' => true,
579
+ '0008,001B' => true,
580
+ '0008,0062' => true,
581
+ '0008,1150' => true,
582
+ '0008,115A' => true
583
+ }
584
+ # Sets up default tags that will be anonymized, along with default replacement values and enumeration settings.
585
+ # This data is stored in 3 separate instance arrays for tags, values and enumeration.
586
+ data = [
587
+ ['0008,0012', '20000101', false], # Instance Creation Date
588
+ ['0008,0013', '000000.00', false], # Instance Creation Time
589
+ ['0008,0020', '20000101', false], # Study Date
590
+ ['0008,0021', '20000101', false], # Series Date
591
+ ['0008,0022', '20000101', false], # Acquisition Date
592
+ ['0008,0023', '20000101', false], # Image Date
593
+ ['0008,0030', '000000.00', false], # Study Time
594
+ ['0008,0031', '000000.00', false], # Series Time
595
+ ['0008,0032', '000000.00', false], # Acquisition Time
596
+ ['0008,0033', '000000.00', false], # Image Time
597
+ ['0008,0050', '', true], # Accession Number
598
+ ['0008,0080', 'Institution', true], # Institution name
599
+ ['0008,0081', 'Address', true], # Institution Address
600
+ ['0008,0090', 'Physician', true], # Referring Physician's name
601
+ ['0008,1010', 'Station', true], # Station name
602
+ ['0008,1040', 'Department', true], # Institutional Department name
603
+ ['0008,1070', 'Operator', true], # Operator's Name
604
+ ['0010,0010', 'Patient', true], # Patient's name
605
+ ['0010,0020', 'ID', true], # Patient's ID
606
+ ['0010,0030', '20000101', false], # Patient's Birth Date
607
+ ['0010,0040', 'O', false], # Patient's Sex
608
+ ['0010,1010', '', false], # Patient's Age
609
+ ['0020,4000', '', false], # Image Comments
610
+ ].transpose
611
+ @tags = data[0]
612
+ @values = data[1]
613
+ @enumerations = data[2]
614
+ # Tags to be deleted completely during anonymization:
615
+ @delete = Hash.new
616
+ end
617
+
618
+ # Collects the attributes of this instance.
619
+ #
620
+ # @return [Array] an array of attributes
621
+ #
622
+ def state
623
+ [
624
+ @tags, @values, @enumerations, @delete, @blank,
625
+ @delete_private, @enumeration, @logger_level,
626
+ @random_file_name, @recursive, @uid, @uid_root, @write_path
627
+ ]
628
+ end
629
+
630
+ # Writes a DICOM object to file.
631
+ #
632
+ # @param [DObject] dcm a DICOM object
633
+ #
634
+ def write(dcm)
635
+ if @write_path
636
+ # The DICOM object is to be written to a separate directory. If the
637
+ # original and the new directories have a common root, this is taken into
638
+ # consideration when determining the object's write path:
639
+ path = destination(dcm)
640
+ if @random_file_name
641
+ file_name = "#{SecureRandom.hex(16)}.dcm"
642
+ else
643
+ file_name = File.basename(dcm.source)
644
+ end
645
+ dcm.write(File.join(path, file_name))
646
+ else
647
+ # The original DICOM file is overwritten with the anonymized DICOM object:
648
+ dcm.write(dcm.source)
649
+ end
650
+ end
651
+
652
+ end
653
+
654
+ end