dicom 0.9.5 → 0.9.6
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +13 -5
- data/{CHANGELOG.rdoc → CHANGELOG.md} +50 -30
- data/{CONTRIBUTING.rdoc → CONTRIBUTING.md} +16 -16
- data/Gemfile.lock +47 -0
- data/README.md +152 -0
- data/dicom.gemspec +11 -10
- data/lib/dicom.rb +30 -11
- data/lib/dicom/anonymizer.rb +654 -649
- data/lib/dicom/audit_trail.rb +0 -2
- data/lib/dicom/d_client.rb +1 -1
- data/lib/dicom/d_library.rb +45 -15
- data/lib/dicom/d_object.rb +18 -18
- data/lib/dicom/d_read.rb +28 -4
- data/lib/dicom/d_write.rb +49 -26
- data/lib/dicom/dictionary/{elements.txt → elements.tsv} +0 -0
- data/lib/dicom/dictionary/{uids.txt → uids.tsv} +0 -0
- data/lib/dicom/element.rb +6 -7
- data/lib/dicom/elemental.rb +1 -0
- data/lib/dicom/elemental_parent.rb +64 -0
- data/lib/dicom/extensions/array.rb +57 -0
- data/lib/dicom/extensions/hash.rb +31 -0
- data/lib/dicom/extensions/string.rb +126 -0
- data/lib/dicom/{constants.rb → general/constants.rb} +29 -38
- data/lib/dicom/{deprecated.rb → general/deprecated.rb} +0 -0
- data/lib/dicom/{logging.rb → general/logging.rb} +0 -0
- data/lib/dicom/{variables.rb → general/methods.rb} +0 -22
- data/lib/dicom/general/variables.rb +29 -0
- data/lib/dicom/{version.rb → general/version.rb} +1 -1
- data/lib/dicom/image_item.rb +0 -2
- data/lib/dicom/image_processor.rb +2 -0
- data/lib/dicom/item.rb +1 -13
- data/lib/dicom/link.rb +2 -1
- data/lib/dicom/parent.rb +34 -86
- data/lib/dicom/sequence.rb +1 -13
- data/lib/dicom/stream.rb +94 -114
- data/rakefile.rb +1 -1
- metadata +73 -36
- data/README.rdoc +0 -149
- data/lib/dicom/ruby_extensions.rb +0 -249
data/dicom.gemspec
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
# encoding: UTF-8
|
2
2
|
|
3
|
-
require File.expand_path('../lib/dicom/version', __FILE__)
|
3
|
+
require File.expand_path('../lib/dicom/general/version', __FILE__)
|
4
4
|
|
5
5
|
Gem::Specification.new do |s|
|
6
6
|
s.platform = Gem::Platform::RUBY
|
@@ -17,14 +17,15 @@ Gem::Specification.new do |s|
|
|
17
17
|
s.files = Dir["{lib}/**/*", "[A-Z]*"]
|
18
18
|
s.rubyforge_project = 'dicom'
|
19
19
|
|
20
|
-
s.required_ruby_version = '>= 1.9.
|
20
|
+
s.required_ruby_version = '>= 1.9.3'
|
21
21
|
|
22
|
-
s.add_development_dependency('bundler', '~> 1.
|
23
|
-
s.add_development_dependency('
|
24
|
-
s.add_development_dependency('
|
25
|
-
s.add_development_dependency('narray', '~> 0.6.0.8')
|
26
|
-
s.add_development_dependency('rake', '~>
|
27
|
-
s.add_development_dependency('
|
28
|
-
s.add_development_dependency('
|
29
|
-
s.add_development_dependency('
|
22
|
+
s.add_development_dependency('bundler', '~> 1.6')
|
23
|
+
s.add_development_dependency('mini_magick', '~> 3.7')
|
24
|
+
s.add_development_dependency('mocha', '~> 1.1')
|
25
|
+
s.add_development_dependency('narray', '~> 0.6', '>= 0.6.0.8')
|
26
|
+
s.add_development_dependency('rake', '~> 10.3')
|
27
|
+
s.add_development_dependency('redcarpet', '~> 3.1')
|
28
|
+
s.add_development_dependency('rmagick', '~> 2.13', '>= 2.13.2')
|
29
|
+
s.add_development_dependency('rspec', '~> 3.0')
|
30
|
+
s.add_development_dependency('yard', '~> 0.8', '>= 0.8.7')
|
30
31
|
end
|
data/lib/dicom.rb
CHANGED
@@ -12,14 +12,30 @@
|
|
12
12
|
# The rest of the classes visible in the documentation generated by YARD are
|
13
13
|
# in principle 'private' classes, which are mainly of interest to developers.
|
14
14
|
|
15
|
-
#
|
16
|
-
|
15
|
+
# Standard library dependencies:
|
16
|
+
require 'json'
|
17
|
+
require 'yaml'
|
18
|
+
|
19
|
+
# Gem specific extensions:
|
20
|
+
require_relative 'dicom/extensions/array'
|
21
|
+
require_relative 'dicom/extensions/hash'
|
22
|
+
require_relative 'dicom/extensions/string'
|
23
|
+
|
24
|
+
# General module features/settings:
|
25
|
+
require_relative 'dicom/general/version'
|
26
|
+
require_relative 'dicom/general/constants'
|
27
|
+
require_relative 'dicom/general/variables'
|
28
|
+
require_relative 'dicom/general/methods'
|
29
|
+
require_relative 'dicom/general/logging'
|
30
|
+
require_relative 'dicom/general/deprecated'
|
31
|
+
|
17
32
|
# Core library:
|
18
33
|
# Super classes/modules:
|
19
34
|
require_relative 'dicom/image_processor'
|
20
35
|
require_relative 'dicom/parent'
|
21
36
|
require_relative 'dicom/image_item'
|
22
37
|
require_relative 'dicom/elemental'
|
38
|
+
require_relative 'dicom/elemental_parent'
|
23
39
|
# Subclasses and independent classes:
|
24
40
|
require_relative 'dicom/d_client'
|
25
41
|
require_relative 'dicom/d_object'
|
@@ -36,18 +52,21 @@ require_relative 'dicom/stream'
|
|
36
52
|
require_relative 'dicom/d_library'
|
37
53
|
require_relative 'dicom/dictionary_element'
|
38
54
|
require_relative 'dicom/uid'
|
39
|
-
# Extensions to the Ruby library:
|
40
|
-
require_relative 'dicom/ruby_extensions'
|
41
|
-
# Module settings:
|
42
|
-
require_relative 'dicom/version'
|
43
|
-
require_relative 'dicom/constants'
|
44
|
-
require_relative 'dicom/variables'
|
45
55
|
# Image processors:
|
46
56
|
require_relative 'dicom/image_processor_mini_magick'
|
47
57
|
require_relative 'dicom/image_processor_r_magick'
|
48
|
-
# Deprecated methods:
|
49
|
-
require_relative 'dicom/deprecated'
|
50
58
|
|
51
59
|
# Extensions (non-core functionality):
|
52
60
|
require_relative 'dicom/anonymizer'
|
53
|
-
require_relative 'dicom/audit_trail'
|
61
|
+
require_relative 'dicom/audit_trail'
|
62
|
+
|
63
|
+
|
64
|
+
module DICOM
|
65
|
+
|
66
|
+
# Defines the gem root directory in the file system.
|
67
|
+
ROOT_DIR = "#{File.dirname(__FILE__)}/dicom"
|
68
|
+
|
69
|
+
# The library instance (data dictionary) of the DICOM module.
|
70
|
+
LIBRARY = DICOM::DLibrary.new
|
71
|
+
|
72
|
+
end
|
data/lib/dicom/anonymizer.rb
CHANGED
@@ -1,649 +1,654 @@
|
|
1
|
-
module DICOM
|
2
|
-
|
3
|
-
# This is a convenience class for handling the anonymization
|
4
|
-
# (de-identification) of DICOM files.
|
5
|
-
#
|
6
|
-
# @note
|
7
|
-
# For a thorough introduction to the concept of DICOM anonymization,
|
8
|
-
# please refer to The DICOM Standard, Part 15: Security and System
|
9
|
-
# Management Profiles, Annex E: Attribute Confidentiality Profiles.
|
10
|
-
# For guidance on settings for individual data elements, please
|
11
|
-
# refer to DICOM PS 3.15, Annex E, Table E.1-1: Application Level
|
12
|
-
# Confidentiality Profile Attributes.
|
13
|
-
#
|
14
|
-
class Anonymizer
|
15
|
-
include Logging
|
16
|
-
|
17
|
-
# An AuditTrail instance used for this anonymization (if specified).
|
18
|
-
attr_reader :audit_trail
|
19
|
-
# The file name used for the AuditTrail serialization (if specified).
|
20
|
-
attr_reader :audit_trail_file
|
21
|
-
# A boolean that if set as true will cause all anonymized tags to be blank instead of getting some generic value.
|
22
|
-
attr_accessor :blank
|
23
|
-
# A hash of elements (represented by tag keys) that will be deleted from the DICOM objects on anonymization.
|
24
|
-
attr_reader :delete
|
25
|
-
# A boolean that if set as true, will make the anonymization delete all private tags.
|
26
|
-
attr_accessor :delete_private
|
27
|
-
# The cryptographic hash function to be used for encrypting DICOM values recorded in an audit trail file.
|
28
|
-
attr_reader :encryption
|
29
|
-
# A boolean that if set as true will cause all anonymized tags to get enumerated values, to enable post-anonymization re-identification by the user.
|
30
|
-
attr_accessor :enumeration
|
31
|
-
# The logger level which is applied to DObject operations during anonymization (defaults to Logger::FATAL).
|
32
|
-
attr_reader :logger_level
|
33
|
-
# A boolean that if set as true will cause all anonymized files to be written with random file names (if write_path has been specified).
|
34
|
-
attr_accessor :random_file_name
|
35
|
-
# A boolean that if set as true, will cause the anonymization to run on all levels of the DICOM file tag hierarchy.
|
36
|
-
attr_accessor :recursive
|
37
|
-
# A boolean indicating whether or not UIDs shall be replaced when executing the anonymization.
|
38
|
-
attr_accessor :uid
|
39
|
-
# The DICOM UID root to use when generating new UIDs.
|
40
|
-
attr_accessor :uid_root
|
41
|
-
# The path where the anonymized files will be saved. If this value is not set, the original DICOM files will be overwritten.
|
42
|
-
attr_accessor :write_path
|
43
|
-
|
44
|
-
# Creates an Anonymizer instance.
|
45
|
-
#
|
46
|
-
# @note To customize logging behaviour, refer to the Logging module documentation.
|
47
|
-
# @param [Hash] options the options to create an anonymizer instance with
|
48
|
-
# @option options [String] :audit_trail a file name path (if the file contains old audit data, these are loaded and used in the current anonymization)
|
49
|
-
# @option options [Boolean] :blank toggles whether to set the values of anonymized elements as empty instead of some generic value
|
50
|
-
# @option options [Boolean] :delete_private toggles whether private elements are to be deleted
|
51
|
-
# @option options [TrueClass, Digest::Class] :encryption if set as true, the default hash function (MD5) will be used for representing DICOM values in an audit file. Otherwise a Digest class can be given, e.g. Digest::SHA256
|
52
|
-
# @option options [Boolean] :enumeration toggles whether (some) elements get enumerated values (to enable post-anonymization re-identification)
|
53
|
-
# @option options [Fixnum] :logger_level the logger level which is applied to DObject operations during anonymization (defaults to Logger::FATAL)
|
54
|
-
# @option options [Boolean] :random_file_name toggles whether anonymized files will be given random file names when rewritten (in combination with the :write_path option)
|
55
|
-
# @option options [Boolean] :recursive toggles whether to anonymize on all sub-levels of the DICOM object tag hierarchies
|
56
|
-
# @option options [Boolean] :uid toggles whether UIDs will be replaced with custom generated UIDs (beware that to preserve UID relations in studies/series, the audit_trail feature must be used)
|
57
|
-
# @option options [String] :uid_root an organization (or custom) UID root to use when replacing UIDs
|
58
|
-
# @option options [String] :write_path a directory where the anonymized files are re-written (if not specified, files are overwritten)
|
59
|
-
# @example Create an Anonymizer instance and increase the log output
|
60
|
-
# a = Anonymizer.new
|
61
|
-
# a.logger.level = Logger::INFO
|
62
|
-
# @example Perform anonymization using the audit trail feature
|
63
|
-
# a = Anonymizer.new(:audit_trail => 'trail.json')
|
64
|
-
# a.enumeration = true
|
65
|
-
# a.write_path = '//anonymized/'
|
66
|
-
# a.anonymize('//dicom/today/')
|
67
|
-
#
|
68
|
-
def initialize(options={})
|
69
|
-
# Transfer options to attributes:
|
70
|
-
@blank = options[:blank]
|
71
|
-
@delete_private = options[:delete_private]
|
72
|
-
@enumeration = options[:enumeration]
|
73
|
-
@logger_level = options[:logger_level] || Logger::FATAL
|
74
|
-
@random_file_name = options[:random_file_name]
|
75
|
-
@recursive = options[:recursive]
|
76
|
-
@uid = options[:uid]
|
77
|
-
@uid_root = options[:uid_root] ? options[:uid_root] : UID_ROOT
|
78
|
-
@write_path = options[:write_path]
|
79
|
-
# Array of folders to be processed for anonymization:
|
80
|
-
@folders = Array.new
|
81
|
-
# Folders that will be skipped:
|
82
|
-
@exceptions = Array.new
|
83
|
-
# Data elements which will be anonymized (the array will hold a list of tag strings):
|
84
|
-
@tags = Array.new
|
85
|
-
# Default values to use on anonymized data elements:
|
86
|
-
@values = Array.new
|
87
|
-
# Which data elements will have enumeration applied, if requested by the user:
|
88
|
-
@enumerations = Array.new
|
89
|
-
# We use a Hash to store information from DICOM files if enumeration is desired:
|
90
|
-
@enum_old_hash = Hash.new
|
91
|
-
@enum_new_hash = Hash.new
|
92
|
-
# All the files to be anonymized will be put in this array:
|
93
|
-
@files = Array.new
|
94
|
-
@prefixes = Hash.new
|
95
|
-
# Setup audit trail if requested:
|
96
|
-
if options[:audit_trail]
|
97
|
-
@audit_trail_file = options[:audit_trail]
|
98
|
-
if File.exists?(@audit_trail_file) && File.size(@audit_trail_file) > 2
|
99
|
-
# Load the pre-existing audit trail from file:
|
100
|
-
@audit_trail = AuditTrail.read(@audit_trail_file)
|
101
|
-
else
|
102
|
-
# Start from scratch with an empty audit trail:
|
103
|
-
@audit_trail = AuditTrail.new
|
104
|
-
end
|
105
|
-
# Set up encryption if indicated:
|
106
|
-
if options[:encryption]
|
107
|
-
require 'digest'
|
108
|
-
if options[:encryption].respond_to?(:hexdigest)
|
109
|
-
@encryption = options[:encryption]
|
110
|
-
else
|
111
|
-
@encryption = Digest::MD5
|
112
|
-
end
|
113
|
-
end
|
114
|
-
end
|
115
|
-
# Set the default data elements to be anonymized:
|
116
|
-
set_defaults
|
117
|
-
end
|
118
|
-
|
119
|
-
# Checks for equality.
|
120
|
-
#
|
121
|
-
# Other and self are considered equivalent if they are
|
122
|
-
# of compatible types and their attributes are equivalent.
|
123
|
-
#
|
124
|
-
# @param other an object to be compared with self.
|
125
|
-
# @return [Boolean] true if self and other are considered equivalent
|
126
|
-
#
|
127
|
-
def ==(other)
|
128
|
-
if other.respond_to?(:to_anonymizer)
|
129
|
-
other.send(:state) == state
|
130
|
-
end
|
131
|
-
end
|
132
|
-
|
133
|
-
alias_method :eql?, :==
|
134
|
-
|
135
|
-
# Anonymizes the given DICOM data with the settings of this Anonymizer instance.
|
136
|
-
#
|
137
|
-
# @param [String, DObject, Array<String, DObject>] data single or multiple DICOM data (directories, file paths, binary strings, DICOM objects)
|
138
|
-
# @return [Array<DObject>] an array of the anonymized DICOM objects
|
139
|
-
#
|
140
|
-
def anonymize(data)
|
141
|
-
dicom = prepare(data)
|
142
|
-
if @tags.length > 0
|
143
|
-
dicom.each do |dcm|
|
144
|
-
anonymize_dcm(dcm)
|
145
|
-
# Write DICOM object to file unless it was passed to the anonymizer as an object:
|
146
|
-
write(dcm) unless dcm.was_dcm_on_input
|
147
|
-
end
|
148
|
-
else
|
149
|
-
logger.warn("No tags have been selected for anonymization. Aborting anonymization.")
|
150
|
-
end
|
151
|
-
# Reset the ruby-dicom log threshold to its original level:
|
152
|
-
logger.level = @original_level
|
153
|
-
# Save the audit trail (if used):
|
154
|
-
@audit_trail.write(@audit_trail_file) if @audit_trail
|
155
|
-
logger.info("Anonymization complete.")
|
156
|
-
dicom
|
157
|
-
end
|
158
|
-
|
159
|
-
# Specifies that the given tag is to be completely deleted
|
160
|
-
# from the anonymized DICOM objects.
|
161
|
-
#
|
162
|
-
# @param [String] tag a data element tag
|
163
|
-
# @example Completely delete the Patient's Name tag from the DICOM files
|
164
|
-
# a.delete_tag('0010,0010')
|
165
|
-
#
|
166
|
-
def delete_tag(tag)
|
167
|
-
raise ArgumentError, "Expected String, got #{tag.class}." unless tag.is_a?(String)
|
168
|
-
raise ArgumentError, "Expected a valid tag of format 'GGGG,EEEE', got #{tag}." unless tag.tag?
|
169
|
-
@delete[tag] = true
|
170
|
-
end
|
171
|
-
|
172
|
-
# Checks the enumeration status of this tag.
|
173
|
-
#
|
174
|
-
# @param [String] tag a data element tag
|
175
|
-
# @return [Boolean, NilClass] the enumeration status of the tag, or nil if the tag has no match
|
176
|
-
#
|
177
|
-
def enum(tag)
|
178
|
-
raise ArgumentError, "Expected String, got #{tag.class}." unless tag.is_a?(String)
|
179
|
-
raise ArgumentError, "Expected a valid tag of format 'GGGG,EEEE', got #{tag}." unless tag.tag?
|
180
|
-
pos = @tags.index(tag)
|
181
|
-
if pos
|
182
|
-
return @enumerations[pos]
|
183
|
-
else
|
184
|
-
logger.warn("The specified tag (#{tag}) was not found in the list of tags to be anonymized.")
|
185
|
-
return nil
|
186
|
-
end
|
187
|
-
end
|
188
|
-
|
189
|
-
# Computes a hash code for this object.
|
190
|
-
#
|
191
|
-
# @note Two objects with the same attributes will have the same hash code.
|
192
|
-
#
|
193
|
-
# @return [Fixnum] the object's hash code
|
194
|
-
#
|
195
|
-
def hash
|
196
|
-
state.hash
|
197
|
-
end
|
198
|
-
|
199
|
-
# Removes a tag from the list of tags that will be anonymized.
|
200
|
-
#
|
201
|
-
# @param [String] tag a data element tag
|
202
|
-
# @example Do not anonymize the Patient's Name tag
|
203
|
-
# a.remove_tag('0010,0010')
|
204
|
-
#
|
205
|
-
def remove_tag(tag)
|
206
|
-
raise ArgumentError, "Expected String, got #{tag.class}." unless tag.is_a?(String)
|
207
|
-
raise ArgumentError, "Expected a valid tag of format 'GGGG,EEEE', got #{tag}." unless tag.tag?
|
208
|
-
pos = @tags.index(tag)
|
209
|
-
if pos
|
210
|
-
@tags.delete_at(pos)
|
211
|
-
@values.delete_at(pos)
|
212
|
-
@enumerations.delete_at(pos)
|
213
|
-
end
|
214
|
-
end
|
215
|
-
|
216
|
-
# Sets the anonymization settings for the specified tag. If the tag is already present in the list
|
217
|
-
# of tags to be anonymized, its settings are updated, and if not, a new tag entry is created.
|
218
|
-
#
|
219
|
-
# @param [String] tag a data element tag
|
220
|
-
# @param [Hash] options the anonymization settings for the specified tag
|
221
|
-
# @option options [String, Integer, Float] :value the replacement value to be used when anonymizing this data element. Defaults to the pre-existing value and '' for new tags.
|
222
|
-
# @option options [String, Integer, Float] :enum specifies if enumeration is to be used for this tag. Defaults to the pre-existing value and false for new tags.
|
223
|
-
# @example Set the anonymization settings of the Patient's Name tag
|
224
|
-
# a.set_tag('0010,0010', :value => 'MrAnonymous', :enum => true)
|
225
|
-
#
|
226
|
-
def set_tag(tag, options={})
|
227
|
-
raise ArgumentError, "Expected String, got #{tag.class}." unless tag.is_a?(String)
|
228
|
-
raise ArgumentError, "Expected a valid tag of format 'GGGG,EEEE', got #{tag}." unless tag.tag?
|
229
|
-
pos = @tags.index(tag)
|
230
|
-
if pos
|
231
|
-
# Update existing values:
|
232
|
-
@values[pos] = options[:value] if options[:value]
|
233
|
-
@enumerations[pos] = options[:enum] if options[:enum] != nil
|
234
|
-
else
|
235
|
-
# Add new elements:
|
236
|
-
@tags << tag
|
237
|
-
@values << (options[:value] ? options[:value] : default_value(tag))
|
238
|
-
@enumerations << (options[:enum] ? options[:enum] : false)
|
239
|
-
end
|
240
|
-
end
|
241
|
-
|
242
|
-
# Returns self.
|
243
|
-
#
|
244
|
-
# @return [Anonymizer] self
|
245
|
-
#
|
246
|
-
def to_anonymizer
|
247
|
-
self
|
248
|
-
end
|
249
|
-
|
250
|
-
# Gives the value which will be used when anonymizing this tag.
|
251
|
-
#
|
252
|
-
# @note If enumeration is selected for a string type tag, a number will be
|
253
|
-
# appended in addition to the string that is returned here.
|
254
|
-
#
|
255
|
-
# @param [String] tag a data element tag
|
256
|
-
# @return [String, Integer, Float, NilClass] the replacement value for the specified tag, or nil if the tag is not matched
|
257
|
-
#
|
258
|
-
def value(tag)
|
259
|
-
raise ArgumentError, "Expected String, got #{tag.class}." unless tag.is_a?(String)
|
260
|
-
raise ArgumentError, "Expected a valid tag of format 'GGGG,EEEE', got #{tag}." unless tag.tag?
|
261
|
-
pos = @tags.index(tag)
|
262
|
-
if pos
|
263
|
-
return @values[pos]
|
264
|
-
else
|
265
|
-
logger.warn("The specified tag (#{tag}) was not found in the list of tags to be anonymized.")
|
266
|
-
return nil
|
267
|
-
end
|
268
|
-
end
|
269
|
-
|
270
|
-
|
271
|
-
private
|
272
|
-
|
273
|
-
|
274
|
-
# Performs anonymization on a DICOM object.
|
275
|
-
#
|
276
|
-
# @param [DObject] dcm a DICOM object
|
277
|
-
#
|
278
|
-
def anonymize_dcm(dcm)
|
279
|
-
# Extract the data element parents to investigate:
|
280
|
-
parents = element_parents(dcm)
|
281
|
-
parents.each do |parent|
|
282
|
-
# Anonymize the desired tags:
|
283
|
-
@tags.each_index do |j|
|
284
|
-
if parent.exists?(@tags[j])
|
285
|
-
element = parent[@tags[j]]
|
286
|
-
if element.is_a?(Element)
|
287
|
-
if @blank
|
288
|
-
value = ''
|
289
|
-
elsif @enumeration
|
290
|
-
old_value = element.value
|
291
|
-
# Only launch enumeration logic if there is an actual value to the data element:
|
292
|
-
if old_value
|
293
|
-
value = enumerated_value(old_value, j)
|
294
|
-
else
|
295
|
-
value = ''
|
296
|
-
end
|
297
|
-
else
|
298
|
-
# Use the value that has been set for this tag:
|
299
|
-
value = @values[j]
|
300
|
-
end
|
301
|
-
element.value = value
|
302
|
-
end
|
303
|
-
end
|
304
|
-
end
|
305
|
-
# Delete elements marked for deletion:
|
306
|
-
@delete.each_key do |tag|
|
307
|
-
parent.delete(tag) if parent.exists?(tag)
|
308
|
-
end
|
309
|
-
end
|
310
|
-
# General DICOM object manipulation:
|
311
|
-
# Add a Patient Identity Removed attribute (as per
|
312
|
-
# DICOM PS 3.15, Annex E, E.1.1 De-Identifier, point 6):
|
313
|
-
dcm.add(Element.new('0012,0062', 'YES'))
|
314
|
-
# Add a De-Identification Method Code Sequence Item:
|
315
|
-
dcm.add(Sequence.new('0012,0064')) unless dcm.exists?('0012,0064')
|
316
|
-
i = dcm['0012,0064'].add_item
|
317
|
-
i.add(Element.new('0012,0063', 'De-identified by the ruby-dicom Anonymizer'))
|
318
|
-
# FIXME: At some point we should add a set of de-identification method codes, as per
|
319
|
-
# DICOM PS 3.16 CID 7050 which corresponds to the settings chosen for the anonymizer.
|
320
|
-
# Delete the old File Meta Information group (as per
|
321
|
-
# DICOM PS 3.15, Annex E, E.1.1 De-Identifier, point 7):
|
322
|
-
dcm.delete_group('0002')
|
323
|
-
# Handle UIDs if requested:
|
324
|
-
replace_uids(parents) if @uid
|
325
|
-
# Delete private tags if indicated:
|
326
|
-
dcm.delete_private if @delete_private
|
327
|
-
end
|
328
|
-
|
329
|
-
# Gives the value to be used for the audit trail, which is either
|
330
|
-
# the original value itself, or an encrypted string based on it.
|
331
|
-
#
|
332
|
-
# @param [String, Integer, Float] original the original value of the tag to be anonymized
|
333
|
-
# @return [String, Integer, Float] with encryption, a hash string is returned, otherwise the original value
|
334
|
-
#
|
335
|
-
def at_value(original)
|
336
|
-
@encryption ? @encryption.hexdigest(original) : original
|
337
|
-
end
|
338
|
-
|
339
|
-
# Creates a hash that is used for storing information that is used when enumeration is selected.
|
340
|
-
#
|
341
|
-
def create_enum_hash
|
342
|
-
@enumerations.each_index do |i|
|
343
|
-
@enum_old_hash[@tags[i]] = Array.new
|
344
|
-
@enum_new_hash[@tags[i]] = Array.new
|
345
|
-
end
|
346
|
-
end
|
347
|
-
|
348
|
-
# Determines a default value to use for anonymizing the given tag.
|
349
|
-
#
|
350
|
-
# @param [String] tag a data element tag
|
351
|
-
# @return [String, Integer, Float] the default replacement value for a given tag
|
352
|
-
#
|
353
|
-
def default_value(tag)
|
354
|
-
name, vr = LIBRARY.name_and_vr(tag)
|
355
|
-
conversion = VALUE_CONVERSION[vr]
|
356
|
-
case conversion
|
357
|
-
when :to_i then return 0
|
358
|
-
when :to_f then return 0.0
|
359
|
-
else
|
360
|
-
# Assume type is string and return an empty string:
|
361
|
-
return ''
|
362
|
-
end
|
363
|
-
end
|
364
|
-
|
365
|
-
# Creates a write path for the given DICOM object, based on the object's
|
366
|
-
# original file path and the write_path attribute.
|
367
|
-
#
|
368
|
-
# @param [DObject] dcm a DICOM object
|
369
|
-
# @return [String] the destination directory path
|
370
|
-
#
|
371
|
-
def destination(dcm)
|
372
|
-
#
|
373
|
-
|
374
|
-
|
375
|
-
|
376
|
-
|
377
|
-
|
378
|
-
|
379
|
-
|
380
|
-
|
381
|
-
|
382
|
-
|
383
|
-
|
384
|
-
|
385
|
-
|
386
|
-
|
387
|
-
|
388
|
-
|
389
|
-
|
390
|
-
|
391
|
-
|
392
|
-
|
393
|
-
|
394
|
-
|
395
|
-
|
396
|
-
|
397
|
-
#
|
398
|
-
#
|
399
|
-
#
|
400
|
-
#
|
401
|
-
|
402
|
-
|
403
|
-
|
404
|
-
|
405
|
-
|
406
|
-
|
407
|
-
|
408
|
-
|
409
|
-
|
410
|
-
|
411
|
-
|
412
|
-
|
413
|
-
|
414
|
-
|
415
|
-
|
416
|
-
|
417
|
-
#
|
418
|
-
|
419
|
-
|
420
|
-
|
421
|
-
|
422
|
-
|
423
|
-
|
424
|
-
|
425
|
-
|
426
|
-
|
427
|
-
|
428
|
-
|
429
|
-
|
430
|
-
|
431
|
-
|
432
|
-
|
433
|
-
|
434
|
-
#
|
435
|
-
#
|
436
|
-
#
|
437
|
-
#
|
438
|
-
|
439
|
-
|
440
|
-
|
441
|
-
|
442
|
-
|
443
|
-
|
444
|
-
|
445
|
-
|
446
|
-
|
447
|
-
|
448
|
-
|
449
|
-
|
450
|
-
|
451
|
-
|
452
|
-
|
453
|
-
|
454
|
-
|
455
|
-
|
456
|
-
|
457
|
-
|
458
|
-
|
459
|
-
|
460
|
-
|
461
|
-
|
462
|
-
|
463
|
-
|
464
|
-
|
465
|
-
|
466
|
-
|
467
|
-
|
468
|
-
@
|
469
|
-
|
470
|
-
|
471
|
-
|
472
|
-
|
473
|
-
|
474
|
-
|
475
|
-
|
476
|
-
|
477
|
-
|
478
|
-
|
479
|
-
|
480
|
-
|
481
|
-
|
482
|
-
|
483
|
-
|
484
|
-
|
485
|
-
#
|
486
|
-
|
487
|
-
|
488
|
-
|
489
|
-
|
490
|
-
|
491
|
-
|
492
|
-
|
493
|
-
|
494
|
-
|
495
|
-
|
496
|
-
|
497
|
-
|
498
|
-
|
499
|
-
|
500
|
-
|
501
|
-
|
502
|
-
|
503
|
-
|
504
|
-
|
505
|
-
|
506
|
-
logger.
|
507
|
-
|
508
|
-
logger.level
|
509
|
-
|
510
|
-
|
511
|
-
|
512
|
-
dicom
|
513
|
-
|
514
|
-
|
515
|
-
|
516
|
-
|
517
|
-
|
518
|
-
|
519
|
-
|
520
|
-
#
|
521
|
-
#
|
522
|
-
|
523
|
-
|
524
|
-
|
525
|
-
|
526
|
-
|
527
|
-
|
528
|
-
|
529
|
-
|
530
|
-
|
531
|
-
|
532
|
-
|
533
|
-
|
534
|
-
|
535
|
-
|
536
|
-
|
537
|
-
|
538
|
-
|
539
|
-
|
540
|
-
|
541
|
-
|
542
|
-
|
543
|
-
|
544
|
-
|
545
|
-
|
546
|
-
|
547
|
-
|
548
|
-
|
549
|
-
|
550
|
-
|
551
|
-
|
552
|
-
|
553
|
-
|
554
|
-
|
555
|
-
|
556
|
-
|
557
|
-
|
558
|
-
|
559
|
-
|
560
|
-
|
561
|
-
|
562
|
-
'
|
563
|
-
|
564
|
-
'
|
565
|
-
'
|
566
|
-
#
|
567
|
-
'
|
568
|
-
'
|
569
|
-
'
|
570
|
-
'0004,
|
571
|
-
|
572
|
-
'
|
573
|
-
'
|
574
|
-
'
|
575
|
-
'
|
576
|
-
'
|
577
|
-
'0008,
|
578
|
-
|
579
|
-
|
580
|
-
|
581
|
-
|
582
|
-
|
583
|
-
|
584
|
-
|
585
|
-
|
586
|
-
|
587
|
-
['0008,
|
588
|
-
['0008,
|
589
|
-
['0008,
|
590
|
-
['0008,
|
591
|
-
['0008,
|
592
|
-
['0008,
|
593
|
-
['0008,
|
594
|
-
['0008,
|
595
|
-
['0008,
|
596
|
-
['0008,
|
597
|
-
['0008,
|
598
|
-
['0008,
|
599
|
-
['
|
600
|
-
['
|
601
|
-
['
|
602
|
-
['
|
603
|
-
['
|
604
|
-
['
|
605
|
-
|
606
|
-
|
607
|
-
|
608
|
-
|
609
|
-
|
610
|
-
|
611
|
-
|
612
|
-
|
613
|
-
|
614
|
-
|
615
|
-
|
616
|
-
|
617
|
-
|
618
|
-
|
619
|
-
|
620
|
-
|
621
|
-
|
622
|
-
|
623
|
-
|
624
|
-
|
625
|
-
|
626
|
-
|
627
|
-
|
628
|
-
|
629
|
-
|
630
|
-
|
631
|
-
|
632
|
-
|
633
|
-
|
634
|
-
|
635
|
-
|
636
|
-
|
637
|
-
|
638
|
-
|
639
|
-
|
640
|
-
|
641
|
-
|
642
|
-
|
643
|
-
|
644
|
-
|
645
|
-
|
646
|
-
|
647
|
-
|
648
|
-
|
649
|
-
end
|
1
|
+
module DICOM
|
2
|
+
|
3
|
+
# This is a convenience class for handling the anonymization
|
4
|
+
# (de-identification) of DICOM files.
|
5
|
+
#
|
6
|
+
# @note
|
7
|
+
# For a thorough introduction to the concept of DICOM anonymization,
|
8
|
+
# please refer to The DICOM Standard, Part 15: Security and System
|
9
|
+
# Management Profiles, Annex E: Attribute Confidentiality Profiles.
|
10
|
+
# For guidance on settings for individual data elements, please
|
11
|
+
# refer to DICOM PS 3.15, Annex E, Table E.1-1: Application Level
|
12
|
+
# Confidentiality Profile Attributes.
|
13
|
+
#
|
14
|
+
class Anonymizer
|
15
|
+
include Logging
|
16
|
+
|
17
|
+
# An AuditTrail instance used for this anonymization (if specified).
|
18
|
+
attr_reader :audit_trail
|
19
|
+
# The file name used for the AuditTrail serialization (if specified).
|
20
|
+
attr_reader :audit_trail_file
|
21
|
+
# A boolean that if set as true will cause all anonymized tags to be blank instead of getting some generic value.
|
22
|
+
attr_accessor :blank
|
23
|
+
# A hash of elements (represented by tag keys) that will be deleted from the DICOM objects on anonymization.
|
24
|
+
attr_reader :delete
|
25
|
+
# A boolean that if set as true, will make the anonymization delete all private tags.
|
26
|
+
attr_accessor :delete_private
|
27
|
+
# The cryptographic hash function to be used for encrypting DICOM values recorded in an audit trail file.
|
28
|
+
attr_reader :encryption
|
29
|
+
# A boolean that if set as true will cause all anonymized tags to get enumerated values, to enable post-anonymization re-identification by the user.
|
30
|
+
attr_accessor :enumeration
|
31
|
+
# The logger level which is applied to DObject operations during anonymization (defaults to Logger::FATAL).
|
32
|
+
attr_reader :logger_level
|
33
|
+
# A boolean that if set as true will cause all anonymized files to be written with random file names (if write_path has been specified).
|
34
|
+
attr_accessor :random_file_name
|
35
|
+
# A boolean that if set as true, will cause the anonymization to run on all levels of the DICOM file tag hierarchy.
|
36
|
+
attr_accessor :recursive
|
37
|
+
# A boolean indicating whether or not UIDs shall be replaced when executing the anonymization.
|
38
|
+
attr_accessor :uid
|
39
|
+
# The DICOM UID root to use when generating new UIDs.
|
40
|
+
attr_accessor :uid_root
|
41
|
+
# The path where the anonymized files will be saved. If this value is not set, the original DICOM files will be overwritten.
|
42
|
+
attr_accessor :write_path
|
43
|
+
|
44
|
+
# Creates an Anonymizer instance.
|
45
|
+
#
|
46
|
+
# @note To customize logging behaviour, refer to the Logging module documentation.
|
47
|
+
# @param [Hash] options the options to create an anonymizer instance with
|
48
|
+
# @option options [String] :audit_trail a file name path (if the file contains old audit data, these are loaded and used in the current anonymization)
|
49
|
+
# @option options [Boolean] :blank toggles whether to set the values of anonymized elements as empty instead of some generic value
|
50
|
+
# @option options [Boolean] :delete_private toggles whether private elements are to be deleted
|
51
|
+
# @option options [TrueClass, Digest::Class] :encryption if set as true, the default hash function (MD5) will be used for representing DICOM values in an audit file. Otherwise a Digest class can be given, e.g. Digest::SHA256
|
52
|
+
# @option options [Boolean] :enumeration toggles whether (some) elements get enumerated values (to enable post-anonymization re-identification)
|
53
|
+
# @option options [Fixnum] :logger_level the logger level which is applied to DObject operations during anonymization (defaults to Logger::FATAL)
|
54
|
+
# @option options [Boolean] :random_file_name toggles whether anonymized files will be given random file names when rewritten (in combination with the :write_path option)
|
55
|
+
# @option options [Boolean] :recursive toggles whether to anonymize on all sub-levels of the DICOM object tag hierarchies
|
56
|
+
# @option options [Boolean] :uid toggles whether UIDs will be replaced with custom generated UIDs (beware that to preserve UID relations in studies/series, the audit_trail feature must be used)
|
57
|
+
# @option options [String] :uid_root an organization (or custom) UID root to use when replacing UIDs
|
58
|
+
# @option options [String] :write_path a directory where the anonymized files are re-written (if not specified, files are overwritten)
|
59
|
+
# @example Create an Anonymizer instance and increase the log output
|
60
|
+
# a = Anonymizer.new
|
61
|
+
# a.logger.level = Logger::INFO
|
62
|
+
# @example Perform anonymization using the audit trail feature
|
63
|
+
# a = Anonymizer.new(:audit_trail => 'trail.json')
|
64
|
+
# a.enumeration = true
|
65
|
+
# a.write_path = '//anonymized/'
|
66
|
+
# a.anonymize('//dicom/today/')
|
67
|
+
#
|
68
|
+
def initialize(options={})
  # Purpose: builds an Anonymizer from an options hash (all keys optional).
  # Boolean toggles are stored as given; :logger_level falls back to
  # Logger::FATAL and :uid_root falls back to the UID_ROOT constant.
  @blank = options[:blank]
  @delete_private = options[:delete_private]
  @enumeration = options[:enumeration]
  @logger_level = options[:logger_level] || Logger::FATAL
  @random_file_name = options[:random_file_name]
  @recursive = options[:recursive]
  @uid = options[:uid]
  @uid_root = options[:uid_root] || UID_ROOT
  @write_path = options[:write_path]
  # Folders to be processed for anonymization, and folders to be skipped:
  @folders = []
  @exceptions = []
  # Parallel arrays: tag strings, their replacement values and their
  # per-tag enumeration flags (populated by set_defaults below):
  @tags = []
  @values = []
  @enumerations = []
  # Original/replacement bookkeeping used when enumerating without an audit trail:
  @enum_old_hash = {}
  @enum_new_hash = {}
  # Files to be anonymized:
  @files = []
  # UID prefix numbers, keyed by tag:
  @prefixes = {}
  # Set up the audit trail if requested:
  if options[:audit_trail]
    @audit_trail_file = options[:audit_trail]
    # File.exist? is used because File.exists? was removed in Ruby 3.2.
    # Only files larger than 2 bytes are loaded as a pre-existing trail.
    if File.exist?(@audit_trail_file) && File.size(@audit_trail_file) > 2
      @audit_trail = AuditTrail.read(@audit_trail_file)
    else
      # Start from scratch with an empty audit trail:
      @audit_trail = AuditTrail.new
    end
    # Encryption only applies in combination with an audit trail. Any object
    # responding to #hexdigest is accepted; anything else selects MD5.
    if options[:encryption]
      require 'digest'
      @encryption = options[:encryption].respond_to?(:hexdigest) ? options[:encryption] : Digest::MD5
    end
  end
  # Load the default tags, values, enumeration flags and static UIDs:
  set_defaults
end
|
118
|
+
|
119
|
+
# Checks for equality.
|
120
|
+
#
|
121
|
+
# Other and self are considered equivalent if they are
|
122
|
+
# of compatible types and their attributes are equivalent.
|
123
|
+
#
|
124
|
+
# @param other an object to be compared with self.
|
125
|
+
# @return [Boolean] true if self and other are considered equivalent
|
126
|
+
#
|
127
|
+
def ==(other)
  # Objects that cannot present themselves as an Anonymizer are not
  # comparable; nil (falsy) is returned for those.
  return unless other.respond_to?(:to_anonymizer)
  # state is a private method, hence the send on the other instance.
  other.send(:state) == state
end

alias_method :eql?, :==
|
134
|
+
|
135
|
+
# Anonymizes the given DICOM data with the settings of this Anonymizer instance.
|
136
|
+
#
|
137
|
+
# @param [String, DObject, Array<String, DObject>] data single or multiple DICOM data (directories, file paths, binary strings, DICOM objects)
|
138
|
+
# @return [Array<DObject>] an array of the anonymized DICOM objects
|
139
|
+
#
|
140
|
+
def anonymize(data)
  # Purpose: loads the given data, anonymizes every resulting DICOM object and
  # returns the array of objects. Objects which were loaded from file/string
  # are written back to disk; objects passed in as DObject instances are not.
  dicom = prepare(data)
  no_tags = @tags.empty?
  begin
    unless no_tags
      dicom.each do |dcm|
        anonymize_dcm(dcm)
        # Write DICOM object to file unless it was passed to the anonymizer as an object:
        write(dcm) unless dcm.was_dcm_on_input
      end
    end
  ensure
    # prepare leaves the logger at the (usually suppressed) anonymization level.
    # Restore the original threshold even if anonymization raised:
    logger.level = @original_level
  end
  # Emitted after the level has been restored, so that the warning is not
  # swallowed by the temporary anonymization log threshold:
  logger.warn("No tags have been selected for anonymization. Aborting anonymization.") if no_tags
  # Save the audit trail (if used):
  @audit_trail.write(@audit_trail_file) if @audit_trail
  logger.info("Anonymization complete.")
  dicom
end
|
158
|
+
|
159
|
+
# Specifies that the given tag is to be completely deleted
|
160
|
+
# from the anonymized DICOM objects.
|
161
|
+
#
|
162
|
+
# @param [String] tag a data element tag
|
163
|
+
# @example Completely delete the Patient's Name tag from the DICOM files
|
164
|
+
# a.delete_tag('0010,0010')
|
165
|
+
#
|
166
|
+
def delete_tag(tag)
  # Only String tags of the 'GGGG,EEEE' form are accepted:
  unless tag.is_a?(String)
    raise ArgumentError, "Expected String, got #{tag.class}."
  end
  unless tag.tag?
    raise ArgumentError, "Expected a valid tag of format 'GGGG,EEEE', got #{tag}."
  end
  # Register the tag in the deletion lookup (consulted by anonymize_dcm):
  @delete.store(tag, true)
end
|
171
|
+
|
172
|
+
# Checks the enumeration status of this tag.
|
173
|
+
#
|
174
|
+
# @param [String] tag a data element tag
|
175
|
+
# @return [Boolean, NilClass] the enumeration status of the tag, or nil if the tag has no match
|
176
|
+
#
|
177
|
+
def enum(tag)
  unless tag.is_a?(String)
    raise ArgumentError, "Expected String, got #{tag.class}."
  end
  unless tag.tag?
    raise ArgumentError, "Expected a valid tag of format 'GGGG,EEEE', got #{tag}."
  end
  # @tags and @enumerations are parallel arrays, so the tag's position
  # gives the position of its enumeration flag:
  position = @tags.index(tag)
  return @enumerations[position] if position
  # Unknown tag: warn and signal the miss with nil.
  logger.warn("The specified tag (#{tag}) was not found in the list of tags to be anonymized.")
  nil
end
|
188
|
+
|
189
|
+
# Computes a hash code for this object.
|
190
|
+
#
|
191
|
+
# @note Two objects with the same attributes will have the same hash code.
|
192
|
+
#
|
193
|
+
# @return [Fixnum] the object's hash code
|
194
|
+
#
|
195
|
+
def hash
  # The hash code is derived from the same attribute array that == compares,
  # so instances with equal state yield equal hash codes.
  state.hash
end
|
198
|
+
|
199
|
+
# Removes a tag from the list of tags that will be anonymized.
|
200
|
+
#
|
201
|
+
# @param [String] tag a data element tag
|
202
|
+
# @example Do not anonymize the Patient's Name tag
|
203
|
+
# a.remove_tag('0010,0010')
|
204
|
+
#
|
205
|
+
def remove_tag(tag)
  unless tag.is_a?(String)
    raise ArgumentError, "Expected String, got #{tag.class}."
  end
  unless tag.tag?
    raise ArgumentError, "Expected a valid tag of format 'GGGG,EEEE', got #{tag}."
  end
  index = @tags.index(tag)
  # Nothing to do (nil is returned) when the tag is not in the list:
  return unless index
  # Drop the entry at the same position in all three parallel arrays;
  # the removed enumeration flag is the return value.
  [@tags, @values, @enumerations].map { |list| list.delete_at(index) }.last
end
|
215
|
+
|
216
|
+
# Sets the anonymization settings for the specified tag. If the tag is already present in the list
|
217
|
+
# of tags to be anonymized, its settings are updated, and if not, a new tag entry is created.
|
218
|
+
#
|
219
|
+
# @param [String] tag a data element tag
|
220
|
+
# @param [Hash] options the anonymization settings for the specified tag
|
221
|
+
# @option options [String, Integer, Float] :value the replacement value to be used when anonymizing this data element. Defaults to the pre-existing value and '' for new tags.
|
222
|
+
# @option options [Boolean] :enum specifies if enumeration is to be used for this tag. Defaults to the pre-existing value and false for new tags.
|
223
|
+
# @example Set the anonymization settings of the Patient's Name tag
|
224
|
+
# a.set_tag('0010,0010', :value => 'MrAnonymous', :enum => true)
|
225
|
+
#
|
226
|
+
def set_tag(tag, options={})
  unless tag.is_a?(String)
    raise ArgumentError, "Expected String, got #{tag.class}."
  end
  unless tag.tag?
    raise ArgumentError, "Expected a valid tag of format 'GGGG,EEEE', got #{tag}."
  end
  index = @tags.index(tag)
  if index.nil?
    # Unknown tag: append a new entry to the three parallel arrays, falling
    # back to a type-based default value and to no enumeration.
    @tags << tag
    @values << (options[:value] || default_value(tag))
    @enumerations << (options[:enum] || false)
  else
    # Known tag: only overwrite the settings that were actually supplied.
    @values[index] = options[:value] if options[:value]
    @enumerations[index] = options[:enum] unless options[:enum].nil?
  end
end
|
241
|
+
|
242
|
+
# Returns self.
|
243
|
+
#
|
244
|
+
# @return [Anonymizer] self
|
245
|
+
#
|
246
|
+
def to_anonymizer
  # An Anonymizer is its own Anonymizer representation (used by == as a type check).
  self
end
|
249
|
+
|
250
|
+
# Gives the value which will be used when anonymizing this tag.
|
251
|
+
#
|
252
|
+
# @note If enumeration is selected for a string type tag, a number will be
|
253
|
+
# appended in addition to the string that is returned here.
|
254
|
+
#
|
255
|
+
# @param [String] tag a data element tag
|
256
|
+
# @return [String, Integer, Float, NilClass] the replacement value for the specified tag, or nil if the tag is not matched
|
257
|
+
#
|
258
|
+
def value(tag)
  unless tag.is_a?(String)
    raise ArgumentError, "Expected String, got #{tag.class}."
  end
  unless tag.tag?
    raise ArgumentError, "Expected a valid tag of format 'GGGG,EEEE', got #{tag}."
  end
  # @tags and @values are parallel arrays; look the value up by tag position:
  position = @tags.index(tag)
  return @values[position] if position
  # Unknown tag: warn and signal the miss with nil.
  logger.warn("The specified tag (#{tag}) was not found in the list of tags to be anonymized.")
  nil
end
|
269
|
+
|
270
|
+
|
271
|
+
private
|
272
|
+
|
273
|
+
|
274
|
+
# Performs anonymization on a DICOM object.
|
275
|
+
#
|
276
|
+
# @param [DObject] dcm a DICOM object
|
277
|
+
#
|
278
|
+
def anonymize_dcm(dcm)
  # The parents are the DObject itself, plus (when recursive) its child items:
  parents = element_parents(dcm)
  parents.each do |parent|
    # Replace the value of every selected tag that is present as a data element:
    @tags.each_with_index do |tag, j|
      next unless parent.exists?(tag)
      element = parent[tag]
      next unless element.is_a?(Element)
      element.value =
        if @blank
          ''
        elsif @enumeration
          # Only launch enumeration logic if the data element actually has a value:
          old_value = element.value
          old_value ? enumerated_value(old_value, j) : ''
        else
          # Use the replacement value that has been set for this tag:
          @values[j]
        end
    end
    # Remove the elements which have been marked for complete deletion:
    @delete.each_key { |tag| parent.delete(tag) if parent.exists?(tag) }
  end
  # General DICOM object manipulation:
  # Add a Patient Identity Removed attribute (as per
  # DICOM PS 3.15, Annex E, E.1.1 De-Identifier, point 6):
  dcm.add(Element.new('0012,0062', 'YES'))
  # Add a De-Identification Method Code Sequence Item (creating the sequence if missing):
  dcm.add(Sequence.new('0012,0064')) unless dcm.exists?('0012,0064')
  method_item = dcm['0012,0064'].add_item
  method_item.add(Element.new('0012,0063', 'De-identified by the ruby-dicom Anonymizer'))
  # FIXME: At some point we should add a set of de-identification method codes, as per
  # DICOM PS 3.16 CID 7050 which corresponds to the settings chosen for the anonymizer.
  # Delete the old File Meta Information group (as per
  # DICOM PS 3.15, Annex E, E.1.1 De-Identifier, point 7):
  dcm.delete_group('0002')
  # Optional steps: UID replacement and private tag removal.
  replace_uids(parents) if @uid
  dcm.delete_private if @delete_private
end
|
328
|
+
|
329
|
+
# Gives the value to be used for the audit trail, which is either
|
330
|
+
# the original value itself, or an encrypted string based on it.
|
331
|
+
#
|
332
|
+
# @param [String, Integer, Float] original the original value of the tag to be anonymized
|
333
|
+
# @return [String, Integer, Float] with encryption, a hash string is returned, otherwise the original value
|
334
|
+
#
|
335
|
+
def at_value(original)
  # Purpose: gives the value to store in the audit trail for an original value.
  # Without encryption the original is returned untouched. With encryption,
  # the hex digest of the original is returned. The original is converted with
  # to_s first, because digest classes only accept strings and the original
  # may be an Integer or a Float (to_s is a no-op for String input).
  return original unless @encryption
  @encryption.hexdigest(original.to_s)
end
|
338
|
+
|
339
|
+
# Creates a hash that is used for storing information that is used when enumeration is selected.
|
340
|
+
#
|
341
|
+
def create_enum_hash
  # For every position in the enumeration list, start out with empty lists of
  # encountered original values and their replacement values, keyed by tag.
  @enumerations.each_index do |position|
    key = @tags[position]
    @enum_old_hash[key] = []
    @enum_new_hash[key] = []
  end
end
|
347
|
+
|
348
|
+
# Determines a default value to use for anonymizing the given tag.
|
349
|
+
#
|
350
|
+
# @param [String] tag a data element tag
|
351
|
+
# @return [String, Integer, Float] the default replacement value for a given tag
|
352
|
+
#
|
353
|
+
def default_value(tag)
  # Only the value representation is of interest here; the name is discarded.
  _name, vr = LIBRARY.name_and_vr(tag)
  conversion = VALUE_CONVERSION[vr]
  # Numeric value representations get a zero of the matching numeric type:
  return 0 if conversion == :to_i
  return 0.0 if conversion == :to_f
  # Anything else is assumed to be of string type:
  ''
end
|
364
|
+
|
365
|
+
# Creates a write path for the given DICOM object, based on the object's
|
366
|
+
# original file path and the write_path attribute.
|
367
|
+
#
|
368
|
+
# @param [DObject] dcm a DICOM object
|
369
|
+
# @return [String] the destination directory path
|
370
|
+
#
|
371
|
+
def destination(dcm)
  source = dcm.source
  # Everything in front of the file name is the source directory
  # (the current dir symbol is used for a bare file name):
  name_offset = source.rindex(File.basename(source))
  source_dir = name_offset == 0 ? "." : source[0...name_offset]
  target_folders = @write_path.split(File::SEPARATOR)
  # Current dir symbols carry no folder information and are dropped:
  source_folders = source_dir.split(File::SEPARATOR)
  source_folders.delete('.')
  # Locate the first position (within the shortest of the two folder lists)
  # where the source and target folder names differ:
  shared = [source_folders.length, target_folders.length].min
  split_at = (0...shared).find { |i| target_folders[i] != source_folders[i] }
  # The source folders from the point of divergence (if any) are appended to the write path:
  tail = split_at ? source_folders[split_at..-1] : nil
  [target_folders, tail].compact.join(File::SEPARATOR)
end
|
396
|
+
|
397
|
+
# Extracts all parents from a DObject instance which potentially
|
398
|
+
# have child (data) elements. This typically means the DObject
|
399
|
+
# instance itself as well as items (i.e. not sequences).
|
400
|
+
# Note that unless the @recursive attribute has been set,
|
401
|
+
# this method will only return the DObject (placed inside an array).
|
402
|
+
#
|
403
|
+
# @param [DObject] dcm a DICOM object
|
404
|
+
# @return [Array<DObject, Item>] an array containing either just a DObject or also all parental child items within the tag hierarchy
|
405
|
+
#
|
406
|
+
def element_parents(dcm)
  # The DICOM object itself is always a parent to investigate:
  parents = [dcm]
  return parents unless @recursive
  # With recursion enabled, the items found beneath each top level sequence are added as well:
  dcm.sequences.each do |seq|
    parents.concat(element_parents_recursive(seq))
  end
  parents
end
|
416
|
+
|
417
|
+
# Recursively extracts all item parents from a sequence instance (including
|
418
|
+
# any sub-sequences) which actually contain child (data) elements.
|
419
|
+
#
|
420
|
+
# @param [Sequence] sequence a Sequence instance
|
421
|
+
# @return [Array<Item>] an array containing items within the tag hierarchy that contains child elements
|
422
|
+
#
|
423
|
+
def element_parents_recursive(sequence)
  sequence.items.each_with_object([]) do |item, found|
    # An item only counts as a parent when it reports having child elements:
    found << item if item.elements?
    # Regardless of that, descend into any sequences the item contains:
    item.sequences.each do |sub_sequence|
      found.concat(element_parents_recursive(sub_sequence))
    end
  end
end
|
433
|
+
|
434
|
+
# Handles the enumeration for the given data element tag.
|
435
|
+
# If its value has been encountered before, its corresponding enumerated
|
436
|
+
# replacement value is retrieved, and if a new original value is encountered,
|
437
|
+
# a new enumerated replacement value is found by increasing an index by 1.
|
438
|
+
#
|
439
|
+
# @param [String, Integer, Float] original the original value of the tag to be anonymized
|
440
|
+
# @param [Fixnum] j the index of this tag in the tag-related instance arrays
|
441
|
+
# @return [String, Integer, Float] the replacement value which is used for the anonymization of the tag
|
442
|
+
#
|
443
|
+
def enumerated_value(original, j)
  # Purpose: gives the replacement value for the tag at index j of the
  # tag-related instance arrays, given the original value of the element.
  # The same original value always maps to the same replacement, while a new
  # original value gets the tag's base value combined with the next index.
  # Tags without enumeration simply use their base replacement value:
  return @values[j] unless @enumerations[j]
  tag = @tags[j]
  if @audit_trail
    # The audit trail is keyed on the (possibly encrypted) original value:
    key = at_value(original)
    replacement = @audit_trail.replacement(tag, key)
    unless replacement
      # This original value has not been encountered yet: use the next index
      # and add the new record to the audit trail.
      index = @audit_trail.records(tag).length + 1
      replacement = numbered_replacement(@values[j], index)
      @audit_trail.add_record(tag, key, replacement)
    end
  else
    # Without an audit trail, the values seen so far are kept in two parallel
    # arrays (originals and replacements) for this tag:
    previous_old = @enum_old_hash[tag]
    previous_new = @enum_new_hash[tag]
    seen_at = previous_old.index(original)
    if seen_at
      # Current value has been observed before:
      replacement = previous_new[seen_at]
    else
      # Current value has not been encountered before:
      replacement = numbered_replacement(@values[j], previous_old.length + 1)
      previous_old << original
      previous_new << replacement
    end
  end
  replacement
end

# Combines a base replacement value with an enumeration index: the index is
# appended to strings and added to numeric values (appending a string to a
# numeric base would raise a TypeError).
def numbered_replacement(base, index)
  base.is_a?(String) ? base + index.to_s : base + index
end
|
484
|
+
|
485
|
+
# Establishes a prefix for a given UID tag.
|
486
|
+
# This makes it somewhat easier to distinguish
|
487
|
+
# between different types of random generated UIDs.
|
488
|
+
#
|
489
|
+
# @param [String] tag a data element string tag
|
490
|
+
#
|
491
|
+
def prefix(tag)
  # Each tag gets a running number the first time it is requested (the count
  # of tags registered so far plus one), and keeps that number afterwards.
  @prefixes[tag] ||= @prefixes.length + 1
end
|
499
|
+
|
500
|
+
# Prepares the data for anonymization.
|
501
|
+
#
|
502
|
+
# @param [String, DObject, Array<String, DObject>] data single or multiple DICOM data (directories, file paths, binary strings, DICOM objects)
|
503
|
+
# @return [Array<DObject>] an array of the loaded DObject instances
|
504
|
+
#
|
505
|
+
def prepare(data)
  logger.info("Loading DICOM data.")
  # Remember the current log threshold, and apply the anonymization threshold
  # while loading (usually to suppress messages from the DObject class):
  @original_level = logger.level
  logger.level = @logger_level
  loaded = DICOM.load(data)
  # Briefly switch back to the original threshold to report the load result,
  # then leave the anonymization threshold in effect for the caller:
  logger.level = @original_level
  logger.info("#{loaded.length} DICOM objects have been prepared for anonymization.")
  logger.level = @logger_level
  # Reset the enumeration bookkeeping if enumeration is requested:
  create_enum_hash if @enumeration
  # SecureRandom is only needed when random file names are requested:
  require 'securerandom' if @random_file_name
  loaded
end
|
519
|
+
|
520
|
+
# Replaces the UIDs of the given DICOM object.
|
521
|
+
#
|
522
|
+
# @note Empty UIDs are ignored (we don't generate new UIDs for these).
|
523
|
+
# @note If AuditTrail is set, the relationship between old and new UIDs are preserved,
|
524
|
+
# and the relations between files in a study/series should remain valid.
|
525
|
+
# @param [Array<DObject, Item>] parents dicom parent objects whose child elements will be investigated
|
526
|
+
#
|
527
|
+
def replace_uids(parents)
  parents.each do |parent|
    parent.each_element do |element|
      # Only UID elements which are not on the static (never remapped) list qualify:
      next unless element.vr == 'UI' && !@static_uids[element.tag]
      original = element.value
      # Empty UIDs are left alone:
      next unless original && original.length > 0
      if @audit_trail
        # Reuse the replacement if this UID has been encountered already:
        replacement = @audit_trail.replacement('uids', original)
        unless replacement
          # The UID has not been stored previously. Generate a new one and record it:
          replacement = DICOM.generate_uid(@uid_root, prefix(element.tag))
          @audit_trail.add_record('uids', original, replacement)
        end
        element.value = replacement
      else
        # UID relations are not preserved without an audit trail. Just insert a custom UID:
        element.value = DICOM.generate_uid(@uid_root, prefix(element.tag))
      end
    end
  end
end
|
554
|
+
|
555
|
+
# Sets up some default information variables that are used by the Anonymizer.
|
556
|
+
#
|
557
|
+
def set_defaults
  # Some UIDs should not be remapped even if uid anonymization has been requested.
  # They are kept in a tag => true lookup:
  static_tags = %w[
    0002,0100 0004,1432
    0008,010C 0008,010D
    0002,0010 0400,0010 0400,0510 0004,1512
    0000,0002 0000,0003 0002,0002 0004,1510 0004,151A
    0008,0016 0008,001A 0008,001B 0008,0062 0008,1150 0008,115A
  ]
  # (Listed in order: private related, coding scheme related,
  # transfer syntax related and SOP class related UIDs.)
  @static_uids = static_tags.each_with_object({}) { |tag, lookup| lookup[tag] = true }
  # Default tags to anonymize, each row being: tag, replacement value, enumeration flag.
  defaults = [
    ['0008,0012', '20000101', false], # Instance Creation Date
    ['0008,0013', '000000.00', false], # Instance Creation Time
    ['0008,0020', '20000101', false], # Study Date
    ['0008,0021', '20000101', false], # Series Date
    ['0008,0022', '20000101', false], # Acquisition Date
    ['0008,0023', '20000101', false], # Image Date
    ['0008,0030', '000000.00', false], # Study Time
    ['0008,0031', '000000.00', false], # Series Time
    ['0008,0032', '000000.00', false], # Acquisition Time
    ['0008,0033', '000000.00', false], # Image Time
    ['0008,0050', '', true], # Accession Number
    ['0008,0080', 'Institution', true], # Institution name
    ['0008,0081', 'Address', true], # Institution Address
    ['0008,0090', 'Physician', true], # Referring Physician's name
    ['0008,1010', 'Station', true], # Station name
    ['0008,1040', 'Department', true], # Institutional Department name
    ['0008,1070', 'Operator', true], # Operator's Name
    ['0010,0010', 'Patient', true], # Patient's name
    ['0010,0020', 'ID', true], # Patient's ID
    ['0010,0030', '20000101', false], # Patient's Birth Date
    ['0010,0040', 'O', false], # Patient's Sex
    ['0010,1010', '', false], # Patient's Age
    ['0020,4000', '', false], # Image Comments
  ]
  # The rows are split column-wise into the three parallel instance arrays:
  @tags = defaults.map { |row| row[0] }
  @values = defaults.map { |row| row[1] }
  @enumerations = defaults.map { |row| row[2] }
  # Tags to be deleted completely during anonymization (none by default):
  @delete = {}
end
|
617
|
+
|
618
|
+
# Collects the attributes of this instance.
|
619
|
+
#
|
620
|
+
# @return [Array] an array of attributes
|
621
|
+
#
|
622
|
+
def state
  # The attributes which define an Anonymizer's configuration, in a fixed
  # order (used for equality comparison and hash code computation).
  [
    @tags,
    @values,
    @enumerations,
    @delete,
    @blank,
    @delete_private,
    @enumeration,
    @logger_level,
    @random_file_name,
    @recursive,
    @uid,
    @uid_root,
    @write_path
  ]
end
|
629
|
+
|
630
|
+
# Writes a DICOM object to file.
|
631
|
+
#
|
632
|
+
# @param [DObject] dcm a DICOM object
|
633
|
+
#
|
634
|
+
def write(dcm)
  # Without a write path, the original DICOM file is overwritten with the anonymized DICOM object:
  return dcm.write(dcm.source) unless @write_path
  # Otherwise the object is written to a separate directory. A root shared by the
  # original and the new directories is taken into account by destination:
  directory = destination(dcm)
  file_name = @random_file_name ? "#{SecureRandom.hex(16)}.dcm" : File.basename(dcm.source)
  dcm.write(File.join(directory, file_name))
end
|
651
|
+
|
652
|
+
end
|
653
|
+
|
654
|
+
end
|