deidentify 2.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 9eca693cc144bd94344c12561d0728028b6ce0ccb840cb5beeb71239a8345d5c
4
+ data.tar.gz: 27f33aa54b6f9a017b68da0b0bdbbf4c53a5bd7f1b6739742c235d008032461f
5
+ SHA512:
6
+ metadata.gz: 98cff4920b7254d44a6df1aa8c3d5802753d74244707b05bbd625f8af18aefa09c5cd3f25c70a6f6174be293f095a647d1534e64eeb5bf73765458f465c743fc
7
+ data.tar.gz: 529987f6564b6dd8b52cdbfa01a1b6432c3d431c8482621c16baef665cbaef6fd79455022a400e141ad4842fde8d1ec31a1a0a715623199ac51a6fa01a8f6839
@@ -0,0 +1,19 @@
1
+ # frozen_string_literal: true
2
+
3
module Deidentify
  # Policy that replaces a value with a salted SHA-256 hex digest.
  class BaseHash
    # Hashes +old_value+ together with the configured salt.
    #
    # @param old_value [Object, nil] value to hash; anything that is not
    #   +present?+ (nil, empty string, ...) is returned unchanged
    # @param length [Integer, nil] optional maximum length of the result; the
    #   digest is cut to this many characters when it is longer
    # @return [String, Object] the hex digest (possibly truncated), or
    #   +old_value+ itself when it is blank
    # @raise [Deidentify::Error] when no salt has been configured
    def self.call(old_value, length: nil)
      return old_value unless old_value.present?

      salt = Deidentify.configuration.salt

      raise Deidentify::Error, 'You must specify the salting value in the configuration' if salt.blank?

      # Interpolate instead of using String#+ so that non-String values
      # (integers, symbols, ...) are hashed by their string form instead of
      # raising; for String input the digest is identical.
      hash = Digest::SHA256.hexdigest("#{old_value}#{salt}")

      hash = hash[0, length] if length.present? && length < hash.length

      hash
    end
  end
end
@@ -0,0 +1,12 @@
1
+ # frozen_string_literal: true
2
+
3
module Deidentify
  # Holds the gem-wide settings.
  #
  # +salt+ is read by BaseHash and appended to every value before hashing.
  # +scope+, when set, is called by the Deidentify concern with a model class
  # or association and is expected to return a relation to search in.
  class Configuration
    attr_accessor :salt, :scope

    # Both settings start out unset.
    def initialize
      @salt = nil
      # Assign nil explicitly; the previous `@scope = scope` read the unset
      # attribute through its own reader and only produced nil by accident.
      @scope = nil
    end
  end
end
@@ -0,0 +1,9 @@
1
+ # frozen_string_literal: true
2
+
3
module Deidentify
  # Policy that discards a value entirely.
  class Delete
    class << self
      # Ignores the current value and always answers nil.
      #
      # @param _old_value [Object] unused
      # @return [nil]
      def call(_old_value)
        nil
      end
    end
  end
end
@@ -0,0 +1,16 @@
1
+ # frozen_string_literal: true
2
+
3
module Deidentify
  # Policy that coarsens an IP address by zeroing its host bits.
  class DelocalizeIp
    # Masks +old_ip+ down to a network prefix.
    #
    # @param old_ip [String, IPAddr, nil] address to mask; values that are not
    #   +present?+ are returned unchanged
    # @param mask_length [Integer, nil] prefix length to keep; when nil the
    #   default for the address family is used (see .default_mask)
    # @return [String, Object] the masked address as a string, or +old_ip+
    #   itself when it is blank
    def self.call(old_ip, mask_length: nil)
      return old_ip unless old_ip.present?

      # IPAddr.new rejects non-String input unless an address family is given,
      # so reuse an IPAddr as-is and stringify anything else before parsing.
      addr = old_ip.is_a?(IPAddr) ? old_ip : IPAddr.new(old_ip.to_s)
      addr.mask(mask_length || default_mask(addr)).to_s
    end

    # Default prefix length: 24 bits for IPv4, 48 bits otherwise.
    #
    # @param addr [IPAddr]
    # @return [Integer]
    def self.default_mask(addr)
      addr.ipv4? ? 24 : 48
    end
  end
end
@@ -0,0 +1,6 @@
1
+ # frozen_string_literal: true
2
+
3
module Deidentify
  # Error type raised by this gem (for example when the salt is missing or a
  # deidentification method or association is invalid).
  class Error < StandardError; end
end
@@ -0,0 +1,21 @@
1
+ # frozen_string_literal: true
2
+
3
module Deidentify
  # Policy that hashes the local part and the domain of an email address
  # separately and joins the two digests with "@".
  class HashEmail
    # 63 is the longest domain that is still acceptable for URI::MailTo::EMAIL_REGEXP
    MAX_DOMAIN_LENGTH = 63

    class << self
      # @param old_email [String, nil] address to hash; values that are not
      #   +present?+ are returned unchanged
      # @param length [Integer] maximum length of the whole result
      # @return [String, Object] "<hashed local part>@<hashed domain>", or
      #   +old_email+ itself when it is blank
      def call(old_email, length: 255)
        return old_email unless old_email.present?

        # One character of the budget is reserved for the "@"; the remainder is
        # shared equally between the two sides.
        budget = (length - 1) / 2

        local_part, domain_part = old_email.split('@')

        [
          Deidentify::BaseHash.call(local_part, length: budget),
          Deidentify::BaseHash.call(domain_part, length: [budget, MAX_DOMAIN_LENGTH].min)
        ].join('@')
      end
    end
  end
end
@@ -0,0 +1,47 @@
1
+ # frozen_string_literal: true
2
+
3
module Deidentify
  # Policy that hashes the identifying components of a URL (host, path, query
  # and fragment) while keeping the scheme and the URL structure.
  class HashUrl
    # @param old_url [String, nil] URL to hash; values that are not +present?+
    #   are returned unchanged. A value without a scheme is parsed as if it
    #   were prefixed with "http://".
    # @param length [Integer] maximum length budget for the result
    # @return [String, Object] the rewritten URL; the hash of the whole input
    #   when the URI is opaque or has nothing to hash component-wise; or
    #   +old_url+ itself when it is blank
    # @raise [URI::InvalidURIError] when the value cannot be parsed as a URI
    def self.call(old_url, length: 255)
      return old_url unless old_url.present?

      uri = URI.parse(old_url)
      uri = URI.parse("http://#{old_url}") if uri.scheme.nil?

      # Opaque URIs (e.g. "localhost:3000/foo", "mailto:...") expose no host or
      # path to rewrite. Hash the whole value rather than dividing by zero or
      # returning the opaque part untouched.
      return Deidentify::BaseHash.call(old_url, length: length) unless hashable_components?(uri)

      strip_userinfo(uri)

      hash_length = calculate_hash_length(uri, length)

      hash_host(uri, hash_length)
      hash_path(uri, hash_length)
      hash_query(uri, hash_length)
      hash_fragment(uri, hash_length)

      uri.to_s
    end

    # Splits the length budget evenly across the components that will be
    # hashed, after reserving room for the literal 'https:///?#' characters.
    #
    # @return [Integer] per-component hash length
    def self.calculate_hash_length(uri, length)
      number_of_hashes = [uri.host, uri.path, uri.query, uri.fragment].reject(&:blank?).size

      # Never divide by zero, even if called directly with an empty URI.
      (length - 'https:///?#'.length) / [number_of_hashes, 1].max
    end

    # Replaces the host with its hash (a missing host stays missing).
    def self.hash_host(uri, hash_length)
      uri.host = Deidentify::BaseHash.call(uri.host, length: hash_length)
    end

    # Replaces everything after the leading "/" of the path with its hash.
    def self.hash_path(uri, hash_length)
      uri.path = "/#{Deidentify::BaseHash.call(remove_slash(uri.path), length: hash_length)}" if uri.path.present?
    end

    # Drops the first character of +path+ (its leading slash).
    def self.remove_slash(path)
      path[1..]
    end

    # Replaces the query string, when there is one, with its hash.
    def self.hash_query(uri, hash_length)
      uri.query = Deidentify::BaseHash.call(uri.query, length: hash_length) if uri.query.present?
    end

    # Replaces the fragment, when there is one, with its hash.
    def self.hash_fragment(uri, hash_length)
      uri.fragment = Deidentify::BaseHash.call(uri.fragment, length: hash_length) if uri.fragment.present?
    end

    # True when the URI is hierarchical and has at least one component that
    # the component-wise hashing can rewrite.
    def self.hashable_components?(uri)
      uri.opaque.nil? && [uri.host, uri.path, uri.query, uri.fragment].any?(&:present?)
    end

    # Removes "user:password@" so credentials are not carried into the result.
    # The password is cleared first because a password depends on a user.
    def self.strip_userinfo(uri)
      uri.password = nil
      uri.user = nil
    end

    private_class_method :hashable_components?, :strip_userinfo
  end
end
@@ -0,0 +1,9 @@
1
+ # frozen_string_literal: true
2
+
3
module Deidentify
  # Policy that leaves a value exactly as it is.
  class Keep
    class << self
      # @param old_value [Object] current value
      # @return [Object] the very same +old_value+
      def call(old_value)
        old_value
      end
    end
  end
end
@@ -0,0 +1,11 @@
1
+ # frozen_string_literal: true
2
+
3
module Deidentify
  # Policy that swaps a value for a caller-supplied replacement.
  class Replace
    class << self
      # @param old_value [Object] current value
      # @param new_value [Object] replacement to return
      # @param keep_nil [Boolean] when true, a value that is +blank?+ (not only
      #   nil) is returned unchanged instead of being replaced
      # @return [Object] +old_value+ or +new_value+
      def call(old_value, new_value:, keep_nil: true)
        leave_untouched = old_value.blank? && keep_nil

        leave_untouched ? old_value : new_value
      end
    end
  end
end
data/lib/deidentify.rb ADDED
@@ -0,0 +1,177 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'deidentify/configuration'
4
+ require 'deidentify/replace'
5
+ require 'deidentify/delete'
6
+ require 'deidentify/base_hash'
7
+ require 'deidentify/hash_email'
8
+ require 'deidentify/hash_url'
9
+ require 'deidentify/delocalize_ip'
10
+ require 'deidentify/keep'
11
+ require 'deidentify/error'
12
+
13
# Concern that lets a model declare, column by column, how its data is
# scrubbed and then apply those rules (cascading to registered associations)
# through +deidentify!+.
module Deidentify
  class << self
    # The shared Configuration instance, created on first access.
    def configuration
      @configuration ||= Configuration.new
    end

    # Yields the shared Configuration so the caller can set +salt+ / +scope+.
    def configure
      yield(configuration)
    end
  end

  extend ::ActiveSupport::Concern

  # Symbolic names accepted by +deidentify ..., method:+ and the policy
  # classes whose +call+ implements them.
  POLICY_MAP = {
    replace: Deidentify::Replace,
    delete: Deidentify::Delete,
    hash: Deidentify::BaseHash,
    hash_email: Deidentify::HashEmail,
    hash_url: Deidentify::HashUrl,
    keep: Deidentify::Keep,
    delocalize_ip: Deidentify::DelocalizeIp
  }.freeze

  included do
    # column => [policy, options], filled in by the +deidentify+ macro.
    class_attribute :deidentify_configuration
    self.deidentify_configuration = {}

    # Association names registered through +deidentify_associations+.
    class_attribute :associations_to_deidentify
    self.associations_to_deidentify = []

    define_model_callbacks :deidentify
    after_deidentify :deidentify_associations!, if: -> { associations_to_deidentify.present? }
  end

  module ClassMethods
    # Registers the policy used for +column+.
    #
    # @param column [Symbol] attribute name
    # @param method [Symbol, #call] a POLICY_MAP key, or a callable that is
    #   given the record and returns the new value
    # @param options [Hash] keyword arguments forwarded to the policy class
    # @return [Array] the stored +[method, options]+ pair
    # @raise [Deidentify::Error] when +method+ is neither a known key nor callable
    def deidentify(column, method:, **options)
      unless POLICY_MAP.key?(method) || method.respond_to?(:call)
        raise Deidentify::Error, 'you must specify a valid deidentification method'
      end

      entry = [method, options]

      # Reassign a merged copy instead of mutating the hash in place: a
      # class_attribute value is shared with the parent class until it is
      # reassigned, so in-place writes from a subclass would leak upwards.
      self.deidentify_configuration = deidentify_configuration.merge(column => entry)

      entry
    end

    # Registers associations whose records are deidentified after this one.
    def deidentify_associations(*associations)
      self.associations_to_deidentify += associations
    end
  end

  # Deidentifies this record and its registered associations.
  #
  # When a configuration scope is set and this record cannot be found through
  # it, nothing is changed and the record itself is returned.
  #
  # @param validate [Boolean] forwarded to +save!+
  def deidentify!(validate: true)
    scope = Deidentify.configuration.scope
    return self if scope && scope.call(self.class).find_by(id: id).nil?

    recursive_deidentify!(validate: validate, deidentified_objects: [])
  end

  # Applies every configured column policy to the in-memory attributes
  # (nothing is saved here).
  def deidentify_attributes
    deidentify_configuration.each_pair do |col, config|
      deidentify_column(col, config)
    end
  end

  protected

  # Deidentifies and saves this record inside a transaction, unless it is
  # already in +deidentified_objects+ (which guards against cycles between
  # associations). +validate+ and the visited list are kept in instance
  # variables so the association callbacks can pass them on.
  def recursive_deidentify!(validate:, deidentified_objects:)
    @validate = validate
    @deidentified_objects = deidentified_objects

    return if @deidentified_objects.include?(self)

    ActiveRecord::Base.transaction do
      run_callbacks(:deidentify) do
        deidentify_attributes

        write_attribute(:deidentified_at, Time.current) if respond_to?(:deidentified_at)

        # Mark as visited before saving and before the after-callback walks
        # the associations.
        @deidentified_objects << self

        save!(validate: validate)
      end
    end
  end

  private

  # Computes and writes the new value for one column. Callable policies are
  # given the record; named policies are given the old value and the options.
  def deidentify_column(column, config)
    policy, options = Array(config)
    old_value = read_attribute(column)

    new_value = if policy.respond_to? :call
                  policy.call(self)
                else
                  POLICY_MAP[policy].call(old_value, **options)
                end

    write_attribute(column, new_value)
  end

  # after_deidentify callback: deidentifies each registered association,
  # going through the configuration scope when one is set.
  def deidentify_associations!
    associations_to_deidentify.each do |association_name|
      association = self.class.reflect_on_association(association_name)

      if association.nil?
        raise Deidentify::Error, "undefined association #{association_name} in #{self.class.name} deidentification"
      end

      scope = Deidentify.configuration.scope
      if scope
        deidentify_associations_with_scope!(association_name, association, scope)
      else
        deidentify_associations_without_scope!(association_name, association)
      end
    end
  end

  def deidentify_associations_without_scope!(association_name, association)
    if association.collection?
      deidentify_many!(send(association_name))
    else
      deidentify_one!(send(association_name))
    end
  end

  def deidentify_associations_with_scope!(association_name, association, configuration_scope)
    if association.collection?
      # eg. has_many :bubbles, -> { popped }
      # This will call configuration_scope.call(self.bubbles).merge(popped)
      class_query = class_query(association.scope, configuration_scope, send(association_name))

      deidentify_many!(class_query)
    else
      class_query = class_query(association.scope, configuration_scope, association.klass)

      if association.has_one?
        # eg. (bubble) has_one :party, -> { birthday }
        # This will call configuration_scope.call(Party).merge(birthday).find_by(bubble_id: id)
        # A hash condition is used so the key is bound and quoted by the
        # query layer rather than interpolated into a SQL string.
        deidentify_one!(class_query.find_by(association.foreign_key => id))
      else
        # eg. belongs_to :party, -> { birthday }
        # This will call configuration_scope.call(Party).merge(birthday).find_by(id: party_id)
        deidentify_one!(class_query.find_by(id: send(association.foreign_key)))
      end
    end
  end

  def class_query(association_scope, configuration_scope, klass_or_association)
    if association_scope.nil?
      configuration_scope.call(klass_or_association)
    else
      # Use both the configuration scope and the scope from the association.
      # Unfortunately the order here matters so something in the association_scope
      # will take precedence over the configuration scope.
      configuration_scope.call(klass_or_association).merge(association_scope)
    end
  end

  # Deidentifies each record, sharing this record's validate flag and
  # visited list.
  def deidentify_many!(records)
    records.each do |record|
      record.recursive_deidentify!(validate: @validate, deidentified_objects: @deidentified_objects)
    end
  end

  # Same as deidentify_many! for a single, possibly missing, record.
  def deidentify_one!(record)
    record&.recursive_deidentify!(validate: @validate, deidentified_objects: @deidentified_objects)
  end
end
@@ -0,0 +1,67 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'rails/generators'
4
module Deidentify
  module Generators
    # Generator that writes a "<model>_policy.rb" concern from
    # module_template.rb and inserts the matching +include+ line into the
    # model file.
    #
    # Takes the model name as its argument; the optional +file_path+ option
    # overrides the path (relative to app/models) derived from the model name.
    class ConfigureForGenerator < Rails::Generators::Base
      source_root File.expand_path('../templates', __dir__)

      argument :model, type: :string, banner: 'model name'
      class_option :file_path, type: :string, default: ''

      # Renders the policy module, then adds the include right after the
      # "<Class> < ApplicationRecord" text in the model file.
      def call
        template 'module_template.rb', File.join(module_path, "#{klass.underscore}_policy.rb")

        insert_into_file(
          model_path,
          "\n include Deidentify::#{namespace_model}Policy",
          after: "#{klass} < ApplicationRecord"
        )
      end

      private

      # Constant path used in the generated module name: derived from
      # +file_path+ when given, otherwise the model argument as typed.
      def namespace_model
        if file_path.present?
          file_path.split('/').map(&:camelcase).join('::')
        else
          model
        end
      end

      # Location of the model file to edit.
      def model_path
        path = if file_path.present?
                 file_path
               else
                 full_path.map(&:underscore).join('/')
               end

        "app/models/#{path}.rb"
      end

      # Directory the policy file is written to (namespace directories only).
      def module_path
        path = if file_path.present?
                 file_path.split('/')
               else
                 full_path.map(&:underscore)
               end

        path = path[0...-1].join('/') # remove the class name

        "app/concerns/deidentify/#{path}"
      end

      # Unqualified class name of the model.
      def klass
        full_path.last
      end

      # The model argument split into its namespace segments.
      def full_path
        @full_path ||= model.split('::')
      end

      # The +file_path+ option without a trailing ".rb". Only that suffix is
      # removed, so dots elsewhere in the path no longer truncate it; an
      # unset option yields an empty string, which the callers treat as
      # "not given".
      def file_path
        options['file_path'].to_s.delete_suffix('.rb')
      end
    end
  end
end
@@ -0,0 +1,12 @@
1
+ # frozen_string_literal: true
2
+
3
+ <%# Policy concern skeleton rendered by ConfigureForGenerator: one `deidentify ..., method: :keep` line is emitted per column of the model. -%>
+ module Deidentify::<%= namespace_model %>Policy
4
+ extend ActiveSupport::Concern
5
+ include Deidentify
6
+
7
+ included do
8
+ <% model.constantize.column_names.each do |name| -%>
9
+ deidentify :<%= name %>, method: :keep
10
+ <% end -%>
11
+ end
12
+ end
metadata ADDED
@@ -0,0 +1,67 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: deidentify
3
+ version: !ruby/object:Gem::Version
4
+ version: 2.5.0
5
+ platform: ruby
6
+ authors:
7
+ - Lucy Dement
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2022-08-05 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: rails
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">="
18
+ - !ruby/object:Gem::Version
19
+ version: 5.0.0
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">="
25
+ - !ruby/object:Gem::Version
26
+ version: 5.0.0
27
+ description: A gem to allow deidentification of certain fields
28
+ email:
29
+ executables: []
30
+ extensions: []
31
+ extra_rdoc_files: []
32
+ files:
33
+ - lib/deidentify.rb
34
+ - lib/deidentify/base_hash.rb
35
+ - lib/deidentify/configuration.rb
36
+ - lib/deidentify/delete.rb
37
+ - lib/deidentify/delocalize_ip.rb
38
+ - lib/deidentify/error.rb
39
+ - lib/deidentify/hash_email.rb
40
+ - lib/deidentify/hash_url.rb
41
+ - lib/deidentify/keep.rb
42
+ - lib/deidentify/replace.rb
43
+ - lib/generators/deidentify/configure_for_generator.rb
44
+ - lib/generators/templates/module_template.rb
45
+ homepage:
46
+ licenses: []
47
+ metadata: {}
48
+ post_install_message:
49
+ rdoc_options: []
50
+ require_paths:
51
+ - lib
52
+ required_ruby_version: !ruby/object:Gem::Requirement
53
+ requirements:
54
+ - - ">="
55
+ - !ruby/object:Gem::Version
56
+ version: '2.6'
57
+ required_rubygems_version: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ">="
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ requirements: []
63
+ rubygems_version: 3.1.6
64
+ signing_key:
65
+ specification_version: 4
66
+ summary: Deidentify a rails model
67
+ test_files: []