data_cleansing 0.6.0 → 0.6.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +82 -15
- data/lib/data_cleansing/cleanse.rb +92 -79
- data/lib/data_cleansing/version.rb +1 -1
- data/test/test_db.sqlite3 +0 -0
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 963a448b6c2ab7dfe313dd911f19e83a4c4688a6
|
4
|
+
data.tar.gz: ff312d2b3a5dfc3bc20255f909bf3376ef720188
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 8f899d278990a64a9ce9dcf51713b9143ba9485449ed793f4c96afe9a078dd0273308dfc2eaca41ce081c31ed2beba54173c0b6733c25ef9dfa16b4608e73b9a
|
7
|
+
data.tar.gz: c3d1addfece09468dc7f05981145e98f90ff46721c1b5e68e8b8b283f9b0d71485f0d63164115149b3a2dc84acece390fbbd5fa982b87c8689ad50316f2181bd
|
data/README.md
CHANGED
@@ -22,11 +22,12 @@ or pull request.
|
|
22
22
|
## Features
|
23
23
|
|
24
24
|
* Supports global cleansing definitions that can be associated with any Ruby,
|
25
|
-
Rails, Mongoid, or other model
|
26
|
-
* Supports custom cleansing definitions that can be defined in-line
|
27
|
-
* A cleansing block can access the other attributes in the model
|
28
|
-
|
29
|
-
* In a cleansing block other can
|
25
|
+
Rails, Mongoid, or other model
|
26
|
+
* Supports custom cleansing definitions that can be defined in-line
|
27
|
+
* A cleansing block can access the other attributes in the model while cleansing
|
28
|
+
the current attribute
|
29
|
+
* In a cleansing block other attributes in the model can be modified at the
|
30
|
+
same time
|
30
31
|
* Cleansers are executed in the order they are defined. As a result multiple
|
31
32
|
cleansers can be run against the same field and the order is preserved
|
32
33
|
* Multiple cleansers can be specified for a list of attributes at the same time
|
@@ -34,9 +35,8 @@ or pull request.
|
|
34
35
|
the child's cleansers
|
35
36
|
* Cleansers can be called outside of a model instance for cases where fields
|
36
37
|
need to be cleansed before the model is created, or needs to be found
|
37
|
-
*
|
38
|
-
|
39
|
-
completely wiped out to nil
|
38
|
+
* To aid troubleshooting the before and after values of cleansed attributes
|
39
|
+
are logged. The level of detail is fine-tuned using the log level
|
40
40
|
|
41
41
|
## ActiveRecord (ActiveModel) Features
|
42
42
|
|
@@ -67,11 +67,11 @@ u = User.new
|
|
67
67
|
u.first_name = ' joe '
|
68
68
|
u.last_name = "\n black\n"
|
69
69
|
puts "Before data cleansing #{u.inspect}"
|
70
|
-
# Before data cleansing
|
70
|
+
# Before data cleansing #<User:0x007fc9f1081980 @first_name=" joe ", @last_name="\n black\n">
|
71
71
|
|
72
72
|
u.cleanse_attributes!
|
73
73
|
puts "After data cleansing #{u.inspect}"
|
74
|
-
# After data cleansing
|
74
|
+
# After data cleansing #<User:0x007fc9f1081980 @first_name="joe", @last_name="black">
|
75
75
|
```
|
76
76
|
|
77
77
|
### Rails Example
|
@@ -154,6 +154,64 @@ puts "After data cleansing #{u.inspect}"
|
|
154
154
|
# After data cleansing #<User:0x007fdd5a83a8f8 @first_name="joe", @last_name="black", @address1="2632 Brown St", @title="MR.", @gender="Male">
|
155
155
|
```
|
156
156
|
|
157
|
+
## After Cleansing
|
158
|
+
|
159
|
+
It is sometimes useful to read or write multiple fields as part of a cleansing, or
|
160
|
+
where attributes need to be manipulated automatically once they have been cleansed.
|
161
|
+
For this purpose instance methods on the model can be registered for invocation once
|
162
|
+
all the attributes have been cleansed according to their :cleanse specifications.
|
163
|
+
Multiple methods can be registered and they are called in the order they are registered.
|
164
|
+
|
165
|
+
```ruby
|
166
|
+
after_cleanse <instance_method_name>, <instance_method_name>, ...
|
167
|
+
```
|
168
|
+
|
169
|
+
Example:
|
170
|
+
```ruby
|
171
|
+
# Define a global cleanser
|
172
|
+
DataCleansing.register_cleaner(:strip) {|string| string.strip}
|
173
|
+
|
174
|
+
# 'users' table has the following columns :first_name, :last_name, :address1, :address2
|
175
|
+
class User < ActiveRecord::Base
|
176
|
+
include DataCleansing::Cleanse
|
177
|
+
|
178
|
+
# Use a global cleaner
|
179
|
+
cleanse :first_name, :last_name, :cleaner => :strip
|
180
|
+
|
181
|
+
# Define a once off cleaner
|
182
|
+
cleanse :address1, :address2, :cleaner => Proc.new {|string| string.strip}
|
183
|
+
|
184
|
+
# Once the above cleansing is complete call the instance method
|
185
|
+
after_cleanse :check_address
|
186
|
+
|
187
|
+
protected
|
188
|
+
|
189
|
+
# Method to be called once data cleansing is complete
|
190
|
+
def check_address
|
191
|
+
# Move address2 to address1 if Address1 is blank and address2 has a value
|
192
|
+
self.address1 = address2 if address1.blank? && !address2.blank?
|
193
|
+
end
|
194
|
+
|
195
|
+
end
|
196
|
+
|
197
|
+
# Create a User instance
|
198
|
+
u = User.new(:first_name => ' joe ', :last_name => "\n black\n", :address2 => "2632 Brown St \n")
|
199
|
+
puts "Before data cleansing #{u.attributes.inspect}"
|
200
|
+
u.cleanse_attributes!
|
201
|
+
puts "After data cleansing #{u.attributes.inspect}"
|
202
|
+
u.save!
|
203
|
+
```
|
204
|
+
|
205
|
+
## Recommendations
|
206
|
+
|
207
|
+
:data_cleanse blocks are ideal for cleansing a single attribute, and applying any
|
208
|
+
global or common cleansing algorithms.
|
209
|
+
|
210
|
+
Even though multiple attributes can be read or written in a single :data_cleanse
|
211
|
+
block, it is recommended to use the :after_cleanse method for working with multiple
|
212
|
+
attributes. It is much easier to read and understand the interactions between multiple
|
213
|
+
attributes in the :after_cleanse methods.
|
214
|
+
|
157
215
|
## Rails configuration
|
158
216
|
|
159
217
|
When DataCleansing is used in a Rails environment it can be configured using the
|
@@ -196,13 +254,22 @@ SemanticLogger.default_level = Rails.logger.level
|
|
196
254
|
SemanticLogger.add_appender(Rails.logger)
|
197
255
|
```
|
198
256
|
|
199
|
-
By changing the log level
|
257
|
+
By changing the log level of DataCleansing itself the type of output for data
|
200
258
|
cleansing can be controlled:
|
201
259
|
|
202
260
|
* :trace or :debug to log all fields modified
|
203
261
|
* :info to log only those fields which were nilled out
|
204
262
|
* :warn or higher to disable logging of cleansing actions
|
205
263
|
|
264
|
+
Note:
|
265
|
+
|
266
|
+
* The logging of changes made to attributes only includes attributes cleansed
|
267
|
+
with :data_cleanse blocks. Attributes modified within :after_cleanse methods
|
268
|
+
are not logged
|
269
|
+
|
270
|
+
* It is not necessary to change the global log level to affect the logging detail
|
271
|
+
level in DataCleansing. The DataCleansing log level can be changed independently
|
272
|
+
|
206
273
|
To change the log level, either use the Rails configuration approach, or set it
|
207
274
|
directly:
|
208
275
|
|
@@ -212,9 +279,9 @@ DataCleansing.logger.level = :info
|
|
212
279
|
|
213
280
|
## Notes
|
214
281
|
|
215
|
-
Cleaners are called in the order in which they are defined, so subsequent cleaners
|
216
|
-
can assume that the previous cleaners have run and can therefore access or even
|
217
|
-
modify previously cleaned attributes
|
282
|
+
* Cleaners are called in the order in which they are defined, so subsequent cleaners
|
283
|
+
can assume that the previous cleaners have run and can therefore access or even
|
284
|
+
modify previously cleaned attributes
|
218
285
|
|
219
286
|
## Installation
|
220
287
|
|
@@ -223,7 +290,7 @@ modify previously cleaned attributes
|
|
223
290
|
Add the following line to Gemfile
|
224
291
|
|
225
292
|
```ruby
|
226
|
-
gem '
|
293
|
+
gem 'data_cleansing'
|
227
294
|
```
|
228
295
|
|
229
296
|
Install the Gem with bundler
|
@@ -120,109 +120,122 @@ module DataCleansing
|
|
120
120
|
module InstanceMethods
|
121
121
|
# Cleanse the attributes using specified cleaners
|
122
122
|
# and execute after cleaners once complete
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
123
|
+
#
|
124
|
+
# Returns fields changed whilst cleaning the attributes
|
125
|
+
#
|
126
|
+
# Note: At this time the changes returned do not include any fields
|
127
|
+
# modified in any of the after_cleaner methods
|
128
|
+
def cleanse_attributes!(verbose=DataCleansing.logger.debug?)
|
129
|
+
changes = {}
|
130
|
+
DataCleansing.logger.benchmark_info("#{self.class.name}#cleanse_attributes!", :payload => changes) do
|
131
|
+
# Collect parent cleaners first, starting with the top parent
|
132
|
+
cleaners = [self.class.send(:data_cleansing_cleaners)]
|
133
|
+
after_cleaners = [self.class.send(:data_cleansing_after_cleaners)]
|
134
|
+
klass = self.class.superclass
|
135
|
+
while klass != Object
|
136
|
+
cleaners << klass.send(:data_cleansing_cleaners) if klass.respond_to?(:data_cleansing_cleaners)
|
137
|
+
after_cleaners << klass.send(:data_cleansing_after_cleaners) if klass.respond_to?(:data_cleansing_after_cleaners)
|
138
|
+
klass = klass.superclass
|
139
|
+
end
|
140
|
+
# Capture all modified fields if log_level is :debug or :trace
|
141
|
+
cleaners.reverse_each {|cleaner| changes.merge!(data_cleansing_execute_cleaners(cleaner, verbose))}
|
134
142
|
|
135
|
-
|
136
|
-
|
137
|
-
|
143
|
+
# Execute the after cleaners, starting with the parent after cleanse methods
|
144
|
+
after_cleaners.reverse_each {|a| a.each {|method| send(method)} }
|
145
|
+
end
|
146
|
+
changes
|
138
147
|
end
|
139
148
|
|
140
149
|
private
|
141
150
|
|
142
151
|
# Run each of the cleaners in the order they are listed in the array
|
143
|
-
|
152
|
+
# Returns a hash of before and after values of what was cleansed
|
153
|
+
# Parameters
|
154
|
+
# cleaners
|
155
|
+
# List of cleaners to run
|
156
|
+
#
|
157
|
+
# verbose [true|false]
|
158
|
+
# Whether to include all the fields cleansed or just the fields that
|
159
|
+
# were cleansed to nil
|
160
|
+
def data_cleansing_execute_cleaners(cleaners, verbose = false)
|
144
161
|
return false if cleaners.nil?
|
145
162
|
|
146
163
|
# Capture all changes to attributes if the log level is :info or greater
|
147
|
-
changes = {}
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
serialized_attrs = self.class.serialized_attributes.keys
|
165
|
-
attrs -= serialized_attrs.collect{|i| i.to_sym} if serialized_attrs
|
166
|
-
end
|
164
|
+
changes = {}
|
165
|
+
|
166
|
+
cleaners.each do |cleaner_struct|
|
167
|
+
params = cleaner_struct.params
|
168
|
+
attrs = cleaner_struct.attributes
|
169
|
+
|
170
|
+
# Special case to include :all fields
|
171
|
+
# Only works with ActiveRecord based models, not supported with regular Ruby models
|
172
|
+
if attrs.include?(:all) && defined?(ActiveRecord) && respond_to?(:attributes)
|
173
|
+
attrs = attributes.keys.collect{|i| i.to_sym}
|
174
|
+
attrs.delete(:id)
|
175
|
+
|
176
|
+
# Remove serialized_attributes if any, from the :all condition
|
177
|
+
if self.class.respond_to?(:serialized_attributes)
|
178
|
+
serialized_attrs = self.class.serialized_attributes.keys
|
179
|
+
attrs -= serialized_attrs.collect{|i| i.to_sym} if serialized_attrs
|
180
|
+
end
|
167
181
|
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
|
173
|
-
|
174
|
-
end
|
182
|
+
# Replace any encrypted attributes with their non-encrypted versions if any
|
183
|
+
if defined?(SymmetricEncryption) && self.class.respond_to?(:encrypted_attributes)
|
184
|
+
self.class.encrypted_attributes.each_pair do |clear, encrypted|
|
185
|
+
if attrs.include?(encrypted.to_sym)
|
186
|
+
attrs.delete(encrypted.to_sym)
|
187
|
+
attrs << clear.to_sym
|
175
188
|
end
|
176
189
|
end
|
190
|
+
end
|
177
191
|
|
178
|
-
|
179
|
-
|
180
|
-
|
181
|
-
end
|
182
|
-
|
192
|
+
# Explicitly remove specified attributes from cleansing
|
193
|
+
if except = params[:except]
|
194
|
+
attrs -= except
|
183
195
|
end
|
184
196
|
|
185
|
-
|
186
|
-
|
187
|
-
|
188
|
-
|
189
|
-
|
190
|
-
|
191
|
-
|
192
|
-
|
197
|
+
end
|
198
|
+
|
199
|
+
attrs.each do |attr|
|
200
|
+
# Under ActiveModel for Rails and Mongoid need to retrieve raw value
|
201
|
+
# before data type conversion
|
202
|
+
value = if respond_to?(:read_attribute_before_type_cast) && has_attribute?(attr.to_s)
|
203
|
+
read_attribute_before_type_cast(attr.to_s)
|
204
|
+
else
|
205
|
+
send(attr.to_sym)
|
206
|
+
end
|
193
207
|
|
194
|
-
|
195
|
-
|
196
|
-
|
197
|
-
|
198
|
-
|
199
|
-
|
200
|
-
|
201
|
-
|
202
|
-
|
203
|
-
|
204
|
-
|
205
|
-
|
206
|
-
|
207
|
-
|
208
|
-
|
209
|
-
|
210
|
-
|
211
|
-
|
212
|
-
|
213
|
-
|
214
|
-
|
215
|
-
end
|
208
|
+
# No need to clean if attribute is nil
|
209
|
+
unless value.nil?
|
210
|
+
new_value = self.class.send(:data_cleansing_clean,cleaner_struct, value, self)
|
211
|
+
|
212
|
+
if new_value != value
|
213
|
+
# Update value only if it has changed
|
214
|
+
send("#{attr.to_sym}=".to_sym, new_value)
|
215
|
+
|
216
|
+
# Capture changed attributes
|
217
|
+
if changes
|
218
|
+
# Mask sensitive attributes when logging
|
219
|
+
masked = DataCleansing.masked_attributes.include?(attr.to_sym)
|
220
|
+
new_value = :masked if masked && !new_value.nil?
|
221
|
+
if previous = changes[attr.to_sym]
|
222
|
+
previous[:after] = new_value
|
223
|
+
else
|
224
|
+
if new_value.nil? || verbose
|
225
|
+
changes[attr.to_sym] = {
|
226
|
+
:before => masked ? :masked : value,
|
227
|
+
:after => new_value
|
228
|
+
}
|
216
229
|
end
|
217
230
|
end
|
218
231
|
end
|
219
232
|
end
|
220
|
-
|
221
233
|
end
|
222
234
|
end
|
223
235
|
end
|
224
|
-
|
236
|
+
changes
|
225
237
|
end
|
238
|
+
|
226
239
|
end
|
227
240
|
|
228
241
|
def self.included(base)
|
data/test/test_db.sqlite3
CHANGED
Binary file
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: data_cleansing
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.6.0
|
4
|
+
version: 0.6.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Reid Morrison
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2013-
|
11
|
+
date: 2013-08-29 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: thread_safe
|