data_cleansing 0.6.0 → 0.6.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +82 -15
- data/lib/data_cleansing/cleanse.rb +92 -79
- data/lib/data_cleansing/version.rb +1 -1
- data/test/test_db.sqlite3 +0 -0
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 963a448b6c2ab7dfe313dd911f19e83a4c4688a6
|
4
|
+
data.tar.gz: ff312d2b3a5dfc3bc20255f909bf3376ef720188
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 8f899d278990a64a9ce9dcf51713b9143ba9485449ed793f4c96afe9a078dd0273308dfc2eaca41ce081c31ed2beba54173c0b6733c25ef9dfa16b4608e73b9a
|
7
|
+
data.tar.gz: c3d1addfece09468dc7f05981145e98f90ff46721c1b5e68e8b8b283f9b0d71485f0d63164115149b3a2dc84acece390fbbd5fa982b87c8689ad50316f2181bd
|
data/README.md
CHANGED
@@ -22,11 +22,12 @@ or pull request.
|
|
22
22
|
## Features
|
23
23
|
|
24
24
|
* Supports global cleansing definitions that can be associated with any Ruby,
|
25
|
-
Rails, Mongoid, or other model
|
26
|
-
* Supports custom cleansing definitions that can be defined in-line
|
27
|
-
* A cleansing block can access the other attributes in the model
|
28
|
-
|
29
|
-
* In a cleansing block other can
|
25
|
+
Rails, Mongoid, or other model
|
26
|
+
* Supports custom cleansing definitions that can be defined in-line
|
27
|
+
* A cleansing block can access the other attributes in the model while cleansing
|
28
|
+
the current attribute
|
29
|
+
* In a cleansing block other attributes in the model can be modified at the
|
30
|
+
same time
|
30
31
|
* Cleansers are executed in the order they are defined. As a result multiple
|
31
32
|
cleansers can be run against the same field and the order is preserved
|
32
33
|
* Multiple cleansers can be specified for a list of attributes at the same time
|
@@ -34,9 +35,8 @@ or pull request.
|
|
34
35
|
the child's cleansers
|
35
36
|
* Cleansers can be called outside of a model instance for cases where fields
|
36
37
|
need to be cleansed before the model is created, or needs to be found
|
37
|
-
*
|
38
|
-
|
39
|
-
completely wiped out to nil
|
38
|
+
* To aid troubleshooting the before and after values of cleansed attributes
|
39
|
+
are logged. The level of detail is fine-tuned using the log level
|
40
40
|
|
41
41
|
## ActiveRecord (ActiveModel) Features
|
42
42
|
|
@@ -67,11 +67,11 @@ u = User.new
|
|
67
67
|
u.first_name = ' joe '
|
68
68
|
u.last_name = "\n black\n"
|
69
69
|
puts "Before data cleansing #{u.inspect}"
|
70
|
-
# Before data cleansing
|
70
|
+
# Before data cleansing #<User:0x007fc9f1081980 @first_name=" joe ", @last_name="\n black\n">
|
71
71
|
|
72
72
|
u.cleanse_attributes!
|
73
73
|
puts "After data cleansing #{u.inspect}"
|
74
|
-
# After data cleansing
|
74
|
+
# After data cleansing #<User:0x007fc9f1081980 @first_name="joe", @last_name="black">
|
75
75
|
```
|
76
76
|
|
77
77
|
### Rails Example
|
@@ -154,6 +154,64 @@ puts "After data cleansing #{u.inspect}"
|
|
154
154
|
# After data cleansing #<User:0x007fdd5a83a8f8 @first_name="joe", @last_name="black", @address1="2632 Brown St", @title="MR.", @gender="Male">
|
155
155
|
```
|
156
156
|
|
157
|
+
## After Cleansing
|
158
|
+
|
159
|
+
It is sometimes useful to read or write multiple fields as part of a cleansing, or
|
160
|
+
to manipulate attributes automatically once they have been cleansed.
|
161
|
+
For this purpose instance methods on the model can be registered for invocation once
|
162
|
+
all the attributes have been cleansed according to their :cleanse specifications.
|
163
|
+
Multiple methods can be registered and they are called in the order they are registered.
|
164
|
+
|
165
|
+
```ruby
|
166
|
+
after_cleanse <instance_method_name>, <instance_method_name>, ...
|
167
|
+
```
|
168
|
+
|
169
|
+
Example:
|
170
|
+
```ruby
|
171
|
+
# Define a global cleanser
|
172
|
+
DataCleansing.register_cleaner(:strip) {|string| string.strip}
|
173
|
+
|
174
|
+
# 'users' table has the following columns :first_name, :last_name, :address1, :address2
|
175
|
+
class User < ActiveRecord::Base
|
176
|
+
include DataCleansing::Cleanse
|
177
|
+
|
178
|
+
# Use a global cleaner
|
179
|
+
cleanse :first_name, :last_name, :cleaner => :strip
|
180
|
+
|
181
|
+
# Define a one-off cleaner
|
182
|
+
cleanse :address1, :address2, :cleaner => Proc.new {|string| string.strip}
|
183
|
+
|
184
|
+
# Once the above cleansing is complete call the instance method
|
185
|
+
after_cleanse :check_address
|
186
|
+
|
187
|
+
protected
|
188
|
+
|
189
|
+
# Method to be called once data cleansing is complete
|
190
|
+
def check_address
|
191
|
+
# Move address2 to address1 if Address1 is blank and address2 has a value
|
192
|
+
self.address1 = address2 if address1.blank? && !address2.blank?
|
193
|
+
end
|
194
|
+
|
195
|
+
end
|
196
|
+
|
197
|
+
# Create a User instance
|
198
|
+
u = User.new(:first_name => ' joe ', :last_name => "\n black\n", :address2 => "2632 Brown St \n")
|
199
|
+
puts "Before data cleansing #{u.attributes.inspect}"
|
200
|
+
u.cleanse_attributes!
|
201
|
+
puts "After data cleansing #{u.attributes.inspect}"
|
202
|
+
u.save!
|
203
|
+
```
|
204
|
+
|
205
|
+
## Recommendations
|
206
|
+
|
207
|
+
:data_cleanse blocks are ideal for cleansing a single attribute, and applying any
|
208
|
+
global or common cleansing algorithms.
|
209
|
+
|
210
|
+
Even though multiple attributes can be read or written in a single :data_cleanse
|
211
|
+
block, it is recommended to use the :after_cleanse method for working with multiple
|
212
|
+
attributes. It is much easier to read and understand the interactions between multiple
|
213
|
+
attributes in the :after_cleanse methods.
|
214
|
+
|
157
215
|
## Rails configuration
|
158
216
|
|
159
217
|
When DataCleansing is used in a Rails environment it can be configured using the
|
@@ -196,13 +254,22 @@ SemanticLogger.default_level = Rails.logger.level
|
|
196
254
|
SemanticLogger.add_appender(Rails.logger)
|
197
255
|
```
|
198
256
|
|
199
|
-
By changing the log level
|
257
|
+
By changing the log level of DataCleansing itself the type of output for data
|
200
258
|
cleansing can be controlled:
|
201
259
|
|
202
260
|
* :trace or :debug to log all fields modified
|
203
261
|
* :info to log only those fields which were nilled out
|
204
262
|
* :warn or higher to disable logging of cleansing actions
|
205
263
|
|
264
|
+
Note:
|
265
|
+
|
266
|
+
* The logging of changes made to attributes only includes attributes cleansed
|
267
|
+
with :data_cleanse blocks. Attributes modified within :after_cleanse methods
|
268
|
+
are not logged
|
269
|
+
|
270
|
+
* It is not necessary to change the global log level to affect the logging detail
|
271
|
+
level in DataCleansing. The DataCleansing log level is changed independently
|
272
|
+
|
206
273
|
To change the log level, either use the Rails configuration approach, or set it
|
207
274
|
directly:
|
208
275
|
|
@@ -212,9 +279,9 @@ DataCleansing.logger.level = :info
|
|
212
279
|
|
213
280
|
## Notes
|
214
281
|
|
215
|
-
Cleaners are called in the order in which they are defined, so subsequent cleaners
|
216
|
-
can assume that the previous cleaners have run and can therefore access or even
|
217
|
-
modify previously cleaned attributes
|
282
|
+
* Cleaners are called in the order in which they are defined, so subsequent cleaners
|
283
|
+
can assume that the previous cleaners have run and can therefore access or even
|
284
|
+
modify previously cleaned attributes
|
218
285
|
|
219
286
|
## Installation
|
220
287
|
|
@@ -223,7 +290,7 @@ modify previously cleaned attributes
|
|
223
290
|
Add the following line to Gemfile
|
224
291
|
|
225
292
|
```ruby
|
226
|
-
gem '
|
293
|
+
gem 'data_cleansing'
|
227
294
|
```
|
228
295
|
|
229
296
|
Install the Gem with bundler
|
@@ -120,109 +120,122 @@ module DataCleansing
|
|
120
120
|
module InstanceMethods
|
121
121
|
# Cleanse the attributes using specified cleaners
|
122
122
|
# and execute after cleaners once complete
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
123
|
+
#
|
124
|
+
# Returns fields changed whilst cleaning the attributes
|
125
|
+
#
|
126
|
+
# Note: At this time the changes returned do not include any fields
|
127
|
+
# modified in any of the after_cleaner methods
|
128
|
+
def cleanse_attributes!(verbose=DataCleansing.logger.debug?)
|
129
|
+
changes = {}
|
130
|
+
DataCleansing.logger.benchmark_info("#{self.class.name}#cleanse_attributes!", :payload => changes) do
|
131
|
+
# Collect parent cleaners first, starting with the top parent
|
132
|
+
cleaners = [self.class.send(:data_cleansing_cleaners)]
|
133
|
+
after_cleaners = [self.class.send(:data_cleansing_after_cleaners)]
|
134
|
+
klass = self.class.superclass
|
135
|
+
while klass != Object
|
136
|
+
cleaners << klass.send(:data_cleansing_cleaners) if klass.respond_to?(:data_cleansing_cleaners)
|
137
|
+
after_cleaners << klass.send(:data_cleansing_after_cleaners) if klass.respond_to?(:data_cleansing_after_cleaners)
|
138
|
+
klass = klass.superclass
|
139
|
+
end
|
140
|
+
# Capture all modified fields if log_level is :debug or :trace
|
141
|
+
cleaners.reverse_each {|cleaner| changes.merge!(data_cleansing_execute_cleaners(cleaner, verbose))}
|
134
142
|
|
135
|
-
|
136
|
-
|
137
|
-
|
143
|
+
# Execute the after cleaners, starting with the parent after cleanse methods
|
144
|
+
after_cleaners.reverse_each {|a| a.each {|method| send(method)} }
|
145
|
+
end
|
146
|
+
changes
|
138
147
|
end
|
139
148
|
|
140
149
|
private
|
141
150
|
|
142
151
|
# Run each of the cleaners in the order they are listed in the array
|
143
|
-
|
152
|
+
# Returns a hash of before and after values of what was cleansed
|
153
|
+
# Parameters
|
154
|
+
# cleaners
|
155
|
+
# List of cleaners to run
|
156
|
+
#
|
157
|
+
# verbose [true|false]
|
158
|
+
# Whether to include all the fields cleansed or just the fields that
|
159
|
+
# were cleansed to nil
|
160
|
+
def data_cleansing_execute_cleaners(cleaners, verbose = false)
|
144
161
|
return false if cleaners.nil?
|
145
162
|
|
146
163
|
# Capture all changes to attributes if the log level is :info or greater
|
147
|
-
changes = {}
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
serialized_attrs = self.class.serialized_attributes.keys
|
165
|
-
attrs -= serialized_attrs.collect{|i| i.to_sym} if serialized_attrs
|
166
|
-
end
|
164
|
+
changes = {}
|
165
|
+
|
166
|
+
cleaners.each do |cleaner_struct|
|
167
|
+
params = cleaner_struct.params
|
168
|
+
attrs = cleaner_struct.attributes
|
169
|
+
|
170
|
+
# Special case to include :all fields
|
171
|
+
# Only works with ActiveRecord based models, not supported with regular Ruby models
|
172
|
+
if attrs.include?(:all) && defined?(ActiveRecord) && respond_to?(:attributes)
|
173
|
+
attrs = attributes.keys.collect{|i| i.to_sym}
|
174
|
+
attrs.delete(:id)
|
175
|
+
|
176
|
+
# Remove serialized_attributes if any, from the :all condition
|
177
|
+
if self.class.respond_to?(:serialized_attributes)
|
178
|
+
serialized_attrs = self.class.serialized_attributes.keys
|
179
|
+
attrs -= serialized_attrs.collect{|i| i.to_sym} if serialized_attrs
|
180
|
+
end
|
167
181
|
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
|
173
|
-
|
174
|
-
end
|
182
|
+
# Replace any encrypted attributes with their non-encrypted versions if any
|
183
|
+
if defined?(SymmetricEncryption) && self.class.respond_to?(:encrypted_attributes)
|
184
|
+
self.class.encrypted_attributes.each_pair do |clear, encrypted|
|
185
|
+
if attrs.include?(encrypted.to_sym)
|
186
|
+
attrs.delete(encrypted.to_sym)
|
187
|
+
attrs << clear.to_sym
|
175
188
|
end
|
176
189
|
end
|
190
|
+
end
|
177
191
|
|
178
|
-
|
179
|
-
|
180
|
-
|
181
|
-
end
|
182
|
-
|
192
|
+
# Explicitly remove specified attributes from cleansing
|
193
|
+
if except = params[:except]
|
194
|
+
attrs -= except
|
183
195
|
end
|
184
196
|
|
185
|
-
|
186
|
-
|
187
|
-
|
188
|
-
|
189
|
-
|
190
|
-
|
191
|
-
|
192
|
-
|
197
|
+
end
|
198
|
+
|
199
|
+
attrs.each do |attr|
|
200
|
+
# Under ActiveModel for Rails and Mongoid need to retrieve raw value
|
201
|
+
# before data type conversion
|
202
|
+
value = if respond_to?(:read_attribute_before_type_cast) && has_attribute?(attr.to_s)
|
203
|
+
read_attribute_before_type_cast(attr.to_s)
|
204
|
+
else
|
205
|
+
send(attr.to_sym)
|
206
|
+
end
|
193
207
|
|
194
|
-
|
195
|
-
|
196
|
-
|
197
|
-
|
198
|
-
|
199
|
-
|
200
|
-
|
201
|
-
|
202
|
-
|
203
|
-
|
204
|
-
|
205
|
-
|
206
|
-
|
207
|
-
|
208
|
-
|
209
|
-
|
210
|
-
|
211
|
-
|
212
|
-
|
213
|
-
|
214
|
-
|
215
|
-
end
|
208
|
+
# No need to clean if attribute is nil
|
209
|
+
unless value.nil?
|
210
|
+
new_value = self.class.send(:data_cleansing_clean,cleaner_struct, value, self)
|
211
|
+
|
212
|
+
if new_value != value
|
213
|
+
# Update value only if it has changed
|
214
|
+
send("#{attr.to_sym}=".to_sym, new_value)
|
215
|
+
|
216
|
+
# Capture changed attributes
|
217
|
+
if changes
|
218
|
+
# Mask sensitive attributes when logging
|
219
|
+
masked = DataCleansing.masked_attributes.include?(attr.to_sym)
|
220
|
+
new_value = :masked if masked && !new_value.nil?
|
221
|
+
if previous = changes[attr.to_sym]
|
222
|
+
previous[:after] = new_value
|
223
|
+
else
|
224
|
+
if new_value.nil? || verbose
|
225
|
+
changes[attr.to_sym] = {
|
226
|
+
:before => masked ? :masked : value,
|
227
|
+
:after => new_value
|
228
|
+
}
|
216
229
|
end
|
217
230
|
end
|
218
231
|
end
|
219
232
|
end
|
220
|
-
|
221
233
|
end
|
222
234
|
end
|
223
235
|
end
|
224
|
-
|
236
|
+
changes
|
225
237
|
end
|
238
|
+
|
226
239
|
end
|
227
240
|
|
228
241
|
def self.included(base)
|
data/test/test_db.sqlite3
CHANGED
Binary file
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: data_cleansing
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.6.
|
4
|
+
version: 0.6.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Reid Morrison
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2013-
|
11
|
+
date: 2013-08-29 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: thread_safe
|