data_cleansing 0.9.0 → 1.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/README.md +3 -25
- data/Rakefile +18 -14
- data/lib/data_cleansing/cleaners.rb +35 -21
- data/lib/data_cleansing/cleanse.rb +52 -73
- data/lib/data_cleansing/data_cleansing.rb +19 -1
- data/lib/data_cleansing/railtie.rb +0 -1
- data/lib/data_cleansing/version.rb +1 -1
- data/lib/data_cleansing.rb +6 -8
- data/test/active_record_test.rb +56 -53
- data/test/cleaners_test.rb +84 -73
- data/test/data_cleansing_test.rb +9 -0
- data/test/ruby_test.rb +48 -49
- data/test/test_db.sqlite3 +0 -0
- data/test/test_helper.rb +6 -10
- metadata +11 -11
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 04a8c565168dcba73088d9e17afcda3ab49097a41fb55bcf880f2cd04e6d38bf
|
4
|
+
data.tar.gz: da24439e8431d81c19241b7ed579bd73170cfc8e61c1781bc103befa1e37d6fe
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 28d9b9af8b18271bcbedd3930b0c1882ee162470959992ab2140f4a733377aaffadb0efef52d66ffeb86a93b1c10cdbcc78d61382d09fd85f8135596bd845628
|
7
|
+
data.tar.gz: f49c9062ffa4620c723cdb1d114f3baf649b63cb273dc67cadb34212cdf1d42c54a1ad4be90cff38e02f565c2135d18c3d58ccd0e90c3031e65dfe3fbef0e01c
|
data/README.md
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
data_cleansing
|
2
2
|
==============
|
3
3
|
|
4
|
-
Data Cleansing framework for Ruby
|
4
|
+
Data Cleansing framework for Ruby.
|
5
5
|
|
6
6
|
* http://github.com/reidmorrison/data_cleansing
|
7
7
|
|
@@ -12,12 +12,8 @@ or trailing blanks and even newlines.
|
|
12
12
|
Similarly it would be useful to be able to attach a cleansing solution to a field
|
13
13
|
in a model and have the data cleansed transparently when required.
|
14
14
|
|
15
|
-
DataCleansing is a framework that allows
|
16
|
-
specific attributes or fields.
|
17
|
-
solutions themselves since they are usually straight forward, or so complex
|
18
|
-
that they don't tend to be too useful to others. However, over time built-in
|
19
|
-
cleansing solutions may be added. Feel free to submit any suggestions via a ticket
|
20
|
-
or pull request.
|
15
|
+
DataCleansing is a framework that allows data cleansing to be applied to
|
16
|
+
specific attributes or fields.
|
21
17
|
|
22
18
|
## Features
|
23
19
|
|
@@ -297,24 +293,6 @@ Install the Gem with bundler
|
|
297
293
|
|
298
294
|
bundle install
|
299
295
|
|
300
|
-
## Architecture
|
301
|
-
|
302
|
-
DataCleansing has been designed to support externalized data cleansing routines.
|
303
|
-
In this way the data cleansing routine itself can be loaded from a datastore and
|
304
|
-
applied dynamically at runtime.
|
305
|
-
Although not supported out of the box, this design allows for example for the
|
306
|
-
data cleansing routines to be stored in something like [ZooKeeper](http://zookeeper.apache.org/).
|
307
|
-
Then any changes to the data cleansing routines can be pushed out immediately to
|
308
|
-
every server that needs it.
|
309
|
-
|
310
|
-
DataCleansing is designed to support any Ruby model. In this way it can be used
|
311
|
-
in just about any ORM or DOM. For example, it currently easily supports both
|
312
|
-
Rails and Mongoid models. Some extensions have been added to support these frameworks.
|
313
|
-
|
314
|
-
For example, in Rails it obtains the raw data value before Rails has converted it.
|
315
|
-
Which is useful for cleansing integer or float fields as raw strings before Rails
|
316
|
-
tries to convert it to an integer or float.
|
317
|
-
|
318
296
|
## Dependencies
|
319
297
|
|
320
298
|
DataCleansing requires the following dependencies
|
data/Rakefile
CHANGED
@@ -1,27 +1,31 @@
|
|
1
|
-
|
2
|
-
require
|
1
|
+
# Setup bundler to avoid having to run bundle exec all the time.
|
2
|
+
require "rubygems"
|
3
|
+
require "bundler/setup"
|
3
4
|
|
4
|
-
|
5
|
+
require "rake/testtask"
|
6
|
+
require_relative "lib/data_cleansing/version"
|
5
7
|
|
6
8
|
task :gem do
|
7
|
-
system
|
9
|
+
system "gem build data_cleansing.gemspec"
|
8
10
|
end
|
9
11
|
|
10
12
|
task publish: :gem do
|
11
13
|
system "git tag -a v#{DataCleansing::VERSION} -m 'Tagging #{DataCleansing::VERSION}'"
|
12
|
-
system
|
14
|
+
system "git push --tags"
|
13
15
|
system "gem push data_cleansing-#{DataCleansing::VERSION}.gem"
|
14
16
|
system "rm data_cleansing-#{DataCleansing::VERSION}.gem"
|
15
17
|
end
|
16
18
|
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
t.verbose = true
|
22
|
-
end
|
23
|
-
|
24
|
-
Rake::Task['functional'].invoke
|
19
|
+
Rake::TestTask.new(:test) do |t|
|
20
|
+
t.pattern = "test/**/*_test.rb"
|
21
|
+
t.verbose = true
|
22
|
+
t.warning = false
|
25
23
|
end
|
26
24
|
|
27
|
-
|
25
|
+
# By default run tests against all appraisals
|
26
|
+
if !ENV["APPRAISAL_INITIALIZED"] && !ENV["TRAVIS"]
|
27
|
+
require "appraisal"
|
28
|
+
task default: :appraisal
|
29
|
+
else
|
30
|
+
task default: :test
|
31
|
+
end
|
data/lib/data_cleansing/cleaners.rb
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
require
|
1
|
+
require "cgi"
|
2
2
|
module Cleaners
|
3
3
|
# Strip leading and trailing whitespace
|
4
4
|
module Strip
|
@@ -20,6 +20,16 @@ module Cleaners
|
|
20
20
|
end
|
21
21
|
DataCleansing.register_cleaner(:upcase, Upcase)
|
22
22
|
|
23
|
+
# Convert to downcase
|
24
|
+
module Downcase
|
25
|
+
def self.call(string)
|
26
|
+
return string unless string.is_a?(String)
|
27
|
+
|
28
|
+
string.downcase! || string
|
29
|
+
end
|
30
|
+
end
|
31
|
+
DataCleansing.register_cleaner(:downcase, Downcase)
|
32
|
+
|
23
33
|
# Remove all non-word characters, including whitespace
|
24
34
|
module RemoveNonWord
|
25
35
|
NOT_WORDS = Regexp.compile(/\W/)
|
@@ -27,7 +37,7 @@ module Cleaners
|
|
27
37
|
def self.call(string)
|
28
38
|
return string unless string.is_a?(String)
|
29
39
|
|
30
|
-
string.gsub!(NOT_WORDS,
|
40
|
+
string.gsub!(NOT_WORDS, "") || string
|
31
41
|
end
|
32
42
|
end
|
33
43
|
DataCleansing.register_cleaner(:remove_non_word, RemoveNonWord)
|
@@ -39,12 +49,16 @@ module Cleaners
|
|
39
49
|
def self.call(string)
|
40
50
|
return string unless string.is_a?(String)
|
41
51
|
|
52
|
+
# Strip invalid characters, since they are non printable
|
53
|
+
unless string.valid_encoding?
|
54
|
+
string = string.encode(string.encoding, invalid: :replace, undef: :replace, replace: "")
|
55
|
+
end
|
42
56
|
string.gsub!(NOT_PRINTABLE, '') || string
|
43
57
|
end
|
44
58
|
end
|
45
59
|
DataCleansing.register_cleaner(:remove_non_printable, RemoveNonPrintable)
|
46
60
|
|
47
|
-
#
|
61
|
+
# Unescape HTML Markup ( case-insensitive )
|
48
62
|
module ReplaceHTMLMarkup
|
49
63
|
HTML_MARKUP = Regexp.compile(/&(amp|quot|gt|lt|apos|nbsp);/in)
|
50
64
|
|
@@ -53,18 +67,18 @@ module Cleaners
|
|
53
67
|
|
54
68
|
string.gsub!(HTML_MARKUP) do |match|
|
55
69
|
case match.downcase
|
56
|
-
when
|
57
|
-
|
58
|
-
when
|
70
|
+
when "&amp;"
|
71
|
+
"&"
|
72
|
+
when "&quot;"
|
59
73
|
'"'
|
60
|
-
when
|
61
|
-
|
62
|
-
when
|
63
|
-
|
64
|
-
when
|
74
|
+
when "&gt;"
|
75
|
+
">"
|
76
|
+
when "&lt;"
|
77
|
+
"<"
|
78
|
+
when "&apos;"
|
65
79
|
"'"
|
66
|
-
when
|
67
|
-
|
80
|
+
when "&nbsp;"
|
81
|
+
" "
|
68
82
|
else
|
69
83
|
"&#{match};"
|
70
84
|
end
|
@@ -77,7 +91,7 @@ module Cleaners
|
|
77
91
|
def self.call(string)
|
78
92
|
return string unless string.is_a?(String)
|
79
93
|
|
80
|
-
|
94
|
+
CGI.unescape(string)
|
81
95
|
end
|
82
96
|
end
|
83
97
|
DataCleansing.register_cleaner(:unescape_uri, UnescapeURI)
|
@@ -86,7 +100,7 @@ module Cleaners
|
|
86
100
|
def self.call(string)
|
87
101
|
return string unless string.is_a?(String)
|
88
102
|
|
89
|
-
|
103
|
+
CGI.escape(string)
|
90
104
|
end
|
91
105
|
end
|
92
106
|
DataCleansing.register_cleaner(:escape_uri, EscapeURI)
|
@@ -98,7 +112,7 @@ module Cleaners
|
|
98
112
|
def self.call(string)
|
99
113
|
return string unless string.is_a?(String)
|
100
114
|
|
101
|
-
string.gsub!(WHITESPACE,
|
115
|
+
string.gsub!(WHITESPACE, " ") || string
|
102
116
|
end
|
103
117
|
end
|
104
118
|
DataCleansing.register_cleaner(:compress_whitespace, CompressWhitespace)
|
@@ -111,7 +125,7 @@ module Cleaners
|
|
111
125
|
def self.call(string)
|
112
126
|
return string unless string.is_a?(String)
|
113
127
|
|
114
|
-
string.gsub!(DIGITS,
|
128
|
+
string.gsub!(DIGITS, "")
|
115
129
|
string.length > 0 ? string : nil
|
116
130
|
end
|
117
131
|
end
|
@@ -120,13 +134,13 @@ module Cleaners
|
|
120
134
|
# Returns [Integer] after removing all non-digit characters, except '.'
|
121
135
|
# Returns nil if no digits are present in the string.
|
122
136
|
module StringToInteger
|
123
|
-
NUMERIC = Regexp.compile(/[^0-9
|
137
|
+
NUMERIC = Regexp.compile(/[^0-9.]/)
|
124
138
|
|
125
139
|
def self.call(string)
|
126
140
|
return string unless string.is_a?(String)
|
127
141
|
|
128
142
|
# Remove Non-Digit Chars, except for '.'
|
129
|
-
string.gsub!(NUMERIC,
|
143
|
+
string.gsub!(NUMERIC, "")
|
130
144
|
string.length > 0 ? string.to_i : nil
|
131
145
|
end
|
132
146
|
end
|
@@ -135,13 +149,13 @@ module Cleaners
|
|
135
149
|
# Returns [Integer] after removing all non-digit characters, except '.'
|
136
150
|
# Returns nil if no digits are present in the string.
|
137
151
|
module StringToFloat
|
138
|
-
NUMERIC = Regexp.compile(/[^0-9
|
152
|
+
NUMERIC = Regexp.compile(/[^0-9.]/)
|
139
153
|
|
140
154
|
def self.call(string)
|
141
155
|
return string unless string.is_a?(String)
|
142
156
|
|
143
157
|
# Remove Non-Digit Chars, except for '.'
|
144
|
-
string.gsub!(NUMERIC,
|
158
|
+
string.gsub!(NUMERIC, "")
|
145
159
|
string.length > 0 ? string.to_f : nil
|
146
160
|
end
|
147
161
|
end
|
data/lib/data_cleansing/cleanse.rb
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
require
|
1
|
+
require "data_cleansing/cleaners"
|
2
2
|
module DataCleansing
|
3
3
|
# Mix-in to add cleaner
|
4
4
|
module Cleanse
|
@@ -7,10 +7,10 @@ module DataCleansing
|
|
7
7
|
module ClassMethods
|
8
8
|
# Define how to cleanse one or more attributes
|
9
9
|
def cleanse(*args)
|
10
|
-
last
|
10
|
+
last = args.last
|
11
11
|
attributes = args.dup
|
12
|
-
params
|
13
|
-
cleaners
|
12
|
+
params = last.is_a?(Hash) && last.instance_of?(Hash) ? attributes.pop.dup : {}
|
13
|
+
cleaners = Array(params.delete(:cleaner))
|
14
14
|
raise(ArgumentError, "Mandatory :cleaner parameter is missing: #{params.inspect}") unless cleaners
|
15
15
|
|
16
16
|
cleaner = DataCleansingCleaner.new(cleaners, attributes, params)
|
@@ -34,6 +34,7 @@ module DataCleansing
|
|
34
34
|
def after_cleanse(*methods)
|
35
35
|
methods.each do |m|
|
36
36
|
raise "Method #{m.inspect} must be a symbol" unless m.is_a?(Symbol)
|
37
|
+
|
37
38
|
data_cleansing_after_cleaners << m unless data_cleansing_after_cleaners.include?(m)
|
38
39
|
end
|
39
40
|
end
|
@@ -53,12 +54,12 @@ module DataCleansing
|
|
53
54
|
#
|
54
55
|
# Warning: If any of the cleaners read or write to other object attributes
|
55
56
|
# then a valid object instance must be supplied
|
56
|
-
def cleanse_attribute(attribute_name, value, object=nil)
|
57
|
+
def cleanse_attribute(attribute_name, value, object = nil)
|
57
58
|
return if value.nil?
|
58
59
|
|
59
60
|
# Collect parent cleaners first, starting with the top parent
|
60
61
|
cleaners = []
|
61
|
-
klass
|
62
|
+
klass = self
|
62
63
|
while klass != Object
|
63
64
|
if klass.respond_to?(:data_cleansing_attribute_cleaners)
|
64
65
|
cleaners += klass.data_cleansing_attribute_cleaners[:all] || []
|
@@ -66,8 +67,9 @@ module DataCleansing
|
|
66
67
|
end
|
67
68
|
klass = klass.superclass
|
68
69
|
end
|
69
|
-
|
70
|
-
|
70
|
+
# Support Integer values
|
71
|
+
cleansed_value = value.is_a?(Integer) ? value : value.dup
|
72
|
+
cleaners.reverse_each { |cleaner| cleansed_value = data_cleansing_clean(cleaner, cleansed_value, object) if cleaner }
|
71
73
|
cleansed_value
|
72
74
|
end
|
73
75
|
|
@@ -90,37 +92,23 @@ module DataCleansing
|
|
90
92
|
|
91
93
|
# Returns the supplied value cleansed using the supplied cleaner
|
92
94
|
# Parameters
|
93
|
-
#
|
95
|
+
# binding
|
94
96
|
# If supplied the cleansing will be performed within the scope of
|
95
|
-
# that
|
96
|
-
# of that
|
97
|
+
# that binding so that cleaners can read and write to attributes
|
98
|
+
# of that binding
|
97
99
|
#
|
98
100
|
# No logging of cleansing is performed by this method since the value
|
99
101
|
# itself is not modified
|
100
|
-
def data_cleansing_clean(cleaner_struct, value,
|
102
|
+
def data_cleansing_clean(cleaner_struct, value, binding = nil)
|
101
103
|
return if cleaner_struct.nil? || value.nil?
|
104
|
+
|
102
105
|
# Duplicate value in case cleaner uses methods such as gsub!
|
103
106
|
new_value = value.is_a?(String) ? value.dup : value
|
104
107
|
cleaner_struct.cleaners.each do |name|
|
105
|
-
|
106
|
-
proc = name.is_a?(Proc) ? name : DataCleansing.cleaner(name.to_sym)
|
107
|
-
raise "No cleaner defined for #{name.inspect}" unless proc
|
108
|
-
|
109
|
-
if proc.is_a?(Proc)
|
110
|
-
new_value = if object
|
111
|
-
# Call the cleaner proc within the scope (binding) of the object
|
112
|
-
proc.arity == 1 ? object.instance_exec(new_value, &proc) : object.instance_exec(new_value, cleaner_struct.params, &proc)
|
113
|
-
else
|
114
|
-
proc.arity == 1 ? proc.call(new_value) : proc.call(new_value, cleaner_struct.params)
|
115
|
-
end
|
116
|
-
else
|
117
|
-
new_value = (proc.method(:call).arity == 1 ? proc.call(new_value) : proc.call(new_value, cleaner_struct.params))
|
118
|
-
end
|
119
|
-
|
108
|
+
new_value = DataCleansing.clean(name, new_value, cleaner_struct.params, binding)
|
120
109
|
end
|
121
110
|
new_value
|
122
111
|
end
|
123
|
-
|
124
112
|
end
|
125
113
|
|
126
114
|
module InstanceMethods
|
@@ -131,23 +119,23 @@ module DataCleansing
|
|
131
119
|
#
|
132
120
|
# Note: At this time the changes returned does not include any fields
|
133
121
|
# modified in any of the after_cleaner methods
|
134
|
-
def cleanse_attributes!(verbose=DataCleansing.logger.debug?)
|
122
|
+
def cleanse_attributes!(verbose = DataCleansing.logger.debug?)
|
135
123
|
changes = {}
|
136
|
-
DataCleansing.logger.benchmark_info("#{self.class.name}#cleanse_attributes!", :
|
124
|
+
DataCleansing.logger.benchmark_info("#{self.class.name}#cleanse_attributes!", payload: changes) do
|
137
125
|
# Collect parent cleaners first, starting with the top parent
|
138
|
-
cleaners
|
126
|
+
cleaners = [self.class.send(:data_cleansing_cleaners)]
|
139
127
|
after_cleaners = [self.class.send(:data_cleansing_after_cleaners)]
|
140
|
-
klass
|
128
|
+
klass = self.class.superclass
|
141
129
|
while klass != Object
|
142
130
|
cleaners << klass.send(:data_cleansing_cleaners) if klass.respond_to?(:data_cleansing_cleaners)
|
143
131
|
after_cleaners << klass.send(:data_cleansing_after_cleaners) if klass.respond_to?(:data_cleansing_after_cleaners)
|
144
132
|
klass = klass.superclass
|
145
133
|
end
|
146
134
|
# Capture all modified fields if log_level is :debug or :trace
|
147
|
-
cleaners.reverse_each {|cleaner| changes.merge!(data_cleansing_execute_cleaners(cleaner, verbose))}
|
135
|
+
cleaners.reverse_each { |cleaner| changes.merge!(data_cleansing_execute_cleaners(cleaner, verbose)) }
|
148
136
|
|
149
137
|
# Execute the after cleaners, starting with the parent after cleanse methods
|
150
|
-
after_cleaners.reverse_each {|a| a.each {|method| send(method)} }
|
138
|
+
after_cleaners.reverse_each { |a| a.each { |method| send(method) } }
|
151
139
|
end
|
152
140
|
changes
|
153
141
|
end
|
@@ -176,15 +164,9 @@ module DataCleansing
|
|
176
164
|
# Special case to include :all fields
|
177
165
|
# Only works with ActiveRecord based models, not supported with regular Ruby models
|
178
166
|
if attrs.include?(:all) && defined?(ActiveRecord) && respond_to?(:attributes)
|
179
|
-
attrs = attributes.keys.collect{|i| i.to_sym}
|
167
|
+
attrs = attributes.keys.collect { |i| i.to_sym }
|
180
168
|
attrs.delete(:id)
|
181
169
|
|
182
|
-
# Remove serialized_attributes if any, from the :all condition
|
183
|
-
if self.class.respond_to?(:serialized_attributes)
|
184
|
-
serialized_attrs = self.class.serialized_attributes.keys
|
185
|
-
attrs -= serialized_attrs.collect{|i| i.to_sym} if serialized_attrs
|
186
|
-
end
|
187
|
-
|
188
170
|
# Replace any encrypted attributes with their non-encrypted versions if any
|
189
171
|
if defined?(SymmetricEncryption) && self.class.respond_to?(:encrypted_attributes)
|
190
172
|
self.class.encrypted_attributes.each_pair do |clear, encrypted|
|
@@ -205,51 +187,48 @@ module DataCleansing
|
|
205
187
|
attrs.each do |attr|
|
206
188
|
# Under ActiveModel for Rails and Mongoid need to retrieve raw value
|
207
189
|
# before data type conversion
|
208
|
-
value =
|
209
|
-
read_attribute_before_type_cast(attr.to_s)
|
210
|
-
|
211
|
-
|
212
|
-
|
190
|
+
value =
|
191
|
+
if respond_to?(:read_attribute_before_type_cast) && has_attribute?(attr.to_s)
|
192
|
+
read_attribute_before_type_cast(attr.to_s)
|
193
|
+
else
|
194
|
+
send(attr.to_sym)
|
195
|
+
end
|
213
196
|
|
214
197
|
# No need to clean if attribute is nil
|
215
|
-
|
216
|
-
|
217
|
-
|
218
|
-
|
219
|
-
|
220
|
-
|
221
|
-
|
222
|
-
|
223
|
-
|
224
|
-
|
225
|
-
|
226
|
-
|
227
|
-
|
228
|
-
|
229
|
-
|
230
|
-
|
231
|
-
|
232
|
-
|
233
|
-
|
234
|
-
|
235
|
-
|
236
|
-
|
237
|
-
end
|
238
|
-
end
|
198
|
+
next if value.nil?
|
199
|
+
|
200
|
+
new_value = self.class.send(:data_cleansing_clean, cleaner_struct, value, self)
|
201
|
+
|
202
|
+
next unless new_value != value
|
203
|
+
|
204
|
+
# Update value only if it has changed
|
205
|
+
send("#{attr.to_sym}=".to_sym, new_value)
|
206
|
+
|
207
|
+
# Capture changed attributes
|
208
|
+
next unless changes
|
209
|
+
|
210
|
+
# Mask sensitive attributes when logging
|
211
|
+
masked = DataCleansing.masked_attributes.include?(attr.to_sym)
|
212
|
+
new_value = :masked if masked && !new_value.nil?
|
213
|
+
if previous = changes[attr.to_sym]
|
214
|
+
previous[:after] = new_value
|
215
|
+
elsif new_value.nil? || verbose
|
216
|
+
changes[attr.to_sym] = {
|
217
|
+
before: masked ? :masked : value,
|
218
|
+
after: new_value
|
219
|
+
}
|
239
220
|
end
|
240
221
|
end
|
241
222
|
end
|
242
223
|
changes
|
243
224
|
end
|
244
|
-
|
245
225
|
end
|
246
226
|
|
247
227
|
def self.included(base)
|
248
228
|
base.class_eval do
|
249
|
-
extend
|
229
|
+
extend DataCleansing::Cleanse::ClassMethods
|
250
230
|
include DataCleansing::Cleanse::InstanceMethods
|
251
231
|
end
|
252
232
|
end
|
253
233
|
end
|
254
|
-
|
255
234
|
end
|
data/lib/data_cleansing/data_cleansing.rb
CHANGED
@@ -9,6 +9,7 @@ module DataCleansing
|
|
9
9
|
# Replaces any existing cleaner with the same name
|
10
10
|
def self.register_cleaner(name, cleaner = nil, &block)
|
11
11
|
raise "Must supply a Proc with the cleaner" unless block || cleaner
|
12
|
+
|
12
13
|
@@global_cleaners[name.to_sym] = cleaner || block
|
13
14
|
end
|
14
15
|
|
@@ -19,7 +20,7 @@ module DataCleansing
|
|
19
20
|
|
20
21
|
# Register Attributes to be masked out in any log output
|
21
22
|
def self.register_masked_attributes(*attributes)
|
22
|
-
attributes.each {|attr| @@masked_attributes << attr.to_sym }
|
23
|
+
attributes.each { |attr| @@masked_attributes << attr.to_sym }
|
23
24
|
end
|
24
25
|
|
25
26
|
# Returns the Global list of attributes to mask in any log output
|
@@ -27,4 +28,21 @@ module DataCleansing
|
|
27
28
|
@@masked_attributes.freeze
|
28
29
|
end
|
29
30
|
|
31
|
+
# Run the specified cleanser against the supplied value
|
32
|
+
def self.clean(name, value, params = nil, binding = nil)
|
33
|
+
# Cleaner itself could be a custom Proc, otherwise do a global lookup for it
|
34
|
+
proc = name.is_a?(Proc) ? name : DataCleansing.cleaner(name.to_sym)
|
35
|
+
raise(ArgumentError, "No cleaner defined for #{name.inspect}") unless proc
|
36
|
+
|
37
|
+
if proc.is_a?(Proc)
|
38
|
+
if binding
|
39
|
+
# Call the cleaner proc within the scope (binding) of the binding
|
40
|
+
proc.arity == 1 ? binding.instance_exec(value, &proc) : binding.instance_exec(value, params, &proc)
|
41
|
+
else
|
42
|
+
proc.arity == 1 ? proc.call(value) : proc.call(value, params)
|
43
|
+
end
|
44
|
+
else
|
45
|
+
(proc.method(:call).arity == 1 ? proc.call(value) : proc.call(value, params))
|
46
|
+
end
|
47
|
+
end
|
30
48
|
end
|
data/lib/data_cleansing.rb
CHANGED
@@ -1,13 +1,11 @@
|
|
1
|
-
require
|
2
|
-
require
|
3
|
-
require
|
4
|
-
require
|
1
|
+
require "concurrent"
|
2
|
+
require "semantic_logger"
|
3
|
+
require "data_cleansing/version"
|
4
|
+
require "data_cleansing/data_cleansing"
|
5
5
|
|
6
6
|
module DataCleansing
|
7
|
-
autoload :Cleanse,
|
7
|
+
autoload :Cleanse, "data_cleansing/cleanse"
|
8
8
|
end
|
9
9
|
|
10
10
|
# Rails Extensions
|
11
|
-
if defined?(Rails)
|
12
|
-
require 'data_cleansing/railtie'
|
13
|
-
end
|
11
|
+
require "data_cleansing/railtie" if defined?(Rails)
|