data_cleansing 0.5.0 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +7 -7
- data/lib/data_cleansing/cleanse.rb +29 -0
- data/lib/data_cleansing/railtie.rb +5 -9
- data/lib/data_cleansing/version.rb +1 -1
- data/test/active_record_test.rb +2 -2
- data/test/ruby_test.rb +24 -4
- data/test/test_db.sqlite3 +0 -0
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: d330ec38e5e452e5a6278a9086d2fbd010240d12
|
4
|
+
data.tar.gz: 38dcd89391ba829b8d5b0d31fbf8f5b9c12ac6d0
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: d7a6e0ff6067d55da2910f494a7f1f3b0688460e794daa9cf7b5e0fdd8e1233df0251573404af4178b4e6d1559cb61960a252a8fad4155a4d773e41090f34867
|
7
|
+
data.tar.gz: 29eb21b9b89ea762e405d80aefe947dc3c2066e563f3f07f2e491bee757cec5cdba5fa239741be8256ba701405278143c3811f61dd973503a0c7470bb547c0e5
|
data/README.md
CHANGED
@@ -52,7 +52,7 @@ or pull request.
|
|
52
52
|
require 'data_cleansing'
|
53
53
|
|
54
54
|
# Define a global cleaner
|
55
|
-
DataCleansing.register_cleaner(:strip) {|string| string.strip
|
55
|
+
DataCleansing.register_cleaner(:strip) {|string| string.strip}
|
56
56
|
|
57
57
|
class User
|
58
58
|
include DataCleansing::Cleanse
|
@@ -78,7 +78,7 @@ puts "After data cleansing #{u.inspect}"
|
|
78
78
|
|
79
79
|
```ruby
|
80
80
|
# Define a global cleanser
|
81
|
-
DataCleansing.register_cleaner(:strip) {|string| string.strip
|
81
|
+
DataCleansing.register_cleaner(:strip) {|string| string.strip}
|
82
82
|
|
83
83
|
# 'users' table has the following columns :first_name, :last_name, :address1, :address2
|
84
84
|
class User < ActiveRecord::Base
|
@@ -88,7 +88,7 @@ class User < ActiveRecord::Base
|
|
88
88
|
cleanse :first_name, :last_name, :cleaner => :strip
|
89
89
|
|
90
90
|
# Define a once off cleaner
|
91
|
-
cleanse :address1, :address2, :cleaner => Proc.new {|string| string.strip
|
91
|
+
cleanse :address1, :address2, :cleaner => Proc.new {|string| string.strip}
|
92
92
|
|
93
93
|
# Automatically cleanse data before validation
|
94
94
|
before_validation :cleanse_attributes!
|
@@ -108,8 +108,8 @@ u.save!
|
|
108
108
|
require 'data_cleansing'
|
109
109
|
|
110
110
|
# Define global cleaners
|
111
|
-
DataCleansing.register_cleaner(:strip) {|string| string.strip
|
112
|
-
DataCleansing.register_cleaner(:upcase) {|string| string.upcase
|
111
|
+
DataCleansing.register_cleaner(:strip) {|string| string.strip}
|
112
|
+
DataCleansing.register_cleaner(:upcase) {|string| string.upcase}
|
113
113
|
|
114
114
|
class User
|
115
115
|
include DataCleansing::Cleanse
|
@@ -120,7 +120,7 @@ class User
|
|
120
120
|
cleanse :first_name, :last_name, :cleaner => :strip
|
121
121
|
|
122
122
|
# Define a once off cleaner
|
123
|
-
cleanse :address1, :address2, :cleaner => Proc.new {|string| string.strip
|
123
|
+
cleanse :address1, :address2, :cleaner => Proc.new {|string| string.strip}
|
124
124
|
|
125
125
|
# Use multiple cleaners, and a custom block
|
126
126
|
cleanse :title, :cleaner => [:strip, :upcase, Proc.new {|string| "#{string}." unless string.end_with?('.')}]
|
@@ -175,7 +175,7 @@ module MyApplication
|
|
175
175
|
config.data_cleansing.logger.level = :info
|
176
176
|
|
177
177
|
# Register any global cleaners
|
178
|
-
config.data_cleansing.register_cleaner(:strip) {|string| string.strip
|
178
|
+
config.data_cleansing.register_cleaner(:strip) {|string| string.strip}
|
179
179
|
|
180
180
|
end
|
181
181
|
end
|
data/lib/data_cleansing/cleanse.rb
CHANGED
@@ -11,14 +11,32 @@ module DataCleansing
|
|
11
11
|
params = (last.is_a?(Hash) && last.instance_of?(Hash)) ? attributes.pop.dup : {}
|
12
12
|
cleaners = Array(params.delete(:cleaner))
|
13
13
|
raise(ArgumentError, "Mandatory :cleaner parameter is missing: #{params.inspect}") unless cleaners
|
14
|
+
|
14
15
|
cleaner = DataCleansingCleaner.new(cleaners, attributes, params)
|
15
16
|
data_cleansing_cleaners << cleaner
|
17
|
+
|
18
|
+
# Create shortcuts to cleaners for each attribute for use by .cleanse_attribute
|
16
19
|
attributes.each do |attr|
|
17
20
|
(data_cleansing_attribute_cleaners[attr] ||= ThreadSafe::Array.new) << cleaner
|
18
21
|
end
|
19
22
|
cleaner
|
20
23
|
end
|
21
24
|
|
25
|
+
# Add one or more methods on this object to be called after cleansing is complete
|
26
|
+
# on an object
|
27
|
+
# After cleansers are executed when #cleanse_attributes! is called, but only after
|
28
|
+
# all other defined cleansers have been executed.
|
29
|
+
# They are _not_ called when .cleanse_attribute is called
|
30
|
+
#
|
31
|
+
# After cleaners should be used when based on the value of one attribute,
|
32
|
+
# one or more of the other attributes need to be modified
|
33
|
+
def after_cleanse(*methods)
|
34
|
+
methods.each do |m|
|
35
|
+
raise "Method #{m.inspect} must be a symbol" unless m.is_a?(Symbol)
|
36
|
+
data_cleansing_after_cleaners << m unless data_cleansing_after_cleaners.include?(m)
|
37
|
+
end
|
38
|
+
end
|
39
|
+
|
22
40
|
# Returns the value cleansed using the cleaners defined for that attribute
|
23
41
|
# in this model and any of its parents
|
24
42
|
#
|
@@ -57,6 +75,11 @@ module DataCleansing
|
|
57
75
|
@data_cleansing_cleaners ||= ThreadSafe::Array.new
|
58
76
|
end
|
59
77
|
|
78
|
+
# Array of after cleanse method names to execute against this model and its children
|
79
|
+
def data_cleansing_after_cleaners
|
80
|
+
@data_cleansing_after_cleaners ||= ThreadSafe::Array.new
|
81
|
+
end
|
82
|
+
|
60
83
|
# Hash of attributes to clean with their corresponding cleaner
|
61
84
|
def data_cleansing_attribute_cleaners
|
62
85
|
@data_cleansing_attribute_cleaners ||= ThreadSafe::Hash.new
|
@@ -96,15 +119,21 @@ module DataCleansing
|
|
96
119
|
|
97
120
|
module InstanceMethods
|
98
121
|
# Cleanse the attributes using specified cleaners
|
122
|
+
# and execute after cleaners once complete
|
99
123
|
def cleanse_attributes!
|
100
124
|
# Collect parent cleaners first, starting with the top parent
|
101
125
|
cleaners = [self.class.send(:data_cleansing_cleaners)]
|
126
|
+
after_cleaners = [self.class.send(:data_cleansing_after_cleaners)]
|
102
127
|
klass = self.class.superclass
|
103
128
|
while klass != Object
|
104
129
|
cleaners << klass.send(:data_cleansing_cleaners) if klass.respond_to?(:data_cleansing_cleaners)
|
130
|
+
after_cleaners << klass.send(:data_cleansing_after_cleaners) if klass.respond_to?(:data_cleansing_after_cleaners)
|
105
131
|
klass = klass.superclass
|
106
132
|
end
|
107
133
|
cleaners.reverse_each {|cleaner| data_cleansing_execute_cleaners(cleaner)}
|
134
|
+
|
135
|
+
# Execute the after cleaners, starting with the parent after cleanse methods
|
136
|
+
after_cleaners.reverse_each {|a| a.each {|method| send(method)} }
|
108
137
|
true
|
109
138
|
end
|
110
139
|
|
data/lib/data_cleansing/railtie.rb
CHANGED
@@ -4,15 +4,11 @@ module RubySkynet #:nodoc:
|
|
4
4
|
# Exposes DataCleansing configuration to the Rails application configuration.
|
5
5
|
#
|
6
6
|
# @example Set up configuration in the Rails app.
|
7
|
-
#
|
8
|
-
#
|
7
|
+
# module MyApplication
|
8
|
+
# class Application < Rails::Application
|
9
9
|
#
|
10
10
|
# # Data Cleansing Configuration
|
11
11
|
#
|
12
|
-
# # By default logging is enabled of data cleansing actions
|
13
|
-
# # Set to false to disable
|
14
|
-
# config.data_cleansing.logging_enabled = true
|
15
|
-
#
|
16
12
|
# # Attributes whose values are to be masked out during logging
|
17
13
|
# config.data_cleansing.register_masked_attributes :bank_account_number, :social_security_number
|
18
14
|
#
|
@@ -23,10 +19,10 @@ module RubySkynet #:nodoc:
|
|
23
19
|
# config.data_cleansing.logger.level = :info
|
24
20
|
#
|
25
21
|
# # Register any global cleaners
|
26
|
-
# config.data_cleansing.register_cleaner(:strip) {|string| string.strip
|
22
|
+
# config.data_cleansing.register_cleaner(:strip) {|string| string.strip}
|
27
23
|
#
|
28
|
-
#
|
29
|
-
#
|
24
|
+
# end
|
25
|
+
# end
|
30
26
|
config.data_cleansing = ::DataCleansing
|
31
27
|
end
|
32
28
|
end
|
data/test/active_record_test.rb
CHANGED
@@ -37,7 +37,7 @@ ActiveRecord::Schema.define :version => 0 do
|
|
37
37
|
end
|
38
38
|
|
39
39
|
# Define a global cleaner
|
40
|
-
DataCleansing.register_cleaner(:strip) {|string
|
40
|
+
DataCleansing.register_cleaner(:strip) {|string| string.strip}
|
41
41
|
|
42
42
|
# Log data cleansing result
|
43
43
|
# Set to :warn or higher to disable
|
@@ -70,7 +70,7 @@ class User < ActiveRecord::Base
|
|
70
70
|
cleanse :first_name, :last_name, :cleaner => :strip
|
71
71
|
|
72
72
|
# Define a once off cleaner
|
73
|
-
cleanse :address1, :address2, :instance_var, :cleaner => Proc.new {|string| "<< #{string.strip
|
73
|
+
cleanse :address1, :address2, :instance_var, :cleaner => Proc.new {|string| "<< #{string.strip} >>"}
|
74
74
|
|
75
75
|
# Custom Zip Code cleaner
|
76
76
|
cleanse :zip_code, :cleaner => :digits_to_integer
|
data/test/ruby_test.rb
CHANGED
@@ -7,7 +7,7 @@ require 'shoulda'
|
|
7
7
|
require 'data_cleansing'
|
8
8
|
|
9
9
|
# Define a global cleanser
|
10
|
-
DataCleansing.register_cleaner(:strip) {|string
|
10
|
+
DataCleansing.register_cleaner(:strip) {|string| string.strip}
|
11
11
|
|
12
12
|
# Non Cleansing base class
|
13
13
|
class RubyUserBase
|
@@ -23,7 +23,19 @@ class RubyUser < RubyUserBase
|
|
23
23
|
cleanse :first_name, :last_name, :cleaner => :strip
|
24
24
|
|
25
25
|
# Define a once off cleaner
|
26
|
-
cleanse :address1, :address2, :cleaner => Proc.new {|string| "<< #{string.strip
|
26
|
+
cleanse :address1, :address2, :cleaner => Proc.new {|string| "<< #{string.strip} >>"}
|
27
|
+
|
28
|
+
# Execute after cleanser
|
29
|
+
after_cleanse :name_check
|
30
|
+
|
31
|
+
# Called once cleaning has been completed
|
32
|
+
def name_check
|
33
|
+
# If first_name has a value, but last_name does not
|
34
|
+
if last_name.nil? || (last_name.length == 0)
|
35
|
+
self.last_name = first_name
|
36
|
+
self.first_name = nil
|
37
|
+
end
|
38
|
+
end
|
27
39
|
end
|
28
40
|
|
29
41
|
class RubyUserChild < RubyUser
|
@@ -32,7 +44,7 @@ class RubyUserChild < RubyUser
|
|
32
44
|
end
|
33
45
|
|
34
46
|
# Another global cleaner, used by RubyUser2
|
35
|
-
DataCleansing.register_cleaner(:upcase) {|string| string.upcase
|
47
|
+
DataCleansing.register_cleaner(:upcase) {|string| string.upcase}
|
36
48
|
|
37
49
|
class RubyUser2
|
38
50
|
include DataCleansing::Cleanse
|
@@ -43,7 +55,7 @@ class RubyUser2
|
|
43
55
|
cleanse :first_name, :last_name, :cleaner => :strip
|
44
56
|
|
45
57
|
# Define a once off cleaner
|
46
|
-
cleanse :address1, :address2, :cleaner => Proc.new {|string| string.strip
|
58
|
+
cleanse :address1, :address2, :cleaner => Proc.new {|string| string.strip}
|
47
59
|
|
48
60
|
# Use multiple cleaners, and a custom block
|
49
61
|
cleanse :title, :cleaner => [:strip, :upcase, Proc.new {|string| "#{string}." unless string.end_with?('.')}]
|
@@ -101,6 +113,14 @@ class RubyTest < Test::Unit::TestCase
|
|
101
113
|
@user.cleanse_attributes!
|
102
114
|
assert_equal nil, @user.first_name
|
103
115
|
end
|
116
|
+
|
117
|
+
should 'cleanse_attributes! call after cleaner' do
|
118
|
+
@user.first_name = 'Jack'
|
119
|
+
@user.last_name = nil
|
120
|
+
@user.cleanse_attributes!
|
121
|
+
assert_equal nil, @user.first_name, @user.inspect
|
122
|
+
assert_equal 'Jack', @user.last_name, @user.inspect
|
123
|
+
end
|
104
124
|
end
|
105
125
|
|
106
126
|
context "with ruby user child" do
|
data/test/test_db.sqlite3
CHANGED
Binary file
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: data_cleansing
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.5.0
|
4
|
+
version: 0.6.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Reid Morrison
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2013-07-
|
11
|
+
date: 2013-07-30 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: thread_safe
|