data_cleansing 0.9.0 → 1.0.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/README.md +3 -25
- data/Rakefile +18 -14
- data/lib/data_cleansing/cleaners.rb +35 -21
- data/lib/data_cleansing/cleanse.rb +52 -73
- data/lib/data_cleansing/data_cleansing.rb +19 -1
- data/lib/data_cleansing/railtie.rb +0 -1
- data/lib/data_cleansing/version.rb +1 -1
- data/lib/data_cleansing.rb +6 -8
- data/test/active_record_test.rb +56 -53
- data/test/cleaners_test.rb +84 -73
- data/test/data_cleansing_test.rb +9 -0
- data/test/ruby_test.rb +48 -49
- data/test/test_db.sqlite3 +0 -0
- data/test/test_helper.rb +6 -10
- metadata +11 -11
data/test/active_record_test.rb
CHANGED
@@ -1,25 +1,26 @@
|
|
1
|
-
require_relative
|
2
|
-
require
|
1
|
+
require_relative "test_helper"
|
2
|
+
require "active_record"
|
3
3
|
|
4
4
|
ActiveRecord::Base.logger = SemanticLogger[ActiveRecord::Base]
|
5
5
|
ActiveRecord::Base.configurations = {
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
6
|
+
"test" => {
|
7
|
+
"adapter" => "sqlite3",
|
8
|
+
"database" => "test/test_db.sqlite3",
|
9
|
+
"pool" => 5,
|
10
|
+
"timeout" => 5000
|
11
11
|
}
|
12
12
|
}
|
13
|
-
ActiveRecord::Base.establish_connection(
|
13
|
+
ActiveRecord::Base.establish_connection(:test)
|
14
14
|
|
15
|
-
ActiveRecord::Schema.define :
|
16
|
-
create_table :users, :
|
15
|
+
ActiveRecord::Schema.define version: 0 do
|
16
|
+
create_table :users, force: true do |t|
|
17
17
|
t.string :first_name
|
18
18
|
t.string :last_name
|
19
19
|
t.string :address1
|
20
20
|
t.string :address2
|
21
21
|
t.string :ssn
|
22
22
|
t.integer :zip_code
|
23
|
+
t.text :text
|
23
24
|
end
|
24
25
|
end
|
25
26
|
|
@@ -37,13 +38,13 @@ class User < ActiveRecord::Base
|
|
37
38
|
attr_accessor :instance_var
|
38
39
|
|
39
40
|
# Use a global cleaner
|
40
|
-
cleanse :first_name, :last_name, :
|
41
|
+
cleanse :first_name, :last_name, cleaner: :strip
|
41
42
|
|
42
43
|
# Define a once off cleaner
|
43
|
-
cleanse :address1, :address2, :instance_var, :
|
44
|
+
cleanse :address1, :address2, :instance_var, cleaner: proc { |string| "<< #{string.strip} >>" }
|
44
45
|
|
45
46
|
# Custom Zip Code cleaner
|
46
|
-
cleanse :zip_code, :
|
47
|
+
cleanse :zip_code, cleaner: :string_to_integer
|
47
48
|
|
48
49
|
# Automatically cleanse data before validation
|
49
50
|
before_validation :cleanse_attributes!
|
@@ -52,19 +53,22 @@ end
|
|
52
53
|
class User2 < ActiveRecord::Base
|
53
54
|
include DataCleansing::Cleanse
|
54
55
|
# Use the same table as User above
|
55
|
-
self.table_name =
|
56
|
+
self.table_name = "users"
|
57
|
+
|
58
|
+
serialize :text
|
56
59
|
|
57
60
|
# Test :all cleaner. Only works with ActiveRecord Models
|
58
|
-
|
61
|
+
# Must explicitly exclude :text since it is serialized
|
62
|
+
cleanse :all, cleaner: [:strip, proc { |s| "@#{s}@" }], except: %i[address1 zip_code text]
|
59
63
|
|
60
64
|
# Clean :first_name multiple times
|
61
|
-
cleanse :first_name, :
|
65
|
+
cleanse :first_name, cleaner: proc { |string| "<< #{string} >>" }
|
62
66
|
|
63
67
|
# Clean :first_name multiple times
|
64
|
-
cleanse :first_name, :
|
68
|
+
cleanse :first_name, cleaner: proc { |string| "$#{string}$" }
|
65
69
|
|
66
70
|
# Custom Zip Code cleaner
|
67
|
-
cleanse :zip_code, :
|
71
|
+
cleanse :zip_code, cleaner: :string_to_integer
|
68
72
|
|
69
73
|
# Automatically cleanse data before validation
|
70
74
|
before_validation :cleanse_attributes!
|
@@ -72,82 +76,81 @@ end
|
|
72
76
|
|
73
77
|
class ActiveRecordTest < Minitest::Test
|
74
78
|
describe "ActiveRecord Models" do
|
75
|
-
|
76
|
-
it 'have globally registered cleaner' do
|
79
|
+
it "have globally registered cleaner" do
|
77
80
|
assert DataCleansing.cleaner(:strip)
|
78
81
|
end
|
79
82
|
|
80
|
-
it
|
81
|
-
assert_equal
|
82
|
-
assert_equal
|
83
|
-
assert_equal
|
84
|
-
assert_equal
|
85
|
-
assert_equal
|
83
|
+
it "Model.cleanse_attribute" do
|
84
|
+
assert_equal "joe", User.cleanse_attribute(:first_name, " joe ")
|
85
|
+
assert_equal "black", User.cleanse_attribute(:last_name, "\n black\n")
|
86
|
+
assert_equal "<< 2632 Brown St >>", User.cleanse_attribute(:address1, "2632 Brown St \n")
|
87
|
+
assert_equal "<< instance >>", User.cleanse_attribute(:instance_var, "\n instance\n\t ")
|
88
|
+
assert_equal 12_345, User.cleanse_attribute(:zip_code, "\n\tblah 12345badtext\n")
|
86
89
|
end
|
87
90
|
|
88
91
|
describe "with user" do
|
89
92
|
before do
|
90
93
|
@user = User.new(
|
91
|
-
:
|
92
|
-
:
|
93
|
-
:
|
94
|
-
:
|
95
|
-
:
|
94
|
+
first_name: " joe ",
|
95
|
+
last_name: "\n black\n",
|
96
|
+
address1: "2632 Brown St \n",
|
97
|
+
zip_code: "\n\tblah 12345badtext\n",
|
98
|
+
instance_var: "\n instance\n\t "
|
96
99
|
)
|
97
100
|
end
|
98
101
|
|
99
|
-
it
|
102
|
+
it "only have 3 cleaners" do
|
100
103
|
assert_equal 3, User.send(:data_cleansing_cleaners).size, User.send(:data_cleansing_cleaners)
|
101
104
|
end
|
102
105
|
|
103
|
-
it
|
106
|
+
it "cleanse_attributes! using global cleaner" do
|
104
107
|
assert_equal true, @user.valid?
|
105
|
-
assert_equal
|
106
|
-
assert_equal
|
108
|
+
assert_equal "joe", @user.first_name
|
109
|
+
assert_equal "black", @user.last_name
|
107
110
|
end
|
108
111
|
|
109
|
-
it
|
112
|
+
it "cleanse_attributes! using attribute specific custom cleaner" do
|
110
113
|
assert_equal true, @user.valid?
|
111
|
-
assert_equal
|
112
|
-
assert_equal
|
114
|
+
assert_equal "<< 2632 Brown St >>", @user.address1
|
115
|
+
assert_equal "<< instance >>", @user.instance_var
|
113
116
|
end
|
114
117
|
|
115
|
-
it
|
118
|
+
it "cleanse_attributes! using global cleaner using rails extensions" do
|
116
119
|
@user.cleanse_attributes!
|
117
|
-
assert_equal
|
120
|
+
assert_equal 12_345, @user.zip_code
|
118
121
|
end
|
119
122
|
end
|
120
123
|
|
121
124
|
describe "with user2" do
|
122
125
|
before do
|
123
126
|
@user = User2.new(
|
124
|
-
:
|
125
|
-
:
|
126
|
-
:
|
127
|
-
:
|
128
|
-
:
|
127
|
+
first_name: " joe ",
|
128
|
+
last_name: "\n black\n",
|
129
|
+
ssn: "\n 123456789 \n ",
|
130
|
+
address1: "2632 Brown St \n",
|
131
|
+
zip_code: "\n\t blah\n",
|
132
|
+
text: ["\n 123456789 \n ", " second "]
|
129
133
|
)
|
130
134
|
end
|
131
135
|
|
132
|
-
it
|
136
|
+
it "have 4 cleaners defined" do
|
133
137
|
assert_equal 4, User2.send(:data_cleansing_cleaners).size, User2.send(:data_cleansing_cleaners)
|
134
138
|
end
|
135
139
|
|
136
|
-
it
|
140
|
+
it "have 3 attributes cleaners defined" do
|
137
141
|
# :all, :first_name, :zip_code
|
138
142
|
assert_equal 3, User2.send(:data_cleansing_attribute_cleaners).size, User2.send(:data_cleansing_attribute_cleaners)
|
139
143
|
end
|
140
144
|
|
141
|
-
it
|
145
|
+
it "cleanse_attributes! clean all attributes" do
|
142
146
|
assert_equal true, @user.valid?
|
143
|
-
assert_equal
|
144
|
-
assert_equal
|
147
|
+
assert_equal "$<< @joe@ >>$", @user.first_name, User2.send(:data_cleansing_cleaners)
|
148
|
+
assert_equal "@black@", @user.last_name
|
145
149
|
assert_equal "2632 Brown St \n", @user.address1
|
146
150
|
assert_equal "@123456789@", @user.ssn
|
147
|
-
|
151
|
+
assert_nil @user.zip_code, User2.send(:data_cleansing_cleaners)
|
152
|
+
assert_equal ["\n 123456789 \n ", " second "], @user.text
|
148
153
|
end
|
149
|
-
|
150
154
|
end
|
151
|
-
|
152
155
|
end
|
153
156
|
end
|
data/test/cleaners_test.rb
CHANGED
@@ -1,14 +1,15 @@
|
|
1
|
-
require_relative
|
2
|
-
require
|
1
|
+
require_relative "test_helper"
|
2
|
+
require "active_support/core_ext/time/calculations"
|
3
3
|
|
4
4
|
class CleanersTest < Minitest::Test
|
5
5
|
class User
|
6
6
|
include DataCleansing::Cleanse
|
7
7
|
|
8
8
|
attr_accessor :first_name, :last_name, :address1, :address2,
|
9
|
-
|
10
|
-
|
11
|
-
|
9
|
+
:make_this_upper, :clean_non_word, :clean_non_printable,
|
10
|
+
:clean_html, :clean_from_uri, :clean_to_uri, :clean_whitespace,
|
11
|
+
:clean_digits_only, :clean_to_integer, :clean_to_float, :clean_end_of_day,
|
12
|
+
:clean_order
|
12
13
|
|
13
14
|
cleanse :first_name, :last_name, :address1, :address2, cleaner: :strip
|
14
15
|
cleanse :make_this_upper, cleaner: :upcase
|
@@ -22,188 +23,198 @@ class CleanersTest < Minitest::Test
|
|
22
23
|
cleanse :clean_to_integer, cleaner: :string_to_integer
|
23
24
|
cleanse :clean_to_float, cleaner: :string_to_float
|
24
25
|
cleanse :clean_end_of_day, cleaner: :end_of_day
|
26
|
+
|
27
|
+
# Call cleaners in the order they are defined
|
28
|
+
cleanse :clean_order, cleaner: %i[upcase strip]
|
29
|
+
cleanse :clean_order, cleaner: ->(val) { val == "BLAH" ? " yes " : " no " }
|
25
30
|
end
|
26
31
|
|
27
|
-
describe
|
28
|
-
it
|
32
|
+
describe "Cleaners" do
|
33
|
+
it "#strip" do
|
29
34
|
user = User.new
|
30
|
-
user.first_name =
|
35
|
+
user.first_name = " jack black "
|
31
36
|
user.last_name = " \n \t joe"
|
32
37
|
user.address1 = "joe \n\n \n \t\t "
|
33
38
|
user.address2 = "joe \n\n bloggs \n \t\t "
|
34
39
|
user.cleanse_attributes!
|
35
|
-
assert_equal
|
36
|
-
assert_equal
|
37
|
-
assert_equal
|
40
|
+
assert_equal "jack black", user.first_name
|
41
|
+
assert_equal "joe", user.last_name
|
42
|
+
assert_equal "joe", user.address1
|
38
43
|
assert_equal "joe \n\n bloggs", user.address2
|
39
44
|
end
|
40
45
|
|
41
|
-
it
|
46
|
+
it "#upcase" do
|
42
47
|
user = User.new
|
43
|
-
user.make_this_upper =
|
48
|
+
user.make_this_upper = " jacK blAck "
|
44
49
|
user.cleanse_attributes!
|
45
|
-
assert_equal
|
50
|
+
assert_equal " JACK BLACK ", user.make_this_upper
|
46
51
|
end
|
47
52
|
|
48
|
-
it
|
53
|
+
it "#remove_non_word" do
|
49
54
|
user = User.new
|
50
55
|
user.clean_non_word = " !@#$%^&*()+=-~`\t\n jacK blAck <>.,/\"':;{][]\|?/\\ "
|
51
56
|
user.cleanse_attributes!
|
52
|
-
assert_equal
|
57
|
+
assert_equal "jacKblAck", user.clean_non_word
|
53
58
|
end
|
54
59
|
|
55
|
-
it
|
60
|
+
it "#remove_non_printable" do
|
56
61
|
user = User.new
|
57
|
-
user.clean_non_printable = " !@#$%^&*()+=-~`\t\n jacK blAck <>.,/\"':;{][]\|?/\\
|
62
|
+
user.clean_non_printable = " !@#$%^&*()+=-~`\t\n jacK blAck <>.,/\"':;{][]\|?/\\ \x89 "
|
58
63
|
user.cleanse_attributes!
|
59
64
|
assert_equal " !@#$%^&*()+=-~` jacK blAck <>.,/\"':;{][]\|?/\\ ", user.clean_non_printable
|
60
65
|
end
|
61
66
|
|
62
|
-
describe
|
63
|
-
it
|
67
|
+
describe "#clean_html" do
|
68
|
+
it "cleans &quot;" do
|
64
69
|
user = User.new
|
65
|
-
user.clean_html =
|
70
|
+
user.clean_html = "O&quot;Leary"
|
66
71
|
user.cleanse_attributes!
|
67
72
|
assert_equal 'O"Leary', user.clean_html
|
68
73
|
end
|
69
74
|
|
70
|
-
it
|
75
|
+
it "cleans &amp;" do
|
71
76
|
user = User.new
|
72
|
-
user.clean_html =
|
77
|
+
user.clean_html = "Jim &amp; Candi"
|
73
78
|
user.cleanse_attributes!
|
74
|
-
assert_equal
|
79
|
+
assert_equal "Jim & Candi", user.clean_html
|
75
80
|
end
|
76
81
|
|
77
|
-
it
|
82
|
+
it "cleans &gt;" do
|
78
83
|
user = User.new
|
79
|
-
user.clean_html =
|
84
|
+
user.clean_html = "2 &gt; 1"
|
80
85
|
user.cleanse_attributes!
|
81
|
-
assert_equal
|
86
|
+
assert_equal "2 > 1", user.clean_html
|
82
87
|
end
|
83
88
|
|
84
|
-
it
|
89
|
+
it "cleans &lt;" do
|
85
90
|
user = User.new
|
86
|
-
user.clean_html =
|
91
|
+
user.clean_html = "1 &lt; 2"
|
87
92
|
user.cleanse_attributes!
|
88
|
-
assert_equal
|
93
|
+
assert_equal "1 < 2", user.clean_html
|
89
94
|
end
|
90
95
|
|
91
|
-
it
|
96
|
+
it "cleans &apos;" do
|
92
97
|
user = User.new
|
93
|
-
user.clean_html =
|
98
|
+
user.clean_html = "1&apos;2"
|
94
99
|
user.cleanse_attributes!
|
95
100
|
assert_equal "1'2", user.clean_html
|
96
101
|
end
|
97
102
|
|
98
|
-
it
|
103
|
+
it "cleans &nbsp;" do
|
99
104
|
user = User.new
|
100
|
-
user.clean_html =
|
105
|
+
user.clean_html = "1&nbsp;2"
|
101
106
|
user.cleanse_attributes!
|
102
107
|
assert_equal "1 2", user.clean_html
|
103
108
|
end
|
104
109
|
|
105
|
-
it
|
110
|
+
it "cleans &AMP;" do
|
106
111
|
user = User.new
|
107
|
-
user.clean_html =
|
112
|
+
user.clean_html = "Mutt &AMP; Jeff Inc."
|
108
113
|
user.cleanse_attributes!
|
109
|
-
assert_equal
|
114
|
+
assert_equal "Mutt & Jeff Inc.", user.clean_html
|
110
115
|
end
|
111
116
|
|
112
|
-
it
|
117
|
+
it "does not clean &;" do
|
113
118
|
user = User.new
|
114
|
-
user.clean_html =
|
119
|
+
user.clean_html = "Mutt &; Jeff Inc."
|
115
120
|
user.cleanse_attributes!
|
116
|
-
assert_equal
|
121
|
+
assert_equal "Mutt &; Jeff Inc.", user.clean_html
|
117
122
|
end
|
118
123
|
|
119
|
-
it
|
124
|
+
it "does not clean &blah;" do
|
120
125
|
user = User.new
|
121
|
-
user.clean_html =
|
126
|
+
user.clean_html = "1&blah;2"
|
122
127
|
user.cleanse_attributes!
|
123
|
-
assert_equal
|
128
|
+
assert_equal "1&blah;2", user.clean_html
|
124
129
|
end
|
125
130
|
end
|
126
131
|
|
127
|
-
describe
|
128
|
-
it
|
132
|
+
describe "#unescape_uri" do
|
133
|
+
it "converts %20" do
|
129
134
|
user = User.new
|
130
|
-
user.clean_from_uri =
|
135
|
+
user.clean_from_uri = "Jim%20%20Bob%20"
|
131
136
|
user.cleanse_attributes!
|
132
|
-
assert_equal
|
137
|
+
assert_equal "Jim Bob ", user.clean_from_uri
|
133
138
|
end
|
134
|
-
it
|
139
|
+
it "converts %20 only" do
|
135
140
|
user = User.new
|
136
|
-
user.clean_from_uri =
|
141
|
+
user.clean_from_uri = "%20"
|
137
142
|
user.cleanse_attributes!
|
138
|
-
assert_equal
|
143
|
+
assert_equal " ", user.clean_from_uri
|
139
144
|
end
|
140
145
|
end
|
141
146
|
|
142
|
-
describe
|
143
|
-
it
|
147
|
+
describe "#escape_uri" do
|
148
|
+
it "converts spaces" do
|
144
149
|
user = User.new
|
145
|
-
user.clean_to_uri =
|
150
|
+
user.clean_to_uri = "Jim Bob "
|
146
151
|
user.cleanse_attributes!
|
147
|
-
assert_equal
|
152
|
+
assert_equal "Jim++Bob+", user.clean_to_uri
|
148
153
|
end
|
149
|
-
it
|
154
|
+
it "converts space only" do
|
150
155
|
user = User.new
|
151
|
-
user.clean_to_uri =
|
156
|
+
user.clean_to_uri = " "
|
152
157
|
user.cleanse_attributes!
|
153
|
-
assert_equal
|
158
|
+
assert_equal "+", user.clean_to_uri
|
154
159
|
end
|
155
160
|
end
|
156
161
|
|
157
|
-
describe
|
158
|
-
it
|
162
|
+
describe "#compress_whitespace" do
|
163
|
+
it "compresses multiple spaces" do
|
159
164
|
user = User.new
|
160
|
-
user.clean_whitespace =
|
165
|
+
user.clean_whitespace = " J im B ob "
|
161
166
|
user.cleanse_attributes!
|
162
|
-
assert_equal
|
167
|
+
assert_equal " J im B ob ", user.clean_whitespace
|
163
168
|
end
|
164
169
|
|
165
|
-
it
|
170
|
+
it "does not compress single spaces" do
|
166
171
|
user = User.new
|
167
|
-
user.clean_whitespace =
|
172
|
+
user.clean_whitespace = " Jack Black"
|
168
173
|
user.cleanse_attributes!
|
169
|
-
assert_equal
|
174
|
+
assert_equal " Jack Black", user.clean_whitespace
|
170
175
|
end
|
171
176
|
|
172
|
-
it
|
177
|
+
it "compresses newlines and tabs" do
|
173
178
|
user = User.new
|
174
179
|
user.clean_whitespace = " \n\n J im B ob \t\n\t "
|
175
180
|
user.cleanse_attributes!
|
176
|
-
assert_equal
|
181
|
+
assert_equal " J im B ob ", user.clean_whitespace
|
177
182
|
end
|
178
183
|
end
|
179
184
|
|
180
|
-
it
|
185
|
+
it "#digits_only" do
|
181
186
|
user = User.new
|
182
187
|
user.clean_digits_only = " 1 !@#$%^&*3()+=-~`\t\n jacK6 blAck <>.,/\"':;8{][]9\|?/\\ "
|
183
188
|
user.cleanse_attributes!
|
184
|
-
assert_equal
|
189
|
+
assert_equal "13689", user.clean_digits_only
|
185
190
|
end
|
186
191
|
|
187
|
-
it
|
192
|
+
it "#string_to_integer" do
|
188
193
|
user = User.new
|
189
194
|
user.clean_to_integer = " 1 !@#$%^&*3()+=-~`\t\n jacK6 blAck <>.,/\"':;8{][]9\|?/\\ "
|
190
195
|
user.cleanse_attributes!
|
191
196
|
assert_equal 136, user.clean_to_integer
|
192
197
|
end
|
193
198
|
|
194
|
-
it
|
199
|
+
it "#string_to_float" do
|
195
200
|
user = User.new
|
196
201
|
user.clean_to_float = " 1 !@#$%^&*3()+=-~`\t\n jacK6 blAck <>.,/\"':;8{][]9\|?/\\ "
|
197
202
|
user.cleanse_attributes!
|
198
203
|
assert_equal 136.89, user.clean_to_float
|
199
204
|
end
|
200
205
|
|
201
|
-
it
|
206
|
+
it "#date_to_time_at_end_of_day" do
|
202
207
|
user = User.new
|
203
|
-
user.clean_end_of_day = Time.parse(
|
208
|
+
user.clean_end_of_day = Time.parse("2016-03-03 14:33:44 +0000")
|
204
209
|
user.cleanse_attributes!
|
205
|
-
assert_equal Time.parse(
|
210
|
+
assert_equal Time.parse("2016-03-03 23:59:59 +0000").to_i, user.clean_end_of_day.to_i
|
206
211
|
end
|
207
212
|
|
213
|
+
it "cleans in the order defined" do
|
214
|
+
user = User.new
|
215
|
+
user.clean_order = " blah "
|
216
|
+
user.cleanse_attributes!
|
217
|
+
assert_equal " yes ", user.clean_order
|
218
|
+
end
|
208
219
|
end
|
209
220
|
end
|