data_cleansing 0.9.0 → 1.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/README.md +3 -25
- data/Rakefile +18 -14
- data/lib/data_cleansing/cleaners.rb +35 -21
- data/lib/data_cleansing/cleanse.rb +52 -73
- data/lib/data_cleansing/data_cleansing.rb +19 -1
- data/lib/data_cleansing/railtie.rb +0 -1
- data/lib/data_cleansing/version.rb +1 -1
- data/lib/data_cleansing.rb +6 -8
- data/test/active_record_test.rb +56 -53
- data/test/cleaners_test.rb +84 -73
- data/test/data_cleansing_test.rb +9 -0
- data/test/ruby_test.rb +48 -49
- data/test/test_db.sqlite3 +0 -0
- data/test/test_helper.rb +6 -10
- metadata +11 -11
data/test/active_record_test.rb
CHANGED
@@ -1,25 +1,26 @@
|
|
1
|
-
require_relative
|
2
|
-
require
|
1
|
+
require_relative "test_helper"
|
2
|
+
require "active_record"
|
3
3
|
|
4
4
|
ActiveRecord::Base.logger = SemanticLogger[ActiveRecord::Base]
|
5
5
|
ActiveRecord::Base.configurations = {
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
6
|
+
"test" => {
|
7
|
+
"adapter" => "sqlite3",
|
8
|
+
"database" => "test/test_db.sqlite3",
|
9
|
+
"pool" => 5,
|
10
|
+
"timeout" => 5000
|
11
11
|
}
|
12
12
|
}
|
13
|
-
ActiveRecord::Base.establish_connection(
|
13
|
+
ActiveRecord::Base.establish_connection(:test)
|
14
14
|
|
15
|
-
ActiveRecord::Schema.define :
|
16
|
-
create_table :users, :
|
15
|
+
ActiveRecord::Schema.define version: 0 do
|
16
|
+
create_table :users, force: true do |t|
|
17
17
|
t.string :first_name
|
18
18
|
t.string :last_name
|
19
19
|
t.string :address1
|
20
20
|
t.string :address2
|
21
21
|
t.string :ssn
|
22
22
|
t.integer :zip_code
|
23
|
+
t.text :text
|
23
24
|
end
|
24
25
|
end
|
25
26
|
|
@@ -37,13 +38,13 @@ class User < ActiveRecord::Base
|
|
37
38
|
attr_accessor :instance_var
|
38
39
|
|
39
40
|
# Use a global cleaner
|
40
|
-
cleanse :first_name, :last_name, :
|
41
|
+
cleanse :first_name, :last_name, cleaner: :strip
|
41
42
|
|
42
43
|
# Define a once off cleaner
|
43
|
-
cleanse :address1, :address2, :instance_var, :
|
44
|
+
cleanse :address1, :address2, :instance_var, cleaner: proc { |string| "<< #{string.strip} >>" }
|
44
45
|
|
45
46
|
# Custom Zip Code cleaner
|
46
|
-
cleanse :zip_code, :
|
47
|
+
cleanse :zip_code, cleaner: :string_to_integer
|
47
48
|
|
48
49
|
# Automatically cleanse data before validation
|
49
50
|
before_validation :cleanse_attributes!
|
@@ -52,19 +53,22 @@ end
|
|
52
53
|
class User2 < ActiveRecord::Base
|
53
54
|
include DataCleansing::Cleanse
|
54
55
|
# Use the same table as User above
|
55
|
-
self.table_name =
|
56
|
+
self.table_name = "users"
|
57
|
+
|
58
|
+
serialize :text
|
56
59
|
|
57
60
|
# Test :all cleaner. Only works with ActiveRecord Models
|
58
|
-
|
61
|
+
# Must explicitly exclude :text since it is serialized
|
62
|
+
cleanse :all, cleaner: [:strip, proc { |s| "@#{s}@" }], except: %i[address1 zip_code text]
|
59
63
|
|
60
64
|
# Clean :first_name multiple times
|
61
|
-
cleanse :first_name, :
|
65
|
+
cleanse :first_name, cleaner: proc { |string| "<< #{string} >>" }
|
62
66
|
|
63
67
|
# Clean :first_name multiple times
|
64
|
-
cleanse :first_name, :
|
68
|
+
cleanse :first_name, cleaner: proc { |string| "$#{string}$" }
|
65
69
|
|
66
70
|
# Custom Zip Code cleaner
|
67
|
-
cleanse :zip_code, :
|
71
|
+
cleanse :zip_code, cleaner: :string_to_integer
|
68
72
|
|
69
73
|
# Automatically cleanse data before validation
|
70
74
|
before_validation :cleanse_attributes!
|
@@ -72,82 +76,81 @@ end
|
|
72
76
|
|
73
77
|
class ActiveRecordTest < Minitest::Test
|
74
78
|
describe "ActiveRecord Models" do
|
75
|
-
|
76
|
-
it 'have globally registered cleaner' do
|
79
|
+
it "have globally registered cleaner" do
|
77
80
|
assert DataCleansing.cleaner(:strip)
|
78
81
|
end
|
79
82
|
|
80
|
-
it
|
81
|
-
assert_equal
|
82
|
-
assert_equal
|
83
|
-
assert_equal
|
84
|
-
assert_equal
|
85
|
-
assert_equal
|
83
|
+
it "Model.cleanse_attribute" do
|
84
|
+
assert_equal "joe", User.cleanse_attribute(:first_name, " joe ")
|
85
|
+
assert_equal "black", User.cleanse_attribute(:last_name, "\n black\n")
|
86
|
+
assert_equal "<< 2632 Brown St >>", User.cleanse_attribute(:address1, "2632 Brown St \n")
|
87
|
+
assert_equal "<< instance >>", User.cleanse_attribute(:instance_var, "\n instance\n\t ")
|
88
|
+
assert_equal 12_345, User.cleanse_attribute(:zip_code, "\n\tblah 12345badtext\n")
|
86
89
|
end
|
87
90
|
|
88
91
|
describe "with user" do
|
89
92
|
before do
|
90
93
|
@user = User.new(
|
91
|
-
:
|
92
|
-
:
|
93
|
-
:
|
94
|
-
:
|
95
|
-
:
|
94
|
+
first_name: " joe ",
|
95
|
+
last_name: "\n black\n",
|
96
|
+
address1: "2632 Brown St \n",
|
97
|
+
zip_code: "\n\tblah 12345badtext\n",
|
98
|
+
instance_var: "\n instance\n\t "
|
96
99
|
)
|
97
100
|
end
|
98
101
|
|
99
|
-
it
|
102
|
+
it "only have 3 cleaners" do
|
100
103
|
assert_equal 3, User.send(:data_cleansing_cleaners).size, User.send(:data_cleansing_cleaners)
|
101
104
|
end
|
102
105
|
|
103
|
-
it
|
106
|
+
it "cleanse_attributes! using global cleaner" do
|
104
107
|
assert_equal true, @user.valid?
|
105
|
-
assert_equal
|
106
|
-
assert_equal
|
108
|
+
assert_equal "joe", @user.first_name
|
109
|
+
assert_equal "black", @user.last_name
|
107
110
|
end
|
108
111
|
|
109
|
-
it
|
112
|
+
it "cleanse_attributes! using attribute specific custom cleaner" do
|
110
113
|
assert_equal true, @user.valid?
|
111
|
-
assert_equal
|
112
|
-
assert_equal
|
114
|
+
assert_equal "<< 2632 Brown St >>", @user.address1
|
115
|
+
assert_equal "<< instance >>", @user.instance_var
|
113
116
|
end
|
114
117
|
|
115
|
-
it
|
118
|
+
it "cleanse_attributes! using global cleaner using rails extensions" do
|
116
119
|
@user.cleanse_attributes!
|
117
|
-
assert_equal
|
120
|
+
assert_equal 12_345, @user.zip_code
|
118
121
|
end
|
119
122
|
end
|
120
123
|
|
121
124
|
describe "with user2" do
|
122
125
|
before do
|
123
126
|
@user = User2.new(
|
124
|
-
:
|
125
|
-
:
|
126
|
-
:
|
127
|
-
:
|
128
|
-
:
|
127
|
+
first_name: " joe ",
|
128
|
+
last_name: "\n black\n",
|
129
|
+
ssn: "\n 123456789 \n ",
|
130
|
+
address1: "2632 Brown St \n",
|
131
|
+
zip_code: "\n\t blah\n",
|
132
|
+
text: ["\n 123456789 \n ", " second "]
|
129
133
|
)
|
130
134
|
end
|
131
135
|
|
132
|
-
it
|
136
|
+
it "have 4 cleaners defined" do
|
133
137
|
assert_equal 4, User2.send(:data_cleansing_cleaners).size, User2.send(:data_cleansing_cleaners)
|
134
138
|
end
|
135
139
|
|
136
|
-
it
|
140
|
+
it "have 3 attributes cleaners defined" do
|
137
141
|
# :all, :first_name, :zip_code
|
138
142
|
assert_equal 3, User2.send(:data_cleansing_attribute_cleaners).size, User2.send(:data_cleansing_attribute_cleaners)
|
139
143
|
end
|
140
144
|
|
141
|
-
it
|
145
|
+
it "cleanse_attributes! clean all attributes" do
|
142
146
|
assert_equal true, @user.valid?
|
143
|
-
assert_equal
|
144
|
-
assert_equal
|
147
|
+
assert_equal "$<< @joe@ >>$", @user.first_name, User2.send(:data_cleansing_cleaners)
|
148
|
+
assert_equal "@black@", @user.last_name
|
145
149
|
assert_equal "2632 Brown St \n", @user.address1
|
146
150
|
assert_equal "@123456789@", @user.ssn
|
147
|
-
|
151
|
+
assert_nil @user.zip_code, User2.send(:data_cleansing_cleaners)
|
152
|
+
assert_equal ["\n 123456789 \n ", " second "], @user.text
|
148
153
|
end
|
149
|
-
|
150
154
|
end
|
151
|
-
|
152
155
|
end
|
153
156
|
end
|
data/test/cleaners_test.rb
CHANGED
@@ -1,14 +1,15 @@
|
|
1
|
-
require_relative
|
2
|
-
require
|
1
|
+
require_relative "test_helper"
|
2
|
+
require "active_support/core_ext/time/calculations"
|
3
3
|
|
4
4
|
class CleanersTest < Minitest::Test
|
5
5
|
class User
|
6
6
|
include DataCleansing::Cleanse
|
7
7
|
|
8
8
|
attr_accessor :first_name, :last_name, :address1, :address2,
|
9
|
-
|
10
|
-
|
11
|
-
|
9
|
+
:make_this_upper, :clean_non_word, :clean_non_printable,
|
10
|
+
:clean_html, :clean_from_uri, :clean_to_uri, :clean_whitespace,
|
11
|
+
:clean_digits_only, :clean_to_integer, :clean_to_float, :clean_end_of_day,
|
12
|
+
:clean_order
|
12
13
|
|
13
14
|
cleanse :first_name, :last_name, :address1, :address2, cleaner: :strip
|
14
15
|
cleanse :make_this_upper, cleaner: :upcase
|
@@ -22,188 +23,198 @@ class CleanersTest < Minitest::Test
|
|
22
23
|
cleanse :clean_to_integer, cleaner: :string_to_integer
|
23
24
|
cleanse :clean_to_float, cleaner: :string_to_float
|
24
25
|
cleanse :clean_end_of_day, cleaner: :end_of_day
|
26
|
+
|
27
|
+
# Call cleaners in the order they are defined
|
28
|
+
cleanse :clean_order, cleaner: %i[upcase strip]
|
29
|
+
cleanse :clean_order, cleaner: ->(val) { val == "BLAH" ? " yes " : " no " }
|
25
30
|
end
|
26
31
|
|
27
|
-
describe
|
28
|
-
it
|
32
|
+
describe "Cleaners" do
|
33
|
+
it "#strip" do
|
29
34
|
user = User.new
|
30
|
-
user.first_name =
|
35
|
+
user.first_name = " jack black "
|
31
36
|
user.last_name = " \n \t joe"
|
32
37
|
user.address1 = "joe \n\n \n \t\t "
|
33
38
|
user.address2 = "joe \n\n bloggs \n \t\t "
|
34
39
|
user.cleanse_attributes!
|
35
|
-
assert_equal
|
36
|
-
assert_equal
|
37
|
-
assert_equal
|
40
|
+
assert_equal "jack black", user.first_name
|
41
|
+
assert_equal "joe", user.last_name
|
42
|
+
assert_equal "joe", user.address1
|
38
43
|
assert_equal "joe \n\n bloggs", user.address2
|
39
44
|
end
|
40
45
|
|
41
|
-
it
|
46
|
+
it "#upcase" do
|
42
47
|
user = User.new
|
43
|
-
user.make_this_upper =
|
48
|
+
user.make_this_upper = " jacK blAck "
|
44
49
|
user.cleanse_attributes!
|
45
|
-
assert_equal
|
50
|
+
assert_equal " JACK BLACK ", user.make_this_upper
|
46
51
|
end
|
47
52
|
|
48
|
-
it
|
53
|
+
it "#remove_non_word" do
|
49
54
|
user = User.new
|
50
55
|
user.clean_non_word = " !@#$%^&*()+=-~`\t\n jacK blAck <>.,/\"':;{][]\|?/\\ "
|
51
56
|
user.cleanse_attributes!
|
52
|
-
assert_equal
|
57
|
+
assert_equal "jacKblAck", user.clean_non_word
|
53
58
|
end
|
54
59
|
|
55
|
-
it
|
60
|
+
it "#remove_non_printable" do
|
56
61
|
user = User.new
|
57
|
-
user.clean_non_printable = " !@#$%^&*()+=-~`\t\n jacK blAck <>.,/\"':;{][]\|?/\\
|
62
|
+
user.clean_non_printable = " !@#$%^&*()+=-~`\t\n jacK blAck <>.,/\"':;{][]\|?/\\ \x89 "
|
58
63
|
user.cleanse_attributes!
|
59
64
|
assert_equal " !@#$%^&*()+=-~` jacK blAck <>.,/\"':;{][]\|?/\\ ", user.clean_non_printable
|
60
65
|
end
|
61
66
|
|
62
|
-
describe
|
63
|
-
it
|
67
|
+
describe "#clean_html" do
|
68
|
+
it "cleans &quot;" do
|
64
69
|
user = User.new
|
65
|
-
user.clean_html =
|
70
|
+
user.clean_html = "O&quot;Leary"
|
66
71
|
user.cleanse_attributes!
|
67
72
|
assert_equal 'O"Leary', user.clean_html
|
68
73
|
end
|
69
74
|
|
70
|
-
it
|
75
|
+
it "cleans &" do
|
71
76
|
user = User.new
|
72
|
-
user.clean_html =
|
77
|
+
user.clean_html = "Jim & Candi"
|
73
78
|
user.cleanse_attributes!
|
74
|
-
assert_equal
|
79
|
+
assert_equal "Jim & Candi", user.clean_html
|
75
80
|
end
|
76
81
|
|
77
|
-
it
|
82
|
+
it "cleans >" do
|
78
83
|
user = User.new
|
79
|
-
user.clean_html =
|
84
|
+
user.clean_html = "2 > 1"
|
80
85
|
user.cleanse_attributes!
|
81
|
-
assert_equal
|
86
|
+
assert_equal "2 > 1", user.clean_html
|
82
87
|
end
|
83
88
|
|
84
|
-
it
|
89
|
+
it "cleans <" do
|
85
90
|
user = User.new
|
86
|
-
user.clean_html =
|
91
|
+
user.clean_html = "1 < 2"
|
87
92
|
user.cleanse_attributes!
|
88
|
-
assert_equal
|
93
|
+
assert_equal "1 < 2", user.clean_html
|
89
94
|
end
|
90
95
|
|
91
|
-
it
|
96
|
+
it "cleans '" do
|
92
97
|
user = User.new
|
93
|
-
user.clean_html =
|
98
|
+
user.clean_html = "1'2"
|
94
99
|
user.cleanse_attributes!
|
95
100
|
assert_equal "1'2", user.clean_html
|
96
101
|
end
|
97
102
|
|
98
|
-
it
|
103
|
+
it "cleans " do
|
99
104
|
user = User.new
|
100
|
-
user.clean_html =
|
105
|
+
user.clean_html = "1 2"
|
101
106
|
user.cleanse_attributes!
|
102
107
|
assert_equal "1 2", user.clean_html
|
103
108
|
end
|
104
109
|
|
105
|
-
it
|
110
|
+
it "cleans &" do
|
106
111
|
user = User.new
|
107
|
-
user.clean_html =
|
112
|
+
user.clean_html = "Mutt & Jeff Inc."
|
108
113
|
user.cleanse_attributes!
|
109
|
-
assert_equal
|
114
|
+
assert_equal "Mutt & Jeff Inc.", user.clean_html
|
110
115
|
end
|
111
116
|
|
112
|
-
it
|
117
|
+
it "does not clean &;" do
|
113
118
|
user = User.new
|
114
|
-
user.clean_html =
|
119
|
+
user.clean_html = "Mutt &; Jeff Inc."
|
115
120
|
user.cleanse_attributes!
|
116
|
-
assert_equal
|
121
|
+
assert_equal "Mutt &; Jeff Inc.", user.clean_html
|
117
122
|
end
|
118
123
|
|
119
|
-
it
|
124
|
+
it "does not clean &blah;" do
|
120
125
|
user = User.new
|
121
|
-
user.clean_html =
|
126
|
+
user.clean_html = "1&blah;2"
|
122
127
|
user.cleanse_attributes!
|
123
|
-
assert_equal
|
128
|
+
assert_equal "1&blah;2", user.clean_html
|
124
129
|
end
|
125
130
|
end
|
126
131
|
|
127
|
-
describe
|
128
|
-
it
|
132
|
+
describe "#unescape_uri" do
|
133
|
+
it "converts %20" do
|
129
134
|
user = User.new
|
130
|
-
user.clean_from_uri =
|
135
|
+
user.clean_from_uri = "Jim%20%20Bob%20"
|
131
136
|
user.cleanse_attributes!
|
132
|
-
assert_equal
|
137
|
+
assert_equal "Jim Bob ", user.clean_from_uri
|
133
138
|
end
|
134
|
-
it
|
139
|
+
it "converts %20 only" do
|
135
140
|
user = User.new
|
136
|
-
user.clean_from_uri =
|
141
|
+
user.clean_from_uri = "%20"
|
137
142
|
user.cleanse_attributes!
|
138
|
-
assert_equal
|
143
|
+
assert_equal " ", user.clean_from_uri
|
139
144
|
end
|
140
145
|
end
|
141
146
|
|
142
|
-
describe
|
143
|
-
it
|
147
|
+
describe "#escape_uri" do
|
148
|
+
it "converts spaces" do
|
144
149
|
user = User.new
|
145
|
-
user.clean_to_uri =
|
150
|
+
user.clean_to_uri = "Jim Bob "
|
146
151
|
user.cleanse_attributes!
|
147
|
-
assert_equal
|
152
|
+
assert_equal "Jim++Bob+", user.clean_to_uri
|
148
153
|
end
|
149
|
-
it
|
154
|
+
it "converts space only" do
|
150
155
|
user = User.new
|
151
|
-
user.clean_to_uri =
|
156
|
+
user.clean_to_uri = " "
|
152
157
|
user.cleanse_attributes!
|
153
|
-
assert_equal
|
158
|
+
assert_equal "+", user.clean_to_uri
|
154
159
|
end
|
155
160
|
end
|
156
161
|
|
157
|
-
describe
|
158
|
-
it
|
162
|
+
describe "#compress_whitespace" do
|
163
|
+
it "compresses multiple spaces" do
|
159
164
|
user = User.new
|
160
|
-
user.clean_whitespace =
|
165
|
+
user.clean_whitespace = " J im B ob "
|
161
166
|
user.cleanse_attributes!
|
162
|
-
assert_equal
|
167
|
+
assert_equal " J im B ob ", user.clean_whitespace
|
163
168
|
end
|
164
169
|
|
165
|
-
it
|
170
|
+
it "does not compress single spaces" do
|
166
171
|
user = User.new
|
167
|
-
user.clean_whitespace =
|
172
|
+
user.clean_whitespace = " Jack Black"
|
168
173
|
user.cleanse_attributes!
|
169
|
-
assert_equal
|
174
|
+
assert_equal " Jack Black", user.clean_whitespace
|
170
175
|
end
|
171
176
|
|
172
|
-
it
|
177
|
+
it "compresses newlines and tabs" do
|
173
178
|
user = User.new
|
174
179
|
user.clean_whitespace = " \n\n J im B ob \t\n\t "
|
175
180
|
user.cleanse_attributes!
|
176
|
-
assert_equal
|
181
|
+
assert_equal " J im B ob ", user.clean_whitespace
|
177
182
|
end
|
178
183
|
end
|
179
184
|
|
180
|
-
it
|
185
|
+
it "#digits_only" do
|
181
186
|
user = User.new
|
182
187
|
user.clean_digits_only = " 1 !@#$%^&*3()+=-~`\t\n jacK6 blAck <>.,/\"':;8{][]9\|?/\\ "
|
183
188
|
user.cleanse_attributes!
|
184
|
-
assert_equal
|
189
|
+
assert_equal "13689", user.clean_digits_only
|
185
190
|
end
|
186
191
|
|
187
|
-
it
|
192
|
+
it "#string_to_integer" do
|
188
193
|
user = User.new
|
189
194
|
user.clean_to_integer = " 1 !@#$%^&*3()+=-~`\t\n jacK6 blAck <>.,/\"':;8{][]9\|?/\\ "
|
190
195
|
user.cleanse_attributes!
|
191
196
|
assert_equal 136, user.clean_to_integer
|
192
197
|
end
|
193
198
|
|
194
|
-
it
|
199
|
+
it "#string_to_float" do
|
195
200
|
user = User.new
|
196
201
|
user.clean_to_float = " 1 !@#$%^&*3()+=-~`\t\n jacK6 blAck <>.,/\"':;8{][]9\|?/\\ "
|
197
202
|
user.cleanse_attributes!
|
198
203
|
assert_equal 136.89, user.clean_to_float
|
199
204
|
end
|
200
205
|
|
201
|
-
it
|
206
|
+
it "#date_to_time_at_end_of_day" do
|
202
207
|
user = User.new
|
203
|
-
user.clean_end_of_day = Time.parse(
|
208
|
+
user.clean_end_of_day = Time.parse("2016-03-03 14:33:44 +0000")
|
204
209
|
user.cleanse_attributes!
|
205
|
-
assert_equal Time.parse(
|
210
|
+
assert_equal Time.parse("2016-03-03 23:59:59 +0000").to_i, user.clean_end_of_day.to_i
|
206
211
|
end
|
207
212
|
|
213
|
+
it "cleans in the order defined" do
|
214
|
+
user = User.new
|
215
|
+
user.clean_order = " blah "
|
216
|
+
user.cleanse_attributes!
|
217
|
+
assert_equal " yes ", user.clean_order
|
218
|
+
end
|
208
219
|
end
|
209
220
|
end
|