data_cleansing 0.8.0 → 0.9.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: a24ad3a5780b445ed15310ad8776d89c122747d9
4
- data.tar.gz: ab79edb935ae22415b50c51d1e1c7dc60c7a16c5
3
+ metadata.gz: 8ba846025b7441eb5a93230b7fbd8ebe2a4d88e3
4
+ data.tar.gz: 4e209fd6ef57540a8b549d06c314ae4caeddbf59
5
5
  SHA512:
6
- metadata.gz: 032ce480495e7127cd17b4b1bd39630e51573c579fda5b7cb34bb32f1f3cb6509c8e3ebd9a568b88f6f36cb694d7a7dbd7b32aad040fe6f511ce47b3d01fad3f
7
- data.tar.gz: b2d7af9ad633ad5c5045c1103129d843fb5453b256cd5c3cbe3590967d950036da39ab8d6940387c36a51f1501c4b348ef5128b64b3c044b7ff01969392b6d5c
6
+ metadata.gz: 7b464ca76d4c40f4621d86a32cd76bd4bc3e71e8b5eed18ac094ae651a8f0be58772a503fa096c6798b081cf3030363973b0d96cfd2cf45d6497e14a5b2717f1
7
+ data.tar.gz: e6933049c6200cadb6e398e3d2af8bae641534942a201c6ed7b8a47fc991f7a843d7b2d1b6cbc1c00f14d837f2de887ac6011f23c187fe33be3c6199a1e18cdf
@@ -45,7 +45,7 @@ module Cleaners
45
45
  DataCleansing.register_cleaner(:remove_non_printable, RemoveNonPrintable)
46
46
 
47
47
  # Replace HTML entities (&amp;, &quot;, &gt;, &lt;, &apos;, &nbsp;) with the characters they represent
48
- module RemoveHTMLMarkup
48
+ module ReplaceHTMLMarkup
49
49
  HTML_MARKUP = Regexp.compile(/&(amp|quot|gt|lt|apos|nbsp);/in)
50
50
 
51
51
  def self.call(string)
@@ -53,17 +53,17 @@ module Cleaners
53
53
 
54
54
  string.gsub!(HTML_MARKUP) do |match|
55
55
  case match.downcase
56
- when 'amp' then
56
+ when '&amp;' then
57
57
  '&'
58
- when 'quot' then
58
+ when '&quot;' then
59
59
  '"'
60
- when 'gt' then
60
+ when '&gt;' then
61
61
  '>'
62
- when 'lt' then
62
+ when '&lt;' then
63
63
  '<'
64
- when 'apos' then
64
+ when '&apos;' then
65
65
  "'"
66
- when 'nbsp' then
66
+ when '&nbsp;' then
67
67
  ' '
68
68
  else
69
69
  "&#{match};"
@@ -71,16 +71,25 @@ module Cleaners
71
71
  end || string
72
72
  end
73
73
  end
74
- DataCleansing.register_cleaner(:remove_html_markup, RemoveHTMLMarkup)
74
+ DataCleansing.register_cleaner(:replace_html_markup, ReplaceHTMLMarkup)
75
75
 
76
- module ReplaceURIChars
76
+ module UnescapeURI
77
77
  def self.call(string)
78
78
  return string unless string.is_a?(String)
79
79
 
80
80
  URI.unescape(string)
81
81
  end
82
82
  end
83
- DataCleansing.register_cleaner(:replace_uri_chars, ReplaceURIChars)
83
+ DataCleansing.register_cleaner(:unescape_uri, UnescapeURI)
84
+
85
+ module EscapeURI
86
+ def self.call(string)
87
+ return string unless string.is_a?(String)
88
+
89
+ URI.escape(string)
90
+ end
91
+ end
92
+ DataCleansing.register_cleaner(:escape_uri, EscapeURI)
84
93
 
85
94
  # Compress multiple whitespace to a single space
86
95
  module CompressWhitespace
@@ -123,15 +132,39 @@ module Cleaners
123
132
  end
124
133
  DataCleansing.register_cleaner(:string_to_integer, StringToInteger)
125
134
 
135
+ # Returns [Float] after removing all non-digit characters, except '.'
136
+ # Returns nil if no digits or '.' characters are present in the string.
137
+ module StringToFloat
138
+ NUMERIC = Regexp.compile(/[^0-9\.]/)
139
+
140
+ def self.call(string)
141
+ return string unless string.is_a?(String)
142
+
143
+ # Remove Non-Digit Chars, except for '.'
144
+ string.gsub!(NUMERIC, '')
145
+ string.length > 0 ? string.to_f : nil
146
+ end
147
+ end
148
+ DataCleansing.register_cleaner(:string_to_float, StringToFloat)
149
+
126
150
  # Convert a String, Date or Time to a Time at the end of that day (YYYY-MM-DD 23:59:59)
127
151
  # Ex: 2015-12-31 becomes 2015-12-31 23:59:59
128
152
  # If something other than a String, Date or Time object is passed in, it just passes through.
129
- module DateToTimeAtEndOfDay
130
- def self.call(date)
131
- return date unless date.kind_of?(Date)
132
-
133
- date.to_time.end_of_day
153
+ #
154
+ # Note: Only works if ActiveSupport is also loaded since it defines Time#end_of_day.
155
+ module EndOfDay
156
+ def self.call(datetime)
157
+ case datetime
158
+ when String
159
+ Time.parse(datetime).end_of_day
160
+ when Date
161
+ datetime.to_time.end_of_day
162
+ when Time
163
+ datetime.end_of_day
164
+ else
165
+ datetime
166
+ end
134
167
  end
135
168
  end
136
- DataCleansing.register_cleaner(:date_to_time_at_end_of_day, DateToTimeAtEndOfDay)
169
+ DataCleansing.register_cleaner(:end_of_day, EndOfDay)
137
170
  end
@@ -1,3 +1,3 @@
1
1
  module DataCleansing
2
- VERSION = '0.8.0'
2
+ VERSION = '0.9.0'
3
3
  end
@@ -0,0 +1,209 @@
1
+ require_relative 'test_helper'
2
+ require 'active_support/core_ext/time/calculations'
3
+
4
+ class CleanersTest < Minitest::Test
5
+ class User
6
+ include DataCleansing::Cleanse
7
+
8
+ attr_accessor :first_name, :last_name, :address1, :address2,
9
+ :make_this_upper, :clean_non_word, :clean_non_printable,
10
+ :clean_html, :clean_from_uri, :clean_to_uri, :clean_whitespace,
11
+ :clean_digits_only, :clean_to_integer, :clean_to_float, :clean_end_of_day
12
+
13
+ cleanse :first_name, :last_name, :address1, :address2, cleaner: :strip
14
+ cleanse :make_this_upper, cleaner: :upcase
15
+ cleanse :clean_non_word, cleaner: :remove_non_word
16
+ cleanse :clean_non_printable, cleaner: :remove_non_printable
17
+ cleanse :clean_html, cleaner: :replace_html_markup
18
+ cleanse :clean_from_uri, cleaner: :unescape_uri
19
+ cleanse :clean_to_uri, cleaner: :escape_uri
20
+ cleanse :clean_whitespace, cleaner: :compress_whitespace
21
+ cleanse :clean_digits_only, cleaner: :digits_only
22
+ cleanse :clean_to_integer, cleaner: :string_to_integer
23
+ cleanse :clean_to_float, cleaner: :string_to_float
24
+ cleanse :clean_end_of_day, cleaner: :end_of_day
25
+ end
26
+
27
+ describe 'Cleaners' do
28
+ it '#strip' do
29
+ user = User.new
30
+ user.first_name = ' jack black '
31
+ user.last_name = " \n \t joe"
32
+ user.address1 = "joe \n\n \n \t\t "
33
+ user.address2 = "joe \n\n bloggs \n \t\t "
34
+ user.cleanse_attributes!
35
+ assert_equal 'jack black', user.first_name
36
+ assert_equal 'joe', user.last_name
37
+ assert_equal 'joe', user.address1
38
+ assert_equal "joe \n\n bloggs", user.address2
39
+ end
40
+
41
+ it '#upcase' do
42
+ user = User.new
43
+ user.make_this_upper = ' jacK blAck '
44
+ user.cleanse_attributes!
45
+ assert_equal ' JACK BLACK ', user.make_this_upper
46
+ end
47
+
48
+ it '#remove_non_word' do
49
+ user = User.new
50
+ user.clean_non_word = " !@#$%^&*()+=-~`\t\n jacK blAck <>.,/\"':;{][]\|?/\\ "
51
+ user.cleanse_attributes!
52
+ assert_equal 'jacKblAck', user.clean_non_word
53
+ end
54
+
55
+ it '#remove_non_printable' do
56
+ user = User.new
57
+ user.clean_non_printable = " !@#$%^&*()+=-~`\t\n jacK blAck <>.,/\"':;{][]\|?/\\ "
58
+ user.cleanse_attributes!
59
+ assert_equal " !@#$%^&*()+=-~` jacK blAck <>.,/\"':;{][]\|?/\\ ", user.clean_non_printable
60
+ end
61
+
62
+ describe '#clean_html' do
63
+ it 'cleans &quot;' do
64
+ user = User.new
65
+ user.clean_html = 'O&quot;Leary'
66
+ user.cleanse_attributes!
67
+ assert_equal 'O"Leary', user.clean_html
68
+ end
69
+
70
+ it 'cleans &amp;' do
71
+ user = User.new
72
+ user.clean_html = 'Jim &amp; Candi'
73
+ user.cleanse_attributes!
74
+ assert_equal 'Jim & Candi', user.clean_html
75
+ end
76
+
77
+ it 'cleans &gt;' do
78
+ user = User.new
79
+ user.clean_html = '2 &gt; 1'
80
+ user.cleanse_attributes!
81
+ assert_equal '2 > 1', user.clean_html
82
+ end
83
+
84
+ it 'cleans &lt;' do
85
+ user = User.new
86
+ user.clean_html = '1 &lt; 2'
87
+ user.cleanse_attributes!
88
+ assert_equal '1 < 2', user.clean_html
89
+ end
90
+
91
+ it 'cleans &apos;' do
92
+ user = User.new
93
+ user.clean_html = '1&apos;2'
94
+ user.cleanse_attributes!
95
+ assert_equal "1'2", user.clean_html
96
+ end
97
+
98
+ it 'cleans &nbsp;' do
99
+ user = User.new
100
+ user.clean_html = '1&nbsp;2'
101
+ user.cleanse_attributes!
102
+ assert_equal "1 2", user.clean_html
103
+ end
104
+
105
+ it 'cleans &AMP;' do
106
+ user = User.new
107
+ user.clean_html = 'Mutt &AMP; Jeff Inc.'
108
+ user.cleanse_attributes!
109
+ assert_equal 'Mutt & Jeff Inc.', user.clean_html
110
+ end
111
+
112
+ it 'does not clean &;' do
113
+ user = User.new
114
+ user.clean_html = 'Mutt &; Jeff Inc.'
115
+ user.cleanse_attributes!
116
+ assert_equal 'Mutt &; Jeff Inc.', user.clean_html
117
+ end
118
+
119
+ it 'does not clean &blah;' do
120
+ user = User.new
121
+ user.clean_html = '1&blah;2'
122
+ user.cleanse_attributes!
123
+ assert_equal '1&blah;2', user.clean_html
124
+ end
125
+ end
126
+
127
+ describe '#unescape_uri' do
128
+ it 'converts %20' do
129
+ user = User.new
130
+ user.clean_from_uri = 'Jim%20%20Bob%20'
131
+ user.cleanse_attributes!
132
+ assert_equal 'Jim Bob ', user.clean_from_uri
133
+ end
134
+ it 'converts %20 only' do
135
+ user = User.new
136
+ user.clean_from_uri = '%20'
137
+ user.cleanse_attributes!
138
+ assert_equal ' ', user.clean_from_uri
139
+ end
140
+ end
141
+
142
+ describe '#escape_uri' do
143
+ it 'converts %20' do
144
+ user = User.new
145
+ user.clean_to_uri = 'Jim Bob '
146
+ user.cleanse_attributes!
147
+ assert_equal 'Jim%20%20Bob%20', user.clean_to_uri
148
+ end
149
+ it 'converts %20 only' do
150
+ user = User.new
151
+ user.clean_to_uri = ' '
152
+ user.cleanse_attributes!
153
+ assert_equal '%20', user.clean_to_uri
154
+ end
155
+ end
156
+
157
+ describe '#compress_whitespace' do
158
+ it 'compresses multiple spaces' do
159
+ user = User.new
160
+ user.clean_whitespace = ' J im B ob '
161
+ user.cleanse_attributes!
162
+ assert_equal ' J im B ob ', user.clean_whitespace
163
+ end
164
+
165
+ it 'does not compress single spaces' do
166
+ user = User.new
167
+ user.clean_whitespace = ' Jack Black'
168
+ user.cleanse_attributes!
169
+ assert_equal ' Jack Black', user.clean_whitespace
170
+ end
171
+
172
+ it 'compresses newlines and tabs' do
173
+ user = User.new
174
+ user.clean_whitespace = " \n\n J im B ob \t\n\t "
175
+ user.cleanse_attributes!
176
+ assert_equal ' J im B ob ', user.clean_whitespace
177
+ end
178
+ end
179
+
180
+ it '#digits_only' do
181
+ user = User.new
182
+ user.clean_digits_only = " 1 !@#$%^&*3()+=-~`\t\n jacK6 blAck <>.,/\"':;8{][]9\|?/\\ "
183
+ user.cleanse_attributes!
184
+ assert_equal '13689', user.clean_digits_only
185
+ end
186
+
187
+ it '#string_to_integer' do
188
+ user = User.new
189
+ user.clean_to_integer = " 1 !@#$%^&*3()+=-~`\t\n jacK6 blAck <>.,/\"':;8{][]9\|?/\\ "
190
+ user.cleanse_attributes!
191
+ assert_equal 136, user.clean_to_integer
192
+ end
193
+
194
+ it '#string_to_float' do
195
+ user = User.new
196
+ user.clean_to_float = " 1 !@#$%^&*3()+=-~`\t\n jacK6 blAck <>.,/\"':;8{][]9\|?/\\ "
197
+ user.cleanse_attributes!
198
+ assert_equal 136.89, user.clean_to_float
199
+ end
200
+
201
+ it '#date_to_time_at_end_of_day' do
202
+ user = User.new
203
+ user.clean_end_of_day = Time.parse('2016-03-03 14:33:44 +0000')
204
+ user.cleanse_attributes!
205
+ assert_equal Time.parse('2016-03-03 23:59:59 +0000').to_i, user.clean_end_of_day.to_i
206
+ end
207
+
208
+ end
209
+ end
data/test/test_db.sqlite3 CHANGED
Binary file
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: data_cleansing
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.8.0
4
+ version: 0.9.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Reid Morrison
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-03-01 00:00:00.000000000 Z
11
+ date: 2016-03-03 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: concurrent-ruby
@@ -55,6 +55,7 @@ files:
55
55
  - lib/data_cleansing/railtie.rb
56
56
  - lib/data_cleansing/version.rb
57
57
  - test/active_record_test.rb
58
+ - test/cleaners_test.rb
58
59
  - test/ruby_test.rb
59
60
  - test/test_db.sqlite3
60
61
  - test/test_helper.rb
@@ -84,6 +85,7 @@ specification_version: 4
84
85
  summary: Data Cleansing framework for Ruby, Rails, Mongoid and MongoMapper.
85
86
  test_files:
86
87
  - test/active_record_test.rb
88
+ - test/cleaners_test.rb
87
89
  - test/ruby_test.rb
88
90
  - test/test_db.sqlite3
89
91
  - test/test_helper.rb