data_cleansing 0.8.0 → 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: a24ad3a5780b445ed15310ad8776d89c122747d9
4
- data.tar.gz: ab79edb935ae22415b50c51d1e1c7dc60c7a16c5
3
+ metadata.gz: 8ba846025b7441eb5a93230b7fbd8ebe2a4d88e3
4
+ data.tar.gz: 4e209fd6ef57540a8b549d06c314ae4caeddbf59
5
5
  SHA512:
6
- metadata.gz: 032ce480495e7127cd17b4b1bd39630e51573c579fda5b7cb34bb32f1f3cb6509c8e3ebd9a568b88f6f36cb694d7a7dbd7b32aad040fe6f511ce47b3d01fad3f
7
- data.tar.gz: b2d7af9ad633ad5c5045c1103129d843fb5453b256cd5c3cbe3590967d950036da39ab8d6940387c36a51f1501c4b348ef5128b64b3c044b7ff01969392b6d5c
6
+ metadata.gz: 7b464ca76d4c40f4621d86a32cd76bd4bc3e71e8b5eed18ac094ae651a8f0be58772a503fa096c6798b081cf3030363973b0d96cfd2cf45d6497e14a5b2717f1
7
+ data.tar.gz: e6933049c6200cadb6e398e3d2af8bae641534942a201c6ed7b8a47fc991f7a843d7b2d1b6cbc1c00f14d837f2de887ac6011f23c187fe33be3c6199a1e18cdf
@@ -45,7 +45,7 @@ module Cleaners
45
45
  DataCleansing.register_cleaner(:remove_non_printable, RemoveNonPrintable)
46
46
 
47
47
  # Replace HTML markup entities (such as &amp; or &nbsp;) with the characters they represent
48
- module RemoveHTMLMarkup
48
+ module ReplaceHTMLMarkup
49
49
  HTML_MARKUP = Regexp.compile(/&(amp|quot|gt|lt|apos|nbsp);/in)
50
50
 
51
51
  def self.call(string)
@@ -53,17 +53,17 @@ module Cleaners
53
53
 
54
54
  string.gsub!(HTML_MARKUP) do |match|
55
55
  case match.downcase
56
- when 'amp' then
56
+ when '&amp;' then
57
57
  '&'
58
- when 'quot' then
58
+ when '&quot;' then
59
59
  '"'
60
- when 'gt' then
60
+ when '&gt;' then
61
61
  '>'
62
- when 'lt' then
62
+ when '&lt;' then
63
63
  '<'
64
- when 'apos' then
64
+ when '&apos;' then
65
65
  "'"
66
- when 'nbsp' then
66
+ when '&nbsp;' then
67
67
  ' '
68
68
  else
69
69
  "&#{match};"
@@ -71,16 +71,25 @@ module Cleaners
71
71
  end || string
72
72
  end
73
73
  end
74
- DataCleansing.register_cleaner(:remove_html_markup, RemoveHTMLMarkup)
74
+ DataCleansing.register_cleaner(:replace_html_markup, ReplaceHTMLMarkup)
75
75
 
76
- module ReplaceURIChars
76
+ module UnescapeURI
77
77
  def self.call(string)
78
78
  return string unless string.is_a?(String)
79
79
 
80
80
  URI.unescape(string)
81
81
  end
82
82
  end
83
- DataCleansing.register_cleaner(:replace_uri_chars, ReplaceURIChars)
83
+ DataCleansing.register_cleaner(:unescape_uri, UnescapeURI)
84
+
85
+ module EscapeURI
86
+ def self.call(string)
87
+ return string unless string.is_a?(String)
88
+
89
+ URI.escape(string)
90
+ end
91
+ end
92
+ DataCleansing.register_cleaner(:escape_uri, EscapeURI)
84
93
 
85
94
  # Compress multiple whitespace to a single space
86
95
  module CompressWhitespace
@@ -123,15 +132,39 @@ module Cleaners
123
132
  end
124
133
  DataCleansing.register_cleaner(:string_to_integer, StringToInteger)
125
134
 
135
+ # Returns [Float] after removing all non-digit characters, except '.'
136
+ # Returns nil if no digits are present in the string.
137
+ module StringToFloat
138
+ NUMERIC = Regexp.compile(/[^0-9\.]/)
139
+
140
+ def self.call(string)
141
+ return string unless string.is_a?(String)
142
+
143
+ # Remove Non-Digit Chars, except for '.'
144
+ string.gsub!(NUMERIC, '')
145
+ string.length > 0 ? string.to_f : nil
146
+ end
147
+ end
148
+ DataCleansing.register_cleaner(:string_to_float, StringToFloat)
149
+
126
150
  # Convert a Date to a Time at the end of day for that date (YYYY-MM-DD 23:59:59)
127
151
  # Ex: 2015-12-31 becomes 2015-12-31 23:59:59
128
152
  # If something other than a Date object is passed in, it just passes through.
129
- module DateToTimeAtEndOfDay
130
- def self.call(date)
131
- return date unless date.kind_of?(Date)
132
-
133
- date.to_time.end_of_day
153
+ #
154
+ # Note: Only works if ActiveSupport is also loaded since it defines Time#end_of_day.
155
+ module EndOfDay
156
+ def self.call(datetime)
157
+ case datetime
158
+ when String
159
+ Time.parse(datetime).end_of_day
160
+ when Date
161
+ datetime.to_time.end_of_day
162
+ when Time
163
+ datetime.end_of_day
164
+ else
165
+ datetime
166
+ end
134
167
  end
135
168
  end
136
- DataCleansing.register_cleaner(:date_to_time_at_end_of_day, DateToTimeAtEndOfDay)
169
+ DataCleansing.register_cleaner(:end_of_day, EndOfDay)
137
170
  end
@@ -1,3 +1,3 @@
1
1
  module DataCleansing
2
- VERSION = '0.8.0'
2
+ VERSION = '0.9.0'
3
3
  end
@@ -0,0 +1,209 @@
1
+ require_relative 'test_helper'
2
+ require 'active_support/core_ext/time/calculations'
3
+
4
+ class CleanersTest < Minitest::Test
5
+ class User
6
+ include DataCleansing::Cleanse
7
+
8
+ attr_accessor :first_name, :last_name, :address1, :address2,
9
+ :make_this_upper, :clean_non_word, :clean_non_printable,
10
+ :clean_html, :clean_from_uri, :clean_to_uri, :clean_whitespace,
11
+ :clean_digits_only, :clean_to_integer, :clean_to_float, :clean_end_of_day
12
+
13
+ cleanse :first_name, :last_name, :address1, :address2, cleaner: :strip
14
+ cleanse :make_this_upper, cleaner: :upcase
15
+ cleanse :clean_non_word, cleaner: :remove_non_word
16
+ cleanse :clean_non_printable, cleaner: :remove_non_printable
17
+ cleanse :clean_html, cleaner: :replace_html_markup
18
+ cleanse :clean_from_uri, cleaner: :unescape_uri
19
+ cleanse :clean_to_uri, cleaner: :escape_uri
20
+ cleanse :clean_whitespace, cleaner: :compress_whitespace
21
+ cleanse :clean_digits_only, cleaner: :digits_only
22
+ cleanse :clean_to_integer, cleaner: :string_to_integer
23
+ cleanse :clean_to_float, cleaner: :string_to_float
24
+ cleanse :clean_end_of_day, cleaner: :end_of_day
25
+ end
26
+
27
+ describe 'Cleaners' do
28
+ it '#strip' do
29
+ user = User.new
30
+ user.first_name = ' jack black '
31
+ user.last_name = " \n \t joe"
32
+ user.address1 = "joe \n\n \n \t\t "
33
+ user.address2 = "joe \n\n bloggs \n \t\t "
34
+ user.cleanse_attributes!
35
+ assert_equal 'jack black', user.first_name
36
+ assert_equal 'joe', user.last_name
37
+ assert_equal 'joe', user.address1
38
+ assert_equal "joe \n\n bloggs", user.address2
39
+ end
40
+
41
+ it '#upcase' do
42
+ user = User.new
43
+ user.make_this_upper = ' jacK blAck '
44
+ user.cleanse_attributes!
45
+ assert_equal ' JACK BLACK ', user.make_this_upper
46
+ end
47
+
48
+ it '#remove_non_word' do
49
+ user = User.new
50
+ user.clean_non_word = " !@#$%^&*()+=-~`\t\n jacK blAck <>.,/\"':;{][]\|?/\\ "
51
+ user.cleanse_attributes!
52
+ assert_equal 'jacKblAck', user.clean_non_word
53
+ end
54
+
55
+ it '#remove_non_printable' do
56
+ user = User.new
57
+ user.clean_non_printable = " !@#$%^&*()+=-~`\t\n jacK blAck <>.,/\"':;{][]\|?/\\ "
58
+ user.cleanse_attributes!
59
+ assert_equal " !@#$%^&*()+=-~` jacK blAck <>.,/\"':;{][]\|?/\\ ", user.clean_non_printable
60
+ end
61
+
62
+ describe '#clean_html' do
63
+ it 'cleans &quot;' do
64
+ user = User.new
65
+ user.clean_html = 'O&quot;Leary'
66
+ user.cleanse_attributes!
67
+ assert_equal 'O"Leary', user.clean_html
68
+ end
69
+
70
+ it 'cleans &amp;' do
71
+ user = User.new
72
+ user.clean_html = 'Jim &amp; Candi'
73
+ user.cleanse_attributes!
74
+ assert_equal 'Jim & Candi', user.clean_html
75
+ end
76
+
77
+ it 'cleans &gt;' do
78
+ user = User.new
79
+ user.clean_html = '2 &gt; 1'
80
+ user.cleanse_attributes!
81
+ assert_equal '2 > 1', user.clean_html
82
+ end
83
+
84
+ it 'cleans &lt;' do
85
+ user = User.new
86
+ user.clean_html = '1 &lt; 2'
87
+ user.cleanse_attributes!
88
+ assert_equal '1 < 2', user.clean_html
89
+ end
90
+
91
+ it 'cleans &apos;' do
92
+ user = User.new
93
+ user.clean_html = '1&apos;2'
94
+ user.cleanse_attributes!
95
+ assert_equal "1'2", user.clean_html
96
+ end
97
+
98
+ it 'cleans &nbsp;' do
99
+ user = User.new
100
+ user.clean_html = '1&nbsp;2'
101
+ user.cleanse_attributes!
102
+ assert_equal "1 2", user.clean_html
103
+ end
104
+
105
+ it 'cleans &AMP;' do
106
+ user = User.new
107
+ user.clean_html = 'Mutt &AMP; Jeff Inc.'
108
+ user.cleanse_attributes!
109
+ assert_equal 'Mutt & Jeff Inc.', user.clean_html
110
+ end
111
+
112
+ it 'does not clean &;' do
113
+ user = User.new
114
+ user.clean_html = 'Mutt &; Jeff Inc.'
115
+ user.cleanse_attributes!
116
+ assert_equal 'Mutt &; Jeff Inc.', user.clean_html
117
+ end
118
+
119
+ it 'does not clean &blah;' do
120
+ user = User.new
121
+ user.clean_html = '1&blah;2'
122
+ user.cleanse_attributes!
123
+ assert_equal '1&blah;2', user.clean_html
124
+ end
125
+ end
126
+
127
+ describe '#unescape_uri' do
128
+ it 'converts %20' do
129
+ user = User.new
130
+ user.clean_from_uri = 'Jim%20%20Bob%20'
131
+ user.cleanse_attributes!
132
+ assert_equal 'Jim Bob ', user.clean_from_uri
133
+ end
134
+ it 'converts %20 only' do
135
+ user = User.new
136
+ user.clean_from_uri = '%20'
137
+ user.cleanse_attributes!
138
+ assert_equal ' ', user.clean_from_uri
139
+ end
140
+ end
141
+
142
+ describe '#escape_uri' do
143
+ it 'converts %20' do
144
+ user = User.new
145
+ user.clean_to_uri = 'Jim Bob '
146
+ user.cleanse_attributes!
147
+ assert_equal 'Jim%20%20Bob%20', user.clean_to_uri
148
+ end
149
+ it 'converts %20 only' do
150
+ user = User.new
151
+ user.clean_to_uri = ' '
152
+ user.cleanse_attributes!
153
+ assert_equal '%20', user.clean_to_uri
154
+ end
155
+ end
156
+
157
+ describe '#compress_whitespace' do
158
+ it 'compresses multiple spaces' do
159
+ user = User.new
160
+ user.clean_whitespace = ' J im B ob '
161
+ user.cleanse_attributes!
162
+ assert_equal ' J im B ob ', user.clean_whitespace
163
+ end
164
+
165
+ it 'does not compress single spaces' do
166
+ user = User.new
167
+ user.clean_whitespace = ' Jack Black'
168
+ user.cleanse_attributes!
169
+ assert_equal ' Jack Black', user.clean_whitespace
170
+ end
171
+
172
+ it 'compresses newlines and tabs' do
173
+ user = User.new
174
+ user.clean_whitespace = " \n\n J im B ob \t\n\t "
175
+ user.cleanse_attributes!
176
+ assert_equal ' J im B ob ', user.clean_whitespace
177
+ end
178
+ end
179
+
180
+ it '#digits_only' do
181
+ user = User.new
182
+ user.clean_digits_only = " 1 !@#$%^&*3()+=-~`\t\n jacK6 blAck <>.,/\"':;8{][]9\|?/\\ "
183
+ user.cleanse_attributes!
184
+ assert_equal '13689', user.clean_digits_only
185
+ end
186
+
187
+ it '#string_to_integer' do
188
+ user = User.new
189
+ user.clean_to_integer = " 1 !@#$%^&*3()+=-~`\t\n jacK6 blAck <>.,/\"':;8{][]9\|?/\\ "
190
+ user.cleanse_attributes!
191
+ assert_equal 136, user.clean_to_integer
192
+ end
193
+
194
+ it '#string_to_float' do
195
+ user = User.new
196
+ user.clean_to_float = " 1 !@#$%^&*3()+=-~`\t\n jacK6 blAck <>.,/\"':;8{][]9\|?/\\ "
197
+ user.cleanse_attributes!
198
+ assert_equal 136.89, user.clean_to_float
199
+ end
200
+
201
+ it '#date_to_time_at_end_of_day' do
202
+ user = User.new
203
+ user.clean_end_of_day = Time.parse('2016-03-03 14:33:44 +0000')
204
+ user.cleanse_attributes!
205
+ assert_equal Time.parse('2016-03-03 23:59:59 +0000').to_i, user.clean_end_of_day.to_i
206
+ end
207
+
208
+ end
209
+ end
data/test/test_db.sqlite3 CHANGED
Binary file
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: data_cleansing
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.8.0
4
+ version: 0.9.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Reid Morrison
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-03-01 00:00:00.000000000 Z
11
+ date: 2016-03-03 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: concurrent-ruby
@@ -55,6 +55,7 @@ files:
55
55
  - lib/data_cleansing/railtie.rb
56
56
  - lib/data_cleansing/version.rb
57
57
  - test/active_record_test.rb
58
+ - test/cleaners_test.rb
58
59
  - test/ruby_test.rb
59
60
  - test/test_db.sqlite3
60
61
  - test/test_helper.rb
@@ -84,6 +85,7 @@ specification_version: 4
84
85
  summary: Data Cleansing framework for Ruby, Rails, Mongoid and MongoMapper.
85
86
  test_files:
86
87
  - test/active_record_test.rb
88
+ - test/cleaners_test.rb
87
89
  - test/ruby_test.rb
88
90
  - test/test_db.sqlite3
89
91
  - test/test_helper.rb