data_cleansing 0.6.1 → 0.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/LICENSE.txt +1 -1
- data/README.md +6 -6
- data/Rakefile +15 -28
- data/lib/data_cleansing.rb +1 -1
- data/lib/data_cleansing/cleaners.rb +137 -0
- data/lib/data_cleansing/cleanse.rb +17 -11
- data/lib/data_cleansing/data_cleansing.rb +5 -5
- data/lib/data_cleansing/version.rb +1 -1
- data/test/active_record_test.rb +19 -49
- data/test/ruby_test.rb +23 -35
- data/test/test_db.sqlite3 +0 -0
- data/test/test_helper.rb +13 -0
- metadata +25 -27
- data/Gemfile +0 -21
- data/Gemfile.lock +0 -63
- data/nbproject/private/private.properties +0 -3
- data/nbproject/private/private.xml +0 -4
- data/nbproject/private/rake-d.txt +0 -4
- data/nbproject/project.properties +0 -9
- data/nbproject/project.xml +0 -16
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: a24ad3a5780b445ed15310ad8776d89c122747d9
|
4
|
+
data.tar.gz: ab79edb935ae22415b50c51d1e1c7dc60c7a16c5
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 032ce480495e7127cd17b4b1bd39630e51573c579fda5b7cb34bb32f1f3cb6509c8e3ebd9a568b88f6f36cb694d7a7dbd7b32aad040fe6f511ce47b3d01fad3f
|
7
|
+
data.tar.gz: b2d7af9ad633ad5c5045c1103129d843fb5453b256cd5c3cbe3590967d950036da39ab8d6940387c36a51f1501c4b348ef5128b64b3c044b7ff01969392b6d5c
|
data/LICENSE.txt
CHANGED
@@ -186,7 +186,7 @@
|
|
186
186
|
same "printed page" as the copyright notice for easier
|
187
187
|
identification within third-party archives.
|
188
188
|
|
189
|
-
Copyright 2012
|
189
|
+
Copyright 2012, 2013, 2014 Reid Morrison
|
190
190
|
|
191
191
|
Licensed under the Apache License, Version 2.0 (the "License");
|
192
192
|
you may not use this file except in compliance with the License.
|
data/README.md
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
data_cleansing
|
2
2
|
==============
|
3
3
|
|
4
|
-
Data Cleansing framework for Ruby
|
4
|
+
Data Cleansing framework for Ruby, Rails, Mongoid and MongoMapper.
|
5
5
|
|
6
6
|
* http://github.com/reidmorrison/data_cleansing
|
7
7
|
|
@@ -251,7 +251,7 @@ in a Rails initializer as follows:
|
|
251
251
|
|
252
252
|
```ruby
|
253
253
|
SemanticLogger.default_level = Rails.logger.level
|
254
|
-
SemanticLogger.add_appender(Rails.logger)
|
254
|
+
SemanticLogger.add_appender(logger: Rails.logger)
|
255
255
|
```
|
256
256
|
|
257
257
|
By changing the log level of DataCleansing itself the type of output for data
|
@@ -319,9 +319,9 @@ tries to convert it to an integer or float.
|
|
319
319
|
|
320
320
|
DataCleansing requires the following dependencies
|
321
321
|
|
322
|
-
* Ruby V1.
|
323
|
-
* Rails
|
324
|
-
* Mongoid
|
322
|
+
* Ruby V1.9.3, V2 and greater
|
323
|
+
* Rails V3.2 (Active Model) or greater for Rails integration (only if Rails is being used)
|
324
|
+
* Mongoid or MongoMapper supporting Active Model V3.2 or greater (only if Mongoid or MongoMapper is being used)
|
325
325
|
|
326
326
|
## Meta
|
327
327
|
|
@@ -338,7 +338,7 @@ Reid Morrison :: reidmo@gmail.com :: @reidmorrison
|
|
338
338
|
|
339
339
|
## License
|
340
340
|
|
341
|
-
Copyright 2013 Reid Morrison
|
341
|
+
Copyright 2013, 2014, 2015, 2016 Reid Morrison
|
342
342
|
|
343
343
|
Licensed under the Apache License, Version 2.0 (the "License");
|
344
344
|
you may not use this file except in compliance with the License.
|
data/Rakefile
CHANGED
@@ -1,40 +1,27 @@
|
|
1
|
-
lib = File.expand_path('../lib/', __FILE__)
|
2
|
-
$:.unshift lib unless $:.include?(lib)
|
3
|
-
|
4
|
-
require 'rubygems'
|
5
|
-
require 'rubygems/package'
|
6
1
|
require 'rake/clean'
|
7
2
|
require 'rake/testtask'
|
8
|
-
require 'date'
|
9
|
-
require 'data_cleansing/version'
|
10
3
|
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
s.version = DataCleansing::VERSION
|
16
|
-
s.platform = Gem::Platform::RUBY
|
17
|
-
s.authors = ['Reid Morrison']
|
18
|
-
s.email = ['reidmo@gmail.com']
|
19
|
-
s.homepage = 'https://github.com/ClarityServices/data_cleansing'
|
20
|
-
s.date = Date.today.to_s
|
21
|
-
s.summary = "Data Cleansing framework for Ruby, and Ruby on Rails"
|
22
|
-
s.description = "Data Cleansing framework for Ruby with additional support for Rails and Mongoid"
|
23
|
-
s.files = FileList["./**/*"].exclude(/.gem$/, /.log$/,/^nbproject/).map{|f| f.sub(/^\.\//, '')}
|
24
|
-
s.license = "Apache License V2.0"
|
25
|
-
s.has_rdoc = true
|
26
|
-
s.add_dependency 'thread_safe'
|
27
|
-
s.add_dependency 'semantic_logger'
|
28
|
-
end
|
29
|
-
Gem::Package.build gemspec
|
4
|
+
require_relative 'lib/data_cleansing/version'
|
5
|
+
|
6
|
+
task :gem do
|
7
|
+
system 'gem build data_cleansing.gemspec'
|
30
8
|
end
|
31
9
|
|
32
|
-
|
10
|
+
task publish: :gem do
|
11
|
+
system "git tag -a v#{DataCleansing::VERSION} -m 'Tagging #{DataCleansing::VERSION}'"
|
12
|
+
system 'git push --tags'
|
13
|
+
system "gem push data_cleansing-#{DataCleansing::VERSION}.gem"
|
14
|
+
system "rm data_cleansing-#{DataCleansing::VERSION}.gem"
|
15
|
+
end
|
16
|
+
|
17
|
+
desc 'Run Test Suite'
|
33
18
|
task :test do
|
34
19
|
Rake::TestTask.new(:functional) do |t|
|
35
|
-
t.test_files = FileList['test
|
20
|
+
t.test_files = FileList['test/**/*_test.rb']
|
36
21
|
t.verbose = true
|
37
22
|
end
|
38
23
|
|
39
24
|
Rake::Task['functional'].invoke
|
40
25
|
end
|
26
|
+
|
27
|
+
task default: :test
|
data/lib/data_cleansing.rb
CHANGED
@@ -0,0 +1,137 @@
|
|
1
|
+
require 'uri'
|
2
|
+
module Cleaners
|
3
|
+
# Strip leading and trailing whitespace
|
4
|
+
module Strip
  class << self
    # Removes leading and trailing whitespace from +string+ in place.
    #
    # Anything that is not a String is handed back untouched.
    # Returns the same String object that was passed in.
    def call(string)
      # strip! returns nil when there was nothing to remove, so its result
      # is ignored and the (possibly modified) receiver is returned instead.
      string.strip! if string.is_a?(String)
      string
    end
  end
end
|
11
|
+
DataCleansing.register_cleaner(:strip, Strip)
|
12
|
+
|
13
|
+
# Convert to uppercase
|
14
|
+
module Upcase
  # Upper-cases +string+ in place and returns it.
  #
  # Values that are not Strings are returned as-is.
  def self.call(string)
    if string.is_a?(String)
      # upcase! yields nil when the text was already upper case; fall back
      # to the receiver in that case.
      upcased = string.upcase!
      upcased.nil? ? string : upcased
    else
      string
    end
  end
end
|
21
|
+
DataCleansing.register_cleaner(:upcase, Upcase)
|
22
|
+
|
23
|
+
# Remove all non-word characters, including whitespace
|
24
|
+
module RemoveNonWord
  # Matches any single character that is not a word character.
  NOT_WORDS = /\W/

  # Deletes every non-word character (whitespace included) from +string+
  # in place and returns it. Non-String values pass straight through.
  def self.call(string)
    # gsub! returns nil when nothing matched, so return the receiver itself.
    string.gsub!(NOT_WORDS, '') if string.is_a?(String)
    string
  end
end
|
33
|
+
DataCleansing.register_cleaner(:remove_non_word, RemoveNonWord)
|
34
|
+
|
35
|
+
# Remove all non-printable characters
|
36
|
+
module RemoveNonPrintable
  # Matches any single character outside the POSIX [:print:] class.
  NOT_PRINTABLE = /[^[:print:]]/

  class << self
    # Deletes every non-printable character from +string+ in place.
    #
    # Returns the String that was passed in, or the original value when it
    # is not a String.
    def call(string)
      return string unless string.is_a?(String)

      cleaned = string.gsub!(NOT_PRINTABLE, '')
      # nil from gsub! means there was nothing to remove.
      cleaned.nil? ? string : cleaned
    end
  end
end
|
45
|
+
DataCleansing.register_cleaner(:remove_non_printable, RemoveNonPrintable)
|
46
|
+
|
47
|
+
# Remove HTML Markup
|
48
|
+
module RemoveHTMLMarkup
  # Matches the supported named entities, case-insensitively. The entity
  # name (without the surrounding "&" and ";") is captured in group 1.
  HTML_MARKUP = /&(amp|quot|gt|lt|apos|nbsp);/i

  # Replacement text for each supported entity name (lower case).
  ENTITIES = {
    'amp'  => '&',
    'quot' => '"',
    'gt'   => '>',
    'lt'   => '<',
    'apos' => "'",
    'nbsp' => ' '
  }.freeze

  # Replaces the HTML entities &amp; &quot; &gt; &lt; &apos; and &nbsp;
  # with the characters they stand for, modifying +string+ in place.
  #
  # Entities other than those listed are left as they are. The replacement
  # is a single pass, so "&amp;lt;" becomes "&lt;" rather than "<".
  #
  # Returns the String that was passed in; non-String values are returned
  # unchanged.
  def self.call(string)
    return string unless string.is_a?(String)

    # The block is handed the whole match (e.g. "&amp;"), so the lookup must
    # use the captured entity name rather than the block argument.
    string.gsub!(HTML_MARKUP) { ENTITIES.fetch(Regexp.last_match(1).downcase) }
    string
  end
end
|
74
|
+
DataCleansing.register_cleaner(:remove_html_markup, RemoveHTMLMarkup)
|
75
|
+
|
76
|
+
module ReplaceURIChars
  # Decodes percent-encoded sequences (e.g. "%20") in +string+.
  #
  # Returns a new String with the escapes decoded; the argument itself is
  # not modified. Non-String values are returned unchanged.
  def self.call(string)
    return string unless string.is_a?(String)

    # URI.unescape is obsolete and was removed in Ruby 3.0; the parser
    # object exposes the same percent-decoding.
    URI::DEFAULT_PARSER.unescape(string)
  end
end
|
83
|
+
DataCleansing.register_cleaner(:replace_uri_chars, ReplaceURIChars)
|
84
|
+
|
85
|
+
# Compress multiple whitespace to a single space
|
86
|
+
module CompressWhitespace
  # Matches one or more consecutive whitespace characters.
  WHITESPACE = /\s+/

  # Collapses every run of whitespace in +string+ into a single space,
  # modifying it in place, and returns it.
  #
  # Non-String values are returned as given.
  def self.call(string)
    if string.is_a?(String)
      # gsub! answers nil when no whitespace was found; the receiver is
      # returned either way.
      string.gsub!(WHITESPACE, ' ')
    end
    string
  end
end
|
95
|
+
DataCleansing.register_cleaner(:compress_whitespace, CompressWhitespace)
|
96
|
+
|
97
|
+
# Remove Non-Digit Chars
|
98
|
+
# Returns nil if no digit characters present
|
99
|
+
module DigitsOnly
  # Despite the name, this matches any single character that is NOT a digit.
  DIGITS = /\D/

  class << self
    # Deletes every non-digit character from +string+ in place.
    #
    # Returns the remaining digits as a String, or nil when nothing is left.
    # Non-String values are returned unchanged.
    def call(string)
      return string unless string.is_a?(String)

      string.gsub!(DIGITS, '')
      string.empty? ? nil : string
    end
  end
end
|
109
|
+
DataCleansing.register_cleaner(:digits_only, DigitsOnly)
|
110
|
+
|
111
|
+
# Returns [Integer] after removing all non-digit characters, except '.'
|
112
|
+
# Returns nil if no digits are present in the string.
|
113
|
+
module StringToInteger
  # Matches any single character that is neither a digit nor '.'.
  NUMERIC = /[^0-9\.]/

  # Converts +string+ to an Integer after deleting (in place) every
  # character that is not a digit or '.'.
  #
  # The conversion uses String#to_i, so anything from the first '.' onwards
  # is dropped ("12.7" => 12) and a sign is never preserved.
  #
  # Returns nil when the string contains no digits at all, and returns
  # non-String values unchanged.
  def self.call(string)
    return string unless string.is_a?(String)

    # Remove Non-Digit Chars, except for '.'
    string.gsub!(NUMERIC, '')

    # A leftover such as "." or ".." has no digits; checking only the length
    # would turn it into 0 instead of the documented nil.
    string =~ /\d/ ? string.to_i : nil
  end
end
|
124
|
+
DataCleansing.register_cleaner(:string_to_integer, StringToInteger)
|
125
|
+
|
126
|
+
# Convert a Date to a Time at the end of day for that date (YYYY-MM-DD 23:59:59)
|
127
|
+
# Ex: 2015-12-31 becomes 2015-12-31 23:59:59
|
128
|
+
# If something other than a Date object is passed in, it just passes through.
|
129
|
+
module DateToTimeAtEndOfDay
  # Converts a Date into a Time at the end of that day.
  #
  # Any value that is not a Date is returned unchanged.
  #
  # NOTE(review): Time#end_of_day is not part of core Ruby; presumably it is
  # supplied by ActiveSupport. Confirm it is loaded wherever this cleaner is
  # used with Date values.
  def self.call(date)
    if date.is_a?(Date)
      as_time = date.to_time
      as_time.end_of_day
    else
      date
    end
  end
end
|
136
|
+
DataCleansing.register_cleaner(:date_to_time_at_end_of_day, DateToTimeAtEndOfDay)
|
137
|
+
end
|
@@ -1,3 +1,4 @@
|
|
1
|
+
require 'data_cleansing/cleaners'
|
1
2
|
module DataCleansing
|
2
3
|
# Mix-in to add cleaner
|
3
4
|
module Cleanse
|
@@ -17,7 +18,7 @@ module DataCleansing
|
|
17
18
|
|
18
19
|
# Create shortcuts to cleaners for each attribute for use by .cleanse_attribute
|
19
20
|
attributes.each do |attr|
|
20
|
-
(data_cleansing_attribute_cleaners[attr] ||=
|
21
|
+
(data_cleansing_attribute_cleaners[attr] ||= Concurrent::Array.new) << cleaner
|
21
22
|
end
|
22
23
|
cleaner
|
23
24
|
end
|
@@ -72,17 +73,17 @@ module DataCleansing
|
|
72
73
|
|
73
74
|
# Array of cleaners to execute against this model and its children
|
74
75
|
def data_cleansing_cleaners
|
75
|
-
@data_cleansing_cleaners ||=
|
76
|
+
@data_cleansing_cleaners ||= Concurrent::Array.new
|
76
77
|
end
|
77
78
|
|
78
79
|
# Array of cleaners to execute against this model and its children
|
79
80
|
def data_cleansing_after_cleaners
|
80
|
-
@data_cleansing_after_cleaners ||=
|
81
|
+
@data_cleansing_after_cleaners ||= Concurrent::Array.new
|
81
82
|
end
|
82
83
|
|
83
84
|
# Hash of attributes to clean with their corresponding cleaner
|
84
85
|
def data_cleansing_attribute_cleaners
|
85
|
-
@data_cleansing_attribute_cleaners ||=
|
86
|
+
@data_cleansing_attribute_cleaners ||= Concurrent::Hash.new
|
86
87
|
end
|
87
88
|
|
88
89
|
private
|
@@ -100,17 +101,22 @@ module DataCleansing
|
|
100
101
|
return if cleaner_struct.nil? || value.nil?
|
101
102
|
# Duplicate value in case cleaner uses methods such as gsub!
|
102
103
|
new_value = value.is_a?(String) ? value.dup : value
|
103
|
-
cleaner_struct.cleaners.each do |
|
104
|
+
cleaner_struct.cleaners.each do |name|
|
104
105
|
# Cleaner itself could be a custom Proc, otherwise do a global lookup for it
|
105
|
-
proc =
|
106
|
-
raise "No cleaner defined for #{
|
106
|
+
proc = name.is_a?(Proc) ? name : DataCleansing.cleaner(name.to_sym)
|
107
|
+
raise "No cleaner defined for #{name.inspect}" unless proc
|
107
108
|
|
108
|
-
|
109
|
-
|
110
|
-
|
109
|
+
if proc.is_a?(Proc)
|
110
|
+
new_value = if object
|
111
|
+
# Call the cleaner proc within the scope (binding) of the object
|
112
|
+
proc.arity == 1 ? object.instance_exec(new_value, &proc) : object.instance_exec(new_value, cleaner_struct.params, &proc)
|
113
|
+
else
|
114
|
+
proc.arity == 1 ? proc.call(new_value) : proc.call(new_value, cleaner_struct.params)
|
115
|
+
end
|
111
116
|
else
|
112
|
-
proc.arity == 1 ? proc.call(new_value) : proc.call(new_value, cleaner_struct.params)
|
117
|
+
new_value = (proc.method(:call).arity == 1 ? proc.call(new_value) : proc.call(new_value, cleaner_struct.params))
|
113
118
|
end
|
119
|
+
|
114
120
|
end
|
115
121
|
new_value
|
116
122
|
end
|
@@ -2,14 +2,14 @@ module DataCleansing
|
|
2
2
|
include SemanticLogger::Loggable
|
3
3
|
|
4
4
|
# Global Data Cleansers
|
5
|
-
@@global_cleaners =
|
6
|
-
@@masked_attributes =
|
5
|
+
@@global_cleaners = Concurrent::Hash.new
|
6
|
+
@@masked_attributes = Concurrent::Array.new
|
7
7
|
|
8
8
|
# Register a new cleaner
|
9
9
|
# Replaces any existing cleaner with the same name
|
10
|
-
def self.register_cleaner(cleaner, &block)
|
11
|
-
raise "Must supply a Proc with the cleaner" unless block
|
12
|
-
@@global_cleaners[
|
10
|
+
def self.register_cleaner(name, cleaner = nil, &block)
|
11
|
+
raise "Must supply a Proc with the cleaner" unless block || cleaner
|
12
|
+
@@global_cleaners[name.to_sym] = cleaner || block
|
13
13
|
end
|
14
14
|
|
15
15
|
# Returns the cleaner matching the supplied cleaner name
|
data/test/active_record_test.rb
CHANGED
@@ -1,18 +1,5 @@
|
|
1
|
-
|
2
|
-
$LOAD_PATH.unshift File.dirname(__FILE__) + '/../lib'
|
3
|
-
|
4
|
-
require 'rubygems'
|
5
|
-
require 'test/unit'
|
6
|
-
require 'shoulda'
|
7
|
-
# Load ActiveRecord before loading data_cleansing so that the AR extensions
|
8
|
-
# are loaded
|
1
|
+
require_relative 'test_helper'
|
9
2
|
require 'active_record'
|
10
|
-
require 'data_cleansing'
|
11
|
-
require 'semantic_logger'
|
12
|
-
|
13
|
-
# Register an appender if one is not already registered
|
14
|
-
SemanticLogger.default_level = :trace
|
15
|
-
SemanticLogger.add_appender('test.log') if SemanticLogger.appenders.size == 0
|
16
3
|
|
17
4
|
ActiveRecord::Base.logger = SemanticLogger[ActiveRecord::Base]
|
18
5
|
ActiveRecord::Base.configurations = {
|
@@ -36,9 +23,6 @@ ActiveRecord::Schema.define :version => 0 do
|
|
36
23
|
end
|
37
24
|
end
|
38
25
|
|
39
|
-
# Define a global cleaner
|
40
|
-
DataCleansing.register_cleaner(:strip) {|string| string.strip}
|
41
|
-
|
42
26
|
# Log data cleansing result
|
43
27
|
# Set to :warn or higher to disable
|
44
28
|
DataCleansing.logger.level = :debug
|
@@ -46,20 +30,6 @@ DataCleansing.logger.level = :debug
|
|
46
30
|
# Set the Global list of fields to be masked
|
47
31
|
DataCleansing.register_masked_attributes :ssn, :bank_account_number
|
48
32
|
|
49
|
-
# Removes all non-digit characters, except '.' then truncates
|
50
|
-
# the result to an integer string
|
51
|
-
# Returns nil if no digits are present in the string
|
52
|
-
DataCleansing.register_cleaner(:digits_to_integer) do |integer|
|
53
|
-
if integer.kind_of?(String)
|
54
|
-
# Remove Non-Digit Chars, except for '.'
|
55
|
-
integer = integer.gsub(/[^0-9\.]/, '')
|
56
|
-
integer.length > 0 ? integer.to_i : nil
|
57
|
-
else
|
58
|
-
integer
|
59
|
-
end
|
60
|
-
end
|
61
|
-
|
62
|
-
|
63
33
|
class User < ActiveRecord::Base
|
64
34
|
include DataCleansing::Cleanse
|
65
35
|
|
@@ -73,7 +43,7 @@ class User < ActiveRecord::Base
|
|
73
43
|
cleanse :address1, :address2, :instance_var, :cleaner => Proc.new {|string| "<< #{string.strip} >>"}
|
74
44
|
|
75
45
|
# Custom Zip Code cleaner
|
76
|
-
cleanse :zip_code, :cleaner => :
|
46
|
+
cleanse :zip_code, :cleaner => :string_to_integer
|
77
47
|
|
78
48
|
# Automatically cleanse data before validation
|
79
49
|
before_validation :cleanse_attributes!
|
@@ -94,20 +64,20 @@ class User2 < ActiveRecord::Base
|
|
94
64
|
cleanse :first_name, :cleaner => Proc.new {|string| "$#{string}$"}
|
95
65
|
|
96
66
|
# Custom Zip Code cleaner
|
97
|
-
cleanse :zip_code, :cleaner => :
|
67
|
+
cleanse :zip_code, :cleaner => :string_to_integer
|
98
68
|
|
99
69
|
# Automatically cleanse data before validation
|
100
70
|
before_validation :cleanse_attributes!
|
101
71
|
end
|
102
72
|
|
103
|
-
class ActiveRecordTest < Test
|
104
|
-
|
73
|
+
class ActiveRecordTest < Minitest::Test
|
74
|
+
describe "ActiveRecord Models" do
|
105
75
|
|
106
|
-
|
76
|
+
it 'have globally registered cleaner' do
|
107
77
|
assert DataCleansing.cleaner(:strip)
|
108
78
|
end
|
109
79
|
|
110
|
-
|
80
|
+
it 'Model.cleanse_attribute' do
|
111
81
|
assert_equal 'joe', User.cleanse_attribute(:first_name, ' joe ')
|
112
82
|
assert_equal 'black', User.cleanse_attribute(:last_name, "\n black\n")
|
113
83
|
assert_equal '<< 2632 Brown St >>', User.cleanse_attribute(:address1, "2632 Brown St \n")
|
@@ -115,8 +85,8 @@ class ActiveRecordTest < Test::Unit::TestCase
|
|
115
85
|
assert_equal 12345, User.cleanse_attribute(:zip_code, "\n\tblah 12345badtext\n")
|
116
86
|
end
|
117
87
|
|
118
|
-
|
119
|
-
|
88
|
+
describe "with user" do
|
89
|
+
before do
|
120
90
|
@user = User.new(
|
121
91
|
:first_name => ' joe ',
|
122
92
|
:last_name => "\n black\n",
|
@@ -126,30 +96,30 @@ class ActiveRecordTest < Test::Unit::TestCase
|
|
126
96
|
)
|
127
97
|
end
|
128
98
|
|
129
|
-
|
99
|
+
it 'only have 3 cleaners' do
|
130
100
|
assert_equal 3, User.send(:data_cleansing_cleaners).size, User.send(:data_cleansing_cleaners)
|
131
101
|
end
|
132
102
|
|
133
|
-
|
103
|
+
it 'cleanse_attributes! using global cleaner' do
|
134
104
|
assert_equal true, @user.valid?
|
135
105
|
assert_equal 'joe', @user.first_name
|
136
106
|
assert_equal 'black', @user.last_name
|
137
107
|
end
|
138
108
|
|
139
|
-
|
109
|
+
it 'cleanse_attributes! using attribute specific custom cleaner' do
|
140
110
|
assert_equal true, @user.valid?
|
141
111
|
assert_equal '<< 2632 Brown St >>', @user.address1
|
142
112
|
assert_equal '<< instance >>', @user.instance_var
|
143
113
|
end
|
144
114
|
|
145
|
-
|
115
|
+
it 'cleanse_attributes! using global cleaner using rails extensions' do
|
146
116
|
@user.cleanse_attributes!
|
147
117
|
assert_equal 12345, @user.zip_code
|
148
118
|
end
|
149
119
|
end
|
150
120
|
|
151
|
-
|
152
|
-
|
121
|
+
describe "with user2" do
|
122
|
+
before do
|
153
123
|
@user = User2.new(
|
154
124
|
:first_name => ' joe ',
|
155
125
|
:last_name => "\n black\n",
|
@@ -159,16 +129,16 @@ class ActiveRecordTest < Test::Unit::TestCase
|
|
159
129
|
)
|
160
130
|
end
|
161
131
|
|
162
|
-
|
132
|
+
it 'have 4 cleaners defined' do
|
163
133
|
assert_equal 4, User2.send(:data_cleansing_cleaners).size, User2.send(:data_cleansing_cleaners)
|
164
134
|
end
|
165
135
|
|
166
|
-
|
136
|
+
it 'have 3 attributes cleaners defined' do
|
167
137
|
# :all, :first_name, :zip_code
|
168
138
|
assert_equal 3, User2.send(:data_cleansing_attribute_cleaners).size, User2.send(:data_cleansing_attribute_cleaners)
|
169
139
|
end
|
170
140
|
|
171
|
-
|
141
|
+
it 'cleanse_attributes! clean all attributes' do
|
172
142
|
assert_equal true, @user.valid?
|
173
143
|
assert_equal '$<< @joe@ >>$', @user.first_name, User2.send(:data_cleansing_cleaners)
|
174
144
|
assert_equal '@black@', @user.last_name
|
@@ -180,4 +150,4 @@ class ActiveRecordTest < Test::Unit::TestCase
|
|
180
150
|
end
|
181
151
|
|
182
152
|
end
|
183
|
-
end
|
153
|
+
end
|
data/test/ruby_test.rb
CHANGED
@@ -1,13 +1,4 @@
|
|
1
|
-
|
2
|
-
$LOAD_PATH.unshift File.dirname(__FILE__) + '/../lib'
|
3
|
-
|
4
|
-
require 'rubygems'
|
5
|
-
require 'test/unit'
|
6
|
-
require 'shoulda'
|
7
|
-
require 'data_cleansing'
|
8
|
-
|
9
|
-
# Define a global cleanser
|
10
|
-
DataCleansing.register_cleaner(:strip) {|string| string.strip}
|
1
|
+
require_relative 'test_helper'
|
11
2
|
|
12
3
|
# Non Cleansing base class
|
13
4
|
class RubyUserBase
|
@@ -43,9 +34,6 @@ class RubyUserChild < RubyUser
|
|
43
34
|
cleanse :gender, :cleaner => Proc.new {|gender| gender.to_s.strip.downcase}
|
44
35
|
end
|
45
36
|
|
46
|
-
# Another global cleaner, used by RubyUser2
|
47
|
-
DataCleansing.register_cleaner(:upcase) {|string| string.upcase}
|
48
|
-
|
49
37
|
class RubyUser2
|
50
38
|
include DataCleansing::Cleanse
|
51
39
|
|
@@ -75,46 +63,46 @@ class RubyUser2
|
|
75
63
|
]
|
76
64
|
end
|
77
65
|
|
78
|
-
class RubyTest < Test
|
79
|
-
|
66
|
+
class RubyTest < Minitest::Test
|
67
|
+
describe "Ruby Models" do
|
80
68
|
|
81
|
-
|
69
|
+
it 'have globally registered cleaner' do
|
82
70
|
assert DataCleansing.cleaner(:strip)
|
83
71
|
end
|
84
72
|
|
85
|
-
|
73
|
+
it 'Model.cleanse_attribute' do
|
86
74
|
assert_equal 'male', RubyUserChild.cleanse_attribute(:gender, "\n Male \n"), RubyUserChild.send(:data_cleansing_attribute_cleaners)
|
87
75
|
assert_equal 'joe', RubyUserChild.cleanse_attribute(:first_name, ' joe '), RubyUserChild.send(:data_cleansing_attribute_cleaners)
|
88
76
|
assert_equal 'black', RubyUserChild.cleanse_attribute(:last_name, "\n black\n"), RubyUserChild.send(:data_cleansing_attribute_cleaners)
|
89
77
|
assert_equal '<< 2632 Brown St >>', RubyUserChild.cleanse_attribute(:address1, "2632 Brown St \n"), RubyUserChild.send(:data_cleansing_attribute_cleaners)
|
90
78
|
end
|
91
79
|
|
92
|
-
|
93
|
-
|
80
|
+
describe "with ruby user" do
|
81
|
+
before do
|
94
82
|
@user = RubyUser.new
|
95
83
|
@user.first_name = ' joe '
|
96
84
|
@user.last_name = "\n black\n"
|
97
85
|
@user.address1 = "2632 Brown St \n"
|
98
86
|
end
|
99
87
|
|
100
|
-
|
88
|
+
it 'cleanse_attributes! using global cleaner' do
|
101
89
|
@user.cleanse_attributes!
|
102
90
|
assert_equal 'joe', @user.first_name
|
103
91
|
assert_equal 'black', @user.last_name
|
104
92
|
end
|
105
93
|
|
106
|
-
|
94
|
+
it 'cleanse_attributes! using attribute specific custom cleaner' do
|
107
95
|
@user.cleanse_attributes!
|
108
96
|
assert_equal '<< 2632 Brown St >>', @user.address1
|
109
97
|
end
|
110
98
|
|
111
|
-
|
99
|
+
it 'cleanse_attributes! not cleanse nil attributes' do
|
112
100
|
@user.first_name = nil
|
113
101
|
@user.cleanse_attributes!
|
114
102
|
assert_equal nil, @user.first_name
|
115
103
|
end
|
116
104
|
|
117
|
-
|
105
|
+
it 'cleanse_attributes! call after cleaner' do
|
118
106
|
@user.first_name = 'Jack'
|
119
107
|
@user.last_name = nil
|
120
108
|
@user.cleanse_attributes!
|
@@ -123,8 +111,8 @@ class RubyTest < Test::Unit::TestCase
|
|
123
111
|
end
|
124
112
|
end
|
125
113
|
|
126
|
-
|
127
|
-
|
114
|
+
describe "with ruby user child" do
|
115
|
+
before do
|
128
116
|
@user = RubyUserChild.new
|
129
117
|
@user.first_name = ' joe '
|
130
118
|
@user.last_name = "\n black\n"
|
@@ -132,32 +120,32 @@ class RubyTest < Test::Unit::TestCase
|
|
132
120
|
@user.gender = "\n Male \n"
|
133
121
|
end
|
134
122
|
|
135
|
-
|
123
|
+
it 'cleanse_attributes! using global cleaner' do
|
136
124
|
@user.cleanse_attributes!
|
137
125
|
assert_equal 'joe', @user.first_name
|
138
126
|
assert_equal 'black', @user.last_name
|
139
127
|
end
|
140
128
|
|
141
|
-
|
129
|
+
it 'cleanse_attributes! using attribute specific custom cleaner' do
|
142
130
|
@user.cleanse_attributes!
|
143
131
|
assert_equal '<< 2632 Brown St >>', @user.address1
|
144
132
|
end
|
145
133
|
|
146
|
-
|
134
|
+
it 'cleanse_attributes! not cleanse nil attributes' do
|
147
135
|
@user.first_name = nil
|
148
136
|
@user.cleanse_attributes!
|
149
137
|
assert_equal nil, @user.first_name
|
150
138
|
end
|
151
139
|
|
152
|
-
|
140
|
+
it 'cleanse_attributes! clean child attributes' do
|
153
141
|
@user.cleanse_attributes!
|
154
142
|
assert_equal 'male', @user.gender
|
155
143
|
end
|
156
144
|
|
157
145
|
end
|
158
146
|
|
159
|
-
|
160
|
-
|
147
|
+
describe "with ruby user2" do
|
148
|
+
before do
|
161
149
|
@user = RubyUser2.new
|
162
150
|
@user.first_name = ' joe '
|
163
151
|
@user.last_name = "\n black\n"
|
@@ -166,23 +154,23 @@ class RubyTest < Test::Unit::TestCase
|
|
166
154
|
@user.gender = " Unknown "
|
167
155
|
end
|
168
156
|
|
169
|
-
|
157
|
+
it 'cleanse_attributes!' do
|
170
158
|
@user.cleanse_attributes!
|
171
159
|
assert_equal 'joe', @user.first_name
|
172
160
|
assert_equal 'black', @user.last_name
|
173
161
|
assert_equal '2632 Brown St', @user.address1
|
174
162
|
end
|
175
163
|
|
176
|
-
|
164
|
+
it 'cleanse_attributes! with multiple cleaners' do
|
177
165
|
@user.cleanse_attributes!
|
178
166
|
assert_equal 'MR.', @user.title
|
179
167
|
end
|
180
168
|
|
181
|
-
|
169
|
+
it 'cleanse_attributes! referencing other attributes' do
|
182
170
|
@user.cleanse_attributes!
|
183
171
|
assert_equal 'Male', @user.gender
|
184
172
|
end
|
185
173
|
end
|
186
174
|
|
187
175
|
end
|
188
|
-
end
|
176
|
+
end
|
data/test/test_db.sqlite3
CHANGED
Binary file
|
data/test/test_helper.rb
ADDED
@@ -0,0 +1,13 @@
|
|
1
|
+
$LOAD_PATH.unshift File.dirname(__FILE__) + '/../lib'
|
2
|
+
|
3
|
+
require 'yaml'
|
4
|
+
require 'minitest/autorun'
|
5
|
+
require 'minitest/reporters'
|
6
|
+
require 'minitest/stub_any_instance'
|
7
|
+
require 'awesome_print'
|
8
|
+
require 'data_cleansing'
|
9
|
+
|
10
|
+
MiniTest::Reporters.use! MiniTest::Reporters::SpecReporter.new
|
11
|
+
|
12
|
+
SemanticLogger.add_appender(file_name: 'test.log', formatter: :color)
|
13
|
+
SemanticLogger.default_level = :debug
|
metadata
CHANGED
@@ -1,72 +1,66 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: data_cleansing
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.8.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Reid Morrison
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2016-03-01 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
|
-
name:
|
14
|
+
name: concurrent-ruby
|
15
15
|
requirement: !ruby/object:Gem::Requirement
|
16
16
|
requirements:
|
17
|
-
- -
|
17
|
+
- - "~>"
|
18
18
|
- !ruby/object:Gem::Version
|
19
|
-
version: '0'
|
19
|
+
version: '1.0'
|
20
20
|
type: :runtime
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
|
-
- -
|
24
|
+
- - "~>"
|
25
25
|
- !ruby/object:Gem::Version
|
26
|
-
version: '0'
|
26
|
+
version: '1.0'
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
28
|
name: semantic_logger
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
30
30
|
requirements:
|
31
|
-
- -
|
31
|
+
- - ">="
|
32
32
|
- !ruby/object:Gem::Version
|
33
|
-
version: '0'
|
33
|
+
version: '2.0'
|
34
34
|
type: :runtime
|
35
35
|
prerelease: false
|
36
36
|
version_requirements: !ruby/object:Gem::Requirement
|
37
37
|
requirements:
|
38
|
-
- -
|
38
|
+
- - ">="
|
39
39
|
- !ruby/object:Gem::Version
|
40
|
-
version: '0'
|
41
|
-
description:
|
42
|
-
Mongoid
|
40
|
+
version: '2.0'
|
41
|
+
description:
|
43
42
|
email:
|
44
43
|
- reidmo@gmail.com
|
45
44
|
executables: []
|
46
45
|
extensions: []
|
47
46
|
extra_rdoc_files: []
|
48
47
|
files:
|
49
|
-
- Gemfile
|
50
|
-
- Gemfile.lock
|
51
48
|
- LICENSE.txt
|
52
49
|
- README.md
|
53
50
|
- Rakefile
|
54
51
|
- lib/data_cleansing.rb
|
52
|
+
- lib/data_cleansing/cleaners.rb
|
55
53
|
- lib/data_cleansing/cleanse.rb
|
56
54
|
- lib/data_cleansing/data_cleansing.rb
|
57
55
|
- lib/data_cleansing/railtie.rb
|
58
56
|
- lib/data_cleansing/version.rb
|
59
|
-
- nbproject/private/private.properties
|
60
|
-
- nbproject/private/private.xml
|
61
|
-
- nbproject/private/rake-d.txt
|
62
|
-
- nbproject/project.properties
|
63
|
-
- nbproject/project.xml
|
64
57
|
- test/active_record_test.rb
|
65
58
|
- test/ruby_test.rb
|
66
59
|
- test/test_db.sqlite3
|
67
|
-
|
60
|
+
- test/test_helper.rb
|
61
|
+
homepage: http://github.com/reidmorrison/data_cleansing
|
68
62
|
licenses:
|
69
|
-
- Apache
|
63
|
+
- Apache-2.0
|
70
64
|
metadata: {}
|
71
65
|
post_install_message:
|
72
66
|
rdoc_options: []
|
@@ -74,18 +68,22 @@ require_paths:
|
|
74
68
|
- lib
|
75
69
|
required_ruby_version: !ruby/object:Gem::Requirement
|
76
70
|
requirements:
|
77
|
-
- -
|
71
|
+
- - ">="
|
78
72
|
- !ruby/object:Gem::Version
|
79
73
|
version: '0'
|
80
74
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
81
75
|
requirements:
|
82
|
-
- -
|
76
|
+
- - ">="
|
83
77
|
- !ruby/object:Gem::Version
|
84
78
|
version: '0'
|
85
79
|
requirements: []
|
86
80
|
rubyforge_project:
|
87
|
-
rubygems_version: 2.
|
81
|
+
rubygems_version: 2.5.1
|
88
82
|
signing_key:
|
89
83
|
specification_version: 4
|
90
|
-
summary: Data Cleansing framework for Ruby,
|
91
|
-
test_files:
|
84
|
+
summary: Data Cleansing framework for Ruby, Rails, Mongoid and MongoMapper.
|
85
|
+
test_files:
|
86
|
+
- test/active_record_test.rb
|
87
|
+
- test/ruby_test.rb
|
88
|
+
- test/test_db.sqlite3
|
89
|
+
- test/test_helper.rb
|
data/Gemfile
DELETED
@@ -1,21 +0,0 @@
|
|
1
|
-
source 'https://rubygems.org'
|
2
|
-
gem 'thread_safe'
|
3
|
-
gem 'semantic_logger'
|
4
|
-
|
5
|
-
group :test do
|
6
|
-
gem "shoulda"
|
7
|
-
|
8
|
-
gem "activerecord"
|
9
|
-
gem 'sqlite3', :platform => :ruby
|
10
|
-
|
11
|
-
platforms :jruby do
|
12
|
-
gem 'jdbc-sqlite3'
|
13
|
-
gem 'activerecord-jdbcsqlite3-adapter'
|
14
|
-
end
|
15
|
-
|
16
|
-
gem "mongoid"
|
17
|
-
end
|
18
|
-
|
19
|
-
group :develop do
|
20
|
-
gem 'awesome_print'
|
21
|
-
end
|
data/Gemfile.lock
DELETED
@@ -1,63 +0,0 @@
|
|
1
|
-
GEM
|
2
|
-
remote: https://rubygems.org/
|
3
|
-
specs:
|
4
|
-
activemodel (4.0.0)
|
5
|
-
activesupport (= 4.0.0)
|
6
|
-
builder (~> 3.1.0)
|
7
|
-
activerecord (4.0.0)
|
8
|
-
activemodel (= 4.0.0)
|
9
|
-
activerecord-deprecated_finders (~> 1.0.2)
|
10
|
-
activesupport (= 4.0.0)
|
11
|
-
arel (~> 4.0.0)
|
12
|
-
activerecord-deprecated_finders (1.0.3)
|
13
|
-
activesupport (4.0.0)
|
14
|
-
i18n (~> 0.6, >= 0.6.4)
|
15
|
-
minitest (~> 4.2)
|
16
|
-
multi_json (~> 1.3)
|
17
|
-
thread_safe (~> 0.1)
|
18
|
-
tzinfo (~> 0.3.37)
|
19
|
-
arel (4.0.0)
|
20
|
-
atomic (1.1.10)
|
21
|
-
awesome_print (1.1.0)
|
22
|
-
bson (1.9.1)
|
23
|
-
builder (3.1.4)
|
24
|
-
durran-validatable (2.0.1)
|
25
|
-
i18n (0.6.4)
|
26
|
-
leshill-will_paginate (2.3.11)
|
27
|
-
minitest (4.7.5)
|
28
|
-
mongo (1.9.1)
|
29
|
-
bson (~> 1.9.1)
|
30
|
-
mongoid (1.0.6)
|
31
|
-
activesupport (>= 2.2.2)
|
32
|
-
durran-validatable (>= 2.0.1)
|
33
|
-
leshill-will_paginate (>= 2.3.11)
|
34
|
-
mongo (>= 0.18.2)
|
35
|
-
multi_json (1.7.7)
|
36
|
-
semantic_logger (2.1.0)
|
37
|
-
sync_attr (>= 1.0)
|
38
|
-
thread_safe (>= 0.1.0)
|
39
|
-
shoulda (3.5.0)
|
40
|
-
shoulda-context (~> 1.0, >= 1.0.1)
|
41
|
-
shoulda-matchers (>= 1.4.1, < 3.0)
|
42
|
-
shoulda-context (1.1.4)
|
43
|
-
shoulda-matchers (2.2.0)
|
44
|
-
activesupport (>= 3.0.0)
|
45
|
-
sqlite3 (1.3.7)
|
46
|
-
sync_attr (1.0.0)
|
47
|
-
thread_safe (0.1.0)
|
48
|
-
atomic
|
49
|
-
tzinfo (0.3.37)
|
50
|
-
|
51
|
-
PLATFORMS
|
52
|
-
ruby
|
53
|
-
|
54
|
-
DEPENDENCIES
|
55
|
-
activerecord
|
56
|
-
activerecord-jdbcsqlite3-adapter
|
57
|
-
awesome_print
|
58
|
-
jdbc-sqlite3
|
59
|
-
mongoid
|
60
|
-
semantic_logger
|
61
|
-
shoulda
|
62
|
-
sqlite3
|
63
|
-
thread_safe
|
@@ -1,9 +0,0 @@
|
|
1
|
-
examples.dir=${file.reference.data_cleansing-examples}
|
2
|
-
file.reference.data_cleansing-examples=examples
|
3
|
-
file.reference.data_cleansing-lib=lib
|
4
|
-
file.reference.data_cleansing-test=test
|
5
|
-
main.file=
|
6
|
-
platform.active=Ruby_1
|
7
|
-
source.encoding=UTF-8
|
8
|
-
src.dir=${file.reference.data_cleansing-lib}
|
9
|
-
test.src.dir=${file.reference.data_cleansing-test}
|
data/nbproject/project.xml
DELETED
@@ -1,16 +0,0 @@
|
|
1
|
-
<?xml version="1.0" encoding="UTF-8"?>
|
2
|
-
<project xmlns="http://www.netbeans.org/ns/project/1">
|
3
|
-
<type>org.netbeans.modules.ruby.rubyproject</type>
|
4
|
-
<configuration>
|
5
|
-
<data xmlns="http://www.netbeans.org/ns/ruby-project/1">
|
6
|
-
<name>data_cleansing</name>
|
7
|
-
<source-roots>
|
8
|
-
<root id="src.dir"/>
|
9
|
-
<root id="examples.dir"/>
|
10
|
-
</source-roots>
|
11
|
-
<test-roots>
|
12
|
-
<root id="test.src.dir"/>
|
13
|
-
</test-roots>
|
14
|
-
</data>
|
15
|
-
</configuration>
|
16
|
-
</project>
|