data_cleansing 0.6.1 → 0.8.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/LICENSE.txt +1 -1
- data/README.md +6 -6
- data/Rakefile +15 -28
- data/lib/data_cleansing.rb +1 -1
- data/lib/data_cleansing/cleaners.rb +137 -0
- data/lib/data_cleansing/cleanse.rb +17 -11
- data/lib/data_cleansing/data_cleansing.rb +5 -5
- data/lib/data_cleansing/version.rb +1 -1
- data/test/active_record_test.rb +19 -49
- data/test/ruby_test.rb +23 -35
- data/test/test_db.sqlite3 +0 -0
- data/test/test_helper.rb +13 -0
- metadata +25 -27
- data/Gemfile +0 -21
- data/Gemfile.lock +0 -63
- data/nbproject/private/private.properties +0 -3
- data/nbproject/private/private.xml +0 -4
- data/nbproject/private/rake-d.txt +0 -4
- data/nbproject/project.properties +0 -9
- data/nbproject/project.xml +0 -16
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: a24ad3a5780b445ed15310ad8776d89c122747d9
|
4
|
+
data.tar.gz: ab79edb935ae22415b50c51d1e1c7dc60c7a16c5
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 032ce480495e7127cd17b4b1bd39630e51573c579fda5b7cb34bb32f1f3cb6509c8e3ebd9a568b88f6f36cb694d7a7dbd7b32aad040fe6f511ce47b3d01fad3f
|
7
|
+
data.tar.gz: b2d7af9ad633ad5c5045c1103129d843fb5453b256cd5c3cbe3590967d950036da39ab8d6940387c36a51f1501c4b348ef5128b64b3c044b7ff01969392b6d5c
|
data/LICENSE.txt
CHANGED
@@ -186,7 +186,7 @@
|
|
186
186
|
same "printed page" as the copyright notice for easier
|
187
187
|
identification within third-party archives.
|
188
188
|
|
189
|
-
Copyright 2012
|
189
|
+
Copyright 2012, 2013, 2014 Reid Morrison
|
190
190
|
|
191
191
|
Licensed under the Apache License, Version 2.0 (the "License");
|
192
192
|
you may not use this file except in compliance with the License.
|
data/README.md
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
data_cleansing
|
2
2
|
==============
|
3
3
|
|
4
|
-
Data Cleansing framework for Ruby
|
4
|
+
Data Cleansing framework for Ruby, Rails, Mongoid and MongoMapper.
|
5
5
|
|
6
6
|
* http://github.com/reidmorrison/data_cleansing
|
7
7
|
|
@@ -251,7 +251,7 @@ in a Rails initializer as follows:
|
|
251
251
|
|
252
252
|
```ruby
|
253
253
|
SemanticLogger.default_level = Rails.logger.level
|
254
|
-
SemanticLogger.add_appender(Rails.logger)
|
254
|
+
SemanticLogger.add_appender(logger: Rails.logger)
|
255
255
|
```
|
256
256
|
|
257
257
|
By changing the log level of DataCleansing itself the type of output for data
|
@@ -319,9 +319,9 @@ tries to convert it to an integer or float.
|
|
319
319
|
|
320
320
|
DataCleansing requires the following dependencies
|
321
321
|
|
322
|
-
* Ruby V1.
|
323
|
-
* Rails
|
324
|
-
* Mongoid
|
322
|
+
* Ruby V1.9.3, V2 and greater
|
323
|
+
* Rails V3.2 (Active Model) or greater for Rails integration ( Only if Rails is being used )
|
324
|
+
* Mongoid and Mongomapper supporting Active Model V3.2 or greater ( Only if Mongoid or MongoMapper is being used )
|
325
325
|
|
326
326
|
## Meta
|
327
327
|
|
@@ -338,7 +338,7 @@ Reid Morrison :: reidmo@gmail.com :: @reidmorrison
|
|
338
338
|
|
339
339
|
## License
|
340
340
|
|
341
|
-
Copyright 2013 Reid Morrison
|
341
|
+
Copyright 2013, 2014, 2015, 2016 Reid Morrison
|
342
342
|
|
343
343
|
Licensed under the Apache License, Version 2.0 (the "License");
|
344
344
|
you may not use this file except in compliance with the License.
|
data/Rakefile
CHANGED
@@ -1,40 +1,27 @@
|
|
1
|
-
lib = File.expand_path('../lib/', __FILE__)
|
2
|
-
$:.unshift lib unless $:.include?(lib)
|
3
|
-
|
4
|
-
require 'rubygems'
|
5
|
-
require 'rubygems/package'
|
6
1
|
require 'rake/clean'
|
7
2
|
require 'rake/testtask'
|
8
|
-
require 'date'
|
9
|
-
require 'data_cleansing/version'
|
10
3
|
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
s.version = DataCleansing::VERSION
|
16
|
-
s.platform = Gem::Platform::RUBY
|
17
|
-
s.authors = ['Reid Morrison']
|
18
|
-
s.email = ['reidmo@gmail.com']
|
19
|
-
s.homepage = 'https://github.com/ClarityServices/data_cleansing'
|
20
|
-
s.date = Date.today.to_s
|
21
|
-
s.summary = "Data Cleansing framework for Ruby, and Ruby on Rails"
|
22
|
-
s.description = "Data Cleansing framework for Ruby with additional support for Rails and Mongoid"
|
23
|
-
s.files = FileList["./**/*"].exclude(/.gem$/, /.log$/,/^nbproject/).map{|f| f.sub(/^\.\//, '')}
|
24
|
-
s.license = "Apache License V2.0"
|
25
|
-
s.has_rdoc = true
|
26
|
-
s.add_dependency 'thread_safe'
|
27
|
-
s.add_dependency 'semantic_logger'
|
28
|
-
end
|
29
|
-
Gem::Package.build gemspec
|
4
|
+
require_relative 'lib/data_cleansing/version'
|
5
|
+
|
6
|
+
task :gem do
|
7
|
+
system 'gem build data_cleansing.gemspec'
|
30
8
|
end
|
31
9
|
|
32
|
-
|
10
|
+
task publish: :gem do
|
11
|
+
system "git tag -a v#{DataCleansing::VERSION} -m 'Tagging #{DataCleansing::VERSION}'"
|
12
|
+
system 'git push --tags'
|
13
|
+
system "gem push data_cleansing-#{DataCleansing::VERSION}.gem"
|
14
|
+
system "rm data_cleansing-#{DataCleansing::VERSION}.gem"
|
15
|
+
end
|
16
|
+
|
17
|
+
desc 'Run Test Suite'
|
33
18
|
task :test do
|
34
19
|
Rake::TestTask.new(:functional) do |t|
|
35
|
-
t.test_files = FileList['test
|
20
|
+
t.test_files = FileList['test/**/*_test.rb']
|
36
21
|
t.verbose = true
|
37
22
|
end
|
38
23
|
|
39
24
|
Rake::Task['functional'].invoke
|
40
25
|
end
|
26
|
+
|
27
|
+
task default: :test
|
data/lib/data_cleansing.rb
CHANGED
data/lib/data_cleansing/cleaners.rb
ADDED
@@ -0,0 +1,137 @@
|
|
1
|
+
require 'uri'
|
2
|
+
module Cleaners
|
3
|
+
# Strip leading and trailing whitespace
|
4
|
+
module Strip
  # Trims leading and trailing whitespace from +string+ in place.
  #
  # Anything that is not a String is handed back untouched. For a String the
  # very same object is returned, whether or not there was anything to trim.
  def self.call(string)
    string.strip! if string.is_a?(String)
    string
  end
end
|
11
|
+
DataCleansing.register_cleaner(:strip, Strip)
|
12
|
+
|
13
|
+
# Convert to uppercase
|
14
|
+
module Upcase
  # Upper-cases +string+ in place.
  #
  # Non-String values pass straight through. For a String the same object is
  # returned, modified only if it contained characters to upper-case.
  def self.call(string)
    string.upcase! if string.is_a?(String)
    string
  end
end
|
21
|
+
DataCleansing.register_cleaner(:upcase, Upcase)
|
22
|
+
|
23
|
+
# Remove all non-word characters, including whitespace
|
24
|
+
module RemoveNonWord
  # Anything that is not a word character (this includes whitespace).
  NOT_WORDS = /\W/

  # Deletes every non-word character from +string+ in place.
  #
  # Non-String values pass straight through; a String is returned as the same
  # (possibly modified) object.
  def self.call(string)
    string.gsub!(NOT_WORDS, '') if string.is_a?(String)
    string
  end
end
|
33
|
+
DataCleansing.register_cleaner(:remove_non_word, RemoveNonWord)
|
34
|
+
|
35
|
+
# Remove all not printable characters
|
36
|
+
module RemoveNonPrintable
  # Any character outside the POSIX [:print:] class.
  NOT_PRINTABLE = /[^[:print:]]/

  # Deletes every non-printable character from +string+ in place.
  #
  # Non-String values pass straight through; a String is returned as the same
  # (possibly modified) object.
  def self.call(string)
    string.gsub!(NOT_PRINTABLE, '') if string.is_a?(String)
    string
  end
end
|
45
|
+
DataCleansing.register_cleaner(:remove_non_printable, RemoveNonPrintable)
|
46
|
+
|
47
|
+
# Remove HTML Markup
|
48
|
+
module RemoveHTMLMarkup
  # The HTML character entities this cleaner decodes, matched case-insensitively.
  # The former /n (binary) flag is dropped: the pattern is pure ASCII, and /n
  # only produced a warning when matching non-ASCII UTF-8 input.
  HTML_MARKUP = /&(amp|quot|gt|lt|apos|nbsp);/i

  # Lower-cased entity => the character it stands for.
  ENTITIES = {
    '&amp;'  => '&',
    '&quot;' => '"',
    '&gt;'   => '>',
    '&lt;'   => '<',
    '&apos;' => "'",
    '&nbsp;' => ' '
  }.freeze

  # Replaces the supported HTML entities in +string+ with their literal
  # characters, in place.
  #
  # Non-String values are returned untouched. For a String the same object is
  # returned, modified only if it contained one of the entities.
  def self.call(string)
    return string unless string.is_a?(String)

    # The block receives the whole entity (e.g. "&AMP;"), so the lookup is
    # keyed on the full lower-cased entity. Falling back to the entity itself
    # guarantees nothing is ever replaced by an empty string.
    string.gsub!(HTML_MARKUP) { |entity| ENTITIES.fetch(entity.downcase, entity) } || string
  end
end
|
74
|
+
DataCleansing.register_cleaner(:remove_html_markup, RemoveHTMLMarkup)
|
75
|
+
|
76
|
+
module ReplaceURIChars
  # Decodes URI percent-escapes (%XX) in +string+, e.g. "a%20b" => "a b".
  #
  # Non-String values are returned untouched.
  def self.call(string)
    return string unless string.is_a?(String)

    # URI.unescape was removed in Ruby 3.0. The parser-level method performs
    # the same decoding and, unlike form decoding, leaves '+' alone.
    URI::DEFAULT_PARSER.unescape(string)
  end
end
|
83
|
+
DataCleansing.register_cleaner(:replace_uri_chars, ReplaceURIChars)
|
84
|
+
|
85
|
+
# Compress multiple whitespace to a single space
|
86
|
+
module CompressWhitespace
  # One or more consecutive whitespace characters.
  WHITESPACE = /\s+/

  # Collapses every run of whitespace in +string+ into a single space, in place.
  #
  # Non-String values pass straight through; a String is returned as the same
  # (possibly modified) object.
  def self.call(string)
    string.gsub!(WHITESPACE, ' ') if string.is_a?(String)
    string
  end
end
|
95
|
+
DataCleansing.register_cleaner(:compress_whitespace, CompressWhitespace)
|
96
|
+
|
97
|
+
# Remove Non-Digit Chars
|
98
|
+
# Returns nil if no digit characters present
|
99
|
+
module DigitsOnly
  # Despite the name, this matches every character that is NOT a digit.
  DIGITS = /\D/

  # Strips all non-digit characters from +string+ in place.
  #
  # Returns the (modified) string, or nil when no characters remain.
  # Non-String values are returned untouched.
  def self.call(string)
    return string unless string.is_a?(String)

    string.gsub!(DIGITS, '')
    string.empty? ? nil : string
  end
end
|
109
|
+
DataCleansing.register_cleaner(:digits_only, DigitsOnly)
|
110
|
+
|
111
|
+
# Returns [Integer] after removing all non-digit characters, except '.'
|
112
|
+
# Returns nil if no digits are present in the string.
|
113
|
+
module StringToInteger
  # Every character that is neither a digit nor '.'.
  NUMERIC = /[^0-9\.]/

  # Converts +string+ to an Integer after removing, in place, every character
  # other than digits and '.'. Anything after the first '.' is truncated by
  # the integer conversion.
  #
  # Returns nil when the string contains no digits at all.
  # Non-String values are returned untouched.
  def self.call(string)
    return string unless string.is_a?(String)

    # Remove Non-Digit Chars, except for '.'
    string.gsub!(NUMERIC, '')

    # Test for an actual digit rather than for leftover characters: input
    # such as "abc." leaves only ".", which must give nil rather than 0.
    string =~ /\d/ ? string.to_i : nil
  end
end
|
124
|
+
DataCleansing.register_cleaner(:string_to_integer, StringToInteger)
|
125
|
+
|
126
|
+
# Convert a Date to a Time at the end of day for that date (YYYY-MM-DD 23:59:59)
|
127
|
+
# Ex: 2015-12-31 becomes 2015-12-31 23:59:59
|
128
|
+
# If something other than a Date object is passed in, it just passes through.
|
129
|
+
module DateToTimeAtEndOfDay
  # Turns a Date into the Time marking the end of that day; every other kind
  # of value is handed back as-is.
  #
  # NOTE(review): Time#end_of_day is not part of Ruby's standard library;
  # presumably it comes from ActiveSupport, so that must be loaded before a
  # Date is passed in. Confirm.
  def self.call(date)
    if date.is_a?(Date)
      date.to_time.end_of_day
    else
      date
    end
  end
end
|
136
|
+
DataCleansing.register_cleaner(:date_to_time_at_end_of_day, DateToTimeAtEndOfDay)
|
137
|
+
end
|
data/lib/data_cleansing/cleanse.rb
CHANGED
@@ -1,3 +1,4 @@
|
|
1
|
+
require 'data_cleansing/cleaners'
|
1
2
|
module DataCleansing
|
2
3
|
# Mix-in to add cleaner
|
3
4
|
module Cleanse
|
@@ -17,7 +18,7 @@ module DataCleansing
|
|
17
18
|
|
18
19
|
# Create shortcuts to cleaners for each attribute for use by .cleanse_attribute
|
19
20
|
attributes.each do |attr|
|
20
|
-
(data_cleansing_attribute_cleaners[attr] ||=
|
21
|
+
(data_cleansing_attribute_cleaners[attr] ||= Concurrent::Array.new) << cleaner
|
21
22
|
end
|
22
23
|
cleaner
|
23
24
|
end
|
@@ -72,17 +73,17 @@ module DataCleansing
|
|
72
73
|
|
73
74
|
# Array of cleaners to execute against this model and it's children
|
74
75
|
def data_cleansing_cleaners
|
75
|
-
@data_cleansing_cleaners ||=
|
76
|
+
@data_cleansing_cleaners ||= Concurrent::Array.new
|
76
77
|
end
|
77
78
|
|
78
79
|
# Array of cleaners to execute against this model and it's children
|
79
80
|
def data_cleansing_after_cleaners
|
80
|
-
@data_cleansing_after_cleaners ||=
|
81
|
+
@data_cleansing_after_cleaners ||= Concurrent::Array.new
|
81
82
|
end
|
82
83
|
|
83
84
|
# Hash of attributes to clean with their corresponding cleaner
|
84
85
|
def data_cleansing_attribute_cleaners
|
85
|
-
@data_cleansing_attribute_cleaners ||=
|
86
|
+
@data_cleansing_attribute_cleaners ||= Concurrent::Hash.new
|
86
87
|
end
|
87
88
|
|
88
89
|
private
|
@@ -100,17 +101,22 @@ module DataCleansing
|
|
100
101
|
return if cleaner_struct.nil? || value.nil?
|
101
102
|
# Duplicate value in case cleaner uses methods such as gsub!
|
102
103
|
new_value = value.is_a?(String) ? value.dup : value
|
103
|
-
cleaner_struct.cleaners.each do |
|
104
|
+
cleaner_struct.cleaners.each do |name|
|
104
105
|
# Cleaner itself could be a custom Proc, otherwise do a global lookup for it
|
105
|
-
proc =
|
106
|
-
raise "No cleaner defined for #{
|
106
|
+
proc = name.is_a?(Proc) ? name : DataCleansing.cleaner(name.to_sym)
|
107
|
+
raise "No cleaner defined for #{name.inspect}" unless proc
|
107
108
|
|
108
|
-
|
109
|
-
|
110
|
-
|
109
|
+
if proc.is_a?(Proc)
|
110
|
+
new_value = if object
|
111
|
+
# Call the cleaner proc within the scope (binding) of the object
|
112
|
+
proc.arity == 1 ? object.instance_exec(new_value, &proc) : object.instance_exec(new_value, cleaner_struct.params, &proc)
|
113
|
+
else
|
114
|
+
proc.arity == 1 ? proc.call(new_value) : proc.call(new_value, cleaner_struct.params)
|
115
|
+
end
|
111
116
|
else
|
112
|
-
proc.arity == 1 ? proc.call(new_value) : proc.call(new_value, cleaner_struct.params)
|
117
|
+
new_value = (proc.method(:call).arity == 1 ? proc.call(new_value) : proc.call(new_value, cleaner_struct.params))
|
113
118
|
end
|
119
|
+
|
114
120
|
end
|
115
121
|
new_value
|
116
122
|
end
|
data/lib/data_cleansing/data_cleansing.rb
CHANGED
@@ -2,14 +2,14 @@ module DataCleansing
|
|
2
2
|
include SemanticLogger::Loggable
|
3
3
|
|
4
4
|
# Global Data Cleansers
|
5
|
-
@@global_cleaners =
|
6
|
-
@@masked_attributes =
|
5
|
+
@@global_cleaners = Concurrent::Hash.new
|
6
|
+
@@masked_attributes = Concurrent::Array.new
|
7
7
|
|
8
8
|
# Register a new cleaner
|
9
9
|
# Replaces any existing cleaner with the same name
|
10
|
-
def self.register_cleaner(cleaner, &block)
|
11
|
-
raise "Must supply a Proc with the cleaner" unless block
|
12
|
-
@@global_cleaners[
|
10
|
+
def self.register_cleaner(name, cleaner = nil, &block)
|
11
|
+
raise "Must supply a Proc with the cleaner" unless block || cleaner
|
12
|
+
@@global_cleaners[name.to_sym] = cleaner || block
|
13
13
|
end
|
14
14
|
|
15
15
|
# Returns the cleaner matching the supplied cleaner name
|
data/test/active_record_test.rb
CHANGED
@@ -1,18 +1,5 @@
|
|
1
|
-
|
2
|
-
$LOAD_PATH.unshift File.dirname(__FILE__) + '/../lib'
|
3
|
-
|
4
|
-
require 'rubygems'
|
5
|
-
require 'test/unit'
|
6
|
-
require 'shoulda'
|
7
|
-
# Load ActiveRecord before loading data_cleansing so that the AR extensions
|
8
|
-
# are loaded
|
1
|
+
require_relative 'test_helper'
|
9
2
|
require 'active_record'
|
10
|
-
require 'data_cleansing'
|
11
|
-
require 'semantic_logger'
|
12
|
-
|
13
|
-
# Register an appender if one is not already registered
|
14
|
-
SemanticLogger.default_level = :trace
|
15
|
-
SemanticLogger.add_appender('test.log') if SemanticLogger.appenders.size == 0
|
16
3
|
|
17
4
|
ActiveRecord::Base.logger = SemanticLogger[ActiveRecord::Base]
|
18
5
|
ActiveRecord::Base.configurations = {
|
@@ -36,9 +23,6 @@ ActiveRecord::Schema.define :version => 0 do
|
|
36
23
|
end
|
37
24
|
end
|
38
25
|
|
39
|
-
# Define a global cleaner
|
40
|
-
DataCleansing.register_cleaner(:strip) {|string| string.strip}
|
41
|
-
|
42
26
|
# Log data cleansing result
|
43
27
|
# Set to :warn or higher to disable
|
44
28
|
DataCleansing.logger.level = :debug
|
@@ -46,20 +30,6 @@ DataCleansing.logger.level = :debug
|
|
46
30
|
# Set the Global list of fields to be masked
|
47
31
|
DataCleansing.register_masked_attributes :ssn, :bank_account_number
|
48
32
|
|
49
|
-
# Removes all non-digit characters, except '.' then truncates
|
50
|
-
# the result to an integer string
|
51
|
-
# Returns nil if no digits are present in the string
|
52
|
-
DataCleansing.register_cleaner(:digits_to_integer) do |integer|
|
53
|
-
if integer.kind_of?(String)
|
54
|
-
# Remove Non-Digit Chars, except for '.'
|
55
|
-
integer = integer.gsub(/[^0-9\.]/, '')
|
56
|
-
integer.length > 0 ? integer.to_i : nil
|
57
|
-
else
|
58
|
-
integer
|
59
|
-
end
|
60
|
-
end
|
61
|
-
|
62
|
-
|
63
33
|
class User < ActiveRecord::Base
|
64
34
|
include DataCleansing::Cleanse
|
65
35
|
|
@@ -73,7 +43,7 @@ class User < ActiveRecord::Base
|
|
73
43
|
cleanse :address1, :address2, :instance_var, :cleaner => Proc.new {|string| "<< #{string.strip} >>"}
|
74
44
|
|
75
45
|
# Custom Zip Code cleaner
|
76
|
-
cleanse :zip_code, :cleaner => :
|
46
|
+
cleanse :zip_code, :cleaner => :string_to_integer
|
77
47
|
|
78
48
|
# Automatically cleanse data before validation
|
79
49
|
before_validation :cleanse_attributes!
|
@@ -94,20 +64,20 @@ class User2 < ActiveRecord::Base
|
|
94
64
|
cleanse :first_name, :cleaner => Proc.new {|string| "$#{string}$"}
|
95
65
|
|
96
66
|
# Custom Zip Code cleaner
|
97
|
-
cleanse :zip_code, :cleaner => :
|
67
|
+
cleanse :zip_code, :cleaner => :string_to_integer
|
98
68
|
|
99
69
|
# Automatically cleanse data before validation
|
100
70
|
before_validation :cleanse_attributes!
|
101
71
|
end
|
102
72
|
|
103
|
-
class ActiveRecordTest < Test
|
104
|
-
|
73
|
+
class ActiveRecordTest < Minitest::Test
|
74
|
+
describe "ActiveRecord Models" do
|
105
75
|
|
106
|
-
|
76
|
+
it 'have globally registered cleaner' do
|
107
77
|
assert DataCleansing.cleaner(:strip)
|
108
78
|
end
|
109
79
|
|
110
|
-
|
80
|
+
it 'Model.cleanse_attribute' do
|
111
81
|
assert_equal 'joe', User.cleanse_attribute(:first_name, ' joe ')
|
112
82
|
assert_equal 'black', User.cleanse_attribute(:last_name, "\n black\n")
|
113
83
|
assert_equal '<< 2632 Brown St >>', User.cleanse_attribute(:address1, "2632 Brown St \n")
|
@@ -115,8 +85,8 @@ class ActiveRecordTest < Test::Unit::TestCase
|
|
115
85
|
assert_equal 12345, User.cleanse_attribute(:zip_code, "\n\tblah 12345badtext\n")
|
116
86
|
end
|
117
87
|
|
118
|
-
|
119
|
-
|
88
|
+
describe "with user" do
|
89
|
+
before do
|
120
90
|
@user = User.new(
|
121
91
|
:first_name => ' joe ',
|
122
92
|
:last_name => "\n black\n",
|
@@ -126,30 +96,30 @@ class ActiveRecordTest < Test::Unit::TestCase
|
|
126
96
|
)
|
127
97
|
end
|
128
98
|
|
129
|
-
|
99
|
+
it 'only have 3 cleaners' do
|
130
100
|
assert_equal 3, User.send(:data_cleansing_cleaners).size, User.send(:data_cleansing_cleaners)
|
131
101
|
end
|
132
102
|
|
133
|
-
|
103
|
+
it 'cleanse_attributes! using global cleaner' do
|
134
104
|
assert_equal true, @user.valid?
|
135
105
|
assert_equal 'joe', @user.first_name
|
136
106
|
assert_equal 'black', @user.last_name
|
137
107
|
end
|
138
108
|
|
139
|
-
|
109
|
+
it 'cleanse_attributes! using attribute specific custom cleaner' do
|
140
110
|
assert_equal true, @user.valid?
|
141
111
|
assert_equal '<< 2632 Brown St >>', @user.address1
|
142
112
|
assert_equal '<< instance >>', @user.instance_var
|
143
113
|
end
|
144
114
|
|
145
|
-
|
115
|
+
it 'cleanse_attributes! using global cleaner using rails extensions' do
|
146
116
|
@user.cleanse_attributes!
|
147
117
|
assert_equal 12345, @user.zip_code
|
148
118
|
end
|
149
119
|
end
|
150
120
|
|
151
|
-
|
152
|
-
|
121
|
+
describe "with user2" do
|
122
|
+
before do
|
153
123
|
@user = User2.new(
|
154
124
|
:first_name => ' joe ',
|
155
125
|
:last_name => "\n black\n",
|
@@ -159,16 +129,16 @@ class ActiveRecordTest < Test::Unit::TestCase
|
|
159
129
|
)
|
160
130
|
end
|
161
131
|
|
162
|
-
|
132
|
+
it 'have 4 cleaners defined' do
|
163
133
|
assert_equal 4, User2.send(:data_cleansing_cleaners).size, User2.send(:data_cleansing_cleaners)
|
164
134
|
end
|
165
135
|
|
166
|
-
|
136
|
+
it 'have 3 attributes cleaners defined' do
|
167
137
|
# :all, :first_name, :zip_code
|
168
138
|
assert_equal 3, User2.send(:data_cleansing_attribute_cleaners).size, User2.send(:data_cleansing_attribute_cleaners)
|
169
139
|
end
|
170
140
|
|
171
|
-
|
141
|
+
it 'cleanse_attributes! clean all attributes' do
|
172
142
|
assert_equal true, @user.valid?
|
173
143
|
assert_equal '$<< @joe@ >>$', @user.first_name, User2.send(:data_cleansing_cleaners)
|
174
144
|
assert_equal '@black@', @user.last_name
|
@@ -180,4 +150,4 @@ class ActiveRecordTest < Test::Unit::TestCase
|
|
180
150
|
end
|
181
151
|
|
182
152
|
end
|
183
|
-
end
|
153
|
+
end
|
data/test/ruby_test.rb
CHANGED
@@ -1,13 +1,4 @@
|
|
1
|
-
|
2
|
-
$LOAD_PATH.unshift File.dirname(__FILE__) + '/../lib'
|
3
|
-
|
4
|
-
require 'rubygems'
|
5
|
-
require 'test/unit'
|
6
|
-
require 'shoulda'
|
7
|
-
require 'data_cleansing'
|
8
|
-
|
9
|
-
# Define a global cleanser
|
10
|
-
DataCleansing.register_cleaner(:strip) {|string| string.strip}
|
1
|
+
require_relative 'test_helper'
|
11
2
|
|
12
3
|
# Non Cleansing base class
|
13
4
|
class RubyUserBase
|
@@ -43,9 +34,6 @@ class RubyUserChild < RubyUser
|
|
43
34
|
cleanse :gender, :cleaner => Proc.new {|gender| gender.to_s.strip.downcase}
|
44
35
|
end
|
45
36
|
|
46
|
-
# Another global cleaner, used by RubyUser2
|
47
|
-
DataCleansing.register_cleaner(:upcase) {|string| string.upcase}
|
48
|
-
|
49
37
|
class RubyUser2
|
50
38
|
include DataCleansing::Cleanse
|
51
39
|
|
@@ -75,46 +63,46 @@ class RubyUser2
|
|
75
63
|
]
|
76
64
|
end
|
77
65
|
|
78
|
-
class RubyTest < Test
|
79
|
-
|
66
|
+
class RubyTest < Minitest::Test
|
67
|
+
describe "Ruby Models" do
|
80
68
|
|
81
|
-
|
69
|
+
it 'have globally registered cleaner' do
|
82
70
|
assert DataCleansing.cleaner(:strip)
|
83
71
|
end
|
84
72
|
|
85
|
-
|
73
|
+
it 'Model.cleanse_attribute' do
|
86
74
|
assert_equal 'male', RubyUserChild.cleanse_attribute(:gender, "\n Male \n"), RubyUserChild.send(:data_cleansing_attribute_cleaners)
|
87
75
|
assert_equal 'joe', RubyUserChild.cleanse_attribute(:first_name, ' joe '), RubyUserChild.send(:data_cleansing_attribute_cleaners)
|
88
76
|
assert_equal 'black', RubyUserChild.cleanse_attribute(:last_name, "\n black\n"), RubyUserChild.send(:data_cleansing_attribute_cleaners)
|
89
77
|
assert_equal '<< 2632 Brown St >>', RubyUserChild.cleanse_attribute(:address1, "2632 Brown St \n"), RubyUserChild.send(:data_cleansing_attribute_cleaners)
|
90
78
|
end
|
91
79
|
|
92
|
-
|
93
|
-
|
80
|
+
describe "with ruby user" do
|
81
|
+
before do
|
94
82
|
@user = RubyUser.new
|
95
83
|
@user.first_name = ' joe '
|
96
84
|
@user.last_name = "\n black\n"
|
97
85
|
@user.address1 = "2632 Brown St \n"
|
98
86
|
end
|
99
87
|
|
100
|
-
|
88
|
+
it 'cleanse_attributes! using global cleaner' do
|
101
89
|
@user.cleanse_attributes!
|
102
90
|
assert_equal 'joe', @user.first_name
|
103
91
|
assert_equal 'black', @user.last_name
|
104
92
|
end
|
105
93
|
|
106
|
-
|
94
|
+
it 'cleanse_attributes! using attribute specific custom cleaner' do
|
107
95
|
@user.cleanse_attributes!
|
108
96
|
assert_equal '<< 2632 Brown St >>', @user.address1
|
109
97
|
end
|
110
98
|
|
111
|
-
|
99
|
+
it 'cleanse_attributes! not cleanse nil attributes' do
|
112
100
|
@user.first_name = nil
|
113
101
|
@user.cleanse_attributes!
|
114
102
|
assert_equal nil, @user.first_name
|
115
103
|
end
|
116
104
|
|
117
|
-
|
105
|
+
it 'cleanse_attributes! call after cleaner' do
|
118
106
|
@user.first_name = 'Jack'
|
119
107
|
@user.last_name = nil
|
120
108
|
@user.cleanse_attributes!
|
@@ -123,8 +111,8 @@ class RubyTest < Test::Unit::TestCase
|
|
123
111
|
end
|
124
112
|
end
|
125
113
|
|
126
|
-
|
127
|
-
|
114
|
+
describe "with ruby user child" do
|
115
|
+
before do
|
128
116
|
@user = RubyUserChild.new
|
129
117
|
@user.first_name = ' joe '
|
130
118
|
@user.last_name = "\n black\n"
|
@@ -132,32 +120,32 @@ class RubyTest < Test::Unit::TestCase
|
|
132
120
|
@user.gender = "\n Male \n"
|
133
121
|
end
|
134
122
|
|
135
|
-
|
123
|
+
it 'cleanse_attributes! using global cleaner' do
|
136
124
|
@user.cleanse_attributes!
|
137
125
|
assert_equal 'joe', @user.first_name
|
138
126
|
assert_equal 'black', @user.last_name
|
139
127
|
end
|
140
128
|
|
141
|
-
|
129
|
+
it 'cleanse_attributes! using attribute specific custom cleaner' do
|
142
130
|
@user.cleanse_attributes!
|
143
131
|
assert_equal '<< 2632 Brown St >>', @user.address1
|
144
132
|
end
|
145
133
|
|
146
|
-
|
134
|
+
it 'cleanse_attributes! not cleanse nil attributes' do
|
147
135
|
@user.first_name = nil
|
148
136
|
@user.cleanse_attributes!
|
149
137
|
assert_equal nil, @user.first_name
|
150
138
|
end
|
151
139
|
|
152
|
-
|
140
|
+
it 'cleanse_attributes! clean child attributes' do
|
153
141
|
@user.cleanse_attributes!
|
154
142
|
assert_equal 'male', @user.gender
|
155
143
|
end
|
156
144
|
|
157
145
|
end
|
158
146
|
|
159
|
-
|
160
|
-
|
147
|
+
describe "with ruby user2" do
|
148
|
+
before do
|
161
149
|
@user = RubyUser2.new
|
162
150
|
@user.first_name = ' joe '
|
163
151
|
@user.last_name = "\n black\n"
|
@@ -166,23 +154,23 @@ class RubyTest < Test::Unit::TestCase
|
|
166
154
|
@user.gender = " Unknown "
|
167
155
|
end
|
168
156
|
|
169
|
-
|
157
|
+
it 'cleanse_attributes!' do
|
170
158
|
@user.cleanse_attributes!
|
171
159
|
assert_equal 'joe', @user.first_name
|
172
160
|
assert_equal 'black', @user.last_name
|
173
161
|
assert_equal '2632 Brown St', @user.address1
|
174
162
|
end
|
175
163
|
|
176
|
-
|
164
|
+
it 'cleanse_attributes! with multiple cleaners' do
|
177
165
|
@user.cleanse_attributes!
|
178
166
|
assert_equal 'MR.', @user.title
|
179
167
|
end
|
180
168
|
|
181
|
-
|
169
|
+
it 'cleanse_attributes! referencing other attributes' do
|
182
170
|
@user.cleanse_attributes!
|
183
171
|
assert_equal 'Male', @user.gender
|
184
172
|
end
|
185
173
|
end
|
186
174
|
|
187
175
|
end
|
188
|
-
end
|
176
|
+
end
|
data/test/test_db.sqlite3
CHANGED
Binary file
|
data/test/test_helper.rb
ADDED
@@ -0,0 +1,13 @@
|
|
1
|
+
# Shared bootstrap for the test suite: puts lib/ on the load path, loads the
# test tooling and configures logging.
$LOAD_PATH.unshift File.dirname(__FILE__) + '/../lib'

require 'yaml'
require 'minitest/autorun'
require 'minitest/reporters'
require 'minitest/stub_any_instance'
require 'awesome_print'
require 'data_cleansing'

# Use the Minitest constant, as the test classes do (Minitest::Test). The
# legacy MiniTest alias is no longer defined by recent minitest releases
# unless MT_COMPAT is set.
Minitest::Reporters.use! Minitest::Reporters::SpecReporter.new

# Send log output to test.log, at :debug level and above.
SemanticLogger.add_appender(file_name: 'test.log', formatter: :color)
SemanticLogger.default_level = :debug
|
metadata
CHANGED
@@ -1,72 +1,66 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: data_cleansing
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.8.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Reid Morrison
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2016-03-01 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
|
-
name:
|
14
|
+
name: concurrent-ruby
|
15
15
|
requirement: !ruby/object:Gem::Requirement
|
16
16
|
requirements:
|
17
|
-
- -
|
17
|
+
- - "~>"
|
18
18
|
- !ruby/object:Gem::Version
|
19
|
-
version: '0'
|
19
|
+
version: '1.0'
|
20
20
|
type: :runtime
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
|
-
- -
|
24
|
+
- - "~>"
|
25
25
|
- !ruby/object:Gem::Version
|
26
|
-
version: '0'
|
26
|
+
version: '1.0'
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
28
|
name: semantic_logger
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
30
30
|
requirements:
|
31
|
-
- -
|
31
|
+
- - ">="
|
32
32
|
- !ruby/object:Gem::Version
|
33
|
-
version: '0'
|
33
|
+
version: '2.0'
|
34
34
|
type: :runtime
|
35
35
|
prerelease: false
|
36
36
|
version_requirements: !ruby/object:Gem::Requirement
|
37
37
|
requirements:
|
38
|
-
- -
|
38
|
+
- - ">="
|
39
39
|
- !ruby/object:Gem::Version
|
40
|
-
version: '0'
|
41
|
-
description:
|
42
|
-
Mongoid
|
40
|
+
version: '2.0'
|
41
|
+
description:
|
43
42
|
email:
|
44
43
|
- reidmo@gmail.com
|
45
44
|
executables: []
|
46
45
|
extensions: []
|
47
46
|
extra_rdoc_files: []
|
48
47
|
files:
|
49
|
-
- Gemfile
|
50
|
-
- Gemfile.lock
|
51
48
|
- LICENSE.txt
|
52
49
|
- README.md
|
53
50
|
- Rakefile
|
54
51
|
- lib/data_cleansing.rb
|
52
|
+
- lib/data_cleansing/cleaners.rb
|
55
53
|
- lib/data_cleansing/cleanse.rb
|
56
54
|
- lib/data_cleansing/data_cleansing.rb
|
57
55
|
- lib/data_cleansing/railtie.rb
|
58
56
|
- lib/data_cleansing/version.rb
|
59
|
-
- nbproject/private/private.properties
|
60
|
-
- nbproject/private/private.xml
|
61
|
-
- nbproject/private/rake-d.txt
|
62
|
-
- nbproject/project.properties
|
63
|
-
- nbproject/project.xml
|
64
57
|
- test/active_record_test.rb
|
65
58
|
- test/ruby_test.rb
|
66
59
|
- test/test_db.sqlite3
|
67
|
-
|
60
|
+
- test/test_helper.rb
|
61
|
+
homepage: http://github.com/reidmorrison/data_cleansing
|
68
62
|
licenses:
|
69
|
-
- Apache
|
63
|
+
- Apache-2.0
|
70
64
|
metadata: {}
|
71
65
|
post_install_message:
|
72
66
|
rdoc_options: []
|
@@ -74,18 +68,22 @@ require_paths:
|
|
74
68
|
- lib
|
75
69
|
required_ruby_version: !ruby/object:Gem::Requirement
|
76
70
|
requirements:
|
77
|
-
- -
|
71
|
+
- - ">="
|
78
72
|
- !ruby/object:Gem::Version
|
79
73
|
version: '0'
|
80
74
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
81
75
|
requirements:
|
82
|
-
- -
|
76
|
+
- - ">="
|
83
77
|
- !ruby/object:Gem::Version
|
84
78
|
version: '0'
|
85
79
|
requirements: []
|
86
80
|
rubyforge_project:
|
87
|
-
rubygems_version: 2.
|
81
|
+
rubygems_version: 2.5.1
|
88
82
|
signing_key:
|
89
83
|
specification_version: 4
|
90
|
-
summary: Data Cleansing framework for Ruby,
|
91
|
-
test_files:
|
84
|
+
summary: Data Cleansing framework for Ruby, Rails, Mongoid and MongoMapper.
|
85
|
+
test_files:
|
86
|
+
- test/active_record_test.rb
|
87
|
+
- test/ruby_test.rb
|
88
|
+
- test/test_db.sqlite3
|
89
|
+
- test/test_helper.rb
|
data/Gemfile
DELETED
@@ -1,21 +0,0 @@
|
|
1
|
-
source 'https://rubygems.org'
|
2
|
-
gem 'thread_safe'
|
3
|
-
gem 'semantic_logger'
|
4
|
-
|
5
|
-
group :test do
|
6
|
-
gem "shoulda"
|
7
|
-
|
8
|
-
gem "activerecord"
|
9
|
-
gem 'sqlite3', :platform => :ruby
|
10
|
-
|
11
|
-
platforms :jruby do
|
12
|
-
gem 'jdbc-sqlite3'
|
13
|
-
gem 'activerecord-jdbcsqlite3-adapter'
|
14
|
-
end
|
15
|
-
|
16
|
-
gem "mongoid"
|
17
|
-
end
|
18
|
-
|
19
|
-
group :develop do
|
20
|
-
gem 'awesome_print'
|
21
|
-
end
|
data/Gemfile.lock
DELETED
@@ -1,63 +0,0 @@
|
|
1
|
-
GEM
|
2
|
-
remote: https://rubygems.org/
|
3
|
-
specs:
|
4
|
-
activemodel (4.0.0)
|
5
|
-
activesupport (= 4.0.0)
|
6
|
-
builder (~> 3.1.0)
|
7
|
-
activerecord (4.0.0)
|
8
|
-
activemodel (= 4.0.0)
|
9
|
-
activerecord-deprecated_finders (~> 1.0.2)
|
10
|
-
activesupport (= 4.0.0)
|
11
|
-
arel (~> 4.0.0)
|
12
|
-
activerecord-deprecated_finders (1.0.3)
|
13
|
-
activesupport (4.0.0)
|
14
|
-
i18n (~> 0.6, >= 0.6.4)
|
15
|
-
minitest (~> 4.2)
|
16
|
-
multi_json (~> 1.3)
|
17
|
-
thread_safe (~> 0.1)
|
18
|
-
tzinfo (~> 0.3.37)
|
19
|
-
arel (4.0.0)
|
20
|
-
atomic (1.1.10)
|
21
|
-
awesome_print (1.1.0)
|
22
|
-
bson (1.9.1)
|
23
|
-
builder (3.1.4)
|
24
|
-
durran-validatable (2.0.1)
|
25
|
-
i18n (0.6.4)
|
26
|
-
leshill-will_paginate (2.3.11)
|
27
|
-
minitest (4.7.5)
|
28
|
-
mongo (1.9.1)
|
29
|
-
bson (~> 1.9.1)
|
30
|
-
mongoid (1.0.6)
|
31
|
-
activesupport (>= 2.2.2)
|
32
|
-
durran-validatable (>= 2.0.1)
|
33
|
-
leshill-will_paginate (>= 2.3.11)
|
34
|
-
mongo (>= 0.18.2)
|
35
|
-
multi_json (1.7.7)
|
36
|
-
semantic_logger (2.1.0)
|
37
|
-
sync_attr (>= 1.0)
|
38
|
-
thread_safe (>= 0.1.0)
|
39
|
-
shoulda (3.5.0)
|
40
|
-
shoulda-context (~> 1.0, >= 1.0.1)
|
41
|
-
shoulda-matchers (>= 1.4.1, < 3.0)
|
42
|
-
shoulda-context (1.1.4)
|
43
|
-
shoulda-matchers (2.2.0)
|
44
|
-
activesupport (>= 3.0.0)
|
45
|
-
sqlite3 (1.3.7)
|
46
|
-
sync_attr (1.0.0)
|
47
|
-
thread_safe (0.1.0)
|
48
|
-
atomic
|
49
|
-
tzinfo (0.3.37)
|
50
|
-
|
51
|
-
PLATFORMS
|
52
|
-
ruby
|
53
|
-
|
54
|
-
DEPENDENCIES
|
55
|
-
activerecord
|
56
|
-
activerecord-jdbcsqlite3-adapter
|
57
|
-
awesome_print
|
58
|
-
jdbc-sqlite3
|
59
|
-
mongoid
|
60
|
-
semantic_logger
|
61
|
-
shoulda
|
62
|
-
sqlite3
|
63
|
-
thread_safe
|
data/nbproject/project.properties
DELETED
@@ -1,9 +0,0 @@
|
|
1
|
-
examples.dir=${file.reference.data_cleansing-examples}
|
2
|
-
file.reference.data_cleansing-examples=examples
|
3
|
-
file.reference.data_cleansing-lib=lib
|
4
|
-
file.reference.data_cleansing-test=test
|
5
|
-
main.file=
|
6
|
-
platform.active=Ruby_1
|
7
|
-
source.encoding=UTF-8
|
8
|
-
src.dir=${file.reference.data_cleansing-lib}
|
9
|
-
test.src.dir=${file.reference.data_cleansing-test}
|
data/nbproject/project.xml
DELETED
@@ -1,16 +0,0 @@
|
|
1
|
-
<?xml version="1.0" encoding="UTF-8"?>
|
2
|
-
<project xmlns="http://www.netbeans.org/ns/project/1">
|
3
|
-
<type>org.netbeans.modules.ruby.rubyproject</type>
|
4
|
-
<configuration>
|
5
|
-
<data xmlns="http://www.netbeans.org/ns/ruby-project/1">
|
6
|
-
<name>data_cleansing</name>
|
7
|
-
<source-roots>
|
8
|
-
<root id="src.dir"/>
|
9
|
-
<root id="examples.dir"/>
|
10
|
-
</source-roots>
|
11
|
-
<test-roots>
|
12
|
-
<root id="test.src.dir"/>
|
13
|
-
</test-roots>
|
14
|
-
</data>
|
15
|
-
</configuration>
|
16
|
-
</project>
|