data_cleansing 0.3.0 → 0.3.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +43 -38
- data/lib/data_cleansing/cleanse.rb +10 -6
- data/lib/data_cleansing/version.rb +1 -1
- data/test/active_record_test.rb +20 -7
- data/test/test_db.sqlite3 +0 -0
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 64293789c3bf568975be9ed1c8c05c8536c23144
|
4
|
+
data.tar.gz: dc47cfa960847eaf926eb7f9695c92750d05c515
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: e7f84d6c1858468a1bcd916f98cf14b20869c3a50bf556001192eb6f166b97c4489d5a20b062604afeb3cc309eb709423e3401f50bf77f7f9980decd2c931688
|
7
|
+
data.tar.gz: 6ce9b1ea07813cca4ee601c63dadab67c57c2d0e4e729f1a9b10175312be92c218d727ceca84060169215a24fd6e3d676cade7cc5a57de2918be81b7fe81ae90
|
data/README.md
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
data_cleansing
|
2
2
|
==============
|
3
3
|
|
4
|
-
Data Cleansing
|
4
|
+
Data Cleansing framework for Ruby with additional support for Rails and Mongoid
|
5
5
|
|
6
6
|
* http://github.com/reidmorrison/data_cleansing
|
7
7
|
|
@@ -27,7 +27,7 @@ or pull request.
|
|
27
27
|
|
28
28
|
## Examples
|
29
29
|
|
30
|
-
###
|
30
|
+
### Ruby Example
|
31
31
|
```ruby
|
32
32
|
require 'data_cleansing'
|
33
33
|
|
@@ -54,7 +54,35 @@ puts "After data cleansing #{u.inspect}"
|
|
54
54
|
# After data cleansing After data cleansing #<User:0x007fc9f1081980 @first_name="joe", @last_name="black">
|
55
55
|
```
|
56
56
|
|
57
|
-
###
|
57
|
+
### Rails Example
|
58
|
+
|
59
|
+
```ruby
|
60
|
+
# Define a global cleanser
|
61
|
+
DataCleansing.register_cleaner(:strip) {|string| string.strip!}
|
62
|
+
|
63
|
+
# 'users' table has the following columns :first_name, :last_name, :address1, :address2
|
64
|
+
class User < ActiveRecord::Base
|
65
|
+
include DataCleansing::Cleanse
|
66
|
+
|
67
|
+
# Use a global cleaner
|
68
|
+
cleanse :first_name, :last_name, :cleaner => :strip
|
69
|
+
|
70
|
+
# Define a once off cleaner
|
71
|
+
cleanse :address1, :address2, :cleaner => Proc.new {|string| string.strip!}
|
72
|
+
|
73
|
+
# Automatically cleanse data before validation
|
74
|
+
before_validation :cleanse_attributes!
|
75
|
+
end
|
76
|
+
|
77
|
+
# Create a User instance
|
78
|
+
u = User.new(:first_name => ' joe ', :last_name => "\n black\n", :address1 => "2632 Brown St \n")
|
79
|
+
puts "Before data cleansing #{u.attributes.inspect}"
|
80
|
+
u.validate
|
81
|
+
puts "After data cleansing #{u.attributes.inspect}"
|
82
|
+
u.save!
|
83
|
+
```
|
84
|
+
|
85
|
+
### Advanced Ruby Example
|
58
86
|
|
59
87
|
```ruby
|
60
88
|
require 'data_cleansing'
|
@@ -106,34 +134,6 @@ puts "After data cleansing #{u.inspect}"
|
|
106
134
|
# After data cleansing #<User:0x007fdd5a83a8f8 @first_name="joe", @last_name="black", @address1="2632 Brown St", @title="MR.", @gender="Male">
|
107
135
|
```
|
108
136
|
|
109
|
-
### Rails Example
|
110
|
-
|
111
|
-
```ruby
|
112
|
-
# Define a global cleanser
|
113
|
-
DataCleansing.register_cleaner(:strip) {|string| string.strip!}
|
114
|
-
|
115
|
-
# 'users' table has the following columns :first_name, :last_name, :address1, :address2
|
116
|
-
class User < ActiveRecord::Base
|
117
|
-
include DataCleansing::Cleanse
|
118
|
-
|
119
|
-
# Use a global cleaner
|
120
|
-
cleanse :first_name, :last_name, :cleaner => :strip
|
121
|
-
|
122
|
-
# Define a once off cleaner
|
123
|
-
cleanse :address1, :address2, :cleaner => Proc.new {|string| string.strip!}
|
124
|
-
|
125
|
-
# Automatically cleanse data before validation
|
126
|
-
before_validation :cleanse_attributes!
|
127
|
-
end
|
128
|
-
|
129
|
-
# Create a User instance
|
130
|
-
u = User.new(:first_name => ' joe ', :last_name => "\n black\n", :address1 => "2632 Brown St \n")
|
131
|
-
puts "Before data cleansing #{u.attributes.inspect}"
|
132
|
-
u.validate
|
133
|
-
puts "After data cleansing #{u.attributes.inspect}"
|
134
|
-
u.save!
|
135
|
-
```
|
136
|
-
|
137
137
|
## Notes
|
138
138
|
|
139
139
|
Cleaners are called in the order in which they are defined, so subsequent cleaners
|
@@ -172,8 +172,15 @@ For example, in Rails it obtains the raw data value before Rails has converted i
|
|
172
172
|
Which is useful for cleansing integer or float fields as raw strings before Rails
|
173
173
|
tries to convert it to an integer or float.
|
174
174
|
|
175
|
-
|
176
|
-
|
175
|
+
## Dependencies
|
176
|
+
|
177
|
+
DataCleansing requires the following dependencies
|
178
|
+
|
179
|
+
* Ruby V1.8.7, V1.9.3 or V2 and greater
|
180
|
+
* Rails V2 or greater for Rails integration ( Only if Rails is being used )
|
181
|
+
* Mongoid V2 or greater for Rails integration ( Only if Mongoid is being used )
|
182
|
+
|
183
|
+
## Meta
|
177
184
|
|
178
185
|
* Code: `git clone git://github.com/reidmorrison/data_cleansing.git`
|
179
186
|
* Home: <https://github.com/reidmorrison/data_cleansing>
|
@@ -182,15 +189,13 @@ Meta
|
|
182
189
|
|
183
190
|
This project uses [Semantic Versioning](http://semver.org/).
|
184
191
|
|
185
|
-
Authors
|
186
|
-
-------
|
192
|
+
## Authors
|
187
193
|
|
188
194
|
Reid Morrison :: reidmo@gmail.com :: @reidmorrison
|
189
195
|
|
190
|
-
License
|
191
|
-
-------
|
196
|
+
## License
|
192
197
|
|
193
|
-
Copyright 2013 Reid Morrison
|
198
|
+
Copyright 2013 Reid Morrison
|
194
199
|
|
195
200
|
Licensed under the Apache License, Version 2.0 (the "License");
|
196
201
|
you may not use this file except in compliance with the License.
|
data/lib/data_cleansing/cleanse.rb
CHANGED
@@ -23,16 +23,14 @@ module DataCleansing
|
|
23
23
|
# Cleanse the attributes using specified cleaners
|
24
24
|
def cleanse_attributes!
|
25
25
|
self.class.cleaners.each do |cleaner_struct|
|
26
|
-
params
|
27
|
-
attrs
|
26
|
+
params = cleaner_struct.params
|
27
|
+
attrs = cleaner_struct.attributes
|
28
28
|
|
29
29
|
# Special case to include :all fields
|
30
30
|
# Only works with ActiveRecord based models, not supported with regular Ruby models
|
31
31
|
if attrs.include?(:all) && defined?(ActiveRecord) && respond_to?(:attributes)
|
32
32
|
attrs = attributes.keys.collect{|i| i.to_sym}
|
33
|
-
|
34
|
-
attrs -= except
|
35
|
-
end
|
33
|
+
attrs.delete(:id)
|
36
34
|
|
37
35
|
# Remove serialized_attributes if any, from the :all condition
|
38
36
|
if self.class.respond_to?(:serialized_attributes)
|
@@ -49,12 +47,18 @@ module DataCleansing
|
|
49
47
|
end
|
50
48
|
end
|
51
49
|
end
|
50
|
+
|
51
|
+
# Explicitly remove specified attributes from cleansing
|
52
|
+
if except = params[:except]
|
53
|
+
attrs -= except
|
54
|
+
end
|
55
|
+
|
52
56
|
end
|
53
57
|
|
54
58
|
attrs.each do |attr|
|
55
59
|
# Under ActiveModel for Rails and Mongoid need to retrieve raw value
|
56
60
|
# before data type conversion
|
57
|
-
value = if respond_to?(:read_attribute_before_type_cast)
|
61
|
+
value = if respond_to?(:read_attribute_before_type_cast) && has_attribute?(attr.to_s)
|
58
62
|
read_attribute_before_type_cast(attr.to_s)
|
59
63
|
else
|
60
64
|
send(attr.to_sym)
|
data/test/active_record_test.rb
CHANGED
@@ -50,11 +50,14 @@ end
|
|
50
50
|
class User < ActiveRecord::Base
|
51
51
|
include DataCleansing::Cleanse
|
52
52
|
|
53
|
+
# Also cleanse non-database backed fields
|
54
|
+
attr_accessor :instance_var
|
55
|
+
|
53
56
|
# Use a global cleaner
|
54
57
|
cleanse :first_name, :last_name, :cleaner => :strip
|
55
58
|
|
56
59
|
# Define a once off cleaner
|
57
|
-
cleanse :address1, :address2, :cleaner => Proc.new {|string| "<< #{string.strip!} >>"}
|
60
|
+
cleanse :address1, :address2, :instance_var, :cleaner => Proc.new {|string| "<< #{string.strip!} >>"}
|
58
61
|
|
59
62
|
# Custom Zip Code cleaner
|
60
63
|
cleanse :zip_code, :cleaner => :digits_to_integer
|
@@ -69,7 +72,7 @@ class User2 < ActiveRecord::Base
|
|
69
72
|
self.table_name = 'users'
|
70
73
|
|
71
74
|
# Test :all cleaner. Only works with ActiveRecord Models
|
72
|
-
cleanse :all, :cleaner => :strip
|
75
|
+
cleanse :all, :cleaner => [:strip, Proc.new{|s| "@#{s}@"}], :except => [:address1, :zip_code]
|
73
76
|
|
74
77
|
# Automatically cleanse data before validation
|
75
78
|
before_validation :cleanse_attributes!
|
@@ -88,10 +91,15 @@ class ActiveRecordTest < Test::Unit::TestCase
|
|
88
91
|
:first_name => ' joe ',
|
89
92
|
:last_name => "\n black\n",
|
90
93
|
:address1 => "2632 Brown St \n",
|
91
|
-
:zip_code => "\n\tblah 12345badtext\n"
|
94
|
+
:zip_code => "\n\tblah 12345badtext\n",
|
95
|
+
:instance_var => "\n instance\n\t "
|
92
96
|
)
|
93
97
|
end
|
94
98
|
|
99
|
+
should 'only have 3 cleaners' do
|
100
|
+
assert_equal 3, User.cleaners.size, User.cleaners
|
101
|
+
end
|
102
|
+
|
95
103
|
should 'cleanse_attributes! using global cleaner' do
|
96
104
|
assert_equal true, @user.valid?
|
97
105
|
assert_equal 'joe', @user.first_name
|
@@ -101,6 +109,7 @@ class ActiveRecordTest < Test::Unit::TestCase
|
|
101
109
|
should 'cleanse_attributes! using attribute specific custom cleaner' do
|
102
110
|
assert_equal true, @user.valid?
|
103
111
|
assert_equal '<< 2632 Brown St >>', @user.address1
|
112
|
+
assert_equal '<< instance >>', @user.instance_var
|
104
113
|
end
|
105
114
|
|
106
115
|
should 'cleanse_attributes! using global cleaner using rails extensions' do
|
@@ -119,12 +128,16 @@ class ActiveRecordTest < Test::Unit::TestCase
|
|
119
128
|
)
|
120
129
|
end
|
121
130
|
|
131
|
+
should 'only have 1 cleaner' do
|
132
|
+
assert_equal 1, User2.cleaners.size, User2.cleaners
|
133
|
+
end
|
134
|
+
|
122
135
|
should 'cleanse_attributes! clean all attributes' do
|
123
136
|
assert_equal true, @user.valid?
|
124
|
-
assert_equal 'joe', @user.first_name, User2.cleaners
|
125
|
-
assert_equal 'black', @user.last_name
|
126
|
-
assert_equal
|
127
|
-
assert_equal 12345, @user.zip_code
|
137
|
+
assert_equal '@joe@', @user.first_name, User2.cleaners
|
138
|
+
assert_equal '@black@', @user.last_name
|
139
|
+
assert_equal "2632 Brown St \n", @user.address1
|
140
|
+
assert_equal 12345, @user.zip_code, User2.cleaners
|
128
141
|
end
|
129
142
|
|
130
143
|
end
|
data/test/test_db.sqlite3
CHANGED
Binary file
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: data_cleansing
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.0
|
4
|
+
version: 0.3.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Reid Morrison
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2013-07-
|
11
|
+
date: 2013-07-17 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: thread_safe
|