data_cleansing 0.3.0 → 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +43 -38
- data/lib/data_cleansing/cleanse.rb +10 -6
- data/lib/data_cleansing/version.rb +1 -1
- data/test/active_record_test.rb +20 -7
- data/test/test_db.sqlite3 +0 -0
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 64293789c3bf568975be9ed1c8c05c8536c23144
|
4
|
+
data.tar.gz: dc47cfa960847eaf926eb7f9695c92750d05c515
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: e7f84d6c1858468a1bcd916f98cf14b20869c3a50bf556001192eb6f166b97c4489d5a20b062604afeb3cc309eb709423e3401f50bf77f7f9980decd2c931688
|
7
|
+
data.tar.gz: 6ce9b1ea07813cca4ee601c63dadab67c57c2d0e4e729f1a9b10175312be92c218d727ceca84060169215a24fd6e3d676cade7cc5a57de2918be81b7fe81ae90
|
data/README.md
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
data_cleansing
|
2
2
|
==============
|
3
3
|
|
4
|
-
Data Cleansing
|
4
|
+
Data Cleansing framework for Ruby with additional support for Rails and Mongoid
|
5
5
|
|
6
6
|
* http://github.com/reidmorrison/data_cleansing
|
7
7
|
|
@@ -27,7 +27,7 @@ or pull request.
|
|
27
27
|
|
28
28
|
## Examples
|
29
29
|
|
30
|
-
###
|
30
|
+
### Ruby Example
|
31
31
|
```ruby
|
32
32
|
require 'data_cleansing'
|
33
33
|
|
@@ -54,7 +54,35 @@ puts "After data cleansing #{u.inspect}"
|
|
54
54
|
# After data cleansing #<User:0x007fc9f1081980 @first_name="joe", @last_name="black">
|
55
55
|
```
|
56
56
|
|
57
|
-
###
|
57
|
+
### Rails Example
|
58
|
+
|
59
|
+
```ruby
|
60
|
+
# Define a global cleanser
|
61
|
+
DataCleansing.register_cleaner(:strip) {|string| string.strip!}
|
62
|
+
|
63
|
+
# 'users' table has the following columns :first_name, :last_name, :address1, :address2
|
64
|
+
class User < ActiveRecord::Base
|
65
|
+
include DataCleansing::Cleanse
|
66
|
+
|
67
|
+
# Use a global cleaner
|
68
|
+
cleanse :first_name, :last_name, :cleaner => :strip
|
69
|
+
|
70
|
+
# Define a once off cleaner
|
71
|
+
cleanse :address1, :address2, :cleaner => Proc.new {|string| string.strip!}
|
72
|
+
|
73
|
+
# Automatically cleanse data before validation
|
74
|
+
before_validation :cleanse_attributes!
|
75
|
+
end
|
76
|
+
|
77
|
+
# Create a User instance
|
78
|
+
u = User.new(:first_name => ' joe ', :last_name => "\n black\n", :address1 => "2632 Brown St \n")
|
79
|
+
puts "Before data cleansing #{u.attributes.inspect}"
|
80
|
+
u.validate
|
81
|
+
puts "After data cleansing #{u.attributes.inspect}"
|
82
|
+
u.save!
|
83
|
+
```
|
84
|
+
|
85
|
+
### Advanced Ruby Example
|
58
86
|
|
59
87
|
```ruby
|
60
88
|
require 'data_cleansing'
|
@@ -106,34 +134,6 @@ puts "After data cleansing #{u.inspect}"
|
|
106
134
|
# After data cleansing #<User:0x007fdd5a83a8f8 @first_name="joe", @last_name="black", @address1="2632 Brown St", @title="MR.", @gender="Male">
|
107
135
|
```
|
108
136
|
|
109
|
-
### Rails Example
|
110
|
-
|
111
|
-
```ruby
|
112
|
-
# Define a global cleanser
|
113
|
-
DataCleansing.register_cleaner(:strip) {|string| string.strip!}
|
114
|
-
|
115
|
-
# 'users' table has the following columns :first_name, :last_name, :address1, :address2
|
116
|
-
class User < ActiveRecord::Base
|
117
|
-
include DataCleansing::Cleanse
|
118
|
-
|
119
|
-
# Use a global cleaner
|
120
|
-
cleanse :first_name, :last_name, :cleaner => :strip
|
121
|
-
|
122
|
-
# Define a once off cleaner
|
123
|
-
cleanse :address1, :address2, :cleaner => Proc.new {|string| string.strip!}
|
124
|
-
|
125
|
-
# Automatically cleanse data before validation
|
126
|
-
before_validation :cleanse_attributes!
|
127
|
-
end
|
128
|
-
|
129
|
-
# Create a User instance
|
130
|
-
u = User.new(:first_name => ' joe ', :last_name => "\n black\n", :address1 => "2632 Brown St \n")
|
131
|
-
puts "Before data cleansing #{u.attributes.inspect}"
|
132
|
-
u.validate
|
133
|
-
puts "After data cleansing #{u.attributes.inspect}"
|
134
|
-
u.save!
|
135
|
-
```
|
136
|
-
|
137
137
|
## Notes
|
138
138
|
|
139
139
|
Cleaners are called in the order in which they are defined, so subsequent cleaners
|
@@ -172,8 +172,15 @@ For example, in Rails it obtains the raw data value before Rails has converted i
|
|
172
172
|
Which is useful for cleansing integer or float fields as raw strings before Rails
|
173
173
|
tries to convert it to an integer or float.
|
174
174
|
|
175
|
-
|
176
|
-
|
175
|
+
## Dependencies
|
176
|
+
|
177
|
+
DataCleansing requires the following dependencies
|
178
|
+
|
179
|
+
* Ruby V1.8.7, V1.9.3 or V2 and greater
|
180
|
+
* Rails V2 or greater for Rails integration ( Only if Rails is being used )
|
181
|
+
* Mongoid V2 or greater for Mongoid integration ( Only if Mongoid is being used )
|
182
|
+
|
183
|
+
## Meta
|
177
184
|
|
178
185
|
* Code: `git clone git://github.com/reidmorrison/data_cleansing.git`
|
179
186
|
* Home: <https://github.com/reidmorrison/data_cleansing>
|
@@ -182,15 +189,13 @@ Meta
|
|
182
189
|
|
183
190
|
This project uses [Semantic Versioning](http://semver.org/).
|
184
191
|
|
185
|
-
Authors
|
186
|
-
-------
|
192
|
+
## Authors
|
187
193
|
|
188
194
|
Reid Morrison :: reidmo@gmail.com :: @reidmorrison
|
189
195
|
|
190
|
-
License
|
191
|
-
-------
|
196
|
+
## License
|
192
197
|
|
193
|
-
Copyright 2013 Reid Morrison
|
198
|
+
Copyright 2013 Reid Morrison
|
194
199
|
|
195
200
|
Licensed under the Apache License, Version 2.0 (the "License");
|
196
201
|
you may not use this file except in compliance with the License.
|
data/lib/data_cleansing/cleanse.rb
CHANGED
@@ -23,16 +23,14 @@ module DataCleansing
|
|
23
23
|
# Cleanse the attributes using specified cleaners
|
24
24
|
def cleanse_attributes!
|
25
25
|
self.class.cleaners.each do |cleaner_struct|
|
26
|
-
params
|
27
|
-
attrs
|
26
|
+
params = cleaner_struct.params
|
27
|
+
attrs = cleaner_struct.attributes
|
28
28
|
|
29
29
|
# Special case to include :all fields
|
30
30
|
# Only works with ActiveRecord based models, not supported with regular Ruby models
|
31
31
|
if attrs.include?(:all) && defined?(ActiveRecord) && respond_to?(:attributes)
|
32
32
|
attrs = attributes.keys.collect{|i| i.to_sym}
|
33
|
-
|
34
|
-
attrs -= except
|
35
|
-
end
|
33
|
+
attrs.delete(:id)
|
36
34
|
|
37
35
|
# Remove serialized_attributes if any, from the :all condition
|
38
36
|
if self.class.respond_to?(:serialized_attributes)
|
@@ -49,12 +47,18 @@ module DataCleansing
|
|
49
47
|
end
|
50
48
|
end
|
51
49
|
end
|
50
|
+
|
51
|
+
# Explicitly remove specified attributes from cleansing
|
52
|
+
if except = params[:except]
|
53
|
+
attrs -= except
|
54
|
+
end
|
55
|
+
|
52
56
|
end
|
53
57
|
|
54
58
|
attrs.each do |attr|
|
55
59
|
# Under ActiveModel for Rails and Mongoid need to retrieve raw value
|
56
60
|
# before data type conversion
|
57
|
-
value = if respond_to?(:read_attribute_before_type_cast)
|
61
|
+
value = if respond_to?(:read_attribute_before_type_cast) && has_attribute?(attr.to_s)
|
58
62
|
read_attribute_before_type_cast(attr.to_s)
|
59
63
|
else
|
60
64
|
send(attr.to_sym)
|
data/test/active_record_test.rb
CHANGED
@@ -50,11 +50,14 @@ end
|
|
50
50
|
class User < ActiveRecord::Base
|
51
51
|
include DataCleansing::Cleanse
|
52
52
|
|
53
|
+
# Also cleanse non-database backed fields
|
54
|
+
attr_accessor :instance_var
|
55
|
+
|
53
56
|
# Use a global cleaner
|
54
57
|
cleanse :first_name, :last_name, :cleaner => :strip
|
55
58
|
|
56
59
|
# Define a once off cleaner
|
57
|
-
cleanse :address1, :address2, :cleaner => Proc.new {|string| "<< #{string.strip!} >>"}
|
60
|
+
cleanse :address1, :address2, :instance_var, :cleaner => Proc.new {|string| "<< #{string.strip!} >>"}
|
58
61
|
|
59
62
|
# Custom Zip Code cleaner
|
60
63
|
cleanse :zip_code, :cleaner => :digits_to_integer
|
@@ -69,7 +72,7 @@ class User2 < ActiveRecord::Base
|
|
69
72
|
self.table_name = 'users'
|
70
73
|
|
71
74
|
# Test :all cleaner. Only works with ActiveRecord Models
|
72
|
-
cleanse :all, :cleaner => :strip
|
75
|
+
cleanse :all, :cleaner => [:strip, Proc.new{|s| "@#{s}@"}], :except => [:address1, :zip_code]
|
73
76
|
|
74
77
|
# Automatically cleanse data before validation
|
75
78
|
before_validation :cleanse_attributes!
|
@@ -88,10 +91,15 @@ class ActiveRecordTest < Test::Unit::TestCase
|
|
88
91
|
:first_name => ' joe ',
|
89
92
|
:last_name => "\n black\n",
|
90
93
|
:address1 => "2632 Brown St \n",
|
91
|
-
:zip_code => "\n\tblah 12345badtext\n"
|
94
|
+
:zip_code => "\n\tblah 12345badtext\n",
|
95
|
+
:instance_var => "\n instance\n\t "
|
92
96
|
)
|
93
97
|
end
|
94
98
|
|
99
|
+
should 'only have 3 cleaners' do
|
100
|
+
assert_equal 3, User.cleaners.size, User.cleaners
|
101
|
+
end
|
102
|
+
|
95
103
|
should 'cleanse_attributes! using global cleaner' do
|
96
104
|
assert_equal true, @user.valid?
|
97
105
|
assert_equal 'joe', @user.first_name
|
@@ -101,6 +109,7 @@ class ActiveRecordTest < Test::Unit::TestCase
|
|
101
109
|
should 'cleanse_attributes! using attribute specific custom cleaner' do
|
102
110
|
assert_equal true, @user.valid?
|
103
111
|
assert_equal '<< 2632 Brown St >>', @user.address1
|
112
|
+
assert_equal '<< instance >>', @user.instance_var
|
104
113
|
end
|
105
114
|
|
106
115
|
should 'cleanse_attributes! using global cleaner using rails extensions' do
|
@@ -119,12 +128,16 @@ class ActiveRecordTest < Test::Unit::TestCase
|
|
119
128
|
)
|
120
129
|
end
|
121
130
|
|
131
|
+
should 'only have 1 cleaner' do
|
132
|
+
assert_equal 1, User2.cleaners.size, User2.cleaners
|
133
|
+
end
|
134
|
+
|
122
135
|
should 'cleanse_attributes! clean all attributes' do
|
123
136
|
assert_equal true, @user.valid?
|
124
|
-
assert_equal 'joe', @user.first_name, User2.cleaners
|
125
|
-
assert_equal 'black', @user.last_name
|
126
|
-
assert_equal
|
127
|
-
assert_equal 12345, @user.zip_code
|
137
|
+
assert_equal '@joe@', @user.first_name, User2.cleaners
|
138
|
+
assert_equal '@black@', @user.last_name
|
139
|
+
assert_equal "2632 Brown St \n", @user.address1
|
140
|
+
assert_equal 12345, @user.zip_code, User2.cleaners
|
128
141
|
end
|
129
142
|
|
130
143
|
end
|
data/test/test_db.sqlite3
CHANGED
Binary file
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: data_cleansing
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.0
|
4
|
+
version: 0.3.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Reid Morrison
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2013-07-
|
11
|
+
date: 2013-07-17 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: thread_safe
|