data_cleansing 0.2.0 → 0.3.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +71 -16
- data/lib/data_cleansing/cleanse.rb +16 -13
- data/lib/data_cleansing/version.rb +1 -1
- data/test/ruby_test.rb +60 -0
- data/test/test_db.sqlite3 +0 -0
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: f07805d14a1985523defc78b1dead46165975d7e
|
4
|
+
data.tar.gz: 6d64e90d76621a9656a40610fcb5e9373062eb25
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: f51316bdd3a17e6be5dcebb973883b826dfa38d3956a345a455f9d795cd7859a9f9818386846dc6dd326eed96036b4aec907529911ce4c1903b1415aab40820f
|
7
|
+
data.tar.gz: b33a0fc76fa07efc63fcc74d343ae123fc2b9b7572492e9d1387bff0da4ba1490ab82f066537e50366dcb2f57e74779ca71c24d5c3519383a986faa95c25cc2e
|
data/README.md
CHANGED
@@ -3,7 +3,7 @@ data_cleansing
|
|
3
3
|
|
4
4
|
Data Cleansing solution for Ruby with additional support for Rails and Mongoid
|
5
5
|
|
6
|
-
* http://github.com/
|
6
|
+
* http://github.com/reidmorrison/data_cleansing
|
7
7
|
|
8
8
|
## Introduction
|
9
9
|
|
@@ -15,7 +15,7 @@ in a model and have the data cleansed transparently when required.
|
|
15
15
|
DataCleansing is a framework that allows any data cleansing to be applied to
|
16
16
|
specific attributes or fields. At this time it does not supply the cleaning
|
17
17
|
solutions themselves since they are usually straightforward, or so complex
|
18
|
-
that they don't tend to be too
|
18
|
+
that they don't tend to be too useful to others. However, over time built-in
|
19
19
|
cleansing solutions may be added. Feel free to submit any suggestions via a ticket
|
20
20
|
or pull request.
|
21
21
|
|
@@ -27,39 +27,91 @@ or pull request.
|
|
27
27
|
|
28
28
|
## Examples
|
29
29
|
|
30
|
+
### Simple Example
|
31
|
+
```ruby
|
32
|
+
require 'data_cleansing'
|
33
|
+
|
34
|
+
# Define a global cleaner
|
35
|
+
DataCleansing.register_cleaner(:strip) {|string| string.strip!}
|
36
|
+
|
37
|
+
class User
|
38
|
+
include DataCleansing::Cleanse
|
39
|
+
|
40
|
+
attr_accessor :first_name, :last_name
|
41
|
+
|
42
|
+
# Strip leading and trailing whitespace from first_name and last_name
|
43
|
+
cleanse :first_name, :last_name, :cleaner => :strip
|
44
|
+
end
|
45
|
+
|
46
|
+
u = User.new
|
47
|
+
u.first_name = ' joe '
|
48
|
+
u.last_name = "\n black\n"
|
49
|
+
puts "Before data cleansing #{u.inspect}"
|
50
|
+
# Before data cleansing #<User:0x007fc9f1081980 @first_name=" joe ", @last_name="\n black\n">
|
51
|
+
|
52
|
+
u.cleanse_attributes!
|
53
|
+
puts "After data cleansing #{u.inspect}"
|
54
|
+
# After data cleansing #<User:0x007fc9f1081980 @first_name="joe", @last_name="black">
|
55
|
+
```
|
56
|
+
|
30
57
|
### Ruby Example
|
31
58
|
|
32
59
|
```ruby
|
60
|
+
require 'data_cleansing'
|
61
|
+
|
62
|
+
# Define global cleaners
|
63
|
+
DataCleansing.register_cleaner(:strip) {|string| string.strip!}
|
64
|
+
DataCleansing.register_cleaner(:upcase) {|string| string.upcase!}
|
65
|
+
|
33
66
|
class User
|
34
67
|
include DataCleansing::Cleanse
|
35
68
|
|
36
|
-
attr_accessor :first_name, :last_name, :address1, :address2
|
69
|
+
attr_accessor :first_name, :last_name, :title, :address1, :address2, :gender
|
37
70
|
|
38
71
|
# Use a global cleaner
|
39
72
|
cleanse :first_name, :last_name, :cleaner => :strip
|
40
73
|
|
41
74
|
# Define a once off cleaner
|
42
75
|
cleanse :address1, :address2, :cleaner => Proc.new {|string| string.strip!}
|
43
|
-
end
|
44
76
|
|
45
|
-
#
|
46
|
-
|
77
|
+
# Use multiple cleaners, and a custom block
|
78
|
+
cleanse :title, :cleaner => [:strip, :upcase, Proc.new {|string| "#{string}." unless string.end_with?('.')}]
|
79
|
+
|
80
|
+
# Change the cleansing rule based on the value of other attributes in that instance of user
|
81
|
+
# The 'title' is retrieved from the current instance of the user
|
82
|
+
cleanse :gender, :cleaner => [
|
83
|
+
:strip,
|
84
|
+
:upcase,
|
85
|
+
Proc.new do |gender|
|
86
|
+
if (gender == "UNKNOWN") && (title == "MR.")
|
87
|
+
"Male"
|
88
|
+
else
|
89
|
+
"Female"
|
90
|
+
end
|
91
|
+
end
|
92
|
+
]
|
93
|
+
end
|
47
94
|
|
48
95
|
u = User.new
|
49
96
|
u.first_name = ' joe '
|
50
97
|
u.last_name = "\n black\n"
|
51
98
|
u.address1 = "2632 Brown St \n"
|
99
|
+
u.title = " \nmr \n"
|
100
|
+
u.gender = " Unknown "
|
52
101
|
puts "Before data cleansing #{u.inspect}"
|
102
|
+
# Before data cleansing #<User:0x007fdd5a83a8f8 @first_name=" joe ", @last_name="\n black\n", @address1="2632 Brown St \n", @title=" \nmr \n", @gender=" Unknown ">
|
103
|
+
|
53
104
|
u.cleanse_attributes!
|
54
105
|
puts "After data cleansing #{u.inspect}"
|
106
|
+
# After data cleansing #<User:0x007fdd5a83a8f8 @first_name="joe", @last_name="black", @address1="2632 Brown St", @title="MR.", @gender="Male">
|
55
107
|
```
|
56
108
|
|
57
109
|
### Rails Example
|
58
110
|
|
59
|
-
To encrypt a field in a Mongoid document, just add ":encrypted => true" at the end
|
60
|
-
of the field specifier. The field name must currently begin with "encrypted_"
|
61
|
-
|
62
111
|
```ruby
|
112
|
+
# Define a global cleanser
|
113
|
+
DataCleansing.register_cleaner(:strip) {|string| string.strip!}
|
114
|
+
|
63
115
|
# 'users' table has the following columns :first_name, :last_name, :address1, :address2
|
64
116
|
class User < ActiveRecord::Base
|
65
117
|
include DataCleansing::Cleanse
|
@@ -74,9 +126,6 @@ class User < ActiveRecord::Base
|
|
74
126
|
before_validation :cleanse_attributes!
|
75
127
|
end
|
76
128
|
|
77
|
-
# Define a global cleanser
|
78
|
-
DataCleansing.register_cleaner(:strip) {|string, params, object| string.strip!}
|
79
|
-
|
80
129
|
# Create a User instance
|
81
130
|
u = User.new(:first_name => ' joe ', :last_name => "\n black\n", :address1 => "2632 Brown St \n")
|
82
131
|
puts "Before data cleansing #{u.attributes.inspect}"
|
@@ -85,6 +134,12 @@ puts "After data cleansing #{u.attributes.inspect}"
|
|
85
134
|
u.save!
|
86
135
|
```
|
87
136
|
|
137
|
+
## Notes
|
138
|
+
|
139
|
+
Cleaners are called in the order in which they are defined, so subsequent cleaners
|
140
|
+
can assume that the previous cleaners have run and can therefore access or even
|
141
|
+
modify previously cleaned attributes.
|
142
|
+
|
88
143
|
## Installation
|
89
144
|
|
90
145
|
### Add to an existing Rails project
|
@@ -120,9 +175,9 @@ tries to convert it to an integer or float.
|
|
120
175
|
Meta
|
121
176
|
----
|
122
177
|
|
123
|
-
* Code: `git clone git://github.com/
|
124
|
-
* Home: <https://github.com/
|
125
|
-
* Issues: <http://github.com/
|
178
|
+
* Code: `git clone git://github.com/reidmorrison/data_cleansing.git`
|
179
|
+
* Home: <https://github.com/reidmorrison/data_cleansing>
|
180
|
+
* Issues: <http://github.com/reidmorrison/data_cleansing/issues>
|
126
181
|
* Gems: <http://rubygems.org/gems/data_cleansing>
|
127
182
|
|
128
183
|
This project uses [Semantic Versioning](http://semver.org/).
|
@@ -135,7 +190,7 @@ Reid Morrison :: reidmo@gmail.com :: @reidmorrison
|
|
135
190
|
License
|
136
191
|
-------
|
137
192
|
|
138
|
-
Copyright 2013 Clarity Services, Inc.
|
193
|
+
Copyright 2013 Reid Morrison, Clarity Services, Inc.
|
139
194
|
|
140
195
|
Licensed under the Apache License, Version 2.0 (the "License");
|
141
196
|
you may not use this file except in compliance with the License.
|
@@ -1,16 +1,17 @@
|
|
1
1
|
module DataCleansing
|
2
2
|
# Mix-in to add cleaner
|
3
3
|
module Cleanse
|
4
|
-
CleanerStruct = Struct.new(:
|
4
|
+
CleanerStruct = Struct.new(:cleaners, :attributes, :params)
|
5
5
|
|
6
6
|
module ClassMethods
|
7
7
|
# Define how to cleanse one or more attributes
|
8
8
|
def cleanse(*args)
|
9
9
|
last = args.last
|
10
10
|
params = (last.is_a?(Hash) && last.instance_of?(Hash)) ? args.pop.dup : {}
|
11
|
-
|
12
|
-
raise(ArgumentError, "Mandatory :cleaner parameter is missing: #{params.inspect}") unless
|
13
|
-
(@cleaners ||= ThreadSafe::Array.new) << CleanerStruct.new(
|
11
|
+
cleaners = Array(params.delete(:cleaner))
|
12
|
+
raise(ArgumentError, "Mandatory :cleaner parameter is missing: #{params.inspect}") unless cleaners
|
13
|
+
(@cleaners ||= ThreadSafe::Array.new) << CleanerStruct.new(cleaners, args, params)
|
14
|
+
nil
|
14
15
|
end
|
15
16
|
|
16
17
|
def cleaners
|
@@ -23,7 +24,6 @@ module DataCleansing
|
|
23
24
|
def cleanse_attributes!
|
24
25
|
self.class.cleaners.each do |cleaner_struct|
|
25
26
|
params = cleaner_struct.params
|
26
|
-
cleaner = cleaner_struct.cleaner
|
27
27
|
attrs = cleaner_struct.attributes
|
28
28
|
|
29
29
|
# Special case to include :all fields
|
@@ -62,14 +62,16 @@ module DataCleansing
|
|
62
62
|
|
63
63
|
# No need to clean if attribute is nil
|
64
64
|
unless value.nil?
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
65
|
+
# Allow multiple cleaners to be defined and only set the new value
|
66
|
+
# once all cleaners have run
|
67
|
+
new_value = value
|
68
|
+
cleaner_struct.cleaners.each do |cleaner|
|
69
|
+
# Cleaner itself could be a custom Proc, otherwise do a global lookup for it
|
70
|
+
proc = cleaner.is_a?(Proc) ? cleaner : DataCleansing.cleaner(cleaner.to_sym)
|
71
|
+
raise "No cleaner defined for #{cleaner.inspect}" unless proc
|
72
|
+
|
73
|
+
# Call the cleaner proc within the scope (binding) of this object
|
74
|
+
new_value = instance_exec(new_value, params, &proc)
|
73
75
|
end
|
74
76
|
# Update value if it has changed
|
75
77
|
send("#{attr.to_sym}=".to_sym, new_value) if new_value != value
|
@@ -77,6 +79,7 @@ module DataCleansing
|
|
77
79
|
|
78
80
|
end
|
79
81
|
end
|
82
|
+
nil
|
80
83
|
end
|
81
84
|
end
|
82
85
|
|
data/test/ruby_test.rb
CHANGED
@@ -21,6 +21,38 @@ class RubyUser
|
|
21
21
|
cleanse :address1, :address2, :cleaner => Proc.new {|string| "<< #{string.strip!} >>"}
|
22
22
|
end
|
23
23
|
|
24
|
+
# Another global cleaner, used by RubyUser2
|
25
|
+
DataCleansing.register_cleaner(:upcase) {|string| string.upcase!}
|
26
|
+
|
27
|
+
class RubyUser2
|
28
|
+
include DataCleansing::Cleanse
|
29
|
+
|
30
|
+
attr_accessor :first_name, :last_name, :title, :address1, :address2, :gender
|
31
|
+
|
32
|
+
# Use a global cleaner
|
33
|
+
cleanse :first_name, :last_name, :cleaner => :strip
|
34
|
+
|
35
|
+
# Define a once off cleaner
|
36
|
+
cleanse :address1, :address2, :cleaner => Proc.new {|string| string.strip!}
|
37
|
+
|
38
|
+
# Use multiple cleaners, and a custom block
|
39
|
+
cleanse :title, :cleaner => [:strip, :upcase, Proc.new {|string| "#{string}." unless string.end_with?('.')}]
|
40
|
+
|
41
|
+
# Change the cleansing rule based on the value of other attributes in that instance of user
|
42
|
+
# The 'title' is retrieved from the current instance of the user
|
43
|
+
cleanse :gender, :cleaner => [
|
44
|
+
:strip,
|
45
|
+
:upcase,
|
46
|
+
Proc.new do |gender|
|
47
|
+
if (gender == "UNKNOWN") && (title == "MR.")
|
48
|
+
"Male"
|
49
|
+
else
|
50
|
+
"Female"
|
51
|
+
end
|
52
|
+
end
|
53
|
+
]
|
54
|
+
end
|
55
|
+
|
24
56
|
class RubyTest < Test::Unit::TestCase
|
25
57
|
context "Ruby Models" do
|
26
58
|
|
@@ -54,5 +86,33 @@ class RubyTest < Test::Unit::TestCase
|
|
54
86
|
end
|
55
87
|
end
|
56
88
|
|
89
|
+
context "with ruby user2" do
|
90
|
+
setup do
|
91
|
+
@user = RubyUser2.new
|
92
|
+
@user.first_name = ' joe '
|
93
|
+
@user.last_name = "\n black\n"
|
94
|
+
@user.address1 = "2632 Brown St \n"
|
95
|
+
@user.title = " \nmr \n"
|
96
|
+
@user.gender = " Unknown "
|
97
|
+
end
|
98
|
+
|
99
|
+
should 'cleanse_attributes!' do
|
100
|
+
@user.cleanse_attributes!
|
101
|
+
assert_equal 'joe', @user.first_name
|
102
|
+
assert_equal 'black', @user.last_name
|
103
|
+
assert_equal '2632 Brown St', @user.address1
|
104
|
+
end
|
105
|
+
|
106
|
+
should 'cleanse_attributes! with multiple cleaners' do
|
107
|
+
@user.cleanse_attributes!
|
108
|
+
assert_equal 'MR.', @user.title
|
109
|
+
end
|
110
|
+
|
111
|
+
should 'cleanse_attributes! referencing other attributes' do
|
112
|
+
@user.cleanse_attributes!
|
113
|
+
assert_equal 'Male', @user.gender
|
114
|
+
end
|
115
|
+
end
|
116
|
+
|
57
117
|
end
|
58
118
|
end
|
data/test/test_db.sqlite3
CHANGED
Binary file
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: data_cleansing
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Reid Morrison
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2013-07-
|
11
|
+
date: 2013-07-12 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: thread_safe
|