data_cleansing 0.2.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +71 -16
- data/lib/data_cleansing/cleanse.rb +16 -13
- data/lib/data_cleansing/version.rb +1 -1
- data/test/ruby_test.rb +60 -0
- data/test/test_db.sqlite3 +0 -0
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: f07805d14a1985523defc78b1dead46165975d7e
|
4
|
+
data.tar.gz: 6d64e90d76621a9656a40610fcb5e9373062eb25
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: f51316bdd3a17e6be5dcebb973883b826dfa38d3956a345a455f9d795cd7859a9f9818386846dc6dd326eed96036b4aec907529911ce4c1903b1415aab40820f
|
7
|
+
data.tar.gz: b33a0fc76fa07efc63fcc74d343ae123fc2b9b7572492e9d1387bff0da4ba1490ab82f066537e50366dcb2f57e74779ca71c24d5c3519383a986faa95c25cc2e
|
data/README.md
CHANGED
@@ -3,7 +3,7 @@ data_cleansing
|
|
3
3
|
|
4
4
|
Data Cleansing solution for Ruby with additional support for Rails and Mongoid
|
5
5
|
|
6
|
-
* http://github.com/
|
6
|
+
* http://github.com/reidmorrison/data_cleansing
|
7
7
|
|
8
8
|
## Introduction
|
9
9
|
|
@@ -15,7 +15,7 @@ in a model and have the data cleansed transparently when required.
|
|
15
15
|
DataCleansing is a framework that allows any data cleansing to be applied to
|
16
16
|
specific attributes or fields. At this time it does not supply the cleaning
|
17
17
|
solutions themselves since they are usually straightforward, or so complex
|
18
|
-
that they don't tend to be too
|
18
|
+
that they don't tend to be too useful to others. However, over time built-in
|
19
19
|
cleansing solutions may be added. Feel free to submit any suggestions via a ticket
|
20
20
|
or pull request.
|
21
21
|
|
@@ -27,39 +27,91 @@ or pull request.
|
|
27
27
|
|
28
28
|
## Examples
|
29
29
|
|
30
|
+
### Simple Example
|
31
|
+
```ruby
|
32
|
+
require 'data_cleansing'
|
33
|
+
|
34
|
+
# Define a global cleaner
|
35
|
+
DataCleansing.register_cleaner(:strip) {|string| string.strip!}
|
36
|
+
|
37
|
+
class User
|
38
|
+
include DataCleansing::Cleanse
|
39
|
+
|
40
|
+
attr_accessor :first_name, :last_name
|
41
|
+
|
42
|
+
# Strip leading and trailing whitespace from first_name and last_name
|
43
|
+
cleanse :first_name, :last_name, :cleaner => :strip
|
44
|
+
end
|
45
|
+
|
46
|
+
u = User.new
|
47
|
+
u.first_name = ' joe '
|
48
|
+
u.last_name = "\n black\n"
|
49
|
+
puts "Before data cleansing #{u.inspect}"
|
50
|
+
# Before data cleansing #<User:0x007fc9f1081980 @first_name=" joe ", @last_name="\n black\n">
|
51
|
+
|
52
|
+
u.cleanse_attributes!
|
53
|
+
puts "After data cleansing #{u.inspect}"
|
54
|
+
# After data cleansing #<User:0x007fc9f1081980 @first_name="joe", @last_name="black">
|
55
|
+
```
|
56
|
+
|
30
57
|
### Ruby Example
|
31
58
|
|
32
59
|
```ruby
|
60
|
+
require 'data_cleansing'
|
61
|
+
|
62
|
+
# Define global cleaners
|
63
|
+
DataCleansing.register_cleaner(:strip) {|string| string.strip!}
|
64
|
+
DataCleansing.register_cleaner(:upcase) {|string| string.upcase!}
|
65
|
+
|
33
66
|
class User
|
34
67
|
include DataCleansing::Cleanse
|
35
68
|
|
36
|
-
attr_accessor :first_name, :last_name, :address1, :address2
|
69
|
+
attr_accessor :first_name, :last_name, :title, :address1, :address2, :gender
|
37
70
|
|
38
71
|
# Use a global cleaner
|
39
72
|
cleanse :first_name, :last_name, :cleaner => :strip
|
40
73
|
|
41
74
|
# Define a once off cleaner
|
42
75
|
cleanse :address1, :address2, :cleaner => Proc.new {|string| string.strip!}
|
43
|
-
end
|
44
76
|
|
45
|
-
#
|
46
|
-
|
77
|
+
# Use multiple cleaners, and a custom block
|
78
|
+
cleanse :title, :cleaner => [:strip, :upcase, Proc.new {|string| "#{string}." unless string.end_with?('.')}]
|
79
|
+
|
80
|
+
# Change the cleansing rule based on the value of other attributes in that instance of user
|
81
|
+
# The 'title' is retrieved from the current instance of the user
|
82
|
+
cleanse :gender, :cleaner => [
|
83
|
+
:strip,
|
84
|
+
:upcase,
|
85
|
+
Proc.new do |gender|
|
86
|
+
if (gender == "UNKNOWN") && (title == "MR.")
|
87
|
+
"Male"
|
88
|
+
else
|
89
|
+
"Female"
|
90
|
+
end
|
91
|
+
end
|
92
|
+
]
|
93
|
+
end
|
47
94
|
|
48
95
|
u = User.new
|
49
96
|
u.first_name = ' joe '
|
50
97
|
u.last_name = "\n black\n"
|
51
98
|
u.address1 = "2632 Brown St \n"
|
99
|
+
u.title = " \nmr \n"
|
100
|
+
u.gender = " Unknown "
|
52
101
|
puts "Before data cleansing #{u.inspect}"
|
102
|
+
# Before data cleansing #<User:0x007fdd5a83a8f8 @first_name=" joe ", @last_name="\n black\n", @address1="2632 Brown St \n", @title=" \nmr \n", @gender=" Unknown ">
|
103
|
+
|
53
104
|
u.cleanse_attributes!
|
54
105
|
puts "After data cleansing #{u.inspect}"
|
106
|
+
# After data cleansing #<User:0x007fdd5a83a8f8 @first_name="joe", @last_name="black", @address1="2632 Brown St", @title="MR.", @gender="Male">
|
55
107
|
```
|
56
108
|
|
57
109
|
### Rails Example
|
58
110
|
|
59
|
-
To encrypt a field in a Mongoid document, just add ":encrypted => true" at the end
|
60
|
-
of the field specifier. The field name must currently begin with "encrypted_"
|
61
|
-
|
62
111
|
```ruby
|
112
|
+
# Define a global cleanser
|
113
|
+
DataCleansing.register_cleaner(:strip) {|string| string.strip!}
|
114
|
+
|
63
115
|
# 'users' table has the following columns :first_name, :last_name, :address1, :address2
|
64
116
|
class User < ActiveRecord::Base
|
65
117
|
include DataCleansing::Cleanse
|
@@ -74,9 +126,6 @@ class User < ActiveRecord::Base
|
|
74
126
|
before_validation :cleanse_attributes!
|
75
127
|
end
|
76
128
|
|
77
|
-
# Define a global cleanser
|
78
|
-
DataCleansing.register_cleaner(:strip) {|string, params, object| string.strip!}
|
79
|
-
|
80
129
|
# Create a User instance
|
81
130
|
u = User.new(:first_name => ' joe ', :last_name => "\n black\n", :address1 => "2632 Brown St \n")
|
82
131
|
puts "Before data cleansing #{u.attributes.inspect}"
|
@@ -85,6 +134,12 @@ puts "After data cleansing #{u.attributes.inspect}"
|
|
85
134
|
u.save!
|
86
135
|
```
|
87
136
|
|
137
|
+
## Notes
|
138
|
+
|
139
|
+
Cleaners are called in the order in which they are defined, so subsequent cleaners
|
140
|
+
can assume that the previous cleaners have run and can therefore access or even
|
141
|
+
modify previously cleaned attributes.
|
142
|
+
|
88
143
|
## Installation
|
89
144
|
|
90
145
|
### Add to an existing Rails project
|
@@ -120,9 +175,9 @@ tries to convert it to an integer or float.
|
|
120
175
|
Meta
|
121
176
|
----
|
122
177
|
|
123
|
-
* Code: `git clone git://github.com/
|
124
|
-
* Home: <https://github.com/
|
125
|
-
* Issues: <http://github.com/
|
178
|
+
* Code: `git clone git://github.com/reidmorrison/data_cleansing.git`
|
179
|
+
* Home: <https://github.com/reidmorrison/data_cleansing>
|
180
|
+
* Issues: <http://github.com/reidmorrison/data_cleansing/issues>
|
126
181
|
* Gems: <http://rubygems.org/gems/data_cleansing>
|
127
182
|
|
128
183
|
This project uses [Semantic Versioning](http://semver.org/).
|
@@ -135,7 +190,7 @@ Reid Morrison :: reidmo@gmail.com :: @reidmorrison
|
|
135
190
|
License
|
136
191
|
-------
|
137
192
|
|
138
|
-
Copyright 2013 Clarity Services, Inc.
|
193
|
+
Copyright 2013 Reid Morrison, Clarity Services, Inc.
|
139
194
|
|
140
195
|
Licensed under the Apache License, Version 2.0 (the "License");
|
141
196
|
you may not use this file except in compliance with the License.
|
@@ -1,16 +1,17 @@
|
|
1
1
|
module DataCleansing
|
2
2
|
# Mix-in to add cleaner
|
3
3
|
module Cleanse
|
4
|
-
CleanerStruct = Struct.new(:
|
4
|
+
CleanerStruct = Struct.new(:cleaners, :attributes, :params)
|
5
5
|
|
6
6
|
module ClassMethods
|
7
7
|
# Define how to cleanse one or more attributes
|
8
8
|
def cleanse(*args)
|
9
9
|
last = args.last
|
10
10
|
params = (last.is_a?(Hash) && last.instance_of?(Hash)) ? args.pop.dup : {}
|
11
|
-
|
12
|
-
raise(ArgumentError, "Mandatory :cleaner parameter is missing: #{params.inspect}") unless
|
13
|
-
(@cleaners ||= ThreadSafe::Array.new) << CleanerStruct.new(
|
11
|
+
cleaners = Array(params.delete(:cleaner))
|
12
|
+
raise(ArgumentError, "Mandatory :cleaner parameter is missing: #{params.inspect}") unless cleaners
|
13
|
+
(@cleaners ||= ThreadSafe::Array.new) << CleanerStruct.new(cleaners, args, params)
|
14
|
+
nil
|
14
15
|
end
|
15
16
|
|
16
17
|
def cleaners
|
@@ -23,7 +24,6 @@ module DataCleansing
|
|
23
24
|
def cleanse_attributes!
|
24
25
|
self.class.cleaners.each do |cleaner_struct|
|
25
26
|
params = cleaner_struct.params
|
26
|
-
cleaner = cleaner_struct.cleaner
|
27
27
|
attrs = cleaner_struct.attributes
|
28
28
|
|
29
29
|
# Special case to include :all fields
|
@@ -62,14 +62,16 @@ module DataCleansing
|
|
62
62
|
|
63
63
|
# No need to clean if attribute is nil
|
64
64
|
unless value.nil?
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
65
|
+
# Allow multiple cleaners to be defined and only set the new value
|
66
|
+
# once all cleaners have run
|
67
|
+
new_value = value
|
68
|
+
cleaner_struct.cleaners.each do |cleaner|
|
69
|
+
# Cleaner itself could be a custom Proc, otherwise do a global lookup for it
|
70
|
+
proc = cleaner.is_a?(Proc) ? cleaner : DataCleansing.cleaner(cleaner.to_sym)
|
71
|
+
raise "No cleaner defined for #{cleaner.inspect}" unless proc
|
72
|
+
|
73
|
+
# Call the cleaner proc within the scope (binding) of this object
|
74
|
+
new_value = instance_exec(new_value, params, &proc)
|
73
75
|
end
|
74
76
|
# Update value if it has changed
|
75
77
|
send("#{attr.to_sym}=".to_sym, new_value) if new_value != value
|
@@ -77,6 +79,7 @@ module DataCleansing
|
|
77
79
|
|
78
80
|
end
|
79
81
|
end
|
82
|
+
nil
|
80
83
|
end
|
81
84
|
end
|
82
85
|
|
data/test/ruby_test.rb
CHANGED
@@ -21,6 +21,38 @@ class RubyUser
|
|
21
21
|
cleanse :address1, :address2, :cleaner => Proc.new {|string| "<< #{string.strip!} >>"}
|
22
22
|
end
|
23
23
|
|
24
|
+
# Another global cleaner, used by RubyUser2
|
25
|
+
DataCleansing.register_cleaner(:upcase) {|string| string.upcase!}
|
26
|
+
|
27
|
+
class RubyUser2
|
28
|
+
include DataCleansing::Cleanse
|
29
|
+
|
30
|
+
attr_accessor :first_name, :last_name, :title, :address1, :address2, :gender
|
31
|
+
|
32
|
+
# Use a global cleaner
|
33
|
+
cleanse :first_name, :last_name, :cleaner => :strip
|
34
|
+
|
35
|
+
# Define a once off cleaner
|
36
|
+
cleanse :address1, :address2, :cleaner => Proc.new {|string| string.strip!}
|
37
|
+
|
38
|
+
# Use multiple cleaners, and a custom block
|
39
|
+
cleanse :title, :cleaner => [:strip, :upcase, Proc.new {|string| "#{string}." unless string.end_with?('.')}]
|
40
|
+
|
41
|
+
# Change the cleansing rule based on the value of other attributes in that instance of user
|
42
|
+
# The 'title' is retrieved from the current instance of the user
|
43
|
+
cleanse :gender, :cleaner => [
|
44
|
+
:strip,
|
45
|
+
:upcase,
|
46
|
+
Proc.new do |gender|
|
47
|
+
if (gender == "UNKNOWN") && (title == "MR.")
|
48
|
+
"Male"
|
49
|
+
else
|
50
|
+
"Female"
|
51
|
+
end
|
52
|
+
end
|
53
|
+
]
|
54
|
+
end
|
55
|
+
|
24
56
|
class RubyTest < Test::Unit::TestCase
|
25
57
|
context "Ruby Models" do
|
26
58
|
|
@@ -54,5 +86,33 @@ class RubyTest < Test::Unit::TestCase
|
|
54
86
|
end
|
55
87
|
end
|
56
88
|
|
89
|
+
context "with ruby user2" do
|
90
|
+
setup do
|
91
|
+
@user = RubyUser2.new
|
92
|
+
@user.first_name = ' joe '
|
93
|
+
@user.last_name = "\n black\n"
|
94
|
+
@user.address1 = "2632 Brown St \n"
|
95
|
+
@user.title = " \nmr \n"
|
96
|
+
@user.gender = " Unknown "
|
97
|
+
end
|
98
|
+
|
99
|
+
should 'cleanse_attributes!' do
|
100
|
+
@user.cleanse_attributes!
|
101
|
+
assert_equal 'joe', @user.first_name
|
102
|
+
assert_equal 'black', @user.last_name
|
103
|
+
assert_equal '2632 Brown St', @user.address1
|
104
|
+
end
|
105
|
+
|
106
|
+
should 'cleanse_attributes! with multiple cleaners' do
|
107
|
+
@user.cleanse_attributes!
|
108
|
+
assert_equal 'MR.', @user.title
|
109
|
+
end
|
110
|
+
|
111
|
+
should 'cleanse_attributes! referencing other attributes' do
|
112
|
+
@user.cleanse_attributes!
|
113
|
+
assert_equal 'Male', @user.gender
|
114
|
+
end
|
115
|
+
end
|
116
|
+
|
57
117
|
end
|
58
118
|
end
|
data/test/test_db.sqlite3
CHANGED
Binary file
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: data_cleansing
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Reid Morrison
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2013-07-
|
11
|
+
date: 2013-07-12 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: thread_safe
|