data_cleaner 0.0.3 → 0.0.4
Sign up to get free protection for your applications and to get access to all the features.
- data/README.txt +125 -6
- data/lib/data_cleaner/cleaner.rb +2 -58
- data/lib/data_cleaner/format.rb +2 -2
- data/lib/data_cleaner/formats.rb +78 -15
- metadata +4 -4
data/README.txt
CHANGED
@@ -6,6 +6,9 @@ DataCleaner wants to make sure your data still looks real, and importantly, pass
|
|
6
6
|
|
7
7
|
Only data that needs anonymising has to be specified; foreign keys and non-customer-identifiable data should be left alone.
|
8
8
|
|
9
|
+
* rdoc[http://sourcetagsandcodes.com/data_cleaner/doc/]
|
10
|
+
* source[https://github.com/matsadler/data_cleaner]
|
11
|
+
|
9
12
|
== Installation
|
10
13
|
|
11
14
|
gem install data_cleaner
|
@@ -35,13 +38,17 @@ gem install data_cleaner
|
|
35
38
|
end
|
36
39
|
|
37
40
|
module DataCleaner::Formats
|
41
|
+
helper :embarrassing_secret do
|
42
|
+
"I like " + ["the colour pink", "programming PHP", "Judas Priest"].sample
|
43
|
+
end
|
44
|
+
|
38
45
|
format "TopSecret" do |f|
|
39
46
|
f.name [:first_name, " ", :last_name]
|
40
47
|
f.email :email, &:name # passes the object's name to the email method
|
41
|
-
f.reference do |
|
42
|
-
"#{
|
48
|
+
f.reference do |obj| # one off helper
|
49
|
+
"#{obj.name[0..2].downcase}#{obj.date.strftime("%y")}"
|
43
50
|
end
|
44
|
-
f.secret
|
51
|
+
f.secret :embarrassing_secret
|
45
52
|
end
|
46
53
|
end
|
47
54
|
|
@@ -58,11 +65,123 @@ gem install data_cleaner
|
|
58
65
|
prints:
|
59
66
|
|
60
67
|
#<TopSecret:0x1015f7830 @email="mat@foo.com", @date=Mon Jan 17 16:53:19 +0000 2011, @name="Matthew Sadler", @secret="I like kittens", @reference="mat09">
|
61
|
-
is valid? true
|
68
|
+
is valid? true
|
62
69
|
|
63
|
-
#<TopSecret:0x1015f7830 @email="javier.kuhlman@franeckikonopelski.co.uk", @date=Mon Jan 17 16:53:19 +0000 2011, @name="Javier Kuhlman", @secret="
|
64
|
-
is valid? true
|
70
|
+
#<TopSecret:0x1015f7830 @email="javier.kuhlman@franeckikonopelski.co.uk", @date=Mon Jan 17 16:53:19 +0000 2011, @name="Javier Kuhlman", @secret="I like the colour pink", @reference="jav11">
|
71
|
+
is valid? true
|
72
|
+
|
73
|
+
== Formats
|
74
|
+
There are various ways of specifying the format of an attribute.
|
75
|
+
|
76
|
+
=== Basic Symbol
|
77
|
+
|
78
|
+
format "TopSecret" do |f|
|
79
|
+
f.name :first_name
|
80
|
+
end
|
81
|
+
In this case the helper :first_name will be used to replace the name attribute
|
82
|
+
|
83
|
+
=== Symbol With Arguments
|
84
|
+
|
85
|
+
format "TopSecret" do |f|
|
86
|
+
f.email :email, "Arthur"
|
87
|
+
end
|
88
|
+
The helper :email will be used to replace the attribute, and be given the argument "Arthur"
|
89
|
+
|
90
|
+
=== Symbol With Block
|
91
|
+
|
92
|
+
format "TopSecret" do |f|
|
93
|
+
f.name :first_name
|
94
|
+
f.email(:email) {|obj| obj.name}
|
95
|
+
end
|
96
|
+
In this example the :email helper will be given the object's replacement name as an argument.
|
97
|
+
|
98
|
+
format "TopSecret" do |f|
|
99
|
+
f.email(:email) {|obj| obj.name}
|
100
|
+
f.name :first_name
|
101
|
+
end
|
102
|
+
Here the :email helper will get the object's original name as an argument, because the email format is declared before the name is replaced.
|
103
|
+
|
104
|
+
=== String
|
105
|
+
|
106
|
+
format "TopSecret" do |f|
|
107
|
+
f.name "Arthur"
|
108
|
+
end
|
109
|
+
In this case the name will simply be replaced with the string specified
|
110
|
+
|
111
|
+
=== Array
|
112
|
+
|
113
|
+
format "TopSecret" do |f|
|
114
|
+
f.name [:first_name, " ", :last_name]
|
115
|
+
end
|
116
|
+
With an array, the individual elements behave like those above and are then concatenated together. In this example, the results from the :first_name and :last_name helpers will be joined with the string " " between them.
|
117
|
+
|
118
|
+
=== Nested Arrays
|
119
|
+
|
120
|
+
format "TopSecret" do |f|
|
121
|
+
f.emails [[:email, "Arthur"], ", ", [:email, "Ford"]]
|
122
|
+
end
|
123
|
+
In this example the :email helper will be called twice: once with the argument "Arthur", then again with the argument "Ford". The two results will be joined by the string ", ".
|
124
|
+
|
125
|
+
=== Block
|
126
|
+
|
127
|
+
format "TopSecret" do |f|
|
128
|
+
f.secret {|obj| rand(100)}
|
129
|
+
end
|
130
|
+
When using a block the attribute will be replaced by the result of the block.
|
131
|
+
|
132
|
+
== Built-in helpers
|
133
|
+
The built-in helpers use the faker gem to generate data; see the faker documentation for more details.
|
134
|
+
|
135
|
+
:name
|
136
|
+
:first_name
|
137
|
+
:last_name
|
138
|
+
:name_prefix
|
139
|
+
:name_suffix
|
140
|
+
|
141
|
+
:phone_number
|
142
|
+
|
143
|
+
:city
|
144
|
+
:city_prefix
|
145
|
+
:city_suffix
|
146
|
+
:secondary_address
|
147
|
+
:street_address
|
148
|
+
:street_name
|
149
|
+
:street_suffix
|
150
|
+
:uk_country
|
151
|
+
:uk_county
|
152
|
+
:uk_postcode
|
153
|
+
:us_state
|
154
|
+
:us_state_abbr
|
155
|
+
:zip_code
|
156
|
+
|
157
|
+
:domain_name
|
158
|
+
:domain_suffix
|
159
|
+
:domain_word
|
160
|
+
:email
|
161
|
+
:free_email
|
162
|
+
:user_name
|
163
|
+
|
164
|
+
:bs
|
165
|
+
:catch_phrase
|
166
|
+
:company_name
|
167
|
+
:company_suffix
|
168
|
+
|
169
|
+
:paragraph
|
170
|
+
:paragraphs
|
171
|
+
:sentence
|
172
|
+
:sentences
|
173
|
+
:words
|
174
|
+
|
175
|
+
== Custom helpers
|
176
|
+
Custom helpers can be defined like this:
|
177
|
+
|
178
|
+
module DataCleaner::Formats
|
179
|
+
helper :embarrassing_secret do
|
180
|
+
"I like " + ["the colour pink", "programming PHP", "Judas Priest"].sample
|
181
|
+
end
|
182
|
+
end
|
65
183
|
|
184
|
+
This can also be used to redefine the built-in helpers.
|
66
185
|
|
67
186
|
== Licence
|
68
187
|
|
data/lib/data_cleaner/cleaner.rb
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
module DataCleaner
|
1
|
+
module DataCleaner # :nodoc:
|
2
2
|
# DataCleaner::Cleaner is a module which can either be mixed-in, or used
|
3
3
|
# standalone to anonymise the data held within objects.
|
4
4
|
#
|
@@ -6,57 +6,6 @@ module DataCleaner
|
|
6
6
|
# DataCleaner::Formats.
|
7
7
|
#
|
8
8
|
module Cleaner
|
9
|
-
# In the format
|
10
|
-
# :specifier => instance
|
11
|
-
# the method specifier will be called on instance
|
12
|
-
#
|
13
|
-
# whereas with
|
14
|
-
# :specifier => [instance, :method]
|
15
|
-
# the method method will be called on instance.
|
16
|
-
#
|
17
|
-
# :specifier is used when describing the format of your object's attributes
|
18
|
-
#
|
19
|
-
MAPPING = {
|
20
|
-
:name => Faker::Name,
|
21
|
-
:first_name => Faker::Name,
|
22
|
-
:last_name => Faker::Name,
|
23
|
-
:name_prefix => [Faker::Name, :prefix],
|
24
|
-
:name_suffix => [Faker::Name, :suffix],
|
25
|
-
|
26
|
-
:phone_number => Faker::PhoneNumber,
|
27
|
-
|
28
|
-
:city => Faker::Address,
|
29
|
-
:city_prefix => Faker::Address,
|
30
|
-
:city_suffix => Faker::Address,
|
31
|
-
:secondary_address => Faker::Address,
|
32
|
-
:street_address => Faker::Address,
|
33
|
-
:street_name => Faker::Address,
|
34
|
-
:street_suffix => Faker::Address,
|
35
|
-
:uk_country => Faker::Address,
|
36
|
-
:uk_county => Faker::Address,
|
37
|
-
:uk_postcode => Faker::Address,
|
38
|
-
:us_state => Faker::Address,
|
39
|
-
:us_state_abbr => Faker::Address,
|
40
|
-
:zip_code => Faker::Address,
|
41
|
-
|
42
|
-
:domain_name => Faker::Internet,
|
43
|
-
:domain_suffix => Faker::Internet,
|
44
|
-
:domain_word => Faker::Internet,
|
45
|
-
:email => Faker::Internet,
|
46
|
-
:free_email => Faker::Internet,
|
47
|
-
:user_name => Faker::Internet,
|
48
|
-
|
49
|
-
:bs => Faker::Company,
|
50
|
-
:catch_phrase => Faker::Company,
|
51
|
-
:company_name => [Faker::Company, :name],
|
52
|
-
:company_suffix => [Faker::Company, :suffix],
|
53
|
-
|
54
|
-
:paragraph => Faker::Lorem,
|
55
|
-
:paragraphs => Faker::Lorem,
|
56
|
-
:sentence => Faker::Lorem,
|
57
|
-
:sentences => Faker::Lorem,
|
58
|
-
:words => Faker::Lorem,
|
59
|
-
}
|
60
9
|
|
61
10
|
# :call-seq: Cleaner.clean_value(attr, klass, instance=nil) -> clean_value
|
62
11
|
#
|
@@ -112,7 +61,7 @@ module DataCleaner
|
|
112
61
|
first
|
113
62
|
when Symbol
|
114
63
|
args.map! {|arg| if arg.is_a?(Proc) then arg.call(object) end || arg}
|
115
|
-
|
64
|
+
DataCleaner::Formats.helpers[first].call(*args)
|
116
65
|
when Array
|
117
66
|
first.map do |e|
|
118
67
|
e = [e] unless e.is_a?(Array)
|
@@ -123,10 +72,5 @@ module DataCleaner
|
|
123
72
|
end
|
124
73
|
end
|
125
74
|
|
126
|
-
def __data__(type, *args)
|
127
|
-
klass, method = DataCleaner::Cleaner::MAPPING[type]
|
128
|
-
klass.send(method || type, *args)
|
129
|
-
end
|
130
|
-
|
131
75
|
end
|
132
76
|
end
|
data/lib/data_cleaner/format.rb
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
module DataCleaner
|
1
|
+
module DataCleaner # :nodoc:
|
2
2
|
# DataCleaner::Format can be used to describe the format of an object's
|
3
3
|
# attributes.
|
4
4
|
#
|
@@ -6,7 +6,7 @@ module DataCleaner
|
|
6
6
|
# f = DataCleaner::Format.new("Person")
|
7
7
|
# f.name [:first_name, " ", :last_name]
|
8
8
|
#
|
9
|
-
# You most likely do not want to use this class directly,
|
9
|
+
# You most likely do not want to use this class directly, but instead through
|
10
10
|
# DataCleaner::Formats.
|
11
11
|
#
|
12
12
|
class Format < if defined? BasicObject then BasicObject else Object end
|
data/lib/data_cleaner/formats.rb
CHANGED
@@ -1,17 +1,4 @@
|
|
1
|
-
|
2
|
-
class Symbol
|
3
|
-
def to_proc
|
4
|
-
Proc.new(&method(:__apply__))
|
5
|
-
end
|
6
|
-
|
7
|
-
private
|
8
|
-
def __apply__(*args)
|
9
|
-
args.shift.send(self, *args)
|
10
|
-
end
|
11
|
-
end
|
12
|
-
end
|
13
|
-
|
14
|
-
module DataCleaner
|
1
|
+
module DataCleaner # :nodoc:
|
15
2
|
# DataCleaner::Formats provides a DSL for describing, and method for looking
|
16
3
|
# up the format of object's attributes, such that they can be replaced with
|
17
4
|
# fake data, but still pass validation.
|
@@ -33,8 +20,28 @@ module DataCleaner
|
|
33
20
|
# end
|
34
21
|
#
|
35
22
|
module Formats
|
36
|
-
class << self; attr_accessor :formats end
|
23
|
+
class << self; attr_accessor :formats, :helpers end
|
37
24
|
self.formats = {}
|
25
|
+
self.helpers = {}
|
26
|
+
|
27
|
+
# :call-seq: Formats.helper(name) {|*args| block } -> helper
|
28
|
+
#
|
29
|
+
# Define a format helper, which can then be used in a format block
|
30
|
+
#
|
31
|
+
# Example:
|
32
|
+
# module DataCleaner::Formats
|
33
|
+
# helper :ip_address do
|
34
|
+
# Array.new(4).map {rand(255)}.join(".")
|
35
|
+
# end
|
36
|
+
#
|
37
|
+
# format "Server" do |f|
|
38
|
+
# f.ip :ip_address
|
39
|
+
# end
|
40
|
+
# end
|
41
|
+
#
|
42
|
+
def self.helper(name, &block)
|
43
|
+
helpers[name] = block
|
44
|
+
end
|
38
45
|
|
39
46
|
# :call-seq: Formats.format(klass) {|format| block } -> format
|
40
47
|
#
|
@@ -59,5 +66,61 @@ module DataCleaner
|
|
59
66
|
end
|
60
67
|
end
|
61
68
|
|
69
|
+
# Set up default helpers
|
70
|
+
#
|
71
|
+
# In the format
|
72
|
+
# :name => receiver
|
73
|
+
# the method name will be called on receiver
|
74
|
+
#
|
75
|
+
# whereas with
|
76
|
+
# :name => [receiver, :method]
|
77
|
+
# the method method will be called on receiver.
|
78
|
+
#
|
79
|
+
# :name is used when describing the format of your object's attributes
|
80
|
+
#
|
81
|
+
{
|
82
|
+
:name => Faker::Name,
|
83
|
+
:first_name => Faker::Name,
|
84
|
+
:last_name => Faker::Name,
|
85
|
+
:name_prefix => [Faker::Name, :prefix],
|
86
|
+
:name_suffix => [Faker::Name, :suffix],
|
87
|
+
|
88
|
+
:phone_number => Faker::PhoneNumber,
|
89
|
+
|
90
|
+
:city => Faker::Address,
|
91
|
+
:city_prefix => Faker::Address,
|
92
|
+
:city_suffix => Faker::Address,
|
93
|
+
:secondary_address => Faker::Address,
|
94
|
+
:street_address => Faker::Address,
|
95
|
+
:street_name => Faker::Address,
|
96
|
+
:street_suffix => Faker::Address,
|
97
|
+
:uk_country => Faker::Address,
|
98
|
+
:uk_county => Faker::Address,
|
99
|
+
:uk_postcode => Faker::Address,
|
100
|
+
:us_state => Faker::Address,
|
101
|
+
:us_state_abbr => Faker::Address,
|
102
|
+
:zip_code => Faker::Address,
|
103
|
+
|
104
|
+
:domain_name => Faker::Internet,
|
105
|
+
:domain_suffix => Faker::Internet,
|
106
|
+
:domain_word => Faker::Internet,
|
107
|
+
:email => Faker::Internet,
|
108
|
+
:free_email => Faker::Internet,
|
109
|
+
:user_name => Faker::Internet,
|
110
|
+
|
111
|
+
:bs => Faker::Company,
|
112
|
+
:catch_phrase => Faker::Company,
|
113
|
+
:company_name => [Faker::Company, :name],
|
114
|
+
:company_suffix => [Faker::Company, :suffix],
|
115
|
+
|
116
|
+
:paragraph => Faker::Lorem,
|
117
|
+
:paragraphs => Faker::Lorem,
|
118
|
+
:sentence => Faker::Lorem,
|
119
|
+
:sentences => Faker::Lorem,
|
120
|
+
:words => Faker::Lorem,
|
121
|
+
}.each do |name, (receiver, method)|
|
122
|
+
helper(name, &receiver.method(method || name))
|
123
|
+
end
|
124
|
+
|
62
125
|
end
|
63
126
|
end
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: data_cleaner
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 23
|
5
5
|
prerelease: false
|
6
6
|
segments:
|
7
7
|
- 0
|
8
8
|
- 0
|
9
|
-
-
|
10
|
-
version: 0.0.
|
9
|
+
- 4
|
10
|
+
version: 0.0.4
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- Matthew Sadler
|
@@ -15,7 +15,7 @@ autorequire:
|
|
15
15
|
bindir: bin
|
16
16
|
cert_chain: []
|
17
17
|
|
18
|
-
date: 2011-02-
|
18
|
+
date: 2011-02-09 00:00:00 +00:00
|
19
19
|
default_executable:
|
20
20
|
dependencies:
|
21
21
|
- !ruby/object:Gem::Dependency
|