data_cleaner 0.0.3 → 0.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.txt +125 -6
- data/lib/data_cleaner/cleaner.rb +2 -58
- data/lib/data_cleaner/format.rb +2 -2
- data/lib/data_cleaner/formats.rb +78 -15
- metadata +4 -4
data/README.txt
CHANGED
@@ -6,6 +6,9 @@ DataCleaner wants to make sure your data still looks real, and importantly, pass
|
|
6
6
|
|
7
7
|
Only data that needs anonymising needs to be specified; foreign keys and non-customer-identifiable data should be left alone.
|
8
8
|
|
9
|
+
* rdoc[http://sourcetagsandcodes.com/data_cleaner/doc/]
|
10
|
+
* source[https://github.com/matsadler/data_cleaner]
|
11
|
+
|
9
12
|
== Installation
|
10
13
|
|
11
14
|
gem install data_cleaner
|
@@ -35,13 +38,17 @@ gem install data_cleaner
|
|
35
38
|
end
|
36
39
|
|
37
40
|
module DataCleaner::Formats
|
41
|
+
helper :embarrassing_secret do
|
42
|
+
"I like " + ["the colour pink", "programming PHP", "Judas Priest"].sample
|
43
|
+
end
|
44
|
+
|
38
45
|
format "TopSecret" do |f|
|
39
46
|
f.name [:first_name, " ", :last_name]
|
40
47
|
f.email :email, &:name # passes the objects name to the email method
|
41
|
-
f.reference do |
|
42
|
-
"#{
|
48
|
+
f.reference do |obj| # one off helper
|
49
|
+
"#{obj.name[0..2].downcase}#{obj.date.strftime("%y")}"
|
43
50
|
end
|
44
|
-
f.secret
|
51
|
+
f.secret :embarrassing_secret
|
45
52
|
end
|
46
53
|
end
|
47
54
|
|
@@ -58,11 +65,123 @@ gem install data_cleaner
|
|
58
65
|
prints:
|
59
66
|
|
60
67
|
#<TopSecret:0x1015f7830 @email="mat@foo.com", @date=Mon Jan 17 16:53:19 +0000 2011, @name="Matthew Sadler", @secret="I like kittens", @reference="mat09">
|
61
|
-
is valid? true
|
68
|
+
is valid? true
|
62
69
|
|
63
|
-
#<TopSecret:0x1015f7830 @email="javier.kuhlman@franeckikonopelski.co.uk", @date=Mon Jan 17 16:53:19 +0000 2011, @name="Javier Kuhlman", @secret="
|
64
|
-
is valid? true
|
70
|
+
#<TopSecret:0x1015f7830 @email="javier.kuhlman@franeckikonopelski.co.uk", @date=Mon Jan 17 16:53:19 +0000 2011, @name="Javier Kuhlman", @secret="the colour pink", @reference="jav11">
|
71
|
+
is valid? true
|
72
|
+
|
73
|
+
== Formats
|
74
|
+
There are various ways of specifying the format of an attribute.
|
75
|
+
|
76
|
+
=== Basic Symbol
|
77
|
+
|
78
|
+
format "TopSecret" do |f|
|
79
|
+
f.name :first_name
|
80
|
+
end
|
81
|
+
In this case the helper :first_name will be used to replace the name attribute
|
82
|
+
|
83
|
+
=== Symbol With Arguments
|
84
|
+
|
85
|
+
format "TopSecret" do |f|
|
86
|
+
f.email :email, "Arthur"
|
87
|
+
end
|
88
|
+
The helper :email will be used to replace the attribute, and be given the argument "Arthur"
|
89
|
+
|
90
|
+
=== Symbol With Block
|
91
|
+
|
92
|
+
format "TopSecret" do |f|
|
93
|
+
f.name :first_name
|
94
|
+
f.email(:email) {|obj| obj.name}
|
95
|
+
end
|
96
|
+
In this example the :email helper will be given the object's replacement name as an argument
|
97
|
+
|
98
|
+
format "TopSecret" do |f|
|
99
|
+
f.email(:email) {|obj| obj.name}
|
100
|
+
f.name :first_name
|
101
|
+
end
|
102
|
+
Here the :email helper will get the object's original name as an argument
|
103
|
+
|
104
|
+
=== String
|
105
|
+
|
106
|
+
format "TopSecret" do |f|
|
107
|
+
f.name "Arthur"
|
108
|
+
end
|
109
|
+
In this case the name will simply be replaced with the string specified
|
110
|
+
|
111
|
+
=== Array
|
112
|
+
|
113
|
+
format "TopSecret" do |f|
|
114
|
+
f.name [:first_name, " ", :last_name]
|
115
|
+
end
|
116
|
+
With an array, the individual elements behave like those above and are then concatenated together. In this example the results from the :first_name and :last_name helpers will be joined with the string " " between them
|
117
|
+
|
118
|
+
=== Nested Arrays
|
119
|
+
|
120
|
+
format "TopSecret" do |f|
|
121
|
+
f.emails [[:email, "Arthur"], ", ", [:email, "Ford"]]
|
122
|
+
end
|
123
|
+
In this example the :email helper will be called twice, once with the argument "Arthur", then again with the argument "Ford", and these will be joined by the string ", "
|
124
|
+
|
125
|
+
=== Block
|
126
|
+
|
127
|
+
format "TopSecret" do |f|
|
128
|
+
f.secret {|obj| rand(100)}
|
129
|
+
end
|
130
|
+
When using a block the attribute will be replaced by the result of the block.
|
131
|
+
|
132
|
+
== Built-in helpers
|
133
|
+
The built-in helpers use the faker gem to generate data; see the faker documentation for more details
|
134
|
+
|
135
|
+
:name
|
136
|
+
:first_name
|
137
|
+
:last_name
|
138
|
+
:name_prefix
|
139
|
+
:name_suffix
|
140
|
+
|
141
|
+
:phone_number
|
142
|
+
|
143
|
+
:city
|
144
|
+
:city_prefix
|
145
|
+
:city_suffix
|
146
|
+
:secondary_address
|
147
|
+
:street_address
|
148
|
+
:street_name
|
149
|
+
:street_suffix
|
150
|
+
:uk_country
|
151
|
+
:uk_county
|
152
|
+
:uk_postcode
|
153
|
+
:us_state
|
154
|
+
:us_state_abbr
|
155
|
+
:zip_code
|
156
|
+
|
157
|
+
:domain_name
|
158
|
+
:domain_suffix
|
159
|
+
:domain_word
|
160
|
+
:email
|
161
|
+
:free_email
|
162
|
+
:user_name
|
163
|
+
|
164
|
+
:bs
|
165
|
+
:catch_phrase
|
166
|
+
:company_name
|
167
|
+
:company_suffix
|
168
|
+
|
169
|
+
:paragraph
|
170
|
+
:paragraphs
|
171
|
+
:sentence
|
172
|
+
:sentences
|
173
|
+
:words
|
174
|
+
|
175
|
+
== Custom helpers
|
176
|
+
Custom helpers can be defined like this:
|
177
|
+
|
178
|
+
module DataCleaner::Formats
|
179
|
+
helper :embarrassing_secret do
|
180
|
+
"I like " + ["the colour pink", "programming PHP", "Judas Priest"].sample
|
181
|
+
end
|
182
|
+
end
|
65
183
|
|
184
|
+
This can also be used to redefine the built-in helpers.
|
66
185
|
|
67
186
|
== Licence
|
68
187
|
|
data/lib/data_cleaner/cleaner.rb
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
module DataCleaner
|
1
|
+
module DataCleaner # :nodoc:
|
2
2
|
# DataCleaner::Cleaner is a module which can either be mixed-in, or used
|
3
3
|
# standalone to anonymise the data held within objects.
|
4
4
|
#
|
@@ -6,57 +6,6 @@ module DataCleaner
|
|
6
6
|
# DataCleaner::Formats.
|
7
7
|
#
|
8
8
|
module Cleaner
|
9
|
-
# In the format
|
10
|
-
# :specifier => instance
|
11
|
-
# the method specifier will be called on instance
|
12
|
-
#
|
13
|
-
# whereas with
|
14
|
-
# :specifier => [instance, :method]
|
15
|
-
# the method method will be called on instance.
|
16
|
-
#
|
17
|
-
# :specifier is used when describing the format of your object's attributes
|
18
|
-
#
|
19
|
-
MAPPING = {
|
20
|
-
:name => Faker::Name,
|
21
|
-
:first_name => Faker::Name,
|
22
|
-
:last_name => Faker::Name,
|
23
|
-
:name_prefix => [Faker::Name, :prefix],
|
24
|
-
:name_suffix => [Faker::Name, :suffix],
|
25
|
-
|
26
|
-
:phone_number => Faker::PhoneNumber,
|
27
|
-
|
28
|
-
:city => Faker::Address,
|
29
|
-
:city_prefix => Faker::Address,
|
30
|
-
:city_suffix => Faker::Address,
|
31
|
-
:secondary_address => Faker::Address,
|
32
|
-
:street_address => Faker::Address,
|
33
|
-
:street_name => Faker::Address,
|
34
|
-
:street_suffix => Faker::Address,
|
35
|
-
:uk_country => Faker::Address,
|
36
|
-
:uk_county => Faker::Address,
|
37
|
-
:uk_postcode => Faker::Address,
|
38
|
-
:us_state => Faker::Address,
|
39
|
-
:us_state_abbr => Faker::Address,
|
40
|
-
:zip_code => Faker::Address,
|
41
|
-
|
42
|
-
:domain_name => Faker::Internet,
|
43
|
-
:domain_suffix => Faker::Internet,
|
44
|
-
:domain_word => Faker::Internet,
|
45
|
-
:email => Faker::Internet,
|
46
|
-
:free_email => Faker::Internet,
|
47
|
-
:user_name => Faker::Internet,
|
48
|
-
|
49
|
-
:bs => Faker::Company,
|
50
|
-
:catch_phrase => Faker::Company,
|
51
|
-
:company_name => [Faker::Company, :name],
|
52
|
-
:company_suffix => [Faker::Company, :suffix],
|
53
|
-
|
54
|
-
:paragraph => Faker::Lorem,
|
55
|
-
:paragraphs => Faker::Lorem,
|
56
|
-
:sentence => Faker::Lorem,
|
57
|
-
:sentences => Faker::Lorem,
|
58
|
-
:words => Faker::Lorem,
|
59
|
-
}
|
60
9
|
|
61
10
|
# :call-seq: Cleaner.clean_value(attr, klass, instance=nil) -> clean_value
|
62
11
|
#
|
@@ -112,7 +61,7 @@ module DataCleaner
|
|
112
61
|
first
|
113
62
|
when Symbol
|
114
63
|
args.map! {|arg| if arg.is_a?(Proc) then arg.call(object) end || arg}
|
115
|
-
|
64
|
+
DataCleaner::Formats.helpers[first].call(*args)
|
116
65
|
when Array
|
117
66
|
first.map do |e|
|
118
67
|
e = [e] unless e.is_a?(Array)
|
@@ -123,10 +72,5 @@ module DataCleaner
|
|
123
72
|
end
|
124
73
|
end
|
125
74
|
|
126
|
-
def __data__(type, *args)
|
127
|
-
klass, method = DataCleaner::Cleaner::MAPPING[type]
|
128
|
-
klass.send(method || type, *args)
|
129
|
-
end
|
130
|
-
|
131
75
|
end
|
132
76
|
end
|
data/lib/data_cleaner/format.rb
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
module DataCleaner
|
1
|
+
module DataCleaner # :nodoc:
|
2
2
|
# DataCleaner::Format can be used to describe the format of an object's
|
3
3
|
# attributes.
|
4
4
|
#
|
@@ -6,7 +6,7 @@ module DataCleaner
|
|
6
6
|
# f = DataCleaner::Format.new("Person")
|
7
7
|
# f.name [:first_name, " ", :last_name]
|
8
8
|
#
|
9
|
-
# You most likely do not want to use this class directly,
|
9
|
+
# You most likely do not want to use this class directly, but instead through
|
10
10
|
# DataCleaner::Formats.
|
11
11
|
#
|
12
12
|
class Format < if defined? BasicObject then BasicObject else Object end
|
data/lib/data_cleaner/formats.rb
CHANGED
@@ -1,17 +1,4 @@
|
|
1
|
-
|
2
|
-
class Symbol
|
3
|
-
def to_proc
|
4
|
-
Proc.new(&method(:__apply__))
|
5
|
-
end
|
6
|
-
|
7
|
-
private
|
8
|
-
def __apply__(*args)
|
9
|
-
args.shift.send(self, *args)
|
10
|
-
end
|
11
|
-
end
|
12
|
-
end
|
13
|
-
|
14
|
-
module DataCleaner
|
1
|
+
module DataCleaner # :nodoc:
|
15
2
|
# DataCleaner::Formats provides a DSL for describing, and method for looking
|
16
3
|
# up the format of object's attributes, such that they can be replaced with
|
17
4
|
# fake data, but still pass validation.
|
@@ -33,8 +20,28 @@ module DataCleaner
|
|
33
20
|
# end
|
34
21
|
#
|
35
22
|
module Formats
|
36
|
-
class << self; attr_accessor :formats end
|
23
|
+
class << self; attr_accessor :formats, :helpers end
|
37
24
|
self.formats = {}
|
25
|
+
self.helpers = {}
|
26
|
+
|
27
|
+
# :call-seq: Formats.helper(name) {|*args| block } -> helper
|
28
|
+
#
|
29
|
+
# Define a format helper, which can then be used in a format block
|
30
|
+
#
|
31
|
+
# Example:
|
32
|
+
# module DataCleaner::Formats
|
33
|
+
# helper :ip_address do
|
34
|
+
# Array.new(4).map {rand(255)}.join(".")
|
35
|
+
# end
|
36
|
+
#
|
37
|
+
# format "Server" do |f|
|
38
|
+
# f.ip :ip_address
|
39
|
+
# end
|
40
|
+
# end
|
41
|
+
#
|
42
|
+
def self.helper(name, &block)
|
43
|
+
helpers[name] = block
|
44
|
+
end
|
38
45
|
|
39
46
|
# :call-seq: Formats.format(klass) {|format| block } -> format
|
40
47
|
#
|
@@ -59,5 +66,61 @@ module DataCleaner
|
|
59
66
|
end
|
60
67
|
end
|
61
68
|
|
69
|
+
# Set up default helpers
|
70
|
+
#
|
71
|
+
# In the format
|
72
|
+
# :name => receiver
|
73
|
+
# the method name will be called on receiver
|
74
|
+
#
|
75
|
+
# whereas with
|
76
|
+
# :name => [receiver, :method]
|
77
|
+
# the method method will be called on receiver.
|
78
|
+
#
|
79
|
+
# :name is used when describing the format of your object's attributes
|
80
|
+
#
|
81
|
+
{
|
82
|
+
:name => Faker::Name,
|
83
|
+
:first_name => Faker::Name,
|
84
|
+
:last_name => Faker::Name,
|
85
|
+
:name_prefix => [Faker::Name, :prefix],
|
86
|
+
:name_suffix => [Faker::Name, :suffix],
|
87
|
+
|
88
|
+
:phone_number => Faker::PhoneNumber,
|
89
|
+
|
90
|
+
:city => Faker::Address,
|
91
|
+
:city_prefix => Faker::Address,
|
92
|
+
:city_suffix => Faker::Address,
|
93
|
+
:secondary_address => Faker::Address,
|
94
|
+
:street_address => Faker::Address,
|
95
|
+
:street_name => Faker::Address,
|
96
|
+
:street_suffix => Faker::Address,
|
97
|
+
:uk_country => Faker::Address,
|
98
|
+
:uk_county => Faker::Address,
|
99
|
+
:uk_postcode => Faker::Address,
|
100
|
+
:us_state => Faker::Address,
|
101
|
+
:us_state_abbr => Faker::Address,
|
102
|
+
:zip_code => Faker::Address,
|
103
|
+
|
104
|
+
:domain_name => Faker::Internet,
|
105
|
+
:domain_suffix => Faker::Internet,
|
106
|
+
:domain_word => Faker::Internet,
|
107
|
+
:email => Faker::Internet,
|
108
|
+
:free_email => Faker::Internet,
|
109
|
+
:user_name => Faker::Internet,
|
110
|
+
|
111
|
+
:bs => Faker::Company,
|
112
|
+
:catch_phrase => Faker::Company,
|
113
|
+
:company_name => [Faker::Company, :name],
|
114
|
+
:company_suffix => [Faker::Company, :suffix],
|
115
|
+
|
116
|
+
:paragraph => Faker::Lorem,
|
117
|
+
:paragraphs => Faker::Lorem,
|
118
|
+
:sentence => Faker::Lorem,
|
119
|
+
:sentences => Faker::Lorem,
|
120
|
+
:words => Faker::Lorem,
|
121
|
+
}.each do |name, (receiver, method)|
|
122
|
+
helper(name, &receiver.method(method || name))
|
123
|
+
end
|
124
|
+
|
62
125
|
end
|
63
126
|
end
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: data_cleaner
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 23
|
5
5
|
prerelease: false
|
6
6
|
segments:
|
7
7
|
- 0
|
8
8
|
- 0
|
9
|
-
-
|
10
|
-
version: 0.0.
|
9
|
+
- 4
|
10
|
+
version: 0.0.4
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- Matthew Sadler
|
@@ -15,7 +15,7 @@ autorequire:
|
|
15
15
|
bindir: bin
|
16
16
|
cert_chain: []
|
17
17
|
|
18
|
-
date: 2011-02-
|
18
|
+
date: 2011-02-09 00:00:00 +00:00
|
19
19
|
default_executable:
|
20
20
|
dependencies:
|
21
21
|
- !ruby/object:Gem::Dependency
|