data_cleaner 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
data/README.txt ADDED
@@ -0,0 +1,77 @@
1
+ = DataCleaner
2
+
3
+ DataCleaner is a library to aid you in anonymising your data, for example when you are attempting to reproduce a bug in development and only live data will do, but it's not safe to have a whole database of customer names, email addresses, etc. on your development machine.
4
+
5
+ DataCleaner wants to make sure your data still looks real, and importantly, passes any validation your code might have. To achieve this it provides a DSL for you to specify the format of the data, along with helpers (using the faker gem) to generate common data.
6
+
7
+ Only data that needs anonymising has to be specified; foreign keys and other non-customer-identifiable data should be left alone.
8
+
9
+ == Installation
10
+
11
+ gem install data_cleaner
12
+
13
+ == Usage
14
+
15
+ require 'rubygems'
16
+ require 'data_cleaner'
17
+
18
+ class TopSecret
19
+ attr_accessor :name, :email, :reference, :secret, :date
20
+
21
+ def initialize(name, email, reference, secret, date)
22
+ @name = name
23
+ @email = email
24
+ @reference = reference
25
+ @secret = secret
26
+ @date = date
27
+ end
28
+
29
+ def valid?
30
+ name.match(/^[a-z]+ [a-z]+$/i) &&
31
+ reference.match(/^[a-z]{3}[0-9]{1,5}$/) &&
32
+ date.is_a?(Time) || false
33
+ end
34
+
35
+ end
36
+
37
+ module DataCleaner::Formats
38
+ format "TopSecret" do |f|
39
+ f.name [:first_name, " ", :last_name]
40
+ f.email :email, &:name # passes the object's name to the email method
41
+ f.reference do |secret|
42
+ "#{secret.name[0..2].downcase}#{secret.date.strftime("%y")}"
43
+ end
44
+ f.secret "test"
45
+ end
46
+ end
47
+
48
+ secret = TopSecret.new("Matthew Sadler", "mat@foo.com", "mat09", "I like kittens", Time.now)
49
+ puts secret.inspect
50
+ puts "is valid? #{secret.valid?}"
51
+ puts
52
+
53
+ clean = DataCleaner::Cleaner.clean!(secret)
54
+
55
+ puts clean.inspect
56
+ puts "is valid? #{clean.valid?}"
57
+
58
+ prints:
59
+
60
+ #<TopSecret:0x1015f7830 @email="mat@foo.com", @date=Mon Jan 17 16:53:19 +0000 2011, @name="Matthew Sadler", @secret="I like kittens", @reference="mat09">
61
+ is valid? true
62
+
63
+ #<TopSecret:0x1015f7830 @email="javier.kuhlman@franeckikonopelski.co.uk", @date=Mon Jan 17 16:53:19 +0000 2011, @name="Javier Kuhlman", @secret="test", @reference="jav11">
64
+ is valid? true
65
+
66
+
67
+ == Licence
68
+
69
+ (The MIT License)
70
+
71
+ Copyright © 2011 Matthew Sadler
72
+
73
+ Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
74
+
75
+ The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
76
+
77
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,121 @@
1
+ module DataCleaner
2
+ # DataCleaner::Cleaner is a module which can either be mixed-in, or used
3
+ # standalone to anonymise the data held within objects.
4
+ #
5
+ # DataCleaner::Cleaner relies on the object formats specified with
6
+ # DataCleaner::Formats.
7
+ #
8
+ module Cleaner
9
+ # In the format
10
+ # :specifier => instance
11
+ # the method specifier will be called on instance
12
+ #
13
+ # whereas with
14
+ # :specifier => [instance, :method]
15
+ # the method method will be called on instance.
16
+ #
17
+ # :specifier is used when describing the format of your object's attributes
18
+ #
19
+ MAPPING = {
20
+ :name => Faker::Name,
21
+ :first_name => Faker::Name,
22
+ :last_name => Faker::Name,
23
+ :name_prefix => [Faker::Name, :prefix],
24
+ :name_suffix => [Faker::Name, :suffix],
25
+
26
+ :phone_number => Faker::PhoneNumber,
27
+
28
+ :city => Faker::Address,
29
+ :city_prefix => Faker::Address,
30
+ :city_suffix => Faker::Address,
31
+ :secondary_address => Faker::Address,
32
+ :street_address => Faker::Address,
33
+ :street_name => Faker::Address,
34
+ :street_suffix => Faker::Address,
35
+ :uk_country => Faker::Address,
36
+ :uk_county => Faker::Address,
37
+ :uk_postcode => Faker::Address,
38
+ :us_state => Faker::Address,
39
+ :us_state_abbr => Faker::Address,
40
+ :zip_code => Faker::Address,
41
+
42
+ :domain_name => Faker::Internet,
43
+ :domain_suffix => Faker::Internet,
44
+ :domain_word => Faker::Internet,
45
+ :email => Faker::Internet,
46
+ :free_email => Faker::Internet,
47
+ :user_name => Faker::Internet,
48
+
49
+ :bs => Faker::Company,
50
+ :catch_phrase => Faker::Company,
51
+ :company_name => [Faker::Company, :name],
52
+ :company_suffix => [Faker::Company, :suffix],
53
+
54
+ :paragraph => Faker::Lorem,
55
+ :paragraphs => Faker::Lorem,
56
+ :sentence => Faker::Lorem,
57
+ :sentences => Faker::Lorem,
58
+ :words => Faker::Lorem,
59
+ }
60
+
61
+ extend self
62
+
63
+ # :call-seq: Cleaner.clean(obj) -> new_obj
64
+ # obj.clean -> new_obj
65
+ #
66
+ # Returns an anonymised copy of obj.
67
+ #
68
+ # Relies on obj.dup.
69
+ #
70
+ def __clean__(object=self)
71
+ __clean__!(object.dup)
72
+ end
73
+ unless defined? clean
74
+ alias clean __clean__
75
+ end
76
+
77
+ # :call-seq: Cleaner.clean!(obj) -> obj
78
+ # obj.clean! -> obj
79
+ #
80
+ # Anonymises obj.
81
+ #
82
+ def __clean__!(object=self)
83
+ format = DataCleaner::Formats.formats[object.class.name]
84
+
85
+ format.attributes.each do |attribute, arguments|
86
+ object.send(:"#{attribute}=", __replacement__(arguments, object))
87
+ end
88
+ object
89
+ end
90
+ unless defined? clean!
91
+ alias clean! __clean__!
92
+ end
93
+
94
+ private
95
+ def __replacement__(args, object)
96
+ args = args.dup
97
+ first = args.shift
98
+
99
+ case first
100
+ when String
101
+ first
102
+ when Symbol
103
+ args.map! {|arg| if arg.is_a?(Proc) then arg.call(object) end || arg}
104
+ __data__(first, *args)
105
+ when Array
106
+ first.map do |e|
107
+ e = [e] unless e.is_a?(Array)
108
+ __replacement__(e, object)
109
+ end.join
110
+ when Proc
111
+ first.call(object)
112
+ end
113
+ end
114
+
115
+ def __data__(type, *args)
116
+ klass, method = DataCleaner::Cleaner::MAPPING[type]
117
+ klass.send(method || type, *args)
118
+ end
119
+
120
+ end
121
+ end
@@ -0,0 +1,32 @@
1
+ module DataCleaner
2
+ # DataCleaner::Format can be used to describe the format of an object's
3
+ # attributes.
4
+ #
5
+ # Example:
6
+ # f = DataCleaner::Format.new("Person")
7
+ # f.name [:first_name, " ", :last_name]
8
+ #
9
+ # You most likely do not want to use this class directly, but instead through
10
+ # DataCleaner::Formats.
11
+ #
12
+ class Format < if defined? BasicObject then BasicObject else Object end
13
+ attr_accessor :klass, :attributes
14
+
15
+ # :call-seq: Format.new(klass) -> format
16
+ #
17
+ def initialize(klass)
18
+ @klass = klass
19
+ @attributes = []
20
+ end
21
+
22
+ # :call-seq: format.attribute(:attr [, args...]) {|obj| block } -> format
23
+ #
24
+ def attribute(name, *args, &block)
25
+ args.push(block) if block
26
+ attributes.push([name, args])
27
+ self
28
+ end
29
+ alias method_missing attribute
30
+
31
+ end
32
+ end
@@ -0,0 +1,50 @@
1
+ unless :Symbol.respond_to?(:to_proc)
2
+ class Symbol
3
+ def to_proc
4
+ Proc.new(&method(:__apply__))
5
+ end
6
+
7
+ private
8
+ def __apply__(obj, *args)
9
+ obj.send(self, *args)
10
+ end
11
+ end
12
+ end
13
+
14
+ module DataCleaner
15
+ # DataCleaner::Formats provides a DSL for describing, and a method for looking
16
+ # up, the format of an object's attributes, such that they can be replaced with
17
+ # fake data, but still pass validation.
18
+ #
19
+ # Not all attributes need be specified, only those that need be replaced.
20
+ #
21
+ # Attributes will be processed in the order they are specified.
22
+ #
23
+ # Example:
24
+ # module DataCleaner::Formats
25
+ # format "Person" do |f|
26
+ # f.name [:first_name, " ", :last_name]
27
+ # f.email :email, &:name # passes the object's name to the email generator method
28
+ # # custom format, block is provided with the instance
29
+ # f.reference do |instance|
30
+ # "#{instance.name[0..2].downcase}#{rand(89) + 10}"
31
+ # end
32
+ # end
33
+ # end
34
+ #
35
+ module Formats
36
+ class << self; attr_accessor :formats end
37
+ self.formats = {}
38
+
39
+ # :call-seq: format(klass) {|format| block } -> format
40
+ #
41
+ # Yields a format object, which can be used to describe the format of klass.
42
+ #
43
+ def self.format(klass)
44
+ obj = Format.new(klass)
45
+ yield obj
46
+ formats[klass.to_s] = obj
47
+ end
48
+
49
+ end
50
+ end
@@ -0,0 +1,6 @@
1
+ require 'rubygems'
2
+ require 'faker'
3
+
4
+ require "#{File.dirname(__FILE__)}/data_cleaner/cleaner"
5
+ require "#{File.dirname(__FILE__)}/data_cleaner/format"
6
+ require "#{File.dirname(__FILE__)}/data_cleaner/formats"
metadata ADDED
@@ -0,0 +1,88 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: data_cleaner
3
+ version: !ruby/object:Gem::Version
4
+ hash: 29
5
+ prerelease: false
6
+ segments:
7
+ - 0
8
+ - 0
9
+ - 1
10
+ version: 0.0.1
11
+ platform: ruby
12
+ authors:
13
+ - Matthew Sadler
14
+ autorequire:
15
+ bindir: bin
16
+ cert_chain: []
17
+
18
+ date: 2011-01-17 00:00:00 +00:00
19
+ default_executable:
20
+ dependencies:
21
+ - !ruby/object:Gem::Dependency
22
+ name: faker
23
+ prerelease: false
24
+ requirement: &id001 !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ~>
28
+ - !ruby/object:Gem::Version
29
+ hash: 13
30
+ segments:
31
+ - 0
32
+ - 3
33
+ version: "0.3"
34
+ type: :runtime
35
+ version_requirements: *id001
36
+ description: A Ruby library to aid in removing sensitive data from objects
37
+ email: mat@sourcetagsandcodes.com
38
+ executables: []
39
+
40
+ extensions: []
41
+
42
+ extra_rdoc_files:
43
+ - README.txt
44
+ files:
45
+ - lib/data_cleaner/cleaner.rb
46
+ - lib/data_cleaner/format.rb
47
+ - lib/data_cleaner/formats.rb
48
+ - lib/data_cleaner.rb
49
+ - README.txt
50
+ has_rdoc: true
51
+ homepage: http://github.com/matsadler/data_cleaner
52
+ licenses: []
53
+
54
+ post_install_message:
55
+ rdoc_options:
56
+ - --main
57
+ - README.txt
58
+ - --charset
59
+ - utf-8
60
+ require_paths:
61
+ - lib
62
+ required_ruby_version: !ruby/object:Gem::Requirement
63
+ none: false
64
+ requirements:
65
+ - - ">="
66
+ - !ruby/object:Gem::Version
67
+ hash: 3
68
+ segments:
69
+ - 0
70
+ version: "0"
71
+ required_rubygems_version: !ruby/object:Gem::Requirement
72
+ none: false
73
+ requirements:
74
+ - - ">="
75
+ - !ruby/object:Gem::Version
76
+ hash: 3
77
+ segments:
78
+ - 0
79
+ version: "0"
80
+ requirements: []
81
+
82
+ rubyforge_project:
83
+ rubygems_version: 1.3.7
84
+ signing_key:
85
+ specification_version: 3
86
+ summary: A Ruby library to aid in removing sensitive data from objects
87
+ test_files: []
88
+