data-anonymization 0.1.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (83) hide show
  1. data/.documentup.json +8 -0
  2. data/.gitignore +20 -0
  3. data/.rspec +2 -0
  4. data/.rvmrc +1 -0
  5. data/.travis.yml +6 -0
  6. data/Gemfile +12 -0
  7. data/LICENSE.txt +22 -0
  8. data/README.md +256 -0
  9. data/Rakefile +9 -0
  10. data/blacklist_dsl.rb +19 -0
  11. data/blacklist_nosql_dsl.rb +36 -0
  12. data/data-anonymization.gemspec +22 -0
  13. data/lib/core/database.rb +36 -0
  14. data/lib/core/dsl.rb +16 -0
  15. data/lib/core/field.rb +18 -0
  16. data/lib/data-anonymization.rb +12 -0
  17. data/lib/strategy/base.rb +67 -0
  18. data/lib/strategy/blacklist.rb +18 -0
  19. data/lib/strategy/field/anonymize_time.rb +57 -0
  20. data/lib/strategy/field/anonymous.rb +21 -0
  21. data/lib/strategy/field/date_time_delta.rb +24 -0
  22. data/lib/strategy/field/default_anon.rb +28 -0
  23. data/lib/strategy/field/distinct_column_values.rb +25 -0
  24. data/lib/strategy/field/fields.rb +23 -0
  25. data/lib/strategy/field/gmail_template.rb +17 -0
  26. data/lib/strategy/field/lorem_ipsum.rb +29 -0
  27. data/lib/strategy/field/random_boolean.rb +19 -0
  28. data/lib/strategy/field/random_email.rb +31 -0
  29. data/lib/strategy/field/random_first_name.rb +18 -0
  30. data/lib/strategy/field/random_float_delta.rb +24 -0
  31. data/lib/strategy/field/random_full_name.rb +28 -0
  32. data/lib/strategy/field/random_int.rb +23 -0
  33. data/lib/strategy/field/random_integer_delta.rb +21 -0
  34. data/lib/strategy/field/random_last_name.rb +19 -0
  35. data/lib/strategy/field/random_mailinator_email.rb +20 -0
  36. data/lib/strategy/field/random_phone_number.rb +24 -0
  37. data/lib/strategy/field/random_selection.rb +23 -0
  38. data/lib/strategy/field/random_string.rb +22 -0
  39. data/lib/strategy/field/random_user_name.rb +23 -0
  40. data/lib/strategy/field/string_template.rb +22 -0
  41. data/lib/strategy/field/user_name_template.rb +22 -0
  42. data/lib/strategy/field/whitelist.rb +17 -0
  43. data/lib/strategy/strategies.rb +4 -0
  44. data/lib/strategy/whitelist.rb +21 -0
  45. data/lib/tasks/rake_tasks.rb +19 -0
  46. data/lib/utils/database.rb +53 -0
  47. data/lib/utils/logging.rb +29 -0
  48. data/lib/utils/random_int.rb +15 -0
  49. data/lib/utils/random_string.rb +14 -0
  50. data/lib/utils/resource.rb +13 -0
  51. data/lib/version.rb +3 -0
  52. data/resources/first_names.txt +500 -0
  53. data/resources/last_names.txt +500 -0
  54. data/spec/acceptance/rdbms_blacklist_spec.rb +30 -0
  55. data/spec/acceptance/rdbms_whitelist_spec.rb +50 -0
  56. data/spec/spec_helper.rb +26 -0
  57. data/spec/strategy/field/anonymize_time_spec.rb +23 -0
  58. data/spec/strategy/field/date_time_delta_spec.rb +43 -0
  59. data/spec/strategy/field/distinct_column_values_spec.rb +22 -0
  60. data/spec/strategy/field/gmail_template_spec.rb +14 -0
  61. data/spec/strategy/field/lorem_ipsum_spec.rb +27 -0
  62. data/spec/strategy/field/random_boolean_spec.rb +16 -0
  63. data/spec/strategy/field/random_email_spec.rb +18 -0
  64. data/spec/strategy/field/random_first_name_spec.rb +14 -0
  65. data/spec/strategy/field/random_float_delta_spec.rb +21 -0
  66. data/spec/strategy/field/random_full_name_spec.rb +23 -0
  67. data/spec/strategy/field/random_int_spec.rb +28 -0
  68. data/spec/strategy/field/random_integer_delta_spec.rb +23 -0
  69. data/spec/strategy/field/random_last_name_spec.rb +14 -0
  70. data/spec/strategy/field/random_mailinator_email_spec.rb +21 -0
  71. data/spec/strategy/field/random_phone_number_spec.rb +35 -0
  72. data/spec/strategy/field/random_selection_spec.rb +36 -0
  73. data/spec/strategy/field/random_string_spec.rb +23 -0
  74. data/spec/strategy/field/random_user_name_spec.rb +23 -0
  75. data/spec/strategy/field/string_template_spec.rb +15 -0
  76. data/spec/strategy/field/user_name_template_spec.rb +13 -0
  77. data/spec/strategy/field/whitelist_spec.rb +21 -0
  78. data/spec/support/customer_sample.rb +43 -0
  79. data/spec/utils/database_spec.rb +26 -0
  80. data/spec/utils/random_int_spec.rb +9 -0
  81. data/spec/utils/random_string_spec.rb +8 -0
  82. data/whitelist_dsl.rb +44 -0
  83. metadata +192 -0
@@ -0,0 +1,36 @@
require "spec_helper"

# Specs for the RandomSelection field strategy: whatever it returns must come
# from the value(s) handed to its constructor.
describe DataAnon::Strategy::Field::RandomSelection do

  let(:field) { DataAnon::Core::Field.new('state', 'New Delhi', 1, nil) }

  # `described_class` replaces the former `RandomSelection = ...` alias.
  # A constant assigned inside a describe block is defined at the top level
  # and leaks into every other spec loaded in the same process.
  # Each nested group supplies its own `states`.
  let(:anonymized_value) { described_class.new(states).anonymize(field) }

  describe 'more than one values in predefined list' do
    let(:states) { ['Maharashtra','Gujrat','Karnataka'] }

    it { states.should include(anonymized_value) }
  end

  describe 'only one value in list' do
    let(:states) { ['Maharashtra'] }

    it { anonymized_value.should == 'Maharashtra' }
  end

  # A bare string (not wrapped in an array) is accepted as the sole candidate.
  describe 'string value' do
    let(:states) { 'Maharashtra' }

    it { anonymized_value.should == 'Maharashtra' }
  end

end
@@ -0,0 +1,23 @@
require "spec_helper"

# Specs for the RandomString field strategy: the result must differ from the
# original value while keeping its length and its number of words.
describe DataAnon::Strategy::Field::RandomString do

  # `described_class` avoids leaking a top-level `RandomString` constant out
  # of this spec file. Each nested group supplies its own `field`.
  let(:anonymized_string) { described_class.new.anonymize(field) }

  describe 'anonymized string must not be the same as original string' do
    let(:field) { DataAnon::Core::Field.new('string_field','fakeString',1,nil) }

    # `eq` compares by value. The previous `equal` matcher compares object
    # identity, so `should_not equal` passed even when the strategy returned
    # a different String object with exactly the same characters.
    it { anonymized_string.should_not eq(field.value) }
    it { anonymized_string.length.should eq(field.value.length) }
  end

  describe 'anonymized name should have same number of words as original' do
    let(:field) { DataAnon::Core::Field.new('string_field','Fake Longer String Test',1,nil) }

    it { anonymized_string.split(' ').size.should eq(field.value.split(' ').size) }
  end

end
@@ -0,0 +1,23 @@
require "spec_helper"

# Specs for the RandomUserName field strategy: the result must be a new
# alphanumeric user name whose length lies within the configured bounds.
describe DataAnon::Strategy::Field::RandomUserName do

  # `described_class` is used throughout so that no top-level
  # `RandomUserName` constant leaks out of this spec file.
  let(:field) { DataAnon::Core::Field.new('username','fakeUserName',1,nil) }

  describe 'anonymized user name should not be the same as original user name' do
    let(:anonymized_username) { described_class.new.anonymize(field) }

    # `eq` compares by value; `equal` (object identity) could never fail here.
    it { anonymized_username.should_not eq(field.value) }
    it { anonymized_username.length.should be_between(5,10) }
    # \A and \z anchor the whole string. ^ and $ only anchor a line, so a
    # multi-line value with one alphanumeric line would have matched.
    it { anonymized_username.should match(/\A[a-zA-Z0-9]*\z/) }
  end

  describe 'different length for username' do
    let(:anonymized_username) { described_class.new(15,20).anonymize(field) }

    it { anonymized_username.length.should be_between(15,20) }
  end

end
@@ -0,0 +1,15 @@
require "spec_helper"

# Specs for the StringTemplate field strategy.
describe DataAnon::Strategy::Field::StringTemplate do

  # The third constructor argument (3456) is the value the template's
  # `row_number` placeholder is expected to render as, per the expectation below.
  let(:field) { DataAnon::Core::Field.new('state', 'New Delhi', 3456, nil) }

  describe 'should return same string value as StringTemplate' do
    # The template is deliberately single-quoted: `#{row_number}` must reach
    # the strategy verbatim instead of being interpolated by Ruby here.
    # `described_class` replaces the former top-level `StringTemplate` alias,
    # which leaked out of this file as a global constant.
    let(:anonymized_value) { described_class.new('Sunit #{row_number} Parekh').anonymize(field) }

    it { anonymized_value.should == 'Sunit 3456 Parekh' }
  end

end
@@ -0,0 +1,13 @@
require "spec_helper"

# Specs for the UserNameTemplate field strategy.
describe DataAnon::Strategy::Field::UserNameTemplate do

  # Row number 100 is what the template placeholder should be replaced with.
  let(:field) { DataAnon::Core::Field.new('username', 'Chuck Norris', 100, nil) }

  describe 'should return same string value as StringTemplate' do
    # Single quotes keep `#{row_number}` literal so the strategy performs the
    # substitution. `described_class` is used instead of a
    # `UserNameTemplate = ...` alias, which would be defined as a global
    # constant and leak into other specs.
    let(:anonymized_username) { described_class.new('Rajnikanth #{row_number}').anonymize(field) }

    it { anonymized_username.should == 'Rajnikanth 100' }
  end

end
@@ -0,0 +1,21 @@
require "spec_helper"

# Specs for the Whitelist field strategy: the field value must come back
# unchanged, whatever its type.
describe DataAnon::Strategy::Field::Whitelist do

  # `described_class` replaces the former top-level `Whitelist = ...` alias,
  # which leaked a global constant out of this file.
  # Each nested group supplies its own `field`.
  let(:anonymized_value) { described_class.new.anonymize(field) }

  describe 'should return same string value as whitelist' do
    let(:field) { DataAnon::Core::Field.new('state', 'New Delhi', 1, nil) }

    it { anonymized_value.should == 'New Delhi' }
  end

  describe 'should return same date value as whitelist' do
    # Memoized by `let`, so the field and the expectation see the same Time.
    let(:dob) { Time.now }
    let(:field) { DataAnon::Core::Field.new('DateOfBirth', dob, 1, nil) }

    it { anonymized_value.should == dob }
  end

end
@@ -0,0 +1,43 @@
require 'fileutils'

# Spec support helper that prepares a throwaway `customers` table and inserts
# a sample row into it.
class CustomerSample

  # Migration that creates the `customers` table without the default `id`
  # column; `cust_id` is declared explicitly instead.
  class CreateCustomer < ActiveRecord::Migration
    def up
      create_table :customers, { :id => false } do |t|
        t.integer :cust_id, :primary => true
        t.string :first_name
        t.string :last_name
        t.date :birth_date
        t.string :address
        t.string :state
        t.string :zipcode
        t.string :phone
        t.string :email
      end
    end
  end

  # Deletes every `tmp/*.sqlite` file and makes sure the `tmp` directory
  # exists. Paths are relative to the current working directory.
  #
  # FileUtils is used instead of shelling out to `rm`/`mkdir`, so this works
  # without a POSIX shell and raises on failure rather than ignoring the
  # exit status.
  def self.clean
    FileUtils.rm_f Dir.glob('tmp/*.sqlite')
    FileUtils.mkdir_p 'tmp'
  end

  # Connects ActiveRecord::Base to the database described by
  # +connection_spec+ and runs the CreateCustomer migration upwards, with
  # migration output silenced.
  def self.create_schema connection_spec
    ActiveRecord::Migration.verbose = false
    ActiveRecord::Base.establish_connection connection_spec
    CreateCustomer.migrate :up
  end

  # Default row used by insert_record.
  SAMPLE_DATA = {:cust_id => 100, :first_name => "Sunit", :last_name => "Parekh",
                 :birth_date => Date.new(1977,7,8), :address => "F 501 Shanti Nagar",
                 :state => "Maharastra", :zipcode => "411048", :phone => "9923700662",
                 :email => "parekh.sunit@gmail.com"}

  # Inserts one row into `customers` through a temporary connection.
  #
  # connection_spec - connection settings passed to
  #                   DataAnon::Utils::TempDatabase.establish_connection.
  # data_hash       - attributes for the new row (defaults to SAMPLE_DATA).
  #
  # Raises whatever `save!` raises when the record cannot be persisted.
  def self.insert_record connection_spec, data_hash = SAMPLE_DATA
    DataAnon::Utils::TempDatabase.establish_connection connection_spec
    source = DataAnon::Utils::BaseTable.create_table 'customers', 'cust_id',DataAnon::Utils::TempDatabase
    cust = source.new data_hash
    # The key is assigned explicitly after construction.
    # NOTE(review): presumably because mass assignment does not set the
    # primary key — confirm against BaseTable.create_table.
    cust.cust_id = data_hash[:cust_id]
    cust.save!
  end

end
@@ -0,0 +1,26 @@
require "spec_helper"

# Connectivity specs for the source and destination database helpers, run
# against the bundled Chinook sample SQLite files.
describe "Utils" do

  # Re-establish both connections before every example.
  before(:each) do
    source = {:adapter => 'sqlite3', :database => 'sample-data/chinook.sqlite'}
    DataAnon::Utils::SourceDatabase.establish_connection source

    destination = {:adapter => 'sqlite3', :database => 'sample-data/chinook-empty.sqlite'}
    DataAnon::Utils::DestinationDatabase.establish_connection destination
  end

  it "should test the connection to source database" do
    album = DataAnon::Utils::SourceTable.create "Album", "AlbumId"
    album.count.should > 0
    # Previously a bare `album.all.length > 0`: the comparison result was
    # discarded, so nothing was asserted.
    album.all.length.should > 0
  end

  it "should test the connection to destination database" do
    album = DataAnon::Utils::DestinationTable.create "Album", "AlbumId"
    album.count.should == 0
    # Previously a bare `album.all.length == 0`, which asserted nothing.
    album.all.length.should == 0
  end

end
@@ -0,0 +1,9 @@
require "spec_helper"

# Spec for the RandomInt utility.
describe "Number Utils" do

  # The generated integer must fall within the requested bounds, inclusive.
  it "should generate random int between provided range" do
    DataAnon::Utils::RandomInt.generate(5,10).should be_between(5,10)
  end

end
@@ -0,0 +1,8 @@
require "spec_helper"

# Spec for the RandomString utility.
describe "String Utils" do

  # Asking for 10 characters must yield a string of exactly that length.
  it "should generate random string of given length" do
    generated = DataAnon::Utils::RandomString.generate(10)
    generated.length.should equal 10
  end

end
@@ -0,0 +1,44 @@
# Sample whitelist-strategy script for the Chinook sample database.

# Clean the destination database on every call.
system("rake empty_dest")

require 'data-anonymization'

# Short alias for the field-strategy namespace used below.
FS = DataAnon::Strategy::Field

DataAnon::Utils::Logging.logger.level = Logger::INFO

database('Chinook') do
  strategy(DataAnon::Strategy::Whitelist)
  source_db(:adapter => 'sqlite3', :database => 'sample-data/chinook.sqlite')
  destination_db(:adapter => 'sqlite3', :database => 'sample-data/chinook-empty.sqlite')

  # The template is single-quoted so `#{row_number}` reaches the strategy
  # verbatim instead of being interpolated by Ruby.
  default_field_strategies(:string => FS::StringTemplate.new('Sunit #{row_number} Parekh'))

  table('Genre') do
    primary_key('GenreId')
    whitelist('GenreId')
    # Inline block strategy: append a marker to the original value.
    anonymize('Name') { |field| field.value + " test" }
  end

  table('MediaType') do
    primary_key('MediaTypeId')
    # Returning the value untouched is the same as whitelisting the column.
    anonymize('MediaTypeId') { |field| field.value }
    anonymize('Name').using(FS::StringTemplate.new('Media Type #{row_number}'))
  end

  table('Employee') do
    primary_key('EmployeeId')
    whitelist('EmployeeId')
    anonymize('BirthDate').using(FS::DateTimeDelta.new(1,1))
  end

  table('Customer') do
    primary_key('CustomerId')
    anonymize('Phone').using(FS::RandomPhoneNumber.new)
  end
end
metadata ADDED
@@ -0,0 +1,192 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: data-anonymization
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.2
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Sunit Parekh
9
+ - Anand Agrawal
10
+ - Satyam Agarwala
11
+ autorequire:
12
+ bindir: bin
13
+ cert_chain: []
14
+ date: 2012-08-14 00:00:00.000000000 Z
15
+ dependencies:
16
+ - !ruby/object:Gem::Dependency
17
+ name: activerecord
18
+ requirement: !ruby/object:Gem::Requirement
19
+ none: false
20
+ requirements:
21
+ - - ~>
22
+ - !ruby/object:Gem::Version
23
+ version: 3.2.8
24
+ type: :runtime
25
+ prerelease: false
26
+ version_requirements: !ruby/object:Gem::Requirement
27
+ none: false
28
+ requirements:
29
+ - - ~>
30
+ - !ruby/object:Gem::Version
31
+ version: 3.2.8
32
+ - !ruby/object:Gem::Dependency
33
+ name: activesupport
34
+ requirement: !ruby/object:Gem::Requirement
35
+ none: false
36
+ requirements:
37
+ - - ~>
38
+ - !ruby/object:Gem::Version
39
+ version: 3.2.8
40
+ type: :runtime
41
+ prerelease: false
42
+ version_requirements: !ruby/object:Gem::Requirement
43
+ none: false
44
+ requirements:
45
+ - - ~>
46
+ - !ruby/object:Gem::Version
47
+ version: 3.2.8
48
+ description: Data anonymization tool for RDBMS databases
49
+ email:
50
+ - parekh.sunit@gmail.com
51
+ - anand.agrawal84@gmail.com
52
+ - satyamag@gmail.com
53
+ executables: []
54
+ extensions: []
55
+ extra_rdoc_files: []
56
+ files:
57
+ - .documentup.json
58
+ - .gitignore
59
+ - .rspec
60
+ - .rvmrc
61
+ - .travis.yml
62
+ - Gemfile
63
+ - LICENSE.txt
64
+ - README.md
65
+ - Rakefile
66
+ - blacklist_dsl.rb
67
+ - blacklist_nosql_dsl.rb
68
+ - data-anonymization.gemspec
69
+ - lib/core/database.rb
70
+ - lib/core/dsl.rb
71
+ - lib/core/field.rb
72
+ - lib/data-anonymization.rb
73
+ - lib/strategy/base.rb
74
+ - lib/strategy/blacklist.rb
75
+ - lib/strategy/field/anonymize_time.rb
76
+ - lib/strategy/field/anonymous.rb
77
+ - lib/strategy/field/date_time_delta.rb
78
+ - lib/strategy/field/default_anon.rb
79
+ - lib/strategy/field/distinct_column_values.rb
80
+ - lib/strategy/field/fields.rb
81
+ - lib/strategy/field/gmail_template.rb
82
+ - lib/strategy/field/lorem_ipsum.rb
83
+ - lib/strategy/field/random_boolean.rb
84
+ - lib/strategy/field/random_email.rb
85
+ - lib/strategy/field/random_first_name.rb
86
+ - lib/strategy/field/random_float_delta.rb
87
+ - lib/strategy/field/random_full_name.rb
88
+ - lib/strategy/field/random_int.rb
89
+ - lib/strategy/field/random_integer_delta.rb
90
+ - lib/strategy/field/random_last_name.rb
91
+ - lib/strategy/field/random_mailinator_email.rb
92
+ - lib/strategy/field/random_phone_number.rb
93
+ - lib/strategy/field/random_selection.rb
94
+ - lib/strategy/field/random_string.rb
95
+ - lib/strategy/field/random_user_name.rb
96
+ - lib/strategy/field/string_template.rb
97
+ - lib/strategy/field/user_name_template.rb
98
+ - lib/strategy/field/whitelist.rb
99
+ - lib/strategy/strategies.rb
100
+ - lib/strategy/whitelist.rb
101
+ - lib/tasks/rake_tasks.rb
102
+ - lib/utils/database.rb
103
+ - lib/utils/logging.rb
104
+ - lib/utils/random_int.rb
105
+ - lib/utils/random_string.rb
106
+ - lib/utils/resource.rb
107
+ - lib/version.rb
108
+ - resources/first_names.txt
109
+ - resources/last_names.txt
110
+ - spec/acceptance/rdbms_blacklist_spec.rb
111
+ - spec/acceptance/rdbms_whitelist_spec.rb
112
+ - spec/spec_helper.rb
113
+ - spec/strategy/field/anonymize_time_spec.rb
114
+ - spec/strategy/field/date_time_delta_spec.rb
115
+ - spec/strategy/field/distinct_column_values_spec.rb
116
+ - spec/strategy/field/gmail_template_spec.rb
117
+ - spec/strategy/field/lorem_ipsum_spec.rb
118
+ - spec/strategy/field/random_boolean_spec.rb
119
+ - spec/strategy/field/random_email_spec.rb
120
+ - spec/strategy/field/random_first_name_spec.rb
121
+ - spec/strategy/field/random_float_delta_spec.rb
122
+ - spec/strategy/field/random_full_name_spec.rb
123
+ - spec/strategy/field/random_int_spec.rb
124
+ - spec/strategy/field/random_integer_delta_spec.rb
125
+ - spec/strategy/field/random_last_name_spec.rb
126
+ - spec/strategy/field/random_mailinator_email_spec.rb
127
+ - spec/strategy/field/random_phone_number_spec.rb
128
+ - spec/strategy/field/random_selection_spec.rb
129
+ - spec/strategy/field/random_string_spec.rb
130
+ - spec/strategy/field/random_user_name_spec.rb
131
+ - spec/strategy/field/string_template_spec.rb
132
+ - spec/strategy/field/user_name_template_spec.rb
133
+ - spec/strategy/field/whitelist_spec.rb
134
+ - spec/support/customer_sample.rb
135
+ - spec/utils/database_spec.rb
136
+ - spec/utils/random_int_spec.rb
137
+ - spec/utils/random_string_spec.rb
138
+ - whitelist_dsl.rb
139
+ homepage: http://sunitparekh.github.com/data-anonymization
140
+ licenses: []
141
+ post_install_message:
142
+ rdoc_options: []
143
+ require_paths:
144
+ - lib
145
+ required_ruby_version: !ruby/object:Gem::Requirement
146
+ none: false
147
+ requirements:
148
+ - - ! '>='
149
+ - !ruby/object:Gem::Version
150
+ version: '0'
151
+ required_rubygems_version: !ruby/object:Gem::Requirement
152
+ none: false
153
+ requirements:
154
+ - - ! '>='
155
+ - !ruby/object:Gem::Version
156
+ version: '0'
157
+ requirements: []
158
+ rubyforge_project:
159
+ rubygems_version: 1.8.24
160
+ signing_key:
161
+ specification_version: 3
162
+ summary: Tool to create anonymized production data dump to use for PERF and other
163
+ TEST environments.
164
+ test_files:
165
+ - spec/acceptance/rdbms_blacklist_spec.rb
166
+ - spec/acceptance/rdbms_whitelist_spec.rb
167
+ - spec/spec_helper.rb
168
+ - spec/strategy/field/anonymize_time_spec.rb
169
+ - spec/strategy/field/date_time_delta_spec.rb
170
+ - spec/strategy/field/distinct_column_values_spec.rb
171
+ - spec/strategy/field/gmail_template_spec.rb
172
+ - spec/strategy/field/lorem_ipsum_spec.rb
173
+ - spec/strategy/field/random_boolean_spec.rb
174
+ - spec/strategy/field/random_email_spec.rb
175
+ - spec/strategy/field/random_first_name_spec.rb
176
+ - spec/strategy/field/random_float_delta_spec.rb
177
+ - spec/strategy/field/random_full_name_spec.rb
178
+ - spec/strategy/field/random_int_spec.rb
179
+ - spec/strategy/field/random_integer_delta_spec.rb
180
+ - spec/strategy/field/random_last_name_spec.rb
181
+ - spec/strategy/field/random_mailinator_email_spec.rb
182
+ - spec/strategy/field/random_phone_number_spec.rb
183
+ - spec/strategy/field/random_selection_spec.rb
184
+ - spec/strategy/field/random_string_spec.rb
185
+ - spec/strategy/field/random_user_name_spec.rb
186
+ - spec/strategy/field/string_template_spec.rb
187
+ - spec/strategy/field/user_name_template_spec.rb
188
+ - spec/strategy/field/whitelist_spec.rb
189
+ - spec/support/customer_sample.rb
190
+ - spec/utils/database_spec.rb
191
+ - spec/utils/random_int_spec.rb
192
+ - spec/utils/random_string_spec.rb