data-anonymization 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (83) hide show
  1. data/.documentup.json +8 -0
  2. data/.gitignore +20 -0
  3. data/.rspec +2 -0
  4. data/.rvmrc +1 -0
  5. data/.travis.yml +6 -0
  6. data/Gemfile +12 -0
  7. data/LICENSE.txt +22 -0
  8. data/README.md +256 -0
  9. data/Rakefile +9 -0
  10. data/blacklist_dsl.rb +19 -0
  11. data/blacklist_nosql_dsl.rb +36 -0
  12. data/data-anonymization.gemspec +22 -0
  13. data/lib/core/database.rb +36 -0
  14. data/lib/core/dsl.rb +16 -0
  15. data/lib/core/field.rb +18 -0
  16. data/lib/data-anonymization.rb +12 -0
  17. data/lib/strategy/base.rb +67 -0
  18. data/lib/strategy/blacklist.rb +18 -0
  19. data/lib/strategy/field/anonymize_time.rb +57 -0
  20. data/lib/strategy/field/anonymous.rb +21 -0
  21. data/lib/strategy/field/date_time_delta.rb +24 -0
  22. data/lib/strategy/field/default_anon.rb +28 -0
  23. data/lib/strategy/field/distinct_column_values.rb +25 -0
  24. data/lib/strategy/field/fields.rb +23 -0
  25. data/lib/strategy/field/gmail_template.rb +17 -0
  26. data/lib/strategy/field/lorem_ipsum.rb +29 -0
  27. data/lib/strategy/field/random_boolean.rb +19 -0
  28. data/lib/strategy/field/random_email.rb +31 -0
  29. data/lib/strategy/field/random_first_name.rb +18 -0
  30. data/lib/strategy/field/random_float_delta.rb +24 -0
  31. data/lib/strategy/field/random_full_name.rb +28 -0
  32. data/lib/strategy/field/random_int.rb +23 -0
  33. data/lib/strategy/field/random_integer_delta.rb +21 -0
  34. data/lib/strategy/field/random_last_name.rb +19 -0
  35. data/lib/strategy/field/random_mailinator_email.rb +20 -0
  36. data/lib/strategy/field/random_phone_number.rb +24 -0
  37. data/lib/strategy/field/random_selection.rb +23 -0
  38. data/lib/strategy/field/random_string.rb +22 -0
  39. data/lib/strategy/field/random_user_name.rb +23 -0
  40. data/lib/strategy/field/string_template.rb +22 -0
  41. data/lib/strategy/field/user_name_template.rb +22 -0
  42. data/lib/strategy/field/whitelist.rb +17 -0
  43. data/lib/strategy/strategies.rb +4 -0
  44. data/lib/strategy/whitelist.rb +21 -0
  45. data/lib/tasks/rake_tasks.rb +19 -0
  46. data/lib/utils/database.rb +53 -0
  47. data/lib/utils/logging.rb +29 -0
  48. data/lib/utils/random_int.rb +15 -0
  49. data/lib/utils/random_string.rb +14 -0
  50. data/lib/utils/resource.rb +13 -0
  51. data/lib/version.rb +3 -0
  52. data/resources/first_names.txt +500 -0
  53. data/resources/last_names.txt +500 -0
  54. data/spec/acceptance/rdbms_blacklist_spec.rb +30 -0
  55. data/spec/acceptance/rdbms_whitelist_spec.rb +50 -0
  56. data/spec/spec_helper.rb +26 -0
  57. data/spec/strategy/field/anonymize_time_spec.rb +23 -0
  58. data/spec/strategy/field/date_time_delta_spec.rb +43 -0
  59. data/spec/strategy/field/distinct_column_values_spec.rb +22 -0
  60. data/spec/strategy/field/gmail_template_spec.rb +14 -0
  61. data/spec/strategy/field/lorem_ipsum_spec.rb +27 -0
  62. data/spec/strategy/field/random_boolean_spec.rb +16 -0
  63. data/spec/strategy/field/random_email_spec.rb +18 -0
  64. data/spec/strategy/field/random_first_name_spec.rb +14 -0
  65. data/spec/strategy/field/random_float_delta_spec.rb +21 -0
  66. data/spec/strategy/field/random_full_name_spec.rb +23 -0
  67. data/spec/strategy/field/random_int_spec.rb +28 -0
  68. data/spec/strategy/field/random_integer_delta_spec.rb +23 -0
  69. data/spec/strategy/field/random_last_name_spec.rb +14 -0
  70. data/spec/strategy/field/random_mailinator_email_spec.rb +21 -0
  71. data/spec/strategy/field/random_phone_number_spec.rb +35 -0
  72. data/spec/strategy/field/random_selection_spec.rb +36 -0
  73. data/spec/strategy/field/random_string_spec.rb +23 -0
  74. data/spec/strategy/field/random_user_name_spec.rb +23 -0
  75. data/spec/strategy/field/string_template_spec.rb +15 -0
  76. data/spec/strategy/field/user_name_template_spec.rb +13 -0
  77. data/spec/strategy/field/whitelist_spec.rb +21 -0
  78. data/spec/support/customer_sample.rb +43 -0
  79. data/spec/utils/database_spec.rb +26 -0
  80. data/spec/utils/random_int_spec.rb +9 -0
  81. data/spec/utils/random_string_spec.rb +8 -0
  82. data/whitelist_dsl.rb +44 -0
  83. metadata +192 -0
@@ -0,0 +1,36 @@
1
+ require "spec_helper"
2
+
3
+ describe DataAnon::Strategy::Field::RandomSelection do # the anonymized value must come from the value(s) handed to RandomSelection
4
+
5
+ RandomSelection = DataAnon::Strategy::Field::RandomSelection # shorthand for the class under test
6
+ let(:field) { DataAnon::Core::Field.new('state', 'New Delhi', 1, nil) }
7
+
8
+ describe 'more than one values in predefined list' do
9
+
10
+ let(:states) { ['Maharashtra','Gujrat','Karnataka'] }
11
+ let(:anonymized_value) { RandomSelection.new(states).anonymize(field) }
12
+
13
+ it { states.should include(anonymized_value) } # any of the three entries is acceptable
14
+
15
+ end
16
+
17
+ describe 'only one value in list' do
18
+
19
+ let(:states) { ['Maharashtra'] }
20
+ let(:anonymized_value) { RandomSelection.new(states).anonymize(field) }
21
+
22
+ it { anonymized_value.should == 'Maharashtra' }
23
+
24
+ end
25
+
26
+ describe 'string value' do
27
+
28
+ let(:states) { 'Maharashtra' } # a bare String rather than an Array
29
+ let(:anonymized_value) { RandomSelection.new(states).anonymize(field) }
30
+
31
+ it { anonymized_value.should == 'Maharashtra' }
32
+
33
+ end
34
+
35
+
36
+ end
@@ -0,0 +1,23 @@
1
+ require "spec_helper"
2
+
3
+ describe DataAnon::Strategy::Field::RandomString do
4
+
5
+ RandomString = DataAnon::Strategy::Field::RandomString
6
+
7
+ describe 'anonymized string must not be the same as original string' do
8
+ let(:field) {DataAnon::Core::Field.new('string_field','fakeString',1,nil)}
9
+ let(:anonymized_string) {RandomString.new.anonymize(field)}
10
+
11
+ it {anonymized_string.should_not equal field.value}
12
+ it {anonymized_string.length.should equal field.value.length}
13
+ end
14
+
15
+ describe 'anonymized name should have same number of words as original' do
16
+
17
+ let(:field) {DataAnon::Core::Field.new('string_field','Fake Longer String Test',1,nil)}
18
+ let(:anonymized_string) {RandomString.new.anonymize(field)}
19
+
20
+ it {anonymized_string.split(' ').size.should equal field.value.split(' ').size}
21
+
22
+ end
23
+ end
@@ -0,0 +1,23 @@
1
+ require "spec_helper"
2
+
3
+ describe DataAnon::Strategy::Field::RandomUserName do
4
+
5
+ RandomUserName = DataAnon::Strategy::Field::RandomUserName
6
+ let(:field) {DataAnon::Core::Field.new('username','fakeUserName',1,nil)}
7
+
8
+ describe 'anonymized user name should not be the same as original user name' do
9
+ let(:anonymized_username) {RandomUserName.new.anonymize(field)}
10
+
11
+ it {anonymized_username.should_not equal field.value}
12
+ it {anonymized_username.length.should be_between(5,10)}
13
+ it {anonymized_username.should match '^[a-zA-Z0-9]*$'}
14
+
15
+ end
16
+
17
+ describe 'different length for username' do
18
+ let(:anonymized_username) {RandomUserName.new(15,20).anonymize(field)}
19
+
20
+ it {anonymized_username.length.should be_between(15,20)}
21
+ end
22
+
23
+ end
@@ -0,0 +1,15 @@
1
+ require "spec_helper"
2
+
3
+ describe DataAnon::Strategy::Field::StringTemplate do # StringTemplate must fill the row_number placeholder of its template
4
+
5
+ StringTemplate = DataAnon::Strategy::Field::StringTemplate # shorthand for the class under test
6
+ let(:field) { DataAnon::Core::Field.new('state', 'New Delhi', 3456, nil) } # 3456 is presumably the row number, since it shows up in the expected output below
7
+
8
+ describe 'should return same string value as StringTemplate' do
9
+ let(:anonymized_value) { StringTemplate.new('Sunit #{row_number} Parekh').anonymize(field) } # single quotes keep the placeholder literal for the strategy to fill in
10
+ it { anonymized_value.should == 'Sunit 3456 Parekh' }
11
+ end
12
+
13
+
14
+
15
+ end
@@ -0,0 +1,13 @@
1
+ require "spec_helper"
2
+
3
+ describe DataAnon::Strategy::Field::UserNameTemplate do # UserNameTemplate must fill the row_number placeholder of its template
4
+
5
+ UserNameTemplate = DataAnon::Strategy::Field::UserNameTemplate # shorthand for the class under test
6
+ let(:field) { DataAnon::Core::Field.new('username', 'Chuck Norris', 100, nil) } # 100 is presumably the row number, since it shows up in the expected output below
7
+
8
+ describe 'should return same string value as StringTemplate' do # NOTE(review): label mentions StringTemplate although UserNameTemplate is under test - confirm intent
9
+ let(:anonymized_username) { UserNameTemplate.new('Rajnikanth #{row_number}').anonymize(field) } # single quotes keep the placeholder literal for the strategy to fill in
10
+ it { anonymized_username.should == 'Rajnikanth 100' }
11
+ end
12
+
13
+ end
@@ -0,0 +1,21 @@
1
+ require "spec_helper"
2
+
3
+ describe DataAnon::Strategy::Field::Whitelist do # Whitelist must hand back the field value unchanged
4
+
5
+ Whitelist = DataAnon::Strategy::Field::Whitelist # shorthand for the class under test
6
+
7
+ describe 'should return same string value as whitelist' do
8
+ let(:field) { DataAnon::Core::Field.new('state', 'New Delhi', 1, nil) }
9
+ let(:anonymized_value) { Whitelist.new.anonymize(field) }
10
+ it { anonymized_value.should == 'New Delhi' }
11
+ end
12
+
13
+ describe 'should return same date value as whitelist' do
14
+ let(:dob) { Time.now } # memoized by let, so the field and the expectation see the same Time
15
+ let(:field) { DataAnon::Core::Field.new('DateOfBirth', dob, 1, nil) }
16
+ let(:anonymized_value) { Whitelist.new.anonymize(field) }
17
+ it { anonymized_value.should == dob }
18
+ end
19
+
20
+
21
+ end
@@ -0,0 +1,43 @@
1
+ class CustomerSample # spec support helper: creates a customers table and inserts a sample row
2
+
3
+ class CreateCustomer < ActiveRecord::Migration # migration defining the customers table
4
+ def up
5
+ create_table :customers, { :id => false } do |t| # :id => false is passed; cust_id is declared as its own column below
6
+ t.integer :cust_id, :primary => true
7
+ t.string :first_name
8
+ t.string :last_name
9
+ t.date :birth_date
10
+ t.string :address
11
+ t.string :state
12
+ t.string :zipcode
13
+ t.string :phone
14
+ t.string :email
15
+ end
16
+ end
17
+ end
18
+
19
+ def self.clean # shells out to delete tmp/*.sqlite and then make sure tmp/ exists
20
+ system "rm -f tmp/*.sqlite"
21
+ system "mkdir -p tmp"
22
+ end
23
+
24
+ def self.create_schema connection_spec # connects ActiveRecord::Base using connection_spec and runs CreateCustomer up
25
+ ActiveRecord::Migration.verbose = false
26
+ ActiveRecord::Base.establish_connection connection_spec
27
+ CreateCustomer.migrate :up
28
+ end
29
+
30
+ SAMPLE_DATA = {:cust_id => 100, :first_name => "Sunit", :last_name => "Parekh", # default row used by insert_record
31
+ :birth_date => Date.new(1977,7,8), :address => "F 501 Shanti Nagar",
32
+ :state => "Maharastra", :zipcode => "411048", :phone => "9923700662",
33
+ :email => "parekh.sunit@gmail.com"}
34
+
35
+ def self.insert_record connection_spec, data_hash = SAMPLE_DATA # saves data_hash as a customers row through TempDatabase
36
+ DataAnon::Utils::TempDatabase.establish_connection connection_spec
37
+ source = DataAnon::Utils::BaseTable.create_table 'customers', 'cust_id',DataAnon::Utils::TempDatabase
38
+ cust = source.new data_hash
39
+ cust.cust_id = data_hash[:cust_id] # set explicitly after new - presumably the key is not mass-assigned; TODO confirm
40
+ cust.save!
41
+ end
42
+
43
+ end
@@ -0,0 +1,26 @@
1
+ require "spec_helper"
2
+
3
+ describe "Utils" do
4
+
5
+ before(:each) do
6
+ source = {:adapter => 'sqlite3', :database => 'sample-data/chinook.sqlite'}
7
+ DataAnon::Utils::SourceDatabase.establish_connection source
8
+
9
+ destination = {:adapter => 'sqlite3', :database => 'sample-data/chinook-empty.sqlite'}
10
+ DataAnon::Utils::DestinationDatabase.establish_connection destination
11
+ end
12
+
13
+ it "should test the connection to source database" do
14
+ album = DataAnon::Utils::SourceTable.create "Album", "AlbumId"
15
+ album.count.should > 0
16
+ album.all.length > 0
17
+ end
18
+
19
+ it "should test the connection to destination database" do
20
+ album = DataAnon::Utils::DestinationTable.create "Album", "AlbumId"
21
+ album.count.should == 0
22
+ album.all.length == 0
23
+
24
+ end
25
+
26
+ end
@@ -0,0 +1,9 @@
1
+ require "spec_helper"
2
+
3
+ describe "Number Utils" do # covers DataAnon::Utils::RandomInt.generate
4
+
5
+ it "should generate random int between provided range" do
6
+ random_int = DataAnon::Utils::RandomInt.generate(5,10)
7
+ random_int.should be_between(5,10) # only the bounds are checked, one sample per run
8
+ end
9
+ end
@@ -0,0 +1,8 @@
1
+ require "spec_helper"
2
+
3
+ describe "String Utils" do # covers DataAnon::Utils::RandomString.generate
4
+
5
+ it "should generate random string of given length" do
6
+ DataAnon::Utils::RandomString.generate(10).length.should equal 10 # only the length is checked, not the content
7
+ end
8
+ end
@@ -0,0 +1,44 @@
1
+ system "rake empty_dest" # clean destination database on every call
2
+
3
+ require 'data-anonymization'
4
+
5
+ FS = DataAnon::Strategy::Field # shorthand for the field-strategy namespace
6
+
7
+ DataAnon::Utils::Logging.logger.level = Logger::INFO # log at INFO level
8
+
9
+ database 'Chinook' do # sample whitelist run from chinook.sqlite into chinook-empty.sqlite
10
+ strategy DataAnon::Strategy::Whitelist
11
+ source_db :adapter => 'sqlite3', :database => 'sample-data/chinook.sqlite'
12
+ destination_db :adapter => 'sqlite3', :database => 'sample-data/chinook-empty.sqlite'
13
+
14
+ default_field_strategies :string => FS::StringTemplate.new('Sunit #{row_number} Parekh') # single quotes keep the placeholder literal for the strategy to fill in
15
+
16
+ table 'Genre' do
17
+ primary_key 'GenreId'
18
+ whitelist 'GenreId'
19
+ anonymize 'Name' do |field| # block form: the block's return value is the anonymized value
20
+ field.value + " test"
21
+ end
22
+ end
23
+
24
+ table 'MediaType' do
25
+ primary_key 'MediaTypeId'
26
+ anonymize('MediaTypeId') { |field| field.value } # same as whitelist
27
+ anonymize('Name').using FS::StringTemplate.new('Media Type #{row_number}') # strategy-object form
28
+
29
+ end
30
+
31
+ table 'Employee' do
32
+ primary_key 'EmployeeId'
33
+ whitelist 'EmployeeId'
34
+ anonymize('BirthDate').using FS::DateTimeDelta.new(1,1) # NOTE(review): meaning of the (1,1) arguments is not visible here - confirm against DateTimeDelta
35
+ end
36
+
37
+ table 'Customer' do
38
+ primary_key 'CustomerId'
39
+ anonymize('Phone').using FS::RandomPhoneNumber.new
40
+ end
41
+
42
+
43
+ end
44
+
metadata ADDED
@@ -0,0 +1,192 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: data-anonymization
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.2
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Sunit Parekh
9
+ - Anand Agrawal
10
+ - Satyam Agarwala
11
+ autorequire:
12
+ bindir: bin
13
+ cert_chain: []
14
+ date: 2012-08-14 00:00:00.000000000 Z
15
+ dependencies:
16
+ - !ruby/object:Gem::Dependency
17
+ name: activerecord
18
+ requirement: !ruby/object:Gem::Requirement
19
+ none: false
20
+ requirements:
21
+ - - ~>
22
+ - !ruby/object:Gem::Version
23
+ version: 3.2.8
24
+ type: :runtime
25
+ prerelease: false
26
+ version_requirements: !ruby/object:Gem::Requirement
27
+ none: false
28
+ requirements:
29
+ - - ~>
30
+ - !ruby/object:Gem::Version
31
+ version: 3.2.8
32
+ - !ruby/object:Gem::Dependency
33
+ name: activesupport
34
+ requirement: !ruby/object:Gem::Requirement
35
+ none: false
36
+ requirements:
37
+ - - ~>
38
+ - !ruby/object:Gem::Version
39
+ version: 3.2.8
40
+ type: :runtime
41
+ prerelease: false
42
+ version_requirements: !ruby/object:Gem::Requirement
43
+ none: false
44
+ requirements:
45
+ - - ~>
46
+ - !ruby/object:Gem::Version
47
+ version: 3.2.8
48
+ description: Data anonymization tool for RDBMS databases
49
+ email:
50
+ - parekh.sunit@gmail.com
51
+ - anand.agrawal84@gmail.com
52
+ - satyamag@gmail.com
53
+ executables: []
54
+ extensions: []
55
+ extra_rdoc_files: []
56
+ files:
57
+ - .documentup.json
58
+ - .gitignore
59
+ - .rspec
60
+ - .rvmrc
61
+ - .travis.yml
62
+ - Gemfile
63
+ - LICENSE.txt
64
+ - README.md
65
+ - Rakefile
66
+ - blacklist_dsl.rb
67
+ - blacklist_nosql_dsl.rb
68
+ - data-anonymization.gemspec
69
+ - lib/core/database.rb
70
+ - lib/core/dsl.rb
71
+ - lib/core/field.rb
72
+ - lib/data-anonymization.rb
73
+ - lib/strategy/base.rb
74
+ - lib/strategy/blacklist.rb
75
+ - lib/strategy/field/anonymize_time.rb
76
+ - lib/strategy/field/anonymous.rb
77
+ - lib/strategy/field/date_time_delta.rb
78
+ - lib/strategy/field/default_anon.rb
79
+ - lib/strategy/field/distinct_column_values.rb
80
+ - lib/strategy/field/fields.rb
81
+ - lib/strategy/field/gmail_template.rb
82
+ - lib/strategy/field/lorem_ipsum.rb
83
+ - lib/strategy/field/random_boolean.rb
84
+ - lib/strategy/field/random_email.rb
85
+ - lib/strategy/field/random_first_name.rb
86
+ - lib/strategy/field/random_float_delta.rb
87
+ - lib/strategy/field/random_full_name.rb
88
+ - lib/strategy/field/random_int.rb
89
+ - lib/strategy/field/random_integer_delta.rb
90
+ - lib/strategy/field/random_last_name.rb
91
+ - lib/strategy/field/random_mailinator_email.rb
92
+ - lib/strategy/field/random_phone_number.rb
93
+ - lib/strategy/field/random_selection.rb
94
+ - lib/strategy/field/random_string.rb
95
+ - lib/strategy/field/random_user_name.rb
96
+ - lib/strategy/field/string_template.rb
97
+ - lib/strategy/field/user_name_template.rb
98
+ - lib/strategy/field/whitelist.rb
99
+ - lib/strategy/strategies.rb
100
+ - lib/strategy/whitelist.rb
101
+ - lib/tasks/rake_tasks.rb
102
+ - lib/utils/database.rb
103
+ - lib/utils/logging.rb
104
+ - lib/utils/random_int.rb
105
+ - lib/utils/random_string.rb
106
+ - lib/utils/resource.rb
107
+ - lib/version.rb
108
+ - resources/first_names.txt
109
+ - resources/last_names.txt
110
+ - spec/acceptance/rdbms_blacklist_spec.rb
111
+ - spec/acceptance/rdbms_whitelist_spec.rb
112
+ - spec/spec_helper.rb
113
+ - spec/strategy/field/anonymize_time_spec.rb
114
+ - spec/strategy/field/date_time_delta_spec.rb
115
+ - spec/strategy/field/distinct_column_values_spec.rb
116
+ - spec/strategy/field/gmail_template_spec.rb
117
+ - spec/strategy/field/lorem_ipsum_spec.rb
118
+ - spec/strategy/field/random_boolean_spec.rb
119
+ - spec/strategy/field/random_email_spec.rb
120
+ - spec/strategy/field/random_first_name_spec.rb
121
+ - spec/strategy/field/random_float_delta_spec.rb
122
+ - spec/strategy/field/random_full_name_spec.rb
123
+ - spec/strategy/field/random_int_spec.rb
124
+ - spec/strategy/field/random_integer_delta_spec.rb
125
+ - spec/strategy/field/random_last_name_spec.rb
126
+ - spec/strategy/field/random_mailinator_email_spec.rb
127
+ - spec/strategy/field/random_phone_number_spec.rb
128
+ - spec/strategy/field/random_selection_spec.rb
129
+ - spec/strategy/field/random_string_spec.rb
130
+ - spec/strategy/field/random_user_name_spec.rb
131
+ - spec/strategy/field/string_template_spec.rb
132
+ - spec/strategy/field/user_name_template_spec.rb
133
+ - spec/strategy/field/whitelist_spec.rb
134
+ - spec/support/customer_sample.rb
135
+ - spec/utils/database_spec.rb
136
+ - spec/utils/random_int_spec.rb
137
+ - spec/utils/random_string_spec.rb
138
+ - whitelist_dsl.rb
139
+ homepage: http://sunitparekh.github.com/data-anonymization
140
+ licenses: []
141
+ post_install_message:
142
+ rdoc_options: []
143
+ require_paths:
144
+ - lib
145
+ required_ruby_version: !ruby/object:Gem::Requirement
146
+ none: false
147
+ requirements:
148
+ - - ! '>='
149
+ - !ruby/object:Gem::Version
150
+ version: '0'
151
+ required_rubygems_version: !ruby/object:Gem::Requirement
152
+ none: false
153
+ requirements:
154
+ - - ! '>='
155
+ - !ruby/object:Gem::Version
156
+ version: '0'
157
+ requirements: []
158
+ rubyforge_project:
159
+ rubygems_version: 1.8.24
160
+ signing_key:
161
+ specification_version: 3
162
+ summary: Tool to create anonymized production data dump to use for PREF and other
163
+ TEST environments.
164
+ test_files:
165
+ - spec/acceptance/rdbms_blacklist_spec.rb
166
+ - spec/acceptance/rdbms_whitelist_spec.rb
167
+ - spec/spec_helper.rb
168
+ - spec/strategy/field/anonymize_time_spec.rb
169
+ - spec/strategy/field/date_time_delta_spec.rb
170
+ - spec/strategy/field/distinct_column_values_spec.rb
171
+ - spec/strategy/field/gmail_template_spec.rb
172
+ - spec/strategy/field/lorem_ipsum_spec.rb
173
+ - spec/strategy/field/random_boolean_spec.rb
174
+ - spec/strategy/field/random_email_spec.rb
175
+ - spec/strategy/field/random_first_name_spec.rb
176
+ - spec/strategy/field/random_float_delta_spec.rb
177
+ - spec/strategy/field/random_full_name_spec.rb
178
+ - spec/strategy/field/random_int_spec.rb
179
+ - spec/strategy/field/random_integer_delta_spec.rb
180
+ - spec/strategy/field/random_last_name_spec.rb
181
+ - spec/strategy/field/random_mailinator_email_spec.rb
182
+ - spec/strategy/field/random_phone_number_spec.rb
183
+ - spec/strategy/field/random_selection_spec.rb
184
+ - spec/strategy/field/random_string_spec.rb
185
+ - spec/strategy/field/random_user_name_spec.rb
186
+ - spec/strategy/field/string_template_spec.rb
187
+ - spec/strategy/field/user_name_template_spec.rb
188
+ - spec/strategy/field/whitelist_spec.rb
189
+ - spec/support/customer_sample.rb
190
+ - spec/utils/database_spec.rb
191
+ - spec/utils/random_int_spec.rb
192
+ - spec/utils/random_string_spec.rb