data-anonymization 0.1.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (83)
  1. data/.documentup.json +8 -0
  2. data/.gitignore +20 -0
  3. data/.rspec +2 -0
  4. data/.rvmrc +1 -0
  5. data/.travis.yml +6 -0
  6. data/Gemfile +12 -0
  7. data/LICENSE.txt +22 -0
  8. data/README.md +256 -0
  9. data/Rakefile +9 -0
  10. data/blacklist_dsl.rb +19 -0
  11. data/blacklist_nosql_dsl.rb +36 -0
  12. data/data-anonymization.gemspec +22 -0
  13. data/lib/core/database.rb +36 -0
  14. data/lib/core/dsl.rb +16 -0
  15. data/lib/core/field.rb +18 -0
  16. data/lib/data-anonymization.rb +12 -0
  17. data/lib/strategy/base.rb +67 -0
  18. data/lib/strategy/blacklist.rb +18 -0
  19. data/lib/strategy/field/anonymize_time.rb +57 -0
  20. data/lib/strategy/field/anonymous.rb +21 -0
  21. data/lib/strategy/field/date_time_delta.rb +24 -0
  22. data/lib/strategy/field/default_anon.rb +28 -0
  23. data/lib/strategy/field/distinct_column_values.rb +25 -0
  24. data/lib/strategy/field/fields.rb +23 -0
  25. data/lib/strategy/field/gmail_template.rb +17 -0
  26. data/lib/strategy/field/lorem_ipsum.rb +29 -0
  27. data/lib/strategy/field/random_boolean.rb +19 -0
  28. data/lib/strategy/field/random_email.rb +31 -0
  29. data/lib/strategy/field/random_first_name.rb +18 -0
  30. data/lib/strategy/field/random_float_delta.rb +24 -0
  31. data/lib/strategy/field/random_full_name.rb +28 -0
  32. data/lib/strategy/field/random_int.rb +23 -0
  33. data/lib/strategy/field/random_integer_delta.rb +21 -0
  34. data/lib/strategy/field/random_last_name.rb +19 -0
  35. data/lib/strategy/field/random_mailinator_email.rb +20 -0
  36. data/lib/strategy/field/random_phone_number.rb +24 -0
  37. data/lib/strategy/field/random_selection.rb +23 -0
  38. data/lib/strategy/field/random_string.rb +22 -0
  39. data/lib/strategy/field/random_user_name.rb +23 -0
  40. data/lib/strategy/field/string_template.rb +22 -0
  41. data/lib/strategy/field/user_name_template.rb +22 -0
  42. data/lib/strategy/field/whitelist.rb +17 -0
  43. data/lib/strategy/strategies.rb +4 -0
  44. data/lib/strategy/whitelist.rb +21 -0
  45. data/lib/tasks/rake_tasks.rb +19 -0
  46. data/lib/utils/database.rb +53 -0
  47. data/lib/utils/logging.rb +29 -0
  48. data/lib/utils/random_int.rb +15 -0
  49. data/lib/utils/random_string.rb +14 -0
  50. data/lib/utils/resource.rb +13 -0
  51. data/lib/version.rb +3 -0
  52. data/resources/first_names.txt +500 -0
  53. data/resources/last_names.txt +500 -0
  54. data/spec/acceptance/rdbms_blacklist_spec.rb +30 -0
  55. data/spec/acceptance/rdbms_whitelist_spec.rb +50 -0
  56. data/spec/spec_helper.rb +26 -0
  57. data/spec/strategy/field/anonymize_time_spec.rb +23 -0
  58. data/spec/strategy/field/date_time_delta_spec.rb +43 -0
  59. data/spec/strategy/field/distinct_column_values_spec.rb +22 -0
  60. data/spec/strategy/field/gmail_template_spec.rb +14 -0
  61. data/spec/strategy/field/lorem_ipsum_spec.rb +27 -0
  62. data/spec/strategy/field/random_boolean_spec.rb +16 -0
  63. data/spec/strategy/field/random_email_spec.rb +18 -0
  64. data/spec/strategy/field/random_first_name_spec.rb +14 -0
  65. data/spec/strategy/field/random_float_delta_spec.rb +21 -0
  66. data/spec/strategy/field/random_full_name_spec.rb +23 -0
  67. data/spec/strategy/field/random_int_spec.rb +28 -0
  68. data/spec/strategy/field/random_integer_delta_spec.rb +23 -0
  69. data/spec/strategy/field/random_last_name_spec.rb +14 -0
  70. data/spec/strategy/field/random_mailinator_email_spec.rb +21 -0
  71. data/spec/strategy/field/random_phone_number_spec.rb +35 -0
  72. data/spec/strategy/field/random_selection_spec.rb +36 -0
  73. data/spec/strategy/field/random_string_spec.rb +23 -0
  74. data/spec/strategy/field/random_user_name_spec.rb +23 -0
  75. data/spec/strategy/field/string_template_spec.rb +15 -0
  76. data/spec/strategy/field/user_name_template_spec.rb +13 -0
  77. data/spec/strategy/field/whitelist_spec.rb +21 -0
  78. data/spec/support/customer_sample.rb +43 -0
  79. data/spec/utils/database_spec.rb +26 -0
  80. data/spec/utils/random_int_spec.rb +9 -0
  81. data/spec/utils/random_string_spec.rb +8 -0
  82. data/whitelist_dsl.rb +44 -0
  83. metadata +192 -0
@@ -0,0 +1,30 @@
1
+ require "spec_helper"
2
+
3
+ describe "End 2 End RDBMS Blacklist Acceptance Test using SQLite database" do
4
+ connection_spec = {:adapter => 'sqlite3', :database => 'tmp/customer.sqlite'}
5
+
6
+ before(:each) do
7
+ CustomerSample.clean
8
+ CustomerSample.create_schema connection_spec
9
+ CustomerSample.insert_record connection_spec, CustomerSample::SAMPLE_DATA
10
+ end
11
+
12
+ it "should anonymize customer table record " do
13
+
14
+ database "Customer" do
15
+ strategy DataAnon::Strategy::Blacklist
16
+ source_db connection_spec
17
+
18
+ table 'customers' do
19
+ primary_key 'cust_id'
20
+ anonymize('email').using DataAnon::Strategy::Field::StringTemplate.new('test+#{row_number}@gmail.com')
21
+ end
22
+ end
23
+
24
+ DataAnon::Utils::SourceDatabase.establish_connection connection_spec
25
+ source = DataAnon::Utils::SourceTable.create 'customers', 'cust_id'
26
+ new_rec = source.find(CustomerSample::SAMPLE_DATA[:cust_id])
27
+ new_rec['email'].should == 'test+1@gmail.com'
28
+
29
+ end
30
+ end
@@ -0,0 +1,50 @@
1
+ require "spec_helper"
2
+
3
+ describe "End 2 End RDBMS Whitelist Acceptance Test using SQLite database" do
4
+
5
+ source_connection_spec = {:adapter => 'sqlite3', :database => 'tmp/customer.sqlite'}
6
+ dest_connection_spec = {:adapter => 'sqlite3', :database => 'tmp/customer-dest.sqlite'}
7
+
8
+ before(:each) do
9
+ CustomerSample.clean
10
+ CustomerSample.create_schema source_connection_spec
11
+ CustomerSample.insert_record source_connection_spec, CustomerSample::SAMPLE_DATA
12
+
13
+ CustomerSample.create_schema dest_connection_spec
14
+ end
15
+
16
+ it "should anonymize customer table record " do
17
+
18
+ DF = DataAnon::Strategy::Field
19
+
20
+ database "Customer" do
21
+ strategy DataAnon::Strategy::Whitelist
22
+ source_db source_connection_spec
23
+ destination_db dest_connection_spec
24
+
25
+ table 'customers' do
26
+ primary_key 'cust_id'
27
+ whitelist 'cust_id', 'address', 'zipcode'
28
+ anonymize('first_name').using DF::RandomFirstName.new
29
+ anonymize('last_name').using DF::RandomLastName.new
30
+ anonymize('state').using DF::RandomSelection.new(['Gujrat','Karnataka'])
31
+ anonymize('phone').using DF::RandomPhoneNumber.new
32
+ anonymize('email').using DF::StringTemplate.new('test+#{row_number}@gmail.com')
33
+ end
34
+ end
35
+
36
+ DataAnon::Utils::DestinationDatabase.establish_connection dest_connection_spec
37
+ dest_table = DataAnon::Utils::DestinationTable.create 'customers', 'cust_id'
38
+ new_rec = dest_table.find(CustomerSample::SAMPLE_DATA[:cust_id])
39
+ new_rec.first_name.should_not be("Sunit")
40
+ new_rec.last_name.should_not be("Parekh")
41
+ new_rec.birth_date.should_not be(Date.new(1977,7,8))
42
+ new_rec.address.should == 'F 501 Shanti Nagar'
43
+ ['Gujrat','Karnataka'].should include(new_rec.state)
44
+ new_rec.zipcode.should == '411048'
45
+ new_rec.phone.should_not be "9923700662"
46
+ new_rec.email.should == 'test+1@gmail.com'
47
+
48
+
49
+ end
50
+ end
@@ -0,0 +1,26 @@
1
+ require 'rspec'
2
+ require "pry"
3
+ require 'data-anonymization'
4
+
5
+ Dir["#{File.dirname(__FILE__)}/support/**/*.rb"].each {|f| require f}  # load every .rb file under the support/ directory next to this file
6
+
7
+ DataAnon::Utils::Logging.logger.level = Logger::INFO  # main logger set to INFO
8
+ DataAnon::Utils::Logging.progress_logger.level = Logger::WARN  # progress logger set to WARN
9
+
10
+
11
+ RSpec.configure do |config|
12
+ config.expect_with :rspec  # use RSpec's own expectation library
13
+ config.mock_with 'rspec-mocks'  # use rspec-mocks as the mocking framework
14
+
15
+ config.before(:suite) do  # empty hook: no suite-wide setup is defined
16
+ end
17
+
18
+ config.before(:each) do  # empty hook: no per-example setup is defined
19
+ end
20
+
21
+ config.after(:suite) do  # empty hook: no suite-wide teardown is defined
22
+ end
23
+ end
24
+
25
+
26
+
@@ -0,0 +1,23 @@
1
+ require "spec_helper"
2
+
3
+ describe DataAnon::Strategy::Field::AnonymizeTime do
4
+
5
+ AnonymizeTime = DataAnon::Strategy::Field::AnonymizeTime
6
+ let(:field) { DataAnon::Core::Field.new('date', Time.new(2000,1,1,12,12,12), 1, nil) }
7
+
8
+ describe 'providing true only for month should randomize only the month field' do
9
+
10
+ let(:anonymized_time) { AnonymizeTime.new(true,false,false,false,false).anonymize(field) }
11
+
12
+ it {
13
+ anonymized_time.year.should be 2000
14
+ anonymized_time.day.should be 1
15
+ anonymized_time.hour.should be 12
16
+ anonymized_time.min.should be 12
17
+ anonymized_time.sec.should be 12
18
+
19
+ anonymized_time.month.should be_between(1,12)
20
+ }
21
+ end
22
+
23
+ end
@@ -0,0 +1,43 @@
1
+ require "spec_helper"
2
+
3
+ describe DataAnon::Strategy::Field::DateTimeDelta do
4
+
5
+ DateTimeDelta = DataAnon::Strategy::Field::DateTimeDelta
6
+ let(:field) { DataAnon::Core::Field.new('date', DateTime.new(2000,1,1), 1, nil) }
7
+
8
+ describe 'datetime should not remain the same' do
9
+
10
+ let(:anonymized_value) { DateTimeDelta.new().anonymize(field) }
11
+ let(:date_difference) {anonymized_value.to_i - field.value.to_i}
12
+
13
+ it {date_difference.should_not be 0 }
14
+ end
15
+
16
+ describe 'datetime should not change when provided with 0 delta for both date and time' do
17
+
18
+ let(:anonymized_value) { DateTimeDelta.new(0,0).anonymize(field) }
19
+ let(:date_difference) {anonymized_value.to_i - field.value.to_i}
20
+
21
+ it {date_difference.should be 0 }
22
+
23
+ end
24
+
25
+ describe 'date should be anonymized within provided delta' do
26
+
27
+ let(:anonymized_value) { DateTimeDelta.new(5,0).anonymize(field) }
28
+ let(:date_difference) {anonymized_value.to_i - field.value.to_i}
29
+
30
+ it { date_difference.should be_between(-5.days, 5.days) }
31
+
32
+ end
33
+
34
+ describe 'time should be anonymized within provided delta' do
35
+
36
+ let(:anonymized_value) { DateTimeDelta.new(0,10).anonymize(field) }
37
+ let(:date_difference) {anonymized_value.to_i - field.value.to_i}
38
+
39
+ it { date_difference.should be_between(-10.minutes, 10.minutes)}
40
+ end
41
+
42
+
43
+ end
@@ -0,0 +1,22 @@
1
+ require "spec_helper"
2
+
3
+ describe DataAnon::Strategy::Field::DistinctColumnValues do
4
+
5
+ before(:each) do
6
+ source = {:adapter => 'sqlite3', :database => 'sample-data/chinook.sqlite'}
7
+ DataAnon::Utils::SourceDatabase.establish_connection source
8
+ end
9
+
10
+ DistinctColumnValues = DataAnon::Strategy::Field::DistinctColumnValues
11
+ let(:field) { DataAnon::Core::Field.new('name', 'Abcd', 1, nil) }
12
+
13
+ describe 'more than one values in predefined list' do
14
+
15
+ let(:anonymized_value) { DistinctColumnValues.new('MediaType','Name').anonymize(field) }
16
+
17
+ it { anonymized_value.should_not be('Abcd') }
18
+ it { anonymized_value.should_not be_empty }
19
+
20
+ end
21
+
22
+ end
@@ -0,0 +1,14 @@
1
+ require "spec_helper"
2
+
3
+ describe DataAnon::Strategy::Field::GmailTemplate do
4
+
5
+ GmailTemplate = DataAnon::Strategy::Field::GmailTemplate
6
+ let(:field) {DataAnon::Core::Field.new('email','user@company.com',456,nil)}
7
+
8
+ describe 'generated email must be compliant with the provided template' do
9
+
10
+ let(:anonymized_email) {GmailTemplate.new("fake@gmail.com").anonymize(field)}
11
+
12
+ it {anonymized_email.should eq('fake+456@gmail.com')}
13
+ end
14
+ end
@@ -0,0 +1,27 @@
1
+ require "spec_helper"
2
+
3
+ describe DataAnon::Strategy::Field::LoremIpsum do
4
+
5
+ LoremIpsum = DataAnon::Strategy::Field::LoremIpsum
6
+ let(:field) { DataAnon::Core::Field.new('state', 'New Delhi', 1, nil) }
7
+
8
+ describe 'should return same length value using default text' do
9
+
10
+ let(:anonymized_value) { LoremIpsum.new.anonymize(field) }
11
+
12
+ it { anonymized_value.length.should_not be('New Delhi') }
13
+ it { anonymized_value.length.should == 'New Delhi'.length }
14
+
15
+ end
16
+
17
+ describe 'should return same length value using set text' do
18
+
19
+ let(:anonymized_value) { LoremIpsum.new("Sunit Parekh").anonymize(field) }
20
+
21
+ it { anonymized_value.length.should_not be('New Delhi') }
22
+ it { anonymized_value.should == 'Sunit Par' }
23
+
24
+ end
25
+
26
+
27
+ end
@@ -0,0 +1,16 @@
1
+ require "spec_helper"
2
+
3
+ describe DataAnon::Strategy::Field::RandomBoolean do
4
+
5
+ RandomBoolean = DataAnon::Strategy::Field::RandomBoolean
6
+ let(:field) {DataAnon::Core::Field.new('boolean_field',true,1,nil)}
7
+
8
+ describe 'anonymized value should be a boolean' do
9
+ let(:anonymized_boolean) {RandomBoolean.new.anonymize(field)}
10
+
11
+ it {
12
+ is_boolean = anonymized_boolean.is_a?(TrueClass) || anonymized_boolean.is_a?(FalseClass)
13
+ is_boolean.should be true
14
+ }
15
+ end
16
+ end
@@ -0,0 +1,18 @@
1
+ require "spec_helper"
2
+
3
+ describe DataAnon::Strategy::Field::RandomEmail do
4
+
5
+ RandomEmail = DataAnon::Strategy::Field::RandomEmail
6
+ let(:field) {DataAnon::Core::Field.new('email','real@email.com',1,nil)}
7
+
8
+
9
+ describe 'anonymized email must be different from original email' do
10
+
11
+ let(:anonymized_value) {RandomEmail.new.anonymize(field)}
12
+
13
+ it {anonymized_value.should_not equal field.value}
14
+ it {anonymized_value.should match '^\S+@\S+\.\S+$'}
15
+
16
+ end
17
+
18
+ end
@@ -0,0 +1,14 @@
1
+ require "spec_helper"
2
+
3
+ describe DataAnon::Strategy::Field::RandomFirstName do
4
+
5
+ RandomFirstName = DataAnon::Strategy::Field::RandomFirstName
6
+ let(:field) {DataAnon::Core::Field.new('firstname','fakeFirstName',1,nil)}
7
+
8
+ describe 'anonymized name must not be the same as provided name' do
9
+ let(:anonymized_value) {RandomFirstName.new().anonymize(field.value)}
10
+
11
+ it {anonymized_value.should_not equal field.value}
12
+ end
13
+
14
+ end
@@ -0,0 +1,21 @@
1
+ require "spec_helper"
2
+
3
+ describe DataAnon::Strategy::Field::RandomFloatDelta do
4
+
5
+ RandomFloatDelta = DataAnon::Strategy::Field::RandomFloatDelta
6
+ let(:field) {DataAnon::Core::Field.new('float_field',5.5,1,nil)}
7
+
8
+ describe 'anonymized float should not be the same as original value' do
9
+ let(:anonymized_float) {RandomFloatDelta.new(5).anonymize(field)}
10
+
11
+ it {anonymized_float.should_not equal field.value}
12
+ end
13
+
14
+ describe 'anonymized value returned should be a float' do
15
+ let(:anonymized_float) {RandomFloatDelta.new(5).anonymize(field)}
16
+
17
+ it { is_float = anonymized_float.is_a? Float
18
+ is_float.should be true
19
+ }
20
+ end
21
+ end
@@ -0,0 +1,23 @@
1
+ require "spec_helper"
2
+
3
+ describe DataAnon::Strategy::Field::RandomFullName do
4
+
5
+ RandomFullName = DataAnon::Strategy::Field::RandomFullName
6
+ let(:field) {DataAnon::Core::Field.new('name','Fake User',1,nil)}
7
+
8
+ describe 'anonymized name should be the same as original' do
9
+
10
+ let(:anonymized_name) {RandomFullName.new().anonymize(field)}
11
+
12
+ it {anonymized_name.should_not equal field.value}
13
+ end
14
+
15
+ describe 'anonymized name should have same number of words as original' do
16
+
17
+ let(:field) {DataAnon::Core::Field.new('name','Fake User Longer Name Test',1,nil)}
18
+ let(:anonymized_name) {RandomFullName.new().anonymize(field)}
19
+
20
+ it {anonymized_name.split(' ').size.should equal field.value.split(' ').size}
21
+ end
22
+
23
+ end
@@ -0,0 +1,28 @@
1
+ require "spec_helper"
2
+
3
+ describe DataAnon::Strategy::Field::RandomInt do
4
+
5
+ RandomInt = DataAnon::Strategy::Field::RandomInt
6
+ let(:field) { DataAnon::Core::Field.new('age', 25, 1, nil) }
7
+
8
+ describe 'verify age range between 18 and 70' do
9
+
10
+ let(:anonymized_int) { RandomInt.new(18, 70).anonymize(field) }
11
+
12
+ it { anonymized_int.should >= 18 }
13
+ it { anonymized_int.should <= 70 }
14
+
15
+ end
16
+
17
+ describe 'default range between 0 and 100' do
18
+
19
+ let(:anonymized_int) { RandomInt.new.anonymize(field) }
20
+
21
+ it { anonymized_int.should >= 0 }
22
+ it { anonymized_int.should <= 100 }
23
+
24
+ end
25
+
26
+
27
+
28
+ end
@@ -0,0 +1,23 @@
1
+ require "spec_helper"
2
+
3
+ describe DataAnon::Strategy::Field::RandomIntegerDelta do
4
+
5
+ RandomIntegerDelta = DataAnon::Strategy::Field::RandomIntegerDelta
6
+ let(:field) {DataAnon::Core::Field.new('integer_field',100,1,nil)}
7
+
8
+ describe "anonymized value returned should be an integer" do
9
+ let(:anonymized_integer) {RandomIntegerDelta.new(10).anonymize(field)}
10
+
11
+ it { is_integer = anonymized_integer.is_a? Integer
12
+ is_integer.should be true
13
+ }
14
+
15
+ end
16
+
17
+ describe "anonymized integer should be within delta from original integer" do
18
+ let(:anonymized_integer) {RandomIntegerDelta.new(10).anonymize(field)}
19
+
20
+ it{anonymized_integer.should be_between(90,110)}
21
+ end
22
+
23
+ end
@@ -0,0 +1,14 @@
1
+ require "spec_helper"
2
+
3
+ describe DataAnon::Strategy::Field::RandomLastName do
4
+
5
+ RandomLastName = DataAnon::Strategy::Field::RandomLastName
6
+ let(:field) {DataAnon::Core::Field.new('lastname','fakeLastName',1,nil)}
7
+
8
+ describe 'anonymized name must not be the same as provided name' do
9
+ let(:anonymized_value) {RandomLastName.new().anonymize(field.value)}
10
+
11
+ it {anonymized_value.should_not equal field.value}
12
+ end
13
+
14
+ end
@@ -0,0 +1,21 @@
1
+ require "spec_helper"
2
+
3
+ describe DataAnon::Strategy::Field::RandomMailinatorEmail do
4
+
5
+ RandomMailinatorEmail = DataAnon::Strategy::Field::RandomMailinatorEmail
6
+ let(:field) {DataAnon::Core::Field.new('email','user@company.com',1,nil)}
7
+
8
+ describe 'anonymized email should not be the same as original email' do
9
+
10
+ let(:anonymized_email) {RandomMailinatorEmail.new().anonymize(field)}
11
+
12
+ it {anonymized_email.should_not equal field.value}
13
+ end
14
+
15
+ describe 'anonymized email should be a mailinator email address' do
16
+
17
+ let(:anonymized_email) {RandomMailinatorEmail.new().anonymize(field)}
18
+
19
+ it {anonymized_email.should match '^\S+@\mailinator\.com$'}
20
+ end
21
+ end
@@ -0,0 +1,35 @@
1
+ require "spec_helper"
2
+
3
+ describe DataAnon::Strategy::Field::RandomPhoneNumber do
4
+
5
+ RandomPhoneNumber = DataAnon::Strategy::Field::RandomPhoneNumber
6
+ let(:field) {DataAnon::Core::Field.new('phone_number',"+0 (123) 456-7890",1,nil)}
7
+
8
+ describe 'anonymized phone number should not be the same as original phone number' do
9
+ let(:anonymized_number) {RandomPhoneNumber.new().anonymize(field)}
10
+
11
+ it {anonymized_number.should_not equal field.value}
12
+ end
13
+
14
+ describe 'anonymized phone number should be the same formatting as original phone number' do
15
+
16
+ it {
17
+ anonymized_number = RandomPhoneNumber.new().anonymize(field)
18
+ original_number = field.value
19
+ counter = 0
20
+ @number_similarity = true
21
+ anonymized_number.each_char do |char|
22
+ original_number_char = original_number[counter]
23
+ if /\d/.match(original_number_char).nil?
24
+ if !char.eql? original_number_char
25
+ @number_similarity = false
26
+ break
27
+ end
28
+ end
29
+
30
+ @number_similarity.should be true
31
+ end
32
+ }
33
+ end
34
+
35
+ end