data-anonymization 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (83) hide show
  1. data/.documentup.json +8 -0
  2. data/.gitignore +20 -0
  3. data/.rspec +2 -0
  4. data/.rvmrc +1 -0
  5. data/.travis.yml +6 -0
  6. data/Gemfile +12 -0
  7. data/LICENSE.txt +22 -0
  8. data/README.md +256 -0
  9. data/Rakefile +9 -0
  10. data/blacklist_dsl.rb +19 -0
  11. data/blacklist_nosql_dsl.rb +36 -0
  12. data/data-anonymization.gemspec +22 -0
  13. data/lib/core/database.rb +36 -0
  14. data/lib/core/dsl.rb +16 -0
  15. data/lib/core/field.rb +18 -0
  16. data/lib/data-anonymization.rb +12 -0
  17. data/lib/strategy/base.rb +67 -0
  18. data/lib/strategy/blacklist.rb +18 -0
  19. data/lib/strategy/field/anonymize_time.rb +57 -0
  20. data/lib/strategy/field/anonymous.rb +21 -0
  21. data/lib/strategy/field/date_time_delta.rb +24 -0
  22. data/lib/strategy/field/default_anon.rb +28 -0
  23. data/lib/strategy/field/distinct_column_values.rb +25 -0
  24. data/lib/strategy/field/fields.rb +23 -0
  25. data/lib/strategy/field/gmail_template.rb +17 -0
  26. data/lib/strategy/field/lorem_ipsum.rb +29 -0
  27. data/lib/strategy/field/random_boolean.rb +19 -0
  28. data/lib/strategy/field/random_email.rb +31 -0
  29. data/lib/strategy/field/random_first_name.rb +18 -0
  30. data/lib/strategy/field/random_float_delta.rb +24 -0
  31. data/lib/strategy/field/random_full_name.rb +28 -0
  32. data/lib/strategy/field/random_int.rb +23 -0
  33. data/lib/strategy/field/random_integer_delta.rb +21 -0
  34. data/lib/strategy/field/random_last_name.rb +19 -0
  35. data/lib/strategy/field/random_mailinator_email.rb +20 -0
  36. data/lib/strategy/field/random_phone_number.rb +24 -0
  37. data/lib/strategy/field/random_selection.rb +23 -0
  38. data/lib/strategy/field/random_string.rb +22 -0
  39. data/lib/strategy/field/random_user_name.rb +23 -0
  40. data/lib/strategy/field/string_template.rb +22 -0
  41. data/lib/strategy/field/user_name_template.rb +22 -0
  42. data/lib/strategy/field/whitelist.rb +17 -0
  43. data/lib/strategy/strategies.rb +4 -0
  44. data/lib/strategy/whitelist.rb +21 -0
  45. data/lib/tasks/rake_tasks.rb +19 -0
  46. data/lib/utils/database.rb +53 -0
  47. data/lib/utils/logging.rb +29 -0
  48. data/lib/utils/random_int.rb +15 -0
  49. data/lib/utils/random_string.rb +14 -0
  50. data/lib/utils/resource.rb +13 -0
  51. data/lib/version.rb +3 -0
  52. data/resources/first_names.txt +500 -0
  53. data/resources/last_names.txt +500 -0
  54. data/spec/acceptance/rdbms_blacklist_spec.rb +30 -0
  55. data/spec/acceptance/rdbms_whitelist_spec.rb +50 -0
  56. data/spec/spec_helper.rb +26 -0
  57. data/spec/strategy/field/anonymize_time_spec.rb +23 -0
  58. data/spec/strategy/field/date_time_delta_spec.rb +43 -0
  59. data/spec/strategy/field/distinct_column_values_spec.rb +22 -0
  60. data/spec/strategy/field/gmail_template_spec.rb +14 -0
  61. data/spec/strategy/field/lorem_ipsum_spec.rb +27 -0
  62. data/spec/strategy/field/random_boolean_spec.rb +16 -0
  63. data/spec/strategy/field/random_email_spec.rb +18 -0
  64. data/spec/strategy/field/random_first_name_spec.rb +14 -0
  65. data/spec/strategy/field/random_float_delta_spec.rb +21 -0
  66. data/spec/strategy/field/random_full_name_spec.rb +23 -0
  67. data/spec/strategy/field/random_int_spec.rb +28 -0
  68. data/spec/strategy/field/random_integer_delta_spec.rb +23 -0
  69. data/spec/strategy/field/random_last_name_spec.rb +14 -0
  70. data/spec/strategy/field/random_mailinator_email_spec.rb +21 -0
  71. data/spec/strategy/field/random_phone_number_spec.rb +35 -0
  72. data/spec/strategy/field/random_selection_spec.rb +36 -0
  73. data/spec/strategy/field/random_string_spec.rb +23 -0
  74. data/spec/strategy/field/random_user_name_spec.rb +23 -0
  75. data/spec/strategy/field/string_template_spec.rb +15 -0
  76. data/spec/strategy/field/user_name_template_spec.rb +13 -0
  77. data/spec/strategy/field/whitelist_spec.rb +21 -0
  78. data/spec/support/customer_sample.rb +43 -0
  79. data/spec/utils/database_spec.rb +26 -0
  80. data/spec/utils/random_int_spec.rb +9 -0
  81. data/spec/utils/random_string_spec.rb +8 -0
  82. data/whitelist_dsl.rb +44 -0
  83. metadata +192 -0
@@ -0,0 +1,30 @@
1
+ require "spec_helper"
2
+
3
# Acceptance spec for the Blacklist strategy: seeds a SQLite database with the
# sample customer, runs the anonymization DSL in place, then reads the row back.
describe "End 2 End RDBMS Blacklist Acceptance Test using SQLite database" do
  sqlite_config = { :adapter => 'sqlite3', :database => 'tmp/customer.sqlite' }

  before(:each) do
    CustomerSample.clean
    CustomerSample.create_schema(sqlite_config)
    CustomerSample.insert_record(sqlite_config, CustomerSample::SAMPLE_DATA)
  end

  it "should anonymize customer table record " do
    database "Customer" do
      strategy DataAnon::Strategy::Blacklist
      source_db sqlite_config

      table 'customers' do
        primary_key 'cust_id'
        # Single quotes keep '#{row_number}' literal so it is handed to
        # StringTemplate uninterpolated.
        anonymize('email').using DataAnon::Strategy::Field::StringTemplate.new('test+#{row_number}@gmail.com')
      end
    end

    # Re-open the same database and look the sample row up by primary key.
    DataAnon::Utils::SourceDatabase.establish_connection(sqlite_config)
    customers = DataAnon::Utils::SourceTable.create('customers', 'cust_id')
    sample_id = CustomerSample::SAMPLE_DATA[:cust_id]
    anonymized_row = customers.find(sample_id)
    anonymized_row['email'].should == 'test+1@gmail.com'
  end
end
@@ -0,0 +1,50 @@
1
+ require "spec_helper"
2
+
3
# Acceptance spec for the Whitelist strategy: copies the sample customer from a
# source SQLite database into a destination database and checks which columns
# were carried over verbatim and which were replaced.
describe "End 2 End RDBMS Whitelist Acceptance Test using SQLite database" do

  source_connection_spec = {:adapter => 'sqlite3', :database => 'tmp/customer.sqlite'}
  dest_connection_spec = {:adapter => 'sqlite3', :database => 'tmp/customer-dest.sqlite'}

  before(:each) do
    CustomerSample.clean
    CustomerSample.create_schema source_connection_spec
    CustomerSample.insert_record source_connection_spec, CustomerSample::SAMPLE_DATA

    # The destination only needs the schema; the strategy fills in the rows.
    CustomerSample.create_schema dest_connection_spec
  end

  it "should anonymize customer table record " do

    DF = DataAnon::Strategy::Field

    database "Customer" do
      strategy DataAnon::Strategy::Whitelist
      source_db source_connection_spec
      destination_db dest_connection_spec

      table 'customers' do
        primary_key 'cust_id'
        whitelist 'cust_id', 'address', 'zipcode'
        anonymize('first_name').using DF::RandomFirstName.new
        anonymize('last_name').using DF::RandomLastName.new
        anonymize('state').using DF::RandomSelection.new(['Gujrat','Karnataka'])
        anonymize('phone').using DF::RandomPhoneNumber.new
        # Single quotes keep '#{row_number}' literal for StringTemplate.
        anonymize('email').using DF::StringTemplate.new('test+#{row_number}@gmail.com')
      end
    end

    DataAnon::Utils::DestinationDatabase.establish_connection dest_connection_spec
    dest_table = DataAnon::Utils::DestinationTable.create 'customers', 'cust_id'
    new_rec = dest_table.find(CustomerSample::SAMPLE_DATA[:cust_id])

    # `eq` compares by value. `be(obj)` checks object identity, which is never
    # true for a String freshly loaded from the database, so it cannot fail.
    new_rec.first_name.should_not eq("Sunit")
    new_rec.last_name.should_not eq("Parekh")
    # NOTE(review): still an identity check, so this always passes. Left as-is
    # because the strategy applied to birth_date is not visible here; confirm
    # it can never return the original date before switching to `eq`.
    new_rec.birth_date.should_not be(Date.new(1977,7,8))
    new_rec.address.should == 'F 501 Shanti Nagar'
    ['Gujrat','Karnataka'].should include(new_rec.state)
    new_rec.zipcode.should == '411048'
    new_rec.phone.should_not eq("9923700662")
    new_rec.email.should == 'test+1@gmail.com'

  end
end
@@ -0,0 +1,26 @@
1
+ require 'rspec'
2
+ require "pry"
3
+ require 'data-anonymization'
4
+
5
# Load every Ruby file under spec/support (recursively) before the specs run.
support_glob = File.join(File.dirname(__FILE__), 'support', '**', '*.rb')
Dir[support_glob].each { |helper| require helper }

# Logger verbosity used while the specs run.
logging = DataAnon::Utils::Logging
logging.logger.level = Logger::INFO
logging.progress_logger.level = Logger::WARN


RSpec.configure do |rspec|
  rspec.expect_with :rspec
  rspec.mock_with 'rspec-mocks'

  # Hooks are registered but currently have empty bodies.
  rspec.before(:suite) { }

  rspec.before(:each) { }

  rspec.after(:suite) { }
end
24
+
25
+
26
+
@@ -0,0 +1,23 @@
1
+ require "spec_helper"
2
+
3
# Spec for AnonymizeTime: with only the first flag set, every component of the
# timestamp other than the month must come back unchanged.
describe DataAnon::Strategy::Field::AnonymizeTime do

  AnonymizeTime = DataAnon::Strategy::Field::AnonymizeTime

  let(:field) do
    DataAnon::Core::Field.new('date', Time.new(2000,1,1,12,12,12), 1, nil)
  end

  describe 'providing true only for month should randomize only the month field' do

    let(:anonymized_time) do
      AnonymizeTime.new(true,false,false,false,false).anonymize(field)
    end

    it do
      # Components that must match the original 2000-01-01 12:12:12 value.
      untouched = [[:year, 2000], [:day, 1], [:hour, 12], [:min, 12], [:sec, 12]]
      untouched.each do |component, expected|
        anonymized_time.public_send(component).should be expected
      end

      anonymized_time.month.should be_between(1,12)
    end
  end

end
@@ -0,0 +1,43 @@
1
+ require "spec_helper"
2
+
3
# Spec for DateTimeDelta: checks how far the anonymized timestamp drifts from
# the original for several (day, minute) delta settings.
describe DataAnon::Strategy::Field::DateTimeDelta do

  DateTimeDelta = DataAnon::Strategy::Field::DateTimeDelta

  let(:field) do
    DataAnon::Core::Field.new('date', DateTime.new(2000,1,1), 1, nil)
  end

  # Signed drift between anonymized and original value, taken via #to_i.
  # Shared by all nested groups; each group supplies its own anonymized_value.
  let(:date_difference) { anonymized_value.to_i - field.value.to_i }

  describe 'datetime should not remain the same' do
    let(:anonymized_value) { DateTimeDelta.new().anonymize(field) }

    it { date_difference.should_not be 0 }
  end

  describe 'datetime should not change when provided with 0 delta for both date and time' do
    let(:anonymized_value) { DateTimeDelta.new(0,0).anonymize(field) }

    it { date_difference.should be 0 }
  end

  describe 'date should be anonymized within provided delta' do
    let(:anonymized_value) { DateTimeDelta.new(5,0).anonymize(field) }

    it { date_difference.should be_between(-5.days, 5.days) }
  end

  describe 'time should be anonymized within provided delta' do
    let(:anonymized_value) { DateTimeDelta.new(0,10).anonymize(field) }

    it { date_difference.should be_between(-10.minutes, 10.minutes) }
  end

end
@@ -0,0 +1,22 @@
1
+ require "spec_helper"
2
+
3
# Spec for DistinctColumnValues: the replacement is drawn from the 'Name'
# column of the 'MediaType' table in the bundled chinook SQLite sample.
describe DataAnon::Strategy::Field::DistinctColumnValues do

  before(:each) do
    source = {:adapter => 'sqlite3', :database => 'sample-data/chinook.sqlite'}
    DataAnon::Utils::SourceDatabase.establish_connection source
  end

  DistinctColumnValues = DataAnon::Strategy::Field::DistinctColumnValues
  let(:field) { DataAnon::Core::Field.new('name', 'Abcd', 1, nil) }

  describe 'more than one values in predefined list' do

    let(:anonymized_value) { DistinctColumnValues.new('MediaType','Name').anonymize(field) }

    # `eq` compares by value; `be('Abcd')` checks object identity against a
    # new String literal and therefore could never fail.
    it { anonymized_value.should_not eq('Abcd') }
    it { anonymized_value.should_not be_empty }

  end

end
@@ -0,0 +1,14 @@
1
+ require "spec_helper"
2
+
3
# Spec for GmailTemplate: the field's row number (456 here) is expected to be
# inserted after a '+' in front of the '@' of the template address.
describe DataAnon::Strategy::Field::GmailTemplate do

  GmailTemplate = DataAnon::Strategy::Field::GmailTemplate

  let(:field) do
    DataAnon::Core::Field.new('email','user@company.com',456,nil)
  end

  describe 'generated email must be compliant with the provided template' do

    let(:anonymized_email) do
      template = GmailTemplate.new("fake@gmail.com")
      template.anonymize(field)
    end

    it do
      anonymized_email.should eq('fake+456@gmail.com')
    end
  end
end
@@ -0,0 +1,27 @@
1
+ require "spec_helper"
2
+
3
# Spec for LoremIpsum: the replacement text must differ from the original value
# while keeping the original length.
describe DataAnon::Strategy::Field::LoremIpsum do

  LoremIpsum = DataAnon::Strategy::Field::LoremIpsum
  let(:field) { DataAnon::Core::Field.new('state', 'New Delhi', 1, nil) }

  describe 'should return same length value using default text' do

    let(:anonymized_value) { LoremIpsum.new.anonymize(field) }

    # Compare the value itself. Comparing `.length` (an Integer) with the
    # String 'New Delhi' can never match, so that assertion could not fail.
    it { anonymized_value.should_not eq('New Delhi') }
    it { anonymized_value.length.should == 'New Delhi'.length }

  end

  describe 'should return same length value using set text' do

    let(:anonymized_value) { LoremIpsum.new("Sunit Parekh").anonymize(field) }

    it { anonymized_value.should_not eq('New Delhi') }
    # 'Sunit Par' is the first 9 characters of the supplied text, i.e. the
    # length of 'New Delhi'.
    it { anonymized_value.should == 'Sunit Par' }

  end

end
@@ -0,0 +1,16 @@
1
+ require "spec_helper"
2
+
3
# Spec for RandomBoolean: whatever the input, the result must be either true
# or false.
describe DataAnon::Strategy::Field::RandomBoolean do

  RandomBoolean = DataAnon::Strategy::Field::RandomBoolean

  let(:field) do
    DataAnon::Core::Field.new('boolean_field',true,1,nil)
  end

  describe 'anonymized value should be a boolean' do
    let(:anonymized_boolean) { RandomBoolean.new.anonymize(field) }

    it do
      result = anonymized_boolean
      (result.is_a?(TrueClass) || result.is_a?(FalseClass)).should be true
    end
  end
end
@@ -0,0 +1,18 @@
1
+ require "spec_helper"
2
+
3
# Spec for RandomEmail: the result must differ from the original address and
# still look like an e-mail address.
describe DataAnon::Strategy::Field::RandomEmail do

  RandomEmail = DataAnon::Strategy::Field::RandomEmail
  let(:field) {DataAnon::Core::Field.new('email','real@email.com',1,nil)}


  describe 'anonymized email must be different from original email' do

    let(:anonymized_value) {RandomEmail.new.anonymize(field)}

    # `eq` compares by value; `equal` checks object identity and would pass
    # even if the strategy returned an identical copy of the original string.
    it {anonymized_value.should_not eq(field.value)}
    it {anonymized_value.should match '^\S+@\S+\.\S+$'}

  end

end
@@ -0,0 +1,14 @@
1
+ require "spec_helper"
2
+
3
# Spec for RandomFirstName: the generated name must differ from the original.
describe DataAnon::Strategy::Field::RandomFirstName do

  RandomFirstName = DataAnon::Strategy::Field::RandomFirstName
  let(:field) {DataAnon::Core::Field.new('firstname','fakeFirstName',1,nil)}

  describe 'anonymized name must not be the same as provided name' do
    # Pass the Field object itself (not field.value), matching how the specs
    # for the other field strategies call #anonymize.
    let(:anonymized_value) {RandomFirstName.new().anonymize(field)}

    # `eq` compares by value; `equal` only checks object identity.
    it {anonymized_value.should_not eq(field.value)}
  end

end
@@ -0,0 +1,21 @@
1
+ require "spec_helper"
2
+
3
# Spec for RandomFloatDelta: the result must be a Float that differs from the
# original value.
describe DataAnon::Strategy::Field::RandomFloatDelta do

  RandomFloatDelta = DataAnon::Strategy::Field::RandomFloatDelta
  let(:field) {DataAnon::Core::Field.new('float_field',5.5,1,nil)}

  describe 'anonymized float should not be the same as original value' do
    let(:anonymized_float) {RandomFloatDelta.new(5).anonymize(field)}

    # `eq` compares by value; `equal` checks object identity, which does not
    # express "a different number".
    it {anonymized_float.should_not eq(field.value)}
  end

  describe 'anonymized value returned should be a float' do
    let(:anonymized_float) {RandomFloatDelta.new(5).anonymize(field)}

    # be_a reports the actual class on failure instead of "expected true".
    it { anonymized_float.should be_a(Float) }
  end
end
@@ -0,0 +1,23 @@
1
+ require "spec_helper"
2
+
3
# Spec for RandomFullName: the generated name must differ from the original
# while keeping the same number of space-separated words.
describe DataAnon::Strategy::Field::RandomFullName do

  RandomFullName = DataAnon::Strategy::Field::RandomFullName
  let(:field) {DataAnon::Core::Field.new('name','Fake User',1,nil)}

  # The description now matches the assertion below (it previously read
  # "should be the same", the opposite of what is checked).
  describe 'anonymized name should not be the same as original' do

    let(:anonymized_name) {RandomFullName.new().anonymize(field)}

    # `eq` compares by value; `equal` only checks object identity.
    it {anonymized_name.should_not eq(field.value)}
  end

  describe 'anonymized name should have same number of words as original' do

    let(:field) {DataAnon::Core::Field.new('name','Fake User Longer Name Test',1,nil)}
    let(:anonymized_name) {RandomFullName.new().anonymize(field)}

    it {anonymized_name.split(' ').size.should eq(field.value.split(' ').size)}
  end

end
@@ -0,0 +1,28 @@
1
+ require "spec_helper"
2
+
3
# Spec for RandomInt: results must stay inside the requested range, or inside
# 0..100 when no range is given.
describe DataAnon::Strategy::Field::RandomInt do

  RandomInt = DataAnon::Strategy::Field::RandomInt

  let(:field) do
    DataAnon::Core::Field.new('age', 25, 1, nil)
  end

  describe 'verify age range between 18 and 70' do
    let(:anonymized_int) do
      RandomInt.new(18, 70).anonymize(field)
    end

    it do
      anonymized_int.should >= 18
    end

    it do
      anonymized_int.should <= 70
    end
  end

  describe 'default range between 0 and 100' do
    let(:anonymized_int) do
      RandomInt.new.anonymize(field)
    end

    it do
      anonymized_int.should >= 0
    end

    it do
      anonymized_int.should <= 100
    end
  end

end
@@ -0,0 +1,23 @@
1
+ require "spec_helper"
2
+
3
# Spec for RandomIntegerDelta: with a delta of 10 around 100 the result must be
# an Integer between 90 and 110.
describe DataAnon::Strategy::Field::RandomIntegerDelta do

  RandomIntegerDelta = DataAnon::Strategy::Field::RandomIntegerDelta

  let(:field) do
    DataAnon::Core::Field.new('integer_field',100,1,nil)
  end

  describe "anonymized value returned should be an integer" do
    let(:anonymized_integer) do
      RandomIntegerDelta.new(10).anonymize(field)
    end

    it do
      anonymized_integer.is_a?(Integer).should be true
    end
  end

  describe "anonymized integer should be within delta from original integer" do
    let(:anonymized_integer) do
      RandomIntegerDelta.new(10).anonymize(field)
    end

    it do
      anonymized_integer.should be_between(90,110)
    end
  end

end
@@ -0,0 +1,14 @@
1
+ require "spec_helper"
2
+
3
# Spec for RandomLastName: the generated name must differ from the original.
describe DataAnon::Strategy::Field::RandomLastName do

  RandomLastName = DataAnon::Strategy::Field::RandomLastName
  let(:field) {DataAnon::Core::Field.new('lastname','fakeLastName',1,nil)}

  describe 'anonymized name must not be the same as provided name' do
    # Pass the Field object itself (not field.value), matching how the specs
    # for the other field strategies call #anonymize.
    let(:anonymized_value) {RandomLastName.new().anonymize(field)}

    # `eq` compares by value; `equal` only checks object identity.
    it {anonymized_value.should_not eq(field.value)}
  end

end
@@ -0,0 +1,21 @@
1
+ require "spec_helper"
2
+
3
# Spec for RandomMailinatorEmail: the result must differ from the original and
# be an address at mailinator.com.
describe DataAnon::Strategy::Field::RandomMailinatorEmail do

  RandomMailinatorEmail = DataAnon::Strategy::Field::RandomMailinatorEmail
  let(:field) {DataAnon::Core::Field.new('email','user@company.com',1,nil)}

  describe 'anonymized email should not be the same as original email' do

    let(:anonymized_email) {RandomMailinatorEmail.new().anonymize(field)}

    # `eq` compares by value; `equal` only checks object identity.
    it {anonymized_email.should_not eq(field.value)}
  end

  describe 'anonymized email should be a mailinator email address' do

    let(:anonymized_email) {RandomMailinatorEmail.new().anonymize(field)}

    # Pattern without the stray backslash that used to precede "mailinator".
    it {anonymized_email.should match '^\S+@mailinator\.com$'}
  end
end
@@ -0,0 +1,35 @@
1
+ require "spec_helper"
2
+
3
# Spec for RandomPhoneNumber: the result must differ from the original number
# while keeping every non-digit character (spaces, '+', parentheses, '-') at
# the same position.
describe DataAnon::Strategy::Field::RandomPhoneNumber do

  RandomPhoneNumber = DataAnon::Strategy::Field::RandomPhoneNumber
  let(:field) {DataAnon::Core::Field.new('phone_number',"+0 (123) 456-7890",1,nil)}

  describe 'anonymized phone number should not be the same as original phone number' do
    let(:anonymized_number) {RandomPhoneNumber.new().anonymize(field)}

    # `eq` compares by value; `equal` only checks object identity.
    it {anonymized_number.should_not eq(field.value)}
  end

  describe 'anonymized phone number should be the same formatting as original phone number' do

    it {
      anonymized_number = RandomPhoneNumber.new().anonymize(field)
      original_number = field.value

      # Walk the original by index so each formatting character is compared
      # with the character at the same position in the anonymized number, and
      # assert on every position so a mismatch fails the example.
      original_number.each_char.with_index do |original_char, index|
        next if original_char =~ /\d/
        anonymized_number[index].should eq(original_char)
      end
    }
  end

end