data-anonymization 0.1.2 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (103)
  1. data/.documentup.json +1 -0
  2. data/.travis.yml +0 -1
  3. data/README.md +277 -52
  4. data/blacklist_dsl.rb +1 -3
  5. data/data-anonymization.gemspec +4 -0
  6. data/lib/core/dsl.rb +1 -1
  7. data/lib/data-anonymization.rb +3 -0
  8. data/lib/strategy/base.rb +21 -11
  9. data/lib/strategy/blacklist.rb +2 -1
  10. data/lib/strategy/field/contact/geojson_base.rb +24 -0
  11. data/lib/strategy/field/contact/random_address.rb +17 -0
  12. data/lib/strategy/field/contact/random_city.rb +17 -0
  13. data/lib/strategy/field/contact/random_phone_number.rb +13 -0
  14. data/lib/strategy/field/contact/random_province.rb +17 -0
  15. data/lib/strategy/field/contact/random_zipcode.rb +17 -0
  16. data/lib/strategy/field/datetime/anonymize_date.rb +39 -0
  17. data/lib/strategy/field/datetime/anonymize_datetime.rb +15 -0
  18. data/lib/strategy/field/datetime/anonymize_time.rb +58 -0
  19. data/lib/strategy/field/datetime/date_delta.rb +21 -0
  20. data/lib/strategy/field/{date_time_delta.rb → datetime/date_time_delta.rb} +3 -3
  21. data/lib/strategy/field/datetime/time_delta.rb +12 -0
  22. data/lib/strategy/field/default_anon.rb +12 -7
  23. data/lib/strategy/field/email/gmail_template.rb +16 -0
  24. data/lib/strategy/field/{random_email.rb → email/random_email.rb} +0 -0
  25. data/lib/strategy/field/{random_mailinator_email.rb → email/random_mailinator_email.rb} +0 -2
  26. data/lib/strategy/field/fields.rb +51 -20
  27. data/lib/strategy/field/name/random_first_name.rb +14 -0
  28. data/lib/strategy/field/{random_full_name.rb → name/random_full_name.rb} +0 -0
  29. data/lib/strategy/field/name/random_last_name.rb +14 -0
  30. data/lib/strategy/field/{random_user_name.rb → name/random_user_name.rb} +0 -0
  31. data/lib/strategy/field/number/random_float.rb +23 -0
  32. data/lib/strategy/field/{random_float_delta.rb → number/random_float_delta.rb} +2 -4
  33. data/lib/strategy/field/{random_int.rb → number/random_integer.rb} +1 -1
  34. data/lib/strategy/field/{random_integer_delta.rb → number/random_integer_delta.rb} +2 -5
  35. data/lib/strategy/field/{random_phone_number.rb → string/formatted_string_numbers.rb} +4 -1
  36. data/lib/strategy/field/{lorem_ipsum.rb → string/lorem_ipsum.rb} +0 -0
  37. data/lib/strategy/field/{random_string.rb → string/random_string.rb} +0 -0
  38. data/lib/strategy/field/{distinct_column_values.rb → string/select_from_database.rb} +2 -3
  39. data/lib/strategy/field/string/select_from_file.rb +18 -0
  40. data/lib/strategy/field/string/select_from_list.rb +17 -0
  41. data/lib/strategy/field/{string_template.rb → string/string_template.rb} +0 -0
  42. data/lib/strategy/whitelist.rb +4 -2
  43. data/lib/utils/database.rb +8 -6
  44. data/lib/utils/geojson_parser.rb +42 -0
  45. data/lib/utils/logging.rb +0 -9
  46. data/lib/utils/progress_bar.rb +29 -0
  47. data/lib/utils/random_float.rb +12 -0
  48. data/lib/utils/random_int.rb +3 -7
  49. data/lib/utils/resource.rb +4 -0
  50. data/lib/version.rb +1 -1
  51. data/resources/UK_addresses.geojson +300 -0
  52. data/resources/US_addresses.geojson +300 -0
  53. data/spec/acceptance/rdbms_blacklist_spec.rb +2 -2
  54. data/spec/acceptance/rdbms_whitelist_spec.rb +6 -8
  55. data/spec/resource/sample.geojson +1 -0
  56. data/spec/spec_helper.rb +3 -2
  57. data/spec/strategy/field/contact/random_address_spec.rb +12 -0
  58. data/spec/strategy/field/contact/random_city_spec.rb +14 -0
  59. data/spec/strategy/field/contact/random_phone_number_spec.rb +16 -0
  60. data/spec/strategy/field/contact/random_province_spec.rb +14 -0
  61. data/spec/strategy/field/contact/random_zipcode_spec.rb +14 -0
  62. data/spec/strategy/field/datetime/anonymize_date_spec.rb +27 -0
  63. data/spec/strategy/field/datetime/anonymize_datetime_spec.rb +57 -0
  64. data/spec/strategy/field/datetime/anonymize_time_spec.rb +57 -0
  65. data/spec/strategy/field/datetime/date_delta_spec.rb +36 -0
  66. data/spec/strategy/field/{date_time_delta_spec.rb → datetime/date_time_delta_spec.rb} +3 -2
  67. data/spec/strategy/field/datetime/time_delta_spec.rb +44 -0
  68. data/spec/strategy/field/default_anon_spec.rb +42 -0
  69. data/spec/strategy/field/email/gmail_template_spec.rb +17 -0
  70. data/spec/strategy/field/{random_email_spec.rb → email/random_email_spec.rb} +2 -2
  71. data/spec/strategy/field/email/random_mailinator_email_spec.rb +14 -0
  72. data/spec/strategy/field/{random_first_name_spec.rb → name/random_first_name_spec.rb} +2 -2
  73. data/spec/strategy/field/{random_full_name_spec.rb → name/random_full_name_spec.rb} +2 -2
  74. data/spec/strategy/field/{random_last_name_spec.rb → name/random_last_name_spec.rb} +2 -2
  75. data/spec/strategy/field/{random_user_name_spec.rb → name/random_user_name_spec.rb} +2 -2
  76. data/spec/strategy/field/{random_float_delta_spec.rb → number/random_float_delta_spec.rb} +2 -2
  77. data/spec/strategy/field/number/random_float_spec.rb +28 -0
  78. data/spec/strategy/field/{random_integer_delta_spec.rb → number/random_integer_delta_spec.rb} +3 -5
  79. data/spec/strategy/field/{random_int_spec.rb → number/random_integer_spec.rb} +4 -4
  80. data/spec/strategy/field/random_boolean_spec.rb +2 -2
  81. data/spec/strategy/field/string/formatted_string_numbers_spec.rb +15 -0
  82. data/spec/strategy/field/{lorem_ipsum_spec.rb → string/lorem_ipsum_spec.rb} +2 -2
  83. data/spec/strategy/field/{random_string_spec.rb → string/random_string_spec.rb} +2 -2
  84. data/spec/strategy/field/{distinct_column_values_spec.rb → string/select_from_database_spec.rb} +3 -3
  85. data/spec/strategy/field/{random_selection_spec.rb → string/select_from_list_spec.rb} +5 -5
  86. data/spec/strategy/field/{string_template_spec.rb → string/string_template_spec.rb} +2 -2
  87. data/spec/strategy/field/whitelist_spec.rb +2 -2
  88. data/spec/support/customer_sample.rb +1 -1
  89. data/spec/utils/database_spec.rb +2 -2
  90. data/spec/utils/geojson_parser_spec.rb +38 -0
  91. data/whitelist_dsl.rb +4 -6
  92. metadata +163 -59
  93. data/lib/strategy/field/anonymize_time.rb +0 -57
  94. data/lib/strategy/field/gmail_template.rb +0 -17
  95. data/lib/strategy/field/random_first_name.rb +0 -18
  96. data/lib/strategy/field/random_last_name.rb +0 -19
  97. data/lib/strategy/field/random_selection.rb +0 -23
  98. data/lib/strategy/field/user_name_template.rb +0 -22
  99. data/spec/strategy/field/anonymize_time_spec.rb +0 -23
  100. data/spec/strategy/field/gmail_template_spec.rb +0 -14
  101. data/spec/strategy/field/random_mailinator_email_spec.rb +0 -21
  102. data/spec/strategy/field/random_phone_number_spec.rb +0 -35
  103. data/spec/strategy/field/user_name_template_spec.rb +0 -13
@@ -1,57 +0,0 @@
1
- module DataAnon
2
- module Strategy
3
- module Field
4
-
5
- class AnonymizeTime
6
-
7
- DEFAULT_ANONYMIZATION = true
8
-
9
- def self.only_month
10
- self.new true, false, false, false, false
11
- end
12
-
13
- def self.only_day
14
- self.new false, true, false, false, false
15
- end
16
-
17
- def self.only_hour
18
- self.new false, false, true, false, false
19
- end
20
-
21
- def self.only_minute
22
- self.new false, false, false, true, false
23
- end
24
-
25
- def initialize anonymize_month = DEFAULT_ANONYMIZATION, anonymize_day = DEFAULT_ANONYMIZATION, anonymize_hour = DEFAULT_ANONYMIZATION, anonymize_min = DEFAULT_ANONYMIZATION, anonymize_sec = DEFAULT_ANONYMIZATION
26
-
27
- @anonymize_month = anonymize_month
28
- @anonymize_day = anonymize_day
29
- @anonymize_hour = anonymize_hour
30
- @anonymize_min = anonymize_min
31
- @anonymize_sec = anonymize_sec
32
-
33
- end
34
-
35
- def anonymize field
36
-
37
- provided_time = field.value
38
- year = provided_time.year
39
- month = @anonymize_month? DataAnon::Utils::RandomInt.generate(1,12) : provided_time.month
40
- day = @anonymize_day? DataAnon::Utils::RandomInt.generate(1,31) : provided_time.day
41
- hour = @anonymize_hour? DataAnon::Utils::RandomInt.generate(1,24) : provided_time.hour
42
- min = @anonymize_min? DataAnon::Utils::RandomInt.generate(1,60) : provided_time.min
43
- sec = @anonymize_sec? DataAnon::Utils::RandomInt.generate(1,60) : provided_time.sec
44
-
45
- create_object(day, hour, min, month, sec, year)
46
- end
47
-
48
- private
49
-
50
- def create_object(day, hour, min, month, sec, year)
51
- Time.new(year, month, day, hour, min, sec)
52
- end
53
-
54
- end
55
- end
56
- end
57
- end
@@ -1,17 +0,0 @@
1
- module DataAnon
2
- module Strategy
3
- module Field
4
- class GmailTemplate
5
-
6
- def initialize gmail_address = nil
7
- @gmail_address = gmail_address
8
- end
9
-
10
- def anonymize field
11
- username = @gmail_address[0,@gmail_address.index('@')]
12
- "#{username}+#{field.row_number}@gmail.com"
13
- end
14
- end
15
- end
16
- end
17
- end
@@ -1,18 +0,0 @@
1
- module DataAnon
2
- module Strategy
3
- module Field
4
-
5
- class RandomFirstName
6
-
7
- def initialize file_path = nil
8
- file = file_path || DataAnon::Utils::Resource.file('first_names.txt')
9
- @names = File.read(file).split
10
- end
11
-
12
- def anonymize field
13
- return @names[rand(@names.size)]
14
- end
15
- end
16
- end
17
- end
18
- end
@@ -1,19 +0,0 @@
1
- module DataAnon
2
- module Strategy
3
- module Field
4
-
5
- class RandomLastName
6
-
7
- def initialize file_path = nil
8
- file = file_path || DataAnon::Utils::Resource.file('last_names.txt')
9
- @names = File.read(file).split
10
- end
11
-
12
- def anonymize field
13
- return @names[rand(@names.size)]
14
- end
15
-
16
- end
17
- end
18
- end
19
- end
@@ -1,23 +0,0 @@
1
- module DataAnon
2
- module Strategy
3
- module Field
4
-
5
-
6
- class RandomSelection
7
-
8
- def initialize values
9
- @values = values.class == Array ? values : [values]
10
-
11
- end
12
-
13
- def anonymize field
14
- return @values[0] if @values.length == 1
15
- @values[DataAnon::Utils::RandomInt.generate(0,(@values.length - 1))]
16
- end
17
-
18
- end
19
-
20
-
21
- end
22
- end
23
- end
@@ -1,22 +0,0 @@
1
- module DataAnon
2
- module Strategy
3
- module Field
4
-
5
-
6
- class UserNameTemplate
7
-
8
- def initialize template
9
- @template = template
10
- end
11
-
12
- def anonymize field
13
- context = field.instance_eval { binding }
14
- eval ('"' + @template + '"'), context
15
- end
16
-
17
- end
18
-
19
-
20
- end
21
- end
22
- end
@@ -1,23 +0,0 @@
1
- require "spec_helper"
2
-
3
- describe DataAnon::Strategy::Field::AnonymizeTime do
4
-
5
- AnonymizeTime = DataAnon::Strategy::Field::AnonymizeTime
6
- let(:field) { DataAnon::Core::Field.new('date', Time.new(2000,1,1,12,12,12), 1, nil) }
7
-
8
- describe 'providing true only for month should randomize only the month field' do
9
-
10
- let(:anonymized_time) { AnonymizeTime.new(true,false,false,false,false).anonymize(field) }
11
-
12
- it {
13
- anonymized_time.year.should be 2000
14
- anonymized_time.day.should be 1
15
- anonymized_time.hour.should be 12
16
- anonymized_time.min.should be 12
17
- anonymized_time.sec.should be 12
18
-
19
- anonymized_time.month.should be_between(1,12)
20
- }
21
- end
22
-
23
- end
@@ -1,14 +0,0 @@
1
- require "spec_helper"
2
-
3
- describe DataAnon::Strategy::Field::GmailTemplate do
4
-
5
- GmailTemplate = DataAnon::Strategy::Field::GmailTemplate
6
- let(:field) {DataAnon::Core::Field.new('email','user@company.com',456,nil)}
7
-
8
- describe 'generated email must be compliant with the provided template' do
9
-
10
- let(:anonymized_email) {GmailTemplate.new("fake@gmail.com").anonymize(field)}
11
-
12
- it {anonymized_email.should eq('fake+456@gmail.com')}
13
- end
14
- end
@@ -1,21 +0,0 @@
1
- require "spec_helper"
2
-
3
- describe DataAnon::Strategy::Field::RandomMailinatorEmail do
4
-
5
- RandomMailinatorEmail = DataAnon::Strategy::Field::RandomMailinatorEmail
6
- let(:field) {DataAnon::Core::Field.new('email','user@company.com',1,nil)}
7
-
8
- describe 'anonymized email should not be the same as original email' do
9
-
10
- let(:anonymized_email) {RandomMailinatorEmail.new().anonymize(field)}
11
-
12
- it {anonymized_email.should_not equal field.value}
13
- end
14
-
15
- describe 'anonymized email should be a mailinator email address' do
16
-
17
- let(:anonymized_email) {RandomMailinatorEmail.new().anonymize(field)}
18
-
19
- it {anonymized_email.should match '^\S+@\mailinator\.com$'}
20
- end
21
- end
@@ -1,35 +0,0 @@
1
- require "spec_helper"
2
-
3
- describe DataAnon::Strategy::Field::RandomPhoneNumber do
4
-
5
- RandomPhoneNumber = DataAnon::Strategy::Field::RandomPhoneNumber
6
- let(:field) {DataAnon::Core::Field.new('phone_number',"+0 (123) 456-7890",1,nil)}
7
-
8
- describe 'anonymized phone number should not be the same as original phone number' do
9
- let(:anonymized_number) {RandomPhoneNumber.new().anonymize(field)}
10
-
11
- it {anonymized_number.should_not equal field.value}
12
- end
13
-
14
- describe 'anonymized phone number should be the same formatting as original phone number' do
15
-
16
- it {
17
- anonymized_number = RandomPhoneNumber.new().anonymize(field)
18
- original_number = field.value
19
- counter = 0
20
- @number_similarity = true
21
- anonymized_number.each_char do |char|
22
- original_number_char = original_number[counter]
23
- if /\d/.match(original_number_char).nil?
24
- if !char.eql? original_number_char
25
- @number_similarity = false
26
- break
27
- end
28
- end
29
-
30
- @number_similarity.should be true
31
- end
32
- }
33
- end
34
-
35
- end
@@ -1,13 +0,0 @@
1
- require "spec_helper"
2
-
3
- describe DataAnon::Strategy::Field::UserNameTemplate do
4
-
5
- UserNameTemplate = DataAnon::Strategy::Field::UserNameTemplate
6
- let(:field) { DataAnon::Core::Field.new('username', 'Chuck Norris', 100, nil) }
7
-
8
- describe 'should return same string value as StringTemplate' do
9
- let(:anonymized_username) { UserNameTemplate.new('Rajnikanth #{row_number}').anonymize(field) }
10
- it { anonymized_username.should == 'Rajnikanth 100' }
11
- end
12
-
13
- end