data-anonymization 0.1.2 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (103) hide show
  1. data/.documentup.json +1 -0
  2. data/.travis.yml +0 -1
  3. data/README.md +277 -52
  4. data/blacklist_dsl.rb +1 -3
  5. data/data-anonymization.gemspec +4 -0
  6. data/lib/core/dsl.rb +1 -1
  7. data/lib/data-anonymization.rb +3 -0
  8. data/lib/strategy/base.rb +21 -11
  9. data/lib/strategy/blacklist.rb +2 -1
  10. data/lib/strategy/field/contact/geojson_base.rb +24 -0
  11. data/lib/strategy/field/contact/random_address.rb +17 -0
  12. data/lib/strategy/field/contact/random_city.rb +17 -0
  13. data/lib/strategy/field/contact/random_phone_number.rb +13 -0
  14. data/lib/strategy/field/contact/random_province.rb +17 -0
  15. data/lib/strategy/field/contact/random_zipcode.rb +17 -0
  16. data/lib/strategy/field/datetime/anonymize_date.rb +39 -0
  17. data/lib/strategy/field/datetime/anonymize_datetime.rb +15 -0
  18. data/lib/strategy/field/datetime/anonymize_time.rb +58 -0
  19. data/lib/strategy/field/datetime/date_delta.rb +21 -0
  20. data/lib/strategy/field/{date_time_delta.rb → datetime/date_time_delta.rb} +3 -3
  21. data/lib/strategy/field/datetime/time_delta.rb +12 -0
  22. data/lib/strategy/field/default_anon.rb +12 -7
  23. data/lib/strategy/field/email/gmail_template.rb +16 -0
  24. data/lib/strategy/field/{random_email.rb → email/random_email.rb} +0 -0
  25. data/lib/strategy/field/{random_mailinator_email.rb → email/random_mailinator_email.rb} +0 -2
  26. data/lib/strategy/field/fields.rb +51 -20
  27. data/lib/strategy/field/name/random_first_name.rb +14 -0
  28. data/lib/strategy/field/{random_full_name.rb → name/random_full_name.rb} +0 -0
  29. data/lib/strategy/field/name/random_last_name.rb +14 -0
  30. data/lib/strategy/field/{random_user_name.rb → name/random_user_name.rb} +0 -0
  31. data/lib/strategy/field/number/random_float.rb +23 -0
  32. data/lib/strategy/field/{random_float_delta.rb → number/random_float_delta.rb} +2 -4
  33. data/lib/strategy/field/{random_int.rb → number/random_integer.rb} +1 -1
  34. data/lib/strategy/field/{random_integer_delta.rb → number/random_integer_delta.rb} +2 -5
  35. data/lib/strategy/field/{random_phone_number.rb → string/formatted_string_numbers.rb} +4 -1
  36. data/lib/strategy/field/{lorem_ipsum.rb → string/lorem_ipsum.rb} +0 -0
  37. data/lib/strategy/field/{random_string.rb → string/random_string.rb} +0 -0
  38. data/lib/strategy/field/{distinct_column_values.rb → string/select_from_database.rb} +2 -3
  39. data/lib/strategy/field/string/select_from_file.rb +18 -0
  40. data/lib/strategy/field/string/select_from_list.rb +17 -0
  41. data/lib/strategy/field/{string_template.rb → string/string_template.rb} +0 -0
  42. data/lib/strategy/whitelist.rb +4 -2
  43. data/lib/utils/database.rb +8 -6
  44. data/lib/utils/geojson_parser.rb +42 -0
  45. data/lib/utils/logging.rb +0 -9
  46. data/lib/utils/progress_bar.rb +29 -0
  47. data/lib/utils/random_float.rb +12 -0
  48. data/lib/utils/random_int.rb +3 -7
  49. data/lib/utils/resource.rb +4 -0
  50. data/lib/version.rb +1 -1
  51. data/resources/UK_addresses.geojson +300 -0
  52. data/resources/US_addresses.geojson +300 -0
  53. data/spec/acceptance/rdbms_blacklist_spec.rb +2 -2
  54. data/spec/acceptance/rdbms_whitelist_spec.rb +6 -8
  55. data/spec/resource/sample.geojson +1 -0
  56. data/spec/spec_helper.rb +3 -2
  57. data/spec/strategy/field/contact/random_address_spec.rb +12 -0
  58. data/spec/strategy/field/contact/random_city_spec.rb +14 -0
  59. data/spec/strategy/field/contact/random_phone_number_spec.rb +16 -0
  60. data/spec/strategy/field/contact/random_province_spec.rb +14 -0
  61. data/spec/strategy/field/contact/random_zipcode_spec.rb +14 -0
  62. data/spec/strategy/field/datetime/anonymize_date_spec.rb +27 -0
  63. data/spec/strategy/field/datetime/anonymize_datetime_spec.rb +57 -0
  64. data/spec/strategy/field/datetime/anonymize_time_spec.rb +57 -0
  65. data/spec/strategy/field/datetime/date_delta_spec.rb +36 -0
  66. data/spec/strategy/field/{date_time_delta_spec.rb → datetime/date_time_delta_spec.rb} +3 -2
  67. data/spec/strategy/field/datetime/time_delta_spec.rb +44 -0
  68. data/spec/strategy/field/default_anon_spec.rb +42 -0
  69. data/spec/strategy/field/email/gmail_template_spec.rb +17 -0
  70. data/spec/strategy/field/{random_email_spec.rb → email/random_email_spec.rb} +2 -2
  71. data/spec/strategy/field/email/random_mailinator_email_spec.rb +14 -0
  72. data/spec/strategy/field/{random_first_name_spec.rb → name/random_first_name_spec.rb} +2 -2
  73. data/spec/strategy/field/{random_full_name_spec.rb → name/random_full_name_spec.rb} +2 -2
  74. data/spec/strategy/field/{random_last_name_spec.rb → name/random_last_name_spec.rb} +2 -2
  75. data/spec/strategy/field/{random_user_name_spec.rb → name/random_user_name_spec.rb} +2 -2
  76. data/spec/strategy/field/{random_float_delta_spec.rb → number/random_float_delta_spec.rb} +2 -2
  77. data/spec/strategy/field/number/random_float_spec.rb +28 -0
  78. data/spec/strategy/field/{random_integer_delta_spec.rb → number/random_integer_delta_spec.rb} +3 -5
  79. data/spec/strategy/field/{random_int_spec.rb → number/random_integer_spec.rb} +4 -4
  80. data/spec/strategy/field/random_boolean_spec.rb +2 -2
  81. data/spec/strategy/field/string/formatted_string_numbers_spec.rb +15 -0
  82. data/spec/strategy/field/{lorem_ipsum_spec.rb → string/lorem_ipsum_spec.rb} +2 -2
  83. data/spec/strategy/field/{random_string_spec.rb → string/random_string_spec.rb} +2 -2
  84. data/spec/strategy/field/{distinct_column_values_spec.rb → string/select_from_database_spec.rb} +3 -3
  85. data/spec/strategy/field/{random_selection_spec.rb → string/select_from_list_spec.rb} +5 -5
  86. data/spec/strategy/field/{string_template_spec.rb → string/string_template_spec.rb} +2 -2
  87. data/spec/strategy/field/whitelist_spec.rb +2 -2
  88. data/spec/support/customer_sample.rb +1 -1
  89. data/spec/utils/database_spec.rb +2 -2
  90. data/spec/utils/geojson_parser_spec.rb +38 -0
  91. data/whitelist_dsl.rb +4 -6
  92. metadata +163 -59
  93. data/lib/strategy/field/anonymize_time.rb +0 -57
  94. data/lib/strategy/field/gmail_template.rb +0 -17
  95. data/lib/strategy/field/random_first_name.rb +0 -18
  96. data/lib/strategy/field/random_last_name.rb +0 -19
  97. data/lib/strategy/field/random_selection.rb +0 -23
  98. data/lib/strategy/field/user_name_template.rb +0 -22
  99. data/spec/strategy/field/anonymize_time_spec.rb +0 -23
  100. data/spec/strategy/field/gmail_template_spec.rb +0 -14
  101. data/spec/strategy/field/random_mailinator_email_spec.rb +0 -21
  102. data/spec/strategy/field/random_phone_number_spec.rb +0 -35
  103. data/spec/strategy/field/user_name_template_spec.rb +0 -13
@@ -1,57 +0,0 @@
1
- module DataAnon
2
- module Strategy
3
- module Field
4
-
5
- class AnonymizeTime
6
-
7
- DEFAULT_ANONYMIZATION = true
8
-
9
- def self.only_month
10
- self.new true, false, false, false, false
11
- end
12
-
13
- def self.only_day
14
- self.new false, true, false, false, false
15
- end
16
-
17
- def self.only_hour
18
- self.new false, false, true, false, false
19
- end
20
-
21
- def self.only_minute
22
- self.new false, false, false, true, false
23
- end
24
-
25
- def initialize anonymize_month = DEFAULT_ANONYMIZATION, anonymize_day = DEFAULT_ANONYMIZATION, anonymize_hour = DEFAULT_ANONYMIZATION, anonymize_min = DEFAULT_ANONYMIZATION, anonymize_sec = DEFAULT_ANONYMIZATION
26
-
27
- @anonymize_month = anonymize_month
28
- @anonymize_day = anonymize_day
29
- @anonymize_hour = anonymize_hour
30
- @anonymize_min = anonymize_min
31
- @anonymize_sec = anonymize_sec
32
-
33
- end
34
-
35
- def anonymize field
36
-
37
- provided_time = field.value
38
- year = provided_time.year
39
- month = @anonymize_month? DataAnon::Utils::RandomInt.generate(1,12) : provided_time.month
40
- day = @anonymize_day? DataAnon::Utils::RandomInt.generate(1,31) : provided_time.day
41
- hour = @anonymize_hour? DataAnon::Utils::RandomInt.generate(1,24) : provided_time.hour
42
- min = @anonymize_min? DataAnon::Utils::RandomInt.generate(1,60) : provided_time.min
43
- sec = @anonymize_sec? DataAnon::Utils::RandomInt.generate(1,60) : provided_time.sec
44
-
45
- create_object(day, hour, min, month, sec, year)
46
- end
47
-
48
- private
49
-
50
- def create_object(day, hour, min, month, sec, year)
51
- Time.new(year, month, day, hour, min, sec)
52
- end
53
-
54
- end
55
- end
56
- end
57
- end
@@ -1,17 +0,0 @@
1
- module DataAnon
2
- module Strategy
3
- module Field
4
- class GmailTemplate
5
-
6
- def initialize gmail_address = nil
7
- @gmail_address = gmail_address
8
- end
9
-
10
- def anonymize field
11
- username = @gmail_address[0,@gmail_address.index('@')]
12
- "#{username}+#{field.row_number}@gmail.com"
13
- end
14
- end
15
- end
16
- end
17
- end
@@ -1,18 +0,0 @@
1
- module DataAnon
2
- module Strategy
3
- module Field
4
-
5
- class RandomFirstName
6
-
7
- def initialize file_path = nil
8
- file = file_path || DataAnon::Utils::Resource.file('first_names.txt')
9
- @names = File.read(file).split
10
- end
11
-
12
- def anonymize field
13
- return @names[rand(@names.size)]
14
- end
15
- end
16
- end
17
- end
18
- end
@@ -1,19 +0,0 @@
1
- module DataAnon
2
- module Strategy
3
- module Field
4
-
5
- class RandomLastName
6
-
7
- def initialize file_path = nil
8
- file = file_path || DataAnon::Utils::Resource.file('last_names.txt')
9
- @names = File.read(file).split
10
- end
11
-
12
- def anonymize field
13
- return @names[rand(@names.size)]
14
- end
15
-
16
- end
17
- end
18
- end
19
- end
@@ -1,23 +0,0 @@
1
- module DataAnon
2
- module Strategy
3
- module Field
4
-
5
-
6
- class RandomSelection
7
-
8
- def initialize values
9
- @values = values.class == Array ? values : [values]
10
-
11
- end
12
-
13
- def anonymize field
14
- return @values[0] if @values.length == 1
15
- @values[DataAnon::Utils::RandomInt.generate(0,(@values.length - 1))]
16
- end
17
-
18
- end
19
-
20
-
21
- end
22
- end
23
- end
@@ -1,22 +0,0 @@
1
- module DataAnon
2
- module Strategy
3
- module Field
4
-
5
-
6
- class UserNameTemplate
7
-
8
- def initialize template
9
- @template = template
10
- end
11
-
12
- def anonymize field
13
- context = field.instance_eval { binding }
14
- eval ('"' + @template + '"'), context
15
- end
16
-
17
- end
18
-
19
-
20
- end
21
- end
22
- end
@@ -1,23 +0,0 @@
1
- require "spec_helper"
2
-
3
- describe DataAnon::Strategy::Field::AnonymizeTime do
4
-
5
- AnonymizeTime = DataAnon::Strategy::Field::AnonymizeTime
6
- let(:field) { DataAnon::Core::Field.new('date', Time.new(2000,1,1,12,12,12), 1, nil) }
7
-
8
- describe 'providing true only for month should randomize only the month field' do
9
-
10
- let(:anonymized_time) { AnonymizeTime.new(true,false,false,false,false).anonymize(field) }
11
-
12
- it {
13
- anonymized_time.year.should be 2000
14
- anonymized_time.day.should be 1
15
- anonymized_time.hour.should be 12
16
- anonymized_time.min.should be 12
17
- anonymized_time.sec.should be 12
18
-
19
- anonymized_time.month.should be_between(1,12)
20
- }
21
- end
22
-
23
- end
@@ -1,14 +0,0 @@
1
- require "spec_helper"
2
-
3
- describe DataAnon::Strategy::Field::GmailTemplate do
4
-
5
- GmailTemplate = DataAnon::Strategy::Field::GmailTemplate
6
- let(:field) {DataAnon::Core::Field.new('email','user@company.com',456,nil)}
7
-
8
- describe 'generated email must be compliant with the provided template' do
9
-
10
- let(:anonymized_email) {GmailTemplate.new("fake@gmail.com").anonymize(field)}
11
-
12
- it {anonymized_email.should eq('fake+456@gmail.com')}
13
- end
14
- end
@@ -1,21 +0,0 @@
1
- require "spec_helper"
2
-
3
- describe DataAnon::Strategy::Field::RandomMailinatorEmail do
4
-
5
- RandomMailinatorEmail = DataAnon::Strategy::Field::RandomMailinatorEmail
6
- let(:field) {DataAnon::Core::Field.new('email','user@company.com',1,nil)}
7
-
8
- describe 'anonymized email should not be the same as original email' do
9
-
10
- let(:anonymized_email) {RandomMailinatorEmail.new().anonymize(field)}
11
-
12
- it {anonymized_email.should_not equal field.value}
13
- end
14
-
15
- describe 'anonymized email should be a mailinator email address' do
16
-
17
- let(:anonymized_email) {RandomMailinatorEmail.new().anonymize(field)}
18
-
19
- it {anonymized_email.should match '^\S+@\mailinator\.com$'}
20
- end
21
- end
@@ -1,35 +0,0 @@
1
- require "spec_helper"
2
-
3
- describe DataAnon::Strategy::Field::RandomPhoneNumber do
4
-
5
- RandomPhoneNumber = DataAnon::Strategy::Field::RandomPhoneNumber
6
- let(:field) {DataAnon::Core::Field.new('phone_number',"+0 (123) 456-7890",1,nil)}
7
-
8
- describe 'anonymized phone number should not be the same as original phone number' do
9
- let(:anonymized_number) {RandomPhoneNumber.new().anonymize(field)}
10
-
11
- it {anonymized_number.should_not equal field.value}
12
- end
13
-
14
- describe 'anonymized phone number should be the same formatting as original phone number' do
15
-
16
- it {
17
- anonymized_number = RandomPhoneNumber.new().anonymize(field)
18
- original_number = field.value
19
- counter = 0
20
- @number_similarity = true
21
- anonymized_number.each_char do |char|
22
- original_number_char = original_number[counter]
23
- if /\d/.match(original_number_char).nil?
24
- if !char.eql? original_number_char
25
- @number_similarity = false
26
- break
27
- end
28
- end
29
-
30
- @number_similarity.should be true
31
- end
32
- }
33
- end
34
-
35
- end
@@ -1,13 +0,0 @@
1
- require "spec_helper"
2
-
3
- describe DataAnon::Strategy::Field::UserNameTemplate do
4
-
5
- UserNameTemplate = DataAnon::Strategy::Field::UserNameTemplate
6
- let(:field) { DataAnon::Core::Field.new('username', 'Chuck Norris', 100, nil) }
7
-
8
- describe 'should return same string value as StringTemplate' do
9
- let(:anonymized_username) { UserNameTemplate.new('Rajnikanth #{row_number}').anonymize(field) }
10
- it { anonymized_username.should == 'Rajnikanth 100' }
11
- end
12
-
13
- end