data-anonymization 0.1.2 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.documentup.json +1 -0
- data/.travis.yml +0 -1
- data/README.md +277 -52
- data/blacklist_dsl.rb +1 -3
- data/data-anonymization.gemspec +4 -0
- data/lib/core/dsl.rb +1 -1
- data/lib/data-anonymization.rb +3 -0
- data/lib/strategy/base.rb +21 -11
- data/lib/strategy/blacklist.rb +2 -1
- data/lib/strategy/field/contact/geojson_base.rb +24 -0
- data/lib/strategy/field/contact/random_address.rb +17 -0
- data/lib/strategy/field/contact/random_city.rb +17 -0
- data/lib/strategy/field/contact/random_phone_number.rb +13 -0
- data/lib/strategy/field/contact/random_province.rb +17 -0
- data/lib/strategy/field/contact/random_zipcode.rb +17 -0
- data/lib/strategy/field/datetime/anonymize_date.rb +39 -0
- data/lib/strategy/field/datetime/anonymize_datetime.rb +15 -0
- data/lib/strategy/field/datetime/anonymize_time.rb +58 -0
- data/lib/strategy/field/datetime/date_delta.rb +21 -0
- data/lib/strategy/field/{date_time_delta.rb → datetime/date_time_delta.rb} +3 -3
- data/lib/strategy/field/datetime/time_delta.rb +12 -0
- data/lib/strategy/field/default_anon.rb +12 -7
- data/lib/strategy/field/email/gmail_template.rb +16 -0
- data/lib/strategy/field/{random_email.rb → email/random_email.rb} +0 -0
- data/lib/strategy/field/{random_mailinator_email.rb → email/random_mailinator_email.rb} +0 -2
- data/lib/strategy/field/fields.rb +51 -20
- data/lib/strategy/field/name/random_first_name.rb +14 -0
- data/lib/strategy/field/{random_full_name.rb → name/random_full_name.rb} +0 -0
- data/lib/strategy/field/name/random_last_name.rb +14 -0
- data/lib/strategy/field/{random_user_name.rb → name/random_user_name.rb} +0 -0
- data/lib/strategy/field/number/random_float.rb +23 -0
- data/lib/strategy/field/{random_float_delta.rb → number/random_float_delta.rb} +2 -4
- data/lib/strategy/field/{random_int.rb → number/random_integer.rb} +1 -1
- data/lib/strategy/field/{random_integer_delta.rb → number/random_integer_delta.rb} +2 -5
- data/lib/strategy/field/{random_phone_number.rb → string/formatted_string_numbers.rb} +4 -1
- data/lib/strategy/field/{lorem_ipsum.rb → string/lorem_ipsum.rb} +0 -0
- data/lib/strategy/field/{random_string.rb → string/random_string.rb} +0 -0
- data/lib/strategy/field/{distinct_column_values.rb → string/select_from_database.rb} +2 -3
- data/lib/strategy/field/string/select_from_file.rb +18 -0
- data/lib/strategy/field/string/select_from_list.rb +17 -0
- data/lib/strategy/field/{string_template.rb → string/string_template.rb} +0 -0
- data/lib/strategy/whitelist.rb +4 -2
- data/lib/utils/database.rb +8 -6
- data/lib/utils/geojson_parser.rb +42 -0
- data/lib/utils/logging.rb +0 -9
- data/lib/utils/progress_bar.rb +29 -0
- data/lib/utils/random_float.rb +12 -0
- data/lib/utils/random_int.rb +3 -7
- data/lib/utils/resource.rb +4 -0
- data/lib/version.rb +1 -1
- data/resources/UK_addresses.geojson +300 -0
- data/resources/US_addresses.geojson +300 -0
- data/spec/acceptance/rdbms_blacklist_spec.rb +2 -2
- data/spec/acceptance/rdbms_whitelist_spec.rb +6 -8
- data/spec/resource/sample.geojson +1 -0
- data/spec/spec_helper.rb +3 -2
- data/spec/strategy/field/contact/random_address_spec.rb +12 -0
- data/spec/strategy/field/contact/random_city_spec.rb +14 -0
- data/spec/strategy/field/contact/random_phone_number_spec.rb +16 -0
- data/spec/strategy/field/contact/random_province_spec.rb +14 -0
- data/spec/strategy/field/contact/random_zipcode_spec.rb +14 -0
- data/spec/strategy/field/datetime/anonymize_date_spec.rb +27 -0
- data/spec/strategy/field/datetime/anonymize_datetime_spec.rb +57 -0
- data/spec/strategy/field/datetime/anonymize_time_spec.rb +57 -0
- data/spec/strategy/field/datetime/date_delta_spec.rb +36 -0
- data/spec/strategy/field/{date_time_delta_spec.rb → datetime/date_time_delta_spec.rb} +3 -2
- data/spec/strategy/field/datetime/time_delta_spec.rb +44 -0
- data/spec/strategy/field/default_anon_spec.rb +42 -0
- data/spec/strategy/field/email/gmail_template_spec.rb +17 -0
- data/spec/strategy/field/{random_email_spec.rb → email/random_email_spec.rb} +2 -2
- data/spec/strategy/field/email/random_mailinator_email_spec.rb +14 -0
- data/spec/strategy/field/{random_first_name_spec.rb → name/random_first_name_spec.rb} +2 -2
- data/spec/strategy/field/{random_full_name_spec.rb → name/random_full_name_spec.rb} +2 -2
- data/spec/strategy/field/{random_last_name_spec.rb → name/random_last_name_spec.rb} +2 -2
- data/spec/strategy/field/{random_user_name_spec.rb → name/random_user_name_spec.rb} +2 -2
- data/spec/strategy/field/{random_float_delta_spec.rb → number/random_float_delta_spec.rb} +2 -2
- data/spec/strategy/field/number/random_float_spec.rb +28 -0
- data/spec/strategy/field/{random_integer_delta_spec.rb → number/random_integer_delta_spec.rb} +3 -5
- data/spec/strategy/field/{random_int_spec.rb → number/random_integer_spec.rb} +4 -4
- data/spec/strategy/field/random_boolean_spec.rb +2 -2
- data/spec/strategy/field/string/formatted_string_numbers_spec.rb +15 -0
- data/spec/strategy/field/{lorem_ipsum_spec.rb → string/lorem_ipsum_spec.rb} +2 -2
- data/spec/strategy/field/{random_string_spec.rb → string/random_string_spec.rb} +2 -2
- data/spec/strategy/field/{distinct_column_values_spec.rb → string/select_from_database_spec.rb} +3 -3
- data/spec/strategy/field/{random_selection_spec.rb → string/select_from_list_spec.rb} +5 -5
- data/spec/strategy/field/{string_template_spec.rb → string/string_template_spec.rb} +2 -2
- data/spec/strategy/field/whitelist_spec.rb +2 -2
- data/spec/support/customer_sample.rb +1 -1
- data/spec/utils/database_spec.rb +2 -2
- data/spec/utils/geojson_parser_spec.rb +38 -0
- data/whitelist_dsl.rb +4 -6
- metadata +163 -59
- data/lib/strategy/field/anonymize_time.rb +0 -57
- data/lib/strategy/field/gmail_template.rb +0 -17
- data/lib/strategy/field/random_first_name.rb +0 -18
- data/lib/strategy/field/random_last_name.rb +0 -19
- data/lib/strategy/field/random_selection.rb +0 -23
- data/lib/strategy/field/user_name_template.rb +0 -22
- data/spec/strategy/field/anonymize_time_spec.rb +0 -23
- data/spec/strategy/field/gmail_template_spec.rb +0 -14
- data/spec/strategy/field/random_mailinator_email_spec.rb +0 -21
- data/spec/strategy/field/random_phone_number_spec.rb +0 -35
- data/spec/strategy/field/user_name_template_spec.rb +0 -13
@@ -1,57 +0,0 @@
|
|
1
|
-
module DataAnon
  module Strategy
    module Field

      # Field strategy that replaces selected components (month, day, hour,
      # minute, second) of a Time value with random ones. The year is always
      # taken from the original value.
      class AnonymizeTime

        DEFAULT_ANONYMIZATION = true

        # Day counts for January..December of a non-leap year.
        DAYS_IN_MONTH = [31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31].freeze

        # Builds a strategy that randomizes only the month.
        def self.only_month
          new true, false, false, false, false
        end

        # Builds a strategy that randomizes only the day of month.
        def self.only_day
          new false, true, false, false, false
        end

        # Builds a strategy that randomizes only the hour.
        def self.only_hour
          new false, false, true, false, false
        end

        # Builds a strategy that randomizes only the minute.
        def self.only_minute
          new false, false, false, true, false
        end

        # Each flag selects whether the matching component is randomized
        # (true) or copied from the original value (false).
        def initialize anonymize_month = DEFAULT_ANONYMIZATION, anonymize_day = DEFAULT_ANONYMIZATION, anonymize_hour = DEFAULT_ANONYMIZATION, anonymize_min = DEFAULT_ANONYMIZATION, anonymize_sec = DEFAULT_ANONYMIZATION
          @anonymize_month = anonymize_month
          @anonymize_day = anonymize_day
          @anonymize_hour = anonymize_hour
          @anonymize_min = anonymize_min
          @anonymize_sec = anonymize_sec
        end

        # Returns a new Time built from field.value with the selected
        # components randomized.
        #
        # Random components are drawn from ranges Time.new accepts without
        # raising or rolling over: hour 0..23, minute 0..59, second 0..59 and
        # day 1..(last day of the resulting month). A preserved day that does
        # not exist in a randomized month (e.g. the 31st moved into February)
        # is clamped to that month's last day so the month is not shifted.
        def anonymize field
          provided_time = field.value
          year = provided_time.year
          month = @anonymize_month ? random_between(1, 12) : provided_time.month
          last_day = days_in_month(year, month)
          day = @anonymize_day ? random_between(1, last_day) : [provided_time.day, last_day].min
          hour = @anonymize_hour ? random_between(0, 23) : provided_time.hour
          min = @anonymize_min ? random_between(0, 59) : provided_time.min
          sec = @anonymize_sec ? random_between(0, 59) : provided_time.sec

          create_object(day, hour, min, month, sec, year)
        end

        private

        # NOTE(review): assumes RandomInt.generate treats both bounds as
        # inclusive - confirm against DataAnon::Utils::RandomInt.
        def random_between(lowest, highest)
          DataAnon::Utils::RandomInt.generate(lowest, highest)
        end

        # Number of days in the given month, accounting for leap years.
        def days_in_month(year, month)
          return 29 if month == 2 && leap_year?(year)
          DAYS_IN_MONTH[month - 1]
        end

        # Gregorian leap-year rule.
        def leap_year?(year)
          (year % 4).zero? && (!(year % 100).zero? || (year % 400).zero?)
        end

        # Time.new is called without a UTC offset, so the result is built in
        # the process's local time zone.
        def create_object(day, hour, min, month, sec, year)
          Time.new(year, month, day, hour, min, sec)
        end

      end
    end
  end
end
|
@@ -1,17 +0,0 @@
|
|
1
|
-
module DataAnon
  module Strategy
    module Field
      # Field strategy that produces a distinct Gmail "plus" address per row:
      # the local part of the configured address, a "+", the row number and
      # "@gmail.com".
      class GmailTemplate

        # gmail_address - the address whose local part is reused,
        #                 e.g. "fake@gmail.com". A bare local part ("fake")
        #                 is accepted as well.
        def initialize gmail_address = nil
          @gmail_address = gmail_address
        end

        # Returns "<local part>+<field.row_number>@gmail.com".
        #
        # Raises ArgumentError when no usable local part was configured.
        def anonymize field
          "#{username}+#{field.row_number}@gmail.com"
        end

        private

        # Local part of the configured address, computed once and reused for
        # every row. Everything before the first "@" is kept; when there is
        # no "@", the whole value is used.
        def username
          @username ||= begin
            local_part = @gmail_address.to_s.partition('@').first
            if local_part.empty?
              raise ArgumentError, 'GmailTemplate requires a gmail address such as "user@gmail.com"'
            end
            local_part
          end
        end

      end
    end
  end
end
|
@@ -1,18 +0,0 @@
|
|
1
|
-
module DataAnon
  module Strategy
    module Field

      # Field strategy that returns a randomly chosen first name.
      class RandomFirstName

        # file_path - optional path to a whitespace-separated list of names.
        #             When omitted, the 'first_names.txt' resource located by
        #             DataAnon::Utils::Resource.file is used.
        def initialize(file_path = nil)
          source = file_path || DataAnon::Utils::Resource.file('first_names.txt')
          @names = File.read(source).split
        end

        # Returns one of the loaded names, picked at a random index.
        # The field argument is not consulted.
        def anonymize(field)
          position = rand(@names.size)
          @names[position]
        end

      end
    end
  end
end
|
@@ -1,19 +0,0 @@
|
|
1
|
-
module DataAnon
  module Strategy
    module Field

      # Field strategy that returns a randomly chosen last name.
      class RandomLastName

        # file_path - optional path to a whitespace-separated list of names.
        #             When omitted, the 'last_names.txt' resource located by
        #             DataAnon::Utils::Resource.file is used.
        def initialize(file_path = nil)
          source = file_path || DataAnon::Utils::Resource.file('last_names.txt')
          @names = File.read(source).split
        end

        # Returns one of the loaded names, picked at a random index.
        # The field argument is not consulted.
        def anonymize(field)
          position = rand(@names.size)
          @names[position]
        end

      end
    end
  end
end
|
@@ -1,23 +0,0 @@
|
|
1
|
-
module DataAnon
  module Strategy
    module Field

      # Field strategy that returns one of a fixed set of candidate values.
      class RandomSelection

        # values - an Array of candidates, or a single value, which is
        #          wrapped in a one-element list.
        def initialize(values)
          @values = values.instance_of?(Array) ? values : [values]
        end

        # Returns the sole candidate when there is exactly one; otherwise the
        # candidate at an index obtained from
        # DataAnon::Utils::RandomInt.generate(0, last index).
        # The field argument is not consulted.
        def anonymize(field)
          last_index = @values.length - 1
          return @values[0] if last_index.zero?

          chosen = DataAnon::Utils::RandomInt.generate(0, last_index)
          @values[chosen]
        end

      end
    end
  end
end
|
@@ -1,22 +0,0 @@
|
|
1
|
-
module DataAnon
  module Strategy
    module Field

      # Field strategy that renders a Ruby string template in the context of
      # the field being anonymized, so the template can interpolate the
      # field's own methods.
      class UserNameTemplate

        # template - text that is evaluated as the body of a double-quoted
        #            Ruby string, e.g. 'user #{row_number}'.
        def initialize(template)
          @template = template
        end

        # Returns the template evaluated as a double-quoted string with
        # `field` as self.
        #
        # NOTE(review): this uses eval, so the template runs as arbitrary
        # Ruby code. It must only ever come from trusted configuration,
        # never from the data being anonymized.
        def anonymize(field)
          field_scope = field.instance_eval { binding }
          quoted_template = '"' + @template + '"'
          eval(quoted_template, field_scope)
        end

      end
    end
  end
end
|
@@ -1,23 +0,0 @@
|
|
1
|
-
require "spec_helper"
|
2
|
-
|
3
|
-
describe DataAnon::Strategy::Field::AnonymizeTime do

  AnonymizeTime = DataAnon::Strategy::Field::AnonymizeTime

  # Field wrapping noon-ish on 1 January 2000; every component is known so
  # the untouched ones can be checked after anonymization.
  let(:field) do
    DataAnon::Core::Field.new('date', Time.new(2000, 1, 1, 12, 12, 12), 1, nil)
  end

  describe 'providing true only for month should randomize only the month field' do

    let(:anonymized_time) do
      AnonymizeTime.new(true, false, false, false, false).anonymize(field)
    end

    it do
      # Components that were not selected must be carried over unchanged.
      anonymized_time.year.should be 2000
      anonymized_time.day.should be 1
      anonymized_time.hour.should be 12
      anonymized_time.min.should be 12
      anonymized_time.sec.should be 12

      # The randomized month only needs to be a valid month number.
      anonymized_time.month.should be_between(1, 12)
    end
  end

end
|
@@ -1,14 +0,0 @@
|
|
1
|
-
require "spec_helper"
|
2
|
-
|
3
|
-
describe DataAnon::Strategy::Field::GmailTemplate do

  GmailTemplate = DataAnon::Strategy::Field::GmailTemplate

  # Field on row 456, so the generated address should carry "+456".
  let(:field) do
    DataAnon::Core::Field.new('email', 'user@company.com', 456, nil)
  end

  describe 'generated email must be compliant with the provided template' do

    let(:anonymized_email) do
      GmailTemplate.new("fake@gmail.com").anonymize(field)
    end

    it do
      anonymized_email.should eq('fake+456@gmail.com')
    end
  end
end
|
@@ -1,21 +0,0 @@
|
|
1
|
-
require "spec_helper"
|
2
|
-
|
3
|
-
describe DataAnon::Strategy::Field::RandomMailinatorEmail do

  RandomMailinatorEmail = DataAnon::Strategy::Field::RandomMailinatorEmail
  let(:field) { DataAnon::Core::Field.new('email', 'user@company.com', 1, nil) }
  let(:anonymized_email) { RandomMailinatorEmail.new.anonymize(field) }

  describe 'anonymized email should not be the same as original email' do

    # `eq` compares content. The previous `equal` matcher compared object
    # identity, which holds for any newly built string and so could never
    # fail.
    it { anonymized_email.should_not eq field.value }
  end

  describe 'anonymized email should be a mailinator email address' do

    # \A and \z anchor the whole string (^ and $ only anchor lines), and the
    # stray backslash before "mailinator" has been dropped.
    it { anonymized_email.should match(/\A\S+@mailinator\.com\z/) }
  end
end
|
@@ -1,35 +0,0 @@
|
|
1
|
-
require "spec_helper"
|
2
|
-
|
3
|
-
describe DataAnon::Strategy::Field::RandomPhoneNumber do

  RandomPhoneNumber = DataAnon::Strategy::Field::RandomPhoneNumber
  let(:field) { DataAnon::Core::Field.new('phone_number', "+0 (123) 456-7890", 1, nil) }
  let(:anonymized_number) { RandomPhoneNumber.new.anonymize(field) }

  describe 'anonymized phone number should not be the same as original phone number' do

    # `eq` compares content. The previous `equal` matcher compared object
    # identity and therefore passed for any newly built string.
    it { anonymized_number.should_not eq field.value }
  end

  describe 'anonymized phone number should be the same formatting as original phone number' do

    # Every non-digit character of the original must appear at the same
    # position in the anonymized number. The earlier version never advanced
    # its position counter and left the loop before asserting, so it could
    # not fail.
    it {
      original_number = field.value

      anonymized_number.length.should eq(original_number.length)
      original_number.each_char.with_index do |original_char, position|
        next if original_char =~ /\d/
        anonymized_number[position].should eq(original_char)
      end
    }
  end

end
|
@@ -1,13 +0,0 @@
|
|
1
|
-
require "spec_helper"
|
2
|
-
|
3
|
-
describe DataAnon::Strategy::Field::UserNameTemplate do

  UserNameTemplate = DataAnon::Strategy::Field::UserNameTemplate

  # Field on row 100, so '#{row_number}' in the template should render "100".
  let(:field) do
    DataAnon::Core::Field.new('username', 'Chuck Norris', 100, nil)
  end

  describe 'should return same string value as StringTemplate' do
    let(:anonymized_username) do
      UserNameTemplate.new('Rajnikanth #{row_number}').anonymize(field)
    end

    it do
      anonymized_username.should == 'Rajnikanth 100'
    end
  end

end
|