data-anonymization 0.1.2 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- data/.documentup.json +1 -0
- data/.travis.yml +0 -1
- data/README.md +277 -52
- data/blacklist_dsl.rb +1 -3
- data/data-anonymization.gemspec +4 -0
- data/lib/core/dsl.rb +1 -1
- data/lib/data-anonymization.rb +3 -0
- data/lib/strategy/base.rb +21 -11
- data/lib/strategy/blacklist.rb +2 -1
- data/lib/strategy/field/contact/geojson_base.rb +24 -0
- data/lib/strategy/field/contact/random_address.rb +17 -0
- data/lib/strategy/field/contact/random_city.rb +17 -0
- data/lib/strategy/field/contact/random_phone_number.rb +13 -0
- data/lib/strategy/field/contact/random_province.rb +17 -0
- data/lib/strategy/field/contact/random_zipcode.rb +17 -0
- data/lib/strategy/field/datetime/anonymize_date.rb +39 -0
- data/lib/strategy/field/datetime/anonymize_datetime.rb +15 -0
- data/lib/strategy/field/datetime/anonymize_time.rb +58 -0
- data/lib/strategy/field/datetime/date_delta.rb +21 -0
- data/lib/strategy/field/{date_time_delta.rb → datetime/date_time_delta.rb} +3 -3
- data/lib/strategy/field/datetime/time_delta.rb +12 -0
- data/lib/strategy/field/default_anon.rb +12 -7
- data/lib/strategy/field/email/gmail_template.rb +16 -0
- data/lib/strategy/field/{random_email.rb → email/random_email.rb} +0 -0
- data/lib/strategy/field/{random_mailinator_email.rb → email/random_mailinator_email.rb} +0 -2
- data/lib/strategy/field/fields.rb +51 -20
- data/lib/strategy/field/name/random_first_name.rb +14 -0
- data/lib/strategy/field/{random_full_name.rb → name/random_full_name.rb} +0 -0
- data/lib/strategy/field/name/random_last_name.rb +14 -0
- data/lib/strategy/field/{random_user_name.rb → name/random_user_name.rb} +0 -0
- data/lib/strategy/field/number/random_float.rb +23 -0
- data/lib/strategy/field/{random_float_delta.rb → number/random_float_delta.rb} +2 -4
- data/lib/strategy/field/{random_int.rb → number/random_integer.rb} +1 -1
- data/lib/strategy/field/{random_integer_delta.rb → number/random_integer_delta.rb} +2 -5
- data/lib/strategy/field/{random_phone_number.rb → string/formatted_string_numbers.rb} +4 -1
- data/lib/strategy/field/{lorem_ipsum.rb → string/lorem_ipsum.rb} +0 -0
- data/lib/strategy/field/{random_string.rb → string/random_string.rb} +0 -0
- data/lib/strategy/field/{distinct_column_values.rb → string/select_from_database.rb} +2 -3
- data/lib/strategy/field/string/select_from_file.rb +18 -0
- data/lib/strategy/field/string/select_from_list.rb +17 -0
- data/lib/strategy/field/{string_template.rb → string/string_template.rb} +0 -0
- data/lib/strategy/whitelist.rb +4 -2
- data/lib/utils/database.rb +8 -6
- data/lib/utils/geojson_parser.rb +42 -0
- data/lib/utils/logging.rb +0 -9
- data/lib/utils/progress_bar.rb +29 -0
- data/lib/utils/random_float.rb +12 -0
- data/lib/utils/random_int.rb +3 -7
- data/lib/utils/resource.rb +4 -0
- data/lib/version.rb +1 -1
- data/resources/UK_addresses.geojson +300 -0
- data/resources/US_addresses.geojson +300 -0
- data/spec/acceptance/rdbms_blacklist_spec.rb +2 -2
- data/spec/acceptance/rdbms_whitelist_spec.rb +6 -8
- data/spec/resource/sample.geojson +1 -0
- data/spec/spec_helper.rb +3 -2
- data/spec/strategy/field/contact/random_address_spec.rb +12 -0
- data/spec/strategy/field/contact/random_city_spec.rb +14 -0
- data/spec/strategy/field/contact/random_phone_number_spec.rb +16 -0
- data/spec/strategy/field/contact/random_province_spec.rb +14 -0
- data/spec/strategy/field/contact/random_zipcode_spec.rb +14 -0
- data/spec/strategy/field/datetime/anonymize_date_spec.rb +27 -0
- data/spec/strategy/field/datetime/anonymize_datetime_spec.rb +57 -0
- data/spec/strategy/field/datetime/anonymize_time_spec.rb +57 -0
- data/spec/strategy/field/datetime/date_delta_spec.rb +36 -0
- data/spec/strategy/field/{date_time_delta_spec.rb → datetime/date_time_delta_spec.rb} +3 -2
- data/spec/strategy/field/datetime/time_delta_spec.rb +44 -0
- data/spec/strategy/field/default_anon_spec.rb +42 -0
- data/spec/strategy/field/email/gmail_template_spec.rb +17 -0
- data/spec/strategy/field/{random_email_spec.rb → email/random_email_spec.rb} +2 -2
- data/spec/strategy/field/email/random_mailinator_email_spec.rb +14 -0
- data/spec/strategy/field/{random_first_name_spec.rb → name/random_first_name_spec.rb} +2 -2
- data/spec/strategy/field/{random_full_name_spec.rb → name/random_full_name_spec.rb} +2 -2
- data/spec/strategy/field/{random_last_name_spec.rb → name/random_last_name_spec.rb} +2 -2
- data/spec/strategy/field/{random_user_name_spec.rb → name/random_user_name_spec.rb} +2 -2
- data/spec/strategy/field/{random_float_delta_spec.rb → number/random_float_delta_spec.rb} +2 -2
- data/spec/strategy/field/number/random_float_spec.rb +28 -0
- data/spec/strategy/field/{random_integer_delta_spec.rb → number/random_integer_delta_spec.rb} +3 -5
- data/spec/strategy/field/{random_int_spec.rb → number/random_integer_spec.rb} +4 -4
- data/spec/strategy/field/random_boolean_spec.rb +2 -2
- data/spec/strategy/field/string/formatted_string_numbers_spec.rb +15 -0
- data/spec/strategy/field/{lorem_ipsum_spec.rb → string/lorem_ipsum_spec.rb} +2 -2
- data/spec/strategy/field/{random_string_spec.rb → string/random_string_spec.rb} +2 -2
- data/spec/strategy/field/{distinct_column_values_spec.rb → string/select_from_database_spec.rb} +3 -3
- data/spec/strategy/field/{random_selection_spec.rb → string/select_from_list_spec.rb} +5 -5
- data/spec/strategy/field/{string_template_spec.rb → string/string_template_spec.rb} +2 -2
- data/spec/strategy/field/whitelist_spec.rb +2 -2
- data/spec/support/customer_sample.rb +1 -1
- data/spec/utils/database_spec.rb +2 -2
- data/spec/utils/geojson_parser_spec.rb +38 -0
- data/whitelist_dsl.rb +4 -6
- metadata +163 -59
- data/lib/strategy/field/anonymize_time.rb +0 -57
- data/lib/strategy/field/gmail_template.rb +0 -17
- data/lib/strategy/field/random_first_name.rb +0 -18
- data/lib/strategy/field/random_last_name.rb +0 -19
- data/lib/strategy/field/random_selection.rb +0 -23
- data/lib/strategy/field/user_name_template.rb +0 -22
- data/spec/strategy/field/anonymize_time_spec.rb +0 -23
- data/spec/strategy/field/gmail_template_spec.rb +0 -14
- data/spec/strategy/field/random_mailinator_email_spec.rb +0 -21
- data/spec/strategy/field/random_phone_number_spec.rb +0 -35
- data/spec/strategy/field/user_name_template_spec.rb +0 -13
@@ -1,57 +0,0 @@
|
|
1
|
-
module DataAnon
  module Strategy
    module Field
      # Field strategy that rebuilds a Time value, replacing selected
      # components (month, day, hour, minute, second) with random values
      # while always keeping the original year.
      #
      # Each constructor flag decides whether the matching component is
      # randomized (true) or copied from the original value (false).
      class AnonymizeTime
        DEFAULT_ANONYMIZATION = true

        # Days per month for a non-leap year, indexed by (month - 1).
        DAYS_IN_MONTH = [31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31].freeze

        # @return [AnonymizeTime] strategy that randomizes only the month
        def self.only_month
          new(true, false, false, false, false)
        end

        # @return [AnonymizeTime] strategy that randomizes only the day
        def self.only_day
          new(false, true, false, false, false)
        end

        # @return [AnonymizeTime] strategy that randomizes only the hour
        def self.only_hour
          new(false, false, true, false, false)
        end

        # @return [AnonymizeTime] strategy that randomizes only the minute
        def self.only_minute
          new(false, false, false, true, false)
        end

        # @param anonymize_month [Boolean] randomize the month component
        # @param anonymize_day [Boolean] randomize the day component
        # @param anonymize_hour [Boolean] randomize the hour component
        # @param anonymize_min [Boolean] randomize the minute component
        # @param anonymize_sec [Boolean] randomize the second component
        def initialize(anonymize_month = DEFAULT_ANONYMIZATION,
                       anonymize_day = DEFAULT_ANONYMIZATION,
                       anonymize_hour = DEFAULT_ANONYMIZATION,
                       anonymize_min = DEFAULT_ANONYMIZATION,
                       anonymize_sec = DEFAULT_ANONYMIZATION)
          @anonymize_month = anonymize_month
          @anonymize_day = anonymize_day
          @anonymize_hour = anonymize_hour
          @anonymize_min = anonymize_min
          @anonymize_sec = anonymize_sec
        end

        # Builds the anonymized time for the given field.
        #
        # @param field [#value] object whose +value+ responds to year, month,
        #   day, hour, min and sec
        # @return [Time] new Time built from the kept and randomized parts
        def anonymize(field)
          provided_time = field.value
          year = provided_time.year
          month = @anonymize_month ? random_between(1, 12) : provided_time.month

          # The day must exist in the (possibly new) month; otherwise Time.new
          # silently rolls over into the following month.
          last_day = days_in_month(year, month)
          day = @anonymize_day ? random_between(1, last_day) : [provided_time.day, last_day].min

          # Valid clock ranges are 0-23 and 0-59; minute 60 makes Time.new
          # raise, and hour 24 / second 60 roll over into the next unit.
          hour = @anonymize_hour ? random_between(0, 23) : provided_time.hour
          min = @anonymize_min ? random_between(0, 59) : provided_time.min
          sec = @anonymize_sec ? random_between(0, 59) : provided_time.sec

          create_object(day, hour, min, month, sec, year)
        end

        private

        # Draws a random integer through the project's RandomInt helper; both
        # bounds are treated as inclusive.
        def random_between(low, high)
          DataAnon::Utils::RandomInt.generate(low, high)
        end

        # Number of days in the given month, accounting for leap years.
        def days_in_month(year, month)
          return 29 if month == 2 && leap_year?(year)

          DAYS_IN_MONTH[month - 1]
        end

        # Gregorian leap-year rule.
        def leap_year?(year)
          (year % 4).zero? && (!(year % 100).zero? || (year % 400).zero?)
        end

        # Assembles the resulting Time from its individual components.
        def create_object(day, hour, min, month, sec, year)
          Time.new(year, month, day, hour, min, sec)
        end
      end
    end
  end
end
|
@@ -1,17 +0,0 @@
|
|
1
|
-
module DataAnon
  module Strategy
    module Field
      # Field strategy that derives a unique Gmail "plus" address per row:
      # the part of the configured address before "@" is combined with the
      # field's row number, e.g. "fake@gmail.com" -> "fake+456@gmail.com".
      class GmailTemplate
        # @param gmail_address [String, nil] base address whose local part
        #   (text before "@") is reused for every generated address
        def initialize(gmail_address = nil)
          @gmail_address = gmail_address
        end

        # @param field [#row_number] field being anonymized
        # @return [String] address of the form "<local>+<row_number>@gmail.com"
        # @raise [ArgumentError] if no base address was configured
        def anonymize(field)
          # Fail with a clear message instead of a NoMethodError on nil.
          raise ArgumentError, 'GmailTemplate requires a gmail address' if @gmail_address.nil?

          # partition tolerates an address without "@" (whole string is used).
          username = @gmail_address.partition('@').first
          "#{username}+#{field.row_number}@gmail.com"
        end
      end
    end
  end
end
|
@@ -1,18 +0,0 @@
|
|
1
|
-
module DataAnon
  module Strategy
    module Field
      # Field strategy that returns a random first name taken from a
      # whitespace-separated list of names stored in a file.
      class RandomFirstName
        # @param file_path [String, nil] path of the names file; when nil the
        #   'first_names.txt' resource resolved via DataAnon::Utils::Resource
        #   is used
        def initialize(file_path = nil)
          file = file_path || DataAnon::Utils::Resource.file('first_names.txt')
          # String#split without arguments splits on any run of whitespace.
          @names = File.read(file).split
        end

        # @param field [Object] field being anonymized (not inspected)
        # @return [String, nil] a randomly chosen name, nil if the list is empty
        def anonymize(field)
          @names.sample
        end
      end
    end
  end
end
|
@@ -1,19 +0,0 @@
|
|
1
|
-
module DataAnon
  module Strategy
    module Field
      # Field strategy that returns a random last name taken from a
      # whitespace-separated list of names stored in a file.
      class RandomLastName
        # @param file_path [String, nil] path of the names file; when nil the
        #   'last_names.txt' resource resolved via DataAnon::Utils::Resource
        #   is used
        def initialize(file_path = nil)
          file = file_path || DataAnon::Utils::Resource.file('last_names.txt')
          # String#split without arguments splits on any run of whitespace.
          @names = File.read(file).split
        end

        # @param field [Object] field being anonymized (not inspected)
        # @return [String, nil] a randomly chosen name, nil if the list is empty
        def anonymize(field)
          @names.sample
        end
      end
    end
  end
end
|
@@ -1,23 +0,0 @@
|
|
1
|
-
module DataAnon
  module Strategy
    module Field
      # Field strategy that replaces a value with one picked at random from a
      # caller-supplied list of candidates.
      class RandomSelection
        # @param values [Array, Object] candidate values; a non-array argument
        #   is treated as a single-element list
        def initialize(values)
          # is_a? also accepts Array subclasses, which an exact class
          # comparison would wrap as one single candidate.
          @values = values.is_a?(Array) ? values : [values]
        end

        # @param field [Object] field being anonymized (not inspected)
        # @return [Object] one of the configured candidate values
        def anonymize(field)
          # A single candidate needs no random draw.
          return @values.first if @values.length == 1

          @values[DataAnon::Utils::RandomInt.generate(0, @values.length - 1)]
        end
      end
    end
  end
end
|
@@ -1,22 +0,0 @@
|
|
1
|
-
module DataAnon
  module Strategy
    module Field
      # Field strategy that renders a string template in the context of the
      # field being anonymized, so "#{...}" placeholders in the template can
      # call the field's methods (e.g. 'user #{row_number}').
      class UserNameTemplate
        # @param template [String] template text; it is wrapped in double
        #   quotes and evaluated as a Ruby string literal
        def initialize(template)
          @template = template
        end

        # @param field [Object] field whose methods are reachable from the
        #   template's interpolations
        # @return [String] the rendered template
        def anonymize(field)
          context = field.instance_eval { binding }
          # SECURITY: the template is executed as Ruby code. It must come from
          # trusted configuration only, never from external or user input.
          # NOTE(review): an unescaped double quote in the template ends the
          # generated literal early — confirm templates never contain one.
          eval("\"#{@template}\"", context)
        end
      end
    end
  end
end
|
@@ -1,23 +0,0 @@
|
|
1
|
-
require "spec_helper"
|
2
|
-
|
3
|
-
describe DataAnon::Strategy::Field::AnonymizeTime do

  # described_class is used instead of assigning a constant inside the block:
  # such a constant is defined globally and leaks into every other spec file.
  let(:field) { DataAnon::Core::Field.new('date', Time.new(2000, 1, 1, 12, 12, 12), 1, nil) }

  describe 'providing true only for month should randomize only the month field' do

    let(:anonymized_time) { described_class.new(true, false, false, false, false).anonymize(field) }

    it {
      # Every component other than the month must be carried over unchanged.
      anonymized_time.year.should be 2000
      anonymized_time.day.should be 1
      anonymized_time.hour.should be 12
      anonymized_time.min.should be 12
      anonymized_time.sec.should be 12

      anonymized_time.month.should be_between(1, 12)
    }
  end

end
|
@@ -1,14 +0,0 @@
|
|
1
|
-
require "spec_helper"
|
2
|
-
|
3
|
-
describe DataAnon::Strategy::Field::GmailTemplate do

  # described_class is used instead of assigning a constant inside the block:
  # such a constant is defined globally and leaks into every other spec file.
  let(:field) { DataAnon::Core::Field.new('email', 'user@company.com', 456, nil) }

  describe 'generated email must be compliant with the provided template' do

    let(:anonymized_email) { described_class.new("fake@gmail.com").anonymize(field) }

    # The row number (456) is appended to the local part of the template address.
    it { anonymized_email.should eq('fake+456@gmail.com') }
  end
end
|
@@ -1,21 +0,0 @@
|
|
1
|
-
require "spec_helper"
|
2
|
-
|
3
|
-
describe DataAnon::Strategy::Field::RandomMailinatorEmail do

  # described_class is used instead of assigning a constant inside the block:
  # such a constant is defined globally and leaks into every other spec file.
  let(:field) { DataAnon::Core::Field.new('email', 'user@company.com', 1, nil) }
  let(:anonymized_email) { described_class.new.anonymize(field) }

  describe 'anonymized email should not be the same as original email' do

    # eq compares by value; equal checks object identity and would pass for
    # any freshly built string, even one with the same content.
    it { anonymized_email.should_not eq field.value }
  end

  describe 'anonymized email should be a mailinator email address' do

    # \A and \z anchor the whole string; ^ and $ only anchor individual lines.
    it { anonymized_email.should match(/\A\S+@mailinator\.com\z/) }
  end
end
|
@@ -1,35 +0,0 @@
|
|
1
|
-
require "spec_helper"
|
2
|
-
|
3
|
-
describe DataAnon::Strategy::Field::RandomPhoneNumber do

  # described_class is used instead of assigning a constant inside the block:
  # such a constant is defined globally and leaks into every other spec file.
  let(:field) { DataAnon::Core::Field.new('phone_number', "+0 (123) 456-7890", 1, nil) }
  let(:anonymized_number) { described_class.new.anonymize(field) }

  describe 'anonymized phone number should not be the same as original phone number' do

    # eq compares by value; equal checks object identity and would pass for
    # any freshly built string, even one with the same content.
    it { anonymized_number.should_not eq field.value }
  end

  describe 'anonymized phone number should be the same formatting as original phone number' do

    it {
      original_number = field.value

      # NOTE(review): assumes the strategy keeps the overall length — confirm.
      anonymized_number.length.should eq original_number.length

      # Every non-digit character (sign, spaces, brackets, dashes) must stay
      # at the same position; digit positions are free to change.
      original_number.each_char.with_index do |original_char, index|
        next if original_char =~ /\d/

        anonymized_number[index].should eq original_char
      end
    }
  end

end
|
@@ -1,13 +0,0 @@
|
|
1
|
-
require "spec_helper"
|
2
|
-
|
3
|
-
describe DataAnon::Strategy::Field::UserNameTemplate do

  # described_class is used instead of assigning a constant inside the block:
  # such a constant is defined globally and leaks into every other spec file.
  let(:field) { DataAnon::Core::Field.new('username', 'Chuck Norris', 100, nil) }

  describe 'should return same string value as StringTemplate' do
    # Single quotes keep '#{row_number}' literal so the strategy itself
    # performs the interpolation against the field.
    let(:anonymized_username) { described_class.new('Rajnikanth #{row_number}').anonymize(field) }
    it { anonymized_username.should == 'Rajnikanth 100' }
  end

end
|