data-anonymization 0.1.2 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.documentup.json +1 -0
- data/.travis.yml +0 -1
- data/README.md +277 -52
- data/blacklist_dsl.rb +1 -3
- data/data-anonymization.gemspec +4 -0
- data/lib/core/dsl.rb +1 -1
- data/lib/data-anonymization.rb +3 -0
- data/lib/strategy/base.rb +21 -11
- data/lib/strategy/blacklist.rb +2 -1
- data/lib/strategy/field/contact/geojson_base.rb +24 -0
- data/lib/strategy/field/contact/random_address.rb +17 -0
- data/lib/strategy/field/contact/random_city.rb +17 -0
- data/lib/strategy/field/contact/random_phone_number.rb +13 -0
- data/lib/strategy/field/contact/random_province.rb +17 -0
- data/lib/strategy/field/contact/random_zipcode.rb +17 -0
- data/lib/strategy/field/datetime/anonymize_date.rb +39 -0
- data/lib/strategy/field/datetime/anonymize_datetime.rb +15 -0
- data/lib/strategy/field/datetime/anonymize_time.rb +58 -0
- data/lib/strategy/field/datetime/date_delta.rb +21 -0
- data/lib/strategy/field/{date_time_delta.rb → datetime/date_time_delta.rb} +3 -3
- data/lib/strategy/field/datetime/time_delta.rb +12 -0
- data/lib/strategy/field/default_anon.rb +12 -7
- data/lib/strategy/field/email/gmail_template.rb +16 -0
- data/lib/strategy/field/{random_email.rb → email/random_email.rb} +0 -0
- data/lib/strategy/field/{random_mailinator_email.rb → email/random_mailinator_email.rb} +0 -2
- data/lib/strategy/field/fields.rb +51 -20
- data/lib/strategy/field/name/random_first_name.rb +14 -0
- data/lib/strategy/field/{random_full_name.rb → name/random_full_name.rb} +0 -0
- data/lib/strategy/field/name/random_last_name.rb +14 -0
- data/lib/strategy/field/{random_user_name.rb → name/random_user_name.rb} +0 -0
- data/lib/strategy/field/number/random_float.rb +23 -0
- data/lib/strategy/field/{random_float_delta.rb → number/random_float_delta.rb} +2 -4
- data/lib/strategy/field/{random_int.rb → number/random_integer.rb} +1 -1
- data/lib/strategy/field/{random_integer_delta.rb → number/random_integer_delta.rb} +2 -5
- data/lib/strategy/field/{random_phone_number.rb → string/formatted_string_numbers.rb} +4 -1
- data/lib/strategy/field/{lorem_ipsum.rb → string/lorem_ipsum.rb} +0 -0
- data/lib/strategy/field/{random_string.rb → string/random_string.rb} +0 -0
- data/lib/strategy/field/{distinct_column_values.rb → string/select_from_database.rb} +2 -3
- data/lib/strategy/field/string/select_from_file.rb +18 -0
- data/lib/strategy/field/string/select_from_list.rb +17 -0
- data/lib/strategy/field/{string_template.rb → string/string_template.rb} +0 -0
- data/lib/strategy/whitelist.rb +4 -2
- data/lib/utils/database.rb +8 -6
- data/lib/utils/geojson_parser.rb +42 -0
- data/lib/utils/logging.rb +0 -9
- data/lib/utils/progress_bar.rb +29 -0
- data/lib/utils/random_float.rb +12 -0
- data/lib/utils/random_int.rb +3 -7
- data/lib/utils/resource.rb +4 -0
- data/lib/version.rb +1 -1
- data/resources/UK_addresses.geojson +300 -0
- data/resources/US_addresses.geojson +300 -0
- data/spec/acceptance/rdbms_blacklist_spec.rb +2 -2
- data/spec/acceptance/rdbms_whitelist_spec.rb +6 -8
- data/spec/resource/sample.geojson +1 -0
- data/spec/spec_helper.rb +3 -2
- data/spec/strategy/field/contact/random_address_spec.rb +12 -0
- data/spec/strategy/field/contact/random_city_spec.rb +14 -0
- data/spec/strategy/field/contact/random_phone_number_spec.rb +16 -0
- data/spec/strategy/field/contact/random_province_spec.rb +14 -0
- data/spec/strategy/field/contact/random_zipcode_spec.rb +14 -0
- data/spec/strategy/field/datetime/anonymize_date_spec.rb +27 -0
- data/spec/strategy/field/datetime/anonymize_datetime_spec.rb +57 -0
- data/spec/strategy/field/datetime/anonymize_time_spec.rb +57 -0
- data/spec/strategy/field/datetime/date_delta_spec.rb +36 -0
- data/spec/strategy/field/{date_time_delta_spec.rb → datetime/date_time_delta_spec.rb} +3 -2
- data/spec/strategy/field/datetime/time_delta_spec.rb +44 -0
- data/spec/strategy/field/default_anon_spec.rb +42 -0
- data/spec/strategy/field/email/gmail_template_spec.rb +17 -0
- data/spec/strategy/field/{random_email_spec.rb → email/random_email_spec.rb} +2 -2
- data/spec/strategy/field/email/random_mailinator_email_spec.rb +14 -0
- data/spec/strategy/field/{random_first_name_spec.rb → name/random_first_name_spec.rb} +2 -2
- data/spec/strategy/field/{random_full_name_spec.rb → name/random_full_name_spec.rb} +2 -2
- data/spec/strategy/field/{random_last_name_spec.rb → name/random_last_name_spec.rb} +2 -2
- data/spec/strategy/field/{random_user_name_spec.rb → name/random_user_name_spec.rb} +2 -2
- data/spec/strategy/field/{random_float_delta_spec.rb → number/random_float_delta_spec.rb} +2 -2
- data/spec/strategy/field/number/random_float_spec.rb +28 -0
- data/spec/strategy/field/{random_integer_delta_spec.rb → number/random_integer_delta_spec.rb} +3 -5
- data/spec/strategy/field/{random_int_spec.rb → number/random_integer_spec.rb} +4 -4
- data/spec/strategy/field/random_boolean_spec.rb +2 -2
- data/spec/strategy/field/string/formatted_string_numbers_spec.rb +15 -0
- data/spec/strategy/field/{lorem_ipsum_spec.rb → string/lorem_ipsum_spec.rb} +2 -2
- data/spec/strategy/field/{random_string_spec.rb → string/random_string_spec.rb} +2 -2
- data/spec/strategy/field/{distinct_column_values_spec.rb → string/select_from_database_spec.rb} +3 -3
- data/spec/strategy/field/{random_selection_spec.rb → string/select_from_list_spec.rb} +5 -5
- data/spec/strategy/field/{string_template_spec.rb → string/string_template_spec.rb} +2 -2
- data/spec/strategy/field/whitelist_spec.rb +2 -2
- data/spec/support/customer_sample.rb +1 -1
- data/spec/utils/database_spec.rb +2 -2
- data/spec/utils/geojson_parser_spec.rb +38 -0
- data/whitelist_dsl.rb +4 -6
- metadata +163 -59
- data/lib/strategy/field/anonymize_time.rb +0 -57
- data/lib/strategy/field/gmail_template.rb +0 -17
- data/lib/strategy/field/random_first_name.rb +0 -18
- data/lib/strategy/field/random_last_name.rb +0 -19
- data/lib/strategy/field/random_selection.rb +0 -23
- data/lib/strategy/field/user_name_template.rb +0 -22
- data/spec/strategy/field/anonymize_time_spec.rb +0 -23
- data/spec/strategy/field/gmail_template_spec.rb +0 -14
- data/spec/strategy/field/random_mailinator_email_spec.rb +0 -21
- data/spec/strategy/field/random_phone_number_spec.rb +0 -35
- data/spec/strategy/field/user_name_template_spec.rb +0 -13
@@ -1,8 +1,8 @@
|
|
1
1
|
require "spec_helper"
|
2
2
|
|
3
|
-
describe
|
3
|
+
describe FieldStrategy::RandomEmail do
|
4
4
|
|
5
|
-
RandomEmail =
|
5
|
+
RandomEmail = FieldStrategy::RandomEmail
|
6
6
|
let(:field) {DataAnon::Core::Field.new('email','real@email.com',1,nil)}
|
7
7
|
|
8
8
|
|
@@ -0,0 +1,14 @@
|
|
1
|
+
require "spec_helper"
|
2
|
+
|
3
|
+
describe FieldStrategy::RandomMailinatorEmail do
|
4
|
+
|
5
|
+
RandomMailinatorEmail = FieldStrategy::RandomMailinatorEmail
|
6
|
+
let(:field) {DataAnon::Core::Field.new('email','user@company.com',1,nil)}
|
7
|
+
|
8
|
+
describe 'anonymized email should not be the same as original email' do
|
9
|
+
let(:anonymized_email) {RandomMailinatorEmail.new.anonymize(field)}
|
10
|
+
|
11
|
+
it {anonymized_email.should_not equal field.value}
|
12
|
+
it {anonymized_email.should match '^\S+@\mailinator\.com$'}
|
13
|
+
end
|
14
|
+
end
|
@@ -1,8 +1,8 @@
|
|
1
1
|
require "spec_helper"
|
2
2
|
|
3
|
-
describe
|
3
|
+
describe FieldStrategy::RandomFirstName do
|
4
4
|
|
5
|
-
RandomFirstName =
|
5
|
+
RandomFirstName = FieldStrategy::RandomFirstName
|
6
6
|
let(:field) {DataAnon::Core::Field.new('firstname','fakeFirstName',1,nil)}
|
7
7
|
|
8
8
|
describe 'anonymized name must not be the same as provided name' do
|
@@ -1,8 +1,8 @@
|
|
1
1
|
require "spec_helper"
|
2
2
|
|
3
|
-
describe
|
3
|
+
describe FieldStrategy::RandomFullName do
|
4
4
|
|
5
|
-
RandomFullName =
|
5
|
+
RandomFullName = FieldStrategy::RandomFullName
|
6
6
|
let(:field) {DataAnon::Core::Field.new('name','Fake User',1,nil)}
|
7
7
|
|
8
8
|
describe 'anonymized name should be the same as original' do
|
@@ -1,8 +1,8 @@
|
|
1
1
|
require "spec_helper"
|
2
2
|
|
3
|
-
describe
|
3
|
+
describe FieldStrategy::RandomLastName do
|
4
4
|
|
5
|
-
RandomLastName =
|
5
|
+
RandomLastName = FieldStrategy::RandomLastName
|
6
6
|
let(:field) {DataAnon::Core::Field.new('lastname','fakeLastName',1,nil)}
|
7
7
|
|
8
8
|
describe 'anonymized name must not be the same as provided name' do
|
@@ -1,8 +1,8 @@
|
|
1
1
|
require "spec_helper"
|
2
2
|
|
3
|
-
describe
|
3
|
+
describe FieldStrategy::RandomUserName do
|
4
4
|
|
5
|
-
RandomUserName =
|
5
|
+
RandomUserName = FieldStrategy::RandomUserName
|
6
6
|
let(:field) {DataAnon::Core::Field.new('username','fakeUserName',1,nil)}
|
7
7
|
|
8
8
|
describe 'anonymized user name should not be the same as original user name' do
|
@@ -1,8 +1,8 @@
|
|
1
1
|
require "spec_helper"
|
2
2
|
|
3
|
-
describe
|
3
|
+
describe FieldStrategy::RandomFloatDelta do
|
4
4
|
|
5
|
-
RandomFloatDelta =
|
5
|
+
RandomFloatDelta = FieldStrategy::RandomFloatDelta
|
6
6
|
let(:field) {DataAnon::Core::Field.new('float_field',5.5,1,nil)}
|
7
7
|
|
8
8
|
describe 'anonymized float should not be the same as original value' do
|
@@ -0,0 +1,28 @@
|
|
1
|
+
require "spec_helper"
|
2
|
+
|
3
|
+
describe FieldStrategy::RandomFloat do
|
4
|
+
|
5
|
+
RandomFloat = FieldStrategy::RandomFloat
|
6
|
+
let(:field) { DataAnon::Core::Field.new('points', 2.5, 1, nil) }
|
7
|
+
|
8
|
+
describe 'verify age range between 18 and 70' do
|
9
|
+
|
10
|
+
let(:anonymized_int) { RandomFloat.new(2.0, 8.0).anonymize(field) }
|
11
|
+
|
12
|
+
it { anonymized_int.should >= 2.0 }
|
13
|
+
it { anonymized_int.should <= 8.0 }
|
14
|
+
|
15
|
+
end
|
16
|
+
|
17
|
+
describe 'default range between 0 and 100' do
|
18
|
+
|
19
|
+
let(:anonymized_int) { RandomFloat.new.anonymize(field) }
|
20
|
+
|
21
|
+
it { anonymized_int.should >= 0.0 }
|
22
|
+
it { anonymized_int.should <= 100.0 }
|
23
|
+
|
24
|
+
end
|
25
|
+
|
26
|
+
|
27
|
+
|
28
|
+
end
|
data/spec/strategy/field/{random_integer_delta_spec.rb → number/random_integer_delta_spec.rb}
RENAMED
@@ -1,16 +1,14 @@
|
|
1
1
|
require "spec_helper"
|
2
2
|
|
3
|
-
describe
|
3
|
+
describe FieldStrategy::RandomIntegerDelta do
|
4
4
|
|
5
|
-
RandomIntegerDelta =
|
5
|
+
RandomIntegerDelta = FieldStrategy::RandomIntegerDelta
|
6
6
|
let(:field) {DataAnon::Core::Field.new('integer_field',100,1,nil)}
|
7
7
|
|
8
8
|
describe "anonymized value returned should be an integer" do
|
9
9
|
let(:anonymized_integer) {RandomIntegerDelta.new(10).anonymize(field)}
|
10
10
|
|
11
|
-
it {
|
12
|
-
is_integer.should be true
|
13
|
-
}
|
11
|
+
it { anonymized_integer.should be_kind_of Integer }
|
14
12
|
|
15
13
|
end
|
16
14
|
|
@@ -1,13 +1,13 @@
|
|
1
1
|
require "spec_helper"
|
2
2
|
|
3
|
-
describe
|
3
|
+
describe FieldStrategy::RandomInteger do
|
4
4
|
|
5
|
-
|
5
|
+
RandomInteger = FieldStrategy::RandomInteger
|
6
6
|
let(:field) { DataAnon::Core::Field.new('age', 25, 1, nil) }
|
7
7
|
|
8
8
|
describe 'verify age range between 18 and 70' do
|
9
9
|
|
10
|
-
let(:anonymized_int) {
|
10
|
+
let(:anonymized_int) { RandomInteger.new(18, 70).anonymize(field) }
|
11
11
|
|
12
12
|
it { anonymized_int.should >= 18 }
|
13
13
|
it { anonymized_int.should <= 70 }
|
@@ -16,7 +16,7 @@ describe DataAnon::Strategy::Field::RandomInt do
|
|
16
16
|
|
17
17
|
describe 'default range between 0 and 100' do
|
18
18
|
|
19
|
-
let(:anonymized_int) {
|
19
|
+
let(:anonymized_int) { RandomInteger.new.anonymize(field) }
|
20
20
|
|
21
21
|
it { anonymized_int.should >= 0 }
|
22
22
|
it { anonymized_int.should <= 100 }
|
@@ -1,8 +1,8 @@
|
|
1
1
|
require "spec_helper"
|
2
2
|
|
3
|
-
describe
|
3
|
+
describe FieldStrategy::RandomBoolean do
|
4
4
|
|
5
|
-
RandomBoolean =
|
5
|
+
RandomBoolean = FieldStrategy::RandomBoolean
|
6
6
|
let(:field) {DataAnon::Core::Field.new('boolean_field',true,1,nil)}
|
7
7
|
|
8
8
|
describe 'anonymized value should be a boolean' do
|
@@ -0,0 +1,15 @@
|
|
1
|
+
require "spec_helper"
|
2
|
+
|
3
|
+
describe FieldStrategy::FormattedStringNumber do
|
4
|
+
|
5
|
+
FormattedStringNumber = FieldStrategy::FormattedStringNumber
|
6
|
+
let(:field) {DataAnon::Core::Field.new('credit_card_number',"1111-2222-3333-4444",1,nil)}
|
7
|
+
|
8
|
+
describe 'anonymized credit card number preserving the format' do
|
9
|
+
let(:anonymized_number) {FormattedStringNumber.new.anonymize(field)}
|
10
|
+
|
11
|
+
it {anonymized_number.should_not equal field.value}
|
12
|
+
it { anonymized_number.should match /^\d{4}-\d{4}-\d{4}-\d{4}$/}
|
13
|
+
end
|
14
|
+
|
15
|
+
end
|
@@ -1,8 +1,8 @@
|
|
1
1
|
require "spec_helper"
|
2
2
|
|
3
|
-
describe
|
3
|
+
describe FieldStrategy::LoremIpsum do
|
4
4
|
|
5
|
-
LoremIpsum =
|
5
|
+
LoremIpsum = FieldStrategy::LoremIpsum
|
6
6
|
let(:field) { DataAnon::Core::Field.new('state', 'New Delhi', 1, nil) }
|
7
7
|
|
8
8
|
describe 'should return same length value using default text' do
|
@@ -1,8 +1,8 @@
|
|
1
1
|
require "spec_helper"
|
2
2
|
|
3
|
-
describe
|
3
|
+
describe FieldStrategy::RandomString do
|
4
4
|
|
5
|
-
RandomString =
|
5
|
+
RandomString = FieldStrategy::RandomString
|
6
6
|
|
7
7
|
describe 'anonymized string must not be the same as original string' do
|
8
8
|
let(:field) {DataAnon::Core::Field.new('string_field','fakeString',1,nil)}
|
data/spec/strategy/field/{distinct_column_values_spec.rb → string/select_from_database_spec.rb}
RENAMED
@@ -1,18 +1,18 @@
|
|
1
1
|
require "spec_helper"
|
2
2
|
|
3
|
-
describe
|
3
|
+
describe FieldStrategy::SelectFromDatabase do
|
4
4
|
|
5
5
|
before(:each) do
|
6
6
|
source = {:adapter => 'sqlite3', :database => 'sample-data/chinook.sqlite'}
|
7
7
|
DataAnon::Utils::SourceDatabase.establish_connection source
|
8
8
|
end
|
9
9
|
|
10
|
-
|
10
|
+
SelectFromDatabase = FieldStrategy::SelectFromDatabase
|
11
11
|
let(:field) { DataAnon::Core::Field.new('name', 'Abcd', 1, nil) }
|
12
12
|
|
13
13
|
describe 'more than one values in predefined list' do
|
14
14
|
|
15
|
-
let(:anonymized_value) {
|
15
|
+
let(:anonymized_value) { SelectFromDatabase.new('MediaType','Name').anonymize(field) }
|
16
16
|
|
17
17
|
it { anonymized_value.should_not be('Abcd') }
|
18
18
|
it { anonymized_value.should_not be_empty }
|
@@ -1,14 +1,14 @@
|
|
1
1
|
require "spec_helper"
|
2
2
|
|
3
|
-
describe
|
3
|
+
describe FieldStrategy::SelectFromList do
|
4
4
|
|
5
|
-
|
5
|
+
SelectFromList = FieldStrategy::SelectFromList
|
6
6
|
let(:field) { DataAnon::Core::Field.new('state', 'New Delhi', 1, nil) }
|
7
7
|
|
8
8
|
describe 'more than one values in predefined list' do
|
9
9
|
|
10
10
|
let(:states) { ['Maharashtra','Gujrat','Karnataka'] }
|
11
|
-
let(:anonymized_value) {
|
11
|
+
let(:anonymized_value) { SelectFromList.new(states).anonymize(field) }
|
12
12
|
|
13
13
|
it { states.should include(anonymized_value) }
|
14
14
|
|
@@ -17,7 +17,7 @@ describe DataAnon::Strategy::Field::RandomSelection do
|
|
17
17
|
describe 'only one value in list' do
|
18
18
|
|
19
19
|
let(:states) { ['Maharashtra'] }
|
20
|
-
let(:anonymized_value) {
|
20
|
+
let(:anonymized_value) { SelectFromList.new(states).anonymize(field) }
|
21
21
|
|
22
22
|
it { anonymized_value.should == 'Maharashtra' }
|
23
23
|
|
@@ -26,7 +26,7 @@ describe DataAnon::Strategy::Field::RandomSelection do
|
|
26
26
|
describe 'string value' do
|
27
27
|
|
28
28
|
let(:states) { 'Maharashtra' }
|
29
|
-
let(:anonymized_value) {
|
29
|
+
let(:anonymized_value) { SelectFromList.new(states).anonymize(field) }
|
30
30
|
|
31
31
|
it { anonymized_value.should == 'Maharashtra' }
|
32
32
|
|
@@ -1,8 +1,8 @@
|
|
1
1
|
require "spec_helper"
|
2
2
|
|
3
|
-
describe
|
3
|
+
describe FieldStrategy::StringTemplate do
|
4
4
|
|
5
|
-
StringTemplate =
|
5
|
+
StringTemplate = FieldStrategy::StringTemplate
|
6
6
|
let(:field) { DataAnon::Core::Field.new('state', 'New Delhi', 3456, nil) }
|
7
7
|
|
8
8
|
describe 'should return same string value as StringTemplate' do
|
@@ -1,8 +1,8 @@
|
|
1
1
|
require "spec_helper"
|
2
2
|
|
3
|
-
describe
|
3
|
+
describe FieldStrategy::Whitelist do
|
4
4
|
|
5
|
-
Whitelist =
|
5
|
+
Whitelist = FieldStrategy::Whitelist
|
6
6
|
|
7
7
|
describe 'should return same string value as whitelist' do
|
8
8
|
let(:field) { DataAnon::Core::Field.new('state', 'New Delhi', 1, nil) }
|
@@ -34,7 +34,7 @@ class CustomerSample
|
|
34
34
|
|
35
35
|
def self.insert_record connection_spec, data_hash = SAMPLE_DATA
|
36
36
|
DataAnon::Utils::TempDatabase.establish_connection connection_spec
|
37
|
-
source = DataAnon::Utils::BaseTable.create_table 'customers', 'cust_id'
|
37
|
+
source = DataAnon::Utils::BaseTable.create_table DataAnon::Utils::TempDatabase, 'customers', 'cust_id'
|
38
38
|
cust = source.new data_hash
|
39
39
|
cust.cust_id = data_hash[:cust_id]
|
40
40
|
cust.save!
|
data/spec/utils/database_spec.rb
CHANGED
@@ -11,13 +11,13 @@ describe "Utils" do
|
|
11
11
|
end
|
12
12
|
|
13
13
|
it "should test the connection to source database" do
|
14
|
-
album = DataAnon::Utils::SourceTable.create "Album", "AlbumId"
|
14
|
+
album = DataAnon::Utils::SourceTable.create "Album", ["AlbumId"]
|
15
15
|
album.count.should > 0
|
16
16
|
album.all.length > 0
|
17
17
|
end
|
18
18
|
|
19
19
|
it "should test the connection to destination database" do
|
20
|
-
album = DataAnon::Utils::DestinationTable.create "Album", "AlbumId"
|
20
|
+
album = DataAnon::Utils::DestinationTable.create "Album", ["AlbumId"]
|
21
21
|
album.count.should == 0
|
22
22
|
album.all.length == 0
|
23
23
|
|
@@ -0,0 +1,38 @@
|
|
1
|
+
require "spec_helper"
|
2
|
+
|
3
|
+
describe "Geo Json Parser" do
|
4
|
+
|
5
|
+
SAMPLE_DATA_FILE_PATH = DataAnon::Utils::Resource.project_home+'spec/resource/sample.geojson'
|
6
|
+
|
7
|
+
describe "parser should return list of addresses when address method is called" do
|
8
|
+
let(:result_list) {DataAnon::Utils::GeojsonParser.address(SAMPLE_DATA_FILE_PATH)}
|
9
|
+
|
10
|
+
it {result_list.length.should be 1}
|
11
|
+
it {result_list[0].should eq("333 Willoughby Ave")}
|
12
|
+
end
|
13
|
+
|
14
|
+
describe "parser should return list of zip codes when zipcode method is called" do
|
15
|
+
let(:result_list) {DataAnon::Utils::GeojsonParser.zipcode(SAMPLE_DATA_FILE_PATH)}
|
16
|
+
|
17
|
+
it {result_list.length.should be 1}
|
18
|
+
it {result_list[0].should eq("99801")}
|
19
|
+
|
20
|
+
end
|
21
|
+
|
22
|
+
describe "parser should return list of province when province method is called" do
|
23
|
+
let(:result_list) {DataAnon::Utils::GeojsonParser.province(SAMPLE_DATA_FILE_PATH)}
|
24
|
+
|
25
|
+
it {result_list.length.should be 1}
|
26
|
+
it {result_list[0].should eq("AK")}
|
27
|
+
|
28
|
+
end
|
29
|
+
|
30
|
+
describe "parser should return list of cities when city method is called" do
|
31
|
+
let(:result_list) {DataAnon::Utils::GeojsonParser.city(SAMPLE_DATA_FILE_PATH)}
|
32
|
+
|
33
|
+
it {result_list.length.should be 1}
|
34
|
+
it {result_list[0].should eq("Juneau")}
|
35
|
+
|
36
|
+
end
|
37
|
+
|
38
|
+
end
|
data/whitelist_dsl.rb
CHANGED
@@ -2,8 +2,6 @@ system "rake empty_dest" # clean destination database on every call
|
|
2
2
|
|
3
3
|
require 'data-anonymization'
|
4
4
|
|
5
|
-
FS = DataAnon::Strategy::Field
|
6
|
-
|
7
5
|
DataAnon::Utils::Logging.logger.level = Logger::INFO
|
8
6
|
|
9
7
|
database 'Chinook' do
|
@@ -11,7 +9,7 @@ database 'Chinook' do
|
|
11
9
|
source_db :adapter => 'sqlite3', :database => 'sample-data/chinook.sqlite'
|
12
10
|
destination_db :adapter => 'sqlite3', :database => 'sample-data/chinook-empty.sqlite'
|
13
11
|
|
14
|
-
default_field_strategies :string =>
|
12
|
+
default_field_strategies :string => FieldStrategy::StringTemplate.new('Sunit #{row_number} Parekh')
|
15
13
|
|
16
14
|
table 'Genre' do
|
17
15
|
primary_key 'GenreId'
|
@@ -24,19 +22,19 @@ database 'Chinook' do
|
|
24
22
|
table 'MediaType' do
|
25
23
|
primary_key 'MediaTypeId'
|
26
24
|
anonymize('MediaTypeId') { |field| field.value } # same as whitelist
|
27
|
-
anonymize('Name').using
|
25
|
+
anonymize('Name').using FieldStrategy::StringTemplate.new('Media Type #{row_number}')
|
28
26
|
|
29
27
|
end
|
30
28
|
|
31
29
|
table 'Employee' do
|
32
30
|
primary_key 'EmployeeId'
|
33
31
|
whitelist 'EmployeeId'
|
34
|
-
anonymize('BirthDate').using
|
32
|
+
anonymize('BirthDate').using FieldStrategy::DateTimeDelta.new(1,1)
|
35
33
|
end
|
36
34
|
|
37
35
|
table 'Customer' do
|
38
36
|
primary_key 'CustomerId'
|
39
|
-
anonymize('Phone').using
|
37
|
+
anonymize('Phone').using FieldStrategy::RandomPhoneNumber.new
|
40
38
|
end
|
41
39
|
|
42
40
|
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: data-anonymization
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.2.0
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -11,7 +11,7 @@ authors:
|
|
11
11
|
autorequire:
|
12
12
|
bindir: bin
|
13
13
|
cert_chain: []
|
14
|
-
date: 2012-08-
|
14
|
+
date: 2012-08-17 00:00:00.000000000 Z
|
15
15
|
dependencies:
|
16
16
|
- !ruby/object:Gem::Dependency
|
17
17
|
name: activerecord
|
@@ -29,6 +29,22 @@ dependencies:
|
|
29
29
|
- - ~>
|
30
30
|
- !ruby/object:Gem::Version
|
31
31
|
version: 3.2.8
|
32
|
+
- !ruby/object:Gem::Dependency
|
33
|
+
name: composite_primary_keys
|
34
|
+
requirement: !ruby/object:Gem::Requirement
|
35
|
+
none: false
|
36
|
+
requirements:
|
37
|
+
- - ~>
|
38
|
+
- !ruby/object:Gem::Version
|
39
|
+
version: 5.0.8
|
40
|
+
type: :runtime
|
41
|
+
prerelease: false
|
42
|
+
version_requirements: !ruby/object:Gem::Requirement
|
43
|
+
none: false
|
44
|
+
requirements:
|
45
|
+
- - ~>
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: 5.0.8
|
32
48
|
- !ruby/object:Gem::Dependency
|
33
49
|
name: activesupport
|
34
50
|
requirement: !ruby/object:Gem::Requirement
|
@@ -45,6 +61,54 @@ dependencies:
|
|
45
61
|
- - ~>
|
46
62
|
- !ruby/object:Gem::Version
|
47
63
|
version: 3.2.8
|
64
|
+
- !ruby/object:Gem::Dependency
|
65
|
+
name: rgeo
|
66
|
+
requirement: !ruby/object:Gem::Requirement
|
67
|
+
none: false
|
68
|
+
requirements:
|
69
|
+
- - ~>
|
70
|
+
- !ruby/object:Gem::Version
|
71
|
+
version: 0.3.15
|
72
|
+
type: :runtime
|
73
|
+
prerelease: false
|
74
|
+
version_requirements: !ruby/object:Gem::Requirement
|
75
|
+
none: false
|
76
|
+
requirements:
|
77
|
+
- - ~>
|
78
|
+
- !ruby/object:Gem::Version
|
79
|
+
version: 0.3.15
|
80
|
+
- !ruby/object:Gem::Dependency
|
81
|
+
name: rgeo-geojson
|
82
|
+
requirement: !ruby/object:Gem::Requirement
|
83
|
+
none: false
|
84
|
+
requirements:
|
85
|
+
- - ~>
|
86
|
+
- !ruby/object:Gem::Version
|
87
|
+
version: 0.2.3
|
88
|
+
type: :runtime
|
89
|
+
prerelease: false
|
90
|
+
version_requirements: !ruby/object:Gem::Requirement
|
91
|
+
none: false
|
92
|
+
requirements:
|
93
|
+
- - ~>
|
94
|
+
- !ruby/object:Gem::Version
|
95
|
+
version: 0.2.3
|
96
|
+
- !ruby/object:Gem::Dependency
|
97
|
+
name: powerbar
|
98
|
+
requirement: !ruby/object:Gem::Requirement
|
99
|
+
none: false
|
100
|
+
requirements:
|
101
|
+
- - ~>
|
102
|
+
- !ruby/object:Gem::Version
|
103
|
+
version: 1.0.8
|
104
|
+
type: :runtime
|
105
|
+
prerelease: false
|
106
|
+
version_requirements: !ruby/object:Gem::Requirement
|
107
|
+
none: false
|
108
|
+
requirements:
|
109
|
+
- - ~>
|
110
|
+
- !ruby/object:Gem::Version
|
111
|
+
version: 1.0.8
|
48
112
|
description: Data anonymization tool for RDBMS databases
|
49
113
|
email:
|
50
114
|
- parekh.sunit@gmail.com
|
@@ -72,67 +136,95 @@ files:
|
|
72
136
|
- lib/data-anonymization.rb
|
73
137
|
- lib/strategy/base.rb
|
74
138
|
- lib/strategy/blacklist.rb
|
75
|
-
- lib/strategy/field/anonymize_time.rb
|
76
139
|
- lib/strategy/field/anonymous.rb
|
77
|
-
- lib/strategy/field/
|
140
|
+
- lib/strategy/field/contact/geojson_base.rb
|
141
|
+
- lib/strategy/field/contact/random_address.rb
|
142
|
+
- lib/strategy/field/contact/random_city.rb
|
143
|
+
- lib/strategy/field/contact/random_phone_number.rb
|
144
|
+
- lib/strategy/field/contact/random_province.rb
|
145
|
+
- lib/strategy/field/contact/random_zipcode.rb
|
146
|
+
- lib/strategy/field/datetime/anonymize_date.rb
|
147
|
+
- lib/strategy/field/datetime/anonymize_datetime.rb
|
148
|
+
- lib/strategy/field/datetime/anonymize_time.rb
|
149
|
+
- lib/strategy/field/datetime/date_delta.rb
|
150
|
+
- lib/strategy/field/datetime/date_time_delta.rb
|
151
|
+
- lib/strategy/field/datetime/time_delta.rb
|
78
152
|
- lib/strategy/field/default_anon.rb
|
79
|
-
- lib/strategy/field/
|
153
|
+
- lib/strategy/field/email/gmail_template.rb
|
154
|
+
- lib/strategy/field/email/random_email.rb
|
155
|
+
- lib/strategy/field/email/random_mailinator_email.rb
|
80
156
|
- lib/strategy/field/fields.rb
|
81
|
-
- lib/strategy/field/
|
82
|
-
- lib/strategy/field/
|
157
|
+
- lib/strategy/field/name/random_first_name.rb
|
158
|
+
- lib/strategy/field/name/random_full_name.rb
|
159
|
+
- lib/strategy/field/name/random_last_name.rb
|
160
|
+
- lib/strategy/field/name/random_user_name.rb
|
161
|
+
- lib/strategy/field/number/random_float.rb
|
162
|
+
- lib/strategy/field/number/random_float_delta.rb
|
163
|
+
- lib/strategy/field/number/random_integer.rb
|
164
|
+
- lib/strategy/field/number/random_integer_delta.rb
|
83
165
|
- lib/strategy/field/random_boolean.rb
|
84
|
-
- lib/strategy/field/
|
85
|
-
- lib/strategy/field/
|
86
|
-
- lib/strategy/field/
|
87
|
-
- lib/strategy/field/
|
88
|
-
- lib/strategy/field/
|
89
|
-
- lib/strategy/field/
|
90
|
-
- lib/strategy/field/
|
91
|
-
- lib/strategy/field/random_mailinator_email.rb
|
92
|
-
- lib/strategy/field/random_phone_number.rb
|
93
|
-
- lib/strategy/field/random_selection.rb
|
94
|
-
- lib/strategy/field/random_string.rb
|
95
|
-
- lib/strategy/field/random_user_name.rb
|
96
|
-
- lib/strategy/field/string_template.rb
|
97
|
-
- lib/strategy/field/user_name_template.rb
|
166
|
+
- lib/strategy/field/string/formatted_string_numbers.rb
|
167
|
+
- lib/strategy/field/string/lorem_ipsum.rb
|
168
|
+
- lib/strategy/field/string/random_string.rb
|
169
|
+
- lib/strategy/field/string/select_from_database.rb
|
170
|
+
- lib/strategy/field/string/select_from_file.rb
|
171
|
+
- lib/strategy/field/string/select_from_list.rb
|
172
|
+
- lib/strategy/field/string/string_template.rb
|
98
173
|
- lib/strategy/field/whitelist.rb
|
99
174
|
- lib/strategy/strategies.rb
|
100
175
|
- lib/strategy/whitelist.rb
|
101
176
|
- lib/tasks/rake_tasks.rb
|
102
177
|
- lib/utils/database.rb
|
178
|
+
- lib/utils/geojson_parser.rb
|
103
179
|
- lib/utils/logging.rb
|
180
|
+
- lib/utils/progress_bar.rb
|
181
|
+
- lib/utils/random_float.rb
|
104
182
|
- lib/utils/random_int.rb
|
105
183
|
- lib/utils/random_string.rb
|
106
184
|
- lib/utils/resource.rb
|
107
185
|
- lib/version.rb
|
186
|
+
- resources/UK_addresses.geojson
|
187
|
+
- resources/US_addresses.geojson
|
108
188
|
- resources/first_names.txt
|
109
189
|
- resources/last_names.txt
|
110
190
|
- spec/acceptance/rdbms_blacklist_spec.rb
|
111
191
|
- spec/acceptance/rdbms_whitelist_spec.rb
|
192
|
+
- spec/resource/sample.geojson
|
112
193
|
- spec/spec_helper.rb
|
113
|
-
- spec/strategy/field/
|
114
|
-
- spec/strategy/field/
|
115
|
-
- spec/strategy/field/
|
116
|
-
- spec/strategy/field/
|
117
|
-
- spec/strategy/field/
|
194
|
+
- spec/strategy/field/contact/random_address_spec.rb
|
195
|
+
- spec/strategy/field/contact/random_city_spec.rb
|
196
|
+
- spec/strategy/field/contact/random_phone_number_spec.rb
|
197
|
+
- spec/strategy/field/contact/random_province_spec.rb
|
198
|
+
- spec/strategy/field/contact/random_zipcode_spec.rb
|
199
|
+
- spec/strategy/field/datetime/anonymize_date_spec.rb
|
200
|
+
- spec/strategy/field/datetime/anonymize_datetime_spec.rb
|
201
|
+
- spec/strategy/field/datetime/anonymize_time_spec.rb
|
202
|
+
- spec/strategy/field/datetime/date_delta_spec.rb
|
203
|
+
- spec/strategy/field/datetime/date_time_delta_spec.rb
|
204
|
+
- spec/strategy/field/datetime/time_delta_spec.rb
|
205
|
+
- spec/strategy/field/default_anon_spec.rb
|
206
|
+
- spec/strategy/field/email/gmail_template_spec.rb
|
207
|
+
- spec/strategy/field/email/random_email_spec.rb
|
208
|
+
- spec/strategy/field/email/random_mailinator_email_spec.rb
|
209
|
+
- spec/strategy/field/name/random_first_name_spec.rb
|
210
|
+
- spec/strategy/field/name/random_full_name_spec.rb
|
211
|
+
- spec/strategy/field/name/random_last_name_spec.rb
|
212
|
+
- spec/strategy/field/name/random_user_name_spec.rb
|
213
|
+
- spec/strategy/field/number/random_float_delta_spec.rb
|
214
|
+
- spec/strategy/field/number/random_float_spec.rb
|
215
|
+
- spec/strategy/field/number/random_integer_delta_spec.rb
|
216
|
+
- spec/strategy/field/number/random_integer_spec.rb
|
118
217
|
- spec/strategy/field/random_boolean_spec.rb
|
119
|
-
- spec/strategy/field/
|
120
|
-
- spec/strategy/field/
|
121
|
-
- spec/strategy/field/
|
122
|
-
- spec/strategy/field/
|
123
|
-
- spec/strategy/field/
|
124
|
-
- spec/strategy/field/
|
125
|
-
- spec/strategy/field/random_last_name_spec.rb
|
126
|
-
- spec/strategy/field/random_mailinator_email_spec.rb
|
127
|
-
- spec/strategy/field/random_phone_number_spec.rb
|
128
|
-
- spec/strategy/field/random_selection_spec.rb
|
129
|
-
- spec/strategy/field/random_string_spec.rb
|
130
|
-
- spec/strategy/field/random_user_name_spec.rb
|
131
|
-
- spec/strategy/field/string_template_spec.rb
|
132
|
-
- spec/strategy/field/user_name_template_spec.rb
|
218
|
+
- spec/strategy/field/string/formatted_string_numbers_spec.rb
|
219
|
+
- spec/strategy/field/string/lorem_ipsum_spec.rb
|
220
|
+
- spec/strategy/field/string/random_string_spec.rb
|
221
|
+
- spec/strategy/field/string/select_from_database_spec.rb
|
222
|
+
- spec/strategy/field/string/select_from_list_spec.rb
|
223
|
+
- spec/strategy/field/string/string_template_spec.rb
|
133
224
|
- spec/strategy/field/whitelist_spec.rb
|
134
225
|
- spec/support/customer_sample.rb
|
135
226
|
- spec/utils/database_spec.rb
|
227
|
+
- spec/utils/geojson_parser_spec.rb
|
136
228
|
- spec/utils/random_int_spec.rb
|
137
229
|
- spec/utils/random_string_spec.rb
|
138
230
|
- whitelist_dsl.rb
|
@@ -164,29 +256,41 @@ summary: Tool to create anonymized production data dump to use for PREF and othe
|
|
164
256
|
test_files:
|
165
257
|
- spec/acceptance/rdbms_blacklist_spec.rb
|
166
258
|
- spec/acceptance/rdbms_whitelist_spec.rb
|
259
|
+
- spec/resource/sample.geojson
|
167
260
|
- spec/spec_helper.rb
|
168
|
-
- spec/strategy/field/
|
169
|
-
- spec/strategy/field/
|
170
|
-
- spec/strategy/field/
|
171
|
-
- spec/strategy/field/
|
172
|
-
- spec/strategy/field/
|
261
|
+
- spec/strategy/field/contact/random_address_spec.rb
|
262
|
+
- spec/strategy/field/contact/random_city_spec.rb
|
263
|
+
- spec/strategy/field/contact/random_phone_number_spec.rb
|
264
|
+
- spec/strategy/field/contact/random_province_spec.rb
|
265
|
+
- spec/strategy/field/contact/random_zipcode_spec.rb
|
266
|
+
- spec/strategy/field/datetime/anonymize_date_spec.rb
|
267
|
+
- spec/strategy/field/datetime/anonymize_datetime_spec.rb
|
268
|
+
- spec/strategy/field/datetime/anonymize_time_spec.rb
|
269
|
+
- spec/strategy/field/datetime/date_delta_spec.rb
|
270
|
+
- spec/strategy/field/datetime/date_time_delta_spec.rb
|
271
|
+
- spec/strategy/field/datetime/time_delta_spec.rb
|
272
|
+
- spec/strategy/field/default_anon_spec.rb
|
273
|
+
- spec/strategy/field/email/gmail_template_spec.rb
|
274
|
+
- spec/strategy/field/email/random_email_spec.rb
|
275
|
+
- spec/strategy/field/email/random_mailinator_email_spec.rb
|
276
|
+
- spec/strategy/field/name/random_first_name_spec.rb
|
277
|
+
- spec/strategy/field/name/random_full_name_spec.rb
|
278
|
+
- spec/strategy/field/name/random_last_name_spec.rb
|
279
|
+
- spec/strategy/field/name/random_user_name_spec.rb
|
280
|
+
- spec/strategy/field/number/random_float_delta_spec.rb
|
281
|
+
- spec/strategy/field/number/random_float_spec.rb
|
282
|
+
- spec/strategy/field/number/random_integer_delta_spec.rb
|
283
|
+
- spec/strategy/field/number/random_integer_spec.rb
|
173
284
|
- spec/strategy/field/random_boolean_spec.rb
|
174
|
-
- spec/strategy/field/
|
175
|
-
- spec/strategy/field/
|
176
|
-
- spec/strategy/field/
|
177
|
-
- spec/strategy/field/
|
178
|
-
- spec/strategy/field/
|
179
|
-
- spec/strategy/field/
|
180
|
-
- spec/strategy/field/random_last_name_spec.rb
|
181
|
-
- spec/strategy/field/random_mailinator_email_spec.rb
|
182
|
-
- spec/strategy/field/random_phone_number_spec.rb
|
183
|
-
- spec/strategy/field/random_selection_spec.rb
|
184
|
-
- spec/strategy/field/random_string_spec.rb
|
185
|
-
- spec/strategy/field/random_user_name_spec.rb
|
186
|
-
- spec/strategy/field/string_template_spec.rb
|
187
|
-
- spec/strategy/field/user_name_template_spec.rb
|
285
|
+
- spec/strategy/field/string/formatted_string_numbers_spec.rb
|
286
|
+
- spec/strategy/field/string/lorem_ipsum_spec.rb
|
287
|
+
- spec/strategy/field/string/random_string_spec.rb
|
288
|
+
- spec/strategy/field/string/select_from_database_spec.rb
|
289
|
+
- spec/strategy/field/string/select_from_list_spec.rb
|
290
|
+
- spec/strategy/field/string/string_template_spec.rb
|
188
291
|
- spec/strategy/field/whitelist_spec.rb
|
189
292
|
- spec/support/customer_sample.rb
|
190
293
|
- spec/utils/database_spec.rb
|
294
|
+
- spec/utils/geojson_parser_spec.rb
|
191
295
|
- spec/utils/random_int_spec.rb
|
192
296
|
- spec/utils/random_string_spec.rb
|