data-anonymization 0.1.2 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (103) hide show
  1. data/.documentup.json +1 -0
  2. data/.travis.yml +0 -1
  3. data/README.md +277 -52
  4. data/blacklist_dsl.rb +1 -3
  5. data/data-anonymization.gemspec +4 -0
  6. data/lib/core/dsl.rb +1 -1
  7. data/lib/data-anonymization.rb +3 -0
  8. data/lib/strategy/base.rb +21 -11
  9. data/lib/strategy/blacklist.rb +2 -1
  10. data/lib/strategy/field/contact/geojson_base.rb +24 -0
  11. data/lib/strategy/field/contact/random_address.rb +17 -0
  12. data/lib/strategy/field/contact/random_city.rb +17 -0
  13. data/lib/strategy/field/contact/random_phone_number.rb +13 -0
  14. data/lib/strategy/field/contact/random_province.rb +17 -0
  15. data/lib/strategy/field/contact/random_zipcode.rb +17 -0
  16. data/lib/strategy/field/datetime/anonymize_date.rb +39 -0
  17. data/lib/strategy/field/datetime/anonymize_datetime.rb +15 -0
  18. data/lib/strategy/field/datetime/anonymize_time.rb +58 -0
  19. data/lib/strategy/field/datetime/date_delta.rb +21 -0
  20. data/lib/strategy/field/{date_time_delta.rb → datetime/date_time_delta.rb} +3 -3
  21. data/lib/strategy/field/datetime/time_delta.rb +12 -0
  22. data/lib/strategy/field/default_anon.rb +12 -7
  23. data/lib/strategy/field/email/gmail_template.rb +16 -0
  24. data/lib/strategy/field/{random_email.rb → email/random_email.rb} +0 -0
  25. data/lib/strategy/field/{random_mailinator_email.rb → email/random_mailinator_email.rb} +0 -2
  26. data/lib/strategy/field/fields.rb +51 -20
  27. data/lib/strategy/field/name/random_first_name.rb +14 -0
  28. data/lib/strategy/field/{random_full_name.rb → name/random_full_name.rb} +0 -0
  29. data/lib/strategy/field/name/random_last_name.rb +14 -0
  30. data/lib/strategy/field/{random_user_name.rb → name/random_user_name.rb} +0 -0
  31. data/lib/strategy/field/number/random_float.rb +23 -0
  32. data/lib/strategy/field/{random_float_delta.rb → number/random_float_delta.rb} +2 -4
  33. data/lib/strategy/field/{random_int.rb → number/random_integer.rb} +1 -1
  34. data/lib/strategy/field/{random_integer_delta.rb → number/random_integer_delta.rb} +2 -5
  35. data/lib/strategy/field/{random_phone_number.rb → string/formatted_string_numbers.rb} +4 -1
  36. data/lib/strategy/field/{lorem_ipsum.rb → string/lorem_ipsum.rb} +0 -0
  37. data/lib/strategy/field/{random_string.rb → string/random_string.rb} +0 -0
  38. data/lib/strategy/field/{distinct_column_values.rb → string/select_from_database.rb} +2 -3
  39. data/lib/strategy/field/string/select_from_file.rb +18 -0
  40. data/lib/strategy/field/string/select_from_list.rb +17 -0
  41. data/lib/strategy/field/{string_template.rb → string/string_template.rb} +0 -0
  42. data/lib/strategy/whitelist.rb +4 -2
  43. data/lib/utils/database.rb +8 -6
  44. data/lib/utils/geojson_parser.rb +42 -0
  45. data/lib/utils/logging.rb +0 -9
  46. data/lib/utils/progress_bar.rb +29 -0
  47. data/lib/utils/random_float.rb +12 -0
  48. data/lib/utils/random_int.rb +3 -7
  49. data/lib/utils/resource.rb +4 -0
  50. data/lib/version.rb +1 -1
  51. data/resources/UK_addresses.geojson +300 -0
  52. data/resources/US_addresses.geojson +300 -0
  53. data/spec/acceptance/rdbms_blacklist_spec.rb +2 -2
  54. data/spec/acceptance/rdbms_whitelist_spec.rb +6 -8
  55. data/spec/resource/sample.geojson +1 -0
  56. data/spec/spec_helper.rb +3 -2
  57. data/spec/strategy/field/contact/random_address_spec.rb +12 -0
  58. data/spec/strategy/field/contact/random_city_spec.rb +14 -0
  59. data/spec/strategy/field/contact/random_phone_number_spec.rb +16 -0
  60. data/spec/strategy/field/contact/random_province_spec.rb +14 -0
  61. data/spec/strategy/field/contact/random_zipcode_spec.rb +14 -0
  62. data/spec/strategy/field/datetime/anonymize_date_spec.rb +27 -0
  63. data/spec/strategy/field/datetime/anonymize_datetime_spec.rb +57 -0
  64. data/spec/strategy/field/datetime/anonymize_time_spec.rb +57 -0
  65. data/spec/strategy/field/datetime/date_delta_spec.rb +36 -0
  66. data/spec/strategy/field/{date_time_delta_spec.rb → datetime/date_time_delta_spec.rb} +3 -2
  67. data/spec/strategy/field/datetime/time_delta_spec.rb +44 -0
  68. data/spec/strategy/field/default_anon_spec.rb +42 -0
  69. data/spec/strategy/field/email/gmail_template_spec.rb +17 -0
  70. data/spec/strategy/field/{random_email_spec.rb → email/random_email_spec.rb} +2 -2
  71. data/spec/strategy/field/email/random_mailinator_email_spec.rb +14 -0
  72. data/spec/strategy/field/{random_first_name_spec.rb → name/random_first_name_spec.rb} +2 -2
  73. data/spec/strategy/field/{random_full_name_spec.rb → name/random_full_name_spec.rb} +2 -2
  74. data/spec/strategy/field/{random_last_name_spec.rb → name/random_last_name_spec.rb} +2 -2
  75. data/spec/strategy/field/{random_user_name_spec.rb → name/random_user_name_spec.rb} +2 -2
  76. data/spec/strategy/field/{random_float_delta_spec.rb → number/random_float_delta_spec.rb} +2 -2
  77. data/spec/strategy/field/number/random_float_spec.rb +28 -0
  78. data/spec/strategy/field/{random_integer_delta_spec.rb → number/random_integer_delta_spec.rb} +3 -5
  79. data/spec/strategy/field/{random_int_spec.rb → number/random_integer_spec.rb} +4 -4
  80. data/spec/strategy/field/random_boolean_spec.rb +2 -2
  81. data/spec/strategy/field/string/formatted_string_numbers_spec.rb +15 -0
  82. data/spec/strategy/field/{lorem_ipsum_spec.rb → string/lorem_ipsum_spec.rb} +2 -2
  83. data/spec/strategy/field/{random_string_spec.rb → string/random_string_spec.rb} +2 -2
  84. data/spec/strategy/field/{distinct_column_values_spec.rb → string/select_from_database_spec.rb} +3 -3
  85. data/spec/strategy/field/{random_selection_spec.rb → string/select_from_list_spec.rb} +5 -5
  86. data/spec/strategy/field/{string_template_spec.rb → string/string_template_spec.rb} +2 -2
  87. data/spec/strategy/field/whitelist_spec.rb +2 -2
  88. data/spec/support/customer_sample.rb +1 -1
  89. data/spec/utils/database_spec.rb +2 -2
  90. data/spec/utils/geojson_parser_spec.rb +38 -0
  91. data/whitelist_dsl.rb +4 -6
  92. metadata +163 -59
  93. data/lib/strategy/field/anonymize_time.rb +0 -57
  94. data/lib/strategy/field/gmail_template.rb +0 -17
  95. data/lib/strategy/field/random_first_name.rb +0 -18
  96. data/lib/strategy/field/random_last_name.rb +0 -19
  97. data/lib/strategy/field/random_selection.rb +0 -23
  98. data/lib/strategy/field/user_name_template.rb +0 -22
  99. data/spec/strategy/field/anonymize_time_spec.rb +0 -23
  100. data/spec/strategy/field/gmail_template_spec.rb +0 -14
  101. data/spec/strategy/field/random_mailinator_email_spec.rb +0 -21
  102. data/spec/strategy/field/random_phone_number_spec.rb +0 -35
  103. data/spec/strategy/field/user_name_template_spec.rb +0 -13
@@ -1,8 +1,8 @@
1
1
  require "spec_helper"
2
2
 
3
- describe DataAnon::Strategy::Field::RandomEmail do
3
+ describe FieldStrategy::RandomEmail do
4
4
 
5
- RandomEmail = DataAnon::Strategy::Field::RandomEmail
5
+ RandomEmail = FieldStrategy::RandomEmail
6
6
  let(:field) {DataAnon::Core::Field.new('email','real@email.com',1,nil)}
7
7
 
8
8
 
@@ -0,0 +1,14 @@
1
+ require "spec_helper"
2
+
3
+ describe FieldStrategy::RandomMailinatorEmail do
4
+
5
+ RandomMailinatorEmail = FieldStrategy::RandomMailinatorEmail
6
+ let(:field) {DataAnon::Core::Field.new('email','user@company.com',1,nil)}
7
+
8
+ describe 'anonymized email should not be the same as original email' do
9
+ let(:anonymized_email) {RandomMailinatorEmail.new.anonymize(field)}
10
+
11
+ it {anonymized_email.should_not equal field.value}
12
+ it {anonymized_email.should match '^\S+@\mailinator\.com$'}
13
+ end
14
+ end
@@ -1,8 +1,8 @@
1
1
  require "spec_helper"
2
2
 
3
- describe DataAnon::Strategy::Field::RandomFirstName do
3
+ describe FieldStrategy::RandomFirstName do
4
4
 
5
- RandomFirstName = DataAnon::Strategy::Field::RandomFirstName
5
+ RandomFirstName = FieldStrategy::RandomFirstName
6
6
  let(:field) {DataAnon::Core::Field.new('firstname','fakeFirstName',1,nil)}
7
7
 
8
8
  describe 'anonymized name must not be the same as provided name' do
@@ -1,8 +1,8 @@
1
1
  require "spec_helper"
2
2
 
3
- describe DataAnon::Strategy::Field::RandomFullName do
3
+ describe FieldStrategy::RandomFullName do
4
4
 
5
- RandomFullName = DataAnon::Strategy::Field::RandomFullName
5
+ RandomFullName = FieldStrategy::RandomFullName
6
6
  let(:field) {DataAnon::Core::Field.new('name','Fake User',1,nil)}
7
7
 
8
8
  describe 'anonymized name should be the same as original' do
@@ -1,8 +1,8 @@
1
1
  require "spec_helper"
2
2
 
3
- describe DataAnon::Strategy::Field::RandomLastName do
3
+ describe FieldStrategy::RandomLastName do
4
4
 
5
- RandomLastName = DataAnon::Strategy::Field::RandomLastName
5
+ RandomLastName = FieldStrategy::RandomLastName
6
6
  let(:field) {DataAnon::Core::Field.new('lastname','fakeLastName',1,nil)}
7
7
 
8
8
  describe 'anonymized name must not be the same as provided name' do
@@ -1,8 +1,8 @@
1
1
  require "spec_helper"
2
2
 
3
- describe DataAnon::Strategy::Field::RandomUserName do
3
+ describe FieldStrategy::RandomUserName do
4
4
 
5
- RandomUserName = DataAnon::Strategy::Field::RandomUserName
5
+ RandomUserName = FieldStrategy::RandomUserName
6
6
  let(:field) {DataAnon::Core::Field.new('username','fakeUserName',1,nil)}
7
7
 
8
8
  describe 'anonymized user name should not be the same as original user name' do
@@ -1,8 +1,8 @@
1
1
  require "spec_helper"
2
2
 
3
- describe DataAnon::Strategy::Field::RandomFloatDelta do
3
+ describe FieldStrategy::RandomFloatDelta do
4
4
 
5
- RandomFloatDelta = DataAnon::Strategy::Field::RandomFloatDelta
5
+ RandomFloatDelta = FieldStrategy::RandomFloatDelta
6
6
  let(:field) {DataAnon::Core::Field.new('float_field',5.5,1,nil)}
7
7
 
8
8
  describe 'anonymized float should not be the same as original value' do
@@ -0,0 +1,28 @@
1
+ require "spec_helper"
2
+
3
+ describe FieldStrategy::RandomFloat do
4
+
5
+ RandomFloat = FieldStrategy::RandomFloat
6
+ let(:field) { DataAnon::Core::Field.new('points', 2.5, 1, nil) }
7
+
8
+ describe 'verify age range between 18 and 70' do
9
+
10
+ let(:anonymized_int) { RandomFloat.new(2.0, 8.0).anonymize(field) }
11
+
12
+ it { anonymized_int.should >= 2.0 }
13
+ it { anonymized_int.should <= 8.0 }
14
+
15
+ end
16
+
17
+ describe 'default range between 0 and 100' do
18
+
19
+ let(:anonymized_int) { RandomFloat.new.anonymize(field) }
20
+
21
+ it { anonymized_int.should >= 0.0 }
22
+ it { anonymized_int.should <= 100.0 }
23
+
24
+ end
25
+
26
+
27
+
28
+ end
@@ -1,16 +1,14 @@
1
1
  require "spec_helper"
2
2
 
3
- describe DataAnon::Strategy::Field::RandomIntegerDelta do
3
+ describe FieldStrategy::RandomIntegerDelta do
4
4
 
5
- RandomIntegerDelta = DataAnon::Strategy::Field::RandomIntegerDelta
5
+ RandomIntegerDelta = FieldStrategy::RandomIntegerDelta
6
6
  let(:field) {DataAnon::Core::Field.new('integer_field',100,1,nil)}
7
7
 
8
8
  describe "anonymized value returned should be an integer" do
9
9
  let(:anonymized_integer) {RandomIntegerDelta.new(10).anonymize(field)}
10
10
 
11
- it { is_integer = anonymized_integer.is_a? Integer
12
- is_integer.should be true
13
- }
11
+ it { anonymized_integer.should be_kind_of Integer }
14
12
 
15
13
  end
16
14
 
@@ -1,13 +1,13 @@
1
1
  require "spec_helper"
2
2
 
3
- describe DataAnon::Strategy::Field::RandomInt do
3
+ describe FieldStrategy::RandomInteger do
4
4
 
5
- RandomInt = DataAnon::Strategy::Field::RandomInt
5
+ RandomInteger = FieldStrategy::RandomInteger
6
6
  let(:field) { DataAnon::Core::Field.new('age', 25, 1, nil) }
7
7
 
8
8
  describe 'verify age range between 18 and 70' do
9
9
 
10
- let(:anonymized_int) { RandomInt.new(18, 70).anonymize(field) }
10
+ let(:anonymized_int) { RandomInteger.new(18, 70).anonymize(field) }
11
11
 
12
12
  it { anonymized_int.should >= 18 }
13
13
  it { anonymized_int.should <= 70 }
@@ -16,7 +16,7 @@ describe DataAnon::Strategy::Field::RandomInt do
16
16
 
17
17
  describe 'default range between 0 and 100' do
18
18
 
19
- let(:anonymized_int) { RandomInt.new.anonymize(field) }
19
+ let(:anonymized_int) { RandomInteger.new.anonymize(field) }
20
20
 
21
21
  it { anonymized_int.should >= 0 }
22
22
  it { anonymized_int.should <= 100 }
@@ -1,8 +1,8 @@
1
1
  require "spec_helper"
2
2
 
3
- describe DataAnon::Strategy::Field::RandomBoolean do
3
+ describe FieldStrategy::RandomBoolean do
4
4
 
5
- RandomBoolean = DataAnon::Strategy::Field::RandomBoolean
5
+ RandomBoolean = FieldStrategy::RandomBoolean
6
6
  let(:field) {DataAnon::Core::Field.new('boolean_field',true,1,nil)}
7
7
 
8
8
  describe 'anonymized value should be a boolean' do
@@ -0,0 +1,15 @@
1
+ require "spec_helper"
2
+
3
+ describe FieldStrategy::FormattedStringNumber do
4
+
5
+ FormattedStringNumber = FieldStrategy::FormattedStringNumber
6
+ let(:field) {DataAnon::Core::Field.new('credit_card_number',"1111-2222-3333-4444",1,nil)}
7
+
8
+ describe 'anonymized credit card number preserving the format' do
9
+ let(:anonymized_number) {FormattedStringNumber.new.anonymize(field)}
10
+
11
+ it {anonymized_number.should_not equal field.value}
12
+ it { anonymized_number.should match /^\d{4}-\d{4}-\d{4}-\d{4}$/}
13
+ end
14
+
15
+ end
@@ -1,8 +1,8 @@
1
1
  require "spec_helper"
2
2
 
3
- describe DataAnon::Strategy::Field::LoremIpsum do
3
+ describe FieldStrategy::LoremIpsum do
4
4
 
5
- LoremIpsum = DataAnon::Strategy::Field::LoremIpsum
5
+ LoremIpsum = FieldStrategy::LoremIpsum
6
6
  let(:field) { DataAnon::Core::Field.new('state', 'New Delhi', 1, nil) }
7
7
 
8
8
  describe 'should return same length value using default text' do
@@ -1,8 +1,8 @@
1
1
  require "spec_helper"
2
2
 
3
- describe DataAnon::Strategy::Field::RandomString do
3
+ describe FieldStrategy::RandomString do
4
4
 
5
- RandomString = DataAnon::Strategy::Field::RandomString
5
+ RandomString = FieldStrategy::RandomString
6
6
 
7
7
  describe 'anonymized string must not be the same as original string' do
8
8
  let(:field) {DataAnon::Core::Field.new('string_field','fakeString',1,nil)}
@@ -1,18 +1,18 @@
1
1
  require "spec_helper"
2
2
 
3
- describe DataAnon::Strategy::Field::DistinctColumnValues do
3
+ describe FieldStrategy::SelectFromDatabase do
4
4
 
5
5
  before(:each) do
6
6
  source = {:adapter => 'sqlite3', :database => 'sample-data/chinook.sqlite'}
7
7
  DataAnon::Utils::SourceDatabase.establish_connection source
8
8
  end
9
9
 
10
- DistinctColumnValues = DataAnon::Strategy::Field::DistinctColumnValues
10
+ SelectFromDatabase = FieldStrategy::SelectFromDatabase
11
11
  let(:field) { DataAnon::Core::Field.new('name', 'Abcd', 1, nil) }
12
12
 
13
13
  describe 'more than one values in predefined list' do
14
14
 
15
- let(:anonymized_value) { DistinctColumnValues.new('MediaType','Name').anonymize(field) }
15
+ let(:anonymized_value) { SelectFromDatabase.new('MediaType','Name').anonymize(field) }
16
16
 
17
17
  it { anonymized_value.should_not be('Abcd') }
18
18
  it { anonymized_value.should_not be_empty }
@@ -1,14 +1,14 @@
1
1
  require "spec_helper"
2
2
 
3
- describe DataAnon::Strategy::Field::RandomSelection do
3
+ describe FieldStrategy::SelectFromList do
4
4
 
5
- RandomSelection = DataAnon::Strategy::Field::RandomSelection
5
+ SelectFromList = FieldStrategy::SelectFromList
6
6
  let(:field) { DataAnon::Core::Field.new('state', 'New Delhi', 1, nil) }
7
7
 
8
8
  describe 'more than one values in predefined list' do
9
9
 
10
10
  let(:states) { ['Maharashtra','Gujrat','Karnataka'] }
11
- let(:anonymized_value) { RandomSelection.new(states).anonymize(field) }
11
+ let(:anonymized_value) { SelectFromList.new(states).anonymize(field) }
12
12
 
13
13
  it { states.should include(anonymized_value) }
14
14
 
@@ -17,7 +17,7 @@ describe DataAnon::Strategy::Field::RandomSelection do
17
17
  describe 'only one value in list' do
18
18
 
19
19
  let(:states) { ['Maharashtra'] }
20
- let(:anonymized_value) { RandomSelection.new(states).anonymize(field) }
20
+ let(:anonymized_value) { SelectFromList.new(states).anonymize(field) }
21
21
 
22
22
  it { anonymized_value.should == 'Maharashtra' }
23
23
 
@@ -26,7 +26,7 @@ describe DataAnon::Strategy::Field::RandomSelection do
26
26
  describe 'string value' do
27
27
 
28
28
  let(:states) { 'Maharashtra' }
29
- let(:anonymized_value) { RandomSelection.new(states).anonymize(field) }
29
+ let(:anonymized_value) { SelectFromList.new(states).anonymize(field) }
30
30
 
31
31
  it { anonymized_value.should == 'Maharashtra' }
32
32
 
@@ -1,8 +1,8 @@
1
1
  require "spec_helper"
2
2
 
3
- describe DataAnon::Strategy::Field::StringTemplate do
3
+ describe FieldStrategy::StringTemplate do
4
4
 
5
- StringTemplate = DataAnon::Strategy::Field::StringTemplate
5
+ StringTemplate = FieldStrategy::StringTemplate
6
6
  let(:field) { DataAnon::Core::Field.new('state', 'New Delhi', 3456, nil) }
7
7
 
8
8
  describe 'should return same string value as StringTemplate' do
@@ -1,8 +1,8 @@
1
1
  require "spec_helper"
2
2
 
3
- describe DataAnon::Strategy::Field::Whitelist do
3
+ describe FieldStrategy::Whitelist do
4
4
 
5
- Whitelist = DataAnon::Strategy::Field::Whitelist
5
+ Whitelist = FieldStrategy::Whitelist
6
6
 
7
7
  describe 'should return same string value as whitelist' do
8
8
  let(:field) { DataAnon::Core::Field.new('state', 'New Delhi', 1, nil) }
@@ -34,7 +34,7 @@ class CustomerSample
34
34
 
35
35
  def self.insert_record connection_spec, data_hash = SAMPLE_DATA
36
36
  DataAnon::Utils::TempDatabase.establish_connection connection_spec
37
- source = DataAnon::Utils::BaseTable.create_table 'customers', 'cust_id',DataAnon::Utils::TempDatabase
37
+ source = DataAnon::Utils::BaseTable.create_table DataAnon::Utils::TempDatabase, 'customers', 'cust_id'
38
38
  cust = source.new data_hash
39
39
  cust.cust_id = data_hash[:cust_id]
40
40
  cust.save!
@@ -11,13 +11,13 @@ describe "Utils" do
11
11
  end
12
12
 
13
13
  it "should test the connection to source database" do
14
- album = DataAnon::Utils::SourceTable.create "Album", "AlbumId"
14
+ album = DataAnon::Utils::SourceTable.create "Album", ["AlbumId"]
15
15
  album.count.should > 0
16
16
  album.all.length > 0
17
17
  end
18
18
 
19
19
  it "should test the connection to destination database" do
20
- album = DataAnon::Utils::DestinationTable.create "Album", "AlbumId"
20
+ album = DataAnon::Utils::DestinationTable.create "Album", ["AlbumId"]
21
21
  album.count.should == 0
22
22
  album.all.length == 0
23
23
 
@@ -0,0 +1,38 @@
1
+ require "spec_helper"
2
+
3
+ describe "Geo Json Parser" do
4
+
5
+ SAMPLE_DATA_FILE_PATH = DataAnon::Utils::Resource.project_home+'spec/resource/sample.geojson'
6
+
7
+ describe "parser should return list of addresses when address method is called" do
8
+ let(:result_list) {DataAnon::Utils::GeojsonParser.address(SAMPLE_DATA_FILE_PATH)}
9
+
10
+ it {result_list.length.should be 1}
11
+ it {result_list[0].should eq("333 Willoughby Ave")}
12
+ end
13
+
14
+ describe "parser should return list of zip codes when zipcode method is called" do
15
+ let(:result_list) {DataAnon::Utils::GeojsonParser.zipcode(SAMPLE_DATA_FILE_PATH)}
16
+
17
+ it {result_list.length.should be 1}
18
+ it {result_list[0].should eq("99801")}
19
+
20
+ end
21
+
22
+ describe "parser should return list of province when province method is called" do
23
+ let(:result_list) {DataAnon::Utils::GeojsonParser.province(SAMPLE_DATA_FILE_PATH)}
24
+
25
+ it {result_list.length.should be 1}
26
+ it {result_list[0].should eq("AK")}
27
+
28
+ end
29
+
30
+ describe "parser should return list of cities when city method is called" do
31
+ let(:result_list) {DataAnon::Utils::GeojsonParser.city(SAMPLE_DATA_FILE_PATH)}
32
+
33
+ it {result_list.length.should be 1}
34
+ it {result_list[0].should eq("Juneau")}
35
+
36
+ end
37
+
38
+ end
data/whitelist_dsl.rb CHANGED
@@ -2,8 +2,6 @@ system "rake empty_dest" # clean destination database on every call
2
2
 
3
3
  require 'data-anonymization'
4
4
 
5
- FS = DataAnon::Strategy::Field
6
-
7
5
  DataAnon::Utils::Logging.logger.level = Logger::INFO
8
6
 
9
7
  database 'Chinook' do
@@ -11,7 +9,7 @@ database 'Chinook' do
11
9
  source_db :adapter => 'sqlite3', :database => 'sample-data/chinook.sqlite'
12
10
  destination_db :adapter => 'sqlite3', :database => 'sample-data/chinook-empty.sqlite'
13
11
 
14
- default_field_strategies :string => FS::StringTemplate.new('Sunit #{row_number} Parekh')
12
+ default_field_strategies :string => FieldStrategy::StringTemplate.new('Sunit #{row_number} Parekh')
15
13
 
16
14
  table 'Genre' do
17
15
  primary_key 'GenreId'
@@ -24,19 +22,19 @@ database 'Chinook' do
24
22
  table 'MediaType' do
25
23
  primary_key 'MediaTypeId'
26
24
  anonymize('MediaTypeId') { |field| field.value } # same as whitelist
27
- anonymize('Name').using FS::StringTemplate.new('Media Type #{row_number}')
25
+ anonymize('Name').using FieldStrategy::StringTemplate.new('Media Type #{row_number}')
28
26
 
29
27
  end
30
28
 
31
29
  table 'Employee' do
32
30
  primary_key 'EmployeeId'
33
31
  whitelist 'EmployeeId'
34
- anonymize('BirthDate').using FS::DateTimeDelta.new(1,1)
32
+ anonymize('BirthDate').using FieldStrategy::DateTimeDelta.new(1,1)
35
33
  end
36
34
 
37
35
  table 'Customer' do
38
36
  primary_key 'CustomerId'
39
- anonymize('Phone').using FS::RandomPhoneNumber.new
37
+ anonymize('Phone').using FieldStrategy::RandomPhoneNumber.new
40
38
  end
41
39
 
42
40
 
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: data-anonymization
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.2
4
+ version: 0.2.0
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -11,7 +11,7 @@ authors:
11
11
  autorequire:
12
12
  bindir: bin
13
13
  cert_chain: []
14
- date: 2012-08-14 00:00:00.000000000 Z
14
+ date: 2012-08-17 00:00:00.000000000 Z
15
15
  dependencies:
16
16
  - !ruby/object:Gem::Dependency
17
17
  name: activerecord
@@ -29,6 +29,22 @@ dependencies:
29
29
  - - ~>
30
30
  - !ruby/object:Gem::Version
31
31
  version: 3.2.8
32
+ - !ruby/object:Gem::Dependency
33
+ name: composite_primary_keys
34
+ requirement: !ruby/object:Gem::Requirement
35
+ none: false
36
+ requirements:
37
+ - - ~>
38
+ - !ruby/object:Gem::Version
39
+ version: 5.0.8
40
+ type: :runtime
41
+ prerelease: false
42
+ version_requirements: !ruby/object:Gem::Requirement
43
+ none: false
44
+ requirements:
45
+ - - ~>
46
+ - !ruby/object:Gem::Version
47
+ version: 5.0.8
32
48
  - !ruby/object:Gem::Dependency
33
49
  name: activesupport
34
50
  requirement: !ruby/object:Gem::Requirement
@@ -45,6 +61,54 @@ dependencies:
45
61
  - - ~>
46
62
  - !ruby/object:Gem::Version
47
63
  version: 3.2.8
64
+ - !ruby/object:Gem::Dependency
65
+ name: rgeo
66
+ requirement: !ruby/object:Gem::Requirement
67
+ none: false
68
+ requirements:
69
+ - - ~>
70
+ - !ruby/object:Gem::Version
71
+ version: 0.3.15
72
+ type: :runtime
73
+ prerelease: false
74
+ version_requirements: !ruby/object:Gem::Requirement
75
+ none: false
76
+ requirements:
77
+ - - ~>
78
+ - !ruby/object:Gem::Version
79
+ version: 0.3.15
80
+ - !ruby/object:Gem::Dependency
81
+ name: rgeo-geojson
82
+ requirement: !ruby/object:Gem::Requirement
83
+ none: false
84
+ requirements:
85
+ - - ~>
86
+ - !ruby/object:Gem::Version
87
+ version: 0.2.3
88
+ type: :runtime
89
+ prerelease: false
90
+ version_requirements: !ruby/object:Gem::Requirement
91
+ none: false
92
+ requirements:
93
+ - - ~>
94
+ - !ruby/object:Gem::Version
95
+ version: 0.2.3
96
+ - !ruby/object:Gem::Dependency
97
+ name: powerbar
98
+ requirement: !ruby/object:Gem::Requirement
99
+ none: false
100
+ requirements:
101
+ - - ~>
102
+ - !ruby/object:Gem::Version
103
+ version: 1.0.8
104
+ type: :runtime
105
+ prerelease: false
106
+ version_requirements: !ruby/object:Gem::Requirement
107
+ none: false
108
+ requirements:
109
+ - - ~>
110
+ - !ruby/object:Gem::Version
111
+ version: 1.0.8
48
112
  description: Data anonymization tool for RDBMS databases
49
113
  email:
50
114
  - parekh.sunit@gmail.com
@@ -72,67 +136,95 @@ files:
72
136
  - lib/data-anonymization.rb
73
137
  - lib/strategy/base.rb
74
138
  - lib/strategy/blacklist.rb
75
- - lib/strategy/field/anonymize_time.rb
76
139
  - lib/strategy/field/anonymous.rb
77
- - lib/strategy/field/date_time_delta.rb
140
+ - lib/strategy/field/contact/geojson_base.rb
141
+ - lib/strategy/field/contact/random_address.rb
142
+ - lib/strategy/field/contact/random_city.rb
143
+ - lib/strategy/field/contact/random_phone_number.rb
144
+ - lib/strategy/field/contact/random_province.rb
145
+ - lib/strategy/field/contact/random_zipcode.rb
146
+ - lib/strategy/field/datetime/anonymize_date.rb
147
+ - lib/strategy/field/datetime/anonymize_datetime.rb
148
+ - lib/strategy/field/datetime/anonymize_time.rb
149
+ - lib/strategy/field/datetime/date_delta.rb
150
+ - lib/strategy/field/datetime/date_time_delta.rb
151
+ - lib/strategy/field/datetime/time_delta.rb
78
152
  - lib/strategy/field/default_anon.rb
79
- - lib/strategy/field/distinct_column_values.rb
153
+ - lib/strategy/field/email/gmail_template.rb
154
+ - lib/strategy/field/email/random_email.rb
155
+ - lib/strategy/field/email/random_mailinator_email.rb
80
156
  - lib/strategy/field/fields.rb
81
- - lib/strategy/field/gmail_template.rb
82
- - lib/strategy/field/lorem_ipsum.rb
157
+ - lib/strategy/field/name/random_first_name.rb
158
+ - lib/strategy/field/name/random_full_name.rb
159
+ - lib/strategy/field/name/random_last_name.rb
160
+ - lib/strategy/field/name/random_user_name.rb
161
+ - lib/strategy/field/number/random_float.rb
162
+ - lib/strategy/field/number/random_float_delta.rb
163
+ - lib/strategy/field/number/random_integer.rb
164
+ - lib/strategy/field/number/random_integer_delta.rb
83
165
  - lib/strategy/field/random_boolean.rb
84
- - lib/strategy/field/random_email.rb
85
- - lib/strategy/field/random_first_name.rb
86
- - lib/strategy/field/random_float_delta.rb
87
- - lib/strategy/field/random_full_name.rb
88
- - lib/strategy/field/random_int.rb
89
- - lib/strategy/field/random_integer_delta.rb
90
- - lib/strategy/field/random_last_name.rb
91
- - lib/strategy/field/random_mailinator_email.rb
92
- - lib/strategy/field/random_phone_number.rb
93
- - lib/strategy/field/random_selection.rb
94
- - lib/strategy/field/random_string.rb
95
- - lib/strategy/field/random_user_name.rb
96
- - lib/strategy/field/string_template.rb
97
- - lib/strategy/field/user_name_template.rb
166
+ - lib/strategy/field/string/formatted_string_numbers.rb
167
+ - lib/strategy/field/string/lorem_ipsum.rb
168
+ - lib/strategy/field/string/random_string.rb
169
+ - lib/strategy/field/string/select_from_database.rb
170
+ - lib/strategy/field/string/select_from_file.rb
171
+ - lib/strategy/field/string/select_from_list.rb
172
+ - lib/strategy/field/string/string_template.rb
98
173
  - lib/strategy/field/whitelist.rb
99
174
  - lib/strategy/strategies.rb
100
175
  - lib/strategy/whitelist.rb
101
176
  - lib/tasks/rake_tasks.rb
102
177
  - lib/utils/database.rb
178
+ - lib/utils/geojson_parser.rb
103
179
  - lib/utils/logging.rb
180
+ - lib/utils/progress_bar.rb
181
+ - lib/utils/random_float.rb
104
182
  - lib/utils/random_int.rb
105
183
  - lib/utils/random_string.rb
106
184
  - lib/utils/resource.rb
107
185
  - lib/version.rb
186
+ - resources/UK_addresses.geojson
187
+ - resources/US_addresses.geojson
108
188
  - resources/first_names.txt
109
189
  - resources/last_names.txt
110
190
  - spec/acceptance/rdbms_blacklist_spec.rb
111
191
  - spec/acceptance/rdbms_whitelist_spec.rb
192
+ - spec/resource/sample.geojson
112
193
  - spec/spec_helper.rb
113
- - spec/strategy/field/anonymize_time_spec.rb
114
- - spec/strategy/field/date_time_delta_spec.rb
115
- - spec/strategy/field/distinct_column_values_spec.rb
116
- - spec/strategy/field/gmail_template_spec.rb
117
- - spec/strategy/field/lorem_ipsum_spec.rb
194
+ - spec/strategy/field/contact/random_address_spec.rb
195
+ - spec/strategy/field/contact/random_city_spec.rb
196
+ - spec/strategy/field/contact/random_phone_number_spec.rb
197
+ - spec/strategy/field/contact/random_province_spec.rb
198
+ - spec/strategy/field/contact/random_zipcode_spec.rb
199
+ - spec/strategy/field/datetime/anonymize_date_spec.rb
200
+ - spec/strategy/field/datetime/anonymize_datetime_spec.rb
201
+ - spec/strategy/field/datetime/anonymize_time_spec.rb
202
+ - spec/strategy/field/datetime/date_delta_spec.rb
203
+ - spec/strategy/field/datetime/date_time_delta_spec.rb
204
+ - spec/strategy/field/datetime/time_delta_spec.rb
205
+ - spec/strategy/field/default_anon_spec.rb
206
+ - spec/strategy/field/email/gmail_template_spec.rb
207
+ - spec/strategy/field/email/random_email_spec.rb
208
+ - spec/strategy/field/email/random_mailinator_email_spec.rb
209
+ - spec/strategy/field/name/random_first_name_spec.rb
210
+ - spec/strategy/field/name/random_full_name_spec.rb
211
+ - spec/strategy/field/name/random_last_name_spec.rb
212
+ - spec/strategy/field/name/random_user_name_spec.rb
213
+ - spec/strategy/field/number/random_float_delta_spec.rb
214
+ - spec/strategy/field/number/random_float_spec.rb
215
+ - spec/strategy/field/number/random_integer_delta_spec.rb
216
+ - spec/strategy/field/number/random_integer_spec.rb
118
217
  - spec/strategy/field/random_boolean_spec.rb
119
- - spec/strategy/field/random_email_spec.rb
120
- - spec/strategy/field/random_first_name_spec.rb
121
- - spec/strategy/field/random_float_delta_spec.rb
122
- - spec/strategy/field/random_full_name_spec.rb
123
- - spec/strategy/field/random_int_spec.rb
124
- - spec/strategy/field/random_integer_delta_spec.rb
125
- - spec/strategy/field/random_last_name_spec.rb
126
- - spec/strategy/field/random_mailinator_email_spec.rb
127
- - spec/strategy/field/random_phone_number_spec.rb
128
- - spec/strategy/field/random_selection_spec.rb
129
- - spec/strategy/field/random_string_spec.rb
130
- - spec/strategy/field/random_user_name_spec.rb
131
- - spec/strategy/field/string_template_spec.rb
132
- - spec/strategy/field/user_name_template_spec.rb
218
+ - spec/strategy/field/string/formatted_string_numbers_spec.rb
219
+ - spec/strategy/field/string/lorem_ipsum_spec.rb
220
+ - spec/strategy/field/string/random_string_spec.rb
221
+ - spec/strategy/field/string/select_from_database_spec.rb
222
+ - spec/strategy/field/string/select_from_list_spec.rb
223
+ - spec/strategy/field/string/string_template_spec.rb
133
224
  - spec/strategy/field/whitelist_spec.rb
134
225
  - spec/support/customer_sample.rb
135
226
  - spec/utils/database_spec.rb
227
+ - spec/utils/geojson_parser_spec.rb
136
228
  - spec/utils/random_int_spec.rb
137
229
  - spec/utils/random_string_spec.rb
138
230
  - whitelist_dsl.rb
@@ -164,29 +256,41 @@ summary: Tool to create anonymized production data dump to use for PREF and othe
164
256
  test_files:
165
257
  - spec/acceptance/rdbms_blacklist_spec.rb
166
258
  - spec/acceptance/rdbms_whitelist_spec.rb
259
+ - spec/resource/sample.geojson
167
260
  - spec/spec_helper.rb
168
- - spec/strategy/field/anonymize_time_spec.rb
169
- - spec/strategy/field/date_time_delta_spec.rb
170
- - spec/strategy/field/distinct_column_values_spec.rb
171
- - spec/strategy/field/gmail_template_spec.rb
172
- - spec/strategy/field/lorem_ipsum_spec.rb
261
+ - spec/strategy/field/contact/random_address_spec.rb
262
+ - spec/strategy/field/contact/random_city_spec.rb
263
+ - spec/strategy/field/contact/random_phone_number_spec.rb
264
+ - spec/strategy/field/contact/random_province_spec.rb
265
+ - spec/strategy/field/contact/random_zipcode_spec.rb
266
+ - spec/strategy/field/datetime/anonymize_date_spec.rb
267
+ - spec/strategy/field/datetime/anonymize_datetime_spec.rb
268
+ - spec/strategy/field/datetime/anonymize_time_spec.rb
269
+ - spec/strategy/field/datetime/date_delta_spec.rb
270
+ - spec/strategy/field/datetime/date_time_delta_spec.rb
271
+ - spec/strategy/field/datetime/time_delta_spec.rb
272
+ - spec/strategy/field/default_anon_spec.rb
273
+ - spec/strategy/field/email/gmail_template_spec.rb
274
+ - spec/strategy/field/email/random_email_spec.rb
275
+ - spec/strategy/field/email/random_mailinator_email_spec.rb
276
+ - spec/strategy/field/name/random_first_name_spec.rb
277
+ - spec/strategy/field/name/random_full_name_spec.rb
278
+ - spec/strategy/field/name/random_last_name_spec.rb
279
+ - spec/strategy/field/name/random_user_name_spec.rb
280
+ - spec/strategy/field/number/random_float_delta_spec.rb
281
+ - spec/strategy/field/number/random_float_spec.rb
282
+ - spec/strategy/field/number/random_integer_delta_spec.rb
283
+ - spec/strategy/field/number/random_integer_spec.rb
173
284
  - spec/strategy/field/random_boolean_spec.rb
174
- - spec/strategy/field/random_email_spec.rb
175
- - spec/strategy/field/random_first_name_spec.rb
176
- - spec/strategy/field/random_float_delta_spec.rb
177
- - spec/strategy/field/random_full_name_spec.rb
178
- - spec/strategy/field/random_int_spec.rb
179
- - spec/strategy/field/random_integer_delta_spec.rb
180
- - spec/strategy/field/random_last_name_spec.rb
181
- - spec/strategy/field/random_mailinator_email_spec.rb
182
- - spec/strategy/field/random_phone_number_spec.rb
183
- - spec/strategy/field/random_selection_spec.rb
184
- - spec/strategy/field/random_string_spec.rb
185
- - spec/strategy/field/random_user_name_spec.rb
186
- - spec/strategy/field/string_template_spec.rb
187
- - spec/strategy/field/user_name_template_spec.rb
285
+ - spec/strategy/field/string/formatted_string_numbers_spec.rb
286
+ - spec/strategy/field/string/lorem_ipsum_spec.rb
287
+ - spec/strategy/field/string/random_string_spec.rb
288
+ - spec/strategy/field/string/select_from_database_spec.rb
289
+ - spec/strategy/field/string/select_from_list_spec.rb
290
+ - spec/strategy/field/string/string_template_spec.rb
188
291
  - spec/strategy/field/whitelist_spec.rb
189
292
  - spec/support/customer_sample.rb
190
293
  - spec/utils/database_spec.rb
294
+ - spec/utils/geojson_parser_spec.rb
191
295
  - spec/utils/random_int_spec.rb
192
296
  - spec/utils/random_string_spec.rb