data-anonymization 0.1.2 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (103)
  1. data/.documentup.json +1 -0
  2. data/.travis.yml +0 -1
  3. data/README.md +277 -52
  4. data/blacklist_dsl.rb +1 -3
  5. data/data-anonymization.gemspec +4 -0
  6. data/lib/core/dsl.rb +1 -1
  7. data/lib/data-anonymization.rb +3 -0
  8. data/lib/strategy/base.rb +21 -11
  9. data/lib/strategy/blacklist.rb +2 -1
  10. data/lib/strategy/field/contact/geojson_base.rb +24 -0
  11. data/lib/strategy/field/contact/random_address.rb +17 -0
  12. data/lib/strategy/field/contact/random_city.rb +17 -0
  13. data/lib/strategy/field/contact/random_phone_number.rb +13 -0
  14. data/lib/strategy/field/contact/random_province.rb +17 -0
  15. data/lib/strategy/field/contact/random_zipcode.rb +17 -0
  16. data/lib/strategy/field/datetime/anonymize_date.rb +39 -0
  17. data/lib/strategy/field/datetime/anonymize_datetime.rb +15 -0
  18. data/lib/strategy/field/datetime/anonymize_time.rb +58 -0
  19. data/lib/strategy/field/datetime/date_delta.rb +21 -0
  20. data/lib/strategy/field/{date_time_delta.rb → datetime/date_time_delta.rb} +3 -3
  21. data/lib/strategy/field/datetime/time_delta.rb +12 -0
  22. data/lib/strategy/field/default_anon.rb +12 -7
  23. data/lib/strategy/field/email/gmail_template.rb +16 -0
  24. data/lib/strategy/field/{random_email.rb → email/random_email.rb} +0 -0
  25. data/lib/strategy/field/{random_mailinator_email.rb → email/random_mailinator_email.rb} +0 -2
  26. data/lib/strategy/field/fields.rb +51 -20
  27. data/lib/strategy/field/name/random_first_name.rb +14 -0
  28. data/lib/strategy/field/{random_full_name.rb → name/random_full_name.rb} +0 -0
  29. data/lib/strategy/field/name/random_last_name.rb +14 -0
  30. data/lib/strategy/field/{random_user_name.rb → name/random_user_name.rb} +0 -0
  31. data/lib/strategy/field/number/random_float.rb +23 -0
  32. data/lib/strategy/field/{random_float_delta.rb → number/random_float_delta.rb} +2 -4
  33. data/lib/strategy/field/{random_int.rb → number/random_integer.rb} +1 -1
  34. data/lib/strategy/field/{random_integer_delta.rb → number/random_integer_delta.rb} +2 -5
  35. data/lib/strategy/field/{random_phone_number.rb → string/formatted_string_numbers.rb} +4 -1
  36. data/lib/strategy/field/{lorem_ipsum.rb → string/lorem_ipsum.rb} +0 -0
  37. data/lib/strategy/field/{random_string.rb → string/random_string.rb} +0 -0
  38. data/lib/strategy/field/{distinct_column_values.rb → string/select_from_database.rb} +2 -3
  39. data/lib/strategy/field/string/select_from_file.rb +18 -0
  40. data/lib/strategy/field/string/select_from_list.rb +17 -0
  41. data/lib/strategy/field/{string_template.rb → string/string_template.rb} +0 -0
  42. data/lib/strategy/whitelist.rb +4 -2
  43. data/lib/utils/database.rb +8 -6
  44. data/lib/utils/geojson_parser.rb +42 -0
  45. data/lib/utils/logging.rb +0 -9
  46. data/lib/utils/progress_bar.rb +29 -0
  47. data/lib/utils/random_float.rb +12 -0
  48. data/lib/utils/random_int.rb +3 -7
  49. data/lib/utils/resource.rb +4 -0
  50. data/lib/version.rb +1 -1
  51. data/resources/UK_addresses.geojson +300 -0
  52. data/resources/US_addresses.geojson +300 -0
  53. data/spec/acceptance/rdbms_blacklist_spec.rb +2 -2
  54. data/spec/acceptance/rdbms_whitelist_spec.rb +6 -8
  55. data/spec/resource/sample.geojson +1 -0
  56. data/spec/spec_helper.rb +3 -2
  57. data/spec/strategy/field/contact/random_address_spec.rb +12 -0
  58. data/spec/strategy/field/contact/random_city_spec.rb +14 -0
  59. data/spec/strategy/field/contact/random_phone_number_spec.rb +16 -0
  60. data/spec/strategy/field/contact/random_province_spec.rb +14 -0
  61. data/spec/strategy/field/contact/random_zipcode_spec.rb +14 -0
  62. data/spec/strategy/field/datetime/anonymize_date_spec.rb +27 -0
  63. data/spec/strategy/field/datetime/anonymize_datetime_spec.rb +57 -0
  64. data/spec/strategy/field/datetime/anonymize_time_spec.rb +57 -0
  65. data/spec/strategy/field/datetime/date_delta_spec.rb +36 -0
  66. data/spec/strategy/field/{date_time_delta_spec.rb → datetime/date_time_delta_spec.rb} +3 -2
  67. data/spec/strategy/field/datetime/time_delta_spec.rb +44 -0
  68. data/spec/strategy/field/default_anon_spec.rb +42 -0
  69. data/spec/strategy/field/email/gmail_template_spec.rb +17 -0
  70. data/spec/strategy/field/{random_email_spec.rb → email/random_email_spec.rb} +2 -2
  71. data/spec/strategy/field/email/random_mailinator_email_spec.rb +14 -0
  72. data/spec/strategy/field/{random_first_name_spec.rb → name/random_first_name_spec.rb} +2 -2
  73. data/spec/strategy/field/{random_full_name_spec.rb → name/random_full_name_spec.rb} +2 -2
  74. data/spec/strategy/field/{random_last_name_spec.rb → name/random_last_name_spec.rb} +2 -2
  75. data/spec/strategy/field/{random_user_name_spec.rb → name/random_user_name_spec.rb} +2 -2
  76. data/spec/strategy/field/{random_float_delta_spec.rb → number/random_float_delta_spec.rb} +2 -2
  77. data/spec/strategy/field/number/random_float_spec.rb +28 -0
  78. data/spec/strategy/field/{random_integer_delta_spec.rb → number/random_integer_delta_spec.rb} +3 -5
  79. data/spec/strategy/field/{random_int_spec.rb → number/random_integer_spec.rb} +4 -4
  80. data/spec/strategy/field/random_boolean_spec.rb +2 -2
  81. data/spec/strategy/field/string/formatted_string_numbers_spec.rb +15 -0
  82. data/spec/strategy/field/{lorem_ipsum_spec.rb → string/lorem_ipsum_spec.rb} +2 -2
  83. data/spec/strategy/field/{random_string_spec.rb → string/random_string_spec.rb} +2 -2
  84. data/spec/strategy/field/{distinct_column_values_spec.rb → string/select_from_database_spec.rb} +3 -3
  85. data/spec/strategy/field/{random_selection_spec.rb → string/select_from_list_spec.rb} +5 -5
  86. data/spec/strategy/field/{string_template_spec.rb → string/string_template_spec.rb} +2 -2
  87. data/spec/strategy/field/whitelist_spec.rb +2 -2
  88. data/spec/support/customer_sample.rb +1 -1
  89. data/spec/utils/database_spec.rb +2 -2
  90. data/spec/utils/geojson_parser_spec.rb +38 -0
  91. data/whitelist_dsl.rb +4 -6
  92. metadata +163 -59
  93. data/lib/strategy/field/anonymize_time.rb +0 -57
  94. data/lib/strategy/field/gmail_template.rb +0 -17
  95. data/lib/strategy/field/random_first_name.rb +0 -18
  96. data/lib/strategy/field/random_last_name.rb +0 -19
  97. data/lib/strategy/field/random_selection.rb +0 -23
  98. data/lib/strategy/field/user_name_template.rb +0 -22
  99. data/spec/strategy/field/anonymize_time_spec.rb +0 -23
  100. data/spec/strategy/field/gmail_template_spec.rb +0 -14
  101. data/spec/strategy/field/random_mailinator_email_spec.rb +0 -21
  102. data/spec/strategy/field/random_phone_number_spec.rb +0 -35
  103. data/spec/strategy/field/user_name_template_spec.rb +0 -13
@@ -1,8 +1,8 @@
1
1
  require "spec_helper"
2
2
 
3
- describe DataAnon::Strategy::Field::RandomEmail do
3
+ describe FieldStrategy::RandomEmail do
4
4
 
5
- RandomEmail = DataAnon::Strategy::Field::RandomEmail
5
+ RandomEmail = FieldStrategy::RandomEmail
6
6
  let(:field) {DataAnon::Core::Field.new('email','real@email.com',1,nil)}
7
7
 
8
8
 
@@ -0,0 +1,14 @@
1
+ require "spec_helper"
2
+
3
+ describe FieldStrategy::RandomMailinatorEmail do
4
+
5
+ RandomMailinatorEmail = FieldStrategy::RandomMailinatorEmail
6
+ let(:field) {DataAnon::Core::Field.new('email','user@company.com',1,nil)}
7
+
8
+ describe 'anonymized email should not be the same as original email' do
9
+ let(:anonymized_email) {RandomMailinatorEmail.new.anonymize(field)}
10
+
11
+ it {anonymized_email.should_not equal field.value}
12
+ it {anonymized_email.should match '^\S+@\mailinator\.com$'}
13
+ end
14
+ end
@@ -1,8 +1,8 @@
1
1
  require "spec_helper"
2
2
 
3
- describe DataAnon::Strategy::Field::RandomFirstName do
3
+ describe FieldStrategy::RandomFirstName do
4
4
 
5
- RandomFirstName = DataAnon::Strategy::Field::RandomFirstName
5
+ RandomFirstName = FieldStrategy::RandomFirstName
6
6
  let(:field) {DataAnon::Core::Field.new('firstname','fakeFirstName',1,nil)}
7
7
 
8
8
  describe 'anonymized name must not be the same as provided name' do
@@ -1,8 +1,8 @@
1
1
  require "spec_helper"
2
2
 
3
- describe DataAnon::Strategy::Field::RandomFullName do
3
+ describe FieldStrategy::RandomFullName do
4
4
 
5
- RandomFullName = DataAnon::Strategy::Field::RandomFullName
5
+ RandomFullName = FieldStrategy::RandomFullName
6
6
  let(:field) {DataAnon::Core::Field.new('name','Fake User',1,nil)}
7
7
 
8
8
  describe 'anonymized name should be the same as original' do
@@ -1,8 +1,8 @@
1
1
  require "spec_helper"
2
2
 
3
- describe DataAnon::Strategy::Field::RandomLastName do
3
+ describe FieldStrategy::RandomLastName do
4
4
 
5
- RandomLastName = DataAnon::Strategy::Field::RandomLastName
5
+ RandomLastName = FieldStrategy::RandomLastName
6
6
  let(:field) {DataAnon::Core::Field.new('lastname','fakeLastName',1,nil)}
7
7
 
8
8
  describe 'anonymized name must not be the same as provided name' do
@@ -1,8 +1,8 @@
1
1
  require "spec_helper"
2
2
 
3
- describe DataAnon::Strategy::Field::RandomUserName do
3
+ describe FieldStrategy::RandomUserName do
4
4
 
5
- RandomUserName = DataAnon::Strategy::Field::RandomUserName
5
+ RandomUserName = FieldStrategy::RandomUserName
6
6
  let(:field) {DataAnon::Core::Field.new('username','fakeUserName',1,nil)}
7
7
 
8
8
  describe 'anonymized user name should not be the same as original user name' do
@@ -1,8 +1,8 @@
1
1
  require "spec_helper"
2
2
 
3
- describe DataAnon::Strategy::Field::RandomFloatDelta do
3
+ describe FieldStrategy::RandomFloatDelta do
4
4
 
5
- RandomFloatDelta = DataAnon::Strategy::Field::RandomFloatDelta
5
+ RandomFloatDelta = FieldStrategy::RandomFloatDelta
6
6
  let(:field) {DataAnon::Core::Field.new('float_field',5.5,1,nil)}
7
7
 
8
8
  describe 'anonymized float should not be the same as original value' do
@@ -0,0 +1,28 @@
1
+ require "spec_helper"
2
+
3
+ describe FieldStrategy::RandomFloat do
4
+
5
+ RandomFloat = FieldStrategy::RandomFloat
6
+ let(:field) { DataAnon::Core::Field.new('points', 2.5, 1, nil) }
7
+
8
+ describe 'verify age range between 18 and 70' do
9
+
10
+ let(:anonymized_int) { RandomFloat.new(2.0, 8.0).anonymize(field) }
11
+
12
+ it { anonymized_int.should >= 2.0 }
13
+ it { anonymized_int.should <= 8.0 }
14
+
15
+ end
16
+
17
+ describe 'default range between 0 and 100' do
18
+
19
+ let(:anonymized_int) { RandomFloat.new.anonymize(field) }
20
+
21
+ it { anonymized_int.should >= 0.0 }
22
+ it { anonymized_int.should <= 100.0 }
23
+
24
+ end
25
+
26
+
27
+
28
+ end
@@ -1,16 +1,14 @@
1
1
  require "spec_helper"
2
2
 
3
- describe DataAnon::Strategy::Field::RandomIntegerDelta do
3
+ describe FieldStrategy::RandomIntegerDelta do
4
4
 
5
- RandomIntegerDelta = DataAnon::Strategy::Field::RandomIntegerDelta
5
+ RandomIntegerDelta = FieldStrategy::RandomIntegerDelta
6
6
  let(:field) {DataAnon::Core::Field.new('integer_field',100,1,nil)}
7
7
 
8
8
  describe "anonymized value returned should be an integer" do
9
9
  let(:anonymized_integer) {RandomIntegerDelta.new(10).anonymize(field)}
10
10
 
11
- it { is_integer = anonymized_integer.is_a? Integer
12
- is_integer.should be true
13
- }
11
+ it { anonymized_integer.should be_kind_of Integer }
14
12
 
15
13
  end
16
14
 
@@ -1,13 +1,13 @@
1
1
  require "spec_helper"
2
2
 
3
- describe DataAnon::Strategy::Field::RandomInt do
3
+ describe FieldStrategy::RandomInteger do
4
4
 
5
- RandomInt = DataAnon::Strategy::Field::RandomInt
5
+ RandomInteger = FieldStrategy::RandomInteger
6
6
  let(:field) { DataAnon::Core::Field.new('age', 25, 1, nil) }
7
7
 
8
8
  describe 'verify age range between 18 and 70' do
9
9
 
10
- let(:anonymized_int) { RandomInt.new(18, 70).anonymize(field) }
10
+ let(:anonymized_int) { RandomInteger.new(18, 70).anonymize(field) }
11
11
 
12
12
  it { anonymized_int.should >= 18 }
13
13
  it { anonymized_int.should <= 70 }
@@ -16,7 +16,7 @@ describe DataAnon::Strategy::Field::RandomInt do
16
16
 
17
17
  describe 'default range between 0 and 100' do
18
18
 
19
- let(:anonymized_int) { RandomInt.new.anonymize(field) }
19
+ let(:anonymized_int) { RandomInteger.new.anonymize(field) }
20
20
 
21
21
  it { anonymized_int.should >= 0 }
22
22
  it { anonymized_int.should <= 100 }
@@ -1,8 +1,8 @@
1
1
  require "spec_helper"
2
2
 
3
- describe DataAnon::Strategy::Field::RandomBoolean do
3
+ describe FieldStrategy::RandomBoolean do
4
4
 
5
- RandomBoolean = DataAnon::Strategy::Field::RandomBoolean
5
+ RandomBoolean = FieldStrategy::RandomBoolean
6
6
  let(:field) {DataAnon::Core::Field.new('boolean_field',true,1,nil)}
7
7
 
8
8
  describe 'anonymized value should be a boolean' do
@@ -0,0 +1,15 @@
1
+ require "spec_helper"
2
+
3
+ describe FieldStrategy::FormattedStringNumber do
4
+
5
+ FormattedStringNumber = FieldStrategy::FormattedStringNumber
6
+ let(:field) {DataAnon::Core::Field.new('credit_card_number',"1111-2222-3333-4444",1,nil)}
7
+
8
+ describe 'anonymized credit card number preserving the format' do
9
+ let(:anonymized_number) {FormattedStringNumber.new.anonymize(field)}
10
+
11
+ it {anonymized_number.should_not equal field.value}
12
+ it { anonymized_number.should match /^\d{4}-\d{4}-\d{4}-\d{4}$/}
13
+ end
14
+
15
+ end
@@ -1,8 +1,8 @@
1
1
  require "spec_helper"
2
2
 
3
- describe DataAnon::Strategy::Field::LoremIpsum do
3
+ describe FieldStrategy::LoremIpsum do
4
4
 
5
- LoremIpsum = DataAnon::Strategy::Field::LoremIpsum
5
+ LoremIpsum = FieldStrategy::LoremIpsum
6
6
  let(:field) { DataAnon::Core::Field.new('state', 'New Delhi', 1, nil) }
7
7
 
8
8
  describe 'should return same length value using default text' do
@@ -1,8 +1,8 @@
1
1
  require "spec_helper"
2
2
 
3
- describe DataAnon::Strategy::Field::RandomString do
3
+ describe FieldStrategy::RandomString do
4
4
 
5
- RandomString = DataAnon::Strategy::Field::RandomString
5
+ RandomString = FieldStrategy::RandomString
6
6
 
7
7
  describe 'anonymized string must not be the same as original string' do
8
8
  let(:field) {DataAnon::Core::Field.new('string_field','fakeString',1,nil)}
@@ -1,18 +1,18 @@
1
1
  require "spec_helper"
2
2
 
3
- describe DataAnon::Strategy::Field::DistinctColumnValues do
3
+ describe FieldStrategy::SelectFromDatabase do
4
4
 
5
5
  before(:each) do
6
6
  source = {:adapter => 'sqlite3', :database => 'sample-data/chinook.sqlite'}
7
7
  DataAnon::Utils::SourceDatabase.establish_connection source
8
8
  end
9
9
 
10
- DistinctColumnValues = DataAnon::Strategy::Field::DistinctColumnValues
10
+ SelectFromDatabase = FieldStrategy::SelectFromDatabase
11
11
  let(:field) { DataAnon::Core::Field.new('name', 'Abcd', 1, nil) }
12
12
 
13
13
  describe 'more than one values in predefined list' do
14
14
 
15
- let(:anonymized_value) { DistinctColumnValues.new('MediaType','Name').anonymize(field) }
15
+ let(:anonymized_value) { SelectFromDatabase.new('MediaType','Name').anonymize(field) }
16
16
 
17
17
  it { anonymized_value.should_not be('Abcd') }
18
18
  it { anonymized_value.should_not be_empty }
@@ -1,14 +1,14 @@
1
1
  require "spec_helper"
2
2
 
3
- describe DataAnon::Strategy::Field::RandomSelection do
3
+ describe FieldStrategy::SelectFromList do
4
4
 
5
- RandomSelection = DataAnon::Strategy::Field::RandomSelection
5
+ SelectFromList = FieldStrategy::SelectFromList
6
6
  let(:field) { DataAnon::Core::Field.new('state', 'New Delhi', 1, nil) }
7
7
 
8
8
  describe 'more than one values in predefined list' do
9
9
 
10
10
  let(:states) { ['Maharashtra','Gujrat','Karnataka'] }
11
- let(:anonymized_value) { RandomSelection.new(states).anonymize(field) }
11
+ let(:anonymized_value) { SelectFromList.new(states).anonymize(field) }
12
12
 
13
13
  it { states.should include(anonymized_value) }
14
14
 
@@ -17,7 +17,7 @@ describe DataAnon::Strategy::Field::RandomSelection do
17
17
  describe 'only one value in list' do
18
18
 
19
19
  let(:states) { ['Maharashtra'] }
20
- let(:anonymized_value) { RandomSelection.new(states).anonymize(field) }
20
+ let(:anonymized_value) { SelectFromList.new(states).anonymize(field) }
21
21
 
22
22
  it { anonymized_value.should == 'Maharashtra' }
23
23
 
@@ -26,7 +26,7 @@ describe DataAnon::Strategy::Field::RandomSelection do
26
26
  describe 'string value' do
27
27
 
28
28
  let(:states) { 'Maharashtra' }
29
- let(:anonymized_value) { RandomSelection.new(states).anonymize(field) }
29
+ let(:anonymized_value) { SelectFromList.new(states).anonymize(field) }
30
30
 
31
31
  it { anonymized_value.should == 'Maharashtra' }
32
32
 
@@ -1,8 +1,8 @@
1
1
  require "spec_helper"
2
2
 
3
- describe DataAnon::Strategy::Field::StringTemplate do
3
+ describe FieldStrategy::StringTemplate do
4
4
 
5
- StringTemplate = DataAnon::Strategy::Field::StringTemplate
5
+ StringTemplate = FieldStrategy::StringTemplate
6
6
  let(:field) { DataAnon::Core::Field.new('state', 'New Delhi', 3456, nil) }
7
7
 
8
8
  describe 'should return same string value as StringTemplate' do
@@ -1,8 +1,8 @@
1
1
  require "spec_helper"
2
2
 
3
- describe DataAnon::Strategy::Field::Whitelist do
3
+ describe FieldStrategy::Whitelist do
4
4
 
5
- Whitelist = DataAnon::Strategy::Field::Whitelist
5
+ Whitelist = FieldStrategy::Whitelist
6
6
 
7
7
  describe 'should return same string value as whitelist' do
8
8
  let(:field) { DataAnon::Core::Field.new('state', 'New Delhi', 1, nil) }
@@ -34,7 +34,7 @@ class CustomerSample
34
34
 
35
35
  def self.insert_record connection_spec, data_hash = SAMPLE_DATA
36
36
  DataAnon::Utils::TempDatabase.establish_connection connection_spec
37
- source = DataAnon::Utils::BaseTable.create_table 'customers', 'cust_id',DataAnon::Utils::TempDatabase
37
+ source = DataAnon::Utils::BaseTable.create_table DataAnon::Utils::TempDatabase, 'customers', 'cust_id'
38
38
  cust = source.new data_hash
39
39
  cust.cust_id = data_hash[:cust_id]
40
40
  cust.save!
@@ -11,13 +11,13 @@ describe "Utils" do
11
11
  end
12
12
 
13
13
  it "should test the connection to source database" do
14
- album = DataAnon::Utils::SourceTable.create "Album", "AlbumId"
14
+ album = DataAnon::Utils::SourceTable.create "Album", ["AlbumId"]
15
15
  album.count.should > 0
16
16
  album.all.length > 0
17
17
  end
18
18
 
19
19
  it "should test the connection to destination database" do
20
- album = DataAnon::Utils::DestinationTable.create "Album", "AlbumId"
20
+ album = DataAnon::Utils::DestinationTable.create "Album", ["AlbumId"]
21
21
  album.count.should == 0
22
22
  album.all.length == 0
23
23
 
@@ -0,0 +1,38 @@
1
+ require "spec_helper"
2
+
3
+ describe "Geo Json Parser" do
4
+
5
+ SAMPLE_DATA_FILE_PATH = DataAnon::Utils::Resource.project_home+'spec/resource/sample.geojson'
6
+
7
+ describe "parser should return list of addresses when address method is called" do
8
+ let(:result_list) {DataAnon::Utils::GeojsonParser.address(SAMPLE_DATA_FILE_PATH)}
9
+
10
+ it {result_list.length.should be 1}
11
+ it {result_list[0].should eq("333 Willoughby Ave")}
12
+ end
13
+
14
+ describe "parser should return list of zip codes when zipcode method is called" do
15
+ let(:result_list) {DataAnon::Utils::GeojsonParser.zipcode(SAMPLE_DATA_FILE_PATH)}
16
+
17
+ it {result_list.length.should be 1}
18
+ it {result_list[0].should eq("99801")}
19
+
20
+ end
21
+
22
+ describe "parser should return list of province when province method is called" do
23
+ let(:result_list) {DataAnon::Utils::GeojsonParser.province(SAMPLE_DATA_FILE_PATH)}
24
+
25
+ it {result_list.length.should be 1}
26
+ it {result_list[0].should eq("AK")}
27
+
28
+ end
29
+
30
+ describe "parser should return list of cities when city method is called" do
31
+ let(:result_list) {DataAnon::Utils::GeojsonParser.city(SAMPLE_DATA_FILE_PATH)}
32
+
33
+ it {result_list.length.should be 1}
34
+ it {result_list[0].should eq("Juneau")}
35
+
36
+ end
37
+
38
+ end
data/whitelist_dsl.rb CHANGED
@@ -2,8 +2,6 @@ system "rake empty_dest" # clean destination database on every call
2
2
 
3
3
  require 'data-anonymization'
4
4
 
5
- FS = DataAnon::Strategy::Field
6
-
7
5
  DataAnon::Utils::Logging.logger.level = Logger::INFO
8
6
 
9
7
  database 'Chinook' do
@@ -11,7 +9,7 @@ database 'Chinook' do
11
9
  source_db :adapter => 'sqlite3', :database => 'sample-data/chinook.sqlite'
12
10
  destination_db :adapter => 'sqlite3', :database => 'sample-data/chinook-empty.sqlite'
13
11
 
14
- default_field_strategies :string => FS::StringTemplate.new('Sunit #{row_number} Parekh')
12
+ default_field_strategies :string => FieldStrategy::StringTemplate.new('Sunit #{row_number} Parekh')
15
13
 
16
14
  table 'Genre' do
17
15
  primary_key 'GenreId'
@@ -24,19 +22,19 @@ database 'Chinook' do
24
22
  table 'MediaType' do
25
23
  primary_key 'MediaTypeId'
26
24
  anonymize('MediaTypeId') { |field| field.value } # same as whitelist
27
- anonymize('Name').using FS::StringTemplate.new('Media Type #{row_number}')
25
+ anonymize('Name').using FieldStrategy::StringTemplate.new('Media Type #{row_number}')
28
26
 
29
27
  end
30
28
 
31
29
  table 'Employee' do
32
30
  primary_key 'EmployeeId'
33
31
  whitelist 'EmployeeId'
34
- anonymize('BirthDate').using FS::DateTimeDelta.new(1,1)
32
+ anonymize('BirthDate').using FieldStrategy::DateTimeDelta.new(1,1)
35
33
  end
36
34
 
37
35
  table 'Customer' do
38
36
  primary_key 'CustomerId'
39
- anonymize('Phone').using FS::RandomPhoneNumber.new
37
+ anonymize('Phone').using FieldStrategy::RandomPhoneNumber.new
40
38
  end
41
39
 
42
40
 
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: data-anonymization
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.2
4
+ version: 0.2.0
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -11,7 +11,7 @@ authors:
11
11
  autorequire:
12
12
  bindir: bin
13
13
  cert_chain: []
14
- date: 2012-08-14 00:00:00.000000000 Z
14
+ date: 2012-08-17 00:00:00.000000000 Z
15
15
  dependencies:
16
16
  - !ruby/object:Gem::Dependency
17
17
  name: activerecord
@@ -29,6 +29,22 @@ dependencies:
29
29
  - - ~>
30
30
  - !ruby/object:Gem::Version
31
31
  version: 3.2.8
32
+ - !ruby/object:Gem::Dependency
33
+ name: composite_primary_keys
34
+ requirement: !ruby/object:Gem::Requirement
35
+ none: false
36
+ requirements:
37
+ - - ~>
38
+ - !ruby/object:Gem::Version
39
+ version: 5.0.8
40
+ type: :runtime
41
+ prerelease: false
42
+ version_requirements: !ruby/object:Gem::Requirement
43
+ none: false
44
+ requirements:
45
+ - - ~>
46
+ - !ruby/object:Gem::Version
47
+ version: 5.0.8
32
48
  - !ruby/object:Gem::Dependency
33
49
  name: activesupport
34
50
  requirement: !ruby/object:Gem::Requirement
@@ -45,6 +61,54 @@ dependencies:
45
61
  - - ~>
46
62
  - !ruby/object:Gem::Version
47
63
  version: 3.2.8
64
+ - !ruby/object:Gem::Dependency
65
+ name: rgeo
66
+ requirement: !ruby/object:Gem::Requirement
67
+ none: false
68
+ requirements:
69
+ - - ~>
70
+ - !ruby/object:Gem::Version
71
+ version: 0.3.15
72
+ type: :runtime
73
+ prerelease: false
74
+ version_requirements: !ruby/object:Gem::Requirement
75
+ none: false
76
+ requirements:
77
+ - - ~>
78
+ - !ruby/object:Gem::Version
79
+ version: 0.3.15
80
+ - !ruby/object:Gem::Dependency
81
+ name: rgeo-geojson
82
+ requirement: !ruby/object:Gem::Requirement
83
+ none: false
84
+ requirements:
85
+ - - ~>
86
+ - !ruby/object:Gem::Version
87
+ version: 0.2.3
88
+ type: :runtime
89
+ prerelease: false
90
+ version_requirements: !ruby/object:Gem::Requirement
91
+ none: false
92
+ requirements:
93
+ - - ~>
94
+ - !ruby/object:Gem::Version
95
+ version: 0.2.3
96
+ - !ruby/object:Gem::Dependency
97
+ name: powerbar
98
+ requirement: !ruby/object:Gem::Requirement
99
+ none: false
100
+ requirements:
101
+ - - ~>
102
+ - !ruby/object:Gem::Version
103
+ version: 1.0.8
104
+ type: :runtime
105
+ prerelease: false
106
+ version_requirements: !ruby/object:Gem::Requirement
107
+ none: false
108
+ requirements:
109
+ - - ~>
110
+ - !ruby/object:Gem::Version
111
+ version: 1.0.8
48
112
  description: Data anonymization tool for RDBMS databases
49
113
  email:
50
114
  - parekh.sunit@gmail.com
@@ -72,67 +136,95 @@ files:
72
136
  - lib/data-anonymization.rb
73
137
  - lib/strategy/base.rb
74
138
  - lib/strategy/blacklist.rb
75
- - lib/strategy/field/anonymize_time.rb
76
139
  - lib/strategy/field/anonymous.rb
77
- - lib/strategy/field/date_time_delta.rb
140
+ - lib/strategy/field/contact/geojson_base.rb
141
+ - lib/strategy/field/contact/random_address.rb
142
+ - lib/strategy/field/contact/random_city.rb
143
+ - lib/strategy/field/contact/random_phone_number.rb
144
+ - lib/strategy/field/contact/random_province.rb
145
+ - lib/strategy/field/contact/random_zipcode.rb
146
+ - lib/strategy/field/datetime/anonymize_date.rb
147
+ - lib/strategy/field/datetime/anonymize_datetime.rb
148
+ - lib/strategy/field/datetime/anonymize_time.rb
149
+ - lib/strategy/field/datetime/date_delta.rb
150
+ - lib/strategy/field/datetime/date_time_delta.rb
151
+ - lib/strategy/field/datetime/time_delta.rb
78
152
  - lib/strategy/field/default_anon.rb
79
- - lib/strategy/field/distinct_column_values.rb
153
+ - lib/strategy/field/email/gmail_template.rb
154
+ - lib/strategy/field/email/random_email.rb
155
+ - lib/strategy/field/email/random_mailinator_email.rb
80
156
  - lib/strategy/field/fields.rb
81
- - lib/strategy/field/gmail_template.rb
82
- - lib/strategy/field/lorem_ipsum.rb
157
+ - lib/strategy/field/name/random_first_name.rb
158
+ - lib/strategy/field/name/random_full_name.rb
159
+ - lib/strategy/field/name/random_last_name.rb
160
+ - lib/strategy/field/name/random_user_name.rb
161
+ - lib/strategy/field/number/random_float.rb
162
+ - lib/strategy/field/number/random_float_delta.rb
163
+ - lib/strategy/field/number/random_integer.rb
164
+ - lib/strategy/field/number/random_integer_delta.rb
83
165
  - lib/strategy/field/random_boolean.rb
84
- - lib/strategy/field/random_email.rb
85
- - lib/strategy/field/random_first_name.rb
86
- - lib/strategy/field/random_float_delta.rb
87
- - lib/strategy/field/random_full_name.rb
88
- - lib/strategy/field/random_int.rb
89
- - lib/strategy/field/random_integer_delta.rb
90
- - lib/strategy/field/random_last_name.rb
91
- - lib/strategy/field/random_mailinator_email.rb
92
- - lib/strategy/field/random_phone_number.rb
93
- - lib/strategy/field/random_selection.rb
94
- - lib/strategy/field/random_string.rb
95
- - lib/strategy/field/random_user_name.rb
96
- - lib/strategy/field/string_template.rb
97
- - lib/strategy/field/user_name_template.rb
166
+ - lib/strategy/field/string/formatted_string_numbers.rb
167
+ - lib/strategy/field/string/lorem_ipsum.rb
168
+ - lib/strategy/field/string/random_string.rb
169
+ - lib/strategy/field/string/select_from_database.rb
170
+ - lib/strategy/field/string/select_from_file.rb
171
+ - lib/strategy/field/string/select_from_list.rb
172
+ - lib/strategy/field/string/string_template.rb
98
173
  - lib/strategy/field/whitelist.rb
99
174
  - lib/strategy/strategies.rb
100
175
  - lib/strategy/whitelist.rb
101
176
  - lib/tasks/rake_tasks.rb
102
177
  - lib/utils/database.rb
178
+ - lib/utils/geojson_parser.rb
103
179
  - lib/utils/logging.rb
180
+ - lib/utils/progress_bar.rb
181
+ - lib/utils/random_float.rb
104
182
  - lib/utils/random_int.rb
105
183
  - lib/utils/random_string.rb
106
184
  - lib/utils/resource.rb
107
185
  - lib/version.rb
186
+ - resources/UK_addresses.geojson
187
+ - resources/US_addresses.geojson
108
188
  - resources/first_names.txt
109
189
  - resources/last_names.txt
110
190
  - spec/acceptance/rdbms_blacklist_spec.rb
111
191
  - spec/acceptance/rdbms_whitelist_spec.rb
192
+ - spec/resource/sample.geojson
112
193
  - spec/spec_helper.rb
113
- - spec/strategy/field/anonymize_time_spec.rb
114
- - spec/strategy/field/date_time_delta_spec.rb
115
- - spec/strategy/field/distinct_column_values_spec.rb
116
- - spec/strategy/field/gmail_template_spec.rb
117
- - spec/strategy/field/lorem_ipsum_spec.rb
194
+ - spec/strategy/field/contact/random_address_spec.rb
195
+ - spec/strategy/field/contact/random_city_spec.rb
196
+ - spec/strategy/field/contact/random_phone_number_spec.rb
197
+ - spec/strategy/field/contact/random_province_spec.rb
198
+ - spec/strategy/field/contact/random_zipcode_spec.rb
199
+ - spec/strategy/field/datetime/anonymize_date_spec.rb
200
+ - spec/strategy/field/datetime/anonymize_datetime_spec.rb
201
+ - spec/strategy/field/datetime/anonymize_time_spec.rb
202
+ - spec/strategy/field/datetime/date_delta_spec.rb
203
+ - spec/strategy/field/datetime/date_time_delta_spec.rb
204
+ - spec/strategy/field/datetime/time_delta_spec.rb
205
+ - spec/strategy/field/default_anon_spec.rb
206
+ - spec/strategy/field/email/gmail_template_spec.rb
207
+ - spec/strategy/field/email/random_email_spec.rb
208
+ - spec/strategy/field/email/random_mailinator_email_spec.rb
209
+ - spec/strategy/field/name/random_first_name_spec.rb
210
+ - spec/strategy/field/name/random_full_name_spec.rb
211
+ - spec/strategy/field/name/random_last_name_spec.rb
212
+ - spec/strategy/field/name/random_user_name_spec.rb
213
+ - spec/strategy/field/number/random_float_delta_spec.rb
214
+ - spec/strategy/field/number/random_float_spec.rb
215
+ - spec/strategy/field/number/random_integer_delta_spec.rb
216
+ - spec/strategy/field/number/random_integer_spec.rb
118
217
  - spec/strategy/field/random_boolean_spec.rb
119
- - spec/strategy/field/random_email_spec.rb
120
- - spec/strategy/field/random_first_name_spec.rb
121
- - spec/strategy/field/random_float_delta_spec.rb
122
- - spec/strategy/field/random_full_name_spec.rb
123
- - spec/strategy/field/random_int_spec.rb
124
- - spec/strategy/field/random_integer_delta_spec.rb
125
- - spec/strategy/field/random_last_name_spec.rb
126
- - spec/strategy/field/random_mailinator_email_spec.rb
127
- - spec/strategy/field/random_phone_number_spec.rb
128
- - spec/strategy/field/random_selection_spec.rb
129
- - spec/strategy/field/random_string_spec.rb
130
- - spec/strategy/field/random_user_name_spec.rb
131
- - spec/strategy/field/string_template_spec.rb
132
- - spec/strategy/field/user_name_template_spec.rb
218
+ - spec/strategy/field/string/formatted_string_numbers_spec.rb
219
+ - spec/strategy/field/string/lorem_ipsum_spec.rb
220
+ - spec/strategy/field/string/random_string_spec.rb
221
+ - spec/strategy/field/string/select_from_database_spec.rb
222
+ - spec/strategy/field/string/select_from_list_spec.rb
223
+ - spec/strategy/field/string/string_template_spec.rb
133
224
  - spec/strategy/field/whitelist_spec.rb
134
225
  - spec/support/customer_sample.rb
135
226
  - spec/utils/database_spec.rb
227
+ - spec/utils/geojson_parser_spec.rb
136
228
  - spec/utils/random_int_spec.rb
137
229
  - spec/utils/random_string_spec.rb
138
230
  - whitelist_dsl.rb
@@ -164,29 +256,41 @@ summary: Tool to create anonymized production data dump to use for PREF and othe
164
256
  test_files:
165
257
  - spec/acceptance/rdbms_blacklist_spec.rb
166
258
  - spec/acceptance/rdbms_whitelist_spec.rb
259
+ - spec/resource/sample.geojson
167
260
  - spec/spec_helper.rb
168
- - spec/strategy/field/anonymize_time_spec.rb
169
- - spec/strategy/field/date_time_delta_spec.rb
170
- - spec/strategy/field/distinct_column_values_spec.rb
171
- - spec/strategy/field/gmail_template_spec.rb
172
- - spec/strategy/field/lorem_ipsum_spec.rb
261
+ - spec/strategy/field/contact/random_address_spec.rb
262
+ - spec/strategy/field/contact/random_city_spec.rb
263
+ - spec/strategy/field/contact/random_phone_number_spec.rb
264
+ - spec/strategy/field/contact/random_province_spec.rb
265
+ - spec/strategy/field/contact/random_zipcode_spec.rb
266
+ - spec/strategy/field/datetime/anonymize_date_spec.rb
267
+ - spec/strategy/field/datetime/anonymize_datetime_spec.rb
268
+ - spec/strategy/field/datetime/anonymize_time_spec.rb
269
+ - spec/strategy/field/datetime/date_delta_spec.rb
270
+ - spec/strategy/field/datetime/date_time_delta_spec.rb
271
+ - spec/strategy/field/datetime/time_delta_spec.rb
272
+ - spec/strategy/field/default_anon_spec.rb
273
+ - spec/strategy/field/email/gmail_template_spec.rb
274
+ - spec/strategy/field/email/random_email_spec.rb
275
+ - spec/strategy/field/email/random_mailinator_email_spec.rb
276
+ - spec/strategy/field/name/random_first_name_spec.rb
277
+ - spec/strategy/field/name/random_full_name_spec.rb
278
+ - spec/strategy/field/name/random_last_name_spec.rb
279
+ - spec/strategy/field/name/random_user_name_spec.rb
280
+ - spec/strategy/field/number/random_float_delta_spec.rb
281
+ - spec/strategy/field/number/random_float_spec.rb
282
+ - spec/strategy/field/number/random_integer_delta_spec.rb
283
+ - spec/strategy/field/number/random_integer_spec.rb
173
284
  - spec/strategy/field/random_boolean_spec.rb
174
- - spec/strategy/field/random_email_spec.rb
175
- - spec/strategy/field/random_first_name_spec.rb
176
- - spec/strategy/field/random_float_delta_spec.rb
177
- - spec/strategy/field/random_full_name_spec.rb
178
- - spec/strategy/field/random_int_spec.rb
179
- - spec/strategy/field/random_integer_delta_spec.rb
180
- - spec/strategy/field/random_last_name_spec.rb
181
- - spec/strategy/field/random_mailinator_email_spec.rb
182
- - spec/strategy/field/random_phone_number_spec.rb
183
- - spec/strategy/field/random_selection_spec.rb
184
- - spec/strategy/field/random_string_spec.rb
185
- - spec/strategy/field/random_user_name_spec.rb
186
- - spec/strategy/field/string_template_spec.rb
187
- - spec/strategy/field/user_name_template_spec.rb
285
+ - spec/strategy/field/string/formatted_string_numbers_spec.rb
286
+ - spec/strategy/field/string/lorem_ipsum_spec.rb
287
+ - spec/strategy/field/string/random_string_spec.rb
288
+ - spec/strategy/field/string/select_from_database_spec.rb
289
+ - spec/strategy/field/string/select_from_list_spec.rb
290
+ - spec/strategy/field/string/string_template_spec.rb
188
291
  - spec/strategy/field/whitelist_spec.rb
189
292
  - spec/support/customer_sample.rb
190
293
  - spec/utils/database_spec.rb
294
+ - spec/utils/geojson_parser_spec.rb
191
295
  - spec/utils/random_int_spec.rb
192
296
  - spec/utils/random_string_spec.rb