data-anonymization 0.1.2 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (103) hide show
  1. data/.documentup.json +1 -0
  2. data/.travis.yml +0 -1
  3. data/README.md +277 -52
  4. data/blacklist_dsl.rb +1 -3
  5. data/data-anonymization.gemspec +4 -0
  6. data/lib/core/dsl.rb +1 -1
  7. data/lib/data-anonymization.rb +3 -0
  8. data/lib/strategy/base.rb +21 -11
  9. data/lib/strategy/blacklist.rb +2 -1
  10. data/lib/strategy/field/contact/geojson_base.rb +24 -0
  11. data/lib/strategy/field/contact/random_address.rb +17 -0
  12. data/lib/strategy/field/contact/random_city.rb +17 -0
  13. data/lib/strategy/field/contact/random_phone_number.rb +13 -0
  14. data/lib/strategy/field/contact/random_province.rb +17 -0
  15. data/lib/strategy/field/contact/random_zipcode.rb +17 -0
  16. data/lib/strategy/field/datetime/anonymize_date.rb +39 -0
  17. data/lib/strategy/field/datetime/anonymize_datetime.rb +15 -0
  18. data/lib/strategy/field/datetime/anonymize_time.rb +58 -0
  19. data/lib/strategy/field/datetime/date_delta.rb +21 -0
  20. data/lib/strategy/field/{date_time_delta.rb → datetime/date_time_delta.rb} +3 -3
  21. data/lib/strategy/field/datetime/time_delta.rb +12 -0
  22. data/lib/strategy/field/default_anon.rb +12 -7
  23. data/lib/strategy/field/email/gmail_template.rb +16 -0
  24. data/lib/strategy/field/{random_email.rb → email/random_email.rb} +0 -0
  25. data/lib/strategy/field/{random_mailinator_email.rb → email/random_mailinator_email.rb} +0 -2
  26. data/lib/strategy/field/fields.rb +51 -20
  27. data/lib/strategy/field/name/random_first_name.rb +14 -0
  28. data/lib/strategy/field/{random_full_name.rb → name/random_full_name.rb} +0 -0
  29. data/lib/strategy/field/name/random_last_name.rb +14 -0
  30. data/lib/strategy/field/{random_user_name.rb → name/random_user_name.rb} +0 -0
  31. data/lib/strategy/field/number/random_float.rb +23 -0
  32. data/lib/strategy/field/{random_float_delta.rb → number/random_float_delta.rb} +2 -4
  33. data/lib/strategy/field/{random_int.rb → number/random_integer.rb} +1 -1
  34. data/lib/strategy/field/{random_integer_delta.rb → number/random_integer_delta.rb} +2 -5
  35. data/lib/strategy/field/{random_phone_number.rb → string/formatted_string_numbers.rb} +4 -1
  36. data/lib/strategy/field/{lorem_ipsum.rb → string/lorem_ipsum.rb} +0 -0
  37. data/lib/strategy/field/{random_string.rb → string/random_string.rb} +0 -0
  38. data/lib/strategy/field/{distinct_column_values.rb → string/select_from_database.rb} +2 -3
  39. data/lib/strategy/field/string/select_from_file.rb +18 -0
  40. data/lib/strategy/field/string/select_from_list.rb +17 -0
  41. data/lib/strategy/field/{string_template.rb → string/string_template.rb} +0 -0
  42. data/lib/strategy/whitelist.rb +4 -2
  43. data/lib/utils/database.rb +8 -6
  44. data/lib/utils/geojson_parser.rb +42 -0
  45. data/lib/utils/logging.rb +0 -9
  46. data/lib/utils/progress_bar.rb +29 -0
  47. data/lib/utils/random_float.rb +12 -0
  48. data/lib/utils/random_int.rb +3 -7
  49. data/lib/utils/resource.rb +4 -0
  50. data/lib/version.rb +1 -1
  51. data/resources/UK_addresses.geojson +300 -0
  52. data/resources/US_addresses.geojson +300 -0
  53. data/spec/acceptance/rdbms_blacklist_spec.rb +2 -2
  54. data/spec/acceptance/rdbms_whitelist_spec.rb +6 -8
  55. data/spec/resource/sample.geojson +1 -0
  56. data/spec/spec_helper.rb +3 -2
  57. data/spec/strategy/field/contact/random_address_spec.rb +12 -0
  58. data/spec/strategy/field/contact/random_city_spec.rb +14 -0
  59. data/spec/strategy/field/contact/random_phone_number_spec.rb +16 -0
  60. data/spec/strategy/field/contact/random_province_spec.rb +14 -0
  61. data/spec/strategy/field/contact/random_zipcode_spec.rb +14 -0
  62. data/spec/strategy/field/datetime/anonymize_date_spec.rb +27 -0
  63. data/spec/strategy/field/datetime/anonymize_datetime_spec.rb +57 -0
  64. data/spec/strategy/field/datetime/anonymize_time_spec.rb +57 -0
  65. data/spec/strategy/field/datetime/date_delta_spec.rb +36 -0
  66. data/spec/strategy/field/{date_time_delta_spec.rb → datetime/date_time_delta_spec.rb} +3 -2
  67. data/spec/strategy/field/datetime/time_delta_spec.rb +44 -0
  68. data/spec/strategy/field/default_anon_spec.rb +42 -0
  69. data/spec/strategy/field/email/gmail_template_spec.rb +17 -0
  70. data/spec/strategy/field/{random_email_spec.rb → email/random_email_spec.rb} +2 -2
  71. data/spec/strategy/field/email/random_mailinator_email_spec.rb +14 -0
  72. data/spec/strategy/field/{random_first_name_spec.rb → name/random_first_name_spec.rb} +2 -2
  73. data/spec/strategy/field/{random_full_name_spec.rb → name/random_full_name_spec.rb} +2 -2
  74. data/spec/strategy/field/{random_last_name_spec.rb → name/random_last_name_spec.rb} +2 -2
  75. data/spec/strategy/field/{random_user_name_spec.rb → name/random_user_name_spec.rb} +2 -2
  76. data/spec/strategy/field/{random_float_delta_spec.rb → number/random_float_delta_spec.rb} +2 -2
  77. data/spec/strategy/field/number/random_float_spec.rb +28 -0
  78. data/spec/strategy/field/{random_integer_delta_spec.rb → number/random_integer_delta_spec.rb} +3 -5
  79. data/spec/strategy/field/{random_int_spec.rb → number/random_integer_spec.rb} +4 -4
  80. data/spec/strategy/field/random_boolean_spec.rb +2 -2
  81. data/spec/strategy/field/string/formatted_string_numbers_spec.rb +15 -0
  82. data/spec/strategy/field/{lorem_ipsum_spec.rb → string/lorem_ipsum_spec.rb} +2 -2
  83. data/spec/strategy/field/{random_string_spec.rb → string/random_string_spec.rb} +2 -2
  84. data/spec/strategy/field/{distinct_column_values_spec.rb → string/select_from_database_spec.rb} +3 -3
  85. data/spec/strategy/field/{random_selection_spec.rb → string/select_from_list_spec.rb} +5 -5
  86. data/spec/strategy/field/{string_template_spec.rb → string/string_template_spec.rb} +2 -2
  87. data/spec/strategy/field/whitelist_spec.rb +2 -2
  88. data/spec/support/customer_sample.rb +1 -1
  89. data/spec/utils/database_spec.rb +2 -2
  90. data/spec/utils/geojson_parser_spec.rb +38 -0
  91. data/whitelist_dsl.rb +4 -6
  92. metadata +163 -59
  93. data/lib/strategy/field/anonymize_time.rb +0 -57
  94. data/lib/strategy/field/gmail_template.rb +0 -17
  95. data/lib/strategy/field/random_first_name.rb +0 -18
  96. data/lib/strategy/field/random_last_name.rb +0 -19
  97. data/lib/strategy/field/random_selection.rb +0 -23
  98. data/lib/strategy/field/user_name_template.rb +0 -22
  99. data/spec/strategy/field/anonymize_time_spec.rb +0 -23
  100. data/spec/strategy/field/gmail_template_spec.rb +0 -14
  101. data/spec/strategy/field/random_mailinator_email_spec.rb +0 -21
  102. data/spec/strategy/field/random_phone_number_spec.rb +0 -35
  103. data/spec/strategy/field/user_name_template_spec.rb +0 -13
@@ -0,0 +1,16 @@
1
+ module DataAnon
2
+ module Strategy
3
+ module Field
4
+ class GmailTemplate
5
+
6
+ def initialize username = 'someusername'
7
+ @username = username
8
+ end
9
+
10
+ def anonymize field
11
+ "#{@username}+#{field.row_number}@gmail.com"
12
+ end
13
+ end
14
+ end
15
+ end
16
+ end
@@ -9,9 +9,7 @@ module DataAnon
9
9
  end
10
10
 
11
11
  def anonymize field
12
-
13
12
  return @email_anonymizer.anonymize(field)
14
-
15
13
  end
16
14
  end
17
15
  end
@@ -1,23 +1,54 @@
1
1
  require 'strategy/field/whitelist'
2
- require 'strategy/field/string_template'
3
- require 'strategy/field/user_name_template'
4
- require 'strategy/field/random_string'
5
- require 'strategy/field/random_int'
6
2
  require 'strategy/field/random_boolean'
7
- require 'strategy/field/anonymize_time'
8
- require 'strategy/field/random_integer_delta'
9
- require 'strategy/field/random_float_delta'
10
- require 'strategy/field/random_selection'
11
- require 'strategy/field/distinct_column_values'
12
- require 'strategy/field/lorem_ipsum'
13
- require 'strategy/field/gmail_template'
14
- require 'strategy/field/date_time_delta'
15
- require 'strategy/field/default_anon'
16
- require 'strategy/field/random_email'
17
- require 'strategy/field/random_mailinator_email'
18
- require 'strategy/field/random_phone_number'
19
- require 'strategy/field/random_first_name'
20
- require 'strategy/field/random_last_name'
21
- require 'strategy/field/random_full_name'
22
- require 'strategy/field/random_user_name'
3
+
23
4
  require 'strategy/field/anonymous'
5
+
6
+ # string
7
+ require 'strategy/field/string/lorem_ipsum'
8
+ require 'strategy/field/string/string_template'
9
+ require 'strategy/field/string/random_string'
10
+ require 'strategy/field/string/formatted_string_numbers'
11
+
12
+ require 'strategy/field/string/select_from_file'
13
+ require 'strategy/field/string/select_from_list'
14
+ require 'strategy/field/string/select_from_database'
15
+
16
+ # number
17
+ require 'strategy/field/number/random_integer'
18
+ require 'strategy/field/number/random_float'
19
+ require 'strategy/field/number/random_integer_delta'
20
+ require 'strategy/field/number/random_float_delta'
21
+
22
+ # contact
23
+ require 'strategy/field/contact/geojson_base'
24
+ require 'strategy/field/contact/random_phone_number'
25
+ require 'strategy/field/contact/random_address'
26
+ require 'strategy/field/contact/random_zipcode'
27
+ require 'strategy/field/contact/random_city'
28
+ require 'strategy/field/contact/random_province'
29
+
30
+ # datetime
31
+ require 'strategy/field/datetime/anonymize_time'
32
+ require 'strategy/field/datetime/anonymize_datetime'
33
+ require 'strategy/field/datetime/anonymize_date'
34
+ require 'strategy/field/datetime/date_time_delta'
35
+ require 'strategy/field/datetime/time_delta'
36
+ require 'strategy/field/datetime/date_delta'
37
+
38
+ # email
39
+ require 'strategy/field/email/random_email'
40
+ require 'strategy/field/email/gmail_template'
41
+ require 'strategy/field/email/random_mailinator_email'
42
+
43
+ # name
44
+ require 'strategy/field/name/random_first_name'
45
+ require 'strategy/field/name/random_last_name'
46
+ require 'strategy/field/name/random_full_name'
47
+ require 'strategy/field/name/random_user_name'
48
+
49
+
50
+
51
+ FieldStrategy = DataAnon::Strategy::Field
52
+
53
+ require 'strategy/field/default_anon'
54
+
@@ -0,0 +1,14 @@
1
+ module DataAnon
2
+ module Strategy
3
+ module Field
4
+
5
+ class RandomFirstName < SelectFromFile
6
+
7
+ def initialize file_path = nil
8
+ super(file_path || DataAnon::Utils::Resource.file('first_names.txt'))
9
+ end
10
+
11
+ end
12
+ end
13
+ end
14
+ end
@@ -0,0 +1,14 @@
1
+ module DataAnon
2
+ module Strategy
3
+ module Field
4
+
5
+ class RandomLastName < SelectFromFile
6
+
7
+ def initialize file_path = nil
8
+ super(file_path || DataAnon::Utils::Resource.file('last_names.txt'))
9
+ end
10
+
11
+ end
12
+ end
13
+ end
14
+ end
@@ -0,0 +1,23 @@
1
+ module DataAnon
2
+ module Strategy
3
+ module Field
4
+
5
+
6
+ class RandomFloat
7
+
8
+ def initialize from = 0.0, to = 100.0
9
+ @from = from
10
+ @to = to
11
+
12
+ end
13
+
14
+ def anonymize field
15
+ DataAnon::Utils::RandomFloat.generate(@from,@to)
16
+ end
17
+
18
+ end
19
+
20
+
21
+ end
22
+ end
23
+ end
@@ -3,10 +3,8 @@ module DataAnon
3
3
  module Field
4
4
  class RandomFloatDelta
5
5
 
6
- DEFAULT_DELTA = 10.0
7
-
8
- def initialize delta = nil
9
- @delta = delta || DEFAULT_DELTA
6
+ def initialize delta = 10.0
7
+ @delta = delta
10
8
  end
11
9
 
12
10
  def anonymize field
@@ -3,7 +3,7 @@ module DataAnon
3
3
  module Field
4
4
 
5
5
 
6
- class RandomInt
6
+ class RandomInteger
7
7
 
8
8
  def initialize from = 0, to = 100
9
9
  @from = from
@@ -3,11 +3,8 @@ module DataAnon
3
3
  module Field
4
4
  class RandomIntegerDelta
5
5
 
6
- DEFAULT_DELTA = 10
7
-
8
- def initialize delta = nil
9
- @delta = delta || DEFAULT_DELTA
10
-
6
+ def initialize delta = 10
7
+ @delta = delta
11
8
  end
12
9
 
13
10
  def anonymize field
@@ -3,7 +3,7 @@ module DataAnon
3
3
  module Field
4
4
 
5
5
 
6
- class RandomPhoneNumber
6
+ class FormattedStringNumber
7
7
 
8
8
  def anonymize field
9
9
  @original_phone_number = field.value
@@ -18,7 +18,10 @@ module DataAnon
18
18
 
19
19
  @anonymized_phone_number
20
20
  end
21
+
21
22
  end
23
+
24
+
22
25
  end
23
26
  end
24
27
  end
@@ -2,18 +2,17 @@ module DataAnon
2
2
  module Strategy
3
3
  module Field
4
4
 
5
- class DistinctColumnValues
5
+ class SelectFromDatabase
6
6
  include Utils::Logging
7
7
 
8
8
  def initialize table_name, field_name
9
- source = Utils::SourceTable.create table_name
9
+ source = Utils::SourceTable.create table_name, []
10
10
  @values = source.select(field_name).uniq.collect { |record| record[field_name]}
11
11
  logger.debug "For field strategy #{table_name}:#{field_name} using values #{@values} "
12
12
 
13
13
  end
14
14
 
15
15
  def anonymize field
16
- return @values[0] if @values.length == 1
17
16
  @values[DataAnon::Utils::RandomInt.generate(0,(@values.length - 1))]
18
17
  end
19
18
 
@@ -0,0 +1,18 @@
1
+ module DataAnon
2
+ module Strategy
3
+ module Field
4
+
5
+ class SelectFromFile
6
+
7
+ def initialize file_path
8
+ @values = File.read(file_path).split
9
+ end
10
+
11
+ def anonymize field
12
+ @values.sample
13
+ end
14
+
15
+ end
16
+ end
17
+ end
18
+ end
@@ -0,0 +1,17 @@
1
+ module DataAnon
2
+ module Strategy
3
+ module Field
4
+
5
+
6
+ class SelectFromList < SelectFromFile
7
+
8
+ def initialize values
9
+ @values = values.class == Array ? values : [values]
10
+ end
11
+
12
+ end
13
+
14
+
15
+ end
16
+ end
17
+ end
@@ -5,14 +5,16 @@ module DataAnon
5
5
  def process_record(index, record)
6
6
  dest_record_map = {}
7
7
  record.attributes.each do |field_name, field_value|
8
- unless field_value.nil? || field_name.downcase == @primary_key.downcase
8
+ unless field_value.nil? || is_primary_key?(field_name)
9
9
  field = DataAnon::Core::Field.new(field_name, field_value, index, record)
10
10
  field_strategy = @fields[field_name.downcase] || DataAnon::Strategy::Field::DefaultAnon.new(@user_strategies)
11
11
  dest_record_map[field_name] = field_strategy.anonymize(field)
12
12
  end
13
13
  end
14
14
  dest_record = dest_table.new dest_record_map
15
- dest_record[@primary_key] = record[@primary_key]
15
+ @primary_keys.each do |key|
16
+ dest_record[key] = record[key]
17
+ end
16
18
  dest_record.save!
17
19
  end
18
20
 
@@ -1,4 +1,5 @@
1
1
  require 'active_record'
2
+ require 'composite_primary_keys'
2
3
  require 'logger'
3
4
 
4
5
  module DataAnon
@@ -23,10 +24,11 @@ module DataAnon
23
24
 
24
25
  class BaseTable
25
26
 
26
- def self.create_table table_name, primary_key, database
27
+ def self.create_table database, table_name, primary_keys
27
28
  Class.new(database) do
28
29
  self.table_name = table_name
29
- self.primary_key = primary_key
30
+ self.primary_keys = primary_keys if primary_keys.length > 1
31
+ self.primary_key = primary_keys[0] if primary_keys.length == 1
30
32
  self.mass_assignment_sanitizer = MassAssignmentIgnoreSanitizer.new(self)
31
33
  end
32
34
  end
@@ -35,16 +37,16 @@ module DataAnon
35
37
 
36
38
  class SourceTable < BaseTable
37
39
 
38
- def self.create table_name, primary_key = nil
39
- create_table table_name, primary_key, SourceDatabase
40
+ def self.create table_name, primary_key
41
+ create_table SourceDatabase, table_name, primary_key
40
42
  end
41
43
 
42
44
  end
43
45
 
44
46
  class DestinationTable < BaseTable
45
47
 
46
- def self.create table_name, primary_key = nil
47
- create_table table_name, primary_key, DestinationDatabase
48
+ def self.create table_name, primary_key
49
+ create_table DestinationDatabase, table_name, primary_key
48
50
  end
49
51
 
50
52
  end
@@ -0,0 +1,42 @@
1
+ require 'rgeo/geo_json'
2
+
3
+ module DataAnon
4
+ module Utils
5
+ class GeojsonParser
6
+
7
+
8
+ def self.address file_path
9
+ self.new(file_path).parse 'address'
10
+ end
11
+
12
+ def self.zipcode file_path
13
+ self.new(file_path).parse 'postcode'
14
+ end
15
+
16
+ def self.province file_path
17
+ self.new(file_path).parse 'province'
18
+ end
19
+
20
+ def self.city file_path
21
+ self.new(file_path).parse 'city'
22
+ end
23
+
24
+ def self.country file_path
25
+ self.new(file_path).parse 'country'
26
+ end
27
+
28
+ def initialize file_path
29
+ @places = File.read(file_path).split(/\n/)
30
+ end
31
+
32
+ def parse property
33
+ result_list = []
34
+ @places.each do |loc|
35
+ geom = RGeo::GeoJSON.decode(loc, :json_parser => :json)
36
+ result_list.push(geom[property])
37
+ end
38
+ result_list
39
+ end
40
+ end
41
+ end
42
+ end
data/lib/utils/logging.rb CHANGED
@@ -15,15 +15,6 @@ module DataAnon
15
15
  @@logger
16
16
  end
17
17
 
18
- def progress_logger
19
- @@progress_logger ||= (self.progress_logger = Logger.new(STDOUT) )
20
- end
21
-
22
- def progress_logger= logger
23
- logger.formatter = proc { |severity, datetime, progname, msg| msg }
24
- @@progress_logger = logger
25
- end
26
-
27
18
  end
28
19
  end
29
20
  end
@@ -0,0 +1,29 @@
1
+ module DataAnon
2
+ module Utils
3
+
4
+ class ProgressBar
5
+
6
+ def initialize table_name, total
7
+ @total = total
8
+ @table_name = table_name
9
+ @progress_bar = PowerBar.new if total > 0 && show_progress
10
+ end
11
+
12
+ def show_progress
13
+ ENV['show_progress'] != 'false'
14
+ end
15
+
16
+ def show index
17
+ if @progress_bar && ((index % 1000 == 0) || (index == @total) || (index == 1))
18
+ @progress_bar.show(:msg => "Table: #{@table_name} (#{index}/#{@total})", :done => index, :total => @total)
19
+ end
20
+ end
21
+
22
+ def close
23
+ @progress_bar.close if @progress_bar
24
+ end
25
+
26
+ end
27
+
28
+ end
29
+ end