data-anonymization 0.1.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (83) hide show
  1. data/.documentup.json +8 -0
  2. data/.gitignore +20 -0
  3. data/.rspec +2 -0
  4. data/.rvmrc +1 -0
  5. data/.travis.yml +6 -0
  6. data/Gemfile +12 -0
  7. data/LICENSE.txt +22 -0
  8. data/README.md +256 -0
  9. data/Rakefile +9 -0
  10. data/blacklist_dsl.rb +19 -0
  11. data/blacklist_nosql_dsl.rb +36 -0
  12. data/data-anonymization.gemspec +22 -0
  13. data/lib/core/database.rb +36 -0
  14. data/lib/core/dsl.rb +16 -0
  15. data/lib/core/field.rb +18 -0
  16. data/lib/data-anonymization.rb +12 -0
  17. data/lib/strategy/base.rb +67 -0
  18. data/lib/strategy/blacklist.rb +18 -0
  19. data/lib/strategy/field/anonymize_time.rb +57 -0
  20. data/lib/strategy/field/anonymous.rb +21 -0
  21. data/lib/strategy/field/date_time_delta.rb +24 -0
  22. data/lib/strategy/field/default_anon.rb +28 -0
  23. data/lib/strategy/field/distinct_column_values.rb +25 -0
  24. data/lib/strategy/field/fields.rb +23 -0
  25. data/lib/strategy/field/gmail_template.rb +17 -0
  26. data/lib/strategy/field/lorem_ipsum.rb +29 -0
  27. data/lib/strategy/field/random_boolean.rb +19 -0
  28. data/lib/strategy/field/random_email.rb +31 -0
  29. data/lib/strategy/field/random_first_name.rb +18 -0
  30. data/lib/strategy/field/random_float_delta.rb +24 -0
  31. data/lib/strategy/field/random_full_name.rb +28 -0
  32. data/lib/strategy/field/random_int.rb +23 -0
  33. data/lib/strategy/field/random_integer_delta.rb +21 -0
  34. data/lib/strategy/field/random_last_name.rb +19 -0
  35. data/lib/strategy/field/random_mailinator_email.rb +20 -0
  36. data/lib/strategy/field/random_phone_number.rb +24 -0
  37. data/lib/strategy/field/random_selection.rb +23 -0
  38. data/lib/strategy/field/random_string.rb +22 -0
  39. data/lib/strategy/field/random_user_name.rb +23 -0
  40. data/lib/strategy/field/string_template.rb +22 -0
  41. data/lib/strategy/field/user_name_template.rb +22 -0
  42. data/lib/strategy/field/whitelist.rb +17 -0
  43. data/lib/strategy/strategies.rb +4 -0
  44. data/lib/strategy/whitelist.rb +21 -0
  45. data/lib/tasks/rake_tasks.rb +19 -0
  46. data/lib/utils/database.rb +53 -0
  47. data/lib/utils/logging.rb +29 -0
  48. data/lib/utils/random_int.rb +15 -0
  49. data/lib/utils/random_string.rb +14 -0
  50. data/lib/utils/resource.rb +13 -0
  51. data/lib/version.rb +3 -0
  52. data/resources/first_names.txt +500 -0
  53. data/resources/last_names.txt +500 -0
  54. data/spec/acceptance/rdbms_blacklist_spec.rb +30 -0
  55. data/spec/acceptance/rdbms_whitelist_spec.rb +50 -0
  56. data/spec/spec_helper.rb +26 -0
  57. data/spec/strategy/field/anonymize_time_spec.rb +23 -0
  58. data/spec/strategy/field/date_time_delta_spec.rb +43 -0
  59. data/spec/strategy/field/distinct_column_values_spec.rb +22 -0
  60. data/spec/strategy/field/gmail_template_spec.rb +14 -0
  61. data/spec/strategy/field/lorem_ipsum_spec.rb +27 -0
  62. data/spec/strategy/field/random_boolean_spec.rb +16 -0
  63. data/spec/strategy/field/random_email_spec.rb +18 -0
  64. data/spec/strategy/field/random_first_name_spec.rb +14 -0
  65. data/spec/strategy/field/random_float_delta_spec.rb +21 -0
  66. data/spec/strategy/field/random_full_name_spec.rb +23 -0
  67. data/spec/strategy/field/random_int_spec.rb +28 -0
  68. data/spec/strategy/field/random_integer_delta_spec.rb +23 -0
  69. data/spec/strategy/field/random_last_name_spec.rb +14 -0
  70. data/spec/strategy/field/random_mailinator_email_spec.rb +21 -0
  71. data/spec/strategy/field/random_phone_number_spec.rb +35 -0
  72. data/spec/strategy/field/random_selection_spec.rb +36 -0
  73. data/spec/strategy/field/random_string_spec.rb +23 -0
  74. data/spec/strategy/field/random_user_name_spec.rb +23 -0
  75. data/spec/strategy/field/string_template_spec.rb +15 -0
  76. data/spec/strategy/field/user_name_template_spec.rb +13 -0
  77. data/spec/strategy/field/whitelist_spec.rb +21 -0
  78. data/spec/support/customer_sample.rb +43 -0
  79. data/spec/utils/database_spec.rb +26 -0
  80. data/spec/utils/random_int_spec.rb +9 -0
  81. data/spec/utils/random_string_spec.rb +8 -0
  82. data/whitelist_dsl.rb +44 -0
  83. metadata +192 -0
@@ -0,0 +1,24 @@
module DataAnon
  module Strategy
    module Field
      # Field strategy that moves a numeric value by a random amount of at
      # most +delta+ in either direction.
      class RandomFloatDelta
        DEFAULT_DELTA = 10.0

        # delta - largest allowed distance from the original value;
        #         DEFAULT_DELTA is used when nothing (or nil) is given.
        def initialize(delta = nil)
          @delta = delta || DEFAULT_DELTA
        end

        # Returns a random number between field.value - delta and
        # field.value + delta.
        def anonymize(field)
          lower = field.value - @delta
          upper = field.value + @delta
          range(lower, upper)
        end

        # Scales a fresh random float from [0, 1) onto the span min..max.
        def range(min, max)
          span = max - min
          Random.new.rand * span + min
        end
      end
    end
  end
end
@@ -0,0 +1,28 @@
module DataAnon
  module Strategy
    module Field
      # Field strategy that replaces every word of a full name: the first
      # word via the first-name anonymizer, each following word via the
      # last-name anonymizer.
      class RandomFullName
        # first_names / last_names are handed unchanged to RandomFirstName
        # and RandomLastName respectively.
        def initialize(first_names = nil, last_names = nil)
          @first_name_anonymizer = DataAnon::Strategy::Field::RandomFirstName.new(first_names)
          @last_name_anonymizer = DataAnon::Strategy::Field::RandomLastName.new(last_names)
        end

        # Returns the anonymized first name followed by one space-prefixed
        # anonymized last name per remaining word of field.value.
        def anonymize(field)
          first_word, *other_words = field.value.split(' ')

          first_part = @first_name_anonymizer.anonymize(first_word)
          last_part = other_words.map { |word| " " + @last_name_anonymizer.anonymize(word) }.join

          first_part + last_part
        end
      end
    end
  end
end
@@ -0,0 +1,23 @@
module DataAnon
  module Strategy
    module Field
      # Field strategy that ignores the current value and asks
      # DataAnon::Utils::RandomInt for a number using the configured bounds.
      class RandomInt
        # from / to - bounds passed on to DataAnon::Utils::RandomInt.generate.
        def initialize(from = 0, to = 100)
          @from, @to = from, to
        end

        # The field argument is accepted for interface compatibility and is not read.
        def anonymize(field)
          DataAnon::Utils::RandomInt.generate(@from, @to)
        end
      end
    end
  end
end
@@ -0,0 +1,21 @@
module DataAnon
  module Strategy
    module Field
      # Field strategy that adds a random integer offset, drawn by
      # DataAnon::Utils::RandomInt from -delta to delta, to the field value.
      class RandomIntegerDelta
        DEFAULT_DELTA = 10

        # delta - magnitude of the offset bounds; DEFAULT_DELTA when omitted or nil.
        def initialize(delta = nil)
          @delta = delta || DEFAULT_DELTA
        end

        # Returns field.value plus the generated offset.
        def anonymize(field)
          offset = DataAnon::Utils::RandomInt.generate(-@delta, @delta)
          field.value + offset
        end
      end
    end
  end
end
@@ -0,0 +1,19 @@
module DataAnon
  module Strategy
    module Field
      # Field strategy that returns a randomly picked entry from a list of
      # names loaded from a file.
      class RandomLastName
        # file_path - file holding whitespace-separated names. When omitted,
        #             the path is obtained from
        #             DataAnon::Utils::Resource.file('last_names.txt').
        def initialize(file_path = nil)
          source = file_path || DataAnon::Utils::Resource.file('last_names.txt')
          @names = File.read(source).split
        end

        # The field argument is not read; any loaded name may be returned.
        def anonymize(field)
          position = rand(@names.size)
          @names[position]
        end
      end
    end
  end
end
@@ -0,0 +1,20 @@
module DataAnon
  module Strategy
    module Field
      # Field strategy that delegates to a RandomEmail anonymizer constructed
      # with the arguments "mailinator" and "com".
      class RandomMailinatorEmail
        def initialize
          @email_anonymizer = DataAnon::Strategy::Field::RandomEmail.new("mailinator", "com")
        end

        # Returns whatever the wrapped RandomEmail anonymizer produces for field.
        def anonymize(field)
          @email_anonymizer.anonymize(field)
        end
      end
    end
  end
end
@@ -0,0 +1,24 @@
module DataAnon
  module Strategy
    module Field
      # Field strategy that keeps the layout of a phone number (spaces,
      # brackets, dashes, plus signs, ...) and replaces every digit with a
      # random digit from 0 to 9.
      class RandomPhoneNumber
        # field - object whose #value is the phone number String.
        #
        # Returns a new String; field.value itself is not modified and
        # nothing is stored on the strategy instance, so the original number
        # is not retained after the call.
        def anonymize(field)
          # rand(10) yields 0..9, so every digit (including 9) can occur.
          field.value.gsub(/\d/) { rand(10).to_s }
        end
      end
    end
  end
end
@@ -0,0 +1,23 @@
module DataAnon
  module Strategy
    module Field
      # Field strategy that returns one of a fixed set of values, chosen at
      # random with every value (including the last one) eligible.
      class RandomSelection
        # values - an Array of candidate values, or a single value which is
        #          treated as a one-element list.
        def initialize(values)
          @values = values.is_a?(Array) ? values : [values]
        end

        # The field argument is not read. Returns nil when the list is empty.
        def anonymize(field)
          @values.sample
        end
      end
    end
  end
end
@@ -0,0 +1,22 @@
module DataAnon
  module Strategy
    module Field
      # Field strategy that swaps each space-separated word of the value for
      # a string generated by DataAnon::Utils::RandomString with the word's length.
      class RandomString
        # Returns the generated words, each followed by a single space, with
        # surrounding whitespace stripped from the final result.
        def anonymize(field)
          replacements = field.value.split(' ').map do |word|
            DataAnon::Utils::RandomString.generate(word.length) + " "
          end

          replacements.join.strip
        end
      end
    end
  end
end
@@ -0,0 +1,23 @@
module DataAnon
  module Strategy
    module Field
      # Field strategy that produces a user name from
      # DataAnon::Utils::RandomString, with a length obtained from
      # DataAnon::Utils::RandomInt using the configured bounds.
      class RandomUserName
        DEFAULT_MIN_LENGTH = 5
        DEFAULT_MAX_LENGTH = 10

        # min_length / max_length - bounds handed to RandomInt.generate.
        def initialize(min_length = DEFAULT_MIN_LENGTH, max_length = DEFAULT_MAX_LENGTH)
          @min_length, @max_length = min_length, max_length
        end

        # The field argument is not read.
        def anonymize(field)
          length = DataAnon::Utils::RandomInt.generate(@min_length, @max_length)
          DataAnon::Utils::RandomString.generate(length)
        end
      end
    end
  end
end
@@ -0,0 +1,22 @@
module DataAnon
  module Strategy
    module Field
      # Field strategy that renders a template as a double-quoted Ruby
      # string, so "#{...}" expressions in it are evaluated with the field
      # object as self.
      class StringTemplate
        # template - String placed between double quotes and evaluated on
        #            every #anonymize call. It is executed as Ruby code.
        def initialize(template)
          @template = template
        end

        # Returns the result of evaluating the quoted template.
        def anonymize(field)
          # Binding captured inside instance_eval: self is the field there,
          # so bare method names in the template resolve against it.
          context = field.instance_eval { binding }
          eval('"' + @template + '"', context)
        end
      end
    end
  end
end
@@ -0,0 +1,22 @@
module DataAnon
  module Strategy
    module Field
      # Field strategy that builds a user name by rendering a template as a
      # double-quoted Ruby string, with the field object as self for any
      # "#{...}" expressions.
      class UserNameTemplate
        # template - String placed between double quotes and evaluated on
        #            every #anonymize call. It is executed as Ruby code.
        def initialize(template)
          @template = template
        end

        # Returns the result of evaluating the quoted template.
        def anonymize(field)
          # Binding captured inside instance_eval: self is the field there,
          # so bare method names in the template resolve against it.
          context = field.instance_eval { binding }
          eval('"' + @template + '"', context)
        end
      end
    end
  end
end
@@ -0,0 +1,17 @@
module DataAnon
  module Strategy
    module Field
      # Field strategy for values that are kept as they are.
      class Whitelist
        # Returns field.value untouched.
        def anonymize(field)
          field.value
        end
      end
    end
  end
end
@@ -0,0 +1,4 @@
1
+ require 'strategy/base'
2
+ require 'strategy/whitelist'
3
+ require 'strategy/blacklist'
4
+ require 'strategy/field/fields'
@@ -0,0 +1,21 @@
module DataAnon
  module Strategy
    # Record-level strategy: builds a destination record from a source
    # record by running each non-nil, non-primary-key attribute through a
    # field strategy, then saves it under the source's primary key value.
    class Whitelist < DataAnon::Strategy::Base
      # index  - passed through to DataAnon::Core::Field.new for each attribute.
      # record - source record; must respond to #attributes and #[].
      #
      # Returns the result of save! on the destination record.
      def process_record(index, record)
        anonymized = record.attributes.each_with_object({}) do |(field_name, field_value), result|
          # nil values and the primary key column are never anonymized.
          next if field_value.nil? || field_name.downcase == @primary_key.downcase

          field = DataAnon::Core::Field.new(field_name, field_value, index, record)
          # A strategy configured for the (lower-cased) column name wins;
          # otherwise DefaultAnon is built from the user strategies.
          strategy = @fields[field_name.downcase] || DataAnon::Strategy::Field::DefaultAnon.new(@user_strategies)
          result[field_name] = strategy.anonymize(field)
        end

        dest_record = dest_table.new(anonymized)
        dest_record[@primary_key] = record[@primary_key]
        dest_record.save!
      end
    end
  end
end
@@ -0,0 +1,19 @@
1
+ require 'rake'
2
+ require 'rake/tasklib'
3
+
module DataAnonymization
  # Registers the gem's rake tasks when instantiated.
  class RakeTasks
    include Rake::DSL if defined? Rake::DSL

    def initialize
      define_empty_dest_task
    end

    private

    # Defines :empty_dest, which removes the sample destination SQLite file
    # and recreates it from the sample schema by shelling out to rm and sqlite3.
    def define_empty_dest_task
      desc "Task to build the clean empty destination database"
      task(:empty_dest) do
        system "rm sample-data/chinook-empty.sqlite"
        system "sqlite3 sample-data/chinook-empty.sqlite < sample-data/chinook_schema.sql"
      end
    end
  end
end
19
+
@@ -0,0 +1,53 @@
1
+ require 'active_record'
2
+ require 'logger'
3
+
module DataAnon
  module Utils
    # Sanitizer whose hook for removed attributes does nothing.
    class MassAssignmentIgnoreSanitizer < ActiveModel::MassAssignmentSecurity::Sanitizer
      def process_removed_attributes(attrs)
      end
    end

    # Abstract ActiveRecord bases; table classes created below inherit from
    # one of them.
    class TempDatabase < ActiveRecord::Base
      self.abstract_class = true
    end

    class SourceDatabase < ActiveRecord::Base
      self.abstract_class = true
    end

    class DestinationDatabase < ActiveRecord::Base
      self.abstract_class = true
    end

    # Factory for anonymous model classes bound to a table.
    class BaseTable
      class << self
        # Returns a new anonymous subclass of +database+ configured with the
        # given table name and primary key, using MassAssignmentIgnoreSanitizer
        # as its mass-assignment sanitizer.
        def create_table(table_name, primary_key, database)
          Class.new(database) do
            self.table_name = table_name
            self.primary_key = primary_key
            self.mass_assignment_sanitizer = MassAssignmentIgnoreSanitizer.new(self)
          end
        end
      end
    end

    # Builds table classes on top of SourceDatabase.
    class SourceTable < BaseTable
      class << self
        def create(table_name, primary_key = nil)
          create_table(table_name, primary_key, SourceDatabase)
        end
      end
    end

    # Builds table classes on top of DestinationDatabase.
    class DestinationTable < BaseTable
      class << self
        def create(table_name, primary_key = nil)
          create_table(table_name, primary_key, DestinationDatabase)
        end
      end
    end
  end
end
@@ -0,0 +1,29 @@
1
+ require 'active_record'
2
+ require 'logger'
3
+
module DataAnon
  module Utils
    # Mixin exposing two loggers held in class variables of this module: a
    # general logger and a progress logger.
    module Logging
      # Returns the current logger, installing a STDOUT logger on first use.
      def logger
        return @@logger if defined?(@@logger) && @@logger

        self.logger = Logger.new(STDOUT)
      end

      # Stores the logger and also assigns it to ActiveRecord::Base.logger.
      def logger=(logger)
        @@logger = logger
        ActiveRecord::Base.logger = logger
        @@logger
      end

      # Returns the current progress logger, installing a STDOUT logger on
      # first use.
      def progress_logger
        return @@progress_logger if defined?(@@progress_logger) && @@progress_logger

        self.progress_logger = Logger.new(STDOUT)
      end

      # Stores the logger after replacing its formatter with one that emits
      # only the message itself.
      def progress_logger=(logger)
        logger.formatter = proc { |_severity, _datetime, _progname, msg| msg }
        @@progress_logger = logger
      end
    end
  end
end
@@ -0,0 +1,15 @@
module DataAnon
  module Utils
    # Generates random integers within inclusive bounds.
    class RandomInt
      DEFAULT_MIN = 1
      DEFAULT_MAX = 100

      # min - lowest value that may be returned (DEFAULT_MIN when nil).
      # max - highest value that may be returned (DEFAULT_MAX when nil).
      #
      # Returns a random integer n with min <= n <= max. Both bounds are
      # reachable, so generate(0, list.length - 1) covers every index and
      # generate(n, n) returns n.
      def self.generate(min = nil, max = nil)
        # Locals instead of class-level instance variables: no state is
        # shared between calls.
        lower = min || DEFAULT_MIN
        upper = max || DEFAULT_MAX
        Random.rand(lower..upper)
      end
    end
  end
end