data-anonymization 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (83) hide show
  1. data/.documentup.json +8 -0
  2. data/.gitignore +20 -0
  3. data/.rspec +2 -0
  4. data/.rvmrc +1 -0
  5. data/.travis.yml +6 -0
  6. data/Gemfile +12 -0
  7. data/LICENSE.txt +22 -0
  8. data/README.md +256 -0
  9. data/Rakefile +9 -0
  10. data/blacklist_dsl.rb +19 -0
  11. data/blacklist_nosql_dsl.rb +36 -0
  12. data/data-anonymization.gemspec +22 -0
  13. data/lib/core/database.rb +36 -0
  14. data/lib/core/dsl.rb +16 -0
  15. data/lib/core/field.rb +18 -0
  16. data/lib/data-anonymization.rb +12 -0
  17. data/lib/strategy/base.rb +67 -0
  18. data/lib/strategy/blacklist.rb +18 -0
  19. data/lib/strategy/field/anonymize_time.rb +57 -0
  20. data/lib/strategy/field/anonymous.rb +21 -0
  21. data/lib/strategy/field/date_time_delta.rb +24 -0
  22. data/lib/strategy/field/default_anon.rb +28 -0
  23. data/lib/strategy/field/distinct_column_values.rb +25 -0
  24. data/lib/strategy/field/fields.rb +23 -0
  25. data/lib/strategy/field/gmail_template.rb +17 -0
  26. data/lib/strategy/field/lorem_ipsum.rb +29 -0
  27. data/lib/strategy/field/random_boolean.rb +19 -0
  28. data/lib/strategy/field/random_email.rb +31 -0
  29. data/lib/strategy/field/random_first_name.rb +18 -0
  30. data/lib/strategy/field/random_float_delta.rb +24 -0
  31. data/lib/strategy/field/random_full_name.rb +28 -0
  32. data/lib/strategy/field/random_int.rb +23 -0
  33. data/lib/strategy/field/random_integer_delta.rb +21 -0
  34. data/lib/strategy/field/random_last_name.rb +19 -0
  35. data/lib/strategy/field/random_mailinator_email.rb +20 -0
  36. data/lib/strategy/field/random_phone_number.rb +24 -0
  37. data/lib/strategy/field/random_selection.rb +23 -0
  38. data/lib/strategy/field/random_string.rb +22 -0
  39. data/lib/strategy/field/random_user_name.rb +23 -0
  40. data/lib/strategy/field/string_template.rb +22 -0
  41. data/lib/strategy/field/user_name_template.rb +22 -0
  42. data/lib/strategy/field/whitelist.rb +17 -0
  43. data/lib/strategy/strategies.rb +4 -0
  44. data/lib/strategy/whitelist.rb +21 -0
  45. data/lib/tasks/rake_tasks.rb +19 -0
  46. data/lib/utils/database.rb +53 -0
  47. data/lib/utils/logging.rb +29 -0
  48. data/lib/utils/random_int.rb +15 -0
  49. data/lib/utils/random_string.rb +14 -0
  50. data/lib/utils/resource.rb +13 -0
  51. data/lib/version.rb +3 -0
  52. data/resources/first_names.txt +500 -0
  53. data/resources/last_names.txt +500 -0
  54. data/spec/acceptance/rdbms_blacklist_spec.rb +30 -0
  55. data/spec/acceptance/rdbms_whitelist_spec.rb +50 -0
  56. data/spec/spec_helper.rb +26 -0
  57. data/spec/strategy/field/anonymize_time_spec.rb +23 -0
  58. data/spec/strategy/field/date_time_delta_spec.rb +43 -0
  59. data/spec/strategy/field/distinct_column_values_spec.rb +22 -0
  60. data/spec/strategy/field/gmail_template_spec.rb +14 -0
  61. data/spec/strategy/field/lorem_ipsum_spec.rb +27 -0
  62. data/spec/strategy/field/random_boolean_spec.rb +16 -0
  63. data/spec/strategy/field/random_email_spec.rb +18 -0
  64. data/spec/strategy/field/random_first_name_spec.rb +14 -0
  65. data/spec/strategy/field/random_float_delta_spec.rb +21 -0
  66. data/spec/strategy/field/random_full_name_spec.rb +23 -0
  67. data/spec/strategy/field/random_int_spec.rb +28 -0
  68. data/spec/strategy/field/random_integer_delta_spec.rb +23 -0
  69. data/spec/strategy/field/random_last_name_spec.rb +14 -0
  70. data/spec/strategy/field/random_mailinator_email_spec.rb +21 -0
  71. data/spec/strategy/field/random_phone_number_spec.rb +35 -0
  72. data/spec/strategy/field/random_selection_spec.rb +36 -0
  73. data/spec/strategy/field/random_string_spec.rb +23 -0
  74. data/spec/strategy/field/random_user_name_spec.rb +23 -0
  75. data/spec/strategy/field/string_template_spec.rb +15 -0
  76. data/spec/strategy/field/user_name_template_spec.rb +13 -0
  77. data/spec/strategy/field/whitelist_spec.rb +21 -0
  78. data/spec/support/customer_sample.rb +43 -0
  79. data/spec/utils/database_spec.rb +26 -0
  80. data/spec/utils/random_int_spec.rb +9 -0
  81. data/spec/utils/random_string_spec.rb +8 -0
  82. data/whitelist_dsl.rb +44 -0
  83. metadata +192 -0
@@ -0,0 +1,24 @@
1
module DataAnon
  module Strategy
    module Field
      # Field strategy that replaces a numeric value with a random float
      # lying between (value - delta) and (value + delta).
      class RandomFloatDelta
        # Delta used when the caller does not supply one.
        DEFAULT_DELTA = 10.0

        # @param delta [Numeric, nil] maximum distance from the original
        #   value; nil selects DEFAULT_DELTA
        def initialize(delta = nil)
          @delta = delta || DEFAULT_DELTA
        end

        # @param field [#value] object exposing the numeric value to disturb
        # @return [Numeric] a random number around field.value
        def anonymize(field)
          lower_bound = field.value - @delta
          upper_bound = field.value + @delta
          range(lower_bound, upper_bound)
        end

        # Scales a random fraction from Random#rand onto the span min..max.
        #
        # @param min [Numeric] start of the span
        # @param max [Numeric] end of the span
        def range(min, max)
          span = max - min
          Random.new.rand * span + min
        end
      end
    end
  end
end
@@ -0,0 +1,28 @@
1
module DataAnon
  module Strategy
    module Field
      # Field strategy that anonymizes a space-separated full name: the first
      # word goes through the first-name anonymizer and every following word
      # goes through the last-name anonymizer.
      class RandomFullName
        # @param first_names [Object, nil] forwarded as-is to RandomFirstName.new
        # @param last_names [Object, nil] forwarded as-is to RandomLastName.new
        def initialize(first_names = nil, last_names = nil)
          @first_name_anonymizer = DataAnon::Strategy::Field::RandomFirstName.new(first_names)
          @last_name_anonymizer = DataAnon::Strategy::Field::RandomLastName.new(last_names)
        end

        # @param field [#value] object whose value is the original full name
        # @return [String] the replacement words joined by single spaces
        def anonymize(field)
          leading_word, *following_words = field.value.split(' ')

          replaced_first = @first_name_anonymizer.anonymize(leading_word)
          # Each remaining word contributes " <replacement>" to the tail.
          replaced_rest = following_words.map do |word|
            " " + @last_name_anonymizer.anonymize(word)
          end

          replaced_first + replaced_rest.join
        end
      end
    end
  end
end
@@ -0,0 +1,23 @@
1
module DataAnon
  module Strategy
    module Field
      # Field strategy that ignores the original value and returns whatever
      # DataAnon::Utils::RandomInt.generate yields for the configured bounds.
      class RandomInt
        # @param from [Integer] first bound handed to the generator
        # @param to [Integer] second bound handed to the generator
        def initialize(from = 0, to = 100)
          @from, @to = from, to
        end

        # @param field [Object] unused; accepted so the class matches the
        #   anonymize(field) shape of the other field strategies
        # @return [Object] the generator's result
        def anonymize(field)
          DataAnon::Utils::RandomInt.generate(@from, @to)
        end
      end
    end
  end
end
@@ -0,0 +1,21 @@
1
module DataAnon
  module Strategy
    module Field
      # Field strategy that shifts an integer value by a random adjustment
      # obtained from DataAnon::Utils::RandomInt.generate(-delta, delta).
      class RandomIntegerDelta
        # Delta used when the caller does not supply one.
        DEFAULT_DELTA = 10

        # @param delta [Integer, nil] magnitude of the adjustment bounds;
        #   nil selects DEFAULT_DELTA
        def initialize(delta = nil)
          @delta = delta || DEFAULT_DELTA
        end

        # @param field [#value] object exposing the integer to shift
        # @return [Integer] field.value plus the random adjustment
        def anonymize(field)
          shift = DataAnon::Utils::RandomInt.generate(-@delta, @delta)
          field.value + shift
        end
      end
    end
  end
end
@@ -0,0 +1,19 @@
1
module DataAnon
  module Strategy
    module Field
      # Field strategy that returns a random entry from a whitespace-separated
      # list of names read from a file.
      class RandomLastName
        # Reads the whole names file once, at construction time.
        #
        # @param file_path [String, nil] path of the names file; when nil the
        #   path comes from DataAnon::Utils::Resource.file('last_names.txt')
        def initialize(file_path = nil)
          names_source = file_path || DataAnon::Utils::Resource.file('last_names.txt')
          @names = File.read(names_source).split
        end

        # @param field [Object] unused; the pick does not depend on the input
        # @return [String] one of the loaded names
        def anonymize(field)
          random_index = rand(@names.size)
          @names[random_index]
        end
      end
    end
  end
end
@@ -0,0 +1,20 @@
1
module DataAnon
  module Strategy
    module Field
      # Field strategy that delegates to a RandomEmail anonymizer constructed
      # with the arguments "mailinator" and "com".
      class RandomMailinatorEmail
        def initialize
          @email_anonymizer = RandomEmail.new("mailinator", "com")
        end

        # @param field [Object] passed through unchanged to the wrapped
        #   RandomEmail anonymizer
        # @return [Object] whatever the wrapped anonymizer returns
        def anonymize(field)
          @email_anonymizer.anonymize(field)
        end
      end
    end
  end
end
@@ -0,0 +1,24 @@
1
module DataAnon
  module Strategy
    module Field
      # Field strategy that keeps the layout of a phone number (spaces,
      # dashes, parentheses, plus signs, ...) and replaces every digit with
      # a digit obtained from DataAnon::Utils::RandomInt.generate(0, 9).
      class RandomPhoneNumber
        # Matches a single decimal digit.
        DIGIT = /\d/

        # Works purely on local data: nothing is stored on the strategy
        # object, so repeated calls on one instance cannot observe each
        # other's input or output.
        #
        # @param field [#value] object whose value is the original phone
        #   number as a String
        # @return [String] a new string of the same layout with the digits
        #   replaced; the original string is not modified
        def anonymize(field)
          field.value.gsub(DIGIT) { DataAnon::Utils::RandomInt.generate(0, 9).to_s }
        end
      end
    end
  end
end
@@ -0,0 +1,23 @@
1
module DataAnon
  module Strategy
    module Field
      # Field strategy that returns one of a fixed set of candidate values,
      # using DataAnon::Utils::RandomInt.generate to choose the index.
      class RandomSelection
        # @param values [Array, Object] the candidates; anything whose class
        #   is not exactly Array is wrapped into a one-element array
        def initialize(values)
          @values = values.instance_of?(Array) ? values : [values]
        end

        # @param field [Object] unused; the pick does not depend on the input
        # @return [Object] one of the configured values
        def anonymize(field)
          if @values.length == 1
            # A single candidate needs no random draw.
            @values[0]
          else
            last_index = @values.length - 1
            chosen_index = DataAnon::Utils::RandomInt.generate(0, last_index)
            @values[chosen_index]
          end
        end
      end
    end
  end
end
@@ -0,0 +1,22 @@
1
module DataAnon
  module Strategy
    module Field
      # Field strategy that replaces each space-separated word with a string
      # from DataAnon::Utils::RandomString.generate of the same length.
      class RandomString
        # @param field [#value] object whose value is the original String
        # @return [String] the generated words joined by single spaces, with
        #   surrounding whitespace stripped
        def anonymize(field)
          replacement_words = field.value.split(' ').map do |word|
            DataAnon::Utils::RandomString.generate(word.length)
          end

          replacement_words.join(' ').strip
        end
      end
    end
  end
end
@@ -0,0 +1,23 @@
1
module DataAnon
  module Strategy
    module Field
      # Field strategy that returns a string from
      # DataAnon::Utils::RandomString.generate whose length is drawn with
      # DataAnon::Utils::RandomInt.generate(min_length, max_length).
      class RandomUserName
        DEFAULT_MIN_LENGTH = 5
        DEFAULT_MAX_LENGTH = 10

        # @param min_length [Integer] first bound handed to the length draw
        # @param max_length [Integer] second bound handed to the length draw
        def initialize(min_length = DEFAULT_MIN_LENGTH, max_length = DEFAULT_MAX_LENGTH)
          @min_length, @max_length = min_length, max_length
        end

        # @param field [Object] unused; the result does not depend on the input
        # @return [Object] the generated user name
        def anonymize(field)
          DataAnon::Utils::RandomString.generate(
            DataAnon::Utils::RandomInt.generate(@min_length, @max_length)
          )
        end
      end
    end
  end
end
@@ -0,0 +1,22 @@
1
module DataAnon
  module Strategy
    module Field
      # Field strategy that renders a template by evaluating it as a Ruby
      # double-quoted string with the field as `self`, so `#{...}`
      # expressions inside the template can call the field's methods.
      class StringTemplate
        # @param template [String] text of a double-quoted Ruby string,
        #   without the surrounding quotes
        def initialize(template)
          @template = template
        end

        # @param field [Object] object that becomes `self` while the
        #   template is evaluated
        # @return [String] the interpolated template
        def anonymize(field)
          # The binding is captured inside the field, which makes the field
          # the receiver for bare method calls in the template.
          context = field.instance_eval { binding }
          # NOTE(review): eval runs any Ruby embedded in the template, so
          # templates must only come from trusted configuration.
          eval('"' + @template + '"', context)
        end
      end
    end
  end
end
@@ -0,0 +1,22 @@
1
module DataAnon
  module Strategy
    module Field
      # Field strategy that builds a user name from a template. The template
      # is evaluated as a Ruby double-quoted string with the field as `self`,
      # so `#{...}` expressions can call the field's methods.
      class UserNameTemplate
        # @param template [String] text of a double-quoted Ruby string,
        #   without the surrounding quotes
        def initialize(template)
          @template = template
        end

        # @param field [Object] object that becomes `self` while the
        #   template is evaluated
        # @return [String] the interpolated template
        def anonymize(field)
          # Capturing the binding inside the field makes the field the
          # receiver for bare method calls in the template.
          context = field.instance_eval { binding }
          # NOTE(review): eval runs any Ruby embedded in the template, so
          # templates must only come from trusted configuration.
          eval('"' + @template + '"', context)
        end
      end
    end
  end
end
@@ -0,0 +1,17 @@
1
module DataAnon
  module Strategy
    module Field
      # Field strategy that leaves the data untouched: it hands back the
      # field's value exactly as it was received.
      class Whitelist
        # @param field [#value] object exposing the original value
        # @return [Object] field.value, unchanged
        def anonymize(field)
          original_value = field.value
          original_value
        end
      end
    end
  end
end
@@ -0,0 +1,4 @@
1
+ require 'strategy/base'
2
+ require 'strategy/whitelist'
3
+ require 'strategy/blacklist'
4
+ require 'strategy/field/fields'
@@ -0,0 +1,21 @@
1
module DataAnon
  module Strategy
    # Record strategy that builds a destination record from a source record.
    # Every non-nil, non-primary-key attribute is run through a field
    # strategy: the one registered in @fields under the lower-cased field
    # name, or a DefaultAnon built from @user_strategies when none is
    # registered. The primary key is copied over unchanged.
    class Whitelist < DataAnon::Strategy::Base
      # @param index [Object] position of the record, forwarded to
      #   DataAnon::Core::Field.new
      # @param record [#attributes, #[]] source record
      # @return [Object] the result of save! on the destination record
      def process_record(index, record)
        anonymized_attributes = {}

        record.attributes.each do |field_name, field_value|
          # nil values and the primary key are left out of the attribute map.
          next if field_value.nil?
          next if field_name.downcase == @primary_key.downcase

          field = DataAnon::Core::Field.new(field_name, field_value, index, record)
          field_strategy = @fields[field_name.downcase]
          field_strategy ||= DataAnon::Strategy::Field::DefaultAnon.new(@user_strategies)
          anonymized_attributes[field_name] = field_strategy.anonymize(field)
        end

        dest_record = dest_table.new(anonymized_attributes)
        dest_record[@primary_key] = record[@primary_key]
        dest_record.save!
      end
    end
  end
end
@@ -0,0 +1,19 @@
1
+ require 'rake'
2
+ require 'rake/tasklib'
3
+
4
module DataAnonymization
  # Defines the gem's rake tasks when instantiated.
  class RakeTasks
    include Rake::DSL if defined? Rake::DSL

    # Registers the :empty_dest task, which removes the sample destination
    # SQLite file and recreates it by feeding the schema SQL to sqlite3.
    def initialize
      desc "Task to build the clean empty destination database"
      task(:empty_dest) do
        # Commands run in order; system does not raise when one fails.
        [
          "rm sample-data/chinook-empty.sqlite",
          "sqlite3 sample-data/chinook-empty.sqlite < sample-data/chinook_schema.sql"
        ].each { |command| system command }
      end
    end
  end
end
19
+
@@ -0,0 +1,53 @@
1
+ require 'active_record'
2
+ require 'logger'
3
+
4
module DataAnon
  module Utils
    # Mass-assignment sanitizer whose removed-attributes hook does nothing.
    class MassAssignmentIgnoreSanitizer < ActiveModel::MassAssignmentSecurity::Sanitizer
      def process_removed_attributes(attrs); end
    end

    # Abstract ActiveRecord base class named for a temporary database.
    class TempDatabase < ActiveRecord::Base
      self.abstract_class = true
    end

    # Abstract ActiveRecord base class that SourceTable models inherit from.
    class SourceDatabase < ActiveRecord::Base
      self.abstract_class = true
    end

    # Abstract ActiveRecord base class that DestinationTable models inherit from.
    class DestinationDatabase < ActiveRecord::Base
      self.abstract_class = true
    end

    # Factory for anonymous model classes bound to a table.
    class BaseTable
      # Builds an anonymous subclass of the given database class.
      #
      # @param table_name [String] assigned to the model's table_name
      # @param primary_key [String, nil] assigned to the model's primary_key
      # @param database [Class] superclass of the generated model
      # @return [Class] the generated model class
      def self.create_table(table_name, primary_key, database)
        Class.new(database) do |model|
          model.table_name = table_name
          model.primary_key = primary_key
          model.mass_assignment_sanitizer = MassAssignmentIgnoreSanitizer.new(model)
        end
      end
    end

    # Builds model classes on top of SourceDatabase.
    class SourceTable < BaseTable
      # @param table_name [String] table the model maps to
      # @param primary_key [String, nil] primary key column, if any
      # @return [Class] model class inheriting from SourceDatabase
      def self.create(table_name, primary_key = nil)
        create_table(table_name, primary_key, SourceDatabase)
      end
    end

    # Builds model classes on top of DestinationDatabase.
    class DestinationTable < BaseTable
      # @param table_name [String] table the model maps to
      # @param primary_key [String, nil] primary key column, if any
      # @return [Class] model class inheriting from DestinationDatabase
      def self.create(table_name, primary_key = nil)
        create_table(table_name, primary_key, DestinationDatabase)
      end
    end
  end
end
@@ -0,0 +1,29 @@
1
+ require 'active_record'
2
+ require 'logger'
3
+
4
module DataAnon
  module Utils
    # Mixin providing a general logger and a progress logger. Both are kept
    # in class variables of this module.
    module Logging
      # @return [Logger] the current logger; on first use a Logger writing
      #   to STDOUT is created and installed through #logger=
      def logger
        return @@logger if defined?(@@logger) && @@logger

        self.logger = Logger.new(STDOUT)
      end

      # Installs the logger here and as ActiveRecord::Base.logger.
      #
      # @param logger [Logger] logger to use from now on
      def logger=(logger)
        @@logger = logger
        ActiveRecord::Base.logger = logger
        @@logger
      end

      # @return [Logger] the current progress logger; on first use a Logger
      #   writing to STDOUT is created and installed through #progress_logger=
      def progress_logger
        return @@progress_logger if defined?(@@progress_logger) && @@progress_logger

        self.progress_logger = Logger.new(STDOUT)
      end

      # Installs the progress logger after replacing its formatter with one
      # that emits only the raw message.
      #
      # @param logger [Logger] logger whose formatter is overwritten
      def progress_logger=(logger)
        logger.formatter = proc { |_severity, _datetime, _progname, message| message }
        @@progress_logger = logger
      end
    end
  end
end
@@ -0,0 +1,15 @@
1
module DataAnon
  module Utils
    # Random integer generator used by the field strategies.
    class RandomInt
      DEFAULT_MIN = 1
      DEFAULT_MAX = 100

      # Returns a random integer between min and max, both bounds included.
      #
      # The upper bound is inclusive so that calls such as generate(0, 9)
      # can yield every digit and generate(0, size - 1) can yield every
      # index. generate(n, n) returns n.
      #
      # @param min [Integer, nil] lowest value; nil selects DEFAULT_MIN
      # @param max [Integer, nil] highest value; nil selects DEFAULT_MAX
      # @return [Integer] a value v with min <= v <= max
      def self.generate(min = nil, max = nil)
        # Locals rather than class-level instance variables: the bounds are
        # per-call data and must not linger on the class object.
        lower = min || DEFAULT_MIN
        upper = max || DEFAULT_MAX
        Random.new.rand(lower..upper)
      end
    end
  end
end