data-anonymization 0.1.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (83) hide show
  1. data/.documentup.json +8 -0
  2. data/.gitignore +20 -0
  3. data/.rspec +2 -0
  4. data/.rvmrc +1 -0
  5. data/.travis.yml +6 -0
  6. data/Gemfile +12 -0
  7. data/LICENSE.txt +22 -0
  8. data/README.md +256 -0
  9. data/Rakefile +9 -0
  10. data/blacklist_dsl.rb +19 -0
  11. data/blacklist_nosql_dsl.rb +36 -0
  12. data/data-anonymization.gemspec +22 -0
  13. data/lib/core/database.rb +36 -0
  14. data/lib/core/dsl.rb +16 -0
  15. data/lib/core/field.rb +18 -0
  16. data/lib/data-anonymization.rb +12 -0
  17. data/lib/strategy/base.rb +67 -0
  18. data/lib/strategy/blacklist.rb +18 -0
  19. data/lib/strategy/field/anonymize_time.rb +57 -0
  20. data/lib/strategy/field/anonymous.rb +21 -0
  21. data/lib/strategy/field/date_time_delta.rb +24 -0
  22. data/lib/strategy/field/default_anon.rb +28 -0
  23. data/lib/strategy/field/distinct_column_values.rb +25 -0
  24. data/lib/strategy/field/fields.rb +23 -0
  25. data/lib/strategy/field/gmail_template.rb +17 -0
  26. data/lib/strategy/field/lorem_ipsum.rb +29 -0
  27. data/lib/strategy/field/random_boolean.rb +19 -0
  28. data/lib/strategy/field/random_email.rb +31 -0
  29. data/lib/strategy/field/random_first_name.rb +18 -0
  30. data/lib/strategy/field/random_float_delta.rb +24 -0
  31. data/lib/strategy/field/random_full_name.rb +28 -0
  32. data/lib/strategy/field/random_int.rb +23 -0
  33. data/lib/strategy/field/random_integer_delta.rb +21 -0
  34. data/lib/strategy/field/random_last_name.rb +19 -0
  35. data/lib/strategy/field/random_mailinator_email.rb +20 -0
  36. data/lib/strategy/field/random_phone_number.rb +24 -0
  37. data/lib/strategy/field/random_selection.rb +23 -0
  38. data/lib/strategy/field/random_string.rb +22 -0
  39. data/lib/strategy/field/random_user_name.rb +23 -0
  40. data/lib/strategy/field/string_template.rb +22 -0
  41. data/lib/strategy/field/user_name_template.rb +22 -0
  42. data/lib/strategy/field/whitelist.rb +17 -0
  43. data/lib/strategy/strategies.rb +4 -0
  44. data/lib/strategy/whitelist.rb +21 -0
  45. data/lib/tasks/rake_tasks.rb +19 -0
  46. data/lib/utils/database.rb +53 -0
  47. data/lib/utils/logging.rb +29 -0
  48. data/lib/utils/random_int.rb +15 -0
  49. data/lib/utils/random_string.rb +14 -0
  50. data/lib/utils/resource.rb +13 -0
  51. data/lib/version.rb +3 -0
  52. data/resources/first_names.txt +500 -0
  53. data/resources/last_names.txt +500 -0
  54. data/spec/acceptance/rdbms_blacklist_spec.rb +30 -0
  55. data/spec/acceptance/rdbms_whitelist_spec.rb +50 -0
  56. data/spec/spec_helper.rb +26 -0
  57. data/spec/strategy/field/anonymize_time_spec.rb +23 -0
  58. data/spec/strategy/field/date_time_delta_spec.rb +43 -0
  59. data/spec/strategy/field/distinct_column_values_spec.rb +22 -0
  60. data/spec/strategy/field/gmail_template_spec.rb +14 -0
  61. data/spec/strategy/field/lorem_ipsum_spec.rb +27 -0
  62. data/spec/strategy/field/random_boolean_spec.rb +16 -0
  63. data/spec/strategy/field/random_email_spec.rb +18 -0
  64. data/spec/strategy/field/random_first_name_spec.rb +14 -0
  65. data/spec/strategy/field/random_float_delta_spec.rb +21 -0
  66. data/spec/strategy/field/random_full_name_spec.rb +23 -0
  67. data/spec/strategy/field/random_int_spec.rb +28 -0
  68. data/spec/strategy/field/random_integer_delta_spec.rb +23 -0
  69. data/spec/strategy/field/random_last_name_spec.rb +14 -0
  70. data/spec/strategy/field/random_mailinator_email_spec.rb +21 -0
  71. data/spec/strategy/field/random_phone_number_spec.rb +35 -0
  72. data/spec/strategy/field/random_selection_spec.rb +36 -0
  73. data/spec/strategy/field/random_string_spec.rb +23 -0
  74. data/spec/strategy/field/random_user_name_spec.rb +23 -0
  75. data/spec/strategy/field/string_template_spec.rb +15 -0
  76. data/spec/strategy/field/user_name_template_spec.rb +13 -0
  77. data/spec/strategy/field/whitelist_spec.rb +21 -0
  78. data/spec/support/customer_sample.rb +43 -0
  79. data/spec/utils/database_spec.rb +26 -0
  80. data/spec/utils/random_int_spec.rb +9 -0
  81. data/spec/utils/random_string_spec.rb +8 -0
  82. data/whitelist_dsl.rb +44 -0
  83. metadata +192 -0
@@ -0,0 +1,16 @@
1
module DataAnon
  module Core
    # Top-level vocabulary of an anonymization script.
    module DSL
      include Utils::Logging

      # Opens a database section of the script.
      #
      # Writes the database name to the debug log, creates a
      # DataAnon::Core::Database for +name+ and evaluates +block+ with that
      # object as +self+.
      #
      # name  - identifier handed to DataAnon::Core::Database.new
      # block - body of the database section
      #
      # Returns whatever the block evaluates to.
      def database(name, &block)
        logger.debug "#{name} : Database"
        target = DataAnon::Core::Database.new(name)
        target.instance_eval(&block)
      end
    end
  end
end

# Mixed in at the top level so +database+ can be called without a receiver.
include DataAnon::Core::DSL
16
+
@@ -0,0 +1,18 @@
1
module DataAnon
  module Core
    # Plain value holder handed to field strategies: the column name, the
    # current value, the row counter and the record the value belongs to.
    class Field
      attr_accessor :name, :value, :row_number, :ar_record

      # name       - column name
      # value      - current value of the column
      # row_number - position of the record in the current run
      # ar_record  - record object the value was read from
      def initialize(name, value, row_number, ar_record)
        @name, @value = name, value
        @row_number, @ar_record = row_number, ar_record
      end
    end
  end
end
@@ -0,0 +1,12 @@
1
+ require "version"
2
+
3
+ require "utils/logging"
4
+ require "utils/random_int"
5
+ require "utils/random_string"
6
+ require "utils/resource"
7
+ require "core/database"
8
+ require "core/field"
9
+ require "strategy/strategies"
10
+ require "utils/database"
11
+ require "core/dsl"
12
+
@@ -0,0 +1,67 @@
1
module DataAnon
  module Strategy
    # Shared behaviour of table strategies: collects the per-field strategies
    # declared in a table block and walks the source table record by record.
    # Subclasses supply +process_record(index, record)+.
    class Base
      include Utils::Logging

      # name            - table name
      # user_strategies - strategies handed to DefaultAnon for fields that are
      #                   anonymized without an explicit strategy
      def initialize(name, user_strategies)
        @name = name
        @user_strategies = user_strategies
        @fields = {}
      end

      # Evaluates the table block against this strategy and returns self.
      def process_fields(&block)
        instance_eval(&block)
        self
      end

      # Records the primary key column of the table.
      def primary_key(field)
        @primary_key = field
      end

      # Registers a Whitelist field strategy for every given field name
      # (names are stored downcased).
      def whitelist(*fields)
        fields.each do |column|
          @fields[column.downcase] = DataAnon::Strategy::Field::Whitelist.new
        end
      end

      # Hash of downcased field name => field strategy.
      def fields
        @fields
      end

      # Registers anonymization for the given fields.
      #
      # With a block, every field gets an Anonymous strategy wrapping the block
      # and the list of field names is returned.
      #
      # Without a block, every field gets a DefaultAnon strategy and an
      # anonymous class is returned whose +using(strategy)+ class method
      # replaces that default, enabling `anonymize('a').using SomeStrategy.new`.
      def anonymize(*fields, &block)
        unless block.nil?
          return fields.each do |column|
            @fields[column.downcase] = DataAnon::Strategy::Field::Anonymous.new(&block)
          end
        end

        fields.each do |column|
          @fields[column.downcase] = DataAnon::Strategy::Field::DefaultAnon.new(@user_strategies)
        end

        # The block below runs with the new class as self, so the strategy
        # instance has to be reached through a captured local.
        owner = self
        Class.new do
          @temp_fields = fields
          @table_fields = owner.fields

          def self.using(field_strategy)
            @temp_fields.each { |column| @table_fields[column.downcase] = field_strategy }
          end
        end
      end

      # Destination table object, created on first use and then cached.
      def dest_table
        @dest_table ||= Utils::DestinationTable.create(@name, @primary_key)
      end

      # Source table object, created on first use and then cached.
      def source_table
        @source_table ||= Utils::SourceTable.create(@name, @primary_key)
      end

      # Iterates over the source table in batches of 100 and passes each record
      # with its 1-based counter to +process_record+, logging progress.
      def process
        logger.debug "Processing table #{@name} with fields strategies #{@fields}"
        progress_logger.info "Table: #{@name} (#{source_table.count} records) "
        row_number = 1
        source_table.find_each(:batch_size => 100) do |record|
          progress_logger.info "."
          process_record(row_number, record)
          row_number += 1
        end
        progress_logger.info " DONE\n"
      end
    end
  end
end
@@ -0,0 +1,18 @@
1
module DataAnon
  module Strategy
    # Table strategy that anonymizes only the declared fields, in place, and
    # leaves every other column of the record untouched.
    class Blacklist < DataAnon::Strategy::Base

      # Applies each configured field strategy to +record+ and saves it.
      #
      # index  - counter of the record within the current run, passed on to
      #          the field strategies through DataAnon::Core::Field
      # record - record to anonymize; must respond to +attributes+, +[]=+
      #          and +save!+
      #
      # A field is skipped when the record has no matching column (names are
      # compared case-insensitively), when its value is nil, or when it is the
      # primary key. A missing primary key declaration is tolerated.
      #
      # Raises whatever +record.save!+ raises.
      def process_record(index, record)
        # to_s makes both nil and Symbol primary keys safe to compare.
        primary_key_name = @primary_key.to_s.downcase

        @fields.each do |field_name, strategy|
          attributes = record.attributes
          column = attributes.keys.find { |key| key.downcase == field_name.to_s }
          next if column.nil?

          value = attributes[column]
          next if value.nil? || column.downcase == primary_key_name

          field = DataAnon::Core::Field.new(column, value, index, record)
          record[column] = strategy.anonymize(field)
        end
        record.save!
      end
    end
  end
end
@@ -0,0 +1,57 @@
1
module DataAnon
  module Strategy
    module Field

      # Replaces selected components (month, day, hour, minute, second) of a
      # time value with random ones. The year is always kept.
      class AnonymizeTime

        DEFAULT_ANONYMIZATION = true

        # Days per month for a non-leap year, January first.
        DAYS_IN_MONTH = [31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]

        # Strategy that randomizes only the month.
        def self.only_month
          new(true, false, false, false, false)
        end

        # Strategy that randomizes only the day of the month.
        def self.only_day
          new(false, true, false, false, false)
        end

        # Strategy that randomizes only the hour.
        def self.only_hour
          new(false, false, true, false, false)
        end

        # Strategy that randomizes only the minute.
        def self.only_minute
          new(false, false, false, true, false)
        end

        # Each flag selects whether the corresponding component is randomized;
        # all default to DEFAULT_ANONYMIZATION.
        def initialize anonymize_month = DEFAULT_ANONYMIZATION, anonymize_day = DEFAULT_ANONYMIZATION, anonymize_hour = DEFAULT_ANONYMIZATION, anonymize_min = DEFAULT_ANONYMIZATION, anonymize_sec = DEFAULT_ANONYMIZATION
          @anonymize_month = anonymize_month
          @anonymize_day = anonymize_day
          @anonymize_hour = anonymize_hour
          @anonymize_min = anonymize_min
          @anonymize_sec = anonymize_sec
        end

        # Builds a new Time from +field.value+ with the selected components
        # replaced by random values.
        #
        # field - object whose +value+ responds to year, month, day, hour,
        #         min and sec
        #
        # Random components stay within valid clock/calendar ranges
        # (hour 0-23, minute and second 0-59, day 1..last day of the month) so
        # Time.new neither raises nor rolls over into the next month. A day
        # that is kept but does not exist in a randomly chosen month is
        # clamped to that month's last day.
        #
        # NOTE(review): assumes RandomInt.generate(min, max) includes both
        # bounds - confirm against DataAnon::Utils::RandomInt.
        def anonymize field
          provided_time = field.value
          year = provided_time.year
          month = @anonymize_month ? DataAnon::Utils::RandomInt.generate(1, 12) : provided_time.month
          last_day = days_in_month(year, month)
          day = @anonymize_day ? DataAnon::Utils::RandomInt.generate(1, last_day) : [provided_time.day, last_day].min
          hour = @anonymize_hour ? DataAnon::Utils::RandomInt.generate(0, 23) : provided_time.hour
          min = @anonymize_min ? DataAnon::Utils::RandomInt.generate(0, 59) : provided_time.min
          sec = @anonymize_sec ? DataAnon::Utils::RandomInt.generate(0, 59) : provided_time.sec

          create_object(day, hour, min, month, sec, year)
        end

        private

        # Assembles the resulting Time (local time zone, as Time.new does).
        def create_object(day, hour, min, month, sec, year)
          Time.new(year, month, day, hour, min, sec)
        end

        # Number of days in +month+ of +year+, honouring leap years.
        def days_in_month(year, month)
          return 29 if month == 2 && leap_year?(year)
          DAYS_IN_MONTH[month - 1]
        end

        # Gregorian leap-year rule.
        def leap_year?(year)
          (year % 4 == 0) && (year % 100 != 0 || year % 400 == 0)
        end

      end
    end
  end
end
@@ -0,0 +1,21 @@
1
module DataAnon
  module Strategy
    module Field

      # Field strategy backed by a caller-supplied block.
      class Anonymous

        # block - called with the field on every anonymize call
        def initialize(&block)
          @block = block
        end

        # Returns the value the stored block produces for +field+.
        def anonymize(field)
          @block.call(field)
        end

      end

    end
  end
end
@@ -0,0 +1,24 @@
1
module DataAnon
  module Strategy
    module Field

      # Shifts a date/time value by a random number of days and minutes, each
      # drawn from -delta..+delta.
      class DateTimeDelta

        DEFAULT_DAY_DELTA = 10
        DEFAULT_MINUTE_DELTA = 30

        # day_delta    - largest shift in days, in either direction
        # minute_delta - largest shift in minutes, in either direction
        def initialize(day_delta = DEFAULT_DAY_DELTA, minute_delta = DEFAULT_MINUTE_DELTA)
          @day_delta = day_delta
          @minute_delta = minute_delta
        end

        # Returns +field.value+ moved by the random day and minute offsets.
        # Relies on Integer#days and Integer#minutes being available
        # (presumably from ActiveSupport - confirm).
        def anonymize(field)
          day_adjustment = random_offset(@day_delta)
          minute_adjustment = random_offset(@minute_delta)
          shifted_by_days = field.value + day_adjustment.days
          shifted_by_days + minute_adjustment.minutes
        end

        private

        # A delta of zero yields 0 without consulting the random generator.
        def random_offset(delta)
          if delta == 0
            0
          else
            DataAnon::Utils::RandomInt.generate(-delta, delta)
          end
        end

      end
    end
  end
end
@@ -0,0 +1,28 @@
1
module DataAnon
  module Strategy
    module Field

      # Picks a field strategy from the type of the value being anonymized.
      class DefaultAnon

        FS = DataAnon::Strategy::Field
        DEFAULT_STRATEGIES = {:string => FS::LoremIpsum.new,
                              :integer => FS::RandomInt.new(18,70),
                              :datetime => FS::DateTimeDelta.new,
                              :boolean => FS::RandomBoolean.new
        }

        # user_defaults - Hash of type key => strategy overriding or extending
        #                 DEFAULT_STRATEGIES; nil is treated as no overrides
        def initialize user_defaults
          @user_defaults = DEFAULT_STRATEGIES.merge(user_defaults || {})
        end

        # Anonymizes +field+ with the strategy registered for its value's
        # type; values of unregistered types go through Whitelist.
        def anonymize field
          strategy_for(field.value).anonymize field
        end

        private

        # Looks up the strategy for +value+.
        #
        # The downcased class name is tried first so keys matching the exact
        # class (e.g. :string, or a user-supplied :fixnum) keep working. The
        # generic type key is tried next: class names such as Fixnum, Time or
        # TrueClass never equal :integer, :datetime or :boolean, so without
        # this step those defaults could not be selected.
        def strategy_for(value)
          class_key = value.class.to_s.downcase.to_sym
          @user_defaults[class_key] || @user_defaults[type_key(value)] || FS::Whitelist.new
        end

        # Maps a value to one of the generic keys used by DEFAULT_STRATEGIES,
        # or nil when no generic type applies.
        def type_key(value)
          case value
          when String then :string
          when Integer then :integer
          when true, false then :boolean
          when Time then :datetime
          else
            # DateTime only exists once the 'date' library has been loaded.
            :datetime if defined?(::DateTime) && value.is_a?(::DateTime)
          end
        end

      end

    end
  end
end
@@ -0,0 +1,25 @@
1
module DataAnon
  module Strategy
    module Field

      # Replaces a value with a randomly chosen value from the distinct values
      # found in a column of a source table.
      class DistinctColumnValues
        include Utils::Logging

        # Loads the distinct values of +field_name+ from +table_name+ once, at
        # construction time, and logs them at debug level.
        def initialize(table_name, field_name)
          source = Utils::SourceTable.create(table_name)
          distinct_records = source.select(field_name).uniq
          @values = distinct_records.collect { |record| record[field_name] }
          logger.debug "For field strategy #{table_name}:#{field_name} using values #{@values} "
        end

        # Returns one of the loaded values. With a single value it is returned
        # directly, without consulting the random generator.
        def anonymize(field)
          if @values.length == 1
            @values[0]
          else
            last_index = @values.length - 1
            @values[DataAnon::Utils::RandomInt.generate(0, last_index)]
          end
        end

      end

    end
  end
end
@@ -0,0 +1,23 @@
1
+ require 'strategy/field/whitelist'
2
+ require 'strategy/field/string_template'
3
+ require 'strategy/field/user_name_template'
4
+ require 'strategy/field/random_string'
5
+ require 'strategy/field/random_int'
6
+ require 'strategy/field/random_boolean'
7
+ require 'strategy/field/anonymize_time'
8
+ require 'strategy/field/random_integer_delta'
9
+ require 'strategy/field/random_float_delta'
10
+ require 'strategy/field/random_selection'
11
+ require 'strategy/field/distinct_column_values'
12
+ require 'strategy/field/lorem_ipsum'
13
+ require 'strategy/field/gmail_template'
14
+ require 'strategy/field/date_time_delta'
15
+ require 'strategy/field/default_anon'
16
+ require 'strategy/field/random_email'
17
+ require 'strategy/field/random_mailinator_email'
18
+ require 'strategy/field/random_phone_number'
19
+ require 'strategy/field/random_first_name'
20
+ require 'strategy/field/random_last_name'
21
+ require 'strategy/field/random_full_name'
22
+ require 'strategy/field/random_user_name'
23
+ require 'strategy/field/anonymous'
@@ -0,0 +1,17 @@
1
module DataAnon
  module Strategy
    module Field
      # Generates unique Gmail "plus" addresses of the form
      # "<username>+<row number>@gmail.com".
      class GmailTemplate

        # gmail_address - a full address ("someone@gmail.com") or just the
        #                 user name part ("someone")
        def initialize gmail_address = nil
          @gmail_address = gmail_address
        end

        # Returns the address for +field.row_number+.
        #
        # Raises ArgumentError when the strategy was created without a usable
        # address (nil, empty, or nothing before the '@').
        def anonymize field
          "#{username}+#{field.row_number}@gmail.com"
        end

        private

        # Part of the configured address before the first '@'; the whole
        # string when it contains no '@'.
        def username
          local_part = @gmail_address.to_s.split('@', 2).first.to_s
          if local_part.empty?
            raise ArgumentError, "GmailTemplate needs a gmail address or user name, got #{@gmail_address.inspect}"
          end
          local_part
        end
      end
    end
  end
end
@@ -0,0 +1,29 @@
1
module DataAnon
  module Strategy
    module Field

      # Replaces a string with the leading characters of a filler text, taking
      # as many characters as the original value has.
      class LoremIpsum

        DEFAULT_TEXT = <<-default
Lorem ipsum dolor sit amet, consectetur adipiscing elit. Sed quis nulla quis ligula bibendum dignissim. Nullam elementum convallis mauris, at ultrices odio dignissim dapibus. Etiam vitae neque lorem, a luctus purus. In at diam mi, sit amet dapibus magna. Maecenas tincidunt tortor id dolor tristique dictum. Morbi pulvinar odio ut lorem gravida ac varius orci ultrices. Nulla id arcu dui, sit amet commodo augue. Curabitur elit elit, semper quis tincidunt at, auctor et tortor.
Quisque ut enim arcu. Praesent orci mi, tincidunt non sodales a, blandit ac nunc. Phasellus sed erat a nibh suscipit molestie sed a augue. Aliquam pretium ultricies nibh. Sed sit amet accumsan sapien. Pellentesque urna orci, iaculis eu lacinia ac, consequat vel elit. Suspendisse aliquet tortor et urna varius non ullamcorper augue tempus. Phasellus pretium, nulla eu adipiscing viverra, purus est fermentum enim, ut fringilla ligula lectus quis est. Phasellus quis scelerisque ligula. Cras accumsan lobortis egestas. Ut quis orci sem, sed gravida orci.
Vestibulum eget odio nisl, nec ornare ante. Aenean tristique, nisl eget lacinia aliquam, neque lectus lacinia enim, id ullamcorper nisl lorem vitae enim. Sed vulputate condimentum convallis. Ut viverra tincidunt arcu ac egestas. Quisque ut neque nec quam suscipit ornare a ornare est. Nulla facilisi. Mauris facilisis eleifend neque eget egestas. Vestibulum egestas dui eleifend urna pharetra a hendrerit quam sagittis. Duis ut turpis convallis diam interdum congue. In hac habitasse platea dictumst. Nulla a erat eget tortor tempor consectetur. Fusce euismod congue risus in feugiat. Sed rutrum vehicula lectus et vehicula. In porttitor malesuada sem at auctor.
Maecenas lacinia placerat augue quis posuere. Cras eu augue quam, eu malesuada sem. Proin facilisis iaculis lectus, vel hendrerit nulla tristique quis. Donec risus mauris, vulputate tristique feugiat nec, imperdiet sed sapien. Cum sociis natoque penatibus et magnis dis parturient montes, nascetur ridiculus mus. Aenean vitae aliquam magna. Donec tempor, ipsum non dapibus elementum, est sem hendrerit nulla, scelerisque sollicitudin lacus mauris eu libero. Vivamus turpis justo, ullamcorper sed ullamcorper quis, tempor in elit. Sed nisl erat, laoreet at adipiscing quis, lobortis et est. Duis congue iaculis mollis. Curabitur ligula turpis, malesuada non feugiat vitae, ullamcorper non nibh. Aliquam adipiscing pellentesque leo nec molestie. Donec tempor eleifend libero, at rutrum velit semper a. Sed tincidunt dictum lorem eu egestas.
Sed at iaculis risus. Nulla aliquet vulputate nulla, nec euismod sem porta quis. Aliquam erat volutpat. Sed tincidunt pharetra metus, in facilisis nunc suscipit ut. Nunc placerat vulputate sapien, elementum varius mi viverra eget. Nam hendrerit felis et arcu ultrices vehicula. Phasellus condimentum ornare orci sed placerat. Sed vel rutrum lorem. Fusce id bibendum ipsum.
        default

        # text - filler text to cut from; DEFAULT_TEXT when omitted
        def initialize(text = nil)
          @text = text || DEFAULT_TEXT
        end

        # Returns the first +field.value.length+ characters of the filler text
        # (fewer when the filler text is shorter than the value).
        def anonymize(field)
          wanted_length = field.value.length
          @text[0, wanted_length]
        end

      end

    end
  end
end
@@ -0,0 +1,19 @@
1
module DataAnon
  module Strategy
    module Field

      # Replaces a value with a randomly chosen true or false.
      class RandomBoolean

        BOOL_VALUES = [true,false]

        # Returns a random element of BOOL_VALUES; +field+ is not consulted.
        def anonymize(field)
          BOOL_VALUES.sample
        end

      end

    end
  end
end
@@ -0,0 +1,31 @@
1
module DataAnon
  module Strategy
    module Field

      # Generates a random e-mail address "<user>@<host>.<tld>".
      class RandomEmail

        TLDS = ['com','org','net','edu','gov','mil','biz','info']

        # hostname - fixed host part; a random one is generated when nil
        # tld      - fixed top-level domain; picked from TLDS when nil
        def initialize(hostname = nil, tld = nil)
          @hostname = hostname
          @tld = tld
        end

        # Returns a new address. The user name is a random string whose length
        # comes from RandomInt.generate(5,15); a generated host name gets its
        # length from RandomInt.generate(2,10). +field+ is not consulted.
        def anonymize(field)
          username_length = DataAnon::Utils::RandomInt.generate(5,15)
          host_name_length = DataAnon::Utils::RandomInt.generate(2,10)

          local_part = DataAnon::Utils::RandomString.generate(username_length)
          host = @hostname || DataAnon::Utils::RandomString.generate(host_name_length)
          domain_suffix = @tld || TLDS[rand(TLDS.length)]

          local_part + "@" + host + "." + domain_suffix
        end
      end
    end

  end
end
@@ -0,0 +1,18 @@
1
module DataAnon
  module Strategy
    module Field

      # Replaces a value with a random entry from a list of first names.
      class RandomFirstName

        # file_path - file with whitespace-separated names; when omitted the
        #             path comes from Resource.file('first_names.txt')
        #
        # The file is read completely at construction time.
        def initialize(file_path = nil)
          names_file = file_path || DataAnon::Utils::Resource.file('first_names.txt')
          @names = File.read(names_file).split
        end

        # Returns a random name from the loaded list; +field+ is not consulted.
        def anonymize(field)
          random_index = rand(@names.size)
          @names[random_index]
        end
      end
    end
  end
end