data-anonymization 0.1.2
Sign up to get free protection for your applications and to get access to all the features.
- data/.documentup.json +8 -0
- data/.gitignore +20 -0
- data/.rspec +2 -0
- data/.rvmrc +1 -0
- data/.travis.yml +6 -0
- data/Gemfile +12 -0
- data/LICENSE.txt +22 -0
- data/README.md +256 -0
- data/Rakefile +9 -0
- data/blacklist_dsl.rb +19 -0
- data/blacklist_nosql_dsl.rb +36 -0
- data/data-anonymization.gemspec +22 -0
- data/lib/core/database.rb +36 -0
- data/lib/core/dsl.rb +16 -0
- data/lib/core/field.rb +18 -0
- data/lib/data-anonymization.rb +12 -0
- data/lib/strategy/base.rb +67 -0
- data/lib/strategy/blacklist.rb +18 -0
- data/lib/strategy/field/anonymize_time.rb +57 -0
- data/lib/strategy/field/anonymous.rb +21 -0
- data/lib/strategy/field/date_time_delta.rb +24 -0
- data/lib/strategy/field/default_anon.rb +28 -0
- data/lib/strategy/field/distinct_column_values.rb +25 -0
- data/lib/strategy/field/fields.rb +23 -0
- data/lib/strategy/field/gmail_template.rb +17 -0
- data/lib/strategy/field/lorem_ipsum.rb +29 -0
- data/lib/strategy/field/random_boolean.rb +19 -0
- data/lib/strategy/field/random_email.rb +31 -0
- data/lib/strategy/field/random_first_name.rb +18 -0
- data/lib/strategy/field/random_float_delta.rb +24 -0
- data/lib/strategy/field/random_full_name.rb +28 -0
- data/lib/strategy/field/random_int.rb +23 -0
- data/lib/strategy/field/random_integer_delta.rb +21 -0
- data/lib/strategy/field/random_last_name.rb +19 -0
- data/lib/strategy/field/random_mailinator_email.rb +20 -0
- data/lib/strategy/field/random_phone_number.rb +24 -0
- data/lib/strategy/field/random_selection.rb +23 -0
- data/lib/strategy/field/random_string.rb +22 -0
- data/lib/strategy/field/random_user_name.rb +23 -0
- data/lib/strategy/field/string_template.rb +22 -0
- data/lib/strategy/field/user_name_template.rb +22 -0
- data/lib/strategy/field/whitelist.rb +17 -0
- data/lib/strategy/strategies.rb +4 -0
- data/lib/strategy/whitelist.rb +21 -0
- data/lib/tasks/rake_tasks.rb +19 -0
- data/lib/utils/database.rb +53 -0
- data/lib/utils/logging.rb +29 -0
- data/lib/utils/random_int.rb +15 -0
- data/lib/utils/random_string.rb +14 -0
- data/lib/utils/resource.rb +13 -0
- data/lib/version.rb +3 -0
- data/resources/first_names.txt +500 -0
- data/resources/last_names.txt +500 -0
- data/spec/acceptance/rdbms_blacklist_spec.rb +30 -0
- data/spec/acceptance/rdbms_whitelist_spec.rb +50 -0
- data/spec/spec_helper.rb +26 -0
- data/spec/strategy/field/anonymize_time_spec.rb +23 -0
- data/spec/strategy/field/date_time_delta_spec.rb +43 -0
- data/spec/strategy/field/distinct_column_values_spec.rb +22 -0
- data/spec/strategy/field/gmail_template_spec.rb +14 -0
- data/spec/strategy/field/lorem_ipsum_spec.rb +27 -0
- data/spec/strategy/field/random_boolean_spec.rb +16 -0
- data/spec/strategy/field/random_email_spec.rb +18 -0
- data/spec/strategy/field/random_first_name_spec.rb +14 -0
- data/spec/strategy/field/random_float_delta_spec.rb +21 -0
- data/spec/strategy/field/random_full_name_spec.rb +23 -0
- data/spec/strategy/field/random_int_spec.rb +28 -0
- data/spec/strategy/field/random_integer_delta_spec.rb +23 -0
- data/spec/strategy/field/random_last_name_spec.rb +14 -0
- data/spec/strategy/field/random_mailinator_email_spec.rb +21 -0
- data/spec/strategy/field/random_phone_number_spec.rb +35 -0
- data/spec/strategy/field/random_selection_spec.rb +36 -0
- data/spec/strategy/field/random_string_spec.rb +23 -0
- data/spec/strategy/field/random_user_name_spec.rb +23 -0
- data/spec/strategy/field/string_template_spec.rb +15 -0
- data/spec/strategy/field/user_name_template_spec.rb +13 -0
- data/spec/strategy/field/whitelist_spec.rb +21 -0
- data/spec/support/customer_sample.rb +43 -0
- data/spec/utils/database_spec.rb +26 -0
- data/spec/utils/random_int_spec.rb +9 -0
- data/spec/utils/random_string_spec.rb +8 -0
- data/whitelist_dsl.rb +44 -0
- metadata +192 -0
data/lib/core/dsl.rb
ADDED
@@ -0,0 +1,16 @@
|
|
1
|
+
module DataAnon
  module Core
    # Entry point of the anonymization DSL.
    #
    # This module is included at the top level (see the bottom of this file),
    # so +database+ can be called directly from a script.
    module DSL
      include Utils::Logging

      # Builds a DataAnon::Core::Database for +name+ and evaluates the given
      # block in the context of that database object.
      #
      # @param name the database name; it is logged and passed to Database.new
      # @param block DSL block evaluated with the Database instance as +self+
      # @return the value returned by the block
      def database(name, &block)
        logger.debug "#{name} : Database"
        # Explicit parentheses avoid Ruby's "`&' interpreted as argument prefix" warning.
        DataAnon::Core::Database.new(name).instance_eval(&block)
      end
    end
  end
end

# Expose the DSL methods at the top level.
include DataAnon::Core::DSL
|
16
|
+
|
data/lib/core/field.rb
ADDED
@@ -0,0 +1,18 @@
|
|
1
|
+
module DataAnon
  module Core
    # Value object describing one field of one record while it is being
    # anonymized. Field strategies receive an instance of this class.
    class Field
      # name       - the field (column) name
      # value      - the current value of the field
      # row_number - the position of the record in the processing run
      # ar_record  - the record the field belongs to (presumably an
      #              ActiveRecord instance - confirm against the caller)
      attr_accessor :name, :value, :row_number, :ar_record

      def initialize(name, value, row_number, ar_record)
        @name = name
        @value = value
        @row_number = row_number
        @ar_record = ar_record
      end
    end
  end
end
|
@@ -0,0 +1,12 @@
|
|
1
|
+
require "version"
|
2
|
+
|
3
|
+
require "utils/logging"
|
4
|
+
require "utils/random_int"
|
5
|
+
require "utils/random_string"
|
6
|
+
require "utils/resource"
|
7
|
+
require "core/database"
|
8
|
+
require "core/field"
|
9
|
+
require "strategy/strategies"
|
10
|
+
require "utils/database"
|
11
|
+
require "core/dsl"
|
12
|
+
|
@@ -0,0 +1,67 @@
|
|
1
|
+
module DataAnon
  module Strategy
    # Common behaviour for table-level strategies.
    #
    # It collects a field-name => field-strategy mapping through the DSL
    # methods (+primary_key+, +whitelist+, +anonymize+) and walks the source
    # table, handing every record to +process_record+, which subclasses are
    # expected to implement.
    class Base
      include Utils::Logging

      # The field-name => field-strategy mapping built by the DSL calls.
      attr_reader :fields

      # @param name            the table name
      # @param user_strategies user supplied default strategies, passed on to
      #                        Field::DefaultAnon
      def initialize(name, user_strategies)
        @name = name
        @user_strategies = user_strategies
        @fields = {}
      end

      # Evaluates the DSL block in the context of this strategy.
      #
      # @return [Base] self
      def process_fields(&block)
        instance_eval(&block)
        self
      end

      # Declares the primary key field of the table.
      def primary_key(field)
        @primary_key = field
      end

      # Marks the given fields to be handled by Field::Whitelist.
      def whitelist(*fields)
        fields.each { |f| @fields[field_key(f)] = DataAnon::Strategy::Field::Whitelist.new }
      end

      # Registers a strategy for the given fields.
      #
      # With a block, the block itself becomes the strategy (wrapped in
      # Field::Anonymous). Without a block, the fields get Field::DefaultAnon
      # and an object responding to +using(field_strategy)+ is returned, so
      # the caller can write <tt>anonymize('Name').using SomeStrategy.new</tt>.
      def anonymize(*fields, &block)
        if block
          fields.each { |f| @fields[field_key(f)] = DataAnon::Strategy::Field::Anonymous.new(&block) }
        else
          fields.each { |f| @fields[field_key(f)] = DataAnon::Strategy::Field::DefaultAnon.new(@user_strategies) }
          table_fields = @fields
          # An anonymous class is kept as the return value for compatibility;
          # its +using+ closes over the field list instead of copying it into
          # instance variables.
          Class.new do
            define_singleton_method(:using) do |field_strategy|
              fields.each { |f| table_fields[f.to_s.downcase] = field_strategy }
            end
          end
        end
      end

      # Lazily created destination table for @name.
      def dest_table
        @dest_table ||= Utils::DestinationTable.create @name, @primary_key
      end

      # Lazily created source table for @name.
      def source_table
        @source_table ||= Utils::SourceTable.create @name, @primary_key
      end

      # Iterates over the source table in batches of 100 and passes every
      # record, together with its 1-based row number, to +process_record+.
      def process
        logger.debug "Processing table #{@name} with fields strategies #{@fields}"
        progress_logger.info "Table: #{@name} (#{source_table.count} records) "
        index = 1
        source_table.find_each(:batch_size => 100) do |record|
          progress_logger.info "."
          process_record index, record
          index += 1
        end
        progress_logger.info " DONE\n"
      end

      private

      # Normalizes a field name given as String or Symbol to the lower-case
      # String used as key in @fields. Without +to_s+ a Symbol stays a Symbol
      # and can never equal a String column name.
      def field_key(name)
        name.to_s.downcase
      end
    end
  end
end
|
@@ -0,0 +1,18 @@
|
|
1
|
+
module DataAnon
  module Strategy
    # Table strategy that replaces the value of every configured field on the
    # record itself and then saves the record.
    class Blacklist < DataAnon::Strategy::Base

      # Anonymizes the configured fields of a single record.
      #
      # Fields are matched against the record's attribute names ignoring
      # case. A field is left alone when no attribute matches, when its value
      # is nil, or when it is the primary key.
      #
      # @param index  row number passed on to the field strategies
      # @param record the record to update; must respond to +attributes+,
      #               +[]=+ and +save!+
      def process_record(index, record)
        # to_s keeps this working when no primary key was declared (nil).
        primary_key_name = @primary_key.to_s.downcase

        @fields.each do |field_name, strategy|
          attributes = record.attributes
          # to_s lets field names registered as Symbols match String columns.
          column = attributes.keys.find { |key| key.downcase == field_name.to_s }
          next if column.nil?

          value = attributes[column]
          next if value.nil? || column.downcase == primary_key_name

          field = DataAnon::Core::Field.new(column, value, index, record)
          record[column] = strategy.anonymize(field)
        end
        record.save!
      end
    end
  end
end
|
@@ -0,0 +1,57 @@
|
|
1
|
+
module DataAnon
  module Strategy
    module Field
      # Field strategy that replaces selected components (month, day, hour,
      # minute, second) of a time value with random ones. The year is always
      # kept.
      class AnonymizeTime
        DEFAULT_ANONYMIZATION = true

        # Days per month for a non-leap year, indexed by month - 1.
        DAYS_IN_MONTH = [31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31].freeze

        # Strategy that randomizes the month only.
        def self.only_month
          new true, false, false, false, false
        end

        # Strategy that randomizes the day only.
        def self.only_day
          new false, true, false, false, false
        end

        # Strategy that randomizes the hour only.
        def self.only_hour
          new false, false, true, false, false
        end

        # Strategy that randomizes the minute only.
        def self.only_minute
          new false, false, false, true, false
        end

        # Each flag decides whether the corresponding component is replaced
        # by a random value (true) or copied from the original (false).
        def initialize(anonymize_month = DEFAULT_ANONYMIZATION,
                       anonymize_day = DEFAULT_ANONYMIZATION,
                       anonymize_hour = DEFAULT_ANONYMIZATION,
                       anonymize_min = DEFAULT_ANONYMIZATION,
                       anonymize_sec = DEFAULT_ANONYMIZATION)
          @anonymize_month = anonymize_month
          @anonymize_day = anonymize_day
          @anonymize_hour = anonymize_hour
          @anonymize_min = anonymize_min
          @anonymize_sec = anonymize_sec
        end

        # @param field object whose +value+ responds to year, month, day,
        #              hour, min and sec
        # @return [Time] a new Time built from the kept and randomized parts
        def anonymize(field)
          provided_time = field.value
          year = provided_time.year
          month = @anonymize_month ? random_between(1, 12) : provided_time.month

          # The day must exist in the resulting month; otherwise Time.new
          # rolls over into the following month (e.g. Feb 31 -> Mar 3).
          last_day = days_in_month(year, month)
          day = @anonymize_day ? random_between(1, last_day) : [provided_time.day, last_day].min

          # Valid clock ranges are 0..23 / 0..59 / 0..59. The previous upper
          # bounds (24, 60, 60) made Time.new raise ArgumentError and never
          # produced 0.
          hour = @anonymize_hour ? random_between(0, 23) : provided_time.hour
          min = @anonymize_min ? random_between(0, 59) : provided_time.min
          sec = @anonymize_sec ? random_between(0, 59) : provided_time.sec

          create_object(day, hour, min, month, sec, year)
        end

        private

        # NOTE(review): assumes RandomInt.generate(min, max) may return both
        # bounds - confirm against lib/utils/random_int.rb.
        def random_between(min, max)
          DataAnon::Utils::RandomInt.generate(min, max)
        end

        # Number of days in the given month, honouring leap years.
        def days_in_month(year, month)
          return 29 if month == 2 && leap_year?(year)
          DAYS_IN_MONTH[month - 1]
        end

        # Gregorian leap-year rule.
        def leap_year?(year)
          (year % 4 == 0) && (year % 100 != 0 || year % 400 == 0)
        end

        def create_object(day, hour, min, month, sec, year)
          Time.new(year, month, day, hour, min, sec)
        end
      end
    end
  end
end
|
@@ -0,0 +1,24 @@
|
|
1
|
+
module DataAnon
  module Strategy
    module Field
      # Field strategy that shifts a date/time value by a random number of
      # days and minutes within the configured deltas.
      class DateTimeDelta
        DEFAULT_DAY_DELTA = 10
        DEFAULT_MINUTE_DELTA = 30

        # @param day_delta    maximum shift in days (applied in both directions)
        # @param minute_delta maximum shift in minutes (applied in both directions)
        def initialize(day_delta = DEFAULT_DAY_DELTA, minute_delta = DEFAULT_MINUTE_DELTA)
          @day_delta = day_delta
          @minute_delta = minute_delta
        end

        # @param field object whose +value+ supports + with the results of
        #              Integer#days and Integer#minutes (these helpers are not
        #              core Ruby; presumably ActiveSupport - confirm)
        # @return the shifted value
        def anonymize(field)
          field.value + random_adjustment(@day_delta).days + random_adjustment(@minute_delta).minutes
        end

        private

        # Random integer in -delta..delta; 0 when delta is 0. The absolute
        # value keeps the range well-formed if a negative delta was given.
        def random_adjustment(delta)
          return 0 if delta == 0
          bound = delta.abs
          DataAnon::Utils::RandomInt.generate(-bound, bound)
        end
      end
    end
  end
end
|
@@ -0,0 +1,28 @@
|
|
1
|
+
module DataAnon
  module Strategy
    module Field
      # Field strategy that picks another strategy based on the type of the
      # field's value. Values without a matching entry are passed to
      # Whitelist.
      class DefaultAnon
        FS = DataAnon::Strategy::Field

        DEFAULT_STRATEGIES = {
          :string => FS::LoremIpsum.new,
          :integer => FS::RandomInt.new(18, 70),
          :datetime => FS::DateTimeDelta.new,
          :boolean => FS::RandomBoolean.new
        }.freeze

        # @param user_defaults [Hash, nil] type key => strategy; entries
        #        override DEFAULT_STRATEGIES
        def initialize(user_defaults)
          @user_defaults = DEFAULT_STRATEGIES.merge(user_defaults || {})
        end

        # Delegates to the strategy registered for the value's type.
        #
        # The lower-cased class name is tried first, so existing user
        # configuration keeps working. The generic type key is the fallback:
        # a class-name lookup alone never reaches :boolean (TrueClass /
        # FalseClass), never maps Time to :datetime and misses :integer on
        # Rubies where integers are Fixnum/Bignum, which silently left such
        # values un-anonymized.
        def anonymize(field)
          value = field.value
          strategy = @user_defaults[class_key(value)] ||
                     @user_defaults[type_key(value)] ||
                     FS::Whitelist.new
          strategy.anonymize field
        end

        private

        # Lower-cased class name as a Symbol, e.g. "abc" => :string.
        def class_key(value)
          value.class.to_s.downcase.to_sym
        end

        # Generic type key used by DEFAULT_STRATEGIES, or nil when unknown.
        def type_key(value)
          case value
          when true, false then :boolean
          when String then :string
          when Integer then :integer
          when Time then :datetime
          else
            # acts_like_time? is not core Ruby; presumably provided by
            # ActiveSupport on its time-like classes - confirm.
            value.respond_to?(:acts_like_time?) ? :datetime : nil
          end
        end
      end
    end
  end
end
|
@@ -0,0 +1,25 @@
|
|
1
|
+
module DataAnon
  module Strategy
    module Field
      # Field strategy that returns a randomly chosen value out of the
      # distinct values of one column, which are loaded once at construction.
      class DistinctColumnValues
        include Utils::Logging

        # @param table_name table to read the values from
        # @param field_name column whose distinct values form the pool
        def initialize(table_name, field_name)
          source = Utils::SourceTable.create table_name
          @values = source.select(field_name).uniq.map { |record| record[field_name] }
          logger.debug "For field strategy #{table_name}:#{field_name} using values #{@values} "
        end

        # @param field unused; the result does not depend on the current value
        # @return one of the values loaded in the constructor
        def anonymize(field)
          # With a single value there is nothing to choose from.
          return @values.first if @values.length == 1
          @values[DataAnon::Utils::RandomInt.generate(0, @values.length - 1)]
        end
      end
    end
  end
end
|
@@ -0,0 +1,23 @@
|
|
1
|
+
require 'strategy/field/whitelist'
|
2
|
+
require 'strategy/field/string_template'
|
3
|
+
require 'strategy/field/user_name_template'
|
4
|
+
require 'strategy/field/random_string'
|
5
|
+
require 'strategy/field/random_int'
|
6
|
+
require 'strategy/field/random_boolean'
|
7
|
+
require 'strategy/field/anonymize_time'
|
8
|
+
require 'strategy/field/random_integer_delta'
|
9
|
+
require 'strategy/field/random_float_delta'
|
10
|
+
require 'strategy/field/random_selection'
|
11
|
+
require 'strategy/field/distinct_column_values'
|
12
|
+
require 'strategy/field/lorem_ipsum'
|
13
|
+
require 'strategy/field/gmail_template'
|
14
|
+
require 'strategy/field/date_time_delta'
|
15
|
+
require 'strategy/field/default_anon'
|
16
|
+
require 'strategy/field/random_email'
|
17
|
+
require 'strategy/field/random_mailinator_email'
|
18
|
+
require 'strategy/field/random_phone_number'
|
19
|
+
require 'strategy/field/random_first_name'
|
20
|
+
require 'strategy/field/random_last_name'
|
21
|
+
require 'strategy/field/random_full_name'
|
22
|
+
require 'strategy/field/random_user_name'
|
23
|
+
require 'strategy/field/anonymous'
|
@@ -0,0 +1,17 @@
|
|
1
|
+
module DataAnon
  module Strategy
    module Field
      # Field strategy that builds unique Gmail addresses of the form
      # "<username>+<row number>@gmail.com" from one real Gmail address.
      class GmailTemplate
        # @param gmail_address [String, nil] address whose part before the
        #        '@' is used as username; a value without '@' is used as the
        #        username as-is
        def initialize(gmail_address = nil)
          @gmail_address = gmail_address
          # Derived once here instead of on every anonymize call.
          @username = gmail_address && gmail_address.to_s.split('@', 2).first
        end

        # @param field object responding to +row_number+
        # @return [String] the generated address
        # @raise [ArgumentError] when no gmail address was configured
        #        (previously this surfaced as NoMethodError on nil)
        def anonymize(field)
          if @username.nil? || @username.empty?
            raise ArgumentError, 'GmailTemplate requires a gmail address, e.g. GmailTemplate.new("user@gmail.com")'
          end
          "#{@username}+#{field.row_number}@gmail.com"
        end
      end
    end
  end
end
|
@@ -0,0 +1,29 @@
|
|
1
|
+
module DataAnon
  module Strategy
    module Field
      # Field strategy that replaces a string with template text of the same
      # length, taken from the start of the template.
      class LoremIpsum

        DEFAULT_TEXT = <<-default
Lorem ipsum dolor sit amet, consectetur adipiscing elit. Sed quis nulla quis ligula bibendum dignissim. Nullam elementum convallis mauris, at ultrices odio dignissim dapibus. Etiam vitae neque lorem, a luctus purus. In at diam mi, sit amet dapibus magna. Maecenas tincidunt tortor id dolor tristique dictum. Morbi pulvinar odio ut lorem gravida ac varius orci ultrices. Nulla id arcu dui, sit amet commodo augue. Curabitur elit elit, semper quis tincidunt at, auctor et tortor.
Quisque ut enim arcu. Praesent orci mi, tincidunt non sodales a, blandit ac nunc. Phasellus sed erat a nibh suscipit molestie sed a augue. Aliquam pretium ultricies nibh. Sed sit amet accumsan sapien. Pellentesque urna orci, iaculis eu lacinia ac, consequat vel elit. Suspendisse aliquet tortor et urna varius non ullamcorper augue tempus. Phasellus pretium, nulla eu adipiscing viverra, purus est fermentum enim, ut fringilla ligula lectus quis est. Phasellus quis scelerisque ligula. Cras accumsan lobortis egestas. Ut quis orci sem, sed gravida orci.
Vestibulum eget odio nisl, nec ornare ante. Aenean tristique, nisl eget lacinia aliquam, neque lectus lacinia enim, id ullamcorper nisl lorem vitae enim. Sed vulputate condimentum convallis. Ut viverra tincidunt arcu ac egestas. Quisque ut neque nec quam suscipit ornare a ornare est. Nulla facilisi. Mauris facilisis eleifend neque eget egestas. Vestibulum egestas dui eleifend urna pharetra a hendrerit quam sagittis. Duis ut turpis convallis diam interdum congue. In hac habitasse platea dictumst. Nulla a erat eget tortor tempor consectetur. Fusce euismod congue risus in feugiat. Sed rutrum vehicula lectus et vehicula. In porttitor malesuada sem at auctor.
Maecenas lacinia placerat augue quis posuere. Cras eu augue quam, eu malesuada sem. Proin facilisis iaculis lectus, vel hendrerit nulla tristique quis. Donec risus mauris, vulputate tristique feugiat nec, imperdiet sed sapien. Cum sociis natoque penatibus et magnis dis parturient montes, nascetur ridiculus mus. Aenean vitae aliquam magna. Donec tempor, ipsum non dapibus elementum, est sem hendrerit nulla, scelerisque sollicitudin lacus mauris eu libero. Vivamus turpis justo, ullamcorper sed ullamcorper quis, tempor in elit. Sed nisl erat, laoreet at adipiscing quis, lobortis et est. Duis congue iaculis mollis. Curabitur ligula turpis, malesuada non feugiat vitae, ullamcorper non nibh. Aliquam adipiscing pellentesque leo nec molestie. Donec tempor eleifend libero, at rutrum velit semper a. Sed tincidunt dictum lorem eu egestas.
Sed at iaculis risus. Nulla aliquet vulputate nulla, nec euismod sem porta quis. Aliquam erat volutpat. Sed tincidunt pharetra metus, in facilisis nunc suscipit ut. Nunc placerat vulputate sapien, elementum varius mi viverra eget. Nam hendrerit felis et arcu ultrices vehicula. Phasellus condimentum ornare orci sed placerat. Sed vel rutrum lorem. Fusce id bibendum ipsum.
        default

        # @param text [String, nil] template text; DEFAULT_TEXT when nil
        def initialize(text = nil)
          @text = text || DEFAULT_TEXT
        end

        # @param field object whose +value+ responds to +length+
        # @return [String] the first +field.value.length+ characters of the
        #         template; the template is repeated when the value is longer
        #         than the template so the length is preserved
        def anonymize(field)
          length = field.value.length
          # An empty template cannot be repeated; slicing it yields "".
          return @text[0, length] if @text.empty? || length <= @text.length

          (@text * (length / @text.length + 1))[0, length]
        end
      end
    end
  end
end
|
@@ -0,0 +1,31 @@
|
|
1
|
+
module DataAnon
  module Strategy
    module Field
      # Field strategy that generates an email address with a random user
      # name. Host name and top-level domain are random too unless fixed
      # values were given to the constructor.
      class RandomEmail
        TLDS = ['com', 'org', 'net', 'edu', 'gov', 'mil', 'biz', 'info'].freeze

        # @param hostname [String, nil] fixed host name; random when nil
        # @param tld      [String, nil] fixed top-level domain; picked from
        #                 TLDS when nil
        def initialize(hostname = nil, tld = nil)
          @hostname = hostname
          @tld = tld
        end

        # @param field unused; the result does not depend on the current value
        # @return [String] "<username>@<hostname>.<tld>"
        def anonymize(field)
          username = random_string(5, 15)
          # Random parts are generated only when no fixed value was configured.
          hostname = @hostname || random_string(2, 10)
          tld = @tld || TLDS.sample

          "#{username}@#{hostname}.#{tld}"
        end

        private

        # Random string whose length is drawn from RandomInt.generate(min, max).
        def random_string(min_length, max_length)
          length = DataAnon::Utils::RandomInt.generate(min_length, max_length)
          DataAnon::Utils::RandomString.generate(length)
        end
      end
    end
  end
end
|
@@ -0,0 +1,18 @@
|
|
1
|
+
module DataAnon
  module Strategy
    module Field
      # Field strategy that returns a random name from a whitespace-separated
      # list of names read from a file.
      class RandomFirstName
        # @param file_path [String, nil] file with the names; when nil the
        #        'first_names.txt' resource located via
        #        DataAnon::Utils::Resource is used
        def initialize(file_path = nil)
          file = file_path || DataAnon::Utils::Resource.file('first_names.txt')
          @names = File.read(file).split
        end

        # @param field unused; the result does not depend on the current value
        # @return [String, nil] a random name; nil when the file had no names
        def anonymize(field)
          @names.sample
        end
      end
    end
  end
end
|