data-anonymization 0.1.2
Sign up to get free protection for your applications and to get access to all the features.
- data/.documentup.json +8 -0
- data/.gitignore +20 -0
- data/.rspec +2 -0
- data/.rvmrc +1 -0
- data/.travis.yml +6 -0
- data/Gemfile +12 -0
- data/LICENSE.txt +22 -0
- data/README.md +256 -0
- data/Rakefile +9 -0
- data/blacklist_dsl.rb +19 -0
- data/blacklist_nosql_dsl.rb +36 -0
- data/data-anonymization.gemspec +22 -0
- data/lib/core/database.rb +36 -0
- data/lib/core/dsl.rb +16 -0
- data/lib/core/field.rb +18 -0
- data/lib/data-anonymization.rb +12 -0
- data/lib/strategy/base.rb +67 -0
- data/lib/strategy/blacklist.rb +18 -0
- data/lib/strategy/field/anonymize_time.rb +57 -0
- data/lib/strategy/field/anonymous.rb +21 -0
- data/lib/strategy/field/date_time_delta.rb +24 -0
- data/lib/strategy/field/default_anon.rb +28 -0
- data/lib/strategy/field/distinct_column_values.rb +25 -0
- data/lib/strategy/field/fields.rb +23 -0
- data/lib/strategy/field/gmail_template.rb +17 -0
- data/lib/strategy/field/lorem_ipsum.rb +29 -0
- data/lib/strategy/field/random_boolean.rb +19 -0
- data/lib/strategy/field/random_email.rb +31 -0
- data/lib/strategy/field/random_first_name.rb +18 -0
- data/lib/strategy/field/random_float_delta.rb +24 -0
- data/lib/strategy/field/random_full_name.rb +28 -0
- data/lib/strategy/field/random_int.rb +23 -0
- data/lib/strategy/field/random_integer_delta.rb +21 -0
- data/lib/strategy/field/random_last_name.rb +19 -0
- data/lib/strategy/field/random_mailinator_email.rb +20 -0
- data/lib/strategy/field/random_phone_number.rb +24 -0
- data/lib/strategy/field/random_selection.rb +23 -0
- data/lib/strategy/field/random_string.rb +22 -0
- data/lib/strategy/field/random_user_name.rb +23 -0
- data/lib/strategy/field/string_template.rb +22 -0
- data/lib/strategy/field/user_name_template.rb +22 -0
- data/lib/strategy/field/whitelist.rb +17 -0
- data/lib/strategy/strategies.rb +4 -0
- data/lib/strategy/whitelist.rb +21 -0
- data/lib/tasks/rake_tasks.rb +19 -0
- data/lib/utils/database.rb +53 -0
- data/lib/utils/logging.rb +29 -0
- data/lib/utils/random_int.rb +15 -0
- data/lib/utils/random_string.rb +14 -0
- data/lib/utils/resource.rb +13 -0
- data/lib/version.rb +3 -0
- data/resources/first_names.txt +500 -0
- data/resources/last_names.txt +500 -0
- data/spec/acceptance/rdbms_blacklist_spec.rb +30 -0
- data/spec/acceptance/rdbms_whitelist_spec.rb +50 -0
- data/spec/spec_helper.rb +26 -0
- data/spec/strategy/field/anonymize_time_spec.rb +23 -0
- data/spec/strategy/field/date_time_delta_spec.rb +43 -0
- data/spec/strategy/field/distinct_column_values_spec.rb +22 -0
- data/spec/strategy/field/gmail_template_spec.rb +14 -0
- data/spec/strategy/field/lorem_ipsum_spec.rb +27 -0
- data/spec/strategy/field/random_boolean_spec.rb +16 -0
- data/spec/strategy/field/random_email_spec.rb +18 -0
- data/spec/strategy/field/random_first_name_spec.rb +14 -0
- data/spec/strategy/field/random_float_delta_spec.rb +21 -0
- data/spec/strategy/field/random_full_name_spec.rb +23 -0
- data/spec/strategy/field/random_int_spec.rb +28 -0
- data/spec/strategy/field/random_integer_delta_spec.rb +23 -0
- data/spec/strategy/field/random_last_name_spec.rb +14 -0
- data/spec/strategy/field/random_mailinator_email_spec.rb +21 -0
- data/spec/strategy/field/random_phone_number_spec.rb +35 -0
- data/spec/strategy/field/random_selection_spec.rb +36 -0
- data/spec/strategy/field/random_string_spec.rb +23 -0
- data/spec/strategy/field/random_user_name_spec.rb +23 -0
- data/spec/strategy/field/string_template_spec.rb +15 -0
- data/spec/strategy/field/user_name_template_spec.rb +13 -0
- data/spec/strategy/field/whitelist_spec.rb +21 -0
- data/spec/support/customer_sample.rb +43 -0
- data/spec/utils/database_spec.rb +26 -0
- data/spec/utils/random_int_spec.rb +9 -0
- data/spec/utils/random_string_spec.rb +8 -0
- data/whitelist_dsl.rb +44 -0
- metadata +192 -0
@@ -0,0 +1,24 @@
|
|
1
|
+
module DataAnon
  module Strategy
    module Field
      # Field strategy that replaces a numeric value with a random float taken
      # from the window that spans +delta+ on either side of the original value.
      class RandomFloatDelta
        # Half-width of the window used when the caller supplies no delta.
        DEFAULT_DELTA = 10.0

        # @param delta [Numeric, nil] half-width of the window; nil selects DEFAULT_DELTA
        def initialize(delta = nil)
          @delta = delta || DEFAULT_DELTA
        end

        # @param field [#value] object exposing the numeric value to shift
        # @return [Float] random number between value - delta and value + delta
        def anonymize(field)
          range(field.value - @delta, field.value + @delta)
        end

        # Scales a random fraction into the span between +min+ and +max+.
        #
        # @param min [Numeric] lower bound
        # @param max [Numeric] upper bound
        # @return [Float] min plus a random fraction of (max - min)
        def range(min, max)
          # Kernel.rand draws from the shared generator, so no Random object
          # has to be allocated and seeded for every anonymized value.
          Kernel.rand * (max - min) + min
        end
      end
    end
  end
end
|
@@ -0,0 +1,28 @@
|
|
1
|
+
module DataAnon
  module Strategy
    module Field
      # Field strategy that rebuilds a full name word by word: the first word is
      # replaced through RandomFirstName, every following word through
      # RandomLastName, and the results are joined with single spaces.
      class RandomFullName
        # @param first_names [Object, nil] forwarded unchanged to RandomFirstName.new
        # @param last_names [Object, nil] forwarded unchanged to RandomLastName.new
        def initialize(first_names = nil, last_names = nil)
          @first_name_anonymizer = DataAnon::Strategy::Field::RandomFirstName.new(first_names)
          @last_name_anonymizer = DataAnon::Strategy::Field::RandomLastName.new(last_names)
        end

        # @param field [#value] object whose +value+ is the original name string
        # @return [String] one replacement first name followed by one
        #   replacement last name per additional word of the original
        def anonymize(field)
          name_words = field.value.split(' ')

          # The anonymizers receive the original word (not a field object),
          # exactly as before; a value without words still yields a first name.
          first_name = @first_name_anonymizer.anonymize(name_words.first)
          last_names = name_words.drop(1).map { |word| @last_name_anonymizer.anonymize(word) }

          [first_name, *last_names].join(' ')
        end
      end
    end
  end
end
|
@@ -0,0 +1,23 @@
|
|
1
|
+
module DataAnon
  module Strategy
    module Field
      # Field strategy that ignores the original value and returns whatever
      # DataAnon::Utils::RandomInt.generate produces for the configured bounds.
      class RandomInt
        # @param from [Integer] first bound handed to the generator (default 0)
        # @param to [Integer] second bound handed to the generator (default 100)
        def initialize(from = 0, to = 100)
          @from = from
          @to = to
        end

        # @param field [Object] accepted for interface parity with the other
        #   field strategies; it is not read
        # @return [Object] result of DataAnon::Utils::RandomInt.generate(from, to)
        def anonymize(field)
          DataAnon::Utils::RandomInt.generate(@from, @to)
        end
      end
    end
  end
end
|
@@ -0,0 +1,21 @@
|
|
1
|
+
module DataAnon
  module Strategy
    module Field
      # Field strategy that adds a random adjustment, obtained from
      # DataAnon::Utils::RandomInt.generate(-delta, delta), to the original value.
      class RandomIntegerDelta
        # Delta used when the caller supplies none.
        DEFAULT_DELTA = 10

        # @param delta [Integer, nil] magnitude of the adjustment window; nil
        #   selects DEFAULT_DELTA
        def initialize(delta = nil)
          @delta = delta || DEFAULT_DELTA
        end

        # @param field [#value] object exposing the numeric value to shift
        # @return [Numeric] original value plus the generated adjustment
        def anonymize(field)
          field.value + DataAnon::Utils::RandomInt.generate(-@delta, @delta)
        end
      end
    end
  end
end
|
@@ -0,0 +1,19 @@
|
|
1
|
+
module DataAnon
  module Strategy
    module Field
      # Field strategy that answers with a randomly chosen entry from a
      # whitespace-separated list of names read from a file.
      class RandomLastName
        # Reads the whole names file once and keeps its entries in memory.
        #
        # @param file_path [String, nil] file to read; when nil the path is
        #   obtained from DataAnon::Utils::Resource.file('last_names.txt')
        def initialize(file_path = nil)
          file = file_path || DataAnon::Utils::Resource.file('last_names.txt')
          @names = File.read(file).split
        end

        # @param field [Object] accepted for interface parity with the other
        #   field strategies; it is not read
        # @return [String, nil] one of the loaded names, or nil when the file
        #   contained none
        def anonymize(field)
          # Array#sample replaces the hand-rolled @names[rand(size)] lookup,
          # which silently relied on rand(0) returning a Float for an empty list.
          @names.sample
        end
      end
    end
  end
end
|
@@ -0,0 +1,20 @@
|
|
1
|
+
module DataAnon
  module Strategy
    module Field
      # Field strategy that delegates to a RandomEmail strategy constructed
      # with the arguments "mailinator" and "com".
      class RandomMailinatorEmail
        def initialize
          @email_anonymizer = DataAnon::Strategy::Field::RandomEmail.new("mailinator", "com")
        end

        # @param field [Object] passed through untouched to the wrapped
        #   RandomEmail strategy
        # @return [Object] whatever RandomEmail#anonymize returns for +field+
        def anonymize(field)
          @email_anonymizer.anonymize(field)
        end
      end
    end
  end
end
|
@@ -0,0 +1,24 @@
|
|
1
|
+
module DataAnon
  module Strategy
    module Field
      # Field strategy that keeps the layout of a phone number (spaces,
      # punctuation, letters) and swaps every digit for a freshly generated one.
      class RandomPhoneNumber
        # @param field [#value] object whose +value+ is the original phone
        #   number string
        # @return [String] copy of the value in which each character matching
        #   /\d/ is replaced by DataAnon::Utils::RandomInt.generate(0, 9).to_s
        def anonymize(field)
          # Work on locals only: the previous implementation parked both the
          # original and the anonymized number in instance variables, which
          # left the real number on the strategy object after the call.
          field.value.gsub(/\d/) { DataAnon::Utils::RandomInt.generate(0, 9).to_s }
        end
      end
    end
  end
end
|
@@ -0,0 +1,23 @@
|
|
1
|
+
module DataAnon
  module Strategy
    module Field
      # Field strategy that answers with one of a fixed set of candidate values.
      class RandomSelection
        # @param values [Array, Object] candidate values; anything that is not
        #   an Array is treated as a single candidate
        def initialize(values)
          # is_a? also recognises Array subclasses, which the previous exact
          # class comparison wrapped into a one-element list by mistake.
          @values = values.is_a?(Array) ? values : [values]
        end

        # @param field [Object] accepted for interface parity with the other
        #   field strategies; it is not read
        # @return [Object] the only candidate when there is exactly one,
        #   otherwise the candidate at the index returned by
        #   DataAnon::Utils::RandomInt.generate(0, last_index)
        def anonymize(field)
          return @values.first if @values.length == 1

          @values[DataAnon::Utils::RandomInt.generate(0, @values.length - 1)]
        end
      end
    end
  end
end
|
@@ -0,0 +1,22 @@
|
|
1
|
+
module DataAnon
  module Strategy
    module Field
      # Field strategy that replaces every space-separated word of the value
      # with a generated string of the same length.
      class RandomString
        # @param field [#value] object whose +value+ is the original string
        # @return [String] generated words joined by single spaces; each word
        #   comes from DataAnon::Utils::RandomString.generate(word.length)
        def anonymize(field)
          # map/join builds the result in one pass instead of growing a string
          # with a trailing space and stripping it afterwards.
          field.value.split(' ')
               .map { |word| DataAnon::Utils::RandomString.generate(word.length) }
               .join(' ')
        end
      end
    end
  end
end
|
@@ -0,0 +1,23 @@
|
|
1
|
+
module DataAnon
  module Strategy
    module Field
      # Field strategy that ignores the original value and produces a generated
      # string whose length is picked between the configured bounds.
      class RandomUserName
        # Length bounds used when the caller supplies none.
        DEFAULT_MIN_LENGTH = 5
        DEFAULT_MAX_LENGTH = 10

        # @param min_length [Integer] first bound handed to the length generator
        # @param max_length [Integer] second bound handed to the length generator
        def initialize(min_length = DEFAULT_MIN_LENGTH, max_length = DEFAULT_MAX_LENGTH)
          @min_length = min_length
          @max_length = max_length
        end

        # @param field [Object] accepted for interface parity with the other
        #   field strategies; it is not read
        # @return [Object] DataAnon::Utils::RandomString.generate(length), where
        #   length is DataAnon::Utils::RandomInt.generate(min_length, max_length)
        def anonymize(field)
          username_length = DataAnon::Utils::RandomInt.generate(@min_length, @max_length)
          DataAnon::Utils::RandomString.generate(username_length)
        end
      end
    end
  end
end
|
@@ -0,0 +1,22 @@
|
|
1
|
+
module DataAnon
  module Strategy
    module Field
      # Field strategy that renders a template as a double-quoted Ruby string
      # with the field object as +self+, so "#{...}" expressions in the
      # template can call the field's methods without a receiver.
      class StringTemplate
        # @param template [String] text of a double-quoted string body; it may
        #   contain "#{...}" interpolations
        def initialize(template)
          @template = template
        end

        # @param field [Object] object that serves as +self+ while the
        #   template is evaluated
        # @return [String] the rendered template
        def anonymize(field)
          # A binding captured inside instance_eval has +field+ as self.
          context = field.instance_eval { binding }

          # NOTE(review): the template is executed as Ruby source. It must come
          # from trusted configuration only, never from the data being
          # anonymized. A bare double quote outside an interpolation ends the
          # string literal early and raises a SyntaxError.
          eval("\"#{@template}\"", context)
        end
      end
    end
  end
end
|
@@ -0,0 +1,22 @@
|
|
1
|
+
module DataAnon
  module Strategy
    module Field
      # Field strategy that renders a user-name template as a double-quoted
      # Ruby string with the field object as +self+, so "#{...}" expressions
      # in the template can call the field's methods without a receiver.
      class UserNameTemplate
        # @param template [String] text of a double-quoted string body; it may
        #   contain "#{...}" interpolations
        def initialize(template)
          @template = template
        end

        # @param field [Object] object that serves as +self+ while the
        #   template is evaluated
        # @return [String] the rendered template
        def anonymize(field)
          # A binding captured inside instance_eval has +field+ as self.
          context = field.instance_eval { binding }

          # NOTE(review): the template is executed as Ruby source. It must come
          # from trusted configuration only, never from the data being
          # anonymized. A bare double quote outside an interpolation ends the
          # string literal early and raises a SyntaxError.
          eval("\"#{@template}\"", context)
        end
      end
    end
  end
end
|
@@ -0,0 +1,21 @@
|
|
1
|
+
module DataAnon
  module Strategy
    # Table strategy that builds a new destination record from each source
    # record, running every non-nil, non-primary-key attribute through a
    # field strategy before saving.
    class Whitelist < DataAnon::Strategy::Base
      # Anonymizes one source record and saves the result to the destination.
      #
      # @param index [Object] passed on to DataAnon::Core::Field.new as the
      #   third argument — presumably the row number; confirm against the caller
      # @param record [#attributes, #[]] source record
      # @return [Object] result of +save!+ on the destination record
      def process_record(index, record)
        dest_record_map = {}

        record.attributes.each do |field_name, field_value|
          # nil values and the primary key column are never anonymized here;
          # the primary key is copied verbatim below.
          next if field_value.nil? || field_name.downcase == @primary_key.downcase

          field = DataAnon::Core::Field.new(field_name, field_value, index, record)
          # Fields without an explicit strategy fall back to DefaultAnon.
          field_strategy = @fields[field_name.downcase] ||
                           DataAnon::Strategy::Field::DefaultAnon.new(@user_strategies)
          dest_record_map[field_name] = field_strategy.anonymize(field)
        end

        dest_record = dest_table.new(dest_record_map)
        dest_record[@primary_key] = record[@primary_key]
        dest_record.save!
      end
    end
  end
end
|
@@ -0,0 +1,19 @@
|
|
1
|
+
require 'rake'
|
2
|
+
require 'rake/tasklib'
|
3
|
+
|
4
|
+
module DataAnonymization
  # Registers the gem's Rake tasks; the tasks are defined as a side effect of
  # instantiating this class.
  class RakeTasks
    include Rake::DSL if defined? Rake::DSL

    # Defines the +empty_dest+ task, which recreates the empty sample
    # destination database from the schema file.
    def initialize
      desc "Task to build the clean empty destination database"
      task :empty_dest do
        empty_db = 'sample-data/chinook-empty.sqlite'

        # Delete through File rather than shelling out to `rm`, so a first run
        # with no existing database prints no error and the step does not
        # depend on a Unix shell command.
        File.delete(empty_db) if File.exist?(empty_db)
        system "sqlite3 #{empty_db} < sample-data/chinook_schema.sql"
      end
    end
  end
end
|
19
|
+
|
@@ -0,0 +1,53 @@
|
|
1
|
+
require 'active_record'
|
2
|
+
require 'logger'
|
3
|
+
|
4
|
+
module DataAnon
  module Utils
    # Mass-assignment sanitizer whose removed-attribute hook does nothing.
    class MassAssignmentIgnoreSanitizer < ActiveModel::MassAssignmentSecurity::Sanitizer
      # Intentionally empty override.
      #
      # @param attrs [Object] the removed attributes; not read
      # @return [nil]
      def process_removed_attributes(attrs)
      end
    end

    # Abstract ActiveRecord base class; carries no table of its own.
    class TempDatabase < ActiveRecord::Base
      self.abstract_class = true
    end

    # Abstract ActiveRecord base class that source table models inherit from.
    class SourceDatabase < ActiveRecord::Base
      self.abstract_class = true
    end

    # Abstract ActiveRecord base class that destination table models inherit from.
    class DestinationDatabase < ActiveRecord::Base
      self.abstract_class = true
    end

    # Factory for anonymous model classes bound to a given table.
    class BaseTable
      # Builds an anonymous subclass of +database+ configured for one table.
      #
      # @param table_name [String] assigned to the new class's table_name
      # @param primary_key [String, nil] assigned to the new class's primary_key
      # @param database [Class] superclass of the generated model
      # @return [Class] the generated model class
      def self.create_table(table_name, primary_key, database)
        Class.new(database) do
          # The right-hand sides are this method's parameters, captured by the
          # block; the left-hand sides are setters on the new class.
          self.table_name = table_name
          self.primary_key = primary_key
          self.mass_assignment_sanitizer = MassAssignmentIgnoreSanitizer.new(self)
        end
      end
    end

    # Factory for model classes that inherit from SourceDatabase.
    class SourceTable < BaseTable
      # @param table_name [String] table the model maps to
      # @param primary_key [String, nil] primary key column (default nil)
      # @return [Class] anonymous subclass of SourceDatabase
      def self.create(table_name, primary_key = nil)
        create_table(table_name, primary_key, SourceDatabase)
      end
    end

    # Factory for model classes that inherit from DestinationDatabase.
    class DestinationTable < BaseTable
      # @param table_name [String] table the model maps to
      # @param primary_key [String, nil] primary key column (default nil)
      # @return [Class] anonymous subclass of DestinationDatabase
      def self.create(table_name, primary_key = nil)
        create_table(table_name, primary_key, DestinationDatabase)
      end
    end
  end
end
|
@@ -0,0 +1,29 @@
|
|
1
|
+
require 'active_record'
|
2
|
+
require 'logger'
|
3
|
+
|
4
|
+
module DataAnon
  module Utils
    # Mixin that gives including classes access to two loggers stored in
    # class variables of this module: a general logger (also installed as
    # ActiveRecord::Base.logger) and a progress logger that prints bare messages.
    module Logging
      # @return [Logger] the current logger; on first use a Logger writing to
      #   STDOUT is created and installed through #logger=
      def logger
        @@logger ||= (self.logger = Logger.new(STDOUT))
      end

      # Stores +logger+ and also assigns it to ActiveRecord::Base.logger.
      #
      # @param logger [Logger] logger to install
      def logger=(logger)
        @@logger = logger
        ActiveRecord::Base.logger = logger
        @@logger
      end

      # @return [Logger] the current progress logger; on first use a Logger
      #   writing to STDOUT is created and installed through #progress_logger=
      def progress_logger
        @@progress_logger ||= (self.progress_logger = Logger.new(STDOUT))
      end

      # Stores +logger+ as the progress logger after replacing its formatter
      # with one that emits only the message text. Note that this changes the
      # formatter of the logger object the caller passed in.
      #
      # @param logger [Logger] logger to install
      def progress_logger=(logger)
        logger.formatter = proc { |_severity, _datetime, _progname, msg| msg }
        @@progress_logger = logger
      end
    end
  end
end
|