data-anonymization 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.documentup.json +8 -0
- data/.gitignore +20 -0
- data/.rspec +2 -0
- data/.rvmrc +1 -0
- data/.travis.yml +6 -0
- data/Gemfile +12 -0
- data/LICENSE.txt +22 -0
- data/README.md +256 -0
- data/Rakefile +9 -0
- data/blacklist_dsl.rb +19 -0
- data/blacklist_nosql_dsl.rb +36 -0
- data/data-anonymization.gemspec +22 -0
- data/lib/core/database.rb +36 -0
- data/lib/core/dsl.rb +16 -0
- data/lib/core/field.rb +18 -0
- data/lib/data-anonymization.rb +12 -0
- data/lib/strategy/base.rb +67 -0
- data/lib/strategy/blacklist.rb +18 -0
- data/lib/strategy/field/anonymize_time.rb +57 -0
- data/lib/strategy/field/anonymous.rb +21 -0
- data/lib/strategy/field/date_time_delta.rb +24 -0
- data/lib/strategy/field/default_anon.rb +28 -0
- data/lib/strategy/field/distinct_column_values.rb +25 -0
- data/lib/strategy/field/fields.rb +23 -0
- data/lib/strategy/field/gmail_template.rb +17 -0
- data/lib/strategy/field/lorem_ipsum.rb +29 -0
- data/lib/strategy/field/random_boolean.rb +19 -0
- data/lib/strategy/field/random_email.rb +31 -0
- data/lib/strategy/field/random_first_name.rb +18 -0
- data/lib/strategy/field/random_float_delta.rb +24 -0
- data/lib/strategy/field/random_full_name.rb +28 -0
- data/lib/strategy/field/random_int.rb +23 -0
- data/lib/strategy/field/random_integer_delta.rb +21 -0
- data/lib/strategy/field/random_last_name.rb +19 -0
- data/lib/strategy/field/random_mailinator_email.rb +20 -0
- data/lib/strategy/field/random_phone_number.rb +24 -0
- data/lib/strategy/field/random_selection.rb +23 -0
- data/lib/strategy/field/random_string.rb +22 -0
- data/lib/strategy/field/random_user_name.rb +23 -0
- data/lib/strategy/field/string_template.rb +22 -0
- data/lib/strategy/field/user_name_template.rb +22 -0
- data/lib/strategy/field/whitelist.rb +17 -0
- data/lib/strategy/strategies.rb +4 -0
- data/lib/strategy/whitelist.rb +21 -0
- data/lib/tasks/rake_tasks.rb +19 -0
- data/lib/utils/database.rb +53 -0
- data/lib/utils/logging.rb +29 -0
- data/lib/utils/random_int.rb +15 -0
- data/lib/utils/random_string.rb +14 -0
- data/lib/utils/resource.rb +13 -0
- data/lib/version.rb +3 -0
- data/resources/first_names.txt +500 -0
- data/resources/last_names.txt +500 -0
- data/spec/acceptance/rdbms_blacklist_spec.rb +30 -0
- data/spec/acceptance/rdbms_whitelist_spec.rb +50 -0
- data/spec/spec_helper.rb +26 -0
- data/spec/strategy/field/anonymize_time_spec.rb +23 -0
- data/spec/strategy/field/date_time_delta_spec.rb +43 -0
- data/spec/strategy/field/distinct_column_values_spec.rb +22 -0
- data/spec/strategy/field/gmail_template_spec.rb +14 -0
- data/spec/strategy/field/lorem_ipsum_spec.rb +27 -0
- data/spec/strategy/field/random_boolean_spec.rb +16 -0
- data/spec/strategy/field/random_email_spec.rb +18 -0
- data/spec/strategy/field/random_first_name_spec.rb +14 -0
- data/spec/strategy/field/random_float_delta_spec.rb +21 -0
- data/spec/strategy/field/random_full_name_spec.rb +23 -0
- data/spec/strategy/field/random_int_spec.rb +28 -0
- data/spec/strategy/field/random_integer_delta_spec.rb +23 -0
- data/spec/strategy/field/random_last_name_spec.rb +14 -0
- data/spec/strategy/field/random_mailinator_email_spec.rb +21 -0
- data/spec/strategy/field/random_phone_number_spec.rb +35 -0
- data/spec/strategy/field/random_selection_spec.rb +36 -0
- data/spec/strategy/field/random_string_spec.rb +23 -0
- data/spec/strategy/field/random_user_name_spec.rb +23 -0
- data/spec/strategy/field/string_template_spec.rb +15 -0
- data/spec/strategy/field/user_name_template_spec.rb +13 -0
- data/spec/strategy/field/whitelist_spec.rb +21 -0
- data/spec/support/customer_sample.rb +43 -0
- data/spec/utils/database_spec.rb +26 -0
- data/spec/utils/random_int_spec.rb +9 -0
- data/spec/utils/random_string_spec.rb +8 -0
- data/whitelist_dsl.rb +44 -0
- metadata +192 -0
@@ -0,0 +1,24 @@
|
|
1
|
+
module DataAnon
  module Strategy
    module Field
      # Field strategy that moves a numeric value by a random amount, staying
      # within +/- delta of the original value.
      class RandomFloatDelta
        # Delta used when the caller supplies none.
        DEFAULT_DELTA = 10.0

        # delta - largest distance the result may lie from the original value;
        #         nil (or false) selects DEFAULT_DELTA.
        def initialize(delta = nil)
          @delta = delta ? delta : DEFAULT_DELTA
        end

        # Returns a random number between field.value - delta and
        # field.value + delta.
        def anonymize(field)
          lower = field.value - @delta
          upper = field.value + @delta
          range(lower, upper)
        end

        # Maps a fresh Random#rand sample onto the span that starts at min and
        # is (max - min) wide.
        def range(min, max)
          span = max - min
          Random.new.rand * span + min
        end
      end
    end
  end
end
|
@@ -0,0 +1,28 @@
|
|
1
|
+
module DataAnon
  module Strategy
    module Field
      # Field strategy that rewrites a full name word by word: the first word
      # is replaced through RandomFirstName and every following word through
      # RandomLastName. The number of words is preserved.
      class RandomFullName
        # first_names - handed unchanged to RandomFirstName.new
        # last_names  - handed unchanged to RandomLastName.new
        def initialize(first_names = nil, last_names = nil)
          @first_name_anonymizer = DataAnon::Strategy::Field::RandomFirstName.new(first_names)
          @last_name_anonymizer = DataAnon::Strategy::Field::RandomLastName.new(last_names)
        end

        # Returns the replacement words joined by single spaces.
        # field.value must respond to #split.
        def anonymize(field)
          first_word, *remaining_words = field.value.split(' ')

          # The first-name anonymizer is always consulted, even for an empty
          # value (first_word is nil then), so the result is never blank.
          first_name = @first_name_anonymizer.anonymize(first_word)
          last_names = remaining_words.map { |word| @last_name_anonymizer.anonymize(word) }

          [first_name, *last_names].join(' ')
        end
      end
    end
  end
end
|
@@ -0,0 +1,23 @@
|
|
1
|
+
module DataAnon
  module Strategy
    module Field
      # Field strategy that ignores the current value and returns whatever
      # DataAnon::Utils::RandomInt.generate yields for the configured bounds.
      class RandomInt
        # from, to - bounds forwarded to DataAnon::Utils::RandomInt.generate
        #            (defaults: 0 and 100).
        def initialize(from = 0, to = 100)
          @from, @to = from, to
        end

        # The field argument is accepted for interface compatibility only.
        def anonymize(field)
          DataAnon::Utils::RandomInt.generate(@from, @to)
        end
      end
    end
  end
end
|
@@ -0,0 +1,21 @@
|
|
1
|
+
module DataAnon
  module Strategy
    module Field
      # Field strategy that adds a random integer offset, obtained from
      # DataAnon::Utils::RandomInt.generate(-delta, delta), to the value.
      class RandomIntegerDelta
        # Delta used when the caller supplies none.
        DEFAULT_DELTA = 10

        # delta - magnitude passed as both bounds of the offset; nil (or false)
        #         selects DEFAULT_DELTA.
        def initialize(delta = nil)
          @delta = delta ? delta : DEFAULT_DELTA
        end

        # Returns field.value plus the generated offset.
        def anonymize(field)
          offset = DataAnon::Utils::RandomInt.generate(-@delta, @delta)
          field.value + offset
        end
      end
    end
  end
end
|
@@ -0,0 +1,19 @@
|
|
1
|
+
module DataAnon
  module Strategy
    module Field
      # Field strategy that returns a random entry from a whitespace-separated
      # list of names read from a file.
      class RandomLastName
        # file_path - file to read names from; when nil the path comes from
        #             DataAnon::Utils::Resource.file('last_names.txt').
        # The whole file is read once, at construction time.
        def initialize(file_path = nil)
          source = file_path ? file_path : DataAnon::Utils::Resource.file('last_names.txt')
          @names = File.read(source).split
        end

        # The field argument is not inspected; any entry of the list may be
        # returned.
        def anonymize(field)
          @names[rand(@names.size)]
        end
      end
    end
  end
end
|
@@ -0,0 +1,20 @@
|
|
1
|
+
module DataAnon
  module Strategy
    module Field
      # Field strategy that delegates to a RandomEmail strategy constructed
      # with the arguments "mailinator" and "com".
      class RandomMailinatorEmail
        def initialize
          @email_anonymizer = RandomEmail.new('mailinator', 'com')
        end

        # Returns whatever the wrapped RandomEmail strategy produces for field.
        def anonymize(field)
          @email_anonymizer.anonymize(field)
        end
      end
    end
  end
end
|
@@ -0,0 +1,24 @@
|
|
1
|
+
module DataAnon
  module Strategy
    module Field
      # Field strategy that replaces every digit of a phone number with a
      # digit from DataAnon::Utils::RandomInt.generate(0, 9) and leaves all
      # other characters (spaces, brackets, dashes, "+") untouched, so the
      # formatting of the number is preserved.
      class RandomPhoneNumber
        # Returns a new string; field.value itself is not modified.
        #
        # The work is done with locals only. Keeping scratch values in
        # instance variables would leave the last processed number on the
        # strategy object, which is shared across records.
        def anonymize(field)
          field.value.gsub(/\d/) { DataAnon::Utils::RandomInt.generate(0, 9).to_s }
        end
      end
    end
  end
end
|
@@ -0,0 +1,23 @@
|
|
1
|
+
module DataAnon
  module Strategy
    module Field
      # Field strategy that returns one of a fixed set of candidate values.
      class RandomSelection
        # values - an Array of candidates, or a single candidate which is
        #          wrapped in a one-element list.
        #
        # is_a? is used (rather than comparing the class to Array) so that
        # instances of Array subclasses are treated as lists as well.
        def initialize(values)
          @values = values.is_a?(Array) ? values : [values]
        end

        # Returns the only candidate when there is exactly one; otherwise the
        # candidate at the index produced by
        # DataAnon::Utils::RandomInt.generate(0, last_index).
        def anonymize(field)
          return @values.first if @values.length == 1

          @values[DataAnon::Utils::RandomInt.generate(0, @values.length - 1)]
        end
      end
    end
  end
end
|
@@ -0,0 +1,22 @@
|
|
1
|
+
module DataAnon
  module Strategy
    module Field
      # Field strategy that replaces each word of a string with the result of
      # DataAnon::Utils::RandomString.generate called with that word's length.
      class RandomString
        # Words are separated with String#split(' '), so runs of whitespace
        # collapse and the replacements are joined by single spaces.
        # Returns "" for an empty or whitespace-only value.
        def anonymize(field)
          replacements = field.value.split(' ').map do |word|
            DataAnon::Utils::RandomString.generate(word.length)
          end

          replacements.join(' ').strip
        end
      end
    end
  end
end
|
@@ -0,0 +1,23 @@
|
|
1
|
+
module DataAnon
  module Strategy
    module Field
      # Field strategy that produces a user name by asking
      # DataAnon::Utils::RandomInt for a length between the configured bounds
      # and DataAnon::Utils::RandomString for a string of that length.
      class RandomUserName
        DEFAULT_MIN_LENGTH = 5
        DEFAULT_MAX_LENGTH = 10

        # min_length, max_length - bounds forwarded to RandomInt.generate.
        def initialize(min_length = DEFAULT_MIN_LENGTH, max_length = DEFAULT_MAX_LENGTH)
          @min_length, @max_length = min_length, max_length
        end

        # The field argument is not inspected.
        def anonymize(field)
          DataAnon::Utils::RandomString.generate(
            DataAnon::Utils::RandomInt.generate(@min_length, @max_length)
          )
        end
      end
    end
  end
end
|
@@ -0,0 +1,22 @@
|
|
1
|
+
module DataAnon
  module Strategy
    module Field
      # Field strategy that expands a template string using Ruby's own string
      # interpolation, evaluated with the field object as self.
      class StringTemplate
        # template - String containing "#{...}" placeholders; write it in
        #            single quotes so interpolation is deferred until anonymize.
        def initialize(template)
          @template = template
        end

        # Wraps the template in double quotes and evaluates it in a binding
        # taken inside the field, so placeholders can call the field's methods.
        #
        # NOTE(review): the template is executed as Ruby code by eval. It must
        # only ever come from trusted configuration, and a template containing
        # an unescaped double quote will not evaluate as intended.
        def anonymize(field)
          context = field.instance_eval { binding }
          eval('"' + @template + '"', context)
        end
      end
    end
  end
end
|
@@ -0,0 +1,22 @@
|
|
1
|
+
module DataAnon
  module Strategy
    module Field
      # Field strategy that builds a user name from a template string, expanded
      # with Ruby string interpolation and evaluated with the field as self.
      class UserNameTemplate
        # template - String containing "#{...}" placeholders; write it in
        #            single quotes so interpolation is deferred until anonymize.
        def initialize(template)
          @template = template
        end

        # Surrounds the template with double quotes and evaluates it against a
        # binding captured inside the field object.
        #
        # NOTE(review): eval runs the template as Ruby code, so templates must
        # come from trusted configuration only; an unescaped double quote in
        # the template breaks the evaluation.
        def anonymize(field)
          context = field.instance_eval { binding }
          eval('"' + @template + '"', context)
        end
      end
    end
  end
end
|
@@ -0,0 +1,21 @@
|
|
1
|
+
module DataAnon
  module Strategy
    # Table strategy that builds every destination row from scratch: each
    # non-nil, non-primary-key attribute of the source record is run through a
    # field strategy, and the primary key is copied over unchanged.
    class Whitelist < DataAnon::Strategy::Base

      # index  - passed on to DataAnon::Core::Field together with the record
      # record - source row; must respond to #attributes and #[]
      #
      # Saves the new destination record with save!.
      def process_record(index, record)
        dest_record_map = record.attributes.each_with_object({}) do |(field_name, field_value), anonymized|
          next if field_value.nil?
          next if field_name.downcase == @primary_key.downcase

          field = DataAnon::Core::Field.new(field_name, field_value, index, record)
          # Fields without an explicitly configured strategy fall back to
          # DefaultAnon, built from the user-supplied strategies.
          field_strategy = @fields[field_name.downcase]
          field_strategy ||= DataAnon::Strategy::Field::DefaultAnon.new(@user_strategies)
          anonymized[field_name] = field_strategy.anonymize(field)
        end

        dest_record = dest_table.new(dest_record_map)
        dest_record[@primary_key] = record[@primary_key]
        dest_record.save!
      end

    end
  end
end
|
@@ -0,0 +1,19 @@
|
|
1
|
+
require 'rake'
|
2
|
+
require 'rake/tasklib'
|
3
|
+
|
4
|
+
module DataAnonymization
  # Registers the project's Rake tasks. Instantiating the class is what
  # defines them.
  class RakeTasks
    include Rake::DSL if defined? Rake::DSL

    # SQLite file rebuilt by the :empty_dest task.
    EMPTY_DATABASE = 'sample-data/chinook-empty.sqlite'
    # SQL script fed to sqlite3 to create the empty schema.
    SCHEMA_FILE = 'sample-data/chinook_schema.sql'

    # Defines the :empty_dest task, which removes any existing empty
    # destination database and recreates it from the schema script.
    def initialize
      desc "Task to build the clean empty destination database"
      task :empty_dest do
        # Remove the old file from Ruby instead of shelling out to `rm`:
        # this works on platforms without `rm` and stays quiet when the file
        # does not exist yet.
        File.delete(EMPTY_DATABASE) if File.exist?(EMPTY_DATABASE)
        # Needs the sqlite3 command-line tool on PATH; the shell handles the
        # input redirection.
        system "sqlite3 #{EMPTY_DATABASE} < #{SCHEMA_FILE}"
      end
    end

  end
end
|
19
|
+
|
@@ -0,0 +1,53 @@
|
|
1
|
+
require 'active_record'
|
2
|
+
require 'logger'
|
3
|
+
|
4
|
+
module DataAnon
  module Utils

    # Mass-assignment sanitizer that overrides process_removed_attributes with
    # an empty body, so nothing happens for attributes the sanitizer removes.
    class MassAssignmentIgnoreSanitizer < ActiveModel::MassAssignmentSecurity::Sanitizer
      def process_removed_attributes(attrs); end
    end

    # Abstract ActiveRecord base classes. Each one is a separate root, which
    # allows models created under it to use their own database connection.
    class TempDatabase < ActiveRecord::Base
      self.abstract_class = true
    end

    class SourceDatabase < ActiveRecord::Base
      self.abstract_class = true
    end

    class DestinationDatabase < ActiveRecord::Base
      self.abstract_class = true
    end

    # Factory for anonymous model classes bound to a table.
    class BaseTable
      class << self
        # Returns a new anonymous subclass of +database+ configured with the
        # given table name and primary key, and with a
        # MassAssignmentIgnoreSanitizer as its mass-assignment sanitizer.
        def create_table(table_name, primary_key, database)
          Class.new(database).tap do |model|
            model.table_name = table_name
            model.primary_key = primary_key
            model.mass_assignment_sanitizer = MassAssignmentIgnoreSanitizer.new(model)
          end
        end
      end
    end

    # Creates models that inherit from SourceDatabase.
    class SourceTable < BaseTable
      class << self
        def create(table_name, primary_key = nil)
          create_table(table_name, primary_key, SourceDatabase)
        end
      end
    end

    # Creates models that inherit from DestinationDatabase.
    class DestinationTable < BaseTable
      class << self
        def create(table_name, primary_key = nil)
          create_table(table_name, primary_key, DestinationDatabase)
        end
      end
    end

  end
end
|
@@ -0,0 +1,29 @@
|
|
1
|
+
require 'active_record'
|
2
|
+
require 'logger'
|
3
|
+
|
4
|
+
module DataAnon
  module Utils
    # Mixin providing a general logger and a progress logger. Both are stored
    # in class variables of this module, so every object that mixes it in sees
    # the same two loggers.
    module Logging

      # Returns the current logger, installing a Logger on STDOUT the first
      # time it is requested.
      def logger
        return @@logger if defined?(@@logger) && @@logger

        self.logger = Logger.new(STDOUT)
      end

      # Stores the logger and also installs it as ActiveRecord::Base.logger.
      def logger=(logger)
        @@logger = logger
        ActiveRecord::Base.logger = @@logger
        @@logger
      end

      # Returns the current progress logger, creating one on STDOUT the first
      # time it is requested.
      def progress_logger
        return @@progress_logger if defined?(@@progress_logger) && @@progress_logger

        self.progress_logger = Logger.new(STDOUT)
      end

      # Stores the progress logger after replacing its formatter with one that
      # emits the bare message (no severity, timestamp or newline added).
      # Note that the logger passed in is modified.
      def progress_logger=(logger)
        logger.formatter = proc { |_severity, _datetime, _progname, msg| msg }
        @@progress_logger = logger
      end

    end
  end
end
|