data-anonymization 0.3.0 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +2 -1
- data/.rvmrc +1 -1
- data/.travis.yml +2 -0
- data/Gemfile +2 -0
- data/README.md +295 -258
- data/bin/datanon +57 -0
- data/data-anonymization.gemspec +2 -1
- data/examples/blacklist_dsl.rb +42 -0
- data/examples/mongodb_blacklist_dsl.rb +38 -0
- data/examples/mongodb_whitelist_dsl.rb +44 -0
- data/examples/whitelist_dsl.rb +63 -0
- data/lib/core/database.rb +21 -3
- data/lib/core/field.rb +5 -2
- data/lib/core/fields_missing_strategy.rb +30 -0
- data/lib/core/table_errors.rb +32 -0
- data/lib/data-anonymization.rb +11 -0
- data/lib/parallel/table.rb +8 -1
- data/lib/strategy/base.rb +35 -14
- data/lib/strategy/blacklist.rb +1 -1
- data/lib/strategy/field/anonymize_array.rb +28 -0
- data/lib/strategy/field/contact/random_address.rb +12 -0
- data/lib/strategy/field/contact/random_city.rb +12 -0
- data/lib/strategy/field/contact/random_phone_number.rb +4 -0
- data/lib/strategy/field/contact/random_province.rb +12 -0
- data/lib/strategy/field/contact/random_zipcode.rb +12 -0
- data/lib/strategy/field/datetime/anonymize_date.rb +15 -0
- data/lib/strategy/field/datetime/anonymize_datetime.rb +19 -0
- data/lib/strategy/field/datetime/anonymize_time.rb +19 -0
- data/lib/strategy/field/datetime/date_delta.rb +10 -0
- data/lib/strategy/field/datetime/date_time_delta.rb +9 -0
- data/lib/strategy/field/datetime/time_delta.rb +8 -0
- data/lib/strategy/field/default_anon.rb +4 -1
- data/lib/strategy/field/email/gmail_template.rb +8 -0
- data/lib/strategy/field/email/random_email.rb +7 -0
- data/lib/strategy/field/email/random_mailinator_email.rb +5 -0
- data/lib/strategy/field/fields.rb +4 -0
- data/lib/strategy/field/name/random_first_name.rb +10 -0
- data/lib/strategy/field/name/random_full_name.rb +10 -2
- data/lib/strategy/field/name/random_last_name.rb +9 -0
- data/lib/strategy/field/name/random_user_name.rb +5 -0
- data/lib/strategy/field/number/random_big_decimal_delta.rb +6 -0
- data/lib/strategy/field/number/random_float.rb +4 -0
- data/lib/strategy/field/number/random_float_delta.rb +6 -0
- data/lib/strategy/field/number/random_integer.rb +4 -0
- data/lib/strategy/field/number/random_integer_delta.rb +6 -0
- data/lib/strategy/field/string/formatted_string_numbers.rb +10 -6
- data/lib/strategy/field/string/lorem_ipsum.rb +9 -0
- data/lib/strategy/field/string/random_formatted_string.rb +39 -0
- data/lib/strategy/field/string/random_string.rb +6 -0
- data/lib/strategy/field/string/random_url.rb +7 -1
- data/lib/strategy/field/string/select_from_database.rb +7 -5
- data/lib/strategy/field/string/select_from_file.rb +7 -0
- data/lib/strategy/field/string/select_from_list.rb +8 -0
- data/lib/strategy/field/string/string_template.rb +11 -0
- data/lib/strategy/mongodb/anonymize_field.rb +44 -0
- data/lib/strategy/mongodb/blacklist.rb +29 -0
- data/lib/strategy/mongodb/whitelist.rb +62 -0
- data/lib/strategy/strategies.rb +10 -1
- data/lib/strategy/whitelist.rb +7 -2
- data/lib/thor/helpers/mongodb_dsl_generator.rb +66 -0
- data/lib/thor/helpers/rdbms_dsl_generator.rb +36 -0
- data/lib/thor/templates/mongodb_whitelist_template.erb +15 -0
- data/lib/thor/templates/whitelist_template.erb +21 -0
- data/lib/utils/database.rb +4 -0
- data/lib/utils/parallel_progress_bar.rb +24 -0
- data/lib/utils/progress_bar.rb +34 -22
- data/lib/utils/random_string.rb +3 -2
- data/lib/utils/random_string_chars_only.rb +3 -5
- data/lib/utils/template_helper.rb +44 -0
- data/lib/version.rb +1 -1
- data/spec/acceptance/mongodb_blacklist_spec.rb +75 -0
- data/spec/acceptance/mongodb_whitelist_spec.rb +107 -0
- data/spec/core/fields_missing_strategy_spec.rb +26 -0
- data/spec/strategy/field/name/random_first_name_spec.rb +1 -1
- data/spec/strategy/field/name/random_full_name_spec.rb +12 -7
- data/spec/strategy/field/name/random_last_name_spec.rb +1 -1
- data/spec/strategy/field/string/random_formatted_string_spec.rb +39 -0
- data/spec/strategy/field/string/select_from_file_spec.rb +21 -0
- data/spec/strategy/mongodb/anonymize_field_spec.rb +52 -0
- data/spec/utils/random_float_spec.rb +12 -0
- data/spec/utils/random_string_char_only_spec.rb +12 -0
- data/spec/utils/template_helper_spec.rb +14 -0
- metadata +56 -6
- data/blacklist_dsl.rb +0 -17
- data/blacklist_nosql_dsl.rb +0 -36
- data/whitelist_dsl.rb +0 -42
data/bin/datanon
ADDED
@@ -0,0 +1,57 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
# encoding: UTF-8
|
3
|
+
|
4
|
+
require 'thor'
|
5
|
+
require 'data-anonymization'
|
6
|
+
|
7
|
+
class AnonymizationCLI < Thor
|
8
|
+
|
9
|
+
include Thor::Actions
|
10
|
+
|
11
|
+
desc "generate_rdbms_dsl", "Generates a base anonymization script(whitelist strategy) for a RDBMS database using the database schema"
|
12
|
+
|
13
|
+
def generate_rdbms_dsl
|
14
|
+
|
15
|
+
configuration_hash = {:adapter => options["adapter"],
|
16
|
+
:host => options["host"],
|
17
|
+
:port => options["port"],
|
18
|
+
:database => options["database"],
|
19
|
+
:username => options["username"],
|
20
|
+
:password => options["password"]
|
21
|
+
}
|
22
|
+
create_file "rdbms_whitelist_generated.rb"
|
23
|
+
DataAnon::ThorHelpers::RDBMSDSLGenerator.new.generate_whitelist_script(configuration_hash)
|
24
|
+
end
|
25
|
+
|
26
|
+
method_option :adapter, :required => true, :aliases => "-a", :desc => "Activerecord database adapter to be used [required]", :for => :generate_rdbms_dsl
|
27
|
+
method_option :host, :required => true, :aliases => "-h", :desc => "Source Database host [required]", :for => :generate_rdbms_dsl
|
28
|
+
method_option :database, :required => true, :aliases => "-d", :desc => "Database name [required]", :for => :generate_rdbms_dsl
|
29
|
+
method_option :port, :aliases => "-p", :desc => "Port to connect to. If not provided default port provided by AR will be used", :for => :generate_rdbms_dsl
|
30
|
+
method_option :username, :aliases => "-u", :desc => "Username", :for => :generate_rdbms_dsl
|
31
|
+
method_option :password, :aliases => "-w", :desc => "Password", :for => :generate_rdbms_dsl
|
32
|
+
|
33
|
+
desc "generate_mongo_dsl", "Generates a base anonymization script(whitelist strategy) for a Mongo DB using the database schema"
|
34
|
+
|
35
|
+
def generate_mongo_dsl
|
36
|
+
|
37
|
+
configuration_hash = {:host => options["host"],
|
38
|
+
:port => options["port"],
|
39
|
+
:database => options["database"],
|
40
|
+
:username => options["username"],
|
41
|
+
:password => options["password"]
|
42
|
+
}
|
43
|
+
|
44
|
+
create_file "mongodb_whitelist_generated.rb"
|
45
|
+
DataAnon::ThorHelpers::MongoDBDSLGenerator.new(configuration_hash, options["whitelist_patterns"]).generate
|
46
|
+
end
|
47
|
+
|
48
|
+
method_option :host, :required => true, :aliases => "-h", :desc => "Source Database host [required]", :for => :generate_mongo_dsl
|
49
|
+
method_option :database, :required => true, :aliases => "-d", :desc => "Database name [required]", :for => :generate_mongo_dsl
|
50
|
+
method_option :port, :aliases => "-p", :desc => "Port to connect to. If not provided default port will be used", :for => :generate_mongo_dsl
|
51
|
+
method_option :username, :aliases => "-u", :desc => "Username", :for => :generate_mongo_dsl
|
52
|
+
method_option :password, :aliases => "-w", :desc => "Password", :for => :generate_mongo_dsl
|
53
|
+
method_option :whitelist_patterns, :aliases => "-r", :desc => "Whitelist Patterns", :for => :generate_mongo_dsl
|
54
|
+
|
55
|
+
end
|
56
|
+
|
57
|
+
AnonymizationCLI.start
|
data/data-anonymization.gemspec
CHANGED
@@ -13,7 +13,7 @@ Gem::Specification.new do |gem|
|
|
13
13
|
gem.homepage = "http://sunitparekh.github.com/data-anonymization"
|
14
14
|
|
15
15
|
gem.files = `git ls-files`.split($/).select { |f| !f.match(/^sample-data/) }
|
16
|
-
gem.executables =
|
16
|
+
gem.executables = "datanon"
|
17
17
|
gem.test_files = gem.files.grep(%r{^(test|spec|features)/})
|
18
18
|
gem.require_paths = ["lib"]
|
19
19
|
|
@@ -24,4 +24,5 @@ Gem::Specification.new do |gem|
|
|
24
24
|
gem.add_dependency('rgeo-geojson', '~> 0.2.3')
|
25
25
|
gem.add_dependency('powerbar', '~> 1.0.8')
|
26
26
|
gem.add_dependency('parallel', '~> 0.5.18')
|
27
|
+
gem.add_dependency('thor', '~> 0.16.0')
|
27
28
|
end
|
data/examples/blacklist_dsl.rb
ADDED
@@ -0,0 +1,42 @@
|
|
1
|
+
system "bundle exec ruby whitelist_dsl.rb"
|
2
|
+
|
3
|
+
require 'data-anonymization'
|
4
|
+
|
5
|
+
DataAnon::Utils::Logging.logger.level = Logger::INFO
|
6
|
+
|
7
|
+
database 'Chinook' do
|
8
|
+
strategy DataAnon::Strategy::Blacklist
|
9
|
+
source_db :adapter => 'sqlite3', :database => 'sample-data/chinook-empty.sqlite'
|
10
|
+
|
11
|
+
table 'Employee' do
|
12
|
+
primary_key 'EmployeeId'
|
13
|
+
anonymize('BirthDate').using FieldStrategy::DateTimeDelta.new(1, 1)
|
14
|
+
anonymize('FirstName').using FieldStrategy::RandomFirstName.new
|
15
|
+
anonymize('LastName').using FieldStrategy::RandomLastName.new
|
16
|
+
anonymize('HireDate').using FieldStrategy::DateTimeDelta.new(2, 0)
|
17
|
+
anonymize('Address').using FieldStrategy::RandomAddress.region_US
|
18
|
+
anonymize('City').using FieldStrategy::RandomCity.region_US
|
19
|
+
anonymize('State').using FieldStrategy::RandomProvince.region_US
|
20
|
+
anonymize('PostalCode').using FieldStrategy::RandomZipcode.region_US
|
21
|
+
anonymize('Country') {|field| "USA" }
|
22
|
+
anonymize('Phone').using FieldStrategy::RandomPhoneNumber.new
|
23
|
+
anonymize('Fax').using FieldStrategy::RandomPhoneNumber.new
|
24
|
+
anonymize('Email').using FieldStrategy::StringTemplate.new('test+#{row_number}@gmail.com')
|
25
|
+
end
|
26
|
+
|
27
|
+
table 'Customer' do
|
28
|
+
primary_key 'CustomerId'
|
29
|
+
anonymize('Phone').using FieldStrategy::RandomPhoneNumber.new
|
30
|
+
anonymize('FirstName').using FieldStrategy::RandomFirstName.new
|
31
|
+
anonymize('LastName').using FieldStrategy::RandomLastName.new
|
32
|
+
anonymize('Address').using FieldStrategy::RandomAddress.region_US
|
33
|
+
anonymize('City').using FieldStrategy::RandomCity.region_US
|
34
|
+
anonymize('State').using FieldStrategy::RandomProvince.region_US
|
35
|
+
anonymize('PostalCode').using FieldStrategy::RandomZipcode.region_US
|
36
|
+
anonymize('Country') {|field| "USA" }
|
37
|
+
anonymize('Fax').using FieldStrategy::RandomPhoneNumber.new
|
38
|
+
anonymize('Email').using FieldStrategy::StringTemplate.new('test+#{row_number}@gmail.com')
|
39
|
+
end
|
40
|
+
|
41
|
+
end
|
42
|
+
|
data/examples/mongodb_blacklist_dsl.rb
ADDED
@@ -0,0 +1,38 @@
|
|
1
|
+
require 'data-anonymization'
|
2
|
+
|
3
|
+
require 'mongo'
|
4
|
+
Mongo::Connection.from_uri("mongodb://localhost/test").drop_database('test')
|
5
|
+
system "mongoimport -d test --drop -c users --jsonArray ./sample-data/mongo/users.json"
|
6
|
+
system "mongoimport -d test --drop -c plans --jsonArray ./sample-data/mongo/plans.json"
|
7
|
+
|
8
|
+
DataAnon::Utils::Logging.logger.level = Logger::INFO
|
9
|
+
|
10
|
+
database 'test' do
|
11
|
+
strategy DataAnon::Strategy::MongoDB::Blacklist
|
12
|
+
source_db :mongodb_uri => "mongodb://localhost/test", :database => 'test'
|
13
|
+
|
14
|
+
collection 'users' do
|
15
|
+
anonymize('date_of_birth').using FieldStrategy::TimeDelta.new(5,30)
|
16
|
+
anonymize('user_id').using FieldStrategy::StringTemplate.new('user-#{row_number}')
|
17
|
+
anonymize('email').using FieldStrategy::RandomMailinatorEmail.new
|
18
|
+
anonymize('password') { |field| "password" }
|
19
|
+
anonymize('first_name').using FieldStrategy::RandomFirstName.new
|
20
|
+
anonymize('last_name').using FieldStrategy::RandomLastName.new
|
21
|
+
end
|
22
|
+
|
23
|
+
collection 'plans' do
|
24
|
+
anonymize('plan_aliases').using FieldStrategy::SelectFromList.new(["Free","Team","Business","Paid"])
|
25
|
+
anonymize 'public_sharing','photo_sharing'
|
26
|
+
|
27
|
+
collection 'features' do
|
28
|
+
anonymize('max_storage').using FieldStrategy::SelectFromList.new([10737418240,21474836480,53687091200])
|
29
|
+
|
30
|
+
document 'users' do
|
31
|
+
anonymize 'max', 'additional'
|
32
|
+
end
|
33
|
+
end
|
34
|
+
|
35
|
+
end
|
36
|
+
|
37
|
+
end
|
38
|
+
|
data/examples/mongodb_whitelist_dsl.rb
ADDED
@@ -0,0 +1,44 @@
|
|
1
|
+
require 'data-anonymization'
|
2
|
+
|
3
|
+
require 'mongo'
|
4
|
+
Mongo::Connection.from_uri("mongodb://localhost/test").drop_database('test')
|
5
|
+
Mongo::Connection.from_uri("mongodb://localhost/dest").drop_database('dest')
|
6
|
+
system "mongoimport -d test --drop -c users --jsonArray ./sample-data/mongo/users.json"
|
7
|
+
system "mongoimport -d test --drop -c plans --jsonArray ./sample-data/mongo/plans.json"
|
8
|
+
|
9
|
+
DataAnon::Utils::Logging.logger.level = Logger::INFO
|
10
|
+
|
11
|
+
database 'test' do
|
12
|
+
strategy DataAnon::Strategy::MongoDB::Whitelist
|
13
|
+
source_db :mongodb_uri => "mongodb://localhost/test", :database => 'test'
|
14
|
+
destination_db :mongodb_uri => "mongodb://localhost/dest", :database => 'dest'
|
15
|
+
|
16
|
+
collection 'users' do
|
17
|
+
whitelist '_id','failed_attempts','updated_at'
|
18
|
+
anonymize('date_of_birth').using FieldStrategy::TimeDelta.new(5,30)
|
19
|
+
anonymize('user_id').using FieldStrategy::StringTemplate.new('user-#{row_number}')
|
20
|
+
anonymize('email').using FieldStrategy::RandomMailinatorEmail.new
|
21
|
+
anonymize('password') { |field| "password" }
|
22
|
+
anonymize('first_name').using FieldStrategy::RandomFirstName.new
|
23
|
+
anonymize('last_name').using FieldStrategy::RandomLastName.new
|
24
|
+
anonymize 'password_reset_answer','password_reset_question'
|
25
|
+
end
|
26
|
+
|
27
|
+
collection 'plans' do
|
28
|
+
whitelist '_id', 'name','term', 'created_at'
|
29
|
+
anonymize('plan_aliases').using FieldStrategy::SelectFromList.new(["Free","Team","Business","Paid"])
|
30
|
+
anonymize 'public_sharing','photo_sharing'
|
31
|
+
|
32
|
+
collection 'features' do
|
33
|
+
anonymize('max_storage').using FieldStrategy::SelectFromList.new([10737418240,21474836480,53687091200])
|
34
|
+
whitelist 'type'
|
35
|
+
|
36
|
+
document 'users' do
|
37
|
+
anonymize 'max', 'additional'
|
38
|
+
end
|
39
|
+
end
|
40
|
+
|
41
|
+
end
|
42
|
+
|
43
|
+
end
|
44
|
+
|
data/examples/whitelist_dsl.rb
ADDED
@@ -0,0 +1,63 @@
|
|
1
|
+
system "rake empty_dest" # clean destination database on every call
|
2
|
+
|
3
|
+
require 'data-anonymization'
|
4
|
+
|
5
|
+
DataAnon::Utils::Logging.logger.level = Logger::INFO
|
6
|
+
|
7
|
+
database 'Chinook' do
|
8
|
+
strategy DataAnon::Strategy::Whitelist
|
9
|
+
source_db :adapter => 'sqlite3', :database => 'sample-data/chinook.sqlite'
|
10
|
+
destination_db :adapter => 'sqlite3', :database => 'sample-data/chinook-empty.sqlite'
|
11
|
+
|
12
|
+
default_field_strategies :string => FieldStrategy::StringTemplate.new('Sunit #{row_number} Parekh')
|
13
|
+
|
14
|
+
table 'Genre' do
|
15
|
+
primary_key 'GenreId'
|
16
|
+
whitelist 'GenreId'
|
17
|
+
anonymize 'Name' do |field|
|
18
|
+
field.value + " test"
|
19
|
+
end
|
20
|
+
end
|
21
|
+
|
22
|
+
table 'MediaType' do
|
23
|
+
primary_key 'MediaTypeId'
|
24
|
+
anonymize('MediaTypeId') { |field| field.value } # same as whitelist
|
25
|
+
anonymize('Name').using FieldStrategy::StringTemplate.new('Media Type #{row_number}')
|
26
|
+
|
27
|
+
end
|
28
|
+
|
29
|
+
table 'Employee' do
|
30
|
+
primary_key 'EmployeeId'
|
31
|
+
whitelist 'EmployeeId', 'ReportsTo', 'Title'
|
32
|
+
anonymize('BirthDate').using FieldStrategy::DateTimeDelta.new(1, 1)
|
33
|
+
anonymize('FirstName').using FieldStrategy::RandomFirstName.new
|
34
|
+
anonymize('LastName').using FieldStrategy::RandomLastName.new
|
35
|
+
anonymize('HireDate').using FieldStrategy::DateTimeDelta.new(2, 0)
|
36
|
+
anonymize('Address').using FieldStrategy::RandomAddress.region_US
|
37
|
+
anonymize('City').using FieldStrategy::RandomCity.region_US
|
38
|
+
anonymize('State').using FieldStrategy::RandomProvince.region_US
|
39
|
+
anonymize('PostalCode').using FieldStrategy::RandomZipcode.region_US
|
40
|
+
anonymize('Country') {|field| "USA" }
|
41
|
+
anonymize('Phone').using FieldStrategy::RandomPhoneNumber.new
|
42
|
+
anonymize('Fax').using FieldStrategy::RandomPhoneNumber.new
|
43
|
+
anonymize('Email').using FieldStrategy::StringTemplate.new('test+#{row_number}@gmail.com')
|
44
|
+
end
|
45
|
+
|
46
|
+
table 'Customer' do
|
47
|
+
primary_key 'CustomerId'
|
48
|
+
whitelist 'SupportRepId', 'Company'
|
49
|
+
anonymize('Phone').using FieldStrategy::RandomPhoneNumber.new
|
50
|
+
anonymize('FirstName').using FieldStrategy::RandomFirstName.new
|
51
|
+
anonymize('LastName').using FieldStrategy::RandomLastName.new
|
52
|
+
anonymize('Address').using FieldStrategy::RandomAddress.region_US
|
53
|
+
anonymize('City').using FieldStrategy::RandomCity.region_US
|
54
|
+
anonymize('State').using FieldStrategy::RandomProvince.region_US
|
55
|
+
anonymize('PostalCode').using FieldStrategy::RandomZipcode.region_US
|
56
|
+
anonymize('Country') {|field| "USA" }
|
57
|
+
anonymize('Fax').using FieldStrategy::RandomPhoneNumber.new
|
58
|
+
anonymize('Email').using FieldStrategy::StringTemplate.new('test+#{row_number}@gmail.com')
|
59
|
+
end
|
60
|
+
|
61
|
+
|
62
|
+
end
|
63
|
+
|
data/lib/core/database.rb
CHANGED
@@ -2,6 +2,7 @@ module DataAnon
|
|
2
2
|
module Core
|
3
3
|
|
4
4
|
class Database
|
5
|
+
include Utils::Logging
|
5
6
|
|
6
7
|
def initialize name
|
7
8
|
@name = name
|
@@ -35,18 +36,35 @@ module DataAnon
|
|
35
36
|
|
36
37
|
def table (name, &block)
|
37
38
|
table = @strategy.new(@source_database, @destination_database, name, @user_defaults).process_fields(&block)
|
38
|
-
@tables<< table
|
39
|
+
@tables << table
|
39
40
|
end
|
41
|
+
alias :collection :table
|
40
42
|
|
41
43
|
def anonymize
|
42
|
-
|
44
|
+
begin
|
45
|
+
@execution_strategy.new.anonymize @tables
|
46
|
+
rescue => e
|
47
|
+
logger.error "\n#{e.message} \n #{e.backtrace}"
|
48
|
+
end
|
49
|
+
if @strategy.whitelist?
|
50
|
+
logger.info("Fields missing the anonymization strategy")
|
51
|
+
@tables.each { |table| table.fields_missing_strategy.print }
|
52
|
+
end
|
53
|
+
|
54
|
+
@tables.each { |table| table.errors.print }
|
43
55
|
end
|
44
56
|
|
45
57
|
end
|
46
58
|
|
47
59
|
class Sequential
|
48
60
|
def anonymize tables
|
49
|
-
tables.each
|
61
|
+
tables.each do |table|
|
62
|
+
begin
|
63
|
+
table.process
|
64
|
+
rescue => e
|
65
|
+
logger.error "\n#{e.message} \n #{e.backtrace}"
|
66
|
+
end
|
67
|
+
end
|
50
68
|
end
|
51
69
|
end
|
52
70
|
|
data/lib/core/field.rb
CHANGED
@@ -3,14 +3,17 @@ module DataAnon
|
|
3
3
|
|
4
4
|
class Field
|
5
5
|
|
6
|
-
def initialize name, value, row_number, ar_record
|
6
|
+
def initialize name, value, row_number, ar_record, table_name = "unknown"
|
7
7
|
@name = name
|
8
8
|
@value = value
|
9
9
|
@row_number = row_number
|
10
10
|
@ar_record = ar_record
|
11
|
+
@table_name = table_name
|
11
12
|
end
|
12
13
|
|
13
|
-
attr_accessor :name, :value, :row_number, :ar_record
|
14
|
+
attr_accessor :name, :value, :row_number, :ar_record, :table_name
|
15
|
+
|
16
|
+
alias :collection_name :table_name
|
14
17
|
|
15
18
|
end
|
16
19
|
|
data/lib/core/fields_missing_strategy.rb
ADDED
@@ -0,0 +1,30 @@
|
|
1
|
+
module DataAnon
|
2
|
+
module Core
|
3
|
+
|
4
|
+
class FieldsMissingStrategy
|
5
|
+
include Utils::Logging
|
6
|
+
|
7
|
+
def initialize table_name
|
8
|
+
@table_name = table_name
|
9
|
+
@fields_missing_strategy = []
|
10
|
+
end
|
11
|
+
|
12
|
+
def missing field_name
|
13
|
+
return if @fields_missing_strategy.include? field_name
|
14
|
+
@fields_missing_strategy << field_name
|
15
|
+
end
|
16
|
+
|
17
|
+
def fields_missing_strategy
|
18
|
+
@fields_missing_strategy
|
19
|
+
end
|
20
|
+
|
21
|
+
def print
|
22
|
+
@fields_missing_strategy.each do |field_name|
|
23
|
+
logger.info("#{@table_name}.#{field_name}")
|
24
|
+
end
|
25
|
+
end
|
26
|
+
|
27
|
+
end
|
28
|
+
|
29
|
+
end
|
30
|
+
end
|
data/lib/core/table_errors.rb
ADDED
@@ -0,0 +1,32 @@
|
|
1
|
+
module DataAnon
|
2
|
+
module Core
|
3
|
+
|
4
|
+
class TableErrors
|
5
|
+
include Utils::Logging
|
6
|
+
|
7
|
+
def initialize table_name
|
8
|
+
@table_name = table_name
|
9
|
+
@errors = []
|
10
|
+
end
|
11
|
+
|
12
|
+
def log_error record, exception
|
13
|
+
@errors << { :record => record, :exception => exception}
|
14
|
+
raise "Reached limit of error for a table" if @errors.length > 100
|
15
|
+
end
|
16
|
+
|
17
|
+
def errors
|
18
|
+
@errors
|
19
|
+
end
|
20
|
+
|
21
|
+
def print
|
22
|
+
return if @errors.length == 0
|
23
|
+
logger.error("Errors while processing table #{@table_name}:")
|
24
|
+
@errors.each do |error|
|
25
|
+
logger.error(error[:exception])
|
26
|
+
end
|
27
|
+
end
|
28
|
+
|
29
|
+
end
|
30
|
+
|
31
|
+
end
|
32
|
+
end
|
data/lib/data-anonymization.rb
CHANGED
@@ -7,11 +7,22 @@ require "utils/random_string"
|
|
7
7
|
require "utils/random_string_chars_only"
|
8
8
|
require "utils/geojson_parser"
|
9
9
|
require "utils/progress_bar"
|
10
|
+
require "utils/parallel_progress_bar"
|
10
11
|
require "utils/resource"
|
12
|
+
require "utils/template_helper"
|
11
13
|
require "parallel/table"
|
12
14
|
require "core/database"
|
15
|
+
require "core/fields_missing_strategy"
|
16
|
+
require "thor/helpers/rdbms_dsl_generator"
|
13
17
|
require "core/field"
|
18
|
+
require "core/table_errors"
|
14
19
|
require "strategy/strategies"
|
15
20
|
require "utils/database"
|
16
21
|
require "core/dsl"
|
17
22
|
|
23
|
+
begin
|
24
|
+
require 'mongo'
|
25
|
+
require "thor/helpers/mongodb_dsl_generator"
|
26
|
+
rescue LoadError
|
27
|
+
"Ignoring the mongodb specific libraries if monog driver is not specified in gem"
|
28
|
+
end
|
data/lib/parallel/table.rb
CHANGED
@@ -5,7 +5,14 @@ module DataAnon
|
|
5
5
|
class Table
|
6
6
|
|
7
7
|
def anonymize tables
|
8
|
-
::Parallel.each(tables)
|
8
|
+
::Parallel.each(tables) do |table|
|
9
|
+
begin
|
10
|
+
table.progress_bar_class DataAnon::Utils::ParallelProgressBar
|
11
|
+
table.process
|
12
|
+
rescue => e
|
13
|
+
logger.error "\n#{e.message} \n #{e.backtrace}"
|
14
|
+
end
|
15
|
+
end
|
9
16
|
end
|
10
17
|
|
11
18
|
end
|
data/lib/strategy/base.rb
CHANGED
@@ -3,12 +3,20 @@ module DataAnon
|
|
3
3
|
class Base
|
4
4
|
include Utils::Logging
|
5
5
|
|
6
|
+
attr_accessor :fields, :user_strategies, :fields_missing_strategy, :errors
|
7
|
+
|
6
8
|
def initialize source_database, destination_database, name, user_strategies
|
7
9
|
@name = name
|
8
10
|
@user_strategies = user_strategies
|
9
11
|
@fields = {}
|
10
12
|
@source_database = source_database
|
11
13
|
@destination_database = destination_database
|
14
|
+
@fields_missing_strategy = DataAnon::Core::FieldsMissingStrategy.new name
|
15
|
+
@errors = DataAnon::Core::TableErrors.new(@name)
|
16
|
+
end
|
17
|
+
|
18
|
+
def self.whitelist?
|
19
|
+
false
|
12
20
|
end
|
13
21
|
|
14
22
|
def process_fields &block
|
@@ -20,19 +28,10 @@ module DataAnon
|
|
20
28
|
@primary_keys = fields
|
21
29
|
end
|
22
30
|
|
23
|
-
def is_primary_key? field
|
24
|
-
@primary_keys.select { |key| field.downcase == key.downcase }.length > 0
|
25
|
-
end
|
26
|
-
|
27
|
-
|
28
31
|
def whitelist *fields
|
29
32
|
fields.each { |f| @fields[f.downcase] = DataAnon::Strategy::Field::Whitelist.new }
|
30
33
|
end
|
31
34
|
|
32
|
-
def fields
|
33
|
-
@fields
|
34
|
-
end
|
35
|
-
|
36
35
|
def anonymize *fields, &block
|
37
36
|
if block.nil?
|
38
37
|
fields.each { |f| @fields[f.downcase] = DataAnon::Strategy::Field::DefaultAnon.new(@user_strategies) }
|
@@ -49,6 +48,15 @@ module DataAnon
|
|
49
48
|
end
|
50
49
|
end
|
51
50
|
|
51
|
+
def is_primary_key? field
|
52
|
+
@primary_keys.select { |key| field.downcase == key.downcase }.length > 0
|
53
|
+
end
|
54
|
+
|
55
|
+
def default_strategy field_name
|
56
|
+
@fields_missing_strategy.missing field_name
|
57
|
+
DataAnon::Strategy::Field::DefaultAnon.new(@user_strategies)
|
58
|
+
end
|
59
|
+
|
52
60
|
def dest_table
|
53
61
|
return @dest_table unless @dest_table.nil?
|
54
62
|
DataAnon::Utils::DestinationDatabase.establish_connection @destination_database if @destination_database
|
@@ -65,17 +73,30 @@ module DataAnon
|
|
65
73
|
logger.debug "Processing table #{@name} with fields strategies #{@fields}"
|
66
74
|
total = source_table.count
|
67
75
|
if total > 0
|
68
|
-
index =
|
69
|
-
|
76
|
+
index = 0
|
77
|
+
progress = progress_bar.new(@name, total)
|
70
78
|
source_table.all.each do |record|
|
71
|
-
process_record index, record
|
72
79
|
index += 1
|
73
|
-
|
80
|
+
begin
|
81
|
+
process_record index, record
|
82
|
+
rescue => exception
|
83
|
+
@errors.log_error record, exception
|
84
|
+
end
|
85
|
+
progress.show index
|
74
86
|
end
|
75
|
-
|
87
|
+
progress.close
|
76
88
|
end
|
77
89
|
end
|
78
90
|
|
91
|
+
def progress_bar
|
92
|
+
@progress_bar || DataAnon::Utils::ProgressBar
|
93
|
+
end
|
94
|
+
|
95
|
+
def progress_bar_class progress_bar
|
96
|
+
@progress_bar = progress_bar
|
97
|
+
end
|
98
|
+
|
99
|
+
|
79
100
|
end
|
80
101
|
end
|
81
102
|
end
|