data-anonymization 0.3.0 → 0.5.0
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +2 -1
- data/.rvmrc +1 -1
- data/.travis.yml +2 -0
- data/Gemfile +2 -0
- data/README.md +295 -258
- data/bin/datanon +57 -0
- data/data-anonymization.gemspec +2 -1
- data/examples/blacklist_dsl.rb +42 -0
- data/examples/mongodb_blacklist_dsl.rb +38 -0
- data/examples/mongodb_whitelist_dsl.rb +44 -0
- data/examples/whitelist_dsl.rb +63 -0
- data/lib/core/database.rb +21 -3
- data/lib/core/field.rb +5 -2
- data/lib/core/fields_missing_strategy.rb +30 -0
- data/lib/core/table_errors.rb +32 -0
- data/lib/data-anonymization.rb +11 -0
- data/lib/parallel/table.rb +8 -1
- data/lib/strategy/base.rb +35 -14
- data/lib/strategy/blacklist.rb +1 -1
- data/lib/strategy/field/anonymize_array.rb +28 -0
- data/lib/strategy/field/contact/random_address.rb +12 -0
- data/lib/strategy/field/contact/random_city.rb +12 -0
- data/lib/strategy/field/contact/random_phone_number.rb +4 -0
- data/lib/strategy/field/contact/random_province.rb +12 -0
- data/lib/strategy/field/contact/random_zipcode.rb +12 -0
- data/lib/strategy/field/datetime/anonymize_date.rb +15 -0
- data/lib/strategy/field/datetime/anonymize_datetime.rb +19 -0
- data/lib/strategy/field/datetime/anonymize_time.rb +19 -0
- data/lib/strategy/field/datetime/date_delta.rb +10 -0
- data/lib/strategy/field/datetime/date_time_delta.rb +9 -0
- data/lib/strategy/field/datetime/time_delta.rb +8 -0
- data/lib/strategy/field/default_anon.rb +4 -1
- data/lib/strategy/field/email/gmail_template.rb +8 -0
- data/lib/strategy/field/email/random_email.rb +7 -0
- data/lib/strategy/field/email/random_mailinator_email.rb +5 -0
- data/lib/strategy/field/fields.rb +4 -0
- data/lib/strategy/field/name/random_first_name.rb +10 -0
- data/lib/strategy/field/name/random_full_name.rb +10 -2
- data/lib/strategy/field/name/random_last_name.rb +9 -0
- data/lib/strategy/field/name/random_user_name.rb +5 -0
- data/lib/strategy/field/number/random_big_decimal_delta.rb +6 -0
- data/lib/strategy/field/number/random_float.rb +4 -0
- data/lib/strategy/field/number/random_float_delta.rb +6 -0
- data/lib/strategy/field/number/random_integer.rb +4 -0
- data/lib/strategy/field/number/random_integer_delta.rb +6 -0
- data/lib/strategy/field/string/formatted_string_numbers.rb +10 -6
- data/lib/strategy/field/string/lorem_ipsum.rb +9 -0
- data/lib/strategy/field/string/random_formatted_string.rb +39 -0
- data/lib/strategy/field/string/random_string.rb +6 -0
- data/lib/strategy/field/string/random_url.rb +7 -1
- data/lib/strategy/field/string/select_from_database.rb +7 -5
- data/lib/strategy/field/string/select_from_file.rb +7 -0
- data/lib/strategy/field/string/select_from_list.rb +8 -0
- data/lib/strategy/field/string/string_template.rb +11 -0
- data/lib/strategy/mongodb/anonymize_field.rb +44 -0
- data/lib/strategy/mongodb/blacklist.rb +29 -0
- data/lib/strategy/mongodb/whitelist.rb +62 -0
- data/lib/strategy/strategies.rb +10 -1
- data/lib/strategy/whitelist.rb +7 -2
- data/lib/thor/helpers/mongodb_dsl_generator.rb +66 -0
- data/lib/thor/helpers/rdbms_dsl_generator.rb +36 -0
- data/lib/thor/templates/mongodb_whitelist_template.erb +15 -0
- data/lib/thor/templates/whitelist_template.erb +21 -0
- data/lib/utils/database.rb +4 -0
- data/lib/utils/parallel_progress_bar.rb +24 -0
- data/lib/utils/progress_bar.rb +34 -22
- data/lib/utils/random_string.rb +3 -2
- data/lib/utils/random_string_chars_only.rb +3 -5
- data/lib/utils/template_helper.rb +44 -0
- data/lib/version.rb +1 -1
- data/spec/acceptance/mongodb_blacklist_spec.rb +75 -0
- data/spec/acceptance/mongodb_whitelist_spec.rb +107 -0
- data/spec/core/fields_missing_strategy_spec.rb +26 -0
- data/spec/strategy/field/name/random_first_name_spec.rb +1 -1
- data/spec/strategy/field/name/random_full_name_spec.rb +12 -7
- data/spec/strategy/field/name/random_last_name_spec.rb +1 -1
- data/spec/strategy/field/string/random_formatted_string_spec.rb +39 -0
- data/spec/strategy/field/string/select_from_file_spec.rb +21 -0
- data/spec/strategy/mongodb/anonymize_field_spec.rb +52 -0
- data/spec/utils/random_float_spec.rb +12 -0
- data/spec/utils/random_string_char_only_spec.rb +12 -0
- data/spec/utils/template_helper_spec.rb +14 -0
- metadata +56 -6
- data/blacklist_dsl.rb +0 -17
- data/blacklist_nosql_dsl.rb +0 -36
- data/whitelist_dsl.rb +0 -42
data/bin/datanon
ADDED
@@ -0,0 +1,57 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
# encoding: UTF-8
|
3
|
+
|
4
|
+
require 'thor'
|
5
|
+
require 'data-anonymization'
|
6
|
+
|
7
|
+
class AnonymizationCLI < Thor
|
8
|
+
|
9
|
+
include Thor::Actions
|
10
|
+
|
11
|
+
desc "generate_rdbms_dsl", "Generates a base anonymization script(whitelist strategy) for a RDBMS database using the database schema"
|
12
|
+
|
13
|
+
def generate_rdbms_dsl
|
14
|
+
|
15
|
+
configuration_hash = {:adapter => options["adapter"],
|
16
|
+
:host => options["host"],
|
17
|
+
:port => options["port"],
|
18
|
+
:database => options["database"],
|
19
|
+
:username => options["username"],
|
20
|
+
:password => options["password"]
|
21
|
+
}
|
22
|
+
create_file "rdbms_whitelist_generated.rb"
|
23
|
+
DataAnon::ThorHelpers::RDBMSDSLGenerator.new.generate_whitelist_script(configuration_hash)
|
24
|
+
end
|
25
|
+
|
26
|
+
method_option :adapter, :required => true, :aliases => "-a", :desc => "Activerecord database adapter to be used [required]", :for => :generate_rdbms_dsl
|
27
|
+
method_option :host, :required => true, :aliases => "-h", :desc => "Source Database host [required]", :for => :generate_rdbms_dsl
|
28
|
+
method_option :database, :required => true, :aliases => "-d", :desc => "Database name [required]", :for => :generate_rdbms_dsl
|
29
|
+
method_option :port, :aliases => "-p", :desc => "Port to connect to. If not provided default port provided by AR will be used", :for => :generate_rdbms_dsl
|
30
|
+
method_option :username, :aliases => "-u", :desc => "Username", :for => :generate_rdbms_dsl
|
31
|
+
method_option :password, :aliases => "-w", :desc => "Password", :for => :generate_rdbms_dsl
|
32
|
+
|
33
|
+
desc "generate_mongo_dsl", "Generates a base anonymization script(whitelist strategy) for a Mongo DB using the database schema"
|
34
|
+
|
35
|
+
def generate_mongo_dsl
|
36
|
+
|
37
|
+
configuration_hash = {:host => options["host"],
|
38
|
+
:port => options["port"],
|
39
|
+
:database => options["database"],
|
40
|
+
:username => options["username"],
|
41
|
+
:password => options["password"]
|
42
|
+
}
|
43
|
+
|
44
|
+
create_file "mongodb_whitelist_generated.rb"
|
45
|
+
DataAnon::ThorHelpers::MongoDBDSLGenerator.new(configuration_hash, options["whitelist_patterns"]).generate
|
46
|
+
end
|
47
|
+
|
48
|
+
method_option :host, :required => true, :aliases => "-h", :desc => "Source Database host [required]", :for => :generate_mongo_dsl
|
49
|
+
method_option :database, :required => true, :aliases => "-d", :desc => "Database name [required]", :for => :generate_mongo_dsl
|
50
|
+
method_option :port, :aliases => "-p", :desc => "Port to connect to. If not provided default port will be used", :for => :generate_mongo_dsl
|
51
|
+
method_option :username, :aliases => "-u", :desc => "Username", :for => :generate_mongo_dsl
|
52
|
+
method_option :password, :aliases => "-w", :desc => "Password", :for => :generate_mongo_dsl
|
53
|
+
method_option :whitelist_patterns, :aliases => "-r", :desc => "Whitelist Patterns", :for => :generate_mongo_dsl
|
54
|
+
|
55
|
+
end
|
56
|
+
|
57
|
+
AnonymizationCLI.start
|
data/data-anonymization.gemspec
CHANGED
@@ -13,7 +13,7 @@ Gem::Specification.new do |gem|
|
|
13
13
|
gem.homepage = "http://sunitparekh.github.com/data-anonymization"
|
14
14
|
|
15
15
|
gem.files = `git ls-files`.split($/).select { |f| !f.match(/^sample-data/) }
|
16
|
-
gem.executables =
|
16
|
+
gem.executables = "datanon"
|
17
17
|
gem.test_files = gem.files.grep(%r{^(test|spec|features)/})
|
18
18
|
gem.require_paths = ["lib"]
|
19
19
|
|
@@ -24,4 +24,5 @@ Gem::Specification.new do |gem|
|
|
24
24
|
gem.add_dependency('rgeo-geojson', '~> 0.2.3')
|
25
25
|
gem.add_dependency('powerbar', '~> 1.0.8')
|
26
26
|
gem.add_dependency('parallel', '~> 0.5.18')
|
27
|
+
gem.add_dependency('thor', '~> 0.16.0')
|
27
28
|
end
|
data/examples/blacklist_dsl.rb
ADDED
@@ -0,0 +1,42 @@
|
|
1
|
+
system "bundle exec ruby whitelist_dsl.rb"
|
2
|
+
|
3
|
+
require 'data-anonymization'
|
4
|
+
|
5
|
+
DataAnon::Utils::Logging.logger.level = Logger::INFO
|
6
|
+
|
7
|
+
database 'Chinook' do
|
8
|
+
strategy DataAnon::Strategy::Blacklist
|
9
|
+
source_db :adapter => 'sqlite3', :database => 'sample-data/chinook-empty.sqlite'
|
10
|
+
|
11
|
+
table 'Employee' do
|
12
|
+
primary_key 'EmployeeId'
|
13
|
+
anonymize('BirthDate').using FieldStrategy::DateTimeDelta.new(1, 1)
|
14
|
+
anonymize('FirstName').using FieldStrategy::RandomFirstName.new
|
15
|
+
anonymize('LastName').using FieldStrategy::RandomLastName.new
|
16
|
+
anonymize('HireDate').using FieldStrategy::DateTimeDelta.new(2, 0)
|
17
|
+
anonymize('Address').using FieldStrategy::RandomAddress.region_US
|
18
|
+
anonymize('City').using FieldStrategy::RandomCity.region_US
|
19
|
+
anonymize('State').using FieldStrategy::RandomProvince.region_US
|
20
|
+
anonymize('PostalCode').using FieldStrategy::RandomZipcode.region_US
|
21
|
+
anonymize('Country') {|field| "USA" }
|
22
|
+
anonymize('Phone').using FieldStrategy::RandomPhoneNumber.new
|
23
|
+
anonymize('Fax').using FieldStrategy::RandomPhoneNumber.new
|
24
|
+
anonymize('Email').using FieldStrategy::StringTemplate.new('test+#{row_number}@gmail.com')
|
25
|
+
end
|
26
|
+
|
27
|
+
table 'Customer' do
|
28
|
+
primary_key 'CustomerId'
|
29
|
+
anonymize('Phone').using FieldStrategy::RandomPhoneNumber.new
|
30
|
+
anonymize('FirstName').using FieldStrategy::RandomFirstName.new
|
31
|
+
anonymize('LastName').using FieldStrategy::RandomLastName.new
|
32
|
+
anonymize('Address').using FieldStrategy::RandomAddress.region_US
|
33
|
+
anonymize('City').using FieldStrategy::RandomCity.region_US
|
34
|
+
anonymize('State').using FieldStrategy::RandomProvince.region_US
|
35
|
+
anonymize('PostalCode').using FieldStrategy::RandomZipcode.region_US
|
36
|
+
anonymize('Country') {|field| "USA" }
|
37
|
+
anonymize('Fax').using FieldStrategy::RandomPhoneNumber.new
|
38
|
+
anonymize('Email').using FieldStrategy::StringTemplate.new('test+#{row_number}@gmail.com')
|
39
|
+
end
|
40
|
+
|
41
|
+
end
|
42
|
+
|
data/examples/mongodb_blacklist_dsl.rb
ADDED
@@ -0,0 +1,38 @@
|
|
1
|
+
require 'data-anonymization'
|
2
|
+
|
3
|
+
require 'mongo'
|
4
|
+
Mongo::Connection.from_uri("mongodb://localhost/test").drop_database('test')
|
5
|
+
system "mongoimport -d test --drop -c users --jsonArray ./sample-data/mongo/users.json"
|
6
|
+
system "mongoimport -d test --drop -c plans --jsonArray ./sample-data/mongo/plans.json"
|
7
|
+
|
8
|
+
DataAnon::Utils::Logging.logger.level = Logger::INFO
|
9
|
+
|
10
|
+
database 'test' do
|
11
|
+
strategy DataAnon::Strategy::MongoDB::Blacklist
|
12
|
+
source_db :mongodb_uri => "mongodb://localhost/test", :database => 'test'
|
13
|
+
|
14
|
+
collection 'users' do
|
15
|
+
anonymize('date_of_birth').using FieldStrategy::TimeDelta.new(5,30)
|
16
|
+
anonymize('user_id').using FieldStrategy::StringTemplate.new('user-#{row_number}')
|
17
|
+
anonymize('email').using FieldStrategy::RandomMailinatorEmail.new
|
18
|
+
anonymize('password') { |field| "password" }
|
19
|
+
anonymize('first_name').using FieldStrategy::RandomFirstName.new
|
20
|
+
anonymize('last_name').using FieldStrategy::RandomLastName.new
|
21
|
+
end
|
22
|
+
|
23
|
+
collection 'plans' do
|
24
|
+
anonymize('plan_aliases').using FieldStrategy::SelectFromList.new(["Free","Team","Business","Paid"])
|
25
|
+
anonymize 'public_sharing','photo_sharing'
|
26
|
+
|
27
|
+
collection 'features' do
|
28
|
+
anonymize('max_storage').using FieldStrategy::SelectFromList.new([10737418240,21474836480,53687091200])
|
29
|
+
|
30
|
+
document 'users' do
|
31
|
+
anonymize 'max', 'additional'
|
32
|
+
end
|
33
|
+
end
|
34
|
+
|
35
|
+
end
|
36
|
+
|
37
|
+
end
|
38
|
+
|
data/examples/mongodb_whitelist_dsl.rb
ADDED
@@ -0,0 +1,44 @@
|
|
1
|
+
require 'data-anonymization'
|
2
|
+
|
3
|
+
require 'mongo'
|
4
|
+
Mongo::Connection.from_uri("mongodb://localhost/test").drop_database('test')
|
5
|
+
Mongo::Connection.from_uri("mongodb://localhost/dest").drop_database('dest')
|
6
|
+
system "mongoimport -d test --drop -c users --jsonArray ./sample-data/mongo/users.json"
|
7
|
+
system "mongoimport -d test --drop -c plans --jsonArray ./sample-data/mongo/plans.json"
|
8
|
+
|
9
|
+
DataAnon::Utils::Logging.logger.level = Logger::INFO
|
10
|
+
|
11
|
+
database 'test' do
|
12
|
+
strategy DataAnon::Strategy::MongoDB::Whitelist
|
13
|
+
source_db :mongodb_uri => "mongodb://localhost/test", :database => 'test'
|
14
|
+
destination_db :mongodb_uri => "mongodb://localhost/dest", :database => 'dest'
|
15
|
+
|
16
|
+
collection 'users' do
|
17
|
+
whitelist '_id','failed_attempts','updated_at'
|
18
|
+
anonymize('date_of_birth').using FieldStrategy::TimeDelta.new(5,30)
|
19
|
+
anonymize('user_id').using FieldStrategy::StringTemplate.new('user-#{row_number}')
|
20
|
+
anonymize('email').using FieldStrategy::RandomMailinatorEmail.new
|
21
|
+
anonymize('password') { |field| "password" }
|
22
|
+
anonymize('first_name').using FieldStrategy::RandomFirstName.new
|
23
|
+
anonymize('last_name').using FieldStrategy::RandomLastName.new
|
24
|
+
anonymize 'password_reset_answer','password_reset_question'
|
25
|
+
end
|
26
|
+
|
27
|
+
collection 'plans' do
|
28
|
+
whitelist '_id', 'name','term', 'created_at'
|
29
|
+
anonymize('plan_aliases').using FieldStrategy::SelectFromList.new(["Free","Team","Business","Paid"])
|
30
|
+
anonymize 'public_sharing','photo_sharing'
|
31
|
+
|
32
|
+
collection 'features' do
|
33
|
+
anonymize('max_storage').using FieldStrategy::SelectFromList.new([10737418240,21474836480,53687091200])
|
34
|
+
whitelist 'type'
|
35
|
+
|
36
|
+
document 'users' do
|
37
|
+
anonymize 'max', 'additional'
|
38
|
+
end
|
39
|
+
end
|
40
|
+
|
41
|
+
end
|
42
|
+
|
43
|
+
end
|
44
|
+
|
data/examples/whitelist_dsl.rb
ADDED
@@ -0,0 +1,63 @@
|
|
1
|
+
system "rake empty_dest" # clean destination database on every call
|
2
|
+
|
3
|
+
require 'data-anonymization'
|
4
|
+
|
5
|
+
DataAnon::Utils::Logging.logger.level = Logger::INFO
|
6
|
+
|
7
|
+
database 'Chinook' do
|
8
|
+
strategy DataAnon::Strategy::Whitelist
|
9
|
+
source_db :adapter => 'sqlite3', :database => 'sample-data/chinook.sqlite'
|
10
|
+
destination_db :adapter => 'sqlite3', :database => 'sample-data/chinook-empty.sqlite'
|
11
|
+
|
12
|
+
default_field_strategies :string => FieldStrategy::StringTemplate.new('Sunit #{row_number} Parekh')
|
13
|
+
|
14
|
+
table 'Genre' do
|
15
|
+
primary_key 'GenreId'
|
16
|
+
whitelist 'GenreId'
|
17
|
+
anonymize 'Name' do |field|
|
18
|
+
field.value + " test"
|
19
|
+
end
|
20
|
+
end
|
21
|
+
|
22
|
+
table 'MediaType' do
|
23
|
+
primary_key 'MediaTypeId'
|
24
|
+
anonymize('MediaTypeId') { |field| field.value } # same as whitelist
|
25
|
+
anonymize('Name').using FieldStrategy::StringTemplate.new('Media Type #{row_number}')
|
26
|
+
|
27
|
+
end
|
28
|
+
|
29
|
+
table 'Employee' do
|
30
|
+
primary_key 'EmployeeId'
|
31
|
+
whitelist 'EmployeeId', 'ReportsTo', 'Title'
|
32
|
+
anonymize('BirthDate').using FieldStrategy::DateTimeDelta.new(1, 1)
|
33
|
+
anonymize('FirstName').using FieldStrategy::RandomFirstName.new
|
34
|
+
anonymize('LastName').using FieldStrategy::RandomLastName.new
|
35
|
+
anonymize('HireDate').using FieldStrategy::DateTimeDelta.new(2, 0)
|
36
|
+
anonymize('Address').using FieldStrategy::RandomAddress.region_US
|
37
|
+
anonymize('City').using FieldStrategy::RandomCity.region_US
|
38
|
+
anonymize('State').using FieldStrategy::RandomProvince.region_US
|
39
|
+
anonymize('PostalCode').using FieldStrategy::RandomZipcode.region_US
|
40
|
+
anonymize('Country') {|field| "USA" }
|
41
|
+
anonymize('Phone').using FieldStrategy::RandomPhoneNumber.new
|
42
|
+
anonymize('Fax').using FieldStrategy::RandomPhoneNumber.new
|
43
|
+
anonymize('Email').using FieldStrategy::StringTemplate.new('test+#{row_number}@gmail.com')
|
44
|
+
end
|
45
|
+
|
46
|
+
table 'Customer' do
|
47
|
+
primary_key 'CustomerId'
|
48
|
+
whitelist 'SupportRepId', 'Company'
|
49
|
+
anonymize('Phone').using FieldStrategy::RandomPhoneNumber.new
|
50
|
+
anonymize('FirstName').using FieldStrategy::RandomFirstName.new
|
51
|
+
anonymize('LastName').using FieldStrategy::RandomLastName.new
|
52
|
+
anonymize('Address').using FieldStrategy::RandomAddress.region_US
|
53
|
+
anonymize('City').using FieldStrategy::RandomCity.region_US
|
54
|
+
anonymize('State').using FieldStrategy::RandomProvince.region_US
|
55
|
+
anonymize('PostalCode').using FieldStrategy::RandomZipcode.region_US
|
56
|
+
anonymize('Country') {|field| "USA" }
|
57
|
+
anonymize('Fax').using FieldStrategy::RandomPhoneNumber.new
|
58
|
+
anonymize('Email').using FieldStrategy::StringTemplate.new('test+#{row_number}@gmail.com')
|
59
|
+
end
|
60
|
+
|
61
|
+
|
62
|
+
end
|
63
|
+
|
data/lib/core/database.rb
CHANGED
@@ -2,6 +2,7 @@ module DataAnon
|
|
2
2
|
module Core
|
3
3
|
|
4
4
|
class Database
|
5
|
+
include Utils::Logging
|
5
6
|
|
6
7
|
def initialize name
|
7
8
|
@name = name
|
@@ -35,18 +36,35 @@ module DataAnon
|
|
35
36
|
|
36
37
|
def table (name, &block)
|
37
38
|
table = @strategy.new(@source_database, @destination_database, name, @user_defaults).process_fields(&block)
|
38
|
-
@tables<< table
|
39
|
+
@tables << table
|
39
40
|
end
|
41
|
+
alias :collection :table
|
40
42
|
|
41
43
|
def anonymize
|
42
|
-
|
44
|
+
begin
|
45
|
+
@execution_strategy.new.anonymize @tables
|
46
|
+
rescue => e
|
47
|
+
logger.error "\n#{e.message} \n #{e.backtrace}"
|
48
|
+
end
|
49
|
+
if @strategy.whitelist?
|
50
|
+
logger.info("Fields missing the anonymization strategy")
|
51
|
+
@tables.each { |table| table.fields_missing_strategy.print }
|
52
|
+
end
|
53
|
+
|
54
|
+
@tables.each { |table| table.errors.print }
|
43
55
|
end
|
44
56
|
|
45
57
|
end
|
46
58
|
|
47
59
|
class Sequential
|
48
60
|
def anonymize tables
|
49
|
-
tables.each
|
61
|
+
tables.each do |table|
|
62
|
+
begin
|
63
|
+
table.process
|
64
|
+
rescue => e
|
65
|
+
logger.error "\n#{e.message} \n #{e.backtrace}"
|
66
|
+
end
|
67
|
+
end
|
50
68
|
end
|
51
69
|
end
|
52
70
|
|
data/lib/core/field.rb
CHANGED
@@ -3,14 +3,17 @@ module DataAnon
|
|
3
3
|
|
4
4
|
class Field
|
5
5
|
|
6
|
-
def initialize name, value, row_number, ar_record
|
6
|
+
def initialize name, value, row_number, ar_record, table_name = "unknown"
|
7
7
|
@name = name
|
8
8
|
@value = value
|
9
9
|
@row_number = row_number
|
10
10
|
@ar_record = ar_record
|
11
|
+
@table_name = table_name
|
11
12
|
end
|
12
13
|
|
13
|
-
attr_accessor :name, :value, :row_number, :ar_record
|
14
|
+
attr_accessor :name, :value, :row_number, :ar_record, :table_name
|
15
|
+
|
16
|
+
alias :collection_name :table_name
|
14
17
|
|
15
18
|
end
|
16
19
|
|
data/lib/core/fields_missing_strategy.rb
ADDED
@@ -0,0 +1,30 @@
|
|
1
|
+
module DataAnon
|
2
|
+
module Core
|
3
|
+
|
4
|
+
class FieldsMissingStrategy
|
5
|
+
include Utils::Logging
|
6
|
+
|
7
|
+
def initialize table_name
|
8
|
+
@table_name = table_name
|
9
|
+
@fields_missing_strategy = []
|
10
|
+
end
|
11
|
+
|
12
|
+
def missing field_name
|
13
|
+
return if @fields_missing_strategy.include? field_name
|
14
|
+
@fields_missing_strategy << field_name
|
15
|
+
end
|
16
|
+
|
17
|
+
def fields_missing_strategy
|
18
|
+
@fields_missing_strategy
|
19
|
+
end
|
20
|
+
|
21
|
+
def print
|
22
|
+
@fields_missing_strategy.each do |field_name|
|
23
|
+
logger.info("#{@table_name}.#{field_name}")
|
24
|
+
end
|
25
|
+
end
|
26
|
+
|
27
|
+
end
|
28
|
+
|
29
|
+
end
|
30
|
+
end
|
data/lib/core/table_errors.rb
ADDED
@@ -0,0 +1,32 @@
|
|
1
|
+
module DataAnon
|
2
|
+
module Core
|
3
|
+
|
4
|
+
class TableErrors
|
5
|
+
include Utils::Logging
|
6
|
+
|
7
|
+
def initialize table_name
|
8
|
+
@table_name = table_name
|
9
|
+
@errors = []
|
10
|
+
end
|
11
|
+
|
12
|
+
def log_error record, exception
|
13
|
+
@errors << { :record => record, :exception => exception}
|
14
|
+
raise "Reached limit of error for a table" if @errors.length > 100
|
15
|
+
end
|
16
|
+
|
17
|
+
def errors
|
18
|
+
@errors
|
19
|
+
end
|
20
|
+
|
21
|
+
def print
|
22
|
+
return if @errors.length == 0
|
23
|
+
logger.error("Errors while processing table #{@table_name}:")
|
24
|
+
@errors.each do |error|
|
25
|
+
logger.error(error[:exception])
|
26
|
+
end
|
27
|
+
end
|
28
|
+
|
29
|
+
end
|
30
|
+
|
31
|
+
end
|
32
|
+
end
|
data/lib/data-anonymization.rb
CHANGED
@@ -7,11 +7,22 @@ require "utils/random_string"
|
|
7
7
|
require "utils/random_string_chars_only"
|
8
8
|
require "utils/geojson_parser"
|
9
9
|
require "utils/progress_bar"
|
10
|
+
require "utils/parallel_progress_bar"
|
10
11
|
require "utils/resource"
|
12
|
+
require "utils/template_helper"
|
11
13
|
require "parallel/table"
|
12
14
|
require "core/database"
|
15
|
+
require "core/fields_missing_strategy"
|
16
|
+
require "thor/helpers/rdbms_dsl_generator"
|
13
17
|
require "core/field"
|
18
|
+
require "core/table_errors"
|
14
19
|
require "strategy/strategies"
|
15
20
|
require "utils/database"
|
16
21
|
require "core/dsl"
|
17
22
|
|
23
|
+
begin
|
24
|
+
require 'mongo'
|
25
|
+
require "thor/helpers/mongodb_dsl_generator"
|
26
|
+
rescue LoadError
|
27
|
+
"Ignoring the mongodb specific libraries if monog driver is not specified in gem"
|
28
|
+
end
|
data/lib/parallel/table.rb
CHANGED
@@ -5,7 +5,14 @@ module DataAnon
|
|
5
5
|
class Table
|
6
6
|
|
7
7
|
def anonymize tables
|
8
|
-
::Parallel.each(tables)
|
8
|
+
::Parallel.each(tables) do |table|
|
9
|
+
begin
|
10
|
+
table.progress_bar_class DataAnon::Utils::ParallelProgressBar
|
11
|
+
table.process
|
12
|
+
rescue => e
|
13
|
+
logger.error "\n#{e.message} \n #{e.backtrace}"
|
14
|
+
end
|
15
|
+
end
|
9
16
|
end
|
10
17
|
|
11
18
|
end
|
data/lib/strategy/base.rb
CHANGED
@@ -3,12 +3,20 @@ module DataAnon
|
|
3
3
|
class Base
|
4
4
|
include Utils::Logging
|
5
5
|
|
6
|
+
attr_accessor :fields, :user_strategies, :fields_missing_strategy, :errors
|
7
|
+
|
6
8
|
def initialize source_database, destination_database, name, user_strategies
|
7
9
|
@name = name
|
8
10
|
@user_strategies = user_strategies
|
9
11
|
@fields = {}
|
10
12
|
@source_database = source_database
|
11
13
|
@destination_database = destination_database
|
14
|
+
@fields_missing_strategy = DataAnon::Core::FieldsMissingStrategy.new name
|
15
|
+
@errors = DataAnon::Core::TableErrors.new(@name)
|
16
|
+
end
|
17
|
+
|
18
|
+
def self.whitelist?
|
19
|
+
false
|
12
20
|
end
|
13
21
|
|
14
22
|
def process_fields &block
|
@@ -20,19 +28,10 @@ module DataAnon
|
|
20
28
|
@primary_keys = fields
|
21
29
|
end
|
22
30
|
|
23
|
-
def is_primary_key? field
|
24
|
-
@primary_keys.select { |key| field.downcase == key.downcase }.length > 0
|
25
|
-
end
|
26
|
-
|
27
|
-
|
28
31
|
def whitelist *fields
|
29
32
|
fields.each { |f| @fields[f.downcase] = DataAnon::Strategy::Field::Whitelist.new }
|
30
33
|
end
|
31
34
|
|
32
|
-
def fields
|
33
|
-
@fields
|
34
|
-
end
|
35
|
-
|
36
35
|
def anonymize *fields, &block
|
37
36
|
if block.nil?
|
38
37
|
fields.each { |f| @fields[f.downcase] = DataAnon::Strategy::Field::DefaultAnon.new(@user_strategies) }
|
@@ -49,6 +48,15 @@ module DataAnon
|
|
49
48
|
end
|
50
49
|
end
|
51
50
|
|
51
|
+
def is_primary_key? field
|
52
|
+
@primary_keys.select { |key| field.downcase == key.downcase }.length > 0
|
53
|
+
end
|
54
|
+
|
55
|
+
def default_strategy field_name
|
56
|
+
@fields_missing_strategy.missing field_name
|
57
|
+
DataAnon::Strategy::Field::DefaultAnon.new(@user_strategies)
|
58
|
+
end
|
59
|
+
|
52
60
|
def dest_table
|
53
61
|
return @dest_table unless @dest_table.nil?
|
54
62
|
DataAnon::Utils::DestinationDatabase.establish_connection @destination_database if @destination_database
|
@@ -65,17 +73,30 @@ module DataAnon
|
|
65
73
|
logger.debug "Processing table #{@name} with fields strategies #{@fields}"
|
66
74
|
total = source_table.count
|
67
75
|
if total > 0
|
68
|
-
index =
|
69
|
-
|
76
|
+
index = 0
|
77
|
+
progress = progress_bar.new(@name, total)
|
70
78
|
source_table.all.each do |record|
|
71
|
-
process_record index, record
|
72
79
|
index += 1
|
73
|
-
|
80
|
+
begin
|
81
|
+
process_record index, record
|
82
|
+
rescue => exception
|
83
|
+
@errors.log_error record, exception
|
84
|
+
end
|
85
|
+
progress.show index
|
74
86
|
end
|
75
|
-
|
87
|
+
progress.close
|
76
88
|
end
|
77
89
|
end
|
78
90
|
|
91
|
+
def progress_bar
|
92
|
+
@progress_bar || DataAnon::Utils::ProgressBar
|
93
|
+
end
|
94
|
+
|
95
|
+
def progress_bar_class progress_bar
|
96
|
+
@progress_bar = progress_bar
|
97
|
+
end
|
98
|
+
|
99
|
+
|
79
100
|
end
|
80
101
|
end
|
81
102
|
end
|