data-anonymization 0.3.0 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (86)
  1. data/.gitignore +2 -1
  2. data/.rvmrc +1 -1
  3. data/.travis.yml +2 -0
  4. data/Gemfile +2 -0
  5. data/README.md +295 -258
  6. data/bin/datanon +57 -0
  7. data/data-anonymization.gemspec +2 -1
  8. data/examples/blacklist_dsl.rb +42 -0
  9. data/examples/mongodb_blacklist_dsl.rb +38 -0
  10. data/examples/mongodb_whitelist_dsl.rb +44 -0
  11. data/examples/whitelist_dsl.rb +63 -0
  12. data/lib/core/database.rb +21 -3
  13. data/lib/core/field.rb +5 -2
  14. data/lib/core/fields_missing_strategy.rb +30 -0
  15. data/lib/core/table_errors.rb +32 -0
  16. data/lib/data-anonymization.rb +11 -0
  17. data/lib/parallel/table.rb +8 -1
  18. data/lib/strategy/base.rb +35 -14
  19. data/lib/strategy/blacklist.rb +1 -1
  20. data/lib/strategy/field/anonymize_array.rb +28 -0
  21. data/lib/strategy/field/contact/random_address.rb +12 -0
  22. data/lib/strategy/field/contact/random_city.rb +12 -0
  23. data/lib/strategy/field/contact/random_phone_number.rb +4 -0
  24. data/lib/strategy/field/contact/random_province.rb +12 -0
  25. data/lib/strategy/field/contact/random_zipcode.rb +12 -0
  26. data/lib/strategy/field/datetime/anonymize_date.rb +15 -0
  27. data/lib/strategy/field/datetime/anonymize_datetime.rb +19 -0
  28. data/lib/strategy/field/datetime/anonymize_time.rb +19 -0
  29. data/lib/strategy/field/datetime/date_delta.rb +10 -0
  30. data/lib/strategy/field/datetime/date_time_delta.rb +9 -0
  31. data/lib/strategy/field/datetime/time_delta.rb +8 -0
  32. data/lib/strategy/field/default_anon.rb +4 -1
  33. data/lib/strategy/field/email/gmail_template.rb +8 -0
  34. data/lib/strategy/field/email/random_email.rb +7 -0
  35. data/lib/strategy/field/email/random_mailinator_email.rb +5 -0
  36. data/lib/strategy/field/fields.rb +4 -0
  37. data/lib/strategy/field/name/random_first_name.rb +10 -0
  38. data/lib/strategy/field/name/random_full_name.rb +10 -2
  39. data/lib/strategy/field/name/random_last_name.rb +9 -0
  40. data/lib/strategy/field/name/random_user_name.rb +5 -0
  41. data/lib/strategy/field/number/random_big_decimal_delta.rb +6 -0
  42. data/lib/strategy/field/number/random_float.rb +4 -0
  43. data/lib/strategy/field/number/random_float_delta.rb +6 -0
  44. data/lib/strategy/field/number/random_integer.rb +4 -0
  45. data/lib/strategy/field/number/random_integer_delta.rb +6 -0
  46. data/lib/strategy/field/string/formatted_string_numbers.rb +10 -6
  47. data/lib/strategy/field/string/lorem_ipsum.rb +9 -0
  48. data/lib/strategy/field/string/random_formatted_string.rb +39 -0
  49. data/lib/strategy/field/string/random_string.rb +6 -0
  50. data/lib/strategy/field/string/random_url.rb +7 -1
  51. data/lib/strategy/field/string/select_from_database.rb +7 -5
  52. data/lib/strategy/field/string/select_from_file.rb +7 -0
  53. data/lib/strategy/field/string/select_from_list.rb +8 -0
  54. data/lib/strategy/field/string/string_template.rb +11 -0
  55. data/lib/strategy/mongodb/anonymize_field.rb +44 -0
  56. data/lib/strategy/mongodb/blacklist.rb +29 -0
  57. data/lib/strategy/mongodb/whitelist.rb +62 -0
  58. data/lib/strategy/strategies.rb +10 -1
  59. data/lib/strategy/whitelist.rb +7 -2
  60. data/lib/thor/helpers/mongodb_dsl_generator.rb +66 -0
  61. data/lib/thor/helpers/rdbms_dsl_generator.rb +36 -0
  62. data/lib/thor/templates/mongodb_whitelist_template.erb +15 -0
  63. data/lib/thor/templates/whitelist_template.erb +21 -0
  64. data/lib/utils/database.rb +4 -0
  65. data/lib/utils/parallel_progress_bar.rb +24 -0
  66. data/lib/utils/progress_bar.rb +34 -22
  67. data/lib/utils/random_string.rb +3 -2
  68. data/lib/utils/random_string_chars_only.rb +3 -5
  69. data/lib/utils/template_helper.rb +44 -0
  70. data/lib/version.rb +1 -1
  71. data/spec/acceptance/mongodb_blacklist_spec.rb +75 -0
  72. data/spec/acceptance/mongodb_whitelist_spec.rb +107 -0
  73. data/spec/core/fields_missing_strategy_spec.rb +26 -0
  74. data/spec/strategy/field/name/random_first_name_spec.rb +1 -1
  75. data/spec/strategy/field/name/random_full_name_spec.rb +12 -7
  76. data/spec/strategy/field/name/random_last_name_spec.rb +1 -1
  77. data/spec/strategy/field/string/random_formatted_string_spec.rb +39 -0
  78. data/spec/strategy/field/string/select_from_file_spec.rb +21 -0
  79. data/spec/strategy/mongodb/anonymize_field_spec.rb +52 -0
  80. data/spec/utils/random_float_spec.rb +12 -0
  81. data/spec/utils/random_string_char_only_spec.rb +12 -0
  82. data/spec/utils/template_helper_spec.rb +14 -0
  83. metadata +56 -6
  84. data/blacklist_dsl.rb +0 -17
  85. data/blacklist_nosql_dsl.rb +0 -36
  86. data/whitelist_dsl.rb +0 -42
data/bin/datanon ADDED
@@ -0,0 +1,57 @@
1
+ #!/usr/bin/env ruby
2
+ # encoding: UTF-8
3
+
4
+ require 'thor'
5
+ require 'data-anonymization'
6
+
7
+ class AnonymizationCLI < Thor
8
+
9
+ include Thor::Actions
10
+
11
+ desc "generate_rdbms_dsl", "Generates a base anonymization script(whitelist strategy) for a RDBMS database using the database schema"
12
+
13
+ def generate_rdbms_dsl
14
+
15
+ configuration_hash = {:adapter => options["adapter"],
16
+ :host => options["host"],
17
+ :port => options["port"],
18
+ :database => options["database"],
19
+ :username => options["username"],
20
+ :password => options["password"]
21
+ }
22
+ create_file "rdbms_whitelist_generated.rb"
23
+ DataAnon::ThorHelpers::RDBMSDSLGenerator.new.generate_whitelist_script(configuration_hash)
24
+ end
25
+
26
+ method_option :adapter, :required => true, :aliases => "-a", :desc => "Activerecord database adapter to be used [required]", :for => :generate_rdbms_dsl
27
+ method_option :host, :required => true, :aliases => "-h", :desc => "Source Database host [required]", :for => :generate_rdbms_dsl
28
+ method_option :database, :required => true, :aliases => "-d", :desc => "Database name [required]", :for => :generate_rdbms_dsl
29
+ method_option :port, :aliases => "-p", :desc => "Port to connect to. If not provided default port provided by AR will be used", :for => :generate_rdbms_dsl
30
+ method_option :username, :aliases => "-u", :desc => "Username", :for => :generate_rdbms_dsl
31
+ method_option :password, :aliases => "-w", :desc => "Password", :for => :generate_rdbms_dsl
32
+
33
+ desc "generate_mongo_dsl", "Generates a base anonymization script(whitelist strategy) for a Mongo DB using the database schema"
34
+
35
+ def generate_mongo_dsl
36
+
37
+ configuration_hash = {:host => options["host"],
38
+ :port => options["port"],
39
+ :database => options["database"],
40
+ :username => options["username"],
41
+ :password => options["password"]
42
+ }
43
+
44
+ create_file "mongodb_whitelist_generated.rb"
45
+ DataAnon::ThorHelpers::MongoDBDSLGenerator.new(configuration_hash, options["whitelist_patterns"]).generate
46
+ end
47
+
48
+ method_option :host, :required => true, :aliases => "-h", :desc => "Source Database host [required]", :for => :generate_mongo_dsl
49
+ method_option :database, :required => true, :aliases => "-d", :desc => "Database name [required]", :for => :generate_mongo_dsl
50
+ method_option :port, :aliases => "-p", :desc => "Port to connect to. If not provided default port will be used", :for => :generate_mongo_dsl
51
+ method_option :username, :aliases => "-u", :desc => "Username", :for => :generate_mongo_dsl
52
+ method_option :password, :aliases => "-w", :desc => "Password", :for => :generate_mongo_dsl
53
+ method_option :whitelist_patterns, :aliases => "-r", :desc => "Whitelist Patterns", :for => :generate_mongo_dsl
54
+
55
+ end
56
+
57
+ AnonymizationCLI.start
data/data-anonymization.gemspec CHANGED
@@ -13,7 +13,7 @@ Gem::Specification.new do |gem|
13
13
  gem.homepage = "http://sunitparekh.github.com/data-anonymization"
14
14
 
15
15
  gem.files = `git ls-files`.split($/).select { |f| !f.match(/^sample-data/) }
16
- gem.executables = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
16
+ gem.executables = "datanon"
17
17
  gem.test_files = gem.files.grep(%r{^(test|spec|features)/})
18
18
  gem.require_paths = ["lib"]
19
19
 
@@ -24,4 +24,5 @@ Gem::Specification.new do |gem|
24
24
  gem.add_dependency('rgeo-geojson', '~> 0.2.3')
25
25
  gem.add_dependency('powerbar', '~> 1.0.8')
26
26
  gem.add_dependency('parallel', '~> 0.5.18')
27
+ gem.add_dependency('thor', '~> 0.16.0')
27
28
  end
data/examples/blacklist_dsl.rb ADDED
@@ -0,0 +1,42 @@
1
+ system "bundle exec ruby whitelist_dsl.rb"
2
+
3
+ require 'data-anonymization'
4
+
5
+ DataAnon::Utils::Logging.logger.level = Logger::INFO
6
+
7
+ database 'Chinook' do
8
+ strategy DataAnon::Strategy::Blacklist
9
+ source_db :adapter => 'sqlite3', :database => 'sample-data/chinook-empty.sqlite'
10
+
11
+ table 'Employee' do
12
+ primary_key 'EmployeeId'
13
+ anonymize('BirthDate').using FieldStrategy::DateTimeDelta.new(1, 1)
14
+ anonymize('FirstName').using FieldStrategy::RandomFirstName.new
15
+ anonymize('LastName').using FieldStrategy::RandomLastName.new
16
+ anonymize('HireDate').using FieldStrategy::DateTimeDelta.new(2, 0)
17
+ anonymize('Address').using FieldStrategy::RandomAddress.region_US
18
+ anonymize('City').using FieldStrategy::RandomCity.region_US
19
+ anonymize('State').using FieldStrategy::RandomProvince.region_US
20
+ anonymize('PostalCode').using FieldStrategy::RandomZipcode.region_US
21
+ anonymize('Country') {|field| "USA" }
22
+ anonymize('Phone').using FieldStrategy::RandomPhoneNumber.new
23
+ anonymize('Fax').using FieldStrategy::RandomPhoneNumber.new
24
+ anonymize('Email').using FieldStrategy::StringTemplate.new('test+#{row_number}@gmail.com')
25
+ end
26
+
27
+ table 'Customer' do
28
+ primary_key 'CustomerId'
29
+ anonymize('Phone').using FieldStrategy::RandomPhoneNumber.new
30
+ anonymize('FirstName').using FieldStrategy::RandomFirstName.new
31
+ anonymize('LastName').using FieldStrategy::RandomLastName.new
32
+ anonymize('Address').using FieldStrategy::RandomAddress.region_US
33
+ anonymize('City').using FieldStrategy::RandomCity.region_US
34
+ anonymize('State').using FieldStrategy::RandomProvince.region_US
35
+ anonymize('PostalCode').using FieldStrategy::RandomZipcode.region_US
36
+ anonymize('Country') {|field| "USA" }
37
+ anonymize('Fax').using FieldStrategy::RandomPhoneNumber.new
38
+ anonymize('Email').using FieldStrategy::StringTemplate.new('test+#{row_number}@gmail.com')
39
+ end
40
+
41
+ end
42
+
data/examples/mongodb_blacklist_dsl.rb ADDED
@@ -0,0 +1,38 @@
1
+ require 'data-anonymization'
2
+
3
+ require 'mongo'
4
+ Mongo::Connection.from_uri("mongodb://localhost/test").drop_database('test')
5
+ system "mongoimport -d test --drop -c users --jsonArray ./sample-data/mongo/users.json"
6
+ system "mongoimport -d test --drop -c plans --jsonArray ./sample-data/mongo/plans.json"
7
+
8
+ DataAnon::Utils::Logging.logger.level = Logger::INFO
9
+
10
+ database 'test' do
11
+ strategy DataAnon::Strategy::MongoDB::Blacklist
12
+ source_db :mongodb_uri => "mongodb://localhost/test", :database => 'test'
13
+
14
+ collection 'users' do
15
+ anonymize('date_of_birth').using FieldStrategy::TimeDelta.new(5,30)
16
+ anonymize('user_id').using FieldStrategy::StringTemplate.new('user-#{row_number}')
17
+ anonymize('email').using FieldStrategy::RandomMailinatorEmail.new
18
+ anonymize('password') { |field| "password" }
19
+ anonymize('first_name').using FieldStrategy::RandomFirstName.new
20
+ anonymize('last_name').using FieldStrategy::RandomLastName.new
21
+ end
22
+
23
+ collection 'plans' do
24
+ anonymize('plan_aliases').using FieldStrategy::SelectFromList.new(["Free","Team","Business","Paid"])
25
+ anonymize 'public_sharing','photo_sharing'
26
+
27
+ collection 'features' do
28
+ anonymize('max_storage').using FieldStrategy::SelectFromList.new([10737418240,21474836480,53687091200])
29
+
30
+ document 'users' do
31
+ anonymize 'max', 'additional'
32
+ end
33
+ end
34
+
35
+ end
36
+
37
+ end
38
+
data/examples/mongodb_whitelist_dsl.rb ADDED
@@ -0,0 +1,44 @@
1
+ require 'data-anonymization'
2
+
3
+ require 'mongo'
4
+ Mongo::Connection.from_uri("mongodb://localhost/test").drop_database('test')
5
+ Mongo::Connection.from_uri("mongodb://localhost/dest").drop_database('dest')
6
+ system "mongoimport -d test --drop -c users --jsonArray ./sample-data/mongo/users.json"
7
+ system "mongoimport -d test --drop -c plans --jsonArray ./sample-data/mongo/plans.json"
8
+
9
+ DataAnon::Utils::Logging.logger.level = Logger::INFO
10
+
11
+ database 'test' do
12
+ strategy DataAnon::Strategy::MongoDB::Whitelist
13
+ source_db :mongodb_uri => "mongodb://localhost/test", :database => 'test'
14
+ destination_db :mongodb_uri => "mongodb://localhost/dest", :database => 'dest'
15
+
16
+ collection 'users' do
17
+ whitelist '_id','failed_attempts','updated_at'
18
+ anonymize('date_of_birth').using FieldStrategy::TimeDelta.new(5,30)
19
+ anonymize('user_id').using FieldStrategy::StringTemplate.new('user-#{row_number}')
20
+ anonymize('email').using FieldStrategy::RandomMailinatorEmail.new
21
+ anonymize('password') { |field| "password" }
22
+ anonymize('first_name').using FieldStrategy::RandomFirstName.new
23
+ anonymize('last_name').using FieldStrategy::RandomLastName.new
24
+ anonymize 'password_reset_answer','password_reset_question'
25
+ end
26
+
27
+ collection 'plans' do
28
+ whitelist '_id', 'name','term', 'created_at'
29
+ anonymize('plan_aliases').using FieldStrategy::SelectFromList.new(["Free","Team","Business","Paid"])
30
+ anonymize 'public_sharing','photo_sharing'
31
+
32
+ collection 'features' do
33
+ anonymize('max_storage').using FieldStrategy::SelectFromList.new([10737418240,21474836480,53687091200])
34
+ whitelist 'type'
35
+
36
+ document 'users' do
37
+ anonymize 'max', 'additional'
38
+ end
39
+ end
40
+
41
+ end
42
+
43
+ end
44
+
data/examples/whitelist_dsl.rb ADDED
@@ -0,0 +1,63 @@
1
+ system "rake empty_dest" # clean destination database on every call
2
+
3
+ require 'data-anonymization'
4
+
5
+ DataAnon::Utils::Logging.logger.level = Logger::INFO
6
+
7
+ database 'Chinook' do
8
+ strategy DataAnon::Strategy::Whitelist
9
+ source_db :adapter => 'sqlite3', :database => 'sample-data/chinook.sqlite'
10
+ destination_db :adapter => 'sqlite3', :database => 'sample-data/chinook-empty.sqlite'
11
+
12
+ default_field_strategies :string => FieldStrategy::StringTemplate.new('Sunit #{row_number} Parekh')
13
+
14
+ table 'Genre' do
15
+ primary_key 'GenreId'
16
+ whitelist 'GenreId'
17
+ anonymize 'Name' do |field|
18
+ field.value + " test"
19
+ end
20
+ end
21
+
22
+ table 'MediaType' do
23
+ primary_key 'MediaTypeId'
24
+ anonymize('MediaTypeId') { |field| field.value } # same as whitelist
25
+ anonymize('Name').using FieldStrategy::StringTemplate.new('Media Type #{row_number}')
26
+
27
+ end
28
+
29
+ table 'Employee' do
30
+ primary_key 'EmployeeId'
31
+ whitelist 'EmployeeId', 'ReportsTo', 'Title'
32
+ anonymize('BirthDate').using FieldStrategy::DateTimeDelta.new(1, 1)
33
+ anonymize('FirstName').using FieldStrategy::RandomFirstName.new
34
+ anonymize('LastName').using FieldStrategy::RandomLastName.new
35
+ anonymize('HireDate').using FieldStrategy::DateTimeDelta.new(2, 0)
36
+ anonymize('Address').using FieldStrategy::RandomAddress.region_US
37
+ anonymize('City').using FieldStrategy::RandomCity.region_US
38
+ anonymize('State').using FieldStrategy::RandomProvince.region_US
39
+ anonymize('PostalCode').using FieldStrategy::RandomZipcode.region_US
40
+ anonymize('Country') {|field| "USA" }
41
+ anonymize('Phone').using FieldStrategy::RandomPhoneNumber.new
42
+ anonymize('Fax').using FieldStrategy::RandomPhoneNumber.new
43
+ anonymize('Email').using FieldStrategy::StringTemplate.new('test+#{row_number}@gmail.com')
44
+ end
45
+
46
+ table 'Customer' do
47
+ primary_key 'CustomerId'
48
+ whitelist 'SupportRepId', 'Company'
49
+ anonymize('Phone').using FieldStrategy::RandomPhoneNumber.new
50
+ anonymize('FirstName').using FieldStrategy::RandomFirstName.new
51
+ anonymize('LastName').using FieldStrategy::RandomLastName.new
52
+ anonymize('Address').using FieldStrategy::RandomAddress.region_US
53
+ anonymize('City').using FieldStrategy::RandomCity.region_US
54
+ anonymize('State').using FieldStrategy::RandomProvince.region_US
55
+ anonymize('PostalCode').using FieldStrategy::RandomZipcode.region_US
56
+ anonymize('Country') {|field| "USA" }
57
+ anonymize('Fax').using FieldStrategy::RandomPhoneNumber.new
58
+ anonymize('Email').using FieldStrategy::StringTemplate.new('test+#{row_number}@gmail.com')
59
+ end
60
+
61
+
62
+ end
63
+
data/lib/core/database.rb CHANGED
@@ -2,6 +2,7 @@ module DataAnon
2
2
  module Core
3
3
 
4
4
  class Database
5
+ include Utils::Logging
5
6
 
6
7
  def initialize name
7
8
  @name = name
@@ -35,18 +36,35 @@ module DataAnon
35
36
 
36
37
  def table (name, &block)
37
38
  table = @strategy.new(@source_database, @destination_database, name, @user_defaults).process_fields(&block)
38
- @tables<< table
39
+ @tables << table
39
40
  end
41
+ alias :collection :table
40
42
 
41
43
  def anonymize
42
- @execution_strategy.new.anonymize @tables
44
+ begin
45
+ @execution_strategy.new.anonymize @tables
46
+ rescue => e
47
+ logger.error "\n#{e.message} \n #{e.backtrace}"
48
+ end
49
+ if @strategy.whitelist?
50
+ logger.info("Fields missing the anonymization strategy")
51
+ @tables.each { |table| table.fields_missing_strategy.print }
52
+ end
53
+
54
+ @tables.each { |table| table.errors.print }
43
55
  end
44
56
 
45
57
  end
46
58
 
47
59
  class Sequential
48
60
  def anonymize tables
49
- tables.each { |table| table.process }
61
+ tables.each do |table|
62
+ begin
63
+ table.process
64
+ rescue => e
65
+ logger.error "\n#{e.message} \n #{e.backtrace}"
66
+ end
67
+ end
50
68
  end
51
69
  end
52
70
 
data/lib/core/field.rb CHANGED
@@ -3,14 +3,17 @@ module DataAnon
3
3
 
4
4
  class Field
5
5
 
6
- def initialize name, value, row_number, ar_record
6
+ def initialize name, value, row_number, ar_record, table_name = "unknown"
7
7
  @name = name
8
8
  @value = value
9
9
  @row_number = row_number
10
10
  @ar_record = ar_record
11
+ @table_name = table_name
11
12
  end
12
13
 
13
- attr_accessor :name, :value, :row_number, :ar_record
14
+ attr_accessor :name, :value, :row_number, :ar_record, :table_name
15
+
16
+ alias :collection_name :table_name
14
17
 
15
18
  end
16
19
 
data/lib/core/fields_missing_strategy.rb ADDED
@@ -0,0 +1,30 @@
1
+ module DataAnon
2
+ module Core
3
+
4
+ class FieldsMissingStrategy
5
+ include Utils::Logging
6
+
7
+ def initialize table_name
8
+ @table_name = table_name
9
+ @fields_missing_strategy = []
10
+ end
11
+
12
+ def missing field_name
13
+ return if @fields_missing_strategy.include? field_name
14
+ @fields_missing_strategy << field_name
15
+ end
16
+
17
+ def fields_missing_strategy
18
+ @fields_missing_strategy
19
+ end
20
+
21
+ def print
22
+ @fields_missing_strategy.each do |field_name|
23
+ logger.info("#{@table_name}.#{field_name}")
24
+ end
25
+ end
26
+
27
+ end
28
+
29
+ end
30
+ end
data/lib/core/table_errors.rb ADDED
@@ -0,0 +1,32 @@
1
+ module DataAnon
2
+ module Core
3
+
4
+ class TableErrors
5
+ include Utils::Logging
6
+
7
+ def initialize table_name
8
+ @table_name = table_name
9
+ @errors = []
10
+ end
11
+
12
+ def log_error record, exception
13
+ @errors << { :record => record, :exception => exception}
14
+ raise "Reached limit of error for a table" if @errors.length > 100
15
+ end
16
+
17
+ def errors
18
+ @errors
19
+ end
20
+
21
+ def print
22
+ return if @errors.length == 0
23
+ logger.error("Errors while processing table #{@table_name}:")
24
+ @errors.each do |error|
25
+ logger.error(error[:exception])
26
+ end
27
+ end
28
+
29
+ end
30
+
31
+ end
32
+ end
data/lib/data-anonymization.rb CHANGED
@@ -7,11 +7,22 @@ require "utils/random_string"
7
7
  require "utils/random_string_chars_only"
8
8
  require "utils/geojson_parser"
9
9
  require "utils/progress_bar"
10
+ require "utils/parallel_progress_bar"
10
11
  require "utils/resource"
12
+ require "utils/template_helper"
11
13
  require "parallel/table"
12
14
  require "core/database"
15
+ require "core/fields_missing_strategy"
16
+ require "thor/helpers/rdbms_dsl_generator"
13
17
  require "core/field"
18
+ require "core/table_errors"
14
19
  require "strategy/strategies"
15
20
  require "utils/database"
16
21
  require "core/dsl"
17
22
 
23
+ begin
24
+ require 'mongo'
25
+ require "thor/helpers/mongodb_dsl_generator"
26
+ rescue LoadError
27
+ "Ignoring the mongodb specific libraries if monog driver is not specified in gem"
28
+ end
data/lib/parallel/table.rb CHANGED
@@ -5,7 +5,14 @@ module DataAnon
5
5
  class Table
6
6
 
7
7
  def anonymize tables
8
- ::Parallel.each(tables) { |table| table.process }
8
+ ::Parallel.each(tables) do |table|
9
+ begin
10
+ table.progress_bar_class DataAnon::Utils::ParallelProgressBar
11
+ table.process
12
+ rescue => e
13
+ logger.error "\n#{e.message} \n #{e.backtrace}"
14
+ end
15
+ end
9
16
  end
10
17
 
11
18
  end
data/lib/strategy/base.rb CHANGED
@@ -3,12 +3,20 @@ module DataAnon
3
3
  class Base
4
4
  include Utils::Logging
5
5
 
6
+ attr_accessor :fields, :user_strategies, :fields_missing_strategy, :errors
7
+
6
8
  def initialize source_database, destination_database, name, user_strategies
7
9
  @name = name
8
10
  @user_strategies = user_strategies
9
11
  @fields = {}
10
12
  @source_database = source_database
11
13
  @destination_database = destination_database
14
+ @fields_missing_strategy = DataAnon::Core::FieldsMissingStrategy.new name
15
+ @errors = DataAnon::Core::TableErrors.new(@name)
16
+ end
17
+
18
+ def self.whitelist?
19
+ false
12
20
  end
13
21
 
14
22
  def process_fields &block
@@ -20,19 +28,10 @@ module DataAnon
20
28
  @primary_keys = fields
21
29
  end
22
30
 
23
- def is_primary_key? field
24
- @primary_keys.select { |key| field.downcase == key.downcase }.length > 0
25
- end
26
-
27
-
28
31
  def whitelist *fields
29
32
  fields.each { |f| @fields[f.downcase] = DataAnon::Strategy::Field::Whitelist.new }
30
33
  end
31
34
 
32
- def fields
33
- @fields
34
- end
35
-
36
35
  def anonymize *fields, &block
37
36
  if block.nil?
38
37
  fields.each { |f| @fields[f.downcase] = DataAnon::Strategy::Field::DefaultAnon.new(@user_strategies) }
@@ -49,6 +48,15 @@ module DataAnon
49
48
  end
50
49
  end
51
50
 
51
+ def is_primary_key? field
52
+ @primary_keys.select { |key| field.downcase == key.downcase }.length > 0
53
+ end
54
+
55
+ def default_strategy field_name
56
+ @fields_missing_strategy.missing field_name
57
+ DataAnon::Strategy::Field::DefaultAnon.new(@user_strategies)
58
+ end
59
+
52
60
  def dest_table
53
61
  return @dest_table unless @dest_table.nil?
54
62
  DataAnon::Utils::DestinationDatabase.establish_connection @destination_database if @destination_database
@@ -65,17 +73,30 @@ module DataAnon
65
73
  logger.debug "Processing table #{@name} with fields strategies #{@fields}"
66
74
  total = source_table.count
67
75
  if total > 0
68
- index = 1
69
- progress_bar = DataAnon::Utils::ProgressBar.new @name, total
76
+ index = 0
77
+ progress = progress_bar.new(@name, total)
70
78
  source_table.all.each do |record|
71
- process_record index, record
72
79
  index += 1
73
- progress_bar.show(index)
80
+ begin
81
+ process_record index, record
82
+ rescue => exception
83
+ @errors.log_error record, exception
84
+ end
85
+ progress.show index
74
86
  end
75
- progress_bar.close
87
+ progress.close
76
88
  end
77
89
  end
78
90
 
91
+ def progress_bar
92
+ @progress_bar || DataAnon::Utils::ProgressBar
93
+ end
94
+
95
+ def progress_bar_class progress_bar
96
+ @progress_bar = progress_bar
97
+ end
98
+
99
+
79
100
  end
80
101
  end
81
102
  end