data-anonymization 0.3.0 → 0.5.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (86) hide show
  1. data/.gitignore +2 -1
  2. data/.rvmrc +1 -1
  3. data/.travis.yml +2 -0
  4. data/Gemfile +2 -0
  5. data/README.md +295 -258
  6. data/bin/datanon +57 -0
  7. data/data-anonymization.gemspec +2 -1
  8. data/examples/blacklist_dsl.rb +42 -0
  9. data/examples/mongodb_blacklist_dsl.rb +38 -0
  10. data/examples/mongodb_whitelist_dsl.rb +44 -0
  11. data/examples/whitelist_dsl.rb +63 -0
  12. data/lib/core/database.rb +21 -3
  13. data/lib/core/field.rb +5 -2
  14. data/lib/core/fields_missing_strategy.rb +30 -0
  15. data/lib/core/table_errors.rb +32 -0
  16. data/lib/data-anonymization.rb +11 -0
  17. data/lib/parallel/table.rb +8 -1
  18. data/lib/strategy/base.rb +35 -14
  19. data/lib/strategy/blacklist.rb +1 -1
  20. data/lib/strategy/field/anonymize_array.rb +28 -0
  21. data/lib/strategy/field/contact/random_address.rb +12 -0
  22. data/lib/strategy/field/contact/random_city.rb +12 -0
  23. data/lib/strategy/field/contact/random_phone_number.rb +4 -0
  24. data/lib/strategy/field/contact/random_province.rb +12 -0
  25. data/lib/strategy/field/contact/random_zipcode.rb +12 -0
  26. data/lib/strategy/field/datetime/anonymize_date.rb +15 -0
  27. data/lib/strategy/field/datetime/anonymize_datetime.rb +19 -0
  28. data/lib/strategy/field/datetime/anonymize_time.rb +19 -0
  29. data/lib/strategy/field/datetime/date_delta.rb +10 -0
  30. data/lib/strategy/field/datetime/date_time_delta.rb +9 -0
  31. data/lib/strategy/field/datetime/time_delta.rb +8 -0
  32. data/lib/strategy/field/default_anon.rb +4 -1
  33. data/lib/strategy/field/email/gmail_template.rb +8 -0
  34. data/lib/strategy/field/email/random_email.rb +7 -0
  35. data/lib/strategy/field/email/random_mailinator_email.rb +5 -0
  36. data/lib/strategy/field/fields.rb +4 -0
  37. data/lib/strategy/field/name/random_first_name.rb +10 -0
  38. data/lib/strategy/field/name/random_full_name.rb +10 -2
  39. data/lib/strategy/field/name/random_last_name.rb +9 -0
  40. data/lib/strategy/field/name/random_user_name.rb +5 -0
  41. data/lib/strategy/field/number/random_big_decimal_delta.rb +6 -0
  42. data/lib/strategy/field/number/random_float.rb +4 -0
  43. data/lib/strategy/field/number/random_float_delta.rb +6 -0
  44. data/lib/strategy/field/number/random_integer.rb +4 -0
  45. data/lib/strategy/field/number/random_integer_delta.rb +6 -0
  46. data/lib/strategy/field/string/formatted_string_numbers.rb +10 -6
  47. data/lib/strategy/field/string/lorem_ipsum.rb +9 -0
  48. data/lib/strategy/field/string/random_formatted_string.rb +39 -0
  49. data/lib/strategy/field/string/random_string.rb +6 -0
  50. data/lib/strategy/field/string/random_url.rb +7 -1
  51. data/lib/strategy/field/string/select_from_database.rb +7 -5
  52. data/lib/strategy/field/string/select_from_file.rb +7 -0
  53. data/lib/strategy/field/string/select_from_list.rb +8 -0
  54. data/lib/strategy/field/string/string_template.rb +11 -0
  55. data/lib/strategy/mongodb/anonymize_field.rb +44 -0
  56. data/lib/strategy/mongodb/blacklist.rb +29 -0
  57. data/lib/strategy/mongodb/whitelist.rb +62 -0
  58. data/lib/strategy/strategies.rb +10 -1
  59. data/lib/strategy/whitelist.rb +7 -2
  60. data/lib/thor/helpers/mongodb_dsl_generator.rb +66 -0
  61. data/lib/thor/helpers/rdbms_dsl_generator.rb +36 -0
  62. data/lib/thor/templates/mongodb_whitelist_template.erb +15 -0
  63. data/lib/thor/templates/whitelist_template.erb +21 -0
  64. data/lib/utils/database.rb +4 -0
  65. data/lib/utils/parallel_progress_bar.rb +24 -0
  66. data/lib/utils/progress_bar.rb +34 -22
  67. data/lib/utils/random_string.rb +3 -2
  68. data/lib/utils/random_string_chars_only.rb +3 -5
  69. data/lib/utils/template_helper.rb +44 -0
  70. data/lib/version.rb +1 -1
  71. data/spec/acceptance/mongodb_blacklist_spec.rb +75 -0
  72. data/spec/acceptance/mongodb_whitelist_spec.rb +107 -0
  73. data/spec/core/fields_missing_strategy_spec.rb +26 -0
  74. data/spec/strategy/field/name/random_first_name_spec.rb +1 -1
  75. data/spec/strategy/field/name/random_full_name_spec.rb +12 -7
  76. data/spec/strategy/field/name/random_last_name_spec.rb +1 -1
  77. data/spec/strategy/field/string/random_formatted_string_spec.rb +39 -0
  78. data/spec/strategy/field/string/select_from_file_spec.rb +21 -0
  79. data/spec/strategy/mongodb/anonymize_field_spec.rb +52 -0
  80. data/spec/utils/random_float_spec.rb +12 -0
  81. data/spec/utils/random_string_char_only_spec.rb +12 -0
  82. data/spec/utils/template_helper_spec.rb +14 -0
  83. metadata +56 -6
  84. data/blacklist_dsl.rb +0 -17
  85. data/blacklist_nosql_dsl.rb +0 -36
  86. data/whitelist_dsl.rb +0 -42
data/bin/datanon ADDED
@@ -0,0 +1,57 @@
1
+ #!/usr/bin/env ruby
2
+ # encoding: UTF-8
3
+
4
+ require 'thor'
5
+ require 'data-anonymization'
6
+
# Thor command-line interface that generates starter whitelist anonymization
# scripts for an RDBMS or a MongoDB database.
class AnonymizationCLI < Thor

  include Thor::Actions

  desc "generate_rdbms_dsl", "Generates a base anonymization script(whitelist strategy) for a RDBMS database using the database schema"

  # Creates rdbms_whitelist_generated.rb and passes the connection settings
  # read from the command-line options to the RDBMS DSL generator.
  def generate_rdbms_dsl
    connection_keys = [:adapter, :host, :port, :database, :username, :password]
    configuration_hash = {}
    connection_keys.each { |key| configuration_hash[key] = options[key.to_s] }

    create_file "rdbms_whitelist_generated.rb"
    generator = DataAnon::ThorHelpers::RDBMSDSLGenerator.new
    generator.generate_whitelist_script(configuration_hash)
  end

  method_option :adapter, :required => true, :aliases => "-a",
                :desc => "Activerecord database adapter to be used [required]",
                :for => :generate_rdbms_dsl
  method_option :host, :required => true, :aliases => "-h",
                :desc => "Source Database host [required]",
                :for => :generate_rdbms_dsl
  method_option :database, :required => true, :aliases => "-d",
                :desc => "Database name [required]",
                :for => :generate_rdbms_dsl
  method_option :port, :aliases => "-p",
                :desc => "Port to connect to. If not provided default port provided by AR will be used",
                :for => :generate_rdbms_dsl
  method_option :username, :aliases => "-u",
                :desc => "Username",
                :for => :generate_rdbms_dsl
  method_option :password, :aliases => "-w",
                :desc => "Password",
                :for => :generate_rdbms_dsl

  desc "generate_mongo_dsl", "Generates a base anonymization script(whitelist strategy) for a Mongo DB using the database schema"

  # Creates mongodb_whitelist_generated.rb and runs the MongoDB DSL generator
  # with the connection settings and the optional whitelist patterns.
  def generate_mongo_dsl
    connection_keys = [:host, :port, :database, :username, :password]
    configuration_hash = {}
    connection_keys.each { |key| configuration_hash[key] = options[key.to_s] }

    create_file "mongodb_whitelist_generated.rb"
    whitelist_patterns = options["whitelist_patterns"]
    DataAnon::ThorHelpers::MongoDBDSLGenerator.new(configuration_hash, whitelist_patterns).generate
  end

  method_option :host, :required => true, :aliases => "-h",
                :desc => "Source Database host [required]",
                :for => :generate_mongo_dsl
  method_option :database, :required => true, :aliases => "-d",
                :desc => "Database name [required]",
                :for => :generate_mongo_dsl
  method_option :port, :aliases => "-p",
                :desc => "Port to connect to. If not provided default port will be used",
                :for => :generate_mongo_dsl
  method_option :username, :aliases => "-u",
                :desc => "Username",
                :for => :generate_mongo_dsl
  method_option :password, :aliases => "-w",
                :desc => "Password",
                :for => :generate_mongo_dsl
  method_option :whitelist_patterns, :aliases => "-r",
                :desc => "Whitelist Patterns",
                :for => :generate_mongo_dsl

end
56
+
57
+ AnonymizationCLI.start
@@ -13,7 +13,7 @@ Gem::Specification.new do |gem|
13
13
  gem.homepage = "http://sunitparekh.github.com/data-anonymization"
14
14
 
15
15
  gem.files = `git ls-files`.split($/).select { |f| !f.match(/^sample-data/) }
16
- gem.executables = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
16
+ gem.executables = "datanon"
17
17
  gem.test_files = gem.files.grep(%r{^(test|spec|features)/})
18
18
  gem.require_paths = ["lib"]
19
19
 
@@ -24,4 +24,5 @@ Gem::Specification.new do |gem|
24
24
  gem.add_dependency('rgeo-geojson', '~> 0.2.3')
25
25
  gem.add_dependency('powerbar', '~> 1.0.8')
26
26
  gem.add_dependency('parallel', '~> 0.5.18')
27
+ gem.add_dependency('thor', '~> 0.16.0')
27
28
  end
@@ -0,0 +1,42 @@
1
+ system "bundle exec ruby whitelist_dsl.rb"
2
+
3
+ require 'data-anonymization'
4
+
5
+ DataAnon::Utils::Logging.logger.level = Logger::INFO
6
+
7
+ database 'Chinook' do
8
+ strategy DataAnon::Strategy::Blacklist
9
+ source_db :adapter => 'sqlite3', :database => 'sample-data/chinook-empty.sqlite'
10
+
11
+ table 'Employee' do
12
+ primary_key 'EmployeeId'
13
+ anonymize('BirthDate').using FieldStrategy::DateTimeDelta.new(1, 1)
14
+ anonymize('FirstName').using FieldStrategy::RandomFirstName.new
15
+ anonymize('LastName').using FieldStrategy::RandomLastName.new
16
+ anonymize('HireDate').using FieldStrategy::DateTimeDelta.new(2, 0)
17
+ anonymize('Address').using FieldStrategy::RandomAddress.region_US
18
+ anonymize('City').using FieldStrategy::RandomCity.region_US
19
+ anonymize('State').using FieldStrategy::RandomProvince.region_US
20
+ anonymize('PostalCode').using FieldStrategy::RandomZipcode.region_US
21
+ anonymize('Country') {|field| "USA" }
22
+ anonymize('Phone').using FieldStrategy::RandomPhoneNumber.new
23
+ anonymize('Fax').using FieldStrategy::RandomPhoneNumber.new
24
+ anonymize('Email').using FieldStrategy::StringTemplate.new('test+#{row_number}@gmail.com')
25
+ end
26
+
27
+ table 'Customer' do
28
+ primary_key 'CustomerId'
29
+ anonymize('Phone').using FieldStrategy::RandomPhoneNumber.new
30
+ anonymize('FirstName').using FieldStrategy::RandomFirstName.new
31
+ anonymize('LastName').using FieldStrategy::RandomLastName.new
32
+ anonymize('Address').using FieldStrategy::RandomAddress.region_US
33
+ anonymize('City').using FieldStrategy::RandomCity.region_US
34
+ anonymize('State').using FieldStrategy::RandomProvince.region_US
35
+ anonymize('PostalCode').using FieldStrategy::RandomZipcode.region_US
36
+ anonymize('Country') {|field| "USA" }
37
+ anonymize('Fax').using FieldStrategy::RandomPhoneNumber.new
38
+ anonymize('Email').using FieldStrategy::StringTemplate.new('test+#{row_number}@gmail.com')
39
+ end
40
+
41
+ end
42
+
@@ -0,0 +1,38 @@
1
+ require 'data-anonymization'
2
+
3
+ require 'mongo'
4
+ Mongo::Connection.from_uri("mongodb://localhost/test").drop_database('test')
5
+ system "mongoimport -d test --drop -c users --jsonArray ./sample-data/mongo/users.json"
6
+ system "mongoimport -d test --drop -c plans --jsonArray ./sample-data/mongo/plans.json"
7
+
8
+ DataAnon::Utils::Logging.logger.level = Logger::INFO
9
+
10
+ database 'test' do
11
+ strategy DataAnon::Strategy::MongoDB::Blacklist
12
+ source_db :mongodb_uri => "mongodb://localhost/test", :database => 'test'
13
+
14
+ collection 'users' do
15
+ anonymize('date_of_birth').using FieldStrategy::TimeDelta.new(5,30)
16
+ anonymize('user_id').using FieldStrategy::StringTemplate.new('user-#{row_number}')
17
+ anonymize('email').using FieldStrategy::RandomMailinatorEmail.new
18
+ anonymize('password') { |field| "password" }
19
+ anonymize('first_name').using FieldStrategy::RandomFirstName.new
20
+ anonymize('last_name').using FieldStrategy::RandomLastName.new
21
+ end
22
+
23
+ collection 'plans' do
24
+ anonymize('plan_aliases').using FieldStrategy::SelectFromList.new(["Free","Team","Business","Paid"])
25
+ anonymize 'public_sharing','photo_sharing'
26
+
27
+ collection 'features' do
28
+ anonymize('max_storage').using FieldStrategy::SelectFromList.new([10737418240,21474836480,53687091200])
29
+
30
+ document 'users' do
31
+ anonymize 'max', 'additional'
32
+ end
33
+ end
34
+
35
+ end
36
+
37
+ end
38
+
@@ -0,0 +1,44 @@
1
+ require 'data-anonymization'
2
+
3
+ require 'mongo'
4
+ Mongo::Connection.from_uri("mongodb://localhost/test").drop_database('test')
5
+ Mongo::Connection.from_uri("mongodb://localhost/dest").drop_database('dest')
6
+ system "mongoimport -d test --drop -c users --jsonArray ./sample-data/mongo/users.json"
7
+ system "mongoimport -d test --drop -c plans --jsonArray ./sample-data/mongo/plans.json"
8
+
9
+ DataAnon::Utils::Logging.logger.level = Logger::INFO
10
+
11
+ database 'test' do
12
+ strategy DataAnon::Strategy::MongoDB::Whitelist
13
+ source_db :mongodb_uri => "mongodb://localhost/test", :database => 'test'
14
+ destination_db :mongodb_uri => "mongodb://localhost/dest", :database => 'dest'
15
+
16
+ collection 'users' do
17
+ whitelist '_id','failed_attempts','updated_at'
18
+ anonymize('date_of_birth').using FieldStrategy::TimeDelta.new(5,30)
19
+ anonymize('user_id').using FieldStrategy::StringTemplate.new('user-#{row_number}')
20
+ anonymize('email').using FieldStrategy::RandomMailinatorEmail.new
21
+ anonymize('password') { |field| "password" }
22
+ anonymize('first_name').using FieldStrategy::RandomFirstName.new
23
+ anonymize('last_name').using FieldStrategy::RandomLastName.new
24
+ anonymize 'password_reset_answer','password_reset_question'
25
+ end
26
+
27
+ collection 'plans' do
28
+ whitelist '_id', 'name','term', 'created_at'
29
+ anonymize('plan_aliases').using FieldStrategy::SelectFromList.new(["Free","Team","Business","Paid"])
30
+ anonymize 'public_sharing','photo_sharing'
31
+
32
+ collection 'features' do
33
+ anonymize('max_storage').using FieldStrategy::SelectFromList.new([10737418240,21474836480,53687091200])
34
+ whitelist 'type'
35
+
36
+ document 'users' do
37
+ anonymize 'max', 'additional'
38
+ end
39
+ end
40
+
41
+ end
42
+
43
+ end
44
+
@@ -0,0 +1,63 @@
1
+ system "rake empty_dest" # clean destination database on every call
2
+
3
+ require 'data-anonymization'
4
+
5
+ DataAnon::Utils::Logging.logger.level = Logger::INFO
6
+
7
+ database 'Chinook' do
8
+ strategy DataAnon::Strategy::Whitelist
9
+ source_db :adapter => 'sqlite3', :database => 'sample-data/chinook.sqlite'
10
+ destination_db :adapter => 'sqlite3', :database => 'sample-data/chinook-empty.sqlite'
11
+
12
+ default_field_strategies :string => FieldStrategy::StringTemplate.new('Sunit #{row_number} Parekh')
13
+
14
+ table 'Genre' do
15
+ primary_key 'GenreId'
16
+ whitelist 'GenreId'
17
+ anonymize 'Name' do |field|
18
+ field.value + " test"
19
+ end
20
+ end
21
+
22
+ table 'MediaType' do
23
+ primary_key 'MediaTypeId'
24
+ anonymize('MediaTypeId') { |field| field.value } # same as whitelist
25
+ anonymize('Name').using FieldStrategy::StringTemplate.new('Media Type #{row_number}')
26
+
27
+ end
28
+
29
+ table 'Employee' do
30
+ primary_key 'EmployeeId'
31
+ whitelist 'EmployeeId', 'ReportsTo', 'Title'
32
+ anonymize('BirthDate').using FieldStrategy::DateTimeDelta.new(1, 1)
33
+ anonymize('FirstName').using FieldStrategy::RandomFirstName.new
34
+ anonymize('LastName').using FieldStrategy::RandomLastName.new
35
+ anonymize('HireDate').using FieldStrategy::DateTimeDelta.new(2, 0)
36
+ anonymize('Address').using FieldStrategy::RandomAddress.region_US
37
+ anonymize('City').using FieldStrategy::RandomCity.region_US
38
+ anonymize('State').using FieldStrategy::RandomProvince.region_US
39
+ anonymize('PostalCode').using FieldStrategy::RandomZipcode.region_US
40
+ anonymize('Country') {|field| "USA" }
41
+ anonymize('Phone').using FieldStrategy::RandomPhoneNumber.new
42
+ anonymize('Fax').using FieldStrategy::RandomPhoneNumber.new
43
+ anonymize('Email').using FieldStrategy::StringTemplate.new('test+#{row_number}@gmail.com')
44
+ end
45
+
46
+ table 'Customer' do
47
+ primary_key 'CustomerId'
48
+ whitelist 'SupportRepId', 'Company'
49
+ anonymize('Phone').using FieldStrategy::RandomPhoneNumber.new
50
+ anonymize('FirstName').using FieldStrategy::RandomFirstName.new
51
+ anonymize('LastName').using FieldStrategy::RandomLastName.new
52
+ anonymize('Address').using FieldStrategy::RandomAddress.region_US
53
+ anonymize('City').using FieldStrategy::RandomCity.region_US
54
+ anonymize('State').using FieldStrategy::RandomProvince.region_US
55
+ anonymize('PostalCode').using FieldStrategy::RandomZipcode.region_US
56
+ anonymize('Country') {|field| "USA" }
57
+ anonymize('Fax').using FieldStrategy::RandomPhoneNumber.new
58
+ anonymize('Email').using FieldStrategy::StringTemplate.new('test+#{row_number}@gmail.com')
59
+ end
60
+
61
+
62
+ end
63
+
data/lib/core/database.rb CHANGED
@@ -2,6 +2,7 @@ module DataAnon
2
2
  module Core
3
3
 
4
4
  class Database
5
+ include Utils::Logging
5
6
 
6
7
  def initialize name
7
8
  @name = name
@@ -35,18 +36,35 @@ module DataAnon
35
36
 
36
37
  def table (name, &block)
37
38
  table = @strategy.new(@source_database, @destination_database, name, @user_defaults).process_fields(&block)
38
- @tables<< table
39
+ @tables << table
39
40
  end
41
+ alias :collection :table
40
42
 
41
43
  def anonymize
42
- @execution_strategy.new.anonymize @tables
44
+ begin
45
+ @execution_strategy.new.anonymize @tables
46
+ rescue => e
47
+ logger.error "\n#{e.message} \n #{e.backtrace}"
48
+ end
49
+ if @strategy.whitelist?
50
+ logger.info("Fields missing the anonymization strategy")
51
+ @tables.each { |table| table.fields_missing_strategy.print }
52
+ end
53
+
54
+ @tables.each { |table| table.errors.print }
43
55
  end
44
56
 
45
57
  end
46
58
 
# Execution strategy that processes the tables one after another.
class Sequential

  # Calls +process+ on every table in order. A failure in one table is
  # logged and the remaining tables are still processed.
  #
  # tables - collection of objects responding to +process+.
  #
  # Returns whatever +tables.each+ returns.
  def anonymize tables
    tables.each do |table|
      begin
        table.process
      rescue => e
        # No Logging mixin is included in this class, so a bare `logger`
        # call would itself raise NameError inside the handler and abort
        # the run. Use the module-level logger instead.
        DataAnon::Utils::Logging.logger.error "\n#{e.message} \n #{e.backtrace}"
      end
    end
  end
end
52
70
 
data/lib/core/field.rb CHANGED
@@ -3,14 +3,17 @@ module DataAnon
3
3
 
4
4
  class Field
5
5
 
6
- def initialize name, value, row_number, ar_record
6
+ def initialize name, value, row_number, ar_record, table_name = "unknown"
7
7
  @name = name
8
8
  @value = value
9
9
  @row_number = row_number
10
10
  @ar_record = ar_record
11
+ @table_name = table_name
11
12
  end
12
13
 
13
- attr_accessor :name, :value, :row_number, :ar_record
14
+ attr_accessor :name, :value, :row_number, :ar_record, :table_name
15
+
16
+ alias :collection_name :table_name
14
17
 
15
18
  end
16
19
 
@@ -0,0 +1,30 @@
module DataAnon
  module Core

    # Keeps a de-duplicated list of the field names reported through #missing
    # for one table and can write that list to the log.
    class FieldsMissingStrategy
      include Utils::Logging

      # Field names recorded so far, in the order they were first reported.
      attr_reader :fields_missing_strategy

      # table_name - name used as the prefix of every line written by #print.
      def initialize table_name
        @table_name = table_name
        @fields_missing_strategy = []
      end

      # Records field_name unless it has already been recorded.
      def missing field_name
        @fields_missing_strategy << field_name unless @fields_missing_strategy.include?(field_name)
      end

      # Logs one "table.field" line at info level per recorded field.
      def print
        @fields_missing_strategy.each { |field_name| logger.info("#{@table_name}.#{field_name}") }
      end

    end

  end
end
@@ -0,0 +1,32 @@
module DataAnon
  module Core

    # Collects the errors raised while processing the records of one table
    # and can write them to the log.
    class TableErrors
      include Utils::Logging

      # Number of errors tolerated per table when no explicit limit is given.
      DEFAULT_MAX_ERRORS = 100

      # Array of { :record => ..., :exception => ... } hashes, in the order logged.
      attr_reader :errors

      # table_name - name used in the header line written by #print.
      # max_errors - number of errors tolerated before #log_error raises
      #              (defaults to DEFAULT_MAX_ERRORS, the previous fixed limit).
      def initialize table_name, max_errors = DEFAULT_MAX_ERRORS
        @table_name = table_name
        @max_errors = max_errors
        @errors = []
      end

      # Stores the failing record together with its exception.
      #
      # Raises RuntimeError once more than max_errors errors have been stored.
      def log_error record, exception
        @errors << { :record => record, :exception => exception }
        raise "Reached limit of error for a table" if @errors.length > @max_errors
      end

      # Logs a header line followed by every stored exception at error level.
      # Does nothing when no error has been stored.
      def print
        return if @errors.empty?
        logger.error("Errors while processing table #{@table_name}:")
        @errors.each { |error| logger.error(error[:exception]) }
      end

    end

  end
end
@@ -7,11 +7,22 @@ require "utils/random_string"
7
7
  require "utils/random_string_chars_only"
8
8
  require "utils/geojson_parser"
9
9
  require "utils/progress_bar"
10
+ require "utils/parallel_progress_bar"
10
11
  require "utils/resource"
12
+ require "utils/template_helper"
11
13
  require "parallel/table"
12
14
  require "core/database"
15
+ require "core/fields_missing_strategy"
16
+ require "thor/helpers/rdbms_dsl_generator"
13
17
  require "core/field"
18
+ require "core/table_errors"
14
19
  require "strategy/strategies"
15
20
  require "utils/database"
16
21
  require "core/dsl"
17
22
 
23
+ begin
24
+ require 'mongo'
25
+ require "thor/helpers/mongodb_dsl_generator"
26
+ rescue LoadError
27
+ "Ignoring the mongodb specific libraries if monog driver is not specified in gem"
28
+ end
@@ -5,7 +5,14 @@ module DataAnon
# Execution strategy that hands the tables to ::Parallel.each.
class Table

  # For every table, selects DataAnon::Utils::ParallelProgressBar as its
  # progress bar class and then calls +process+. A failure in one table is
  # logged instead of being raised out of the block.
  #
  # tables - collection of objects responding to +progress_bar_class+ and +process+.
  def anonymize tables
    ::Parallel.each(tables) do |table|
      begin
        table.progress_bar_class DataAnon::Utils::ParallelProgressBar
        table.process
      rescue => e
        # No Logging mixin is included in this class, so a bare `logger`
        # call would raise NameError inside the handler. Use the
        # module-level logger instead.
        DataAnon::Utils::Logging.logger.error "\n#{e.message} \n #{e.backtrace}"
      end
    end
  end

end
data/lib/strategy/base.rb CHANGED
@@ -3,12 +3,20 @@ module DataAnon
3
3
  class Base
4
4
  include Utils::Logging
5
5
 
6
+ attr_accessor :fields, :user_strategies, :fields_missing_strategy, :errors
7
+
6
8
  def initialize source_database, destination_database, name, user_strategies
7
9
  @name = name
8
10
  @user_strategies = user_strategies
9
11
  @fields = {}
10
12
  @source_database = source_database
11
13
  @destination_database = destination_database
14
+ @fields_missing_strategy = DataAnon::Core::FieldsMissingStrategy.new name
15
+ @errors = DataAnon::Core::TableErrors.new(@name)
16
+ end
17
+
18
+ def self.whitelist?
19
+ false
12
20
  end
13
21
 
14
22
  def process_fields &block
@@ -20,19 +28,10 @@ module DataAnon
20
28
  @primary_keys = fields
21
29
  end
22
30
 
23
- def is_primary_key? field
24
- @primary_keys.select { |key| field.downcase == key.downcase }.length > 0
25
- end
26
-
27
-
28
31
  def whitelist *fields
29
32
  fields.each { |f| @fields[f.downcase] = DataAnon::Strategy::Field::Whitelist.new }
30
33
  end
31
34
 
32
- def fields
33
- @fields
34
- end
35
-
36
35
  def anonymize *fields, &block
37
36
  if block.nil?
38
37
  fields.each { |f| @fields[f.downcase] = DataAnon::Strategy::Field::DefaultAnon.new(@user_strategies) }
@@ -49,6 +48,15 @@ module DataAnon
49
48
  end
50
49
  end
51
50
 
# Tells whether +field+ names one of the primary keys, ignoring case.
#
# field - field name as a String.
#
# Returns true on a match, false otherwise (including when no primary
# keys have been assigned).
def is_primary_key? field
  wanted = field.downcase
  # @primary_keys stays nil until it is assigned; Array() turns that into
  # an empty list instead of raising NoMethodError.
  Array(@primary_keys).any? { |key| key.downcase == wanted }
end
54
+
55
+ def default_strategy field_name
56
+ @fields_missing_strategy.missing field_name
57
+ DataAnon::Strategy::Field::DefaultAnon.new(@user_strategies)
58
+ end
59
+
52
60
  def dest_table
53
61
  return @dest_table unless @dest_table.nil?
54
62
  DataAnon::Utils::DestinationDatabase.establish_connection @destination_database if @destination_database
@@ -65,17 +73,30 @@ module DataAnon
65
73
  logger.debug "Processing table #{@name} with fields strategies #{@fields}"
66
74
  total = source_table.count
67
75
  if total > 0
68
- index = 1
69
- progress_bar = DataAnon::Utils::ProgressBar.new @name, total
76
+ index = 0
77
+ progress = progress_bar.new(@name, total)
70
78
  source_table.all.each do |record|
71
- process_record index, record
72
79
  index += 1
73
- progress_bar.show(index)
80
+ begin
81
+ process_record index, record
82
+ rescue => exception
83
+ @errors.log_error record, exception
84
+ end
85
+ progress.show index
74
86
  end
75
- progress_bar.close
87
+ progress.close
76
88
  end
77
89
  end
78
90
 
91
+ def progress_bar
92
+ @progress_bar || DataAnon::Utils::ProgressBar
93
+ end
94
+
95
+ def progress_bar_class progress_bar
96
+ @progress_bar = progress_bar
97
+ end
98
+
99
+
79
100
  end
80
101
  end
81
102
  end