data-anonymization 0.3.0 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (86) hide show
  1. data/.gitignore +2 -1
  2. data/.rvmrc +1 -1
  3. data/.travis.yml +2 -0
  4. data/Gemfile +2 -0
  5. data/README.md +295 -258
  6. data/bin/datanon +57 -0
  7. data/data-anonymization.gemspec +2 -1
  8. data/examples/blacklist_dsl.rb +42 -0
  9. data/examples/mongodb_blacklist_dsl.rb +38 -0
  10. data/examples/mongodb_whitelist_dsl.rb +44 -0
  11. data/examples/whitelist_dsl.rb +63 -0
  12. data/lib/core/database.rb +21 -3
  13. data/lib/core/field.rb +5 -2
  14. data/lib/core/fields_missing_strategy.rb +30 -0
  15. data/lib/core/table_errors.rb +32 -0
  16. data/lib/data-anonymization.rb +11 -0
  17. data/lib/parallel/table.rb +8 -1
  18. data/lib/strategy/base.rb +35 -14
  19. data/lib/strategy/blacklist.rb +1 -1
  20. data/lib/strategy/field/anonymize_array.rb +28 -0
  21. data/lib/strategy/field/contact/random_address.rb +12 -0
  22. data/lib/strategy/field/contact/random_city.rb +12 -0
  23. data/lib/strategy/field/contact/random_phone_number.rb +4 -0
  24. data/lib/strategy/field/contact/random_province.rb +12 -0
  25. data/lib/strategy/field/contact/random_zipcode.rb +12 -0
  26. data/lib/strategy/field/datetime/anonymize_date.rb +15 -0
  27. data/lib/strategy/field/datetime/anonymize_datetime.rb +19 -0
  28. data/lib/strategy/field/datetime/anonymize_time.rb +19 -0
  29. data/lib/strategy/field/datetime/date_delta.rb +10 -0
  30. data/lib/strategy/field/datetime/date_time_delta.rb +9 -0
  31. data/lib/strategy/field/datetime/time_delta.rb +8 -0
  32. data/lib/strategy/field/default_anon.rb +4 -1
  33. data/lib/strategy/field/email/gmail_template.rb +8 -0
  34. data/lib/strategy/field/email/random_email.rb +7 -0
  35. data/lib/strategy/field/email/random_mailinator_email.rb +5 -0
  36. data/lib/strategy/field/fields.rb +4 -0
  37. data/lib/strategy/field/name/random_first_name.rb +10 -0
  38. data/lib/strategy/field/name/random_full_name.rb +10 -2
  39. data/lib/strategy/field/name/random_last_name.rb +9 -0
  40. data/lib/strategy/field/name/random_user_name.rb +5 -0
  41. data/lib/strategy/field/number/random_big_decimal_delta.rb +6 -0
  42. data/lib/strategy/field/number/random_float.rb +4 -0
  43. data/lib/strategy/field/number/random_float_delta.rb +6 -0
  44. data/lib/strategy/field/number/random_integer.rb +4 -0
  45. data/lib/strategy/field/number/random_integer_delta.rb +6 -0
  46. data/lib/strategy/field/string/formatted_string_numbers.rb +10 -6
  47. data/lib/strategy/field/string/lorem_ipsum.rb +9 -0
  48. data/lib/strategy/field/string/random_formatted_string.rb +39 -0
  49. data/lib/strategy/field/string/random_string.rb +6 -0
  50. data/lib/strategy/field/string/random_url.rb +7 -1
  51. data/lib/strategy/field/string/select_from_database.rb +7 -5
  52. data/lib/strategy/field/string/select_from_file.rb +7 -0
  53. data/lib/strategy/field/string/select_from_list.rb +8 -0
  54. data/lib/strategy/field/string/string_template.rb +11 -0
  55. data/lib/strategy/mongodb/anonymize_field.rb +44 -0
  56. data/lib/strategy/mongodb/blacklist.rb +29 -0
  57. data/lib/strategy/mongodb/whitelist.rb +62 -0
  58. data/lib/strategy/strategies.rb +10 -1
  59. data/lib/strategy/whitelist.rb +7 -2
  60. data/lib/thor/helpers/mongodb_dsl_generator.rb +66 -0
  61. data/lib/thor/helpers/rdbms_dsl_generator.rb +36 -0
  62. data/lib/thor/templates/mongodb_whitelist_template.erb +15 -0
  63. data/lib/thor/templates/whitelist_template.erb +21 -0
  64. data/lib/utils/database.rb +4 -0
  65. data/lib/utils/parallel_progress_bar.rb +24 -0
  66. data/lib/utils/progress_bar.rb +34 -22
  67. data/lib/utils/random_string.rb +3 -2
  68. data/lib/utils/random_string_chars_only.rb +3 -5
  69. data/lib/utils/template_helper.rb +44 -0
  70. data/lib/version.rb +1 -1
  71. data/spec/acceptance/mongodb_blacklist_spec.rb +75 -0
  72. data/spec/acceptance/mongodb_whitelist_spec.rb +107 -0
  73. data/spec/core/fields_missing_strategy_spec.rb +26 -0
  74. data/spec/strategy/field/name/random_first_name_spec.rb +1 -1
  75. data/spec/strategy/field/name/random_full_name_spec.rb +12 -7
  76. data/spec/strategy/field/name/random_last_name_spec.rb +1 -1
  77. data/spec/strategy/field/string/random_formatted_string_spec.rb +39 -0
  78. data/spec/strategy/field/string/select_from_file_spec.rb +21 -0
  79. data/spec/strategy/mongodb/anonymize_field_spec.rb +52 -0
  80. data/spec/utils/random_float_spec.rb +12 -0
  81. data/spec/utils/random_string_char_only_spec.rb +12 -0
  82. data/spec/utils/template_helper_spec.rb +14 -0
  83. metadata +56 -6
  84. data/blacklist_dsl.rb +0 -17
  85. data/blacklist_nosql_dsl.rb +0 -36
  86. data/whitelist_dsl.rb +0 -42
@@ -0,0 +1,36 @@
1
+ require 'thor'
2
+ require 'active_record'
3
+ require 'erb'
4
+
5
module DataAnon
  module ThorHelpers
    # Writes a starter whitelist DSL script for a relational database by
    # rendering templates/whitelist_template.erb against the tables reported by
    # an ActiveRecord connection.
    class RDBMSDSLGenerator

      # Directory containing this file; the template path is resolved from it.
      def self.source_root
        File.dirname(__FILE__)
      end

      # Connects with +configuration_hash+, lists the tables and writes the
      # rendered template to 'rdbms_whitelist_generated.rb' in the current
      # working directory.
      #
      # configuration_hash - connection settings handed unchanged to
      #                      ActiveRecord::Base.establish_connection.
      #
      # Returns nil. Any StandardError is reported on stdout instead of raised.
      def generate_whitelist_script(configuration_hash)

        @configuration_hash = configuration_hash
        @ar_object = ActiveRecord::Base.establish_connection(@configuration_hash)

        @tables = @ar_object.connection.tables

        # File.read closes the template handle; File.new(...).read left it open.
        template = File.read(RDBMSDSLGenerator.source_root + "/../templates/whitelist_template.erb")
        erb = ERB.new(template, nil, '-')

        # The template reads @configuration_hash, @tables and @ar_object through
        # this binding. The block form of File.open closes the file by itself.
        File.open('rdbms_whitelist_generated.rb', 'w') do |f|
          f.write erb.result(binding)
        end

        nil
      rescue => e
        puts "\e[31mActiverecord was unable to establish a connection to the specified database. Please check the configuration options and try again.\e[0m"
        # The rescue also catches template and file-write failures, so show the
        # real cause rather than only the connection hint above.
        puts "#{e.class}: #{e.message}"
        puts e.backtrace
      end

    end
  end
end
35
+
36
+
@@ -0,0 +1,15 @@
1
+ require 'data-anonymization'
2
+ require 'mongo'
3
+
4
+ DataAnon::Utils::Logging.logger.level = Logger::INFO
5
+
6
+ database 'test' do
7
+
8
+ strategy DataAnon::Strategy::MongoDB::Whitelist
9
+ source_db <%= DataAnon::Utils::TemplateHelper.source_connection_specs_mongo @configuration_hash %>
10
+ destination_db <%= DataAnon::Utils::TemplateHelper.destination_connection_specs_mongo %>
11
+
12
+ <%= @output.join("\n") %>
13
+
14
+ end
15
+
@@ -0,0 +1,21 @@
1
+ require 'data-anonymization'
2
+
3
+ DataAnon::Utils::Logging.logger.level = Logger::INFO
4
+
5
+ database 'Template' do
6
+
7
+ strategy DataAnon::Strategy::Whitelist
8
+ source_db <%= DataAnon::Utils::TemplateHelper.source_connection_specs_rdbms @configuration_hash %>
9
+ destination_db <%= DataAnon::Utils::TemplateHelper.destination_connection_specs_rdbms @configuration_hash %>
10
+
11
+ <% @tables.each do |table| %>
12
+ table '<%= table %>' do
13
+ primary_key '<%= @ar_object.connection.primary_key("#{table}").nil? ? "<No primary key found. Possible composite key. Please enter the value>" : @ar_object.connection.primary_key("#{table}") %>'
14
+ <%- @ar_object.connection.indexes("#{table}").each do |index| -%>
15
+ whitelist '<%= index.columns.first %>'
16
+ <%- end -%>
17
+ end
18
+ <% end %>
19
+
20
+ end
21
+
@@ -14,6 +14,10 @@ module DataAnon
14
14
  self.abstract_class = true
15
15
  end
16
16
 
17
+ class DisableReferentialIntegrityDatabase < ActiveRecord::Base
18
+ self.abstract_class = true
19
+ end
20
+
17
21
  class SourceDatabase < ActiveRecord::Base
18
22
  self.abstract_class = true
19
23
  end
@@ -0,0 +1,24 @@
1
+ require 'powerbar'
2
+
3
module DataAnon
  module Utils

    # ProgressBar variant that reports each update as a single logger line.
    # The constructor only stores its arguments and does not call super.
    class ParallelProgressBar < ProgressBar
      include Utils::Logging

      # table_name - label printed at the start of every log line.
      # total      - value printed as the right-hand side of the counter.
      def initialize table_name, total
        @table_name = table_name
        @total = total
      end

      protected

      # Logs "<table> [ index/total ] <status>" at info level. The status is
      # "STARTED" when started(index) is true, otherwise "COMPLETE" when
      # complete(index) is true, otherwise empty.
      def show_progress index
        status =
          if started(index)
            "STARTED"
          elsif complete(index)
            "COMPLETE"
          else
            ""
          end
        line = "%-30s [ %7d/%-7d ] %s" % [@table_name, index, @total, status]
        logger.info(line)
      end

    end

  end
end
@@ -4,51 +4,63 @@ module DataAnon
4
4
  module Utils
5
5
 
6
6
  class ProgressBar
7
- include Utils::Logging
8
7
 
9
8
  def initialize table_name, total
10
9
  @total = total
11
10
  @table_name = table_name
12
- @progress_bar = PowerBar.new if total > 0 && show_progress && !parallel?
11
+ @power_bar = PowerBar.new if show_progress_env
12
+ apply_power_bar_settings if show_progress_env
13
13
  end
14
14
 
15
- def show_progress
16
- ENV['show_progress'] != 'false'
17
- end
18
-
19
- def parallel?
20
- ENV['parallel_execution'] == 'true'
15
+ def apply_power_bar_settings
16
+ @power_bar.settings.tty.finite.template.main = \
17
+ "${<msg>} ${<bar> }\e[0m${<rate>/s} \e[33;1m${<percent>%} " +
18
+ "\e[36;1m${<elapsed>}\e[31;1m${ ETA: <eta>}"
19
+ @power_bar.settings.tty.finite.template.padchar = "\e[0m\u2589"
20
+ @power_bar.settings.tty.finite.template.barchar = "\e[34;1m\u2589"
21
+ @power_bar.settings.tty.finite.template.exit = "\e[?25h\e[0m" # clean up after us
22
+ @power_bar.settings.tty.finite.template.close = "\e[?25h\e[0m\n" # clean up after us
23
+ @power_bar.settings.tty.finite.output = Proc.new { |s| $stderr.print s }
21
24
  end
22
25
 
23
26
  def show index
24
- if started(index) || regular_interval(index) || complete(index)
25
- if @progress_bar
26
- msg = "Table: %-15s [ %6d/%-6d ]" % [ @table_name,index,@total]
27
- @progress_bar.show(:msg => msg, :done => index, :total => @total)
28
- elsif parallel?
29
- suffix = ""
30
- suffix = "STARTED" if started(index)
31
- suffix = "COMPLETE" if complete(index)
32
- logger.info("Table: %-15s [ %6d/%-6d ] %s" % [ @table_name,index,@total, suffix])
33
- end
27
+ if show_progress? index
28
+ show_progress index
34
29
  end
35
30
  end
36
31
 
32
+ def close
33
+ @power_bar.close if @power_bar
34
+ end
35
+
36
+ protected
37
+
38
+ def show_progress? index
39
+ show_progress_env && (started(index) || regular_interval(index) || complete(index))
40
+ end
41
+
42
+ def show_progress_env
43
+ ENV['show_progress'] == "false" ? false : true
44
+ end
45
+
46
+ def show_progress counter
47
+ sleep 0.1
48
+ msg = "%-20s [%6d/%-6d]" % [@table_name, counter, @total]
49
+ @power_bar.show({:msg => msg, :done => counter, :total => @total})
50
+ end
51
+
37
52
  def complete index
38
53
  index == @total
39
54
  end
40
55
 
41
56
  def regular_interval index
42
- index % 1000 == 0
57
+ (index % 1000) == 0
43
58
  end
44
59
 
45
60
  def started index
46
61
  index == 1
47
62
  end
48
63
 
49
- def close
50
- @progress_bar.close if @progress_bar
51
- end
52
64
 
53
65
  end
54
66
 
@@ -2,9 +2,10 @@ module DataAnon
2
2
  module Utils
3
3
  class RandomString
4
4
 
5
- def self.generate length = nil
5
+ RANDOM_STRING_CHARS = 'abcdefghjkmnpqrstuvwxyzABCDEFGHJKLMNPQRSTUVWXYZ0123456789'
6
+
7
+ def self.generate length = nil, chars = RANDOM_STRING_CHARS
6
8
  length ||= Random.new.rand 5...15
7
- chars = 'abcdefghjkmnpqrstuvwxyzABCDEFGHJKLMNPQRSTUVWXYZ23456789'
8
9
  random_string = ''
9
10
  length.times { random_string << chars[rand(chars.size)] }
10
11
  random_string
@@ -2,12 +2,10 @@ module DataAnon
2
2
  module Utils
3
3
  class RandomStringCharsOnly
4
4
 
5
+ CHARS = 'abcdefghjkmnpqrstuvwxyzABCDEFGHJKLMNPQRSTUVWXYZ'
6
+
5
7
  def self.generate length = nil
6
- length ||= Random.new.rand 5...15
7
- chars = 'abcdefghjkmnpqrstuvwxyz'
8
- random_string = ''
9
- length.times { random_string << chars[rand(chars.size)] }
10
- random_string
8
+ RandomString.generate length, CHARS
11
9
  end
12
10
  end
13
11
  end
@@ -0,0 +1,44 @@
1
module DataAnon
  module Utils
    # Builds the connection-spec text that the DSL generator templates
    # interpolate into generated scripts.
    class TemplateHelper

      # Renders the source connection settings as Ruby hash-pair source text.
      #
      # config_hash - connection settings keyed by option name.
      #
      # String values are single-quoted and Integer values are emitted bare.
      # Entries with a nil value or any other value type are omitted.
      #
      # Returns a String such as ":adapter => 'sqlite3', :port => 5432".
      def self.source_connection_specs_rdbms config_hash

        pairs = config_hash.keys.collect do |key|
          value = config_hash[key]
          # Match on the class itself rather than on its name: Integer covers
          # Fixnum/Bignum on old Rubies and plain Integer on Ruby >= 2.4.
          case value
          when String  then ":#{key} => '#{value}'"
          when Integer then ":#{key} => #{value}"
          end
        end

        # Unsupported values produce nil; drop them so join leaves no empty
        # ", , " entries behind.
        pairs.compact.join ', '

      end

      # Renders a placeholder entry for every key of +config_hash+ so the user
      # can fill in the destination settings by hand.
      #
      # Returns a String such as ":adapter => '<enter_value>'".
      def self.destination_connection_specs_rdbms config_hash

        config_hash.keys.collect { |key|
          ":#{key} => '<enter_value>'"
        }.join ', '

      end

      # Renders the MongoDB source settings (URI plus database name) as Ruby
      # hash-pair source text.
      def self.source_connection_specs_mongo config_hash
        ":mongodb_uri => '#{self.mongo_uri config_hash}', :database => '#{config_hash[:database]}'"
      end

      # Renders placeholder MongoDB destination settings.
      def self.destination_connection_specs_mongo
        ":mongodb_uri => '<enter value>', :database => '<enter value>'"
      end

      # Builds a MongoDB connection URI from +config_hash+.
      #
      # Reads :host, :port, :database, :password and the user name from
      # :username (falling back to :user). Credentials are placed before the
      # host, giving mongodb://user:password@host:port/database.
      #
      # Returns the URI String; port and credentials are left out when absent.
      def self.mongo_uri config_hash
        host = config_hash[:host].to_s
        host_and_port = config_hash[:port].nil? ? host : "#{host}:#{config_hash[:port]}"

        # Accept either key: the previous code tested :user but read :username.
        user = config_hash[:username] || config_hash[:user]
        credentials = user.nil? ? "" : "#{user}:#{config_hash[:password]}@"

        "mongodb://#{credentials}#{host_and_port}/#{config_hash[:database]}"
      end
    end
  end
end
data/lib/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module DataAnonymization
2
- VERSION = "0.3.0"
2
+ VERSION = "0.5.0"
3
3
  end
@@ -0,0 +1,75 @@
1
+ require "spec_helper"
2
+ require 'mongo'
3
+
4
# Acceptance test: seeds two user documents into the local 'test' database,
# runs the MongoDB blacklist strategy in place, then checks that the
# anonymized fields changed and every other field kept its value.
describe "End 2 End MongoDB Blacklist Acceptance Test" do

  before(:each) do
    Mongo::Connection.from_uri("mongodb://localhost/test").drop_database('test')
    users = [
        {
            "_id" => 1,
            "user_id" => "sunitparekh",
            "date_of_birth" => Time.new(2012, 7, 14, 13, 1, 0),
            "email" => "parekh.sunit@gmail.com",
            "password" => "TfqIK8Pd8GlbMDFZCX4l/5EtnOkfLCeynOL85tJQuxum&382knaflk@@",
            "failed_attempts" => 0,
            "first_name" => "Sunit",
            "last_name" => "Parekh",
            "password_reset_answer" => "manza",
            "password_reset_question" => "My new car modal?",
            "updated_at" => Time.new(2012, 8, 15, 13, 1, 0)
        },
        {
            "_id" => 2,
            "user_id" => "anandagrawal",
            "date_of_birth" => Time.new(2011, 8, 11, 13, 1, 0),
            "email" => "anandagrawal84@gmail.com",
            "password" => "Tz548O0RWusldVAWkwqfzO3jK/X4l/5EtnOkfLCeynOL85tJQuxum",
            "failed_attempts" => 0,
            "first_name" => "Anand",
            "last_name" => "Agrawal",
            "password_reset_answer" => "android",
            "password_reset_question" => "My phone?",
            "updated_at" => Time.new(2012, 2, 11, 13, 1, 0)
        }
    ]
    # Seed through the same URI the strategy and the assertions use; the data
    # lives in the 'test' database either way.
    users_coll = Mongo::Connection.from_uri("mongodb://localhost/test", {:safe => true})['test']['users']
    users.each { |p| users_coll.save p }
  end

  it "should anonymize users collection" do

    database 'test' do
      strategy DataAnon::Strategy::MongoDB::Blacklist
      source_db :mongodb_uri => "mongodb://localhost/test", :database => 'test', :options => {:safe => true}

      collection 'users' do
        anonymize('date_of_birth').using FieldStrategy::TimeDelta.new(5,30)
        anonymize('user_id').using FieldStrategy::StringTemplate.new('user-#{row_number}')
        anonymize('email').using FieldStrategy::RandomMailinatorEmail.new
        anonymize('password') { |field| "password" }
        anonymize('first_name').using FieldStrategy::RandomFirstName.new
        anonymize('last_name').using FieldStrategy::RandomLastName.new
      end

    end

    users_coll = Mongo::Connection.from_uri("mongodb://localhost/test")['test']['users']
    users_coll.count.should be 2
    user = users_coll.find_one({'_id' => 1})

    user['_id'].should == 1
    user['user_id'].should == "user-1"
    user['date_of_birth'].should_not == Time.new(2012, 7, 14, 13, 1, 0)
    user['email'].should_not == "parekh.sunit@gmail.com"
    user['password'].should == "password"
    user['failed_attempts'].should == 0
    # Compare by value: `be` checks object identity, which a fresh String
    # literal never shares, so `should_not be "..."` could not fail.
    user['first_name'].should_not == "Sunit"
    user['last_name'].should_not == "Parekh"
    user['password_reset_answer'].should == "manza"
    user['password_reset_question'].should == "My new car modal?"
    user['updated_at'].should == Time.new(2012, 8, 15, 13, 1, 0)

  end
end
@@ -0,0 +1,107 @@
1
+ require "spec_helper"
2
+ require 'mongo'
3
+
4
# Acceptance test: seeds two plan documents into the local 'test' database,
# runs the MongoDB whitelist strategy from 'test' into 'dest', then checks the
# copied documents field by field, including nested collections and documents.
describe "End 2 End MongoDB Whitelist Acceptance Test" do

  before(:each) do
    Mongo::Connection.from_uri("mongodb://localhost/test").drop_database('test')
    Mongo::Connection.from_uri("mongodb://localhost/dest").drop_database('dest')
    plans = [
        {
            "_id" => 1,
            "name" => "Free",
            "nick_names" => ["Name1","Name2"],
            "features" => [
                {
                    "max_storage" => 21474836480,
                    "type" => "AmazonS3",
                    "users" => {"max" => 1, "additional" => false}
                },
                {
                    "max_storage" => 21474836480,
                    "type" => "DropBox",
                    "users" => {"max" => 1, "additional" => false}
                }
            ],
            "term" => "month",
            "public_sharing" => false,
            "photo_sharing" => true,
            "created_at" => Time.new(2012, 6, 21, 13, 30, 0)
        },
        {
            "_id" => 2,
            "name" => "Team",
            "plan_aliases" => ["Business", "Paid"],
            "features" => [
                {
                    "max_storage" => 53687091200,
                    "type" => "AmazonS3",
                    "users" => {"max" => 5, "additional" => true}
                },
                {
                    "max_storage" => 53687091200,
                    "type" => "DropBox",
                    "users" => {"max" => 5, "additional" => true}
                }
            ],
            "term" => "month",
            "public_sharing" => true,
            "photo_sharing" => true,
            "created_at" => Time.new(2012, 8, 11, 13, 1, 0)
        }
    ]
    # Seed the source data through the source URI; the documents go into the
    # 'test' database, which is what source_db below reads from.
    plans_coll = Mongo::Connection.from_uri("mongodb://localhost/test")['test']['plans']
    plans.each { |p| plans_coll.save p }
  end

  it "should anonymize plans collection" do

    database 'test' do
      strategy DataAnon::Strategy::MongoDB::Whitelist
      source_db :mongodb_uri => "mongodb://localhost/test", :database => 'test'
      destination_db :mongodb_uri => "mongodb://localhost/dest", :database => 'dest', :options => {:safe => true}

      collection 'plans' do
        whitelist '_id', 'name', 'term', 'created_at'
        anonymize('plan_aliases').using FieldStrategy::SelectFromList.new(["Free", "Team", "Business", "Paid"])
        anonymize 'public_sharing', 'photo_sharing'

        collection 'features' do
          anonymize('max_storage').using FieldStrategy::SelectFromList.new([10737418240, 21474836480, 53687091200])
          whitelist 'type'

          document 'users' do
            anonymize 'max', 'additional'
          end
        end
      end

    end

    plans_coll = Mongo::Connection.from_uri("mongodb://localhost/dest")['dest']['plans']
    plans_coll.count.should be 2
    plan = plans_coll.find_one({ '_id' => 1})

    plan['_id'].should == 1
    plan['name'].should == "Free"
    plan['nick_names'][0].should_not == "Name1"
    plan['nick_names'][1].should_not == "Name2"
    plan['term'].should == "month"
    plan['created_at'].should == Time.new(2012, 6, 21, 13, 30, 0)
    plan['plan_aliases'].should be_nil
    [true,false].should include(plan['public_sharing'])
    [true,false].should include(plan['photo_sharing'])
    plan['features'].length.should == 2
    feature1 = plan['features'][0]
    [10737418240, 21474836480, 53687091200].should include(feature1['max_storage'])
    feature1['type'].should == "AmazonS3"
    # Integer rather than Fixnum: Fixnum is an Integer on old Rubies and no
    # longer exists on Ruby >= 3.2.
    feature1['users']['max'].should be_kind_of(Integer)
    [true,false].should include(feature1['users']['additional'])

    plan = plans_coll.find_one({ '_id' => 2})
    plan['plan_aliases'].length.should == 2
    ["Free", "Team", "Business", "Paid"].should include(plan['plan_aliases'][0])
    ["Free", "Team", "Business", "Paid"].should include(plan['plan_aliases'][1])
  end
end