data-anonymization 0.3.0 → 0.5.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (86)
  1. data/.gitignore +2 -1
  2. data/.rvmrc +1 -1
  3. data/.travis.yml +2 -0
  4. data/Gemfile +2 -0
  5. data/README.md +295 -258
  6. data/bin/datanon +57 -0
  7. data/data-anonymization.gemspec +2 -1
  8. data/examples/blacklist_dsl.rb +42 -0
  9. data/examples/mongodb_blacklist_dsl.rb +38 -0
  10. data/examples/mongodb_whitelist_dsl.rb +44 -0
  11. data/examples/whitelist_dsl.rb +63 -0
  12. data/lib/core/database.rb +21 -3
  13. data/lib/core/field.rb +5 -2
  14. data/lib/core/fields_missing_strategy.rb +30 -0
  15. data/lib/core/table_errors.rb +32 -0
  16. data/lib/data-anonymization.rb +11 -0
  17. data/lib/parallel/table.rb +8 -1
  18. data/lib/strategy/base.rb +35 -14
  19. data/lib/strategy/blacklist.rb +1 -1
  20. data/lib/strategy/field/anonymize_array.rb +28 -0
  21. data/lib/strategy/field/contact/random_address.rb +12 -0
  22. data/lib/strategy/field/contact/random_city.rb +12 -0
  23. data/lib/strategy/field/contact/random_phone_number.rb +4 -0
  24. data/lib/strategy/field/contact/random_province.rb +12 -0
  25. data/lib/strategy/field/contact/random_zipcode.rb +12 -0
  26. data/lib/strategy/field/datetime/anonymize_date.rb +15 -0
  27. data/lib/strategy/field/datetime/anonymize_datetime.rb +19 -0
  28. data/lib/strategy/field/datetime/anonymize_time.rb +19 -0
  29. data/lib/strategy/field/datetime/date_delta.rb +10 -0
  30. data/lib/strategy/field/datetime/date_time_delta.rb +9 -0
  31. data/lib/strategy/field/datetime/time_delta.rb +8 -0
  32. data/lib/strategy/field/default_anon.rb +4 -1
  33. data/lib/strategy/field/email/gmail_template.rb +8 -0
  34. data/lib/strategy/field/email/random_email.rb +7 -0
  35. data/lib/strategy/field/email/random_mailinator_email.rb +5 -0
  36. data/lib/strategy/field/fields.rb +4 -0
  37. data/lib/strategy/field/name/random_first_name.rb +10 -0
  38. data/lib/strategy/field/name/random_full_name.rb +10 -2
  39. data/lib/strategy/field/name/random_last_name.rb +9 -0
  40. data/lib/strategy/field/name/random_user_name.rb +5 -0
  41. data/lib/strategy/field/number/random_big_decimal_delta.rb +6 -0
  42. data/lib/strategy/field/number/random_float.rb +4 -0
  43. data/lib/strategy/field/number/random_float_delta.rb +6 -0
  44. data/lib/strategy/field/number/random_integer.rb +4 -0
  45. data/lib/strategy/field/number/random_integer_delta.rb +6 -0
  46. data/lib/strategy/field/string/formatted_string_numbers.rb +10 -6
  47. data/lib/strategy/field/string/lorem_ipsum.rb +9 -0
  48. data/lib/strategy/field/string/random_formatted_string.rb +39 -0
  49. data/lib/strategy/field/string/random_string.rb +6 -0
  50. data/lib/strategy/field/string/random_url.rb +7 -1
  51. data/lib/strategy/field/string/select_from_database.rb +7 -5
  52. data/lib/strategy/field/string/select_from_file.rb +7 -0
  53. data/lib/strategy/field/string/select_from_list.rb +8 -0
  54. data/lib/strategy/field/string/string_template.rb +11 -0
  55. data/lib/strategy/mongodb/anonymize_field.rb +44 -0
  56. data/lib/strategy/mongodb/blacklist.rb +29 -0
  57. data/lib/strategy/mongodb/whitelist.rb +62 -0
  58. data/lib/strategy/strategies.rb +10 -1
  59. data/lib/strategy/whitelist.rb +7 -2
  60. data/lib/thor/helpers/mongodb_dsl_generator.rb +66 -0
  61. data/lib/thor/helpers/rdbms_dsl_generator.rb +36 -0
  62. data/lib/thor/templates/mongodb_whitelist_template.erb +15 -0
  63. data/lib/thor/templates/whitelist_template.erb +21 -0
  64. data/lib/utils/database.rb +4 -0
  65. data/lib/utils/parallel_progress_bar.rb +24 -0
  66. data/lib/utils/progress_bar.rb +34 -22
  67. data/lib/utils/random_string.rb +3 -2
  68. data/lib/utils/random_string_chars_only.rb +3 -5
  69. data/lib/utils/template_helper.rb +44 -0
  70. data/lib/version.rb +1 -1
  71. data/spec/acceptance/mongodb_blacklist_spec.rb +75 -0
  72. data/spec/acceptance/mongodb_whitelist_spec.rb +107 -0
  73. data/spec/core/fields_missing_strategy_spec.rb +26 -0
  74. data/spec/strategy/field/name/random_first_name_spec.rb +1 -1
  75. data/spec/strategy/field/name/random_full_name_spec.rb +12 -7
  76. data/spec/strategy/field/name/random_last_name_spec.rb +1 -1
  77. data/spec/strategy/field/string/random_formatted_string_spec.rb +39 -0
  78. data/spec/strategy/field/string/select_from_file_spec.rb +21 -0
  79. data/spec/strategy/mongodb/anonymize_field_spec.rb +52 -0
  80. data/spec/utils/random_float_spec.rb +12 -0
  81. data/spec/utils/random_string_char_only_spec.rb +12 -0
  82. data/spec/utils/template_helper_spec.rb +14 -0
  83. metadata +56 -6
  84. data/blacklist_dsl.rb +0 -17
  85. data/blacklist_nosql_dsl.rb +0 -36
  86. data/whitelist_dsl.rb +0 -42
@@ -0,0 +1,36 @@
1
+ require 'thor'
2
+ require 'active_record'
3
+ require 'erb'
4
+
5
module DataAnon
  module ThorHelpers
    # Generates a starter whitelist anonymization script for a relational
    # database by rendering templates/whitelist_template.erb against the
    # tables reported by an ActiveRecord connection.
    class RDBMSDSLGenerator

      # Directory containing this file; the template path is resolved
      # relative to it.
      def self.source_root
        File.dirname(__FILE__)
      end

      # Renders the whitelist template and writes the result to
      # 'rdbms_whitelist_generated.rb' in the current working directory.
      #
      # configuration_hash - connection settings passed unchanged to
      #                      ActiveRecord::Base.establish_connection.
      #
      # Any StandardError raised along the way is rescued and reported on
      # stdout instead of being propagated to the caller.
      def generate_whitelist_script(configuration_hash)
        # The template reads @configuration_hash, @ar_object and @tables
        # through the binding passed to ERB#result below.
        @configuration_hash = configuration_hash
        @ar_object = ActiveRecord::Base.establish_connection(@configuration_hash)
        @tables = @ar_object.connection.tables

        template_path = File.join(RDBMSDSLGenerator.source_root, '..', 'templates', 'whitelist_template.erb')
        # File.read closes the handle itself; File.new(...).read left it open.
        erb = build_erb(File.read(template_path))

        # The block form of File.open closes the file, so no explicit close.
        File.open('rdbms_whitelist_generated.rb', 'w') do |f|
          f.write erb.result(binding)
        end

      rescue => e
        puts "\e[31mActiverecord was unable to establish a connection to the specified database. Please check the configuration options and try again.\e[0m"
        # Show the actual cause as well: the rescue also catches template
        # and file-writing errors, not only connection failures.
        puts "#{e.class}: #{e.message}"
        puts e.backtrace
      end

      private

      # Builds an ERB instance with '-' trim mode, using the keyword form
      # when this ERB version supports it and the legacy positional form
      # otherwise.
      def build_erb(template_source)
        if ERB.instance_method(:initialize).parameters.include?([:key, :trim_mode])
          ERB.new(template_source, trim_mode: '-')
        else
          ERB.new(template_source, nil, '-')
        end
      end

    end
  end
end
35
+
36
+
@@ -0,0 +1,15 @@
1
+ require 'data-anonymization'
2
+ require 'mongo'
3
+
4
+ DataAnon::Utils::Logging.logger.level = Logger::INFO
5
+
6
+ database 'test' do
7
+
8
+ strategy DataAnon::Strategy::MongoDB::Whitelist
9
+ source_db <%= DataAnon::Utils::TemplateHelper.source_connection_specs_mongo @configuration_hash %>
10
+ destination_db <%= DataAnon::Utils::TemplateHelper.destination_connection_specs_mongo %>
11
+
12
+ <%= @output.join("\n") %>
13
+
14
+ end
15
+
@@ -0,0 +1,21 @@
1
+ require 'data-anonymization'
2
+
3
+ DataAnon::Utils::Logging.logger.level = Logger::INFO
4
+
5
+ database 'Template' do
6
+
7
+ strategy DataAnon::Strategy::Whitelist
8
+ source_db <%= DataAnon::Utils::TemplateHelper.source_connection_specs_rdbms @configuration_hash %>
9
+ destination_db <%= DataAnon::Utils::TemplateHelper.destination_connection_specs_rdbms @configuration_hash %>
10
+
11
+ <% @tables.each do |table| %>
12
+ table '<%= table %>' do
13
+ primary_key '<%= @ar_object.connection.primary_key("#{table}").nil? ? "<No primary key found. Possible composite key. Please enter the value>" : @ar_object.connection.primary_key("#{table}") %>'
14
+ <%- @ar_object.connection.indexes("#{table}").each do |index| -%>
15
+ whitelist '<%= index.columns.first %>'
16
+ <%- end -%>
17
+ end
18
+ <% end %>
19
+
20
+ end
21
+
@@ -14,6 +14,10 @@ module DataAnon
14
14
  self.abstract_class = true
15
15
  end
16
16
 
17
+ class DisableReferentialIntegrityDatabase < ActiveRecord::Base
18
+ self.abstract_class = true
19
+ end
20
+
17
21
  class SourceDatabase < ActiveRecord::Base
18
22
  self.abstract_class = true
19
23
  end
@@ -0,0 +1,24 @@
1
+ require 'powerbar'
2
+
3
module DataAnon
  module Utils

    # Progress reporter that writes one log line per progress update
    # instead of drawing a bar.
    class ParallelProgressBar < ProgressBar
      include Utils::Logging

      # table_name - label printed at the start of every progress line.
      # total      - number of records expected for the table.
      #
      # Deliberately does not call super, so nothing that
      # ProgressBar#initialize sets up is created here.
      def initialize table_name, total
        @total = total
        @table_name = table_name
      end

      protected

      # Logs the current position for the table, tagging the line STARTED
      # or COMPLETE where applicable.
      #
      # index - number of records processed so far.
      def show_progress index
        # Test completion first: for a one-record table index 1 is both the
        # first and the last record, and the line must still say COMPLETE.
        suffix =
          if complete(index)
            "COMPLETE"
          elsif started(index)
            "STARTED"
          else
            ""
          end
        logger.info("%-30s [ %7d/%-7d ] %s" % [@table_name, index, @total, suffix])
      end

    end

  end
end
@@ -4,51 +4,63 @@ module DataAnon
4
4
  module Utils
5
5
 
6
6
  class ProgressBar
7
- include Utils::Logging
8
7
 
9
8
  def initialize table_name, total
10
9
  @total = total
11
10
  @table_name = table_name
12
- @progress_bar = PowerBar.new if total > 0 && show_progress && !parallel?
11
+ @power_bar = PowerBar.new if show_progress_env
12
+ apply_power_bar_settings if show_progress_env
13
13
  end
14
14
 
15
- def show_progress
16
- ENV['show_progress'] != 'false'
17
- end
18
-
19
- def parallel?
20
- ENV['parallel_execution'] == 'true'
15
+ def apply_power_bar_settings
16
+ @power_bar.settings.tty.finite.template.main = \
17
+ "${<msg>} ${<bar> }\e[0m${<rate>/s} \e[33;1m${<percent>%} " +
18
+ "\e[36;1m${<elapsed>}\e[31;1m${ ETA: <eta>}"
19
+ @power_bar.settings.tty.finite.template.padchar = "\e[0m\u2589"
20
+ @power_bar.settings.tty.finite.template.barchar = "\e[34;1m\u2589"
21
+ @power_bar.settings.tty.finite.template.exit = "\e[?25h\e[0m" # clean up after us
22
+ @power_bar.settings.tty.finite.template.close = "\e[?25h\e[0m\n" # clean up after us
23
+ @power_bar.settings.tty.finite.output = Proc.new { |s| $stderr.print s }
21
24
  end
22
25
 
23
26
  def show index
24
- if started(index) || regular_interval(index) || complete(index)
25
- if @progress_bar
26
- msg = "Table: %-15s [ %6d/%-6d ]" % [ @table_name,index,@total]
27
- @progress_bar.show(:msg => msg, :done => index, :total => @total)
28
- elsif parallel?
29
- suffix = ""
30
- suffix = "STARTED" if started(index)
31
- suffix = "COMPLETE" if complete(index)
32
- logger.info("Table: %-15s [ %6d/%-6d ] %s" % [ @table_name,index,@total, suffix])
33
- end
27
+ if show_progress? index
28
+ show_progress index
34
29
  end
35
30
  end
36
31
 
32
+ def close
33
+ @power_bar.close if @power_bar
34
+ end
35
+
36
+ protected
37
+
38
+ def show_progress? index
39
+ show_progress_env && (started(index) || regular_interval(index) || complete(index))
40
+ end
41
+
42
+ def show_progress_env
43
+ ENV['show_progress'] == "false" ? false : true
44
+ end
45
+
46
+ def show_progress counter
47
+ sleep 0.1
48
+ msg = "%-20s [%6d/%-6d]" % [@table_name, counter, @total]
49
+ @power_bar.show({:msg => msg, :done => counter, :total => @total})
50
+ end
51
+
37
52
  def complete index
38
53
  index == @total
39
54
  end
40
55
 
41
56
  def regular_interval index
42
- index % 1000 == 0
57
+ (index % 1000) == 0
43
58
  end
44
59
 
45
60
  def started index
46
61
  index == 1
47
62
  end
48
63
 
49
- def close
50
- @progress_bar.close if @progress_bar
51
- end
52
64
 
53
65
  end
54
66
 
@@ -2,9 +2,10 @@ module DataAnon
2
2
  module Utils
3
3
  class RandomString
4
4
 
5
- def self.generate length = nil
5
+ RANDOM_STRING_CHARS = 'abcdefghjkmnpqrstuvwxyzABCDEFGHJKLMNPQRSTUVWXYZ0123456789'
6
+
7
+ def self.generate length = nil, chars = RANDOM_STRING_CHARS
6
8
  length ||= Random.new.rand 5...15
7
- chars = 'abcdefghjkmnpqrstuvwxyzABCDEFGHJKLMNPQRSTUVWXYZ23456789'
8
9
  random_string = ''
9
10
  length.times { random_string << chars[rand(chars.size)] }
10
11
  random_string
@@ -2,12 +2,10 @@ module DataAnon
2
2
  module Utils
3
3
  class RandomStringCharsOnly
4
4
 
5
+ CHARS = 'abcdefghjkmnpqrstuvwxyzABCDEFGHJKLMNPQRSTUVWXYZ'
6
+
5
7
  def self.generate length = nil
6
- length ||= Random.new.rand 5...15
7
- chars = 'abcdefghjkmnpqrstuvwxyz'
8
- random_string = ''
9
- length.times { random_string << chars[rand(chars.size)] }
10
- random_string
8
+ RandomString.generate length, CHARS
11
9
  end
12
10
  end
13
11
  end
@@ -0,0 +1,44 @@
1
module DataAnon
  module Utils
    # Builds the connection-spec fragments that are interpolated into the
    # generated anonymization DSL scripts.
    class TemplateHelper

      # Renders the source connection settings as Ruby hash entries,
      # e.g. ":adapter => 'mysql', :port => 3306".
      #
      # config_hash - connection settings; entries with a nil value are
      #               skipped.
      #
      # Returns a String of ", "-separated ":key => value" pairs.
      def self.source_connection_specs_rdbms config_hash
        config_hash.map { |key, value|
          ":#{key} => #{ruby_literal(value)}" unless value.nil?
        }.compact.join ', '
      end

      # Renders one "<enter_value>" placeholder entry per key of config_hash
      # so the user can fill in the destination connection by hand.
      def self.destination_connection_specs_rdbms config_hash
        config_hash.keys.map { |key| ":#{key} => '<enter_value>'" }.join ', '
      end

      # Renders the MongoDB source settings (:mongodb_uri and :database) as
      # Ruby hash entries.
      def self.source_connection_specs_mongo config_hash
        uri = ruby_literal(mongo_uri(config_hash))
        # to_s keeps the database name a quoted string even when it is nil
        # or not a String.
        database = ruby_literal(config_hash[:database].to_s)
        ":mongodb_uri => #{uri}, :database => #{database}"
      end

      # Placeholder MongoDB destination settings for the user to fill in.
      def self.destination_connection_specs_mongo
        ":mongodb_uri => '<enter value>', :database => '<enter value>'"
      end

      # Builds "mongodb://[username:password@]host[:port]/database".
      #
      # Credentials are included when :username is present; :user is
      # honoured as a fallback. They are placed before the host.
      def self.mongo_uri config_hash
        username = config_hash[:username] || config_hash[:user]
        credentials = username.nil? ? "" : "#{username}:#{config_hash[:password]}@"
        port = config_hash[:port].nil? ? "" : ":#{config_hash[:port]}"
        "mongodb://#{credentials}#{config_hash[:host]}#{port}/#{config_hash[:database]}"
      end

      # Converts a value into Ruby source text: Strings become single-quoted
      # literals with backslashes and single quotes escaped; every other
      # value is rendered with #inspect, so integers stay unquoted.
      def self.ruby_literal value
        return value.inspect unless value.is_a?(String)
        "'#{value.gsub(/['\\]/) { |char| "\\#{char}" }}'"
      end
      private_class_method :ruby_literal

    end
  end
end
data/lib/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module DataAnonymization
2
- VERSION = "0.3.0"
2
+ VERSION = "0.5.0"
3
3
  end
@@ -0,0 +1,75 @@
1
+ require "spec_helper"
2
+ require 'mongo'
3
+
4
# Acceptance test: anonymizes the 'users' collection of the local 'test'
# MongoDB database in place using the blacklist strategy, then checks that
# the listed fields changed and all other fields were left untouched.
describe "End 2 End MongoDB Blacklist Acceptance Test" do

  before(:each) do
    # Start every example from a freshly seeded 'test' database.
    Mongo::Connection.from_uri("mongodb://localhost/test").drop_database('test')
    users = [
        {
            "_id" => 1,
            "user_id" => "sunitparekh",
            "date_of_birth" => Time.new(2012, 7, 14, 13, 1, 0),
            "email" => "parekh.sunit@gmail.com",
            "password" => "TfqIK8Pd8GlbMDFZCX4l/5EtnOkfLCeynOL85tJQuxum&382knaflk@@",
            "failed_attempts" => 0,
            "first_name" => "Sunit",
            "last_name" => "Parekh",
            "password_reset_answer" => "manza",
            "password_reset_question" => "My new car modal?",
            "updated_at" => Time.new(2012, 8, 15, 13, 1, 0)
        },
        {
            "_id" => 2,
            "user_id" => "anandagrawal",
            "date_of_birth" => Time.new(2011, 8, 11, 13, 1, 0),
            "email" => "anandagrawal84@gmail.com",
            "password" => "Tz548O0RWusldVAWkwqfzO3jK/X4l/5EtnOkfLCeynOL85tJQuxum",
            "failed_attempts" => 0,
            "first_name" => "Anand",
            "last_name" => "Agrawal",
            "password_reset_answer" => "android",
            "password_reset_question" => "My phone?",
            "updated_at" => Time.new(2012, 2, 11, 13, 1, 0)
        }
    ]
    # Seed through the same 'test' URI the example reads from; this spec
    # has no destination database.
    users_coll = Mongo::Connection.from_uri("mongodb://localhost/test", {:safe => true})['test']['users']
    users.each { |p| users_coll.save p }
  end

  it "should anonymize users collection" do

    database 'test' do
      strategy DataAnon::Strategy::MongoDB::Blacklist
      source_db :mongodb_uri => "mongodb://localhost/test", :database => 'test', :options => {:safe => true}

      collection 'users' do
        anonymize('date_of_birth').using FieldStrategy::TimeDelta.new(5,30)
        anonymize('user_id').using FieldStrategy::StringTemplate.new('user-#{row_number}')
        anonymize('email').using FieldStrategy::RandomMailinatorEmail.new
        anonymize('password') { |field| "password" }
        anonymize('first_name').using FieldStrategy::RandomFirstName.new
        anonymize('last_name').using FieldStrategy::RandomLastName.new
      end

    end

    users_coll = Mongo::Connection.from_uri("mongodb://localhost/test")['test']['users']
    users_coll.count.should == 2
    user = users_coll.find_one({'_id' => 1})

    user['_id'].should == 1
    user['user_id'].should == "user-1"
    user['date_of_birth'].should_not == Time.new(2012, 7, 14, 13, 1, 0)
    user['email'].should_not == "parekh.sunit@gmail.com"
    user['password'].should == "password"
    user['failed_attempts'].should == 0
    # Compare by value: `be` checks object identity, which two distinct
    # String objects never share, so `should_not be "..."` cannot fail.
    user['first_name'].should_not == "Sunit"
    user['last_name'].should_not == "Parekh"
    # Fields not listed in the collection block must be unchanged.
    user['password_reset_answer'].should == "manza"
    user['password_reset_question'].should == "My new car modal?"
    user['updated_at'].should == Time.new(2012, 8, 15, 13, 1, 0)

  end
end
@@ -0,0 +1,107 @@
1
+ require "spec_helper"
2
+ require 'mongo'
3
+
4
# Acceptance test: copies the 'plans' collection from the local 'test'
# MongoDB database into 'dest' using the whitelist strategy, then checks
# which fields were copied as-is and which were anonymized, including
# fields inside the nested 'features' array and its 'users' sub-document.
describe "End 2 End MongoDB Whitelist Acceptance Test" do

  before(:each) do
    # Start every example with an empty destination and a freshly seeded source.
    Mongo::Connection.from_uri("mongodb://localhost/test").drop_database('test')
    Mongo::Connection.from_uri("mongodb://localhost/dest").drop_database('dest')
    plans = [
        {
            "_id" => 1,
            "name" => "Free",
            "nick_names" => ["Name1","Name2"],
            "features" => [
                {
                    "max_storage" => 21474836480,
                    "type" => "AmazonS3",
                    "users" => {"max" => 1, "additional" => false}
                },
                {
                    "max_storage" => 21474836480,
                    "type" => "DropBox",
                    "users" => {"max" => 1, "additional" => false}
                }
            ],
            "term" => "month",
            "public_sharing" => false,
            "photo_sharing" => true,
            "created_at" => Time.new(2012, 6, 21, 13, 30, 0)
        },
        {
            "_id" => 2,
            "name" => "Team",
            "plan_aliases" => ["Business", "Paid"],
            "features" => [
                {
                    "max_storage" => 53687091200,
                    "type" => "AmazonS3",
                    "users" => {"max" => 5, "additional" => true}
                },
                {
                    "max_storage" => 53687091200,
                    "type" => "DropBox",
                    "users" => {"max" => 5, "additional" => true}
                }
            ],
            "term" => "month",
            "public_sharing" => true,
            "photo_sharing" => true,
            "created_at" => Time.new(2012, 8, 11, 13, 1, 0)
        }
    ]
    # Seed the source database through the source URI.
    plans_coll = Mongo::Connection.from_uri("mongodb://localhost/test")['test']['plans']
    plans.each { |p| plans_coll.save p }
  end

  it "should anonymize plans collection" do

    database 'test' do
      strategy DataAnon::Strategy::MongoDB::Whitelist
      source_db :mongodb_uri => "mongodb://localhost/test", :database => 'test'
      destination_db :mongodb_uri => "mongodb://localhost/dest", :database => 'dest', :options => {:safe => true}

      collection 'plans' do
        whitelist '_id', 'name', 'term', 'created_at'
        anonymize('plan_aliases').using FieldStrategy::SelectFromList.new(["Free", "Team", "Business", "Paid"])
        anonymize 'public_sharing', 'photo_sharing'

        collection 'features' do
          anonymize('max_storage').using FieldStrategy::SelectFromList.new([10737418240, 21474836480, 53687091200])
          whitelist 'type'

          document 'users' do
            anonymize 'max', 'additional'
          end
        end
      end

    end

    plans_coll = Mongo::Connection.from_uri("mongodb://localhost/dest")['dest']['plans']
    plans_coll.count.should == 2
    plan = plans_coll.find_one({ '_id' => 1})

    plan['_id'].should == 1
    plan['name'].should == "Free"
    # 'nick_names' is not declared in the DSL block above; presumably the
    # strategy anonymizes undeclared fields by default -- TODO confirm.
    plan['nick_names'][0].should_not == "Name1"
    plan['nick_names'][1].should_not == "Name2"
    plan['term'].should == "month"
    plan['created_at'].should == Time.new(2012, 6, 21, 13, 30, 0)
    # Plan 1 has no 'plan_aliases' in the source, so none may appear here.
    plan['plan_aliases'].should be_nil
    [true,false].should include(plan['public_sharing'])
    [true,false].should include(plan['photo_sharing'])
    plan['features'].length.should == 2
    feature1 = plan['features'][0]
    [10737418240, 21474836480, 53687091200].should include(feature1['max_storage'])
    feature1['type'].should == "AmazonS3"
    # Integer rather than Fixnum: Fixnum was deprecated in Ruby 2.4 and
    # later removed, while Integer matches on every Ruby version.
    feature1['users']['max'].should be_kind_of(Integer)
    [true,false].should include(feature1['users']['additional'])


    plan = plans_coll.find_one({ '_id' => 2})
    plan['plan_aliases'].length.should == 2
    ["Free", "Team", "Business", "Paid"].should include(plan['plan_aliases'][0])
    ["Free", "Team", "Business", "Paid"].should include(plan['plan_aliases'][1])
  end
end