data-anonymization 0.8.0 → 0.8.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 1eed8d87b9e7060ee7d3813c77b52b838f789d92
4
- data.tar.gz: bc475f130cf331debd4cbc835ab5391aa38c3129
3
+ metadata.gz: 6455e294c22b99a0181092f93c506060c5c9b383
4
+ data.tar.gz: 4da392d7b4712d941b9cab8206d178bae89eb288
5
5
  SHA512:
6
- metadata.gz: e8e6e315a8468c1208c3fd65cf3ea575bbce766290cbbfc9fe323a329a438a02abe63d3751dabda4dd7a1e39a147f0d75aaf88b95beb4c6f09cef7969730b09a
7
- data.tar.gz: aef7a9d30b0d1868435445f91a1cbe6412c8628eabcaffa2e51ea74fa721d1a2f3a53aa54295aabcfee229ae7b9333065ab119056c40946dca39ffc78a520ed7
6
+ metadata.gz: 5b0da43a74bf21505f6e462ac73c17fff694f24082478f29fee83c4a9746e2703cc6497fb0e333b811992a806fdec5cb8b3bbcabefe667548577809445c86148
7
+ data.tar.gz: ba546b36a7681cdafb7cbcb0d98025ee357194e3b1ab5eedb4825fa484a47d8b50e32d0890ef7e2c0cb9f9327bd066fd76986280d8f5955b963827fbdd44c991
data/.gitignore CHANGED
@@ -18,4 +18,5 @@ tmp
18
18
  .idea
19
19
  sample-data/chinook-empty.sqlite
20
20
  tmp
21
- examples/mongodb_whitelist_generated.rb
21
+ examples/mongodb_whitelist_generated.rb
22
+ data
data/.ruby-version CHANGED
@@ -1 +1 @@
1
- ruby-2.3.1
1
+ ruby-2.4.1
data/.travis.yml CHANGED
@@ -1,8 +1,9 @@
1
1
  language: ruby
2
2
  services:
3
3
  - mongodb
4
- before_install: gem install bundler -v 1.12.5
4
+ before_install: gem install bundler -v 1.15.3
5
5
  before_script: rake empty_dest
6
6
  rvm:
7
- - 2.2.5
8
- - 2.3.1
7
+ - 2.2.6
8
+ - 2.3.3
9
+ - 2.4.1
data/README.md CHANGED
@@ -70,6 +70,10 @@ Postgresql database having **composite primary key**
70
70
 
71
71
  ## Changelog
72
72
 
73
+ #### 0.8.1 (Aug 19, 2017)
74
+ 1. Multi-threading support added by [stanislav-tyutin](https://github.com/stanislav-tyutin) via a pull request.
75
+ 2. Fixed compatibility with Ruby 2.4.x, where Fixnum and Bignum are unified into the Integer class.
76
+
73
77
  #### 0.8.0 (Oct 31, 2016)
74
78
  1. Upgraded to rails 5.x
75
79
 
@@ -25,6 +25,6 @@ Gem::Specification.new do |gem|
25
25
  gem.add_dependency('rgeo', '~> 0.5')
26
26
  gem.add_dependency('rgeo-geojson', '~> 0.4')
27
27
  gem.add_dependency('powerbar', '~> 1.0')
28
- gem.add_dependency('parallel', '~> 1.9')
29
- gem.add_dependency('thor', '~> 0.19')
28
+ gem.add_dependency('parallel', '~> 1.12')
29
+ gem.add_dependency('thor', '~> 0.20')
30
30
  end
@@ -1,4 +1,4 @@
1
- system "bundle exec ruby examples/whitelist_dsl.rb"
1
+ system 'bundle exec ruby examples/whitelist_dsl.rb'
2
2
 
3
3
  require 'data-anonymization'
4
4
 
@@ -18,7 +18,7 @@ database 'Chinook' do
18
18
  anonymize('City').using FieldStrategy::RandomCity.region_US
19
19
  anonymize('State').using FieldStrategy::RandomProvince.region_US
20
20
  anonymize('PostalCode').using FieldStrategy::RandomZipcode.region_US
21
- anonymize('Country') {|field| "USA" }
21
+ anonymize('Country') {|field| 'USA'}
22
22
  anonymize('Phone').using FieldStrategy::RandomPhoneNumber.new
23
23
  anonymize('Fax').using FieldStrategy::RandomPhoneNumber.new
24
24
  anonymize('Email').using FieldStrategy::StringTemplate.new('test+#{row_number}@gmail.com')
@@ -33,7 +33,7 @@ database 'Chinook' do
33
33
  anonymize('City').using FieldStrategy::RandomCity.region_US
34
34
  anonymize('State').using FieldStrategy::RandomProvince.region_US
35
35
  anonymize('PostalCode').using FieldStrategy::RandomZipcode.region_US
36
- anonymize('Country') {|field| "USA" }
36
+ anonymize('Country') {|field| 'USA'}
37
37
  anonymize('Fax').using FieldStrategy::RandomPhoneNumber.new
38
38
  anonymize('Email').using FieldStrategy::StringTemplate.new('test+#{row_number}@gmail.com')
39
39
  end
@@ -4,20 +4,20 @@ require 'mongo'
4
4
  DataAnon::Utils::Logging.logger.level = Logger::INFO
5
5
  Mongo::Logger.logger.level = Logger::WARN
6
6
 
7
- Mongo::Client.new("mongodb://localhost/test").database.drop
8
- system "mongoimport --host=127.0.0.1 -d test --drop -c users --jsonArray ./sample-data/mongo/users.json"
9
- system "mongoimport --host=127.0.0.1 -d test --drop -c plans --jsonArray ./sample-data/mongo/plans.json"
7
+ Mongo::Client.new('mongodb://localhost/test').database.drop
8
+ system 'mongoimport --host=127.0.0.1 -d test --drop -c users --jsonArray ./sample-data/mongo/users.json'
9
+ system 'mongoimport --host=127.0.0.1 -d test --drop -c plans --jsonArray ./sample-data/mongo/plans.json'
10
10
 
11
11
 
12
12
  database 'test' do
13
13
  strategy DataAnon::Strategy::MongoDB::Blacklist
14
- source_db :mongodb_uri => "mongodb://localhost/test", :database => 'test'
14
+ source_db :mongodb_uri => 'mongodb://localhost/test', :database => 'test'
15
15
 
16
16
  collection 'users' do
17
17
  anonymize('date_of_birth').using FieldStrategy::TimeDelta.new(5,30)
18
18
  anonymize('user_id').using FieldStrategy::StringTemplate.new('user-#{row_number}')
19
19
  anonymize('email').using FieldStrategy::RandomMailinatorEmail.new
20
- anonymize('password') { |field| "password" }
20
+ anonymize('password') { |field| 'password'}
21
21
  anonymize('first_name').using FieldStrategy::RandomFirstName.new
22
22
  anonymize('last_name').using FieldStrategy::RandomLastName.new
23
23
  end
@@ -5,22 +5,22 @@ DataAnon::Utils::Logging.logger.level = Logger::INFO
5
5
  Mongo::Logger.logger.level = Logger::WARN
6
6
 
7
7
 
8
- Mongo::Client.new("mongodb://localhost/test").database.drop
9
- Mongo::Client.new("mongodb://localhost/dest").database.drop
10
- system "mongoimport --host=127.0.0.1 -d test --drop -c users --jsonArray ./sample-data/mongo/users.json"
11
- system "mongoimport --host=127.0.0.1 -d test --drop -c plans --jsonArray ./sample-data/mongo/plans.json"
8
+ Mongo::Client.new('mongodb://localhost/test').database.drop
9
+ Mongo::Client.new('mongodb://localhost/dest').database.drop
10
+ system 'mongoimport --host=127.0.0.1 -d test --drop -c users --jsonArray ./sample-data/mongo/users.json'
11
+ system 'mongoimport --host=127.0.0.1 -d test --drop -c plans --jsonArray ./sample-data/mongo/plans.json'
12
12
 
13
13
  database 'test' do
14
14
  strategy DataAnon::Strategy::MongoDB::Whitelist
15
- source_db :mongodb_uri => "mongodb://localhost/test", :database => 'test'
16
- destination_db :mongodb_uri => "mongodb://localhost/dest", :database => 'dest'
15
+ source_db :mongodb_uri => 'mongodb://localhost/test', :database => 'test'
16
+ destination_db :mongodb_uri => 'mongodb://localhost/dest', :database => 'dest'
17
17
 
18
18
  collection 'users' do
19
19
  whitelist '_id','failed_attempts','updated_at'
20
20
  anonymize('date_of_birth').using FieldStrategy::TimeDelta.new(5,30)
21
21
  anonymize('user_id').using FieldStrategy::StringTemplate.new('user-#{row_number}')
22
22
  anonymize('email').using FieldStrategy::RandomMailinatorEmail.new
23
- anonymize('password') { |field| "password" }
23
+ anonymize('password') { |field| 'password'}
24
24
  anonymize('first_name').using FieldStrategy::RandomFirstName.new
25
25
  anonymize('last_name').using FieldStrategy::RandomLastName.new
26
26
  anonymize 'password_reset_answer','password_reset_question'
@@ -1,4 +1,4 @@
1
- system "rake empty_dest" # clean destination database on every call
1
+ system 'rake empty_dest' # clean destination database on every call
2
2
 
3
3
  require 'data-anonymization'
4
4
 
@@ -15,7 +15,7 @@ database 'Chinook' do
15
15
  primary_key 'GenreId'
16
16
  whitelist 'GenreId'
17
17
  anonymize 'Name' do |field|
18
- field.value + " test"
18
+ field.value + ' test'
19
19
  end
20
20
  end
21
21
 
@@ -39,7 +39,7 @@ database 'Chinook' do
39
39
  anonymize('City').using FieldStrategy::RandomCity.region_US
40
40
  anonymize('State').using FieldStrategy::RandomProvince.region_US
41
41
  anonymize('PostalCode').using FieldStrategy::RandomZipcode.region_US
42
- anonymize('Country') {|field| "USA" }
42
+ anonymize('Country') {|field| 'USA'}
43
43
  anonymize('Fax').using FieldStrategy::RandomPhoneNumber.new
44
44
  anonymize('Email').using FieldStrategy::StringTemplate.new('test+#{row_number}@gmail.com')
45
45
  end
@@ -56,7 +56,7 @@ database 'Chinook' do
56
56
  anonymize('City').using FieldStrategy::RandomCity.region_US
57
57
  anonymize('State').using FieldStrategy::RandomProvince.region_US
58
58
  anonymize('PostalCode').using FieldStrategy::RandomZipcode.region_US
59
- anonymize('Country') {|field| "USA" }
59
+ anonymize('Country') {|field| 'USA'}
60
60
  anonymize('Phone').using FieldStrategy::RandomPhoneNumber.new
61
61
  anonymize('Fax').using FieldStrategy::RandomPhoneNumber.new
62
62
  anonymize('Email').using FieldStrategy::StringTemplate.new('test+#{row_number}@gmail.com')
@@ -0,0 +1,66 @@
1
+ system 'rake empty_dest' # clean destination database on every call
2
+
3
+ require 'data-anonymization'
4
+
5
+ DataAnon::Utils::Logging.logger.level = Logger::INFO
6
+
7
+ database 'Chinook' do
8
+ strategy DataAnon::Strategy::Whitelist
9
+ source_db :adapter => 'sqlite3', :database => 'sample-data/chinook.sqlite'
10
+ destination_db :adapter => 'sqlite3', :database => 'sample-data/chinook-empty.sqlite'
11
+
12
+ default_field_strategies :string => FieldStrategy::StringTemplate.new('Sunit #{row_number} Parekh')
13
+
14
+ table 'Genre' do
15
+ primary_key 'GenreId'
16
+ whitelist 'GenreId'
17
+ anonymize 'Name' do |field|
18
+ field.value + ' test'
19
+ end
20
+ end
21
+
22
+ table 'MediaType' do
23
+ primary_key 'MediaTypeId'
24
+ anonymize('MediaTypeId') { |field| field.value } # same as whitelist
25
+ anonymize('Name').using FieldStrategy::StringTemplate.new('Media Type #{row_number}')
26
+
27
+ end
28
+
29
+ table 'Customer' do
30
+ primary_key 'CustomerId'
31
+ thread_num 5 # number of worker threads used to anonymize this table's batches
32
+
33
+ whitelist 'CustomerId', 'SupportRepId', 'Company'
34
+ anonymize('Phone').using FieldStrategy::RandomPhoneNumber.new
35
+ anonymize('FirstName').using FieldStrategy::RandomFirstName.new
36
+ anonymize('LastName').using FieldStrategy::RandomLastName.new
37
+ anonymize('Address').using FieldStrategy::RandomAddress.region_US
38
+ anonymize('City').using FieldStrategy::RandomCity.region_US
39
+ anonymize('State').using FieldStrategy::RandomProvince.region_US
40
+ anonymize('PostalCode').using FieldStrategy::RandomZipcode.region_US
41
+ anonymize('Country') {|field| 'USA'}
42
+ anonymize('Fax').using FieldStrategy::RandomPhoneNumber.new
43
+ anonymize('Email').using FieldStrategy::StringTemplate.new('test+#{row_number}@gmail.com')
44
+ end
45
+
46
+ table 'Employee' do
47
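+ thread_num 5 # number of worker threads; NOTE(review): threads are only used for tables with a primary key and batch size, and no primary_key is declared here - confirm this has an effect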
48
+
49
+ whitelist 'EmployeeId', 'ReportsTo', 'Title'
50
+ anonymize('BirthDate').using FieldStrategy::DateTimeDelta.new(1, 1)
51
+ anonymize('FirstName').using FieldStrategy::RandomFirstName.new
52
+ anonymize('LastName').using FieldStrategy::RandomLastName.new
53
+ anonymize('HireDate').using FieldStrategy::DateTimeDelta.new(2, 0)
54
+ anonymize('Address').using FieldStrategy::RandomAddress.region_US
55
+ anonymize('City').using FieldStrategy::RandomCity.region_US
56
+ anonymize('State').using FieldStrategy::RandomProvince.region_US
57
+ anonymize('PostalCode').using FieldStrategy::RandomZipcode.region_US
58
+ anonymize('Country') {|field| 'USA'}
59
+ anonymize('Phone').using FieldStrategy::RandomPhoneNumber.new
60
+ anonymize('Fax').using FieldStrategy::RandomPhoneNumber.new
61
+ anonymize('Email').using FieldStrategy::StringTemplate.new('test+#{row_number}@gmail.com')
62
+ end
63
+
64
+
65
+
66
+ end
data/lib/core/field.rb CHANGED
@@ -3,7 +3,7 @@ module DataAnon
3
3
 
4
4
  class Field
5
5
 
6
- def initialize name, value, row_number, ar_record, table_name = "unknown"
6
+ def initialize name, value, row_number, ar_record, table_name = 'unknown'
7
7
  @name = name
8
8
  @value = value
9
9
  @row_number = row_number
@@ -11,7 +11,7 @@ module DataAnon
11
11
 
12
12
  def log_error record, exception
13
13
  @errors << { :record => record, :exception => exception}
14
- raise "Reached limit of error for a table" if @errors.length > 100
14
+ raise 'Reached limit of error for a table' if @errors.length > 100
15
15
  end
16
16
 
17
17
  def errors
@@ -1,28 +1,28 @@
1
- require "version"
1
+ require 'version'
2
2
 
3
- require "utils/logging"
4
- require "utils/random_int"
5
- require "utils/random_float"
6
- require "utils/random_string"
7
- require "utils/random_string_chars_only"
8
- require "utils/geojson_parser"
9
- require "utils/progress_bar"
10
- require "utils/parallel_progress_bar"
11
- require "utils/resource"
12
- require "utils/template_helper"
13
- require "parallel/table"
14
- require "core/database"
15
- require "core/fields_missing_strategy"
16
- require "thor/helpers/rdbms_dsl_generator"
17
- require "core/field"
18
- require "core/table_errors"
19
- require "strategy/strategies"
20
- require "utils/database"
21
- require "core/dsl"
3
+ require 'utils/logging'
4
+ require 'utils/random_int'
5
+ require 'utils/random_float'
6
+ require 'utils/random_string'
7
+ require 'utils/random_string_chars_only'
8
+ require 'utils/geojson_parser'
9
+ require 'utils/progress_bar'
10
+ require 'utils/parallel_progress_bar'
11
+ require 'utils/resource'
12
+ require 'utils/template_helper'
13
+ require 'parallel/table'
14
+ require 'core/database'
15
+ require 'core/fields_missing_strategy'
16
+ require 'thor/helpers/rdbms_dsl_generator'
17
+ require 'core/field'
18
+ require 'core/table_errors'
19
+ require 'strategy/strategies'
20
+ require 'utils/database'
21
+ require 'core/dsl'
22
22
 
23
23
  begin
24
24
  require 'mongo'
25
- require "thor/helpers/mongodb_dsl_generator"
25
+ require 'thor/helpers/mongodb_dsl_generator'
26
26
  rescue LoadError
27
- "Ignoring the mongodb specific libraries if monog driver is not specified in gem"
27
+ 'Ignoring the mongodb specific libraries if monog driver is not specified in gem'
28
28
  end
data/lib/strategy/base.rb CHANGED
@@ -37,6 +37,10 @@ module DataAnon
37
37
  @limit = limit
38
38
  end
39
39
 
40
# DSL setter: number of worker threads to use when this table is processed
# in batches. When a value is present, table processing is routed through
# process_table_in_threads instead of process_table_in_batches.
#
# thread_num - desired number of worker threads; nil leaves threading off.
#
# Returns the value that was stored.
# Raises ArgumentError for a negative count, which could never be satisfied
# by the thread-limiting loop and would otherwise fail much later with an
# unrelated error.
def thread_num thread_num
  if thread_num.is_a?(Numeric) && thread_num < 0
    raise ArgumentError, "thread_num must not be negative, got #{thread_num.inspect}"
  end

  @thread_num = thread_num
end
43
+
40
44
  def whitelist *fields
41
45
  fields.each { |f| @fields[f] = DataAnon::Strategy::Field::Whitelist.new }
42
46
  end
@@ -95,13 +99,15 @@ module DataAnon
95
99
  progress = progress_bar.new(@name, total)
96
100
  if @primary_keys.empty? || !@batch_size.present?
97
101
  process_table progress
102
+ elsif @thread_num.present?
103
+ process_table_in_threads progress
98
104
  else
99
105
  process_table_in_batches progress
100
106
  end
101
107
  progress.close
102
108
  end
103
109
  if source_table.respond_to?('clear_all_connections!')
104
- source_table.clear_all_connections!
110
+ source_table.clear_all_connections!
105
111
  end
106
112
  end
107
113
 
@@ -134,6 +140,40 @@ module DataAnon
134
140
  end
135
141
  end
136
142
 
143
# Anonymizes the table concurrently: the source table is read in batches of
# @batch_size and every batch is processed by its own thread, with at most
# @thread_num worker threads alive at any time (never fewer than one).
#
# progress - object responding to #show(count); it is called from the
#            calling thread each time a worker thread has been joined, with
#            the number of records processed successfully so far.
#
# Row numbers are handed out under a mutex before a record is processed, so
# no two records can receive the same number even when workers interleave.
# A record that fails therefore still consumes its row number.
#
# Errors raised while processing a record are printed and passed to
# @errors.log_error; anything log_error itself raises ends that worker and
# is re-raised in the calling thread when the worker is joined.
#
# NOTE(review): this reads from source_table rather than
# source_table_limited, so a configured limit looks like it is not applied
# on this path - confirm that this is intended.
def process_table_in_threads progress
  logger.info "Processing table #{@name} records in batch size of #{@batch_size} [THREADS]"

  counter_lock = Mutex.new
  next_row = 0    # next row number to hand out
  processed = 0   # records anonymized successfully, reported to progress
  threads = []
  # A limit below one could never be met and would leave the wait loop with
  # no thread to join, so always allow at least one worker.
  max_threads = [@thread_num.to_i, 1].max

  source_table.find_in_batches(batch_size: @batch_size) do |records|
    # Wait for the oldest workers until a slot is free. The comparison is
    # strict: spawning while max_threads workers are alive would exceed it.
    until threads.count(&:alive?) < max_threads
      threads.shift.join
      progress.show counter_lock.synchronize { processed }
    end

    threads << Thread.new do
      records.each do |record|
        row_number = counter_lock.synchronize { (next_row += 1) - 1 }
        begin
          process_record_if row_number, record
          counter_lock.synchronize { processed += 1 }
        rescue => exception
          puts exception.inspect
          # The error collection is shared by all workers.
          counter_lock.synchronize { @errors.log_error record, exception }
        end
      end
    end
  end

  # Drain the remaining workers; join re-raises anything that killed one.
  until threads.empty?
    threads.shift.join
    progress.show counter_lock.synchronize { processed }
  end
end
176
+
137
177
  def source_table_limited
138
178
  @source_table_limited ||= begin
139
179
  if @limit.present?
@@ -5,6 +5,7 @@ module DataAnon
5
5
  class DefaultAnon
6
6
 
7
7
  DEFAULT_STRATEGIES = {:string => FieldStrategy::RandomString.new,
8
+ :integer => FieldStrategy::RandomIntegerDelta.new(5),
8
9
  :fixnum => FieldStrategy::RandomIntegerDelta.new(5),
9
10
  :bignum => FieldStrategy::RandomIntegerDelta.new(5000),
10
11
  :float => FieldStrategy::RandomFloatDelta.new(5.0),
@@ -9,5 +9,5 @@ begin
9
9
  require 'strategy/mongodb/whitelist'
10
10
  require 'strategy/mongodb/blacklist'
11
11
  rescue LoadError
12
- "Ignoring the mongodb specific libraries if monog driver is not specified in gem"
12
+ 'Ignoring the mongodb specific libraries if monog driver is not specified in gem'
13
13
  end
@@ -18,13 +18,13 @@ module DataAnon
18
18
 
19
19
  def generate
20
20
 
21
- db = Mongo::Connection.from_uri(@mongodb_uri)[@configuration_hash[:database]]
21
+ db = Mongo::Client.new(@mongodb_uri, :database => @configuration_hash[:database])
22
22
  collections = db.collections
23
23
  collections.each do |collection|
24
24
  unless collection.name.start_with?('system.')
25
25
  depth = 2
26
26
  @output << "\tcollection '#{collection.name}' do"
27
- document = collection.find_one
27
+ document = collection.find({}).first
28
28
  process_document(depth, document)
29
29
  @output << "\tend\n"
30
30
  end
@@ -63,4 +63,3 @@ module DataAnon
63
63
  end
64
64
  end
65
65
  end
66
-
@@ -5,9 +5,11 @@ module DataAnon
5
5
  def self.source_connection_specs_rdbms config_hash
6
6
 
7
7
  config_hash.keys.reject{|key| config_hash[key].nil? }.collect { |key|
8
- if ((config_hash[key].class.to_s.downcase == "string"))
8
+ if ((config_hash[key].class.to_s.downcase == 'string'))
9
9
  ":#{key} => '#{config_hash[key]}'"
10
- elsif ((config_hash[key].class.to_s.downcase == "fixnum"))
10
+ elsif ((config_hash[key].class.to_s.downcase == 'integer'))
11
+ ":#{key} => #{config_hash[key]}"
12
+ elsif ((config_hash[key].class.to_s.downcase == 'fixnum'))
11
13
  ":#{key} => #{config_hash[key]}"
12
14
  end
13
15
  }.join ', '
data/lib/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module DataAnonymization
2
- VERSION = '0.8.0'
2
+ VERSION = '0.8.1'
3
3
  end
@@ -1,56 +1,56 @@
1
- require "spec_helper"
1
+ require 'spec_helper'
2
2
  require 'mongo'
3
3
 
4
- describe "End 2 End MongoDB Blacklist Acceptance Test" do
4
+ describe 'End 2 End MongoDB Blacklist Acceptance Test' do
5
5
 
6
6
  before(:each) do
7
- Mongo::Client.new("mongodb://localhost/test").database().drop()
7
+ Mongo::Client.new('mongodb://localhost/test').database().drop()
8
8
  users = [
9
9
  {
10
- "_id" => 1,
11
- "USER_ID" => "sunitparekh",
12
- "date_of_birth" => Time.new(2012, 7, 14, 13, 1, 0),
13
- "email" => "parekh-sunit@mailinator.com",
14
- "password" => "TfqIK8Pd8GlbMDFZCX4l/5EtnOkfLCeynOL85tJQuxum&382knaflk@@",
15
- "failed_attempts" => 0,
16
- "first_name" => "Sunit",
17
- "last_name" => "Parekh",
18
- "password_reset_answer" => "manza",
19
- "password_reset_question" => "My new car modal?",
20
- "updated_at" => Time.new(2012, 8, 15, 13, 1, 0),
21
- "alternate_emails" => ["abc@test.com","abc2@test.com"]
10
+ '_id' => 1,
11
+ 'USER_ID' => 'sunitparekh',
12
+ 'date_of_birth' => Time.new(2012, 7, 14, 13, 1, 0),
13
+ 'email' => 'parekh-sunit@mailinator.com',
14
+ 'password' => 'TfqIK8Pd8GlbMDFZCX4l/5EtnOkfLCeynOL85tJQuxum&382knaflk@@',
15
+ 'failed_attempts' => 0,
16
+ 'first_name' => 'Sunit',
17
+ 'last_name' => 'Parekh',
18
+ 'password_reset_answer' => 'manza',
19
+ 'password_reset_question' => 'My new car modal?',
20
+ 'updated_at' => Time.new(2012, 8, 15, 13, 1, 0),
21
+ 'alternate_emails' => ['abc@test.com', 'abc2@test.com']
22
22
 
23
23
  },
24
24
  {
25
- "_id" => 2,
26
- "USER_ID" => "anandagrawal",
27
- "date_of_birth" => Time.new(2011, 8, 11, 13, 1, 0),
28
- "email" => "anand-agrawal@mailinator.com",
29
- "password" => "Tz548O0RWusldVAWkwqfzO3jK/X4l/5EtnOkfLCeynOL85tJQuxum",
30
- "failed_attempts" => 0,
31
- "first_name" => "Anand",
32
- "last_name" => "Agrawal",
33
- "password_reset_answer" => "android",
34
- "password_reset_question" => "My phone?",
35
- "updated_at" => Time.new(2012, 2, 11, 13, 1, 0),
36
- "alternate_emails" => ["abc@test.com","abc2@test.com"]
25
+ '_id' => 2,
26
+ 'USER_ID' => 'anandagrawal',
27
+ 'date_of_birth' => Time.new(2011, 8, 11, 13, 1, 0),
28
+ 'email' => 'anand-agrawal@mailinator.com',
29
+ 'password' => 'Tz548O0RWusldVAWkwqfzO3jK/X4l/5EtnOkfLCeynOL85tJQuxum',
30
+ 'failed_attempts' => 0,
31
+ 'first_name' => 'Anand',
32
+ 'last_name' => 'Agrawal',
33
+ 'password_reset_answer' => 'android',
34
+ 'password_reset_question' => 'My phone?',
35
+ 'updated_at' => Time.new(2012, 2, 11, 13, 1, 0),
36
+ 'alternate_emails' => ['abc@test.com', 'abc2@test.com']
37
37
  }
38
38
  ]
39
- users_coll = Mongo::Client.new("mongodb://localhost/test").database().collection('users')
39
+ users_coll = Mongo::Client.new('mongodb://localhost/test').database().collection('users')
40
40
  users.each { |p| users_coll.insert_one p }
41
41
  end
42
42
 
43
- it "should anonymize plans collection" do
43
+ it 'should anonymize plans collection' do
44
44
 
45
45
  database 'test' do
46
46
  strategy DataAnon::Strategy::MongoDB::Blacklist
47
- source_db :mongodb_uri => "mongodb://localhost/test", :database => 'test'
47
+ source_db :mongodb_uri => 'mongodb://localhost/test', :database => 'test'
48
48
 
49
49
  collection 'users' do
50
50
  anonymize('date_of_birth').using FieldStrategy::TimeDelta.new(5,30)
51
51
  anonymize('USER_ID').using FieldStrategy::StringTemplate.new('user-#{row_number}')
52
52
  anonymize('email').using FieldStrategy::RandomMailinatorEmail.new
53
- anonymize('password') { |field| "password" }
53
+ anonymize('password') { |field| 'password'}
54
54
  anonymize('first_name').using FieldStrategy::RandomFirstName.new
55
55
  anonymize('last_name').using FieldStrategy::RandomLastName.new
56
56
  anonymize('alternate_emails').using FieldStrategy::AnonymizeArray.new(FieldStrategy::RandomMailinatorEmail.new)
@@ -58,20 +58,20 @@ describe "End 2 End MongoDB Blacklist Acceptance Test" do
58
58
 
59
59
  end
60
60
 
61
- users_coll = Mongo::Client.new("mongodb://localhost/test").database().collection('users')
61
+ users_coll = Mongo::Client.new('mongodb://localhost/test').database().collection('users')
62
62
  users_coll.find.count.to_int.should be 2
63
63
  user = users_coll.find({'_id' => 1}).to_a[0]
64
64
 
65
65
  user['_id'].should == 1
66
- user['USER_ID'].should == "user-1"
66
+ user['USER_ID'].should == 'user-1'
67
67
  user['date_of_birth'].to_i.should_not == Time.new(2012, 7, 14, 13, 1, 0).to_i
68
- user['email'].should_not == "parekh-sunit@mailinator.com"
69
- user['password'].should == "password"
68
+ user['email'].should_not == 'parekh-sunit@mailinator.com'
69
+ user['password'].should == 'password'
70
70
  user['failed_attempts'].should == 0
71
- user['first_name'].should_not be "Sunit"
72
- user['last_name'].should_not be "Parekh"
73
- user['password_reset_answer'].should == "manza"
74
- user['password_reset_question'].should == "My new car modal?"
71
+ user['first_name'].should_not be 'Sunit'
72
+ user['last_name'].should_not be 'Parekh'
73
+ user['password_reset_answer'].should == 'manza'
74
+ user['password_reset_question'].should == 'My new car modal?'
75
75
  user['updated_at'].to_i.should == Time.new(2012, 8, 15, 13, 1, 0).to_i
76
76
  user['alternate_emails'].length.should == 2
77
77
  user['alternate_emails'][0].should_not == 'abc@test.com'
@@ -1,65 +1,65 @@
1
- require "spec_helper"
1
+ require 'spec_helper'
2
2
  require 'mongo'
3
3
 
4
- describe "End 2 End MongoDB Whitelist Acceptance Test" do
4
+ describe 'End 2 End MongoDB Whitelist Acceptance Test' do
5
5
 
6
6
  before(:each) do
7
- Mongo::Client.new("mongodb://localhost/test").database.drop
8
- Mongo::Client.new("mongodb://localhost/dest").database.drop
7
+ Mongo::Client.new('mongodb://localhost/test').database.drop
8
+ Mongo::Client.new('mongodb://localhost/dest').database.drop
9
9
  plans = [
10
10
  {
11
- "_id" => 1,
12
- "name" => "Free",
13
- "nick_names" => ["Name1","Name2"],
14
- "features" => [
11
+ '_id' => 1,
12
+ 'name' => 'Free',
13
+ 'nick_names' => ['Name1', 'Name2'],
14
+ 'features' => [
15
15
  {
16
- "max_storage" => 21474836480,
17
- "type" => "AmazonS3",
18
- "users" => {"max" => 1, "additional" => false}
16
+ 'max_storage' => 21474836480,
17
+ 'type' => 'AmazonS3',
18
+ 'users' => {'max' => 1, 'additional' => false}
19
19
  },
20
20
  {
21
- "max_storage" => 21474836480,
22
- "type" => "DropBox",
23
- "users" => {"max" => 1, "additional" => false}
21
+ 'max_storage' => 21474836480,
22
+ 'type' => 'DropBox',
23
+ 'users' => {'max' => 1, 'additional' => false}
24
24
  }
25
25
  ],
26
- "term" => "month",
27
- "public_sharing" => false,
28
- "photo_sharing" => true,
29
- "created_at" => Time.new(2012, 6, 21, 13, 30, 0)
26
+ 'term' => 'month',
27
+ 'public_sharing' => false,
28
+ 'photo_sharing' => true,
29
+ 'created_at' => Time.new(2012, 6, 21, 13, 30, 0)
30
30
  },
31
31
  {
32
- "_id" => 2,
33
- "name" => "Team",
34
- "plan_aliases" => ["Business", "Paid"],
35
- "features" => [
32
+ '_id' => 2,
33
+ 'name' => 'Team',
34
+ 'plan_aliases' => ['Business', 'Paid'],
35
+ 'features' => [
36
36
  {
37
- "max_storage" => 53687091200,
38
- "type" => "AmazonS3",
39
- "users" => {"max" => 5, "additional" => true}
37
+ 'max_storage' => 53687091200,
38
+ 'type' => 'AmazonS3',
39
+ 'users' => {'max' => 5, 'additional' => true}
40
40
  },
41
41
  {
42
- "max_storage" => 53687091200,
43
- "type" => "DropBox",
44
- "users" => {"max" => 5, "additional" => true}
42
+ 'max_storage' => 53687091200,
43
+ 'type' => 'DropBox',
44
+ 'users' => {'max' => 5, 'additional' => true}
45
45
  }
46
46
  ],
47
- "term" => "month",
48
- "public_sharing" => true,
49
- "photo_sharing" => true,
50
- "created_at" => Time.new(2012, 8, 11, 13, 1, 0)
47
+ 'term' => 'month',
48
+ 'public_sharing' => true,
49
+ 'photo_sharing' => true,
50
+ 'created_at' => Time.new(2012, 8, 11, 13, 1, 0)
51
51
  }
52
52
  ]
53
- plans_coll = Mongo::Client.new("mongodb://localhost/test").database.collection('plans')
53
+ plans_coll = Mongo::Client.new('mongodb://localhost/test').database.collection('plans')
54
54
  plans.each { |p| plans_coll.insert_one p }
55
55
  end
56
56
 
57
- it "should anonymize plans collection" do
57
+ it 'should anonymize plans collection' do
58
58
 
59
59
  database 'dest' do
60
60
  strategy DataAnon::Strategy::MongoDB::Whitelist
61
- source_db :mongodb_uri => "mongodb://localhost/test", :database => 'test'
62
- destination_db :mongodb_uri => "mongodb://localhost/dest", :database => 'dest'
61
+ source_db :mongodb_uri => 'mongodb://localhost/test', :database => 'test'
62
+ destination_db :mongodb_uri => 'mongodb://localhost/dest', :database => 'dest'
63
63
 
64
64
  collection 'plans' do
65
65
  whitelist '_id', 'name', 'term', 'created_at'
@@ -78,15 +78,15 @@ describe "End 2 End MongoDB Whitelist Acceptance Test" do
78
78
 
79
79
  end
80
80
 
81
- plans_coll = Mongo::Client.new("mongodb://localhost/dest").database.collection('plans')
81
+ plans_coll = Mongo::Client.new('mongodb://localhost/dest').database.collection('plans')
82
82
  plans_coll.find.count.to_int.should be 2
83
83
  plan = plans_coll.find({ '_id' => 1}).to_a[0]
84
84
 
85
85
  plan['_id'].should == 1
86
- plan['name'].should == "Free"
87
- plan['nick_names'][0].should_not == "Name1"
88
- plan['nick_names'][1].should_not == "Name2"
89
- plan['term'].should == "month"
86
+ plan['name'].should == 'Free'
87
+ plan['nick_names'][0].should_not == 'Name1'
88
+ plan['nick_names'][1].should_not == 'Name2'
89
+ plan['term'].should == 'month'
90
90
  plan['created_at'].should == Time.new(2012, 6, 21, 13, 30, 0)
91
91
  plan['plan_aliases'].should be_nil
92
92
  [true,false].should include(plan['public_sharing'])
@@ -94,14 +94,14 @@ describe "End 2 End MongoDB Whitelist Acceptance Test" do
94
94
  plan['features'].length.should == 2
95
95
  feature1 = plan['features'][0]
96
96
  [10737418240, 21474836480, 53687091200].should include(feature1['max_storage'])
97
- feature1['type'].should == "AmazonS3"
97
+ feature1['type'].should == 'AmazonS3'
98
98
  feature1['users']['max'].should be_kind_of(Fixnum)
99
99
  [true,false].should include(feature1['users']['additional'])
100
100
 
101
101
 
102
102
  plan = plans_coll.find({ '_id' => 2}).to_a[0]
103
103
  plan['plan_aliases'].length.should == 2
104
- ["Free", "Team", "Business", "Paid"].should include(plan['plan_aliases'][0])
105
- ["Free", "Team", "Business", "Paid"].should include(plan['plan_aliases'][1])
104
+ ['Free', 'Team', 'Business', 'Paid'].should include(plan['plan_aliases'][0])
105
+ ['Free', 'Team', 'Business', 'Paid'].should include(plan['plan_aliases'][1])
106
106
  end
107
107
  end
@@ -1,4 +1,4 @@
1
- require "spec_helper"
1
+ require 'spec_helper'
2
2
 
3
3
  describe 'End 2 End RDBMS Whitelist Acceptance Test using SQLite database' do
4
4
 
@@ -1,6 +1,6 @@
1
- require "spec_helper"
1
+ require 'spec_helper'
2
2
 
3
- describe "End 2 End RDBMS Whitelist Acceptance Test using SQLite database" do
3
+ describe 'End 2 End RDBMS Whitelist Acceptance Test using SQLite database' do
4
4
 
5
5
  source_connection_spec = {:adapter => 'sqlite3', :database => 'tmp/customer.sqlite'}
6
6
  dest_connection_spec = {:adapter => 'sqlite3', :database => 'tmp/customer-dest.sqlite'}
@@ -13,9 +13,9 @@ describe "End 2 End RDBMS Whitelist Acceptance Test using SQLite database" do
13
13
  CustomerSample.create_schema dest_connection_spec
14
14
  end
15
15
 
16
- it "should anonymize customer table record " do
16
+ it 'should anonymize customer table record ' do
17
17
 
18
- database "Customer" do
18
+ database 'Customer' do
19
19
  strategy DataAnon::Strategy::Whitelist
20
20
  source_db source_connection_spec
21
21
  destination_db dest_connection_spec
@@ -37,14 +37,14 @@ describe "End 2 End RDBMS Whitelist Acceptance Test using SQLite database" do
37
37
 
38
38
  DataAnon::Utils::DestinationDatabase.establish_connection dest_connection_spec
39
39
  dest_table = DataAnon::Utils::DestinationTable.create 'customers'
40
- new_rec = dest_table.where("cust_id" => CustomerSample::SAMPLE_DATA[0][:cust_id]).first
41
- new_rec.first_name.should_not be("Sunit")
42
- new_rec.last_name.should_not be("Parekh")
40
+ new_rec = dest_table.where('cust_id' => CustomerSample::SAMPLE_DATA[0][:cust_id]).first
41
+ new_rec.first_name.should_not be('Sunit')
42
+ new_rec.last_name.should_not be('Parekh')
43
43
  new_rec.birth_date.should_not be(Date.new(1977,7,8))
44
44
  new_rec.address.should == 'F 501 Shanti Nagar'
45
45
  ['Gujrat','Karnataka'].should include(new_rec.state)
46
46
  new_rec.zipcode.should == '411048'
47
- new_rec.phone.should_not be "9923700662"
47
+ new_rec.phone.should_not be '9923700662'
48
48
  new_rec.email.should == 'test+1@gmail.com'
49
49
  [true,false].should include(new_rec.terms_n_condition)
50
50
  new_rec.age.should be_between(0,100)
@@ -1,26 +1,26 @@
1
- require "spec_helper"
1
+ require 'spec_helper'
2
2
 
3
3
  describe DataAnon::Core::FieldsMissingStrategy do
4
4
 
5
5
  FMS = DataAnon::Core::FieldsMissingStrategy
6
6
 
7
- it "should be able to add field for new table that doesn't exist" do
8
- users = FMS.new("users")
9
- users.missing("confirm_email")
10
- users.fields_missing_strategy.should == ["confirm_email"]
7
+ it 'should be able to add field for new table that doesnot exist' do
8
+ users = FMS.new('users')
9
+ users.missing('confirm_email')
10
+ users.fields_missing_strategy.should == ['confirm_email']
11
11
  end
12
12
 
13
- it "should be able to take care for same field appearing multiple time" do
14
- users = FMS.new("users")
15
- users.missing("confirm_email")
16
- users.missing("confirm_email")
17
- users.fields_missing_strategy.should == ["confirm_email"]
13
+ it 'should be able to take care for same field appearing multiple time' do
14
+ users = FMS.new('users')
15
+ users.missing('confirm_email')
16
+ users.missing('confirm_email')
17
+ users.fields_missing_strategy.should == ['confirm_email']
18
18
  end
19
19
 
20
- it "should be able to add multiple fields for table" do
21
- users = FMS.new("users")
22
- users.missing("confirm_email")
23
- users.missing("password_reset")
24
- users.fields_missing_strategy.should == ["confirm_email","password_reset"]
20
+ it 'should be able to add multiple fields for table' do
21
+ users = FMS.new('users')
22
+ users.missing('confirm_email')
23
+ users.missing('password_reset')
24
+ users.fields_missing_strategy.should == %w(confirm_email password_reset)
25
25
  end
26
26
  end
@@ -1,4 +1,4 @@
1
- require "spec_helper"
1
+ require 'spec_helper'
2
2
 
3
3
  describe DataAnon::Strategy::Field::RandomAddress do
4
4
 
@@ -7,6 +7,6 @@ describe DataAnon::Strategy::Field::RandomAddress do
7
7
 
8
8
  describe 'anonymized address should be different from original address' do
9
9
  let(:anonymized_address) {RandomAddress.region_US.anonymize(field)}
10
- it {anonymized_address.should_not eq("1 Infinite Loop")}
10
+ it {anonymized_address.should_not eq('1 Infinite Loop')}
11
11
  end
12
12
  end
@@ -1,4 +1,4 @@
1
- require "spec_helper"
1
+ require 'spec_helper'
2
2
 
3
3
  describe FieldStrategy::DefaultAnon do
4
4
 
@@ -1,4 +1,4 @@
1
- require "spec_helper"
1
+ require 'spec_helper'
2
2
 
3
3
  describe FieldStrategy::RandomBoolean do
4
4
 
@@ -1,4 +1,4 @@
1
- require "spec_helper"
1
+ require 'spec_helper'
2
2
 
3
3
  describe FieldStrategy::Whitelist do
4
4
 
@@ -1,4 +1,4 @@
1
- require "spec_helper"
1
+ require 'spec_helper'
2
2
 
3
3
  describe DataAnon::Strategy::MongoDB::AnonymizeField do
4
4
 
@@ -6,30 +6,30 @@ describe DataAnon::Strategy::MongoDB::AnonymizeField do
6
6
  it 'should do callback recursive in case of sub document' do
7
7
  sub_document = {'key' => 'value'}
8
8
  field_strategy = {'key' => FieldStrategy::LoremIpsum.new}
9
- anonymization_strategy = double("AnonymizationStrategy")
10
- anonymization_strategy.should_receive(:anonymize_document).with(sub_document,1,field_strategy).and_return({'key' => "anonymized_value"})
9
+ anonymization_strategy = double('AnonymizationStrategy')
10
+ anonymization_strategy.should_receive(:anonymize_document).with(sub_document,1,field_strategy).and_return({'key' => 'anonymized_value'})
11
11
  field = DataAnon::Core::Field.new('sub_document_field', sub_document,1,nil)
12
12
  anonymize_field = DataAnon::Strategy::MongoDB::AnonymizeField.new(field, field_strategy,anonymization_strategy)
13
13
  anonymized_value = anonymize_field.anonymize
14
- anonymized_value['key'].should == "anonymized_value"
14
+ anonymized_value['key'].should == 'anonymized_value'
15
15
  end
16
16
 
17
17
  it 'should do callback recursive multiple time in case of array of sub document' do
18
18
  sub_documents = [{'key' => 'value1'},{'key' => 'value2'}]
19
19
  field_strategy = {'key' => FieldStrategy::LoremIpsum.new}
20
- anonymization_strategy = double("AnonymizationStrategy")
21
- anonymization_strategy.should_receive(:anonymize_document).with({'key' => 'value1'},1,field_strategy).and_return({'key' => "anonymized_value1"})
22
- anonymization_strategy.should_receive(:anonymize_document).with({'key' => 'value2'},1,field_strategy).and_return({'key' => "anonymized_value2"})
20
+ anonymization_strategy = double('AnonymizationStrategy')
21
+ anonymization_strategy.should_receive(:anonymize_document).with({'key' => 'value1'},1,field_strategy).and_return({'key' => 'anonymized_value1'})
22
+ anonymization_strategy.should_receive(:anonymize_document).with({'key' => 'value2'},1,field_strategy).and_return({'key' => 'anonymized_value2'})
23
23
  field = DataAnon::Core::Field.new('sub_document_field', sub_documents,1,nil)
24
24
  anonymize_field = DataAnon::Strategy::MongoDB::AnonymizeField.new(field, field_strategy,anonymization_strategy)
25
25
  anonymized_value = anonymize_field.anonymize
26
26
  anonymized_value.length.should == 2
27
- anonymized_value[0]['key'].should == "anonymized_value1"
28
- anonymized_value[1]['key'].should == "anonymized_value2"
27
+ anonymized_value[0]['key'].should == 'anonymized_value1'
28
+ anonymized_value[1]['key'].should == 'anonymized_value2'
29
29
  end
30
30
 
31
31
  it 'should anonymize array field data type' do
32
- anonymization_strategy = double("AnonymizationStrategy")
32
+ anonymization_strategy = double('AnonymizationStrategy')
33
33
  anonymization_strategy.should_not_receive(:anonymize_document)
34
34
  field = DataAnon::Core::Field.new('tags',['tag1','tag2'],1,nil)
35
35
  anonymize_field = DataAnon::Strategy::MongoDB::AnonymizeField.new(field,FieldStrategy::SelectFromList.new(['tag4','tag5','tag6','tag7','tag8']),anonymization_strategy)
@@ -40,7 +40,7 @@ describe DataAnon::Strategy::MongoDB::AnonymizeField do
40
40
  end
41
41
 
42
42
  it 'should anonymize field and return anonymized value using passed strategy' do
43
- anonymization_strategy = double("AnonymizationStrategy")
43
+ anonymization_strategy = double('AnonymizationStrategy')
44
44
  anonymization_strategy.should_not_receive(:anonymize_document)
45
45
  field = DataAnon::Core::Field.new('boolean_field',false,1,nil)
46
46
  anonymize_field = DataAnon::Strategy::MongoDB::AnonymizeField.new(field,FieldStrategy::RandomBoolean.new,anonymization_strategy)
@@ -1,14 +1,14 @@
1
- require "spec_helper"
1
+ require 'spec_helper'
2
2
 
3
- describe "Template Helper" do
3
+ describe 'Template Helper' do
4
4
 
5
- it "should return a correctly formatted string based on input connection hash for source" do
6
- connection_hash = {:adapter => "test_adapter", :port => 5000}
5
+ it 'should return a correctly formatted string based on input connection hash for source' do
6
+ connection_hash = {adapter: 'test_adapter', port: 5000}
7
7
  DataAnon::Utils::TemplateHelper.source_connection_specs_rdbms(connection_hash).should eq(":adapter => 'test_adapter', :port => 5000")
8
8
  end
9
9
 
10
- it "should return a correctly formatted string based on input connection hash for destination" do
11
- connection_hash = {:adapter => "test_adapter", :port => 5000}
10
+ it 'should return a correctly formatted string based on input connection hash for destination' do
11
+ connection_hash = {adapter: 'test_adapter', port: 5000}
12
12
  DataAnon::Utils::TemplateHelper.destination_connection_specs_rdbms(connection_hash).should eq(":adapter => '<enter_value>', :port => '<enter_value>'")
13
13
  end
14
14
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: data-anonymization
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.8.0
4
+ version: 0.8.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Sunit Parekh
@@ -10,7 +10,7 @@ authors:
10
10
  autorequire:
11
11
  bindir: bin
12
12
  cert_chain: []
13
- date: 2016-10-31 00:00:00.000000000 Z
13
+ date: 2017-08-19 00:00:00.000000000 Z
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
16
16
  name: activerecord
@@ -102,28 +102,28 @@ dependencies:
102
102
  requirements:
103
103
  - - "~>"
104
104
  - !ruby/object:Gem::Version
105
- version: '1.9'
105
+ version: '1.12'
106
106
  type: :runtime
107
107
  prerelease: false
108
108
  version_requirements: !ruby/object:Gem::Requirement
109
109
  requirements:
110
110
  - - "~>"
111
111
  - !ruby/object:Gem::Version
112
- version: '1.9'
112
+ version: '1.12'
113
113
  - !ruby/object:Gem::Dependency
114
114
  name: thor
115
115
  requirement: !ruby/object:Gem::Requirement
116
116
  requirements:
117
117
  - - "~>"
118
118
  - !ruby/object:Gem::Version
119
- version: '0.19'
119
+ version: '0.20'
120
120
  type: :runtime
121
121
  prerelease: false
122
122
  version_requirements: !ruby/object:Gem::Requirement
123
123
  requirements:
124
124
  - - "~>"
125
125
  - !ruby/object:Gem::Version
126
- version: '0.19'
126
+ version: '0.20'
127
127
  description: Data anonymization tool for RDBMS and MongoDB databases
128
128
  email:
129
129
  - parekh.sunit@gmail.com
@@ -151,6 +151,7 @@ files:
151
151
  - examples/mongodb_blacklist_dsl.rb
152
152
  - examples/mongodb_whitelist_dsl.rb
153
153
  - examples/whitelist_dsl.rb
154
+ - examples/whitelist_dsl_threads.rb
154
155
  - lib/core/database.rb
155
156
  - lib/core/dsl.rb
156
157
  - lib/core/field.rb
@@ -297,7 +298,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
297
298
  version: '0'
298
299
  requirements: []
299
300
  rubyforge_project:
300
- rubygems_version: 2.5.1
301
+ rubygems_version: 2.6.12
301
302
  signing_key:
302
303
  specification_version: 4
303
304
  summary: Tool to create anonymized production data dump to use for performance and