data-anonymization 0.8.0 → 0.8.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 1eed8d87b9e7060ee7d3813c77b52b838f789d92
4
- data.tar.gz: bc475f130cf331debd4cbc835ab5391aa38c3129
3
+ metadata.gz: 6455e294c22b99a0181092f93c506060c5c9b383
4
+ data.tar.gz: 4da392d7b4712d941b9cab8206d178bae89eb288
5
5
  SHA512:
6
- metadata.gz: e8e6e315a8468c1208c3fd65cf3ea575bbce766290cbbfc9fe323a329a438a02abe63d3751dabda4dd7a1e39a147f0d75aaf88b95beb4c6f09cef7969730b09a
7
- data.tar.gz: aef7a9d30b0d1868435445f91a1cbe6412c8628eabcaffa2e51ea74fa721d1a2f3a53aa54295aabcfee229ae7b9333065ab119056c40946dca39ffc78a520ed7
6
+ metadata.gz: 5b0da43a74bf21505f6e462ac73c17fff694f24082478f29fee83c4a9746e2703cc6497fb0e333b811992a806fdec5cb8b3bbcabefe667548577809445c86148
7
+ data.tar.gz: ba546b36a7681cdafb7cbcb0d98025ee357194e3b1ab5eedb4825fa484a47d8b50e32d0890ef7e2c0cb9f9327bd066fd76986280d8f5955b963827fbdd44c991
data/.gitignore CHANGED
@@ -18,4 +18,5 @@ tmp
18
18
  .idea
19
19
  sample-data/chinook-empty.sqlite
20
20
  tmp
21
- examples/mongodb_whitelist_generated.rb
21
+ examples/mongodb_whitelist_generated.rb
22
+ data
data/.ruby-version CHANGED
@@ -1 +1 @@
1
- ruby-2.3.1
1
+ ruby-2.4.1
data/.travis.yml CHANGED
@@ -1,8 +1,9 @@
1
1
  language: ruby
2
2
  services:
3
3
  - mongodb
4
- before_install: gem install bundler -v 1.12.5
4
+ before_install: gem install bundler -v 1.15.3
5
5
  before_script: rake empty_dest
6
6
  rvm:
7
- - 2.2.5
8
- - 2.3.1
7
+ - 2.2.6
8
+ - 2.3.3
9
+ - 2.4.1
data/README.md CHANGED
@@ -70,6 +70,10 @@ Postgresql database having **composite primary key**
70
70
 
71
71
  ## Changelog
72
72
 
73
+ #### 0.8.1 (Aug 19, 2017)
74
+ 1. Multi-threading support added by [stanislav-tyutin](https://github.com/stanislav-tyutin) via a pull request.
75
+ 2. Fixed an issue with the Integer data type so that the gem works with Ruby 2.4.x.
76
+
73
77
  #### 0.8.0 (Oct 31, 2016)
74
78
  1. Upgraded to rails 5.x
75
79
 
@@ -25,6 +25,6 @@ Gem::Specification.new do |gem|
25
25
  gem.add_dependency('rgeo', '~> 0.5')
26
26
  gem.add_dependency('rgeo-geojson', '~> 0.4')
27
27
  gem.add_dependency('powerbar', '~> 1.0')
28
- gem.add_dependency('parallel', '~> 1.9')
29
- gem.add_dependency('thor', '~> 0.19')
28
+ gem.add_dependency('parallel', '~> 1.12')
29
+ gem.add_dependency('thor', '~> 0.20')
30
30
  end
@@ -1,4 +1,4 @@
1
- system "bundle exec ruby examples/whitelist_dsl.rb"
1
+ system 'bundle exec ruby examples/whitelist_dsl.rb'
2
2
 
3
3
  require 'data-anonymization'
4
4
 
@@ -18,7 +18,7 @@ database 'Chinook' do
18
18
  anonymize('City').using FieldStrategy::RandomCity.region_US
19
19
  anonymize('State').using FieldStrategy::RandomProvince.region_US
20
20
  anonymize('PostalCode').using FieldStrategy::RandomZipcode.region_US
21
- anonymize('Country') {|field| "USA" }
21
+ anonymize('Country') {|field| 'USA'}
22
22
  anonymize('Phone').using FieldStrategy::RandomPhoneNumber.new
23
23
  anonymize('Fax').using FieldStrategy::RandomPhoneNumber.new
24
24
  anonymize('Email').using FieldStrategy::StringTemplate.new('test+#{row_number}@gmail.com')
@@ -33,7 +33,7 @@ database 'Chinook' do
33
33
  anonymize('City').using FieldStrategy::RandomCity.region_US
34
34
  anonymize('State').using FieldStrategy::RandomProvince.region_US
35
35
  anonymize('PostalCode').using FieldStrategy::RandomZipcode.region_US
36
- anonymize('Country') {|field| "USA" }
36
+ anonymize('Country') {|field| 'USA'}
37
37
  anonymize('Fax').using FieldStrategy::RandomPhoneNumber.new
38
38
  anonymize('Email').using FieldStrategy::StringTemplate.new('test+#{row_number}@gmail.com')
39
39
  end
@@ -4,20 +4,20 @@ require 'mongo'
4
4
  DataAnon::Utils::Logging.logger.level = Logger::INFO
5
5
  Mongo::Logger.logger.level = Logger::WARN
6
6
 
7
- Mongo::Client.new("mongodb://localhost/test").database.drop
8
- system "mongoimport --host=127.0.0.1 -d test --drop -c users --jsonArray ./sample-data/mongo/users.json"
9
- system "mongoimport --host=127.0.0.1 -d test --drop -c plans --jsonArray ./sample-data/mongo/plans.json"
7
+ Mongo::Client.new('mongodb://localhost/test').database.drop
8
+ system 'mongoimport --host=127.0.0.1 -d test --drop -c users --jsonArray ./sample-data/mongo/users.json'
9
+ system 'mongoimport --host=127.0.0.1 -d test --drop -c plans --jsonArray ./sample-data/mongo/plans.json'
10
10
 
11
11
 
12
12
  database 'test' do
13
13
  strategy DataAnon::Strategy::MongoDB::Blacklist
14
- source_db :mongodb_uri => "mongodb://localhost/test", :database => 'test'
14
+ source_db :mongodb_uri => 'mongodb://localhost/test', :database => 'test'
15
15
 
16
16
  collection 'users' do
17
17
  anonymize('date_of_birth').using FieldStrategy::TimeDelta.new(5,30)
18
18
  anonymize('user_id').using FieldStrategy::StringTemplate.new('user-#{row_number}')
19
19
  anonymize('email').using FieldStrategy::RandomMailinatorEmail.new
20
- anonymize('password') { |field| "password" }
20
+ anonymize('password') { |field| 'password'}
21
21
  anonymize('first_name').using FieldStrategy::RandomFirstName.new
22
22
  anonymize('last_name').using FieldStrategy::RandomLastName.new
23
23
  end
@@ -5,22 +5,22 @@ DataAnon::Utils::Logging.logger.level = Logger::INFO
5
5
  Mongo::Logger.logger.level = Logger::WARN
6
6
 
7
7
 
8
- Mongo::Client.new("mongodb://localhost/test").database.drop
9
- Mongo::Client.new("mongodb://localhost/dest").database.drop
10
- system "mongoimport --host=127.0.0.1 -d test --drop -c users --jsonArray ./sample-data/mongo/users.json"
11
- system "mongoimport --host=127.0.0.1 -d test --drop -c plans --jsonArray ./sample-data/mongo/plans.json"
8
+ Mongo::Client.new('mongodb://localhost/test').database.drop
9
+ Mongo::Client.new('mongodb://localhost/dest').database.drop
10
+ system 'mongoimport --host=127.0.0.1 -d test --drop -c users --jsonArray ./sample-data/mongo/users.json'
11
+ system 'mongoimport --host=127.0.0.1 -d test --drop -c plans --jsonArray ./sample-data/mongo/plans.json'
12
12
 
13
13
  database 'test' do
14
14
  strategy DataAnon::Strategy::MongoDB::Whitelist
15
- source_db :mongodb_uri => "mongodb://localhost/test", :database => 'test'
16
- destination_db :mongodb_uri => "mongodb://localhost/dest", :database => 'dest'
15
+ source_db :mongodb_uri => 'mongodb://localhost/test', :database => 'test'
16
+ destination_db :mongodb_uri => 'mongodb://localhost/dest', :database => 'dest'
17
17
 
18
18
  collection 'users' do
19
19
  whitelist '_id','failed_attempts','updated_at'
20
20
  anonymize('date_of_birth').using FieldStrategy::TimeDelta.new(5,30)
21
21
  anonymize('user_id').using FieldStrategy::StringTemplate.new('user-#{row_number}')
22
22
  anonymize('email').using FieldStrategy::RandomMailinatorEmail.new
23
- anonymize('password') { |field| "password" }
23
+ anonymize('password') { |field| 'password'}
24
24
  anonymize('first_name').using FieldStrategy::RandomFirstName.new
25
25
  anonymize('last_name').using FieldStrategy::RandomLastName.new
26
26
  anonymize 'password_reset_answer','password_reset_question'
@@ -1,4 +1,4 @@
1
- system "rake empty_dest" # clean destination database on every call
1
+ system 'rake empty_dest' # clean destination database on every call
2
2
 
3
3
  require 'data-anonymization'
4
4
 
@@ -15,7 +15,7 @@ database 'Chinook' do
15
15
  primary_key 'GenreId'
16
16
  whitelist 'GenreId'
17
17
  anonymize 'Name' do |field|
18
- field.value + " test"
18
+ field.value + ' test'
19
19
  end
20
20
  end
21
21
 
@@ -39,7 +39,7 @@ database 'Chinook' do
39
39
  anonymize('City').using FieldStrategy::RandomCity.region_US
40
40
  anonymize('State').using FieldStrategy::RandomProvince.region_US
41
41
  anonymize('PostalCode').using FieldStrategy::RandomZipcode.region_US
42
- anonymize('Country') {|field| "USA" }
42
+ anonymize('Country') {|field| 'USA'}
43
43
  anonymize('Fax').using FieldStrategy::RandomPhoneNumber.new
44
44
  anonymize('Email').using FieldStrategy::StringTemplate.new('test+#{row_number}@gmail.com')
45
45
  end
@@ -56,7 +56,7 @@ database 'Chinook' do
56
56
  anonymize('City').using FieldStrategy::RandomCity.region_US
57
57
  anonymize('State').using FieldStrategy::RandomProvince.region_US
58
58
  anonymize('PostalCode').using FieldStrategy::RandomZipcode.region_US
59
- anonymize('Country') {|field| "USA" }
59
+ anonymize('Country') {|field| 'USA'}
60
60
  anonymize('Phone').using FieldStrategy::RandomPhoneNumber.new
61
61
  anonymize('Fax').using FieldStrategy::RandomPhoneNumber.new
62
62
  anonymize('Email').using FieldStrategy::StringTemplate.new('test+#{row_number}@gmail.com')
@@ -0,0 +1,66 @@
1
+ system 'rake empty_dest' # clean destination database on every call
2
+
3
+ require 'data-anonymization'
4
+
5
+ DataAnon::Utils::Logging.logger.level = Logger::INFO
6
+
7
+ database 'Chinook' do
8
+ strategy DataAnon::Strategy::Whitelist
9
+ source_db :adapter => 'sqlite3', :database => 'sample-data/chinook.sqlite'
10
+ destination_db :adapter => 'sqlite3', :database => 'sample-data/chinook-empty.sqlite'
11
+
12
+ default_field_strategies :string => FieldStrategy::StringTemplate.new('Sunit #{row_number} Parekh')
13
+
14
+ table 'Genre' do
15
+ primary_key 'GenreId'
16
+ whitelist 'GenreId'
17
+ anonymize 'Name' do |field|
18
+ field.value + ' test'
19
+ end
20
+ end
21
+
22
+ table 'MediaType' do
23
+ primary_key 'MediaTypeId'
24
+ anonymize('MediaTypeId') { |field| field.value } # same as whitelist
25
+ anonymize('Name').using FieldStrategy::StringTemplate.new('Media Type #{row_number}')
26
+
27
+ end
28
+
29
+ table 'Customer' do
30
+ primary_key 'CustomerId'
31
+ thread_num 5 # thread_num
32
+
33
+ whitelist 'CustomerId', 'SupportRepId', 'Company'
34
+ anonymize('Phone').using FieldStrategy::RandomPhoneNumber.new
35
+ anonymize('FirstName').using FieldStrategy::RandomFirstName.new
36
+ anonymize('LastName').using FieldStrategy::RandomLastName.new
37
+ anonymize('Address').using FieldStrategy::RandomAddress.region_US
38
+ anonymize('City').using FieldStrategy::RandomCity.region_US
39
+ anonymize('State').using FieldStrategy::RandomProvince.region_US
40
+ anonymize('PostalCode').using FieldStrategy::RandomZipcode.region_US
41
+ anonymize('Country') {|field| 'USA'}
42
+ anonymize('Fax').using FieldStrategy::RandomPhoneNumber.new
43
+ anonymize('Email').using FieldStrategy::StringTemplate.new('test+#{row_number}@gmail.com')
44
+ end
45
+
46
+ table 'Employee' do
47
+ thread_num 5 # thread_num
48
+
49
+ whitelist 'EmployeeId', 'ReportsTo', 'Title'
50
+ anonymize('BirthDate').using FieldStrategy::DateTimeDelta.new(1, 1)
51
+ anonymize('FirstName').using FieldStrategy::RandomFirstName.new
52
+ anonymize('LastName').using FieldStrategy::RandomLastName.new
53
+ anonymize('HireDate').using FieldStrategy::DateTimeDelta.new(2, 0)
54
+ anonymize('Address').using FieldStrategy::RandomAddress.region_US
55
+ anonymize('City').using FieldStrategy::RandomCity.region_US
56
+ anonymize('State').using FieldStrategy::RandomProvince.region_US
57
+ anonymize('PostalCode').using FieldStrategy::RandomZipcode.region_US
58
+ anonymize('Country') {|field| 'USA'}
59
+ anonymize('Phone').using FieldStrategy::RandomPhoneNumber.new
60
+ anonymize('Fax').using FieldStrategy::RandomPhoneNumber.new
61
+ anonymize('Email').using FieldStrategy::StringTemplate.new('test+#{row_number}@gmail.com')
62
+ end
63
+
64
+
65
+
66
+ end
data/lib/core/field.rb CHANGED
@@ -3,7 +3,7 @@ module DataAnon
3
3
 
4
4
  class Field
5
5
 
6
- def initialize name, value, row_number, ar_record, table_name = "unknown"
6
+ def initialize name, value, row_number, ar_record, table_name = 'unknown'
7
7
  @name = name
8
8
  @value = value
9
9
  @row_number = row_number
@@ -11,7 +11,7 @@ module DataAnon
11
11
 
12
12
  def log_error record, exception
13
13
  @errors << { :record => record, :exception => exception}
14
- raise "Reached limit of error for a table" if @errors.length > 100
14
+ raise 'Reached limit of error for a table' if @errors.length > 100
15
15
  end
16
16
 
17
17
  def errors
@@ -1,28 +1,28 @@
1
- require "version"
1
+ require 'version'
2
2
 
3
- require "utils/logging"
4
- require "utils/random_int"
5
- require "utils/random_float"
6
- require "utils/random_string"
7
- require "utils/random_string_chars_only"
8
- require "utils/geojson_parser"
9
- require "utils/progress_bar"
10
- require "utils/parallel_progress_bar"
11
- require "utils/resource"
12
- require "utils/template_helper"
13
- require "parallel/table"
14
- require "core/database"
15
- require "core/fields_missing_strategy"
16
- require "thor/helpers/rdbms_dsl_generator"
17
- require "core/field"
18
- require "core/table_errors"
19
- require "strategy/strategies"
20
- require "utils/database"
21
- require "core/dsl"
3
+ require 'utils/logging'
4
+ require 'utils/random_int'
5
+ require 'utils/random_float'
6
+ require 'utils/random_string'
7
+ require 'utils/random_string_chars_only'
8
+ require 'utils/geojson_parser'
9
+ require 'utils/progress_bar'
10
+ require 'utils/parallel_progress_bar'
11
+ require 'utils/resource'
12
+ require 'utils/template_helper'
13
+ require 'parallel/table'
14
+ require 'core/database'
15
+ require 'core/fields_missing_strategy'
16
+ require 'thor/helpers/rdbms_dsl_generator'
17
+ require 'core/field'
18
+ require 'core/table_errors'
19
+ require 'strategy/strategies'
20
+ require 'utils/database'
21
+ require 'core/dsl'
22
22
 
23
23
  begin
24
24
  require 'mongo'
25
- require "thor/helpers/mongodb_dsl_generator"
25
+ require 'thor/helpers/mongodb_dsl_generator'
26
26
  rescue LoadError
27
- "Ignoring the mongodb specific libraries if monog driver is not specified in gem"
27
+ 'Ignoring the mongodb specific libraries if monog driver is not specified in gem'
28
28
  end
data/lib/strategy/base.rb CHANGED
@@ -37,6 +37,10 @@ module DataAnon
37
37
  @limit = limit
38
38
  end
39
39
 
40
+ def thread_num thread_num
41
+ @thread_num = thread_num
42
+ end
43
+
40
44
  def whitelist *fields
41
45
  fields.each { |f| @fields[f] = DataAnon::Strategy::Field::Whitelist.new }
42
46
  end
@@ -95,13 +99,15 @@ module DataAnon
95
99
  progress = progress_bar.new(@name, total)
96
100
  if @primary_keys.empty? || !@batch_size.present?
97
101
  process_table progress
102
+ elsif @thread_num.present?
103
+ process_table_in_threads progress
98
104
  else
99
105
  process_table_in_batches progress
100
106
  end
101
107
  progress.close
102
108
  end
103
109
  if source_table.respond_to?('clear_all_connections!')
104
- source_table.clear_all_connections!
110
+ source_table.clear_all_connections!
105
111
  end
106
112
  end
107
113
 
@@ -134,6 +140,40 @@ module DataAnon
134
140
  end
135
141
  end
136
142
 
143
+ def process_table_in_threads progress
144
+ logger.info "Processing table #{@name} records in batch size of #{@batch_size} [THREADS]"
145
+
146
+ index = 0
147
+ threads = []
148
+
149
+ source_table.find_in_batches(batch_size: @batch_size) do |records|
150
+ until threads.count(&:alive?) <= @thread_num
151
+ thr = threads.delete_at 0
152
+ thr.join
153
+ progress.show index
154
+ end
155
+
156
+ thr = Thread.new {
157
+ records.each do |record|
158
+ begin
159
+ process_record_if index, record
160
+ index += 1
161
+ rescue => exception
162
+ puts exception.inspect
163
+ @errors.log_error record, exception
164
+ end
165
+ end
166
+ }
167
+ threads << thr
168
+ end
169
+
170
+ until threads.empty?
171
+ thr = threads.delete_at 0
172
+ thr.join
173
+ progress.show index
174
+ end
175
+ end
176
+
137
177
  def source_table_limited
138
178
  @source_table_limited ||= begin
139
179
  if @limit.present?
@@ -5,6 +5,7 @@ module DataAnon
5
5
  class DefaultAnon
6
6
 
7
7
  DEFAULT_STRATEGIES = {:string => FieldStrategy::RandomString.new,
8
+ :integer => FieldStrategy::RandomIntegerDelta.new(5),
8
9
  :fixnum => FieldStrategy::RandomIntegerDelta.new(5),
9
10
  :bignum => FieldStrategy::RandomIntegerDelta.new(5000),
10
11
  :float => FieldStrategy::RandomFloatDelta.new(5.0),
@@ -9,5 +9,5 @@ begin
9
9
  require 'strategy/mongodb/whitelist'
10
10
  require 'strategy/mongodb/blacklist'
11
11
  rescue LoadError
12
- "Ignoring the mongodb specific libraries if monog driver is not specified in gem"
12
+ 'Ignoring the mongodb specific libraries if monog driver is not specified in gem'
13
13
  end
@@ -18,13 +18,13 @@ module DataAnon
18
18
 
19
19
  def generate
20
20
 
21
- db = Mongo::Connection.from_uri(@mongodb_uri)[@configuration_hash[:database]]
21
+ db = Mongo::Client.new(@mongodb_uri, :database => @configuration_hash[:database])
22
22
  collections = db.collections
23
23
  collections.each do |collection|
24
24
  unless collection.name.start_with?('system.')
25
25
  depth = 2
26
26
  @output << "\tcollection '#{collection.name}' do"
27
- document = collection.find_one
27
+ document = collection.find({}).first
28
28
  process_document(depth, document)
29
29
  @output << "\tend\n"
30
30
  end
@@ -63,4 +63,3 @@ module DataAnon
63
63
  end
64
64
  end
65
65
  end
66
-
@@ -5,9 +5,11 @@ module DataAnon
5
5
  def self.source_connection_specs_rdbms config_hash
6
6
 
7
7
  config_hash.keys.reject{|key| config_hash[key].nil? }.collect { |key|
8
- if ((config_hash[key].class.to_s.downcase == "string"))
8
+ if ((config_hash[key].class.to_s.downcase == 'string'))
9
9
  ":#{key} => '#{config_hash[key]}'"
10
- elsif ((config_hash[key].class.to_s.downcase == "fixnum"))
10
+ elsif ((config_hash[key].class.to_s.downcase == 'integer'))
11
+ ":#{key} => #{config_hash[key]}"
12
+ elsif ((config_hash[key].class.to_s.downcase == 'fixnum'))
11
13
  ":#{key} => #{config_hash[key]}"
12
14
  end
13
15
  }.join ', '
data/lib/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module DataAnonymization
2
- VERSION = '0.8.0'
2
+ VERSION = '0.8.1'
3
3
  end
@@ -1,56 +1,56 @@
1
- require "spec_helper"
1
+ require 'spec_helper'
2
2
  require 'mongo'
3
3
 
4
- describe "End 2 End MongoDB Blacklist Acceptance Test" do
4
+ describe 'End 2 End MongoDB Blacklist Acceptance Test' do
5
5
 
6
6
  before(:each) do
7
- Mongo::Client.new("mongodb://localhost/test").database().drop()
7
+ Mongo::Client.new('mongodb://localhost/test').database().drop()
8
8
  users = [
9
9
  {
10
- "_id" => 1,
11
- "USER_ID" => "sunitparekh",
12
- "date_of_birth" => Time.new(2012, 7, 14, 13, 1, 0),
13
- "email" => "parekh-sunit@mailinator.com",
14
- "password" => "TfqIK8Pd8GlbMDFZCX4l/5EtnOkfLCeynOL85tJQuxum&382knaflk@@",
15
- "failed_attempts" => 0,
16
- "first_name" => "Sunit",
17
- "last_name" => "Parekh",
18
- "password_reset_answer" => "manza",
19
- "password_reset_question" => "My new car modal?",
20
- "updated_at" => Time.new(2012, 8, 15, 13, 1, 0),
21
- "alternate_emails" => ["abc@test.com","abc2@test.com"]
10
+ '_id' => 1,
11
+ 'USER_ID' => 'sunitparekh',
12
+ 'date_of_birth' => Time.new(2012, 7, 14, 13, 1, 0),
13
+ 'email' => 'parekh-sunit@mailinator.com',
14
+ 'password' => 'TfqIK8Pd8GlbMDFZCX4l/5EtnOkfLCeynOL85tJQuxum&382knaflk@@',
15
+ 'failed_attempts' => 0,
16
+ 'first_name' => 'Sunit',
17
+ 'last_name' => 'Parekh',
18
+ 'password_reset_answer' => 'manza',
19
+ 'password_reset_question' => 'My new car modal?',
20
+ 'updated_at' => Time.new(2012, 8, 15, 13, 1, 0),
21
+ 'alternate_emails' => ['abc@test.com', 'abc2@test.com']
22
22
 
23
23
  },
24
24
  {
25
- "_id" => 2,
26
- "USER_ID" => "anandagrawal",
27
- "date_of_birth" => Time.new(2011, 8, 11, 13, 1, 0),
28
- "email" => "anand-agrawal@mailinator.com",
29
- "password" => "Tz548O0RWusldVAWkwqfzO3jK/X4l/5EtnOkfLCeynOL85tJQuxum",
30
- "failed_attempts" => 0,
31
- "first_name" => "Anand",
32
- "last_name" => "Agrawal",
33
- "password_reset_answer" => "android",
34
- "password_reset_question" => "My phone?",
35
- "updated_at" => Time.new(2012, 2, 11, 13, 1, 0),
36
- "alternate_emails" => ["abc@test.com","abc2@test.com"]
25
+ '_id' => 2,
26
+ 'USER_ID' => 'anandagrawal',
27
+ 'date_of_birth' => Time.new(2011, 8, 11, 13, 1, 0),
28
+ 'email' => 'anand-agrawal@mailinator.com',
29
+ 'password' => 'Tz548O0RWusldVAWkwqfzO3jK/X4l/5EtnOkfLCeynOL85tJQuxum',
30
+ 'failed_attempts' => 0,
31
+ 'first_name' => 'Anand',
32
+ 'last_name' => 'Agrawal',
33
+ 'password_reset_answer' => 'android',
34
+ 'password_reset_question' => 'My phone?',
35
+ 'updated_at' => Time.new(2012, 2, 11, 13, 1, 0),
36
+ 'alternate_emails' => ['abc@test.com', 'abc2@test.com']
37
37
  }
38
38
  ]
39
- users_coll = Mongo::Client.new("mongodb://localhost/test").database().collection('users')
39
+ users_coll = Mongo::Client.new('mongodb://localhost/test').database().collection('users')
40
40
  users.each { |p| users_coll.insert_one p }
41
41
  end
42
42
 
43
- it "should anonymize plans collection" do
43
+ it 'should anonymize plans collection' do
44
44
 
45
45
  database 'test' do
46
46
  strategy DataAnon::Strategy::MongoDB::Blacklist
47
- source_db :mongodb_uri => "mongodb://localhost/test", :database => 'test'
47
+ source_db :mongodb_uri => 'mongodb://localhost/test', :database => 'test'
48
48
 
49
49
  collection 'users' do
50
50
  anonymize('date_of_birth').using FieldStrategy::TimeDelta.new(5,30)
51
51
  anonymize('USER_ID').using FieldStrategy::StringTemplate.new('user-#{row_number}')
52
52
  anonymize('email').using FieldStrategy::RandomMailinatorEmail.new
53
- anonymize('password') { |field| "password" }
53
+ anonymize('password') { |field| 'password'}
54
54
  anonymize('first_name').using FieldStrategy::RandomFirstName.new
55
55
  anonymize('last_name').using FieldStrategy::RandomLastName.new
56
56
  anonymize('alternate_emails').using FieldStrategy::AnonymizeArray.new(FieldStrategy::RandomMailinatorEmail.new)
@@ -58,20 +58,20 @@ describe "End 2 End MongoDB Blacklist Acceptance Test" do
58
58
 
59
59
  end
60
60
 
61
- users_coll = Mongo::Client.new("mongodb://localhost/test").database().collection('users')
61
+ users_coll = Mongo::Client.new('mongodb://localhost/test').database().collection('users')
62
62
  users_coll.find.count.to_int.should be 2
63
63
  user = users_coll.find({'_id' => 1}).to_a[0]
64
64
 
65
65
  user['_id'].should == 1
66
- user['USER_ID'].should == "user-1"
66
+ user['USER_ID'].should == 'user-1'
67
67
  user['date_of_birth'].to_i.should_not == Time.new(2012, 7, 14, 13, 1, 0).to_i
68
- user['email'].should_not == "parekh-sunit@mailinator.com"
69
- user['password'].should == "password"
68
+ user['email'].should_not == 'parekh-sunit@mailinator.com'
69
+ user['password'].should == 'password'
70
70
  user['failed_attempts'].should == 0
71
- user['first_name'].should_not be "Sunit"
72
- user['last_name'].should_not be "Parekh"
73
- user['password_reset_answer'].should == "manza"
74
- user['password_reset_question'].should == "My new car modal?"
71
+ user['first_name'].should_not be 'Sunit'
72
+ user['last_name'].should_not be 'Parekh'
73
+ user['password_reset_answer'].should == 'manza'
74
+ user['password_reset_question'].should == 'My new car modal?'
75
75
  user['updated_at'].to_i.should == Time.new(2012, 8, 15, 13, 1, 0).to_i
76
76
  user['alternate_emails'].length.should == 2
77
77
  user['alternate_emails'][0].should_not == 'abc@test.com'
@@ -1,65 +1,65 @@
1
- require "spec_helper"
1
+ require 'spec_helper'
2
2
  require 'mongo'
3
3
 
4
- describe "End 2 End MongoDB Whitelist Acceptance Test" do
4
+ describe 'End 2 End MongoDB Whitelist Acceptance Test' do
5
5
 
6
6
  before(:each) do
7
- Mongo::Client.new("mongodb://localhost/test").database.drop
8
- Mongo::Client.new("mongodb://localhost/dest").database.drop
7
+ Mongo::Client.new('mongodb://localhost/test').database.drop
8
+ Mongo::Client.new('mongodb://localhost/dest').database.drop
9
9
  plans = [
10
10
  {
11
- "_id" => 1,
12
- "name" => "Free",
13
- "nick_names" => ["Name1","Name2"],
14
- "features" => [
11
+ '_id' => 1,
12
+ 'name' => 'Free',
13
+ 'nick_names' => ['Name1', 'Name2'],
14
+ 'features' => [
15
15
  {
16
- "max_storage" => 21474836480,
17
- "type" => "AmazonS3",
18
- "users" => {"max" => 1, "additional" => false}
16
+ 'max_storage' => 21474836480,
17
+ 'type' => 'AmazonS3',
18
+ 'users' => {'max' => 1, 'additional' => false}
19
19
  },
20
20
  {
21
- "max_storage" => 21474836480,
22
- "type" => "DropBox",
23
- "users" => {"max" => 1, "additional" => false}
21
+ 'max_storage' => 21474836480,
22
+ 'type' => 'DropBox',
23
+ 'users' => {'max' => 1, 'additional' => false}
24
24
  }
25
25
  ],
26
- "term" => "month",
27
- "public_sharing" => false,
28
- "photo_sharing" => true,
29
- "created_at" => Time.new(2012, 6, 21, 13, 30, 0)
26
+ 'term' => 'month',
27
+ 'public_sharing' => false,
28
+ 'photo_sharing' => true,
29
+ 'created_at' => Time.new(2012, 6, 21, 13, 30, 0)
30
30
  },
31
31
  {
32
- "_id" => 2,
33
- "name" => "Team",
34
- "plan_aliases" => ["Business", "Paid"],
35
- "features" => [
32
+ '_id' => 2,
33
+ 'name' => 'Team',
34
+ 'plan_aliases' => ['Business', 'Paid'],
35
+ 'features' => [
36
36
  {
37
- "max_storage" => 53687091200,
38
- "type" => "AmazonS3",
39
- "users" => {"max" => 5, "additional" => true}
37
+ 'max_storage' => 53687091200,
38
+ 'type' => 'AmazonS3',
39
+ 'users' => {'max' => 5, 'additional' => true}
40
40
  },
41
41
  {
42
- "max_storage" => 53687091200,
43
- "type" => "DropBox",
44
- "users" => {"max" => 5, "additional" => true}
42
+ 'max_storage' => 53687091200,
43
+ 'type' => 'DropBox',
44
+ 'users' => {'max' => 5, 'additional' => true}
45
45
  }
46
46
  ],
47
- "term" => "month",
48
- "public_sharing" => true,
49
- "photo_sharing" => true,
50
- "created_at" => Time.new(2012, 8, 11, 13, 1, 0)
47
+ 'term' => 'month',
48
+ 'public_sharing' => true,
49
+ 'photo_sharing' => true,
50
+ 'created_at' => Time.new(2012, 8, 11, 13, 1, 0)
51
51
  }
52
52
  ]
53
- plans_coll = Mongo::Client.new("mongodb://localhost/test").database.collection('plans')
53
+ plans_coll = Mongo::Client.new('mongodb://localhost/test').database.collection('plans')
54
54
  plans.each { |p| plans_coll.insert_one p }
55
55
  end
56
56
 
57
- it "should anonymize plans collection" do
57
+ it 'should anonymize plans collection' do
58
58
 
59
59
  database 'dest' do
60
60
  strategy DataAnon::Strategy::MongoDB::Whitelist
61
- source_db :mongodb_uri => "mongodb://localhost/test", :database => 'test'
62
- destination_db :mongodb_uri => "mongodb://localhost/dest", :database => 'dest'
61
+ source_db :mongodb_uri => 'mongodb://localhost/test', :database => 'test'
62
+ destination_db :mongodb_uri => 'mongodb://localhost/dest', :database => 'dest'
63
63
 
64
64
  collection 'plans' do
65
65
  whitelist '_id', 'name', 'term', 'created_at'
@@ -78,15 +78,15 @@ describe "End 2 End MongoDB Whitelist Acceptance Test" do
78
78
 
79
79
  end
80
80
 
81
- plans_coll = Mongo::Client.new("mongodb://localhost/dest").database.collection('plans')
81
+ plans_coll = Mongo::Client.new('mongodb://localhost/dest').database.collection('plans')
82
82
  plans_coll.find.count.to_int.should be 2
83
83
  plan = plans_coll.find({ '_id' => 1}).to_a[0]
84
84
 
85
85
  plan['_id'].should == 1
86
- plan['name'].should == "Free"
87
- plan['nick_names'][0].should_not == "Name1"
88
- plan['nick_names'][1].should_not == "Name2"
89
- plan['term'].should == "month"
86
+ plan['name'].should == 'Free'
87
+ plan['nick_names'][0].should_not == 'Name1'
88
+ plan['nick_names'][1].should_not == 'Name2'
89
+ plan['term'].should == 'month'
90
90
  plan['created_at'].should == Time.new(2012, 6, 21, 13, 30, 0)
91
91
  plan['plan_aliases'].should be_nil
92
92
  [true,false].should include(plan['public_sharing'])
@@ -94,14 +94,14 @@ describe "End 2 End MongoDB Whitelist Acceptance Test" do
94
94
  plan['features'].length.should == 2
95
95
  feature1 = plan['features'][0]
96
96
  [10737418240, 21474836480, 53687091200].should include(feature1['max_storage'])
97
- feature1['type'].should == "AmazonS3"
97
+ feature1['type'].should == 'AmazonS3'
98
98
  feature1['users']['max'].should be_kind_of(Fixnum)
99
99
  [true,false].should include(feature1['users']['additional'])
100
100
 
101
101
 
102
102
  plan = plans_coll.find({ '_id' => 2}).to_a[0]
103
103
  plan['plan_aliases'].length.should == 2
104
- ["Free", "Team", "Business", "Paid"].should include(plan['plan_aliases'][0])
105
- ["Free", "Team", "Business", "Paid"].should include(plan['plan_aliases'][1])
104
+ ['Free', 'Team', 'Business', 'Paid'].should include(plan['plan_aliases'][0])
105
+ ['Free', 'Team', 'Business', 'Paid'].should include(plan['plan_aliases'][1])
106
106
  end
107
107
  end
@@ -1,4 +1,4 @@
1
- require "spec_helper"
1
+ require 'spec_helper'
2
2
 
3
3
  describe 'End 2 End RDBMS Whitelist Acceptance Test using SQLite database' do
4
4
 
@@ -1,6 +1,6 @@
1
- require "spec_helper"
1
+ require 'spec_helper'
2
2
 
3
- describe "End 2 End RDBMS Whitelist Acceptance Test using SQLite database" do
3
+ describe 'End 2 End RDBMS Whitelist Acceptance Test using SQLite database' do
4
4
 
5
5
  source_connection_spec = {:adapter => 'sqlite3', :database => 'tmp/customer.sqlite'}
6
6
  dest_connection_spec = {:adapter => 'sqlite3', :database => 'tmp/customer-dest.sqlite'}
@@ -13,9 +13,9 @@ describe "End 2 End RDBMS Whitelist Acceptance Test using SQLite database" do
13
13
  CustomerSample.create_schema dest_connection_spec
14
14
  end
15
15
 
16
- it "should anonymize customer table record " do
16
+ it 'should anonymize customer table record ' do
17
17
 
18
- database "Customer" do
18
+ database 'Customer' do
19
19
  strategy DataAnon::Strategy::Whitelist
20
20
  source_db source_connection_spec
21
21
  destination_db dest_connection_spec
@@ -37,14 +37,14 @@ describe "End 2 End RDBMS Whitelist Acceptance Test using SQLite database" do
37
37
 
38
38
  DataAnon::Utils::DestinationDatabase.establish_connection dest_connection_spec
39
39
  dest_table = DataAnon::Utils::DestinationTable.create 'customers'
40
- new_rec = dest_table.where("cust_id" => CustomerSample::SAMPLE_DATA[0][:cust_id]).first
41
- new_rec.first_name.should_not be("Sunit")
42
- new_rec.last_name.should_not be("Parekh")
40
+ new_rec = dest_table.where('cust_id' => CustomerSample::SAMPLE_DATA[0][:cust_id]).first
41
+ new_rec.first_name.should_not be('Sunit')
42
+ new_rec.last_name.should_not be('Parekh')
43
43
  new_rec.birth_date.should_not be(Date.new(1977,7,8))
44
44
  new_rec.address.should == 'F 501 Shanti Nagar'
45
45
  ['Gujrat','Karnataka'].should include(new_rec.state)
46
46
  new_rec.zipcode.should == '411048'
47
- new_rec.phone.should_not be "9923700662"
47
+ new_rec.phone.should_not be '9923700662'
48
48
  new_rec.email.should == 'test+1@gmail.com'
49
49
  [true,false].should include(new_rec.terms_n_condition)
50
50
  new_rec.age.should be_between(0,100)
@@ -1,26 +1,26 @@
1
- require "spec_helper"
1
+ require 'spec_helper'
2
2
 
3
3
  describe DataAnon::Core::FieldsMissingStrategy do
4
4
 
5
5
  FMS = DataAnon::Core::FieldsMissingStrategy
6
6
 
7
- it "should be able to add field for new table that doesn't exist" do
8
- users = FMS.new("users")
9
- users.missing("confirm_email")
10
- users.fields_missing_strategy.should == ["confirm_email"]
7
+ it 'should be able to add field for new table that doesnot exist' do
8
+ users = FMS.new('users')
9
+ users.missing('confirm_email')
10
+ users.fields_missing_strategy.should == ['confirm_email']
11
11
  end
12
12
 
13
- it "should be able to take care for same field appearing multiple time" do
14
- users = FMS.new("users")
15
- users.missing("confirm_email")
16
- users.missing("confirm_email")
17
- users.fields_missing_strategy.should == ["confirm_email"]
13
+ it 'should be able to take care for same field appearing multiple time' do
14
+ users = FMS.new('users')
15
+ users.missing('confirm_email')
16
+ users.missing('confirm_email')
17
+ users.fields_missing_strategy.should == ['confirm_email']
18
18
  end
19
19
 
20
- it "should be able to add multiple fields for table" do
21
- users = FMS.new("users")
22
- users.missing("confirm_email")
23
- users.missing("password_reset")
24
- users.fields_missing_strategy.should == ["confirm_email","password_reset"]
20
+ it 'should be able to add multiple fields for table' do
21
+ users = FMS.new('users')
22
+ users.missing('confirm_email')
23
+ users.missing('password_reset')
24
+ users.fields_missing_strategy.should == %w(confirm_email password_reset)
25
25
  end
26
26
  end
@@ -1,4 +1,4 @@
1
- require "spec_helper"
1
+ require 'spec_helper'
2
2
 
3
3
  describe DataAnon::Strategy::Field::RandomAddress do
4
4
 
@@ -7,6 +7,6 @@ describe DataAnon::Strategy::Field::RandomAddress do
7
7
 
8
8
  describe 'anonymized address should be different from original address' do
9
9
  let(:anonymized_address) {RandomAddress.region_US.anonymize(field)}
10
- it {anonymized_address.should_not eq("1 Infinite Loop")}
10
+ it {anonymized_address.should_not eq('1 Infinite Loop')}
11
11
  end
12
12
  end
@@ -1,4 +1,4 @@
1
- require "spec_helper"
1
+ require 'spec_helper'
2
2
 
3
3
  describe FieldStrategy::DefaultAnon do
4
4
 
@@ -1,4 +1,4 @@
1
- require "spec_helper"
1
+ require 'spec_helper'
2
2
 
3
3
  describe FieldStrategy::RandomBoolean do
4
4
 
@@ -1,4 +1,4 @@
1
- require "spec_helper"
1
+ require 'spec_helper'
2
2
 
3
3
  describe FieldStrategy::Whitelist do
4
4
 
@@ -1,4 +1,4 @@
1
- require "spec_helper"
1
+ require 'spec_helper'
2
2
 
3
3
  describe DataAnon::Strategy::MongoDB::AnonymizeField do
4
4
 
@@ -6,30 +6,30 @@ describe DataAnon::Strategy::MongoDB::AnonymizeField do
6
6
  it 'should do callback recursive in case of sub document' do
7
7
  sub_document = {'key' => 'value'}
8
8
  field_strategy = {'key' => FieldStrategy::LoremIpsum.new}
9
- anonymization_strategy = double("AnonymizationStrategy")
10
- anonymization_strategy.should_receive(:anonymize_document).with(sub_document,1,field_strategy).and_return({'key' => "anonymized_value"})
9
+ anonymization_strategy = double('AnonymizationStrategy')
10
+ anonymization_strategy.should_receive(:anonymize_document).with(sub_document,1,field_strategy).and_return({'key' => 'anonymized_value'})
11
11
  field = DataAnon::Core::Field.new('sub_document_field', sub_document,1,nil)
12
12
  anonymize_field = DataAnon::Strategy::MongoDB::AnonymizeField.new(field, field_strategy,anonymization_strategy)
13
13
  anonymized_value = anonymize_field.anonymize
14
- anonymized_value['key'].should == "anonymized_value"
14
+ anonymized_value['key'].should == 'anonymized_value'
15
15
  end
16
16
 
17
17
  it 'should do callback recursive multiple time in case of array of sub document' do
18
18
  sub_documents = [{'key' => 'value1'},{'key' => 'value2'}]
19
19
  field_strategy = {'key' => FieldStrategy::LoremIpsum.new}
20
- anonymization_strategy = double("AnonymizationStrategy")
21
- anonymization_strategy.should_receive(:anonymize_document).with({'key' => 'value1'},1,field_strategy).and_return({'key' => "anonymized_value1"})
22
- anonymization_strategy.should_receive(:anonymize_document).with({'key' => 'value2'},1,field_strategy).and_return({'key' => "anonymized_value2"})
20
+ anonymization_strategy = double('AnonymizationStrategy')
21
+ anonymization_strategy.should_receive(:anonymize_document).with({'key' => 'value1'},1,field_strategy).and_return({'key' => 'anonymized_value1'})
22
+ anonymization_strategy.should_receive(:anonymize_document).with({'key' => 'value2'},1,field_strategy).and_return({'key' => 'anonymized_value2'})
23
23
  field = DataAnon::Core::Field.new('sub_document_field', sub_documents,1,nil)
24
24
  anonymize_field = DataAnon::Strategy::MongoDB::AnonymizeField.new(field, field_strategy,anonymization_strategy)
25
25
  anonymized_value = anonymize_field.anonymize
26
26
  anonymized_value.length.should == 2
27
- anonymized_value[0]['key'].should == "anonymized_value1"
28
- anonymized_value[1]['key'].should == "anonymized_value2"
27
+ anonymized_value[0]['key'].should == 'anonymized_value1'
28
+ anonymized_value[1]['key'].should == 'anonymized_value2'
29
29
  end
30
30
 
31
31
  it 'should anonymize array field data type' do
32
- anonymization_strategy = double("AnonymizationStrategy")
32
+ anonymization_strategy = double('AnonymizationStrategy')
33
33
  anonymization_strategy.should_not_receive(:anonymize_document)
34
34
  field = DataAnon::Core::Field.new('tags',['tag1','tag2'],1,nil)
35
35
  anonymize_field = DataAnon::Strategy::MongoDB::AnonymizeField.new(field,FieldStrategy::SelectFromList.new(['tag4','tag5','tag6','tag7','tag8']),anonymization_strategy)
@@ -40,7 +40,7 @@ describe DataAnon::Strategy::MongoDB::AnonymizeField do
40
40
  end
41
41
 
42
42
  it 'should anonymize field and return anonymized value using passed strategy' do
43
- anonymization_strategy = double("AnonymizationStrategy")
43
+ anonymization_strategy = double('AnonymizationStrategy')
44
44
  anonymization_strategy.should_not_receive(:anonymize_document)
45
45
  field = DataAnon::Core::Field.new('boolean_field',false,1,nil)
46
46
  anonymize_field = DataAnon::Strategy::MongoDB::AnonymizeField.new(field,FieldStrategy::RandomBoolean.new,anonymization_strategy)
@@ -1,14 +1,14 @@
1
- require "spec_helper"
1
+ require 'spec_helper'
2
2
 
3
- describe "Template Helper" do
3
+ describe 'Template Helper' do
4
4
 
5
- it "should return a correctly formatted string based on input connection hash for source" do
6
- connection_hash = {:adapter => "test_adapter", :port => 5000}
5
+ it 'should return a correctly formatted string based on input connection hash for source' do
6
+ connection_hash = {adapter: 'test_adapter', port: 5000}
7
7
  DataAnon::Utils::TemplateHelper.source_connection_specs_rdbms(connection_hash).should eq(":adapter => 'test_adapter', :port => 5000")
8
8
  end
9
9
 
10
- it "should return a correctly formatted string based on input connection hash for destination" do
11
- connection_hash = {:adapter => "test_adapter", :port => 5000}
10
+ it 'should return a correctly formatted string based on input connection hash for destination' do
11
+ connection_hash = {adapter: 'test_adapter', port: 5000}
12
12
  DataAnon::Utils::TemplateHelper.destination_connection_specs_rdbms(connection_hash).should eq(":adapter => '<enter_value>', :port => '<enter_value>'")
13
13
  end
14
14
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: data-anonymization
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.8.0
4
+ version: 0.8.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Sunit Parekh
@@ -10,7 +10,7 @@ authors:
10
10
  autorequire:
11
11
  bindir: bin
12
12
  cert_chain: []
13
- date: 2016-10-31 00:00:00.000000000 Z
13
+ date: 2017-08-19 00:00:00.000000000 Z
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
16
16
  name: activerecord
@@ -102,28 +102,28 @@ dependencies:
102
102
  requirements:
103
103
  - - "~>"
104
104
  - !ruby/object:Gem::Version
105
- version: '1.9'
105
+ version: '1.12'
106
106
  type: :runtime
107
107
  prerelease: false
108
108
  version_requirements: !ruby/object:Gem::Requirement
109
109
  requirements:
110
110
  - - "~>"
111
111
  - !ruby/object:Gem::Version
112
- version: '1.9'
112
+ version: '1.12'
113
113
  - !ruby/object:Gem::Dependency
114
114
  name: thor
115
115
  requirement: !ruby/object:Gem::Requirement
116
116
  requirements:
117
117
  - - "~>"
118
118
  - !ruby/object:Gem::Version
119
- version: '0.19'
119
+ version: '0.20'
120
120
  type: :runtime
121
121
  prerelease: false
122
122
  version_requirements: !ruby/object:Gem::Requirement
123
123
  requirements:
124
124
  - - "~>"
125
125
  - !ruby/object:Gem::Version
126
- version: '0.19'
126
+ version: '0.20'
127
127
  description: Data anonymization tool for RDBMS and MongoDB databases
128
128
  email:
129
129
  - parekh.sunit@gmail.com
@@ -151,6 +151,7 @@ files:
151
151
  - examples/mongodb_blacklist_dsl.rb
152
152
  - examples/mongodb_whitelist_dsl.rb
153
153
  - examples/whitelist_dsl.rb
154
+ - examples/whitelist_dsl_threads.rb
154
155
  - lib/core/database.rb
155
156
  - lib/core/dsl.rb
156
157
  - lib/core/field.rb
@@ -297,7 +298,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
297
298
  version: '0'
298
299
  requirements: []
299
300
  rubyforge_project:
300
- rubygems_version: 2.5.1
301
+ rubygems_version: 2.6.12
301
302
  signing_key:
302
303
  specification_version: 4
303
304
  summary: Tool to create anonymized production data dump to use for performance and