data-anonymization 0.8.0 → 0.8.5
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/.gitignore +2 -1
- data/.ruby-version +1 -1
- data/.travis.yml +4 -3
- data/README.md +24 -8
- data/commands.txt +4 -0
- data/data-anonymization.gemspec +7 -7
- data/examples/blacklist_dsl.rb +3 -3
- data/examples/mongodb_blacklist_dsl.rb +5 -5
- data/examples/mongodb_whitelist_dsl.rb +7 -7
- data/examples/whitelist_dsl.rb +4 -4
- data/examples/whitelist_dsl_threads.rb +66 -0
- data/lib/core/field.rb +1 -1
- data/lib/core/table_errors.rb +1 -1
- data/lib/data-anonymization.rb +22 -22
- data/lib/strategy/base.rb +41 -1
- data/lib/strategy/blacklist.rb +3 -2
- data/lib/strategy/field/datetime/anonymize_time.rb +3 -3
- data/lib/strategy/field/default_anon.rb +1 -0
- data/lib/strategy/field/number/random_big_decimal_delta.rb +1 -1
- data/lib/strategy/field/string/random_url.rb +1 -1
- data/lib/strategy/field/string/select_from_database.rb +14 -7
- data/lib/strategy/strategies.rb +1 -1
- data/lib/thor/helpers/mongodb_dsl_generator.rb +2 -3
- data/lib/utils/database.rb +1 -1
- data/lib/utils/random_int.rb +2 -2
- data/lib/utils/template_helper.rb +4 -2
- data/lib/version.rb +1 -1
- data/spec/acceptance/mongodb_blacklist_spec.rb +39 -39
- data/spec/acceptance/mongodb_whitelist_spec.rb +45 -45
- data/spec/acceptance/rdbms_whitelist_spec.rb +1 -1
- data/spec/acceptance/rdbms_whitelist_with_primary_key_spec.rb +8 -8
- data/spec/core/fields_missing_strategy_spec.rb +15 -15
- data/spec/strategy/field/contact/random_address_spec.rb +2 -2
- data/spec/strategy/field/default_anon_spec.rb +3 -3
- data/spec/strategy/field/number/random_big_decimal_delta_spec.rb +1 -1
- data/spec/strategy/field/random_boolean_spec.rb +1 -1
- data/spec/strategy/field/whitelist_spec.rb +1 -1
- data/spec/strategy/mongodb/anonymize_field_spec.rb +11 -11
- data/spec/utils/database_spec.rb +4 -4
- data/spec/utils/template_helper_spec.rb +6 -6
- metadata +19 -18
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 7c24dc45a72724bdc3a3d96977afbf04382ab3d40e04981b5b9a6676ffaf83f0
|
4
|
+
data.tar.gz: 1b9bfd7209781e2afc867248ad374e5cac923c975b1933691e0216f0684721c1
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 58d1c365443b98223f2a59f31eab5700ad8023df5924c61157104fb503946a84145d6d8b5d413f0549935644a83de9386dcd5e663dc7caa451ad13d20881d57e
|
7
|
+
data.tar.gz: 382c0e5c77ae689d09351969898baba8b5cc8dea3775293ce402fa4f953307141295ad76e8fbe589ebac7be3125a989083eb6e489087c050abb5c9d768518dc9
|
data/.gitignore
CHANGED
data/.ruby-version
CHANGED
@@ -1 +1 @@
|
|
1
|
-
ruby-2.
|
1
|
+
ruby-2.7.1
|
data/.travis.yml
CHANGED
data/README.md
CHANGED
@@ -1,6 +1,17 @@
|
|
1
1
|
# Data::Anonymization
|
2
2
|
Afraid of using production data due to privacy issues? Data Anonymization is a tool that helps you build anonymized production data dumps which you can use for performance testing, security testing, debugging and development.
|
3
3
|
|
4
|
+
## Java/Kotlin version
|
5
|
+
|
6
|
+
A Java/Kotlin version of the tool, supporting RDBMS databases, is available with a similarly easy-to-use DSL.
|
7
|
+
* [Kotlin/Java Data Anonymization Tool](https://github.com/dataanon/data-anon)
|
8
|
+
* [Kotlin Maven Sample Project](https://github.com/dataanon/dataanon-kotlin-sample)
|
9
|
+
* [Java Maven Sample Project](https://github.com/dataanon/dataanon-java-sample)
|
10
|
+
|
11
|
+
|
12
|
+
----------------------
|
13
|
+
|
14
|
+
|
4
15
|
[<img src="https://secure.travis-ci.org/sunitparekh/data-anonymization.png?branch=master">](http://travis-ci.org/sunitparekh/data-anonymization)
|
5
16
|
[<img src="https://gemnasium.com/sunitparekh/data-anonymization.png?travis">](https://gemnasium.com/sunitparekh/data-anonymization)
|
6
17
|
[<img src="https://codeclimate.com/badge.png">](https://codeclimate.com/github/sunitparekh/data-anonymization)
|
@@ -70,22 +81,26 @@ Postgresql database having **composite primary key**
|
|
70
81
|
|
71
82
|
## Changelog
|
72
83
|
|
84
|
+
#### 0.8.1 (Aug 19, 2017)
|
85
|
+
1. Multi-threading support added by [stanislav-tyutin](https://github.com/stanislav-tyutin) using Pull Request.
|
86
|
+
2. Fixed to work with Ruby 2.4.x, issue with Integer data type
|
87
|
+
|
73
88
|
#### 0.8.0 (Oct 31, 2016)
|
74
|
-
1. Upgraded to rails 5.x
|
89
|
+
1. Upgraded to rails 5.x
|
75
90
|
|
76
91
|
#### 0.7.4 (Oct 29, 2016)
|
77
|
-
1. Continue to work on rails 4.x. Minor changes based on feedback.
|
92
|
+
1. Continue to work on rails 4.x. Minor changes based on feedback.
|
78
93
|
|
79
94
|
#### 0.8.0.rc1 (Sep 5, 2016)
|
80
|
-
1. Upgraded to rails 5.0, please report any issue or use case not working.
|
95
|
+
1. Upgraded to rails 5.0, please report any issue or use case not working.
|
81
96
|
|
82
97
|
#### 0.7.3 (Feb 5, 2016)
|
83
|
-
1. Fixed issue with batchsize. Thanks to [Jan Raasch](https://github.com/janraasch) for sending pull request.
|
98
|
+
1. Fixed issue with batchsize. Thanks to [Jan Raasch](https://github.com/janraasch) for sending pull request.
|
84
99
|
|
85
100
|
#### 0.7.2 (Sep 26, 2015)
|
86
101
|
1. Upgraded MongoDB to latest gem version 2.1.0 and tested with MongoDB 3.x version.
|
87
|
-
2. Upgraded gems to latest version
|
88
|
-
3. Adding limit functionality - Merge pull request #27 from yanismydj/master
|
102
|
+
2. Upgraded gems to latest version
|
103
|
+
3. Adding limit functionality - Merge pull request #27 from yanismydj/master
|
89
104
|
|
90
105
|
#### 0.7.1 (Jun 13, 2015)
|
91
106
|
1. Fixed issues with empty array data for MongoDB
|
@@ -224,6 +239,7 @@ Read more about [blacklist and whitelist here](http://sunitspace.blogspot.in/201
|
|
224
239
|
5. Make sure to give proper case for fields and table names.
|
225
240
|
6. Use skip and continue to apply different strategies for records.
|
226
241
|
7. Use 'limit' to limit the number of rows that will be imported in whitelist
|
242
|
+
8. For RDBMS databases that use schemas, the schemas to search can be specified via `schema_search_path`: `source_db { ... schema_search_path: 'public,my_special_schema' }`
|
227
243
|
|
228
244
|
## DSL Generation
|
229
245
|
|
@@ -269,7 +285,7 @@ datanon generate_rdbms_dsl -a postgresql -h 123.456.7.8 -d production_db
|
|
269
285
|
|
270
286
|
```
|
271
287
|
|
272
|
-
The relevant db gems must be installed so that AR has the adapters required to establish the connection to the databases. The script generates a file named **rdbms_whitelist_generated.rb** in the same location as the project.
|
288
|
+
The relevant db gems must be installed so that AR has the adapters required to establish the connection to the databases. The script generates a file named **rdbms_whitelist_generated.rb** in the same location as the project.
|
273
289
|
|
274
290
|
### MongoDB whitelist generation
|
275
291
|
|
@@ -374,7 +390,7 @@ has following attribute accessor
|
|
374
390
|
</tr>
|
375
391
|
<tr>
|
376
392
|
<td align="left">Text</td>
|
377
|
-
<td align="left"><a href="http://rubydoc.info/github/sunitparekh/data-anonymization/DataAnon/Strategy/Field/RandomUrl">
|
393
|
+
<td align="left"><a href="http://rubydoc.info/github/sunitparekh/data-anonymization/DataAnon/Strategy/Field/RandomUrl">RandomUrl</a></td>
|
378
394
|
<td align="left">Anonymizes a URL while maintaining the structure</td>
|
379
395
|
</tr>
|
380
396
|
</table><table>
|
data/commands.txt
ADDED
data/data-anonymization.gemspec
CHANGED
@@ -19,12 +19,12 @@ Gem::Specification.new do |gem|
|
|
19
19
|
gem.test_files = gem.files.grep(%r{^(test|spec|features)/})
|
20
20
|
gem.require_paths = ['lib']
|
21
21
|
|
22
|
-
gem.add_dependency('activerecord', '~>
|
23
|
-
gem.add_dependency('composite_primary_keys', '~>
|
24
|
-
gem.add_dependency('activesupport', '~>
|
25
|
-
gem.add_dependency('rgeo', '~> 0
|
26
|
-
gem.add_dependency('rgeo-geojson', '~> 0
|
22
|
+
gem.add_dependency('activerecord', '~> 6.0')
|
23
|
+
gem.add_dependency('composite_primary_keys', '~> 12.0')
|
24
|
+
gem.add_dependency('activesupport', '~> 6.0')
|
25
|
+
gem.add_dependency('rgeo', '~> 1.0')
|
26
|
+
gem.add_dependency('rgeo-geojson', '~> 2.0')
|
27
27
|
gem.add_dependency('powerbar', '~> 1.0')
|
28
|
-
gem.add_dependency('parallel', '~> 1.
|
29
|
-
gem.add_dependency('thor', '~> 0.
|
28
|
+
gem.add_dependency('parallel', '~> 1.12')
|
29
|
+
gem.add_dependency('thor', '~> 0.20.3')
|
30
30
|
end
|
data/examples/blacklist_dsl.rb
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
system
|
1
|
+
system 'bundle exec ruby examples/whitelist_dsl.rb'
|
2
2
|
|
3
3
|
require 'data-anonymization'
|
4
4
|
|
@@ -18,7 +18,7 @@ database 'Chinook' do
|
|
18
18
|
anonymize('City').using FieldStrategy::RandomCity.region_US
|
19
19
|
anonymize('State').using FieldStrategy::RandomProvince.region_US
|
20
20
|
anonymize('PostalCode').using FieldStrategy::RandomZipcode.region_US
|
21
|
-
anonymize('Country') {|field|
|
21
|
+
anonymize('Country') {|field| 'USA'}
|
22
22
|
anonymize('Phone').using FieldStrategy::RandomPhoneNumber.new
|
23
23
|
anonymize('Fax').using FieldStrategy::RandomPhoneNumber.new
|
24
24
|
anonymize('Email').using FieldStrategy::StringTemplate.new('test+#{row_number}@gmail.com')
|
@@ -33,7 +33,7 @@ database 'Chinook' do
|
|
33
33
|
anonymize('City').using FieldStrategy::RandomCity.region_US
|
34
34
|
anonymize('State').using FieldStrategy::RandomProvince.region_US
|
35
35
|
anonymize('PostalCode').using FieldStrategy::RandomZipcode.region_US
|
36
|
-
anonymize('Country') {|field|
|
36
|
+
anonymize('Country') {|field| 'USA'}
|
37
37
|
anonymize('Fax').using FieldStrategy::RandomPhoneNumber.new
|
38
38
|
anonymize('Email').using FieldStrategy::StringTemplate.new('test+#{row_number}@gmail.com')
|
39
39
|
end
|
@@ -4,20 +4,20 @@ require 'mongo'
|
|
4
4
|
DataAnon::Utils::Logging.logger.level = Logger::INFO
|
5
5
|
Mongo::Logger.logger.level = Logger::WARN
|
6
6
|
|
7
|
-
Mongo::Client.new(
|
8
|
-
system
|
9
|
-
system
|
7
|
+
Mongo::Client.new('mongodb://localhost/test').database.drop
|
8
|
+
system 'mongoimport --host=127.0.0.1 -d test --drop -c users --jsonArray ./sample-data/mongo/users.json'
|
9
|
+
system 'mongoimport --host=127.0.0.1 -d test --drop -c plans --jsonArray ./sample-data/mongo/plans.json'
|
10
10
|
|
11
11
|
|
12
12
|
database 'test' do
|
13
13
|
strategy DataAnon::Strategy::MongoDB::Blacklist
|
14
|
-
source_db :mongodb_uri =>
|
14
|
+
source_db :mongodb_uri => 'mongodb://localhost/test', :database => 'test'
|
15
15
|
|
16
16
|
collection 'users' do
|
17
17
|
anonymize('date_of_birth').using FieldStrategy::TimeDelta.new(5,30)
|
18
18
|
anonymize('user_id').using FieldStrategy::StringTemplate.new('user-#{row_number}')
|
19
19
|
anonymize('email').using FieldStrategy::RandomMailinatorEmail.new
|
20
|
-
anonymize('password') { |field|
|
20
|
+
anonymize('password') { |field| 'password'}
|
21
21
|
anonymize('first_name').using FieldStrategy::RandomFirstName.new
|
22
22
|
anonymize('last_name').using FieldStrategy::RandomLastName.new
|
23
23
|
end
|
@@ -5,22 +5,22 @@ DataAnon::Utils::Logging.logger.level = Logger::INFO
|
|
5
5
|
Mongo::Logger.logger.level = Logger::WARN
|
6
6
|
|
7
7
|
|
8
|
-
Mongo::Client.new(
|
9
|
-
Mongo::Client.new(
|
10
|
-
system
|
11
|
-
system
|
8
|
+
Mongo::Client.new('mongodb://localhost/test').database.drop
|
9
|
+
Mongo::Client.new('mongodb://localhost/dest').database.drop
|
10
|
+
system 'mongoimport --host=127.0.0.1 -d test --drop -c users --jsonArray ./sample-data/mongo/users.json'
|
11
|
+
system 'mongoimport --host=127.0.0.1 -d test --drop -c plans --jsonArray ./sample-data/mongo/plans.json'
|
12
12
|
|
13
13
|
database 'test' do
|
14
14
|
strategy DataAnon::Strategy::MongoDB::Whitelist
|
15
|
-
source_db :mongodb_uri =>
|
16
|
-
destination_db :mongodb_uri =>
|
15
|
+
source_db :mongodb_uri => 'mongodb://localhost/test', :database => 'test'
|
16
|
+
destination_db :mongodb_uri => 'mongodb://localhost/dest', :database => 'dest'
|
17
17
|
|
18
18
|
collection 'users' do
|
19
19
|
whitelist '_id','failed_attempts','updated_at'
|
20
20
|
anonymize('date_of_birth').using FieldStrategy::TimeDelta.new(5,30)
|
21
21
|
anonymize('user_id').using FieldStrategy::StringTemplate.new('user-#{row_number}')
|
22
22
|
anonymize('email').using FieldStrategy::RandomMailinatorEmail.new
|
23
|
-
anonymize('password') { |field|
|
23
|
+
anonymize('password') { |field| 'password'}
|
24
24
|
anonymize('first_name').using FieldStrategy::RandomFirstName.new
|
25
25
|
anonymize('last_name').using FieldStrategy::RandomLastName.new
|
26
26
|
anonymize 'password_reset_answer','password_reset_question'
|
data/examples/whitelist_dsl.rb
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
system
|
1
|
+
system 'rake empty_dest' # clean destination database on every call
|
2
2
|
|
3
3
|
require 'data-anonymization'
|
4
4
|
|
@@ -15,7 +15,7 @@ database 'Chinook' do
|
|
15
15
|
primary_key 'GenreId'
|
16
16
|
whitelist 'GenreId'
|
17
17
|
anonymize 'Name' do |field|
|
18
|
-
field.value +
|
18
|
+
field.value + ' test'
|
19
19
|
end
|
20
20
|
end
|
21
21
|
|
@@ -39,7 +39,7 @@ database 'Chinook' do
|
|
39
39
|
anonymize('City').using FieldStrategy::RandomCity.region_US
|
40
40
|
anonymize('State').using FieldStrategy::RandomProvince.region_US
|
41
41
|
anonymize('PostalCode').using FieldStrategy::RandomZipcode.region_US
|
42
|
-
anonymize('Country') {|field|
|
42
|
+
anonymize('Country') {|field| 'USA'}
|
43
43
|
anonymize('Fax').using FieldStrategy::RandomPhoneNumber.new
|
44
44
|
anonymize('Email').using FieldStrategy::StringTemplate.new('test+#{row_number}@gmail.com')
|
45
45
|
end
|
@@ -56,7 +56,7 @@ database 'Chinook' do
|
|
56
56
|
anonymize('City').using FieldStrategy::RandomCity.region_US
|
57
57
|
anonymize('State').using FieldStrategy::RandomProvince.region_US
|
58
58
|
anonymize('PostalCode').using FieldStrategy::RandomZipcode.region_US
|
59
|
-
anonymize('Country') {|field|
|
59
|
+
anonymize('Country') {|field| 'USA'}
|
60
60
|
anonymize('Phone').using FieldStrategy::RandomPhoneNumber.new
|
61
61
|
anonymize('Fax').using FieldStrategy::RandomPhoneNumber.new
|
62
62
|
anonymize('Email').using FieldStrategy::StringTemplate.new('test+#{row_number}@gmail.com')
|
@@ -0,0 +1,66 @@
|
|
1
|
+
system 'rake empty_dest' # clean destination database on every call
|
2
|
+
|
3
|
+
require 'data-anonymization'
|
4
|
+
|
5
|
+
DataAnon::Utils::Logging.logger.level = Logger::INFO
|
6
|
+
|
7
|
+
database 'Chinook' do
|
8
|
+
strategy DataAnon::Strategy::Whitelist
|
9
|
+
source_db :adapter => 'sqlite3', :database => 'sample-data/chinook.sqlite'
|
10
|
+
destination_db :adapter => 'sqlite3', :database => 'sample-data/chinook-empty.sqlite'
|
11
|
+
|
12
|
+
default_field_strategies :string => FieldStrategy::StringTemplate.new('Sunit #{row_number} Parekh')
|
13
|
+
|
14
|
+
table 'Genre' do
|
15
|
+
primary_key 'GenreId'
|
16
|
+
whitelist 'GenreId'
|
17
|
+
anonymize 'Name' do |field|
|
18
|
+
field.value + ' test'
|
19
|
+
end
|
20
|
+
end
|
21
|
+
|
22
|
+
table 'MediaType' do
|
23
|
+
primary_key 'MediaTypeId'
|
24
|
+
anonymize('MediaTypeId') { |field| field.value } # same as whitelist
|
25
|
+
anonymize('Name').using FieldStrategy::StringTemplate.new('Media Type #{row_number}')
|
26
|
+
|
27
|
+
end
|
28
|
+
|
29
|
+
table 'Customer' do
|
30
|
+
primary_key 'CustomerId'
|
31
|
+
thread_num 5 # thread_num
|
32
|
+
|
33
|
+
whitelist 'CustomerId', 'SupportRepId', 'Company'
|
34
|
+
anonymize('Phone').using FieldStrategy::RandomPhoneNumber.new
|
35
|
+
anonymize('FirstName').using FieldStrategy::RandomFirstName.new
|
36
|
+
anonymize('LastName').using FieldStrategy::RandomLastName.new
|
37
|
+
anonymize('Address').using FieldStrategy::RandomAddress.region_US
|
38
|
+
anonymize('City').using FieldStrategy::RandomCity.region_US
|
39
|
+
anonymize('State').using FieldStrategy::RandomProvince.region_US
|
40
|
+
anonymize('PostalCode').using FieldStrategy::RandomZipcode.region_US
|
41
|
+
anonymize('Country') {|field| 'USA'}
|
42
|
+
anonymize('Fax').using FieldStrategy::RandomPhoneNumber.new
|
43
|
+
anonymize('Email').using FieldStrategy::StringTemplate.new('test+#{row_number}@gmail.com')
|
44
|
+
end
|
45
|
+
|
46
|
+
table 'Employee' do
|
47
|
+
thread_num 5 # thread_num
|
48
|
+
|
49
|
+
whitelist 'EmployeeId', 'ReportsTo', 'Title'
|
50
|
+
anonymize('BirthDate').using FieldStrategy::DateTimeDelta.new(1, 1)
|
51
|
+
anonymize('FirstName').using FieldStrategy::RandomFirstName.new
|
52
|
+
anonymize('LastName').using FieldStrategy::RandomLastName.new
|
53
|
+
anonymize('HireDate').using FieldStrategy::DateTimeDelta.new(2, 0)
|
54
|
+
anonymize('Address').using FieldStrategy::RandomAddress.region_US
|
55
|
+
anonymize('City').using FieldStrategy::RandomCity.region_US
|
56
|
+
anonymize('State').using FieldStrategy::RandomProvince.region_US
|
57
|
+
anonymize('PostalCode').using FieldStrategy::RandomZipcode.region_US
|
58
|
+
anonymize('Country') {|field| 'USA'}
|
59
|
+
anonymize('Phone').using FieldStrategy::RandomPhoneNumber.new
|
60
|
+
anonymize('Fax').using FieldStrategy::RandomPhoneNumber.new
|
61
|
+
anonymize('Email').using FieldStrategy::StringTemplate.new('test+#{row_number}@gmail.com')
|
62
|
+
end
|
63
|
+
|
64
|
+
|
65
|
+
|
66
|
+
end
|
data/lib/core/field.rb
CHANGED
data/lib/core/table_errors.rb
CHANGED
@@ -11,7 +11,7 @@ module DataAnon
|
|
11
11
|
|
12
12
|
def log_error record, exception
|
13
13
|
@errors << { :record => record, :exception => exception}
|
14
|
-
raise
|
14
|
+
raise 'Reached limit of error for a table' if @errors.length > 100
|
15
15
|
end
|
16
16
|
|
17
17
|
def errors
|
data/lib/data-anonymization.rb
CHANGED
@@ -1,28 +1,28 @@
|
|
1
|
-
require
|
1
|
+
require 'version'
|
2
2
|
|
3
|
-
require
|
4
|
-
require
|
5
|
-
require
|
6
|
-
require
|
7
|
-
require
|
8
|
-
require
|
9
|
-
require
|
10
|
-
require
|
11
|
-
require
|
12
|
-
require
|
13
|
-
require
|
14
|
-
require
|
15
|
-
require
|
16
|
-
require
|
17
|
-
require
|
18
|
-
require
|
19
|
-
require
|
20
|
-
require
|
21
|
-
require
|
3
|
+
require 'utils/logging'
|
4
|
+
require 'utils/random_int'
|
5
|
+
require 'utils/random_float'
|
6
|
+
require 'utils/random_string'
|
7
|
+
require 'utils/random_string_chars_only'
|
8
|
+
require 'utils/geojson_parser'
|
9
|
+
require 'utils/progress_bar'
|
10
|
+
require 'utils/parallel_progress_bar'
|
11
|
+
require 'utils/resource'
|
12
|
+
require 'utils/template_helper'
|
13
|
+
require 'parallel/table'
|
14
|
+
require 'core/database'
|
15
|
+
require 'core/fields_missing_strategy'
|
16
|
+
require 'thor/helpers/rdbms_dsl_generator'
|
17
|
+
require 'core/field'
|
18
|
+
require 'core/table_errors'
|
19
|
+
require 'strategy/strategies'
|
20
|
+
require 'utils/database'
|
21
|
+
require 'core/dsl'
|
22
22
|
|
23
23
|
begin
|
24
24
|
require 'mongo'
|
25
|
-
require
|
25
|
+
require 'thor/helpers/mongodb_dsl_generator'
|
26
26
|
rescue LoadError
|
27
|
-
|
27
|
+
'Ignoring the mongodb specific libraries if monog driver is not specified in gem'
|
28
28
|
end
|
data/lib/strategy/base.rb
CHANGED
@@ -37,6 +37,10 @@ module DataAnon
|
|
37
37
|
@limit = limit
|
38
38
|
end
|
39
39
|
|
40
|
+
def thread_num thread_num
|
41
|
+
@thread_num = thread_num
|
42
|
+
end
|
43
|
+
|
40
44
|
def whitelist *fields
|
41
45
|
fields.each { |f| @fields[f] = DataAnon::Strategy::Field::Whitelist.new }
|
42
46
|
end
|
@@ -95,13 +99,15 @@ module DataAnon
|
|
95
99
|
progress = progress_bar.new(@name, total)
|
96
100
|
if @primary_keys.empty? || !@batch_size.present?
|
97
101
|
process_table progress
|
102
|
+
elsif @thread_num.present?
|
103
|
+
process_table_in_threads progress
|
98
104
|
else
|
99
105
|
process_table_in_batches progress
|
100
106
|
end
|
101
107
|
progress.close
|
102
108
|
end
|
103
109
|
if source_table.respond_to?('clear_all_connections!')
|
104
|
-
|
110
|
+
source_table.clear_all_connections!
|
105
111
|
end
|
106
112
|
end
|
107
113
|
|
@@ -134,6 +140,40 @@ module DataAnon
|
|
134
140
|
end
|
135
141
|
end
|
136
142
|
|
143
|
+
def process_table_in_threads progress
|
144
|
+
logger.info "Processing table #{@name} records in batch size of #{@batch_size} [THREADS]"
|
145
|
+
|
146
|
+
index = 0
|
147
|
+
threads = []
|
148
|
+
|
149
|
+
source_table.find_in_batches(batch_size: @batch_size) do |records|
|
150
|
+
until threads.count(&:alive?) <= @thread_num
|
151
|
+
thr = threads.delete_at 0
|
152
|
+
thr.join
|
153
|
+
progress.show index
|
154
|
+
end
|
155
|
+
|
156
|
+
thr = Thread.new {
|
157
|
+
records.each do |record|
|
158
|
+
begin
|
159
|
+
process_record_if index, record
|
160
|
+
index += 1
|
161
|
+
rescue => exception
|
162
|
+
puts exception.inspect
|
163
|
+
@errors.log_error record, exception
|
164
|
+
end
|
165
|
+
end
|
166
|
+
}
|
167
|
+
threads << thr
|
168
|
+
end
|
169
|
+
|
170
|
+
until threads.empty?
|
171
|
+
thr = threads.delete_at 0
|
172
|
+
thr.join
|
173
|
+
progress.show index
|
174
|
+
end
|
175
|
+
end
|
176
|
+
|
137
177
|
def source_table_limited
|
138
178
|
@source_table_limited ||= begin
|
139
179
|
if @limit.present?
|