data-anonymization 0.8.0 → 0.8.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/.gitignore +2 -1
- data/.ruby-version +1 -1
- data/.travis.yml +4 -3
- data/README.md +24 -8
- data/commands.txt +4 -0
- data/data-anonymization.gemspec +7 -7
- data/examples/blacklist_dsl.rb +3 -3
- data/examples/mongodb_blacklist_dsl.rb +5 -5
- data/examples/mongodb_whitelist_dsl.rb +7 -7
- data/examples/whitelist_dsl.rb +4 -4
- data/examples/whitelist_dsl_threads.rb +66 -0
- data/lib/core/field.rb +1 -1
- data/lib/core/table_errors.rb +1 -1
- data/lib/data-anonymization.rb +22 -22
- data/lib/strategy/base.rb +41 -1
- data/lib/strategy/blacklist.rb +3 -2
- data/lib/strategy/field/datetime/anonymize_time.rb +3 -3
- data/lib/strategy/field/default_anon.rb +1 -0
- data/lib/strategy/field/number/random_big_decimal_delta.rb +1 -1
- data/lib/strategy/field/string/random_url.rb +1 -1
- data/lib/strategy/field/string/select_from_database.rb +14 -7
- data/lib/strategy/strategies.rb +1 -1
- data/lib/thor/helpers/mongodb_dsl_generator.rb +2 -3
- data/lib/utils/database.rb +1 -1
- data/lib/utils/random_int.rb +2 -2
- data/lib/utils/template_helper.rb +4 -2
- data/lib/version.rb +1 -1
- data/spec/acceptance/mongodb_blacklist_spec.rb +39 -39
- data/spec/acceptance/mongodb_whitelist_spec.rb +45 -45
- data/spec/acceptance/rdbms_whitelist_spec.rb +1 -1
- data/spec/acceptance/rdbms_whitelist_with_primary_key_spec.rb +8 -8
- data/spec/core/fields_missing_strategy_spec.rb +15 -15
- data/spec/strategy/field/contact/random_address_spec.rb +2 -2
- data/spec/strategy/field/default_anon_spec.rb +3 -3
- data/spec/strategy/field/number/random_big_decimal_delta_spec.rb +1 -1
- data/spec/strategy/field/random_boolean_spec.rb +1 -1
- data/spec/strategy/field/whitelist_spec.rb +1 -1
- data/spec/strategy/mongodb/anonymize_field_spec.rb +11 -11
- data/spec/utils/database_spec.rb +4 -4
- data/spec/utils/template_helper_spec.rb +6 -6
- metadata +19 -18
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 7c24dc45a72724bdc3a3d96977afbf04382ab3d40e04981b5b9a6676ffaf83f0
|
4
|
+
data.tar.gz: 1b9bfd7209781e2afc867248ad374e5cac923c975b1933691e0216f0684721c1
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 58d1c365443b98223f2a59f31eab5700ad8023df5924c61157104fb503946a84145d6d8b5d413f0549935644a83de9386dcd5e663dc7caa451ad13d20881d57e
|
7
|
+
data.tar.gz: 382c0e5c77ae689d09351969898baba8b5cc8dea3775293ce402fa4f953307141295ad76e8fbe589ebac7be3125a989083eb6e489087c050abb5c9d768518dc9
|
data/.gitignore
CHANGED
data/.ruby-version
CHANGED
@@ -1 +1 @@
|
|
1
|
-
ruby-2.
|
1
|
+
ruby-2.7.1
|
data/.travis.yml
CHANGED
data/README.md
CHANGED
@@ -1,6 +1,17 @@
|
|
1
1
|
# Data::Anonymization
|
2
2
|
Afraid of using production data due to privacy issues? Data Anonymization is a tool that helps you build anonymized production data dumps which you can use for performance testing, security testing, debugging and development.
|
3
3
|
|
4
|
+
## Java/Kotlin version
|
5
|
+
|
6
|
+
Java/Kotlin version of tool supporting RDBMS databases is available with similar easy to use DSL.
|
7
|
+
* [Kotlin/Java Data Anonymization Tool](https://github.com/dataanon/data-anon)
|
8
|
+
* [Kotlin Maven Sample Project](https://github.com/dataanon/dataanon-kotlin-sample)
|
9
|
+
* [Java Maven Sample Project](https://github.com/dataanon/dataanon-java-sample)
|
10
|
+
|
11
|
+
|
12
|
+
----------------------
|
13
|
+
|
14
|
+
|
4
15
|
[<img src="https://secure.travis-ci.org/sunitparekh/data-anonymization.png?branch=master">](http://travis-ci.org/sunitparekh/data-anonymization)
|
5
16
|
[<img src="https://gemnasium.com/sunitparekh/data-anonymization.png?travis">](https://gemnasium.com/sunitparekh/data-anonymization)
|
6
17
|
[<img src="https://codeclimate.com/badge.png">](https://codeclimate.com/github/sunitparekh/data-anonymization)
|
@@ -70,22 +81,26 @@ Postgresql database having **composite primary key**
|
|
70
81
|
|
71
82
|
## Changelog
|
72
83
|
|
84
|
+
#### 0.8.1 (Aug 19, 2017)
|
85
|
+
1. Multi-threading support added by [stanislav-tyutin](https://github.com/stanislav-tyutin) using Pull Request.
|
86
|
+
2. Fixed to work with Ruby 2.4.x, issue with Integer data type
|
87
|
+
|
73
88
|
#### 0.8.0 (Oct 31, 2016)
|
74
|
-
1. Upgraded to rails 5.x
|
89
|
+
1. Upgraded to rails 5.x
|
75
90
|
|
76
91
|
#### 0.7.4 (Oct 29, 2016)
|
77
|
-
1. Continue to work on rails 4.x. Minor changes based on feedback.
|
92
|
+
1. Continue to work on rails 4.x. Minor changes based on feedback.
|
78
93
|
|
79
94
|
#### 0.8.0.rc1 (Sep 5, 2016)
|
80
|
-
1. Upgraded to rails 5.0, please report any issue or use case not working.
|
95
|
+
1. Upgraded to rails 5.0, please report any issue or use case not working.
|
81
96
|
|
82
97
|
#### 0.7.3 (Feb 5, 2016)
|
83
|
-
1. Fixed issue with batchsize. Thanks to [Jan Raasch](https://github.com/janraasch) for sending pull request.
|
98
|
+
1. Fixed issue with batchsize. Thanks to [Jan Raasch](https://github.com/janraasch) for sending pull request.
|
84
99
|
|
85
100
|
#### 0.7.2 (Sep 26, 2015)
|
86
101
|
1. Upgraded MongoDB to latest gem version 2.1.0 and tested with MongoDB 3.x version.
|
87
|
-
2. Upgraded gems to latest version
|
88
|
-
3. Adding limit functionality - Merge pull request #27 from yanismydj/master
|
102
|
+
2. Upgraded gems to latest version
|
103
|
+
3. Adding limit functionality - Merge pull request #27 from yanismydj/master
|
89
104
|
|
90
105
|
#### 0.7.1 (Jun 13, 2015)
|
91
106
|
1. Fixed issues with empty array data for MongoDB
|
@@ -224,6 +239,7 @@ Read more about [blacklist and whitelist here](http://sunitspace.blogspot.in/201
|
|
224
239
|
5. Make sure to give proper case for fields and table names.
|
225
240
|
6. Use skip and continue to apply different strategies for records.
|
226
241
|
7. Use 'limit' to limit the number of rows that will be imported in whitelist
|
242
|
+
8. RDBMS databases utilizing schemas can be specified via `schema_search_path`: `source_db { ... schema_search_path: 'public,my_special_schema' }`
|
227
243
|
|
228
244
|
## DSL Generation
|
229
245
|
|
@@ -269,7 +285,7 @@ datanon generate_rdbms_dsl -a postgresql -h 123.456.7.8 -d production_db
|
|
269
285
|
|
270
286
|
```
|
271
287
|
|
272
|
-
The relevant db gems must be installed so that AR has the adapters required to establish the connection to the databases. The script generates a file named **rdbms_whitelist_generated.rb** in the same location as the project.
|
288
|
+
The relevant db gems must be installed so that AR has the adapters required to establish the connection to the databases. The script generates a file named **rdbms_whitelist_generated.rb** in the same location as the project.
|
273
289
|
|
274
290
|
### MongoDB whitelist generation
|
275
291
|
|
@@ -374,7 +390,7 @@ has following attribute accessor
|
|
374
390
|
</tr>
|
375
391
|
<tr>
|
376
392
|
<td align="left">Text</td>
|
377
|
-
<td align="left"><a href="http://rubydoc.info/github/sunitparekh/data-anonymization/DataAnon/Strategy/Field/RandomUrl">
|
393
|
+
<td align="left"><a href="http://rubydoc.info/github/sunitparekh/data-anonymization/DataAnon/Strategy/Field/RandomUrl">RandomUrl</a></td>
|
378
394
|
<td align="left">Anonymizes a URL while mainting the structure</td>
|
379
395
|
</tr>
|
380
396
|
</table><table>
|
data/commands.txt
ADDED
data/data-anonymization.gemspec
CHANGED
@@ -19,12 +19,12 @@ Gem::Specification.new do |gem|
|
|
19
19
|
gem.test_files = gem.files.grep(%r{^(test|spec|features)/})
|
20
20
|
gem.require_paths = ['lib']
|
21
21
|
|
22
|
-
gem.add_dependency('activerecord', '~>
|
23
|
-
gem.add_dependency('composite_primary_keys', '~>
|
24
|
-
gem.add_dependency('activesupport', '~>
|
25
|
-
gem.add_dependency('rgeo', '~> 0
|
26
|
-
gem.add_dependency('rgeo-geojson', '~> 0
|
22
|
+
gem.add_dependency('activerecord', '~> 6.0')
|
23
|
+
gem.add_dependency('composite_primary_keys', '~> 12.0')
|
24
|
+
gem.add_dependency('activesupport', '~> 6.0')
|
25
|
+
gem.add_dependency('rgeo', '~> 1.0')
|
26
|
+
gem.add_dependency('rgeo-geojson', '~> 2.0')
|
27
27
|
gem.add_dependency('powerbar', '~> 1.0')
|
28
|
-
gem.add_dependency('parallel', '~> 1.
|
29
|
-
gem.add_dependency('thor', '~> 0.
|
28
|
+
gem.add_dependency('parallel', '~> 1.12')
|
29
|
+
gem.add_dependency('thor', '~> 0.20.3')
|
30
30
|
end
|
data/examples/blacklist_dsl.rb
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
system
|
1
|
+
system 'bundle exec ruby examples/whitelist_dsl.rb'
|
2
2
|
|
3
3
|
require 'data-anonymization'
|
4
4
|
|
@@ -18,7 +18,7 @@ database 'Chinook' do
|
|
18
18
|
anonymize('City').using FieldStrategy::RandomCity.region_US
|
19
19
|
anonymize('State').using FieldStrategy::RandomProvince.region_US
|
20
20
|
anonymize('PostalCode').using FieldStrategy::RandomZipcode.region_US
|
21
|
-
anonymize('Country') {|field|
|
21
|
+
anonymize('Country') {|field| 'USA'}
|
22
22
|
anonymize('Phone').using FieldStrategy::RandomPhoneNumber.new
|
23
23
|
anonymize('Fax').using FieldStrategy::RandomPhoneNumber.new
|
24
24
|
anonymize('Email').using FieldStrategy::StringTemplate.new('test+#{row_number}@gmail.com')
|
@@ -33,7 +33,7 @@ database 'Chinook' do
|
|
33
33
|
anonymize('City').using FieldStrategy::RandomCity.region_US
|
34
34
|
anonymize('State').using FieldStrategy::RandomProvince.region_US
|
35
35
|
anonymize('PostalCode').using FieldStrategy::RandomZipcode.region_US
|
36
|
-
anonymize('Country') {|field|
|
36
|
+
anonymize('Country') {|field| 'USA'}
|
37
37
|
anonymize('Fax').using FieldStrategy::RandomPhoneNumber.new
|
38
38
|
anonymize('Email').using FieldStrategy::StringTemplate.new('test+#{row_number}@gmail.com')
|
39
39
|
end
|
@@ -4,20 +4,20 @@ require 'mongo'
|
|
4
4
|
DataAnon::Utils::Logging.logger.level = Logger::INFO
|
5
5
|
Mongo::Logger.logger.level = Logger::WARN
|
6
6
|
|
7
|
-
Mongo::Client.new(
|
8
|
-
system
|
9
|
-
system
|
7
|
+
Mongo::Client.new('mongodb://localhost/test').database.drop
|
8
|
+
system 'mongoimport --host=127.0.0.1 -d test --drop -c users --jsonArray ./sample-data/mongo/users.json'
|
9
|
+
system 'mongoimport --host=127.0.0.1 -d test --drop -c plans --jsonArray ./sample-data/mongo/plans.json'
|
10
10
|
|
11
11
|
|
12
12
|
database 'test' do
|
13
13
|
strategy DataAnon::Strategy::MongoDB::Blacklist
|
14
|
-
source_db :mongodb_uri =>
|
14
|
+
source_db :mongodb_uri => 'mongodb://localhost/test', :database => 'test'
|
15
15
|
|
16
16
|
collection 'users' do
|
17
17
|
anonymize('date_of_birth').using FieldStrategy::TimeDelta.new(5,30)
|
18
18
|
anonymize('user_id').using FieldStrategy::StringTemplate.new('user-#{row_number}')
|
19
19
|
anonymize('email').using FieldStrategy::RandomMailinatorEmail.new
|
20
|
-
anonymize('password') { |field|
|
20
|
+
anonymize('password') { |field| 'password'}
|
21
21
|
anonymize('first_name').using FieldStrategy::RandomFirstName.new
|
22
22
|
anonymize('last_name').using FieldStrategy::RandomLastName.new
|
23
23
|
end
|
@@ -5,22 +5,22 @@ DataAnon::Utils::Logging.logger.level = Logger::INFO
|
|
5
5
|
Mongo::Logger.logger.level = Logger::WARN
|
6
6
|
|
7
7
|
|
8
|
-
Mongo::Client.new(
|
9
|
-
Mongo::Client.new(
|
10
|
-
system
|
11
|
-
system
|
8
|
+
Mongo::Client.new('mongodb://localhost/test').database.drop
|
9
|
+
Mongo::Client.new('mongodb://localhost/dest').database.drop
|
10
|
+
system 'mongoimport --host=127.0.0.1 -d test --drop -c users --jsonArray ./sample-data/mongo/users.json'
|
11
|
+
system 'mongoimport --host=127.0.0.1 -d test --drop -c plans --jsonArray ./sample-data/mongo/plans.json'
|
12
12
|
|
13
13
|
database 'test' do
|
14
14
|
strategy DataAnon::Strategy::MongoDB::Whitelist
|
15
|
-
source_db :mongodb_uri =>
|
16
|
-
destination_db :mongodb_uri =>
|
15
|
+
source_db :mongodb_uri => 'mongodb://localhost/test', :database => 'test'
|
16
|
+
destination_db :mongodb_uri => 'mongodb://localhost/dest', :database => 'dest'
|
17
17
|
|
18
18
|
collection 'users' do
|
19
19
|
whitelist '_id','failed_attempts','updated_at'
|
20
20
|
anonymize('date_of_birth').using FieldStrategy::TimeDelta.new(5,30)
|
21
21
|
anonymize('user_id').using FieldStrategy::StringTemplate.new('user-#{row_number}')
|
22
22
|
anonymize('email').using FieldStrategy::RandomMailinatorEmail.new
|
23
|
-
anonymize('password') { |field|
|
23
|
+
anonymize('password') { |field| 'password'}
|
24
24
|
anonymize('first_name').using FieldStrategy::RandomFirstName.new
|
25
25
|
anonymize('last_name').using FieldStrategy::RandomLastName.new
|
26
26
|
anonymize 'password_reset_answer','password_reset_question'
|
data/examples/whitelist_dsl.rb
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
system
|
1
|
+
system 'rake empty_dest' # clean destination database on every call
|
2
2
|
|
3
3
|
require 'data-anonymization'
|
4
4
|
|
@@ -15,7 +15,7 @@ database 'Chinook' do
|
|
15
15
|
primary_key 'GenreId'
|
16
16
|
whitelist 'GenreId'
|
17
17
|
anonymize 'Name' do |field|
|
18
|
-
field.value +
|
18
|
+
field.value + ' test'
|
19
19
|
end
|
20
20
|
end
|
21
21
|
|
@@ -39,7 +39,7 @@ database 'Chinook' do
|
|
39
39
|
anonymize('City').using FieldStrategy::RandomCity.region_US
|
40
40
|
anonymize('State').using FieldStrategy::RandomProvince.region_US
|
41
41
|
anonymize('PostalCode').using FieldStrategy::RandomZipcode.region_US
|
42
|
-
anonymize('Country') {|field|
|
42
|
+
anonymize('Country') {|field| 'USA'}
|
43
43
|
anonymize('Fax').using FieldStrategy::RandomPhoneNumber.new
|
44
44
|
anonymize('Email').using FieldStrategy::StringTemplate.new('test+#{row_number}@gmail.com')
|
45
45
|
end
|
@@ -56,7 +56,7 @@ database 'Chinook' do
|
|
56
56
|
anonymize('City').using FieldStrategy::RandomCity.region_US
|
57
57
|
anonymize('State').using FieldStrategy::RandomProvince.region_US
|
58
58
|
anonymize('PostalCode').using FieldStrategy::RandomZipcode.region_US
|
59
|
-
anonymize('Country') {|field|
|
59
|
+
anonymize('Country') {|field| 'USA'}
|
60
60
|
anonymize('Phone').using FieldStrategy::RandomPhoneNumber.new
|
61
61
|
anonymize('Fax').using FieldStrategy::RandomPhoneNumber.new
|
62
62
|
anonymize('Email').using FieldStrategy::StringTemplate.new('test+#{row_number}@gmail.com')
|
@@ -0,0 +1,66 @@
|
|
1
|
+
system 'rake empty_dest' # clean destination database on every call
|
2
|
+
|
3
|
+
require 'data-anonymization'
|
4
|
+
|
5
|
+
DataAnon::Utils::Logging.logger.level = Logger::INFO
|
6
|
+
|
7
|
+
database 'Chinook' do
|
8
|
+
strategy DataAnon::Strategy::Whitelist
|
9
|
+
source_db :adapter => 'sqlite3', :database => 'sample-data/chinook.sqlite'
|
10
|
+
destination_db :adapter => 'sqlite3', :database => 'sample-data/chinook-empty.sqlite'
|
11
|
+
|
12
|
+
default_field_strategies :string => FieldStrategy::StringTemplate.new('Sunit #{row_number} Parekh')
|
13
|
+
|
14
|
+
table 'Genre' do
|
15
|
+
primary_key 'GenreId'
|
16
|
+
whitelist 'GenreId'
|
17
|
+
anonymize 'Name' do |field|
|
18
|
+
field.value + ' test'
|
19
|
+
end
|
20
|
+
end
|
21
|
+
|
22
|
+
table 'MediaType' do
|
23
|
+
primary_key 'MediaTypeId'
|
24
|
+
anonymize('MediaTypeId') { |field| field.value } # same as whitelist
|
25
|
+
anonymize('Name').using FieldStrategy::StringTemplate.new('Media Type #{row_number}')
|
26
|
+
|
27
|
+
end
|
28
|
+
|
29
|
+
table 'Customer' do
|
30
|
+
primary_key 'CustomerId'
|
31
|
+
thread_num 5 # thread_num
|
32
|
+
|
33
|
+
whitelist 'CustomerId', 'SupportRepId', 'Company'
|
34
|
+
anonymize('Phone').using FieldStrategy::RandomPhoneNumber.new
|
35
|
+
anonymize('FirstName').using FieldStrategy::RandomFirstName.new
|
36
|
+
anonymize('LastName').using FieldStrategy::RandomLastName.new
|
37
|
+
anonymize('Address').using FieldStrategy::RandomAddress.region_US
|
38
|
+
anonymize('City').using FieldStrategy::RandomCity.region_US
|
39
|
+
anonymize('State').using FieldStrategy::RandomProvince.region_US
|
40
|
+
anonymize('PostalCode').using FieldStrategy::RandomZipcode.region_US
|
41
|
+
anonymize('Country') {|field| 'USA'}
|
42
|
+
anonymize('Fax').using FieldStrategy::RandomPhoneNumber.new
|
43
|
+
anonymize('Email').using FieldStrategy::StringTemplate.new('test+#{row_number}@gmail.com')
|
44
|
+
end
|
45
|
+
|
46
|
+
table 'Employee' do
|
47
|
+
thread_num 5 # thread_num
|
48
|
+
|
49
|
+
whitelist 'EmployeeId', 'ReportsTo', 'Title'
|
50
|
+
anonymize('BirthDate').using FieldStrategy::DateTimeDelta.new(1, 1)
|
51
|
+
anonymize('FirstName').using FieldStrategy::RandomFirstName.new
|
52
|
+
anonymize('LastName').using FieldStrategy::RandomLastName.new
|
53
|
+
anonymize('HireDate').using FieldStrategy::DateTimeDelta.new(2, 0)
|
54
|
+
anonymize('Address').using FieldStrategy::RandomAddress.region_US
|
55
|
+
anonymize('City').using FieldStrategy::RandomCity.region_US
|
56
|
+
anonymize('State').using FieldStrategy::RandomProvince.region_US
|
57
|
+
anonymize('PostalCode').using FieldStrategy::RandomZipcode.region_US
|
58
|
+
anonymize('Country') {|field| 'USA'}
|
59
|
+
anonymize('Phone').using FieldStrategy::RandomPhoneNumber.new
|
60
|
+
anonymize('Fax').using FieldStrategy::RandomPhoneNumber.new
|
61
|
+
anonymize('Email').using FieldStrategy::StringTemplate.new('test+#{row_number}@gmail.com')
|
62
|
+
end
|
63
|
+
|
64
|
+
|
65
|
+
|
66
|
+
end
|
data/lib/core/field.rb
CHANGED
data/lib/core/table_errors.rb
CHANGED
@@ -11,7 +11,7 @@ module DataAnon
|
|
11
11
|
|
12
12
|
def log_error record, exception
|
13
13
|
@errors << { :record => record, :exception => exception}
|
14
|
-
raise
|
14
|
+
raise 'Reached limit of error for a table' if @errors.length > 100
|
15
15
|
end
|
16
16
|
|
17
17
|
def errors
|
data/lib/data-anonymization.rb
CHANGED
@@ -1,28 +1,28 @@
|
|
1
|
-
require
|
1
|
+
require 'version'
|
2
2
|
|
3
|
-
require
|
4
|
-
require
|
5
|
-
require
|
6
|
-
require
|
7
|
-
require
|
8
|
-
require
|
9
|
-
require
|
10
|
-
require
|
11
|
-
require
|
12
|
-
require
|
13
|
-
require
|
14
|
-
require
|
15
|
-
require
|
16
|
-
require
|
17
|
-
require
|
18
|
-
require
|
19
|
-
require
|
20
|
-
require
|
21
|
-
require
|
3
|
+
require 'utils/logging'
|
4
|
+
require 'utils/random_int'
|
5
|
+
require 'utils/random_float'
|
6
|
+
require 'utils/random_string'
|
7
|
+
require 'utils/random_string_chars_only'
|
8
|
+
require 'utils/geojson_parser'
|
9
|
+
require 'utils/progress_bar'
|
10
|
+
require 'utils/parallel_progress_bar'
|
11
|
+
require 'utils/resource'
|
12
|
+
require 'utils/template_helper'
|
13
|
+
require 'parallel/table'
|
14
|
+
require 'core/database'
|
15
|
+
require 'core/fields_missing_strategy'
|
16
|
+
require 'thor/helpers/rdbms_dsl_generator'
|
17
|
+
require 'core/field'
|
18
|
+
require 'core/table_errors'
|
19
|
+
require 'strategy/strategies'
|
20
|
+
require 'utils/database'
|
21
|
+
require 'core/dsl'
|
22
22
|
|
23
23
|
begin
|
24
24
|
require 'mongo'
|
25
|
-
require
|
25
|
+
require 'thor/helpers/mongodb_dsl_generator'
|
26
26
|
rescue LoadError
|
27
|
-
|
27
|
+
'Ignoring the mongodb specific libraries if monog driver is not specified in gem'
|
28
28
|
end
|
data/lib/strategy/base.rb
CHANGED
@@ -37,6 +37,10 @@ module DataAnon
|
|
37
37
|
@limit = limit
|
38
38
|
end
|
39
39
|
|
40
|
+
def thread_num thread_num
|
41
|
+
@thread_num = thread_num
|
42
|
+
end
|
43
|
+
|
40
44
|
def whitelist *fields
|
41
45
|
fields.each { |f| @fields[f] = DataAnon::Strategy::Field::Whitelist.new }
|
42
46
|
end
|
@@ -95,13 +99,15 @@ module DataAnon
|
|
95
99
|
progress = progress_bar.new(@name, total)
|
96
100
|
if @primary_keys.empty? || !@batch_size.present?
|
97
101
|
process_table progress
|
102
|
+
elsif @thread_num.present?
|
103
|
+
process_table_in_threads progress
|
98
104
|
else
|
99
105
|
process_table_in_batches progress
|
100
106
|
end
|
101
107
|
progress.close
|
102
108
|
end
|
103
109
|
if source_table.respond_to?('clear_all_connections!')
|
104
|
-
|
110
|
+
source_table.clear_all_connections!
|
105
111
|
end
|
106
112
|
end
|
107
113
|
|
@@ -134,6 +140,40 @@ module DataAnon
|
|
134
140
|
end
|
135
141
|
end
|
136
142
|
|
143
|
+
def process_table_in_threads progress
|
144
|
+
logger.info "Processing table #{@name} records in batch size of #{@batch_size} [THREADS]"
|
145
|
+
|
146
|
+
index = 0
|
147
|
+
threads = []
|
148
|
+
|
149
|
+
source_table.find_in_batches(batch_size: @batch_size) do |records|
|
150
|
+
until threads.count(&:alive?) <= @thread_num
|
151
|
+
thr = threads.delete_at 0
|
152
|
+
thr.join
|
153
|
+
progress.show index
|
154
|
+
end
|
155
|
+
|
156
|
+
thr = Thread.new {
|
157
|
+
records.each do |record|
|
158
|
+
begin
|
159
|
+
process_record_if index, record
|
160
|
+
index += 1
|
161
|
+
rescue => exception
|
162
|
+
puts exception.inspect
|
163
|
+
@errors.log_error record, exception
|
164
|
+
end
|
165
|
+
end
|
166
|
+
}
|
167
|
+
threads << thr
|
168
|
+
end
|
169
|
+
|
170
|
+
until threads.empty?
|
171
|
+
thr = threads.delete_at 0
|
172
|
+
thr.join
|
173
|
+
progress.show index
|
174
|
+
end
|
175
|
+
end
|
176
|
+
|
137
177
|
def source_table_limited
|
138
178
|
@source_table_limited ||= begin
|
139
179
|
if @limit.present?
|