data-anonymization 0.7.0 → 0.7.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +37 -0
- data/examples/mongodb_blacklist_dsl.rb +2 -2
- data/examples/mongodb_whitelist_dsl.rb +1 -1
- data/lib/strategy/base.rb +17 -2
- data/lib/strategy/mongodb/anonymize_field.rb +2 -2
- data/lib/version.rb +1 -1
- data/spec/acceptance/mongodb_blacklist_spec.rb +13 -6
- data/spec/acceptance/rdbms_blacklist_spec.rb +48 -4
- data/spec/support/customer_sample.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: afcaf064c0b8135149e5fb8e559ab95df8088516
|
4
|
+
data.tar.gz: 795dbba88fead534acdcb87b4cf18fd4bdf12ad8
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 85daa8177667c232eae29f3ad9a9f021a5d679898679967a5d435d4b3e3b7eb9bf0a6e84cd5441b43d823005a9dc0c295adfa3b9ca9387e1593ff01f4dc950d2
|
7
|
+
data.tar.gz: 2ce7411e57e5427e9fbcd211b37a60acc3669d38a2405364e51aeb27f5ff2d43c027df5e55291e8be39f81ad4543045c318a5d20422c0444780d912da9595c1d
|
data/README.md
CHANGED
@@ -70,6 +70,11 @@ Postgresql database having **composite primary key**
|
|
70
70
|
|
71
71
|
## Changelog
|
72
72
|
|
73
|
+
#### 0.7.1 (Jun 13, 2015)
|
74
|
+
1. Fixed issues with empty array data for MongoDB
|
75
|
+
2. Added feature to skip and continue records during anonymization; this is useful to apply different strategies for different types of records.
|
76
|
+
|
77
|
+
|
73
78
|
#### 0.7.0 (Mar 9, 2015)
|
74
79
|
1. Removed downcase from field name since it was causing issues with upper case field names. So now, for databases where case matters, field name case should be maintained.
|
75
80
|
2. Upgraded gems to latest version
|
@@ -200,6 +205,7 @@ Read more about [blacklist and whitelist here](http://sunitspace.blogspot.in/201
|
|
200
205
|
3. To run anonymization in parallel at Table level, provided no FK constraint on tables use DataAnon::Parallel::Table strategy
|
201
206
|
4. For large tables, set 'batch_size' to load records in batches; it will use RoR's batch mode processing. Check out [example](https://github.com/sunitparekh/data-anonymization/blob/master/examples/whitelist_dsl.rb) on how to use batch processing.
|
202
207
|
5. Make sure to give proper case for fields and table names.
|
208
|
+
6. Use skip and continue to apply different strategies for records.
|
203
209
|
|
204
210
|
## DSL Generation
|
205
211
|
|
@@ -567,6 +573,37 @@ ENV['show_progress'] = 'false'
|
|
567
573
|
DataAnon::Utils::Logging.logger.level = Logger::INFO
|
568
574
|
```
|
569
575
|
|
576
|
+
## Skip and Continue records
|
577
|
+
|
578
|
+
*Skip* is used to skip records during anonymization when the condition returns true. These records are ignored,
|
579
|
+
in blacklist they remain as they are in the database, and in case of whitelist these records will not be copied to the destination database.
|
580
|
+
|
581
|
+
```ruby
|
582
|
+
table 'customers' do
|
583
|
+
skip { |index, record| record['age'] < 18 }
|
584
|
+
|
585
|
+
primary_key 'cust_id'
|
586
|
+
anonymize('email').using FieldStrategy::StringTemplate.new('test+#{row_number}@gmail.com')
|
587
|
+
anonymize 'terms_n_condition', 'age'
|
588
|
+
end
|
589
|
+
```
|
590
|
+
|
591
|
+
|
592
|
+
*Continue* is the exact opposite of Skip: it continues with anonymization only if the given condition returns true.
|
593
|
+
In case of blacklist records are anonymized for matching conditions and for whitelist records are anonymized and copied
|
594
|
+
to new database for matching conditions.
|
595
|
+
|
596
|
+
```ruby
|
597
|
+
table 'customers' do
|
598
|
+
continue { |index, record| record['age'] > 18 }
|
599
|
+
|
600
|
+
primary_key 'cust_id'
|
601
|
+
anonymize('email').using FieldStrategy::StringTemplate.new('test+#{row_number}@gmail.com')
|
602
|
+
anonymize 'terms_n_condition', 'age'
|
603
|
+
end
|
604
|
+
```
|
605
|
+
|
606
|
+
|
570
607
|
## Want to contribute?
|
571
608
|
|
572
609
|
1. Fork it
|
@@ -21,10 +21,10 @@ database 'test' do
|
|
21
21
|
end
|
22
22
|
|
23
23
|
collection 'plans' do
|
24
|
-
anonymize('plan_aliases').using FieldStrategy::SelectFromList.new(["Free","Team","Business","Paid"])
|
24
|
+
anonymize('plan_aliases').using FieldStrategy::AnonymizeArray.new(FieldStrategy::SelectFromList.new(["Free","Team","Business","Paid"]))
|
25
25
|
anonymize 'public_sharing','photo_sharing'
|
26
26
|
|
27
|
-
|
27
|
+
document 'features' do
|
28
28
|
anonymize('max_storage').using FieldStrategy::SelectFromList.new([10737418240,21474836480,53687091200])
|
29
29
|
|
30
30
|
document 'users' do
|
@@ -26,7 +26,7 @@ database 'test' do
|
|
26
26
|
|
27
27
|
collection 'plans' do
|
28
28
|
whitelist '_id', 'name','term', 'created_at'
|
29
|
-
anonymize('plan_aliases').using FieldStrategy::SelectFromList.new(["Free","Team","Business","Paid"])
|
29
|
+
anonymize('plan_aliases').using FieldStrategy::AnonymizeArray.new(FieldStrategy::SelectFromList.new(["Free","Team","Business","Paid"]))
|
30
30
|
anonymize 'public_sharing','photo_sharing'
|
31
31
|
|
32
32
|
collection 'features' do
|
data/lib/strategy/base.rb
CHANGED
@@ -37,6 +37,14 @@ module DataAnon
|
|
37
37
|
fields.each { |f| @fields[f] = DataAnon::Strategy::Field::Whitelist.new }
|
38
38
|
end
|
39
39
|
|
40
|
+
def skip &block
|
41
|
+
@skip_block = block
|
42
|
+
end
|
43
|
+
|
44
|
+
def continue &block
|
45
|
+
@continue_block = block
|
46
|
+
end
|
47
|
+
|
40
48
|
def anonymize *fields, &block
|
41
49
|
if block.nil?
|
42
50
|
fields.each { |f| @fields[f] = DataAnon::Strategy::Field::DefaultAnon.new(@user_strategies) }
|
@@ -93,7 +101,7 @@ module DataAnon
|
|
93
101
|
source_table.all.each do |record|
|
94
102
|
index += 1
|
95
103
|
begin
|
96
|
-
|
104
|
+
process_record_if index, record
|
97
105
|
rescue => exception
|
98
106
|
@errors.log_error record, exception
|
99
107
|
end
|
@@ -107,7 +115,7 @@ module DataAnon
|
|
107
115
|
source_table.find_each(:batch_size => @batch_size) do |record|
|
108
116
|
index += 1
|
109
117
|
begin
|
110
|
-
|
118
|
+
process_record_if index, record
|
111
119
|
rescue => exception
|
112
120
|
@errors.log_error record, exception
|
113
121
|
end
|
@@ -115,6 +123,13 @@ module DataAnon
|
|
115
123
|
end
|
116
124
|
end
|
117
125
|
|
126
|
+
def process_record_if index, record
|
127
|
+
return if @skip_block && @skip_block.call(index, record)
|
128
|
+
return if @continue_block && !@continue_block.call(index, record)
|
129
|
+
|
130
|
+
process_record index, record
|
131
|
+
end
|
132
|
+
|
118
133
|
def progress_bar
|
119
134
|
@progress_bar || DataAnon::Utils::ProgressBar
|
120
135
|
end
|
@@ -30,11 +30,11 @@ module DataAnon
|
|
30
30
|
end
|
31
31
|
|
32
32
|
def sub_documents?
|
33
|
-
@field.value.kind_of?(Array) && @field.value[0].kind_of?(Hash)
|
33
|
+
@field.value.kind_of?(Array) && (@field_strategy.kind_of?(Hash) || @field.value[0].kind_of?(Hash))
|
34
34
|
end
|
35
35
|
|
36
36
|
def sub_document?
|
37
|
-
@field.value.kind_of?
|
37
|
+
@field.value.kind_of?(Hash)
|
38
38
|
end
|
39
39
|
|
40
40
|
|
data/lib/version.rb
CHANGED
@@ -10,27 +10,30 @@ describe "End 2 End MongoDB Blacklist Acceptance Test" do
|
|
10
10
|
"_id" => 1,
|
11
11
|
"USER_ID" => "sunitparekh",
|
12
12
|
"date_of_birth" => Time.new(2012, 7, 14, 13, 1, 0),
|
13
|
-
"email" => "parekh
|
13
|
+
"email" => "parekh-sunit@mailinator.com",
|
14
14
|
"password" => "TfqIK8Pd8GlbMDFZCX4l/5EtnOkfLCeynOL85tJQuxum&382knaflk@@",
|
15
15
|
"failed_attempts" => 0,
|
16
16
|
"first_name" => "Sunit",
|
17
17
|
"last_name" => "Parekh",
|
18
18
|
"password_reset_answer" => "manza",
|
19
19
|
"password_reset_question" => "My new car modal?",
|
20
|
-
"updated_at" => Time.new(2012, 8, 15, 13, 1, 0)
|
20
|
+
"updated_at" => Time.new(2012, 8, 15, 13, 1, 0),
|
21
|
+
"alternate_emails" => ["abc@test.com","abc2@test.com"]
|
22
|
+
|
21
23
|
},
|
22
24
|
{
|
23
25
|
"_id" => 2,
|
24
|
-
"
|
26
|
+
"USER_ID" => "anandagrawal",
|
25
27
|
"date_of_birth" => Time.new(2011, 8, 11, 13, 1, 0),
|
26
|
-
"email" => "
|
28
|
+
"email" => "anand-agrawal@mailinator.com",
|
27
29
|
"password" => "Tz548O0RWusldVAWkwqfzO3jK/X4l/5EtnOkfLCeynOL85tJQuxum",
|
28
30
|
"failed_attempts" => 0,
|
29
31
|
"first_name" => "Anand",
|
30
32
|
"last_name" => "Agrawal",
|
31
33
|
"password_reset_answer" => "android",
|
32
34
|
"password_reset_question" => "My phone?",
|
33
|
-
"updated_at" => Time.new(2012, 2, 11, 13, 1, 0)
|
35
|
+
"updated_at" => Time.new(2012, 2, 11, 13, 1, 0),
|
36
|
+
"alternate_emails" => ["abc@test.com","abc2@test.com"]
|
34
37
|
}
|
35
38
|
]
|
36
39
|
users_coll = Mongo::Connection.from_uri("mongodb://localhost/dest")['test']['users']
|
@@ -50,6 +53,7 @@ describe "End 2 End MongoDB Blacklist Acceptance Test" do
|
|
50
53
|
anonymize('password') { |field| "password" }
|
51
54
|
anonymize('first_name').using FieldStrategy::RandomFirstName.new
|
52
55
|
anonymize('last_name').using FieldStrategy::RandomLastName.new
|
56
|
+
anonymize('alternate_emails').using FieldStrategy::AnonymizeArray.new(FieldStrategy::RandomMailinatorEmail.new)
|
53
57
|
end
|
54
58
|
|
55
59
|
end
|
@@ -61,7 +65,7 @@ describe "End 2 End MongoDB Blacklist Acceptance Test" do
|
|
61
65
|
user['_id'].should == 1
|
62
66
|
user['USER_ID'].should == "user-1"
|
63
67
|
user['date_of_birth'].to_i.should_not == Time.new(2012, 7, 14, 13, 1, 0).to_i
|
64
|
-
user['email'].should_not == "parekh
|
68
|
+
user['email'].should_not == "parekh-sunit@mailinator.com"
|
65
69
|
user['password'].should == "password"
|
66
70
|
user['failed_attempts'].should == 0
|
67
71
|
user['first_name'].should_not be "Sunit"
|
@@ -69,6 +73,9 @@ describe "End 2 End MongoDB Blacklist Acceptance Test" do
|
|
69
73
|
user['password_reset_answer'].should == "manza"
|
70
74
|
user['password_reset_question'].should == "My new car modal?"
|
71
75
|
user['updated_at'].to_i.should == Time.new(2012, 8, 15, 13, 1, 0).to_i
|
76
|
+
user['alternate_emails'].length.should == 2
|
77
|
+
user['alternate_emails'][0].should_not == 'abc@test.com'
|
78
|
+
user['alternate_emails'][1].should_not == 'abc2@test.com'
|
72
79
|
|
73
80
|
|
74
81
|
end
|
@@ -1,6 +1,6 @@
|
|
1
|
-
require
|
1
|
+
require 'spec_helper'
|
2
2
|
|
3
|
-
describe
|
3
|
+
describe 'End 2 End RDBMS Blacklist Acceptance Test using SQLite database' do
|
4
4
|
connection_spec = {:adapter => 'sqlite3', :database => 'tmp/customer.sqlite'}
|
5
5
|
|
6
6
|
before(:each) do
|
@@ -9,9 +9,9 @@ describe "End 2 End RDBMS Blacklist Acceptance Test using SQLite database" do
|
|
9
9
|
CustomerSample.insert_record connection_spec, CustomerSample::SAMPLE_DATA
|
10
10
|
end
|
11
11
|
|
12
|
-
it
|
12
|
+
it 'should anonymize customer table record ' do
|
13
13
|
|
14
|
-
database
|
14
|
+
database 'Customer' do
|
15
15
|
strategy DataAnon::Strategy::Blacklist
|
16
16
|
source_db connection_spec
|
17
17
|
|
@@ -27,5 +27,49 @@ describe "End 2 End RDBMS Blacklist Acceptance Test using SQLite database" do
|
|
27
27
|
new_rec = source.find(CustomerSample::SAMPLE_DATA[:cust_id])
|
28
28
|
new_rec['email'].should == 'test+1@gmail.com'
|
29
29
|
|
30
|
+
end
|
31
|
+
|
32
|
+
it 'should skip anonymization of the record if condition in skip is true' do
|
33
|
+
database 'Customer' do
|
34
|
+
strategy DataAnon::Strategy::Blacklist
|
35
|
+
source_db connection_spec
|
36
|
+
|
37
|
+
table 'customers' do
|
38
|
+
skip { |index, record| record['age'] > 18 }
|
39
|
+
|
40
|
+
primary_key 'cust_id'
|
41
|
+
anonymize('email').using FieldStrategy::StringTemplate.new('test+#{row_number}@gmail.com')
|
42
|
+
anonymize 'terms_n_condition', 'age'
|
43
|
+
end
|
44
|
+
end
|
45
|
+
|
46
|
+
DataAnon::Utils::SourceDatabase.establish_connection connection_spec
|
47
|
+
source = DataAnon::Utils::SourceTable.create 'customers', ['cust_id']
|
48
|
+
new_rec = source.find(CustomerSample::SAMPLE_DATA[:cust_id])
|
49
|
+
new_rec['email'].should_not == 'test+1@gmail.com'
|
50
|
+
|
51
|
+
end
|
52
|
+
|
53
|
+
it 'should continue with anonymization of the record if condition in skip is true' do
|
54
|
+
database 'Customer' do
|
55
|
+
strategy DataAnon::Strategy::Blacklist
|
56
|
+
source_db connection_spec
|
57
|
+
|
58
|
+
table 'customers' do
|
59
|
+
continue { |index, record| record['age'] > 18 }
|
60
|
+
|
61
|
+
primary_key 'cust_id'
|
62
|
+
anonymize('email').using FieldStrategy::StringTemplate.new('test+#{row_number}@gmail.com')
|
63
|
+
anonymize 'terms_n_condition', 'age'
|
64
|
+
end
|
65
|
+
end
|
66
|
+
|
67
|
+
DataAnon::Utils::SourceDatabase.establish_connection connection_spec
|
68
|
+
source = DataAnon::Utils::SourceTable.create 'customers', ['cust_id']
|
69
|
+
new_rec = source.find(CustomerSample::SAMPLE_DATA[:cust_id])
|
70
|
+
new_rec['email'].should == 'test+1@gmail.com'
|
71
|
+
|
72
|
+
|
73
|
+
|
30
74
|
end
|
31
75
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: data-anonymization
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.7.
|
4
|
+
version: 0.7.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Sunit Parekh
|
@@ -10,7 +10,7 @@ authors:
|
|
10
10
|
autorequire:
|
11
11
|
bindir: bin
|
12
12
|
cert_chain: []
|
13
|
-
date: 2015-
|
13
|
+
date: 2015-06-13 00:00:00.000000000 Z
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
16
16
|
name: activerecord
|