data-anonymization 0.7.0 → 0.7.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +37 -0
- data/examples/mongodb_blacklist_dsl.rb +2 -2
- data/examples/mongodb_whitelist_dsl.rb +1 -1
- data/lib/strategy/base.rb +17 -2
- data/lib/strategy/mongodb/anonymize_field.rb +2 -2
- data/lib/version.rb +1 -1
- data/spec/acceptance/mongodb_blacklist_spec.rb +13 -6
- data/spec/acceptance/rdbms_blacklist_spec.rb +48 -4
- data/spec/support/customer_sample.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: afcaf064c0b8135149e5fb8e559ab95df8088516
|
4
|
+
data.tar.gz: 795dbba88fead534acdcb87b4cf18fd4bdf12ad8
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 85daa8177667c232eae29f3ad9a9f021a5d679898679967a5d435d4b3e3b7eb9bf0a6e84cd5441b43d823005a9dc0c295adfa3b9ca9387e1593ff01f4dc950d2
|
7
|
+
data.tar.gz: 2ce7411e57e5427e9fbcd211b37a60acc3669d38a2405364e51aeb27f5ff2d43c027df5e55291e8be39f81ad4543045c318a5d20422c0444780d912da9595c1d
|
data/README.md
CHANGED
@@ -70,6 +70,11 @@ Postgresql database having **composite primary key**
|
|
70
70
|
|
71
71
|
## Changelog
|
72
72
|
|
73
|
+
#### 0.7.1 (Jun 13, 2015)
|
74
|
+
1. Fixed issues with empty array data for MongoDB
|
75
|
+
2. Added feature to skip and continue records during anonymization; this is useful for applying different strategies to different types of records.
|
76
|
+
|
77
|
+
|
73
78
|
#### 0.7.0 (Mar 9, 2015)
|
74
79
|
1. Removed downcase from field name since it was causing issues with upper case field names. So now, for databases where case matters, field name case should be maintained.
|
75
80
|
2. Upgraded gems to latest version
|
@@ -200,6 +205,7 @@ Read more about [blacklist and whitelist here](http://sunitspace.blogspot.in/201
|
|
200
205
|
3. To run anonymization in parallel at table level (provided there are no FK constraints on the tables), use the DataAnon::Parallel::Table strategy.
|
201
206
|
4. To load a large table in batches, set 'batch_size' on the table and it will use RoR's batch mode processing. Check out the [example](https://github.com/sunitparekh/data-anonymization/blob/master/examples/whitelist_dsl.rb) on how to use batch processing.
|
202
207
|
5. Make sure to give proper case for fields and table names.
|
208
|
+
6. Use skip and continue to apply different strategies for records.
|
203
209
|
|
204
210
|
## DSL Generation
|
205
211
|
|
@@ -567,6 +573,37 @@ ENV['show_progress'] = 'false'
|
|
567
573
|
DataAnon::Utils::Logging.logger.level = Logger::INFO
|
568
574
|
```
|
569
575
|
|
576
|
+
## Skip and Continue records
|
577
|
+
|
578
|
+
*Skip* is used to skip records during anonymization when the condition returns true. These records are ignored,
|
579
|
+
in blacklist they remain as they are in the database, and in case of whitelist these records will not be copied to the destination database.
|
580
|
+
|
581
|
+
```ruby
|
582
|
+
table 'customers' do
|
583
|
+
skip { |index, record| record['age'] < 18 }
|
584
|
+
|
585
|
+
primary_key 'cust_id'
|
586
|
+
anonymize('email').using FieldStrategy::StringTemplate.new('test+#{row_number}@gmail.com')
|
587
|
+
anonymize 'terms_n_condition', 'age'
|
588
|
+
end
|
589
|
+
```
|
590
|
+
|
591
|
+
|
592
|
+
*Continue* is the exact opposite of Skip: it continues with anonymization only if the given condition returns true.
|
593
|
+
In case of blacklist, records are anonymized for matching conditions, and for whitelist, records are anonymized and copied
|
594
|
+
to new database for matching conditions.
|
595
|
+
|
596
|
+
```ruby
|
597
|
+
table 'customers' do
|
598
|
+
continue { |index, record| record['age'] > 18 }
|
599
|
+
|
600
|
+
primary_key 'cust_id'
|
601
|
+
anonymize('email').using FieldStrategy::StringTemplate.new('test+#{row_number}@gmail.com')
|
602
|
+
anonymize 'terms_n_condition', 'age'
|
603
|
+
end
|
604
|
+
```
|
605
|
+
|
606
|
+
|
570
607
|
## Want to contribute?
|
571
608
|
|
572
609
|
1. Fork it
|
@@ -21,10 +21,10 @@ database 'test' do
|
|
21
21
|
end
|
22
22
|
|
23
23
|
collection 'plans' do
|
24
|
-
anonymize('plan_aliases').using FieldStrategy::SelectFromList.new(["Free","Team","Business","Paid"])
|
24
|
+
anonymize('plan_aliases').using FieldStrategy::AnonymizeArray.new(FieldStrategy::SelectFromList.new(["Free","Team","Business","Paid"]))
|
25
25
|
anonymize 'public_sharing','photo_sharing'
|
26
26
|
|
27
|
-
|
27
|
+
document 'features' do
|
28
28
|
anonymize('max_storage').using FieldStrategy::SelectFromList.new([10737418240,21474836480,53687091200])
|
29
29
|
|
30
30
|
document 'users' do
|
@@ -26,7 +26,7 @@ database 'test' do
|
|
26
26
|
|
27
27
|
collection 'plans' do
|
28
28
|
whitelist '_id', 'name','term', 'created_at'
|
29
|
-
anonymize('plan_aliases').using FieldStrategy::SelectFromList.new(["Free","Team","Business","Paid"])
|
29
|
+
anonymize('plan_aliases').using FieldStrategy::AnonymizeArray.new(FieldStrategy::SelectFromList.new(["Free","Team","Business","Paid"]))
|
30
30
|
anonymize 'public_sharing','photo_sharing'
|
31
31
|
|
32
32
|
collection 'features' do
|
data/lib/strategy/base.rb
CHANGED
@@ -37,6 +37,14 @@ module DataAnon
|
|
37
37
|
fields.each { |f| @fields[f] = DataAnon::Strategy::Field::Whitelist.new }
|
38
38
|
end
|
39
39
|
|
40
|
+
def skip &block
|
41
|
+
@skip_block = block
|
42
|
+
end
|
43
|
+
|
44
|
+
def continue &block
|
45
|
+
@continue_block = block
|
46
|
+
end
|
47
|
+
|
40
48
|
def anonymize *fields, &block
|
41
49
|
if block.nil?
|
42
50
|
fields.each { |f| @fields[f] = DataAnon::Strategy::Field::DefaultAnon.new(@user_strategies) }
|
@@ -93,7 +101,7 @@ module DataAnon
|
|
93
101
|
source_table.all.each do |record|
|
94
102
|
index += 1
|
95
103
|
begin
|
96
|
-
|
104
|
+
process_record_if index, record
|
97
105
|
rescue => exception
|
98
106
|
@errors.log_error record, exception
|
99
107
|
end
|
@@ -107,7 +115,7 @@ module DataAnon
|
|
107
115
|
source_table.find_each(:batch_size => @batch_size) do |record|
|
108
116
|
index += 1
|
109
117
|
begin
|
110
|
-
|
118
|
+
process_record_if index, record
|
111
119
|
rescue => exception
|
112
120
|
@errors.log_error record, exception
|
113
121
|
end
|
@@ -115,6 +123,13 @@ module DataAnon
|
|
115
123
|
end
|
116
124
|
end
|
117
125
|
|
126
|
+
def process_record_if index, record
|
127
|
+
return if @skip_block && @skip_block.call(index, record)
|
128
|
+
return if @continue_block && !@continue_block.call(index, record)
|
129
|
+
|
130
|
+
process_record index, record
|
131
|
+
end
|
132
|
+
|
118
133
|
def progress_bar
|
119
134
|
@progress_bar || DataAnon::Utils::ProgressBar
|
120
135
|
end
|
@@ -30,11 +30,11 @@ module DataAnon
|
|
30
30
|
end
|
31
31
|
|
32
32
|
def sub_documents?
|
33
|
-
@field.value.kind_of?(Array) && @field.value[0].kind_of?(Hash)
|
33
|
+
@field.value.kind_of?(Array) && (@field_strategy.kind_of?(Hash) || @field.value[0].kind_of?(Hash))
|
34
34
|
end
|
35
35
|
|
36
36
|
def sub_document?
|
37
|
-
@field.value.kind_of?
|
37
|
+
@field.value.kind_of?(Hash)
|
38
38
|
end
|
39
39
|
|
40
40
|
|
data/lib/version.rb
CHANGED
@@ -10,27 +10,30 @@ describe "End 2 End MongoDB Blacklist Acceptance Test" do
|
|
10
10
|
"_id" => 1,
|
11
11
|
"USER_ID" => "sunitparekh",
|
12
12
|
"date_of_birth" => Time.new(2012, 7, 14, 13, 1, 0),
|
13
|
-
"email" => "parekh
|
13
|
+
"email" => "parekh-sunit@mailinator.com",
|
14
14
|
"password" => "TfqIK8Pd8GlbMDFZCX4l/5EtnOkfLCeynOL85tJQuxum&382knaflk@@",
|
15
15
|
"failed_attempts" => 0,
|
16
16
|
"first_name" => "Sunit",
|
17
17
|
"last_name" => "Parekh",
|
18
18
|
"password_reset_answer" => "manza",
|
19
19
|
"password_reset_question" => "My new car modal?",
|
20
|
-
"updated_at" => Time.new(2012, 8, 15, 13, 1, 0)
|
20
|
+
"updated_at" => Time.new(2012, 8, 15, 13, 1, 0),
|
21
|
+
"alternate_emails" => ["abc@test.com","abc2@test.com"]
|
22
|
+
|
21
23
|
},
|
22
24
|
{
|
23
25
|
"_id" => 2,
|
24
|
-
"
|
26
|
+
"USER_ID" => "anandagrawal",
|
25
27
|
"date_of_birth" => Time.new(2011, 8, 11, 13, 1, 0),
|
26
|
-
"email" => "
|
28
|
+
"email" => "anand-agrawal@mailinator.com",
|
27
29
|
"password" => "Tz548O0RWusldVAWkwqfzO3jK/X4l/5EtnOkfLCeynOL85tJQuxum",
|
28
30
|
"failed_attempts" => 0,
|
29
31
|
"first_name" => "Anand",
|
30
32
|
"last_name" => "Agrawal",
|
31
33
|
"password_reset_answer" => "android",
|
32
34
|
"password_reset_question" => "My phone?",
|
33
|
-
"updated_at" => Time.new(2012, 2, 11, 13, 1, 0)
|
35
|
+
"updated_at" => Time.new(2012, 2, 11, 13, 1, 0),
|
36
|
+
"alternate_emails" => ["abc@test.com","abc2@test.com"]
|
34
37
|
}
|
35
38
|
]
|
36
39
|
users_coll = Mongo::Connection.from_uri("mongodb://localhost/dest")['test']['users']
|
@@ -50,6 +53,7 @@ describe "End 2 End MongoDB Blacklist Acceptance Test" do
|
|
50
53
|
anonymize('password') { |field| "password" }
|
51
54
|
anonymize('first_name').using FieldStrategy::RandomFirstName.new
|
52
55
|
anonymize('last_name').using FieldStrategy::RandomLastName.new
|
56
|
+
anonymize('alternate_emails').using FieldStrategy::AnonymizeArray.new(FieldStrategy::RandomMailinatorEmail.new)
|
53
57
|
end
|
54
58
|
|
55
59
|
end
|
@@ -61,7 +65,7 @@ describe "End 2 End MongoDB Blacklist Acceptance Test" do
|
|
61
65
|
user['_id'].should == 1
|
62
66
|
user['USER_ID'].should == "user-1"
|
63
67
|
user['date_of_birth'].to_i.should_not == Time.new(2012, 7, 14, 13, 1, 0).to_i
|
64
|
-
user['email'].should_not == "parekh
|
68
|
+
user['email'].should_not == "parekh-sunit@mailinator.com"
|
65
69
|
user['password'].should == "password"
|
66
70
|
user['failed_attempts'].should == 0
|
67
71
|
user['first_name'].should_not be "Sunit"
|
@@ -69,6 +73,9 @@ describe "End 2 End MongoDB Blacklist Acceptance Test" do
|
|
69
73
|
user['password_reset_answer'].should == "manza"
|
70
74
|
user['password_reset_question'].should == "My new car modal?"
|
71
75
|
user['updated_at'].to_i.should == Time.new(2012, 8, 15, 13, 1, 0).to_i
|
76
|
+
user['alternate_emails'].length.should == 2
|
77
|
+
user['alternate_emails'][0].should_not == 'abc@test.com'
|
78
|
+
user['alternate_emails'][1].should_not == 'abc2@test.com'
|
72
79
|
|
73
80
|
|
74
81
|
end
|
@@ -1,6 +1,6 @@
|
|
1
|
-
require
|
1
|
+
require 'spec_helper'
|
2
2
|
|
3
|
-
describe
|
3
|
+
describe 'End 2 End RDBMS Blacklist Acceptance Test using SQLite database' do
|
4
4
|
connection_spec = {:adapter => 'sqlite3', :database => 'tmp/customer.sqlite'}
|
5
5
|
|
6
6
|
before(:each) do
|
@@ -9,9 +9,9 @@ describe "End 2 End RDBMS Blacklist Acceptance Test using SQLite database" do
|
|
9
9
|
CustomerSample.insert_record connection_spec, CustomerSample::SAMPLE_DATA
|
10
10
|
end
|
11
11
|
|
12
|
-
it
|
12
|
+
it 'should anonymize customer table record ' do
|
13
13
|
|
14
|
-
database
|
14
|
+
database 'Customer' do
|
15
15
|
strategy DataAnon::Strategy::Blacklist
|
16
16
|
source_db connection_spec
|
17
17
|
|
@@ -27,5 +27,49 @@ describe "End 2 End RDBMS Blacklist Acceptance Test using SQLite database" do
|
|
27
27
|
new_rec = source.find(CustomerSample::SAMPLE_DATA[:cust_id])
|
28
28
|
new_rec['email'].should == 'test+1@gmail.com'
|
29
29
|
|
30
|
+
end
|
31
|
+
|
32
|
+
it 'should skip anonymization of the record if condition in skip is true' do
|
33
|
+
database 'Customer' do
|
34
|
+
strategy DataAnon::Strategy::Blacklist
|
35
|
+
source_db connection_spec
|
36
|
+
|
37
|
+
table 'customers' do
|
38
|
+
skip { |index, record| record['age'] > 18 }
|
39
|
+
|
40
|
+
primary_key 'cust_id'
|
41
|
+
anonymize('email').using FieldStrategy::StringTemplate.new('test+#{row_number}@gmail.com')
|
42
|
+
anonymize 'terms_n_condition', 'age'
|
43
|
+
end
|
44
|
+
end
|
45
|
+
|
46
|
+
DataAnon::Utils::SourceDatabase.establish_connection connection_spec
|
47
|
+
source = DataAnon::Utils::SourceTable.create 'customers', ['cust_id']
|
48
|
+
new_rec = source.find(CustomerSample::SAMPLE_DATA[:cust_id])
|
49
|
+
new_rec['email'].should_not == 'test+1@gmail.com'
|
50
|
+
|
51
|
+
end
|
52
|
+
|
53
|
+
it 'should continue with anonymization of the record if condition in skip is true' do
|
54
|
+
database 'Customer' do
|
55
|
+
strategy DataAnon::Strategy::Blacklist
|
56
|
+
source_db connection_spec
|
57
|
+
|
58
|
+
table 'customers' do
|
59
|
+
continue { |index, record| record['age'] > 18 }
|
60
|
+
|
61
|
+
primary_key 'cust_id'
|
62
|
+
anonymize('email').using FieldStrategy::StringTemplate.new('test+#{row_number}@gmail.com')
|
63
|
+
anonymize 'terms_n_condition', 'age'
|
64
|
+
end
|
65
|
+
end
|
66
|
+
|
67
|
+
DataAnon::Utils::SourceDatabase.establish_connection connection_spec
|
68
|
+
source = DataAnon::Utils::SourceTable.create 'customers', ['cust_id']
|
69
|
+
new_rec = source.find(CustomerSample::SAMPLE_DATA[:cust_id])
|
70
|
+
new_rec['email'].should == 'test+1@gmail.com'
|
71
|
+
|
72
|
+
|
73
|
+
|
30
74
|
end
|
31
75
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: data-anonymization
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.7.
|
4
|
+
version: 0.7.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Sunit Parekh
|
@@ -10,7 +10,7 @@ authors:
|
|
10
10
|
autorequire:
|
11
11
|
bindir: bin
|
12
12
|
cert_chain: []
|
13
|
-
date: 2015-
|
13
|
+
date: 2015-06-13 00:00:00.000000000 Z
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
16
16
|
name: activerecord
|