data-anonymization 0.7.0 → 0.7.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 05181aca5f0046f53ca999bbe73aea0360f013b6
4
- data.tar.gz: d538308e3ccb427af3860b657362f3de2e6aff5b
3
+ metadata.gz: afcaf064c0b8135149e5fb8e559ab95df8088516
4
+ data.tar.gz: 795dbba88fead534acdcb87b4cf18fd4bdf12ad8
5
5
  SHA512:
6
- metadata.gz: 83a63960a9dcd64cd74ba4aa01117f94f813a118af79b5e9ca57eb333fbc143699b07ba801909ff22f03e276ff268b84b6b2c70030ce3eaa06526eb3abacb91e
7
- data.tar.gz: 25fc3f8467d4ce3dc6c0ea455a9fa1660113f0a94e2a39edf48e953d87af3519cf03c0a253c8e7d23b6beb4e195f033f3664d0a70bf83b2d1fa89e86f3bb30e4
6
+ metadata.gz: 85daa8177667c232eae29f3ad9a9f021a5d679898679967a5d435d4b3e3b7eb9bf0a6e84cd5441b43d823005a9dc0c295adfa3b9ca9387e1593ff01f4dc950d2
7
+ data.tar.gz: 2ce7411e57e5427e9fbcd211b37a60acc3669d38a2405364e51aeb27f5ff2d43c027df5e55291e8be39f81ad4543045c318a5d20422c0444780d912da9595c1d
data/README.md CHANGED
@@ -70,6 +70,11 @@ Postgresql database having **composite primary key**
70
70
 
71
71
  ## Changelog
72
72
 
73
+ #### 0.7.1 (Jun 13, 2015)
74
+ 1. Fixed issues with empty array data for MongoDB
75
+ 2. Added feature to skip and continue records during anonymization; this is useful to apply different strategies for different types of records.
76
+
77
+
73
78
  #### 0.7.0 (Mar 9, 2015)
74
79
  1. Removed downcase from field name since it was causing issues with upper case field names. So now, for databases where case matters, field name case should be maintained.
75
80
  2. Upgraded gems to latest version
@@ -200,6 +205,7 @@ Read more about [blacklist and whitelist here](http://sunitspace.blogspot.in/201
200
205
  3. To run anonymization in parallel at Table level, provided no FK constraint on tables use DataAnon::Parallel::Table strategy
201
206
  4. For large tables, set 'batch_size' to load records from the table in batches; it will use RoR's batch mode processing. Check out the [example](https://github.com/sunitparekh/data-anonymization/blob/master/examples/whitelist_dsl.rb) on how to use batch processing.
202
207
  5. Make sure to give proper case for fields and table names.
208
+ 6. Use skip and continue to apply different strategies for records.
203
209
 
204
210
  ## DSL Generation
205
211
 
@@ -567,6 +573,37 @@ ENV['show_progress'] = 'false'
567
573
  DataAnon::Utils::Logging.logger.level = Logger::INFO
568
574
  ```
569
575
 
576
+ ## Skip and Continue records
577
+
578
+ *Skip* is used to skip records during anonymization when the condition returns true. These records are ignored,
579
+ in blacklist they remain as they are in the database, and in case of whitelist these records will not be copied to the destination database.
580
+
581
+ ```ruby
582
+ table 'customers' do
583
+ skip { |index, record| record['age'] < 18 }
584
+
585
+ primary_key 'cust_id'
586
+ anonymize('email').using FieldStrategy::StringTemplate.new('test+#{row_number}@gmail.com')
587
+ anonymize 'terms_n_condition', 'age'
588
+ end
589
+ ```
590
+
591
+
592
+ *Continue* is the exact opposite of Skip: it continues with anonymization only if the given condition returns true.
593
+ In case of blacklist, records are anonymized for matching conditions, and for whitelist, records are anonymized and copied
594
+ to new database for matching conditions.
595
+
596
+ ```ruby
597
+ table 'customers' do
598
+ continue { |index, record| record['age'] > 18 }
599
+
600
+ primary_key 'cust_id'
601
+ anonymize('email').using FieldStrategy::StringTemplate.new('test+#{row_number}@gmail.com')
602
+ anonymize 'terms_n_condition', 'age'
603
+ end
604
+ ```
605
+
606
+
570
607
  ## Want to contribute?
571
608
 
572
609
  1. Fork it
@@ -21,10 +21,10 @@ database 'test' do
21
21
  end
22
22
 
23
23
  collection 'plans' do
24
- anonymize('plan_aliases').using FieldStrategy::SelectFromList.new(["Free","Team","Business","Paid"])
24
+ anonymize('plan_aliases').using FieldStrategy::AnonymizeArray.new(FieldStrategy::SelectFromList.new(["Free","Team","Business","Paid"]))
25
25
  anonymize 'public_sharing','photo_sharing'
26
26
 
27
- collection 'features' do
27
+ document 'features' do
28
28
  anonymize('max_storage').using FieldStrategy::SelectFromList.new([10737418240,21474836480,53687091200])
29
29
 
30
30
  document 'users' do
@@ -26,7 +26,7 @@ database 'test' do
26
26
 
27
27
  collection 'plans' do
28
28
  whitelist '_id', 'name','term', 'created_at'
29
- anonymize('plan_aliases').using FieldStrategy::SelectFromList.new(["Free","Team","Business","Paid"])
29
+ anonymize('plan_aliases').using FieldStrategy::AnonymizeArray.new(FieldStrategy::SelectFromList.new(["Free","Team","Business","Paid"]))
30
30
  anonymize 'public_sharing','photo_sharing'
31
31
 
32
32
  collection 'features' do
@@ -37,6 +37,14 @@ module DataAnon
37
37
  fields.each { |f| @fields[f] = DataAnon::Strategy::Field::Whitelist.new }
38
38
  end
39
39
 
40
+ def skip &block
41
+ @skip_block = block
42
+ end
43
+
44
+ def continue &block
45
+ @continue_block = block
46
+ end
47
+
40
48
  def anonymize *fields, &block
41
49
  if block.nil?
42
50
  fields.each { |f| @fields[f] = DataAnon::Strategy::Field::DefaultAnon.new(@user_strategies) }
@@ -93,7 +101,7 @@ module DataAnon
93
101
  source_table.all.each do |record|
94
102
  index += 1
95
103
  begin
96
- process_record index, record
104
+ process_record_if index, record
97
105
  rescue => exception
98
106
  @errors.log_error record, exception
99
107
  end
@@ -107,7 +115,7 @@ module DataAnon
107
115
  source_table.find_each(:batch_size => @batch_size) do |record|
108
116
  index += 1
109
117
  begin
110
- process_record index, record
118
+ process_record_if index, record
111
119
  rescue => exception
112
120
  @errors.log_error record, exception
113
121
  end
@@ -115,6 +123,13 @@ module DataAnon
115
123
  end
116
124
  end
117
125
 
126
+ def process_record_if index, record
127
+ return if @skip_block && @skip_block.call(index, record)
128
+ return if @continue_block && !@continue_block.call(index, record)
129
+
130
+ process_record index, record
131
+ end
132
+
118
133
  def progress_bar
119
134
  @progress_bar || DataAnon::Utils::ProgressBar
120
135
  end
@@ -30,11 +30,11 @@ module DataAnon
30
30
  end
31
31
 
32
32
  def sub_documents?
33
- @field.value.kind_of?(Array) && @field.value[0].kind_of?(Hash)
33
+ @field.value.kind_of?(Array) && (@field_strategy.kind_of?(Hash) || @field.value[0].kind_of?(Hash))
34
34
  end
35
35
 
36
36
  def sub_document?
37
- @field.value.kind_of? Hash
37
+ @field.value.kind_of?(Hash)
38
38
  end
39
39
 
40
40
 
@@ -1,3 +1,3 @@
1
1
  module DataAnonymization
2
- VERSION = '0.7.0'
2
+ VERSION = '0.7.1'
3
3
  end
@@ -10,27 +10,30 @@ describe "End 2 End MongoDB Blacklist Acceptance Test" do
10
10
  "_id" => 1,
11
11
  "USER_ID" => "sunitparekh",
12
12
  "date_of_birth" => Time.new(2012, 7, 14, 13, 1, 0),
13
- "email" => "parekh.sunit@gmail.com",
13
+ "email" => "parekh-sunit@mailinator.com",
14
14
  "password" => "TfqIK8Pd8GlbMDFZCX4l/5EtnOkfLCeynOL85tJQuxum&382knaflk@@",
15
15
  "failed_attempts" => 0,
16
16
  "first_name" => "Sunit",
17
17
  "last_name" => "Parekh",
18
18
  "password_reset_answer" => "manza",
19
19
  "password_reset_question" => "My new car modal?",
20
- "updated_at" => Time.new(2012, 8, 15, 13, 1, 0)
20
+ "updated_at" => Time.new(2012, 8, 15, 13, 1, 0),
21
+ "alternate_emails" => ["abc@test.com","abc2@test.com"]
22
+
21
23
  },
22
24
  {
23
25
  "_id" => 2,
24
- "user_id" => "anandagrawal",
26
+ "USER_ID" => "anandagrawal",
25
27
  "date_of_birth" => Time.new(2011, 8, 11, 13, 1, 0),
26
- "email" => "anandagrawal84@gmail.com",
28
+ "email" => "anand-agrawal@mailinator.com",
27
29
  "password" => "Tz548O0RWusldVAWkwqfzO3jK/X4l/5EtnOkfLCeynOL85tJQuxum",
28
30
  "failed_attempts" => 0,
29
31
  "first_name" => "Anand",
30
32
  "last_name" => "Agrawal",
31
33
  "password_reset_answer" => "android",
32
34
  "password_reset_question" => "My phone?",
33
- "updated_at" => Time.new(2012, 2, 11, 13, 1, 0)
35
+ "updated_at" => Time.new(2012, 2, 11, 13, 1, 0),
36
+ "alternate_emails" => ["abc@test.com","abc2@test.com"]
34
37
  }
35
38
  ]
36
39
  users_coll = Mongo::Connection.from_uri("mongodb://localhost/dest")['test']['users']
@@ -50,6 +53,7 @@ describe "End 2 End MongoDB Blacklist Acceptance Test" do
50
53
  anonymize('password') { |field| "password" }
51
54
  anonymize('first_name').using FieldStrategy::RandomFirstName.new
52
55
  anonymize('last_name').using FieldStrategy::RandomLastName.new
56
+ anonymize('alternate_emails').using FieldStrategy::AnonymizeArray.new(FieldStrategy::RandomMailinatorEmail.new)
53
57
  end
54
58
 
55
59
  end
@@ -61,7 +65,7 @@ describe "End 2 End MongoDB Blacklist Acceptance Test" do
61
65
  user['_id'].should == 1
62
66
  user['USER_ID'].should == "user-1"
63
67
  user['date_of_birth'].to_i.should_not == Time.new(2012, 7, 14, 13, 1, 0).to_i
64
- user['email'].should_not == "parekh.sunit@gmail.com"
68
+ user['email'].should_not == "parekh-sunit@mailinator.com"
65
69
  user['password'].should == "password"
66
70
  user['failed_attempts'].should == 0
67
71
  user['first_name'].should_not be "Sunit"
@@ -69,6 +73,9 @@ describe "End 2 End MongoDB Blacklist Acceptance Test" do
69
73
  user['password_reset_answer'].should == "manza"
70
74
  user['password_reset_question'].should == "My new car modal?"
71
75
  user['updated_at'].to_i.should == Time.new(2012, 8, 15, 13, 1, 0).to_i
76
+ user['alternate_emails'].length.should == 2
77
+ user['alternate_emails'][0].should_not == 'abc@test.com'
78
+ user['alternate_emails'][1].should_not == 'abc2@test.com'
72
79
 
73
80
 
74
81
  end
@@ -1,6 +1,6 @@
1
- require "spec_helper"
1
+ require 'spec_helper'
2
2
 
3
- describe "End 2 End RDBMS Blacklist Acceptance Test using SQLite database" do
3
+ describe 'End 2 End RDBMS Blacklist Acceptance Test using SQLite database' do
4
4
  connection_spec = {:adapter => 'sqlite3', :database => 'tmp/customer.sqlite'}
5
5
 
6
6
  before(:each) do
@@ -9,9 +9,9 @@ describe "End 2 End RDBMS Blacklist Acceptance Test using SQLite database" do
9
9
  CustomerSample.insert_record connection_spec, CustomerSample::SAMPLE_DATA
10
10
  end
11
11
 
12
- it "should anonymize customer table record " do
12
+ it 'should anonymize customer table record ' do
13
13
 
14
- database "Customer" do
14
+ database 'Customer' do
15
15
  strategy DataAnon::Strategy::Blacklist
16
16
  source_db connection_spec
17
17
 
@@ -27,5 +27,49 @@ describe "End 2 End RDBMS Blacklist Acceptance Test using SQLite database" do
27
27
  new_rec = source.find(CustomerSample::SAMPLE_DATA[:cust_id])
28
28
  new_rec['email'].should == 'test+1@gmail.com'
29
29
 
30
+ end
31
+
32
+ it 'should skip anonymization of the record if condition in skip is true' do
33
+ database 'Customer' do
34
+ strategy DataAnon::Strategy::Blacklist
35
+ source_db connection_spec
36
+
37
+ table 'customers' do
38
+ skip { |index, record| record['age'] > 18 }
39
+
40
+ primary_key 'cust_id'
41
+ anonymize('email').using FieldStrategy::StringTemplate.new('test+#{row_number}@gmail.com')
42
+ anonymize 'terms_n_condition', 'age'
43
+ end
44
+ end
45
+
46
+ DataAnon::Utils::SourceDatabase.establish_connection connection_spec
47
+ source = DataAnon::Utils::SourceTable.create 'customers', ['cust_id']
48
+ new_rec = source.find(CustomerSample::SAMPLE_DATA[:cust_id])
49
+ new_rec['email'].should_not == 'test+1@gmail.com'
50
+
51
+ end
52
+
53
+ it 'should continue with anonymization of the record if condition in skip is true' do
54
+ database 'Customer' do
55
+ strategy DataAnon::Strategy::Blacklist
56
+ source_db connection_spec
57
+
58
+ table 'customers' do
59
+ continue { |index, record| record['age'] > 18 }
60
+
61
+ primary_key 'cust_id'
62
+ anonymize('email').using FieldStrategy::StringTemplate.new('test+#{row_number}@gmail.com')
63
+ anonymize 'terms_n_condition', 'age'
64
+ end
65
+ end
66
+
67
+ DataAnon::Utils::SourceDatabase.establish_connection connection_spec
68
+ source = DataAnon::Utils::SourceTable.create 'customers', ['cust_id']
69
+ new_rec = source.find(CustomerSample::SAMPLE_DATA[:cust_id])
70
+ new_rec['email'].should == 'test+1@gmail.com'
71
+
72
+
73
+
30
74
  end
31
75
  end
@@ -20,7 +20,7 @@ class CustomerSample
20
20
  t.float :latitude
21
21
  t.float :longitude
22
22
 
23
- t.timestamps
23
+ t.timestamps null: true
24
24
  end
25
25
  end
26
26
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: data-anonymization
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.7.0
4
+ version: 0.7.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Sunit Parekh
@@ -10,7 +10,7 @@ authors:
10
10
  autorequire:
11
11
  bindir: bin
12
12
  cert_chain: []
13
- date: 2015-03-09 00:00:00.000000000 Z
13
+ date: 2015-06-13 00:00:00.000000000 Z
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
16
16
  name: activerecord