data-anonymization 0.1.2 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- data/.documentup.json +1 -0
- data/.travis.yml +0 -1
- data/README.md +277 -52
- data/blacklist_dsl.rb +1 -3
- data/data-anonymization.gemspec +4 -0
- data/lib/core/dsl.rb +1 -1
- data/lib/data-anonymization.rb +3 -0
- data/lib/strategy/base.rb +21 -11
- data/lib/strategy/blacklist.rb +2 -1
- data/lib/strategy/field/contact/geojson_base.rb +24 -0
- data/lib/strategy/field/contact/random_address.rb +17 -0
- data/lib/strategy/field/contact/random_city.rb +17 -0
- data/lib/strategy/field/contact/random_phone_number.rb +13 -0
- data/lib/strategy/field/contact/random_province.rb +17 -0
- data/lib/strategy/field/contact/random_zipcode.rb +17 -0
- data/lib/strategy/field/datetime/anonymize_date.rb +39 -0
- data/lib/strategy/field/datetime/anonymize_datetime.rb +15 -0
- data/lib/strategy/field/datetime/anonymize_time.rb +58 -0
- data/lib/strategy/field/datetime/date_delta.rb +21 -0
- data/lib/strategy/field/{date_time_delta.rb → datetime/date_time_delta.rb} +3 -3
- data/lib/strategy/field/datetime/time_delta.rb +12 -0
- data/lib/strategy/field/default_anon.rb +12 -7
- data/lib/strategy/field/email/gmail_template.rb +16 -0
- data/lib/strategy/field/{random_email.rb → email/random_email.rb} +0 -0
- data/lib/strategy/field/{random_mailinator_email.rb → email/random_mailinator_email.rb} +0 -2
- data/lib/strategy/field/fields.rb +51 -20
- data/lib/strategy/field/name/random_first_name.rb +14 -0
- data/lib/strategy/field/{random_full_name.rb → name/random_full_name.rb} +0 -0
- data/lib/strategy/field/name/random_last_name.rb +14 -0
- data/lib/strategy/field/{random_user_name.rb → name/random_user_name.rb} +0 -0
- data/lib/strategy/field/number/random_float.rb +23 -0
- data/lib/strategy/field/{random_float_delta.rb → number/random_float_delta.rb} +2 -4
- data/lib/strategy/field/{random_int.rb → number/random_integer.rb} +1 -1
- data/lib/strategy/field/{random_integer_delta.rb → number/random_integer_delta.rb} +2 -5
- data/lib/strategy/field/{random_phone_number.rb → string/formatted_string_numbers.rb} +4 -1
- data/lib/strategy/field/{lorem_ipsum.rb → string/lorem_ipsum.rb} +0 -0
- data/lib/strategy/field/{random_string.rb → string/random_string.rb} +0 -0
- data/lib/strategy/field/{distinct_column_values.rb → string/select_from_database.rb} +2 -3
- data/lib/strategy/field/string/select_from_file.rb +18 -0
- data/lib/strategy/field/string/select_from_list.rb +17 -0
- data/lib/strategy/field/{string_template.rb → string/string_template.rb} +0 -0
- data/lib/strategy/whitelist.rb +4 -2
- data/lib/utils/database.rb +8 -6
- data/lib/utils/geojson_parser.rb +42 -0
- data/lib/utils/logging.rb +0 -9
- data/lib/utils/progress_bar.rb +29 -0
- data/lib/utils/random_float.rb +12 -0
- data/lib/utils/random_int.rb +3 -7
- data/lib/utils/resource.rb +4 -0
- data/lib/version.rb +1 -1
- data/resources/UK_addresses.geojson +300 -0
- data/resources/US_addresses.geojson +300 -0
- data/spec/acceptance/rdbms_blacklist_spec.rb +2 -2
- data/spec/acceptance/rdbms_whitelist_spec.rb +6 -8
- data/spec/resource/sample.geojson +1 -0
- data/spec/spec_helper.rb +3 -2
- data/spec/strategy/field/contact/random_address_spec.rb +12 -0
- data/spec/strategy/field/contact/random_city_spec.rb +14 -0
- data/spec/strategy/field/contact/random_phone_number_spec.rb +16 -0
- data/spec/strategy/field/contact/random_province_spec.rb +14 -0
- data/spec/strategy/field/contact/random_zipcode_spec.rb +14 -0
- data/spec/strategy/field/datetime/anonymize_date_spec.rb +27 -0
- data/spec/strategy/field/datetime/anonymize_datetime_spec.rb +57 -0
- data/spec/strategy/field/datetime/anonymize_time_spec.rb +57 -0
- data/spec/strategy/field/datetime/date_delta_spec.rb +36 -0
- data/spec/strategy/field/{date_time_delta_spec.rb → datetime/date_time_delta_spec.rb} +3 -2
- data/spec/strategy/field/datetime/time_delta_spec.rb +44 -0
- data/spec/strategy/field/default_anon_spec.rb +42 -0
- data/spec/strategy/field/email/gmail_template_spec.rb +17 -0
- data/spec/strategy/field/{random_email_spec.rb → email/random_email_spec.rb} +2 -2
- data/spec/strategy/field/email/random_mailinator_email_spec.rb +14 -0
- data/spec/strategy/field/{random_first_name_spec.rb → name/random_first_name_spec.rb} +2 -2
- data/spec/strategy/field/{random_full_name_spec.rb → name/random_full_name_spec.rb} +2 -2
- data/spec/strategy/field/{random_last_name_spec.rb → name/random_last_name_spec.rb} +2 -2
- data/spec/strategy/field/{random_user_name_spec.rb → name/random_user_name_spec.rb} +2 -2
- data/spec/strategy/field/{random_float_delta_spec.rb → number/random_float_delta_spec.rb} +2 -2
- data/spec/strategy/field/number/random_float_spec.rb +28 -0
- data/spec/strategy/field/{random_integer_delta_spec.rb → number/random_integer_delta_spec.rb} +3 -5
- data/spec/strategy/field/{random_int_spec.rb → number/random_integer_spec.rb} +4 -4
- data/spec/strategy/field/random_boolean_spec.rb +2 -2
- data/spec/strategy/field/string/formatted_string_numbers_spec.rb +15 -0
- data/spec/strategy/field/{lorem_ipsum_spec.rb → string/lorem_ipsum_spec.rb} +2 -2
- data/spec/strategy/field/{random_string_spec.rb → string/random_string_spec.rb} +2 -2
- data/spec/strategy/field/{distinct_column_values_spec.rb → string/select_from_database_spec.rb} +3 -3
- data/spec/strategy/field/{random_selection_spec.rb → string/select_from_list_spec.rb} +5 -5
- data/spec/strategy/field/{string_template_spec.rb → string/string_template_spec.rb} +2 -2
- data/spec/strategy/field/whitelist_spec.rb +2 -2
- data/spec/support/customer_sample.rb +1 -1
- data/spec/utils/database_spec.rb +2 -2
- data/spec/utils/geojson_parser_spec.rb +38 -0
- data/whitelist_dsl.rb +4 -6
- metadata +163 -59
- data/lib/strategy/field/anonymize_time.rb +0 -57
- data/lib/strategy/field/gmail_template.rb +0 -17
- data/lib/strategy/field/random_first_name.rb +0 -18
- data/lib/strategy/field/random_last_name.rb +0 -19
- data/lib/strategy/field/random_selection.rb +0 -23
- data/lib/strategy/field/user_name_template.rb +0 -22
- data/spec/strategy/field/anonymize_time_spec.rb +0 -23
- data/spec/strategy/field/gmail_template_spec.rb +0 -14
- data/spec/strategy/field/random_mailinator_email_spec.rb +0 -21
- data/spec/strategy/field/random_phone_number_spec.rb +0 -35
- data/spec/strategy/field/user_name_template_spec.rb +0 -13
@@ -0,0 +1,16 @@
|
|
1
|
+
module DataAnon
|
2
|
+
module Strategy
|
3
|
+
module Field
|
4
|
+
class GmailTemplate
|
5
|
+
|
6
|
+
def initialize username = 'someusername'
|
7
|
+
@username = username
|
8
|
+
end
|
9
|
+
|
10
|
+
def anonymize field
|
11
|
+
"#{@username}+#{field.row_number}@gmail.com"
|
12
|
+
end
|
13
|
+
end
|
14
|
+
end
|
15
|
+
end
|
16
|
+
end
|
File without changes
|
@@ -1,23 +1,54 @@
|
|
1
1
|
require 'strategy/field/whitelist'
|
2
|
-
require 'strategy/field/string_template'
|
3
|
-
require 'strategy/field/user_name_template'
|
4
|
-
require 'strategy/field/random_string'
|
5
|
-
require 'strategy/field/random_int'
|
6
2
|
require 'strategy/field/random_boolean'
|
7
|
-
|
8
|
-
require 'strategy/field/random_integer_delta'
|
9
|
-
require 'strategy/field/random_float_delta'
|
10
|
-
require 'strategy/field/random_selection'
|
11
|
-
require 'strategy/field/distinct_column_values'
|
12
|
-
require 'strategy/field/lorem_ipsum'
|
13
|
-
require 'strategy/field/gmail_template'
|
14
|
-
require 'strategy/field/date_time_delta'
|
15
|
-
require 'strategy/field/default_anon'
|
16
|
-
require 'strategy/field/random_email'
|
17
|
-
require 'strategy/field/random_mailinator_email'
|
18
|
-
require 'strategy/field/random_phone_number'
|
19
|
-
require 'strategy/field/random_first_name'
|
20
|
-
require 'strategy/field/random_last_name'
|
21
|
-
require 'strategy/field/random_full_name'
|
22
|
-
require 'strategy/field/random_user_name'
|
3
|
+
|
23
4
|
require 'strategy/field/anonymous'
|
5
|
+
|
6
|
+
# string
|
7
|
+
require 'strategy/field/string/lorem_ipsum'
|
8
|
+
require 'strategy/field/string/string_template'
|
9
|
+
require 'strategy/field/string/random_string'
|
10
|
+
require 'strategy/field/string/formatted_string_numbers'
|
11
|
+
|
12
|
+
require 'strategy/field/string/select_from_file'
|
13
|
+
require 'strategy/field/string/select_from_list'
|
14
|
+
require 'strategy/field/string/select_from_database'
|
15
|
+
|
16
|
+
# number
|
17
|
+
require 'strategy/field/number/random_integer'
|
18
|
+
require 'strategy/field/number/random_float'
|
19
|
+
require 'strategy/field/number/random_integer_delta'
|
20
|
+
require 'strategy/field/number/random_float_delta'
|
21
|
+
|
22
|
+
# contact
|
23
|
+
require 'strategy/field/contact/geojson_base'
|
24
|
+
require 'strategy/field/contact/random_phone_number'
|
25
|
+
require 'strategy/field/contact/random_address'
|
26
|
+
require 'strategy/field/contact/random_zipcode'
|
27
|
+
require 'strategy/field/contact/random_city'
|
28
|
+
require 'strategy/field/contact/random_province'
|
29
|
+
|
30
|
+
# datetime
|
31
|
+
require 'strategy/field/datetime/anonymize_time'
|
32
|
+
require 'strategy/field/datetime/anonymize_datetime'
|
33
|
+
require 'strategy/field/datetime/anonymize_date'
|
34
|
+
require 'strategy/field/datetime/date_time_delta'
|
35
|
+
require 'strategy/field/datetime/time_delta'
|
36
|
+
require 'strategy/field/datetime/date_delta'
|
37
|
+
|
38
|
+
# email
|
39
|
+
require 'strategy/field/email/random_email'
|
40
|
+
require 'strategy/field/email/gmail_template'
|
41
|
+
require 'strategy/field/email/random_mailinator_email'
|
42
|
+
|
43
|
+
# name
|
44
|
+
require 'strategy/field/name/random_first_name'
|
45
|
+
require 'strategy/field/name/random_last_name'
|
46
|
+
require 'strategy/field/name/random_full_name'
|
47
|
+
require 'strategy/field/name/random_user_name'
|
48
|
+
|
49
|
+
|
50
|
+
|
51
|
+
FieldStrategy = DataAnon::Strategy::Field
|
52
|
+
|
53
|
+
require 'strategy/field/default_anon'
|
54
|
+
|
File without changes
|
File without changes
|
@@ -0,0 +1,23 @@
|
|
1
|
+
module DataAnon
|
2
|
+
module Strategy
|
3
|
+
module Field
|
4
|
+
|
5
|
+
|
6
|
+
class RandomFloat
|
7
|
+
|
8
|
+
def initialize from = 0.0, to = 100.0
|
9
|
+
@from = from
|
10
|
+
@to = to
|
11
|
+
|
12
|
+
end
|
13
|
+
|
14
|
+
def anonymize field
|
15
|
+
DataAnon::Utils::RandomFloat.generate(@from,@to)
|
16
|
+
end
|
17
|
+
|
18
|
+
end
|
19
|
+
|
20
|
+
|
21
|
+
end
|
22
|
+
end
|
23
|
+
end
|
@@ -3,7 +3,7 @@ module DataAnon
|
|
3
3
|
module Field
|
4
4
|
|
5
5
|
|
6
|
-
class
|
6
|
+
class FormattedStringNumber
|
7
7
|
|
8
8
|
def anonymize field
|
9
9
|
@original_phone_number = field.value
|
@@ -18,7 +18,10 @@ module DataAnon
|
|
18
18
|
|
19
19
|
@anonymized_phone_number
|
20
20
|
end
|
21
|
+
|
21
22
|
end
|
23
|
+
|
24
|
+
|
22
25
|
end
|
23
26
|
end
|
24
27
|
end
|
File without changes
|
File without changes
|
@@ -2,18 +2,17 @@ module DataAnon
|
|
2
2
|
module Strategy
|
3
3
|
module Field
|
4
4
|
|
5
|
-
class
|
5
|
+
class SelectFromDatabase
|
6
6
|
include Utils::Logging
|
7
7
|
|
8
8
|
def initialize table_name, field_name
|
9
|
-
source = Utils::SourceTable.create table_name
|
9
|
+
source = Utils::SourceTable.create table_name, []
|
10
10
|
@values = source.select(field_name).uniq.collect { |record| record[field_name]}
|
11
11
|
logger.debug "For field strategy #{table_name}:#{field_name} using values #{@values} "
|
12
12
|
|
13
13
|
end
|
14
14
|
|
15
15
|
def anonymize field
|
16
|
-
return @values[0] if @values.length == 1
|
17
16
|
@values[DataAnon::Utils::RandomInt.generate(0,(@values.length - 1))]
|
18
17
|
end
|
19
18
|
|
File without changes
|
data/lib/strategy/whitelist.rb
CHANGED
@@ -5,14 +5,16 @@ module DataAnon
|
|
5
5
|
def process_record(index, record)
|
6
6
|
dest_record_map = {}
|
7
7
|
record.attributes.each do |field_name, field_value|
|
8
|
-
unless field_value.nil? || field_name
|
8
|
+
unless field_value.nil? || is_primary_key?(field_name)
|
9
9
|
field = DataAnon::Core::Field.new(field_name, field_value, index, record)
|
10
10
|
field_strategy = @fields[field_name.downcase] || DataAnon::Strategy::Field::DefaultAnon.new(@user_strategies)
|
11
11
|
dest_record_map[field_name] = field_strategy.anonymize(field)
|
12
12
|
end
|
13
13
|
end
|
14
14
|
dest_record = dest_table.new dest_record_map
|
15
|
-
|
15
|
+
@primary_keys.each do |key|
|
16
|
+
dest_record[key] = record[key]
|
17
|
+
end
|
16
18
|
dest_record.save!
|
17
19
|
end
|
18
20
|
|
data/lib/utils/database.rb
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
require 'active_record'
|
2
|
+
require 'composite_primary_keys'
|
2
3
|
require 'logger'
|
3
4
|
|
4
5
|
module DataAnon
|
@@ -23,10 +24,11 @@ module DataAnon
|
|
23
24
|
|
24
25
|
class BaseTable
|
25
26
|
|
26
|
-
def self.create_table table_name,
|
27
|
+
def self.create_table database, table_name, primary_keys
|
27
28
|
Class.new(database) do
|
28
29
|
self.table_name = table_name
|
29
|
-
self.
|
30
|
+
self.primary_keys = primary_keys if primary_keys.length > 1
|
31
|
+
self.primary_key = primary_keys[0] if primary_keys.length == 1
|
30
32
|
self.mass_assignment_sanitizer = MassAssignmentIgnoreSanitizer.new(self)
|
31
33
|
end
|
32
34
|
end
|
@@ -35,16 +37,16 @@ module DataAnon
|
|
35
37
|
|
36
38
|
class SourceTable < BaseTable
|
37
39
|
|
38
|
-
def self.create table_name, primary_key
|
39
|
-
create_table table_name, primary_key
|
40
|
+
def self.create table_name, primary_key
|
41
|
+
create_table SourceDatabase, table_name, primary_key
|
40
42
|
end
|
41
43
|
|
42
44
|
end
|
43
45
|
|
44
46
|
class DestinationTable < BaseTable
|
45
47
|
|
46
|
-
def self.create table_name, primary_key
|
47
|
-
create_table table_name, primary_key
|
48
|
+
def self.create table_name, primary_key
|
49
|
+
create_table DestinationDatabase, table_name, primary_key
|
48
50
|
end
|
49
51
|
|
50
52
|
end
|
@@ -0,0 +1,42 @@
|
|
1
|
+
require 'rgeo/geo_json'
|
2
|
+
|
3
|
+
module DataAnon
|
4
|
+
module Utils
|
5
|
+
class GeojsonParser
|
6
|
+
|
7
|
+
|
8
|
+
def self.address file_path
|
9
|
+
self.new(file_path).parse 'address'
|
10
|
+
end
|
11
|
+
|
12
|
+
def self.zipcode file_path
|
13
|
+
self.new(file_path).parse 'postcode'
|
14
|
+
end
|
15
|
+
|
16
|
+
def self.province file_path
|
17
|
+
self.new(file_path).parse 'province'
|
18
|
+
end
|
19
|
+
|
20
|
+
def self.city file_path
|
21
|
+
self.new(file_path).parse 'city'
|
22
|
+
end
|
23
|
+
|
24
|
+
def self.country file_path
|
25
|
+
self.new(file_path).parse 'country'
|
26
|
+
end
|
27
|
+
|
28
|
+
def initialize file_path
|
29
|
+
@places = File.read(file_path).split(/\n/)
|
30
|
+
end
|
31
|
+
|
32
|
+
def parse property
|
33
|
+
result_list = []
|
34
|
+
@places.each do |loc|
|
35
|
+
geom = RGeo::GeoJSON.decode(loc, :json_parser => :json)
|
36
|
+
result_list.push(geom[property])
|
37
|
+
end
|
38
|
+
result_list
|
39
|
+
end
|
40
|
+
end
|
41
|
+
end
|
42
|
+
end
|
data/lib/utils/logging.rb
CHANGED
@@ -15,15 +15,6 @@ module DataAnon
|
|
15
15
|
@@logger
|
16
16
|
end
|
17
17
|
|
18
|
-
def progress_logger
|
19
|
-
@@progress_logger ||= (self.progress_logger = Logger.new(STDOUT) )
|
20
|
-
end
|
21
|
-
|
22
|
-
def progress_logger= logger
|
23
|
-
logger.formatter = proc { |severity, datetime, progname, msg| msg }
|
24
|
-
@@progress_logger = logger
|
25
|
-
end
|
26
|
-
|
27
18
|
end
|
28
19
|
end
|
29
20
|
end
|
@@ -0,0 +1,29 @@
|
|
1
|
+
module DataAnon
|
2
|
+
module Utils
|
3
|
+
|
4
|
+
class ProgressBar
|
5
|
+
|
6
|
+
def initialize table_name, total
|
7
|
+
@total = total
|
8
|
+
@table_name = table_name
|
9
|
+
@progress_bar = PowerBar.new if total > 0 && show_progress
|
10
|
+
end
|
11
|
+
|
12
|
+
def show_progress
|
13
|
+
ENV['show_progress'] != 'false'
|
14
|
+
end
|
15
|
+
|
16
|
+
def show index
|
17
|
+
if @progress_bar && ((index % 1000 == 0) || (index == @total) || (index == 1))
|
18
|
+
@progress_bar.show(:msg => "Table: #{@table_name} (#{index}/#{@total})", :done => index, :total => @total)
|
19
|
+
end
|
20
|
+
end
|
21
|
+
|
22
|
+
def close
|
23
|
+
@progress_bar.close if @progress_bar
|
24
|
+
end
|
25
|
+
|
26
|
+
end
|
27
|
+
|
28
|
+
end
|
29
|
+
end
|