data-anonymization 0.1.2 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- data/.documentup.json +1 -0
- data/.travis.yml +0 -1
- data/README.md +277 -52
- data/blacklist_dsl.rb +1 -3
- data/data-anonymization.gemspec +4 -0
- data/lib/core/dsl.rb +1 -1
- data/lib/data-anonymization.rb +3 -0
- data/lib/strategy/base.rb +21 -11
- data/lib/strategy/blacklist.rb +2 -1
- data/lib/strategy/field/contact/geojson_base.rb +24 -0
- data/lib/strategy/field/contact/random_address.rb +17 -0
- data/lib/strategy/field/contact/random_city.rb +17 -0
- data/lib/strategy/field/contact/random_phone_number.rb +13 -0
- data/lib/strategy/field/contact/random_province.rb +17 -0
- data/lib/strategy/field/contact/random_zipcode.rb +17 -0
- data/lib/strategy/field/datetime/anonymize_date.rb +39 -0
- data/lib/strategy/field/datetime/anonymize_datetime.rb +15 -0
- data/lib/strategy/field/datetime/anonymize_time.rb +58 -0
- data/lib/strategy/field/datetime/date_delta.rb +21 -0
- data/lib/strategy/field/{date_time_delta.rb → datetime/date_time_delta.rb} +3 -3
- data/lib/strategy/field/datetime/time_delta.rb +12 -0
- data/lib/strategy/field/default_anon.rb +12 -7
- data/lib/strategy/field/email/gmail_template.rb +16 -0
- data/lib/strategy/field/{random_email.rb → email/random_email.rb} +0 -0
- data/lib/strategy/field/{random_mailinator_email.rb → email/random_mailinator_email.rb} +0 -2
- data/lib/strategy/field/fields.rb +51 -20
- data/lib/strategy/field/name/random_first_name.rb +14 -0
- data/lib/strategy/field/{random_full_name.rb → name/random_full_name.rb} +0 -0
- data/lib/strategy/field/name/random_last_name.rb +14 -0
- data/lib/strategy/field/{random_user_name.rb → name/random_user_name.rb} +0 -0
- data/lib/strategy/field/number/random_float.rb +23 -0
- data/lib/strategy/field/{random_float_delta.rb → number/random_float_delta.rb} +2 -4
- data/lib/strategy/field/{random_int.rb → number/random_integer.rb} +1 -1
- data/lib/strategy/field/{random_integer_delta.rb → number/random_integer_delta.rb} +2 -5
- data/lib/strategy/field/{random_phone_number.rb → string/formatted_string_numbers.rb} +4 -1
- data/lib/strategy/field/{lorem_ipsum.rb → string/lorem_ipsum.rb} +0 -0
- data/lib/strategy/field/{random_string.rb → string/random_string.rb} +0 -0
- data/lib/strategy/field/{distinct_column_values.rb → string/select_from_database.rb} +2 -3
- data/lib/strategy/field/string/select_from_file.rb +18 -0
- data/lib/strategy/field/string/select_from_list.rb +17 -0
- data/lib/strategy/field/{string_template.rb → string/string_template.rb} +0 -0
- data/lib/strategy/whitelist.rb +4 -2
- data/lib/utils/database.rb +8 -6
- data/lib/utils/geojson_parser.rb +42 -0
- data/lib/utils/logging.rb +0 -9
- data/lib/utils/progress_bar.rb +29 -0
- data/lib/utils/random_float.rb +12 -0
- data/lib/utils/random_int.rb +3 -7
- data/lib/utils/resource.rb +4 -0
- data/lib/version.rb +1 -1
- data/resources/UK_addresses.geojson +300 -0
- data/resources/US_addresses.geojson +300 -0
- data/spec/acceptance/rdbms_blacklist_spec.rb +2 -2
- data/spec/acceptance/rdbms_whitelist_spec.rb +6 -8
- data/spec/resource/sample.geojson +1 -0
- data/spec/spec_helper.rb +3 -2
- data/spec/strategy/field/contact/random_address_spec.rb +12 -0
- data/spec/strategy/field/contact/random_city_spec.rb +14 -0
- data/spec/strategy/field/contact/random_phone_number_spec.rb +16 -0
- data/spec/strategy/field/contact/random_province_spec.rb +14 -0
- data/spec/strategy/field/contact/random_zipcode_spec.rb +14 -0
- data/spec/strategy/field/datetime/anonymize_date_spec.rb +27 -0
- data/spec/strategy/field/datetime/anonymize_datetime_spec.rb +57 -0
- data/spec/strategy/field/datetime/anonymize_time_spec.rb +57 -0
- data/spec/strategy/field/datetime/date_delta_spec.rb +36 -0
- data/spec/strategy/field/{date_time_delta_spec.rb → datetime/date_time_delta_spec.rb} +3 -2
- data/spec/strategy/field/datetime/time_delta_spec.rb +44 -0
- data/spec/strategy/field/default_anon_spec.rb +42 -0
- data/spec/strategy/field/email/gmail_template_spec.rb +17 -0
- data/spec/strategy/field/{random_email_spec.rb → email/random_email_spec.rb} +2 -2
- data/spec/strategy/field/email/random_mailinator_email_spec.rb +14 -0
- data/spec/strategy/field/{random_first_name_spec.rb → name/random_first_name_spec.rb} +2 -2
- data/spec/strategy/field/{random_full_name_spec.rb → name/random_full_name_spec.rb} +2 -2
- data/spec/strategy/field/{random_last_name_spec.rb → name/random_last_name_spec.rb} +2 -2
- data/spec/strategy/field/{random_user_name_spec.rb → name/random_user_name_spec.rb} +2 -2
- data/spec/strategy/field/{random_float_delta_spec.rb → number/random_float_delta_spec.rb} +2 -2
- data/spec/strategy/field/number/random_float_spec.rb +28 -0
- data/spec/strategy/field/{random_integer_delta_spec.rb → number/random_integer_delta_spec.rb} +3 -5
- data/spec/strategy/field/{random_int_spec.rb → number/random_integer_spec.rb} +4 -4
- data/spec/strategy/field/random_boolean_spec.rb +2 -2
- data/spec/strategy/field/string/formatted_string_numbers_spec.rb +15 -0
- data/spec/strategy/field/{lorem_ipsum_spec.rb → string/lorem_ipsum_spec.rb} +2 -2
- data/spec/strategy/field/{random_string_spec.rb → string/random_string_spec.rb} +2 -2
- data/spec/strategy/field/{distinct_column_values_spec.rb → string/select_from_database_spec.rb} +3 -3
- data/spec/strategy/field/{random_selection_spec.rb → string/select_from_list_spec.rb} +5 -5
- data/spec/strategy/field/{string_template_spec.rb → string/string_template_spec.rb} +2 -2
- data/spec/strategy/field/whitelist_spec.rb +2 -2
- data/spec/support/customer_sample.rb +1 -1
- data/spec/utils/database_spec.rb +2 -2
- data/spec/utils/geojson_parser_spec.rb +38 -0
- data/whitelist_dsl.rb +4 -6
- metadata +163 -59
- data/lib/strategy/field/anonymize_time.rb +0 -57
- data/lib/strategy/field/gmail_template.rb +0 -17
- data/lib/strategy/field/random_first_name.rb +0 -18
- data/lib/strategy/field/random_last_name.rb +0 -19
- data/lib/strategy/field/random_selection.rb +0 -23
- data/lib/strategy/field/user_name_template.rb +0 -22
- data/spec/strategy/field/anonymize_time_spec.rb +0 -23
- data/spec/strategy/field/gmail_template_spec.rb +0 -14
- data/spec/strategy/field/random_mailinator_email_spec.rb +0 -21
- data/spec/strategy/field/random_phone_number_spec.rb +0 -35
- data/spec/strategy/field/user_name_template_spec.rb +0 -13
data/blacklist_dsl.rb
CHANGED
@@ -2,8 +2,6 @@ system "bundle exec ruby whitelist_dsl.rb"
|
|
2
2
|
|
3
3
|
require 'data-anonymization'
|
4
4
|
|
5
|
-
FS = DataAnon::Strategy::Field
|
6
|
-
|
7
5
|
DataAnon::Utils::Logging.logger.level = Logger::INFO
|
8
6
|
|
9
7
|
database 'Chinook' do
|
@@ -12,7 +10,7 @@ database 'Chinook' do
|
|
12
10
|
|
13
11
|
table 'MediaType' do
|
14
12
|
primary_key 'MediaTypeId'
|
15
|
-
anonymize('Name').using
|
13
|
+
anonymize('Name').using FieldStrategy::StringTemplate.new('Media Type 100#{row_number}')
|
16
14
|
end
|
17
15
|
|
18
16
|
end
|
data/data-anonymization.gemspec
CHANGED
@@ -18,5 +18,9 @@ Gem::Specification.new do |gem|
|
|
18
18
|
gem.require_paths = ["lib"]
|
19
19
|
|
20
20
|
gem.add_dependency('activerecord', '~> 3.2.8')
|
21
|
+
gem.add_dependency('composite_primary_keys', '~> 5.0.8')
|
21
22
|
gem.add_dependency('activesupport', '~> 3.2.8')
|
23
|
+
gem.add_dependency('rgeo', '~> 0.3.15')
|
24
|
+
gem.add_dependency('rgeo-geojson', '~> 0.2.3')
|
25
|
+
gem.add_dependency('powerbar', '~> 1.0.8')
|
22
26
|
end
|
data/lib/core/dsl.rb
CHANGED
data/lib/data-anonymization.rb
CHANGED
@@ -2,7 +2,10 @@ require "version"
|
|
2
2
|
|
3
3
|
require "utils/logging"
|
4
4
|
require "utils/random_int"
|
5
|
+
require "utils/random_float"
|
5
6
|
require "utils/random_string"
|
7
|
+
require "utils/geojson_parser"
|
8
|
+
require "utils/progress_bar"
|
6
9
|
require "utils/resource"
|
7
10
|
require "core/database"
|
8
11
|
require "core/field"
|
data/lib/strategy/base.rb
CHANGED
@@ -1,3 +1,5 @@
|
|
1
|
+
require 'powerbar'
|
2
|
+
|
1
3
|
module DataAnon
|
2
4
|
module Strategy
|
3
5
|
class Base
|
@@ -14,10 +16,15 @@ module DataAnon
|
|
14
16
|
self
|
15
17
|
end
|
16
18
|
|
17
|
-
def primary_key
|
18
|
-
@
|
19
|
+
def primary_key *fields
|
20
|
+
@primary_keys = fields
|
21
|
+
end
|
22
|
+
|
23
|
+
def is_primary_key? field
|
24
|
+
@primary_keys.select { |key| field.downcase == key.downcase }.length > 0
|
19
25
|
end
|
20
26
|
|
27
|
+
|
21
28
|
def whitelist *fields
|
22
29
|
fields.each { |f| @fields[f.downcase] = DataAnon::Strategy::Field::Whitelist.new }
|
23
30
|
end
|
@@ -43,23 +50,26 @@ module DataAnon
|
|
43
50
|
end
|
44
51
|
|
45
52
|
def dest_table
|
46
|
-
@dest_table ||= Utils::DestinationTable.create @name, @
|
53
|
+
@dest_table ||= Utils::DestinationTable.create @name, @primary_keys
|
47
54
|
end
|
48
55
|
|
49
56
|
def source_table
|
50
|
-
@source_table ||= Utils::SourceTable.create @name, @
|
57
|
+
@source_table ||= Utils::SourceTable.create @name, @primary_keys
|
51
58
|
end
|
52
59
|
|
53
60
|
def process
|
54
61
|
logger.debug "Processing table #{@name} with fields strategies #{@fields}"
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
62
|
+
total = source_table.count
|
63
|
+
if total > 0
|
64
|
+
index = 1
|
65
|
+
progress_bar = DataAnon::Utils::ProgressBar.new @name, total
|
66
|
+
source_table.all.each do |record|
|
67
|
+
process_record index, record
|
68
|
+
index += 1
|
69
|
+
progress_bar.show(index)
|
70
|
+
end
|
71
|
+
progress_bar.close
|
61
72
|
end
|
62
|
-
progress_logger.info " DONE\n"
|
63
73
|
end
|
64
74
|
|
65
75
|
end
|
data/lib/strategy/blacklist.rb
CHANGED
@@ -6,13 +6,14 @@ module DataAnon
|
|
6
6
|
@fields.each do |field, strategy|
|
7
7
|
database_field_name = record.attributes.select { |k,v| k.downcase == field }.keys[0]
|
8
8
|
field_value = record.attributes[database_field_name]
|
9
|
-
unless field_value.nil? || database_field_name
|
9
|
+
unless field_value.nil? || is_primary_key?(database_field_name)
|
10
10
|
field = DataAnon::Core::Field.new(database_field_name, field_value, index, record)
|
11
11
|
record[database_field_name] = strategy.anonymize(field)
|
12
12
|
end
|
13
13
|
end
|
14
14
|
record.save!
|
15
15
|
end
|
16
|
+
|
16
17
|
end
|
17
18
|
end
|
18
19
|
end
|
@@ -0,0 +1,24 @@
|
|
1
|
+
module DataAnon
|
2
|
+
module Strategy
|
3
|
+
module Field
|
4
|
+
class GeojsonBase
|
5
|
+
|
6
|
+
def self.region_US
|
7
|
+
self.new DataAnon::Utils::Resource.file('US_addresses.geojson')
|
8
|
+
end
|
9
|
+
|
10
|
+
def self.region_UK
|
11
|
+
self.new DataAnon::Utils::Resource.file('UK_addresses.geojson')
|
12
|
+
end
|
13
|
+
|
14
|
+
def initialize file_path
|
15
|
+
raise "Load and set the @values member variable in constructor"
|
16
|
+
end
|
17
|
+
|
18
|
+
def anonymize field
|
19
|
+
@values.sample
|
20
|
+
end
|
21
|
+
end
|
22
|
+
end
|
23
|
+
end
|
24
|
+
end
|
@@ -0,0 +1,39 @@
|
|
1
|
+
module DataAnon
|
2
|
+
module Strategy
|
3
|
+
module Field
|
4
|
+
|
5
|
+
class AnonymizeDate
|
6
|
+
|
7
|
+
|
8
|
+
def self.only_month
|
9
|
+
self.new true, false
|
10
|
+
end
|
11
|
+
|
12
|
+
def self.only_day
|
13
|
+
self.new false, true
|
14
|
+
end
|
15
|
+
|
16
|
+
def initialize anonymize_month, anonymize_day
|
17
|
+
|
18
|
+
@anonymize_month = anonymize_month
|
19
|
+
@anonymize_day = anonymize_day
|
20
|
+
|
21
|
+
end
|
22
|
+
|
23
|
+
def anonymize field
|
24
|
+
|
25
|
+
original_time = field.value
|
26
|
+
|
27
|
+
year = original_time.year
|
28
|
+
month = @anonymize_month? DataAnon::Utils::RandomInt.generate(1,12) : original_time.month
|
29
|
+
days_in_month = Time.new(year,month,1,1,1,1).end_of_month.day
|
30
|
+
day = @anonymize_day? DataAnon::Utils::RandomInt.generate(1,days_in_month) : original_time.day
|
31
|
+
|
32
|
+
Date.new(year, month, day)
|
33
|
+
end
|
34
|
+
|
35
|
+
|
36
|
+
end
|
37
|
+
end
|
38
|
+
end
|
39
|
+
end
|
@@ -0,0 +1,58 @@
|
|
1
|
+
module DataAnon
|
2
|
+
module Strategy
|
3
|
+
module Field
|
4
|
+
|
5
|
+
class AnonymizeTime
|
6
|
+
|
7
|
+
DEFAULT_ANONYMIZATION = true
|
8
|
+
|
9
|
+
def self.only_month
|
10
|
+
self.new true, false, false, false
|
11
|
+
end
|
12
|
+
|
13
|
+
def self.only_day
|
14
|
+
self.new false, true, false, false
|
15
|
+
end
|
16
|
+
|
17
|
+
def self.only_hour
|
18
|
+
self.new false, false, true, false
|
19
|
+
end
|
20
|
+
|
21
|
+
def self.only_minute
|
22
|
+
self.new false, false, false, true
|
23
|
+
end
|
24
|
+
|
25
|
+
def initialize anonymize_month, anonymize_day, anonymize_hour, anonymize_min
|
26
|
+
|
27
|
+
@anonymize_month = anonymize_month
|
28
|
+
@anonymize_day = anonymize_day
|
29
|
+
@anonymize_hour = anonymize_hour
|
30
|
+
@anonymize_min = anonymize_min
|
31
|
+
|
32
|
+
end
|
33
|
+
|
34
|
+
def anonymize field
|
35
|
+
|
36
|
+
original_time = field.value
|
37
|
+
|
38
|
+
year = original_time.year
|
39
|
+
month = @anonymize_month? DataAnon::Utils::RandomInt.generate(1,12) : original_time.month
|
40
|
+
days_in_month = Time.new(year,month,1,1,1,1).end_of_month.day
|
41
|
+
day = @anonymize_day? DataAnon::Utils::RandomInt.generate(1,days_in_month) : original_time.day
|
42
|
+
hour = @anonymize_hour? DataAnon::Utils::RandomInt.generate(1,24) : original_time.hour
|
43
|
+
min = @anonymize_min? DataAnon::Utils::RandomInt.generate(1,60) : original_time.min
|
44
|
+
sec = original_time.sec
|
45
|
+
|
46
|
+
create_object(year, month, day, hour, min, sec)
|
47
|
+
end
|
48
|
+
|
49
|
+
private
|
50
|
+
|
51
|
+
def create_object(year, month, day, hour, min, sec)
|
52
|
+
Time.new(year, month, day, hour, min, sec)
|
53
|
+
end
|
54
|
+
|
55
|
+
end
|
56
|
+
end
|
57
|
+
end
|
58
|
+
end
|
@@ -0,0 +1,21 @@
|
|
1
|
+
module DataAnon
|
2
|
+
module Strategy
|
3
|
+
module Field
|
4
|
+
|
5
|
+
class DateDelta
|
6
|
+
|
7
|
+
DEFAULT_DAY_DELTA = 10
|
8
|
+
|
9
|
+
def initialize day_delta = DEFAULT_DAY_DELTA
|
10
|
+
@day_delta = day_delta
|
11
|
+
end
|
12
|
+
|
13
|
+
def anonymize field
|
14
|
+
day_adjustment = DataAnon::Utils::RandomInt.generate(-@day_delta,@day_delta)
|
15
|
+
return field.value + day_adjustment.days
|
16
|
+
end
|
17
|
+
|
18
|
+
end
|
19
|
+
end
|
20
|
+
end
|
21
|
+
end
|
@@ -13,9 +13,9 @@ module DataAnon
|
|
13
13
|
end
|
14
14
|
|
15
15
|
def anonymize field
|
16
|
-
day_adjustment =
|
17
|
-
minute_adjustment =
|
18
|
-
return field.value + day_adjustment.days + minute_adjustment.minutes
|
16
|
+
day_adjustment = DataAnon::Utils::RandomInt.generate(-@day_delta,@day_delta)
|
17
|
+
minute_adjustment = DataAnon::Utils::RandomInt.generate(-@minute_delta,@minute_delta)
|
18
|
+
return field.value + (day_adjustment.days + minute_adjustment.minutes)
|
19
19
|
end
|
20
20
|
|
21
21
|
end
|
@@ -4,19 +4,24 @@ module DataAnon
|
|
4
4
|
|
5
5
|
class DefaultAnon
|
6
6
|
|
7
|
-
|
8
|
-
|
9
|
-
:
|
10
|
-
:
|
11
|
-
:
|
7
|
+
DEFAULT_STRATEGIES = {:string => FieldStrategy::LoremIpsum.new,
|
8
|
+
:fixnum => FieldStrategy::RandomIntegerDelta.new(5),
|
9
|
+
:bignum => FieldStrategy::RandomIntegerDelta.new(5000),
|
10
|
+
:float => FieldStrategy::RandomFloatDelta.new(5.0),
|
11
|
+
:datetime => FieldStrategy::DateTimeDelta.new,
|
12
|
+
:time => FieldStrategy::TimeDelta.new,
|
13
|
+
:date => FieldStrategy::DateDelta.new,
|
14
|
+
:trueclass => FieldStrategy::RandomBoolean.new,
|
15
|
+
:falseclass => FieldStrategy::RandomBoolean.new
|
12
16
|
}
|
13
17
|
|
14
|
-
def initialize user_defaults
|
18
|
+
def initialize user_defaults = {}
|
15
19
|
@user_defaults = DEFAULT_STRATEGIES.merge user_defaults
|
16
20
|
end
|
17
21
|
|
18
22
|
def anonymize field
|
19
|
-
strategy = @user_defaults[field.value.class.to_s.downcase.to_sym]
|
23
|
+
strategy = @user_defaults[field.value.class.to_s.downcase.to_sym]
|
24
|
+
raise "No strategy defined for datatype #{field.value.class}" unless strategy
|
20
25
|
strategy.anonymize field
|
21
26
|
end
|
22
27
|
|