data-anonymization 0.1.2 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.documentup.json +1 -0
- data/.travis.yml +0 -1
- data/README.md +277 -52
- data/blacklist_dsl.rb +1 -3
- data/data-anonymization.gemspec +4 -0
- data/lib/core/dsl.rb +1 -1
- data/lib/data-anonymization.rb +3 -0
- data/lib/strategy/base.rb +21 -11
- data/lib/strategy/blacklist.rb +2 -1
- data/lib/strategy/field/contact/geojson_base.rb +24 -0
- data/lib/strategy/field/contact/random_address.rb +17 -0
- data/lib/strategy/field/contact/random_city.rb +17 -0
- data/lib/strategy/field/contact/random_phone_number.rb +13 -0
- data/lib/strategy/field/contact/random_province.rb +17 -0
- data/lib/strategy/field/contact/random_zipcode.rb +17 -0
- data/lib/strategy/field/datetime/anonymize_date.rb +39 -0
- data/lib/strategy/field/datetime/anonymize_datetime.rb +15 -0
- data/lib/strategy/field/datetime/anonymize_time.rb +58 -0
- data/lib/strategy/field/datetime/date_delta.rb +21 -0
- data/lib/strategy/field/{date_time_delta.rb → datetime/date_time_delta.rb} +3 -3
- data/lib/strategy/field/datetime/time_delta.rb +12 -0
- data/lib/strategy/field/default_anon.rb +12 -7
- data/lib/strategy/field/email/gmail_template.rb +16 -0
- data/lib/strategy/field/{random_email.rb → email/random_email.rb} +0 -0
- data/lib/strategy/field/{random_mailinator_email.rb → email/random_mailinator_email.rb} +0 -2
- data/lib/strategy/field/fields.rb +51 -20
- data/lib/strategy/field/name/random_first_name.rb +14 -0
- data/lib/strategy/field/{random_full_name.rb → name/random_full_name.rb} +0 -0
- data/lib/strategy/field/name/random_last_name.rb +14 -0
- data/lib/strategy/field/{random_user_name.rb → name/random_user_name.rb} +0 -0
- data/lib/strategy/field/number/random_float.rb +23 -0
- data/lib/strategy/field/{random_float_delta.rb → number/random_float_delta.rb} +2 -4
- data/lib/strategy/field/{random_int.rb → number/random_integer.rb} +1 -1
- data/lib/strategy/field/{random_integer_delta.rb → number/random_integer_delta.rb} +2 -5
- data/lib/strategy/field/{random_phone_number.rb → string/formatted_string_numbers.rb} +4 -1
- data/lib/strategy/field/{lorem_ipsum.rb → string/lorem_ipsum.rb} +0 -0
- data/lib/strategy/field/{random_string.rb → string/random_string.rb} +0 -0
- data/lib/strategy/field/{distinct_column_values.rb → string/select_from_database.rb} +2 -3
- data/lib/strategy/field/string/select_from_file.rb +18 -0
- data/lib/strategy/field/string/select_from_list.rb +17 -0
- data/lib/strategy/field/{string_template.rb → string/string_template.rb} +0 -0
- data/lib/strategy/whitelist.rb +4 -2
- data/lib/utils/database.rb +8 -6
- data/lib/utils/geojson_parser.rb +42 -0
- data/lib/utils/logging.rb +0 -9
- data/lib/utils/progress_bar.rb +29 -0
- data/lib/utils/random_float.rb +12 -0
- data/lib/utils/random_int.rb +3 -7
- data/lib/utils/resource.rb +4 -0
- data/lib/version.rb +1 -1
- data/resources/UK_addresses.geojson +300 -0
- data/resources/US_addresses.geojson +300 -0
- data/spec/acceptance/rdbms_blacklist_spec.rb +2 -2
- data/spec/acceptance/rdbms_whitelist_spec.rb +6 -8
- data/spec/resource/sample.geojson +1 -0
- data/spec/spec_helper.rb +3 -2
- data/spec/strategy/field/contact/random_address_spec.rb +12 -0
- data/spec/strategy/field/contact/random_city_spec.rb +14 -0
- data/spec/strategy/field/contact/random_phone_number_spec.rb +16 -0
- data/spec/strategy/field/contact/random_province_spec.rb +14 -0
- data/spec/strategy/field/contact/random_zipcode_spec.rb +14 -0
- data/spec/strategy/field/datetime/anonymize_date_spec.rb +27 -0
- data/spec/strategy/field/datetime/anonymize_datetime_spec.rb +57 -0
- data/spec/strategy/field/datetime/anonymize_time_spec.rb +57 -0
- data/spec/strategy/field/datetime/date_delta_spec.rb +36 -0
- data/spec/strategy/field/{date_time_delta_spec.rb → datetime/date_time_delta_spec.rb} +3 -2
- data/spec/strategy/field/datetime/time_delta_spec.rb +44 -0
- data/spec/strategy/field/default_anon_spec.rb +42 -0
- data/spec/strategy/field/email/gmail_template_spec.rb +17 -0
- data/spec/strategy/field/{random_email_spec.rb → email/random_email_spec.rb} +2 -2
- data/spec/strategy/field/email/random_mailinator_email_spec.rb +14 -0
- data/spec/strategy/field/{random_first_name_spec.rb → name/random_first_name_spec.rb} +2 -2
- data/spec/strategy/field/{random_full_name_spec.rb → name/random_full_name_spec.rb} +2 -2
- data/spec/strategy/field/{random_last_name_spec.rb → name/random_last_name_spec.rb} +2 -2
- data/spec/strategy/field/{random_user_name_spec.rb → name/random_user_name_spec.rb} +2 -2
- data/spec/strategy/field/{random_float_delta_spec.rb → number/random_float_delta_spec.rb} +2 -2
- data/spec/strategy/field/number/random_float_spec.rb +28 -0
- data/spec/strategy/field/{random_integer_delta_spec.rb → number/random_integer_delta_spec.rb} +3 -5
- data/spec/strategy/field/{random_int_spec.rb → number/random_integer_spec.rb} +4 -4
- data/spec/strategy/field/random_boolean_spec.rb +2 -2
- data/spec/strategy/field/string/formatted_string_numbers_spec.rb +15 -0
- data/spec/strategy/field/{lorem_ipsum_spec.rb → string/lorem_ipsum_spec.rb} +2 -2
- data/spec/strategy/field/{random_string_spec.rb → string/random_string_spec.rb} +2 -2
- data/spec/strategy/field/{distinct_column_values_spec.rb → string/select_from_database_spec.rb} +3 -3
- data/spec/strategy/field/{random_selection_spec.rb → string/select_from_list_spec.rb} +5 -5
- data/spec/strategy/field/{string_template_spec.rb → string/string_template_spec.rb} +2 -2
- data/spec/strategy/field/whitelist_spec.rb +2 -2
- data/spec/support/customer_sample.rb +1 -1
- data/spec/utils/database_spec.rb +2 -2
- data/spec/utils/geojson_parser_spec.rb +38 -0
- data/whitelist_dsl.rb +4 -6
- metadata +163 -59
- data/lib/strategy/field/anonymize_time.rb +0 -57
- data/lib/strategy/field/gmail_template.rb +0 -17
- data/lib/strategy/field/random_first_name.rb +0 -18
- data/lib/strategy/field/random_last_name.rb +0 -19
- data/lib/strategy/field/random_selection.rb +0 -23
- data/lib/strategy/field/user_name_template.rb +0 -22
- data/spec/strategy/field/anonymize_time_spec.rb +0 -23
- data/spec/strategy/field/gmail_template_spec.rb +0 -14
- data/spec/strategy/field/random_mailinator_email_spec.rb +0 -21
- data/spec/strategy/field/random_phone_number_spec.rb +0 -35
- data/spec/strategy/field/user_name_template_spec.rb +0 -13
data/blacklist_dsl.rb
CHANGED
@@ -2,8 +2,6 @@ system "bundle exec ruby whitelist_dsl.rb"
|
|
2
2
|
|
3
3
|
require 'data-anonymization'
|
4
4
|
|
5
|
-
FS = DataAnon::Strategy::Field
|
6
|
-
|
7
5
|
DataAnon::Utils::Logging.logger.level = Logger::INFO
|
8
6
|
|
9
7
|
database 'Chinook' do
|
@@ -12,7 +10,7 @@ database 'Chinook' do
|
|
12
10
|
|
13
11
|
table 'MediaType' do
|
14
12
|
primary_key 'MediaTypeId'
|
15
|
-
anonymize('Name').using
|
13
|
+
anonymize('Name').using FieldStrategy::StringTemplate.new('Media Type 100#{row_number}')
|
16
14
|
end
|
17
15
|
|
18
16
|
end
|
data/data-anonymization.gemspec
CHANGED
@@ -18,5 +18,9 @@ Gem::Specification.new do |gem|
|
|
18
18
|
gem.require_paths = ["lib"]
|
19
19
|
|
20
20
|
gem.add_dependency('activerecord', '~> 3.2.8')
|
21
|
+
gem.add_dependency('composite_primary_keys', '~> 5.0.8')
|
21
22
|
gem.add_dependency('activesupport', '~> 3.2.8')
|
23
|
+
gem.add_dependency('rgeo', '~> 0.3.15')
|
24
|
+
gem.add_dependency('rgeo-geojson', '~> 0.2.3')
|
25
|
+
gem.add_dependency('powerbar', '~> 1.0.8')
|
22
26
|
end
|
data/lib/core/dsl.rb
CHANGED
data/lib/data-anonymization.rb
CHANGED
@@ -2,7 +2,10 @@ require "version"
|
|
2
2
|
|
3
3
|
require "utils/logging"
|
4
4
|
require "utils/random_int"
|
5
|
+
require "utils/random_float"
|
5
6
|
require "utils/random_string"
|
7
|
+
require "utils/geojson_parser"
|
8
|
+
require "utils/progress_bar"
|
6
9
|
require "utils/resource"
|
7
10
|
require "core/database"
|
8
11
|
require "core/field"
|
data/lib/strategy/base.rb
CHANGED
@@ -1,3 +1,5 @@
|
|
1
|
+
require 'powerbar'
|
2
|
+
|
1
3
|
module DataAnon
|
2
4
|
module Strategy
|
3
5
|
class Base
|
@@ -14,10 +16,15 @@ module DataAnon
|
|
14
16
|
self
|
15
17
|
end
|
16
18
|
|
17
|
-
def primary_key
|
18
|
-
@
|
19
|
+
def primary_key *fields
|
20
|
+
@primary_keys = fields
|
21
|
+
end
|
22
|
+
|
23
|
+
def is_primary_key? field
|
24
|
+
@primary_keys.select { |key| field.downcase == key.downcase }.length > 0
|
19
25
|
end
|
20
26
|
|
27
|
+
|
21
28
|
def whitelist *fields
|
22
29
|
fields.each { |f| @fields[f.downcase] = DataAnon::Strategy::Field::Whitelist.new }
|
23
30
|
end
|
@@ -43,23 +50,26 @@ module DataAnon
|
|
43
50
|
end
|
44
51
|
|
45
52
|
def dest_table
|
46
|
-
@dest_table ||= Utils::DestinationTable.create @name, @
|
53
|
+
@dest_table ||= Utils::DestinationTable.create @name, @primary_keys
|
47
54
|
end
|
48
55
|
|
49
56
|
def source_table
|
50
|
-
@source_table ||= Utils::SourceTable.create @name, @
|
57
|
+
@source_table ||= Utils::SourceTable.create @name, @primary_keys
|
51
58
|
end
|
52
59
|
|
53
60
|
def process
|
54
61
|
logger.debug "Processing table #{@name} with fields strategies #{@fields}"
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
62
|
+
total = source_table.count
|
63
|
+
if total > 0
|
64
|
+
index = 1
|
65
|
+
progress_bar = DataAnon::Utils::ProgressBar.new @name, total
|
66
|
+
source_table.all.each do |record|
|
67
|
+
process_record index, record
|
68
|
+
index += 1
|
69
|
+
progress_bar.show(index)
|
70
|
+
end
|
71
|
+
progress_bar.close
|
61
72
|
end
|
62
|
-
progress_logger.info " DONE\n"
|
63
73
|
end
|
64
74
|
|
65
75
|
end
|
data/lib/strategy/blacklist.rb
CHANGED
@@ -6,13 +6,14 @@ module DataAnon
|
|
6
6
|
@fields.each do |field, strategy|
|
7
7
|
database_field_name = record.attributes.select { |k,v| k.downcase == field }.keys[0]
|
8
8
|
field_value = record.attributes[database_field_name]
|
9
|
-
unless field_value.nil? || database_field_name
|
9
|
+
unless field_value.nil? || is_primary_key?(database_field_name)
|
10
10
|
field = DataAnon::Core::Field.new(database_field_name, field_value, index, record)
|
11
11
|
record[database_field_name] = strategy.anonymize(field)
|
12
12
|
end
|
13
13
|
end
|
14
14
|
record.save!
|
15
15
|
end
|
16
|
+
|
16
17
|
end
|
17
18
|
end
|
18
19
|
end
|
@@ -0,0 +1,24 @@
|
|
1
|
+
module DataAnon
|
2
|
+
module Strategy
|
3
|
+
module Field
|
4
|
+
class GeojsonBase
|
5
|
+
|
6
|
+
def self.region_US
|
7
|
+
self.new DataAnon::Utils::Resource.file('US_addresses.geojson')
|
8
|
+
end
|
9
|
+
|
10
|
+
def self.region_UK
|
11
|
+
self.new DataAnon::Utils::Resource.file('UK_addresses.geojson')
|
12
|
+
end
|
13
|
+
|
14
|
+
def initialize file_path
|
15
|
+
raise "Load and set the @values member variable in constructor"
|
16
|
+
end
|
17
|
+
|
18
|
+
def anonymize field
|
19
|
+
@values.sample
|
20
|
+
end
|
21
|
+
end
|
22
|
+
end
|
23
|
+
end
|
24
|
+
end
|
@@ -0,0 +1,39 @@
|
|
1
|
+
module DataAnon
|
2
|
+
module Strategy
|
3
|
+
module Field
|
4
|
+
|
5
|
+
class AnonymizeDate
|
6
|
+
|
7
|
+
|
8
|
+
def self.only_month
|
9
|
+
self.new true, false
|
10
|
+
end
|
11
|
+
|
12
|
+
def self.only_day
|
13
|
+
self.new false, true
|
14
|
+
end
|
15
|
+
|
16
|
+
def initialize anonymize_month, anonymize_day
|
17
|
+
|
18
|
+
@anonymize_month = anonymize_month
|
19
|
+
@anonymize_day = anonymize_day
|
20
|
+
|
21
|
+
end
|
22
|
+
|
23
|
+
def anonymize field
|
24
|
+
|
25
|
+
original_time = field.value
|
26
|
+
|
27
|
+
year = original_time.year
|
28
|
+
month = @anonymize_month? DataAnon::Utils::RandomInt.generate(1,12) : original_time.month
|
29
|
+
days_in_month = Time.new(year,month,1,1,1,1).end_of_month.day
|
30
|
+
day = @anonymize_day? DataAnon::Utils::RandomInt.generate(1,days_in_month) : original_time.day
|
31
|
+
|
32
|
+
Date.new(year, month, day)
|
33
|
+
end
|
34
|
+
|
35
|
+
|
36
|
+
end
|
37
|
+
end
|
38
|
+
end
|
39
|
+
end
|
@@ -0,0 +1,58 @@
|
|
1
|
+
module DataAnon
|
2
|
+
module Strategy
|
3
|
+
module Field
|
4
|
+
|
5
|
+
class AnonymizeTime
|
6
|
+
|
7
|
+
DEFAULT_ANONYMIZATION = true
|
8
|
+
|
9
|
+
def self.only_month
|
10
|
+
self.new true, false, false, false
|
11
|
+
end
|
12
|
+
|
13
|
+
def self.only_day
|
14
|
+
self.new false, true, false, false
|
15
|
+
end
|
16
|
+
|
17
|
+
def self.only_hour
|
18
|
+
self.new false, false, true, false
|
19
|
+
end
|
20
|
+
|
21
|
+
def self.only_minute
|
22
|
+
self.new false, false, false, true
|
23
|
+
end
|
24
|
+
|
25
|
+
def initialize anonymize_month, anonymize_day, anonymize_hour, anonymize_min
|
26
|
+
|
27
|
+
@anonymize_month = anonymize_month
|
28
|
+
@anonymize_day = anonymize_day
|
29
|
+
@anonymize_hour = anonymize_hour
|
30
|
+
@anonymize_min = anonymize_min
|
31
|
+
|
32
|
+
end
|
33
|
+
|
34
|
+
def anonymize field
|
35
|
+
|
36
|
+
original_time = field.value
|
37
|
+
|
38
|
+
year = original_time.year
|
39
|
+
month = @anonymize_month? DataAnon::Utils::RandomInt.generate(1,12) : original_time.month
|
40
|
+
days_in_month = Time.new(year,month,1,1,1,1).end_of_month.day
|
41
|
+
day = @anonymize_day? DataAnon::Utils::RandomInt.generate(1,days_in_month) : original_time.day
|
42
|
+
hour = @anonymize_hour? DataAnon::Utils::RandomInt.generate(1,24) : original_time.hour
|
43
|
+
min = @anonymize_min? DataAnon::Utils::RandomInt.generate(1,60) : original_time.min
|
44
|
+
sec = original_time.sec
|
45
|
+
|
46
|
+
create_object(year, month, day, hour, min, sec)
|
47
|
+
end
|
48
|
+
|
49
|
+
private
|
50
|
+
|
51
|
+
def create_object(year, month, day, hour, min, sec)
|
52
|
+
Time.new(year, month, day, hour, min, sec)
|
53
|
+
end
|
54
|
+
|
55
|
+
end
|
56
|
+
end
|
57
|
+
end
|
58
|
+
end
|
@@ -0,0 +1,21 @@
|
|
1
|
+
module DataAnon
|
2
|
+
module Strategy
|
3
|
+
module Field
|
4
|
+
|
5
|
+
class DateDelta
|
6
|
+
|
7
|
+
DEFAULT_DAY_DELTA = 10
|
8
|
+
|
9
|
+
def initialize day_delta = DEFAULT_DAY_DELTA
|
10
|
+
@day_delta = day_delta
|
11
|
+
end
|
12
|
+
|
13
|
+
def anonymize field
|
14
|
+
day_adjustment = DataAnon::Utils::RandomInt.generate(-@day_delta,@day_delta)
|
15
|
+
return field.value + day_adjustment.days
|
16
|
+
end
|
17
|
+
|
18
|
+
end
|
19
|
+
end
|
20
|
+
end
|
21
|
+
end
|
@@ -13,9 +13,9 @@ module DataAnon
|
|
13
13
|
end
|
14
14
|
|
15
15
|
def anonymize field
|
16
|
-
day_adjustment =
|
17
|
-
minute_adjustment =
|
18
|
-
return field.value + day_adjustment.days + minute_adjustment.minutes
|
16
|
+
day_adjustment = DataAnon::Utils::RandomInt.generate(-@day_delta,@day_delta)
|
17
|
+
minute_adjustment = DataAnon::Utils::RandomInt.generate(-@minute_delta,@minute_delta)
|
18
|
+
return field.value + (day_adjustment.days + minute_adjustment.minutes)
|
19
19
|
end
|
20
20
|
|
21
21
|
end
|
@@ -4,19 +4,24 @@ module DataAnon
|
|
4
4
|
|
5
5
|
class DefaultAnon
|
6
6
|
|
7
|
-
|
8
|
-
|
9
|
-
:
|
10
|
-
:
|
11
|
-
:
|
7
|
+
DEFAULT_STRATEGIES = {:string => FieldStrategy::LoremIpsum.new,
|
8
|
+
:fixnum => FieldStrategy::RandomIntegerDelta.new(5),
|
9
|
+
:bignum => FieldStrategy::RandomIntegerDelta.new(5000),
|
10
|
+
:float => FieldStrategy::RandomFloatDelta.new(5.0),
|
11
|
+
:datetime => FieldStrategy::DateTimeDelta.new,
|
12
|
+
:time => FieldStrategy::TimeDelta.new,
|
13
|
+
:date => FieldStrategy::DateDelta.new,
|
14
|
+
:trueclass => FieldStrategy::RandomBoolean.new,
|
15
|
+
:falseclass => FieldStrategy::RandomBoolean.new
|
12
16
|
}
|
13
17
|
|
14
|
-
def initialize user_defaults
|
18
|
+
def initialize user_defaults = {}
|
15
19
|
@user_defaults = DEFAULT_STRATEGIES.merge user_defaults
|
16
20
|
end
|
17
21
|
|
18
22
|
def anonymize field
|
19
|
-
strategy = @user_defaults[field.value.class.to_s.downcase.to_sym]
|
23
|
+
strategy = @user_defaults[field.value.class.to_s.downcase.to_sym]
|
24
|
+
raise "No strategy defined for datatype #{field.value.class}" unless strategy
|
20
25
|
strategy.anonymize field
|
21
26
|
end
|
22
27
|
|