data-anonymization 0.1.2 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.documentup.json +1 -0
- data/.travis.yml +0 -1
- data/README.md +277 -52
- data/blacklist_dsl.rb +1 -3
- data/data-anonymization.gemspec +4 -0
- data/lib/core/dsl.rb +1 -1
- data/lib/data-anonymization.rb +3 -0
- data/lib/strategy/base.rb +21 -11
- data/lib/strategy/blacklist.rb +2 -1
- data/lib/strategy/field/contact/geojson_base.rb +24 -0
- data/lib/strategy/field/contact/random_address.rb +17 -0
- data/lib/strategy/field/contact/random_city.rb +17 -0
- data/lib/strategy/field/contact/random_phone_number.rb +13 -0
- data/lib/strategy/field/contact/random_province.rb +17 -0
- data/lib/strategy/field/contact/random_zipcode.rb +17 -0
- data/lib/strategy/field/datetime/anonymize_date.rb +39 -0
- data/lib/strategy/field/datetime/anonymize_datetime.rb +15 -0
- data/lib/strategy/field/datetime/anonymize_time.rb +58 -0
- data/lib/strategy/field/datetime/date_delta.rb +21 -0
- data/lib/strategy/field/{date_time_delta.rb → datetime/date_time_delta.rb} +3 -3
- data/lib/strategy/field/datetime/time_delta.rb +12 -0
- data/lib/strategy/field/default_anon.rb +12 -7
- data/lib/strategy/field/email/gmail_template.rb +16 -0
- data/lib/strategy/field/{random_email.rb → email/random_email.rb} +0 -0
- data/lib/strategy/field/{random_mailinator_email.rb → email/random_mailinator_email.rb} +0 -2
- data/lib/strategy/field/fields.rb +51 -20
- data/lib/strategy/field/name/random_first_name.rb +14 -0
- data/lib/strategy/field/{random_full_name.rb → name/random_full_name.rb} +0 -0
- data/lib/strategy/field/name/random_last_name.rb +14 -0
- data/lib/strategy/field/{random_user_name.rb → name/random_user_name.rb} +0 -0
- data/lib/strategy/field/number/random_float.rb +23 -0
- data/lib/strategy/field/{random_float_delta.rb → number/random_float_delta.rb} +2 -4
- data/lib/strategy/field/{random_int.rb → number/random_integer.rb} +1 -1
- data/lib/strategy/field/{random_integer_delta.rb → number/random_integer_delta.rb} +2 -5
- data/lib/strategy/field/{random_phone_number.rb → string/formatted_string_numbers.rb} +4 -1
- data/lib/strategy/field/{lorem_ipsum.rb → string/lorem_ipsum.rb} +0 -0
- data/lib/strategy/field/{random_string.rb → string/random_string.rb} +0 -0
- data/lib/strategy/field/{distinct_column_values.rb → string/select_from_database.rb} +2 -3
- data/lib/strategy/field/string/select_from_file.rb +18 -0
- data/lib/strategy/field/string/select_from_list.rb +17 -0
- data/lib/strategy/field/{string_template.rb → string/string_template.rb} +0 -0
- data/lib/strategy/whitelist.rb +4 -2
- data/lib/utils/database.rb +8 -6
- data/lib/utils/geojson_parser.rb +42 -0
- data/lib/utils/logging.rb +0 -9
- data/lib/utils/progress_bar.rb +29 -0
- data/lib/utils/random_float.rb +12 -0
- data/lib/utils/random_int.rb +3 -7
- data/lib/utils/resource.rb +4 -0
- data/lib/version.rb +1 -1
- data/resources/UK_addresses.geojson +300 -0
- data/resources/US_addresses.geojson +300 -0
- data/spec/acceptance/rdbms_blacklist_spec.rb +2 -2
- data/spec/acceptance/rdbms_whitelist_spec.rb +6 -8
- data/spec/resource/sample.geojson +1 -0
- data/spec/spec_helper.rb +3 -2
- data/spec/strategy/field/contact/random_address_spec.rb +12 -0
- data/spec/strategy/field/contact/random_city_spec.rb +14 -0
- data/spec/strategy/field/contact/random_phone_number_spec.rb +16 -0
- data/spec/strategy/field/contact/random_province_spec.rb +14 -0
- data/spec/strategy/field/contact/random_zipcode_spec.rb +14 -0
- data/spec/strategy/field/datetime/anonymize_date_spec.rb +27 -0
- data/spec/strategy/field/datetime/anonymize_datetime_spec.rb +57 -0
- data/spec/strategy/field/datetime/anonymize_time_spec.rb +57 -0
- data/spec/strategy/field/datetime/date_delta_spec.rb +36 -0
- data/spec/strategy/field/{date_time_delta_spec.rb → datetime/date_time_delta_spec.rb} +3 -2
- data/spec/strategy/field/datetime/time_delta_spec.rb +44 -0
- data/spec/strategy/field/default_anon_spec.rb +42 -0
- data/spec/strategy/field/email/gmail_template_spec.rb +17 -0
- data/spec/strategy/field/{random_email_spec.rb → email/random_email_spec.rb} +2 -2
- data/spec/strategy/field/email/random_mailinator_email_spec.rb +14 -0
- data/spec/strategy/field/{random_first_name_spec.rb → name/random_first_name_spec.rb} +2 -2
- data/spec/strategy/field/{random_full_name_spec.rb → name/random_full_name_spec.rb} +2 -2
- data/spec/strategy/field/{random_last_name_spec.rb → name/random_last_name_spec.rb} +2 -2
- data/spec/strategy/field/{random_user_name_spec.rb → name/random_user_name_spec.rb} +2 -2
- data/spec/strategy/field/{random_float_delta_spec.rb → number/random_float_delta_spec.rb} +2 -2
- data/spec/strategy/field/number/random_float_spec.rb +28 -0
- data/spec/strategy/field/{random_integer_delta_spec.rb → number/random_integer_delta_spec.rb} +3 -5
- data/spec/strategy/field/{random_int_spec.rb → number/random_integer_spec.rb} +4 -4
- data/spec/strategy/field/random_boolean_spec.rb +2 -2
- data/spec/strategy/field/string/formatted_string_numbers_spec.rb +15 -0
- data/spec/strategy/field/{lorem_ipsum_spec.rb → string/lorem_ipsum_spec.rb} +2 -2
- data/spec/strategy/field/{random_string_spec.rb → string/random_string_spec.rb} +2 -2
- data/spec/strategy/field/{distinct_column_values_spec.rb → string/select_from_database_spec.rb} +3 -3
- data/spec/strategy/field/{random_selection_spec.rb → string/select_from_list_spec.rb} +5 -5
- data/spec/strategy/field/{string_template_spec.rb → string/string_template_spec.rb} +2 -2
- data/spec/strategy/field/whitelist_spec.rb +2 -2
- data/spec/support/customer_sample.rb +1 -1
- data/spec/utils/database_spec.rb +2 -2
- data/spec/utils/geojson_parser_spec.rb +38 -0
- data/whitelist_dsl.rb +4 -6
- metadata +163 -59
- data/lib/strategy/field/anonymize_time.rb +0 -57
- data/lib/strategy/field/gmail_template.rb +0 -17
- data/lib/strategy/field/random_first_name.rb +0 -18
- data/lib/strategy/field/random_last_name.rb +0 -19
- data/lib/strategy/field/random_selection.rb +0 -23
- data/lib/strategy/field/user_name_template.rb +0 -22
- data/spec/strategy/field/anonymize_time_spec.rb +0 -23
- data/spec/strategy/field/gmail_template_spec.rb +0 -14
- data/spec/strategy/field/random_mailinator_email_spec.rb +0 -21
- data/spec/strategy/field/random_phone_number_spec.rb +0 -35
- data/spec/strategy/field/user_name_template_spec.rb +0 -13
@@ -0,0 +1,16 @@
|
|
1
|
+
module DataAnon
|
2
|
+
module Strategy
|
3
|
+
module Field
|
4
|
+
class GmailTemplate
|
5
|
+
|
6
|
+
def initialize username = 'someusername'
|
7
|
+
@username = username
|
8
|
+
end
|
9
|
+
|
10
|
+
def anonymize field
|
11
|
+
"#{@username}+#{field.row_number}@gmail.com"
|
12
|
+
end
|
13
|
+
end
|
14
|
+
end
|
15
|
+
end
|
16
|
+
end
|
File without changes
|
@@ -1,23 +1,54 @@
|
|
1
1
|
require 'strategy/field/whitelist'
|
2
|
-
require 'strategy/field/string_template'
|
3
|
-
require 'strategy/field/user_name_template'
|
4
|
-
require 'strategy/field/random_string'
|
5
|
-
require 'strategy/field/random_int'
|
6
2
|
require 'strategy/field/random_boolean'
|
7
|
-
|
8
|
-
require 'strategy/field/random_integer_delta'
|
9
|
-
require 'strategy/field/random_float_delta'
|
10
|
-
require 'strategy/field/random_selection'
|
11
|
-
require 'strategy/field/distinct_column_values'
|
12
|
-
require 'strategy/field/lorem_ipsum'
|
13
|
-
require 'strategy/field/gmail_template'
|
14
|
-
require 'strategy/field/date_time_delta'
|
15
|
-
require 'strategy/field/default_anon'
|
16
|
-
require 'strategy/field/random_email'
|
17
|
-
require 'strategy/field/random_mailinator_email'
|
18
|
-
require 'strategy/field/random_phone_number'
|
19
|
-
require 'strategy/field/random_first_name'
|
20
|
-
require 'strategy/field/random_last_name'
|
21
|
-
require 'strategy/field/random_full_name'
|
22
|
-
require 'strategy/field/random_user_name'
|
3
|
+
|
23
4
|
require 'strategy/field/anonymous'
|
5
|
+
|
6
|
+
# string
|
7
|
+
require 'strategy/field/string/lorem_ipsum'
|
8
|
+
require 'strategy/field/string/string_template'
|
9
|
+
require 'strategy/field/string/random_string'
|
10
|
+
require 'strategy/field/string/formatted_string_numbers'
|
11
|
+
|
12
|
+
require 'strategy/field/string/select_from_file'
|
13
|
+
require 'strategy/field/string/select_from_list'
|
14
|
+
require 'strategy/field/string/select_from_database'
|
15
|
+
|
16
|
+
# number
|
17
|
+
require 'strategy/field/number/random_integer'
|
18
|
+
require 'strategy/field/number/random_float'
|
19
|
+
require 'strategy/field/number/random_integer_delta'
|
20
|
+
require 'strategy/field/number/random_float_delta'
|
21
|
+
|
22
|
+
# contact
|
23
|
+
require 'strategy/field/contact/geojson_base'
|
24
|
+
require 'strategy/field/contact/random_phone_number'
|
25
|
+
require 'strategy/field/contact/random_address'
|
26
|
+
require 'strategy/field/contact/random_zipcode'
|
27
|
+
require 'strategy/field/contact/random_city'
|
28
|
+
require 'strategy/field/contact/random_province'
|
29
|
+
|
30
|
+
# datetime
|
31
|
+
require 'strategy/field/datetime/anonymize_time'
|
32
|
+
require 'strategy/field/datetime/anonymize_datetime'
|
33
|
+
require 'strategy/field/datetime/anonymize_date'
|
34
|
+
require 'strategy/field/datetime/date_time_delta'
|
35
|
+
require 'strategy/field/datetime/time_delta'
|
36
|
+
require 'strategy/field/datetime/date_delta'
|
37
|
+
|
38
|
+
# email
|
39
|
+
require 'strategy/field/email/random_email'
|
40
|
+
require 'strategy/field/email/gmail_template'
|
41
|
+
require 'strategy/field/email/random_mailinator_email'
|
42
|
+
|
43
|
+
# name
|
44
|
+
require 'strategy/field/name/random_first_name'
|
45
|
+
require 'strategy/field/name/random_last_name'
|
46
|
+
require 'strategy/field/name/random_full_name'
|
47
|
+
require 'strategy/field/name/random_user_name'
|
48
|
+
|
49
|
+
|
50
|
+
|
51
|
+
FieldStrategy = DataAnon::Strategy::Field
|
52
|
+
|
53
|
+
require 'strategy/field/default_anon'
|
54
|
+
|
File without changes
|
File without changes
|
@@ -0,0 +1,23 @@
|
|
1
|
+
module DataAnon
|
2
|
+
module Strategy
|
3
|
+
module Field
|
4
|
+
|
5
|
+
|
6
|
+
class RandomFloat
|
7
|
+
|
8
|
+
def initialize from = 0.0, to = 100.0
|
9
|
+
@from = from
|
10
|
+
@to = to
|
11
|
+
|
12
|
+
end
|
13
|
+
|
14
|
+
def anonymize field
|
15
|
+
DataAnon::Utils::RandomFloat.generate(@from,@to)
|
16
|
+
end
|
17
|
+
|
18
|
+
end
|
19
|
+
|
20
|
+
|
21
|
+
end
|
22
|
+
end
|
23
|
+
end
|
@@ -3,7 +3,7 @@ module DataAnon
|
|
3
3
|
module Field
|
4
4
|
|
5
5
|
|
6
|
-
class
|
6
|
+
class FormattedStringNumber
|
7
7
|
|
8
8
|
def anonymize field
|
9
9
|
@original_phone_number = field.value
|
@@ -18,7 +18,10 @@ module DataAnon
|
|
18
18
|
|
19
19
|
@anonymized_phone_number
|
20
20
|
end
|
21
|
+
|
21
22
|
end
|
23
|
+
|
24
|
+
|
22
25
|
end
|
23
26
|
end
|
24
27
|
end
|
File without changes
|
File without changes
|
@@ -2,18 +2,17 @@ module DataAnon
|
|
2
2
|
module Strategy
|
3
3
|
module Field
|
4
4
|
|
5
|
-
class
|
5
|
+
class SelectFromDatabase
|
6
6
|
include Utils::Logging
|
7
7
|
|
8
8
|
def initialize table_name, field_name
|
9
|
-
source = Utils::SourceTable.create table_name
|
9
|
+
source = Utils::SourceTable.create table_name, []
|
10
10
|
@values = source.select(field_name).uniq.collect { |record| record[field_name]}
|
11
11
|
logger.debug "For field strategy #{table_name}:#{field_name} using values #{@values} "
|
12
12
|
|
13
13
|
end
|
14
14
|
|
15
15
|
def anonymize field
|
16
|
-
return @values[0] if @values.length == 1
|
17
16
|
@values[DataAnon::Utils::RandomInt.generate(0,(@values.length - 1))]
|
18
17
|
end
|
19
18
|
|
File without changes
|
data/lib/strategy/whitelist.rb
CHANGED
@@ -5,14 +5,16 @@ module DataAnon
|
|
5
5
|
def process_record(index, record)
|
6
6
|
dest_record_map = {}
|
7
7
|
record.attributes.each do |field_name, field_value|
|
8
|
-
unless field_value.nil? || field_name
|
8
|
+
unless field_value.nil? || is_primary_key?(field_name)
|
9
9
|
field = DataAnon::Core::Field.new(field_name, field_value, index, record)
|
10
10
|
field_strategy = @fields[field_name.downcase] || DataAnon::Strategy::Field::DefaultAnon.new(@user_strategies)
|
11
11
|
dest_record_map[field_name] = field_strategy.anonymize(field)
|
12
12
|
end
|
13
13
|
end
|
14
14
|
dest_record = dest_table.new dest_record_map
|
15
|
-
|
15
|
+
@primary_keys.each do |key|
|
16
|
+
dest_record[key] = record[key]
|
17
|
+
end
|
16
18
|
dest_record.save!
|
17
19
|
end
|
18
20
|
|
data/lib/utils/database.rb
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
require 'active_record'
|
2
|
+
require 'composite_primary_keys'
|
2
3
|
require 'logger'
|
3
4
|
|
4
5
|
module DataAnon
|
@@ -23,10 +24,11 @@ module DataAnon
|
|
23
24
|
|
24
25
|
class BaseTable
|
25
26
|
|
26
|
-
def self.create_table table_name,
|
27
|
+
def self.create_table database, table_name, primary_keys
|
27
28
|
Class.new(database) do
|
28
29
|
self.table_name = table_name
|
29
|
-
self.
|
30
|
+
self.primary_keys = primary_keys if primary_keys.length > 1
|
31
|
+
self.primary_key = primary_keys[0] if primary_keys.length == 1
|
30
32
|
self.mass_assignment_sanitizer = MassAssignmentIgnoreSanitizer.new(self)
|
31
33
|
end
|
32
34
|
end
|
@@ -35,16 +37,16 @@ module DataAnon
|
|
35
37
|
|
36
38
|
class SourceTable < BaseTable
|
37
39
|
|
38
|
-
def self.create table_name, primary_key
|
39
|
-
create_table table_name, primary_key
|
40
|
+
def self.create table_name, primary_key
|
41
|
+
create_table SourceDatabase, table_name, primary_key
|
40
42
|
end
|
41
43
|
|
42
44
|
end
|
43
45
|
|
44
46
|
class DestinationTable < BaseTable
|
45
47
|
|
46
|
-
def self.create table_name, primary_key
|
47
|
-
create_table table_name, primary_key
|
48
|
+
def self.create table_name, primary_key
|
49
|
+
create_table DestinationDatabase, table_name, primary_key
|
48
50
|
end
|
49
51
|
|
50
52
|
end
|
@@ -0,0 +1,42 @@
|
|
1
|
+
require 'rgeo/geo_json'
|
2
|
+
|
3
|
+
module DataAnon
|
4
|
+
module Utils
|
5
|
+
class GeojsonParser
|
6
|
+
|
7
|
+
|
8
|
+
def self.address file_path
|
9
|
+
self.new(file_path).parse 'address'
|
10
|
+
end
|
11
|
+
|
12
|
+
def self.zipcode file_path
|
13
|
+
self.new(file_path).parse 'postcode'
|
14
|
+
end
|
15
|
+
|
16
|
+
def self.province file_path
|
17
|
+
self.new(file_path).parse 'province'
|
18
|
+
end
|
19
|
+
|
20
|
+
def self.city file_path
|
21
|
+
self.new(file_path).parse 'city'
|
22
|
+
end
|
23
|
+
|
24
|
+
def self.country file_path
|
25
|
+
self.new(file_path).parse 'country'
|
26
|
+
end
|
27
|
+
|
28
|
+
def initialize file_path
|
29
|
+
@places = File.read(file_path).split(/\n/)
|
30
|
+
end
|
31
|
+
|
32
|
+
def parse property
|
33
|
+
result_list = []
|
34
|
+
@places.each do |loc|
|
35
|
+
geom = RGeo::GeoJSON.decode(loc, :json_parser => :json)
|
36
|
+
result_list.push(geom[property])
|
37
|
+
end
|
38
|
+
result_list
|
39
|
+
end
|
40
|
+
end
|
41
|
+
end
|
42
|
+
end
|
data/lib/utils/logging.rb
CHANGED
@@ -15,15 +15,6 @@ module DataAnon
|
|
15
15
|
@@logger
|
16
16
|
end
|
17
17
|
|
18
|
-
def progress_logger
|
19
|
-
@@progress_logger ||= (self.progress_logger = Logger.new(STDOUT) )
|
20
|
-
end
|
21
|
-
|
22
|
-
def progress_logger= logger
|
23
|
-
logger.formatter = proc { |severity, datetime, progname, msg| msg }
|
24
|
-
@@progress_logger = logger
|
25
|
-
end
|
26
|
-
|
27
18
|
end
|
28
19
|
end
|
29
20
|
end
|
@@ -0,0 +1,29 @@
|
|
1
|
+
module DataAnon
|
2
|
+
module Utils
|
3
|
+
|
4
|
+
class ProgressBar
|
5
|
+
|
6
|
+
def initialize table_name, total
|
7
|
+
@total = total
|
8
|
+
@table_name = table_name
|
9
|
+
@progress_bar = PowerBar.new if total > 0 && show_progress
|
10
|
+
end
|
11
|
+
|
12
|
+
def show_progress
|
13
|
+
ENV['show_progress'] != 'false'
|
14
|
+
end
|
15
|
+
|
16
|
+
def show index
|
17
|
+
if @progress_bar && ((index % 1000 == 0) || (index == @total) || (index == 1))
|
18
|
+
@progress_bar.show(:msg => "Table: #{@table_name} (#{index}/#{@total})", :done => index, :total => @total)
|
19
|
+
end
|
20
|
+
end
|
21
|
+
|
22
|
+
def close
|
23
|
+
@progress_bar.close if @progress_bar
|
24
|
+
end
|
25
|
+
|
26
|
+
end
|
27
|
+
|
28
|
+
end
|
29
|
+
end
|