privacy 0.1.3 → 0.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.DS_Store +0 -0
- data/README.md +2 -0
- data/lib/privacy/person.rb +133 -0
- data/lib/privacy/processor.rb +20 -3
- data/lib/privacy/version.rb +1 -1
- data/privacy.gemspec +21 -21
- metadata +4 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: e388db32406558d4970fab62d76fb4bf8e3a6cccbddf3cfd6f5a47b0a3af160d
|
4
|
+
data.tar.gz: 8358c360c06bb0386c768e283ffb123f14030b81bc96f79f4c3cb4dfec51f2f9
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 8e8a6eadc68c458d508189c35ec34bdfa245d18159755c075cace122029a5c899f589134fc4156618120c8ace5d49a7ca26f770b6125695b5a6a91956cc0454a
|
7
|
+
data.tar.gz: 8a05cd62e6c983aafe69c9dec2a546629f9f2126c2ab25f78fbbe29dbfef6bd2825eae08abcac149fa28be7b64858cfc49eed3302a30c27c176ef2fffa20ca97
|
data/.DS_Store
ADDED
Binary file
|
data/README.md
CHANGED
@@ -32,6 +32,8 @@ To install this gem onto your local machine, run `bundle exec rake install`. To
|
|
32
32
|
|
33
33
|
To release a new version, update the version number and run `rake release`.
|
34
34
|
|
35
|
+
To work on the code, use `bundle exec privacy process`.
|
36
|
+
|
35
37
|
## Contributing
|
36
38
|
|
37
39
|
Bug reports and pull requests are welcome on GitHub at https://github.com/[USERNAME]/privacy. This project is intended to be a safe, welcoming space for collaboration, and contributors are expected to adhere to the [code of conduct](https://github.com/[USERNAME]/privacy/blob/master/CODE_OF_CONDUCT.md).
|
@@ -0,0 +1,133 @@
|
|
1
|
+
# Anonymizes the spreadsheet data belonging to one person.
#
# A person is designated by an identifier (the processor uses 'ppmec' and
# 'ppvic') that appears as a suffix/infix in the header names listed in
# COLUMNS_TO_ANONYMIZE. Rows are Hash-like objects mapping a cell reference
# ("A1", "B7", ...) to the cell value.
#
# Usage: call #identify_columns once with the header row, then #anonymize
# for every data row. Progress is logged to stdout with `puts`.
class Privacy::Person
  attr_reader :identifier, :row, :index, :columns, :data, :combinations, :initials

  # Header templates of the columns holding personal data; `_identifier_`
  # is substituted with the person's identifier.
  COLUMNS_TO_ANONYMIZE = [
    'Nom _identifier_',
    'Prénom _identifier_',
    'Date de naissance _identifier_',
    'Lieu naissance Pays _identifier_',
    'Lieu naissance Ville _identifier_',
    'Lieu naissance Département _identifier_',
    'Sexe _identifier_',
    'Nationalité _identifier_',
    'Adresse _identifier_ [Date]',
    'Adresse _identifier_ [Département]',
    'Adresse _identifier_ [Ville]',
    'Adresse _identifier_ [Pays]',
    'Profession _identifier_',
    'Date profession _identifier_'
  ].freeze

  # Strings that may follow each initial when a name is abbreviated
  # ("JD" as well as "J.D.").
  INITIALS_SEPARATORS = [
    '',
    '.'
  ].freeze

  # @param identifier [String] token replacing `_identifier_` in header names
  def initialize(identifier)
    @identifier = identifier
    puts "Identify #{identifier}"
  end

  # Finds which spreadsheet columns hold this person's data.
  #
  # Populates @columns with { column letter => header name } for every
  # header cell whose value matches one of COLUMNS_TO_ANONYMIZE.
  #
  # @param row [Hash] header row, cell reference => header text
  def identify_columns(row)
    @columns = {}
    # Resolve the expected header names once instead of once per cell.
    expected_names = COLUMNS_TO_ANONYMIZE.map { |column| column_with_identifier(column) }
    row.each do |key, value|
      name = expected_names.find { |candidate| value == candidate }
      next if name.nil?

      # Drop the trailing row number to keep only the column letters
      # ("AB1" -> "AB"); unlike removing every "1", this stays correct
      # whatever the header row number is.
      letter = key.sub(/\d+\z/, '')
      @columns[letter] = name
      puts " #{name} on column #{letter}"
    end
  end

  # Anonymizes one data row in place: first the identified columns, then
  # any free-text mention of the person's name or initials in any cell.
  #
  # @param row [Hash] data row, cell reference => value (mutated)
  # @param index [Integer] zero-based position of the row in the sheet
  def anonymize(row, index)
    @row = row
    @index = index + 1 # Excel is 1-indexed
    anonymize_personal_data
    build_combinations
    remove_combinations
  end

  protected

  # Replaces every non-nil cell of the identified columns with "[header name]"
  # and remembers the original values in @data (header name => value).
  def anonymize_personal_data
    @data = {}
    puts
    puts "Anonymize row #{index}"
    puts " #{identifier}"
    @columns.each do |letter, column|
      cell = "#{letter}#{index}"
      value = row[cell]
      next if value.nil?

      @data[column] = value
      puts " #{value} (#{column})"
      row[cell] = "[#{column}]"
    end
  end

  # Builds @combinations: the textual forms under which the person may be
  # mentioned elsewhere in the row (full name in both orders, with optional
  # surrounding space, plus every initials variant).
  #
  # Nothing is generated unless both first and last name are present; a
  # blank name is treated like a missing one, because it would otherwise
  # yield single-letter tokens that match unrelated text.
  def build_combinations
    puts 'build_combinations'
    @combinations = []
    first_name = data[column_with_identifier('Prénom _identifier_')]
    last_name = data[column_with_identifier('Nom _identifier_')]
    if name_present?(first_name) && name_present?(last_name)
      # One upper-cased initial per word, first name(s) then last name(s).
      letters = (first_name.to_s.split(' ') + last_name.to_s.split(' ')).map { |word| word[0].upcase }
      @combinations.push(
        "#{first_name} #{last_name}",
        " #{first_name} #{last_name}",
        "#{first_name} #{last_name} ",
        "#{last_name} #{first_name}",
        " #{last_name} #{first_name}",
        "#{last_name} #{first_name} "
      )
      @combinations.concat(initials_combinations(letters))
    end
    puts "Combinations: #{@combinations.join(', ')}"
  end

  # @param column [String] header template containing `_identifier_`
  # @return [String] the header name for this person
  def column_with_identifier(column)
    column.gsub '_identifier_', identifier
  end

  # Recursively lists the ways a sequence of initials can be written, each
  # initial followed by one of INITIALS_SEPARATORS and optionally a space.
  #
  # The argument is not modified: the list is split into head and tail
  # instead of being shifted while it is iterated.
  #
  # @param initials [Array<String>] upper-cased initials, in order
  # @return [Array<String>] unique combinations, empty for no initials
  def initials_combinations(initials)
    puts "Initials for #{initials}"
    return [] if initials.empty?

    letter, *remaining = initials
    tails = initials_combinations(remaining)
    combinations = []
    INITIALS_SEPARATORS.each do |separator|
      head = "#{letter}#{separator}"
      if tails.empty?
        combinations.push(head, "#{head} ", " #{head}")
      else
        tails.each do |tail|
          combinations.push(
            "#{head}#{tail}",
            " #{head}#{tail}",
            "#{head} #{tail}",
            "#{head} #{tail} ",
            "#{head}#{tail} "
          )
        end
      end
    end
    combinations.uniq
  end

  # Replaces every occurrence of every combination by "[identifier]" in all
  # String cells of the current row; other cell types are left untouched.
  def remove_combinations
    placeholder = "[#{identifier}]"
    row.each do |key, value|
      next unless value.is_a?(String)

      # Only existing keys are reassigned, which is safe while iterating.
      row[key] = combinations.reduce(value) { |text, token| text.gsub(token, placeholder) }
    end
  end

  # @return [Boolean] true when the value is neither nil nor blank text
  def name_present?(value)
    !value.nil? && !value.to_s.strip.empty?
  end
end
|
data/lib/privacy/processor.rb
CHANGED
@@ -1,14 +1,21 @@
|
|
1
1
|
require 'creek'
|
2
2
|
require 'write_xlsx'
|
3
|
+
require 'privacy/person'
|
3
4
|
|
4
5
|
class Privacy::Processor
|
5
6
|
attr_reader :file
|
6
7
|
|
7
8
|
DIRECTORY = 'processed'
|
9
|
+
PRIVATE_DATA = {
|
10
|
+
'Nom ppmec': '[Nom]',
|
11
|
+
'Prénom ppmec': '[Prénom]',
|
12
|
+
'Nom ppvic': '[Nom]',
|
13
|
+
'Prénom ppvic': '[Prénom]'
|
14
|
+
}
|
8
15
|
|
9
16
|
# Stores the file name and processes it immediately, unless the name
# begins with '~'.
# NOTE(review): '~' presumably marks temporary/lock files created by the
# spreadsheet application — confirm.
def initialize(file)
  @file = file
  return if file.start_with?('~')

  process
end
|
13
20
|
|
14
21
|
protected
|
@@ -25,7 +32,17 @@ class Privacy::Processor
|
|
25
32
|
end
|
26
33
|
|
27
34
|
def filter_data
|
28
|
-
|
35
|
+
@ppmec = Privacy::Person.new 'ppmec'
|
36
|
+
@ppmec.identify_columns sheet.rows.first
|
37
|
+
@ppvic = Privacy::Person.new 'ppvic'
|
38
|
+
@ppvic.identify_columns sheet.rows.first
|
39
|
+
sheet.rows.first.each do |key, value|
|
40
|
+
worksheet.write key, value
|
41
|
+
end
|
42
|
+
sheet.rows.each_with_index do |row, index|
|
43
|
+
next if index.zero?
|
44
|
+
@ppmec.anonymize row, index
|
45
|
+
@ppvic.anonymize row, index
|
29
46
|
row.each do |key, value|
|
30
47
|
worksheet.write key, value
|
31
48
|
end
|
@@ -59,7 +76,7 @@ class Privacy::Processor
|
|
59
76
|
end
|
60
77
|
|
61
78
|
# Returns the Creek workbook for `path`, built with headers enabled on the
# first call and memoized in @data afterwards.
def data
  return @data if @data

  @data = Creek::Book.new(path, with_headers: true)
end
|
64
81
|
|
65
82
|
def sheet
|
data/lib/privacy/version.rb
CHANGED
data/privacy.gemspec
CHANGED
@@ -1,31 +1,31 @@
|
|
1
1
|
require_relative 'lib/privacy/version'

# Gem specification for privacy.
Gem::Specification.new do |s|
  s.name          = "privacy"
  s.version       = Privacy::VERSION
  s.authors       = ["Arnaud Levy"]
  s.email         = ["contact@arnaudlevy.com"]

  s.summary       = "Privacy removes personal data from a xls file"
  s.description   = "Remove first name and last name from xls files, replace them with an anonymous placeholder"
  s.homepage      = "https://github.com/arnaudlevy/privacy"
  s.license       = "MIT"
  s.required_ruby_version = Gem::Requirement.new(">= 2.3.0")

  # s.metadata["allowed_push_host"] = "TODO: Set to 'http://mygemserver.com'"

  s.metadata["homepage_uri"] = s.homepage
  s.metadata["source_code_uri"] = "https://github.com/arnaudlevy/privacy"

  # Specify which files should be added to the gem when it is released.
  # The `git ls-files -z` loads the files in the RubyGem that have been added into git.
  s.files         = Dir.chdir(File.expand_path('..', __FILE__)) do
    `git ls-files -z`.split("\x0").reject do |f|
      # Leave out test directories (the pattern must name `spec`, not the
      # block variable) and macOS Finder metadata that was committed by mistake.
      f.match(%r{^(test|spec|features)/}) || File.basename(f) == '.DS_Store'
    end
  end
  s.bindir        = "exe"
  s.executables   = s.files.grep(%r{^exe/}) { |f| File.basename(f) }
  s.require_paths = ["lib"]
  s.add_dependency "thor"
  s.add_dependency "creek"
  s.add_dependency "write_xlsx"
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: privacy
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.
|
4
|
+
version: 0.1.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Arnaud Levy
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-
|
11
|
+
date: 2020-06-01 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: thor
|
@@ -61,6 +61,7 @@ executables:
|
|
61
61
|
extensions: []
|
62
62
|
extra_rdoc_files: []
|
63
63
|
files:
|
64
|
+
- ".DS_Store"
|
64
65
|
- ".gitignore"
|
65
66
|
- CODE_OF_CONDUCT.md
|
66
67
|
- Gemfile
|
@@ -72,6 +73,7 @@ files:
|
|
72
73
|
- bin/setup
|
73
74
|
- exe/privacy
|
74
75
|
- lib/privacy.rb
|
76
|
+
- lib/privacy/person.rb
|
75
77
|
- lib/privacy/processor.rb
|
76
78
|
- lib/privacy/version.rb
|
77
79
|
- privacy.gemspec
|