privacy 0.1.0 → 0.1.5
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.DS_Store +0 -0
- data/.gitignore +3 -0
- data/Gemfile.lock +17 -1
- data/README.md +8 -6
- data/lib/privacy.rb +4 -2
- data/lib/privacy/person.rb +133 -0
- data/lib/privacy/processor.rb +79 -0
- data/lib/privacy/version.rb +1 -1
- data/privacy.gemspec +21 -19
- metadata +37 -6
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 70f75e3ceaea1bcf698189b550f06b19898d813fb1f6c69ce47bb3c2770f7eb9
|
4
|
+
data.tar.gz: d234953c1819d6ae303cabc4239be5b06be7945b0fdf3de2b8091859a440e961
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 542f23027312376c876cdccab5fe24de7e7168becf3f842ce143eb5bf892c876089845168dc2fd99033f59edf0b018125a2f5e1fe386f59844f876f2c6330644
|
7
|
+
data.tar.gz: 7d1a71839a9b8abe462579f73eaf91ac0da26ae12cad07940d495d56f35b697d1b1a4290d3a92e55c4e13ed73a016ab12a6414543971e7dcb5dd9ae12f0396aa
|
data/.DS_Store
ADDED
Binary file
|
data/.gitignore
CHANGED
data/Gemfile.lock
CHANGED
@@ -1,12 +1,28 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
privacy (0.1.
|
4
|
+
privacy (0.1.4)
|
5
|
+
creek
|
6
|
+
thor
|
7
|
+
write_xlsx
|
5
8
|
|
6
9
|
GEM
|
7
10
|
remote: https://rubygems.org/
|
8
11
|
specs:
|
12
|
+
creek (2.5.2)
|
13
|
+
nokogiri (>= 1.10.0)
|
14
|
+
rubyzip (>= 1.0.0)
|
15
|
+
mini_portile2 (2.4.0)
|
16
|
+
nokogiri (1.10.9)
|
17
|
+
mini_portile2 (~> 2.4.0)
|
9
18
|
rake (12.3.3)
|
19
|
+
rubyzip (2.3.0)
|
20
|
+
thor (1.0.1)
|
21
|
+
write_xlsx (0.85.7)
|
22
|
+
rubyzip (>= 1.0.0)
|
23
|
+
zip-zip
|
24
|
+
zip-zip (0.3)
|
25
|
+
rubyzip (>= 1.0.0)
|
10
26
|
|
11
27
|
PLATFORMS
|
12
28
|
ruby
|
data/README.md
CHANGED
@@ -1,8 +1,6 @@
|
|
1
1
|
# Privacy
|
2
2
|
|
3
|
-
|
4
|
-
|
5
|
-
TODO: Delete this and the text above, and describe your gem
|
3
|
+
Privacy removes personal data from an xlsx file.
|
6
4
|
|
7
5
|
## Installation
|
8
6
|
|
@@ -22,9 +20,9 @@ Or install it yourself as:
|
|
22
20
|
|
23
21
|
## Usage
|
24
22
|
|
25
|
-
|
26
|
-
|
27
|
-
|
23
|
+
Process all Excel files in the current directory: `privacy process`.
|
24
|
+
|
25
|
+
Process a specific file in the current directory: `privacy process file.xlsx`.
|
28
26
|
|
29
27
|
## Development
|
30
28
|
|
@@ -32,6 +30,10 @@ After checking out the repo, run `bin/setup` to install dependencies. You can al
|
|
32
30
|
|
33
31
|
To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org).
|
34
32
|
|
33
|
+
To release a new version, change the version number and run `rake release`.
|
34
|
+
|
35
|
+
To work on the code, use `bundle exec privacy process`.
|
36
|
+
|
35
37
|
## Contributing
|
36
38
|
|
37
39
|
Bug reports and pull requests are welcome on GitHub at https://github.com/[USERNAME]/privacy. This project is intended to be a safe, welcoming space for collaboration, and contributors are expected to adhere to the [code of conduct](https://github.com/[USERNAME]/privacy/blob/master/CODE_OF_CONDUCT.md).
|
data/lib/privacy.rb
CHANGED
@@ -1,11 +1,13 @@
|
|
1
1
|
require 'privacy/version'
|
2
|
+
require 'privacy/processor'
|
2
3
|
require 'thor'
|
3
4
|
|
4
5
|
module Privacy
  # Command-line interface of the gem, built on Thor.
  class CLI < Thor
    desc "process [file]", "Make data private"
    # Runs a Processor on the given file, or on every `*.xlsx` file found in
    # the current directory when no file is given.
    #
    # @param file [String, nil] path of the spreadsheet to process
    def process(file = nil)
      # A distinct block parameter name avoids shadowing the method argument.
      files = file.nil? ? Dir.glob("*.xlsx") : [file]
      files.each { |path| Processor.new(path) }
    end
  end
end
|
@@ -0,0 +1,133 @@
|
|
1
|
+
# Anonymizes the data of one person in a spreadsheet. The person is designated
# by an identifier that appears in the header of each of its columns.
class Privacy::Person
  attr_reader :identifier, :row, :index, :columns, :data, :combinations, :initials

  # Header templates of the columns to anonymize. `_identifier_` is replaced
  # by the person's identifier before comparing with the header row.
  COLUMNS_TO_ANONYMIZE = [
    'Nom _identifier_',
    'Prénom _identifier_',
    'Date de naissance _identifier_',
    'Lieu naissance Pays _identifier_',
    'Lieu naissance Ville _identifier_',
    'Lieu naissance Département _identifier_',
    'Sexe _identifier_',
    'Nationalité _identifier_',
    'Adresse _identifier_ [Date]',
    'Adresse _identifier_ [Département]',
    'Adresse _identifier_ [Ville]',
    'Adresse _identifier_ [Pays]',
    'Profession _identifier_',
    'Date profession _identifier_'
  ].freeze

  # Separators tried after each initial when building initials tokens.
  INITIALS_SEPARATORS = [
    '',
    '.'
  ].freeze

  # @param identifier [String] identifier substituted into the header templates
  def initialize(identifier)
    @identifier = identifier
    puts "Identify #{identifier}"
  end

  # Finds which columns of the header row belong to this person.
  #
  # @param row [Hash] header row, mapping cell references (e.g. "A1") to values
  def identify_columns(row)
    @columns = {}
    headers = COLUMNS_TO_ANONYMIZE.map { |column| column_with_identifier column }
    row.each do |key, value|
      next unless headers.include? value

      # Strip the trailing row number to keep only the column letters.
      letter = key.sub(/\d+\z/, '')
      @columns[letter] = value
      puts " #{value} on column #{letter}"
    end
  end

  # Anonymizes one data row in place: first the person's own columns, then
  # every mention of the person's name or initials in any string cell.
  #
  # @param row [Hash] row data, mapping cell references to values
  # @param index [Integer] zero-based position of the row in the sheet
  def anonymize(row, index)
    @row = row
    @index = index + 1 # Excel is 1-indexed
    anonymize_personal_data
    build_combinations
    remove_combinations
  end

  protected

  # Stores the values of the person's columns in @data and replaces each of
  # them in the row with the column header wrapped in brackets.
  def anonymize_personal_data
    @data = {}
    puts
    puts "Anonymize row #{index}"
    puts " #{identifier}"
    @columns.each do |letter, column|
      cell = "#{letter}#{index}"
      value = row[cell]
      next if value.nil?

      @data[column] = value
      puts " #{value} (#{column})"
      row[cell] = "[#{column}]"
    end
  end

  # Builds the list of tokens (full name in both orders, and initials) to be
  # removed from the row. The list stays empty unless both names are present.
  def build_combinations
    puts 'build_combinations'
    @combinations = []
    first_name = data[column_with_identifier('Prénom _identifier_')]
    last_name = data[column_with_identifier('Nom _identifier_')]
    unless first_name.nil? || last_name.nil?
      words = first_name.split(' ') + last_name.split(' ')
      initials = words.map { |word| word[0].upcase }
      ["#{first_name} #{last_name}", "#{last_name} #{first_name}"].each do |full_name|
        @combinations << full_name << " #{full_name}" << "#{full_name} "
      end
      @combinations.concat initials_combinations(initials)
    end
    puts "Combinations: #{@combinations.join(', ')}"
  end

  # @param column [String] header template containing `_identifier_`
  # @return [String] the header with the person's identifier substituted
  def column_with_identifier(column)
    column.gsub '_identifier_', identifier
  end

  # Builds every way of writing the given sequence of initials, with or
  # without a separator after each letter and with optional surrounding or
  # separating spaces.
  #
  # The argument is not modified: the previous implementation shifted the
  # array while iterating over it, which emptied the caller's array.
  #
  # @param initials [Array<String>] upper-case initials, in order
  # @return [Array<String>] unique tokens, empty when there are no initials
  def initials_combinations(initials)
    puts "Initials for #{initials}"
    return [] if initials.empty?

    letter, *rest = initials
    tails = initials_combinations rest
    tokens = INITIALS_SEPARATORS.flat_map do |separator|
      head = "#{letter}#{separator}"
      if tails.empty?
        [head, "#{head} ", " #{head}"]
      else
        tails.flat_map do |tail|
          [
            "#{head}#{tail}",
            " #{head}#{tail}",
            "#{head} #{tail}",
            "#{head} #{tail} ",
            "#{head}#{tail} "
          ]
        end
      end
    end
    tokens.uniq
  end

  # Replaces every occurrence of each token in the row's string cells with
  # the identifier wrapped in brackets. Non-string cells are left untouched.
  def remove_combinations
    placeholder = "[#{identifier}]"
    row.each do |key, value|
      next unless value.is_a? String

      # Tokens are applied one after the other, in the order they were built.
      row[key] = combinations.reduce(value) { |text, token| text.gsub(token, placeholder) }
    end
  end
end
|
@@ -0,0 +1,79 @@
|
|
1
|
+
require 'creek'
|
2
|
+
require 'write_xlsx'
|
3
|
+
require 'privacy/person'
|
4
|
+
|
5
|
+
# Reads the first sheet of a spreadsheet, anonymizes the data of the `ppmec`
# and `ppvic` persons, and writes the result under the same file name inside
# DIRECTORY.
class Privacy::Processor
  attr_reader :file

  # Output directory, relative to the current working directory.
  DIRECTORY = 'processed'

  # Processes the file immediately, unless its name starts with `~`.
  #
  # @param file [String] path of the spreadsheet to process
  def initialize(file)
    @file = file
    # NOTE(review): `~` files are presumably Excel lock files — confirm.
    # The basename is tested so that a directory prefix does not defeat the check.
    process unless File.basename(@file).start_with? '~'
  end

  protected

  # Runs the whole pipeline: output directory, anonymization, file writing.
  def process
    puts "Processing #{@file}"
    make_directory
    filter_data
    write_file
  end

  # Creates the output directory when it does not exist yet.
  def make_directory
    # `File.exists?` was removed in Ruby 3.2; `Dir.exist?` is the supported form.
    Dir.mkdir DIRECTORY unless Dir.exist? DIRECTORY
  end

  # Copies the header row unchanged, then anonymizes and copies each
  # following row into the new worksheet.
  def filter_data
    header = sheet.rows.first # read once instead of once per use
    @ppmec = Privacy::Person.new 'ppmec'
    @ppmec.identify_columns header
    @ppvic = Privacy::Person.new 'ppvic'
    @ppvic.identify_columns header
    write_row header
    sheet.rows.each_with_index do |row, index|
      next if index.zero? # header row, already written

      @ppmec.anonymize row, index
      @ppvic.anonymize row, index
      write_row row
    end
  end

  # Writes every cell of a row into the new worksheet.
  #
  # @param row [Hash] mapping of cell references to values
  def write_row(row)
    row.each { |key, value| worksheet.write key, value }
  end

  # Closing the workbook is what writes the new file.
  def write_file
    workbook.close
  end

  # New file
  # http://cxn03651.github.io/write_xlsx/index.html

  def workbook
    @workbook ||= WriteXLSX.new processed_file_name
  end

  def worksheet
    @worksheet ||= workbook.add_worksheet
  end

  # @return [String] path of the output file inside DIRECTORY
  def processed_file_name
    "#{DIRECTORY}/#{file}"
  end

  # Current file
  # https://github.com/pythonicrubyist/creek

  # @return [String] absolute path of the input file
  def path
    @path ||= File.expand_path file
  end

  def data
    @data ||= Creek::Book.new path, with_headers: true
  end

  # @return the first sheet of the input workbook
  def sheet
    @sheet ||= data.sheets[0]
  end
end
|
data/lib/privacy/version.rb
CHANGED
data/privacy.gemspec
CHANGED
@@ -1,29 +1,31 @@
|
|
1
1
|
require_relative 'lib/privacy/version'
|
2
2
|
|
3
|
-
Gem::Specification.new do |
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
3
|
+
# Gem specification for `privacy`.
Gem::Specification.new do |s|
  s.name          = "privacy"
  s.version       = Privacy::VERSION
  s.authors       = ["Arnaud Levy"]
  s.email         = ["contact@arnaudlevy.com"]

  s.summary       = "Privacy removes personal data from a xls file"
  s.description   = "Remove first name and last name from xls files, replace them with an anonymous placeholder"
  s.homepage      = "https://github.com/arnaudlevy/privacy"
  s.license       = "MIT"
  s.required_ruby_version = Gem::Requirement.new(">= 2.3.0")

  # s.metadata["allowed_push_host"] = "TODO: Set to 'http://mygemserver.com'"

  s.metadata["homepage_uri"] = s.homepage
  s.metadata["source_code_uri"] = "https://github.com/arnaudlevy/privacy"

  # Specify which files should be added to the gem when it is released.
  # The `git ls-files -z` loads the files in the RubyGem that have been added into git.
  s.files         = Dir.chdir(File.expand_path('..', __FILE__)) do
    # Test, spec and feature directories are not packaged. The directory name
    # is `spec`: it must not follow the name of the block variable.
    `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
  end
  s.bindir        = "exe"
  s.executables   = s.files.grep(%r{^exe/}) { |f| File.basename(f) }
  s.require_paths = ["lib"]
  s.add_dependency "thor"
  s.add_dependency "creek"
  s.add_dependency "write_xlsx"
end
|
metadata
CHANGED
@@ -1,29 +1,57 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: privacy
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.
|
4
|
+
version: 0.1.5
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Arnaud Levy
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-
|
11
|
+
date: 2020-06-01 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: thor
|
15
15
|
requirement: !ruby/object:Gem::Requirement
|
16
16
|
requirements:
|
17
|
-
- - "
|
17
|
+
- - ">="
|
18
18
|
- !ruby/object:Gem::Version
|
19
|
-
version: '0
|
19
|
+
version: '0'
|
20
20
|
type: :runtime
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
|
-
- - "
|
24
|
+
- - ">="
|
25
25
|
- !ruby/object:Gem::Version
|
26
|
-
version: '0
|
26
|
+
version: '0'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: creek
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ">="
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '0'
|
34
|
+
type: :runtime
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - ">="
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '0'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: write_xlsx
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - ">="
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '0'
|
48
|
+
type: :runtime
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - ">="
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '0'
|
27
55
|
description: Remove first name and last name from xls files, replace them with an
|
28
56
|
anonymous placeholder
|
29
57
|
email:
|
@@ -33,6 +61,7 @@ executables:
|
|
33
61
|
extensions: []
|
34
62
|
extra_rdoc_files: []
|
35
63
|
files:
|
64
|
+
- ".DS_Store"
|
36
65
|
- ".gitignore"
|
37
66
|
- CODE_OF_CONDUCT.md
|
38
67
|
- Gemfile
|
@@ -44,6 +73,8 @@ files:
|
|
44
73
|
- bin/setup
|
45
74
|
- exe/privacy
|
46
75
|
- lib/privacy.rb
|
76
|
+
- lib/privacy/person.rb
|
77
|
+
- lib/privacy/processor.rb
|
47
78
|
- lib/privacy/version.rb
|
48
79
|
- privacy.gemspec
|
49
80
|
homepage: https://github.com/arnaudlevy/privacy
|