privacy 0.1.0 → 0.1.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.DS_Store +0 -0
- data/.gitignore +3 -0
- data/Gemfile.lock +17 -1
- data/README.md +8 -6
- data/lib/privacy.rb +4 -2
- data/lib/privacy/person.rb +133 -0
- data/lib/privacy/processor.rb +79 -0
- data/lib/privacy/version.rb +1 -1
- data/privacy.gemspec +21 -19
- metadata +37 -6
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 70f75e3ceaea1bcf698189b550f06b19898d813fb1f6c69ce47bb3c2770f7eb9
|
4
|
+
data.tar.gz: d234953c1819d6ae303cabc4239be5b06be7945b0fdf3de2b8091859a440e961
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 542f23027312376c876cdccab5fe24de7e7168becf3f842ce143eb5bf892c876089845168dc2fd99033f59edf0b018125a2f5e1fe386f59844f876f2c6330644
|
7
|
+
data.tar.gz: 7d1a71839a9b8abe462579f73eaf91ac0da26ae12cad07940d495d56f35b697d1b1a4290d3a92e55c4e13ed73a016ab12a6414543971e7dcb5dd9ae12f0396aa
|
data/.DS_Store
ADDED
Binary file
|
data/.gitignore
CHANGED
data/Gemfile.lock
CHANGED
@@ -1,12 +1,28 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
privacy (0.1.
|
4
|
+
privacy (0.1.4)
|
5
|
+
creek
|
6
|
+
thor
|
7
|
+
write_xlsx
|
5
8
|
|
6
9
|
GEM
|
7
10
|
remote: https://rubygems.org/
|
8
11
|
specs:
|
12
|
+
creek (2.5.2)
|
13
|
+
nokogiri (>= 1.10.0)
|
14
|
+
rubyzip (>= 1.0.0)
|
15
|
+
mini_portile2 (2.4.0)
|
16
|
+
nokogiri (1.10.9)
|
17
|
+
mini_portile2 (~> 2.4.0)
|
9
18
|
rake (12.3.3)
|
19
|
+
rubyzip (2.3.0)
|
20
|
+
thor (1.0.1)
|
21
|
+
write_xlsx (0.85.7)
|
22
|
+
rubyzip (>= 1.0.0)
|
23
|
+
zip-zip
|
24
|
+
zip-zip (0.3)
|
25
|
+
rubyzip (>= 1.0.0)
|
10
26
|
|
11
27
|
PLATFORMS
|
12
28
|
ruby
|
data/README.md
CHANGED
@@ -1,8 +1,6 @@
|
|
1
1
|
# Privacy
|
2
2
|
|
3
|
-
|
4
|
-
|
5
|
-
TODO: Delete this and the text above, and describe your gem
|
3
|
+
Privacy removes personal data from an Excel (`.xlsx`) file.
|
6
4
|
|
7
5
|
## Installation
|
8
6
|
|
@@ -22,9 +20,9 @@ Or install it yourself as:
|
|
22
20
|
|
23
21
|
## Usage
|
24
22
|
|
25
|
-
|
26
|
-
|
27
|
-
|
23
|
+
Process all Excel files in the current directory: `privacy process`.
|
24
|
+
|
25
|
+
Process a specific file in the current directory: `privacy process file.xlsx`.
|
28
26
|
|
29
27
|
## Development
|
30
28
|
|
@@ -32,6 +30,10 @@ After checking out the repo, run `bin/setup` to install dependencies. You can al
|
|
32
30
|
|
33
31
|
To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org).
|
34
32
|
|
33
|
+
To release a new version, change the version number and run `rake release`.
|
34
|
+
|
35
|
+
To work on the code, use `bundle exec privacy process`.
|
36
|
+
|
35
37
|
## Contributing
|
36
38
|
|
37
39
|
Bug reports and pull requests are welcome on GitHub at https://github.com/[USERNAME]/privacy. This project is intended to be a safe, welcoming space for collaboration, and contributors are expected to adhere to the [code of conduct](https://github.com/[USERNAME]/privacy/blob/master/CODE_OF_CONDUCT.md).
|
data/lib/privacy.rb
CHANGED
@@ -1,11 +1,13 @@
|
|
1
1
|
require 'privacy/version'
|
2
|
+
require 'privacy/processor'
|
2
3
|
require 'thor'
|
3
4
|
|
4
5
|
module Privacy
  # Thor command-line interface exposing the `process` command.
  class CLI < Thor
    desc "process [file]", "Make data private"
    # Builds a Processor for the given file, or for every entry matching
    # "*.xlsx" in the current working directory when no file is given.
    #
    # @param file [String, nil] file to process; nil means "every *.xlsx here"
    def process(file = nil)
      if file.nil?
        Dir.glob("*.xlsx") { |path| Processor.new(path) }
      else
        Processor.new(file)
      end
    end
  end
end
|
@@ -0,0 +1,133 @@
|
|
1
|
+
module Privacy
  # Anonymizes, row by row, the spreadsheet cells that describe one person.
  #
  # A person is designated by an identifier that is substituted into the
  # header templates of COLUMNS_TO_ANONYMIZE. Usage is two-step:
  # #identify_columns on the header row first, then #anonymize on each data row.
  # Progress is reported on standard output with `puts`.
  class Person
    attr_reader :identifier, :row, :index, :columns, :data, :combinations, :initials

    # Header templates of the columns to blank out; "_identifier_" is replaced
    # by the person's identifier before comparison with the header cells.
    COLUMNS_TO_ANONYMIZE = [
      'Nom _identifier_',
      'Prénom _identifier_',
      'Date de naissance _identifier_',
      'Lieu naissance Pays _identifier_',
      'Lieu naissance Ville _identifier_',
      'Lieu naissance Département _identifier_',
      'Sexe _identifier_',
      'Nationalité _identifier_',
      'Adresse _identifier_ [Date]',
      'Adresse _identifier_ [Département]',
      'Adresse _identifier_ [Ville]',
      'Adresse _identifier_ [Pays]',
      'Profession _identifier_',
      'Date profession _identifier_'
    ].freeze

    # Strings that may follow each initial when building initials tokens.
    INITIALS_SEPARATORS = [
      '',
      '.'
    ].freeze

    # @param identifier [String] text substituted for "_identifier_" in the
    #   header templates and used in the "[identifier]" placeholder
    def initialize(identifier)
      @identifier = identifier
      puts "Identify #{identifier}"
    end

    # Records which column letters hold this person's data.
    #
    # @param row [Hash] header row, mapping cell references to header text
    # @return [Hash] the row that was passed in
    def identify_columns(row)
      @columns = {}
      row.each do |key, value|
        COLUMNS_TO_ANONYMIZE.each do |column|
          name = column_with_identifier column
          next unless value == name

          # Dropping the "1" from the cell reference leaves the column letter
          # (presumably the header is always on row 1 — confirm with callers).
          letter = key.gsub '1', ''
          @columns[letter] = name
          puts " #{name} on column #{letter}"
        end
      end
    end

    # Anonymizes one data row in place: identified cells are replaced by
    # "[<column name>]", then name/initials tokens found in any string cell
    # are replaced by "[<identifier>]".
    #
    # @param row [Hash] cell reference => value, modified in place
    # @param index [Integer] zero-based position of the row
    # @return [Hash] the modified row
    def anonymize(row, index)
      @row = row
      @index = index + 1 # Excel is 1-indexed
      anonymize_personal_data
      build_combinations
      remove_combinations
    end

    protected

    # Replaces every identified, non-nil cell of the current row with a
    # "[column name]" placeholder and remembers the original values in @data.
    def anonymize_personal_data
      @data = {}
      puts
      puts "Anonymize row #{index}"
      puts " #{identifier}"
      @columns.each do |letter, column|
        cell = "#{letter}#{index}"
        value = row[cell]
        next if value.nil?

        @data[column] = value
        puts " #{value} (#{column})"
        row[cell] = "[#{column}]"
      end
    end

    # Builds the tokens (full names in both orders, with optional leading or
    # trailing space, plus initials variants) to scrub from free text.
    # Leaves @combinations empty unless both first and last name are known.
    def build_combinations
      puts 'build_combinations'
      @combinations = []
      first_name = data[column_with_identifier('Prénom _identifier_')]
      last_name = data[column_with_identifier('Nom _identifier_')]
      unless first_name.nil? || last_name.nil?
        words = first_name.split(' ') + last_name.split(' ')
        initials = words.map { |word| word[0].upcase }
        [[first_name, last_name], [last_name, first_name]].each do |a, b|
          @combinations << "#{a} #{b}"
          @combinations << " #{a} #{b}"
          @combinations << "#{a} #{b} "
        end
        @combinations += initials_combinations(initials)
      end
      puts "Combinations: #{@combinations.join(', ')}"
    end

    # @param column [String] header template containing "_identifier_"
    # @return [String] the template with this person's identifier substituted
    def column_with_identifier(column)
      column.gsub '_identifier_', identifier
    end

    # Builds every spelling of the initials, in order, with each separator of
    # INITIALS_SEPARATORS after each initial and optional spaces around them.
    #
    # The list is split into head and tail without being modified; the
    # previous version shifted elements off the caller's array while
    # iterating over it.
    #
    # @param initials [Array<String>] initials, first to last (not modified)
    # @return [Array<String>] unique tokens; empty when +initials+ is empty
    def initials_combinations(initials)
      puts "Initials for #{initials}"
      return [] if initials.empty?

      letter, *rest = initials
      tails = initials_combinations rest
      combinations = []
      INITIALS_SEPARATORS.each do |separator|
        head = "#{letter}#{separator}"
        if tails.empty?
          combinations << head
          combinations << "#{head} "
          combinations << " #{head}"
        else
          tails.each do |tail|
            combinations << "#{head}#{tail}"
            combinations << " #{head}#{tail}"
            combinations << "#{head} #{tail}"
            combinations << "#{head} #{tail} "
            combinations << "#{head}#{tail} "
          end
        end
      end
      combinations.uniq
    end

    # Replaces every token of @combinations found in a string cell of the
    # current row with "[identifier]". Tokens are applied in list order, so an
    # earlier token can consume text a later, longer token would have matched.
    def remove_combinations
      row.each do |key, value|
        anonymized_value = value
        if value.is_a? String
          combinations.each do |token|
            anonymized_value = anonymized_value.gsub(token, "[#{identifier}]")
          end
        end
        row[key] = anonymized_value
      end
    end
  end
end
|
@@ -0,0 +1,79 @@
|
|
1
|
+
require 'creek'
|
2
|
+
require 'write_xlsx'
|
3
|
+
require 'privacy/person'
|
4
|
+
|
5
|
+
module Privacy
  # Reads the first sheet of a spreadsheet with Creek, anonymizes the 'ppmec'
  # and 'ppvic' persons on every data row, and writes the result with
  # WriteXLSX into the DIRECTORY folder of the current working directory.
  # All the work happens in the constructor.
  class Processor
    attr_reader :file

    # Output folder, relative to the current working directory.
    DIRECTORY = 'processed'

    # @param file [String] name or path of the spreadsheet to process. Files
    #   whose base name starts with '~' are skipped (presumably the "~$..."
    #   lock files Excel creates next to an open workbook — confirm).
    def initialize(file)
      @file = file
      # Test the base name so that "dir/~$book.xlsx" is skipped as well.
      process unless File.basename(@file).start_with? '~'
    end

    protected

    # Runs the whole pipeline for the current file.
    def process
      puts "Processing #{@file}"
      make_directory
      filter_data
      write_file
    end

    # Creates the output folder if nothing exists at that path yet.
    def make_directory
      # File.exist? rather than File.exists?, which was removed in Ruby 3.2.
      Dir.mkdir DIRECTORY unless File.exist? DIRECTORY
    end

    # Copies the header row as-is, then every following row after both
    # persons have anonymized it in place.
    def filter_data
      # Read the header once instead of re-reading the sheet for each use.
      header = sheet.rows.first
      return if header.nil? # empty sheet: nothing to identify or copy

      @ppmec = Privacy::Person.new 'ppmec'
      @ppmec.identify_columns header
      @ppvic = Privacy::Person.new 'ppvic'
      @ppvic.identify_columns header
      header.each do |key, value|
        worksheet.write key, value
      end
      sheet.rows.each_with_index do |row, index|
        next if index.zero? # header row, already written above

        @ppmec.anonymize row, index
        @ppvic.anonymize row, index
        row.each do |key, value|
          worksheet.write key, value
        end
      end
    end

    # Closes the output workbook.
    def write_file
      workbook.close
    end

    # New file
    # http://cxn03651.github.io/write_xlsx/index.html

    # @return [WriteXLSX] memoized output workbook
    def workbook
      @workbook ||= WriteXLSX.new processed_file_name
    end

    # @return [Object] memoized worksheet added to the output workbook
    def worksheet
      @worksheet ||= workbook.add_worksheet
    end

    # @return [String] output path: DIRECTORY plus the input's base name, so
    #   an input given with a directory part does not require that directory
    #   to exist under DIRECTORY
    def processed_file_name
      File.join DIRECTORY, File.basename(file)
    end

    # Current file
    # https://github.com/pythonicrubyist/creek

    # @return [String] memoized absolute path of the input file
    def path
      @path ||= File.expand_path file
    end

    # @return [Creek::Book] memoized input workbook
    def data
      @data ||= Creek::Book.new path, with_headers: true
    end

    # @return [Object] memoized first sheet of the input workbook
    def sheet
      @sheet ||= data.sheets[0]
    end
  end
end
|
data/lib/privacy/version.rb
CHANGED
data/privacy.gemspec
CHANGED
@@ -1,29 +1,31 @@
|
|
1
1
|
require_relative 'lib/privacy/version'
|
2
2
|
|
3
|
-
Gem::Specification.new do |
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
3
|
+
# Gem specification for the privacy gem.
Gem::Specification.new do |s|
  s.name          = "privacy"
  s.version       = Privacy::VERSION
  s.authors       = ["Arnaud Levy"]
  s.email         = ["contact@arnaudlevy.com"]

  s.summary       = "Privacy removes personal data from a xls file"
  s.description   = "Remove first name and last name from xls files, replace them with an anonymous placeholder"
  s.homepage      = "https://github.com/arnaudlevy/privacy"
  s.license       = "MIT"
  s.required_ruby_version = Gem::Requirement.new(">= 2.3.0")

  # s.metadata["allowed_push_host"] = "TODO: Set to 'http://mygemserver.com'"

  s.metadata["homepage_uri"] = s.homepage
  s.metadata["source_code_uri"] = "https://github.com/arnaudlevy/privacy"

  # Specify which files should be added to the gem when it is released.
  # The `git ls-files -z` loads the files in the RubyGem that have been added into git.
  s.files         = Dir.chdir(File.expand_path('..', __FILE__)) do
    `git ls-files -z`.split("\x0").reject do |f|
      # Exclude test/spec/features directories (the pattern previously read
      # "test|s|features", left over from renaming the block variable) and
      # Finder's .DS_Store metadata files.
      f.match(%r{^(test|spec|features)/}) || File.basename(f) == '.DS_Store'
    end
  end
  s.bindir        = "exe"
  s.executables   = s.files.grep(%r{^exe/}) { |f| File.basename(f) }
  s.require_paths = ["lib"]
  s.add_dependency "thor"
  s.add_dependency "creek"
  s.add_dependency "write_xlsx"
end
|
metadata
CHANGED
@@ -1,29 +1,57 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: privacy
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.
|
4
|
+
version: 0.1.5
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Arnaud Levy
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-
|
11
|
+
date: 2020-06-01 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: thor
|
15
15
|
requirement: !ruby/object:Gem::Requirement
|
16
16
|
requirements:
|
17
|
-
- - "
|
17
|
+
- - ">="
|
18
18
|
- !ruby/object:Gem::Version
|
19
|
-
version: '0
|
19
|
+
version: '0'
|
20
20
|
type: :runtime
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
|
-
- - "
|
24
|
+
- - ">="
|
25
25
|
- !ruby/object:Gem::Version
|
26
|
-
version: '0
|
26
|
+
version: '0'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: creek
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ">="
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '0'
|
34
|
+
type: :runtime
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - ">="
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '0'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: write_xlsx
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - ">="
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '0'
|
48
|
+
type: :runtime
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - ">="
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '0'
|
27
55
|
description: Remove first name and last name from xls files, replace them with an
|
28
56
|
anonymous placeholder
|
29
57
|
email:
|
@@ -33,6 +61,7 @@ executables:
|
|
33
61
|
extensions: []
|
34
62
|
extra_rdoc_files: []
|
35
63
|
files:
|
64
|
+
- ".DS_Store"
|
36
65
|
- ".gitignore"
|
37
66
|
- CODE_OF_CONDUCT.md
|
38
67
|
- Gemfile
|
@@ -44,6 +73,8 @@ files:
|
|
44
73
|
- bin/setup
|
45
74
|
- exe/privacy
|
46
75
|
- lib/privacy.rb
|
76
|
+
- lib/privacy/person.rb
|
77
|
+
- lib/privacy/processor.rb
|
47
78
|
- lib/privacy/version.rb
|
48
79
|
- privacy.gemspec
|
49
80
|
homepage: https://github.com/arnaudlevy/privacy
|