privacy 0.1.0 → 0.1.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 8e43f59881052bff4df200d87ea0058f3c1400235d7bbc947dcde15727b1cd48
4
- data.tar.gz: 57fd3cf9d93c96980faa238f0a2611ceffed53782acaf34d981b76d3b6552877
3
+ metadata.gz: 70f75e3ceaea1bcf698189b550f06b19898d813fb1f6c69ce47bb3c2770f7eb9
4
+ data.tar.gz: d234953c1819d6ae303cabc4239be5b06be7945b0fdf3de2b8091859a440e961
5
5
  SHA512:
6
- metadata.gz: 70766c9d2aa81bdd286cdbae07f8080ecb19996b50285b66d0a1e846f8c457409f425ba01d16d9f213ce76604e0d244fb151c17572399806fa3b047b4b137161
7
- data.tar.gz: 334375cc60576731aaf10c28a7409d61fd3f35f39dd645515fffba4f85cca2cd5d29ee8fcd210e18996099b7e53b10007c7cdbfa1b98983eb186f20300447f1e
6
+ metadata.gz: 542f23027312376c876cdccab5fe24de7e7168becf3f842ce143eb5bf892c876089845168dc2fd99033f59edf0b018125a2f5e1fe386f59844f876f2c6330644
7
+ data.tar.gz: 7d1a71839a9b8abe462579f73eaf91ac0da26ae12cad07940d495d56f35b697d1b1a4290d3a92e55c4e13ed73a016ab12a6414543971e7dcb5dd9ae12f0396aa
Binary file
data/.gitignore CHANGED
@@ -6,3 +6,6 @@
6
6
  /pkg/
7
7
  /spec/reports/
8
8
  /tmp/
9
+ *.xlsx
10
+ *.xls
11
+ *.csv
@@ -1,12 +1,28 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- privacy (0.1.0)
4
+ privacy (0.1.4)
5
+ creek
6
+ thor
7
+ write_xlsx
5
8
 
6
9
  GEM
7
10
  remote: https://rubygems.org/
8
11
  specs:
12
+ creek (2.5.2)
13
+ nokogiri (>= 1.10.0)
14
+ rubyzip (>= 1.0.0)
15
+ mini_portile2 (2.4.0)
16
+ nokogiri (1.10.9)
17
+ mini_portile2 (~> 2.4.0)
9
18
  rake (12.3.3)
19
+ rubyzip (2.3.0)
20
+ thor (1.0.1)
21
+ write_xlsx (0.85.7)
22
+ rubyzip (>= 1.0.0)
23
+ zip-zip
24
+ zip-zip (0.3)
25
+ rubyzip (>= 1.0.0)
10
26
 
11
27
  PLATFORMS
12
28
  ruby
data/README.md CHANGED
@@ -1,8 +1,6 @@
1
1
  # Privacy
2
2
 
3
- Welcome to your new gem! In this directory, you'll find the files you need to be able to package up your Ruby library into a gem. Put your Ruby code in the file `lib/privacy`. To experiment with that code, run `bin/console` for an interactive prompt.
4
-
5
- TODO: Delete this and the text above, and describe your gem
3
+ Privacy removes personal data from Excel (`.xlsx`) files.
6
4
 
7
5
  ## Installation
8
6
 
@@ -22,9 +20,9 @@ Or install it yourself as:
22
20
 
23
21
  ## Usage
24
22
 
25
- ```
26
- privacy process ./file.xlsx
27
- ```
23
+ Process all Excel (`.xlsx`) files in the current directory: `privacy process`.
24
+
25
+ Process a specific file in the current directory: `privacy process ./file.xlsx`.
28
26
 
29
27
  ## Development
30
28
 
@@ -32,6 +30,10 @@ After checking out the repo, run `bin/setup` to install dependencies. You can al
32
30
 
33
31
  To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org).
34
32
 
33
+ To release a new version, change the version number and run `rake release`.
34
+
35
+ To work on the code, use `bundle exec privacy process`.
36
+
35
37
  ## Contributing
36
38
 
37
39
  Bug reports and pull requests are welcome on GitHub at https://github.com/[USERNAME]/privacy. This project is intended to be a safe, welcoming space for collaboration, and contributors are expected to adhere to the [code of conduct](https://github.com/[USERNAME]/privacy/blob/master/CODE_OF_CONDUCT.md).
@@ -1,11 +1,13 @@
1
1
  require 'privacy/version'
2
+ require 'privacy/processor'
2
3
  require 'thor'
3
4
 
4
5
module Privacy
  # Command-line entry point, built on Thor.
  class CLI < Thor
    desc "process [file]", "Make data private"
    # Runs a Processor on the given file, or on every `*.xlsx` file in the
    # current directory when no file is given.
    #
    # @param file [String, nil] path of a single file to process
    def process(file = nil)
      # Build one list of paths so both cases share the same loop and the
      # block parameter no longer shadows the `file` argument.
      paths = file.nil? ? Dir.glob("*.xlsx") : [file]
      paths.each { |path| Processor.new(path) }
    end
  end
end
@@ -0,0 +1,133 @@
1
module Privacy
  # Anonymizes the data of one person, designated by an identifier
  # (for example 'ppmec'), inside spreadsheet rows.
  #
  # Rows are hashes keyed by cell reference ("A1", "B7", ...). Rows passed to
  # #anonymize are modified in place.
  class Person
    attr_reader :identifier, :row, :index, :columns, :data, :combinations, :initials

    # Header templates of the columns to blank out; `_identifier_` is replaced
    # by the person's identifier before comparing with the header cells.
    COLUMNS_TO_ANONYMIZE = [
      'Nom _identifier_',
      'Prénom _identifier_',
      'Date de naissance _identifier_',
      'Lieu naissance Pays _identifier_',
      'Lieu naissance Ville _identifier_',
      'Lieu naissance Département _identifier_',
      'Sexe _identifier_',
      'Nationalité _identifier_',
      'Adresse _identifier_ [Date]',
      'Adresse _identifier_ [Département]',
      'Adresse _identifier_ [Ville]',
      'Adresse _identifier_ [Pays]',
      'Profession _identifier_',
      'Date profession _identifier_'
    ].freeze

    # Strings that may follow each initial when searching for initials.
    INITIALS_SEPARATORS = [
      '',
      '.'
    ].freeze

    # @param identifier [String] value substituted for `_identifier_` in the
    #   column templates and used in the `[identifier]` placeholder
    def initialize(identifier)
      @identifier = identifier
      puts "Identify #{identifier}"
    end

    # Records which column letters hold this person's data.
    #
    # @param row [Hash] header row, cell reference => header text
    def identify_columns(row)
      @columns = {}
      row.each do |key, value|
        COLUMNS_TO_ANONYMIZE.each do |column|
          name = column_with_identifier column
          next unless value == name

          # Header cells are on row 1: dropping the "1" leaves the column letter.
          letter = key.gsub '1', ''
          @columns[letter] = name
          puts " #{name} on column #{letter}"
        end
      end
    end

    # Anonymizes one data row in place: identified columns are replaced by
    # "[column name]", then name and initials combinations found in any
    # string cell are replaced by "[identifier]".
    #
    # @param row [Hash] cell reference => value
    # @param index [Integer] zero-based position of the row in the sheet
    def anonymize(row, index)
      @row = row
      @index = index + 1 # Excel is 1-indexed
      anonymize_personal_data
      build_combinations
      remove_combinations
    end

    protected

    # Stores the original values of the identified columns in @data and
    # replaces each of them in the row with a "[column name]" placeholder.
    def anonymize_personal_data
      @data = {}
      puts
      puts "Anonymize row #{index}"
      puts " #{identifier}"
      @columns.each do |letter, column|
        cell = "#{letter}#{index}"
        value = row[cell]
        next if value.nil?

        @data[column] = value
        puts " #{value} (#{column})"
        row[cell] = "[#{column}]"
      end
    end

    # Builds the list of strings (full name in both orders, initials) that
    # must be removed from free text. Empty unless both first and last name
    # were found in the row.
    def build_combinations
      puts 'build_combinations'
      @combinations = []
      first_name = data[column_with_identifier('Prénom _identifier_')]
      last_name = data[column_with_identifier('Nom _identifier_')]
      if !first_name.nil? && !last_name.nil?
        words = first_name.split(' ') + last_name.split(' ')
        letters = words.map { |word| word[0].upcase }
        @combinations << "#{first_name} #{last_name}"
        @combinations << " #{first_name} #{last_name}"
        @combinations << "#{first_name} #{last_name} "
        @combinations << "#{last_name} #{first_name}"
        @combinations << " #{last_name} #{first_name}"
        @combinations << "#{last_name} #{first_name} "
        @combinations += initials_combinations(letters)
      end
      puts "Combinations: #{@combinations.join(', ')}"
    end

    # @param column [String] a template from COLUMNS_TO_ANONYMIZE
    # @return [String] the template with `_identifier_` replaced
    def column_with_identifier(column)
      column.gsub '_identifier_', identifier
    end

    # Builds every spelling of the initials, in order, with each separator of
    # INITIALS_SEPARATORS and optional surrounding/inner spaces.
    #
    # Recurses on the tail of the list. The argument is NOT modified (the
    # previous implementation shifted elements off the array it was
    # iterating, emptying the caller's array).
    #
    # @param initials [Array<String>] one letter per name word
    # @return [Array<String>] unique combinations, [] for an empty list
    def initials_combinations(initials)
      puts "Initials for #{initials}"
      return [] if initials.empty?

      letter, *remaining = initials
      recursive_combinations = initials_combinations remaining
      combinations = []
      INITIALS_SEPARATORS.each do |separator|
        initial_with_separator = "#{letter}#{separator}"
        if recursive_combinations.none?
          combinations << "#{initial_with_separator}"
          combinations << "#{initial_with_separator} "
          combinations << " #{initial_with_separator}"
        else
          recursive_combinations.each do |c|
            combinations << "#{initial_with_separator}#{c}"
            combinations << " #{initial_with_separator}#{c}"
            combinations << "#{initial_with_separator} #{c}"
            combinations << "#{initial_with_separator} #{c} "
            combinations << "#{initial_with_separator}#{c} "
          end
        end
      end
      combinations.uniq
    end

    # Replaces every combination found in the string cells of the row with
    # "[identifier]". Tokens are applied in the order they were built.
    def remove_combinations
      row.each do |key, value|
        next unless value.is_a? String

        # Only existing keys are reassigned, which is safe while iterating.
        row[key] = combinations.reduce(value) do |text, token|
          text.gsub(token, "[#{identifier}]")
        end
      end
    end
  end
end
@@ -0,0 +1,79 @@
1
+ require 'creek'
2
+ require 'write_xlsx'
3
+ require 'privacy/person'
4
+
5
module Privacy
  # Reads the first sheet of a spreadsheet file, anonymizes the 'ppmec' and
  # 'ppvic' persons in every data row, and writes the result under the same
  # file name inside the DIRECTORY folder of the current directory.
  #
  # Processing happens as a side effect of construction.
  class Processor
    attr_reader :file

    # Output folder, relative to the current working directory.
    DIRECTORY = 'processed'

    # @param file [String] path of the file to process
    def initialize(file)
      @file = file
      # Files whose name starts with "~" are skipped (presumably Excel's
      # temporary lock files, "~$name.xlsx" — confirm). The check is done on
      # the base name so "./dir/~$name.xlsx" is skipped as well.
      process unless File.basename(@file).start_with? '~'
    end

    protected

    def process
      puts "Processing #{@file}"
      make_directory
      filter_data
      write_file
    end

    # Creates the output folder when missing.
    # Uses Dir.exist? because File.exists? was removed in Ruby 3.2.
    def make_directory
      Dir.mkdir DIRECTORY unless Dir.exist? DIRECTORY
    end

    # Copies the header row as is, then every other row after anonymization.
    def filter_data
      # Read the header once instead of re-reading it from the sheet for
      # every use.
      header = sheet.rows.first
      @ppmec = Privacy::Person.new 'ppmec'
      @ppmec.identify_columns header
      @ppvic = Privacy::Person.new 'ppvic'
      @ppvic.identify_columns header
      header.each do |key, value|
        worksheet.write key, value
      end
      sheet.rows.each_with_index do |row, index|
        next if index.zero? # header already written

        @ppmec.anonymize row, index
        @ppvic.anonymize row, index
        row.each do |key, value|
          worksheet.write key, value
        end
      end
    end

    def write_file
      workbook.close
    end

    # New file
    # http://cxn03651.github.io/write_xlsx/index.html

    def workbook
      @workbook ||= WriteXLSX.new processed_file_name
    end

    def worksheet
      @worksheet ||= workbook.add_worksheet
    end

    # @return [String] output path: DIRECTORY plus the base name of the input,
    #   so inputs given with a directory part still land directly in DIRECTORY
    def processed_file_name
      File.join DIRECTORY, File.basename(file)
    end

    # Current file
    # https://github.com/pythonicrubyist/creek

    def path
      @path ||= File.expand_path file
    end

    def data
      @data ||= Creek::Book.new path, with_headers: true
    end

    def sheet
      @sheet ||= data.sheets[0]
    end
  end
end
@@ -1,3 +1,3 @@
1
1
module Privacy
  # Gem version, read by the gemspec. Frozen so it cannot be mutated at runtime.
  VERSION = "0.1.5".freeze
end
@@ -1,29 +1,31 @@
1
1
  require_relative 'lib/privacy/version'
2
2
 
3
# Gem specification for the privacy gem.
Gem::Specification.new do |s|
  s.name          = "privacy"
  s.version       = Privacy::VERSION
  s.authors       = ["Arnaud Levy"]
  s.email         = ["contact@arnaudlevy.com"]

  s.summary       = "Privacy removes personal data from a xls file"
  s.description   = "Remove first name and last name from xls files, replace them with an anonymous placeholder"
  s.homepage      = "https://github.com/arnaudlevy/privacy"
  s.license       = "MIT"
  s.required_ruby_version = Gem::Requirement.new(">= 2.3.0")

  # s.metadata["allowed_push_host"] = "TODO: Set to 'http://mygemserver.com'"

  s.metadata["homepage_uri"] = s.homepage
  s.metadata["source_code_uri"] = "https://github.com/arnaudlevy/privacy"

  # Specify which files should be added to the gem when it is released.
  # The `git ls-files -z` loads the files in the RubyGem that have been added into git.
  s.files         = Dir.chdir(File.expand_path('..', __FILE__)) do
    `git ls-files -z`.split("\x0").reject do |f|
      # The directory name must stay `spec`: it is a path, not the block
      # variable, so it must not follow the spec -> s rename.
      # .DS_Store is macOS Finder residue and does not belong in the package.
      f.match(%r{^(test|spec|features)/}) || File.basename(f) == '.DS_Store'
    end
  end
  s.bindir        = "exe"
  s.executables   = s.files.grep(%r{^exe/}) { |f| File.basename(f) }
  s.require_paths = ["lib"]
  s.add_dependency "thor"
  s.add_dependency "creek"
  s.add_dependency "write_xlsx"
end
metadata CHANGED
@@ -1,29 +1,57 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: privacy
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.1.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - Arnaud Levy
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2020-05-27 00:00:00.000000000 Z
11
+ date: 2020-06-01 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: thor
15
15
  requirement: !ruby/object:Gem::Requirement
16
16
  requirements:
17
- - - "~>"
17
+ - - ">="
18
18
  - !ruby/object:Gem::Version
19
- version: '0.20'
19
+ version: '0'
20
20
  type: :runtime
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
- - - "~>"
24
+ - - ">="
25
25
  - !ruby/object:Gem::Version
26
- version: '0.20'
26
+ version: '0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: creek
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ">="
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: write_xlsx
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ">="
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ type: :runtime
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
27
55
  description: Remove first name and last name from xls files, replace them with an
28
56
  anonymous placeholder
29
57
  email:
@@ -33,6 +61,7 @@ executables:
33
61
  extensions: []
34
62
  extra_rdoc_files: []
35
63
  files:
64
+ - ".DS_Store"
36
65
  - ".gitignore"
37
66
  - CODE_OF_CONDUCT.md
38
67
  - Gemfile
@@ -44,6 +73,8 @@ files:
44
73
  - bin/setup
45
74
  - exe/privacy
46
75
  - lib/privacy.rb
76
+ - lib/privacy/person.rb
77
+ - lib/privacy/processor.rb
47
78
  - lib/privacy/version.rb
48
79
  - privacy.gemspec
49
80
  homepage: https://github.com/arnaudlevy/privacy