privacy 0.1.0 → 0.1.5

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 8e43f59881052bff4df200d87ea0058f3c1400235d7bbc947dcde15727b1cd48
4
- data.tar.gz: 57fd3cf9d93c96980faa238f0a2611ceffed53782acaf34d981b76d3b6552877
3
+ metadata.gz: 70f75e3ceaea1bcf698189b550f06b19898d813fb1f6c69ce47bb3c2770f7eb9
4
+ data.tar.gz: d234953c1819d6ae303cabc4239be5b06be7945b0fdf3de2b8091859a440e961
5
5
  SHA512:
6
- metadata.gz: 70766c9d2aa81bdd286cdbae07f8080ecb19996b50285b66d0a1e846f8c457409f425ba01d16d9f213ce76604e0d244fb151c17572399806fa3b047b4b137161
7
- data.tar.gz: 334375cc60576731aaf10c28a7409d61fd3f35f39dd645515fffba4f85cca2cd5d29ee8fcd210e18996099b7e53b10007c7cdbfa1b98983eb186f20300447f1e
6
+ metadata.gz: 542f23027312376c876cdccab5fe24de7e7168becf3f842ce143eb5bf892c876089845168dc2fd99033f59edf0b018125a2f5e1fe386f59844f876f2c6330644
7
+ data.tar.gz: 7d1a71839a9b8abe462579f73eaf91ac0da26ae12cad07940d495d56f35b697d1b1a4290d3a92e55c4e13ed73a016ab12a6414543971e7dcb5dd9ae12f0396aa
Binary file
data/.gitignore CHANGED
@@ -6,3 +6,6 @@
6
6
  /pkg/
7
7
  /spec/reports/
8
8
  /tmp/
9
+ *.xlsx
10
+ *.xls
11
+ *.csv
@@ -1,12 +1,28 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- privacy (0.1.0)
4
+ privacy (0.1.4)
5
+ creek
6
+ thor
7
+ write_xlsx
5
8
 
6
9
  GEM
7
10
  remote: https://rubygems.org/
8
11
  specs:
12
+ creek (2.5.2)
13
+ nokogiri (>= 1.10.0)
14
+ rubyzip (>= 1.0.0)
15
+ mini_portile2 (2.4.0)
16
+ nokogiri (1.10.9)
17
+ mini_portile2 (~> 2.4.0)
9
18
  rake (12.3.3)
19
+ rubyzip (2.3.0)
20
+ thor (1.0.1)
21
+ write_xlsx (0.85.7)
22
+ rubyzip (>= 1.0.0)
23
+ zip-zip
24
+ zip-zip (0.3)
25
+ rubyzip (>= 1.0.0)
10
26
 
11
27
  PLATFORMS
12
28
  ruby
data/README.md CHANGED
@@ -1,8 +1,6 @@
1
1
  # Privacy
2
2
 
3
- Welcome to your new gem! In this directory, you'll find the files you need to be able to package up your Ruby library into a gem. Put your Ruby code in the file `lib/privacy`. To experiment with that code, run `bin/console` for an interactive prompt.
4
-
5
- TODO: Delete this and the text above, and describe your gem
3
+ Privacy removes personal data from an Excel (`.xlsx`) file.
6
4
 
7
5
  ## Installation
8
6
 
@@ -22,9 +20,9 @@ Or install it yourself as:
22
20
 
23
21
  ## Usage
24
22
 
25
- ```
26
- privacy process ./file.xlsx
27
- ```
23
+ Process all Excel (`.xlsx`) files in the current directory: `privacy process`.
24
+
25
+ Process a specific file in the current directory: `privacy process ./file.xlsx`.
28
26
 
29
27
  ## Development
30
28
 
@@ -32,6 +30,10 @@ After checking out the repo, run `bin/setup` to install dependencies. You can al
32
30
 
33
31
  To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org).
34
32
 
33
+ To release a new version, change the version number in `lib/privacy/version.rb` and run `bundle exec rake release`.
34
+
35
+ To work on the code, use `bundle exec privacy process`.
36
+
35
37
  ## Contributing
36
38
 
37
39
  Bug reports and pull requests are welcome on GitHub at https://github.com/arnaudlevy/privacy. This project is intended to be a safe, welcoming space for collaboration, and contributors are expected to adhere to the [code of conduct](https://github.com/arnaudlevy/privacy/blob/master/CODE_OF_CONDUCT.md).
@@ -1,11 +1,13 @@
1
1
  require 'privacy/version'
2
+ require 'privacy/processor'
2
3
  require 'thor'
3
4
 
4
5
  module Privacy
5
6
  class CLI < Thor
6
7
  desc "process [file]", "Make data private"
7
- def process(file)
8
- puts file
8
+ def process(file = nil)
9
+ file.nil? ? Dir.glob("*.xlsx") { |file| Processor.new(file) }
10
+ : Processor.new(file)
9
11
  end
10
12
  end
11
13
  end
@@ -0,0 +1,133 @@
1
module Privacy
  # Anonymizes the data of one person inside spreadsheet rows.
  #
  # A person is designated by an identifier that appears in the header cells
  # (see COLUMNS_TO_ANONYMIZE). Usage is two-step: call #identify_columns once
  # with the header row, then #anonymize for every data row. Rows are hashes
  # keyed by cell reference ("A1", "B7", ...) and are modified in place.
  # Progress is reported on standard output.
  class Person
    attr_reader :identifier, :row, :index, :columns, :data, :combinations, :initials

    # Header templates of the columns holding personal data; "_identifier_"
    # is replaced by the person's identifier before comparison.
    COLUMNS_TO_ANONYMIZE = [
      'Nom _identifier_',
      'Prénom _identifier_',
      'Date de naissance _identifier_',
      'Lieu naissance Pays _identifier_',
      'Lieu naissance Ville _identifier_',
      'Lieu naissance Département _identifier_',
      'Sexe _identifier_',
      'Nationalité _identifier_',
      'Adresse _identifier_ [Date]',
      'Adresse _identifier_ [Département]',
      'Adresse _identifier_ [Ville]',
      'Adresse _identifier_ [Pays]',
      'Profession _identifier_',
      'Date profession _identifier_'
    ].freeze

    # Strings that may follow each initial ("JD" as well as "J.D.").
    INITIALS_SEPARATORS = [
      '',
      '.'
    ].freeze

    # @param identifier [String] text substituted for "_identifier_" in the
    #   header templates and used in the "[identifier]" placeholder
    def initialize(identifier)
      @identifier = identifier
      puts "Identify #{identifier}"
    end

    # Records which column letters hold this person's data.
    #
    # @param row [Hash{String => Object}] header row, cell reference => header
    # @return [Hash] the row that was passed in
    def identify_columns(row)
      @columns = {}
      names = COLUMNS_TO_ANONYMIZE.map { |column| column_with_identifier(column) }
      row.each do |key, value|
        name = names.find { |candidate| value == candidate }
        next if name.nil?

        # Strip the row number from the cell reference to keep the letter(s).
        letter = key.sub(/\d+\z/, '')
        @columns[letter] = name
        puts " #{name} on column #{letter}"
      end
    end

    # Anonymizes one data row in place: personal data cells are replaced by
    # "[column name]" and occurrences of the person's name or initials in any
    # string cell are replaced by "[identifier]".
    # #identify_columns must have been called beforehand.
    #
    # @param row [Hash{String => Object}] cell reference => value
    # @param index [Integer] zero-based position of the row in the sheet
    # @return [Hash] the modified row
    def anonymize(row, index)
      @row = row
      @index = index + 1 # Excel is 1-indexed
      anonymize_personal_data
      build_combinations
      remove_combinations
    end

    protected

    # Moves the personal data cells of the current row into @data and
    # replaces each of them by a "[column name]" placeholder.
    def anonymize_personal_data
      @data = {}
      puts
      puts "Anonymize row #{index}"
      puts " #{identifier}"
      @columns.each do |letter, column|
        cell = "#{letter}#{index}"
        value = row[cell]
        next if value.nil?

        @data[column] = value
        puts " #{value} (#{column})"
        row[cell] = "[#{column}]"
      end
    end

    # Builds the list of name and initials spellings to look for in the row.
    # Leaves @combinations empty when the first or last name is missing.
    def build_combinations
      puts 'build_combinations'
      @combinations = []
      @initials = []
      first_name = data[column_with_identifier('Prénom _identifier_')]
      last_name = data[column_with_identifier('Nom _identifier_')]
      unless first_name.nil? || last_name.nil?
        first_name = first_name.to_s
        last_name = last_name.to_s
        @initials = (first_name.split(' ') + last_name.split(' ')).map { |part| part[0].upcase }
        @combinations = [
          "#{first_name} #{last_name}",
          " #{first_name} #{last_name}",
          "#{first_name} #{last_name} ",
          "#{last_name} #{first_name}",
          " #{last_name} #{first_name}",
          "#{last_name} #{first_name} "
        ]
        @combinations += initials_combinations(@initials)
        # A whitespace-only token (both names empty) would otherwise replace
        # every space of every cell with the placeholder.
        @combinations.reject! { |token| token.strip.empty? }
      end
      puts "Combinations: #{@combinations.join(', ')}"
    end

    # @param column [String] header template containing "_identifier_"
    # @return [String] the template with this person's identifier filled in
    def column_with_identifier(column)
      column.gsub '_identifier_', identifier
    end

    # Lists the spellings of a sequence of initials, with and without the
    # separators and surrounding/inner spaces. The argument is not modified.
    #
    # @param initials [Array<String>] initials in reading order
    # @return [Array<String>] unique spellings, empty for no initials
    def initials_combinations(initials)
      puts "Initials for #{initials}"
      return [] if initials.empty?

      letter, *remaining = initials
      tails = initials_combinations(remaining)
      spellings = INITIALS_SEPARATORS.flat_map do |separator|
        head = "#{letter}#{separator}"
        if tails.empty?
          [head, "#{head} ", " #{head}"]
        else
          tails.flat_map do |tail|
            [
              "#{head}#{tail}",
              " #{head}#{tail}",
              "#{head} #{tail}",
              "#{head} #{tail} ",
              "#{head}#{tail} "
            ]
          end
        end
      end
      spellings.uniq
    end

    # Replaces every known spelling of the person's name in the string cells
    # of the current row by "[identifier]". Tokens are applied in list order.
    def remove_combinations
      placeholder = "[#{identifier}]"
      row.each do |key, value|
        next unless value.is_a? String

        row[key] = combinations.reduce(value) { |text, token| text.gsub(token, placeholder) }
      end
    end
  end
end
@@ -0,0 +1,79 @@
1
+ require 'creek'
2
+ require 'write_xlsx'
3
+ require 'privacy/person'
4
+
5
module Privacy
  # Reads the first sheet of a spreadsheet, anonymizes it and writes the
  # result under the same file name inside DIRECTORY.
  # All the work happens in the constructor.
  class Processor
    attr_reader :file

    # Output directory, relative to the current working directory.
    DIRECTORY = 'processed'

    # @param file [String] path of the spreadsheet to process; names starting
    #   with "~" are ignored (presumably Excel lock files — confirm)
    def initialize(file)
      @file = file
      process unless @file.start_with? '~'
    end

    protected

    def process
      puts "Processing #{@file}"
      make_directory
      filter_data
      write_file
    end

    # Creates the output directory when nothing exists at that path yet.
    def make_directory
      # File.exists? was deprecated and then removed in Ruby 3.2.
      Dir.mkdir DIRECTORY unless File.exist? DIRECTORY
    end

    # Copies the header row unchanged, then every following row after both
    # persons ("ppmec" and "ppvic") have anonymized it.
    def filter_data
      # Read the header once instead of re-enumerating the sheet for each use.
      header = sheet.rows.first
      @ppmec = Privacy::Person.new 'ppmec'
      @ppmec.identify_columns header
      @ppvic = Privacy::Person.new 'ppvic'
      @ppvic.identify_columns header
      header.each do |key, value|
        worksheet.write key, value
      end
      sheet.rows.each_with_index do |row, index|
        next if index.zero? # header row, already written above

        @ppmec.anonymize row, index
        @ppvic.anonymize row, index
        row.each do |key, value|
          worksheet.write key, value
        end
      end
    end

    def write_file
      workbook.close
    end

    # New file
    # http://cxn03651.github.io/write_xlsx/index.html

    def workbook
      @workbook ||= WriteXLSX.new processed_file_name
    end

    def worksheet
      @worksheet ||= workbook.add_worksheet
    end

    # @return [String] output path; only the base name of the input is kept
    #   so that "dir/report.xlsx" does not target a missing "processed/dir/"
    def processed_file_name
      File.join DIRECTORY, File.basename(file)
    end

    # Current file
    # https://github.com/pythonicrubyist/creek

    def path
      @path ||= File.expand_path file
    end

    def data
      @data ||= Creek::Book.new path, with_headers: true
    end

    def sheet
      @sheet ||= data.sheets[0]
    end
  end
end
@@ -1,3 +1,3 @@
1
1
  module Privacy
2
- VERSION = "0.1.0"
2
+ VERSION = "0.1.5"
3
3
  end
@@ -1,29 +1,31 @@
1
1
  require_relative 'lib/privacy/version'
2
2
 
3
# Gem packaging definition for "privacy".
Gem::Specification.new do |s|
  s.name = "privacy"
  s.version = Privacy::VERSION
  s.authors = ["Arnaud Levy"]
  s.email = ["contact@arnaudlevy.com"]

  s.summary = "Privacy removes personal data from a xls file"
  s.description = "Remove first name and last name from xls files, replace them with an anonymous placeholder"
  s.homepage = "https://github.com/arnaudlevy/privacy"
  s.license = "MIT"
  s.required_ruby_version = Gem::Requirement.new(">= 2.3.0")

  # s.metadata["allowed_push_host"] = "TODO: Set to 'http://mygemserver.com'"

  s.metadata["homepage_uri"] = s.homepage
  s.metadata["source_code_uri"] = "https://github.com/arnaudlevy/privacy"

  # Specify which files should be added to the gem when it is released.
  # The `git ls-files -z` loads the files in the RubyGem that have been added into git.
  s.files = Dir.chdir(File.expand_path('..', __FILE__)) do
    `git ls-files -z`.split("\x0").reject do |f|
      # The directory to exclude is "spec/": renaming the block variable from
      # `spec` to `s` must not touch this pattern. Finder's .DS_Store files
      # are also kept out of the package.
      f.match(%r{^(test|spec|features)/}) || File.basename(f) == ".DS_Store"
    end
  end
  s.bindir = "exe"
  s.executables = s.files.grep(%r{^exe/}) { |f| File.basename(f) }
  s.require_paths = ["lib"]
  s.add_dependency "thor"
  s.add_dependency "creek"
  s.add_dependency "write_xlsx"
end
metadata CHANGED
@@ -1,29 +1,57 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: privacy
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.1.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - Arnaud Levy
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2020-05-27 00:00:00.000000000 Z
11
+ date: 2020-06-01 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: thor
15
15
  requirement: !ruby/object:Gem::Requirement
16
16
  requirements:
17
- - - "~>"
17
+ - - ">="
18
18
  - !ruby/object:Gem::Version
19
- version: '0.20'
19
+ version: '0'
20
20
  type: :runtime
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
- - - "~>"
24
+ - - ">="
25
25
  - !ruby/object:Gem::Version
26
- version: '0.20'
26
+ version: '0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: creek
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ">="
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: write_xlsx
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ">="
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ type: :runtime
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
27
55
  description: Remove first name and last name from xls files, replace them with an
28
56
  anonymous placeholder
29
57
  email:
@@ -33,6 +61,7 @@ executables:
33
61
  extensions: []
34
62
  extra_rdoc_files: []
35
63
  files:
64
+ - ".DS_Store"
36
65
  - ".gitignore"
37
66
  - CODE_OF_CONDUCT.md
38
67
  - Gemfile
@@ -44,6 +73,8 @@ files:
44
73
  - bin/setup
45
74
  - exe/privacy
46
75
  - lib/privacy.rb
76
+ - lib/privacy/person.rb
77
+ - lib/privacy/processor.rb
47
78
  - lib/privacy/version.rb
48
79
  - privacy.gemspec
49
80
  homepage: https://github.com/arnaudlevy/privacy