privacy 0.1.3 → 0.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 8be253d2d26c29d1d4f5145621bafad66858146e1e2e1e311f2a97b3a5f56479
4
- data.tar.gz: b20aaaa204b7b1bb814c6a42bb32105b2236641d381048941dae452f9db69362
3
+ metadata.gz: e388db32406558d4970fab62d76fb4bf8e3a6cccbddf3cfd6f5a47b0a3af160d
4
+ data.tar.gz: 8358c360c06bb0386c768e283ffb123f14030b81bc96f79f4c3cb4dfec51f2f9
5
5
  SHA512:
6
- metadata.gz: 1ec29124dac0e2b84f2bfbb0a2625f9441e43f51f4416c62b606cbf5b91a5d6cda846caf629b3b6cfbbb1989e71ec2321479d37b557423cba9995a95e5808184
7
- data.tar.gz: 49802f38af22e86afb99ab8c628f2f367415116bcc4301e9757fc14d7f54ed339338e3c4356dffbab4b3c672b625cf59d1e52c0ce65c9c1eb8ccb44189f0c560
6
+ metadata.gz: 8e8a6eadc68c458d508189c35ec34bdfa245d18159755c075cace122029a5c899f589134fc4156618120c8ace5d49a7ca26f770b6125695b5a6a91956cc0454a
7
+ data.tar.gz: 8a05cd62e6c983aafe69c9dec2a546629f9f2126c2ab25f78fbbe29dbfef6bd2825eae08abcac149fa28be7b64858cfc49eed3302a30c27c176ef2fffa20ca97
Binary file
data/README.md CHANGED
@@ -32,6 +32,8 @@ To install this gem onto your local machine, run `bundle exec rake install`. To
32
32
 
33
33
  To release a new version, update the version number and run `rake release`.
34
34
 
35
+ To work on the code, use `bundle exec privacy process`.
36
+
35
37
  ## Contributing
36
38
 
37
39
  Bug reports and pull requests are welcome on GitHub at https://github.com/[USERNAME]/privacy. This project is intended to be a safe, welcoming space for collaboration, and contributors are expected to adhere to the [code of conduct](https://github.com/[USERNAME]/privacy/blob/master/CODE_OF_CONDUCT.md).
@@ -0,0 +1,133 @@
1
module Privacy
  # Anonymizes the personal data of one person in spreadsheet rows.
  #
  # A person is designated by an identifier (the processor uses 'ppmec' and
  # 'ppvic'). Header cells are recognised by comparing them with
  # COLUMNS_TO_ANONYMIZE once `_identifier_` has been replaced by that
  # identifier. Progress is logged to standard output with `puts`.
  class Person
    attr_reader :identifier, :row, :index, :columns, :data, :combinations, :initials

    # Header templates of the columns holding personal data.
    # `_identifier_` is substituted with the person's identifier.
    COLUMNS_TO_ANONYMIZE = [
      'Nom _identifier_',
      'Prénom _identifier_',
      'Date de naissance _identifier_',
      'Lieu naissance Pays _identifier_',
      'Lieu naissance Ville _identifier_',
      'Lieu naissance Département _identifier_',
      'Sexe _identifier_',
      'Nationalité _identifier_',
      'Adresse _identifier_ [Date]',
      'Adresse _identifier_ [Département]',
      'Adresse _identifier_ [Ville]',
      'Adresse _identifier_ [Pays]',
      'Profession _identifier_',
      'Date profession _identifier_'
    ].freeze

    # Strings that may follow each initial ("JD" and "J.D.").
    INITIALS_SEPARATORS = [
      '',
      '.'
    ].freeze

    # @param identifier [String] token substituted for `_identifier_` in the
    #   column templates and used as the "[identifier]" placeholder
    def initialize(identifier)
      @identifier = identifier
      puts "Identify #{identifier}"
    end

    # Records which columns of the header row hold this person's data.
    #
    # Fills +columns+ with column letter => header name.
    #
    # @param row [Hash] header row, cell reference => header text. Keys are
    #   expected to look like "A1": every "1" is stripped to get the column
    #   letter.
    # @return [Hash] the row that was passed in
    def identify_columns(row)
      @columns = {}
      # The expected names do not depend on the cell, so build them once.
      expected_names = COLUMNS_TO_ANONYMIZE.map { |column| column_with_identifier column }
      row.each do |key, value|
        name = expected_names.find { |expected| value == expected }
        next if name.nil?

        letter = key.delete '1'
        @columns[letter] = name
        puts " #{name} on column #{letter}"
      end
    end

    # Anonymizes one data row in place.
    #
    # Cells of the identified columns are replaced with "[<column name>]";
    # then every first name / last name / initials combination found in any
    # String cell is replaced with "[<identifier>]".
    #
    # @param row [Hash] cell reference => value; modified in place
    # @param index [Integer] zero-based position of the row
    # @return [Hash] the row
    def anonymize(row, index)
      @row = row
      @index = index + 1 # Excel is 1-indexed
      anonymize_personal_data
      build_combinations
      remove_combinations
    end

    protected

    # Moves the values of the identified columns from the row into +data+
    # (keyed by column name) and leaves a "[<column name>]" placeholder in
    # the row. Cells without a value are left untouched.
    def anonymize_personal_data
      @data = {}
      puts
      puts "Anonymize row #{index}"
      puts " #{identifier}"
      @columns.each do |letter, column|
        cell = "#{letter}#{index}"
        value = row[cell]
        next if value.nil?

        @data[column] = value
        puts " #{value} (#{column})"
        row[cell] = "[#{column}]"
      end
    end

    # Builds +combinations+, the list of strings to scrub from free text.
    # The list stays empty unless both a first name and a last name were
    # collected for the current row.
    def build_combinations
      puts 'build_combinations'
      first_name = data[column_with_identifier('Prénom _identifier_')]
      last_name = data[column_with_identifier('Nom _identifier_')]
      @combinations =
        if first_name.nil? || last_name.nil?
          []
        else
          name_combinations(first_name, last_name)
        end
      puts "Combinations: #{@combinations.join(', ')}"
    end

    # Full-name variants (both orders, with optional leading or trailing
    # space) followed by the initials variants.
    #
    # @param first_name [String]
    # @param last_name [String]
    # @return [Array<String>]
    def name_combinations(first_name, last_name)
      # One upper-cased initial per word, first name words first.
      letters = (first_name.split(' ') + last_name.split(' ')).map { |word| word[0].upcase }
      [
        "#{first_name} #{last_name}",
        " #{first_name} #{last_name}",
        "#{first_name} #{last_name} ",
        "#{last_name} #{first_name}",
        " #{last_name} #{first_name}",
        "#{last_name} #{first_name} "
      ] + initials_combinations(letters)
    end

    # @param column [String] header template containing `_identifier_`
    # @return [String] the template with the identifier substituted
    def column_with_identifier(column)
      column.gsub '_identifier_', identifier
    end

    # Builds every spelling of a sequence of initials: each initial is
    # followed by one of INITIALS_SEPARATORS, then by the spellings of the
    # remaining initials, with optional surrounding or separating spaces.
    #
    # The argument is not modified: the recursion works on the tail of the
    # list instead of shifting elements off the array being iterated.
    #
    # @param initials [Array<String>] initials in reading order
    # @return [Array<String>] distinct spellings; empty for an empty list
    def initials_combinations(initials)
      puts "Initials for #{initials}"
      return [] if initials.empty?

      letter, *remaining = initials
      tails = initials_combinations remaining
      spellings = INITIALS_SEPARATORS.flat_map do |separator|
        head = "#{letter}#{separator}"
        if tails.empty?
          [head, "#{head} ", " #{head}"]
        else
          tails.flat_map do |tail|
            [
              "#{head}#{tail}",
              " #{head}#{tail}",
              "#{head} #{tail}",
              "#{head} #{tail} ",
              "#{head}#{tail} "
            ]
          end
        end
      end
      spellings.uniq
    end

    # Replaces, in every String cell of the row, each entry of
    # +combinations+ (in list order) with "[<identifier>]".
    # Non-String cells are left as they are.
    #
    # @return [Hash] the row
    def remove_combinations
      placeholder = "[#{identifier}]"
      row.each do |key, value|
        next unless value.is_a? String

        # Assigning to an existing key is safe while iterating the Hash.
        row[key] = combinations.reduce(value) { |text, token| text.gsub(token, placeholder) }
      end
    end
  end
end
@@ -1,14 +1,21 @@
1
1
  require 'creek'
2
2
  require 'write_xlsx'
3
+ require 'privacy/person'
3
4
 
4
5
  class Privacy::Processor
5
6
  attr_reader :file
6
7
 
7
8
  DIRECTORY = 'processed'
9
+ PRIVATE_DATA = {
10
+ 'Nom ppmec': '[Nom]',
11
+ 'Prénom ppmec': '[Prénom]',
12
+ 'Nom ppvic': '[Nom]',
13
+ 'Prénom ppvic': '[Prénom]'
14
+ }
8
15
 
9
16
  def initialize(file)
10
17
  @file = file
11
- process
18
+ process unless @file.start_with? '~'
12
19
  end
13
20
 
14
21
  protected
@@ -25,7 +32,17 @@ class Privacy::Processor
25
32
  end
26
33
 
27
34
  def filter_data
28
- sheet.rows.each do |row|
35
+ @ppmec = Privacy::Person.new 'ppmec'
36
+ @ppmec.identify_columns sheet.rows.first
37
+ @ppvic = Privacy::Person.new 'ppvic'
38
+ @ppvic.identify_columns sheet.rows.first
39
+ sheet.rows.first.each do |key, value|
40
+ worksheet.write key, value
41
+ end
42
+ sheet.rows.each_with_index do |row, index|
43
+ next if index.zero?
44
+ @ppmec.anonymize row, index
45
+ @ppvic.anonymize row, index
29
46
  row.each do |key, value|
30
47
  worksheet.write key, value
31
48
  end
@@ -59,7 +76,7 @@ class Privacy::Processor
59
76
  end
60
77
 
61
78
  def data
62
- @data ||= Creek::Book.new path
79
+ @data ||= Creek::Book.new path, with_headers: true
63
80
  end
64
81
 
65
82
  def sheet
@@ -1,3 +1,3 @@
1
1
  module Privacy
2
- VERSION = "0.1.3"
2
+ VERSION = "0.1.4"
3
3
  end
@@ -1,31 +1,31 @@
1
1
  require_relative 'lib/privacy/version'
2
2
 
3
# Gem specification for the privacy gem.
Gem::Specification.new do |s|
  s.name          = "privacy"
  s.version       = Privacy::VERSION
  s.authors       = ["Arnaud Levy"]
  s.email         = ["contact@arnaudlevy.com"]

  s.summary       = "Privacy removes personal data from a xls file"
  s.description   = "Remove first name and last name from xls files, replace them with an anonymous placeholder"
  s.homepage      = "https://github.com/arnaudlevy/privacy"
  s.license       = "MIT"
  s.required_ruby_version = Gem::Requirement.new(">= 2.3.0")

  # s.metadata["allowed_push_host"] = "TODO: Set to 'http://mygemserver.com'"

  s.metadata["homepage_uri"] = s.homepage
  s.metadata["source_code_uri"] = "https://github.com/arnaudlevy/privacy"

  # Specify which files should be added to the gem when it is released.
  # The `git ls-files -z` loads the files in the RubyGem that have been added into git.
  s.files = Dir.chdir(File.expand_path('..', __FILE__)) do
    `git ls-files -z`.split("\x0").reject do |f|
      # The directory to exclude is `spec/`: renaming the block variable
      # from `spec` to `s` must not touch this pattern.
      # Finder's `.DS_Store` files are kept out of the package as well.
      f.match(%r{^(test|spec|features)/}) || File.basename(f) == '.DS_Store'
    end
  end
  s.bindir        = "exe"
  s.executables   = s.files.grep(%r{^exe/}) { |f| File.basename(f) }
  s.require_paths = ["lib"]
  s.add_dependency "thor"
  s.add_dependency "creek"
  s.add_dependency "write_xlsx"
end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: privacy
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.3
4
+ version: 0.1.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Arnaud Levy
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2020-05-28 00:00:00.000000000 Z
11
+ date: 2020-06-01 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: thor
@@ -61,6 +61,7 @@ executables:
61
61
  extensions: []
62
62
  extra_rdoc_files: []
63
63
  files:
64
+ - ".DS_Store"
64
65
  - ".gitignore"
65
66
  - CODE_OF_CONDUCT.md
66
67
  - Gemfile
@@ -72,6 +73,7 @@ files:
72
73
  - bin/setup
73
74
  - exe/privacy
74
75
  - lib/privacy.rb
76
+ - lib/privacy/person.rb
75
77
  - lib/privacy/processor.rb
76
78
  - lib/privacy/version.rb
77
79
  - privacy.gemspec