privacy 0.1.3 → 0.1.4

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 8be253d2d26c29d1d4f5145621bafad66858146e1e2e1e311f2a97b3a5f56479
4
- data.tar.gz: b20aaaa204b7b1bb814c6a42bb32105b2236641d381048941dae452f9db69362
3
+ metadata.gz: e388db32406558d4970fab62d76fb4bf8e3a6cccbddf3cfd6f5a47b0a3af160d
4
+ data.tar.gz: 8358c360c06bb0386c768e283ffb123f14030b81bc96f79f4c3cb4dfec51f2f9
5
5
  SHA512:
6
- metadata.gz: 1ec29124dac0e2b84f2bfbb0a2625f9441e43f51f4416c62b606cbf5b91a5d6cda846caf629b3b6cfbbb1989e71ec2321479d37b557423cba9995a95e5808184
7
- data.tar.gz: 49802f38af22e86afb99ab8c628f2f367415116bcc4301e9757fc14d7f54ed339338e3c4356dffbab4b3c672b625cf59d1e52c0ce65c9c1eb8ccb44189f0c560
6
+ metadata.gz: 8e8a6eadc68c458d508189c35ec34bdfa245d18159755c075cace122029a5c899f589134fc4156618120c8ace5d49a7ca26f770b6125695b5a6a91956cc0454a
7
+ data.tar.gz: 8a05cd62e6c983aafe69c9dec2a546629f9f2126c2ab25f78fbbe29dbfef6bd2825eae08abcac149fa28be7b64858cfc49eed3302a30c27c176ef2fffa20ca97
Binary file
data/README.md CHANGED
@@ -32,6 +32,8 @@ To install this gem onto your local machine, run `bundle exec rake install`. To
32
32
 
33
33
  To release a new version, change the version number and run `rake release`.
34
34
 
35
+ To work on the code, use `bundle exec privacy process`.
36
+
35
37
  ## Contributing
36
38
 
37
39
  Bug reports and pull requests are welcome on GitHub at https://github.com/[USERNAME]/privacy. This project is intended to be a safe, welcoming space for collaboration, and contributors are expected to adhere to the [code of conduct](https://github.com/[USERNAME]/privacy/blob/master/CODE_OF_CONDUCT.md).
@@ -0,0 +1,133 @@
1
module Privacy
  # Anonymizes the personal data of one person in spreadsheet rows.
  #
  # A person is designated by an identifier (the callers visible in this gem
  # use "ppmec" and "ppvic") that appears as a suffix/infix in the header
  # names listed in COLUMNS_TO_ANONYMIZE. Rows are hashes keyed by cell
  # reference ("A1", "B7", ...), which is how cells are addressed below.
  #
  # Usage:
  #   person = Privacy::Person.new 'ppmec'
  #   person.identify_columns header_row
  #   person.anonymize row, zero_based_row_index
  class Person
    attr_reader :identifier, :row, :index, :columns, :data, :combinations, :initials

    # Header templates; "_identifier_" is replaced by the person identifier.
    COLUMNS_TO_ANONYMIZE = [
      'Nom _identifier_',
      'Prénom _identifier_',
      'Date de naissance _identifier_',
      'Lieu naissance Pays _identifier_',
      'Lieu naissance Ville _identifier_',
      'Lieu naissance Département _identifier_',
      'Sexe _identifier_',
      'Nationalité _identifier_',
      'Adresse _identifier_ [Date]',
      'Adresse _identifier_ [Département]',
      'Adresse _identifier_ [Ville]',
      'Adresse _identifier_ [Pays]',
      'Profession _identifier_',
      'Date profession _identifier_'
    ].freeze

    # Strings that may follow each initial ("JD" as well as "J.D.").
    INITIALS_SEPARATORS = [
      '',
      '.'
    ].freeze

    # @param identifier [String] text substituted for "_identifier_" in the
    #   header templates and used as the "[identifier]" free-text placeholder
    def initialize(identifier)
      @identifier = identifier
      puts "Identify #{identifier}"
    end

    # Finds which spreadsheet columns hold this person's data.
    #
    # Fills +columns+ with { column letters => header name } for every cell
    # of +row+ whose value is one of the expected header names.
    #
    # @param row [Hash{String => Object}] header row, keyed by cell reference
    def identify_columns(row)
      # The expected names do not depend on the cell: compute them once.
      names = COLUMNS_TO_ANONYMIZE.map { |column| column_with_identifier column }
      @columns = {}
      row.each do |key, value|
        name = names.find { |candidate| value == candidate }
        next unless name

        # Keep only the column letters of the cell reference ("AB1" -> "AB").
        letter = key.delete '0-9'
        @columns[letter] = name
        puts " #{name} on column #{letter}"
      end
    end

    # Anonymizes +row+ in place: identified cells are replaced by
    # "[header name]" and name/initials occurrences found in any other string
    # cell are replaced by "[identifier]".
    #
    # @param row [Hash{String => Object}] data row, keyed by cell reference
    # @param index [Integer] zero-based position of the row in the sheet
    def anonymize(row, index)
      @row = row
      @index = index + 1 # Excel is 1-indexed
      anonymize_personal_data
      build_combinations
      remove_combinations
    end

    protected

    # Moves the value of every identified cell of the current row into +data+
    # (keyed by header name) and writes the "[header name]" placeholder in its
    # place. Cells whose value is nil are left untouched.
    def anonymize_personal_data
      @data = {}
      puts
      puts "Anonymize row #{index}"
      puts " #{identifier}"
      columns.each do |letter, column|
        cell = "#{letter}#{index}"
        value = row[cell]
        next if value.nil?

        @data[column] = value
        puts " #{value} (#{column})"
        row[cell] = "[#{column}]"
      end
    end

    # Builds the list of strings (+combinations+) that designate the person in
    # free text: full name in both orders, then initials variants.
    #
    # Nothing is generated unless both the first and the last name are
    # present and non-blank. A blank name would otherwise produce
    # single-letter or whitespace-only tokens, and remove_combinations would
    # then replace every such letter or space in the row.
    def build_combinations
      puts 'build_combinations'
      @combinations = []
      @initials = []
      first_name = collected_name 'Prénom _identifier_'
      last_name = collected_name 'Nom _identifier_'
      unless first_name.empty? || last_name.empty?
        # One initial per word, first name words first.
        @initials = "#{first_name} #{last_name}".split(' ').map { |word| word[0].upcase }
        @combinations.concat full_name_combinations(first_name, last_name)
        @combinations.concat initials_combinations(@initials)
      end
      puts "Combinations: #{@combinations.join(', ')}"
    end

    # @param column [String] header template containing "_identifier_"
    # @return [String] the header name for this person
    def column_with_identifier(column)
      column.gsub '_identifier_', identifier
    end

    # @param column [String] header template of the wanted value
    # @return [String] the collected value, stripped; "" when it is missing
    def collected_name(column)
      data[column_with_identifier(column)].to_s.strip
    end

    # @return [Array<String>] "A B" and "B A", each one bare, with a leading
    #   space and with a trailing space
    def full_name_combinations(first_name, last_name)
      [[first_name, last_name], [last_name, first_name]].flat_map do |left, right|
        pair = "#{left} #{right}"
        [pair, " #{pair}", "#{pair} "]
      end
    end

    # Generates the written forms of a sequence of initials, in order, with
    # every separator of INITIALS_SEPARATORS and optional spaces between and
    # around the letters (["J", "D"] gives "JD", "J.D.", "J. D.", ...).
    #
    # The argument is not modified.
    #
    # @param initials [Array<String>] one letter per word of the name
    # @return [Array<String>] unique tokens; empty when +initials+ is empty
    def initials_combinations(initials)
      puts "Initials for #{initials}"
      return [] if initials.empty?

      letter, *remaining = initials
      tails = initials_combinations remaining
      combinations = []
      INITIALS_SEPARATORS.each do |separator|
        head = "#{letter}#{separator}"
        if tails.empty?
          combinations.push head, "#{head} ", " #{head}"
        else
          tails.each do |tail|
            combinations.push "#{head}#{tail}",
                              " #{head}#{tail}",
                              "#{head} #{tail}",
                              "#{head} #{tail} ",
                              "#{head}#{tail} "
          end
        end
      end
      combinations.uniq
    end

    # Replaces, in every string cell of the current row, each occurrence of a
    # combination by "[identifier]". Tokens are applied in list order, so the
    # bare forms run before their space-padded variants and the surrounding
    # spaces are preserved. Non-string cells are left as they are.
    def remove_combinations
      placeholder = "[#{identifier}]"
      row.each do |key, value|
        next unless value.is_a? String

        row[key] = combinations.reduce(value) { |text, token| text.gsub(token, placeholder) }
      end
    end
  end
end
@@ -1,14 +1,21 @@
1
1
  require 'creek'
2
2
  require 'write_xlsx'
3
+ require 'privacy/person'
3
4
 
4
5
  class Privacy::Processor
5
6
  attr_reader :file
6
7
 
7
8
  DIRECTORY = 'processed'
9
+ PRIVATE_DATA = {
10
+ 'Nom ppmec': '[Nom]',
11
+ 'Prénom ppmec': '[Prénom]',
12
+ 'Nom ppvic': '[Nom]',
13
+ 'Prénom ppvic': '[Prénom]'
14
+ }
8
15
 
9
16
  def initialize(file)
10
17
  @file = file
11
- process
18
+ process unless @file.start_with? '~'
12
19
  end
13
20
 
14
21
  protected
@@ -25,7 +32,17 @@ class Privacy::Processor
25
32
  end
26
33
 
27
34
  def filter_data
28
- sheet.rows.each do |row|
35
+ @ppmec = Privacy::Person.new 'ppmec'
36
+ @ppmec.identify_columns sheet.rows.first
37
+ @ppvic = Privacy::Person.new 'ppvic'
38
+ @ppvic.identify_columns sheet.rows.first
39
+ sheet.rows.first.each do |key, value|
40
+ worksheet.write key, value
41
+ end
42
+ sheet.rows.each_with_index do |row, index|
43
+ next if index.zero?
44
+ @ppmec.anonymize row, index
45
+ @ppvic.anonymize row, index
29
46
  row.each do |key, value|
30
47
  worksheet.write key, value
31
48
  end
@@ -59,7 +76,7 @@ class Privacy::Processor
59
76
  end
60
77
 
61
78
  def data
62
- @data ||= Creek::Book.new path
79
+ @data ||= Creek::Book.new path, with_headers: true
63
80
  end
64
81
 
65
82
  def sheet
@@ -1,3 +1,3 @@
1
1
  module Privacy
2
- VERSION = "0.1.3"
2
+ VERSION = "0.1.4"
3
3
  end
@@ -1,31 +1,31 @@
1
1
  require_relative 'lib/privacy/version'
2
2
 
3
# Gem specification for privacy.
Gem::Specification.new do |s|
  s.name = "privacy"
  s.version = Privacy::VERSION
  s.authors = ["Arnaud Levy"]
  s.email = ["contact@arnaudlevy.com"]

  s.summary = "Privacy removes personal data from a xls file"
  s.description = "Remove first name and last name from xls files, replace them with an anonymous placeholder"
  s.homepage = "https://github.com/arnaudlevy/privacy"
  s.license = "MIT"
  s.required_ruby_version = Gem::Requirement.new(">= 2.3.0")

  # s.metadata["allowed_push_host"] = "TODO: Set to 'http://mygemserver.com'"

  s.metadata["homepage_uri"] = s.homepage
  s.metadata["source_code_uri"] = "https://github.com/arnaudlevy/privacy"

  # Specify which files should be added to the gem when it is released.
  # The `git ls-files -z` loads the files in the RubyGem that have been added into git.
  s.files = Dir.chdir(File.expand_path('..', __FILE__)) do
    # The directory to exclude is `spec/` (the test suite), not `s/`: the
    # pattern names directories and is unrelated to the block variable.
    `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
  end
  s.bindir = "exe"
  s.executables = s.files.grep(%r{^exe/}) { |f| File.basename(f) }
  s.require_paths = ["lib"]
  s.add_dependency "thor"
  s.add_dependency "creek"
  s.add_dependency "write_xlsx"
end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: privacy
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.3
4
+ version: 0.1.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Arnaud Levy
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2020-05-28 00:00:00.000000000 Z
11
+ date: 2020-06-01 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: thor
@@ -61,6 +61,7 @@ executables:
61
61
  extensions: []
62
62
  extra_rdoc_files: []
63
63
  files:
64
+ - ".DS_Store"
64
65
  - ".gitignore"
65
66
  - CODE_OF_CONDUCT.md
66
67
  - Gemfile
@@ -72,6 +73,7 @@ files:
72
73
  - bin/setup
73
74
  - exe/privacy
74
75
  - lib/privacy.rb
76
+ - lib/privacy/person.rb
75
77
  - lib/privacy/processor.rb
76
78
  - lib/privacy/version.rb
77
79
  - privacy.gemspec