confidential_info_redactor 1.0.1 → 1.0.2

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
- SHA1:
3
- metadata.gz: ea6f9a1d3391a351050aaaca8cc00fff7d54b794
4
- data.tar.gz: b6892d0b8e61b4d3dd92f2aba3387396e6cf5f65
2
+ SHA256:
3
+ metadata.gz: 64cb868a941bb737cc49f0ea98334c89cb719a9fc01e3ada9997e29df07347d1
4
+ data.tar.gz: 158de52cd0bfb45766e66e45a200590bb0b95282bb49688161393691f75bb92b
5
5
  SHA512:
6
- metadata.gz: 63937326d768ceeb5b018bee2fb92831dd87ec8b1f1fe9533b999f97383540cef93282eff2a02fa469a77160cf6df717882b188d6b573f6e2162636fcea9e0dd
7
- data.tar.gz: 93fd0e9d80d33927233b1232e1d283d347c547a9ab559be307dd1bfb111941bc19083357adabbbfa590486a09177b13add4a77cbe050643da1e3e1f402b22e58
6
+ metadata.gz: 6c455ad2194369013dc3a451196d9df40b9df359eeea0e62de3a5650398a3d139beed9b3db2638728f498fbfedb281b35ed9e8e3d64d311de9709370366358fa
7
+ data.tar.gz: 5174e8f4ef98a9194b0554bee6e326072bea9e3b4b03187db55b50f4cf4df39c38940bdc6283badb1c62ea63d4e3e82463b135ebff352d38e7c4b38446b44f14
data/README.md CHANGED
@@ -62,6 +62,13 @@ ConfidentialInfoRedactor::Redactor.new.hyperlinks(text)
62
62
  ConfidentialInfoRedactor::Redactor.new(tokens: tokens).proper_nouns(text)
63
63
  # => '<redacted> announced a merger with <redacted> that will happen on December 15th, 2020 for $200,000,000,000. Please contact <redacted> at j.smith@example.com or visit http://www.super-fake-merger.com.'
64
64
 
65
+ # Possessive proper nouns are handled automatically
66
+ text = "The teacher took John's book from the shelf."
67
+ tokens = ConfidentialInfoRedactor::Extractor.new.extract(text)
68
+ # => ["John"]
69
+ ConfidentialInfoRedactor::Redactor.new(tokens: tokens).redact(text)
70
+ # => "The teacher took <redacted>'s book from the shelf."
71
+
65
72
  # It is possible to 'turn off' any of the specific redactors
66
73
  ConfidentialInfoRedactor::Redactor.new(tokens: tokens, ignore_numbers: true).redact(text)
67
74
  # => '<redacted> announced a merger with <redacted> that will happen on <redacted date> for $200,000,000,000. Please contact <redacted> at <redacted> or visit <redacted>.'
@@ -33,11 +33,11 @@ module ConfidentialInfoRedactor
33
33
  private
34
34
 
35
35
  def extract_preliminary_terms(segment)
36
- segment.to_s.gsub(EXTRACT_REGEX).map { |match| match unless corpus.include?(match.downcase.gsub(PUNCTUATION_REGEX, '').gsub(/\'$/, '')) }.compact
36
+ segment.to_s.gsub(EXTRACT_REGEX).map { |match| match unless corpus.include?(match.downcase.gsub(PUNCTUATION_REGEX, '').gsub(/\'s\z/, '').gsub(/\'$/, '')) }.compact
37
37
  end
38
38
 
39
39
  def clean_token(token)
40
- token.gsub(PUNCTUATION_REGEX, '').gsub(/\'$/, '').gsub(/\.\z/, '').strip
40
+ token.gsub(PUNCTUATION_REGEX, '').gsub(/\'s\z/, '').gsub(/\'$/, '').gsub(/\.\z/, '').strip
41
41
  end
42
42
 
43
43
  def non_confidential_token?(token, includes_confidential)
@@ -75,6 +75,7 @@ module ConfidentialInfoRedactor
75
75
 
76
76
  def redact_tokens(txt)
77
77
  tokens.sort_by{ |x| x.split.count }.reverse.each do |token|
78
+ txt.gsub!(/(?<=\s|^|\")#{Regexp.escape(token)}(?=['\u2019]s\b)/, "#{token_text}")
78
79
  txt.gsub!(/(?<=\s|^|\")#{Regexp.escape(token)}(?=\W|$)/, "#{token_text}")
79
80
  end
80
81
  txt.strip
@@ -1,3 +1,3 @@
1
1
  module ConfidentialInfoRedactor
2
- VERSION = "1.0.1"
2
+ VERSION = "1.0.2"
3
3
  end
@@ -143,6 +143,11 @@ RSpec.describe ConfidentialInfoRedactor::Extractor do
143
143
  text = 'John'
144
144
  expect(described_class.new(language: 'en').extract(text)).to eq(['John'])
145
145
  end
146
+
147
+ it 'extracts the proper nouns from a text #018' do
148
+ text = "The teacher took John's book from the shelf."
149
+ expect(described_class.new(language: 'en').extract(text)).to eq(['John'])
150
+ end
146
151
  end
147
152
 
148
153
  context 'German (de)' do
@@ -181,5 +181,19 @@ RSpec.describe ConfidentialInfoRedactor::Redactor do
181
181
  text = 'My Transformation - avoid Trans.'
182
182
  expect(described_class.new(language: 'en', tokens: tokens, number_text: '**redacted number**', date_text: '^^redacted date^^', token_text: '*****', hyperlink_text: '*****', email_text: '*****').redact(text)).to eq('My Transformation - avoid *****.')
183
183
  end
184
+
185
+ it 'redacts all confidential information from a text #008' do
186
+ text = "The teacher took John's book from the shelf."
187
+ tokens = ConfidentialInfoRedactor::Extractor.new.extract(text)
188
+ expect(tokens).to eq(['John'])
189
+ expect(described_class.new(language: 'en', tokens: tokens).redact(text)).to eq("The teacher took <redacted>'s book from the shelf.")
190
+ end
191
+
192
+ it 'redacts all confidential information from a text #009' do
193
+ text = "The cat's toy rolled under the couch and disappeared."
194
+ tokens = ConfidentialInfoRedactor::Extractor.new.extract(text)
195
+ expect(tokens).to eq([])
196
+ expect(described_class.new(language: 'en', tokens: tokens).redact(text)).to eq("The cat's toy rolled under the couch and disappeared.")
197
+ end
184
198
  end
185
199
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: confidential_info_redactor
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.1
4
+ version: 1.0.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Kevin S. Dias
8
- autorequire:
8
+ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-08-11 00:00:00.000000000 Z
11
+ date: 2026-03-11 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -98,7 +98,7 @@ homepage: https://github.com/diasks2/confidential_info_redactor
98
98
  licenses:
99
99
  - MIT
100
100
  metadata: {}
101
- post_install_message:
101
+ post_install_message:
102
102
  rdoc_options: []
103
103
  require_paths:
104
104
  - lib
@@ -113,9 +113,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
113
113
  - !ruby/object:Gem::Version
114
114
  version: '0'
115
115
  requirements: []
116
- rubyforge_project:
117
- rubygems_version: 2.4.1
118
- signing_key:
116
+ rubygems_version: 3.3.26
117
+ signing_key:
119
118
  specification_version: 4
120
119
  summary: Semi-automatically redact confidential information from a text
121
120
  test_files: