confidential_info_redactor_lite 1.0.8 → 1.0.9

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 513a500576d83e2caf15ad2bfe3a6456951c4d50
4
- data.tar.gz: 746a022cc529b41737fc60a40f82617ce3eaa731
3
+ metadata.gz: 7df96ec7fe0c910eb8ab77e6cc54bf018d142ded
4
+ data.tar.gz: 3951ed10c6e222bb24e1725a0944cd65807e02e9
5
5
  SHA512:
6
- metadata.gz: 04cf1bb7cefd6e0e4dded00472b074c487ee64750a447c016648707ba2f82dd2785772c9a1aa3c8425d39b6ac5bfb7d4158e4cc9318ef87658902998d49d5c2b
7
- data.tar.gz: 89b189d9896b4dc33d600e89d9746618ee3183555dd5a76dbb06c550142c5f19323d1f01be43081b1a1d3fe41301e14153b8f9d653bd71674a286d6f0baa1958
6
+ metadata.gz: 9cbff4361d65830508c0dc36174485f97df3e8e6451cf0f7ada707752e50720e1690ea14aa99f7e4ca49914974916f3b0c4ec1a47a4d22f25fc96f2e1f9aed79
7
+ data.tar.gz: e794763d38415090f1b3af1c0bd497543d5e65dc3651942e969155034554a38389068989b501d3f9d97f8aa8346a3a262c656eb86521e790424b53e44d6bc190
@@ -5,7 +5,7 @@ module ConfidentialInfoRedactorLite
5
5
  # This class redacts various tokens from a text
6
6
  class Redactor
7
7
  # Rubular: http://rubular.com/r/OI2wQZ0KSl
8
- NUMBER_REGEX = /(?<=\A|\A\()[^(]?\d+((,|\.|\/)*\d)*(\D?\s|\s|[[:cntrl:]]|[[:space:]]|\.?\s|\.$|$)|(?<=[[:cntrl:]]|[[:space:]]|\s|\s\(|\s'|\s‘)[^('‘]?\d+((,|\.|\/)*\d)*\"*(?=(\D?\s|\s|[[:cntrl:]]|[[:space:]]|\.?\s|\.$|$))|(?<=\s)\d+(nd|th|st)|(?<=\s)\d+\/\d+\"*(?=\s)|(?<=\()\S{1}\d+(?=\))|(?<=\s{1})\S{1}\d+\z|^\d+$|(?<=\A|\A\(|\s|[[:cntrl:]]|[[:space:]]|\s\()[^(]?\d+((,|\.|\/)*\d)*\D{2}(?=($|\s+))/
8
+ NUMBER_REGEX = /(?<=\A|\A\()[^(]?\d+((,|\.|\/)*\d)*(\D?\s|\s|[[:cntrl:]]|[[:space:]]|\.?\s|\.$|$)|(?<=[[:cntrl:]]|[[:space:]]|\s|\s\(|\s'|\s‘)[^('‘]?\d+((,|\.|\/)*\d)*\"*(?=(\D?\s|\s|[[:cntrl:]]|[[:space:]]|\.?\s|\.$|$))|(?<=\s)\d+(nd|th|st)|(?<=\s)\d+\/\d+\"*(?=\s)|(?<=\()\S{1}\d+(?=\))|(?<=\s{1})\S{1}\d+\z|^\d+$|(?<=\A|\A\(|\s|[[:cntrl:]]|[[:space:]]|\s\()[^(]?\d+((,|\.|\/)*\d)*\D{2}(?=($|\s+))|\d+/
9
9
  # Rubular: http://rubular.com/r/mxcj2G0Jfa
10
10
  EMAIL_REGEX = /(?<=\A|\s|\()[\w+\-.]+@[a-z\d\-]+(\.[a-z]+)*\.[a-z]+(?=\z|\s|\.|\))/i
11
11
 
@@ -117,13 +117,17 @@ module ConfidentialInfoRedactorLite
117
117
  original_sentence_array = txt.split(' ')
118
118
  redacted_sentence_array = redacted_text.split(' ')
119
119
  diff = original_sentence_array - redacted_sentence_array
120
- final_number_tokens = diff.map { |token| token[-1].eql?('.') ? token[0...-1] : token }
121
- .map { |token| token[-1].eql?(')') ? token[0...-1] : token }
122
- .map { |token| token[-1].eql?("'") ? token[0...-1] : token }
123
- .map { |token| token[-1].eql?('') ? token[0...-1] : token }
124
- .map { |token| token[0].eql?('(') ? token[1..token.length] : token }
125
- .map { |token| token[0].eql?("'") ? token[1..token.length] : token }
126
- .map { |token| token[0].eql?("‘") ? token[1..token.length] : token }
120
+ if original_sentence_array.length.eql?(1)
121
+ final_number_tokens = diff
122
+ else
123
+ final_number_tokens = diff.map { |token| token[-1].eql?('.') ? token[0...-1] : token }
124
+ .map { |token| token[-1].eql?(')') ? token[0...-1] : token }
125
+ .map { |token| token[-1].eql?("'") ? token[0...-1] : token }
126
+ .map { |token| token[-1].eql?('’') ? token[0...-1] : token }
127
+ .map { |token| token[0].eql?('(') ? token[1..token.length] : token }
128
+ .map { |token| token[0].eql?("'") ? token[1..token.length] : token }
129
+ .map { |token| token[0].eql?("‘") ? token[1..token.length] : token }
130
+ end
127
131
  end
128
132
  [redacted_text.gsub(/(?<=[^\>]|\A)#{Regexp.escape(number_text)}/, "<span class='confidentialNumber'>#{number_text}</span>"), final_number_tokens]
129
133
  end
@@ -1,3 +1,3 @@
1
1
  module ConfidentialInfoRedactorLite
2
- VERSION = "1.0.8"
2
+ VERSION = "1.0.9"
3
3
  end
@@ -157,6 +157,31 @@ RSpec.describe ConfidentialInfoRedactorLite::Redactor do
157
157
  text = "Page 4"
158
158
  expect(described_class.new(language: 'en', dow: en_dow, dow_abbr: en_dow_abbr, months: en_months, months_abbr: en_month_abbr).numbers_html(text)[1]).to eq(["4"])
159
159
  end
160
+
161
+ it 'redacts numbers from a text #014' do
162
+ text = "88966-5.0-ENG"
163
+ expect(described_class.new(language: 'en', dow: en_dow, dow_abbr: en_dow_abbr, months: en_months, months_abbr: en_month_abbr).numbers_html(text)[1]).to eq(["88966-5.0-ENG"])
164
+ end
165
+
166
+ it 'redacts numbers from a text #015' do
167
+ text = "85dB(A)"
168
+ expect(described_class.new(language: 'en', dow: en_dow, dow_abbr: en_dow_abbr, months: en_months, months_abbr: en_month_abbr).numbers_html(text)[1]).to eq(["85dB(A)"])
169
+ end
170
+
171
+ it 'redacts numbers from a text #016' do
172
+ text = "Machine Standard operating conditions are between 2.3, and +40°C."
173
+ expect(described_class.new(language: 'en', dow: en_dow, dow_abbr: en_dow_abbr, months: en_months, months_abbr: en_month_abbr).numbers_html(text)[1]).to eq(["2.3,", "+40°C"])
174
+ end
175
+
176
+ it 'redacts numbers from a text #017' do
177
+ text = "(inH2O). CP2.0 RM6 +40°C RM6-Anlage RM6C (see Fig.6)."
178
+ expect(described_class.new(language: 'en', dow: en_dow, dow_abbr: en_dow_abbr, months: en_months, months_abbr: en_month_abbr).numbers_html(text)[1]).to eq(["inH2O", "CP2.0", "RM6", "+40°C", "RM6-Anlage", "RM6C", "Fig.6"])
179
+ end
180
+
181
+ it 'redacts numbers from a text #018' do
182
+ text = "CP3.0"
183
+ expect(described_class.new(language: 'en', dow: en_dow, dow_abbr: en_dow_abbr, months: en_months, months_abbr: en_month_abbr).numbers_html(text)[1]).to eq(["CP3.0"])
184
+ end
160
185
  end
161
186
 
162
187
  describe '#numbers_html' do
@@ -324,7 +349,7 @@ RSpec.describe ConfidentialInfoRedactorLite::Redactor do
324
349
  Don’t forget to use your imagination and creativity!
325
350
  EOF
326
351
  tokens = ConfidentialInfoRedactorLite::Extractor.new(corpus: corpus).extract(text)
327
- expect(described_class.new(language: 'en', tokens: tokens, dow: en_dow, dow_abbr: en_dow_abbr, months: en_months, months_abbr: en_month_abbr).redact(text)).to eq("<redacted>\n\n <redacted> is hosting the <redacted number> <redacted>. So get out your putter and your camera and see if you have what it takes. Are you a <redacted>?\n\n <redacted>: <redacted number>) <redacted> of <redacted number> professional miniature golfers, each from a different country. (<redacted number> points; <redacted number> bonus points if the professional miniature golfers are also from <redacted number> different continents) <redacted number>) <redacted> of yourself next to each obstacle in our list of the Top <redacted number> <redacted>. (<redacted number> points; <redacted number> bonus points for each obstacle that exactly matches the one pictured in the article) <redacted number>) <redacted> your own full-size miniature golf hole. (<redacted number> points; up to <redacted number> bonus points available depending on the craftsmanship, playability, creativity and fun factor of your hole) <redacted number>) <redacted> of yourself making a hole-in-one on two consecutive miniature golf holes. <redacted> video must be one continuous shot with no editing. (<redacted number> points) <redacted number>) <redacted> of yourself with the <redacted> mascot. (<redacted number> points; <redacted number> bonus points if you are wearing a <redacted> t-shirt) <redacted number>) <redacted> of yourself with the completed <redacted> wobblehead. (<redacted number> points; <redacted number> bonus points if the picture is taken at a miniature golf course) <redacted number>) <redacted> of a completed scorecard from a round of miniature golf. <redacted> round of golf must have taken place after the start of this scavenger hunt. (<redacted number> points) <redacted number>) <redacted> of completed scorecards from <redacted number> different miniature golf courses. <redacted> round of golf must have taken place after the start of this scavenger hunt. (<redacted number> points) <redacted number>) <redacted> an entry to the <redacted number> <redacted>. (<redacted number> points; <redacted number> bonus points if your entry gets more than <redacted number> votes) <redacted number>) <redacted> from the <redacted> app showing a 9-hole score below par. (<redacted number> points) <redacted number>) <redacted> from the <redacted> app showing that you have successfully unlocked all of the holes in the game. (<redacted number> points) <redacted number>) <redacted> of the <redacted> wobblehead at a <redacted>. (<redacted number> points) <redacted number>) <redacted> and submit the <redacted> ‘Practice <redacted>’ and ‘Final <redacted>’ for any one of the <redacted> math or physics lessons. (<redacted number> points; <redacted number> bonus points if you complete two lessons) <redacted number>) <redacted> of yourself with at least <redacted number> different colored miniature golf balls. (<redacted number> points; <redacted number> bonus points for each additional color {limit of <redacted number> bonus points}) <redacted number>) <redacted> of yourself with a famous golfer or miniature golfer. (<redacted number> points; <redacted number> bonus points if the golfer is on the <redacted> tour <redacted> you are wearing a <redacted> t-shirt in the picture) <redacted number>) <redacted> of yourself making a hole-in-one on a miniature golf hole with a loop-de-loop obstacle. (<redacted number> points) <redacted number>) <redacted> of yourself successfully making a trick miniature golf shot. (<redacted number> points; up to <redacted number> bonus points available depending on the difficulty and complexity of the trick shot)\n\n\n Prizes: <redacted number> <redacted> <redacted>\n\n <redacted>\n (<redacted number> <redacted number> <redacted> - <redacted>)\n\n <redacted> team will judge the scavenger hunt and all decisions will be final. <redacted> is sponsoring it. <redacted> scavenger hunt is open to anyone and everyone. <redacted> scavenger hunt ends on <redacted date>.\n\n <redacted> enter the scavenger hunt, send an email to info <redacted> putterking <redacted> com with the subject line: \"<redacted>\". In the email please include links to the pictures and videos you are submitting. You can utilize free photo and video hosting sites such as <redacted>, <redacted>, <redacted>, <redacted>, etc. for your submissions.\n\n <redacted> entering the <redacted>, you allow <redacted> to use or link to any of the pictures or videos you submit for advertisements and promotions.\n\n Don’t forget to use your imagination and creativity!")
352
+ expect(described_class.new(language: 'en', tokens: tokens, dow: en_dow, dow_abbr: en_dow_abbr, months: en_months, months_abbr: en_month_abbr).redact(text)).to eq("<redacted>\n\n <redacted> is hosting the <redacted number> <redacted>. So get out your putter and your camera and see if you have what it takes. Are you a <redacted>?\n\n <redacted>: <redacted number>) <redacted> of <redacted number> professional miniature golfers, each from a different country. (<redacted number> points; <redacted number> bonus points if the professional miniature golfers are also from <redacted number> different continents) <redacted number>) <redacted> of yourself next to each obstacle in our list of the Top <redacted number> <redacted>. (<redacted number> points; <redacted number> bonus points for each obstacle that exactly matches the one pictured in the article) <redacted number>) <redacted> your own full-size miniature golf hole. (<redacted number> points; up to <redacted number> bonus points available depending on the craftsmanship, playability, creativity and fun factor of your hole) <redacted number>) <redacted> of yourself making a hole-in-one on two consecutive miniature golf holes. <redacted> video must be one continuous shot with no editing. (<redacted number> points) <redacted number>) <redacted> of yourself with the <redacted> mascot. (<redacted number> points; <redacted number> bonus points if you are wearing a <redacted> t-shirt) <redacted number>) <redacted> of yourself with the completed <redacted> wobblehead. (<redacted number> points; <redacted number> bonus points if the picture is taken at a miniature golf course) <redacted number>) <redacted> of a completed scorecard from a round of miniature golf. <redacted> round of golf must have taken place after the start of this scavenger hunt. (<redacted number> points) <redacted number>) <redacted> of completed scorecards from <redacted number> different miniature golf courses. <redacted> round of golf must have taken place after the start of this scavenger hunt. (<redacted number> points) <redacted number>) <redacted> an entry to the <redacted number> <redacted>. (<redacted number> points; <redacted number> bonus points if your entry gets more than <redacted number> votes) <redacted number>) <redacted> from the <redacted> app showing a <redacted number> -hole score below par. (<redacted number> points) <redacted number>) <redacted> from the <redacted> app showing that you have successfully unlocked all of the holes in the game. (<redacted number> points) <redacted number>) <redacted> of the <redacted> wobblehead at a <redacted>. (<redacted number> points) <redacted number>) <redacted> and submit the <redacted> ‘Practice <redacted>’ and ‘Final <redacted>’ for any one of the <redacted> math or physics lessons. (<redacted number> points; <redacted number> bonus points if you complete two lessons) <redacted number>) <redacted> of yourself with at least <redacted number> different colored miniature golf balls. (<redacted number> points; <redacted number> bonus points for each additional color {limit of <redacted number> bonus points}) <redacted number>) <redacted> of yourself with a famous golfer or miniature golfer. (<redacted number> points; <redacted number> bonus points if the golfer is on the <redacted> tour <redacted> you are wearing a <redacted> t-shirt in the picture) <redacted number>) <redacted> of yourself making a hole-in-one on a miniature golf hole with a loop-de-loop obstacle. (<redacted number> points) <redacted number>) <redacted> of yourself successfully making a trick miniature golf shot. (<redacted number> points; up to <redacted number> bonus points available depending on the difficulty and complexity of the trick shot)\n\n\n Prizes: <redacted number> <redacted> <redacted>\n\n <redacted>\n (<redacted number> <redacted number> <redacted> - <redacted>)\n\n <redacted> team will judge the scavenger hunt and all decisions will be final. <redacted> is sponsoring it. <redacted> scavenger hunt is open to anyone and everyone. <redacted> scavenger hunt ends on <redacted date>.\n\n <redacted> enter the scavenger hunt, send an email to info <redacted> putterking <redacted> com with the subject line: \"<redacted>\". In the email please include links to the pictures and videos you are submitting. You can utilize free photo and video hosting sites such as <redacted>, <redacted>, <redacted>, <redacted>, etc. for your submissions.\n\n <redacted> entering the <redacted>, you allow <redacted> to use or link to any of the pictures or videos you submit for advertisements and promotions.\n\n Don’t forget to use your imagination and creativity!")
328
353
  end
329
354
 
330
355
  it 'redacts all confidential information from a text #003' do
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: confidential_info_redactor_lite
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.8
4
+ version: 1.0.9
5
5
  platform: ruby
6
6
  authors:
7
7
  - Kevin S. Dias