confidential_info_redactor_lite 1.0.8 → 1.0.9
Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 7df96ec7fe0c910eb8ab77e6cc54bf018d142ded
|
4
|
+
data.tar.gz: 3951ed10c6e222bb24e1725a0944cd65807e02e9
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 9cbff4361d65830508c0dc36174485f97df3e8e6451cf0f7ada707752e50720e1690ea14aa99f7e4ca49914974916f3b0c4ec1a47a4d22f25fc96f2e1f9aed79
|
7
|
+
data.tar.gz: e794763d38415090f1b3af1c0bd497543d5e65dc3651942e969155034554a38389068989b501d3f9d97f8aa8346a3a262c656eb86521e790424b53e44d6bc190
|
@@ -5,7 +5,7 @@ module ConfidentialInfoRedactorLite
|
|
5
5
|
# This class redacts various tokens from a text
|
6
6
|
class Redactor
|
7
7
|
# Rubular: http://rubular.com/r/OI2wQZ0KSl
|
8
|
-
NUMBER_REGEX = /(?<=\A|\A\()[^(]?\d+((,|\.|\/)*\d)*(\D?\s|\s|[[:cntrl:]]|[[:space:]]|\.?\s|\.$|$)|(?<=[[:cntrl:]]|[[:space:]]|\s|\s\(|\s'|\s‘)[^('‘]?\d+((,|\.|\/)*\d)*\"*(?=(\D?\s|\s|[[:cntrl:]]|[[:space:]]|\.?\s|\.$|$))|(?<=\s)\d+(nd|th|st)|(?<=\s)\d+\/\d+\"*(?=\s)|(?<=\()\S{1}\d+(?=\))|(?<=\s{1})\S{1}\d+\z|^\d+$|(?<=\A|\A\(|\s|[[:cntrl:]]|[[:space:]]|\s\()[^(]?\d+((,|\.|\/)*\d)*\D{2}(?=($|\s+))
|
8
|
+
NUMBER_REGEX = /(?<=\A|\A\()[^(]?\d+((,|\.|\/)*\d)*(\D?\s|\s|[[:cntrl:]]|[[:space:]]|\.?\s|\.$|$)|(?<=[[:cntrl:]]|[[:space:]]|\s|\s\(|\s'|\s‘)[^('‘]?\d+((,|\.|\/)*\d)*\"*(?=(\D?\s|\s|[[:cntrl:]]|[[:space:]]|\.?\s|\.$|$))|(?<=\s)\d+(nd|th|st)|(?<=\s)\d+\/\d+\"*(?=\s)|(?<=\()\S{1}\d+(?=\))|(?<=\s{1})\S{1}\d+\z|^\d+$|(?<=\A|\A\(|\s|[[:cntrl:]]|[[:space:]]|\s\()[^(]?\d+((,|\.|\/)*\d)*\D{2}(?=($|\s+))|\d+/
|
9
9
|
# Rubular: http://rubular.com/r/mxcj2G0Jfa
|
10
10
|
EMAIL_REGEX = /(?<=\A|\s|\()[\w+\-.]+@[a-z\d\-]+(\.[a-z]+)*\.[a-z]+(?=\z|\s|\.|\))/i
|
11
11
|
|
@@ -117,13 +117,17 @@ module ConfidentialInfoRedactorLite
|
|
117
117
|
original_sentence_array = txt.split(' ')
|
118
118
|
redacted_sentence_array = redacted_text.split(' ')
|
119
119
|
diff = original_sentence_array - redacted_sentence_array
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
120
|
+
if original_sentence_array.length.eql?(1)
|
121
|
+
final_number_tokens = diff
|
122
|
+
else
|
123
|
+
final_number_tokens = diff.map { |token| token[-1].eql?('.') ? token[0...-1] : token }
|
124
|
+
.map { |token| token[-1].eql?(')') ? token[0...-1] : token }
|
125
|
+
.map { |token| token[-1].eql?("'") ? token[0...-1] : token }
|
126
|
+
.map { |token| token[-1].eql?('’') ? token[0...-1] : token }
|
127
|
+
.map { |token| token[0].eql?('(') ? token[1..token.length] : token }
|
128
|
+
.map { |token| token[0].eql?("'") ? token[1..token.length] : token }
|
129
|
+
.map { |token| token[0].eql?("‘") ? token[1..token.length] : token }
|
130
|
+
end
|
127
131
|
end
|
128
132
|
[redacted_text.gsub(/(?<=[^\>]|\A)#{Regexp.escape(number_text)}/, "<span class='confidentialNumber'>#{number_text}</span>"), final_number_tokens]
|
129
133
|
end
|
@@ -157,6 +157,31 @@ RSpec.describe ConfidentialInfoRedactorLite::Redactor do
|
|
157
157
|
text = "Page 4"
|
158
158
|
expect(described_class.new(language: 'en', dow: en_dow, dow_abbr: en_dow_abbr, months: en_months, months_abbr: en_month_abbr).numbers_html(text)[1]).to eq(["4"])
|
159
159
|
end
|
160
|
+
|
161
|
+
it 'redacts numbers from a text #014' do
|
162
|
+
text = "88966-5.0-ENG"
|
163
|
+
expect(described_class.new(language: 'en', dow: en_dow, dow_abbr: en_dow_abbr, months: en_months, months_abbr: en_month_abbr).numbers_html(text)[1]).to eq(["88966-5.0-ENG"])
|
164
|
+
end
|
165
|
+
|
166
|
+
it 'redacts numbers from a text #015' do
|
167
|
+
text = "85dB(A)"
|
168
|
+
expect(described_class.new(language: 'en', dow: en_dow, dow_abbr: en_dow_abbr, months: en_months, months_abbr: en_month_abbr).numbers_html(text)[1]).to eq(["85dB(A)"])
|
169
|
+
end
|
170
|
+
|
171
|
+
it 'redacts numbers from a text #016' do
|
172
|
+
text = "Machine Standard operating conditions are between 2.3, and +40°C."
|
173
|
+
expect(described_class.new(language: 'en', dow: en_dow, dow_abbr: en_dow_abbr, months: en_months, months_abbr: en_month_abbr).numbers_html(text)[1]).to eq(["2.3,", "+40°C"])
|
174
|
+
end
|
175
|
+
|
176
|
+
it 'redacts numbers from a text #017' do
|
177
|
+
text = "(inH2O). CP2.0 RM6 +40°C RM6-Anlage RM6C (see Fig.6)."
|
178
|
+
expect(described_class.new(language: 'en', dow: en_dow, dow_abbr: en_dow_abbr, months: en_months, months_abbr: en_month_abbr).numbers_html(text)[1]).to eq(["inH2O", "CP2.0", "RM6", "+40°C", "RM6-Anlage", "RM6C", "Fig.6"])
|
179
|
+
end
|
180
|
+
|
181
|
+
it 'redacts numbers from a text #018' do
|
182
|
+
text = "CP3.0"
|
183
|
+
expect(described_class.new(language: 'en', dow: en_dow, dow_abbr: en_dow_abbr, months: en_months, months_abbr: en_month_abbr).numbers_html(text)[1]).to eq(["CP3.0"])
|
184
|
+
end
|
160
185
|
end
|
161
186
|
|
162
187
|
describe '#numbers_html' do
|
@@ -324,7 +349,7 @@ RSpec.describe ConfidentialInfoRedactorLite::Redactor do
|
|
324
349
|
Don’t forget to use your imagination and creativity!
|
325
350
|
EOF
|
326
351
|
tokens = ConfidentialInfoRedactorLite::Extractor.new(corpus: corpus).extract(text)
|
327
|
-
expect(described_class.new(language: 'en', tokens: tokens, dow: en_dow, dow_abbr: en_dow_abbr, months: en_months, months_abbr: en_month_abbr).redact(text)).to eq("<redacted>\n\n <redacted> is hosting the <redacted number> <redacted>. So get out your putter and your camera and see if you have what it takes. Are you a <redacted>?\n\n <redacted>: <redacted number>) <redacted> of <redacted number> professional miniature golfers, each from a different country. (<redacted number> points; <redacted number> bonus points if the professional miniature golfers are also from <redacted number> different continents) <redacted number>) <redacted> of yourself next to each obstacle in our list of the Top <redacted number> <redacted>. (<redacted number> points; <redacted number> bonus points for each obstacle that exactly matches the one pictured in the article) <redacted number>) <redacted> your own full-size miniature golf hole. (<redacted number> points; up to <redacted number> bonus points available depending on the craftsmanship, playability, creativity and fun factor of your hole) <redacted number>) <redacted> of yourself making a hole-in-one on two consecutive miniature golf holes. <redacted> video must be one continuous shot with no editing. (<redacted number> points) <redacted number>) <redacted> of yourself with the <redacted> mascot. (<redacted number> points; <redacted number> bonus points if you are wearing a <redacted> t-shirt) <redacted number>) <redacted> of yourself with the completed <redacted> wobblehead. (<redacted number> points; <redacted number> bonus points if the picture is taken at a miniature golf course) <redacted number>) <redacted> of a completed scorecard from a round of miniature golf. <redacted> round of golf must have taken place after the start of this scavenger hunt. (<redacted number> points) <redacted number>) <redacted> of completed scorecards from <redacted number> different miniature golf courses. <redacted> round of golf must have taken place after the start of this scavenger hunt. (<redacted number> points) <redacted number>) <redacted> an entry to the <redacted number> <redacted>. (<redacted number> points; <redacted number> bonus points if your entry gets more than <redacted number> votes) <redacted number>) <redacted> from the <redacted> app showing a
|
352
|
+
expect(described_class.new(language: 'en', tokens: tokens, dow: en_dow, dow_abbr: en_dow_abbr, months: en_months, months_abbr: en_month_abbr).redact(text)).to eq("<redacted>\n\n <redacted> is hosting the <redacted number> <redacted>. So get out your putter and your camera and see if you have what it takes. Are you a <redacted>?\n\n <redacted>: <redacted number>) <redacted> of <redacted number> professional miniature golfers, each from a different country. (<redacted number> points; <redacted number> bonus points if the professional miniature golfers are also from <redacted number> different continents) <redacted number>) <redacted> of yourself next to each obstacle in our list of the Top <redacted number> <redacted>. (<redacted number> points; <redacted number> bonus points for each obstacle that exactly matches the one pictured in the article) <redacted number>) <redacted> your own full-size miniature golf hole. (<redacted number> points; up to <redacted number> bonus points available depending on the craftsmanship, playability, creativity and fun factor of your hole) <redacted number>) <redacted> of yourself making a hole-in-one on two consecutive miniature golf holes. <redacted> video must be one continuous shot with no editing. (<redacted number> points) <redacted number>) <redacted> of yourself with the <redacted> mascot. (<redacted number> points; <redacted number> bonus points if you are wearing a <redacted> t-shirt) <redacted number>) <redacted> of yourself with the completed <redacted> wobblehead. (<redacted number> points; <redacted number> bonus points if the picture is taken at a miniature golf course) <redacted number>) <redacted> of a completed scorecard from a round of miniature golf. <redacted> round of golf must have taken place after the start of this scavenger hunt. (<redacted number> points) <redacted number>) <redacted> of completed scorecards from <redacted number> different miniature golf courses. <redacted> round of golf must have taken place after the start of this scavenger hunt. (<redacted number> points) <redacted number>) <redacted> an entry to the <redacted number> <redacted>. (<redacted number> points; <redacted number> bonus points if your entry gets more than <redacted number> votes) <redacted number>) <redacted> from the <redacted> app showing a <redacted number> -hole score below par. (<redacted number> points) <redacted number>) <redacted> from the <redacted> app showing that you have successfully unlocked all of the holes in the game. (<redacted number> points) <redacted number>) <redacted> of the <redacted> wobblehead at a <redacted>. (<redacted number> points) <redacted number>) <redacted> and submit the <redacted> ‘Practice <redacted>’ and ‘Final <redacted>’ for any one of the <redacted> math or physics lessons. (<redacted number> points; <redacted number> bonus points if you complete two lessons) <redacted number>) <redacted> of yourself with at least <redacted number> different colored miniature golf balls. (<redacted number> points; <redacted number> bonus points for each additional color {limit of <redacted number> bonus points}) <redacted number>) <redacted> of yourself with a famous golfer or miniature golfer. (<redacted number> points; <redacted number> bonus points if the golfer is on the <redacted> tour <redacted> you are wearing a <redacted> t-shirt in the picture) <redacted number>) <redacted> of yourself making a hole-in-one on a miniature golf hole with a loop-de-loop obstacle. (<redacted number> points) <redacted number>) <redacted> of yourself successfully making a trick miniature golf shot. (<redacted number> points; up to <redacted number> bonus points available depending on the difficulty and complexity of the trick shot)\n\n\n Prizes: <redacted number> <redacted> <redacted>\n\n <redacted>\n (<redacted number> <redacted number> <redacted> - <redacted>)\n\n <redacted> team will judge the scavenger hunt and all decisions will be final. <redacted> is sponsoring it. <redacted> scavenger hunt is open to anyone and everyone. <redacted> scavenger hunt ends on <redacted date>.\n\n <redacted> enter the scavenger hunt, send an email to info <redacted> putterking <redacted> com with the subject line: \"<redacted>\". In the email please include links to the pictures and videos you are submitting. You can utilize free photo and video hosting sites such as <redacted>, <redacted>, <redacted>, <redacted>, etc. for your submissions.\n\n <redacted> entering the <redacted>, you allow <redacted> to use or link to any of the pictures or videos you submit for advertisements and promotions.\n\n Don’t forget to use your imagination and creativity!")
|
328
353
|
end
|
329
354
|
|
330
355
|
it 'redacts all confidential information from a text #003' do
|