confidential_info_redactor_lite 0.0.24 → 0.0.25
Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 1e6584e5ec77fa94af369f94094b9b299ca07202
|
4
|
+
data.tar.gz: dea7c2fa65b217d6b5eb5c6851028b2ca3b78b4f
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 6de045b28a80c2d57889bd601737b12ede3aa3374c5ac4c6e6b95706ce12db478b49006818b47459250937d30e1f6577a5ffb2a5989fb1c2ec1940917ff38251
|
7
|
+
data.tar.gz: 9e9ffe65a2fc50b82064fd66a56b537ce4b4943de63102bc2d1a909084781f5a56023fe1d3ecb7391137e07868c0abbe84e302219232b22da950c07276001416
|
@@ -97,10 +97,14 @@ module ConfidentialInfoRedactorLite
|
|
97
97
|
|
98
98
|
def redact_numbers_html(txt)
|
99
99
|
redacted_text = redact_numbers(txt).gsub(/\>\s#{Regexp.escape(token_text)}\s\</, ">#{token_text}<").gsub(/\>\s#{Regexp.escape(number_text)}\s\</, ">#{number_text}<").gsub(/\>\s#{Regexp.escape(date_text)}\s\</, ">#{date_text}<").gsub(/\>\s#{Regexp.escape(email_text)}\s\</, ">#{email_text}<").gsub(/\>\s#{Regexp.escape(hyperlink_text)}\s\</, ">#{hyperlink_text}<")
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
100
|
+
if language.eql?('ja')
|
101
|
+
final_number_tokens = txt.scan(/[0123456789]+|\d+/)
|
102
|
+
else
|
103
|
+
original_sentence_array = txt.split(' ')
|
104
|
+
redacted_sentence_array = redacted_text.split(' ')
|
105
|
+
diff = original_sentence_array - redacted_sentence_array
|
106
|
+
final_number_tokens = diff.map { |token| token[-1].eql?('.') ? token[0...-1] : token }.map { |token| token[-1].eql?(')') ? token[0...-1] : token }.map { |token| token[0].eql?('(') ? token[1..token.length] : token }
|
107
|
+
end
|
104
108
|
[redacted_text.gsub(/(?<=[^\>])#{Regexp.escape(number_text)}/, "<span class='confidentialNumber'>#{number_text}</span>"), final_number_tokens]
|
105
109
|
end
|
106
110
|
|
@@ -115,44 +119,47 @@ module ConfidentialInfoRedactorLite
|
|
115
119
|
|
116
120
|
def redact_dates_html(txt)
|
117
121
|
redacted_text = redact_dates(txt)
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
122
|
+
if language.eql?('ja')
|
123
|
+
final_date_tokens = txt.scan(/[0123456789]+年[0123456789]+月[0123456789]+日|[0123456789]+月[0123456789]+日/)
|
124
|
+
else
|
125
|
+
original_sentence_array = txt.split(' ')
|
126
|
+
redacted_sentence_array = redacted_text.split(' ')
|
127
|
+
diff = original_sentence_array - redacted_sentence_array
|
128
|
+
date_tokens = []
|
129
|
+
redacted_text.split(' ').each_with_index do |redacted_token, index|
|
130
|
+
if redacted_token.gsub(/\./, '') == date_text
|
131
|
+
original_sentence_array.each_with_index do |original_token, i|
|
132
|
+
if redacted_sentence_array[index - 1] == original_token &&
|
133
|
+
diff.include?(original_sentence_array[i + 1]) &&
|
134
|
+
original_sentence_array[i + 2] == redacted_sentence_array[index + 1]
|
135
|
+
date_tokens << original_sentence_array[i + 1]
|
136
|
+
end
|
137
|
+
if redacted_sentence_array[index - 1] == original_token &&
|
138
|
+
diff.include?(original_sentence_array[i + 1]) &&
|
139
|
+
diff.include?(original_sentence_array[i + 2]) &&
|
140
|
+
original_sentence_array[i + 3] == redacted_sentence_array[index + 1]
|
141
|
+
date_tokens << original_sentence_array[i + 1] + ' ' + original_sentence_array[i + 2]
|
142
|
+
end
|
143
|
+
if redacted_sentence_array[index - 1] == original_token &&
|
144
|
+
diff.include?(original_sentence_array[i + 1]) &&
|
145
|
+
diff.include?(original_sentence_array[i + 2]) &&
|
146
|
+
diff.include?(original_sentence_array[i + 3]) &&
|
147
|
+
original_sentence_array[i + 4] == redacted_sentence_array[index + 1]
|
148
|
+
date_tokens << original_sentence_array[i + 1] + ' ' + original_sentence_array[i + 2] + ' ' + original_sentence_array[i + 3]
|
149
|
+
end
|
150
|
+
if redacted_sentence_array[index - 1] == original_token &&
|
151
|
+
diff.include?(original_sentence_array[i + 1]) &&
|
152
|
+
diff.include?(original_sentence_array[i + 2]) &&
|
153
|
+
diff.include?(original_sentence_array[i + 3]) &&
|
154
|
+
diff.include?(original_sentence_array[i + 4]) &&
|
155
|
+
original_sentence_array[i + 5] == redacted_sentence_array[index + 1]
|
156
|
+
date_tokens << original_sentence_array[i + 1] + ' ' + original_sentence_array[i + 2] + ' ' + original_sentence_array[i + 3] + ' ' + original_sentence_array[i + 4]
|
157
|
+
end
|
150
158
|
end
|
151
159
|
end
|
152
160
|
end
|
161
|
+
final_date_tokens = date_tokens.map { |token| token[-1].eql?('.') ? token[0...-1] : token }
|
153
162
|
end
|
154
|
-
|
155
|
-
final_date_tokens = date_tokens.map { |token| token[-1].eql?('.') ? token[0...-1] : token }
|
156
163
|
[redacted_text.gsub(/#{Regexp.escape(date_text)}/, "<span class='confidentialDate'>#{date_text}</span>"), final_date_tokens]
|
157
164
|
end
|
158
165
|
|
@@ -49,6 +49,11 @@ RSpec.describe ConfidentialInfoRedactorLite::Redactor do
|
|
49
49
|
text = 'On May 1st, 2000 Coca-Cola announced a merger with Pepsi that will happen on December 15th, 2020.'
|
50
50
|
expect(described_class.new(text: text, language: 'en', dow: en_dow, dow_abbr: en_dow_abbr, months: en_months, months_abbr: en_month_abbr, date_text: "*****").dates_html).to eq(["On <span class='confidentialDate'>*****</span> Coca-Cola announced a merger with Pepsi that will happen on <span class='confidentialDate'>*****</span>.", ['May 1st, 2000', 'December 15th, 2020']])
|
51
51
|
end
|
52
|
+
|
53
|
+
it 'surrounds the redacted dates in spans and return the redacted dates from a text #002' do
|
54
|
+
text = '2011年12月31日です。'
|
55
|
+
expect(described_class.new(text: text, language: 'ja', dow: en_dow, dow_abbr: en_dow_abbr, months: en_months, months_abbr: en_month_abbr, date_text: "*****").dates_html).to eq(["<span class='confidentialDate'>*****</span> です。", ["2011年12月31日"]])
|
56
|
+
end
|
52
57
|
end
|
53
58
|
|
54
59
|
describe '#numbers' do
|
@@ -88,6 +93,11 @@ RSpec.describe ConfidentialInfoRedactorLite::Redactor do
|
|
88
93
|
text = 'It was his 1st) time, not yet his 10th, not even his 2nd. The wood was 3/4" thick. It cost $200,000.'
|
89
94
|
expect(described_class.new(text: text, language: 'en', dow: en_dow, dow_abbr: en_dow_abbr, months: en_months, months_abbr: en_month_abbr, number_text: "*****").numbers_html).to eq(["It was his <span class='confidentialNumber'>*****</span>) time, not yet his <span class='confidentialNumber'>*****</span>, not even his <span class='confidentialNumber'>*****</span>. The wood was <span class='confidentialNumber'>*****</span> thick. It cost <span class='confidentialNumber'>*****</span>.", ["1st", "10th,", "2nd", "3/4\"", "$200,000"]])
|
90
95
|
end
|
96
|
+
|
97
|
+
it 'surrounds the redacted numbers in spans and return the redacted numbers from a text #002' do
|
98
|
+
text = 'プロのミニチュアゴルファー2人のサイン。2人の出身国は別であること。(45ポイント;それぞれが別の大陸出身だった場合、5ボーナスポイント。)'
|
99
|
+
expect(described_class.new(text: text, language: 'ja', dow: en_dow, dow_abbr: en_dow_abbr, months: en_months, months_abbr: en_month_abbr, number_text: "*****").numbers_html).to eq(["プロのミニチュアゴルファー <span class='confidentialNumber'>*****</span> 人のサイン。 <span class='confidentialNumber'>*****</span> 人の出身国は別であること。( <span class='confidentialNumber'>*****</span> ポイント;それぞれが別の大陸出身だった場合、 <span class='confidentialNumber'>*****</span> ボーナスポイント。)", ["2", "2", "45", "5"]])
|
100
|
+
end
|
91
101
|
end
|
92
102
|
|
93
103
|
describe '#emails' do
|