confidential_info_redactor_lite 0.0.24 → 0.0.25
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 1e6584e5ec77fa94af369f94094b9b299ca07202
|
4
|
+
data.tar.gz: dea7c2fa65b217d6b5eb5c6851028b2ca3b78b4f
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 6de045b28a80c2d57889bd601737b12ede3aa3374c5ac4c6e6b95706ce12db478b49006818b47459250937d30e1f6577a5ffb2a5989fb1c2ec1940917ff38251
|
7
|
+
data.tar.gz: 9e9ffe65a2fc50b82064fd66a56b537ce4b4943de63102bc2d1a909084781f5a56023fe1d3ecb7391137e07868c0abbe84e302219232b22da950c07276001416
|
@@ -97,10 +97,14 @@ module ConfidentialInfoRedactorLite
|
|
97
97
|
|
98
98
|
# Redacts the numbers in +txt+ and wraps each number placeholder in a
# <span class='confidentialNumber'> element.
#
# @param txt [String] the text to redact
# @return [Array(String, Array<String>)] a two-element array: the redacted
#   text with number placeholders wrapped in spans, and the original tokens
#   that were identified as numbers
def redact_numbers_html(txt)
  redacted_text = redact_numbers(txt)

  # Collapse "> PLACEHOLDER <" into ">PLACEHOLDER<" for every placeholder kind.
  # The block form of gsub inserts the placeholder literally; a replacement
  # *string* would expand backslash sequences such as "\0" or "\1" found in a
  # caller-supplied placeholder as back-references.
  [token_text, number_text, date_text, email_text, hyperlink_text].each do |placeholder|
    redacted_text = redacted_text.gsub(/\>\s#{Regexp.escape(placeholder)}\s\</) { ">#{placeholder}<" }
  end

  if language.eql?('ja')
    # NOTE(review): as written the first alternative is redundant with \d+;
    # it may have been meant to list full-width digits - confirm against the
    # upstream source before changing the pattern.
    final_number_tokens = txt.scan(/[0123456789]+|\d+/)
  else
    # Tokens present in the original but absent from the redacted text are
    # the ones that were replaced by the number placeholder.
    removed_tokens = txt.split(' ') - redacted_text.split(' ')
    # Strip, in this order, one trailing '.', one trailing ')' and one
    # leading '(' so that e.g. "(200)." is reported as "200".
    final_number_tokens = removed_tokens.map do |token|
      token.chomp('.').chomp(')').sub(/\A\(/, '')
    end
  end

  # The look-behind skips placeholders that directly follow '>' (and any
  # placeholder at the very start of the text, where no character precedes it).
  html = redacted_text.gsub(/(?<=[^\>])#{Regexp.escape(number_text)}/) do
    "<span class='confidentialNumber'>#{number_text}</span>"
  end
  [html, final_number_tokens]
end
|
106
110
|
|
@@ -115,44 +119,47 @@ module ConfidentialInfoRedactorLite
|
|
115
119
|
|
116
120
|
# Redacts the dates in +txt+ and wraps each date placeholder in a
# <span class='confidentialDate'> element.
#
# For Japanese ('ja') the original dates are collected with a regex scan of
# +txt+. For every other language they are recovered by aligning the
# whitespace-split original and redacted texts: for each placeholder token, a
# run of 1 to 4 original tokens that disappeared during redaction is accepted
# when it sits between the same neighbours as the placeholder.
#
# @param txt [String] the text to redact
# @return [Array(String, Array<String>)] a two-element array: the redacted
#   text with date placeholders wrapped in spans, and the original date strings
def redact_dates_html(txt)
  redacted_text = redact_dates(txt)
  if language.eql?('ja')
    # NOTE(review): the explicit digit class may have been meant to list
    # full-width digits - confirm against the upstream source.
    final_date_tokens = txt.scan(/[0123456789]+年[0123456789]+月[0123456789]+日|[0123456789]+月[0123456789]+日/)
  else
    original_tokens = txt.split(' ')
    redacted_tokens = redacted_text.split(' ')
    # Tokens that exist only in the original, i.e. were replaced by redaction.
    removed_tokens = original_tokens - redacted_tokens
    date_tokens = []

    redacted_tokens.each_with_index do |redacted_token, index|
      # A placeholder may carry sentence punctuation, e.g. "*****."
      next unless redacted_token.gsub(/\./, '') == date_text

      # Candidate positions in the original where the date may start. A
      # placeholder at index 0 has no left neighbour, so the date must start
      # at position 0; indexing with (index - 1) there would wrap around to
      # the LAST redacted token and a leading date would never be found.
      starts =
        if index.zero?
          [0]
        else
          left_neighbour = redacted_tokens[index - 1]
          original_tokens.each_index.select { |i| original_tokens[i] == left_neighbour }.map { |i| i + 1 }
        end
      right_neighbour = redacted_tokens[index + 1]

      starts.each do |start|
        # A date may span between one and four whitespace-separated tokens.
        (1..4).each do |length|
          span = original_tokens[start, length]
          next unless span.length == length
          next unless span.all? { |token| removed_tokens.include?(token) }
          # nil == nil deliberately matches a date that ends the text.
          next unless original_tokens[start + length] == right_neighbour

          date_tokens << span.join(' ')
        end
      end
    end

    # Drop a sentence-final period that stuck to the last date token.
    final_date_tokens = date_tokens.map { |token| token.chomp('.') }
  end

  # Block form: a caller-supplied placeholder containing backslash sequences
  # ("\0", "\1", ...) is inserted literally instead of being expanded as a
  # back-reference.
  html = redacted_text.gsub(/#{Regexp.escape(date_text)}/) do
    "<span class='confidentialDate'>#{date_text}</span>"
  end
  [html, final_date_tokens]
end
|
158
165
|
|
@@ -49,6 +49,11 @@ RSpec.describe ConfidentialInfoRedactorLite::Redactor do
|
|
49
49
|
text = 'On May 1st, 2000 Coca-Cola announced a merger with Pepsi that will happen on December 15th, 2020.'
|
50
50
|
expect(described_class.new(text: text, language: 'en', dow: en_dow, dow_abbr: en_dow_abbr, months: en_months, months_abbr: en_month_abbr, date_text: "*****").dates_html).to eq(["On <span class='confidentialDate'>*****</span> Coca-Cola announced a merger with Pepsi that will happen on <span class='confidentialDate'>*****</span>.", ['May 1st, 2000', 'December 15th, 2020']])
|
51
51
|
end
|
52
|
+
|
53
|
+
it 'surrounds the redacted dates in spans and return the redacted dates from a text #002' do
  # Japanese input: a year-month-day date directly followed by text.
  redactor = described_class.new(
    text: '2011年12月31日です。',
    language: 'ja',
    dow: en_dow,
    dow_abbr: en_dow_abbr,
    months: en_months,
    months_abbr: en_month_abbr,
    date_text: "*****"
  )
  expected_html = "<span class='confidentialDate'>*****</span> です。"
  expected_dates = ["2011年12月31日"]

  expect(redactor.dates_html).to eq([expected_html, expected_dates])
end
|
52
57
|
end
|
53
58
|
|
54
59
|
describe '#numbers' do
|
@@ -88,6 +93,11 @@ RSpec.describe ConfidentialInfoRedactorLite::Redactor do
|
|
88
93
|
text = 'It was his 1st) time, not yet his 10th, not even his 2nd. The wood was 3/4" thick. It cost $200,000.'
|
89
94
|
expect(described_class.new(text: text, language: 'en', dow: en_dow, dow_abbr: en_dow_abbr, months: en_months, months_abbr: en_month_abbr, number_text: "*****").numbers_html).to eq(["It was his <span class='confidentialNumber'>*****</span>) time, not yet his <span class='confidentialNumber'>*****</span>, not even his <span class='confidentialNumber'>*****</span>. The wood was <span class='confidentialNumber'>*****</span> thick. It cost <span class='confidentialNumber'>*****</span>.", ["1st", "10th,", "2nd", "3/4\"", "$200,000"]])
|
90
95
|
end
|
96
|
+
|
97
|
+
it 'surrounds the redacted numbers in spans and return the redacted numbers from a text #002' do
  # Japanese input containing four separate numbers.
  redactor = described_class.new(
    text: 'プロのミニチュアゴルファー2人のサイン。2人の出身国は別であること。(45ポイント;それぞれが別の大陸出身だった場合、5ボーナスポイント。)',
    language: 'ja',
    dow: en_dow,
    dow_abbr: en_dow_abbr,
    months: en_months,
    months_abbr: en_month_abbr,
    number_text: "*****"
  )
  expected_html = "プロのミニチュアゴルファー <span class='confidentialNumber'>*****</span> 人のサイン。 <span class='confidentialNumber'>*****</span> 人の出身国は別であること。( <span class='confidentialNumber'>*****</span> ポイント;それぞれが別の大陸出身だった場合、 <span class='confidentialNumber'>*****</span> ボーナスポイント。)"
  expected_numbers = ["2", "2", "45", "5"]

  expect(redactor.numbers_html).to eq([expected_html, expected_numbers])
end
|
91
101
|
end
|
92
102
|
|
93
103
|
describe '#emails' do
|