confidential_info_redactor_lite 0.0.24 → 0.0.25

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 9761f67ff135e132a20ef01c222b4f22faf131f1
4
- data.tar.gz: 2d85c726798fc13bf87bcc9089fc866e66f40230
3
+ metadata.gz: 1e6584e5ec77fa94af369f94094b9b299ca07202
4
+ data.tar.gz: dea7c2fa65b217d6b5eb5c6851028b2ca3b78b4f
5
5
  SHA512:
6
- metadata.gz: f6c6c47fc76d60e8e05dc48fd92ebc4321eca1fc72a3d52a3ea9ac9976b4c60dcd400b5ab7587e39980396f49b107590ee2eb34d21d321ba891b247e2ff9fc62
7
- data.tar.gz: f584cb53e3e713b1a2f91011926c85d9ec479786e6fe4ece719f57619ddb70011d54e823641ed65526f3a07c080d8b116e25c0bb429a576e264f20eb1053a0e0
6
+ metadata.gz: 6de045b28a80c2d57889bd601737b12ede3aa3374c5ac4c6e6b95706ce12db478b49006818b47459250937d30e1f6577a5ffb2a5989fb1c2ec1940917ff38251
7
+ data.tar.gz: 9e9ffe65a2fc50b82064fd66a56b537ce4b4943de63102bc2d1a909084781f5a56023fe1d3ecb7391137e07868c0abbe84e302219232b22da950c07276001416
@@ -97,10 +97,14 @@ module ConfidentialInfoRedactorLite
97
97
 
98
98
  def redact_numbers_html(txt)
99
99
  redacted_text = redact_numbers(txt).gsub(/\>\s#{Regexp.escape(token_text)}\s\</, ">#{token_text}<").gsub(/\>\s#{Regexp.escape(number_text)}\s\</, ">#{number_text}<").gsub(/\>\s#{Regexp.escape(date_text)}\s\</, ">#{date_text}<").gsub(/\>\s#{Regexp.escape(email_text)}\s\</, ">#{email_text}<").gsub(/\>\s#{Regexp.escape(hyperlink_text)}\s\</, ">#{hyperlink_text}<")
100
- original_sentence_array = txt.split(' ')
101
- redacted_sentence_array = redacted_text.split(' ')
102
- diff = original_sentence_array - redacted_sentence_array
103
- final_number_tokens = diff.map { |token| token[-1].eql?('.') ? token[0...-1] : token }.map { |token| token[-1].eql?(')') ? token[0...-1] : token }.map { |token| token[0].eql?('(') ? token[1..token.length] : token }
100
+ if language.eql?('ja')
101
+ final_number_tokens = txt.scan(/[0123456789]+|\d+/)
102
+ else
103
+ original_sentence_array = txt.split(' ')
104
+ redacted_sentence_array = redacted_text.split(' ')
105
+ diff = original_sentence_array - redacted_sentence_array
106
+ final_number_tokens = diff.map { |token| token[-1].eql?('.') ? token[0...-1] : token }.map { |token| token[-1].eql?(')') ? token[0...-1] : token }.map { |token| token[0].eql?('(') ? token[1..token.length] : token }
107
+ end
104
108
  [redacted_text.gsub(/(?<=[^\>])#{Regexp.escape(number_text)}/, "<span class='confidentialNumber'>#{number_text}</span>"), final_number_tokens]
105
109
  end
106
110
 
@@ -115,44 +119,47 @@ module ConfidentialInfoRedactorLite
115
119
 
116
120
  def redact_dates_html(txt)
117
121
  redacted_text = redact_dates(txt)
118
- original_sentence_array = txt.split(' ')
119
- redacted_sentence_array = redacted_text.split(' ')
120
- diff = original_sentence_array - redacted_sentence_array
121
- date_tokens = []
122
- redacted_text.split(' ').each_with_index do |redacted_token, index|
123
- if redacted_token.gsub(/\./, '') == date_text
124
- original_sentence_array.each_with_index do |original_token, i|
125
- if redacted_sentence_array[index - 1] == original_token &&
126
- diff.include?(original_sentence_array[i + 1]) &&
127
- original_sentence_array[i + 2] == redacted_sentence_array[index + 1]
128
- date_tokens << original_sentence_array[i + 1]
129
- end
130
- if redacted_sentence_array[index - 1] == original_token &&
131
- diff.include?(original_sentence_array[i + 1]) &&
132
- diff.include?(original_sentence_array[i + 2]) &&
133
- original_sentence_array[i + 3] == redacted_sentence_array[index + 1]
134
- date_tokens << original_sentence_array[i + 1] + ' ' + original_sentence_array[i + 2]
135
- end
136
- if redacted_sentence_array[index - 1] == original_token &&
137
- diff.include?(original_sentence_array[i + 1]) &&
138
- diff.include?(original_sentence_array[i + 2]) &&
139
- diff.include?(original_sentence_array[i + 3]) &&
140
- original_sentence_array[i + 4] == redacted_sentence_array[index + 1]
141
- date_tokens << original_sentence_array[i + 1] + ' ' + original_sentence_array[i + 2] + ' ' + original_sentence_array[i + 3]
142
- end
143
- if redacted_sentence_array[index - 1] == original_token &&
144
- diff.include?(original_sentence_array[i + 1]) &&
145
- diff.include?(original_sentence_array[i + 2]) &&
146
- diff.include?(original_sentence_array[i + 3]) &&
147
- diff.include?(original_sentence_array[i + 4]) &&
148
- original_sentence_array[i + 5] == redacted_sentence_array[index + 1]
149
- date_tokens << original_sentence_array[i + 1] + ' ' + original_sentence_array[i + 2] + ' ' + original_sentence_array[i + 3] + ' ' + original_sentence_array[i + 4]
122
+ if language.eql?('ja')
123
+ final_date_tokens = txt.scan(/[0123456789]+年[0123456789]+月[0123456789]+日|[0123456789]+月[0123456789]+日/)
124
+ else
125
+ original_sentence_array = txt.split(' ')
126
+ redacted_sentence_array = redacted_text.split(' ')
127
+ diff = original_sentence_array - redacted_sentence_array
128
+ date_tokens = []
129
+ redacted_text.split(' ').each_with_index do |redacted_token, index|
130
+ if redacted_token.gsub(/\./, '') == date_text
131
+ original_sentence_array.each_with_index do |original_token, i|
132
+ if redacted_sentence_array[index - 1] == original_token &&
133
+ diff.include?(original_sentence_array[i + 1]) &&
134
+ original_sentence_array[i + 2] == redacted_sentence_array[index + 1]
135
+ date_tokens << original_sentence_array[i + 1]
136
+ end
137
+ if redacted_sentence_array[index - 1] == original_token &&
138
+ diff.include?(original_sentence_array[i + 1]) &&
139
+ diff.include?(original_sentence_array[i + 2]) &&
140
+ original_sentence_array[i + 3] == redacted_sentence_array[index + 1]
141
+ date_tokens << original_sentence_array[i + 1] + ' ' + original_sentence_array[i + 2]
142
+ end
143
+ if redacted_sentence_array[index - 1] == original_token &&
144
+ diff.include?(original_sentence_array[i + 1]) &&
145
+ diff.include?(original_sentence_array[i + 2]) &&
146
+ diff.include?(original_sentence_array[i + 3]) &&
147
+ original_sentence_array[i + 4] == redacted_sentence_array[index + 1]
148
+ date_tokens << original_sentence_array[i + 1] + ' ' + original_sentence_array[i + 2] + ' ' + original_sentence_array[i + 3]
149
+ end
150
+ if redacted_sentence_array[index - 1] == original_token &&
151
+ diff.include?(original_sentence_array[i + 1]) &&
152
+ diff.include?(original_sentence_array[i + 2]) &&
153
+ diff.include?(original_sentence_array[i + 3]) &&
154
+ diff.include?(original_sentence_array[i + 4]) &&
155
+ original_sentence_array[i + 5] == redacted_sentence_array[index + 1]
156
+ date_tokens << original_sentence_array[i + 1] + ' ' + original_sentence_array[i + 2] + ' ' + original_sentence_array[i + 3] + ' ' + original_sentence_array[i + 4]
157
+ end
150
158
  end
151
159
  end
152
160
  end
161
+ final_date_tokens = date_tokens.map { |token| token[-1].eql?('.') ? token[0...-1] : token }
153
162
  end
154
-
155
- final_date_tokens = date_tokens.map { |token| token[-1].eql?('.') ? token[0...-1] : token }
156
163
  [redacted_text.gsub(/#{Regexp.escape(date_text)}/, "<span class='confidentialDate'>#{date_text}</span>"), final_date_tokens]
157
164
  end
158
165
 
@@ -1,3 +1,3 @@
1
1
  module ConfidentialInfoRedactorLite
2
- VERSION = "0.0.24"
2
+ VERSION = "0.0.25"
3
3
  end
@@ -49,6 +49,11 @@ RSpec.describe ConfidentialInfoRedactorLite::Redactor do
49
49
  text = 'On May 1st, 2000 Coca-Cola announced a merger with Pepsi that will happen on December 15th, 2020.'
50
50
  expect(described_class.new(text: text, language: 'en', dow: en_dow, dow_abbr: en_dow_abbr, months: en_months, months_abbr: en_month_abbr, date_text: "*****").dates_html).to eq(["On <span class='confidentialDate'>*****</span> Coca-Cola announced a merger with Pepsi that will happen on <span class='confidentialDate'>*****</span>.", ['May 1st, 2000', 'December 15th, 2020']])
51
51
  end
52
+
53
+ it 'surrounds the redacted dates in spans and return the redacted dates from a text #002' do
54
+ text = '2011年12月31日です。'
55
+ expect(described_class.new(text: text, language: 'ja', dow: en_dow, dow_abbr: en_dow_abbr, months: en_months, months_abbr: en_month_abbr, date_text: "*****").dates_html).to eq(["<span class='confidentialDate'>*****</span> です。", ["2011年12月31日"]])
56
+ end
52
57
  end
53
58
 
54
59
  describe '#numbers' do
@@ -88,6 +93,11 @@ RSpec.describe ConfidentialInfoRedactorLite::Redactor do
88
93
  text = 'It was his 1st) time, not yet his 10th, not even his 2nd. The wood was 3/4" thick. It cost $200,000.'
89
94
  expect(described_class.new(text: text, language: 'en', dow: en_dow, dow_abbr: en_dow_abbr, months: en_months, months_abbr: en_month_abbr, number_text: "*****").numbers_html).to eq(["It was his <span class='confidentialNumber'>*****</span>) time, not yet his <span class='confidentialNumber'>*****</span>, not even his <span class='confidentialNumber'>*****</span>. The wood was <span class='confidentialNumber'>*****</span> thick. It cost <span class='confidentialNumber'>*****</span>.", ["1st", "10th,", "2nd", "3/4\"", "$200,000"]])
90
95
  end
96
+
97
+ it 'surrounds the redacted numbers in spans and return the redacted numbers from a text #002' do
98
+ text = 'プロのミニチュアゴルファー2人のサイン。2人の出身国は別であること。(45ポイント;それぞれが別の大陸出身だった場合、5ボーナスポイント。)'
99
+ expect(described_class.new(text: text, language: 'ja', dow: en_dow, dow_abbr: en_dow_abbr, months: en_months, months_abbr: en_month_abbr, number_text: "*****").numbers_html).to eq(["プロのミニチュアゴルファー <span class='confidentialNumber'>*****</span> 人のサイン。 <span class='confidentialNumber'>*****</span> 人の出身国は別であること。( <span class='confidentialNumber'>*****</span> ポイント;それぞれが別の大陸出身だった場合、 <span class='confidentialNumber'>*****</span> ボーナスポイント。)", ["2", "2", "45", "5"]])
100
+ end
91
101
  end
92
102
 
93
103
  describe '#emails' do
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: confidential_info_redactor_lite
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.24
4
+ version: 0.0.25
5
5
  platform: ruby
6
6
  authors:
7
7
  - Kevin S. Dias