confidential_info_redactor_lite 0.0.24 → 0.0.25

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 9761f67ff135e132a20ef01c222b4f22faf131f1
4
- data.tar.gz: 2d85c726798fc13bf87bcc9089fc866e66f40230
3
+ metadata.gz: 1e6584e5ec77fa94af369f94094b9b299ca07202
4
+ data.tar.gz: dea7c2fa65b217d6b5eb5c6851028b2ca3b78b4f
5
5
  SHA512:
6
- metadata.gz: f6c6c47fc76d60e8e05dc48fd92ebc4321eca1fc72a3d52a3ea9ac9976b4c60dcd400b5ab7587e39980396f49b107590ee2eb34d21d321ba891b247e2ff9fc62
7
- data.tar.gz: f584cb53e3e713b1a2f91011926c85d9ec479786e6fe4ece719f57619ddb70011d54e823641ed65526f3a07c080d8b116e25c0bb429a576e264f20eb1053a0e0
6
+ metadata.gz: 6de045b28a80c2d57889bd601737b12ede3aa3374c5ac4c6e6b95706ce12db478b49006818b47459250937d30e1f6577a5ffb2a5989fb1c2ec1940917ff38251
7
+ data.tar.gz: 9e9ffe65a2fc50b82064fd66a56b537ce4b4943de63102bc2d1a909084781f5a56023fe1d3ecb7391137e07868c0abbe84e302219232b22da950c07276001416
@@ -97,10 +97,14 @@ module ConfidentialInfoRedactorLite
97
97
 
98
98
  def redact_numbers_html(txt)
99
99
  redacted_text = redact_numbers(txt).gsub(/\>\s#{Regexp.escape(token_text)}\s\</, ">#{token_text}<").gsub(/\>\s#{Regexp.escape(number_text)}\s\</, ">#{number_text}<").gsub(/\>\s#{Regexp.escape(date_text)}\s\</, ">#{date_text}<").gsub(/\>\s#{Regexp.escape(email_text)}\s\</, ">#{email_text}<").gsub(/\>\s#{Regexp.escape(hyperlink_text)}\s\</, ">#{hyperlink_text}<")
100
- original_sentence_array = txt.split(' ')
101
- redacted_sentence_array = redacted_text.split(' ')
102
- diff = original_sentence_array - redacted_sentence_array
103
- final_number_tokens = diff.map { |token| token[-1].eql?('.') ? token[0...-1] : token }.map { |token| token[-1].eql?(')') ? token[0...-1] : token }.map { |token| token[0].eql?('(') ? token[1..token.length] : token }
100
+ if language.eql?('ja')
101
+ final_number_tokens = txt.scan(/[0123456789]+|\d+/)
102
+ else
103
+ original_sentence_array = txt.split(' ')
104
+ redacted_sentence_array = redacted_text.split(' ')
105
+ diff = original_sentence_array - redacted_sentence_array
106
+ final_number_tokens = diff.map { |token| token[-1].eql?('.') ? token[0...-1] : token }.map { |token| token[-1].eql?(')') ? token[0...-1] : token }.map { |token| token[0].eql?('(') ? token[1..token.length] : token }
107
+ end
104
108
  [redacted_text.gsub(/(?<=[^\>])#{Regexp.escape(number_text)}/, "<span class='confidentialNumber'>#{number_text}</span>"), final_number_tokens]
105
109
  end
106
110
 
@@ -115,44 +119,47 @@ module ConfidentialInfoRedactorLite
115
119
 
116
120
  def redact_dates_html(txt)
117
121
  redacted_text = redact_dates(txt)
118
- original_sentence_array = txt.split(' ')
119
- redacted_sentence_array = redacted_text.split(' ')
120
- diff = original_sentence_array - redacted_sentence_array
121
- date_tokens = []
122
- redacted_text.split(' ').each_with_index do |redacted_token, index|
123
- if redacted_token.gsub(/\./, '') == date_text
124
- original_sentence_array.each_with_index do |original_token, i|
125
- if redacted_sentence_array[index - 1] == original_token &&
126
- diff.include?(original_sentence_array[i + 1]) &&
127
- original_sentence_array[i + 2] == redacted_sentence_array[index + 1]
128
- date_tokens << original_sentence_array[i + 1]
129
- end
130
- if redacted_sentence_array[index - 1] == original_token &&
131
- diff.include?(original_sentence_array[i + 1]) &&
132
- diff.include?(original_sentence_array[i + 2]) &&
133
- original_sentence_array[i + 3] == redacted_sentence_array[index + 1]
134
- date_tokens << original_sentence_array[i + 1] + ' ' + original_sentence_array[i + 2]
135
- end
136
- if redacted_sentence_array[index - 1] == original_token &&
137
- diff.include?(original_sentence_array[i + 1]) &&
138
- diff.include?(original_sentence_array[i + 2]) &&
139
- diff.include?(original_sentence_array[i + 3]) &&
140
- original_sentence_array[i + 4] == redacted_sentence_array[index + 1]
141
- date_tokens << original_sentence_array[i + 1] + ' ' + original_sentence_array[i + 2] + ' ' + original_sentence_array[i + 3]
142
- end
143
- if redacted_sentence_array[index - 1] == original_token &&
144
- diff.include?(original_sentence_array[i + 1]) &&
145
- diff.include?(original_sentence_array[i + 2]) &&
146
- diff.include?(original_sentence_array[i + 3]) &&
147
- diff.include?(original_sentence_array[i + 4]) &&
148
- original_sentence_array[i + 5] == redacted_sentence_array[index + 1]
149
- date_tokens << original_sentence_array[i + 1] + ' ' + original_sentence_array[i + 2] + ' ' + original_sentence_array[i + 3] + ' ' + original_sentence_array[i + 4]
122
+ if language.eql?('ja')
123
+ final_date_tokens = txt.scan(/[0123456789]+年[0123456789]+月[0123456789]+日|[0123456789]+月[0123456789]+日/)
124
+ else
125
+ original_sentence_array = txt.split(' ')
126
+ redacted_sentence_array = redacted_text.split(' ')
127
+ diff = original_sentence_array - redacted_sentence_array
128
+ date_tokens = []
129
+ redacted_text.split(' ').each_with_index do |redacted_token, index|
130
+ if redacted_token.gsub(/\./, '') == date_text
131
+ original_sentence_array.each_with_index do |original_token, i|
132
+ if redacted_sentence_array[index - 1] == original_token &&
133
+ diff.include?(original_sentence_array[i + 1]) &&
134
+ original_sentence_array[i + 2] == redacted_sentence_array[index + 1]
135
+ date_tokens << original_sentence_array[i + 1]
136
+ end
137
+ if redacted_sentence_array[index - 1] == original_token &&
138
+ diff.include?(original_sentence_array[i + 1]) &&
139
+ diff.include?(original_sentence_array[i + 2]) &&
140
+ original_sentence_array[i + 3] == redacted_sentence_array[index + 1]
141
+ date_tokens << original_sentence_array[i + 1] + ' ' + original_sentence_array[i + 2]
142
+ end
143
+ if redacted_sentence_array[index - 1] == original_token &&
144
+ diff.include?(original_sentence_array[i + 1]) &&
145
+ diff.include?(original_sentence_array[i + 2]) &&
146
+ diff.include?(original_sentence_array[i + 3]) &&
147
+ original_sentence_array[i + 4] == redacted_sentence_array[index + 1]
148
+ date_tokens << original_sentence_array[i + 1] + ' ' + original_sentence_array[i + 2] + ' ' + original_sentence_array[i + 3]
149
+ end
150
+ if redacted_sentence_array[index - 1] == original_token &&
151
+ diff.include?(original_sentence_array[i + 1]) &&
152
+ diff.include?(original_sentence_array[i + 2]) &&
153
+ diff.include?(original_sentence_array[i + 3]) &&
154
+ diff.include?(original_sentence_array[i + 4]) &&
155
+ original_sentence_array[i + 5] == redacted_sentence_array[index + 1]
156
+ date_tokens << original_sentence_array[i + 1] + ' ' + original_sentence_array[i + 2] + ' ' + original_sentence_array[i + 3] + ' ' + original_sentence_array[i + 4]
157
+ end
150
158
  end
151
159
  end
152
160
  end
161
+ final_date_tokens = date_tokens.map { |token| token[-1].eql?('.') ? token[0...-1] : token }
153
162
  end
154
-
155
- final_date_tokens = date_tokens.map { |token| token[-1].eql?('.') ? token[0...-1] : token }
156
163
  [redacted_text.gsub(/#{Regexp.escape(date_text)}/, "<span class='confidentialDate'>#{date_text}</span>"), final_date_tokens]
157
164
  end
158
165
 
@@ -1,3 +1,3 @@
1
1
  module ConfidentialInfoRedactorLite
2
- VERSION = "0.0.24"
2
+ VERSION = "0.0.25"
3
3
  end
@@ -49,6 +49,11 @@ RSpec.describe ConfidentialInfoRedactorLite::Redactor do
49
49
  text = 'On May 1st, 2000 Coca-Cola announced a merger with Pepsi that will happen on December 15th, 2020.'
50
50
  expect(described_class.new(text: text, language: 'en', dow: en_dow, dow_abbr: en_dow_abbr, months: en_months, months_abbr: en_month_abbr, date_text: "*****").dates_html).to eq(["On <span class='confidentialDate'>*****</span> Coca-Cola announced a merger with Pepsi that will happen on <span class='confidentialDate'>*****</span>.", ['May 1st, 2000', 'December 15th, 2020']])
51
51
  end
52
+
53
+ it 'surrounds the redacted dates in spans and return the redacted dates from a text #002' do
54
+ text = '2011年12月31日です。'
55
+ expect(described_class.new(text: text, language: 'ja', dow: en_dow, dow_abbr: en_dow_abbr, months: en_months, months_abbr: en_month_abbr, date_text: "*****").dates_html).to eq(["<span class='confidentialDate'>*****</span> です。", ["2011年12月31日"]])
56
+ end
52
57
  end
53
58
 
54
59
  describe '#numbers' do
@@ -88,6 +93,11 @@ RSpec.describe ConfidentialInfoRedactorLite::Redactor do
88
93
  text = 'It was his 1st) time, not yet his 10th, not even his 2nd. The wood was 3/4" thick. It cost $200,000.'
89
94
  expect(described_class.new(text: text, language: 'en', dow: en_dow, dow_abbr: en_dow_abbr, months: en_months, months_abbr: en_month_abbr, number_text: "*****").numbers_html).to eq(["It was his <span class='confidentialNumber'>*****</span>) time, not yet his <span class='confidentialNumber'>*****</span>, not even his <span class='confidentialNumber'>*****</span>. The wood was <span class='confidentialNumber'>*****</span> thick. It cost <span class='confidentialNumber'>*****</span>.", ["1st", "10th,", "2nd", "3/4\"", "$200,000"]])
90
95
  end
96
+
97
+ it 'surrounds the redacted numbers in spans and return the redacted numbers from a text #002' do
98
+ text = 'プロのミニチュアゴルファー2人のサイン。2人の出身国は別であること。(45ポイント;それぞれが別の大陸出身だった場合、5ボーナスポイント。)'
99
+ expect(described_class.new(text: text, language: 'ja', dow: en_dow, dow_abbr: en_dow_abbr, months: en_months, months_abbr: en_month_abbr, number_text: "*****").numbers_html).to eq(["プロのミニチュアゴルファー <span class='confidentialNumber'>*****</span> 人のサイン。 <span class='confidentialNumber'>*****</span> 人の出身国は別であること。( <span class='confidentialNumber'>*****</span> ポイント;それぞれが別の大陸出身だった場合、 <span class='confidentialNumber'>*****</span> ボーナスポイント。)", ["2", "2", "45", "5"]])
100
+ end
91
101
  end
92
102
 
93
103
  describe '#emails' do
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: confidential_info_redactor_lite
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.24
4
+ version: 0.0.25
5
5
  platform: ruby
6
6
  authors:
7
7
  - Kevin S. Dias