RubyGems - confidential_info_redactor_lite - Versions diffs - 0.0.12 → 0.0.13 - Mend

confidential_info_redactor_lite 0.0.12 → 0.0.13

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (7)

checksums.yaml +4 -4
data/lib/confidential_info_redactor_lite/hyperlink.rb +2 -2
data/lib/confidential_info_redactor_lite/redactor.rb +10 -8
data/lib/confidential_info_redactor_lite/version.rb +1 -1
data/spec/confidential_info_redactor_lite/hyperlink_spec.rb +1 -1
data/spec/confidential_info_redactor_lite/redactor_spec.rb +15 -9
metadata +1 -1

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: ad118f207a2c77d6576afe2450015ffb53362817
-  data.tar.gz: 5c62ec610e4ca6361fbf977614eb6a8565424451
+  metadata.gz: 6b37f8797a8ca98088c77462fd3cc34f7bfbb9ef
+  data.tar.gz: ef75c18fada7655ab22d171c1741a9accce73b92
 SHA512:
-  metadata.gz: 4da649dbf21c536a4ba5404636297447db7adcafb78e5cbd7375a5d8bd21ee4d087c192214cfe52167c5a0090fd853fdccbe0d9f9aaf7212bb7d422ceb0348ab
-  data.tar.gz: 05c05072e9ebb34139c959f4e1c2351a36a6e48ea7d8bcd4a853cd75eea3ccc57e8fd4d5095e90a86c1fef8d84eb4657117f654457ed88eb1461fcb1962a6c28
+  metadata.gz: f63e4997217c4e89dd432427d3a3a1d734cf5c2703fbc03343c90444948e41d618b4b4f014c8ce167bec4e5899a70fc20e5adcea14e3eda63f5328dd0c3877b9
+  data.tar.gz: 110de24454372d0572f6a411ca4fd14a9471fb152509d9aecc6707a071c392307bda310ed058b82535755a422e9e59ea15761ca4662f259206ddd9ddea795156

data/lib/confidential_info_redactor_lite/hyperlink.rb CHANGED Viewed

@@ -20,9 +20,9 @@ module ConfidentialInfoRedactorLite
       new_string = string.dup
       string.split(/\s+/).each do |token|
         if !(token !~ URI.regexp) && token !~ NON_HYPERLINK_REGEX && !(token !~ HYPERLINK_REGEX) && token.include?('">')
-          new_string = new_string.gsub(/#{Regexp.escape(token.split('">')[0].gsub(/\.\z/, ''))}/, ' <redacted> ')
+          new_string = new_string.gsub(/#{Regexp.escape(token.split('">')[0].gsub(/\.\z/, ''))}/, ' <redacted hyperlink> ')
         elsif !(token !~ URI.regexp) && token !~ NON_HYPERLINK_REGEX && !(token !~ HYPERLINK_REGEX)
-          new_string = new_string.gsub(/#{Regexp.escape(token.gsub(/\.\z/, ''))}/, ' <redacted> ')
+          new_string = new_string.gsub(/#{Regexp.escape(token.gsub(/\.\z/, ''))}/, ' <redacted hyperlink> ')
         end
       end
       new_string

data/lib/confidential_info_redactor_lite/redactor.rb CHANGED Viewed

@@ -9,7 +9,7 @@ module ConfidentialInfoRedactorLite
     # Rubular: http://rubular.com/r/mxcj2G0Jfa
     EMAIL_REGEX = /(?<=\A|\s|\()[\w+\-.]+@[a-z\d\-]+(\.[a-z]+)*\.[a-z]+(?=\z|\s|\.|\))/i
-    attr_reader :text, :language, :number_text, :date_text, :token_text, :tokens, :ignore_emails, :ignore_dates, :ignore_numbers, :ignore_hyperlinks, :dow, :dow_abbr, :months, :months_abbr
+    attr_reader :text, :language, :email_text, :hyperlink_text, :number_text, :date_text, :token_text, :tokens, :ignore_emails, :ignore_dates, :ignore_numbers, :ignore_hyperlinks, :dow, :dow_abbr, :months, :months_abbr
     def initialize(text:, dow:, dow_abbr:, months:, months_abbr:, **args)
       @text = text
       @language = args[:language] || 'en'
@@ -17,6 +17,8 @@ module ConfidentialInfoRedactorLite
       @number_text = args[:number_text] || '<redacted number>'
       @date_text = args[:date_text] || '<redacted date>'
       @token_text = args[:token_text] || '<redacted>'
+      @email_text = args[:email_text] || '<redacted email>'
+      @hyperlink_text = args[:hyperlink_text] || '<redacted hyperlink>'
       @ignore_emails = args[:ignore_emails]
       @ignore_dates = args[:ignore_dates]
       @ignore_numbers = args[:ignore_numbers]
@@ -85,16 +87,16 @@ module ConfidentialInfoRedactorLite
     private
     def redact_hyperlinks_html(txt)
-      redacted_text = redact_hyperlinks(txt).gsub(/\>\s#{Regexp.escape(token_text)}\s\</, ">#{token_text}<").gsub(/\>\s#{Regexp.escape(number_text)}\s\</, ">#{number_text}<").gsub(/\>\s#{Regexp.escape(date_text)}\s\</, ">#{date_text}<")
+      redacted_text = redact_hyperlinks(txt).gsub(/\>\s#{Regexp.escape(token_text)}\s\</, ">#{token_text}<").gsub(/\>\s#{Regexp.escape(number_text)}\s\</, ">#{number_text}<").gsub(/\>\s#{Regexp.escape(date_text)}\s\</, ">#{date_text}<").gsub(/\>\s#{Regexp.escape(email_text)}\s\</, ">#{email_text}<").gsub(/\>\s#{Regexp.escape(hyperlink_text)}\s\</, ">#{hyperlink_text}<")
       original_sentence_array = txt.split(' ')
       redacted_sentence_array = redacted_text.split(' ')
       diff = original_sentence_array - redacted_sentence_array
       final_hyperlinks_tokens = diff.map { |token| token[-1].eql?('.') ? token[0...-1] : token }.map { |token| token[-1].eql?(')') ? token[0...-1] : token }.map { |token| token[0].eql?('(') ? token[1..token.length] : token }
-      [redacted_text.gsub(/(?<=[^\>])#{Regexp.escape(token_text)}/, "<span class='confidentialHyperlinks'>#{token_text}</span>"), final_hyperlinks_tokens]
+      [redacted_text.gsub(/(?<=[^\>])#{Regexp.escape(hyperlink_text)}/, "<span class='confidentialHyperlinks'>#{hyperlink_text}</span>"), final_hyperlinks_tokens]
     end
     def redact_numbers_html(txt)
-      redacted_text = redact_numbers(txt).gsub(/\>\s#{Regexp.escape(token_text)}\s\</, ">#{token_text}<").gsub(/\>\s#{Regexp.escape(number_text)}\s\</, ">#{number_text}<").gsub(/\>\s#{Regexp.escape(date_text)}\s\</, ">#{date_text}<")
+      redacted_text = redact_numbers(txt).gsub(/\>\s#{Regexp.escape(token_text)}\s\</, ">#{token_text}<").gsub(/\>\s#{Regexp.escape(number_text)}\s\</, ">#{number_text}<").gsub(/\>\s#{Regexp.escape(date_text)}\s\</, ">#{date_text}<").gsub(/\>\s#{Regexp.escape(email_text)}\s\</, ">#{email_text}<").gsub(/\>\s#{Regexp.escape(hyperlink_text)}\s\</, ">#{hyperlink_text}<")
       original_sentence_array = txt.split(' ')
       redacted_sentence_array = redacted_text.split(' ')
       diff = original_sentence_array - redacted_sentence_array
@@ -103,12 +105,12 @@ module ConfidentialInfoRedactorLite
     end
     def redact_emails_html(txt)
-      redacted_text = redact_emails(txt).gsub(/\>\s#{Regexp.escape(token_text)}\s\</, ">#{token_text}<").gsub(/\>\s#{Regexp.escape(number_text)}\s\</, ">#{number_text}<").gsub(/\>\s#{Regexp.escape(date_text)}\s\</, ">#{date_text}<")
+      redacted_text = redact_emails(txt).gsub(/\>\s#{Regexp.escape(token_text)}\s\</, ">#{token_text}<").gsub(/\>\s#{Regexp.escape(number_text)}\s\</, ">#{number_text}<").gsub(/\>\s#{Regexp.escape(date_text)}\s\</, ">#{date_text}<").gsub(/\>\s#{Regexp.escape(email_text)}\s\</, ">#{email_text}<").gsub(/\>\s#{Regexp.escape(hyperlink_text)}\s\</, ">#{hyperlink_text}<")
       original_sentence_array = txt.split(' ')
       redacted_sentence_array = redacted_text.split(' ')
       diff = original_sentence_array - redacted_sentence_array
       final_email_tokens = diff.map { |token| token[-1].eql?('.') ? token[0...-1] : token }.map { |token| token[-1].eql?(')') ? token[0...-1] : token }.map { |token| token[0].eql?('(') ? token[1..token.length] : token }
-      [redacted_text.gsub(/(?<=[^\>])#{Regexp.escape(token_text)}/, "<span class='confidentialEmail'>#{token_text}</span>"), final_email_tokens]
+      [redacted_text.gsub(/(?<=[^\>])#{Regexp.escape(email_text)}/, "<span class='confidentialEmail'>#{email_text}</span>"), final_email_tokens]
     end
     def redact_dates_html(txt)
@@ -155,7 +157,7 @@ module ConfidentialInfoRedactorLite
     end
     def redact_hyperlinks(txt)
-      ConfidentialInfoRedactorLite::Hyperlink.new(string: txt).replace.gsub(/<redacted>/, "#{token_text}").gsub(/\s*#{Regexp.escape(token_text)}\s*/, " #{token_text} ").gsub(/#{Regexp.escape(token_text)}\s{1}\.{1}/, "#{token_text}.").gsub(/#{Regexp.escape(token_text)}\s{1}\,{1}/, "#{token_text},")
+      ConfidentialInfoRedactorLite::Hyperlink.new(string: txt).replace.gsub(/<redacted hyperlink>/, "#{hyperlink_text}").gsub(/\s*#{Regexp.escape(hyperlink_text)}\s*/, " #{hyperlink_text} ").gsub(/#{Regexp.escape(hyperlink_text)}\s{1}\.{1}/, "#{hyperlink_text}.").gsub(/#{Regexp.escape(hyperlink_text)}\s{1}\,{1}/, "#{hyperlink_text},")
     end
     def redact_dates(txt)
@@ -167,7 +169,7 @@ module ConfidentialInfoRedactorLite
     end
     def redact_emails(txt)
-      txt.gsub(EMAIL_REGEX, "#{token_text}")
+      txt.gsub(EMAIL_REGEX, "#{email_text}")
     end
     def redact_tokens(txt)

data/lib/confidential_info_redactor_lite/version.rb CHANGED Viewed

@@ -1,3 +1,3 @@
 module ConfidentialInfoRedactorLite
-  VERSION = "0.0.12"
+  VERSION = "0.0.13"
 end

data/spec/confidential_info_redactor_lite/hyperlink_spec.rb CHANGED Viewed

@@ -49,7 +49,7 @@ RSpec.describe ConfidentialInfoRedactorLite::Hyperlink do
     it 'replaces the hyperlinks in a string with regular tokens #001' do
       string = "Today the date is: Jan 1. Visit https://www.example.com/hello or http://www.google.co.uk"
       ws = described_class.new(string: string)
-      expect(ws.replace).to eq("Today the date is: Jan 1. Visit  <redacted>  or  <redacted> ")
+      expect(ws.replace).to eq("Today the date is: Jan 1. Visit  <redacted hyperlink>  or  <redacted hyperlink> ")
     end
     it 'replaces the hyperlinks in a string with regular tokens #002' do

data/spec/confidential_info_redactor_lite/redactor_spec.rb CHANGED Viewed

@@ -88,33 +88,33 @@ RSpec.describe ConfidentialInfoRedactorLite::Redactor do
   describe '#emails' do
     it 'redacts email addresses from a text #001' do
       text = 'His email is john@gmail.com or you can try k.light@tuv.eu.us.'
-      expect(described_class.new(text: text, language: 'en', dow: en_dow, dow_abbr: en_dow_abbr, months: en_months, months_abbr: en_month_abbr).emails).to eq('His email is <redacted> or you can try <redacted>.')
+      expect(described_class.new(text: text, language: 'en', dow: en_dow, dow_abbr: en_dow_abbr, months: en_months, months_abbr: en_month_abbr).emails).to eq('His email is <redacted email> or you can try <redacted email>.')
     end
     it 'redacts email addresses from a text #002' do
       text = 'His email is (john@gmail.com) or you can try (k.light@tuv.eu.us).'
-      expect(described_class.new(text: text, language: 'en', dow: en_dow, dow_abbr: en_dow_abbr, months: en_months, months_abbr: en_month_abbr).emails).to eq('His email is (<redacted>) or you can try (<redacted>).')
+      expect(described_class.new(text: text, language: 'en', dow: en_dow, dow_abbr: en_dow_abbr, months: en_months, months_abbr: en_month_abbr).emails).to eq('His email is (<redacted email>) or you can try (<redacted email>).')
     end
   end
   describe '#emails_html' do
     it 'surrounds the redacted emails in spans and return the redacted emails from a text #001' do
       text = 'His email is (john@gmail.com) or you can try (k.light@tuv.eu.us).'
-      expect(described_class.new(text: text, language: 'en', dow: en_dow, dow_abbr: en_dow_abbr, months: en_months, months_abbr: en_month_abbr, token_text: "*****").emails_html).to eq(["His email is (<span class='confidentialEmail'>*****</span>) or you can try (<span class='confidentialEmail'>*****</span>).", ["john@gmail.com", "k.light@tuv.eu.us"]])
+      expect(described_class.new(text: text, language: 'en', dow: en_dow, dow_abbr: en_dow_abbr, months: en_months, months_abbr: en_month_abbr, token_text: "*****").emails_html).to eq(["His email is (<span class='confidentialEmail'><redacted email></span>) or you can try (<span class='confidentialEmail'><redacted email></span>).", ["john@gmail.com", "k.light@tuv.eu.us"]])
     end
   end
   describe '#hyperlinks' do
     it 'redacts hyperlinks from a text #001' do
       text = 'Visit https://www.tm-town.com for more info.'
-      expect(described_class.new(text: text, language: 'en', dow: en_dow, dow_abbr: en_dow_abbr, months: en_months, months_abbr: en_month_abbr).hyperlinks).to eq('Visit <redacted> for more info.')
+      expect(described_class.new(text: text, language: 'en', dow: en_dow, dow_abbr: en_dow_abbr, months: en_months, months_abbr: en_month_abbr).hyperlinks).to eq('Visit <redacted hyperlink> for more info.')
     end
   end
   describe '#hyperlinks_html' do
     it 'surrounds the redacted hyperlinks in spans and return the redacted hyperlinks from a text #001' do
       text = 'Visit https://www.tm-town.com for more info or https://www.google.com.'
-      expect(described_class.new(text: text, language: 'en', dow: en_dow, dow_abbr: en_dow_abbr, months: en_months, months_abbr: en_month_abbr, token_text: "*****").hyperlinks_html).to eq(["Visit <span class='confidentialHyperlinks'>*****</span> for more info or <span class='confidentialHyperlinks'>*****</span>.", ["https://www.tm-town.com", "https://www.google.com"]])
+      expect(described_class.new(text: text, language: 'en', dow: en_dow, dow_abbr: en_dow_abbr, months: en_months, months_abbr: en_month_abbr, token_text: "*****", hyperlink_text: "*****", email_text: "*****").hyperlinks_html).to eq(["Visit <span class='confidentialHyperlinks'>*****</span> for more info or <span class='confidentialHyperlinks'>*****</span>.", ["https://www.tm-town.com", "https://www.google.com"]])
     end
   end
@@ -204,19 +204,19 @@ RSpec.describe ConfidentialInfoRedactorLite::Redactor do
     it 'redacts all confidential information from a text #003' do
       tokens = ['Coca-Cola', 'Pepsi', 'John Smith']
       text = 'Coca-Cola announced a merger with Pepsi that will happen on December 15th, 2020 for $200,000,000,000. Please contact John Smith at j.smith@example.com or visit http://www.super-fake-merger.com.'
-      expect(described_class.new(text: text, language: 'en', tokens: tokens, dow: en_dow, dow_abbr: en_dow_abbr, months: en_months, months_abbr: en_month_abbr).redact).to eq('<redacted> announced a merger with <redacted> that will happen on <redacted date> for <redacted number>. Please contact <redacted> at <redacted> or visit <redacted>.')
+      expect(described_class.new(text: text, language: 'en', tokens: tokens, dow: en_dow, dow_abbr: en_dow_abbr, months: en_months, months_abbr: en_month_abbr).redact).to eq('<redacted> announced a merger with <redacted> that will happen on <redacted date> for <redacted number>. Please contact <redacted> at <redacted email> or visit <redacted hyperlink>.')
     end
     it 'redacts all confidential information from a text #004' do
       tokens = ['Coca-Cola', 'Pepsi', 'John Smith']
       text = 'Coca-Cola announced a merger with Pepsi that will happen on December 15th, 2020 for $200,000,000,000. Please contact John Smith at j.smith@example.com or visit http://www.super-fake-merger.com.'
-      expect(described_class.new(text: text, language: 'en', tokens: tokens, ignore_numbers: true, dow: en_dow, dow_abbr: en_dow_abbr, months: en_months, months_abbr: en_month_abbr).redact).to eq('<redacted> announced a merger with <redacted> that will happen on <redacted date> for $200,000,000,000. Please contact <redacted> at <redacted> or visit <redacted>.')
+      expect(described_class.new(text: text, language: 'en', tokens: tokens, ignore_numbers: true, dow: en_dow, dow_abbr: en_dow_abbr, months: en_months, months_abbr: en_month_abbr).redact).to eq('<redacted> announced a merger with <redacted> that will happen on <redacted date> for $200,000,000,000. Please contact <redacted> at <redacted email> or visit <redacted hyperlink>.')
     end
     it 'redacts all confidential information from a text #005' do
       tokens = ['Coca-Cola', 'Pepsi', 'John Smith']
       text = 'Coca-Cola announced a merger with Pepsi that will happen on December 15th, 2020 for $200,000,000,000. Please contact John Smith at j.smith@example.com or visit http://www.super-fake-merger.com.'
-      expect(described_class.new(text: text, language: 'en', tokens: tokens, number_text: '**redacted number**', date_text: '^^redacted date^^', token_text: '*****', dow: en_dow, dow_abbr: en_dow_abbr, months: en_months, months_abbr: en_month_abbr).redact).to eq('***** announced a merger with ***** that will happen on ^^redacted date^^ for **redacted number**. Please contact ***** at ***** or visit *****.')
+      expect(described_class.new(text: text, language: 'en', tokens: tokens, number_text: '**redacted number**', date_text: '^^redacted date^^', token_text: '*****', hyperlink_text: '*****', email_text: '*****', dow: en_dow, dow_abbr: en_dow_abbr, months: en_months, months_abbr: en_month_abbr).redact).to eq('***** announced a merger with ***** that will happen on ^^redacted date^^ for **redacted number**. Please contact ***** at ***** or visit *****.')
     end
   end
@@ -224,7 +224,13 @@ RSpec.describe ConfidentialInfoRedactorLite::Redactor do
     it 'redacts all confidential information from a text #001' do
       tokens = ['Coca-Cola', 'Pepsi']
       text = 'Coca-Cola announced a merger with Pepsi that will happen on on December 15th, 2020 for $200,000,000,000. Find out more at https://www.merger.com or contact john@merger.com.'
-      expect(described_class.new(text: text, language: 'en', tokens: tokens, dow: en_dow, dow_abbr: en_dow_abbr, months: en_months, months_abbr: en_month_abbr, number_text: '*****', date_text: '*****', token_text: '*****').redact_html).to eq("Coca-Cola announced a merger with Pepsi that will happen on on <span class='confidentialDate'>*****</span> for <span class='confidentialNumber'>*****</span>. Find out more at <span class='confidentialHyperlinks'>*****</span> or contact <span class='confidentialEmail'>*****</span>.")
+      expect(described_class.new(text: text, language: 'en', tokens: tokens, dow: en_dow, dow_abbr: en_dow_abbr, months: en_months, months_abbr: en_month_abbr, number_text: '*****', hyperlink_text: '*****', email_text: '*****', date_text: '*****', token_text: '*****').redact_html).to eq("Coca-Cola announced a merger with Pepsi that will happen on on <span class='confidentialDate'>*****</span> for <span class='confidentialNumber'>*****</span>. Find out more at <span class='confidentialHyperlinks'>*****</span> or contact <span class='confidentialEmail'>*****</span>.")
+    end
+    it 'redacts all confidential information from a text #002' do
+      tokens = ['Coca-Cola', 'Pepsi']
+      text = 'Coca-Cola announced a merger with Pepsi that will happen on on December 15th, 2020 for $200,000,000,000. Find out more at https://www.merger.com or contact john@merger.com.'
+      expect(described_class.new(text: text, language: 'en', tokens: tokens, dow: en_dow, dow_abbr: en_dow_abbr, months: en_months, months_abbr: en_month_abbr, email_text: '**email**', number_text: '**number**', date_text: '**date**', hyperlink_text: '**url**', token_text: '*****').redact).to eq("***** announced a merger with ***** that will happen on on **date** for **number**. Find out more at **url** or contact **email**.")
     end
   end
 end

metadata CHANGED Viewed

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: confidential_info_redactor_lite
 version: !ruby/object:Gem::Version
-  version: 0.0.12
+  version: 0.0.13
 platform: ruby
 authors:
 - Kevin S. Dias