confidential_info_redactor_lite 0.0.14 → 0.0.15
This diff shows the published contents of the two package versions as they appear in their public registry. It is provided for informational purposes only and reflects only the changes between the released versions.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 456160b2c0748728398a7a5804f40389d141a56a
+  data.tar.gz: 29194c0181768c1f35b9558e5934c6133a376a41
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: f8d4a592e035ccadef1c8bd595978d998dfa297064fef08abe396d7b309441b54dfbd5cd02a0aeca231765f25d5316a8d334af5018d3a6244ec40e493db5c653
+  data.tar.gz: 22d0eea27d9a10fb5f42c8076b713995490ba95918761816cdeee4aa43acfb37248133944c4adcb396959f30bb7cbeb69624c8119abc5fea21c419043b4abfa4
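Both checksum families now carry values for the two archives packaged inside the gem. A minimal Ruby sketch of how those published digests could be re-computed locally, assuming the .gem file has already been unpacked so that metadata.gz and data.tar.gz are plain files in the current directory (the paths are illustrative, not part of the gem):

require 'digest'

# Recompute the digests that checksums.yaml records for each archive.
%w[metadata.gz data.tar.gz].each do |archive|
  puts "#{archive}:"
  puts "  SHA1:   #{Digest::SHA1.file(archive).hexdigest}"
  puts "  SHA512: #{Digest::SHA512.file(archive).hexdigest}"
end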
@@ -13,19 +13,19 @@ module ConfidentialInfoRedactorLite
     def extract
       extracted_terms = []
       PragmaticSegmenter::Segmenter.new(text: text, language: language).segment.each do |segment|
-        initial_extracted_terms = segment.gsub(EXTRACT_REGEX).map { |match| match unless corpus.include?(match.downcase.gsub(/[\?\.\)\(\!\\\/\"\:\;]/, '').gsub(/\'$/, '')) }.compact
+        initial_extracted_terms = segment.gsub(EXTRACT_REGEX).map { |match| match unless corpus.include?(match.downcase.gsub(/[\?\.\)\(\!\\\/\"\:\;]/, '').gsub(/”/,'').gsub(/\'$/, '')) }.compact
         initial_extracted_terms.each do |ngram|
           ngram.split(/[\?\)\(\!\\\/\"\:\;\,]/).each do |t|
-            if corpus.include?(t.downcase.gsub(/[\?\)\(\!\\\/\"\:\;\,]/, '').gsub(/\'$/, '').gsub(/\.\z/, '').strip.split(' ')[0]) && t.downcase.gsub(/[\?\)\(\!\\\/\"\:\;\,]/, '').gsub(/\'$/, '').gsub(/\.\z/, '').strip.split(' ')[0] != 'the' && t.downcase.gsub(/[\?\)\(\!\\\/\"\:\;\,]/, '').gsub(/\'$/, '').gsub(/\.\z/, '').strip.split(' ')[0] != 'deutsche' && t.downcase.gsub(/[\?\)\(\!\\\/\"\:\;\,]/, '').gsub(/\'$/, '').gsub(/\.\z/, '').strip.split(' ').length.eql?(2)
-              extracted_terms << t.gsub(/[\?\)\(\!\\\/\"\:\;\,]/, '').gsub(/\'$/, '').gsub(/\.\z/, '').strip.split(' ')[1] unless corpus.include?(t.downcase.gsub(/[\?\.\)\(\!\\\/\"\:\;]/, '').gsub(/\'$/, '').strip.split(' ')[1])
+            if corpus.include?(t.downcase.gsub(/[\?\)\(\!\\\/\"\:\;\,]/, '').gsub(/\'$/, '').gsub(/”/,'').gsub(/\.\z/, '').strip.split(' ')[0]) && t.downcase.gsub(/[\?\)\(\!\\\/\"\:\;\,]/, '').gsub(/\'$/, '').gsub(/”/,'').gsub(/\.\z/, '').strip.split(' ')[0] != 'the' && t.downcase.gsub(/[\?\)\(\!\\\/\"\:\;\,]/, '').gsub(/\'$/, '').gsub(/\.\z/, '').strip.split(' ')[0] != 'deutsche' && t.downcase.gsub(/[\?\)\(\!\\\/\"\:\;\,]/, '').gsub(/\'$/, '').gsub(/\.\z/, '').strip.split(' ').length.eql?(2)
+              extracted_terms << t.gsub(/[\?\)\(\!\\\/\"\:\;\,]/, '').gsub(/\'$/, '').gsub(/”/,'').gsub(/\.\z/, '').strip.split(' ')[1] unless corpus.include?(t.downcase.gsub(/[\?\.\)\(\!\\\/\"\:\;]/, '').gsub(/\'$/, '').gsub(/”/,'').strip.split(' ')[1])
             else
               tracker = true
-              unless t.gsub(/[\?\)\(\!\\\/\"\:\;\,]/, '').gsub(/\'$/, '').gsub(/\.\z/, '').strip.split(' ').length.eql?(2) && t.gsub(/[\?\)\(\!\\\/\"\:\;\,]/, '').gsub(/\'$/, '').gsub(/\.\z/, '').strip.split(' ')[1].downcase.eql?('bank')
-                t.gsub(/[\?\)\(\!\\\/\"\:\;\,]/, '').gsub(/\'$/, '').gsub(/\.\z/, '').strip.split(' ').each do |token|
+              unless t.gsub(/[\?\)\(\!\\\/\"\:\;\,]/, '').gsub(/\'$/, '').gsub(/”/,'').gsub(/\.\z/, '').strip.split(' ').length.eql?(2) && t.gsub(/[\?\)\(\!\\\/\"\:\;\,]/, '').gsub(/\'$/, '').gsub(/”/,'').gsub(/\.\z/, '').strip.split(' ')[1].downcase.eql?('bank')
+                t.gsub(/[\?\)\(\!\\\/\"\:\;\,]/, '').gsub(/\'$/, '').gsub(/”/,'').gsub(/\.\z/, '').strip.split(' ').each do |token|
                   tracker = false if corpus.include?(token.downcase)
                 end
               end
-              extracted_terms << t.gsub(/[\?\)\(\!\\\/\"\:\;\,]/, '').gsub(/\'$/, '').gsub(/\.\z/, '').strip unless corpus.include?(t.downcase.gsub(/[\?\.\)\(\!\\\/\"\:\;]/, '').gsub(/\'$/, '').strip) || !tracker
+              extracted_terms << t.gsub(/[\?\)\(\!\\\/\"\:\;\,]/, '').gsub(/\'$/, '').gsub(/”/,'').gsub(/\.\z/, '').strip unless corpus.include?(t.downcase.gsub(/[\?\.\)\(\!\\\/\"\:\;]/, '').gsub(/”/,'').gsub(/\'$/, '').strip) || !tracker
             end
           end
         end
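The only functional change in the extractor is the added .gsub(/”/,'') call in front of each corpus lookup: the existing character classes strip only ASCII punctuation, so a typographic right double quote kept tokens from matching the corpus. A standalone sketch of the effect, reusing the same string handling outside the gem:

corpus = ['reducing']
token  = 'Reducing”'

# 0.0.14 behaviour: the curly quote survives the ASCII punctuation strip,
# the lookup misses, and the token stays a candidate proper noun.
corpus.include?(token.downcase.gsub(/[\?\.\)\(\!\\\/\"\:\;]/, '').gsub(/\'$/, ''))
# => false

# 0.0.15 behaviour: removing ” first lets the lookup match, so the token is discarded.
corpus.include?(token.downcase.gsub(/[\?\.\)\(\!\\\/\"\:\;]/, '').gsub(/”/,'').gsub(/\'$/, ''))
# => true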
@@ -1,7 +1,7 @@
 require 'spec_helper'
 
 RSpec.describe ConfidentialInfoRedactorLite::Extractor do
-  let(:corpus) { ['i', 'in', 'you', 'top', 'so', 'are', 'december', 'please', 'viele', 'mitarbeiter', 'arbeitsstelle', 'some', 'there', 'king', 'by', "don't", 'dec', 'at', 'dot', 'and', 'project', 'activity', 'complete', 'prizes', 'build', 'video', 'many', 'autographs', 'picture', 'the', 'each', 'submit', 'to', 'then', 'coming', 'screenshot', 'putter', 'king', 'miniature', 'good', 'bad', 'vs.', 'carbs', 'all', 'natural', 'peanut', 'butter', 'world', 'heritage', 'site', 'gift', 'card', 'engraved', 'crystal', 'trophy'] }
+  let(:corpus) { ['i', 'in', 'you', 'top', 'so', 'are', 'december', 'please', 'viele', 'mitarbeiter', 'arbeitsstelle', 'some', 'there', 'king', 'by', "don't", 'dec', 'at', 'dot', 'and', 'project', 'activity', 'complete', 'prizes', 'build', 'video', 'many', 'autographs', 'picture', 'the', 'each', 'submit', 'to', 'then', 'coming', 'screenshot', 'putter', 'king', 'miniature', 'good', 'bad', 'vs.', 'carbs', 'all', 'natural', 'peanut', 'butter', 'world', 'heritage', 'site', 'gift', 'card', 'engraved', 'crystal', 'trophy', 'reducing'] }
   describe '#extract' do
     context 'English (en)' do
       it 'extracts the proper nouns from a text #001' do
@@ -119,6 +119,21 @@ RSpec.describe ConfidentialInfoRedactorLite::Extractor do
         text = 'GOOD CARBS VS. BAD CARBS'
         expect(described_class.new(text: text, corpus: corpus, language: 'en').extract).to eq([])
       end
+
+      it 'extracts the proper nouns from a text #013' do
+        text = 'Reducing”'
+        expect(described_class.new(text: text, corpus: corpus, language: 'en').extract).to eq([])
+      end
+
+      it 'extracts the proper nouns from a text #014' do
+        text = '”'
+        expect(described_class.new(text: text, corpus: corpus, language: 'en').extract).to eq([])
+      end
+
+      it 'extracts the proper nouns from a text #015' do
+        text = '“Reducing'
+        expect(described_class.new(text: text, corpus: corpus, language: 'en').extract).to eq([])
+      end
     end
 
     context 'German (de)' do