RubyGems - confidential_info_redactor_lite - Versions diffs - 0.0.7 → 0.0.8 - Mend

confidential_info_redactor_lite 0.0.7 → 0.0.8

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.

Files changed (5) hide show

checksums.yaml +4 -4
data/lib/confidential_info_redactor_lite/extractor.rb +5 -1
data/lib/confidential_info_redactor_lite/version.rb +1 -1
data/spec/confidential_info_redactor_lite/extractor_spec.rb +7 -2
metadata +2 -2

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: ae3ba12bb0731420494dd385bff38b9315ff938d
-  data.tar.gz: 5482bf82d5ea1551b205885e6aaec908f328910f
+  metadata.gz: a00a35159e096f07346d94b3ffd1ebf139ef9561
+  data.tar.gz: adacee6c75993572f2343bfef71fa94b70ab885a
 SHA512:
-  metadata.gz: 187274627b9d905463e45e6c24465cb5d55cf7e8c84207568e3feaa237a6e96d31035e20c21a623e177a3ca8a60c5a157fa25ac878271e853ffcd630bb361164
-  data.tar.gz: c96104ef54fc174b547591e7569c6c404d1d8c6d077e2cd4e87db0673817956050df526104b2b38d920c873a8d993294993d897765988b36362760ffe0af20c4
+  metadata.gz: 8958fef084a98d839334725773f4fa1a5a4ec0de746d0826a02902398ac83184052b9cc12bd9e22bfc819133a33d12b1f8891731801fcc3d49a717cac605711b
+  data.tar.gz: eb78915b7d9f8ccdb57d377b1af1be2afa754a91f12a222a7946b532b590212dd788eefcc3e8f0a808eebd8a63f5017f1df79bbac23c293c51ade05143e82c7e

data/lib/confidential_info_redactor_lite/extractor.rb CHANGED Viewed

@@ -16,7 +16,11 @@ module ConfidentialInfoRedactorLite
         initial_extracted_terms = segment.gsub(EXTRACT_REGEX).map { |match| match unless corpus.include?(match.downcase.gsub(/[\?\.\)\(\!\\\/\"\:\;]/, '').gsub(/\'$/, '')) }.compact
         initial_extracted_terms.each do |ngram|
           ngram.split(/[\?\)\(\!\\\/\"\:\;\,]/).each do |t|
-            extracted_terms << t.gsub(/[\?\)\(\!\\\/\"\:\;\,]/, '').gsub(/\'$/, '').gsub(/\.\z/, '').strip unless corpus.include?(t.downcase.gsub(/[\?\.\)\(\!\\\/\"\:\;]/, '').gsub(/\'$/, '').strip)
+            if corpus.include?(t.downcase.gsub(/[\?\)\(\!\\\/\"\:\;\,]/, '').gsub(/\'$/, '').gsub(/\.\z/, '').strip.split(' ')[0]) && t.downcase.gsub(/[\?\)\(\!\\\/\"\:\;\,]/, '').gsub(/\'$/, '').gsub(/\.\z/, '').strip.split(' ')[0] != 'the'
+              extracted_terms << t.gsub(/[\?\)\(\!\\\/\"\:\;\,]/, '').gsub(/\'$/, '').gsub(/\.\z/, '').strip.split(' ')[1] unless corpus.include?(t.downcase.gsub(/[\?\.\)\(\!\\\/\"\:\;]/, '').gsub(/\'$/, '').strip.split(' ')[1])
+            else
+              extracted_terms << t.gsub(/[\?\)\(\!\\\/\"\:\;\,]/, '').gsub(/\'$/, '').gsub(/\.\z/, '').strip unless corpus.include?(t.downcase.gsub(/[\?\.\)\(\!\\\/\"\:\;]/, '').gsub(/\'$/, '').strip)
+            end
           end
         end
       end

data/lib/confidential_info_redactor_lite/version.rb CHANGED Viewed

@@ -1,3 +1,3 @@
 module ConfidentialInfoRedactorLite
-  VERSION = "0.0.7"
+  VERSION = "0.0.8"
 end

data/spec/confidential_info_redactor_lite/extractor_spec.rb CHANGED Viewed

@@ -1,7 +1,7 @@
 require 'spec_helper'
 RSpec.describe ConfidentialInfoRedactorLite::Extractor do
-  let(:corpus) { ['i', 'in', 'you', 'top', 'so', 'are', 'december', 'please', 'viele', 'mitarbeiter', 'arbeitsstelle', 'some', 'there', 'king', 'by', "don't", 'dec', 'at', 'dot', 'and', 'project', 'activity', 'complete', 'prizes', 'build', 'video', 'many', 'autographs', 'picture', 'the', 'each', 'submit', 'to'] }
+  let(:corpus) { ['i', 'in', 'you', 'top', 'so', 'are', 'december', 'please', 'viele', 'mitarbeiter', 'arbeitsstelle', 'some', 'there', 'king', 'by', "don't", 'dec', 'at', 'dot', 'and', 'project', 'activity', 'complete', 'prizes', 'build', 'video', 'many', 'autographs', 'picture', 'the', 'each', 'submit', 'to', 'then', 'coming', 'screenshot'] }
   describe '#extract' do
     context 'English (en)' do
       it 'extracts the proper nouns from a text #001' do
@@ -87,7 +87,7 @@ RSpec.describe ConfidentialInfoRedactorLite::Extractor do
           Don’t forget to use your imagination and creativity!
         EOF
-        expect(described_class.new(text: text, corpus: corpus).extract).to eq(["Putter King Miniature Golf Scavenger Hunt", "Putter King", "Annual Miniature Golf Scavenger Hunt", "The Official List", "Nostalgic Miniature Golf Obstacles", "Putter King Hole Design Contest", "Screenshot", "World Heritage Site", "PGA", "iTunes", "Gift Card", "Putter King Scavenger Hunt Trophy", "Engraved Crystal Trophy", "Picture Coming Soon", "The Putter King", "The U.S. Government", "Putter King Scavenger Hunt Submission", "YouTube", "Flickr", "Picasa", "Photobucket"])
+        expect(described_class.new(text: text, corpus: corpus).extract).to eq(["Putter King Miniature Golf Scavenger Hunt", "Putter King", "Annual Miniature Golf Scavenger Hunt", "The Official List", "Nostalgic Miniature Golf Obstacles", "Putter King Hole Design Contest", "World Heritage Site", "PGA", "iTunes", "Gift Card", "Putter King Scavenger Hunt Trophy", "Engraved Crystal Trophy", "The Putter King", "The U.S. Government", "Putter King Scavenger Hunt Submission", "YouTube", "Flickr", "Picasa", "Photobucket"])
       end
       it 'extracts the proper nouns from a text #007' do
@@ -99,6 +99,11 @@ RSpec.describe ConfidentialInfoRedactorLite::Extractor do
         text = 'Coca-Cola announced a merger with Pepsi that will happen on December 15th, 2020 for $200,000,000,000. Please contact John Smith at j.smith@example.com or visit http://www.super-fake-merger.com.'
         expect(described_class.new(text: text, corpus: corpus, language: 'en').extract).to eq(["Coca-Cola", "Pepsi", "John Smith"])
       end
+      it 'extracts the proper nouns from a text #009' do
+        text = 'Then Peter went to the store.'
+        expect(described_class.new(text: text, corpus: corpus, language: 'en').extract).to eq(["Peter"])
+      end
     end
     context 'German (de)' do

metadata CHANGED Viewed

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: confidential_info_redactor_lite
 version: !ruby/object:Gem::Version
-  version: 0.0.7
+  version: 0.0.8
 platform: ruby
 authors:
 - Kevin S. Dias
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2015-04-17 00:00:00.000000000 Z
+date: 2015-04-21 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: bundler