confidential_info_redactor_lite 0.0.13 → 0.0.14

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 6b37f8797a8ca98088c77462fd3cc34f7bfbb9ef
4
- data.tar.gz: ef75c18fada7655ab22d171c1741a9accce73b92
3
+ metadata.gz: 7d546836b7298bd773fad876c9af181000f3867b
4
+ data.tar.gz: b66109b97cf689dabf3f92fa4445969aa0b24f70
5
5
  SHA512:
6
- metadata.gz: f63e4997217c4e89dd432427d3a3a1d734cf5c2703fbc03343c90444948e41d618b4b4f014c8ce167bec4e5899a70fc20e5adcea14e3eda63f5328dd0c3877b9
7
- data.tar.gz: 110de24454372d0572f6a411ca4fd14a9471fb152509d9aecc6707a071c392307bda310ed058b82535755a422e9e59ea15761ca4662f259206ddd9ddea795156
6
+ metadata.gz: 4c986381225e75100ea16ac1c8c53fc9de21cdfcf0302adb0f3cc55c36a63e6d35b5474e3990f92495b7977cbd3618383517d876dfc11431c25ee178f7aa34ae
7
+ data.tar.gz: 5084bd86c4c02ae26a7be74850d9c7431c99deb9373c71f0904afcfa63148eaf6bd1fc8f55636d73347f67ae8c3c081d96b6e1340d7f9fe076d0a84b516bc1a2
@@ -19,7 +19,13 @@ module ConfidentialInfoRedactorLite
19
19
  if corpus.include?(t.downcase.gsub(/[\?\)\(\!\\\/\"\:\;\,]/, '').gsub(/\'$/, '').gsub(/\.\z/, '').strip.split(' ')[0]) && t.downcase.gsub(/[\?\)\(\!\\\/\"\:\;\,]/, '').gsub(/\'$/, '').gsub(/\.\z/, '').strip.split(' ')[0] != 'the' && t.downcase.gsub(/[\?\)\(\!\\\/\"\:\;\,]/, '').gsub(/\'$/, '').gsub(/\.\z/, '').strip.split(' ')[0] != 'deutsche' && t.downcase.gsub(/[\?\)\(\!\\\/\"\:\;\,]/, '').gsub(/\'$/, '').gsub(/\.\z/, '').strip.split(' ').length.eql?(2)
20
20
  extracted_terms << t.gsub(/[\?\)\(\!\\\/\"\:\;\,]/, '').gsub(/\'$/, '').gsub(/\.\z/, '').strip.split(' ')[1] unless corpus.include?(t.downcase.gsub(/[\?\.\)\(\!\\\/\"\:\;]/, '').gsub(/\'$/, '').strip.split(' ')[1])
21
21
  else
22
- extracted_terms << t.gsub(/[\?\)\(\!\\\/\"\:\;\,]/, '').gsub(/\'$/, '').gsub(/\.\z/, '').strip unless corpus.include?(t.downcase.gsub(/[\?\.\)\(\!\\\/\"\:\;]/, '').gsub(/\'$/, '').strip)
22
+ tracker = true
23
+ unless t.gsub(/[\?\)\(\!\\\/\"\:\;\,]/, '').gsub(/\'$/, '').gsub(/\.\z/, '').strip.split(' ').length.eql?(2) && t.gsub(/[\?\)\(\!\\\/\"\:\;\,]/, '').gsub(/\'$/, '').gsub(/\.\z/, '').strip.split(' ')[1].downcase.eql?('bank')
24
+ t.gsub(/[\?\)\(\!\\\/\"\:\;\,]/, '').gsub(/\'$/, '').gsub(/\.\z/, '').strip.split(' ').each do |token|
25
+ tracker = false if corpus.include?(token.downcase)
26
+ end
27
+ end
28
+ extracted_terms << t.gsub(/[\?\)\(\!\\\/\"\:\;\,]/, '').gsub(/\'$/, '').gsub(/\.\z/, '').strip unless corpus.include?(t.downcase.gsub(/[\?\.\)\(\!\\\/\"\:\;]/, '').gsub(/\'$/, '').strip) || !tracker
23
29
  end
24
30
  end
25
31
  end
@@ -1,3 +1,3 @@
1
1
  module ConfidentialInfoRedactorLite
2
- VERSION = "0.0.13"
2
+ VERSION = "0.0.14"
3
3
  end
@@ -1,7 +1,7 @@
1
1
  require 'spec_helper'
2
2
 
3
3
  RSpec.describe ConfidentialInfoRedactorLite::Extractor do
4
- let(:corpus) { ['i', 'in', 'you', 'top', 'so', 'are', 'december', 'please', 'viele', 'mitarbeiter', 'arbeitsstelle', 'some', 'there', 'king', 'by', "don't", 'dec', 'at', 'dot', 'and', 'project', 'activity', 'complete', 'prizes', 'build', 'video', 'many', 'autographs', 'picture', 'the', 'each', 'submit', 'to', 'then', 'coming', 'screenshot'] }
4
+ let(:corpus) { ['i', 'in', 'you', 'top', 'so', 'are', 'december', 'please', 'viele', 'mitarbeiter', 'arbeitsstelle', 'some', 'there', 'king', 'by', "don't", 'dec', 'at', 'dot', 'and', 'project', 'activity', 'complete', 'prizes', 'build', 'video', 'many', 'autographs', 'picture', 'the', 'each', 'submit', 'to', 'then', 'coming', 'screenshot', 'putter', 'king', 'miniature', 'good', 'bad', 'vs.', 'carbs', 'all', 'natural', 'peanut', 'butter', 'world', 'heritage', 'site', 'gift', 'card', 'engraved', 'crystal', 'trophy'] }
5
5
  describe '#extract' do
6
6
  context 'English (en)' do
7
7
  it 'extracts the proper nouns from a text #001' do
@@ -87,7 +87,7 @@ RSpec.describe ConfidentialInfoRedactorLite::Extractor do
87
87
 
88
88
  Don’t forget to use your imagination and creativity!
89
89
  EOF
90
- expect(described_class.new(text: text, corpus: corpus).extract).to eq(["Putter King Miniature Golf Scavenger Hunt", "Putter King", "Annual Miniature Golf Scavenger Hunt", "The Official List", "Nostalgic Miniature Golf Obstacles", "Putter King Hole Design Contest", "World Heritage Site", "PGA", "iTunes", "Gift Card", "Putter King Scavenger Hunt Trophy", "Engraved Crystal Trophy", "Picture Coming Soon", "The Putter King", "The U.S. Government", "Putter King Scavenger Hunt Submission", "YouTube", "Flickr", "Picasa", "Photobucket"])
90
+ expect(described_class.new(text: text, corpus: corpus).extract).to eq(["PGA", "iTunes", "YouTube", "Flickr", "Picasa", "Photobucket"])
91
91
  end
92
92
 
93
93
  it 'extracts the proper nouns from a text #007' do
@@ -104,6 +104,21 @@ RSpec.describe ConfidentialInfoRedactorLite::Extractor do
104
104
  text = 'Then Peter went to the store.'
105
105
  expect(described_class.new(text: text, corpus: corpus, language: 'en').extract).to eq(["Peter"])
106
106
  end
107
+
108
+ it 'extracts the proper nouns from a text #010' do
109
+ text = 'HOW TO COOK VEGETABLES'
110
+ expect(described_class.new(text: text, corpus: corpus, language: 'en').extract).to eq([])
111
+ end
112
+
113
+ it 'extracts the proper nouns from a text #011' do
114
+ text = 'All Natural Peanut Butter'
115
+ expect(described_class.new(text: text, corpus: corpus, language: 'en').extract).to eq([])
116
+ end
117
+
118
+ it 'extracts the proper nouns from a text #012' do
119
+ text = 'GOOD CARBS VS. BAD CARBS'
120
+ expect(described_class.new(text: text, corpus: corpus, language: 'en').extract).to eq([])
121
+ end
107
122
  end
108
123
 
109
124
  context 'German (de)' do
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: confidential_info_redactor_lite
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.13
4
+ version: 0.0.14
5
5
  platform: ruby
6
6
  authors:
7
7
  - Kevin S. Dias
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-04-22 00:00:00.000000000 Z
11
+ date: 2015-04-23 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler