confidential_info_redactor_lite 0.0.13 → 0.0.14
Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 7d546836b7298bd773fad876c9af181000f3867b
|
4
|
+
data.tar.gz: b66109b97cf689dabf3f92fa4445969aa0b24f70
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 4c986381225e75100ea16ac1c8c53fc9de21cdfcf0302adb0f3cc55c36a63e6d35b5474e3990f92495b7977cbd3618383517d876dfc11431c25ee178f7aa34ae
|
7
|
+
data.tar.gz: 5084bd86c4c02ae26a7be74850d9c7431c99deb9373c71f0904afcfa63148eaf6bd1fc8f55636d73347f67ae8c3c081d96b6e1340d7f9fe076d0a84b516bc1a2
|
@@ -19,7 +19,13 @@ module ConfidentialInfoRedactorLite
|
|
19
19
|
if corpus.include?(t.downcase.gsub(/[\?\)\(\!\\\/\"\:\;\,]/, '').gsub(/\'$/, '').gsub(/\.\z/, '').strip.split(' ')[0]) && t.downcase.gsub(/[\?\)\(\!\\\/\"\:\;\,]/, '').gsub(/\'$/, '').gsub(/\.\z/, '').strip.split(' ')[0] != 'the' && t.downcase.gsub(/[\?\)\(\!\\\/\"\:\;\,]/, '').gsub(/\'$/, '').gsub(/\.\z/, '').strip.split(' ')[0] != 'deutsche' && t.downcase.gsub(/[\?\)\(\!\\\/\"\:\;\,]/, '').gsub(/\'$/, '').gsub(/\.\z/, '').strip.split(' ').length.eql?(2)
|
20
20
|
extracted_terms << t.gsub(/[\?\)\(\!\\\/\"\:\;\,]/, '').gsub(/\'$/, '').gsub(/\.\z/, '').strip.split(' ')[1] unless corpus.include?(t.downcase.gsub(/[\?\.\)\(\!\\\/\"\:\;]/, '').gsub(/\'$/, '').strip.split(' ')[1])
|
21
21
|
else
|
22
|
-
|
22
|
+
tracker = true
|
23
|
+
unless t.gsub(/[\?\)\(\!\\\/\"\:\;\,]/, '').gsub(/\'$/, '').gsub(/\.\z/, '').strip.split(' ').length.eql?(2) && t.gsub(/[\?\)\(\!\\\/\"\:\;\,]/, '').gsub(/\'$/, '').gsub(/\.\z/, '').strip.split(' ')[1].downcase.eql?('bank')
|
24
|
+
t.gsub(/[\?\)\(\!\\\/\"\:\;\,]/, '').gsub(/\'$/, '').gsub(/\.\z/, '').strip.split(' ').each do |token|
|
25
|
+
tracker = false if corpus.include?(token.downcase)
|
26
|
+
end
|
27
|
+
end
|
28
|
+
extracted_terms << t.gsub(/[\?\)\(\!\\\/\"\:\;\,]/, '').gsub(/\'$/, '').gsub(/\.\z/, '').strip unless corpus.include?(t.downcase.gsub(/[\?\.\)\(\!\\\/\"\:\;]/, '').gsub(/\'$/, '').strip) || !tracker
|
23
29
|
end
|
24
30
|
end
|
25
31
|
end
|
@@ -1,7 +1,7 @@
|
|
1
1
|
require 'spec_helper'
|
2
2
|
|
3
3
|
RSpec.describe ConfidentialInfoRedactorLite::Extractor do
|
4
|
-
let(:corpus) { ['i', 'in', 'you', 'top', 'so', 'are', 'december', 'please', 'viele', 'mitarbeiter', 'arbeitsstelle', 'some', 'there', 'king', 'by', "don't", 'dec', 'at', 'dot', 'and', 'project', 'activity', 'complete', 'prizes', 'build', 'video', 'many', 'autographs', 'picture', 'the', 'each', 'submit', 'to', 'then', 'coming', 'screenshot'] }
|
4
|
+
let(:corpus) { ['i', 'in', 'you', 'top', 'so', 'are', 'december', 'please', 'viele', 'mitarbeiter', 'arbeitsstelle', 'some', 'there', 'king', 'by', "don't", 'dec', 'at', 'dot', 'and', 'project', 'activity', 'complete', 'prizes', 'build', 'video', 'many', 'autographs', 'picture', 'the', 'each', 'submit', 'to', 'then', 'coming', 'screenshot', 'putter', 'king', 'miniature', 'good', 'bad', 'vs.', 'carbs', 'all', 'natural', 'peanut', 'butter', 'world', 'heritage', 'site', 'gift', 'card', 'engraved', 'crystal', 'trophy'] }
|
5
5
|
describe '#extract' do
|
6
6
|
context 'English (en)' do
|
7
7
|
it 'extracts the proper nouns from a text #001' do
|
@@ -87,7 +87,7 @@ RSpec.describe ConfidentialInfoRedactorLite::Extractor do
|
|
87
87
|
|
88
88
|
Don’t forget to use your imagination and creativity!
|
89
89
|
EOF
|
90
|
-
expect(described_class.new(text: text, corpus: corpus).extract).to eq(["
|
90
|
+
expect(described_class.new(text: text, corpus: corpus).extract).to eq(["PGA", "iTunes", "YouTube", "Flickr", "Picasa", "Photobucket"])
|
91
91
|
end
|
92
92
|
|
93
93
|
it 'extracts the proper nouns from a text #007' do
|
@@ -104,6 +104,21 @@ RSpec.describe ConfidentialInfoRedactorLite::Extractor do
|
|
104
104
|
text = 'Then Peter went to the store.'
|
105
105
|
expect(described_class.new(text: text, corpus: corpus, language: 'en').extract).to eq(["Peter"])
|
106
106
|
end
|
107
|
+
|
108
|
+
it 'extracts the proper nouns from a text #010' do
|
109
|
+
text = 'HOW TO COOK VEGETABLES'
|
110
|
+
expect(described_class.new(text: text, corpus: corpus, language: 'en').extract).to eq([])
|
111
|
+
end
|
112
|
+
|
113
|
+
it 'extracts the proper nouns from a text #011' do
|
114
|
+
text = 'All Natural Peanut Butter'
|
115
|
+
expect(described_class.new(text: text, corpus: corpus, language: 'en').extract).to eq([])
|
116
|
+
end
|
117
|
+
|
118
|
+
it 'extracts the proper nouns from a text #012' do
|
119
|
+
text = 'GOOD CARBS VS. BAD CARBS'
|
120
|
+
expect(described_class.new(text: text, corpus: corpus, language: 'en').extract).to eq([])
|
121
|
+
end
|
107
122
|
end
|
108
123
|
|
109
124
|
context 'German (de)' do
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: confidential_info_redactor_lite
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.13
|
4
|
+
version: 0.0.14
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Kevin S. Dias
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-04-
|
11
|
+
date: 2015-04-23 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|