confidential_info_redactor_lite 0.0.20 → 0.0.21
Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 3f4f178e9f12c2b63b517cb83c476e4bba526f60
|
4
|
+
data.tar.gz: 3724ad4b679f8c56f27a23893d6a6b2e58f47d27
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 370acfe9773d924906cd8fc5bb133cab78d605ced79cdfd317dbc7fe78d441c138e1c9df93710b9e4e608487f3647e18c4e1dc4a009fed8ca79762e0afd4d2b6
|
7
|
+
data.tar.gz: 51eda51e3bb37741e950be881473335680c2269c08378a42a63e25fc8d632527c0351c72f4a114fab43c97dc963f39c677dc7a6a266c1e65472cba405fc45f5a
|
@@ -14,7 +14,15 @@ module ConfidentialInfoRedactorLite
|
|
14
14
|
extracted_terms = []
|
15
15
|
PragmaticSegmenter::Segmenter.new(text: text, language: language).segment.each do |segment|
|
16
16
|
initial_extracted_terms = segment.gsub(EXTRACT_REGEX).map { |match| match unless corpus.include?(match.downcase.gsub(/[\?\.\)\(\!\\\/\"\:\;]/, '').gsub(/”/,'').gsub(/\'$/, '')) }.compact
|
17
|
-
|
17
|
+
in_corpus = true
|
18
|
+
initial_extracted_terms.each do |ngram|
|
19
|
+
ngram.split(/[\?\)\(\!\\\/\"\:\;\,]/).each do |t|
|
20
|
+
unless corpus.include?(t.downcase.gsub(/[\?\)\(\!\\\/\"\:\;\,]/, '').gsub(/\'$/, '').gsub(/”/,'').gsub(/\.\z/, '').strip)
|
21
|
+
in_corpus = false
|
22
|
+
end
|
23
|
+
end
|
24
|
+
end
|
25
|
+
next if initial_extracted_terms.length.eql?(segment.split(' ').length) && in_corpus
|
18
26
|
initial_extracted_terms.each do |ngram|
|
19
27
|
ngram.split(/[\?\)\(\!\\\/\"\:\;\,]/).each do |t|
|
20
28
|
next if !(t !~ /.*\d+.*/)
|
@@ -142,7 +142,7 @@ RSpec.describe ConfidentialInfoRedactorLite::Extractor do
|
|
142
142
|
|
143
143
|
it 'extracts the proper nouns from a text #017' do
|
144
144
|
text = 'John'
|
145
|
-
expect(described_class.new(text: text, corpus: corpus, language: 'en').extract).to eq([])
|
145
|
+
expect(described_class.new(text: text, corpus: corpus, language: 'en').extract).to eq(['John'])
|
146
146
|
end
|
147
147
|
end
|
148
148
|
|
@@ -1,7 +1,7 @@
|
|
1
1
|
require 'spec_helper'
|
2
2
|
|
3
3
|
RSpec.describe ConfidentialInfoRedactorLite::Redactor do
|
4
|
-
let(:corpus) { ['i', 'in', 'you', 'top', 'so', 'are', 'december', 'please'] }
|
4
|
+
let(:corpus) { ['i', 'in', 'you', 'top', 'so', 'are', 'december', 'please', 'prizes'] }
|
5
5
|
let(:en_dow) { %w(monday tuesday wednesday thursday friday saturday sunday) }
|
6
6
|
let(:en_dow_abbr) { %w(mon tu tue tues wed th thu thur thurs fri sat sun) }
|
7
7
|
let(:en_months) { %w(january february march april may june july august september october november december) }
|