confidential_info_redactor_lite 0.0.20 → 0.0.21

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 536892192827f07c05d351d34ba72add5dc8b572
4
- data.tar.gz: dd8caa21ae31b337c9d366e4b0dc792ee699d646
3
+ metadata.gz: 3f4f178e9f12c2b63b517cb83c476e4bba526f60
4
+ data.tar.gz: 3724ad4b679f8c56f27a23893d6a6b2e58f47d27
5
5
  SHA512:
6
- metadata.gz: 8cf63069cb1f56353515eb0c0bb4d425c2d15749e119f1cb6477b3c4a456cd5742a52729be1a79fa71669d0e11665fa90aa7a3545f4c9ce0ba0529ce680ffac5
7
- data.tar.gz: b22085b7813a3f3261711b6c5da379d5582fb6ae4d0b3eba0b2cfdb324abd3ee7c9a8218d61af5cf3e6a9e2bc2cd4993b9778be910731eccdf3febad6fd41f00
6
+ metadata.gz: 370acfe9773d924906cd8fc5bb133cab78d605ced79cdfd317dbc7fe78d441c138e1c9df93710b9e4e608487f3647e18c4e1dc4a009fed8ca79762e0afd4d2b6
7
+ data.tar.gz: 51eda51e3bb37741e950be881473335680c2269c08378a42a63e25fc8d632527c0351c72f4a114fab43c97dc963f39c677dc7a6a266c1e65472cba405fc45f5a
@@ -14,7 +14,15 @@ module ConfidentialInfoRedactorLite
14
14
  extracted_terms = []
15
15
  PragmaticSegmenter::Segmenter.new(text: text, language: language).segment.each do |segment|
16
16
  initial_extracted_terms = segment.gsub(EXTRACT_REGEX).map { |match| match unless corpus.include?(match.downcase.gsub(/[\?\.\)\(\!\\\/\"\:\;]/, '').gsub(/”/,'').gsub(/\'$/, '')) }.compact
17
- next if initial_extracted_terms.length.eql?(segment.split(' ').length)
17
+ in_corpus = true
18
+ initial_extracted_terms.each do |ngram|
19
+ ngram.split(/[\?\)\(\!\\\/\"\:\;\,]/).each do |t|
20
+ unless corpus.include?(t.downcase.gsub(/[\?\)\(\!\\\/\"\:\;\,]/, '').gsub(/\'$/, '').gsub(/”/,'').gsub(/\.\z/, '').strip)
21
+ in_corpus = false
22
+ end
23
+ end
24
+ end
25
+ next if initial_extracted_terms.length.eql?(segment.split(' ').length) && in_corpus
18
26
  initial_extracted_terms.each do |ngram|
19
27
  ngram.split(/[\?\)\(\!\\\/\"\:\;\,]/).each do |t|
20
28
  next if !(t !~ /.*\d+.*/)
@@ -1,3 +1,3 @@
1
1
  module ConfidentialInfoRedactorLite
2
- VERSION = "0.0.20"
2
+ VERSION = "0.0.21"
3
3
  end
@@ -142,7 +142,7 @@ RSpec.describe ConfidentialInfoRedactorLite::Extractor do
142
142
 
143
143
  it 'extracts the proper nouns from a text #017' do
144
144
  text = 'John'
145
- expect(described_class.new(text: text, corpus: corpus, language: 'en').extract).to eq([])
145
+ expect(described_class.new(text: text, corpus: corpus, language: 'en').extract).to eq(['John'])
146
146
  end
147
147
  end
148
148
 
@@ -1,7 +1,7 @@
1
1
  require 'spec_helper'
2
2
 
3
3
  RSpec.describe ConfidentialInfoRedactorLite::Redactor do
4
- let(:corpus) { ['i', 'in', 'you', 'top', 'so', 'are', 'december', 'please'] }
4
+ let(:corpus) { ['i', 'in', 'you', 'top', 'so', 'are', 'december', 'please', 'prizes'] }
5
5
  let(:en_dow) { %w(monday tuesday wednesday thursday friday saturday sunday) }
6
6
  let(:en_dow_abbr) { %w(mon tu tue tues wed th thu thur thurs fri sat sun) }
7
7
  let(:en_months) { %w(january february march april may june july august september october november december) }
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: confidential_info_redactor_lite
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.20
4
+ version: 0.0.21
5
5
  platform: ruby
6
6
  authors:
7
7
  - Kevin S. Dias