confidential_info_redactor_lite 0.0.20 → 0.0.21

This diff shows the changes between two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 536892192827f07c05d351d34ba72add5dc8b572
4
- data.tar.gz: dd8caa21ae31b337c9d366e4b0dc792ee699d646
3
+ metadata.gz: 3f4f178e9f12c2b63b517cb83c476e4bba526f60
4
+ data.tar.gz: 3724ad4b679f8c56f27a23893d6a6b2e58f47d27
5
5
  SHA512:
6
- metadata.gz: 8cf63069cb1f56353515eb0c0bb4d425c2d15749e119f1cb6477b3c4a456cd5742a52729be1a79fa71669d0e11665fa90aa7a3545f4c9ce0ba0529ce680ffac5
7
- data.tar.gz: b22085b7813a3f3261711b6c5da379d5582fb6ae4d0b3eba0b2cfdb324abd3ee7c9a8218d61af5cf3e6a9e2bc2cd4993b9778be910731eccdf3febad6fd41f00
6
+ metadata.gz: 370acfe9773d924906cd8fc5bb133cab78d605ced79cdfd317dbc7fe78d441c138e1c9df93710b9e4e608487f3647e18c4e1dc4a009fed8ca79762e0afd4d2b6
7
+ data.tar.gz: 51eda51e3bb37741e950be881473335680c2269c08378a42a63e25fc8d632527c0351c72f4a114fab43c97dc963f39c677dc7a6a266c1e65472cba405fc45f5a
@@ -14,7 +14,15 @@ module ConfidentialInfoRedactorLite
14
14
  extracted_terms = []
15
15
  PragmaticSegmenter::Segmenter.new(text: text, language: language).segment.each do |segment|
16
16
  initial_extracted_terms = segment.gsub(EXTRACT_REGEX).map { |match| match unless corpus.include?(match.downcase.gsub(/[\?\.\)\(\!\\\/\"\:\;]/, '').gsub(/”/,'').gsub(/\'$/, '')) }.compact
17
- next if initial_extracted_terms.length.eql?(segment.split(' ').length)
17
+ in_corpus = true
18
+ initial_extracted_terms.each do |ngram|
19
+ ngram.split(/[\?\)\(\!\\\/\"\:\;\,]/).each do |t|
20
+ unless corpus.include?(t.downcase.gsub(/[\?\)\(\!\\\/\"\:\;\,]/, '').gsub(/\'$/, '').gsub(/”/,'').gsub(/\.\z/, '').strip)
21
+ in_corpus = false
22
+ end
23
+ end
24
+ end
25
+ next if initial_extracted_terms.length.eql?(segment.split(' ').length) && in_corpus
18
26
  initial_extracted_terms.each do |ngram|
19
27
  ngram.split(/[\?\)\(\!\\\/\"\:\;\,]/).each do |t|
20
28
  next if !(t !~ /.*\d+.*/)
@@ -1,3 +1,3 @@
1
1
  module ConfidentialInfoRedactorLite
2
- VERSION = "0.0.20"
2
+ VERSION = "0.0.21"
3
3
  end
@@ -142,7 +142,7 @@ RSpec.describe ConfidentialInfoRedactorLite::Extractor do
142
142
 
143
143
  it 'extracts the proper nouns from a text #017' do
144
144
  text = 'John'
145
- expect(described_class.new(text: text, corpus: corpus, language: 'en').extract).to eq([])
145
+ expect(described_class.new(text: text, corpus: corpus, language: 'en').extract).to eq(['John'])
146
146
  end
147
147
  end
148
148
 
@@ -1,7 +1,7 @@
1
1
  require 'spec_helper'
2
2
 
3
3
  RSpec.describe ConfidentialInfoRedactorLite::Redactor do
4
- let(:corpus) { ['i', 'in', 'you', 'top', 'so', 'are', 'december', 'please'] }
4
+ let(:corpus) { ['i', 'in', 'you', 'top', 'so', 'are', 'december', 'please', 'prizes'] }
5
5
  let(:en_dow) { %w(monday tuesday wednesday thursday friday saturday sunday) }
6
6
  let(:en_dow_abbr) { %w(mon tu tue tues wed th thu thur thurs fri sat sun) }
7
7
  let(:en_months) { %w(january february march april may june july august september october november december) }
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: confidential_info_redactor_lite
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.20
4
+ version: 0.0.21
5
5
  platform: ruby
6
6
  authors:
7
7
  - Kevin S. Dias