confidential_info_redactor 0.0.14 → 0.0.15

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: f372184466d5b6452bc24fcba0cc4b7f6754d5c8
4
- data.tar.gz: e6cc96f50cb2ff83e4d74b5e9fb201c2c0f93a17
3
+ metadata.gz: cdee56aab509c61bb5746e12b5649f9dad2ad3ce
4
+ data.tar.gz: 3e47c212345e9986a53bdc6d1d38fc1c5c974341
5
5
  SHA512:
6
- metadata.gz: 3752768a77fd3514e3717363c9c23cfccfe62b6acbdba59a540d9eb1506a55573a582dc581b7f433c94822c43cbfc9d82d27ca754a0b51751306f8cfdc9d8ea7
7
- data.tar.gz: 2de4f5514ea01869ae0f552d9bdefeba79c60c79cbb62167644f47c2a3a3d0213fb546c5c70fd3466e0afd8d421a3c712e163a6cfbc3e82629e44e8501798d7a
6
+ metadata.gz: f7ff4ea744c2b5ec7fc2927efa23f0c0894973bf01cd4a2a39d21ea8575c274ec6e93d0a7e18f6f23bc28d9581b26cbea1de40a2d182eee2fe0cfd3be1ff28b9
7
+ data.tar.gz: e0ed6e7ac500a44fe84393ae2554623f6571f52141cf4e588e02de796dc752f4b0cdbd9d85077cac3f2d3dd2d9c20df7ff31af50f4fa4ac59363b91319c2f6af
@@ -23,6 +23,7 @@ module ConfidentialInfoRedactor
23
23
  extracted_terms = []
24
24
  PragmaticSegmenter::Segmenter.new(text: text, language: language).segment.each do |segment|
25
25
  initial_extracted_terms = segment.gsub(EXTRACT_REGEX).map { |match| match unless corpus.include?(match.downcase.gsub(/[\?\.\)\(\!\\\/\"\:\;]/, '').gsub(/”/,'').gsub(/\'$/, '')) }.compact
26
+ next if initial_extracted_terms.length.eql?(segment.split(' ').length)
26
27
  initial_extracted_terms.each do |ngram|
27
28
  ngram.split(/[\?\)\(\!\\\/\"\:\;\,]/).each do |t|
28
29
  next if !(t !~ /.*\d+.*/)
@@ -1,3 +1,3 @@
1
1
  module ConfidentialInfoRedactor
2
- VERSION = "0.0.14"
2
+ VERSION = "0.0.15"
3
3
  end
@@ -134,10 +134,15 @@ RSpec.describe ConfidentialInfoRedactor::Extractor do
134
134
  expect(described_class.new(text: text, language: 'en').extract).to eq([])
135
135
  end
136
136
 
137
- it 'extracts the proper nouns from a text #015' do
137
+ it 'extracts the proper nouns from a text #016' do
138
138
  text = 'Corrigendum to Council Regulation (EC) No 85/2009 of 19 January 2009 amending Regulation (EC) No 1083/2006 laying down general provisions on the European Regional Development Fund, the European Social Fund and the Cohesion Fund concerning certain provisions relating to financial management'
139
139
  expect(described_class.new(text: text, language: 'en').extract).to eq(["Corrigendum"])
140
140
  end
141
+
142
+ it 'extracts the proper nouns from a text #017' do
143
+ text = 'John'
144
+ expect(described_class.new(text: text, language: 'en').extract).to eq([])
145
+ end
141
146
  end
142
147
 
143
148
  context 'German (de)' do
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: confidential_info_redactor
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.14
4
+ version: 0.0.15
5
5
  platform: ruby
6
6
  authors:
7
7
  - Kevin S. Dias