confidential_info_redactor 0.0.13 → 0.0.14

Sign up to get free protection for your applications and access to all features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: d698521a92a28fa94c1cd8f2c6317b4249a0a19d
4
- data.tar.gz: ab825e72bb74de93b72efaac0364f4b01ba7b3fb
3
+ metadata.gz: f372184466d5b6452bc24fcba0cc4b7f6754d5c8
4
+ data.tar.gz: e6cc96f50cb2ff83e4d74b5e9fb201c2c0f93a17
5
5
  SHA512:
6
- metadata.gz: b91adab393e7137f24f9255a20b9d35cb812fbbb0513b0a3daf01944065d3659ef40864bc8013c7291d210593a60341e1dae898d6c5c97262029d7f0fefc8a5f
7
- data.tar.gz: c554f97b0ce9fe0341ab983641bc347c415d770561acb150ce810236271c485cd0b395d8ebf8eef333372ebbb6719a703adca82cef36855022b8d565c1d196e1
6
+ metadata.gz: 3752768a77fd3514e3717363c9c23cfccfe62b6acbdba59a540d9eb1506a55573a582dc581b7f433c94822c43cbfc9d82d27ca754a0b51751306f8cfdc9d8ea7
7
+ data.tar.gz: 2de4f5514ea01869ae0f552d9bdefeba79c60c79cbb62167644f47c2a3a3d0213fb546c5c70fd3466e0afd8d421a3c712e163a6cfbc3e82629e44e8501798d7a
@@ -25,6 +25,7 @@ module ConfidentialInfoRedactor
25
25
  initial_extracted_terms = segment.gsub(EXTRACT_REGEX).map { |match| match unless corpus.include?(match.downcase.gsub(/[\?\.\)\(\!\\\/\"\:\;]/, '').gsub(/”/,'').gsub(/\'$/, '')) }.compact
26
26
  initial_extracted_terms.each do |ngram|
27
27
  ngram.split(/[\?\)\(\!\\\/\"\:\;\,]/).each do |t|
28
+ next if !(t !~ /.*\d+.*/)
28
29
  if corpus.include?(t.downcase.gsub(/[\?\)\(\!\\\/\"\:\;\,]/, '').gsub(/\'$/, '').gsub(/”/,'').gsub(/\.\z/, '').strip.split(' ')[0]) && t.downcase.gsub(/[\?\)\(\!\\\/\"\:\;\,]/, '').gsub(/\'$/, '').gsub(/”/,'').gsub(/\.\z/, '').strip.split(' ')[0] != 'the' && t.downcase.gsub(/[\?\)\(\!\\\/\"\:\;\,]/, '').gsub(/\'$/, '').gsub(/\.\z/, '').strip.split(' ')[0] != 'deutsche' && t.downcase.gsub(/[\?\)\(\!\\\/\"\:\;\,]/, '').gsub(/\'$/, '').gsub(/\.\z/, '').strip.split(' ').length.eql?(2)
29
30
  extracted_terms << t.gsub(/[\?\)\(\!\\\/\"\:\;\,]/, '').gsub(/\'$/, '').gsub(/”/,'').gsub(/\.\z/, '').strip.split(' ')[1] unless corpus.include?(t.downcase.gsub(/[\?\.\)\(\!\\\/\"\:\;]/, '').gsub(/\'$/, '').gsub(/”/,'').strip.split(' ')[1])
30
31
  else
@@ -1,3 +1,3 @@
1
1
  module ConfidentialInfoRedactor
2
- VERSION = "0.0.13"
2
+ VERSION = "0.0.14"
3
3
  end
@@ -133,6 +133,11 @@ RSpec.describe ConfidentialInfoRedactor::Extractor do
133
133
  text = '“Reducing'
134
134
  expect(described_class.new(text: text, language: 'en').extract).to eq([])
135
135
  end
136
+
137
+ it 'extracts the proper nouns from a text #015' do
138
+ text = 'Corrigendum to Council Regulation (EC) No 85/2009 of 19 January 2009 amending Regulation (EC) No 1083/2006 laying down general provisions on the European Regional Development Fund, the European Social Fund and the Cohesion Fund concerning certain provisions relating to financial management'
139
+ expect(described_class.new(text: text, language: 'en').extract).to eq(["Corrigendum"])
140
+ end
136
141
  end
137
142
 
138
143
  context 'German (de)' do
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: confidential_info_redactor
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.13
4
+ version: 0.0.14
5
5
  platform: ruby
6
6
  authors:
7
7
  - Kevin S. Dias