confidential_info_redactor 0.0.14 → 0.0.15
Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: cdee56aab509c61bb5746e12b5649f9dad2ad3ce
|
4
|
+
data.tar.gz: 3e47c212345e9986a53bdc6d1d38fc1c5c974341
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: f7ff4ea744c2b5ec7fc2927efa23f0c0894973bf01cd4a2a39d21ea8575c274ec6e93d0a7e18f6f23bc28d9581b26cbea1de40a2d182eee2fe0cfd3be1ff28b9
|
7
|
+
data.tar.gz: e0ed6e7ac500a44fe84393ae2554623f6571f52141cf4e588e02de796dc752f4b0cdbd9d85077cac3f2d3dd2d9c20df7ff31af50f4fa4ac59363b91319c2f6af
|
@@ -23,6 +23,7 @@ module ConfidentialInfoRedactor
|
|
23
23
|
extracted_terms = []
|
24
24
|
PragmaticSegmenter::Segmenter.new(text: text, language: language).segment.each do |segment|
|
25
25
|
initial_extracted_terms = segment.gsub(EXTRACT_REGEX).map { |match| match unless corpus.include?(match.downcase.gsub(/[\?\.\)\(\!\\\/\"\:\;]/, '').gsub(/”/,'').gsub(/\'$/, '')) }.compact
|
26
|
+
next if initial_extracted_terms.length.eql?(segment.split(' ').length)
|
26
27
|
initial_extracted_terms.each do |ngram|
|
27
28
|
ngram.split(/[\?\)\(\!\\\/\"\:\;\,]/).each do |t|
|
28
29
|
next if !(t !~ /.*\d+.*/)
|
@@ -134,10 +134,15 @@ RSpec.describe ConfidentialInfoRedactor::Extractor do
|
|
134
134
|
expect(described_class.new(text: text, language: 'en').extract).to eq([])
|
135
135
|
end
|
136
136
|
|
137
|
-
it 'extracts the proper nouns from a text #
|
137
|
+
it 'extracts the proper nouns from a text #016' do
|
138
138
|
text = 'Corrigendum to Council Regulation (EC) No 85/2009 of 19 January 2009 amending Regulation (EC) No 1083/2006 laying down general provisions on the European Regional Development Fund, the European Social Fund and the Cohesion Fund concerning certain provisions relating to financial management'
|
139
139
|
expect(described_class.new(text: text, language: 'en').extract).to eq(["Corrigendum"])
|
140
140
|
end
|
141
|
+
|
142
|
+
it 'extracts the proper nouns from a text #017' do
|
143
|
+
text = 'John'
|
144
|
+
expect(described_class.new(text: text, language: 'en').extract).to eq([])
|
145
|
+
end
|
141
146
|
end
|
142
147
|
|
143
148
|
context 'German (de)' do
|