confidential_info_redactor_lite 1.0.2 → 1.0.3
Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 39a1611275052f427c8e1c916cb368b0a7337347
|
4
|
+
data.tar.gz: 528d58ac47b37eb1be8b76f5de0b5ae8518ddb72
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 39537bfadcce276750a5e3e1c53a275c125ddd457672063e880832e9e13138054723315bbc6a7d4ae041268009acf27ca1e54168f9b8d384ca31ddba4f34725a
|
7
|
+
data.tar.gz: b18afda080a5f8cc75235d202897576f2e76232b3d0c8903f08063e6aaff59c4149ecb7c62ade7944ef4eeff805c271d5c3bed8fe2f57c907edbae053e409a71
|
@@ -18,7 +18,7 @@ module ConfidentialInfoRedactorLite
|
|
18
18
|
next if initial_extracted_terms.length.eql?(segment.split(' ').length) && !in_corpus?(initial_extracted_terms)
|
19
19
|
search_ngrams(initial_extracted_terms, extracted_terms)
|
20
20
|
end
|
21
|
-
extracted_terms.map { |t| t.gsub(/\{\}/, '') }.uniq.reject(&:empty?)
|
21
|
+
extracted_terms.map { |t| t.gsub(/\{\}/, '') }.delete_if { |t| t.length == 1 }.uniq.reject(&:empty?)
|
22
22
|
end
|
23
23
|
|
24
24
|
private
|
@@ -154,6 +154,11 @@ RSpec.describe ConfidentialInfoRedactorLite::Extractor do
|
|
154
154
|
text = 'Westin{}'
|
155
155
|
expect(described_class.new(corpus: corpus, language: 'en').extract(text)).to eq(["Westin"])
|
156
156
|
end
|
157
|
+
|
158
|
+
it 'extracts the proper nouns from a text #020' do
|
159
|
+
text = 'Document No:-'
|
160
|
+
expect(described_class.new(corpus: corpus, language: 'en').extract(text)).to eq(["Document No"])
|
161
|
+
end
|
157
162
|
end
|
158
163
|
|
159
164
|
context 'German (de)' do
|