confidential_info_redactor_lite 1.0.2 → 1.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 39a1611275052f427c8e1c916cb368b0a7337347
|
4
|
+
data.tar.gz: 528d58ac47b37eb1be8b76f5de0b5ae8518ddb72
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 39537bfadcce276750a5e3e1c53a275c125ddd457672063e880832e9e13138054723315bbc6a7d4ae041268009acf27ca1e54168f9b8d384ca31ddba4f34725a
|
7
|
+
data.tar.gz: b18afda080a5f8cc75235d202897576f2e76232b3d0c8903f08063e6aaff59c4149ecb7c62ade7944ef4eeff805c271d5c3bed8fe2f57c907edbae053e409a71
|
@@ -18,7 +18,7 @@ module ConfidentialInfoRedactorLite
|
|
18
18
|
next if initial_extracted_terms.length.eql?(segment.split(' ').length) && !in_corpus?(initial_extracted_terms)
|
19
19
|
search_ngrams(initial_extracted_terms, extracted_terms)
|
20
20
|
end
|
21
|
-
extracted_terms.map { |t| t.gsub(/\{\}/, '') }.uniq.reject(&:empty?)
|
21
|
+
extracted_terms.map { |t| t.gsub(/\{\}/, '') }.delete_if { |t| t.length == 1 }.uniq.reject(&:empty?)
|
22
22
|
end
|
23
23
|
|
24
24
|
private
|
@@ -154,6 +154,11 @@ RSpec.describe ConfidentialInfoRedactorLite::Extractor do
|
|
154
154
|
text = 'Westin{}'
|
155
155
|
expect(described_class.new(corpus: corpus, language: 'en').extract(text)).to eq(["Westin"])
|
156
156
|
end
|
157
|
+
|
158
|
+
it 'extracts the proper nouns from a text #020' do
|
159
|
+
text = 'Document No:-'
|
160
|
+
expect(described_class.new(corpus: corpus, language: 'en').extract(text)).to eq(["Document No"])
|
161
|
+
end
|
157
162
|
end
|
158
163
|
|
159
164
|
context 'German (de)' do
|