confidential_info_redactor_lite 1.0.1 → 1.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 5944b7a68fd13e6993315805c0cfdccb59e4fd9d
|
4
|
+
data.tar.gz: bb7a44c64076277287649e006002b7cf396e2043
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 0d060ed3456c2c8eaeebf8b486d70a9a15d4f35e9c7b8102ebf72a14ed4e3486cdfc0c40a151733fb6054e052145717806d8e391747e1a95dce425690440b8df
|
7
|
+
data.tar.gz: 7d0729c5404c6425f720ef0ba3bd7fe602b35e2e8d94df70be6c27aaf2a93f84f9afc29450ecdfea9d721a9f7b7b152063d0f09fa9e41b818031b6d0185c5cf2
|
@@ -18,7 +18,7 @@ module ConfidentialInfoRedactorLite
|
|
18
18
|
next if initial_extracted_terms.length.eql?(segment.split(' ').length) && !in_corpus?(initial_extracted_terms)
|
19
19
|
search_ngrams(initial_extracted_terms, extracted_terms)
|
20
20
|
end
|
21
|
-
extracted_terms.uniq.reject(&:empty?)
|
21
|
+
extracted_terms.map { |t| t.gsub(/\{\}/, '') }.uniq.reject(&:empty?)
|
22
22
|
end
|
23
23
|
|
24
24
|
private
|
@@ -149,6 +149,11 @@ RSpec.describe ConfidentialInfoRedactorLite::Extractor do
|
|
149
149
|
text = 'John and Jane Doe'
|
150
150
|
expect(described_class.new(corpus: corpus, language: 'en').extract(text)).to eq(["John", "Jane Doe"])
|
151
151
|
end
|
152
|
+
|
153
|
+
it 'extracts the proper nouns from a text #019' do
|
154
|
+
text = 'Westin{}'
|
155
|
+
expect(described_class.new(corpus: corpus, language: 'en').extract(text)).to eq(["Westin"])
|
156
|
+
end
|
152
157
|
end
|
153
158
|
|
154
159
|
context 'German (de)' do
|