confidential_info_redactor_lite 1.0.11 → 1.0.12
Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 96535d227ea64669725219062661210c94bdb9dc
|
4
|
+
data.tar.gz: 4090e57ce0c991d4d33d1dc37207d0e016410de5
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: d118b2bf42e77d568d548ff9757a5a009a88d57ce5b1a510143723fe030a8a5f034703ebbfa638bb348aa1f54d3c97c836aaea077ff9f355be19a6418131c607
|
7
|
+
data.tar.gz: eac920b4c54966f04f39116790eb974d7b700eb423737546c1cb30c8b25b38e4d0f530825ed446160e81e4b6ea0a52e5b329041c82f9ac8bb0d6e7e0081ec5f4
|
@@ -2,7 +2,7 @@ module ConfidentialInfoRedactorLite
|
|
2
2
|
# This class extracts proper nouns from a text
|
3
3
|
class Extractor
|
4
4
|
# Rubular: http://rubular.com/r/qE0g4r9zR7
|
5
|
-
EXTRACT_REGEX = /(?<=\s|^|\s\"|\s\“|\s\«|\s\‹|\s\”|\s\»|\s\›)(
|
5
|
+
EXTRACT_REGEX = /(?<=\s|^|\s\"|\s\“|\s\«|\s\‹|\s\”|\s\»|\s\›)(\p{Lu}\S*\s)*\p{Lu}\S*(?=(\s|\.|\z))|(?<=\s|^|\s\"|\s\”|\s\»|\s\›|\s\“|\s\«|\s\‹)[i][A-Z][a-z]+/
|
6
6
|
|
7
7
|
PUNCTUATION_REGEX = /[\?\)\(\!\\\/\"\:\;\,\”\“\«\»\‹\›]/
|
8
8
|
attr_reader :language, :corpus
|
@@ -159,6 +159,11 @@ RSpec.describe ConfidentialInfoRedactorLite::Extractor do
|
|
159
159
|
text = 'Document No:-'
|
160
160
|
expect(described_class.new(corpus: corpus, language: 'en').extract(text)).to eq(["Document No"])
|
161
161
|
end
|
162
|
+
|
163
|
+
it 'extracts the proper nouns from a text #021' do
|
164
|
+
text = 'Óscar is the best.'
|
165
|
+
expect(described_class.new(corpus: corpus, language: 'en').extract(text)).to eq(["Óscar"])
|
166
|
+
end
|
162
167
|
end
|
163
168
|
|
164
169
|
context 'German (de)' do
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: confidential_info_redactor_lite
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0.11
|
4
|
+
version: 1.0.12
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Kevin S. Dias
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-
|
11
|
+
date: 2016-07-26 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|