confidential_info_redactor_lite 1.0.11 → 1.0.12

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 6a5ec9e9eeb82984f87cba0c612404b7a6e9f4b4
4
- data.tar.gz: 5e410055fe45224d70930f7e6c8f2858eeda4bc0
3
+ metadata.gz: 96535d227ea64669725219062661210c94bdb9dc
4
+ data.tar.gz: 4090e57ce0c991d4d33d1dc37207d0e016410de5
5
5
  SHA512:
6
- metadata.gz: 5e3c14787d5013846629bbab16ef5117fe96a0e8d1193be266e1325f7d21ff978100f4af666289d933869f744d7bd0383e7c136f81d6bac733e72dabde40f766
7
- data.tar.gz: ef113dda2f3a5928f1281d0fb96fd9d99bc8d4d8d7bc0c5aa5d227b77e3182b05934e498aa8c7481bb1c80ee51be7a1132c3740207b33e19264c9005bf8d5b77
6
+ metadata.gz: d118b2bf42e77d568d548ff9757a5a009a88d57ce5b1a510143723fe030a8a5f034703ebbfa638bb348aa1f54d3c97c836aaea077ff9f355be19a6418131c607
7
+ data.tar.gz: eac920b4c54966f04f39116790eb974d7b700eb423737546c1cb30c8b25b38e4d0f530825ed446160e81e4b6ea0a52e5b329041c82f9ac8bb0d6e7e0081ec5f4
@@ -2,7 +2,7 @@ module ConfidentialInfoRedactorLite
2
2
  # This class extracts proper nouns from a text
3
3
  class Extractor
4
4
  # Rubular: http://rubular.com/r/qE0g4r9zR7
5
- EXTRACT_REGEX = /(?<=\s|^|\s\"|\s\“|\s\«|\s\‹|\s\”|\s\»|\s\›)([A-Z]\S*\s)*[A-Z]\S*(?=(\s|\.|\z))|(?<=\s|^|\s\"|\s\”|\s\»|\s\›|\s\“|\s\«|\s\‹)[i][A-Z][a-z]+/
5
+ EXTRACT_REGEX = /(?<=\s|^|\s\"|\s\“|\s\«|\s\‹|\s\”|\s\»|\s\›)(\p{Lu}\S*\s)*\p{Lu}\S*(?=(\s|\.|\z))|(?<=\s|^|\s\"|\s\”|\s\»|\s\›|\s\“|\s\«|\s\‹)[i][A-Z][a-z]+/
6
6
 
7
7
  PUNCTUATION_REGEX = /[\?\)\(\!\\\/\"\:\;\,\”\“\«\»\‹\›]/
8
8
  attr_reader :language, :corpus
@@ -1,3 +1,3 @@
1
1
  module ConfidentialInfoRedactorLite
2
- VERSION = "1.0.11"
2
+ VERSION = "1.0.12"
3
3
  end
@@ -159,6 +159,11 @@ RSpec.describe ConfidentialInfoRedactorLite::Extractor do
159
159
  text = 'Document No:-'
160
160
  expect(described_class.new(corpus: corpus, language: 'en').extract(text)).to eq(["Document No"])
161
161
  end
162
+
163
+ it 'extracts the proper nouns from a text #021' do
164
+ text = 'Óscar is the best.'
165
+ expect(described_class.new(corpus: corpus, language: 'en').extract(text)).to eq(["Óscar"])
166
+ end
162
167
  end
163
168
 
164
169
  context 'German (de)' do
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: confidential_info_redactor_lite
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.11
4
+ version: 1.0.12
5
5
  platform: ruby
6
6
  authors:
7
7
  - Kevin S. Dias
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-03-07 00:00:00.000000000 Z
11
+ date: 2016-07-26 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler