confidential_info_redactor_lite 0.0.8 → 0.0.9

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: a00a35159e096f07346d94b3ffd1ebf139ef9561
4
- data.tar.gz: adacee6c75993572f2343bfef71fa94b70ab885a
3
+ metadata.gz: 216e96faca24f56d0c98efd9fe537d443ebf99ee
4
+ data.tar.gz: 79fd2f8c443c6d3875298b860a81ba84b4d68113
5
5
  SHA512:
6
- metadata.gz: 8958fef084a98d839334725773f4fa1a5a4ec0de746d0826a02902398ac83184052b9cc12bd9e22bfc819133a33d12b1f8891731801fcc3d49a717cac605711b
7
- data.tar.gz: eb78915b7d9f8ccdb57d377b1af1be2afa754a91f12a222a7946b532b590212dd788eefcc3e8f0a808eebd8a63f5017f1df79bbac23c293c51ade05143e82c7e
6
+ metadata.gz: f82751b04aa5c32af2de0fc4048a3fa79a82dbf0a9fdbe63b8118371172c62fc0b0d9cffa9eb036e1529e2b66b1ff37ce77e1cb4ee849fe45cff55cfd0b5ab5d
7
+ data.tar.gz: 7b2e643f54eb7bcfd624f16fc64ea2301f99cbad9c19a8b11c10faa1a3bd9282a319fbbd506ece94a3ea42246e8f310337a73ff2a5611632b8d0e9f04aab83e6
@@ -16,7 +16,7 @@ module ConfidentialInfoRedactorLite
16
16
  initial_extracted_terms = segment.gsub(EXTRACT_REGEX).map { |match| match unless corpus.include?(match.downcase.gsub(/[\?\.\)\(\!\\\/\"\:\;]/, '').gsub(/\'$/, '')) }.compact
17
17
  initial_extracted_terms.each do |ngram|
18
18
  ngram.split(/[\?\)\(\!\\\/\"\:\;\,]/).each do |t|
19
- if corpus.include?(t.downcase.gsub(/[\?\)\(\!\\\/\"\:\;\,]/, '').gsub(/\'$/, '').gsub(/\.\z/, '').strip.split(' ')[0]) && t.downcase.gsub(/[\?\)\(\!\\\/\"\:\;\,]/, '').gsub(/\'$/, '').gsub(/\.\z/, '').strip.split(' ')[0] != 'the'
19
+ if corpus.include?(t.downcase.gsub(/[\?\)\(\!\\\/\"\:\;\,]/, '').gsub(/\'$/, '').gsub(/\.\z/, '').strip.split(' ')[0]) && t.downcase.gsub(/[\?\)\(\!\\\/\"\:\;\,]/, '').gsub(/\'$/, '').gsub(/\.\z/, '').strip.split(' ')[0] != 'the' && t.downcase.gsub(/[\?\)\(\!\\\/\"\:\;\,]/, '').gsub(/\'$/, '').gsub(/\.\z/, '').strip.split(' ')[0] != 'deutsche' && t.downcase.gsub(/[\?\)\(\!\\\/\"\:\;\,]/, '').gsub(/\'$/, '').gsub(/\.\z/, '').strip.split(' ').length.eql?(2)
20
20
  extracted_terms << t.gsub(/[\?\)\(\!\\\/\"\:\;\,]/, '').gsub(/\'$/, '').gsub(/\.\z/, '').strip.split(' ')[1] unless corpus.include?(t.downcase.gsub(/[\?\.\)\(\!\\\/\"\:\;]/, '').gsub(/\'$/, '').strip.split(' ')[1])
21
21
  else
22
22
  extracted_terms << t.gsub(/[\?\)\(\!\\\/\"\:\;\,]/, '').gsub(/\'$/, '').gsub(/\.\z/, '').strip unless corpus.include?(t.downcase.gsub(/[\?\.\)\(\!\\\/\"\:\;]/, '').gsub(/\'$/, '').strip)
@@ -25,14 +25,7 @@ module ConfidentialInfoRedactorLite
25
25
  end
26
26
  end
27
27
 
28
- if language.eql?('de')
29
- extracted_terms.delete_if do |token|
30
- corpus.include?(token.split(' ')[0].downcase.strip) &&
31
- token.split(' ')[0].downcase.strip != 'deutsche'
32
- end.uniq.reject(&:empty?)
33
- else
34
- extracted_terms.uniq.reject(&:empty?)
35
- end
28
+ extracted_terms.uniq.reject(&:empty?)
36
29
  end
37
30
  end
38
31
  end
@@ -1,3 +1,3 @@
1
1
  module ConfidentialInfoRedactorLite
2
- VERSION = "0.0.8"
2
+ VERSION = "0.0.9"
3
3
  end
@@ -87,7 +87,7 @@ RSpec.describe ConfidentialInfoRedactorLite::Extractor do
87
87
 
88
88
  Don’t forget to use your imagination and creativity!
89
89
  EOF
90
- expect(described_class.new(text: text, corpus: corpus).extract).to eq(["Putter King Miniature Golf Scavenger Hunt", "Putter King", "Annual Miniature Golf Scavenger Hunt", "The Official List", "Nostalgic Miniature Golf Obstacles", "Putter King Hole Design Contest", "World Heritage Site", "PGA", "iTunes", "Gift Card", "Putter King Scavenger Hunt Trophy", "Engraved Crystal Trophy", "The Putter King", "The U.S. Government", "Putter King Scavenger Hunt Submission", "YouTube", "Flickr", "Picasa", "Photobucket"])
90
+ expect(described_class.new(text: text, corpus: corpus).extract).to eq(["Putter King Miniature Golf Scavenger Hunt", "Putter King", "Annual Miniature Golf Scavenger Hunt", "The Official List", "Nostalgic Miniature Golf Obstacles", "Putter King Hole Design Contest", "World Heritage Site", "PGA", "iTunes", "Gift Card", "Putter King Scavenger Hunt Trophy", "Engraved Crystal Trophy", "Picture Coming Soon", "The Putter King", "The U.S. Government", "Putter King Scavenger Hunt Submission", "YouTube", "Flickr", "Picasa", "Photobucket"])
91
91
  end
92
92
 
93
93
  it 'extracts the proper nouns from a text #007' do
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: confidential_info_redactor_lite
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.8
4
+ version: 0.0.9
5
5
  platform: ruby
6
6
  authors:
7
7
  - Kevin S. Dias