confidential_info_redactor 0.0.6 → 0.0.7

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 31951b79282316f6c8b8057a54d5264fa1f2f1ca
4
- data.tar.gz: 3773d4657c9c8605f337dd96abc22c491a72249e
3
+ metadata.gz: aeaf6f5f94253c91e2374def04cea61f7ca39222
4
+ data.tar.gz: 8a40c1c9eed054684d03eb928509e48397fad473
5
5
  SHA512:
6
- metadata.gz: 24a9b492845cd7d2ebd1f6eb7e2aff4452f9f77fe6140f036e7ea3c803ef0f4c7ab3f0c8d871542afb2d2bb6e8f061f0f903f5445acdc555ebe93f729e856631
7
- data.tar.gz: 9e282995ade4f1e7a8b8605c35278df5b4abb0625ed84dcafb17404336ee5ef57549979f071bc506117ef558cbb20197fa83961476d1135a5ad126453e45fc6b
6
+ metadata.gz: abed97c367ac42cf53a3ffeda3700d28fa672e8215af327c9691107589d79a2c3ebe4807f8ddcb74589d770fc290e51422f73f367d491660f0b1902631002340
7
+ data.tar.gz: c190149bd1e64fd19ca1624a3e84be4d6e106441aee2f962a6d2df06005781392bc16ff65a1a023fe0b75f73256840e150a4d4dabac93af865ab35ae7c62983d
@@ -25,19 +25,16 @@ module ConfidentialInfoRedactor
25
25
  initial_extracted_terms = segment.gsub(EXTRACT_REGEX).map { |match| match unless corpus.include?(match.downcase.gsub(/[\?\.\)\(\!\\\/\"\:\;]/, '').gsub(/\'$/, '')) }.compact
26
26
  initial_extracted_terms.each do |ngram|
27
27
  ngram.split(/[\?\)\(\!\\\/\"\:\;\,]/).each do |t|
28
- extracted_terms << t.gsub(/[\?\)\(\!\\\/\"\:\;\,]/, '').gsub(/\'$/, '').gsub(/\.\z/, '').strip unless corpus.include?(t.downcase.gsub(/[\?\.\)\(\!\\\/\"\:\;]/, '').gsub(/\'$/, '').strip)
28
+ if corpus.include?(t.downcase.gsub(/[\?\)\(\!\\\/\"\:\;\,]/, '').gsub(/\'$/, '').gsub(/\.\z/, '').strip.split(' ')[0]) && t.downcase.gsub(/[\?\)\(\!\\\/\"\:\;\,]/, '').gsub(/\'$/, '').gsub(/\.\z/, '').strip.split(' ')[0] != 'the' && t.downcase.gsub(/[\?\)\(\!\\\/\"\:\;\,]/, '').gsub(/\'$/, '').gsub(/\.\z/, '').strip.split(' ')[0] != 'deutsche' && t.downcase.gsub(/[\?\)\(\!\\\/\"\:\;\,]/, '').gsub(/\'$/, '').gsub(/\.\z/, '').strip.split(' ').length.eql?(2)
29
+ extracted_terms << t.gsub(/[\?\)\(\!\\\/\"\:\;\,]/, '').gsub(/\'$/, '').gsub(/\.\z/, '').strip.split(' ')[1] unless corpus.include?(t.downcase.gsub(/[\?\.\)\(\!\\\/\"\:\;]/, '').gsub(/\'$/, '').strip.split(' ')[1])
30
+ else
31
+ extracted_terms << t.gsub(/[\?\)\(\!\\\/\"\:\;\,]/, '').gsub(/\'$/, '').gsub(/\.\z/, '').strip unless corpus.include?(t.downcase.gsub(/[\?\.\)\(\!\\\/\"\:\;]/, '').gsub(/\'$/, '').strip)
32
+ end
29
33
  end
30
34
  end
31
35
  end
32
36
 
33
- if language.eql?('de')
34
- extracted_terms.delete_if do |token|
35
- corpus.include?(token.split(' ')[0].downcase.strip) &&
36
- token.split(' ')[0].downcase.strip != 'deutsche'
37
- end.uniq.reject(&:empty?)
38
- else
39
- extracted_terms.uniq.reject(&:empty?)
40
- end
37
+ extracted_terms.uniq.reject(&:empty?)
41
38
  end
42
39
  end
43
40
  end
@@ -1,3 +1,3 @@
1
1
  module ConfidentialInfoRedactor
2
- VERSION = "0.0.6"
2
+ VERSION = "0.0.7"
3
3
  end
@@ -86,7 +86,7 @@ RSpec.describe ConfidentialInfoRedactor::Extractor do
86
86
 
87
87
  Don’t forget to use your imagination and creativity!
88
88
  EOF
89
- expect(described_class.new(text: text).extract).to eq(["Putter King Miniature Golf Scavenger Hunt", "Putter King", "Annual Miniature Golf Scavenger Hunt", "The Official List", "Nostalgic Miniature Golf Obstacles", "Putter King Hole Design Contest", "World Heritage Site", "PGA", "iTunes", "Gift Card", "Putter King Scavenger Hunt Trophy", "Engraved Crystal Trophy", "Picture Coming Soon", "The Putter King", "The U.S. Government", "Putter King Scavenger Hunt Submission", "YouTube", "Flickr", "Picasa", "Photobucket"])
89
+ expect(described_class.new(text: text).extract).to eq(["Putter King Miniature Golf Scavenger Hunt", "Annual Miniature Golf Scavenger Hunt", "The Official List", "Nostalgic Miniature Golf Obstacles", "Putter King Hole Design Contest", "World Heritage Site", "PGA", "iTunes", "Putter King Scavenger Hunt Trophy", "Engraved Crystal Trophy", "Picture Coming Soon", "The Putter King", "The U.S. Government", "Putter King Scavenger Hunt Submission", "YouTube", "Flickr", "Picasa", "Photobucket"])
90
90
  end
91
91
 
92
92
  it 'extracts the proper nouns from a text #007' do
@@ -98,6 +98,11 @@ RSpec.describe ConfidentialInfoRedactor::Extractor do
98
98
  text = 'Coca-Cola announced a merger with Pepsi that will happen on December 15th, 2020 for $200,000,000,000. Please contact John Smith at j.smith@example.com or visit http://www.super-fake-merger.com.'
99
99
  expect(described_class.new(text: text, language: 'en').extract).to eq(["Coca-Cola", "Pepsi", "John Smith"])
100
100
  end
101
+
102
+ it 'extracts the proper nouns from a text #009' do
103
+ text = 'Then Peter went to the store.'
104
+ expect(described_class.new(text: text, language: 'en').extract).to eq(["Peter"])
105
+ end
101
106
  end
102
107
 
103
108
  context 'German (de)' do
@@ -139,7 +139,7 @@ RSpec.describe ConfidentialInfoRedactor::Redactor do
139
139
  Don’t forget to use your imagination and creativity!
140
140
  EOF
141
141
  tokens = ConfidentialInfoRedactor::Extractor.new(text: text).extract
142
- expect(described_class.new(text: text, language: 'en', tokens: tokens).redact).to eq(" <redacted>\n\n <redacted> is hosting the <redacted number> <redacted>. So get out your putter and your camera and see if you have what it takes. Are you a King?\n\n <redacted>: <redacted number>) Autographs of <redacted number> professional miniature golfers, each from a different country. (<redacted number> points; <redacted number> bonus points if the professional miniature golfers are also from <redacted number> different continents) <redacted number>) Picture of yourself next to each obstacle in our list of the Top <redacted number> <redacted>. (<redacted number> points; <redacted number> bonus points for each obstacle that exactly matches the one pictured in the article) <redacted number>) Build your own full-size miniature golf hole. (<redacted number> points; up to <redacted number> bonus points available depending on the craftsmanship, playability, creativity and fun factor of your hole) <redacted number>) Video of yourself making a hole-in-one on two consecutive miniature golf holes. The video must be one continuous shot with no editing. (<redacted number> points) <redacted number>) Picture of yourself with the <redacted> mascot. (<redacted number> points; <redacted number> bonus points if you are wearing a <redacted> t-shirt) <redacted number>) Picture of yourself with the completed <redacted> wobblehead. (<redacted number> points; <redacted number> bonus points if the picture is taken at a miniature golf course) <redacted number>) Picture of a completed scorecard from a round of miniature golf. The round of golf must have taken place after the start of this scavenger hunt. (<redacted number> points) <redacted number>) Picture of completed scorecards from <redacted number> different miniature golf courses. Each round of golf must have taken place after the start of this scavenger hunt. (<redacted number> points) <redacted number>) Submit an entry to the <redacted number> <redacted>. (<redacted number> points; <redacted number> bonus points if your entry gets more than <redacted number> votes) <redacted number>) Screenshot from the <redacted> app showing a 9-hole score below par. (<redacted number> points) <redacted number>) Screenshot from the <redacted> app showing that you have successfully unlocked all of the holes in the game. (<redacted number> points) <redacted number>) Picture of the <redacted> wobblehead at a <redacted>. (<redacted number> points) <redacted number>) Complete and submit the <redacted> ‘Practice Activity’ and ‘Final Project’ for any one of the <redacted> math or physics lessons. (<redacted number> points; <redacted number> bonus points if you complete two lessons) <redacted number>) Picture of yourself with at least <redacted number> different colored miniature golf balls. (<redacted number> points; <redacted number> bonus points for each additional color {limit of <redacted number> bonus points}) <redacted number>) Picture of yourself with a famous golfer or miniature golfer. (<redacted number> points; <redacted number> bonus points if the golfer is on the <redacted> tour AND you are wearing a <redacted> t-shirt in the picture) <redacted number>) Video of yourself making a hole-in-one on a miniature golf hole with a loop-de-loop obstacle. (<redacted number> points) <redacted number>) Video of yourself successfully making a trick miniature golf shot. (<redacted number> points; up to <redacted number> bonus points available depending on the difficulty and complexity of the trick shot)\n\n\n Prizes: <redacted number> <redacted> <redacted>\n\n <redacted>\n (<redacted number> <redacted number> <redacted> - <redacted>)\n\n <redacted> team will judge the scavenger hunt and all decisions will be final. <redacted> is sponsoring it. The scavenger hunt is open to anyone and everyone. The scavenger hunt ends on <redacted date>.\n\n To enter the scavenger hunt, send an email to info AT putterking DOT com with the subject line: \"<redacted>\". In the email please include links to the pictures and videos you are submitting. You can utilize free photo and video hosting sites such as <redacted>, <redacted>, <redacted>, <redacted>, etc. for your submissions.\n\n By entering the <redacted>, you allow <redacted> to use or link to any of the pictures or videos you submit for advertisements and promotions.\n\n Don’t forget to use your imagination and creativity!\n")
142
+ expect(described_class.new(text: text, language: 'en', tokens: tokens).redact).to eq(" <redacted>\n\n Putter King is hosting the <redacted number> <redacted>. So get out your putter and your camera and see if you have what it takes. Are you a King?\n\n <redacted>: <redacted number>) Autographs of <redacted number> professional miniature golfers, each from a different country. (<redacted number> points; <redacted number> bonus points if the professional miniature golfers are also from <redacted number> different continents) <redacted number>) Picture of yourself next to each obstacle in our list of the Top <redacted number> <redacted>. (<redacted number> points; <redacted number> bonus points for each obstacle that exactly matches the one pictured in the article) <redacted number>) Build your own full-size miniature golf hole. (<redacted number> points; up to <redacted number> bonus points available depending on the craftsmanship, playability, creativity and fun factor of your hole) <redacted number>) Video of yourself making a hole-in-one on two consecutive miniature golf holes. The video must be one continuous shot with no editing. (<redacted number> points) <redacted number>) Picture of yourself with the Putter King mascot. (<redacted number> points; <redacted number> bonus points if you are wearing a Putter King t-shirt) <redacted number>) Picture of yourself with the completed Putter King wobblehead. (<redacted number> points; <redacted number> bonus points if the picture is taken at a miniature golf course) <redacted number>) Picture of a completed scorecard from a round of miniature golf. The round of golf must have taken place after the start of this scavenger hunt. (<redacted number> points) <redacted number>) Picture of completed scorecards from <redacted number> different miniature golf courses. Each round of golf must have taken place after the start of this scavenger hunt. (<redacted number> points) <redacted number>) Submit an entry to the <redacted number> <redacted>. (<redacted number> points; <redacted number> bonus points if your entry gets more than <redacted number> votes) <redacted number>) Screenshot from the Putter King app showing a 9-hole score below par. (<redacted number> points) <redacted number>) Screenshot from the Putter King app showing that you have successfully unlocked all of the holes in the game. (<redacted number> points) <redacted number>) Picture of the Putter King wobblehead at a <redacted>. (<redacted number> points) <redacted number>) Complete and submit the Putter King ‘Practice Activity’ and ‘Final Project’ for any one of the Putter King math or physics lessons. (<redacted number> points; <redacted number> bonus points if you complete two lessons) <redacted number>) Picture of yourself with at least <redacted number> different colored miniature golf balls. (<redacted number> points; <redacted number> bonus points for each additional color {limit of <redacted number> bonus points}) <redacted number>) Picture of yourself with a famous golfer or miniature golfer. (<redacted number> points; <redacted number> bonus points if the golfer is on the <redacted> tour AND you are wearing a Putter King t-shirt in the picture) <redacted number>) Video of yourself making a hole-in-one on a miniature golf hole with a loop-de-loop obstacle. (<redacted number> points) <redacted number>) Video of yourself successfully making a trick miniature golf shot. (<redacted number> points; up to <redacted number> bonus points available depending on the difficulty and complexity of the trick shot)\n\n\n Prizes: <redacted number> <redacted> Gift Card\n\n <redacted>\n (<redacted number> <redacted number> <redacted> - <redacted>)\n\n <redacted> team will judge the scavenger hunt and all decisions will be final. <redacted> is sponsoring it. The scavenger hunt is open to anyone and everyone. The scavenger hunt ends on <redacted date>.\n\n To enter the scavenger hunt, send an email to info AT putterking DOT com with the subject line: \"<redacted>\". In the email please include links to the pictures and videos you are submitting. You can utilize free photo and video hosting sites such as <redacted>, <redacted>, <redacted>, <redacted>, etc. for your submissions.\n\n By entering the <redacted>, you allow Putter King to use or link to any of the pictures or videos you submit for advertisements and promotions.\n\n Don’t forget to use your imagination and creativity!\n")
143
143
  end
144
144
 
145
145
  it 'redacts all confidential information from a text #003' do
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: confidential_info_redactor
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.6
4
+ version: 0.0.7
5
5
  platform: ruby
6
6
  authors:
7
7
  - Kevin S. Dias
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-04-17 00:00:00.000000000 Z
11
+ date: 2015-04-21 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler