confidential_info_redactor_lite 0.0.3

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,164 @@
1
+ require 'spec_helper'
2
+
3
+ RSpec.describe ConfidentialInfoRedactorLite::Redactor do
4
+ let(:corpus) { ['i', 'in', 'you', 'top', 'so', 'are', 'december', 'please'] }
5
+ let(:en_dow) { %w(monday tuesday wednesday thursday friday saturday sunday) }
6
+ let(:en_dow_abbr) { %w(mon tu tue tues wed th thu thur thurs fri sat sun) }
7
+ let(:en_months) { %w(january february march april may june july august september october november december) }
8
+ let(:en_month_abbr) { %w(jan feb mar apr jun jul aug sep sept oct nov dec) }
9
+
10
+ describe '#dates' do
11
+ it 'redacts dates from a text #001' do
12
+ text = 'Coca-Cola announced a merger with Pepsi that will happen on December 15th, 2020 for $200,000,000,000.'
13
+ expect(described_class.new(text: text, language: 'en', dow: en_dow, dow_abbr: en_dow_abbr, months: en_months, months_abbr: en_month_abbr).dates).to eq('Coca-Cola announced a merger with Pepsi that will happen on <redacted date> for $200,000,000,000.')
14
+ end
15
+
16
+ it 'redacts dates from a text #002' do
17
+ text = 'Coca-Cola announced a merger with Pepsi that will happen on December 15th, 2020.'
18
+ expect(described_class.new(text: text, language: 'en', dow: en_dow, dow_abbr: en_dow_abbr, months: en_months, months_abbr: en_month_abbr).dates).to eq('Coca-Cola announced a merger with Pepsi that will happen on <redacted date>.')
19
+ end
20
+
21
+ it 'redacts dates from a text #003' do
22
+ text = 'December 5, 2010 - Coca-Cola announced a merger with Pepsi.'
23
+ expect(described_class.new(text: text, language: 'en', dow: en_dow, dow_abbr: en_dow_abbr, months: en_months, months_abbr: en_month_abbr).dates).to eq('<redacted date> - Coca-Cola announced a merger with Pepsi.')
24
+ end
25
+
26
+ it 'redacts dates from a text #004' do
27
+ text = 'The scavenger hunt ends on Dec. 31st, 2011.'
28
+ expect(described_class.new(text: text, language: 'en', dow: en_dow, dow_abbr: en_dow_abbr, months: en_months, months_abbr: en_month_abbr).dates).to eq('The scavenger hunt ends on <redacted date>.')
29
+ end
30
+ end
31
+
32
+ describe '#numbers' do
33
+ it 'redacts numbers from a text #001' do
34
+ text = 'Coca-Cola announced a merger with Pepsi that will happen on <redacted date> for $200,000,000,000.'
35
+ expect(described_class.new(text: text, language: 'en', dow: en_dow, dow_abbr: en_dow_abbr, months: en_months, months_abbr: en_month_abbr).numbers).to eq('Coca-Cola announced a merger with Pepsi that will happen on <redacted date> for <redacted number>.')
36
+ end
37
+
38
+ it 'redacts numbers from a text #002' do
39
+ text = '200 years ago.'
40
+ expect(described_class.new(text: text, language: 'en', dow: en_dow, dow_abbr: en_dow_abbr, months: en_months, months_abbr: en_month_abbr).numbers).to eq('<redacted number> years ago.')
41
+ end
42
+
43
+ it 'redacts numbers from a text #003' do
44
+ text = 'It was his 1st time, not yet his 10th, not even his 2nd. The wood was 3/4" thick.'
45
+ expect(described_class.new(text: text, language: 'en', dow: en_dow, dow_abbr: en_dow_abbr, months: en_months, months_abbr: en_month_abbr).numbers).to eq('It was his <redacted number> time, not yet his <redacted number>, not even his <redacted number>. The wood was <redacted number> thick.')
46
+ end
47
+ end
48
+
49
+ describe '#emails' do
50
+ it 'redacts email addresses from a text #001' do
51
+ text = 'His email is john@gmail.com or you can try k.light@tuv.eu.us.'
52
+ expect(described_class.new(text: text, language: 'en', dow: en_dow, dow_abbr: en_dow_abbr, months: en_months, months_abbr: en_month_abbr).emails).to eq('His email is <redacted> or you can try <redacted>.')
53
+ end
54
+ end
55
+
56
+ describe '#hyperlinks' do
57
+ it 'redacts hyperlinks from a text #001' do
58
+ text = 'Visit https://www.tm-town.com for more info.'
59
+ expect(described_class.new(text: text, language: 'en', dow: en_dow, dow_abbr: en_dow_abbr, months: en_months, months_abbr: en_month_abbr).hyperlinks).to eq('Visit <redacted> for more info.')
60
+ end
61
+ end
62
+
63
+ describe '#proper_nouns' do
64
+ it 'redacts tokens from a text #001' do
65
+ tokens = ['Coca-Cola', 'Pepsi']
66
+ text = 'Coca-Cola announced a merger with Pepsi that will happen on on December 15th, 2020 for $200,000,000,000.'
67
+ expect(described_class.new(text: text, language: 'en', tokens: tokens, dow: en_dow, dow_abbr: en_dow_abbr, months: en_months, months_abbr: en_month_abbr).proper_nouns).to eq('<redacted> announced a merger with <redacted> that will happen on on December 15th, 2020 for $200,000,000,000.')
68
+ end
69
+
70
+ it 'redacts tokens from a text #002' do
71
+ tokens = ['Coca-Cola', 'Pepsi']
72
+ text = 'Coca-Cola announced a merger with Pepsi that will happen on on December 15th, 2020 for $200,000,000,000.'
73
+ expect(described_class.new(text: text, language: 'en', tokens: tokens, token_text: '*****', dow: en_dow, dow_abbr: en_dow_abbr, months: en_months, months_abbr: en_month_abbr).proper_nouns).to eq('***** announced a merger with ***** that will happen on on December 15th, 2020 for $200,000,000,000.')
74
+ end
75
+ end
76
+
77
+ describe '#redact' do
78
+ it 'redacts all confidential information from a text #001' do
79
+ tokens = ['Coca-Cola', 'Pepsi']
80
+ text = 'Coca-Cola announced a merger with Pepsi that will happen on on December 15th, 2020 for $200,000,000,000.'
81
+ expect(described_class.new(text: text, language: 'en', tokens: tokens, dow: en_dow, dow_abbr: en_dow_abbr, months: en_months, months_abbr: en_month_abbr).redact).to eq('<redacted> announced a merger with <redacted> that will happen on on <redacted date> for <redacted number>.')
82
+ end
83
+
84
+ it 'redacts all confidential information from a text #002' do
85
+ text = <<-EOF
86
+ Putter King Miniature Golf Scavenger Hunt
87
+
88
+ Putter King is hosting the 1st Annual Miniature Golf Scavenger Hunt. So get out your putter and your camera and see if you have what it takes. Are you a King?
89
+
90
+ The Official List:
91
+
92
+ #1) Autographs of 2 professional miniature golfers, each from a different country. (45 points; 5 bonus points if the professional miniature golfers are also from 2 different continents)
93
+
94
+ #2) Picture of yourself next to each obstacle in our list of the Top 10 Nostalgic Miniature Golf Obstacles. (120 points; 20 bonus points for each obstacle that exactly matches the one pictured in the article)
95
+
96
+ #3) Build your own full-size miniature golf hole. (75 points; up to 100 bonus points available depending on the craftsmanship, playability, creativity and fun factor of your hole)
97
+
98
+ #4) Video of yourself making a hole-in-one on two consecutive miniature golf holes. The video must be one continuous shot with no editing. (60 points)
99
+
100
+ #5) Picture of yourself with the Putter King mascot. (50 points; 15 bonus points if you are wearing a Putter King t-shirt)
101
+
102
+ #6) Picture of yourself with the completed Putter King wobblehead. (15 points; 15 bonus points if the picture is taken at a miniature golf course)
103
+
104
+ #7) Picture of a completed scorecard from a round of miniature golf. The round of golf must have taken place after the start of this scavenger hunt. (10 points)
105
+
106
+ #8) Picture of completed scorecards from 5 different miniature golf courses. Each round of golf must have taken place after the start of this scavenger hunt. (35 points)
107
+
108
+ #9) Submit an entry to the 2011 Putter King Hole Design Contest. (60 points; 40 bonus points if your entry gets more than 100 votes)
109
+
110
+ #10) Screenshot from the Putter King app showing a 9-hole score below par. (10 points)
111
+
112
+ #11) Screenshot from the Putter King app showing that you have successfully unlocked all of the holes in the game. (45 points)
113
+
114
+ #12) Picture of the Putter King wobblehead at a World Heritage Site. (55 points)
115
+
116
+ #13) Complete and submit the Putter King ‘Practice Activity’ and ‘Final Project’ for any one of the Putter King math or physics lessons. (40 points; 20 bonus points if you complete two lessons)
117
+
118
+ #14) Picture of yourself with at least 6 different colored miniature golf balls. (10 points; 2 bonus points for each additional color {limit of 10 bonus points})
119
+
120
+ #15) Picture of yourself with a famous golfer or miniature golfer. (15 points; 150 bonus points if the golfer is on the PGA tour AND you are wearing a Putter King t-shirt in the picture)
121
+
122
+ #16) Video of yourself making a hole-in-one on a miniature golf hole with a loop-de-loop obstacle. (30 points)
123
+
124
+ #17) Video of yourself successfully making a trick miniature golf shot. (40 points; up to 100 bonus points available depending on the difficulty and complexity of the trick shot)
125
+
126
+
127
+ Prizes:
128
+
129
+ $100 iTunes Gift Card
130
+
131
+ Putter King Scavenger Hunt Trophy
132
+ (6 3/4" Engraved Crystal Trophy - Picture Coming Soon)
133
+
134
+ The Putter King team will judge the scavenger hunt and all decisions will be final. The U.S. Government is sponsoring it. The scavenger hunt is open to anyone and everyone. The scavenger hunt ends on Dec. 31st, 2011.
135
+
136
+ To enter the scavenger hunt, send an email to info AT putterking DOT com with the subject line: "Putter King Scavenger Hunt Submission". In the email please include links to the pictures and videos you are submitting. You can utilize free photo and video hosting sites such as YouTube, Flickr, Picasa, Photobucket, etc. for your submissions.
137
+
138
+ By entering the Putter King Miniature Golf Scavenger Hunt, you allow Putter King to use or link to any of the pictures or videos you submit for advertisements and promotions.
139
+
140
+ Don’t forget to use your imagination and creativity!
141
+ EOF
142
+ tokens = ConfidentialInfoRedactorLite::Extractor.new(text: text, corpus: corpus).extract
143
+ expect(described_class.new(text: text, language: 'en', tokens: tokens, dow: en_dow, dow_abbr: en_dow_abbr, months: en_months, months_abbr: en_month_abbr).redact).to eq(" <redacted>\n\n <redacted> is hosting the <redacted number> <redacted>. So get out your putter and your camera and see if you have what it takes. Are you a <redacted>?\n\n <redacted>: <redacted number>) <redacted> of <redacted number> professional miniature golfers, each from a different country. (<redacted number> points; <redacted number> bonus points if the professional miniature golfers are also from <redacted number> different continents) <redacted number>) <redacted> of yourself next to each obstacle in our list of the <redacted>p <redacted number> <redacted>. (<redacted number> points; <redacted number> bonus points for each obstacle that exactly matches the one pictured in the article) <redacted number>) <redacted> your own full-size miniature golf hole. (<redacted number> points; up to <redacted number> bonus points available depending on the craftsmanship, playability, creativity and fun factor of your hole) <redacted number>) <redacted> of yourself making a hole-in-one on two consecutive miniature golf holes. <redacted> video must be one continuous shot with no editing. (<redacted number> points) <redacted number>) <redacted> of yourself with the <redacted> mascot. (<redacted number> points; <redacted number> bonus points if you are wearing a <redacted> t-shirt) <redacted number>) <redacted> of yourself with the completed <redacted> wobblehead. (<redacted number> points; <redacted number> bonus points if the picture is taken at a miniature golf course) <redacted number>) <redacted> of a completed scorecard from a round of miniature golf. <redacted> round of golf must have taken place after the start of this scavenger hunt. (<redacted number> points) <redacted number>) <redacted> of completed scorecards from <redacted number> different miniature golf courses. <redacted> round of golf must have taken place after the start of this scavenger hunt. (<redacted number> points) <redacted number>) <redacted> an entry to the <redacted number> <redacted>. (<redacted number> points; <redacted number> bonus points if your entry gets more than <redacted number> votes) <redacted number>) <redacted> from the <redacted> app showing a 9-hole score below par. (<redacted number> points) <redacted number>) <redacted> from the <redacted> app showing that you have successfully unlocked all of the holes in the game. (<redacted number> points) <redacted number>) <redacted> of the <redacted> wobblehead at a <redacted>. (<redacted number> points) <redacted number>) <redacted> and submit the <redacted> ‘Practice <redacted>’ and ‘Final <redacted>’ for any one of the <redacted> math or physics lessons. (<redacted number> points; <redacted number> bonus points if you complete two lessons) <redacted number>) <redacted> of yourself with at least <redacted number> different colored miniature golf balls. (<redacted number> points; <redacted number> bonus points for each additional color {limit of <redacted number> bonus points}) <redacted number>) <redacted> of yourself with a famous golfer or miniature golfer. (<redacted number> points; <redacted number> bonus points if the golfer is on the <redacted> tour <redacted> you are wearing a <redacted> t-shirt in the picture) <redacted number>) <redacted> of yourself making a hole-in-one on a miniature golf hole with a loop-de-loop obstacle. (<redacted number> points) <redacted number>) <redacted> of yourself successfully making a trick miniature golf shot. (<redacted number> points; up to <redacted number> bonus points available depending on the difficulty and complexity of the trick shot)\n\n\n <redacted>: <redacted number> <redacted> <redacted>\n\n <redacted>\n (<redacted number> <redacted number> <redacted> - <redacted>)\n\n <redacted> team will judge the scavenger hunt and all decisions will be final. <redacted> is sponsoring it. <redacted> scavenger hunt is open to anyone and everyone. <redacted> scavenger hunt ends on <redacted date>.\n\n <redacted> enter the scavenger hunt, send an email to info <redacted> putterking <redacted> com with the subject line: \"<redacted>\". In the email please include links to the pictures and videos you are submitting. You can utilize free photo and video hosting sites such as <redacted>, <redacted>, <redacted>, <redacted>, etc. for your submissions.\n\n <redacted> entering the <redacted>, you allow <redacted> to use or link to any of the pictures or videos you submit for advertisements and promotions.\n\n Don’t forget to use your imagination and creativity!\n")
144
+ end
145
+
146
+ it 'redacts all confidential information from a text #003' do
147
+ tokens = ['Coca-Cola', 'Pepsi', 'John Smith']
148
+ text = 'Coca-Cola announced a merger with Pepsi that will happen on December 15th, 2020 for $200,000,000,000. Please contact John Smith at j.smith@example.com or visit http://www.super-fake-merger.com.'
149
+ expect(described_class.new(text: text, language: 'en', tokens: tokens, dow: en_dow, dow_abbr: en_dow_abbr, months: en_months, months_abbr: en_month_abbr).redact).to eq('<redacted> announced a merger with <redacted> that will happen on <redacted date> for <redacted number>. Please contact <redacted> at <redacted> or visit <redacted>.')
150
+ end
151
+
152
+ it 'redacts all confidential information from a text #004' do
153
+ tokens = ['Coca-Cola', 'Pepsi', 'John Smith']
154
+ text = 'Coca-Cola announced a merger with Pepsi that will happen on December 15th, 2020 for $200,000,000,000. Please contact John Smith at j.smith@example.com or visit http://www.super-fake-merger.com.'
155
+ expect(described_class.new(text: text, language: 'en', tokens: tokens, ignore_numbers: true, dow: en_dow, dow_abbr: en_dow_abbr, months: en_months, months_abbr: en_month_abbr).redact).to eq('<redacted> announced a merger with <redacted> that will happen on <redacted date> for $200,000,000,000. Please contact <redacted> at <redacted> or visit <redacted>.')
156
+ end
157
+
158
+ it 'redacts all confidential information from a text #005' do
159
+ tokens = ['Coca-Cola', 'Pepsi', 'John Smith']
160
+ text = 'Coca-Cola announced a merger with Pepsi that will happen on December 15th, 2020 for $200,000,000,000. Please contact John Smith at j.smith@example.com or visit http://www.super-fake-merger.com.'
161
+ expect(described_class.new(text: text, language: 'en', tokens: tokens, number_text: '**redacted number**', date_text: '^^redacted date^^', token_text: '*****', dow: en_dow, dow_abbr: en_dow_abbr, months: en_months, months_abbr: en_month_abbr).redact).to eq('***** announced a merger with ***** that will happen on ^^redacted date^^ for **redacted number**. Please contact ***** at ***** or visit *****.')
162
+ end
163
+ end
164
+ end
@@ -0,0 +1 @@
1
+ require 'confidential_info_redactor_lite'
metadata ADDED
@@ -0,0 +1,126 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: confidential_info_redactor_lite
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.3
5
+ platform: ruby
6
+ authors:
7
+ - Kevin S. Dias
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2015-04-16 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: bundler
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '1.7'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '1.7'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rake
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '10.0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '10.0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: rspec
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ">="
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: pragmatic_segmenter
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ">="
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ type: :runtime
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ">="
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
69
+ description: The lite version of https://github.com/diasks2/confidential_info_redactor
70
+ - include your own language packs.
71
+ email:
72
+ - diasks2@gmail.com
73
+ executables: []
74
+ extensions: []
75
+ extra_rdoc_files: []
76
+ files:
77
+ - ".gitignore"
78
+ - ".rspec"
79
+ - ".travis.yml"
80
+ - Gemfile
81
+ - LICENSE.txt
82
+ - README.md
83
+ - Rakefile
84
+ - confidential_info_redactor_lite.gemspec
85
+ - lib/confidential_info_redactor_lite.rb
86
+ - lib/confidential_info_redactor_lite/date.rb
87
+ - lib/confidential_info_redactor_lite/extractor.rb
88
+ - lib/confidential_info_redactor_lite/hyperlink.rb
89
+ - lib/confidential_info_redactor_lite/redactor.rb
90
+ - lib/confidential_info_redactor_lite/version.rb
91
+ - spec/confidential_info_redactor/date_spec.rb
92
+ - spec/confidential_info_redactor/extractor_spec.rb
93
+ - spec/confidential_info_redactor/hyperlink_spec.rb
94
+ - spec/confidential_info_redactor/redactor_spec.rb
95
+ - spec/spec_helper.rb
96
+ homepage: ''
97
+ licenses:
98
+ - MIT
99
+ metadata: {}
100
+ post_install_message:
101
+ rdoc_options: []
102
+ require_paths:
103
+ - lib
104
+ required_ruby_version: !ruby/object:Gem::Requirement
105
+ requirements:
106
+ - - ">="
107
+ - !ruby/object:Gem::Version
108
+ version: 2.1.0
109
+ required_rubygems_version: !ruby/object:Gem::Requirement
110
+ requirements:
111
+ - - ">="
112
+ - !ruby/object:Gem::Version
113
+ version: '0'
114
+ requirements: []
115
+ rubyforge_project:
116
+ rubygems_version: 2.4.1
117
+ signing_key:
118
+ specification_version: 4
119
+ summary: Semi-automatically redact confidential information from a text (supply your
120
+ own language packs)
121
+ test_files:
122
+ - spec/confidential_info_redactor/date_spec.rb
123
+ - spec/confidential_info_redactor/extractor_spec.rb
124
+ - spec/confidential_info_redactor/hyperlink_spec.rb
125
+ - spec/confidential_info_redactor/redactor_spec.rb
126
+ - spec/spec_helper.rb