confidential_info_redactor_lite 0.0.3

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,3 @@
1
# Top-level namespace of the gem.
module ConfidentialInfoRedactorLite
  # Version string of the gem. Frozen so the shared constant cannot be
  # mutated in place by callers.
  VERSION = "0.0.3".freeze
end
@@ -0,0 +1,283 @@
1
+ require 'spec_helper'
2
+
3
# Specs for ConfidentialInfoRedactorLite::Date. Each example builds a Date
# object from a sentence plus day-of-week / month vocabularies and checks one
# of #includes_date?, #occurences, #replace or #replace_number_only_date.
RSpec.describe ConfidentialInfoRedactorLite::Date do
  # English vocabulary passed to the constructor.
  let(:en_dow) { %w[monday tuesday wednesday thursday friday saturday sunday] }
  let(:en_dow_abbr) { %w[mon tu tue tues wed th thu thur thurs fri sat sun] }
  let(:en_months) { %w[january february march april may june july august september october november december] }
  let(:en_month_abbr) { %w[jan feb mar apr jun jul aug sep sept oct nov dec] }

  # German vocabulary passed to the constructor.
  let(:de_dow) { %w[montag dienstag mittwoch donnerstag freitag samstag sonntag sonnabend] }
  let(:de_dow_abbr) { %w[mo di mi do fr sa so] }
  let(:de_months) { %w[januar februar märz april mai juni juli august september oktober november dezember] }
  let(:de_month_abbr) { %w[jan jän feb märz apr mai juni juli aug sep sept okt nov dez] }

  # Builds the object under test for +string+ with the English vocabulary.
  def english_date(string)
    described_class.new(
      string: string,
      dow: en_dow,
      dow_abbr: en_dow_abbr,
      months: en_months,
      months_abbr: en_month_abbr
    )
  end

  # Builds the object under test for +string+ with the German vocabulary.
  def german_date(string)
    described_class.new(
      string: string,
      dow: de_dow,
      dow_abbr: de_dow_abbr,
      months: de_months,
      months_abbr: de_month_abbr
    )
  end

  context '#includes_date?' do
    # One example is generated per sentence; the position in this list
    # (1-based) becomes the example number, so the order must not change.
    # NOTE(review): entry #036 repeats the sentence of entry #028.
    sentences_with_dates = [
      'Today is Monday, April 4th, 2011, aka 04/04/2011.',
      'Today is Monday April 4th 2011.',
      'Today is April 4th, 2011.',
      'Today is Mon., Apr. 4, 2011.',
      'Today is 04/04/2011.',
      'Today is 04.04.2011.',
      'Today is 2011.04.04.',
      'Today is 2011/04/04.',
      'Today is 2011-04-04.',
      'Today is 04-04-2011.',
      'Today is 2003 November 9.',
      'Today is 2003Nov9.',
      'Today is 2003Nov09.',
      'Today is 2003-Nov-9.',
      'Today is 2003-Nov-09.',
      'Today is 2003-Nov-9, Sunday.',
      'Today is 2003. november 9.',
      'Today is 2003.11.9.',
      'Today is Monday, Apr. 4, 2011.',
      'Today is 2003/11/09.',
      'Today is 20030109.',
      'Today is 01092003.',
      'Today is Sunday, November 9, 2014.',
      'Today is November 9, 2014.',
      'Today is Nov. 9, 2014.',
      'Today is july 1st.',
      'Today is jul. 1st.',
      'Today is 8 November 2014.',
      'Today is 8. November 2014.',
      'Today is 08-Nov-2014.',
      'Today is 08Nov14.',
      'Today is 8th November 2014.',
      'Today is the 8th of November 2014.',
      'Today is 08/Nov/2014.',
      'Today is Sunday, 8 November 2014.',
      'Today is 8 November 2014.'
    ]

    sentences_with_dates.each.with_index(1) do |sentence, number|
      it format('returns true if the string includes a date #%03d', number) do
        expect(english_date(sentence).includes_date?).to eq(true)
      end
    end

    it 'returns false if the string does not include a date #037' do
      sentence = 'Hello world. There is no date here - $50,000. The sun is hot.'
      expect(english_date(sentence).includes_date?).to eq(false)
    end
  end

  context '#occurences' do
    it 'counts the date occurences in a string #001' do
      sentence = 'Today is Sunday, 8 November 2014.'
      expect(english_date(sentence).occurences).to eq(1)
    end

    it 'counts the date occurences in a string #002' do
      sentence = 'Today is Sunday, 8 November 2014. Yesterday was 07/Nov/2014.'
      expect(english_date(sentence).occurences).to eq(2)
    end
  end

  context '#replace' do
    context 'English (en)' do
      it 'replaces the date occurences in a string #001' do
        redacted = english_date('Today is Tues. March 3rd, 2011.').replace
        expect(redacted).to eq('Today is <redacted date> .')
      end

      it 'replaces the date occurences in a string #002' do
        redacted = english_date('The scavenger hunt ends on Dec. 31st, 2011.').replace
        expect(redacted).to eq('The scavenger hunt ends on <redacted date> .')
      end
    end

    context 'German (de)' do
      it 'replaces the date occurences in a string #001' do
        redacted = german_date('15. Oktober 2015').replace
        expect(redacted).to eq(' <redacted date> ')
      end
    end
  end

  context '#replace_number_only_date' do
    it 'replaces only the number date occurences in a string' do
      redacted = english_date('Today is Tues. March 3rd, 2011. 4/28/2013').replace_number_only_date
      expect(redacted).to eq("Today is Tues. March 3rd, 2011. <redacted date> ")
    end
  end
end
@@ -0,0 +1,116 @@
1
+ require 'spec_helper'
2
+
3
# Specs for ConfidentialInfoRedactorLite::Extractor#extract. Each example
# feeds a text and a shared word corpus to the extractor and compares the
# returned array with the expected list of extracted tokens.
RSpec.describe ConfidentialInfoRedactorLite::Extractor do
  # Lower-case word list passed to every extractor as +corpus+.
  let(:corpus) do
    %w[
      i in you top so are december please viele mitarbeiter arbeitsstelle
      some there king by don't dec at dot and project activity complete
      prizes build video many autographs picture the each submit to
    ]
  end

  # Runs the extractor on +text+ with the shared corpus and an explicit
  # +language+ code, returning whatever #extract returns.
  def extracted(text, language:)
    described_class.new(text: text, corpus: corpus, language: language).extract
  end

  describe '#extract' do
    context 'English (en)' do
      it 'extracts the proper nouns from a text #001' do
        text = 'Coca-Cola announced a merger with Pepsi that will happen on December 15th, 2020 for $200,000,000,000.'
        expect(extracted(text, language: 'en')).to eq(['Coca-Cola', 'Pepsi'])
      end

      it 'extracts the proper nouns from a text #002' do
        text = 'Coca-Cola announced a merger with Pepsi.'
        expect(extracted(text, language: 'en')).to eq(['Coca-Cola', 'Pepsi'])
      end

      it 'extracts the proper nouns from a text #003' do
        text = 'Many employees of Deutsche Bank are looking for another job.'
        expect(extracted(text, language: 'en')).to eq(['Deutsche Bank'])
      end

      it 'extracts the proper nouns from a text #004' do
        text = 'Many employees of Deutsche Bank are looking for another job while those from Pepsi are not.'
        expect(extracted(text, language: 'en')).to eq(['Deutsche Bank', 'Pepsi'])
      end

      it 'extracts the proper nouns from a text #005' do
        text = 'There are many employees at Deutsche Bank. Some are thinking about drinking Pepsi, Coke, or Sprite.'
        expect(extracted(text, language: 'en')).to eq(['Deutsche Bank', 'Pepsi', 'Coke', 'Sprite'])
      end

      it 'extracts the proper nouns from a text #006' do
        # NOTE(review): the heredoc body is kept flush-left because its
        # original indentation is not visible in this view; `<<-` keeps any
        # leading whitespace of the body as part of the string.
        text = <<-EOF
Putter King Miniature Golf Scavenger Hunt

Putter King is hosting the 1st Annual Miniature Golf Scavenger Hunt. So get out your putter and your camera and see if you have what it takes. Are you a King?

The Official List:

#1) Autographs of 2 professional miniature golfers, each from a different country. (45 points; 5 bonus points if the professional miniature golfers are also from 2 different continents)

#2) Picture of yourself next to each obstacle in our list of the Top 10 Nostalgic Miniature Golf Obstacles. (120 points; 20 bonus points for each obstacle that exactly matches the one pictured in the article)

#3) Build your own full-size miniature golf hole. (75 points; up to 100 bonus points available depending on the craftsmanship, playability, creativity and fun factor of your hole)

#4) Video of yourself making a hole-in-one on two consecutive miniature golf holes. The video must be one continuous shot with no editing. (60 points)

#5) Picture of yourself with the Putter King mascot. (50 points; 15 bonus points if you are wearing a Putter King t-shirt)

#6) Picture of yourself with the completed Putter King wobblehead. (15 points; 15 bonus points if the picture is taken at a miniature golf course)

#7) Picture of a completed scorecard from a round of miniature golf. The round of golf must have taken place after the start of this scavenger hunt. (10 points)

#8) Picture of completed scorecards from 5 different miniature golf courses. Each round of golf must have taken place after the start of this scavenger hunt. (35 points)

#9) Submit an entry to the 2011 Putter King Hole Design Contest. (60 points; 40 bonus points if your entry gets more than 100 votes)

#10) Screenshot from the Putter King app showing a 9-hole score below par. (10 points)

#11) Screenshot from the Putter King app showing that you have successfully unlocked all of the holes in the game. (45 points)

#12) Picture of the Putter King wobblehead at a World Heritage Site. (55 points)

#13) Complete and submit the Putter King ‘Practice Activity’ and ‘Final Project’ for any one of the Putter King math or physics lessons. (40 points; 20 bonus points if you complete two lessons)

#14) Picture of yourself with at least 6 different colored miniature golf balls. (10 points; 2 bonus points for each additional color {limit of 10 bonus points})

#15) Picture of yourself with a famous golfer or miniature golfer. (15 points; 150 bonus points if the golfer is on the PGA tour AND you are wearing a Putter King t-shirt in the picture)

#16) Video of yourself making a hole-in-one on a miniature golf hole with a loop-de-loop obstacle. (30 points)

#17) Video of yourself successfully making a trick miniature golf shot. (40 points; up to 100 bonus points available depending on the difficulty and complexity of the trick shot)


Prizes:

$100 iTunes Gift Card

Putter King Scavenger Hunt Trophy
(6 3/4" Engraved Crystal Trophy - Picture Coming Soon)

The Putter King team will judge the scavenger hunt and all decisions will be final. The U.S. Government is sponsoring it. The scavenger hunt is open to anyone and everyone. The scavenger hunt ends on Dec. 31st, 2011.

To enter the scavenger hunt, send an email to info AT putterking DOT com with the subject line: "Putter King Scavenger Hunt Submission". In the email please include links to the pictures and videos you are submitting. You can utilize free photo and video hosting sites such as YouTube, Flickr, Picasa, Photobucket, etc. for your submissions.

By entering the Putter King Miniature Golf Scavenger Hunt, you allow Putter King to use or link to any of the pictures or videos you submit for advertisements and promotions.

Don’t forget to use your imagination and creativity!
        EOF
        expected = [
          "Putter King Miniature Golf Scavenger Hunt",
          "Putter King",
          "Annual Miniature Golf Scavenger Hunt",
          "The Official List",
          "Nostalgic Miniature Golf Obstacles",
          "Putter King Hole Design Contest",
          "Screenshot",
          "World Heritage Site",
          "PGA",
          "iTunes",
          "Gift Card",
          "Putter King Scavenger Hunt Trophy",
          "Engraved Crystal Trophy",
          "Picture Coming Soon",
          "The Putter King",
          "The U.S. Government",
          "Putter King Scavenger Hunt Submission",
          "YouTube",
          "Flickr",
          "Picasa",
          "Photobucket"
        ]
        # NOTE(review): unlike the other examples, no language: is passed
        # here — presumably this relies on the constructor's default; confirm.
        expect(described_class.new(text: text, corpus: corpus).extract).to eq(expected)
      end

      it 'extracts the proper nouns from a text #007' do
        text = 'I learned that Apple has plans to release a new iPhone, iPad and iWatch.'
        expect(extracted(text, language: 'en')).to eq(['Apple', 'iPhone', 'iPad', 'iWatch'])
      end

      it 'extracts the proper nouns from a text #008' do
        text = 'Coca-Cola announced a merger with Pepsi that will happen on December 15th, 2020 for $200,000,000,000. Please contact John Smith at j.smith@example.com or visit http://www.super-fake-merger.com.'
        expect(extracted(text, language: 'en')).to eq(["Coca-Cola", "Pepsi", "John Smith"])
      end
    end

    context 'German (de)' do
      it 'extracts the proper nouns from a text #001' do
        text = 'Viele Mitarbeiter der Deutschen Bank suchen eine andere Arbeitsstelle.'
        expect(extracted(text, language: 'de')).to eq(['Deutschen Bank'])
      end

      # Numbered #002 so the two German examples have distinct descriptions
      # in failure output (both were previously labelled #001).
      it 'extracts the proper nouns from a text #002' do
        text = 'Viele Mitarbeiter der Deutsche Bank suchen eine andere Arbeitsstelle.'
        expect(extracted(text, language: 'de')).to eq(['Deutsche Bank'])
      end
    end
  end
end
@@ -0,0 +1,61 @@
1
+ require 'spec_helper'
2
+
3
# Specs for ConfidentialInfoRedactorLite::Hyperlink: #hyperlink? is checked
# against strings that are and are not hyperlinks, and #replace is checked
# to swap hyperlinks for a redaction token while leaving other text alone.
RSpec.describe ConfidentialInfoRedactorLite::Hyperlink do
  context '#hyperlink?' do
    # Example numbers continue across both lists (#001-#003, then #004-#007),
    # so the order of the entries must not change.
    hyperlinks = [
      "http://www.example.com/this-IS-a_test/hello.html",
      "http://www.google.co.uk",
      "https://google.co.uk"
    ]
    non_hyperlinks = [
      "hello",
      "john@gmail.com",
      "date:",
      'The file location is c:\Users\johndoe.'
    ]

    hyperlinks.each.with_index(1) do |candidate, number|
      it format('returns true if the string is a hyperlink #%03d', number) do
        checker = described_class.new(string: candidate)
        expect(checker.hyperlink?).to eq(true)
      end
    end

    non_hyperlinks.each.with_index(hyperlinks.size + 1) do |candidate, number|
      it format('returns false if the string is not a hyperlink #%03d', number) do
        checker = described_class.new(string: candidate)
        expect(checker.hyperlink?).to eq(false)
      end
    end
  end

  context '#replace' do
    it 'replaces the hyperlinks in a string with regular tokens #001' do
      input = "Today the date is: Jan 1. Visit https://www.example.com/hello or http://www.google.co.uk"
      output = described_class.new(string: input).replace
      expect(output).to eq("Today the date is: Jan 1. Visit <redacted> or <redacted> ")
    end

    it 'replaces the hyperlinks in a string with regular tokens #002' do
      # Windows-style paths contain no hyperlink, so the text comes back as is.
      input = 'The file location is c:\Users\johndoe or d:\Users\john\www'
      output = described_class.new(string: input).replace
      expect(output).to eq('The file location is c:\Users\johndoe or d:\Users\john\www')
    end
  end
end