confidential_info_redactor 0.0.18 → 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +11 -11
- data/lib/confidential_info_redactor/date.rb +98 -118
- data/lib/confidential_info_redactor/extractor.rb +66 -33
- data/lib/confidential_info_redactor/hyperlink.rb +5 -17
- data/lib/confidential_info_redactor/redactor.rb +13 -13
- data/lib/confidential_info_redactor/version.rb +1 -1
- data/lib/confidential_info_redactor/word_lists.rb +2 -2
- data/lib/confidential_info_redactor.rb +2 -1
- data/spec/confidential_info_redactor/date_spec.rb +88 -88
- data/spec/confidential_info_redactor/extractor_spec.rb +20 -20
- data/spec/confidential_info_redactor/hyperlink_spec.rb +18 -18
- data/spec/confidential_info_redactor/redactor_spec.rb +22 -22
- metadata +2 -2
@@ -5,27 +5,27 @@ RSpec.describe ConfidentialInfoRedactor::Extractor do
|
|
5
5
|
context 'English (en)' do
|
6
6
|
it 'extracts the proper nouns from a text #001' do
|
7
7
|
text = 'Coca-Cola announced a merger with Pepsi that will happen on December 15th, 2020 for $200,000,000,000.'
|
8
|
-
expect(described_class.new(
|
8
|
+
expect(described_class.new(language: 'en').extract(text)).to eq(['Coca-Cola', 'Pepsi'])
|
9
9
|
end
|
10
10
|
|
11
11
|
it 'extracts the proper nouns from a text #002' do
|
12
12
|
text = 'Coca-Cola announced a merger with Pepsi.'
|
13
|
-
expect(described_class.new(
|
13
|
+
expect(described_class.new(language: 'en').extract(text)).to eq(['Coca-Cola', 'Pepsi'])
|
14
14
|
end
|
15
15
|
|
16
16
|
it 'extracts the proper nouns from a text #003' do
|
17
17
|
text = 'Many employees of Deutsche Bank are looking for another job.'
|
18
|
-
expect(described_class.new(
|
18
|
+
expect(described_class.new(language: 'en').extract(text)).to eq(['Deutsche Bank'])
|
19
19
|
end
|
20
20
|
|
21
21
|
it 'extracts the proper nouns from a text #004' do
|
22
22
|
text = 'Many employees of Deutsche Bank are looking for another job while those from Pepsi are not.'
|
23
|
-
expect(described_class.new(
|
23
|
+
expect(described_class.new(language: 'en').extract(text)).to eq(['Deutsche Bank', 'Pepsi'])
|
24
24
|
end
|
25
25
|
|
26
26
|
it 'extracts the proper nouns from a text #005' do
|
27
27
|
text = 'There are many employees at Deutsche Bank. Some are thinking about drinking Pepsi, Coke, or Sprite.'
|
28
|
-
expect(described_class.new(
|
28
|
+
expect(described_class.new(language: 'en').extract(text)).to eq(['Deutsche Bank', 'Pepsi', 'Coke', 'Sprite'])
|
29
29
|
end
|
30
30
|
|
31
31
|
it 'extracts the proper nouns from a text #006' do
|
@@ -86,79 +86,79 @@ RSpec.describe ConfidentialInfoRedactor::Extractor do
|
|
86
86
|
|
87
87
|
Don’t forget to use your imagination and creativity!
|
88
88
|
EOF
|
89
|
-
expect(described_class.new(text: text).extract).to eq(["PGA", "iTunes", "YouTube", "Flickr", "Picasa", "Photobucket"])
|
89
|
+
expect(described_class.new(text: text).extract(text)).to eq(["PGA", "iTunes", "YouTube", "Flickr", "Picasa", "Photobucket"])
|
90
90
|
end
|
91
91
|
|
92
92
|
it 'extracts the proper nouns from a text #007' do
|
93
93
|
text = 'I learned that Apple has plans to release a new iPhone, iPad and iWatch.'
|
94
|
-
expect(described_class.new(
|
94
|
+
expect(described_class.new(language: 'en').extract(text)).to eq(['Apple', 'iPhone', 'iPad', 'iWatch'])
|
95
95
|
end
|
96
96
|
|
97
97
|
it 'extracts the proper nouns from a text #008' do
|
98
98
|
text = 'Coca-Cola announced a merger with Pepsi that will happen on December 15th, 2020 for $200,000,000,000. Please contact John Smith at j.smith@example.com or visit http://www.super-fake-merger.com.'
|
99
|
-
expect(described_class.new(
|
99
|
+
expect(described_class.new(language: 'en').extract(text)).to eq(["Coca-Cola", "Pepsi", "John Smith"])
|
100
100
|
end
|
101
101
|
|
102
102
|
it 'extracts the proper nouns from a text #009' do
|
103
103
|
text = 'Then Peter went to the store.'
|
104
|
-
expect(described_class.new(
|
104
|
+
expect(described_class.new(language: 'en').extract(text)).to eq(["Peter"])
|
105
105
|
end
|
106
106
|
|
107
107
|
it 'extracts the proper nouns from a text #010' do
|
108
108
|
text = 'HOW TO COOK VEGETABLES'
|
109
|
-
expect(described_class.new(
|
109
|
+
expect(described_class.new(language: 'en').extract(text)).to eq([])
|
110
110
|
end
|
111
111
|
|
112
112
|
it 'extracts the proper nouns from a text #011' do
|
113
113
|
text = 'All Natural Peanut Butter'
|
114
|
-
expect(described_class.new(
|
114
|
+
expect(described_class.new(language: 'en').extract(text)).to eq([])
|
115
115
|
end
|
116
116
|
|
117
117
|
it 'extracts the proper nouns from a text #012' do
|
118
118
|
text = 'GOOD CARBS VS. BAD CARBS'
|
119
|
-
expect(described_class.new(
|
119
|
+
expect(described_class.new(language: 'en').extract(text)).to eq([])
|
120
120
|
end
|
121
121
|
|
122
122
|
it 'extracts the proper nouns from a text #013' do
|
123
123
|
text = 'Reducing”'
|
124
|
-
expect(described_class.new(
|
124
|
+
expect(described_class.new(language: 'en').extract(text)).to eq([])
|
125
125
|
end
|
126
126
|
|
127
127
|
it 'extracts the proper nouns from a text #014' do
|
128
128
|
text = '”'
|
129
|
-
expect(described_class.new(
|
129
|
+
expect(described_class.new(language: 'en').extract(text)).to eq([])
|
130
130
|
end
|
131
131
|
|
132
132
|
it 'extracts the proper nouns from a text #015' do
|
133
133
|
text = '“Reducing'
|
134
|
-
expect(described_class.new(
|
134
|
+
expect(described_class.new(language: 'en').extract(text)).to eq([])
|
135
135
|
end
|
136
136
|
|
137
137
|
it 'extracts the proper nouns from a text #016' do
|
138
138
|
text = 'Corrigendum to Council Regulation (EC) No 85/2009 of 19 January 2009 amending Regulation (EC) No 1083/2006 laying down general provisions on the European Regional Development Fund, the European Social Fund and the Cohesion Fund concerning certain provisions relating to financial management'
|
139
|
-
expect(described_class.new(
|
139
|
+
expect(described_class.new(language: 'en').extract(text)).to eq(["Corrigendum"])
|
140
140
|
end
|
141
141
|
|
142
142
|
it 'extracts the proper nouns from a text #017' do
|
143
143
|
text = 'John'
|
144
|
-
expect(described_class.new(
|
144
|
+
expect(described_class.new(language: 'en').extract(text)).to eq(['John'])
|
145
145
|
end
|
146
146
|
end
|
147
147
|
|
148
148
|
context 'German (de)' do
|
149
149
|
it 'extracts the proper nouns from a text #001' do
|
150
150
|
text = 'Viele Mitarbeiter der Deutschen Bank suchen eine andere Arbeitsstelle.'
|
151
|
-
expect(described_class.new(
|
151
|
+
expect(described_class.new(language: 'de').extract(text)).to eq(['Deutschen Bank'])
|
152
152
|
end
|
153
153
|
|
154
154
|
it 'extracts the proper nouns from a text #002' do
|
155
155
|
text = 'Viele Mitarbeiter der Deutsche Bank suchen eine andere Arbeitsstelle.'
|
156
|
-
expect(described_class.new(
|
156
|
+
expect(described_class.new(language: 'de').extract(text)).to eq(['Deutsche Bank'])
|
157
157
|
end
|
158
158
|
|
159
159
|
it 'extracts the proper nouns from a text #003' do
|
160
160
|
text = 'Ich behielt diese Routine während und sogar während des Studiums an der Uni bei, und ich war damals froh, wenn ich pro Tag zwei ganze Mahlzeiten zu mir nahm.'
|
161
|
-
expect(described_class.new(
|
161
|
+
expect(described_class.new(language: 'de').extract(text)).to eq([])
|
162
162
|
end
|
163
163
|
end
|
164
164
|
end
|
@@ -4,58 +4,58 @@ RSpec.describe ConfidentialInfoRedactor::Hyperlink do
|
|
4
4
|
context '#hyperlink?' do
|
5
5
|
it 'returns true if the string is a hyperlink #001' do
|
6
6
|
string = "http://www.example.com/this-IS-a_test/hello.html"
|
7
|
-
ws = described_class.new
|
8
|
-
expect(ws.hyperlink?).to eq(true)
|
7
|
+
ws = described_class.new
|
8
|
+
expect(ws.hyperlink?(string)).to eq(true)
|
9
9
|
end
|
10
10
|
|
11
11
|
it 'returns true if the string is a hyperlink #002' do
|
12
12
|
string = "http://www.google.co.uk"
|
13
|
-
ws = described_class.new
|
14
|
-
expect(ws.hyperlink?).to eq(true)
|
13
|
+
ws = described_class.new
|
14
|
+
expect(ws.hyperlink?(string)).to eq(true)
|
15
15
|
end
|
16
16
|
|
17
17
|
it 'returns true if the string is a hyperlink #003' do
|
18
18
|
string = "https://google.co.uk"
|
19
|
-
ws = described_class.new
|
20
|
-
expect(ws.hyperlink?).to eq(true)
|
19
|
+
ws = described_class.new
|
20
|
+
expect(ws.hyperlink?(string)).to eq(true)
|
21
21
|
end
|
22
22
|
|
23
23
|
it 'returns false if the string is not a hyperlink #004' do
|
24
24
|
string = "hello"
|
25
|
-
ws = described_class.new
|
26
|
-
expect(ws.hyperlink?).to eq(false)
|
25
|
+
ws = described_class.new
|
26
|
+
expect(ws.hyperlink?(string)).to eq(false)
|
27
27
|
end
|
28
28
|
|
29
29
|
it 'returns false if the string is not a hyperlink #005' do
|
30
30
|
string = "john@gmail.com"
|
31
|
-
ws = described_class.new
|
32
|
-
expect(ws.hyperlink?).to eq(false)
|
31
|
+
ws = described_class.new
|
32
|
+
expect(ws.hyperlink?(string)).to eq(false)
|
33
33
|
end
|
34
34
|
|
35
35
|
it 'returns false if the string is not a hyperlink #006' do
|
36
36
|
string = "date:"
|
37
|
-
ws = described_class.new
|
38
|
-
expect(ws.hyperlink?).to eq(false)
|
37
|
+
ws = described_class.new
|
38
|
+
expect(ws.hyperlink?(string)).to eq(false)
|
39
39
|
end
|
40
40
|
|
41
41
|
it 'returns false if the string is not a hyperlink #007' do
|
42
42
|
string = 'The file location is c:\Users\johndoe.'
|
43
|
-
ws = described_class.new
|
44
|
-
expect(ws.hyperlink?).to eq(false)
|
43
|
+
ws = described_class.new
|
44
|
+
expect(ws.hyperlink?(string)).to eq(false)
|
45
45
|
end
|
46
46
|
end
|
47
47
|
|
48
48
|
context '#replace' do
|
49
49
|
it 'replaces the hyperlinks in a string with regular tokens #001' do
|
50
50
|
string = "Today the date is: Jan 1. Visit https://www.example.com/hello or http://www.google.co.uk"
|
51
|
-
ws = described_class.new
|
52
|
-
expect(ws.replace).to eq("Today the date is: Jan 1. Visit <redacted> or <redacted> ")
|
51
|
+
ws = described_class.new
|
52
|
+
expect(ws.replace(string)).to eq("Today the date is: Jan 1. Visit <redacted> or <redacted> ")
|
53
53
|
end
|
54
54
|
|
55
55
|
it 'replaces the hyperlinks in a string with regular tokens #002' do
|
56
56
|
string = 'The file location is c:\Users\johndoe or d:\Users\john\www'
|
57
|
-
ws = described_class.new
|
58
|
-
expect(ws.replace).to eq('The file location is c:\Users\johndoe or d:\Users\john\www')
|
57
|
+
ws = described_class.new
|
58
|
+
expect(ws.replace(string)).to eq('The file location is c:\Users\johndoe or d:\Users\john\www')
|
59
59
|
end
|
60
60
|
end
|
61
61
|
end
|
@@ -4,68 +4,68 @@ RSpec.describe ConfidentialInfoRedactor::Redactor do
|
|
4
4
|
describe '#dates' do
|
5
5
|
it 'redacts dates from a text #001' do
|
6
6
|
text = 'Coca-Cola announced a merger with Pepsi that will happen on December 15th, 2020 for $200,000,000,000.'
|
7
|
-
expect(described_class.new(
|
7
|
+
expect(described_class.new(language: 'en').dates(text)).to eq('Coca-Cola announced a merger with Pepsi that will happen on <redacted date> for $200,000,000,000.')
|
8
8
|
end
|
9
9
|
|
10
10
|
it 'redacts dates from a text #002' do
|
11
11
|
text = 'Coca-Cola announced a merger with Pepsi that will happen on December 15th, 2020.'
|
12
|
-
expect(described_class.new(
|
12
|
+
expect(described_class.new(language: 'en').dates(text)).to eq('Coca-Cola announced a merger with Pepsi that will happen on <redacted date>.')
|
13
13
|
end
|
14
14
|
|
15
15
|
it 'redacts dates from a text #003' do
|
16
16
|
text = 'December 5, 2010 - Coca-Cola announced a merger with Pepsi.'
|
17
|
-
expect(described_class.new(
|
17
|
+
expect(described_class.new(language: 'en').dates(text)).to eq('<redacted date> - Coca-Cola announced a merger with Pepsi.')
|
18
18
|
end
|
19
19
|
|
20
20
|
it 'redacts dates from a text #004' do
|
21
21
|
text = 'The scavenger hunt ends on Dec. 31st, 2011.'
|
22
|
-
expect(described_class.new(
|
22
|
+
expect(described_class.new(language: 'en').dates(text)).to eq('The scavenger hunt ends on <redacted date>.')
|
23
23
|
end
|
24
24
|
end
|
25
25
|
|
26
26
|
describe '#numbers' do
|
27
27
|
it 'redacts numbers from a text #001' do
|
28
28
|
text = 'Coca-Cola announced a merger with Pepsi that will happen on <redacted date> for $200,000,000,000.'
|
29
|
-
expect(described_class.new(
|
29
|
+
expect(described_class.new(language: 'en').numbers(text)).to eq('Coca-Cola announced a merger with Pepsi that will happen on <redacted date> for <redacted number>.')
|
30
30
|
end
|
31
31
|
|
32
32
|
it 'redacts numbers from a text #002' do
|
33
33
|
text = '200 years ago.'
|
34
|
-
expect(described_class.new(
|
34
|
+
expect(described_class.new(language: 'en').numbers(text)).to eq('<redacted number> years ago.')
|
35
35
|
end
|
36
36
|
|
37
37
|
it 'redacts numbers from a text #003' do
|
38
38
|
text = 'It was his 1st time, not yet his 10th, not even his 2nd. The wood was 3/4" thick.'
|
39
|
-
expect(described_class.new(
|
39
|
+
expect(described_class.new(language: 'en').numbers(text)).to eq('It was his <redacted number> time, not yet his <redacted number>, not even his <redacted number>. The wood was <redacted number> thick.')
|
40
40
|
end
|
41
41
|
|
42
42
|
it 'redacts numbers from a text #004' do
|
43
43
|
text = 'Checking file of %2'
|
44
|
-
expect(described_class.new(
|
44
|
+
expect(described_class.new(language: 'en').numbers(text)).to eq('Checking file of <redacted number>')
|
45
45
|
end
|
46
46
|
|
47
47
|
it 'redacts numbers from a text #005' do
|
48
48
|
text = 'zawiera pliki skompresowane (%2).'
|
49
|
-
expect(described_class.new(
|
49
|
+
expect(described_class.new(language: 'en').numbers(text)).to eq('zawiera pliki skompresowane (<redacted number>).')
|
50
50
|
end
|
51
51
|
end
|
52
52
|
|
53
53
|
describe '#emails' do
|
54
54
|
it 'redacts email addresses from a text #001' do
|
55
55
|
text = 'His email is john@gmail.com or you can try k.light@tuv.eu.us.'
|
56
|
-
expect(described_class.new(
|
56
|
+
expect(described_class.new(language: 'en').emails(text)).to eq('His email is <redacted> or you can try <redacted>.')
|
57
57
|
end
|
58
58
|
|
59
59
|
it 'redacts email addresses from a text #002' do
|
60
60
|
text = 'His email is (john@gmail.com) or you can try (k.light@tuv.eu.us).'
|
61
|
-
expect(described_class.new(
|
61
|
+
expect(described_class.new(language: 'en').emails(text)).to eq('His email is (<redacted>) or you can try (<redacted>).')
|
62
62
|
end
|
63
63
|
end
|
64
64
|
|
65
65
|
describe '#hyperlinks' do
|
66
66
|
it 'redacts hyperlinks from a text #001' do
|
67
67
|
text = 'Visit https://www.tm-town.com for more info.'
|
68
|
-
expect(described_class.new(
|
68
|
+
expect(described_class.new(language: 'en').hyperlinks(text)).to eq('Visit <redacted> for more info.')
|
69
69
|
end
|
70
70
|
end
|
71
71
|
|
@@ -73,13 +73,13 @@ RSpec.describe ConfidentialInfoRedactor::Redactor do
|
|
73
73
|
it 'redacts tokens from a text #001' do
|
74
74
|
tokens = ['Coca-Cola', 'Pepsi']
|
75
75
|
text = 'Coca-Cola announced a merger with Pepsi that will happen on on December 15th, 2020 for $200,000,000,000.'
|
76
|
-
expect(described_class.new(
|
76
|
+
expect(described_class.new(language: 'en', tokens: tokens).proper_nouns(text)).to eq('<redacted> announced a merger with <redacted> that will happen on on December 15th, 2020 for $200,000,000,000.')
|
77
77
|
end
|
78
78
|
|
79
79
|
it 'redacts tokens from a text #002' do
|
80
80
|
tokens = ['Coca-Cola', 'Pepsi']
|
81
81
|
text = 'Coca-Cola announced a merger with Pepsi that will happen on on December 15th, 2020 for $200,000,000,000.'
|
82
|
-
expect(described_class.new(
|
82
|
+
expect(described_class.new(language: 'en', tokens: tokens, token_text: '*****').proper_nouns(text)).to eq('***** announced a merger with ***** that will happen on on December 15th, 2020 for $200,000,000,000.')
|
83
83
|
end
|
84
84
|
end
|
85
85
|
|
@@ -87,7 +87,7 @@ RSpec.describe ConfidentialInfoRedactor::Redactor do
|
|
87
87
|
it 'redacts all confidential information from a text #001' do
|
88
88
|
tokens = ['Coca-Cola', 'Pepsi']
|
89
89
|
text = 'Coca-Cola announced a merger with Pepsi that will happen on on December 15th, 2020 for $200,000,000,000.'
|
90
|
-
expect(described_class.new(
|
90
|
+
expect(described_class.new(language: 'en', tokens: tokens).redact(text)).to eq('<redacted> announced a merger with <redacted> that will happen on on <redacted date> for <redacted number>.')
|
91
91
|
end
|
92
92
|
|
93
93
|
it 'redacts all confidential information from a text #002' do
|
@@ -148,38 +148,38 @@ RSpec.describe ConfidentialInfoRedactor::Redactor do
|
|
148
148
|
|
149
149
|
Don’t forget to use your imagination and creativity!
|
150
150
|
EOF
|
151
|
-
tokens = ConfidentialInfoRedactor::Extractor.new(text
|
152
|
-
expect(described_class.new(
|
151
|
+
tokens = ConfidentialInfoRedactor::Extractor.new.extract(text)
|
152
|
+
expect(described_class.new(language: 'en', tokens: tokens).redact(text)).to eq("Putter King Miniature Golf Scavenger Hunt\n\n Putter King is hosting the <redacted number> Annual Miniature Golf Scavenger Hunt. So get out your putter and your camera and see if you have what it takes. Are you a King?\n\n The Official List: <redacted number>) Autographs of <redacted number> professional miniature golfers, each from a different country. (<redacted number> points; <redacted number> bonus points if the professional miniature golfers are also from <redacted number> different continents) <redacted number>) Picture of yourself next to each obstacle in our list of the Top <redacted number> Nostalgic Miniature Golf Obstacles. (<redacted number> points; <redacted number> bonus points for each obstacle that exactly matches the one pictured in the article) <redacted number>) Build your own full-size miniature golf hole. (<redacted number> points; up to <redacted number> bonus points available depending on the craftsmanship, playability, creativity and fun factor of your hole) <redacted number>) Video of yourself making a hole-in-one on two consecutive miniature golf holes. The video must be one continuous shot with no editing. (<redacted number> points) <redacted number>) Picture of yourself with the Putter King mascot. (<redacted number> points; <redacted number> bonus points if you are wearing a Putter King t-shirt) <redacted number>) Picture of yourself with the completed Putter King wobblehead. (<redacted number> points; <redacted number> bonus points if the picture is taken at a miniature golf course) <redacted number>) Picture of a completed scorecard from a round of miniature golf. The round of golf must have taken place after the start of this scavenger hunt. (<redacted number> points) <redacted number>) Picture of completed scorecards from <redacted number> different miniature golf courses. Each round of golf must have taken place after the start of this scavenger hunt. (<redacted number> points) <redacted number>) Submit an entry to the <redacted number> Putter King Hole Design Contest. (<redacted number> points; <redacted number> bonus points if your entry gets more than <redacted number> votes) <redacted number>) Screenshot from the Putter King app showing a <redacted number> score below par. (<redacted number> points) <redacted number>) Screenshot from the Putter King app showing that you have successfully unlocked all of the holes in the game. (<redacted number> points) <redacted number>) Picture of the Putter King wobblehead at a World Heritage Site. (<redacted number> points) <redacted number>) Complete and submit the Putter King ‘Practice Activity’ and ‘Final Project’ for any one of the Putter King math or physics lessons. (<redacted number> points; <redacted number> bonus points if you complete two lessons) <redacted number>) Picture of yourself with at least <redacted number> different colored miniature golf balls. (<redacted number> points; <redacted number> bonus points for each additional color {limit of <redacted number> bonus points}) <redacted number>) Picture of yourself with a famous golfer or miniature golfer. (<redacted number> points; <redacted number> bonus points if the golfer is on the <redacted> tour AND you are wearing a Putter King t-shirt in the picture) <redacted number>) Video of yourself making a hole-in-one on a miniature golf hole with a loop-de-loop obstacle. (<redacted number> points) <redacted number>) Video of yourself successfully making a trick miniature golf shot. (<redacted number> points; up to <redacted number> bonus points available depending on the difficulty and complexity of the trick shot)\n\n\n Prizes: <redacted number> <redacted> Gift Card\n\n Putter King Scavenger Hunt Trophy\n (<redacted number> <redacted number> Engraved Crystal Trophy - Picture Coming Soon)\n\n The Putter King team will judge the scavenger hunt and all decisions will be final. The U.S. Government is sponsoring it. The scavenger hunt is open to anyone and everyone. The scavenger hunt ends on <redacted date>.\n\n To enter the scavenger hunt, send an email to info AT putterking DOT com with the subject line: \"Putter King Scavenger Hunt Submission\". In the email please include links to the pictures and videos you are submitting. You can utilize free photo and video hosting sites such as <redacted>, <redacted>, <redacted>, <redacted>, etc. for your submissions.\n\n By entering the Putter King Miniature Golf Scavenger Hunt, you allow Putter King to use or link to any of the pictures or videos you submit for advertisements and promotions.\n\n Don’t forget to use your imagination and creativity!")
|
153
153
|
end
|
154
154
|
|
155
155
|
it 'redacts all confidential information from a text #003' do
|
156
156
|
tokens = ['Coca-Cola', 'Pepsi', 'John Smith']
|
157
157
|
text = 'Coca-Cola announced a merger with Pepsi that will happen on December 15th, 2020 for $200,000,000,000. Please contact John Smith at j.smith@example.com or visit http://www.super-fake-merger.com.'
|
158
|
-
expect(described_class.new(
|
158
|
+
expect(described_class.new(language: 'en', tokens: tokens).redact(text)).to eq('<redacted> announced a merger with <redacted> that will happen on <redacted date> for <redacted number>. Please contact <redacted> at <redacted> or visit <redacted>.')
|
159
159
|
end
|
160
160
|
|
161
161
|
it 'redacts all confidential information from a text #004' do
|
162
162
|
tokens = ['Coca-Cola', 'Pepsi', 'John Smith']
|
163
163
|
text = 'Coca-Cola announced a merger with Pepsi that will happen on December 15th, 2020 for $200,000,000,000. Please contact John Smith at j.smith@example.com or visit http://www.super-fake-merger.com.'
|
164
|
-
expect(described_class.new(
|
164
|
+
expect(described_class.new(language: 'en', tokens: tokens, ignore_numbers: true).redact(text)).to eq('<redacted> announced a merger with <redacted> that will happen on <redacted date> for $200,000,000,000. Please contact <redacted> at <redacted> or visit <redacted>.')
|
165
165
|
end
|
166
166
|
|
167
167
|
it 'redacts all confidential information from a text #005' do
|
168
168
|
tokens = ['Coca-Cola', 'Pepsi', 'John Smith']
|
169
169
|
text = 'Coca-Cola announced a merger with Pepsi that will happen on December 15th, 2020 for $200,000,000,000. Please contact John Smith at j.smith@example.com or visit http://www.super-fake-merger.com.'
|
170
|
-
expect(described_class.new(
|
170
|
+
expect(described_class.new(language: 'en', tokens: tokens, number_text: '**redacted number**', date_text: '^^redacted date^^', token_text: '*****').redact(text)).to eq('***** announced a merger with ***** that will happen on ^^redacted date^^ for **redacted number**. Please contact ***** at ***** or visit *****.')
|
171
171
|
end
|
172
172
|
|
173
173
|
it 'redacts all confidential information from a text #006' do
|
174
174
|
tokens = ['CLA']
|
175
175
|
text = 'LEGAL DISCLAIMER - CLA will not be held reponsible for changes.'
|
176
|
-
expect(described_class.new(
|
176
|
+
expect(described_class.new(language: 'en', tokens: tokens, number_text: '**redacted number**', date_text: '^^redacted date^^', token_text: '*****').redact(text)).to eq("LEGAL DISCLAIMER - ***** will not be held reponsible for changes.")
|
177
177
|
end
|
178
178
|
|
179
179
|
it 'redacts all confidential information from a text #007' do
|
180
180
|
tokens = ['Trans']
|
181
181
|
text = 'My Transformation - avoid Trans.'
|
182
|
-
expect(described_class.new(
|
182
|
+
expect(described_class.new(language: 'en', tokens: tokens, number_text: '**redacted number**', date_text: '^^redacted date^^', token_text: '*****', hyperlink_text: '*****', email_text: '*****').redact(text)).to eq('My Transformation - avoid *****.')
|
183
183
|
end
|
184
184
|
end
|
185
185
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: confidential_info_redactor
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0
|
4
|
+
version: 1.0.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Kevin S. Dias
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2016-03-07 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|