swot-ruby 1.0.5.20250409 → 1.0.5.20250415
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/ruby.yml +1 -1
- data/academic_data/abused.txt +6 -3
- data/academic_data/ar/edu/escuelasproa.txt +1 -0
- data/academic_data/ar/edu/iesvu.txt +1 -0
- data/academic_data/ar/edu/isvn.txt +1 -0
- data/academic_data/bd/ac/sub.txt +2 -0
- data/academic_data/bd/edu/agfuphs.txt +1 -0
- data/academic_data/bd/edu/fcub.txt +1 -0
- data/academic_data/bd/edu/hamdarduniversity.txt +1 -0
- data/academic_data/bd/edu/sss.txt +2 -0
- data/academic_data/br/com/soubraseducacional.txt +1 -0
- data/academic_data/br/edu/famef.txt +2 -0
- data/academic_data/br/einstein.txt +2 -0
- data/academic_data/br/senai/estudante/es.txt +1 -0
- data/academic_data/ca/hwcdsb.txt +1 -0
- data/academic_data/ch/rosey/stu.txt +1 -0
- data/academic_data/cl/ecas.txt +4 -0
- data/academic_data/cm/enspy-uy1.txt +2 -0
- data/academic_data/co/edu/politecnicomayor.txt +1 -0
- data/academic_data/com/iscparis.txt +1 -0
- data/academic_data/com/khaitanpublicschool.txt +1 -0
- data/academic_data/cz/cas/jh-inst.txt +2 -0
- data/academic_data/de/gs-wn.txt +1 -0
- data/academic_data/de/kbbz-dillingen.txt +2 -0
- data/academic_data/de/mpg/mpzpm.txt +1 -0
- data/academic_data/do/edu/intec.txt +2 -0
- data/academic_data/dz/ensti-annaba.txt +1 -0
- data/academic_data/edu/cabrillo.txt +1 -0
- data/academic_data/edu/gallaudet.txt +1 -0
- data/academic_data/edu/mountmarty.txt +2 -0
- data/academic_data/edu/southalabama.txt +2 -0
- data/academic_data/edu/triton.txt +1 -0
- data/academic_data/edu/txwes.txt +1 -0
- data/academic_data/edu/umbc.txt +1 -0
- data/academic_data/edu/wab.txt +3 -0
- data/academic_data/hu/semmelweis.txt +1 -0
- data/academic_data/id/ac/stitmadani.txt +1 -0
- data/academic_data/in/ladybirdschool.txt +1 -0
- data/academic_data/it/edu/galileiostiglia.txt +0 -0
- data/academic_data/kr/ac/hanbat.txt +2 -0
- data/academic_data/kr/ac/hit/s.txt +1 -0
- data/academic_data/kr/ac/idas.txt +2 -0
- data/academic_data/kr/ac/ssc.txt +1 -0
- data/academic_data/kr/ac/yit.txt +1 -0
- data/academic_data/kr/co/uos.txt +1 -0
- data/academic_data/lv/lbtu.txt +2 -0
- data/academic_data/mx/edu/cecyt02.txt +1 -0
- data/academic_data/np/edu/gaushalamultiplecampus.txt +1 -0
- data/academic_data/org/balbharati/gr.txt +1 -0
- data/academic_data/org/itlearning-campus.txt +1 -0
- data/academic_data/org/nfsg-sofia.txt +2 -0
- data/academic_data/org/ohva.txt +1 -0
- data/academic_data/org/x-camp.txt +1 -0
- data/academic_data/pe/edu/roncalli.txt +1 -0
- data/academic_data/pk/edu/lgsparagon.txt +1 -0
- data/academic_data/rs/edu/gimnazijapirot.txt +1 -0
- data/academic_data/sa/edu/alaqsa/api.txt +1 -0
- data/academic_data/stoplist.txt +17 -1
- data/academic_data/tlds.txt +0 -1
- data/academic_data/tr/edu/msu.txt +2 -2
- data/academic_data/ua/edu/megu.txt +2 -0
- data/academic_data/uz/utas.txt +1 -0
- data/academic_data/vn/edu/binhnamphong.txt +2 -0
- data/academic_data/vn/edu/buv.txt +2 -0
- data/academic_data/vn/edu/hathitrannhien.txt +2 -0
- data/academic_data/vn/edu/ipmph.txt +2 -0
- data/academic_data/zw/ac/aju.txt +1 -0
- data/data/lib/domains/abused.txt +6 -3
- data/data/lib/domains/stoplist.txt +17 -1
- data/data/lib/domains/tlds.txt +0 -1
- data/swot.gemspec +1 -1
- data/test/test_swot.rb +178 -126
- metadata +64 -5
- data/academic_data/dz/esti-annaba.txt +0 -1
- data/academic_data/ee/ivkhk.txt +0 -2
- data/academic_data/mt/edu/mcast.txt +0 -1
data/test/test_swot.rb
CHANGED
@@ -2,154 +2,206 @@
|
|
2
2
|
require 'helper'
|
3
3
|
|
4
4
|
describe Swot do
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
5
|
+
describe "academic email recognition" do
|
6
|
+
it "recognizes standard academic email addresses" do
|
7
|
+
assert Swot.is_academic?('lreilly@stanford.edu')
|
8
|
+
assert Swot.is_academic?('LREILLY@STANFORD.EDU')
|
9
|
+
assert Swot.is_academic?('Lreilly@Stanford.Edu')
|
10
|
+
assert Swot.is_academic?('lreilly@slac.stanford.edu')
|
11
|
+
assert Swot.is_academic?('lreilly@strath.ac.uk')
|
12
|
+
assert Swot.is_academic?('lreilly@soft-eng.strath.ac.uk')
|
13
|
+
assert Swot.is_academic?('lee@ugr.es')
|
14
|
+
assert Swot.is_academic?('lee@uottawa.ca')
|
15
|
+
assert Swot.is_academic?('lee@mother.edu.ru')
|
16
|
+
assert Swot.is_academic?('lee@ucy.ac.cy')
|
17
|
+
end
|
18
|
+
|
19
|
+
it "recognizes email addresses with special formats" do
|
20
|
+
assert Swot.is_academic?('dave.kimura@osu.edu')
|
21
|
+
assert Swot.is_academic?('kimura.13@osu.edu')
|
22
|
+
assert Swot.is_academic?('lee@stud.uni-corvinus.hu')
|
23
|
+
assert Swot.is_academic?('lee@harvard.edu')
|
24
|
+
assert Swot.is_academic?('lee@mail.harvard.edu')
|
25
|
+
assert Swot.is_academic?('lee@strath.ac.uk ') # trailing space
|
26
|
+
end
|
27
|
+
|
28
|
+
it "handles international academic email addresses" do
|
29
|
+
assert Swot.is_academic?('dave@daffodil.ac')
|
30
|
+
assert Swot.is_academic?('dave@cti.za.ac')
|
31
|
+
assert Swot.is_academic?('dave@lsst.ac')
|
32
|
+
assert Swot.is_academic?('dave@dcc.netpoint.com.bd')
|
33
|
+
assert Swot.is_academic?('dave@student.gatewayhs.com')
|
34
|
+
assert Swot.is_academic?('dave@politecnics.barcelona')
|
35
|
+
assert Swot.is_academic?('me@nimsuniversity.org')
|
36
|
+
end
|
37
|
+
|
38
|
+
it "rejects non-academic email addresses" do
|
39
|
+
assert_not Swot.is_academic?('support@driftingruby.com')
|
40
|
+
assert_not Swot.is_academic?('lee@leerilly.net')
|
41
|
+
assert_not Swot.is_academic?('lee@gmail.com')
|
42
|
+
assert_not Swot.is_academic?('lee@stanford.edu.com')
|
43
|
+
assert_not Swot.is_academic?('lee@strath.ac.uk.com')
|
44
|
+
end
|
45
|
+
|
46
|
+
it "recognizes various education domains in email addresses" do
|
47
|
+
assert Swot.is_academic?('dave@faculdadefia.edu.br')
|
48
|
+
assert Swot.is_academic?('dave@uteco.edu.do')
|
49
|
+
assert Swot.is_academic?('dave@uca.edu.ar')
|
50
|
+
assert Swot.is_academic?('dave@my.barstow.edu')
|
51
|
+
assert Swot.is_academic?('dave@student.ccp.edu')
|
52
|
+
assert Swot.is_academic?('dave@students.prairiestate.edu')
|
53
|
+
end
|
54
|
+
|
55
|
+
it "rejects email addresses from hait.edu.cn" do
|
56
|
+
assert_not Swot.is_academic?('dave@hait.edu.cn')
|
57
|
+
end
|
58
|
+
|
59
|
+
it "rejects email addresses from mail.chzu.edu.cn" do
|
60
|
+
assert_not Swot.is_academic?('dave@mail.chzu.edu.cn')
|
61
|
+
end
|
62
|
+
|
63
|
+
it "accepts email addresses from stud.tjut.edu.cn" do
|
64
|
+
assert Swot.is_academic?('dave@stud.tjut.edu.cn')
|
65
|
+
end
|
66
|
+
|
67
|
+
it "rejects email addresses from njit.edu.cn" do
|
68
|
+
assert_not Swot.is_academic?('dave@njit.edu.cn')
|
69
|
+
end
|
70
|
+
|
71
|
+
it "rejects email addresses from hait.edu.cn" do
|
72
|
+
assert_not Swot.is_academic?('dave@hait.edu.cn')
|
73
|
+
end
|
74
|
+
|
75
|
+
it "rejects email addresses from stumail.sdut.edu.cn" do
|
76
|
+
assert_not Swot.is_academic?('dave@stumail.sdut.edu.cn')
|
77
|
+
end
|
78
|
+
end
|
79
|
+
|
80
|
+
describe "academic domain recognition" do
|
81
|
+
it "recognizes academic domains" do
|
82
|
+
assert Swot.is_academic?('stanford.edu')
|
83
|
+
assert Swot.is_academic?('slac.stanford.edu')
|
84
|
+
assert Swot.is_academic?('www.stanford.edu')
|
85
|
+
assert Swot.is_academic?('http://www.stanford.edu')
|
86
|
+
assert Swot.is_academic?('http://www.stanford.edu:9393')
|
87
|
+
assert Swot.is_academic?('strath.ac.uk')
|
88
|
+
assert Swot.is_academic?('soft-eng.strath.ac.uk')
|
89
|
+
assert Swot.is_academic?(' stanford.edu') # leading space
|
90
|
+
end
|
91
|
+
|
92
|
+
it "recognizes international academic domains" do
|
93
|
+
assert Swot.is_academic?('ugr.es')
|
94
|
+
assert Swot.is_academic?('uottawa.ca')
|
95
|
+
assert Swot.is_academic?('mother.edu.ru')
|
96
|
+
assert Swot.is_academic?('ucy.ac.cy')
|
97
|
+
end
|
98
|
+
|
99
|
+
it "rejects non-academic domains" do
|
100
|
+
assert_not Swot.is_academic?('leerilly.net')
|
101
|
+
assert_not Swot.is_academic?('gmail.com')
|
102
|
+
assert_not Swot.is_academic?('stanford.edu.com')
|
103
|
+
assert_not Swot.is_academic?('strath.ac.uk.com')
|
104
|
+
assert_not Swot.is_academic?(' gmail.com ')
|
105
|
+
end
|
106
|
+
|
107
|
+
it "recognizes US community college domains" do
|
108
|
+
assert Swot.is_academic?('dave@sinclair.edu')
|
109
|
+
assert Swot.is_academic?('dave@collin.edu')
|
110
|
+
assert Swot.is_academic?('dave@buc.blinn.edu')
|
111
|
+
assert Swot.is_academic?('dave@roxbury.edu')
|
112
|
+
assert Swot.is_academic?('dave@stchas.edu')
|
113
|
+
assert Swot.is_academic?('dave@email.tjc.edu')
|
114
|
+
assert Swot.is_academic?('dave@dtcc.edu')
|
115
|
+
assert Swot.is_academic?('dave@wwcc.edu')
|
116
|
+
assert Swot.is_academic?('dave@my.stchas.edu')
|
117
|
+
assert Swot.is_academic?('dave@student.egcc.edu')
|
118
|
+
assert Swot.is_academic?('dave@holmescc.edu')
|
119
|
+
assert Swot.is_academic?('dave@navarrocollege.edu')
|
120
|
+
end
|
121
|
+
|
122
|
+
it "recognizes various international academic institutions" do
|
123
|
+
assert Swot.is_academic?('dave@ucsiuniversity.edu.my')
|
124
|
+
assert Swot.is_academic?('dave@asu.edu')
|
125
|
+
assert Swot.is_academic?('dave@student.ptss.edu.my')
|
126
|
+
assert Swot.is_academic?('dave@buc.edu.eg')
|
127
|
+
assert Swot.is_academic?('dave@campusucc.edu.co')
|
128
|
+
assert Swot.is_academic?('dave@student.utem.edu.my')
|
129
|
+
assert Swot.is_academic?('dave@pucgo.edu.br')
|
130
|
+
assert Swot.is_academic?('dave@mahasiswa.itb.ac.id')
|
131
|
+
assert Swot.is_academic?('dave@case.edu')
|
132
|
+
assert Swot.is_academic?('dave@uncp.edu.pe')
|
133
|
+
assert Swot.is_academic?('dave@dyptc.edu.in')
|
134
|
+
assert Swot.is_academic?('dave@mubs.ac.ug')
|
135
|
+
assert Swot.is_academic?('dave@aluno.unip.br')
|
136
|
+
assert Swot.is_academic?('dave@aluno.unicarioca.edu.br')
|
137
|
+
assert Swot.is_academic?('dave@indoamerica.edu.ec')
|
138
|
+
assert Swot.is_academic?('dave@umbc.edu')
|
139
|
+
assert Swot.is_academic?('dave@azhar.edu.eg')
|
140
|
+
assert Swot.is_academic?('dave@cibertec.edu.pe')
|
141
|
+
assert Swot.is_academic?('dave@ivytech.edu')
|
142
|
+
assert Swot.is_academic?('dave@cwru.edu')
|
143
|
+
assert Swot.is_academic?('dave@mec.edu.py')
|
144
|
+
assert Swot.is_academic?('dave@mu.edu.sa')
|
145
|
+
assert Swot.is_academic?('dave@moe-dl.edu.my')
|
146
|
+
assert Swot.is_academic?('dave@lau.edu')
|
147
|
+
assert Swot.is_academic?('dave@os-dkorac.edu.me')
|
148
|
+
assert Swot.is_academic?('dave@continental.edu.pe')
|
149
|
+
assert Swot.is_academic?('dave@utch.edu.co')
|
150
|
+
end
|
151
|
+
|
152
|
+
it "rejects certain domains" do
|
153
|
+
assert_not Swot.is_academic?('dave@edu.azores.gov.pt')
|
154
|
+
assert_not Swot.is_academic?('dave@jmpaneracollege.com')
|
155
|
+
assert_not Swot.is_academic?('dave@workschool.ca')
|
156
|
+
assert_not Swot.is_academic?('dave@itstep.academy')
|
157
|
+
assert_not Swot.is_academic?('dave@ghanacu.org')
|
158
|
+
assert_not Swot.is_academic?('dave@alumni.imperial.ac.uk')
|
159
|
+
assert_not Swot.is_academic?('dave@hbtcm.edu.cn')
|
160
|
+
end
|
161
|
+
end
|
162
|
+
|
163
|
+
describe "edge cases" do
|
164
|
+
it "handles null and empty values correctly" do
|
165
|
+
assert_not Swot.is_academic?(nil)
|
166
|
+
assert_not Swot.is_academic?('')
|
167
|
+
assert_not Swot.is_academic?('the')
|
168
|
+
end
|
169
|
+
|
170
|
+
it "handles alumni domains correctly" do
|
171
|
+
assert_not Swot.is_academic?('alumni.albany.edu')
|
172
|
+
end
|
121
173
|
end
|
122
174
|
|
123
175
|
it "returns name of valid institution" do
|
124
|
-
assert_match "University of Strathclyde", Swot
|
125
|
-
assert_match "BRG Fadingerstraße Linz, Austria", Swot
|
176
|
+
assert_match "University of Strathclyde", Swot.get_institution_name('lreilly@cs.strath.ac.uk')
|
177
|
+
assert_match "BRG Fadingerstraße Linz, Austria", Swot.get_institution_name('lreilly@fadi.at')
|
126
178
|
end
|
127
179
|
|
128
180
|
it "returns nil when institution invalid" do
|
129
|
-
assert_not Swot
|
181
|
+
assert_not Swot.get_institution_name('foo@shop.com')
|
130
182
|
end
|
131
183
|
|
132
184
|
it "test aliased methods" do
|
133
|
-
assert Swot
|
134
|
-
assert_match "University of Strathclyde", Swot
|
185
|
+
assert Swot.academic?('stanford.edu')
|
186
|
+
assert_match "University of Strathclyde", Swot.school_name('lreilly@cs.strath.ac.uk')
|
135
187
|
end
|
136
188
|
|
137
189
|
it "fail blacklisted domains" do
|
138
190
|
["si.edu", " si.edu ", "imposter@si.edu", "foo.si.edu"].each do |domain|
|
139
|
-
assert Swot
|
191
|
+
assert Swot.is_academic?(domain), "#{domain} should be denied"
|
140
192
|
end
|
141
193
|
["student.tsu.edu.ph"].each do |domain|
|
142
|
-
assert Swot
|
194
|
+
assert Swot.is_academic?(domain), "#{domain} should be denied"
|
143
195
|
end
|
144
196
|
end
|
145
197
|
|
146
198
|
it "not err on tld-only domains" do
|
147
|
-
Swot
|
148
|
-
assert_not Swot
|
199
|
+
Swot.is_academic? ".com"
|
200
|
+
assert_not Swot.is_academic?(".com")
|
149
201
|
end
|
150
202
|
|
151
203
|
it "does not err on invalid domains" do
|
152
|
-
assert_not Swot
|
204
|
+
assert_not Swot.is_academic?("foo@bar.invalid")
|
153
205
|
end
|
154
206
|
|
155
207
|
# it "contains only text files" do
|