swot-ruby 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.document +5 -0
- data/.github/PULL_REQUEST_TEMPLATE.md +24 -0
- data/.github/workflows/ruby.yml +40 -0
- data/.github/workflows/update_domains.yml +31 -0
- data/.gitignore +5 -0
- data/.gitmodules +3 -0
- data/.ruby-version +1 -0
- data/.travis.yml +3 -0
- data/CONTRIBUTING.md +18 -0
- data/Gemfile +13 -0
- data/LICENSE.txt +21 -0
- data/README.md +117 -0
- data/Rakefile +21 -0
- data/VERSION +1 -0
- data/data/lib/domains/stoplist.txt +1294 -0
- data/data/lib/domains/tlds.txt +83 -0
- data/lib/swot/academic_tlds.rb +249 -0
- data/lib/swot/collection_methods.rb +20 -0
- data/lib/swot.rb +90 -0
- data/swot.gemspec +33 -0
- data/test/helper.rb +32 -0
- data/test/test_collection_methods.rb +44 -0
- data/test/test_swot.rb +122 -0
- metadata +99 -0
@@ -0,0 +1,83 @@
|
|
1
|
+
ac.bd
|
2
|
+
ac.be
|
3
|
+
ac.gg
|
4
|
+
ac.gn
|
5
|
+
ac.il
|
6
|
+
ac.in
|
7
|
+
ac.jp
|
8
|
+
ac.kr
|
9
|
+
ac.ma
|
10
|
+
ac.me
|
11
|
+
ac.mw
|
12
|
+
ac.ni
|
13
|
+
ac.om
|
14
|
+
ac.pg
|
15
|
+
ac.pr
|
16
|
+
ac.ru
|
17
|
+
ac.rw
|
18
|
+
ac.sz
|
19
|
+
ac.yu
|
20
|
+
ac.za
|
21
|
+
ac.zm
|
22
|
+
ed.ao
|
23
|
+
ed.cr
|
24
|
+
ed.jp
|
25
|
+
edu
|
26
|
+
edu.al
|
27
|
+
edu.ar
|
28
|
+
edu.az
|
29
|
+
edu.bb
|
30
|
+
edu.bd
|
31
|
+
edu.bh
|
32
|
+
edu.bs
|
33
|
+
edu.bz
|
34
|
+
edu.ck
|
35
|
+
edu.cn
|
36
|
+
edu.dz
|
37
|
+
edu.ee
|
38
|
+
edu.er
|
39
|
+
edu.gh
|
40
|
+
edu.hn
|
41
|
+
edu.in
|
42
|
+
edu.jm
|
43
|
+
edu.kn
|
44
|
+
edu.kz
|
45
|
+
edu.lr
|
46
|
+
edu.ly
|
47
|
+
edu.me
|
48
|
+
edu.mg
|
49
|
+
edu.ml
|
50
|
+
edu.mv
|
51
|
+
edu.mw
|
52
|
+
edu.ni
|
53
|
+
edu.pa
|
54
|
+
edu.pr
|
55
|
+
edu.pt
|
56
|
+
edu.pw
|
57
|
+
edu.qa
|
58
|
+
edu.sc
|
59
|
+
edu.sd
|
60
|
+
edu.sh
|
61
|
+
edu.sl
|
62
|
+
edu.sy
|
63
|
+
edu.ws
|
64
|
+
edu.ye
|
65
|
+
edu.zm
|
66
|
+
es.kr
|
67
|
+
g12.br
|
68
|
+
hs.kr
|
69
|
+
ms.kr
|
70
|
+
sc.kr
|
71
|
+
sch.ae
|
72
|
+
sch.gg
|
73
|
+
sch.je
|
74
|
+
sch.jo
|
75
|
+
sch.lk
|
76
|
+
sch.ly
|
77
|
+
sch.my
|
78
|
+
sch.om
|
79
|
+
sch.ps
|
80
|
+
sch.sa
|
81
|
+
school.za
|
82
|
+
vic.edu.au
|
83
|
+
urfu.me
|
@@ -0,0 +1,249 @@
|
|
1
|
+
require 'set'
|
2
|
+
|
3
|
+
class Swot
|
4
|
+
# These top-level domains are guaranteed to be academic institutions.
|
5
|
+
ACADEMIC_TLDS = %w(
|
6
|
+
ac.ae
|
7
|
+
ac.at
|
8
|
+
ac.bd
|
9
|
+
ac.be
|
10
|
+
ac.cn
|
11
|
+
ac.cr
|
12
|
+
ac.cy
|
13
|
+
ac.fj
|
14
|
+
ac.gg
|
15
|
+
ac.gn
|
16
|
+
ac.id
|
17
|
+
ac.il
|
18
|
+
ac.in
|
19
|
+
ac.ir
|
20
|
+
ac.jp
|
21
|
+
ac.ke
|
22
|
+
ac.kr
|
23
|
+
ac.ma
|
24
|
+
ac.me
|
25
|
+
ac.mu
|
26
|
+
ac.mw
|
27
|
+
ac.mz
|
28
|
+
ac.ni
|
29
|
+
ac.nz
|
30
|
+
ac.om
|
31
|
+
ac.pa
|
32
|
+
ac.pg
|
33
|
+
ac.pr
|
34
|
+
ac.rs
|
35
|
+
ac.ru
|
36
|
+
ac.rw
|
37
|
+
ac.sz
|
38
|
+
ac.th
|
39
|
+
ac.tz
|
40
|
+
ac.ug
|
41
|
+
ac.uk
|
42
|
+
ac.yu
|
43
|
+
ac.za
|
44
|
+
ac.zm
|
45
|
+
ac.zw
|
46
|
+
cc.al.us
|
47
|
+
cc.ar.us
|
48
|
+
cc.az.us
|
49
|
+
cc.ca.us
|
50
|
+
cc.co.us
|
51
|
+
cc.fl.us
|
52
|
+
cc.ga.us
|
53
|
+
cc.hi.us
|
54
|
+
cc.ia.us
|
55
|
+
cc.id.us
|
56
|
+
cc.il.us
|
57
|
+
cc.in.us
|
58
|
+
cc.ks.us
|
59
|
+
cc.ky.us
|
60
|
+
cc.la.us
|
61
|
+
cc.md.us
|
62
|
+
cc.me.us
|
63
|
+
cc.mi.us
|
64
|
+
cc.mn.us
|
65
|
+
cc.mo.us
|
66
|
+
cc.ms.us
|
67
|
+
cc.mt.us
|
68
|
+
cc.nc.us
|
69
|
+
cc.nd.us
|
70
|
+
cc.ne.us
|
71
|
+
cc.nj.us
|
72
|
+
cc.nm.us
|
73
|
+
cc.nv.us
|
74
|
+
cc.ny.us
|
75
|
+
cc.oh.us
|
76
|
+
cc.ok.us
|
77
|
+
cc.or.us
|
78
|
+
cc.pa.us
|
79
|
+
cc.ri.us
|
80
|
+
cc.sc.us
|
81
|
+
cc.sd.us
|
82
|
+
cc.tx.us
|
83
|
+
cc.va.us
|
84
|
+
cc.vi.us
|
85
|
+
cc.wa.us
|
86
|
+
cc.wi.us
|
87
|
+
cc.wv.us
|
88
|
+
cc.wy.us
|
89
|
+
ed.ao
|
90
|
+
ed.cr
|
91
|
+
ed.jp
|
92
|
+
edu
|
93
|
+
edu.af
|
94
|
+
edu.al
|
95
|
+
edu.ar
|
96
|
+
edu.au
|
97
|
+
edu.az
|
98
|
+
edu.ba
|
99
|
+
edu.bb
|
100
|
+
edu.bd
|
101
|
+
edu.bh
|
102
|
+
edu.bi
|
103
|
+
edu.bn
|
104
|
+
edu.bo
|
105
|
+
edu.br
|
106
|
+
edu.bs
|
107
|
+
edu.bt
|
108
|
+
edu.bz
|
109
|
+
edu.ck
|
110
|
+
edu.cn
|
111
|
+
edu.co
|
112
|
+
edu.cu
|
113
|
+
edu.do
|
114
|
+
edu.dz
|
115
|
+
edu.ec
|
116
|
+
edu.ee
|
117
|
+
edu.eg
|
118
|
+
edu.er
|
119
|
+
edu.es
|
120
|
+
edu.et
|
121
|
+
edu.ge
|
122
|
+
edu.gh
|
123
|
+
edu.gr
|
124
|
+
edu.gt
|
125
|
+
edu.hk
|
126
|
+
edu.hn
|
127
|
+
edu.ht
|
128
|
+
edu.in
|
129
|
+
edu.iq
|
130
|
+
edu.jm
|
131
|
+
edu.jo
|
132
|
+
edu.kg
|
133
|
+
edu.kh
|
134
|
+
edu.kn
|
135
|
+
edu.kw
|
136
|
+
edu.ky
|
137
|
+
edu.kz
|
138
|
+
edu.la
|
139
|
+
edu.lb
|
140
|
+
edu.lr
|
141
|
+
edu.lv
|
142
|
+
edu.ly
|
143
|
+
edu.me
|
144
|
+
edu.mg
|
145
|
+
edu.mk
|
146
|
+
edu.ml
|
147
|
+
edu.mm
|
148
|
+
edu.mn
|
149
|
+
edu.mo
|
150
|
+
edu.mt
|
151
|
+
edu.mv
|
152
|
+
edu.mw
|
153
|
+
edu.mx
|
154
|
+
edu.my
|
155
|
+
edu.ni
|
156
|
+
edu.np
|
157
|
+
edu.om
|
158
|
+
edu.pa
|
159
|
+
edu.pe
|
160
|
+
edu.ph
|
161
|
+
edu.pk
|
162
|
+
edu.pl
|
163
|
+
edu.pr
|
164
|
+
edu.ps
|
165
|
+
edu.pt
|
166
|
+
edu.pw
|
167
|
+
edu.py
|
168
|
+
edu.qa
|
169
|
+
edu.rs
|
170
|
+
edu.ru
|
171
|
+
edu.sa
|
172
|
+
edu.sc
|
173
|
+
edu.sd
|
174
|
+
edu.sg
|
175
|
+
edu.sh
|
176
|
+
edu.sl
|
177
|
+
edu.sv
|
178
|
+
edu.sy
|
179
|
+
edu.tr
|
180
|
+
edu.tt
|
181
|
+
edu.tw
|
182
|
+
edu.ua
|
183
|
+
edu.uy
|
184
|
+
edu.ve
|
185
|
+
edu.vn
|
186
|
+
edu.ws
|
187
|
+
edu.ye
|
188
|
+
edu.zm
|
189
|
+
es.kr
|
190
|
+
g12.br
|
191
|
+
hs.kr
|
192
|
+
ms.kr
|
193
|
+
sc.kr
|
194
|
+
sc.ug
|
195
|
+
sch.ae
|
196
|
+
sch.gg
|
197
|
+
sch.id
|
198
|
+
sch.ir
|
199
|
+
sch.je
|
200
|
+
sch.jo
|
201
|
+
sch.lk
|
202
|
+
sch.ly
|
203
|
+
sch.my
|
204
|
+
sch.om
|
205
|
+
sch.ps
|
206
|
+
sch.sa
|
207
|
+
sch.uk
|
208
|
+
school.nz
|
209
|
+
school.za
|
210
|
+
tec.ar.us
|
211
|
+
tec.az.us
|
212
|
+
tec.co.us
|
213
|
+
tec.fl.us
|
214
|
+
tec.ga.us
|
215
|
+
tec.ia.us
|
216
|
+
tec.id.us
|
217
|
+
tec.il.us
|
218
|
+
tec.in.us
|
219
|
+
tec.ks.us
|
220
|
+
tec.ky.us
|
221
|
+
tec.la.us
|
222
|
+
tec.ma.us
|
223
|
+
tec.md.us
|
224
|
+
tec.me.us
|
225
|
+
tec.mi.us
|
226
|
+
tec.mn.us
|
227
|
+
tec.mo.us
|
228
|
+
tec.ms.us
|
229
|
+
tec.mt.us
|
230
|
+
tec.nc.us
|
231
|
+
tec.nd.us
|
232
|
+
tec.nh.us
|
233
|
+
tec.nm.us
|
234
|
+
tec.nv.us
|
235
|
+
tec.ny.us
|
236
|
+
tec.oh.us
|
237
|
+
tec.ok.us
|
238
|
+
tec.pa.us
|
239
|
+
tec.sc.us
|
240
|
+
tec.sd.us
|
241
|
+
tec.tx.us
|
242
|
+
tec.ut.us
|
243
|
+
tec.vi.us
|
244
|
+
tec.wa.us
|
245
|
+
tec.wi.us
|
246
|
+
tec.wv.us
|
247
|
+
vic.edu.au
|
248
|
+
).to_set.freeze
|
249
|
+
end
|
@@ -0,0 +1,20 @@
|
|
1
|
+
require 'pathname'

# Collection-level helpers that act on the entire Swot dataset.
#
# Intended to be mixed into Swot with `extend`: the methods call
# Swot.domains_path and Swot.from_path, which are defined on Swot itself.
module SwotCollectionMethods
  # Returns an Array of domain strings, one per domain file found on disk.
  def all_domains
    each_domain.map(&:to_s)
  end

  # Yields a Swot instance for every domain file below Swot.domains_path.
  #
  # Only what exists as a file is enumerated; nothing here consults
  # BLACKLIST or ACADEMIC_TLDS.
  #
  # Domain files are stored as <tld>/.../<name>.txt, so only files nested in
  # at least one directory are visited. Plain data files that sit directly in
  # the domains directory (for example stoplist.txt) are not domains and are
  # skipped.
  #
  # Returns an Enumerator of Swot instances if no block is given.
  def each_domain
    return to_enum(:each_domain) unless block_given?

    # '*/**/*.txt' requires one leading directory component, which excludes
    # top-level files while still matching any depth below it.
    pattern = File.join(Swot.domains_path, '*', '**', '*.txt')
    Pathname.glob(pattern) { |path| yield(Swot.from_path(path)) }
  end
end
|
data/lib/swot.rb
ADDED
@@ -0,0 +1,90 @@
|
|
1
|
+
require "public_suffix"
|
2
|
+
require "naughty_or_nice"
|
3
|
+
require_relative "swot/academic_tlds"
|
4
|
+
require_relative "swot/collection_methods"
|
5
|
+
|
6
|
+
require "pathname"

# Decides whether an email address or domain belongs to an academic
# institution and looks up the institution's name.
#
# NOTE(review): `domain`, `to_s`, the constructor and the class-level
# `valid?` are not defined in this file; presumably they come from the
# NaughtyOrNice mixin — confirm against that gem.
class Swot
  # Kept in sync with the version declared in swot.gemspec.
  VERSION = "1.0.0"

  # These are domains that snuck into the edu registry,
  # but don't pass the education sniff test.
  # Note: a validated domain must not end with a blacklisted string.
  BLACKLIST = File.readlines(File.join(__dir__, '../data/lib/domains/stoplist.txt')).map(&:chomp).freeze

  include NaughtyOrNice
  extend SwotCollectionMethods

  class << self
    alias_method :is_academic?, :valid?
    alias_method :academic?, :valid?

    # Returns the institution name for the given email address or domain;
    # nil if nothing is found.
    def get_institution_name(text)
      Swot.new(text).institution_name
    end
    alias_method :school_name, :get_institution_name

    # Absolute path of the directory holding the per-domain data files.
    # Memoized in a class-level instance variable.
    def domains_path
      @domains_path ||= File.expand_path("../data/lib/domains", File.dirname(__FILE__))
    end

    # Returns a new Swot instance for the domain file at the given path.
    # Note that the path must be absolute.
    #
    # Returns a Swot instance, or false if no file exists at the given path.
    def from_path(path_string_or_path)
      path = Pathname.new(path_string_or_path)
      return false unless path.exist?

      # Files are stored as <tld>/<...>/<name>.txt, i.e. the domain labels in
      # reverse order, so the path components are flipped back here.
      path_dir, file = path.relative_path_from(Pathname.new(domains_path)).split
      backwards_path = path_dir.to_s.split('/').push(file.basename('.txt').to_s)
      Swot.new(backwards_path.reverse.join('.'))
    end
  end

  # Figure out if an email or domain belongs to an academic institution.
  #
  # Returns true if the domain name belongs to an academic institution;
  # false otherwise.
  def valid?
    return false if domain.nil?

    # A domain is blacklisted when it equals an entry or is a subdomain of
    # one. This is the same match as /(\A|\.)entry\z/ without compiling a
    # regexp per entry on every call.
    host = to_s.to_s
    return false if BLACKLIST.any? { |entry| host == entry || host.end_with?(".#{entry}") }

    ACADEMIC_TLDS.include?(domain.tld) || academic_domain?
  end

  # Figure out the institution name based on the email address/domain.
  #
  # Returns a String with the institution name; nil if nothing is found.
  def institution_name
    @institution_name ||= File.read(file_path, :mode => "rb", :external_encoding => "UTF-8").strip
  rescue StandardError
    # Deliberately best-effort: an unparsable domain or a missing data file
    # simply means there is no known name.
    nil
  end
  alias_method :school_name, :institution_name
  alias_method :name, :institution_name

  # Figure out if a domain name is a known academic institution.
  #
  # Returns true if the domain name belongs to a known academic institution;
  # false otherwise.
  def academic_domain?
    # `||=` would re-check the disk on every call for a negative result, so
    # the memo is keyed on whether the ivar has been set at all.
    return @academic_domain if defined?(@academic_domain)

    @academic_domain = File.exist?(file_path) || File.exist?(file_extended_path)
  end

  private

  # Data file path built from the reversed labels of domain.domain.
  def file_path
    @file_path ||= File.join(Swot::domains_path, domain.domain.to_s.split(".").reverse) + ".txt"
  end

  # Data file path built from the reversed labels of the full domain string.
  def file_extended_path
    @file_extended_path ||= File.join([Swot::domains_path, domain.to_s.split(".").reverse].flatten) + ".txt"
  end
end
|
data/swot.gemspec
ADDED
@@ -0,0 +1,33 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
Gem::Specification.new do |s|
  s.name = "swot-ruby"
  s.version = "1.0.0"

  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
  s.authors = ["Lee Reilly"]
  s.date = "2015-07-10"
  s.description = "Identify email addresses or domains names that belong to colleges or universities. Help automate the process of approving or rejecting academic discounts."
  s.email = "lee@leereilly.net"
  s.extra_rdoc_files = [
    "LICENSE.txt",
    "README.md"
  ]

  # Files tracked by git, plus the domain data set. The library reads its
  # data from data/lib/domains (see Swot.domains_path), so that is the tree
  # that has to be packaged, including every nested <tld>/<name>.txt file.
  # The data is globbed from disk in addition to `git ls-files`, as the
  # original spec already did for the top level of that directory.
  tracked_files = `git ls-files -z`.split("\x0")
  domain_files = Dir.glob('data/lib/domains/**/*').select { |path| File.file?(path) }
  s.files = (tracked_files + domain_files).uniq

  s.homepage = "https://github.com/kobaltz/swot"
  s.licenses = ["MIT"]
  s.require_paths = ["lib"]
  s.required_ruby_version = Gem::Requirement.new(">= 2.0")
  s.rubygems_version = "2.0.14"
  s.summary = "Identify email addresses or domains names that belong to colleges or universities."
  s.test_files = ["test/helper.rb", "test/test_collection_methods.rb", "test/test_swot.rb"]

  s.add_dependency('public_suffix', ">= 0")
  s.add_dependency('naughty_or_nice', "~> 2.0")
end
|
33
|
+
|
data/test/helper.rb
ADDED
@@ -0,0 +1,32 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'bundler'
|
3
|
+
begin
|
4
|
+
Bundler.setup(:default, :development)
|
5
|
+
rescue Bundler::BundlerError => e
|
6
|
+
$stderr.puts e.message
|
7
|
+
$stderr.puts "Run `bundle install` to install missing gems"
|
8
|
+
exit e.status_code
|
9
|
+
end
|
10
|
+
require 'minitest/autorun'
|
11
|
+
|
12
|
+
$LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
|
13
|
+
$LOAD_PATH.unshift(File.dirname(__FILE__))
|
14
|
+
require 'swot'
|
15
|
+
|
16
|
+
# Extracted from Rails ActiveSupport::Testing::Assertions
|
17
|
+
#
|
18
|
+
# Assert that an expression is not truthy. Passes if <tt>object</tt> is
|
19
|
+
# +nil+ or +false+. "Truthy" means "considered true in a conditional"
|
20
|
+
# like <tt>if foo</tt>.
|
21
|
+
#
|
22
|
+
# assert_not nil # => true
|
23
|
+
# assert_not false # => true
|
24
|
+
# assert_not 'foo' # => Expected "foo" to be nil or false
|
25
|
+
#
|
26
|
+
# An error message can be specified.
|
27
|
+
#
|
28
|
+
# assert_not foo, 'foo should be false'
|
29
|
+
def assert_not(object, message = nil)
|
30
|
+
message ||= "Expected #{mu_pp(object)} to be nil or false"
|
31
|
+
assert !object, message
|
32
|
+
end
|
@@ -0,0 +1,44 @@
|
|
1
|
+
require 'helper'
|
2
|
+
require 'fileutils'
|
3
|
+
require 'tmpdir'
|
4
|
+
|
5
|
+
# Exercises the collection helpers against a throw-away domains directory.
describe 'SwotCollectionMethods' do
  before do
    # Swot memoizes its data directory in a class-level ivar. Remember the
    # current value so it can be restored afterwards; otherwise every test
    # that runs later in the same process would look in a deleted temp dir.
    @original_domains_path = Swot.instance_variable_get(:@domains_path)
    @tmp_dir = Dir.mktmpdir "swot-test"
    Swot.instance_variable_set(:@domains_path, @tmp_dir)
    write_domain_file "students.texas.edu"
    write_domain_file "mit.edu"
  end

  after do
    Swot.instance_variable_set(:@domains_path, @original_domains_path)
    FileUtils.remove_entry @tmp_dir
  end

  # Creates <tmp_dir>/<tld>/.../<name>.txt for the given domain, containing
  # the school name on a single line.
  def write_domain_file(domain, school_name = 'The University')
    *dirs, leaf = domain.split('.').reverse
    path = Pathname.new(@tmp_dir).join(*dirs, "#{leaf}.txt")
    path.dirname.mkpath
    path.open('w') { |f| f.puts school_name }
  end

  describe 'all_domains' do
    it 'gets all domains from files on disk' do
      all_domains = Swot.all_domains
      assert_equal 2, all_domains.size
      assert_includes all_domains, "students.texas.edu"
      assert_includes all_domains, "mit.edu"
    end
  end

  describe 'each_domain' do
    it 'yields a swot instance for every domain file' do
      domains = []
      Swot.each_domain { |d| domains << d }
      assert_equal 2, domains.size
      assert_equal true, domains.all?{ |d| d.is_a? Swot }
      assert_includes domains.map(&:to_s), "students.texas.edu"
      assert_includes domains.map(&:to_s), "mit.edu"
    end
  end
end
|
data/test/test_swot.rb
ADDED
@@ -0,0 +1,122 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
require 'helper'
|
3
|
+
|
4
|
+
describe Swot do
|
5
|
+
it "recognizes academic email addresses and domains" do
|
6
|
+
assert Swot::is_academic?('lreilly@stanford.edu')
|
7
|
+
assert Swot::is_academic?('LREILLY@STANFORD.EDU')
|
8
|
+
assert Swot::is_academic?('Lreilly@Stanford.Edu')
|
9
|
+
assert Swot::is_academic?('lreilly@slac.stanford.edu')
|
10
|
+
assert Swot::is_academic?('lreilly@strath.ac.uk')
|
11
|
+
assert Swot::is_academic?('lreilly@soft-eng.strath.ac.uk')
|
12
|
+
assert Swot::is_academic?('lee@ugr.es')
|
13
|
+
assert Swot::is_academic?('lee@uottawa.ca')
|
14
|
+
assert Swot::is_academic?('lee@mother.edu.ru')
|
15
|
+
assert Swot::is_academic?('lee@ucy.ac.cy')
|
16
|
+
assert Swot::is_academic?('dave.kimura@osu.edu')
|
17
|
+
assert Swot::is_academic?('kimura.13@osu.edu')
|
18
|
+
assert Swot::is_academic?('dave@daffodil.ac')
|
19
|
+
assert Swot::is_academic?('dave@cti.za.ac')
|
20
|
+
assert Swot::is_academic?('dave@lsst.ac')
|
21
|
+
assert Swot::is_academic?('dave@dcc.netpoint.com.bd')
|
22
|
+
assert Swot::is_academic?('dave@student.gatewayhs.com')
|
23
|
+
|
24
|
+
|
25
|
+
assert_not Swot::is_academic?('support@driftingruby.com')
|
26
|
+
assert_not Swot::is_academic?('lee@leerilly.net')
|
27
|
+
assert_not Swot::is_academic?('lee@gmail.com')
|
28
|
+
assert_not Swot::is_academic?('lee@stanford.edu.com')
|
29
|
+
assert_not Swot::is_academic?('lee@strath.ac.uk.com')
|
30
|
+
|
31
|
+
assert Swot::is_academic?('stanford.edu')
|
32
|
+
assert Swot::is_academic?('slac.stanford.edu')
|
33
|
+
assert Swot::is_academic?('www.stanford.edu')
|
34
|
+
assert Swot::is_academic?('http://www.stanford.edu')
|
35
|
+
assert Swot::is_academic?('http://www.stanford.edu:9393')
|
36
|
+
assert Swot::is_academic?('strath.ac.uk')
|
37
|
+
assert Swot::is_academic?('soft-eng.strath.ac.uk')
|
38
|
+
assert Swot::is_academic?('ugr.es')
|
39
|
+
assert Swot::is_academic?('uottawa.ca')
|
40
|
+
assert Swot::is_academic?('mother.edu.ru')
|
41
|
+
assert Swot::is_academic?('ucy.ac.cy')
|
42
|
+
|
43
|
+
assert_not Swot::is_academic?('leerilly.net')
|
44
|
+
assert_not Swot::is_academic?('gmail.com')
|
45
|
+
assert_not Swot::is_academic?('stanford.edu.com')
|
46
|
+
assert_not Swot::is_academic?('strath.ac.uk.com')
|
47
|
+
|
48
|
+
assert_not Swot::is_academic?(nil)
|
49
|
+
assert_not Swot::is_academic?('')
|
50
|
+
assert_not Swot::is_academic?('the')
|
51
|
+
|
52
|
+
assert Swot::is_academic?(' stanford.edu')
|
53
|
+
assert Swot::is_academic?('lee@strath.ac.uk ')
|
54
|
+
assert_not Swot::is_academic?(' gmail.com ')
|
55
|
+
|
56
|
+
assert Swot::is_academic?('lee@stud.uni-corvinus.hu')
|
57
|
+
|
58
|
+
# overkill
|
59
|
+
assert Swot::is_academic?('lee@harvard.edu')
|
60
|
+
assert Swot::is_academic?('lee@mail.harvard.edu')
|
61
|
+
end
|
62
|
+
|
63
|
+
it "returns name of valid institution" do
|
64
|
+
assert_match "University of Strathclyde", Swot::get_institution_name('lreilly@cs.strath.ac.uk')
|
65
|
+
assert_match "BRG Fadingerstraße Linz, Austria", Swot::get_institution_name('lreilly@fadi.at')
|
66
|
+
end
|
67
|
+
|
68
|
+
it "returns nil when institution invalid" do
|
69
|
+
assert_not Swot::get_institution_name('foo@shop.com')
|
70
|
+
end
|
71
|
+
|
72
|
+
it "test aliased methods" do
|
73
|
+
assert Swot::academic?('stanford.edu')
|
74
|
+
assert_match "University of Strathclyde", Swot::school_name('lreilly@cs.strath.ac.uk')
|
75
|
+
end
|
76
|
+
|
77
|
+
it "fail blacklisted domains" do
|
78
|
+
["si.edu", " si.edu ", "imposter@si.edu", "foo.si.edu", "america.edu"].each do |domain|
|
79
|
+
assert_not Swot::is_academic?(domain), "#{domain} should be denied"
|
80
|
+
end
|
81
|
+
["student.tsu.edu.ph"].each do |domain|
|
82
|
+
assert_not Swot::is_academic?(domain), "#{domain} should be denied"
|
83
|
+
end
|
84
|
+
end
|
85
|
+
|
86
|
+
it "not err on tld-only domains" do
|
87
|
+
Swot::is_academic? ".com"
|
88
|
+
assert_not Swot::is_academic?(".com")
|
89
|
+
end
|
90
|
+
|
91
|
+
it "does not err on invalid domains" do
|
92
|
+
assert_not Swot::is_academic?("foo@bar.invalid")
|
93
|
+
end
|
94
|
+
|
95
|
+
# it "contains only text files" do
|
96
|
+
# Dir.glob("lib/domains/**/*") do |file|
|
97
|
+
# if not File.directory?(file)
|
98
|
+
# assert file.end_with?(".txt"), "#{file} should have a .txt extension"
|
99
|
+
# end
|
100
|
+
# end
|
101
|
+
# end
|
102
|
+
|
103
|
+
# it "contains no file with an invalid encoding" do
|
104
|
+
# Dir.glob("lib/domains/**/*") do |file|
|
105
|
+
# if not File.directory?(file)
|
106
|
+
# File.open(file, "r") do |fh|
|
107
|
+
# assert fh.read.valid_encoding?, "Invalid encoding for #{file}"
|
108
|
+
# end
|
109
|
+
# end
|
110
|
+
# end
|
111
|
+
# end
|
112
|
+
|
113
|
+
# it "contains only file with a single line" do
|
114
|
+
# Dir.glob("lib/domains/**/*") do |file|
|
115
|
+
# if not File.directory?(file)
|
116
|
+
# File.open(file, "r") do |fh|
|
117
|
+
# assert fh.read.lines.count == 1, "#{file} should only contain one line"
|
118
|
+
# end
|
119
|
+
# end
|
120
|
+
# end
|
121
|
+
# end
|
122
|
+
end
|