gman 7.0.1 → 7.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +1 -0
- data/.rspec +2 -0
- data/.rubocop.yml +3 -0
- data/.ruby-version +1 -1
- data/config/domains.txt +8259 -42
- data/config/vendor/academic.txt +6 -7
- data/config/vendor/dotgovs.csv +5634 -5560
- data/contributing.json +32 -0
- data/gman.gemspec +3 -5
- data/lib/gman.rb +1 -1
- data/lib/gman/domain_list.rb +18 -6
- data/lib/gman/identifier.rb +2 -2
- data/lib/gman/importer.rb +1 -1
- data/lib/gman/version.rb +1 -1
- data/script/cibuild +1 -1
- data/script/dedupe +1 -1
- data/script/vendor-swot +1 -1
- data/spec/fixtures/domains.txt +4 -0
- data/{test → spec}/fixtures/obama.txt +0 -0
- data/spec/gman/bin_spec.rb +99 -0
- data/spec/gman/country_code_spec.rb +36 -0
- data/spec/gman/domain_list_spec.rb +108 -0
- data/spec/gman/domains_spec.rb +22 -0
- data/spec/gman/identifier_spec.rb +182 -0
- data/spec/gman/importer_spec.rb +227 -0
- data/spec/gman/locality_spec.rb +22 -0
- data/spec/gman_spec.rb +72 -0
- data/spec/spec_helper.rb +29 -0
- metadata +52 -83
- data/.rake_tasks +0 -0
- data/Rakefile +0 -22
- data/test/fixtures/domains.txt +0 -2
- data/test/helper.rb +0 -48
- data/test/test_gman.rb +0 -56
- data/test/test_gman_bin.rb +0 -75
- data/test/test_gman_country_codes.rb +0 -18
- data/test/test_gman_domain_list.rb +0 -112
- data/test/test_gman_domains.rb +0 -32
- data/test/test_gman_filter.rb +0 -17
- data/test/test_gman_identifier.rb +0 -106
- data/test/test_gman_importer.rb +0 -244
- data/test/test_gman_locality.rb +0 -10
@@ -1,18 +0,0 @@
|
|
1
|
-
require File.join(File.dirname(__FILE__), 'helper')
|
2
|
-
|
3
|
-
class TestGmanCountryCodes < Minitest::Test
|
4
|
-
should "determine a domain's country" do
|
5
|
-
name = Gman.new('whitehouse.gov').country.name
|
6
|
-
assert_equal 'United States of America', name
|
7
|
-
|
8
|
-
name = Gman.new('foo.gov.uk').country.name
|
9
|
-
assert_equal 'United Kingdom of Great Britain and Northern Ireland', name
|
10
|
-
|
11
|
-
assert_equal 'United States of America', Gman.new('army.mil').country.name
|
12
|
-
assert_equal 'Canada', Gman.new('foo.gc.ca').country.name
|
13
|
-
end
|
14
|
-
|
15
|
-
should 'not err out on an unknown country code' do
|
16
|
-
assert_equal nil, Gman.new('foo.eu').country
|
17
|
-
end
|
18
|
-
end
|
@@ -1,112 +0,0 @@
|
|
1
|
-
require File.join(File.dirname(__FILE__), 'helper')
|
2
|
-
|
3
|
-
class TestGmanDomainList < Minitest::Test
|
4
|
-
INIT_TYPES = [:path, :contents, :data].freeze
|
5
|
-
|
6
|
-
def setup
|
7
|
-
@original_domain_list = File.read(stubbed_list_path)
|
8
|
-
end
|
9
|
-
|
10
|
-
def teardown
|
11
|
-
File.write stubbed_list_path, @original_domain_list
|
12
|
-
end
|
13
|
-
|
14
|
-
def domain_list(type)
|
15
|
-
case type
|
16
|
-
when :path
|
17
|
-
Gman::DomainList.new(path: Gman.list_path)
|
18
|
-
when :contents
|
19
|
-
contents = File.read(Gman.list_path)
|
20
|
-
Gman::DomainList.new(contents: contents)
|
21
|
-
when :data
|
22
|
-
data = Gman::DomainList.new(path: Gman.list_path).to_h
|
23
|
-
Gman::DomainList.new(data: data)
|
24
|
-
end
|
25
|
-
end
|
26
|
-
|
27
|
-
INIT_TYPES.each do |type|
|
28
|
-
context "when initalized with #{type}" do
|
29
|
-
should 'store the init vars' do
|
30
|
-
refute domain_list(type).public_send(type).nil?
|
31
|
-
end
|
32
|
-
|
33
|
-
should 'return the domain data' do
|
34
|
-
list = domain_list(type)
|
35
|
-
assert list.data.key? 'Canada federal'
|
36
|
-
assert list.data.any? { |_key, values| values.include? 'gov' }
|
37
|
-
end
|
38
|
-
|
39
|
-
should 'return the list contents' do
|
40
|
-
list = domain_list(type)
|
41
|
-
assert_match(/^gov$/, list.contents)
|
42
|
-
end
|
43
|
-
|
44
|
-
should 'return the list path' do
|
45
|
-
list = domain_list(type)
|
46
|
-
assert_equal list.path, Gman.list_path
|
47
|
-
end
|
48
|
-
|
49
|
-
should 'return the public suffix parsed list' do
|
50
|
-
list = domain_list(type)
|
51
|
-
assert list.public_suffix_list.class == PublicSuffix::List
|
52
|
-
end
|
53
|
-
|
54
|
-
should 'know if a domain is valid' do
|
55
|
-
list = domain_list(type)
|
56
|
-
assert list.valid? 'whitehouse.gov'
|
57
|
-
end
|
58
|
-
|
59
|
-
should 'know if a domain is invalid' do
|
60
|
-
list = domain_list(type)
|
61
|
-
refute list.valid? 'example.com'
|
62
|
-
end
|
63
|
-
|
64
|
-
should 'return the domain groups' do
|
65
|
-
list = domain_list(type)
|
66
|
-
assert list.groups.include?('Canada federal')
|
67
|
-
end
|
68
|
-
|
69
|
-
should 'return the domains' do
|
70
|
-
list = domain_list(type)
|
71
|
-
assert list.domains.include?('gov')
|
72
|
-
end
|
73
|
-
|
74
|
-
should 'return the domain count' do
|
75
|
-
list = domain_list(type)
|
76
|
-
assert list.count.is_a?(Integer)
|
77
|
-
assert list.count > 100
|
78
|
-
end
|
79
|
-
|
80
|
-
should 'alphabetize the list' do
|
81
|
-
list = domain_list(type)
|
82
|
-
list.data['Canada municipal'].shuffle!
|
83
|
-
assert list.data['Canada municipal'].first != '100milehouse.com'
|
84
|
-
list.alphabetize
|
85
|
-
assert list.data['Canada municipal'].first == '100milehouse.com'
|
86
|
-
end
|
87
|
-
|
88
|
-
should 'write the list' do
|
89
|
-
list = domain_list(type)
|
90
|
-
list.instance_variable_set('@path', stubbed_list_path)
|
91
|
-
list.data = { 'foo' => ['bar.gov', 'baz.net'] }
|
92
|
-
list.write
|
93
|
-
contents = File.read(stubbed_list_path)
|
94
|
-
assert_match %r{^// foo$}, contents
|
95
|
-
expected = "// foo\nbar.gov\nbaz.net"
|
96
|
-
assert contents.include?(expected)
|
97
|
-
end
|
98
|
-
|
99
|
-
should 'output the list in public_suffix format' do
|
100
|
-
list = domain_list(type)
|
101
|
-
string = list.to_s
|
102
|
-
assert_match %r{^// Canada federal$}, string
|
103
|
-
assert string.include? "// Canada federal\ncanada\.ca\n"
|
104
|
-
end
|
105
|
-
|
106
|
-
should "find a domain's parent" do
|
107
|
-
list = domain_list(type)
|
108
|
-
assert_equal 'gov.uk', list.parent_domain('foo.gov.uk')
|
109
|
-
end
|
110
|
-
end
|
111
|
-
end
|
112
|
-
end
|
data/test/test_gman_domains.rb
DELETED
@@ -1,32 +0,0 @@
|
|
1
|
-
require File.join(File.dirname(__FILE__), 'helper')
|
2
|
-
|
3
|
-
class TestGmanDomains < Minitest::Test
|
4
|
-
WHITELIST = ['non-us gov', 'non-us mil', 'US Federal'].freeze
|
5
|
-
|
6
|
-
def resolve_domains?
|
7
|
-
ENV['GMAN_RESOLVE_DOMAINS'] == 'true'
|
8
|
-
end
|
9
|
-
|
10
|
-
should 'only contains valid domains' do
|
11
|
-
importer = Gman::Importer.new({})
|
12
|
-
if resolve_domains?
|
13
|
-
importer.logger.info <<-MSG
|
14
|
-
Validating that all domains resolve. This may take a while...
|
15
|
-
MSG
|
16
|
-
else
|
17
|
-
importer.logger.info 'Skipping domain resolution.' \
|
18
|
-
'Run `GMAN_RESOLVE_DOMAINS=true rake test` '\
|
19
|
-
'to validate that domains resolve.'
|
20
|
-
end
|
21
|
-
|
22
|
-
invalid = []
|
23
|
-
options = { skip_dupe: true, skip_resolve: !resolve_domains? }
|
24
|
-
Gman.list.to_h.each do |group, domains|
|
25
|
-
next if WHITELIST.include?(group)
|
26
|
-
Parallel.each(domains, in_threads: 4) do |domain|
|
27
|
-
invalid.push(domain) unless importer.valid_domain?(domain, options)
|
28
|
-
end
|
29
|
-
end
|
30
|
-
assert_equal [], invalid.flatten.reject(&:empty?)
|
31
|
-
end
|
32
|
-
end
|
data/test/test_gman_filter.rb
DELETED
@@ -1,17 +0,0 @@
|
|
1
|
-
HERE = File.dirname(__FILE__)
|
2
|
-
require File.join(HERE, 'helper')
|
3
|
-
|
4
|
-
class TestGmanFilter < Minitest::Test
|
5
|
-
txt_path = fixture_path 'obama.txt'
|
6
|
-
exec_path = bin_path 'gman_filter'
|
7
|
-
|
8
|
-
should 'remove non-gov/mil addresses' do
|
9
|
-
output, _status = Open3.capture2e('bundle', 'exec', exec_path, txt_path)
|
10
|
-
expected = %w(
|
11
|
-
mr.senator@obama.senate.gov
|
12
|
-
president@whitehouse.gov
|
13
|
-
commander.in.chief@us.army.mil
|
14
|
-
).join("\n") + "\n"
|
15
|
-
assert_equal output, expected
|
16
|
-
end
|
17
|
-
end
|
@@ -1,106 +0,0 @@
|
|
1
|
-
require File.join(File.dirname(__FILE__), 'helper')
|
2
|
-
|
3
|
-
class TestGmanIdentifier < Minitest::Test
|
4
|
-
should 'Parse the dotgov list' do
|
5
|
-
assert Gman.dotgov_list
|
6
|
-
assert_equal CSV::Table, Gman.dotgov_list.class
|
7
|
-
assert_equal CSV::Row, Gman.dotgov_list.first.class
|
8
|
-
assert Gman.dotgov_list.first['Domain Name']
|
9
|
-
end
|
10
|
-
|
11
|
-
context 'locality domains' do
|
12
|
-
should 'detect state domains' do
|
13
|
-
domain = Gman.new('state.ak.us')
|
14
|
-
assert domain.state?
|
15
|
-
|
16
|
-
refute domain.dotgov?
|
17
|
-
refute domain.city?
|
18
|
-
refute domain.federal?
|
19
|
-
refute domain.county?
|
20
|
-
|
21
|
-
assert_equal :state, domain.type
|
22
|
-
assert_equal 'AK', domain.state
|
23
|
-
end
|
24
|
-
|
25
|
-
should 'detect city domains' do
|
26
|
-
domain = Gman.new('ci.champaign.il.us')
|
27
|
-
assert domain.city?
|
28
|
-
|
29
|
-
refute domain.dotgov?
|
30
|
-
refute domain.state?
|
31
|
-
refute domain.federal?
|
32
|
-
refute domain.county?
|
33
|
-
|
34
|
-
assert_equal :city, domain.type
|
35
|
-
assert_equal 'IL', domain.state
|
36
|
-
end
|
37
|
-
end
|
38
|
-
|
39
|
-
context 'dotgovs' do
|
40
|
-
should 'detect federal dotgovs' do
|
41
|
-
domain = Gman.new 'whitehouse.gov'
|
42
|
-
assert domain.federal?
|
43
|
-
assert domain.dotgov?
|
44
|
-
|
45
|
-
refute domain.city?
|
46
|
-
refute domain.state?
|
47
|
-
refute domain.county?
|
48
|
-
|
49
|
-
assert_equal :federal, domain.type
|
50
|
-
assert_equal 'DC', domain.state
|
51
|
-
assert_equal 'Washington', domain.city
|
52
|
-
assert_equal 'Executive Office of the President', domain.agency
|
53
|
-
end
|
54
|
-
|
55
|
-
should 'detect state dotgovs' do
|
56
|
-
domain = Gman.new 'illinois.gov'
|
57
|
-
assert domain.state?
|
58
|
-
assert domain.dotgov?
|
59
|
-
|
60
|
-
refute domain.city?
|
61
|
-
refute domain.federal?
|
62
|
-
refute domain.county?
|
63
|
-
|
64
|
-
assert_equal :state, domain.type
|
65
|
-
assert_equal 'IL', domain.state
|
66
|
-
assert_equal 'Springfield', domain.city
|
67
|
-
end
|
68
|
-
|
69
|
-
should 'detect county dotgovs' do
|
70
|
-
domain = Gman.new 'ALLEGHENYCOUNTYPA.GOV'
|
71
|
-
assert domain.county?
|
72
|
-
assert domain.dotgov?
|
73
|
-
|
74
|
-
refute domain.city?
|
75
|
-
refute domain.federal?
|
76
|
-
refute domain.state?
|
77
|
-
|
78
|
-
assert_equal :county, domain.type
|
79
|
-
assert_equal 'PA', domain.state
|
80
|
-
assert_equal 'Pittsburgh', domain.city
|
81
|
-
end
|
82
|
-
|
83
|
-
should 'detect the list category' do
|
84
|
-
category = Gman.new('whitehouse.gov').send('list_category')
|
85
|
-
assert_equal 'US Federal', category
|
86
|
-
end
|
87
|
-
end
|
88
|
-
|
89
|
-
context 'non-dotgov domains' do
|
90
|
-
should "determine a domain's group" do
|
91
|
-
assert_equal 'usagovIN', Gman.new('cityofperu.org').send('list_category')
|
92
|
-
assert_equal :unknown, Gman.new('cityofperu.org').type
|
93
|
-
|
94
|
-
assert_equal 'Canada municipal', Gman.new('acme.ca').send('list_category')
|
95
|
-
assert_equal :"Canada municipal", Gman.new('acme.ca').type
|
96
|
-
|
97
|
-
assert_equal 'Canada federal', Gman.new('canada.ca').send('list_category')
|
98
|
-
assert_equal :"Canada federal", Gman.new('canada.ca').type
|
99
|
-
end
|
100
|
-
|
101
|
-
should 'detect the state' do
|
102
|
-
assert_equal 'OR', Gman.new('ashland.or.us').state
|
103
|
-
refute Gman.new('canada.ca').state
|
104
|
-
end
|
105
|
-
end
|
106
|
-
end
|
data/test/test_gman_importer.rb
DELETED
@@ -1,244 +0,0 @@
|
|
1
|
-
require File.join(File.dirname(__FILE__), 'helper')
|
2
|
-
|
3
|
-
class TestGManImporter < Minitest::Test
|
4
|
-
def setup
|
5
|
-
@importer = Gman::Importer.new 'test' => ['example.com']
|
6
|
-
@stdout = StringIO.new
|
7
|
-
@importer.instance_variable_set '@logger', Logger.new(@stdout)
|
8
|
-
|
9
|
-
@original_domain_list = File.read(stubbed_list_path)
|
10
|
-
end
|
11
|
-
|
12
|
-
def teardown
|
13
|
-
File.write stubbed_list_path, @original_domain_list
|
14
|
-
end
|
15
|
-
|
16
|
-
should 'init the domain list' do
|
17
|
-
assert_equal Gman::DomainList, @importer.domain_list.class
|
18
|
-
assert_equal 1, @importer.domain_list.count
|
19
|
-
assert_equal 'example.com', @importer.domain_list.domains.first
|
20
|
-
end
|
21
|
-
|
22
|
-
should 'init the logger' do
|
23
|
-
assert_equal Logger, @importer.logger.class
|
24
|
-
end
|
25
|
-
|
26
|
-
should 'return the current domain list' do
|
27
|
-
assert_equal Gman::DomainList, @importer.current.class
|
28
|
-
end
|
29
|
-
|
30
|
-
should 'return the resolver' do
|
31
|
-
assert_equal Resolv::DNS, @importer.resolver.class
|
32
|
-
end
|
33
|
-
|
34
|
-
context 'domain rejection' do
|
35
|
-
should 'return false for a rejected domain' do
|
36
|
-
refute @importer.reject 'example.com', 'reasons'
|
37
|
-
end
|
38
|
-
|
39
|
-
should 'return the reason when asked' do
|
40
|
-
with_env 'RECONCILING', 'true' do
|
41
|
-
assert_equal 'reasons', @importer.reject('example.com', 'reasons')
|
42
|
-
end
|
43
|
-
end
|
44
|
-
end
|
45
|
-
|
46
|
-
context 'manipulating the domain list' do
|
47
|
-
should 'normalize domains within the domain list' do
|
48
|
-
importer = Gman::Importer.new 'test' => ['www.EXAMPLE.com/']
|
49
|
-
importer.send :normalize_domains!
|
50
|
-
assert_equal 'example.com', importer.domain_list.domains.first
|
51
|
-
end
|
52
|
-
|
53
|
-
should 'remove invalid domains from the domain list' do
|
54
|
-
importer = Gman::Importer.new 'test' => ['foo.github.io', 'example.com']
|
55
|
-
importer.instance_variable_set '@logger', Logger.new(@stdout)
|
56
|
-
|
57
|
-
assert_equal 2, importer.domain_list.count
|
58
|
-
importer.send :ensure_validity!
|
59
|
-
assert_equal 1, importer.domain_list.count
|
60
|
-
end
|
61
|
-
|
62
|
-
context 'writing the domain list' do
|
63
|
-
should 'add domains to the current domain list' do
|
64
|
-
domains = { 'test' => ['example.com'], 'test2' => ['github.com'] }
|
65
|
-
importer = Gman::Importer.new domains
|
66
|
-
importer.instance_variable_set '@current', stubbed_list
|
67
|
-
importer.send :add_to_current
|
68
|
-
expected = "// test\nexample.com\ngov\n\n// test2\ngithub.com"
|
69
|
-
assert_equal expected, File.open(stubbed_list_path).read
|
70
|
-
end
|
71
|
-
|
72
|
-
should 'import' do
|
73
|
-
domains = {
|
74
|
-
'test' => ['www.example.com', 'goo.github.io'],
|
75
|
-
'test2' => ['github.com', 'www.github.com', 'whitehouse.gov']
|
76
|
-
}
|
77
|
-
|
78
|
-
importer = Gman::Importer.new domains
|
79
|
-
importer.instance_variable_set '@current', stubbed_list
|
80
|
-
importer.instance_variable_set '@logger', Logger.new(@stdout)
|
81
|
-
importer.import(skip_resolve: true)
|
82
|
-
|
83
|
-
expected = "// test\nexample.com\ngov\n\n// test2\ngithub.com"
|
84
|
-
assert_equal expected, File.open(stubbed_list_path).read
|
85
|
-
end
|
86
|
-
end
|
87
|
-
end
|
88
|
-
|
89
|
-
context 'domain validation' do
|
90
|
-
should 'allow valid domains' do
|
91
|
-
assert @importer.send :ensure_valid, 'whitehouse.gov'
|
92
|
-
end
|
93
|
-
|
94
|
-
should 'reject empty domains' do
|
95
|
-
refute @importer.send :ensure_valid, ''
|
96
|
-
end
|
97
|
-
|
98
|
-
should 'reject blacklisted domains' do
|
99
|
-
refute @importer.send :ensure_valid, 'egovlink.com'
|
100
|
-
end
|
101
|
-
|
102
|
-
should 'reject invalid domains' do
|
103
|
-
refute @importer.send :ensure_valid, 'foo.invalid'
|
104
|
-
end
|
105
|
-
|
106
|
-
should 'reject academic domains' do
|
107
|
-
refute @importer.send :ensure_valid, 'harvard.edu'
|
108
|
-
end
|
109
|
-
|
110
|
-
should "reject regex'd domains" do
|
111
|
-
refute @importer.send :ensure_valid, 'foo.github.io'
|
112
|
-
end
|
113
|
-
end
|
114
|
-
|
115
|
-
context 'duplicate domains' do
|
116
|
-
should 'know a unique domain is not a dupe' do
|
117
|
-
refute @importer.send :dupe?, 'gman.com'
|
118
|
-
end
|
119
|
-
|
120
|
-
should "know when a domain's a dupe" do
|
121
|
-
assert @importer.send :dupe?, 'gov'
|
122
|
-
end
|
123
|
-
|
124
|
-
should "know when a domain's a subdomain of an existing domain" do
|
125
|
-
assert @importer.send :dupe?, 'whitehouse.gov'
|
126
|
-
end
|
127
|
-
|
128
|
-
should 'allow unique domains' do
|
129
|
-
assert @importer.send :ensure_not_dupe, 'gman.com'
|
130
|
-
end
|
131
|
-
|
132
|
-
should 'reject duplicate domains' do
|
133
|
-
refute @importer.send :ensure_not_dupe, 'gov'
|
134
|
-
end
|
135
|
-
|
136
|
-
should 'reject subdomains' do
|
137
|
-
refute @importer.send :ensure_not_dupe, 'whitehouse.gov'
|
138
|
-
end
|
139
|
-
end
|
140
|
-
|
141
|
-
context 'domain resolution' do
|
142
|
-
should 'know if a domain resolves' do
|
143
|
-
assert @importer.domain_resolves?('github.com')
|
144
|
-
assert @importer.send :ensure_resolves, 'github.com'
|
145
|
-
end
|
146
|
-
|
147
|
-
should "know if a domain doesn't resolve" do
|
148
|
-
refute @importer.domain_resolves?('foo.invalid')
|
149
|
-
refute @importer.send :ensure_resolves, 'foo.invalid'
|
150
|
-
end
|
151
|
-
|
152
|
-
should 'know if a domain has an IP' do
|
153
|
-
end
|
154
|
-
|
155
|
-
should 'know if a domain returns a given record' do
|
156
|
-
end
|
157
|
-
end
|
158
|
-
|
159
|
-
context 'regex checks' do
|
160
|
-
should 'pass valid domains' do
|
161
|
-
assert @importer.send :ensure_regex, 'example.com'
|
162
|
-
end
|
163
|
-
|
164
|
-
should 'reject domains that begin with home.' do
|
165
|
-
refute @importer.send :ensure_regex, 'home.example.com'
|
166
|
-
end
|
167
|
-
|
168
|
-
should 'reject domains that begin with user.' do
|
169
|
-
refute @importer.send :ensure_regex, 'user.example.com'
|
170
|
-
end
|
171
|
-
|
172
|
-
should 'reject domains that begin with site.' do
|
173
|
-
refute @importer.send :ensure_regex, 'user.example.com'
|
174
|
-
end
|
175
|
-
|
176
|
-
should 'reject weebly domains' do
|
177
|
-
refute @importer.send :ensure_regex, 'foo.weebly.com'
|
178
|
-
end
|
179
|
-
|
180
|
-
should 'reject wordpress domains' do
|
181
|
-
refute @importer.send :ensure_regex, 'foo.wordpress.com'
|
182
|
-
end
|
183
|
-
|
184
|
-
should 'reject govoffice domains' do
|
185
|
-
refute @importer.send :ensure_regex, 'foo.govoffice.com'
|
186
|
-
refute @importer.send :ensure_regex, 'foo.govoffice1.com'
|
187
|
-
end
|
188
|
-
|
189
|
-
should 'reject homestead domains' do
|
190
|
-
refute @importer.send :ensure_regex, 'foo.homestead.com'
|
191
|
-
end
|
192
|
-
|
193
|
-
should 'reject wix domains' do
|
194
|
-
refute @importer.send :ensure_regex, 'foo.wix.com'
|
195
|
-
end
|
196
|
-
|
197
|
-
should 'reject blogspot domains' do
|
198
|
-
refute @importer.send :ensure_regex, 'foo.blogspot.com'
|
199
|
-
end
|
200
|
-
|
201
|
-
should 'reject tripod domains' do
|
202
|
-
refute @importer.send :ensure_regex, 'foo.tripod.com'
|
203
|
-
end
|
204
|
-
|
205
|
-
should 'reject squarespace domains' do
|
206
|
-
refute @importer.send :ensure_regex, 'foo.squarespace.com'
|
207
|
-
end
|
208
|
-
|
209
|
-
should 'reject github.io domains' do
|
210
|
-
refute @importer.send :ensure_regex, 'foo.github.io'
|
211
|
-
end
|
212
|
-
|
213
|
-
should 'reject locality domains' do
|
214
|
-
refute @importer.send :ensure_regex, 'ci.champaign.il.us'
|
215
|
-
end
|
216
|
-
end
|
217
|
-
|
218
|
-
context 'normalizing domains' do
|
219
|
-
should 'normalize URLs to domains' do
|
220
|
-
expected = 'example.com'
|
221
|
-
assert_equal expected, @importer.normalize_domain('http://example.com')
|
222
|
-
end
|
223
|
-
|
224
|
-
should 'strip WWW' do
|
225
|
-
assert_equal 'example.com', @importer.normalize_domain('www.example.com')
|
226
|
-
end
|
227
|
-
|
228
|
-
should 'remove trailing slashes' do
|
229
|
-
assert_equal 'example.com', @importer.normalize_domain('example.com/')
|
230
|
-
end
|
231
|
-
|
232
|
-
should 'remove paths' do
|
233
|
-
assert_equal 'example.com', @importer.normalize_domain('example.com/foo')
|
234
|
-
end
|
235
|
-
|
236
|
-
should 'remove paths with trailing slashes' do
|
237
|
-
assert_equal 'example.com', @importer.normalize_domain('example.com/foo/')
|
238
|
-
end
|
239
|
-
|
240
|
-
should 'downcase' do
|
241
|
-
assert_equal 'example.com', @importer.normalize_domain('EXAMPLE.com')
|
242
|
-
end
|
243
|
-
end
|
244
|
-
end
|