gman 5.0.9 → 6.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.rubocop.yml +21 -0
- data/.ruby-version +1 -1
- data/Gemfile +1 -0
- data/README.md +16 -22
- data/Rakefile +3 -3
- data/bin/gman +10 -11
- data/bin/gman_filter +7 -7
- data/config/domains.txt +19 -19
- data/config/vendor/dotgovs.csv +398 -355
- data/gman.gemspec +34 -27
- data/lib/gman.rb +29 -23
- data/lib/gman/country_codes.rb +14 -15
- data/lib/gman/domain_list.rb +34 -25
- data/lib/gman/identifier.rb +39 -43
- data/lib/gman/importer.rb +111 -61
- data/lib/gman/locality.rb +22 -10
- data/lib/gman/version.rb +1 -1
- data/script/add +2 -2
- data/script/alphabetize +2 -2
- data/script/cibuild +2 -0
- data/script/dedupe +2 -2
- data/script/profile +5 -2
- data/script/prune +7 -7
- data/script/reconcile-us +26 -21
- data/script/vendor-federal-de +5 -5
- data/script/vendor-municipal-de +5 -5
- data/script/vendor-nl +12 -4
- data/script/vendor-public-suffix +8 -8
- data/script/vendor-se +8 -6
- data/script/vendor-us +7 -7
- data/test/fixtures/domains.txt +2 -0
- data/test/{obama.txt → fixtures/obama.txt} +0 -0
- data/test/helper.rb +19 -5
- data/test/test_gman.rb +43 -38
- data/test/test_gman_bin.rb +37 -43
- data/test/test_gman_country_codes.rb +10 -6
- data/test/test_gman_domains.rb +15 -10
- data/test/test_gman_filter.rb +5 -7
- data/test/test_gman_identifier.rb +36 -35
- data/test/test_gman_importer.rb +250 -0
- data/test/test_gman_locality.rb +5 -5
- metadata +28 -10
- data/lib/gman/sanctions.rb +0 -29
- data/test/test_gman_sanctions.rb +0 -20
data/test/test_gman_bin.rb
CHANGED
@@ -1,81 +1,75 @@
|
|
1
|
-
require_relative
|
1
|
+
require_relative 'helper'
|
2
2
|
|
3
3
|
class TestGmanBin < Minitest::Test
|
4
|
-
|
5
4
|
def setup
|
6
|
-
@output, @status = test_bin(
|
5
|
+
@output, @status = test_bin('whitehouse.gov')
|
7
6
|
end
|
8
7
|
|
9
|
-
should
|
10
|
-
output,
|
11
|
-
assert_match
|
8
|
+
should 'parse the domain' do
|
9
|
+
output, = test_bin('bar.gov')
|
10
|
+
assert_match(/Domain : bar.gov/, output)
|
12
11
|
|
13
|
-
output,
|
14
|
-
assert_match
|
12
|
+
output, = test_bin('foo@bar.gov')
|
13
|
+
assert_match(/Domain : bar.gov/, output)
|
15
14
|
|
16
|
-
output,
|
17
|
-
assert_match
|
15
|
+
output, = test_bin('http://bar.gov/foo')
|
16
|
+
assert_match(/Domain : bar.gov/, output)
|
18
17
|
end
|
19
18
|
|
20
|
-
should
|
21
|
-
output, status = test_bin(
|
19
|
+
should 'err on invalid domains' do
|
20
|
+
output, status = test_bin('foo.invalid')
|
22
21
|
assert_equal 1, status.exitstatus
|
23
|
-
assert_match
|
22
|
+
assert_match(/Invalid domain/, output)
|
24
23
|
end
|
25
24
|
|
26
|
-
should
|
27
|
-
output, status = test_bin(
|
25
|
+
should 'err on non-government domains' do
|
26
|
+
output, status = test_bin('github.com')
|
28
27
|
assert_equal 1, status.exitstatus
|
29
|
-
assert_match
|
28
|
+
assert_match(/Not a government domain/, output)
|
30
29
|
end
|
31
30
|
|
32
|
-
should
|
33
|
-
assert_match
|
31
|
+
should 'know the type' do
|
32
|
+
assert_match(/federal/, @output)
|
34
33
|
assert_equal 0, @status.exitstatus
|
35
34
|
end
|
36
35
|
|
37
|
-
should
|
38
|
-
assert_match
|
36
|
+
should 'know the agency' do
|
37
|
+
assert_match(/Executive Office of the President/, @output)
|
39
38
|
assert_equal 0, @status.exitstatus
|
40
39
|
end
|
41
40
|
|
42
|
-
should
|
43
|
-
assert_match
|
41
|
+
should 'know the country' do
|
42
|
+
assert_match(/United States/, @output)
|
44
43
|
assert_equal 0, @status.exitstatus
|
45
44
|
end
|
46
45
|
|
47
|
-
should
|
48
|
-
assert_match
|
46
|
+
should 'know the city' do
|
47
|
+
assert_match(/Washington/, @output)
|
49
48
|
assert_equal 0, @status.exitstatus
|
50
49
|
end
|
51
50
|
|
52
|
-
should
|
53
|
-
assert_match
|
51
|
+
should 'know the state' do
|
52
|
+
assert_match(/DC/, @output)
|
54
53
|
assert_equal 0, @status.exitstatus
|
55
54
|
end
|
56
55
|
|
57
|
-
should
|
58
|
-
output,
|
59
|
-
refute_match
|
56
|
+
should 'allow you to disable colorization' do
|
57
|
+
output, = test_bin('whitehouse.gov', '--no-color')
|
58
|
+
refute_match(/\e\[32m/, output)
|
60
59
|
end
|
61
60
|
|
62
|
-
should
|
63
|
-
assert_match
|
61
|
+
should 'color by default' do
|
62
|
+
assert_match(/\e\[32m/, @output)
|
64
63
|
end
|
65
64
|
|
66
|
-
should
|
67
|
-
output,
|
68
|
-
assert_match
|
69
|
-
|
70
|
-
output, status = test_bin("")
|
71
|
-
assert_match /Usage/i, output
|
65
|
+
should 'show help text' do
|
66
|
+
output, = test_bin
|
67
|
+
assert_match(/Usage/i, output)
|
72
68
|
|
73
|
-
output,
|
74
|
-
assert_match
|
75
|
-
end
|
69
|
+
output, = test_bin('')
|
70
|
+
assert_match(/Usage/i, output)
|
76
71
|
|
77
|
-
|
78
|
-
|
79
|
-
assert_match /SANCTIONED/, output
|
72
|
+
output, = test_bin('--no-color')
|
73
|
+
assert_match(/Usage/i, output)
|
80
74
|
end
|
81
75
|
end
|
@@ -2,13 +2,17 @@ require File.join(File.dirname(__FILE__), 'helper')
|
|
2
2
|
|
3
3
|
class TestGmanCountryCodes < Minitest::Test
|
4
4
|
should "determine a domain's country" do
|
5
|
-
|
6
|
-
assert_equal
|
7
|
-
|
8
|
-
|
5
|
+
name = Gman.new('whitehouse.gov').country.name
|
6
|
+
assert_equal 'United States of America', name
|
7
|
+
|
8
|
+
name = Gman.new('foo.gov.uk').country.name
|
9
|
+
assert_equal 'United Kingdom of Great Britain and Northern Ireland', name
|
10
|
+
|
11
|
+
assert_equal 'United States of America', Gman.new('army.mil').country.name
|
12
|
+
assert_equal 'Canada', Gman.new('foo.gc.ca').country.name
|
9
13
|
end
|
10
14
|
|
11
|
-
should
|
12
|
-
assert_equal nil, Gman.new(
|
15
|
+
should 'not err out on an unknown country code' do
|
16
|
+
assert_equal nil, Gman.new('foo.eu').country
|
13
17
|
end
|
14
18
|
end
|
data/test/test_gman_domains.rb
CHANGED
@@ -1,28 +1,33 @@
|
|
1
1
|
require File.join(File.dirname(__FILE__), 'helper')
|
2
2
|
|
3
|
-
class
|
4
|
-
|
5
|
-
WHITELIST = [ "non-us gov", "non-us mil", "US Federal"]
|
3
|
+
class TestGmanDomains < Minitest::Test
|
4
|
+
WHITELIST = ['non-us gov', 'non-us mil', 'US Federal'].freeze
|
6
5
|
|
7
6
|
def resolve_domains?
|
8
|
-
ENV[
|
7
|
+
ENV['GMAN_RESOLVE_DOMAINS'] == 'true'
|
9
8
|
end
|
10
9
|
|
11
|
-
should
|
10
|
+
should 'only contains valid domains' do
|
12
11
|
importer = Gman::Importer.new({})
|
13
12
|
if resolve_domains?
|
14
|
-
importer.logger.info
|
13
|
+
importer.logger.info <<-MSG
|
14
|
+
Validating that all domains resolve. This may take a while...
|
15
|
+
MSG
|
15
16
|
else
|
16
|
-
importer.logger.info
|
17
|
+
importer.logger.info 'Skipping domain resolution.' \
|
18
|
+
'Run `GMAN_RESOLVE_DOMAINS=true rake test` '\
|
19
|
+
'to validate that domains resolve.'
|
17
20
|
end
|
18
21
|
|
19
22
|
invalid = []
|
20
|
-
|
23
|
+
list = Gman::DomainList.current.list
|
24
|
+
Parallel.each(list, in_threads: 2) do |group, domains|
|
21
25
|
next if WHITELIST.include?(group)
|
22
26
|
invalid.push domains.reject { |domain|
|
23
|
-
|
27
|
+
options = { skip_dupe: true, skip_resolve: !resolve_domains? }
|
28
|
+
importer.valid_domain?(domain, options)
|
24
29
|
}
|
25
30
|
end
|
26
|
-
assert_equal [], invalid.flatten.reject
|
31
|
+
assert_equal [], invalid.flatten.reject(&:empty?)
|
27
32
|
end
|
28
33
|
end
|
data/test/test_gman_filter.rb
CHANGED
@@ -2,18 +2,16 @@ HERE = File.dirname(__FILE__)
|
|
2
2
|
require File.join(HERE, 'helper')
|
3
3
|
|
4
4
|
class TestGmanFilter < Minitest::Test
|
5
|
+
txt_path = fixture_path 'obama.txt'
|
6
|
+
exec_path = bin_path 'gman_filter'
|
5
7
|
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
should "remove non-gov/mil addresses" do
|
10
|
-
filtered = `#{exec_path} < #{txt_path}`
|
8
|
+
should 'remove non-gov/mil addresses' do
|
9
|
+
output, _status = Open3.capture2e('bundle', 'exec', exec_path, txt_path)
|
11
10
|
expected = %w(
|
12
11
|
mr.senator@obama.senate.gov
|
13
12
|
president@whitehouse.gov
|
14
13
|
commander.in.chief@us.army.mil
|
15
14
|
).join("\n") + "\n"
|
16
|
-
assert_equal
|
15
|
+
assert_equal output, expected
|
17
16
|
end
|
18
|
-
|
19
17
|
end
|
@@ -1,16 +1,16 @@
|
|
1
1
|
require File.join(File.dirname(__FILE__), 'helper')
|
2
2
|
|
3
3
|
class TestGmanIdentifier < Minitest::Test
|
4
|
-
should
|
4
|
+
should 'Parse the dotgov list' do
|
5
5
|
assert Gman.dotgov_list
|
6
6
|
assert_equal CSV::Table, Gman.dotgov_list.class
|
7
7
|
assert_equal CSV::Row, Gman.dotgov_list.first.class
|
8
|
-
assert Gman.dotgov_list.first[
|
8
|
+
assert Gman.dotgov_list.first['Domain Name']
|
9
9
|
end
|
10
10
|
|
11
|
-
context
|
12
|
-
should
|
13
|
-
domain = Gman.new(
|
11
|
+
context 'locality domains' do
|
12
|
+
should 'detect state domains' do
|
13
|
+
domain = Gman.new('state.ak.us')
|
14
14
|
assert domain.state?
|
15
15
|
|
16
16
|
refute domain.dotgov?
|
@@ -19,11 +19,11 @@ class TestGmanIdentifier < Minitest::Test
|
|
19
19
|
refute domain.county?
|
20
20
|
|
21
21
|
assert_equal :state, domain.type
|
22
|
-
assert_equal
|
22
|
+
assert_equal 'AK', domain.state
|
23
23
|
end
|
24
24
|
|
25
|
-
should
|
26
|
-
domain = Gman.new(
|
25
|
+
should 'detect city domains' do
|
26
|
+
domain = Gman.new('ci.champaign.il.us')
|
27
27
|
assert domain.city?
|
28
28
|
|
29
29
|
refute domain.dotgov?
|
@@ -32,13 +32,13 @@ class TestGmanIdentifier < Minitest::Test
|
|
32
32
|
refute domain.county?
|
33
33
|
|
34
34
|
assert_equal :city, domain.type
|
35
|
-
assert_equal
|
35
|
+
assert_equal 'IL', domain.state
|
36
36
|
end
|
37
37
|
end
|
38
38
|
|
39
|
-
context
|
40
|
-
should
|
41
|
-
domain = Gman.new
|
39
|
+
context 'dotgovs' do
|
40
|
+
should 'detect federal dotgovs' do
|
41
|
+
domain = Gman.new 'whitehouse.gov'
|
42
42
|
assert domain.federal?
|
43
43
|
assert domain.dotgov?
|
44
44
|
|
@@ -47,13 +47,13 @@ class TestGmanIdentifier < Minitest::Test
|
|
47
47
|
refute domain.county?
|
48
48
|
|
49
49
|
assert_equal :federal, domain.type
|
50
|
-
assert_equal
|
51
|
-
assert_equal
|
52
|
-
assert_equal
|
50
|
+
assert_equal 'DC', domain.state
|
51
|
+
assert_equal 'Washington', domain.city
|
52
|
+
assert_equal 'Executive Office of the President', domain.agency
|
53
53
|
end
|
54
54
|
|
55
|
-
should
|
56
|
-
domain = Gman.new
|
55
|
+
should 'detect state dotgovs' do
|
56
|
+
domain = Gman.new 'illinois.gov'
|
57
57
|
assert domain.state?
|
58
58
|
assert domain.dotgov?
|
59
59
|
|
@@ -62,12 +62,12 @@ class TestGmanIdentifier < Minitest::Test
|
|
62
62
|
refute domain.county?
|
63
63
|
|
64
64
|
assert_equal :state, domain.type
|
65
|
-
assert_equal
|
66
|
-
assert_equal
|
65
|
+
assert_equal 'IL', domain.state
|
66
|
+
assert_equal 'Springfield', domain.city
|
67
67
|
end
|
68
68
|
|
69
|
-
should
|
70
|
-
domain = Gman.new
|
69
|
+
should 'detect county dotgovs' do
|
70
|
+
domain = Gman.new 'ALLEGHENYCOUNTYPA.GOV'
|
71
71
|
assert domain.county?
|
72
72
|
assert domain.dotgov?
|
73
73
|
|
@@ -76,30 +76,31 @@ class TestGmanIdentifier < Minitest::Test
|
|
76
76
|
refute domain.state?
|
77
77
|
|
78
78
|
assert_equal :county, domain.type
|
79
|
-
assert_equal
|
80
|
-
assert_equal
|
79
|
+
assert_equal 'PA', domain.state
|
80
|
+
assert_equal 'Pittsburgh', domain.city
|
81
81
|
end
|
82
82
|
|
83
|
-
should
|
84
|
-
|
83
|
+
should 'detect the list category' do
|
84
|
+
category = Gman.new('whitehouse.gov').send('list_category')
|
85
|
+
assert_equal 'US Federal', category
|
85
86
|
end
|
86
87
|
end
|
87
88
|
|
88
|
-
context
|
89
|
+
context 'non-dotgov domains' do
|
89
90
|
should "determine a domain's group" do
|
90
|
-
assert_equal
|
91
|
-
assert_equal :unknown, Gman.new(
|
91
|
+
assert_equal 'usagovIN', Gman.new('cityofperu.org').send('list_category')
|
92
|
+
assert_equal :unknown, Gman.new('cityofperu.org').type
|
92
93
|
|
93
|
-
assert_equal
|
94
|
-
assert_equal :"Canada municipal", Gman.new(
|
94
|
+
assert_equal 'Canada municipal', Gman.new('acme.ca').send('list_category')
|
95
|
+
assert_equal :"Canada municipal", Gman.new('acme.ca').type
|
95
96
|
|
96
|
-
assert_equal
|
97
|
-
assert_equal :"Canada federal", Gman.new(
|
97
|
+
assert_equal 'Canada federal', Gman.new('canada.ca').send('list_category')
|
98
|
+
assert_equal :"Canada federal", Gman.new('canada.ca').type
|
98
99
|
end
|
99
100
|
|
100
|
-
should
|
101
|
-
assert_equal
|
102
|
-
refute Gman.new(
|
101
|
+
should 'detect the state' do
|
102
|
+
assert_equal 'OR', Gman.new('ashland.or.us').state
|
103
|
+
refute Gman.new('canada.ca').state
|
103
104
|
end
|
104
105
|
end
|
105
106
|
end
|
@@ -0,0 +1,250 @@
|
|
1
|
+
require File.join(File.dirname(__FILE__), 'helper')
|
2
|
+
|
3
|
+
class TestGManImporter < Minitest::Test
|
4
|
+
def setup
|
5
|
+
@importer = Gman::Importer.new 'test' => ['example.com']
|
6
|
+
@stdout = StringIO.new
|
7
|
+
@importer.instance_variable_set '@logger', Logger.new(@stdout)
|
8
|
+
|
9
|
+
with_env 'GMAN_STUB_DOMAINS', 'true' do
|
10
|
+
@original_domain_list = File.open(Gman.list_path).read
|
11
|
+
end
|
12
|
+
end
|
13
|
+
|
14
|
+
def teardown
|
15
|
+
with_env 'GMAN_STUB_DOMAINS', 'true' do
|
16
|
+
File.write Gman.list_path, @original_domain_list
|
17
|
+
end
|
18
|
+
end
|
19
|
+
|
20
|
+
should 'init the domain list' do
|
21
|
+
assert_equal Gman::DomainList, @importer.domains.class
|
22
|
+
assert_equal 1, @importer.domains.domains.count
|
23
|
+
assert_equal 'example.com', @importer.domains.domains.first
|
24
|
+
end
|
25
|
+
|
26
|
+
should 'init the logger' do
|
27
|
+
assert_equal Logger, @importer.logger.class
|
28
|
+
end
|
29
|
+
|
30
|
+
should 'return the current domain list' do
|
31
|
+
assert_equal Gman::DomainList, @importer.current.class
|
32
|
+
end
|
33
|
+
|
34
|
+
should 'return the resolver' do
|
35
|
+
assert_equal Resolv::DNS, @importer.resolver.class
|
36
|
+
end
|
37
|
+
|
38
|
+
context 'domain rejection' do
|
39
|
+
should 'return false for a rejected domain' do
|
40
|
+
refute @importer.reject 'example.com', 'reasons'
|
41
|
+
end
|
42
|
+
|
43
|
+
should 'return the reason when asked' do
|
44
|
+
with_env 'RECONCILING', 'true' do
|
45
|
+
assert_equal 'reasons', @importer.reject('example.com', 'reasons')
|
46
|
+
end
|
47
|
+
end
|
48
|
+
end
|
49
|
+
|
50
|
+
context 'manipulating the domain list' do
|
51
|
+
should 'normalize domains within the domain list' do
|
52
|
+
importer = Gman::Importer.new 'test' => ['www.EXAMPLE.com/']
|
53
|
+
importer.send :normalize_domains!
|
54
|
+
assert_equal 'example.com', importer.domains.domains.first
|
55
|
+
end
|
56
|
+
|
57
|
+
should 'remove invalid domains from the domain list' do
|
58
|
+
importer = Gman::Importer.new 'test' => ['foo.github.io', 'example.com']
|
59
|
+
importer.instance_variable_set '@logger', Logger.new(@stdout)
|
60
|
+
|
61
|
+
assert_equal 2, importer.domains.domains.count
|
62
|
+
importer.send :ensure_validity!
|
63
|
+
assert_equal 1, importer.domains.domains.count
|
64
|
+
end
|
65
|
+
|
66
|
+
context 'writing the domain list' do
|
67
|
+
should 'add domains to the current domain list' do
|
68
|
+
with_env 'GMAN_STUB_DOMAINS', 'true' do
|
69
|
+
domains = { 'test' => ['example.com'], 'test2' => ['github.com'] }
|
70
|
+
importer = Gman::Importer.new domains
|
71
|
+
importer.send :add_to_current
|
72
|
+
expected = "// test\nexample.com\ngov\n\n// test2\ngithub.com"
|
73
|
+
assert_equal expected, File.open(Gman.list_path).read
|
74
|
+
end
|
75
|
+
end
|
76
|
+
|
77
|
+
should 'import' do
|
78
|
+
with_env 'GMAN_STUB_DOMAINS', 'true' do
|
79
|
+
domains = {
|
80
|
+
'test' => ['www.example.com', 'goo.github.io'],
|
81
|
+
'test2' => ['github.com', 'www.github.com', 'whitehouse.gov']
|
82
|
+
}
|
83
|
+
|
84
|
+
importer = Gman::Importer.new domains
|
85
|
+
importer.instance_variable_set '@logger', Logger.new(@stdout)
|
86
|
+
importer.import(skip_resolve: true)
|
87
|
+
|
88
|
+
expected = "// test\nexample.com\ngov\n\n// test2\ngithub.com"
|
89
|
+
assert_equal expected, File.open(Gman.list_path).read
|
90
|
+
end
|
91
|
+
end
|
92
|
+
end
|
93
|
+
end
|
94
|
+
|
95
|
+
context 'domain validation' do
|
96
|
+
should 'allow valid domains' do
|
97
|
+
assert @importer.send :ensure_valid, 'whitehouse.gov'
|
98
|
+
end
|
99
|
+
|
100
|
+
should 'reject empty domains' do
|
101
|
+
refute @importer.send :ensure_valid, ''
|
102
|
+
end
|
103
|
+
|
104
|
+
should 'reject blacklisted domains' do
|
105
|
+
refute @importer.send :ensure_valid, 'egovlink.com'
|
106
|
+
end
|
107
|
+
|
108
|
+
should 'reject invalid domains' do
|
109
|
+
refute @importer.send :ensure_valid, 'foo.invalid'
|
110
|
+
end
|
111
|
+
|
112
|
+
should 'reject academic domains' do
|
113
|
+
refute @importer.send :ensure_valid, 'harvard.edu'
|
114
|
+
end
|
115
|
+
|
116
|
+
should "reject regex'd domains" do
|
117
|
+
refute @importer.send :ensure_valid, 'foo.github.io'
|
118
|
+
end
|
119
|
+
end
|
120
|
+
|
121
|
+
context 'duplicate domains' do
|
122
|
+
should 'know a unique domain is not a dupe' do
|
123
|
+
refute @importer.send :dupe?, 'gman.com'
|
124
|
+
end
|
125
|
+
|
126
|
+
should "know when a domain's a dupe" do
|
127
|
+
assert @importer.send :dupe?, 'gov'
|
128
|
+
end
|
129
|
+
|
130
|
+
should "know when a domain's a subdomain of an existing domain" do
|
131
|
+
assert @importer.send :dupe?, 'whitehouse.gov'
|
132
|
+
end
|
133
|
+
|
134
|
+
should 'allow unique domains' do
|
135
|
+
assert @importer.send :ensure_not_dupe, 'gman.com'
|
136
|
+
end
|
137
|
+
|
138
|
+
should 'reject duplicate domains' do
|
139
|
+
refute @importer.send :ensure_not_dupe, 'gov'
|
140
|
+
end
|
141
|
+
|
142
|
+
should 'reject subdomains' do
|
143
|
+
refute @importer.send :ensure_not_dupe, 'whitehouse.gov'
|
144
|
+
end
|
145
|
+
end
|
146
|
+
|
147
|
+
context 'domain resolution' do
|
148
|
+
should 'know if a domain resolves' do
|
149
|
+
assert @importer.domain_resolves?('github.com')
|
150
|
+
assert @importer.send :ensure_resolves, 'github.com'
|
151
|
+
end
|
152
|
+
|
153
|
+
should "know if a domain doesn't resolve" do
|
154
|
+
refute @importer.domain_resolves?('foo.invalid')
|
155
|
+
refute @importer.send :ensure_resolves, 'foo.invalid'
|
156
|
+
end
|
157
|
+
|
158
|
+
should 'know if a domain has an IP' do
|
159
|
+
end
|
160
|
+
|
161
|
+
should 'know if a domain returns a given record' do
|
162
|
+
end
|
163
|
+
end
|
164
|
+
|
165
|
+
context 'regex checks' do
|
166
|
+
should 'pass valid domains' do
|
167
|
+
assert @importer.send :ensure_regex, 'example.com'
|
168
|
+
end
|
169
|
+
|
170
|
+
should 'reject domains that begin with home.' do
|
171
|
+
refute @importer.send :ensure_regex, 'home.example.com'
|
172
|
+
end
|
173
|
+
|
174
|
+
should 'reject domains that begin with user.' do
|
175
|
+
refute @importer.send :ensure_regex, 'user.example.com'
|
176
|
+
end
|
177
|
+
|
178
|
+
should 'reject domains that begin with site.' do
|
179
|
+
refute @importer.send :ensure_regex, 'user.example.com'
|
180
|
+
end
|
181
|
+
|
182
|
+
should 'reject weebly domains' do
|
183
|
+
refute @importer.send :ensure_regex, 'foo.weebly.com'
|
184
|
+
end
|
185
|
+
|
186
|
+
should 'reject wordpress domains' do
|
187
|
+
refute @importer.send :ensure_regex, 'foo.wordpress.com'
|
188
|
+
end
|
189
|
+
|
190
|
+
should 'reject govoffice domains' do
|
191
|
+
refute @importer.send :ensure_regex, 'foo.govoffice.com'
|
192
|
+
refute @importer.send :ensure_regex, 'foo.govoffice1.com'
|
193
|
+
end
|
194
|
+
|
195
|
+
should 'reject homestead domains' do
|
196
|
+
refute @importer.send :ensure_regex, 'foo.homestead.com'
|
197
|
+
end
|
198
|
+
|
199
|
+
should 'reject wix domains' do
|
200
|
+
refute @importer.send :ensure_regex, 'foo.wix.com'
|
201
|
+
end
|
202
|
+
|
203
|
+
should 'reject blogspot domains' do
|
204
|
+
refute @importer.send :ensure_regex, 'foo.blogspot.com'
|
205
|
+
end
|
206
|
+
|
207
|
+
should 'reject tripod domains' do
|
208
|
+
refute @importer.send :ensure_regex, 'foo.tripod.com'
|
209
|
+
end
|
210
|
+
|
211
|
+
should 'reject squarespace domains' do
|
212
|
+
refute @importer.send :ensure_regex, 'foo.squarespace.com'
|
213
|
+
end
|
214
|
+
|
215
|
+
should 'reject github.io domains' do
|
216
|
+
refute @importer.send :ensure_regex, 'foo.github.io'
|
217
|
+
end
|
218
|
+
|
219
|
+
should 'reject locality domains' do
|
220
|
+
refute @importer.send :ensure_regex, 'ci.champaign.il.us'
|
221
|
+
end
|
222
|
+
end
|
223
|
+
|
224
|
+
context 'normalizing domains' do
|
225
|
+
should 'normalize URLs to domains' do
|
226
|
+
expected = 'example.com'
|
227
|
+
assert_equal expected, @importer.normalize_domain('http://example.com')
|
228
|
+
end
|
229
|
+
|
230
|
+
should 'strip WWW' do
|
231
|
+
assert_equal 'example.com', @importer.normalize_domain('www.example.com')
|
232
|
+
end
|
233
|
+
|
234
|
+
should 'remove trailing slashes' do
|
235
|
+
assert_equal 'example.com', @importer.normalize_domain('example.com/')
|
236
|
+
end
|
237
|
+
|
238
|
+
should 'remove paths' do
|
239
|
+
assert_equal 'example.com', @importer.normalize_domain('example.com/foo')
|
240
|
+
end
|
241
|
+
|
242
|
+
should 'remove paths with trailing slashes' do
|
243
|
+
assert_equal 'example.com', @importer.normalize_domain('example.com/foo/')
|
244
|
+
end
|
245
|
+
|
246
|
+
should 'downcase' do
|
247
|
+
assert_equal 'example.com', @importer.normalize_domain('EXAMPLE.com')
|
248
|
+
end
|
249
|
+
end
|
250
|
+
end
|