gman 5.0.9 → 6.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +21 -0
- data/.ruby-version +1 -1
- data/Gemfile +1 -0
- data/README.md +16 -22
- data/Rakefile +3 -3
- data/bin/gman +10 -11
- data/bin/gman_filter +7 -7
- data/config/domains.txt +19 -19
- data/config/vendor/dotgovs.csv +398 -355
- data/gman.gemspec +34 -27
- data/lib/gman.rb +29 -23
- data/lib/gman/country_codes.rb +14 -15
- data/lib/gman/domain_list.rb +34 -25
- data/lib/gman/identifier.rb +39 -43
- data/lib/gman/importer.rb +111 -61
- data/lib/gman/locality.rb +22 -10
- data/lib/gman/version.rb +1 -1
- data/script/add +2 -2
- data/script/alphabetize +2 -2
- data/script/cibuild +2 -0
- data/script/dedupe +2 -2
- data/script/profile +5 -2
- data/script/prune +7 -7
- data/script/reconcile-us +26 -21
- data/script/vendor-federal-de +5 -5
- data/script/vendor-municipal-de +5 -5
- data/script/vendor-nl +12 -4
- data/script/vendor-public-suffix +8 -8
- data/script/vendor-se +8 -6
- data/script/vendor-us +7 -7
- data/test/fixtures/domains.txt +2 -0
- data/test/{obama.txt → fixtures/obama.txt} +0 -0
- data/test/helper.rb +19 -5
- data/test/test_gman.rb +43 -38
- data/test/test_gman_bin.rb +37 -43
- data/test/test_gman_country_codes.rb +10 -6
- data/test/test_gman_domains.rb +15 -10
- data/test/test_gman_filter.rb +5 -7
- data/test/test_gman_identifier.rb +36 -35
- data/test/test_gman_importer.rb +250 -0
- data/test/test_gman_locality.rb +5 -5
- metadata +28 -10
- data/lib/gman/sanctions.rb +0 -29
- data/test/test_gman_sanctions.rb +0 -20
data/test/test_gman_bin.rb
CHANGED
@@ -1,81 +1,75 @@
|
|
1
|
-
require_relative
|
1
|
+
require_relative 'helper'
|
2
2
|
|
3
3
|
class TestGmanBin < Minitest::Test
|
4
|
-
|
5
4
|
def setup
|
6
|
-
@output, @status = test_bin(
|
5
|
+
@output, @status = test_bin('whitehouse.gov')
|
7
6
|
end
|
8
7
|
|
9
|
-
should
|
10
|
-
output,
|
11
|
-
assert_match
|
8
|
+
should 'parse the domain' do
|
9
|
+
output, = test_bin('bar.gov')
|
10
|
+
assert_match(/Domain : bar.gov/, output)
|
12
11
|
|
13
|
-
output,
|
14
|
-
assert_match
|
12
|
+
output, = test_bin('foo@bar.gov')
|
13
|
+
assert_match(/Domain : bar.gov/, output)
|
15
14
|
|
16
|
-
output,
|
17
|
-
assert_match
|
15
|
+
output, = test_bin('http://bar.gov/foo')
|
16
|
+
assert_match(/Domain : bar.gov/, output)
|
18
17
|
end
|
19
18
|
|
20
|
-
should
|
21
|
-
output, status = test_bin(
|
19
|
+
should 'err on invalid domains' do
|
20
|
+
output, status = test_bin('foo.invalid')
|
22
21
|
assert_equal 1, status.exitstatus
|
23
|
-
assert_match
|
22
|
+
assert_match(/Invalid domain/, output)
|
24
23
|
end
|
25
24
|
|
26
|
-
should
|
27
|
-
output, status = test_bin(
|
25
|
+
should 'err on non-government domains' do
|
26
|
+
output, status = test_bin('github.com')
|
28
27
|
assert_equal 1, status.exitstatus
|
29
|
-
assert_match
|
28
|
+
assert_match(/Not a government domain/, output)
|
30
29
|
end
|
31
30
|
|
32
|
-
should
|
33
|
-
assert_match
|
31
|
+
should 'know the type' do
|
32
|
+
assert_match(/federal/, @output)
|
34
33
|
assert_equal 0, @status.exitstatus
|
35
34
|
end
|
36
35
|
|
37
|
-
should
|
38
|
-
assert_match
|
36
|
+
should 'know the agency' do
|
37
|
+
assert_match(/Executive Office of the President/, @output)
|
39
38
|
assert_equal 0, @status.exitstatus
|
40
39
|
end
|
41
40
|
|
42
|
-
should
|
43
|
-
assert_match
|
41
|
+
should 'know the country' do
|
42
|
+
assert_match(/United States/, @output)
|
44
43
|
assert_equal 0, @status.exitstatus
|
45
44
|
end
|
46
45
|
|
47
|
-
should
|
48
|
-
assert_match
|
46
|
+
should 'know the city' do
|
47
|
+
assert_match(/Washington/, @output)
|
49
48
|
assert_equal 0, @status.exitstatus
|
50
49
|
end
|
51
50
|
|
52
|
-
should
|
53
|
-
assert_match
|
51
|
+
should 'know the state' do
|
52
|
+
assert_match(/DC/, @output)
|
54
53
|
assert_equal 0, @status.exitstatus
|
55
54
|
end
|
56
55
|
|
57
|
-
should
|
58
|
-
output,
|
59
|
-
refute_match
|
56
|
+
should 'allow you to disable colorization' do
|
57
|
+
output, = test_bin('whitehouse.gov', '--no-color')
|
58
|
+
refute_match(/\e\[32m/, output)
|
60
59
|
end
|
61
60
|
|
62
|
-
should
|
63
|
-
assert_match
|
61
|
+
should 'color by default' do
|
62
|
+
assert_match(/\e\[32m/, @output)
|
64
63
|
end
|
65
64
|
|
66
|
-
should
|
67
|
-
output,
|
68
|
-
assert_match
|
69
|
-
|
70
|
-
output, status = test_bin("")
|
71
|
-
assert_match /Usage/i, output
|
65
|
+
should 'show help text' do
|
66
|
+
output, = test_bin
|
67
|
+
assert_match(/Usage/i, output)
|
72
68
|
|
73
|
-
output,
|
74
|
-
assert_match
|
75
|
-
end
|
69
|
+
output, = test_bin('')
|
70
|
+
assert_match(/Usage/i, output)
|
76
71
|
|
77
|
-
|
78
|
-
|
79
|
-
assert_match /SANCTIONED/, output
|
72
|
+
output, = test_bin('--no-color')
|
73
|
+
assert_match(/Usage/i, output)
|
80
74
|
end
|
81
75
|
end
|
@@ -2,13 +2,17 @@ require File.join(File.dirname(__FILE__), 'helper')
|
|
2
2
|
|
3
3
|
class TestGmanCountryCodes < Minitest::Test
|
4
4
|
should "determine a domain's country" do
|
5
|
-
|
6
|
-
assert_equal
|
7
|
-
|
8
|
-
|
5
|
+
name = Gman.new('whitehouse.gov').country.name
|
6
|
+
assert_equal 'United States of America', name
|
7
|
+
|
8
|
+
name = Gman.new('foo.gov.uk').country.name
|
9
|
+
assert_equal 'United Kingdom of Great Britain and Northern Ireland', name
|
10
|
+
|
11
|
+
assert_equal 'United States of America', Gman.new('army.mil').country.name
|
12
|
+
assert_equal 'Canada', Gman.new('foo.gc.ca').country.name
|
9
13
|
end
|
10
14
|
|
11
|
-
should
|
12
|
-
assert_equal nil, Gman.new(
|
15
|
+
should 'not err out on an unknown country code' do
|
16
|
+
assert_equal nil, Gman.new('foo.eu').country
|
13
17
|
end
|
14
18
|
end
|
data/test/test_gman_domains.rb
CHANGED
@@ -1,28 +1,33 @@
|
|
1
1
|
require File.join(File.dirname(__FILE__), 'helper')
|
2
2
|
|
3
|
-
class
|
4
|
-
|
5
|
-
WHITELIST = [ "non-us gov", "non-us mil", "US Federal"]
|
3
|
+
class TestGmanDomains < Minitest::Test
|
4
|
+
WHITELIST = ['non-us gov', 'non-us mil', 'US Federal'].freeze
|
6
5
|
|
7
6
|
def resolve_domains?
|
8
|
-
ENV[
|
7
|
+
ENV['GMAN_RESOLVE_DOMAINS'] == 'true'
|
9
8
|
end
|
10
9
|
|
11
|
-
should
|
10
|
+
should 'only contains valid domains' do
|
12
11
|
importer = Gman::Importer.new({})
|
13
12
|
if resolve_domains?
|
14
|
-
importer.logger.info
|
13
|
+
importer.logger.info <<-MSG
|
14
|
+
Validating that all domains resolve. This may take a while...
|
15
|
+
MSG
|
15
16
|
else
|
16
|
-
importer.logger.info
|
17
|
+
importer.logger.info 'Skipping domain resolution.' \
|
18
|
+
'Run `GMAN_RESOLVE_DOMAINS=true rake test` '\
|
19
|
+
'to validate that domains resolve.'
|
17
20
|
end
|
18
21
|
|
19
22
|
invalid = []
|
20
|
-
|
23
|
+
list = Gman::DomainList.current.list
|
24
|
+
Parallel.each(list, in_threads: 2) do |group, domains|
|
21
25
|
next if WHITELIST.include?(group)
|
22
26
|
invalid.push domains.reject { |domain|
|
23
|
-
|
27
|
+
options = { skip_dupe: true, skip_resolve: !resolve_domains? }
|
28
|
+
importer.valid_domain?(domain, options)
|
24
29
|
}
|
25
30
|
end
|
26
|
-
assert_equal [], invalid.flatten.reject
|
31
|
+
assert_equal [], invalid.flatten.reject(&:empty?)
|
27
32
|
end
|
28
33
|
end
|
data/test/test_gman_filter.rb
CHANGED
@@ -2,18 +2,16 @@ HERE = File.dirname(__FILE__)
|
|
2
2
|
require File.join(HERE, 'helper')
|
3
3
|
|
4
4
|
class TestGmanFilter < Minitest::Test
|
5
|
+
txt_path = fixture_path 'obama.txt'
|
6
|
+
exec_path = bin_path 'gman_filter'
|
5
7
|
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
should "remove non-gov/mil addresses" do
|
10
|
-
filtered = `#{exec_path} < #{txt_path}`
|
8
|
+
should 'remove non-gov/mil addresses' do
|
9
|
+
output, _status = Open3.capture2e('bundle', 'exec', exec_path, txt_path)
|
11
10
|
expected = %w(
|
12
11
|
mr.senator@obama.senate.gov
|
13
12
|
president@whitehouse.gov
|
14
13
|
commander.in.chief@us.army.mil
|
15
14
|
).join("\n") + "\n"
|
16
|
-
assert_equal
|
15
|
+
assert_equal output, expected
|
17
16
|
end
|
18
|
-
|
19
17
|
end
|
@@ -1,16 +1,16 @@
|
|
1
1
|
require File.join(File.dirname(__FILE__), 'helper')
|
2
2
|
|
3
3
|
class TestGmanIdentifier < Minitest::Test
|
4
|
-
should
|
4
|
+
should 'Parse the dotgov list' do
|
5
5
|
assert Gman.dotgov_list
|
6
6
|
assert_equal CSV::Table, Gman.dotgov_list.class
|
7
7
|
assert_equal CSV::Row, Gman.dotgov_list.first.class
|
8
|
-
assert Gman.dotgov_list.first[
|
8
|
+
assert Gman.dotgov_list.first['Domain Name']
|
9
9
|
end
|
10
10
|
|
11
|
-
context
|
12
|
-
should
|
13
|
-
domain = Gman.new(
|
11
|
+
context 'locality domains' do
|
12
|
+
should 'detect state domains' do
|
13
|
+
domain = Gman.new('state.ak.us')
|
14
14
|
assert domain.state?
|
15
15
|
|
16
16
|
refute domain.dotgov?
|
@@ -19,11 +19,11 @@ class TestGmanIdentifier < Minitest::Test
|
|
19
19
|
refute domain.county?
|
20
20
|
|
21
21
|
assert_equal :state, domain.type
|
22
|
-
assert_equal
|
22
|
+
assert_equal 'AK', domain.state
|
23
23
|
end
|
24
24
|
|
25
|
-
should
|
26
|
-
domain = Gman.new(
|
25
|
+
should 'detect city domains' do
|
26
|
+
domain = Gman.new('ci.champaign.il.us')
|
27
27
|
assert domain.city?
|
28
28
|
|
29
29
|
refute domain.dotgov?
|
@@ -32,13 +32,13 @@ class TestGmanIdentifier < Minitest::Test
|
|
32
32
|
refute domain.county?
|
33
33
|
|
34
34
|
assert_equal :city, domain.type
|
35
|
-
assert_equal
|
35
|
+
assert_equal 'IL', domain.state
|
36
36
|
end
|
37
37
|
end
|
38
38
|
|
39
|
-
context
|
40
|
-
should
|
41
|
-
domain = Gman.new
|
39
|
+
context 'dotgovs' do
|
40
|
+
should 'detect federal dotgovs' do
|
41
|
+
domain = Gman.new 'whitehouse.gov'
|
42
42
|
assert domain.federal?
|
43
43
|
assert domain.dotgov?
|
44
44
|
|
@@ -47,13 +47,13 @@ class TestGmanIdentifier < Minitest::Test
|
|
47
47
|
refute domain.county?
|
48
48
|
|
49
49
|
assert_equal :federal, domain.type
|
50
|
-
assert_equal
|
51
|
-
assert_equal
|
52
|
-
assert_equal
|
50
|
+
assert_equal 'DC', domain.state
|
51
|
+
assert_equal 'Washington', domain.city
|
52
|
+
assert_equal 'Executive Office of the President', domain.agency
|
53
53
|
end
|
54
54
|
|
55
|
-
should
|
56
|
-
domain = Gman.new
|
55
|
+
should 'detect state dotgovs' do
|
56
|
+
domain = Gman.new 'illinois.gov'
|
57
57
|
assert domain.state?
|
58
58
|
assert domain.dotgov?
|
59
59
|
|
@@ -62,12 +62,12 @@ class TestGmanIdentifier < Minitest::Test
|
|
62
62
|
refute domain.county?
|
63
63
|
|
64
64
|
assert_equal :state, domain.type
|
65
|
-
assert_equal
|
66
|
-
assert_equal
|
65
|
+
assert_equal 'IL', domain.state
|
66
|
+
assert_equal 'Springfield', domain.city
|
67
67
|
end
|
68
68
|
|
69
|
-
should
|
70
|
-
domain = Gman.new
|
69
|
+
should 'detect county dotgovs' do
|
70
|
+
domain = Gman.new 'ALLEGHENYCOUNTYPA.GOV'
|
71
71
|
assert domain.county?
|
72
72
|
assert domain.dotgov?
|
73
73
|
|
@@ -76,30 +76,31 @@ class TestGmanIdentifier < Minitest::Test
|
|
76
76
|
refute domain.state?
|
77
77
|
|
78
78
|
assert_equal :county, domain.type
|
79
|
-
assert_equal
|
80
|
-
assert_equal
|
79
|
+
assert_equal 'PA', domain.state
|
80
|
+
assert_equal 'Pittsburgh', domain.city
|
81
81
|
end
|
82
82
|
|
83
|
-
should
|
84
|
-
|
83
|
+
should 'detect the list category' do
|
84
|
+
category = Gman.new('whitehouse.gov').send('list_category')
|
85
|
+
assert_equal 'US Federal', category
|
85
86
|
end
|
86
87
|
end
|
87
88
|
|
88
|
-
context
|
89
|
+
context 'non-dotgov domains' do
|
89
90
|
should "determine a domain's group" do
|
90
|
-
assert_equal
|
91
|
-
assert_equal :unknown, Gman.new(
|
91
|
+
assert_equal 'usagovIN', Gman.new('cityofperu.org').send('list_category')
|
92
|
+
assert_equal :unknown, Gman.new('cityofperu.org').type
|
92
93
|
|
93
|
-
assert_equal
|
94
|
-
assert_equal :"Canada municipal", Gman.new(
|
94
|
+
assert_equal 'Canada municipal', Gman.new('acme.ca').send('list_category')
|
95
|
+
assert_equal :"Canada municipal", Gman.new('acme.ca').type
|
95
96
|
|
96
|
-
assert_equal
|
97
|
-
assert_equal :"Canada federal", Gman.new(
|
97
|
+
assert_equal 'Canada federal', Gman.new('canada.ca').send('list_category')
|
98
|
+
assert_equal :"Canada federal", Gman.new('canada.ca').type
|
98
99
|
end
|
99
100
|
|
100
|
-
should
|
101
|
-
assert_equal
|
102
|
-
refute Gman.new(
|
101
|
+
should 'detect the state' do
|
102
|
+
assert_equal 'OR', Gman.new('ashland.or.us').state
|
103
|
+
refute Gman.new('canada.ca').state
|
103
104
|
end
|
104
105
|
end
|
105
106
|
end
|
@@ -0,0 +1,250 @@
|
|
1
|
+
require File.join(File.dirname(__FILE__), 'helper')
|
2
|
+
|
3
|
+
class TestGManImporter < Minitest::Test
|
4
|
+
def setup
|
5
|
+
@importer = Gman::Importer.new 'test' => ['example.com']
|
6
|
+
@stdout = StringIO.new
|
7
|
+
@importer.instance_variable_set '@logger', Logger.new(@stdout)
|
8
|
+
|
9
|
+
with_env 'GMAN_STUB_DOMAINS', 'true' do
|
10
|
+
@original_domain_list = File.open(Gman.list_path).read
|
11
|
+
end
|
12
|
+
end
|
13
|
+
|
14
|
+
def teardown
|
15
|
+
with_env 'GMAN_STUB_DOMAINS', 'true' do
|
16
|
+
File.write Gman.list_path, @original_domain_list
|
17
|
+
end
|
18
|
+
end
|
19
|
+
|
20
|
+
should 'init the domain list' do
|
21
|
+
assert_equal Gman::DomainList, @importer.domains.class
|
22
|
+
assert_equal 1, @importer.domains.domains.count
|
23
|
+
assert_equal 'example.com', @importer.domains.domains.first
|
24
|
+
end
|
25
|
+
|
26
|
+
should 'init the logger' do
|
27
|
+
assert_equal Logger, @importer.logger.class
|
28
|
+
end
|
29
|
+
|
30
|
+
should 'return the current domain list' do
|
31
|
+
assert_equal Gman::DomainList, @importer.current.class
|
32
|
+
end
|
33
|
+
|
34
|
+
should 'return the resolver' do
|
35
|
+
assert_equal Resolv::DNS, @importer.resolver.class
|
36
|
+
end
|
37
|
+
|
38
|
+
context 'domain rejection' do
|
39
|
+
should 'return false for a rejected domain' do
|
40
|
+
refute @importer.reject 'example.com', 'reasons'
|
41
|
+
end
|
42
|
+
|
43
|
+
should 'return the reason when asked' do
|
44
|
+
with_env 'RECONCILING', 'true' do
|
45
|
+
assert_equal 'reasons', @importer.reject('example.com', 'reasons')
|
46
|
+
end
|
47
|
+
end
|
48
|
+
end
|
49
|
+
|
50
|
+
context 'manipulating the domain list' do
|
51
|
+
should 'normalize domains within the domain list' do
|
52
|
+
importer = Gman::Importer.new 'test' => ['www.EXAMPLE.com/']
|
53
|
+
importer.send :normalize_domains!
|
54
|
+
assert_equal 'example.com', importer.domains.domains.first
|
55
|
+
end
|
56
|
+
|
57
|
+
should 'remove invalid domains from the domain list' do
|
58
|
+
importer = Gman::Importer.new 'test' => ['foo.github.io', 'example.com']
|
59
|
+
importer.instance_variable_set '@logger', Logger.new(@stdout)
|
60
|
+
|
61
|
+
assert_equal 2, importer.domains.domains.count
|
62
|
+
importer.send :ensure_validity!
|
63
|
+
assert_equal 1, importer.domains.domains.count
|
64
|
+
end
|
65
|
+
|
66
|
+
context 'writing the domain list' do
|
67
|
+
should 'add domains to the current domain list' do
|
68
|
+
with_env 'GMAN_STUB_DOMAINS', 'true' do
|
69
|
+
domains = { 'test' => ['example.com'], 'test2' => ['github.com'] }
|
70
|
+
importer = Gman::Importer.new domains
|
71
|
+
importer.send :add_to_current
|
72
|
+
expected = "// test\nexample.com\ngov\n\n// test2\ngithub.com"
|
73
|
+
assert_equal expected, File.open(Gman.list_path).read
|
74
|
+
end
|
75
|
+
end
|
76
|
+
|
77
|
+
should 'import' do
|
78
|
+
with_env 'GMAN_STUB_DOMAINS', 'true' do
|
79
|
+
domains = {
|
80
|
+
'test' => ['www.example.com', 'goo.github.io'],
|
81
|
+
'test2' => ['github.com', 'www.github.com', 'whitehouse.gov']
|
82
|
+
}
|
83
|
+
|
84
|
+
importer = Gman::Importer.new domains
|
85
|
+
importer.instance_variable_set '@logger', Logger.new(@stdout)
|
86
|
+
importer.import(skip_resolve: true)
|
87
|
+
|
88
|
+
expected = "// test\nexample.com\ngov\n\n// test2\ngithub.com"
|
89
|
+
assert_equal expected, File.open(Gman.list_path).read
|
90
|
+
end
|
91
|
+
end
|
92
|
+
end
|
93
|
+
end
|
94
|
+
|
95
|
+
context 'domain validation' do
|
96
|
+
should 'allow valid domains' do
|
97
|
+
assert @importer.send :ensure_valid, 'whitehouse.gov'
|
98
|
+
end
|
99
|
+
|
100
|
+
should 'reject empty domains' do
|
101
|
+
refute @importer.send :ensure_valid, ''
|
102
|
+
end
|
103
|
+
|
104
|
+
should 'reject blacklisted domains' do
|
105
|
+
refute @importer.send :ensure_valid, 'egovlink.com'
|
106
|
+
end
|
107
|
+
|
108
|
+
should 'reject invalid domains' do
|
109
|
+
refute @importer.send :ensure_valid, 'foo.invalid'
|
110
|
+
end
|
111
|
+
|
112
|
+
should 'reject academic domains' do
|
113
|
+
refute @importer.send :ensure_valid, 'harvard.edu'
|
114
|
+
end
|
115
|
+
|
116
|
+
should "reject regex'd domains" do
|
117
|
+
refute @importer.send :ensure_valid, 'foo.github.io'
|
118
|
+
end
|
119
|
+
end
|
120
|
+
|
121
|
+
context 'duplicate domains' do
|
122
|
+
should 'know a unique domain is not a dupe' do
|
123
|
+
refute @importer.send :dupe?, 'gman.com'
|
124
|
+
end
|
125
|
+
|
126
|
+
should "know when a domain's a dupe" do
|
127
|
+
assert @importer.send :dupe?, 'gov'
|
128
|
+
end
|
129
|
+
|
130
|
+
should "know when a domain's a subdomain of an existing domain" do
|
131
|
+
assert @importer.send :dupe?, 'whitehouse.gov'
|
132
|
+
end
|
133
|
+
|
134
|
+
should 'allow unique domains' do
|
135
|
+
assert @importer.send :ensure_not_dupe, 'gman.com'
|
136
|
+
end
|
137
|
+
|
138
|
+
should 'reject duplicate domains' do
|
139
|
+
refute @importer.send :ensure_not_dupe, 'gov'
|
140
|
+
end
|
141
|
+
|
142
|
+
should 'reject subdomains' do
|
143
|
+
refute @importer.send :ensure_not_dupe, 'whitehouse.gov'
|
144
|
+
end
|
145
|
+
end
|
146
|
+
|
147
|
+
context 'domain resolution' do
|
148
|
+
should 'know if a domain resolves' do
|
149
|
+
assert @importer.domain_resolves?('github.com')
|
150
|
+
assert @importer.send :ensure_resolves, 'github.com'
|
151
|
+
end
|
152
|
+
|
153
|
+
should "know if a domain doesn't resolve" do
|
154
|
+
refute @importer.domain_resolves?('foo.invalid')
|
155
|
+
refute @importer.send :ensure_resolves, 'foo.invalid'
|
156
|
+
end
|
157
|
+
|
158
|
+
should 'know if a domain has an IP' do
|
159
|
+
end
|
160
|
+
|
161
|
+
should 'know if a domain returns a given record' do
|
162
|
+
end
|
163
|
+
end
|
164
|
+
|
165
|
+
context 'regex checks' do
|
166
|
+
should 'pass valid domains' do
|
167
|
+
assert @importer.send :ensure_regex, 'example.com'
|
168
|
+
end
|
169
|
+
|
170
|
+
should 'reject domains that begin with home.' do
|
171
|
+
refute @importer.send :ensure_regex, 'home.example.com'
|
172
|
+
end
|
173
|
+
|
174
|
+
should 'reject domains that begin with user.' do
|
175
|
+
refute @importer.send :ensure_regex, 'user.example.com'
|
176
|
+
end
|
177
|
+
|
178
|
+
should 'reject domains that begin with site.' do
|
179
|
+
refute @importer.send :ensure_regex, 'user.example.com'
|
180
|
+
end
|
181
|
+
|
182
|
+
should 'reject weebly domains' do
|
183
|
+
refute @importer.send :ensure_regex, 'foo.weebly.com'
|
184
|
+
end
|
185
|
+
|
186
|
+
should 'reject wordpress domains' do
|
187
|
+
refute @importer.send :ensure_regex, 'foo.wordpress.com'
|
188
|
+
end
|
189
|
+
|
190
|
+
should 'reject govoffice domains' do
|
191
|
+
refute @importer.send :ensure_regex, 'foo.govoffice.com'
|
192
|
+
refute @importer.send :ensure_regex, 'foo.govoffice1.com'
|
193
|
+
end
|
194
|
+
|
195
|
+
should 'reject homestead domains' do
|
196
|
+
refute @importer.send :ensure_regex, 'foo.homestead.com'
|
197
|
+
end
|
198
|
+
|
199
|
+
should 'reject wix domains' do
|
200
|
+
refute @importer.send :ensure_regex, 'foo.wix.com'
|
201
|
+
end
|
202
|
+
|
203
|
+
should 'reject blogspot domains' do
|
204
|
+
refute @importer.send :ensure_regex, 'foo.blogspot.com'
|
205
|
+
end
|
206
|
+
|
207
|
+
should 'reject tripod domains' do
|
208
|
+
refute @importer.send :ensure_regex, 'foo.tripod.com'
|
209
|
+
end
|
210
|
+
|
211
|
+
should 'reject squarespace domains' do
|
212
|
+
refute @importer.send :ensure_regex, 'foo.squarespace.com'
|
213
|
+
end
|
214
|
+
|
215
|
+
should 'reject github.io domains' do
|
216
|
+
refute @importer.send :ensure_regex, 'foo.github.io'
|
217
|
+
end
|
218
|
+
|
219
|
+
should 'reject locality domains' do
|
220
|
+
refute @importer.send :ensure_regex, 'ci.champaign.il.us'
|
221
|
+
end
|
222
|
+
end
|
223
|
+
|
224
|
+
context 'normalizing domains' do
|
225
|
+
should 'normalize URLs to domains' do
|
226
|
+
expected = 'example.com'
|
227
|
+
assert_equal expected, @importer.normalize_domain('http://example.com')
|
228
|
+
end
|
229
|
+
|
230
|
+
should 'strip WWW' do
|
231
|
+
assert_equal 'example.com', @importer.normalize_domain('www.example.com')
|
232
|
+
end
|
233
|
+
|
234
|
+
should 'remove trailing slashes' do
|
235
|
+
assert_equal 'example.com', @importer.normalize_domain('example.com/')
|
236
|
+
end
|
237
|
+
|
238
|
+
should 'remove paths' do
|
239
|
+
assert_equal 'example.com', @importer.normalize_domain('example.com/foo')
|
240
|
+
end
|
241
|
+
|
242
|
+
should 'remove paths with trailing slashes' do
|
243
|
+
assert_equal 'example.com', @importer.normalize_domain('example.com/foo/')
|
244
|
+
end
|
245
|
+
|
246
|
+
should 'downcase' do
|
247
|
+
assert_equal 'example.com', @importer.normalize_domain('EXAMPLE.com')
|
248
|
+
end
|
249
|
+
end
|
250
|
+
end
|