gman 6.0.1 → 7.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/.github/CODEOWNERS +3 -0
- data/.github/ISSUE_TEMPLATE/bug_report.md +28 -0
- data/.github/ISSUE_TEMPLATE/feature_request.md +21 -0
- data/.github/config.yml +23 -0
- data/.github/funding.yml +1 -0
- data/.github/no-response.yml +15 -0
- data/.github/release-drafter.yml +4 -0
- data/.github/settings.yml +33 -0
- data/.github/stale.yml +29 -0
- data/.gitignore +1 -0
- data/.rspec +2 -0
- data/.rubocop.yml +17 -5
- data/.rubocop_todo.yml +84 -0
- data/.ruby-version +1 -1
- data/Gemfile +2 -0
- data/bin/gman +6 -4
- data/bin/gman_filter +5 -7
- data/config/domains.txt +8446 -173
- data/config/vendor/academic.txt +8038 -0
- data/config/vendor/dotgovs.csv +5786 -5560
- data/docs/CODE_OF_CONDUCT.md +46 -0
- data/docs/CONTRIBUTING.md +92 -0
- data/{README.md → docs/README.md} +3 -3
- data/docs/SECURITY.md +3 -0
- data/docs/_config.yml +2 -0
- data/gman.gemspec +18 -17
- data/lib/gman.rb +25 -21
- data/lib/gman/country_codes.rb +17 -17
- data/lib/gman/domain_list.rb +123 -41
- data/lib/gman/identifier.rb +59 -21
- data/lib/gman/importer.rb +39 -40
- data/lib/gman/locality.rb +23 -21
- data/lib/gman/version.rb +3 -1
- data/script/add +2 -0
- data/script/alphabetize +2 -0
- data/script/cibuild +1 -1
- data/script/dedupe +2 -1
- data/script/profile +2 -1
- data/script/prune +5 -3
- data/script/reconcile-us +6 -3
- data/script/vendor +1 -1
- data/script/vendor-federal-de +3 -3
- data/script/vendor-municipal-de +3 -3
- data/script/vendor-nl +4 -1
- data/script/vendor-public-suffix +7 -6
- data/script/vendor-se +3 -3
- data/script/vendor-swot +43 -0
- data/script/vendor-us +8 -5
- data/spec/fixtures/domains.txt +4 -0
- data/{test → spec}/fixtures/obama.txt +0 -0
- data/spec/gman/bin_spec.rb +101 -0
- data/spec/gman/country_code_spec.rb +39 -0
- data/spec/gman/domain_list_spec.rb +110 -0
- data/spec/gman/domains_spec.rb +25 -0
- data/spec/gman/identifier_spec.rb +218 -0
- data/spec/gman/importer_spec.rb +236 -0
- data/spec/gman/locality_spec.rb +24 -0
- data/spec/gman_spec.rb +74 -0
- data/spec/spec_helper.rb +31 -0
- metadata +86 -73
- data/CONTRIBUTING.md +0 -22
- data/Rakefile +0 -22
- data/test/fixtures/domains.txt +0 -2
- data/test/helper.rb +0 -40
- data/test/test_gman.rb +0 -62
- data/test/test_gman_bin.rb +0 -75
- data/test/test_gman_country_codes.rb +0 -18
- data/test/test_gman_domains.rb +0 -33
- data/test/test_gman_filter.rb +0 -17
- data/test/test_gman_identifier.rb +0 -106
- data/test/test_gman_importer.rb +0 -250
- data/test/test_gman_locality.rb +0 -10
data/CONTRIBUTING.md
DELETED
@@ -1,22 +0,0 @@
|
|
1
|
-
# Contributing to Gman
|
2
|
-
|
3
|
-
## How to contribute
|
4
|
-
|
5
|
-
1. Fork the project
|
6
|
-
2. Create a descriptive branch
|
7
|
-
3. Make your change
|
8
|
-
4. Submit a pull request
|
9
|
-
|
10
|
-
## Code
|
11
|
-
|
12
|
-
Open an issue, or submit a pull request
|
13
|
-
|
14
|
-
## Domains
|
15
|
-
|
16
|
-
Domains live in `./config/domains.txt` as a list of TLDs and SLD+TLDs.
|
17
|
-
|
18
|
-
Right now, the only valid government top level domains (TLDs), represent the US government and are `.gov`, and `.mil`.
|
19
|
-
|
20
|
-
Secondary domains (e.g., `gov.uk`, or `mil.au`) detect non-US government entities.
|
21
|
-
|
22
|
-
To add or remove a domain from the list of known government domains, simply edit the `domains.txt` file.
|
data/Rakefile
DELETED
@@ -1,22 +0,0 @@
|
|
1
|
-
require 'rubygems'
|
2
|
-
require 'bundler'
|
3
|
-
begin
|
4
|
-
Bundler.setup(:default, :development)
|
5
|
-
rescue Bundler::BundlerError => e
|
6
|
-
$stderr.puts e.message
|
7
|
-
$stderr.puts 'Run `bundle install` to install missing gems'
|
8
|
-
exit e.status_code
|
9
|
-
end
|
10
|
-
require 'rake'
|
11
|
-
|
12
|
-
require 'rake/testtask'
|
13
|
-
Rake::TestTask.new(:test) do |test|
|
14
|
-
test.libs << 'lib' << 'test'
|
15
|
-
test.pattern = 'test/**/test_gman*.rb'
|
16
|
-
test.verbose = true
|
17
|
-
end
|
18
|
-
|
19
|
-
desc 'Open console with gman loaded'
|
20
|
-
task :console do
|
21
|
-
exec 'irb -r ./lib/gman.rb'
|
22
|
-
end
|
data/test/fixtures/domains.txt
DELETED
data/test/helper.rb
DELETED
@@ -1,40 +0,0 @@
|
|
1
|
-
require 'rubygems'
|
2
|
-
require 'bundler'
|
3
|
-
require 'minitest/autorun'
|
4
|
-
require 'parallel'
|
5
|
-
require 'open3'
|
6
|
-
|
7
|
-
begin
|
8
|
-
Bundler.setup(:default, :development)
|
9
|
-
rescue Bundler::BundlerError => e
|
10
|
-
$stderr.puts e.message
|
11
|
-
$stderr.puts 'Run `bundle install` to install missing gems'
|
12
|
-
exit e.status_code
|
13
|
-
end
|
14
|
-
|
15
|
-
require 'shoulda'
|
16
|
-
|
17
|
-
$LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
|
18
|
-
$LOAD_PATH.unshift(File.dirname(__FILE__))
|
19
|
-
require_relative '../lib/gman'
|
20
|
-
require_relative '../lib/gman/domain_list'
|
21
|
-
require_relative '../lib/gman/importer'
|
22
|
-
|
23
|
-
def bin_path(cmd = 'gman')
|
24
|
-
File.expand_path "../bin/#{cmd}", File.dirname(__FILE__)
|
25
|
-
end
|
26
|
-
|
27
|
-
def test_bin(*args)
|
28
|
-
Open3.capture2e('bundle', 'exec', bin_path, *args)
|
29
|
-
end
|
30
|
-
|
31
|
-
def fixture_path(fixture)
|
32
|
-
File.expand_path "./fixtures/#{fixture}", File.dirname(__FILE__)
|
33
|
-
end
|
34
|
-
|
35
|
-
def with_env(key, value)
|
36
|
-
old_env = ENV[key]
|
37
|
-
ENV[key] = value
|
38
|
-
yield
|
39
|
-
ENV[key] = old_env
|
40
|
-
end
|
data/test/test_gman.rb
DELETED
@@ -1,62 +0,0 @@
|
|
1
|
-
require File.join(File.dirname(__FILE__), 'helper')
|
2
|
-
|
3
|
-
VALID = ['foo.gov',
|
4
|
-
'http://foo.mil',
|
5
|
-
'foo@bar.gc.ca',
|
6
|
-
'foo.gov.au',
|
7
|
-
'https://www.foo.gouv.fr',
|
8
|
-
'foo@ci.champaign.il.us',
|
9
|
-
'foo.bar.baz.gov.au',
|
10
|
-
'foo@bar.gov.uk',
|
11
|
-
'foo.gov',
|
12
|
-
'foo.fed.us',
|
13
|
-
'foo.state.il.us',
|
14
|
-
'state.il.us',
|
15
|
-
'foo@af.mil',
|
16
|
-
'foo.gov.in'
|
17
|
-
].freeze
|
18
|
-
|
19
|
-
INVALID = ['foo.bar.com',
|
20
|
-
'bar@foo.biz',
|
21
|
-
'http://www.foo.biz',
|
22
|
-
'foo.uk',
|
23
|
-
'gov',
|
24
|
-
'foo@k12.champaign.il.us',
|
25
|
-
'foo@kii.gov.by',
|
26
|
-
'foo',
|
27
|
-
'',
|
28
|
-
nil,
|
29
|
-
' ',
|
30
|
-
'foo.city.il.us',
|
31
|
-
'foo.ci.il.us',
|
32
|
-
'foo.zx.us',
|
33
|
-
'foo@mail.gov.ua'
|
34
|
-
].freeze
|
35
|
-
|
36
|
-
class TestGman < Minitest::Test
|
37
|
-
VALID.each do |domain|
|
38
|
-
should "recognize #{domain} as a government domain" do
|
39
|
-
assert Gman.valid?(domain)
|
40
|
-
end
|
41
|
-
end
|
42
|
-
|
43
|
-
INVALID.each do |domain|
|
44
|
-
should "recognize #{domain} as a non-government domain" do
|
45
|
-
refute Gman.valid?(domain)
|
46
|
-
end
|
47
|
-
end
|
48
|
-
|
49
|
-
should 'not allow educational domains' do
|
50
|
-
assert_equal false, Gman.valid?('foo@gwu.edu')
|
51
|
-
end
|
52
|
-
|
53
|
-
should 'returns the path to domains.txt' do
|
54
|
-
assert_equal true, File.exist?(Gman.list_path)
|
55
|
-
end
|
56
|
-
|
57
|
-
should 'stub domains when asked' do
|
58
|
-
with_env 'GMAN_STUB_DOMAINS', 'true' do
|
59
|
-
assert_equal fixture_path('domains.txt'), Gman.list_path
|
60
|
-
end
|
61
|
-
end
|
62
|
-
end
|
data/test/test_gman_bin.rb
DELETED
@@ -1,75 +0,0 @@
|
|
1
|
-
require_relative 'helper'
|
2
|
-
|
3
|
-
class TestGmanBin < Minitest::Test
|
4
|
-
def setup
|
5
|
-
@output, @status = test_bin('whitehouse.gov')
|
6
|
-
end
|
7
|
-
|
8
|
-
should 'parse the domain' do
|
9
|
-
output, = test_bin('bar.gov')
|
10
|
-
assert_match(/Domain : bar.gov/, output)
|
11
|
-
|
12
|
-
output, = test_bin('foo@bar.gov')
|
13
|
-
assert_match(/Domain : bar.gov/, output)
|
14
|
-
|
15
|
-
output, = test_bin('http://bar.gov/foo')
|
16
|
-
assert_match(/Domain : bar.gov/, output)
|
17
|
-
end
|
18
|
-
|
19
|
-
should 'err on invalid domains' do
|
20
|
-
output, status = test_bin('foo.invalid')
|
21
|
-
assert_equal 1, status.exitstatus
|
22
|
-
assert_match(/Invalid domain/, output)
|
23
|
-
end
|
24
|
-
|
25
|
-
should 'err on non-government domains' do
|
26
|
-
output, status = test_bin('github.com')
|
27
|
-
assert_equal 1, status.exitstatus
|
28
|
-
assert_match(/Not a government domain/, output)
|
29
|
-
end
|
30
|
-
|
31
|
-
should 'know the type' do
|
32
|
-
assert_match(/federal/, @output)
|
33
|
-
assert_equal 0, @status.exitstatus
|
34
|
-
end
|
35
|
-
|
36
|
-
should 'know the agency' do
|
37
|
-
assert_match(/Executive Office of the President/, @output)
|
38
|
-
assert_equal 0, @status.exitstatus
|
39
|
-
end
|
40
|
-
|
41
|
-
should 'know the country' do
|
42
|
-
assert_match(/United States/, @output)
|
43
|
-
assert_equal 0, @status.exitstatus
|
44
|
-
end
|
45
|
-
|
46
|
-
should 'know the city' do
|
47
|
-
assert_match(/Washington/, @output)
|
48
|
-
assert_equal 0, @status.exitstatus
|
49
|
-
end
|
50
|
-
|
51
|
-
should 'know the state' do
|
52
|
-
assert_match(/DC/, @output)
|
53
|
-
assert_equal 0, @status.exitstatus
|
54
|
-
end
|
55
|
-
|
56
|
-
should 'allow you to disable colorization' do
|
57
|
-
output, = test_bin('whitehouse.gov', '--no-color')
|
58
|
-
refute_match(/\e\[32m/, output)
|
59
|
-
end
|
60
|
-
|
61
|
-
should 'color by default' do
|
62
|
-
assert_match(/\e\[32m/, @output)
|
63
|
-
end
|
64
|
-
|
65
|
-
should 'show help text' do
|
66
|
-
output, = test_bin
|
67
|
-
assert_match(/Usage/i, output)
|
68
|
-
|
69
|
-
output, = test_bin('')
|
70
|
-
assert_match(/Usage/i, output)
|
71
|
-
|
72
|
-
output, = test_bin('--no-color')
|
73
|
-
assert_match(/Usage/i, output)
|
74
|
-
end
|
75
|
-
end
|
data/test/test_gman_country_codes.rb
DELETED
@@ -1,18 +0,0 @@
|
|
1
|
-
require File.join(File.dirname(__FILE__), 'helper')
|
2
|
-
|
3
|
-
class TestGmanCountryCodes < Minitest::Test
|
4
|
-
should "determine a domain's country" do
|
5
|
-
name = Gman.new('whitehouse.gov').country.name
|
6
|
-
assert_equal 'United States of America', name
|
7
|
-
|
8
|
-
name = Gman.new('foo.gov.uk').country.name
|
9
|
-
assert_equal 'United Kingdom of Great Britain and Northern Ireland', name
|
10
|
-
|
11
|
-
assert_equal 'United States of America', Gman.new('army.mil').country.name
|
12
|
-
assert_equal 'Canada', Gman.new('foo.gc.ca').country.name
|
13
|
-
end
|
14
|
-
|
15
|
-
should 'not err out on an unknown country code' do
|
16
|
-
assert_equal nil, Gman.new('foo.eu').country
|
17
|
-
end
|
18
|
-
end
|
data/test/test_gman_domains.rb
DELETED
@@ -1,33 +0,0 @@
|
|
1
|
-
require File.join(File.dirname(__FILE__), 'helper')
|
2
|
-
|
3
|
-
class TestGmanDomains < Minitest::Test
|
4
|
-
WHITELIST = ['non-us gov', 'non-us mil', 'US Federal'].freeze
|
5
|
-
|
6
|
-
def resolve_domains?
|
7
|
-
ENV['GMAN_RESOLVE_DOMAINS'] == 'true'
|
8
|
-
end
|
9
|
-
|
10
|
-
should 'only contains valid domains' do
|
11
|
-
importer = Gman::Importer.new({})
|
12
|
-
if resolve_domains?
|
13
|
-
importer.logger.info <<-MSG
|
14
|
-
Validating that all domains resolve. This may take a while...
|
15
|
-
MSG
|
16
|
-
else
|
17
|
-
importer.logger.info 'Skipping domain resolution.' \
|
18
|
-
'Run `GMAN_RESOLVE_DOMAINS=true rake test` '\
|
19
|
-
'to validate that domains resolve.'
|
20
|
-
end
|
21
|
-
|
22
|
-
invalid = []
|
23
|
-
list = Gman::DomainList.current.list
|
24
|
-
Parallel.each(list, in_threads: 2) do |group, domains|
|
25
|
-
next if WHITELIST.include?(group)
|
26
|
-
invalid.push domains.reject { |domain|
|
27
|
-
options = { skip_dupe: true, skip_resolve: !resolve_domains? }
|
28
|
-
importer.valid_domain?(domain, options)
|
29
|
-
}
|
30
|
-
end
|
31
|
-
assert_equal [], invalid.flatten.reject(&:empty?)
|
32
|
-
end
|
33
|
-
end
|
data/test/test_gman_filter.rb
DELETED
@@ -1,17 +0,0 @@
|
|
1
|
-
HERE = File.dirname(__FILE__)
|
2
|
-
require File.join(HERE, 'helper')
|
3
|
-
|
4
|
-
class TestGmanFilter < Minitest::Test
|
5
|
-
txt_path = fixture_path 'obama.txt'
|
6
|
-
exec_path = bin_path 'gman_filter'
|
7
|
-
|
8
|
-
should 'remove non-gov/mil addresses' do
|
9
|
-
output, _status = Open3.capture2e('bundle', 'exec', exec_path, txt_path)
|
10
|
-
expected = %w(
|
11
|
-
mr.senator@obama.senate.gov
|
12
|
-
president@whitehouse.gov
|
13
|
-
commander.in.chief@us.army.mil
|
14
|
-
).join("\n") + "\n"
|
15
|
-
assert_equal output, expected
|
16
|
-
end
|
17
|
-
end
|
data/test/test_gman_identifier.rb
DELETED
@@ -1,106 +0,0 @@
|
|
1
|
-
require File.join(File.dirname(__FILE__), 'helper')
|
2
|
-
|
3
|
-
class TestGmanIdentifier < Minitest::Test
|
4
|
-
should 'Parse the dotgov list' do
|
5
|
-
assert Gman.dotgov_list
|
6
|
-
assert_equal CSV::Table, Gman.dotgov_list.class
|
7
|
-
assert_equal CSV::Row, Gman.dotgov_list.first.class
|
8
|
-
assert Gman.dotgov_list.first['Domain Name']
|
9
|
-
end
|
10
|
-
|
11
|
-
context 'locality domains' do
|
12
|
-
should 'detect state domains' do
|
13
|
-
domain = Gman.new('state.ak.us')
|
14
|
-
assert domain.state?
|
15
|
-
|
16
|
-
refute domain.dotgov?
|
17
|
-
refute domain.city?
|
18
|
-
refute domain.federal?
|
19
|
-
refute domain.county?
|
20
|
-
|
21
|
-
assert_equal :state, domain.type
|
22
|
-
assert_equal 'AK', domain.state
|
23
|
-
end
|
24
|
-
|
25
|
-
should 'detect city domains' do
|
26
|
-
domain = Gman.new('ci.champaign.il.us')
|
27
|
-
assert domain.city?
|
28
|
-
|
29
|
-
refute domain.dotgov?
|
30
|
-
refute domain.state?
|
31
|
-
refute domain.federal?
|
32
|
-
refute domain.county?
|
33
|
-
|
34
|
-
assert_equal :city, domain.type
|
35
|
-
assert_equal 'IL', domain.state
|
36
|
-
end
|
37
|
-
end
|
38
|
-
|
39
|
-
context 'dotgovs' do
|
40
|
-
should 'detect federal dotgovs' do
|
41
|
-
domain = Gman.new 'whitehouse.gov'
|
42
|
-
assert domain.federal?
|
43
|
-
assert domain.dotgov?
|
44
|
-
|
45
|
-
refute domain.city?
|
46
|
-
refute domain.state?
|
47
|
-
refute domain.county?
|
48
|
-
|
49
|
-
assert_equal :federal, domain.type
|
50
|
-
assert_equal 'DC', domain.state
|
51
|
-
assert_equal 'Washington', domain.city
|
52
|
-
assert_equal 'Executive Office of the President', domain.agency
|
53
|
-
end
|
54
|
-
|
55
|
-
should 'detect state dotgovs' do
|
56
|
-
domain = Gman.new 'illinois.gov'
|
57
|
-
assert domain.state?
|
58
|
-
assert domain.dotgov?
|
59
|
-
|
60
|
-
refute domain.city?
|
61
|
-
refute domain.federal?
|
62
|
-
refute domain.county?
|
63
|
-
|
64
|
-
assert_equal :state, domain.type
|
65
|
-
assert_equal 'IL', domain.state
|
66
|
-
assert_equal 'Springfield', domain.city
|
67
|
-
end
|
68
|
-
|
69
|
-
should 'detect county dotgovs' do
|
70
|
-
domain = Gman.new 'ALLEGHENYCOUNTYPA.GOV'
|
71
|
-
assert domain.county?
|
72
|
-
assert domain.dotgov?
|
73
|
-
|
74
|
-
refute domain.city?
|
75
|
-
refute domain.federal?
|
76
|
-
refute domain.state?
|
77
|
-
|
78
|
-
assert_equal :county, domain.type
|
79
|
-
assert_equal 'PA', domain.state
|
80
|
-
assert_equal 'Pittsburgh', domain.city
|
81
|
-
end
|
82
|
-
|
83
|
-
should 'detect the list category' do
|
84
|
-
category = Gman.new('whitehouse.gov').send('list_category')
|
85
|
-
assert_equal 'US Federal', category
|
86
|
-
end
|
87
|
-
end
|
88
|
-
|
89
|
-
context 'non-dotgov domains' do
|
90
|
-
should "determine a domain's group" do
|
91
|
-
assert_equal 'usagovIN', Gman.new('cityofperu.org').send('list_category')
|
92
|
-
assert_equal :unknown, Gman.new('cityofperu.org').type
|
93
|
-
|
94
|
-
assert_equal 'Canada municipal', Gman.new('acme.ca').send('list_category')
|
95
|
-
assert_equal :"Canada municipal", Gman.new('acme.ca').type
|
96
|
-
|
97
|
-
assert_equal 'Canada federal', Gman.new('canada.ca').send('list_category')
|
98
|
-
assert_equal :"Canada federal", Gman.new('canada.ca').type
|
99
|
-
end
|
100
|
-
|
101
|
-
should 'detect the state' do
|
102
|
-
assert_equal 'OR', Gman.new('ashland.or.us').state
|
103
|
-
refute Gman.new('canada.ca').state
|
104
|
-
end
|
105
|
-
end
|
106
|
-
end
|
data/test/test_gman_importer.rb
DELETED
@@ -1,250 +0,0 @@
|
|
1
|
-
require File.join(File.dirname(__FILE__), 'helper')
|
2
|
-
|
3
|
-
class TestGManImporter < Minitest::Test
|
4
|
-
def setup
|
5
|
-
@importer = Gman::Importer.new 'test' => ['example.com']
|
6
|
-
@stdout = StringIO.new
|
7
|
-
@importer.instance_variable_set '@logger', Logger.new(@stdout)
|
8
|
-
|
9
|
-
with_env 'GMAN_STUB_DOMAINS', 'true' do
|
10
|
-
@original_domain_list = File.open(Gman.list_path).read
|
11
|
-
end
|
12
|
-
end
|
13
|
-
|
14
|
-
def teardown
|
15
|
-
with_env 'GMAN_STUB_DOMAINS', 'true' do
|
16
|
-
File.write Gman.list_path, @original_domain_list
|
17
|
-
end
|
18
|
-
end
|
19
|
-
|
20
|
-
should 'init the domain list' do
|
21
|
-
assert_equal Gman::DomainList, @importer.domains.class
|
22
|
-
assert_equal 1, @importer.domains.domains.count
|
23
|
-
assert_equal 'example.com', @importer.domains.domains.first
|
24
|
-
end
|
25
|
-
|
26
|
-
should 'init the logger' do
|
27
|
-
assert_equal Logger, @importer.logger.class
|
28
|
-
end
|
29
|
-
|
30
|
-
should 'return the current domain list' do
|
31
|
-
assert_equal Gman::DomainList, @importer.current.class
|
32
|
-
end
|
33
|
-
|
34
|
-
should 'return the resolver' do
|
35
|
-
assert_equal Resolv::DNS, @importer.resolver.class
|
36
|
-
end
|
37
|
-
|
38
|
-
context 'domain rejection' do
|
39
|
-
should 'return false for a rejected domain' do
|
40
|
-
refute @importer.reject 'example.com', 'reasons'
|
41
|
-
end
|
42
|
-
|
43
|
-
should 'return the reason when asked' do
|
44
|
-
with_env 'RECONCILING', 'true' do
|
45
|
-
assert_equal 'reasons', @importer.reject('example.com', 'reasons')
|
46
|
-
end
|
47
|
-
end
|
48
|
-
end
|
49
|
-
|
50
|
-
context 'manipulating the domain list' do
|
51
|
-
should 'normalize domains within the domain list' do
|
52
|
-
importer = Gman::Importer.new 'test' => ['www.EXAMPLE.com/']
|
53
|
-
importer.send :normalize_domains!
|
54
|
-
assert_equal 'example.com', importer.domains.domains.first
|
55
|
-
end
|
56
|
-
|
57
|
-
should 'remove invalid domains from the domain list' do
|
58
|
-
importer = Gman::Importer.new 'test' => ['foo.github.io', 'example.com']
|
59
|
-
importer.instance_variable_set '@logger', Logger.new(@stdout)
|
60
|
-
|
61
|
-
assert_equal 2, importer.domains.domains.count
|
62
|
-
importer.send :ensure_validity!
|
63
|
-
assert_equal 1, importer.domains.domains.count
|
64
|
-
end
|
65
|
-
|
66
|
-
context 'writing the domain list' do
|
67
|
-
should 'add domains to the current domain list' do
|
68
|
-
with_env 'GMAN_STUB_DOMAINS', 'true' do
|
69
|
-
domains = { 'test' => ['example.com'], 'test2' => ['github.com'] }
|
70
|
-
importer = Gman::Importer.new domains
|
71
|
-
importer.send :add_to_current
|
72
|
-
expected = "// test\nexample.com\ngov\n\n// test2\ngithub.com"
|
73
|
-
assert_equal expected, File.open(Gman.list_path).read
|
74
|
-
end
|
75
|
-
end
|
76
|
-
|
77
|
-
should 'import' do
|
78
|
-
with_env 'GMAN_STUB_DOMAINS', 'true' do
|
79
|
-
domains = {
|
80
|
-
'test' => ['www.example.com', 'goo.github.io'],
|
81
|
-
'test2' => ['github.com', 'www.github.com', 'whitehouse.gov']
|
82
|
-
}
|
83
|
-
|
84
|
-
importer = Gman::Importer.new domains
|
85
|
-
importer.instance_variable_set '@logger', Logger.new(@stdout)
|
86
|
-
importer.import(skip_resolve: true)
|
87
|
-
|
88
|
-
expected = "// test\nexample.com\ngov\n\n// test2\ngithub.com"
|
89
|
-
assert_equal expected, File.open(Gman.list_path).read
|
90
|
-
end
|
91
|
-
end
|
92
|
-
end
|
93
|
-
end
|
94
|
-
|
95
|
-
context 'domain validation' do
|
96
|
-
should 'allow valid domains' do
|
97
|
-
assert @importer.send :ensure_valid, 'whitehouse.gov'
|
98
|
-
end
|
99
|
-
|
100
|
-
should 'reject empty domains' do
|
101
|
-
refute @importer.send :ensure_valid, ''
|
102
|
-
end
|
103
|
-
|
104
|
-
should 'reject blacklisted domains' do
|
105
|
-
refute @importer.send :ensure_valid, 'egovlink.com'
|
106
|
-
end
|
107
|
-
|
108
|
-
should 'reject invalid domains' do
|
109
|
-
refute @importer.send :ensure_valid, 'foo.invalid'
|
110
|
-
end
|
111
|
-
|
112
|
-
should 'reject academic domains' do
|
113
|
-
refute @importer.send :ensure_valid, 'harvard.edu'
|
114
|
-
end
|
115
|
-
|
116
|
-
should "reject regex'd domains" do
|
117
|
-
refute @importer.send :ensure_valid, 'foo.github.io'
|
118
|
-
end
|
119
|
-
end
|
120
|
-
|
121
|
-
context 'duplicate domains' do
|
122
|
-
should 'know a unique domain is not a dupe' do
|
123
|
-
refute @importer.send :dupe?, 'gman.com'
|
124
|
-
end
|
125
|
-
|
126
|
-
should "know when a domain's a dupe" do
|
127
|
-
assert @importer.send :dupe?, 'gov'
|
128
|
-
end
|
129
|
-
|
130
|
-
should "know when a domain's a subdomain of an existing domain" do
|
131
|
-
assert @importer.send :dupe?, 'whitehouse.gov'
|
132
|
-
end
|
133
|
-
|
134
|
-
should 'allow unique domains' do
|
135
|
-
assert @importer.send :ensure_not_dupe, 'gman.com'
|
136
|
-
end
|
137
|
-
|
138
|
-
should 'reject duplicate domains' do
|
139
|
-
refute @importer.send :ensure_not_dupe, 'gov'
|
140
|
-
end
|
141
|
-
|
142
|
-
should 'reject subdomains' do
|
143
|
-
refute @importer.send :ensure_not_dupe, 'whitehouse.gov'
|
144
|
-
end
|
145
|
-
end
|
146
|
-
|
147
|
-
context 'domain resolution' do
|
148
|
-
should 'know if a domain resolves' do
|
149
|
-
assert @importer.domain_resolves?('github.com')
|
150
|
-
assert @importer.send :ensure_resolves, 'github.com'
|
151
|
-
end
|
152
|
-
|
153
|
-
should "know if a domain doesn't resolve" do
|
154
|
-
refute @importer.domain_resolves?('foo.invalid')
|
155
|
-
refute @importer.send :ensure_resolves, 'foo.invalid'
|
156
|
-
end
|
157
|
-
|
158
|
-
should 'know if a domain has an IP' do
|
159
|
-
end
|
160
|
-
|
161
|
-
should 'know if a domain returns a given record' do
|
162
|
-
end
|
163
|
-
end
|
164
|
-
|
165
|
-
context 'regex checks' do
|
166
|
-
should 'pass valid domains' do
|
167
|
-
assert @importer.send :ensure_regex, 'example.com'
|
168
|
-
end
|
169
|
-
|
170
|
-
should 'reject domains that begin with home.' do
|
171
|
-
refute @importer.send :ensure_regex, 'home.example.com'
|
172
|
-
end
|
173
|
-
|
174
|
-
should 'reject domains that begin with user.' do
|
175
|
-
refute @importer.send :ensure_regex, 'user.example.com'
|
176
|
-
end
|
177
|
-
|
178
|
-
should 'reject domains that begin with site.' do
|
179
|
-
refute @importer.send :ensure_regex, 'user.example.com'
|
180
|
-
end
|
181
|
-
|
182
|
-
should 'reject weebly domains' do
|
183
|
-
refute @importer.send :ensure_regex, 'foo.weebly.com'
|
184
|
-
end
|
185
|
-
|
186
|
-
should 'reject wordpress domains' do
|
187
|
-
refute @importer.send :ensure_regex, 'foo.wordpress.com'
|
188
|
-
end
|
189
|
-
|
190
|
-
should 'reject govoffice domains' do
|
191
|
-
refute @importer.send :ensure_regex, 'foo.govoffice.com'
|
192
|
-
refute @importer.send :ensure_regex, 'foo.govoffice1.com'
|
193
|
-
end
|
194
|
-
|
195
|
-
should 'reject homestead domains' do
|
196
|
-
refute @importer.send :ensure_regex, 'foo.homestead.com'
|
197
|
-
end
|
198
|
-
|
199
|
-
should 'reject wix domains' do
|
200
|
-
refute @importer.send :ensure_regex, 'foo.wix.com'
|
201
|
-
end
|
202
|
-
|
203
|
-
should 'reject blogspot domains' do
|
204
|
-
refute @importer.send :ensure_regex, 'foo.blogspot.com'
|
205
|
-
end
|
206
|
-
|
207
|
-
should 'reject tripod domains' do
|
208
|
-
refute @importer.send :ensure_regex, 'foo.tripod.com'
|
209
|
-
end
|
210
|
-
|
211
|
-
should 'reject squarespace domains' do
|
212
|
-
refute @importer.send :ensure_regex, 'foo.squarespace.com'
|
213
|
-
end
|
214
|
-
|
215
|
-
should 'reject github.io domains' do
|
216
|
-
refute @importer.send :ensure_regex, 'foo.github.io'
|
217
|
-
end
|
218
|
-
|
219
|
-
should 'reject locality domains' do
|
220
|
-
refute @importer.send :ensure_regex, 'ci.champaign.il.us'
|
221
|
-
end
|
222
|
-
end
|
223
|
-
|
224
|
-
context 'normalizing domains' do
|
225
|
-
should 'normalize URLs to domains' do
|
226
|
-
expected = 'example.com'
|
227
|
-
assert_equal expected, @importer.normalize_domain('http://example.com')
|
228
|
-
end
|
229
|
-
|
230
|
-
should 'strip WWW' do
|
231
|
-
assert_equal 'example.com', @importer.normalize_domain('www.example.com')
|
232
|
-
end
|
233
|
-
|
234
|
-
should 'remove trailing slashes' do
|
235
|
-
assert_equal 'example.com', @importer.normalize_domain('example.com/')
|
236
|
-
end
|
237
|
-
|
238
|
-
should 'remove paths' do
|
239
|
-
assert_equal 'example.com', @importer.normalize_domain('example.com/foo')
|
240
|
-
end
|
241
|
-
|
242
|
-
should 'remove paths with trailing slashes' do
|
243
|
-
assert_equal 'example.com', @importer.normalize_domain('example.com/foo/')
|
244
|
-
end
|
245
|
-
|
246
|
-
should 'downcase' do
|
247
|
-
assert_equal 'example.com', @importer.normalize_domain('EXAMPLE.com')
|
248
|
-
end
|
249
|
-
end
|
250
|
-
end
|