gman 6.0.1 → 7.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/.github/CODEOWNERS +3 -0
- data/.github/ISSUE_TEMPLATE/bug_report.md +28 -0
- data/.github/ISSUE_TEMPLATE/feature_request.md +21 -0
- data/.github/config.yml +23 -0
- data/.github/funding.yml +1 -0
- data/.github/no-response.yml +15 -0
- data/.github/release-drafter.yml +4 -0
- data/.github/settings.yml +33 -0
- data/.github/stale.yml +29 -0
- data/.gitignore +1 -0
- data/.rspec +2 -0
- data/.rubocop.yml +17 -5
- data/.rubocop_todo.yml +84 -0
- data/.ruby-version +1 -1
- data/Gemfile +2 -0
- data/bin/gman +6 -4
- data/bin/gman_filter +5 -7
- data/config/domains.txt +8446 -173
- data/config/vendor/academic.txt +8038 -0
- data/config/vendor/dotgovs.csv +5786 -5560
- data/docs/CODE_OF_CONDUCT.md +46 -0
- data/docs/CONTRIBUTING.md +92 -0
- data/{README.md → docs/README.md} +3 -3
- data/docs/SECURITY.md +3 -0
- data/docs/_config.yml +2 -0
- data/gman.gemspec +18 -17
- data/lib/gman.rb +25 -21
- data/lib/gman/country_codes.rb +17 -17
- data/lib/gman/domain_list.rb +123 -41
- data/lib/gman/identifier.rb +59 -21
- data/lib/gman/importer.rb +39 -40
- data/lib/gman/locality.rb +23 -21
- data/lib/gman/version.rb +3 -1
- data/script/add +2 -0
- data/script/alphabetize +2 -0
- data/script/cibuild +1 -1
- data/script/dedupe +2 -1
- data/script/profile +2 -1
- data/script/prune +5 -3
- data/script/reconcile-us +6 -3
- data/script/vendor +1 -1
- data/script/vendor-federal-de +3 -3
- data/script/vendor-municipal-de +3 -3
- data/script/vendor-nl +4 -1
- data/script/vendor-public-suffix +7 -6
- data/script/vendor-se +3 -3
- data/script/vendor-swot +43 -0
- data/script/vendor-us +8 -5
- data/spec/fixtures/domains.txt +4 -0
- data/{test → spec}/fixtures/obama.txt +0 -0
- data/spec/gman/bin_spec.rb +101 -0
- data/spec/gman/country_code_spec.rb +39 -0
- data/spec/gman/domain_list_spec.rb +110 -0
- data/spec/gman/domains_spec.rb +25 -0
- data/spec/gman/identifier_spec.rb +218 -0
- data/spec/gman/importer_spec.rb +236 -0
- data/spec/gman/locality_spec.rb +24 -0
- data/spec/gman_spec.rb +74 -0
- data/spec/spec_helper.rb +31 -0
- metadata +86 -73
- data/CONTRIBUTING.md +0 -22
- data/Rakefile +0 -22
- data/test/fixtures/domains.txt +0 -2
- data/test/helper.rb +0 -40
- data/test/test_gman.rb +0 -62
- data/test/test_gman_bin.rb +0 -75
- data/test/test_gman_country_codes.rb +0 -18
- data/test/test_gman_domains.rb +0 -33
- data/test/test_gman_filter.rb +0 -17
- data/test/test_gman_identifier.rb +0 -106
- data/test/test_gman_importer.rb +0 -250
- data/test/test_gman_locality.rb +0 -10
data/script/profile
CHANGED
data/script/prune
CHANGED
@@ -1,4 +1,6 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
2
4
|
# Given an array of domains, removes them from the list
|
3
5
|
# Example usage: script/prune foo.invalid, bar.invalid, foo.bar.invalid
|
4
6
|
|
@@ -12,9 +14,9 @@ list = File.open('./config/domains.txt').read
|
|
12
14
|
puts "Starting list: #{Gman::DomainList.current.count} domains"
|
13
15
|
|
14
16
|
domains.each do |domain|
|
15
|
-
list.gsub!(/^#{domain}$\n/, '')
|
17
|
+
list.gsub!(/^#{Regexp.escape(domain)}$\n/, '')
|
16
18
|
end
|
17
19
|
|
18
|
-
puts "Ending list: #{Gman::DomainList.current.count} domains"
|
19
|
-
|
20
20
|
File.write './config/domains.txt', list
|
21
|
+
|
22
|
+
puts "Ending list: #{Gman::DomainList.current.count} domains"
|
data/script/reconcile-us
CHANGED
@@ -1,4 +1,6 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
2
4
|
#
|
3
5
|
# Reconciles the USA.gov-maintained list of US domains with domains.txt
|
4
6
|
# to show domains listed in the USA.gov-maintained list that we reject and why
|
@@ -12,7 +14,7 @@ ENV['RECONCILING'] = 'true'
|
|
12
14
|
blacklist = ['usagovQUASI']
|
13
15
|
source = 'https://raw.githubusercontent.com/GSA/govt-urls/master/government-urls-hierarchical-list.txt'
|
14
16
|
|
15
|
-
data = open(source).read
|
17
|
+
data = URI.open(source).read
|
16
18
|
data = data.split('_' * 74)
|
17
19
|
data = data.last.strip
|
18
20
|
data = data.split(/\r?\n/).reject(&:empty?)
|
@@ -20,7 +22,7 @@ data = data.split(/\r?\n/).reject(&:empty?)
|
|
20
22
|
domains = {}
|
21
23
|
group = ''
|
22
24
|
data.each do |row|
|
23
|
-
if
|
25
|
+
if /^\w/.match?(row)
|
24
26
|
group = row
|
25
27
|
domains[group] = []
|
26
28
|
else
|
@@ -33,7 +35,7 @@ importer = Gman::Importer.new(domains)
|
|
33
35
|
|
34
36
|
importer.logger.info "Starting with #{importer.domains.count} domains"
|
35
37
|
|
36
|
-
importer.domains.list.
|
38
|
+
importer.domains.list.each_value do |d|
|
37
39
|
d.map! { |domain| Gman.new(domain).to_s }
|
38
40
|
d.map! { |domain| importer.normalize_domain(domain) }
|
39
41
|
end
|
@@ -44,6 +46,7 @@ importer.logger.info "Filtered down to #{count} normalized domains"
|
|
44
46
|
missing = {}
|
45
47
|
importer.domains.list.each do |g, usagovdomains|
|
46
48
|
next unless importer.current.list[g]
|
49
|
+
|
47
50
|
missing[g] = importer.current.list[g] - usagovdomains
|
48
51
|
end
|
49
52
|
|
data/script/vendor
CHANGED
data/script/vendor-federal-de
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
#! /usr/bin/env ruby
|
2
|
+
# frozen_string_literal: true
|
2
3
|
|
3
4
|
require 'csv'
|
4
5
|
require 'open-uri'
|
5
6
|
require './lib/gman'
|
6
|
-
require './lib/gman/importer'
|
7
7
|
|
8
8
|
url = 'https://raw.githubusercontent.com/robbi5/german-gov-domains/master/data/domains.csv'
|
9
9
|
|
10
|
-
domains = open(url).read.encode('UTF-8')
|
10
|
+
domains = URI.open(url).read.encode('UTF-8')
|
11
11
|
domains = CSV.parse(domains, headers: true)
|
12
12
|
domains = domains.map { |row| row['Domain Name'] }
|
13
13
|
|
14
|
-
Gman.
|
14
|
+
Gman::Importer.new('German Federal' => domains).import
|
data/script/vendor-municipal-de
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
#! /usr/bin/env ruby
|
2
|
+
# frozen_string_literal: true
|
2
3
|
|
3
4
|
require 'csv'
|
4
5
|
require 'open-uri'
|
5
6
|
require './lib/gman'
|
6
|
-
require './lib/gman/importer'
|
7
7
|
|
8
8
|
url = 'http://www.mik.nrw.de/nc/themen-aufgaben/kommunales/kommunale-adressen.html?tx_szkommunaldb_pi1%5Bexport%5D=csv'
|
9
9
|
|
10
|
-
csv = open(url).read.force_encoding('iso-8859-1').encode('UTF-8')
|
10
|
+
csv = URI.open(url).read.force_encoding('iso-8859-1').encode('UTF-8')
|
11
11
|
|
12
12
|
# For some reason, the header row is actually the last row
|
13
13
|
# Pop the last line off the file and prepend it at the beginning
|
@@ -20,4 +20,4 @@ csv = lines.join("\n")
|
|
20
20
|
data = CSV.parse(csv, headers: true, col_sep: ';')
|
21
21
|
domains = data.map { |row| row['Internet'] }
|
22
22
|
|
23
|
-
Gman.
|
23
|
+
Gman::Importer.new('German Municipalities' => domains).import
|
data/script/vendor-nl
CHANGED
@@ -1,7 +1,10 @@
|
|
1
1
|
#! /usr/bin/env ruby
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
2
4
|
# See https://github.com/github/government.github.com/pull/367#issuecomment-102108763
|
3
5
|
|
4
6
|
require 'fileutils'
|
7
|
+
require './lib/gman'
|
5
8
|
|
6
9
|
FileUtils.rm_rf('almanak.overheid.nl')
|
7
10
|
commands = [
|
@@ -15,4 +18,4 @@ commands = [
|
|
15
18
|
]
|
16
19
|
domains = system commands.join('|')
|
17
20
|
|
18
|
-
Gman.
|
21
|
+
Gman::Importer.new('Netherlands' => domains.split("\n")).import
|
data/script/vendor-public-suffix
CHANGED
@@ -1,27 +1,28 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
2
4
|
# Propagates an initial list of best-guess government domains
|
3
5
|
|
4
6
|
require 'public_suffix'
|
5
7
|
require 'yaml'
|
6
8
|
require_relative '../lib/gman'
|
7
|
-
require_relative '../lib/gman/importer'
|
8
9
|
|
9
10
|
# https://gist.github.com/benbalter/6147066
|
10
|
-
REGEX = /(\.g[ou]{1,2}(v|b|vt)|\.mil|\.gc|\.fed)(\.[a-z]{2})?$/i
|
11
|
+
REGEX = /(\.g[ou]{1,2}(v|b|vt)|\.mil|\.gc|\.fed)(\.[a-z]{2})?$/i.freeze
|
11
12
|
|
12
13
|
domains = []
|
13
14
|
PublicSuffix::List.default.each do |rule|
|
14
15
|
domain = nil
|
15
16
|
|
16
17
|
if rule.parts.length == 1
|
17
|
-
domain = rule.parts.first if ".#{rule.value}"
|
18
|
-
elsif ".#{rule.value}"
|
18
|
+
domain = rule.parts.first if REGEX.match?(".#{rule.value}")
|
19
|
+
elsif REGEX.match?(".#{rule.value}")
|
19
20
|
domain = rule.parts.pop(2).join('.')
|
20
21
|
end
|
21
22
|
|
22
23
|
domains.push domain unless domain.nil? || domains.include?(domain)
|
23
24
|
end
|
24
25
|
|
25
|
-
#
|
26
|
+
# NOTE: We want to skip resolution here, because a domain like `gov.sv` may be
|
26
27
|
# a valid TLD, not have any top-level sites, and we'd still want it listed
|
27
|
-
Gman.
|
28
|
+
Gman::Importer.new('non-us gov' => domains).import(skip_resolve: true)
|
data/script/vendor-se
CHANGED
@@ -1,9 +1,9 @@
|
|
1
1
|
#! /usr/bin/env ruby
|
2
|
+
# frozen_string_literal: true
|
2
3
|
|
3
4
|
require 'mechanize'
|
4
5
|
require 'csv'
|
5
6
|
require './lib/gman'
|
6
|
-
require './lib/gman/importer'
|
7
7
|
|
8
8
|
url = 'http://www.myndighetsregistret.scb.se/Myndighet.aspx'
|
9
9
|
agent = Mechanize.new
|
@@ -15,7 +15,7 @@ response = agent.submit(form, submit_button)
|
|
15
15
|
|
16
16
|
rows = CSV.parse(response.content, headers: true, col_sep: "\t")
|
17
17
|
domains = rows.map do |row|
|
18
|
-
row['Webbadress'] unless row['Namn']
|
18
|
+
row['Webbadress'] unless /UNIVERSITET/.match?(row['Namn'])
|
19
19
|
end
|
20
20
|
|
21
|
-
Gman.
|
21
|
+
Gman::Importer.new('Swedish Administrative Authorities' => domains).import
|
data/script/vendor-swot
ADDED
@@ -0,0 +1,43 @@
|
|
1
|
+
#! /usr/bin/env ruby
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
4
|
+
#
|
5
|
+
# Vendors the Swot-maintained list of academic domains into config/academic.txt
|
6
|
+
# Source: https://github.com/leereilly/swot/
|
7
|
+
#
|
8
|
+
# Usage: script/vendor-swot
|
9
|
+
#
|
10
|
+
# Will automatically fetch latest version of the list and merge
|
11
|
+
# You can check for changes and commit via `git status`
|
12
|
+
#
|
13
|
+
# It's also probably a good idea to run `script/cibuild` for good measure
|
14
|
+
#
|
15
|
+
# Note: We do this, because as a bajillion individual files, Swot takes up 30MB
|
16
|
+
|
17
|
+
require 'gman'
|
18
|
+
require 'swot'
|
19
|
+
|
20
|
+
# Generate array of all Swot domains
|
21
|
+
domains = Swot.all_domains
|
22
|
+
domains << Swot::ACADEMIC_TLDS
|
23
|
+
|
24
|
+
# Init the importer, building a DomainList
|
25
|
+
group = "Academic domains vendored from Swot v#{Swot::VERSION}"
|
26
|
+
hash = { group => domains }
|
27
|
+
|
28
|
+
importer = Gman::Importer.new(hash)
|
29
|
+
importer.logger.info "Importing from Swot v#{Swot::VERSION}"
|
30
|
+
importer.logger.info "Found #{domains.count} academic domains"
|
31
|
+
|
32
|
+
domain_list = importer.domain_list
|
33
|
+
domain_list.path = Gman.academic_list_path
|
34
|
+
|
35
|
+
# Cleanup and write
|
36
|
+
# Note: we're not using the import method, as that assumes we're writing the
|
37
|
+
# government domain list and would use Swot to ensure domains aren't academic
|
38
|
+
importer.send :normalize_domains!
|
39
|
+
domain_list.data[group] << Swot::BLACKLIST.map { |domain| "!#{domain}" }
|
40
|
+
domain_list.data[group] = domain_list.data[group].flatten
|
41
|
+
domain_list.write
|
42
|
+
|
43
|
+
importer.logger.info "Vendored #{importer.domain_list.count} academic domains."
|
data/script/vendor-us
CHANGED
@@ -1,4 +1,6 @@
|
|
1
1
|
#! /usr/bin/env ruby
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
2
4
|
#
|
3
5
|
# Vendors the USA.gov-maintained list of US domains into domains.txt
|
4
6
|
# Source: https://github.com/GSA-OCSIT/govt-urls
|
@@ -10,12 +12,13 @@
|
|
10
12
|
#
|
11
13
|
# It's also probably a good idea to run `script/cibuild` for good measure
|
12
14
|
|
13
|
-
require './lib/gman
|
15
|
+
require './lib/gman'
|
16
|
+
require 'open-uri'
|
14
17
|
|
15
|
-
blacklist = %w
|
18
|
+
blacklist = %w[usagovQUASI usagovFEDgov]
|
16
19
|
source = 'https://raw.githubusercontent.com/GSA/govt-urls/master/government-urls-hierarchical-list.txt'
|
17
20
|
|
18
|
-
data = open(source).read
|
21
|
+
data = URI.open(source).read
|
19
22
|
data = data.split('_' * 74)
|
20
23
|
data = data.last.strip
|
21
24
|
data = data.split(/\r?\n/).reject(&:empty?)
|
@@ -23,7 +26,7 @@ data = data.split(/\r?\n/).reject(&:empty?)
|
|
23
26
|
domains = {}
|
24
27
|
group = ''
|
25
28
|
data.each do |row|
|
26
|
-
if
|
29
|
+
if /^\w/.match?(row)
|
27
30
|
group = row
|
28
31
|
domains[group] = []
|
29
32
|
else
|
@@ -32,4 +35,4 @@ data.each do |row|
|
|
32
35
|
end
|
33
36
|
|
34
37
|
domains.reject! { |g, _| blacklist.include?(g) }
|
35
|
-
Gman.
|
38
|
+
Gman::Importer.new(domains).import
|
File without changes
|
@@ -0,0 +1,101 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
RSpec.describe 'Gman bin' do
|
4
|
+
let(:domain) { 'whitehouse.gov' }
|
5
|
+
let(:args) { [domain] }
|
6
|
+
let(:command) { 'gman' }
|
7
|
+
let(:bin_path) do
|
8
|
+
File.expand_path "../../bin/#{command}", File.dirname(__FILE__)
|
9
|
+
end
|
10
|
+
let(:response_parts) { Open3.capture2e('bundle', 'exec', bin_path, *args) }
|
11
|
+
let(:output) { response_parts[0] }
|
12
|
+
let(:status) { response_parts[1] }
|
13
|
+
let(:exit_code) { status.exitstatus }
|
14
|
+
|
15
|
+
context 'a valid domain' do
|
16
|
+
it 'parses the domain' do
|
17
|
+
expect(output).to match('Domain : whitehouse.gov')
|
18
|
+
end
|
19
|
+
|
20
|
+
it "knows it's valid" do
|
21
|
+
expect(output).to match('Valid government domain')
|
22
|
+
expect(exit_code).to be(0)
|
23
|
+
end
|
24
|
+
|
25
|
+
it 'knows the type' do
|
26
|
+
expect(output).to match(/federal/i)
|
27
|
+
end
|
28
|
+
|
29
|
+
it 'knows the agency' do
|
30
|
+
expect(output).to match('Executive Office of the President')
|
31
|
+
end
|
32
|
+
|
33
|
+
it 'knows the country' do
|
34
|
+
expect(output).to match('United States')
|
35
|
+
end
|
36
|
+
|
37
|
+
it 'knows the city' do
|
38
|
+
expect(output).to match('Washington')
|
39
|
+
end
|
40
|
+
|
41
|
+
it 'knows the state' do
|
42
|
+
expect(output).to match('DC')
|
43
|
+
end
|
44
|
+
|
45
|
+
it 'colors by default' do
|
46
|
+
expect(output).to match(/\e\[32m/)
|
47
|
+
end
|
48
|
+
|
49
|
+
context 'with colorization disabled' do
|
50
|
+
let(:args) { [domain, '--no-color'] }
|
51
|
+
|
52
|
+
it "doesn't color" do
|
53
|
+
expect(output).not_to match(/\e\[32m/)
|
54
|
+
end
|
55
|
+
end
|
56
|
+
end
|
57
|
+
|
58
|
+
context 'with no args' do
|
59
|
+
let(:args) { [] }
|
60
|
+
|
61
|
+
it 'displays the help text' do
|
62
|
+
expect(output).to match('USAGE')
|
63
|
+
end
|
64
|
+
end
|
65
|
+
|
66
|
+
context 'an invalid domain' do
|
67
|
+
let(:domain) { 'foo.invalid' }
|
68
|
+
|
69
|
+
it 'knows the domain is invalid' do
|
70
|
+
expect(output).to match('Invalid domain')
|
71
|
+
expect(exit_code).to be(1)
|
72
|
+
end
|
73
|
+
end
|
74
|
+
|
75
|
+
context 'a non-government domain' do
|
76
|
+
let(:domain) { 'github.com' }
|
77
|
+
|
78
|
+
it "knows it's not a government domain" do
|
79
|
+
expect(output).to match('Not a government domain')
|
80
|
+
expect(exit_code).to be(1)
|
81
|
+
end
|
82
|
+
end
|
83
|
+
|
84
|
+
context 'filtering' do
|
85
|
+
let(:command) { 'gman_filter' }
|
86
|
+
let(:txt_path) do
|
87
|
+
File.expand_path '../fixtures/obama.txt', File.dirname(__FILE__)
|
88
|
+
end
|
89
|
+
let(:args) { [txt_path] }
|
90
|
+
|
91
|
+
it 'returns only government domains' do
|
92
|
+
expected = <<~EXPECTED
|
93
|
+
mr.senator@obama.senate.gov
|
94
|
+
president@whitehouse.gov
|
95
|
+
commander.in.chief@us.army.mil
|
96
|
+
EXPECTED
|
97
|
+
|
98
|
+
expect(output).to eql(expected)
|
99
|
+
end
|
100
|
+
end
|
101
|
+
end
|
@@ -0,0 +1,39 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
RSpec.describe 'Gman Country Codes' do
|
4
|
+
{
|
5
|
+
'whitehouse.gov' => 'United States of America',
|
6
|
+
'foo.gov.uk' => 'United Kingdom of Great Britain and Northern Ireland',
|
7
|
+
'army.mil' => 'United States of America',
|
8
|
+
'foo.gc.ca' => 'Canada',
|
9
|
+
'foo.eu' => nil
|
10
|
+
}.each do |domain, expected_country|
|
11
|
+
context "given #{domain.inspect}" do
|
12
|
+
subject { Gman.new(domain) }
|
13
|
+
|
14
|
+
let(:country) { subject.country }
|
15
|
+
|
16
|
+
it 'knows the country' do
|
17
|
+
if expected_country.nil?
|
18
|
+
expect(country).to be_nil
|
19
|
+
else
|
20
|
+
expect(country.name).to eql(expected_country)
|
21
|
+
end
|
22
|
+
end
|
23
|
+
|
24
|
+
it 'knows the alpha2' do
|
25
|
+
expected = case expected_country
|
26
|
+
when 'United States of America'
|
27
|
+
'us'
|
28
|
+
when 'Canada'
|
29
|
+
'ca'
|
30
|
+
when 'United Kingdom of Great Britain and Northern Ireland'
|
31
|
+
'gb'
|
32
|
+
else
|
33
|
+
'eu'
|
34
|
+
end
|
35
|
+
expect(subject.alpha2).to eql(expected)
|
36
|
+
end
|
37
|
+
end
|
38
|
+
end
|
39
|
+
end
|
@@ -0,0 +1,110 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
RSpec.describe Gman::DomainList do
|
4
|
+
let(:data) { subject.data }
|
5
|
+
let(:canada) { data['Canada municipal'] }
|
6
|
+
|
7
|
+
%i[path contents data].each do |type|
|
8
|
+
context "when initialized by #{type}" do
|
9
|
+
subject do
|
10
|
+
case type
|
11
|
+
when :path
|
12
|
+
described_class.new(path: Gman.list_path)
|
13
|
+
when :contents
|
14
|
+
contents = File.read(Gman.list_path)
|
15
|
+
described_class.new(contents: contents)
|
16
|
+
when :data
|
17
|
+
data = described_class.new(path: Gman.list_path).to_h
|
18
|
+
described_class.new(data: data)
|
19
|
+
end
|
20
|
+
end
|
21
|
+
|
22
|
+
it 'stores the init var' do
|
23
|
+
expect(subject.send(type)).not_to be_nil
|
24
|
+
end
|
25
|
+
|
26
|
+
it 'returns the domain data' do
|
27
|
+
expect(data).to have_key('Canada federal')
|
28
|
+
expect(data.values.flatten).to include('gov')
|
29
|
+
end
|
30
|
+
|
31
|
+
it 'returns the list contents' do
|
32
|
+
expect(subject.contents).to match(/^gov$/)
|
33
|
+
end
|
34
|
+
|
35
|
+
it 'knows the list path' do
|
36
|
+
expect(subject.path).to eql(Gman.list_path)
|
37
|
+
end
|
38
|
+
|
39
|
+
it 'returns the PublicSuffix list' do
|
40
|
+
expect(subject.public_suffix_list).to be_a(PublicSuffix::List)
|
41
|
+
end
|
42
|
+
|
43
|
+
it 'knows if a domain is valid' do
|
44
|
+
expect(subject.valid?('whitehouse.gov')).to be(true)
|
45
|
+
end
|
46
|
+
|
47
|
+
it 'knows if a domain is invalid' do
|
48
|
+
expect(subject.valid?('example.com')).to be(false)
|
49
|
+
end
|
50
|
+
|
51
|
+
it 'returns the domain groups' do
|
52
|
+
expect(subject.groups).to include('Canada federal')
|
53
|
+
end
|
54
|
+
|
55
|
+
it 'returns the domains' do
|
56
|
+
expect(subject.domains).to include('gov')
|
57
|
+
end
|
58
|
+
|
59
|
+
it 'returns the domain count' do
|
60
|
+
expect(subject.count).to be_a(Integer)
|
61
|
+
expect(subject.count).to be > 100
|
62
|
+
end
|
63
|
+
|
64
|
+
it 'alphabetizes the list' do
|
65
|
+
canada.shuffle!
|
66
|
+
expect(canada.first).not_to eql('100milehouse.com')
|
67
|
+
subject.alphabetize
|
68
|
+
expect(canada.first).to eql('100milehouse.com')
|
69
|
+
end
|
70
|
+
|
71
|
+
it 'outputs public suffix format' do
|
72
|
+
expect(subject.to_s).to match("// Canada federal\ncanada\.ca\n")
|
73
|
+
end
|
74
|
+
|
75
|
+
it "finds a domain's parent" do
|
76
|
+
expect(subject.parent_domain('foo.gov.uk')).to eql('gov.uk')
|
77
|
+
end
|
78
|
+
|
79
|
+
context 'with the list path stubbed' do
|
80
|
+
let(:stubbed_file_contents) { File.read(stubbed_list_path) }
|
81
|
+
|
82
|
+
before do
|
83
|
+
subject.instance_variable_set('@path', stubbed_list_path)
|
84
|
+
end
|
85
|
+
|
86
|
+
context 'with list data stubbed' do
|
87
|
+
before do
|
88
|
+
subject.data = { 'foo' => ['!mail.bar.gov', 'bar.gov', 'baz.net'] }
|
89
|
+
end
|
90
|
+
|
91
|
+
context 'alphabetizing' do
|
92
|
+
before { subject.alphabetize }
|
93
|
+
|
94
|
+
it 'puts exceptions last' do
|
95
|
+
expect(subject.data['foo'].last).to eql('!mail.bar.gov')
|
96
|
+
end
|
97
|
+
end
|
98
|
+
|
99
|
+
context 'writing' do
|
100
|
+
before { subject.write }
|
101
|
+
|
102
|
+
it 'writes the contents' do
|
103
|
+
expect(stubbed_file_contents).to match("// foo\nbar.gov\nbaz.net")
|
104
|
+
end
|
105
|
+
end
|
106
|
+
end
|
107
|
+
end
|
108
|
+
end
|
109
|
+
end
|
110
|
+
end
|