gman 7.0.0 → 7.0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/.github/CODEOWNERS +3 -0
- data/.github/ISSUE_TEMPLATE/bug_report.md +28 -0
- data/.github/ISSUE_TEMPLATE/feature_request.md +21 -0
- data/.github/config.yml +23 -0
- data/.github/funding.yml +1 -0
- data/.github/no-response.yml +15 -0
- data/.github/release-drafter.yml +4 -0
- data/.github/settings.yml +33 -0
- data/.github/stale.yml +29 -0
- data/.gitignore +1 -0
- data/.rspec +2 -0
- data/.rubocop.yml +14 -5
- data/.rubocop_todo.yml +84 -0
- data/.ruby-version +1 -1
- data/Gemfile +2 -0
- data/bin/gman +6 -4
- data/bin/gman_filter +5 -7
- data/config/domains.txt +8454 -168
- data/config/vendor/academic.txt +6 -7
- data/config/vendor/dotgovs.csv +5786 -5560
- data/docs/CODE_OF_CONDUCT.md +46 -0
- data/docs/CONTRIBUTING.md +92 -0
- data/{README.md → docs/README.md} +3 -3
- data/docs/SECURITY.md +3 -0
- data/docs/_config.yml +2 -0
- data/gman.gemspec +18 -17
- data/lib/gman.rb +4 -2
- data/lib/gman/country_codes.rb +17 -17
- data/lib/gman/domain_list.rb +25 -9
- data/lib/gman/identifier.rb +57 -19
- data/lib/gman/importer.rb +31 -21
- data/lib/gman/locality.rb +8 -6
- data/lib/gman/version.rb +3 -1
- data/script/add +2 -0
- data/script/alphabetize +2 -0
- data/script/cibuild +1 -1
- data/script/dedupe +2 -1
- data/script/profile +2 -1
- data/script/prune +5 -3
- data/script/reconcile-us +6 -3
- data/script/vendor-federal-de +2 -1
- data/script/vendor-municipal-de +2 -1
- data/script/vendor-nl +2 -0
- data/script/vendor-public-suffix +6 -4
- data/script/vendor-se +2 -1
- data/script/vendor-swot +3 -1
- data/script/vendor-us +5 -3
- data/spec/fixtures/domains.txt +4 -0
- data/{test → spec}/fixtures/obama.txt +0 -0
- data/spec/gman/bin_spec.rb +101 -0
- data/spec/gman/country_code_spec.rb +39 -0
- data/spec/gman/domain_list_spec.rb +110 -0
- data/spec/gman/domains_spec.rb +25 -0
- data/spec/gman/identifier_spec.rb +218 -0
- data/spec/gman/importer_spec.rb +236 -0
- data/spec/gman/locality_spec.rb +24 -0
- data/spec/gman_spec.rb +74 -0
- data/spec/spec_helper.rb +31 -0
- metadata +89 -81
- data/.rake_tasks +0 -0
- data/CONTRIBUTING.md +0 -22
- data/Rakefile +0 -22
- data/test/fixtures/domains.txt +0 -2
- data/test/helper.rb +0 -48
- data/test/test_gman.rb +0 -56
- data/test/test_gman_bin.rb +0 -75
- data/test/test_gman_country_codes.rb +0 -18
- data/test/test_gman_domain_list.rb +0 -112
- data/test/test_gman_domains.rb +0 -32
- data/test/test_gman_filter.rb +0 -17
- data/test/test_gman_identifier.rb +0 -106
- data/test/test_gman_importer.rb +0 -244
- data/test/test_gman_locality.rb +0 -10
@@ -0,0 +1,101 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
RSpec.describe 'Gman bin' do
|
4
|
+
let(:domain) { 'whitehouse.gov' }
|
5
|
+
let(:args) { [domain] }
|
6
|
+
let(:command) { 'gman' }
|
7
|
+
let(:bin_path) do
|
8
|
+
File.expand_path "../../bin/#{command}", File.dirname(__FILE__)
|
9
|
+
end
|
10
|
+
let(:response_parts) { Open3.capture2e('bundle', 'exec', bin_path, *args) }
|
11
|
+
let(:output) { response_parts[0] }
|
12
|
+
let(:status) { response_parts[1] }
|
13
|
+
let(:exit_code) { status.exitstatus }
|
14
|
+
|
15
|
+
context 'a valid domain' do
|
16
|
+
it 'parses the domain' do
|
17
|
+
expect(output).to match('Domain : whitehouse.gov')
|
18
|
+
end
|
19
|
+
|
20
|
+
it "knows it's valid" do
|
21
|
+
expect(output).to match('Valid government domain')
|
22
|
+
expect(exit_code).to be(0)
|
23
|
+
end
|
24
|
+
|
25
|
+
it 'knows the type' do
|
26
|
+
expect(output).to match(/federal/i)
|
27
|
+
end
|
28
|
+
|
29
|
+
it 'knows the agency' do
|
30
|
+
expect(output).to match('Executive Office of the President')
|
31
|
+
end
|
32
|
+
|
33
|
+
it 'knows the country' do
|
34
|
+
expect(output).to match('United States')
|
35
|
+
end
|
36
|
+
|
37
|
+
it 'knows the city' do
|
38
|
+
expect(output).to match('Washington')
|
39
|
+
end
|
40
|
+
|
41
|
+
it 'knows the state' do
|
42
|
+
expect(output).to match('DC')
|
43
|
+
end
|
44
|
+
|
45
|
+
it 'colors by default' do
|
46
|
+
expect(output).to match(/\e\[32m/)
|
47
|
+
end
|
48
|
+
|
49
|
+
context 'with colorization disabled' do
|
50
|
+
let(:args) { [domain, '--no-color'] }
|
51
|
+
|
52
|
+
it "doesn't color" do
|
53
|
+
expect(output).not_to match(/\e\[32m/)
|
54
|
+
end
|
55
|
+
end
|
56
|
+
end
|
57
|
+
|
58
|
+
context 'with no args' do
|
59
|
+
let(:args) { [] }
|
60
|
+
|
61
|
+
it 'displays the help text' do
|
62
|
+
expect(output).to match('USAGE')
|
63
|
+
end
|
64
|
+
end
|
65
|
+
|
66
|
+
context 'an invalid domain' do
|
67
|
+
let(:domain) { 'foo.invalid' }
|
68
|
+
|
69
|
+
it 'knows the domain is invalid' do
|
70
|
+
expect(output).to match('Invalid domain')
|
71
|
+
expect(exit_code).to be(1)
|
72
|
+
end
|
73
|
+
end
|
74
|
+
|
75
|
+
context 'a non-government domain' do
|
76
|
+
let(:domain) { 'github.com' }
|
77
|
+
|
78
|
+
it "knows it's not a government domain" do
|
79
|
+
expect(output).to match('Not a government domain')
|
80
|
+
expect(exit_code).to be(1)
|
81
|
+
end
|
82
|
+
end
|
83
|
+
|
84
|
+
context 'filtering' do
|
85
|
+
let(:command) { 'gman_filter' }
|
86
|
+
let(:txt_path) do
|
87
|
+
File.expand_path '../fixtures/obama.txt', File.dirname(__FILE__)
|
88
|
+
end
|
89
|
+
let(:args) { [txt_path] }
|
90
|
+
|
91
|
+
it 'returns only government domains' do
|
92
|
+
expected = <<~EXPECTED
|
93
|
+
mr.senator@obama.senate.gov
|
94
|
+
president@whitehouse.gov
|
95
|
+
commander.in.chief@us.army.mil
|
96
|
+
EXPECTED
|
97
|
+
|
98
|
+
expect(output).to eql(expected)
|
99
|
+
end
|
100
|
+
end
|
101
|
+
end
|
@@ -0,0 +1,39 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
RSpec.describe 'Gman Country Codes' do
|
4
|
+
{
|
5
|
+
'whitehouse.gov' => 'United States of America',
|
6
|
+
'foo.gov.uk' => 'United Kingdom of Great Britain and Northern Ireland',
|
7
|
+
'army.mil' => 'United States of America',
|
8
|
+
'foo.gc.ca' => 'Canada',
|
9
|
+
'foo.eu' => nil
|
10
|
+
}.each do |domain, expected_country|
|
11
|
+
context "given #{domain.inspect}" do
|
12
|
+
subject { Gman.new(domain) }
|
13
|
+
|
14
|
+
let(:country) { subject.country }
|
15
|
+
|
16
|
+
it 'knows the country' do
|
17
|
+
if expected_country.nil?
|
18
|
+
expect(country).to be_nil
|
19
|
+
else
|
20
|
+
expect(country.name).to eql(expected_country)
|
21
|
+
end
|
22
|
+
end
|
23
|
+
|
24
|
+
it 'knows the alpha2' do
|
25
|
+
expected = case expected_country
|
26
|
+
when 'United States of America'
|
27
|
+
'us'
|
28
|
+
when 'Canada'
|
29
|
+
'ca'
|
30
|
+
when 'United Kingdom of Great Britain and Northern Ireland'
|
31
|
+
'gb'
|
32
|
+
else
|
33
|
+
'eu'
|
34
|
+
end
|
35
|
+
expect(subject.alpha2).to eql(expected)
|
36
|
+
end
|
37
|
+
end
|
38
|
+
end
|
39
|
+
end
|
@@ -0,0 +1,110 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
RSpec.describe Gman::DomainList do
|
4
|
+
let(:data) { subject.data }
|
5
|
+
let(:canada) { data['Canada municipal'] }
|
6
|
+
|
7
|
+
%i[path contents data].each do |type|
|
8
|
+
context "when initialized by #{type}" do
|
9
|
+
subject do
|
10
|
+
case type
|
11
|
+
when :path
|
12
|
+
described_class.new(path: Gman.list_path)
|
13
|
+
when :contents
|
14
|
+
contents = File.read(Gman.list_path)
|
15
|
+
described_class.new(contents: contents)
|
16
|
+
when :data
|
17
|
+
data = described_class.new(path: Gman.list_path).to_h
|
18
|
+
described_class.new(data: data)
|
19
|
+
end
|
20
|
+
end
|
21
|
+
|
22
|
+
it 'stores the init var' do
|
23
|
+
expect(subject.send(type)).not_to be_nil
|
24
|
+
end
|
25
|
+
|
26
|
+
it 'returns the domain data' do
|
27
|
+
expect(data).to have_key('Canada federal')
|
28
|
+
expect(data.values.flatten).to include('gov')
|
29
|
+
end
|
30
|
+
|
31
|
+
it 'returns the list contents' do
|
32
|
+
expect(subject.contents).to match(/^gov$/)
|
33
|
+
end
|
34
|
+
|
35
|
+
it 'knows the list path' do
|
36
|
+
expect(subject.path).to eql(Gman.list_path)
|
37
|
+
end
|
38
|
+
|
39
|
+
it 'returns the PublicSuffix list' do
|
40
|
+
expect(subject.public_suffix_list).to be_a(PublicSuffix::List)
|
41
|
+
end
|
42
|
+
|
43
|
+
it 'knows if a domain is valid' do
|
44
|
+
expect(subject.valid?('whitehouse.gov')).to be(true)
|
45
|
+
end
|
46
|
+
|
47
|
+
it 'knows if a domain is invalid' do
|
48
|
+
expect(subject.valid?('example.com')).to be(false)
|
49
|
+
end
|
50
|
+
|
51
|
+
it 'returns the domain groups' do
|
52
|
+
expect(subject.groups).to include('Canada federal')
|
53
|
+
end
|
54
|
+
|
55
|
+
it 'returns the domains' do
|
56
|
+
expect(subject.domains).to include('gov')
|
57
|
+
end
|
58
|
+
|
59
|
+
it 'returns the domain count' do
|
60
|
+
expect(subject.count).to be_a(Integer)
|
61
|
+
expect(subject.count).to be > 100
|
62
|
+
end
|
63
|
+
|
64
|
+
it 'alphabetizes the list' do
|
65
|
+
canada.shuffle!
|
66
|
+
expect(canada.first).not_to eql('100milehouse.com')
|
67
|
+
subject.alphabetize
|
68
|
+
expect(canada.first).to eql('100milehouse.com')
|
69
|
+
end
|
70
|
+
|
71
|
+
it 'outputs public suffix format' do
|
72
|
+
expect(subject.to_s).to match("// Canada federal\ncanada\.ca\n")
|
73
|
+
end
|
74
|
+
|
75
|
+
it "finds a domain's parent" do
|
76
|
+
expect(subject.parent_domain('foo.gov.uk')).to eql('gov.uk')
|
77
|
+
end
|
78
|
+
|
79
|
+
context 'with the list path stubbed' do
|
80
|
+
let(:stubbed_file_contents) { File.read(stubbed_list_path) }
|
81
|
+
|
82
|
+
before do
|
83
|
+
subject.instance_variable_set('@path', stubbed_list_path)
|
84
|
+
end
|
85
|
+
|
86
|
+
context 'with list data stubbed' do
|
87
|
+
before do
|
88
|
+
subject.data = { 'foo' => ['!mail.bar.gov', 'bar.gov', 'baz.net'] }
|
89
|
+
end
|
90
|
+
|
91
|
+
context 'alphabetizing' do
|
92
|
+
before { subject.alphabetize }
|
93
|
+
|
94
|
+
it 'puts exceptions last' do
|
95
|
+
expect(subject.data['foo'].last).to eql('!mail.bar.gov')
|
96
|
+
end
|
97
|
+
end
|
98
|
+
|
99
|
+
context 'writing' do
|
100
|
+
before { subject.write }
|
101
|
+
|
102
|
+
it 'writes the contents' do
|
103
|
+
expect(stubbed_file_contents).to match("// foo\nbar.gov\nbaz.net")
|
104
|
+
end
|
105
|
+
end
|
106
|
+
end
|
107
|
+
end
|
108
|
+
end
|
109
|
+
end
|
110
|
+
end
|
@@ -0,0 +1,25 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
RSpec.describe 'Gman domains' do
|
4
|
+
let(:resolve_domains?) { ENV['GMAN_RESOLVE_DOMAINS'] == 'true' }
|
5
|
+
let(:importer) { Gman::Importer.new({}) }
|
6
|
+
let(:options) { { skip_dupe: true, skip_resolve: !resolve_domains? } }
|
7
|
+
|
8
|
+
Gman.list.to_h.each do |group, domains|
|
9
|
+
next if ['non-us gov', 'non-us mil', 'US Federal'].include?(group)
|
10
|
+
|
11
|
+
context "the #{group} group" do
|
12
|
+
it 'only contains valid domains' do
|
13
|
+
invalid_domains = []
|
14
|
+
|
15
|
+
Parallel.each(domains, in_threads: 4) do |domain|
|
16
|
+
next if importer.valid_domain?(domain, options)
|
17
|
+
|
18
|
+
invalid_domains.push domain
|
19
|
+
end
|
20
|
+
|
21
|
+
expect(invalid_domains).to be_empty
|
22
|
+
end
|
23
|
+
end
|
24
|
+
end
|
25
|
+
end
|
@@ -0,0 +1,218 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
RSpec.describe 'Gman identifier' do
|
4
|
+
subject { Gman.new(domain) }
|
5
|
+
|
6
|
+
let(:domain) { '' }
|
7
|
+
|
8
|
+
it 'parses the dotgov list' do
|
9
|
+
expect(Gman.dotgov_list).to be_a(CSV::Table)
|
10
|
+
expect(Gman.dotgov_list.first).to have_key('Domain Name')
|
11
|
+
end
|
12
|
+
|
13
|
+
context 'locality domains' do
|
14
|
+
context 'a state domain' do
|
15
|
+
let(:domain) { 'state.ak.us' }
|
16
|
+
|
17
|
+
it "knows it's a state" do
|
18
|
+
expect(subject).to be_a_state
|
19
|
+
expect(subject.type).to be(:state)
|
20
|
+
end
|
21
|
+
|
22
|
+
it 'knows the state' do
|
23
|
+
expect(subject.state).to eql('AK')
|
24
|
+
end
|
25
|
+
|
26
|
+
it "knows it's not a dotgov" do
|
27
|
+
expect(subject).not_to be_a_dotgov
|
28
|
+
end
|
29
|
+
|
30
|
+
it "know's it's not a city" do
|
31
|
+
expect(subject).not_to be_a_city
|
32
|
+
end
|
33
|
+
|
34
|
+
it "know's it's not a county" do
|
35
|
+
expect(subject).not_to be_a_county
|
36
|
+
end
|
37
|
+
end
|
38
|
+
|
39
|
+
context 'a city domain' do
|
40
|
+
let(:domain) { 'ci.champaign.il.us' }
|
41
|
+
|
42
|
+
it "knows it's a city" do
|
43
|
+
expect(subject).to be_a_city
|
44
|
+
expect(subject.type).to be(:city)
|
45
|
+
end
|
46
|
+
|
47
|
+
it 'knows the state' do
|
48
|
+
expect(subject.state).to eql('IL')
|
49
|
+
end
|
50
|
+
|
51
|
+
it "knows it's not a dotgov" do
|
52
|
+
expect(subject).not_to be_a_dotgov
|
53
|
+
end
|
54
|
+
|
55
|
+
it "know's it's not a state" do
|
56
|
+
expect(subject).not_to be_a_state
|
57
|
+
end
|
58
|
+
|
59
|
+
it "know's it's not a county" do
|
60
|
+
expect(subject).not_to be_a_county
|
61
|
+
end
|
62
|
+
end
|
63
|
+
|
64
|
+
context 'dotgovs' do
|
65
|
+
context 'A federal dotgov' do
|
66
|
+
let(:domain) { 'whitehouse.gov' }
|
67
|
+
|
68
|
+
it "knows it's federal" do
|
69
|
+
expect(subject).to be_federal
|
70
|
+
expect(subject.type).to be(:federal)
|
71
|
+
end
|
72
|
+
|
73
|
+
it "knows it's a dotgov" do
|
74
|
+
expect(subject).to be_a_dotgov
|
75
|
+
end
|
76
|
+
|
77
|
+
it "knows it's not a city" do
|
78
|
+
expect(subject).not_to be_a_city
|
79
|
+
end
|
80
|
+
|
81
|
+
it "knows it's not a state" do
|
82
|
+
expect(subject).not_to be_a_state
|
83
|
+
end
|
84
|
+
|
85
|
+
it "knows it's not a county" do
|
86
|
+
expect(subject).not_to be_a_county
|
87
|
+
end
|
88
|
+
|
89
|
+
it 'knows the state' do
|
90
|
+
expect(subject.state).to eql('DC')
|
91
|
+
end
|
92
|
+
|
93
|
+
it 'knows the city' do
|
94
|
+
expect(subject.city).to eql('Washington')
|
95
|
+
end
|
96
|
+
|
97
|
+
it 'knows the agency' do
|
98
|
+
expect(subject.agency).to eql('Executive Office of the President')
|
99
|
+
end
|
100
|
+
|
101
|
+
it 'knows the organization' do
|
102
|
+
expect(subject.organization).to eql('White House')
|
103
|
+
end
|
104
|
+
end
|
105
|
+
|
106
|
+
context 'a state .gov' do
|
107
|
+
let(:domain) { 'illinois.gov' }
|
108
|
+
|
109
|
+
it "knows it's a state" do
|
110
|
+
expect(subject).to be_a_state
|
111
|
+
expect(subject.type).to be(:state)
|
112
|
+
end
|
113
|
+
|
114
|
+
it "knows it's a dotgov" do
|
115
|
+
expect(subject).to be_a_dotgov
|
116
|
+
end
|
117
|
+
|
118
|
+
it "knows it's not a city" do
|
119
|
+
expect(subject).not_to be_a_city
|
120
|
+
end
|
121
|
+
|
122
|
+
it "knows it's not federal" do
|
123
|
+
expect(subject).not_to be_federal
|
124
|
+
end
|
125
|
+
|
126
|
+
it "knows it's not a county" do
|
127
|
+
expect(subject).not_to be_a_county
|
128
|
+
end
|
129
|
+
|
130
|
+
it 'knows the state' do
|
131
|
+
expect(subject.state).to eql('IL')
|
132
|
+
end
|
133
|
+
|
134
|
+
it 'knows the city' do
|
135
|
+
expect(subject.city).to eql('Springfield')
|
136
|
+
end
|
137
|
+
end
|
138
|
+
|
139
|
+
context 'a county .gov' do
|
140
|
+
let(:domain) { 'ALLEGHENYCOUNTYPA.GOV' }
|
141
|
+
|
142
|
+
it "knows it's a county" do
|
143
|
+
expect(subject).to be_a_county
|
144
|
+
expect(subject.type).to be(:county)
|
145
|
+
end
|
146
|
+
|
147
|
+
it "knows it's a dotgov" do
|
148
|
+
expect(subject).to be_a_dotgov
|
149
|
+
end
|
150
|
+
|
151
|
+
it "knows it's not a city" do
|
152
|
+
expect(subject).not_to be_a_city
|
153
|
+
end
|
154
|
+
|
155
|
+
it "knows it's not federal" do
|
156
|
+
expect(subject).not_to be_federal
|
157
|
+
end
|
158
|
+
|
159
|
+
it "knows it's not a state" do
|
160
|
+
expect(subject).not_to be_a_state
|
161
|
+
end
|
162
|
+
|
163
|
+
it 'knows the state' do
|
164
|
+
expect(subject.state).to eql('PA')
|
165
|
+
end
|
166
|
+
|
167
|
+
it 'knows the city' do
|
168
|
+
expect(subject.city).to eql('Pittsburgh')
|
169
|
+
end
|
170
|
+
end
|
171
|
+
|
172
|
+
context 'a city .gov' do
|
173
|
+
let(:domain) { 'ABERDEENMD.GOV' }
|
174
|
+
|
175
|
+
it "knows it's a city" do
|
176
|
+
expect(subject).to be_a_city
|
177
|
+
expect(subject.type).to be(:city)
|
178
|
+
end
|
179
|
+
|
180
|
+
it 'knows the city' do
|
181
|
+
expect(subject.city).to eql('Aberdeen')
|
182
|
+
end
|
183
|
+
|
184
|
+
it 'knows the state' do
|
185
|
+
expect(subject.state).to eql('MD')
|
186
|
+
end
|
187
|
+
|
188
|
+
it "knows it's a dotgov" do
|
189
|
+
expect(subject).to be_a_dotgov
|
190
|
+
end
|
191
|
+
|
192
|
+
it "know's it's not a state" do
|
193
|
+
expect(subject).not_to be_a_state
|
194
|
+
end
|
195
|
+
|
196
|
+
it "know's it's not a county" do
|
197
|
+
expect(subject).not_to be_a_county
|
198
|
+
end
|
199
|
+
end
|
200
|
+
end
|
201
|
+
end
|
202
|
+
|
203
|
+
context "determining a domain's type" do
|
204
|
+
{
|
205
|
+
unknown: 'cityofperu.org',
|
206
|
+
"Canada municipal": 'acme.ca',
|
207
|
+
"Canada federal": 'canada.ca'
|
208
|
+
}.each do |expected, domain|
|
209
|
+
context "Given the #{domain} domain" do
|
210
|
+
let(:domain) { domain }
|
211
|
+
|
212
|
+
it "know's the domain's type" do
|
213
|
+
expect(subject.type).to eql(expected)
|
214
|
+
end
|
215
|
+
end
|
216
|
+
end
|
217
|
+
end
|
218
|
+
end
|