gman 6.0.1 → 7.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/.github/CODEOWNERS +3 -0
- data/.github/ISSUE_TEMPLATE/bug_report.md +28 -0
- data/.github/ISSUE_TEMPLATE/feature_request.md +21 -0
- data/.github/config.yml +23 -0
- data/.github/funding.yml +1 -0
- data/.github/no-response.yml +15 -0
- data/.github/release-drafter.yml +4 -0
- data/.github/settings.yml +33 -0
- data/.github/stale.yml +29 -0
- data/.gitignore +1 -0
- data/.rspec +2 -0
- data/.rubocop.yml +17 -5
- data/.rubocop_todo.yml +84 -0
- data/.ruby-version +1 -1
- data/Gemfile +2 -0
- data/bin/gman +6 -4
- data/bin/gman_filter +5 -7
- data/config/domains.txt +8446 -173
- data/config/vendor/academic.txt +8038 -0
- data/config/vendor/dotgovs.csv +5786 -5560
- data/docs/CODE_OF_CONDUCT.md +46 -0
- data/docs/CONTRIBUTING.md +92 -0
- data/{README.md → docs/README.md} +3 -3
- data/docs/SECURITY.md +3 -0
- data/docs/_config.yml +2 -0
- data/gman.gemspec +18 -17
- data/lib/gman.rb +25 -21
- data/lib/gman/country_codes.rb +17 -17
- data/lib/gman/domain_list.rb +123 -41
- data/lib/gman/identifier.rb +59 -21
- data/lib/gman/importer.rb +39 -40
- data/lib/gman/locality.rb +23 -21
- data/lib/gman/version.rb +3 -1
- data/script/add +2 -0
- data/script/alphabetize +2 -0
- data/script/cibuild +1 -1
- data/script/dedupe +2 -1
- data/script/profile +2 -1
- data/script/prune +5 -3
- data/script/reconcile-us +6 -3
- data/script/vendor +1 -1
- data/script/vendor-federal-de +3 -3
- data/script/vendor-municipal-de +3 -3
- data/script/vendor-nl +4 -1
- data/script/vendor-public-suffix +7 -6
- data/script/vendor-se +3 -3
- data/script/vendor-swot +43 -0
- data/script/vendor-us +8 -5
- data/spec/fixtures/domains.txt +4 -0
- data/{test → spec}/fixtures/obama.txt +0 -0
- data/spec/gman/bin_spec.rb +101 -0
- data/spec/gman/country_code_spec.rb +39 -0
- data/spec/gman/domain_list_spec.rb +110 -0
- data/spec/gman/domains_spec.rb +25 -0
- data/spec/gman/identifier_spec.rb +218 -0
- data/spec/gman/importer_spec.rb +236 -0
- data/spec/gman/locality_spec.rb +24 -0
- data/spec/gman_spec.rb +74 -0
- data/spec/spec_helper.rb +31 -0
- metadata +86 -73
- data/CONTRIBUTING.md +0 -22
- data/Rakefile +0 -22
- data/test/fixtures/domains.txt +0 -2
- data/test/helper.rb +0 -40
- data/test/test_gman.rb +0 -62
- data/test/test_gman_bin.rb +0 -75
- data/test/test_gman_country_codes.rb +0 -18
- data/test/test_gman_domains.rb +0 -33
- data/test/test_gman_filter.rb +0 -17
- data/test/test_gman_identifier.rb +0 -106
- data/test/test_gman_importer.rb +0 -250
- data/test/test_gman_locality.rb +0 -10
@@ -0,0 +1,25 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# Checks every domain of the bundled Gman list, one example per group,
# against Gman::Importer#valid_domain?.
#
# Duplicate checking is always skipped. DNS resolution is skipped unless the
# GMAN_RESOLVE_DOMAINS environment variable is exactly 'true'.
RSpec.describe 'Gman domains' do
  let(:resolve_domains?) { ENV['GMAN_RESOLVE_DOMAINS'] == 'true' }
  let(:importer) { Gman::Importer.new({}) }
  let(:options) do
    { skip_dupe: true, skip_resolve: !resolve_domains? }
  end

  # Groups that are deliberately left out of the validity check.
  skipped_groups = ['non-us gov', 'non-us mil', 'US Federal']

  Gman.list.to_h.each do |group, domains|
    next if skipped_groups.include?(group)

    context "the #{group} group" do
      it 'only contains valid domains' do
        invalid_domains = []

        # Validate on four threads; anything that fails validation is
        # collected so the failure message lists the offending domains.
        Parallel.each(domains, in_threads: 4) do |candidate|
          invalid_domains << candidate unless importer.valid_domain?(candidate, options)
        end

        expect(invalid_domains).to be_empty
      end
    end
  end
end
|
@@ -0,0 +1,218 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# Specs for how a Gman instance identifies a domain: the type predicates
# (state, city, county, federal, dotgov), the #type symbol, and the
# state / city / agency / organization lookups.
RSpec.describe 'Gman identifier' do
  subject { Gman.new(domain) }

  # Default domain; each context below overrides it with the domain under test.
  let(:domain) { '' }

  it 'parses the dotgov list' do
    expect(Gman.dotgov_list).to be_a(CSV::Table)
    expect(Gman.dotgov_list.first).to have_key('Domain Name')
  end

  context 'locality domains' do
    context 'a state domain' do
      let(:domain) { 'state.ak.us' }

      it "knows it's a state" do
        expect(subject).to be_a_state
        expect(subject.type).to be(:state)
      end

      it 'knows the state' do
        expect(subject.state).to eql('AK')
      end

      it "knows it's not a dotgov" do
        expect(subject).not_to be_a_dotgov
      end

      it "knows it's not a city" do
        expect(subject).not_to be_a_city
      end

      it "knows it's not a county" do
        expect(subject).not_to be_a_county
      end
    end

    context 'a city domain' do
      let(:domain) { 'ci.champaign.il.us' }

      it "knows it's a city" do
        expect(subject).to be_a_city
        expect(subject.type).to be(:city)
      end

      it 'knows the state' do
        expect(subject.state).to eql('IL')
      end

      it "knows it's not a dotgov" do
        expect(subject).not_to be_a_dotgov
      end

      it "knows it's not a state" do
        expect(subject).not_to be_a_state
      end

      it "knows it's not a county" do
        expect(subject).not_to be_a_county
      end
    end
  end

  # Kept as a sibling of 'locality domains' (not nested inside it) so the
  # reported example names match what is actually being tested.
  context 'dotgovs' do
    context 'A federal dotgov' do
      let(:domain) { 'whitehouse.gov' }

      it "knows it's federal" do
        expect(subject).to be_federal
        expect(subject.type).to be(:federal)
      end

      it "knows it's a dotgov" do
        expect(subject).to be_a_dotgov
      end

      it "knows it's not a city" do
        expect(subject).not_to be_a_city
      end

      it "knows it's not a state" do
        expect(subject).not_to be_a_state
      end

      it "knows it's not a county" do
        expect(subject).not_to be_a_county
      end

      it 'knows the state' do
        expect(subject.state).to eql('DC')
      end

      it 'knows the city' do
        expect(subject.city).to eql('Washington')
      end

      it 'knows the agency' do
        expect(subject.agency).to eql('Executive Office of the President')
      end

      it 'knows the organization' do
        expect(subject.organization).to eql('White House')
      end
    end

    context 'a state .gov' do
      let(:domain) { 'illinois.gov' }

      it "knows it's a state" do
        expect(subject).to be_a_state
        expect(subject.type).to be(:state)
      end

      it "knows it's a dotgov" do
        expect(subject).to be_a_dotgov
      end

      it "knows it's not a city" do
        expect(subject).not_to be_a_city
      end

      it "knows it's not federal" do
        expect(subject).not_to be_federal
      end

      it "knows it's not a county" do
        expect(subject).not_to be_a_county
      end

      it 'knows the state' do
        expect(subject.state).to eql('IL')
      end

      it 'knows the city' do
        expect(subject.city).to eql('Springfield')
      end
    end

    context 'a county .gov' do
      let(:domain) { 'ALLEGHENYCOUNTYPA.GOV' }

      it "knows it's a county" do
        expect(subject).to be_a_county
        expect(subject.type).to be(:county)
      end

      it "knows it's a dotgov" do
        expect(subject).to be_a_dotgov
      end

      it "knows it's not a city" do
        expect(subject).not_to be_a_city
      end

      it "knows it's not federal" do
        expect(subject).not_to be_federal
      end

      it "knows it's not a state" do
        expect(subject).not_to be_a_state
      end

      it 'knows the state' do
        expect(subject.state).to eql('PA')
      end

      it 'knows the city' do
        expect(subject.city).to eql('Pittsburgh')
      end
    end

    context 'a city .gov' do
      let(:domain) { 'ABERDEENMD.GOV' }

      it "knows it's a city" do
        expect(subject).to be_a_city
        expect(subject.type).to be(:city)
      end

      it 'knows the city' do
        expect(subject.city).to eql('Aberdeen')
      end

      it 'knows the state' do
        expect(subject.state).to eql('MD')
      end

      it "knows it's a dotgov" do
        expect(subject).to be_a_dotgov
      end

      it "knows it's not a state" do
        expect(subject).not_to be_a_state
      end

      it "knows it's not a county" do
        expect(subject).not_to be_a_county
      end
    end
  end

  context "determining a domain's type" do
    # Maps the expected #type symbol to a sample domain. The block parameters
    # are named so they do not shadow the `domain` helper defined with `let`.
    {
      unknown: 'cityofperu.org',
      "Canada municipal": 'acme.ca',
      "Canada federal": 'canada.ca'
    }.each do |expected_type, sample_domain|
      context "Given the #{sample_domain} domain" do
        let(:domain) { sample_domain }

        it "knows the domain's type" do
          expect(subject.type).to eql(expected_type)
        end
      end
    end
  end
end
|
@@ -0,0 +1,236 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# Specs for Gman::Importer: construction, domain rejection, list
# manipulation, writing to the (stubbed) current list, and the individual
# validation steps (validity, duplicates, DNS resolution, regex, normalizing).
#
# `with_env` and `stubbed_list_path` are not defined in this file —
# presumably provided by spec_helper; confirm before moving these examples.
RSpec.describe Gman::Importer do
  subject { described_class.new(domains) }

  let(:domains) { { 'test' => ['example.com'] } }
  let(:stdout) { StringIO.new }
  # Log into the in-memory buffer above (previously this read the unset
  # instance variable @stdout, leaving the `stdout` helper unused).
  let(:logger) { Logger.new(stdout) }
  let(:domain_list) { subject.domain_list }

  before do
    subject.instance_variable_set '@logger', logger
  end

  it 'inits the domain list' do
    expect(domain_list).to be_a(Gman::DomainList)
    expect(domain_list.count).to be(1)
    expect(domain_list.domains.first).to eql('example.com')
  end

  it 'inits the logger' do
    expect(subject.logger).to be_a(Logger)
  end

  it 'returns the current domain list' do
    expect(subject.current).to be_a(Gman::DomainList)
  end

  it 'returns the resolver' do
    expect(subject.resolver).to be_a(Resolv::DNS)
  end

  context 'domain rejection' do
    it 'returns false' do
      expect(subject.reject('example.com', 'reasons')).to be(false)
    end

    it 'returns the reason why asked' do
      with_env 'RECONCILING', 'true' do
        expect(subject.reject('example.com', 'reasons')).to eql('reasons')
      end
    end
  end

  context 'manipulating the domain list' do
    context 'normalizing domains' do
      let(:domains) { { 'test' => ['www.EXAMPLE.com/'] } }

      before { subject.send :normalize_domains! }

      it 'normalizes the domains' do
        expect(domain_list.domains.first).to eql('example.com')
      end
    end

    context 'removing invalid domains' do
      let(:domains) { { 'test' => ['foo.github.io', 'example.com'] } }

      before { subject.send :ensure_validity! }

      it 'removes invalid domains' do
        expect(domain_list.count).to be(1)
      end
    end
  end

  context 'with the current list stubbed' do
    let(:stubbed_list) { Gman::DomainList.new(path: stubbed_list_path) }
    let(:stubbed_file_contents) { File.read(stubbed_list_path) }

    before { subject.instance_variable_set '@current', stubbed_list }

    context 'writing' do
      # Snapshot the current list as a string before the importer adds to it...
      before { @current = subject.current.to_s }

      before { subject.send :add_to_current }

      # ...and write that snapshot back to the stubbed file after each example.
      after { File.write(stubbed_list_path, @current) }

      context 'adding domains' do
        let(:domains) do
          { 'test' => ['example.com'], 'test2' => ['github.com'] }
        end

        it 'adds the domains' do
          expected = "// test\nexample.com\n\n// test2\ngithub.com"
          expect(stubbed_file_contents).to match(expected)
        end
      end

      context 'importing' do
        let(:domains) do
          {
            'test' => ['www.example.com', 'foo.github.io'],
            'test2' => ['github.com', 'www.github.com', 'whitehouse.gov']
          }
        end

        before { subject.import(skip_resolve: true) }

        it 'imports' do
          expected = "// test\nexample.com\nfoo.github.io"
          expect(stubbed_file_contents).to match(expected)

          expected = "// test2\ngithub.com\nwhitehouse.gov"
          expect(stubbed_file_contents).to match(expected)
        end
      end
    end
  end

  context 'domain validation' do
    let(:domain) { '' }
    let(:valid?) { subject.send(:ensure_valid, domain) }

    context 'a valid domain' do
      let(:domain) { 'whitehouse.gov' }

      it 'is valid' do
        expect(valid?).to be(true)
      end
    end

    # Each sample domain is now bound to `domain` for its own context.
    # Previously only the keys were iterated, so every case silently ran
    # against the empty default domain.
    # NOTE(review): confirm ensure_valid returns exactly `false` for each of
    # these samples now that they are actually exercised.
    {
      empty: '',
      blacklisted: 'egovlink.com',
      invalid: 'foo.invalid',
      academic: 'harvard.edu',
      "rejex'd": 'foo.github.io'
    }.each do |type, invalid_domain|
      context "a #{type} domain" do
        let(:domain) { invalid_domain }

        it 'is invalid' do
          expect(valid?).to be(false)
        end
      end
    end
  end

  context 'duplicate domains' do
    let(:dupe?) { subject.send(:dupe?, domain) }
    let(:ensure_not_dupe) { subject.send(:ensure_not_dupe, domain) }

    context 'a unique domain' do
      let(:domain) { 'gman.com' }

      it 'is not a dupe' do
        expect(dupe?).to be_falsy
        expect(ensure_not_dupe).to be_truthy
      end
    end

    context 'a duplicate domain' do
      let(:domain) { 'gov' }

      it "knows it's a dupe" do
        expect(dupe?).to be_truthy
        expect(ensure_not_dupe).to be_falsy
      end

      context 'a subdomain' do
        let(:domain) { 'whitehouse.gov' }

        it "know when a domain's a subdomain of an existing domain" do
          expect(dupe?).to be_truthy
          expect(ensure_not_dupe).to be_falsy
        end
      end
    end
  end

  context 'domain resolution' do
    let(:resolves?) { subject.domain_resolves?(domain) }
    let(:ensure_resolves) { subject.send(:ensure_resolves, domain) }

    context 'a valid domain' do
      let(:domain) { 'github.com' }

      it 'resolves' do
        expect(resolves?).to be_truthy
        expect(ensure_resolves).to be_truthy
      end
    end

    context 'an invalid domain' do
      let(:domain) { 'foo.invalid' }

      it "doesn't resolve" do
        expect(resolves?).to be_falsy
        expect(ensure_resolves).to be_falsy
      end
    end
  end

  context 'regex checks' do
    let(:ensure_regex) { subject.send(:ensure_regex, domain) }

    context 'valid domains' do
      let(:domain) { 'example.com' }

      it 'passes' do
        expect(ensure_regex).to be_truthy
      end
    end

    # The block parameter is named so it does not shadow the `domain` helper.
    [
      'home.example.com', 'site.example.com', 'user.example.com',
      'foo.weebly.com', 'foo.wordpress.com', 'foo.govoffice.com',
      'foo.govoffice1.com', 'foo.homestead.com', 'foo.wix.com',
      'foo.blogspot.com', 'foo.tripod.com', 'foo.squarespace.com',
      'foo.github.io', 'ci.champaign.il.us'
    ].each do |rejected_domain|
      context "a #{rejected_domain} domain" do
        let(:domain) { rejected_domain }

        it 'rejects the domain' do
          expect(ensure_regex).to be_falsy
        end
      end
    end
  end

  context 'normalizing domains' do
    let(:normalized_domain) { subject.normalize_domain(domain) }

    # Each input gets its own context. Defining `let(:domain)` repeatedly in
    # one example group keeps only the last definition, so previously only
    # 'EXAMPLE.com' was ever normalized.
    [
      'http://example.com', 'www.example.com', 'example.com/',
      'example.com/foo', 'example.com/foo/', 'EXAMPLE.com'
    ].each do |raw_domain|
      context "given #{raw_domain}" do
        let(:domain) { raw_domain }

        it 'normalizes the domain' do
          expect(normalized_domain).to eql('example.com')
        end
      end
    end
  end
end
|