gman 7.0.1 → 7.0.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +1 -0
- data/.rspec +2 -0
- data/.rubocop.yml +3 -0
- data/.ruby-version +1 -1
- data/config/domains.txt +8259 -42
- data/config/vendor/academic.txt +6 -7
- data/config/vendor/dotgovs.csv +5634 -5560
- data/contributing.json +32 -0
- data/gman.gemspec +3 -5
- data/lib/gman.rb +1 -1
- data/lib/gman/domain_list.rb +18 -6
- data/lib/gman/identifier.rb +2 -2
- data/lib/gman/importer.rb +1 -1
- data/lib/gman/version.rb +1 -1
- data/script/cibuild +1 -1
- data/script/dedupe +1 -1
- data/script/vendor-swot +1 -1
- data/spec/fixtures/domains.txt +4 -0
- data/{test → spec}/fixtures/obama.txt +0 -0
- data/spec/gman/bin_spec.rb +99 -0
- data/spec/gman/country_code_spec.rb +36 -0
- data/spec/gman/domain_list_spec.rb +108 -0
- data/spec/gman/domains_spec.rb +22 -0
- data/spec/gman/identifier_spec.rb +182 -0
- data/spec/gman/importer_spec.rb +227 -0
- data/spec/gman/locality_spec.rb +22 -0
- data/spec/gman_spec.rb +72 -0
- data/spec/spec_helper.rb +29 -0
- metadata +52 -83
- data/.rake_tasks +0 -0
- data/Rakefile +0 -22
- data/test/fixtures/domains.txt +0 -2
- data/test/helper.rb +0 -48
- data/test/test_gman.rb +0 -56
- data/test/test_gman_bin.rb +0 -75
- data/test/test_gman_country_codes.rb +0 -18
- data/test/test_gman_domain_list.rb +0 -112
- data/test/test_gman_domains.rb +0 -32
- data/test/test_gman_filter.rb +0 -17
- data/test/test_gman_identifier.rb +0 -106
- data/test/test_gman_importer.rb +0 -244
- data/test/test_gman_locality.rb +0 -10
@@ -0,0 +1,182 @@
|
|
1
|
+
# Specs for the identification helpers on Gman instances: the type, state,
# city and agency readers plus the state/city/county/federal/dotgov
# predicates exercised below.
RSpec.describe 'Gman identifier' do
  # Default domain; each nested context overrides it with its own `let`.
  let(:domain) { '' }
  subject { Gman.new(domain) }

  it 'parses the dotgov list' do
    expect(Gman.dotgov_list).to be_a(CSV::Table)
    expect(Gman.dotgov_list.first).to have_key('Domain Name')
  end

  context 'locality domains' do
    context 'a state domain' do
      let(:domain) { 'state.ak.us' }

      it "knows it's a state" do
        expect(subject).to be_a_state
        expect(subject.type).to eql(:state)
      end

      it 'knows the state' do
        expect(subject.state).to eql('AK')
      end

      it "knows it's not a dotgov" do
        expect(subject).to_not be_a_dotgov
      end

      it "knows it's not a city" do
        expect(subject).to_not be_a_city
      end

      it "knows it's not a county" do
        expect(subject).to_not be_a_county
      end
    end

    context 'a city domain' do
      let(:domain) { 'ci.champaign.il.us' }

      it "knows it's a city" do
        expect(subject).to be_a_city
        expect(subject.type).to eql(:city)
      end

      it 'knows the state' do
        expect(subject.state).to eql('IL')
      end

      it "knows it's not a dotgov" do
        expect(subject).to_not be_a_dotgov
      end

      it "knows it's not a state" do
        expect(subject).to_not be_a_state
      end

      it "knows it's not a county" do
        expect(subject).to_not be_a_county
      end
    end
  end

  # Sibling of 'locality domains' rather than a child of it, so .gov
  # examples are no longer reported under the locality group.
  context 'dotgovs' do
    context 'A federal dotgov' do
      let(:domain) { 'whitehouse.gov' }

      it "knows it's federal" do
        expect(subject).to be_federal
        expect(subject.type).to eql(:federal)
      end

      it "knows it's a dotgov" do
        expect(subject).to be_a_dotgov
      end

      it "knows it's not a city" do
        expect(subject).to_not be_a_city
      end

      it "knows it's not a state" do
        expect(subject).to_not be_a_state
      end

      it "knows it's not a county" do
        expect(subject).to_not be_a_county
      end

      it 'knows the state' do
        expect(subject.state).to eql('DC')
      end

      it 'knows the city' do
        expect(subject.city).to eql('Washington')
      end

      it 'knows the agency' do
        expect(subject.agency).to eql('Executive Office of the President')
      end
    end

    context 'a state .gov' do
      let(:domain) { 'illinois.gov' }

      it "knows it's a state" do
        expect(subject).to be_a_state
        expect(subject.type).to eql(:state)
      end

      it "knows it's a dotgov" do
        expect(subject).to be_a_dotgov
      end

      it "knows it's not a city" do
        expect(subject).to_not be_a_city
      end

      it "knows it's not federal" do
        expect(subject).to_not be_federal
      end

      it "knows it's not a county" do
        expect(subject).to_not be_a_county
      end

      it 'knows the state' do
        expect(subject.state).to eql('IL')
      end

      it 'knows the city' do
        expect(subject.city).to eql('Springfield')
      end
    end

    context 'a county .gov' do
      let(:domain) { 'ALLEGHENYCOUNTYPA.GOV' }

      it "knows it's a county" do
        expect(subject).to be_a_county
        expect(subject.type).to eql(:county)
      end

      it "knows it's a dotgov" do
        expect(subject).to be_a_dotgov
      end

      it "knows it's not a city" do
        expect(subject).to_not be_a_city
      end

      it "knows it's not federal" do
        expect(subject).to_not be_federal
      end

      it "knows it's not a state" do
        expect(subject).to_not be_a_state
      end

      it 'knows the state' do
        expect(subject.state).to eql('PA')
      end

      it 'knows the city' do
        expect(subject.city).to eql('Pittsburgh')
      end
    end
  end

  context "determining a domain's type" do
    # Expected type => domain that should be identified as that type.
    {
      :unknown => 'cityofperu.org',
      :"Canada municipal" => 'acme.ca',
      :"Canada federal" => 'canada.ca'
    }.each do |expected, domain|
      context "Given the #{domain} domain" do
        # Inside the block, `domain` is the loop's block-local variable,
        # so this `let` hands the table entry to `subject`.
        let(:domain) { domain }

        it "knows the domain's type" do
          expect(subject.type).to eql(expected)
        end
      end
    end
  end
end
|
@@ -0,0 +1,227 @@
|
|
1
|
+
# Specs for Gman::Importer: construction, rejection, list manipulation,
# writing/importing against a stubbed list file, and the individual
# validation helpers (validity, duplicates, DNS resolution, regex checks,
# normalization).
RSpec.describe Gman::Importer do
  let(:domains) { { 'test' => ['example.com'] } }
  let(:stdout) { StringIO.new }
  # Log into the in-memory StringIO above instead of an unset ivar.
  let(:logger) { Logger.new(stdout) }
  let(:domain_list) { subject.domain_list }
  subject { described_class.new(domains) }

  before do
    subject.instance_variable_set '@logger', logger
  end

  it 'inits the domain list' do
    expect(domain_list).to be_a(Gman::DomainList)
    expect(domain_list.count).to eql(1)
    expect(domain_list.domains.first).to eql('example.com')
  end

  it 'inits the logger' do
    expect(subject.logger).to be_a(Logger)
  end

  it 'returns the current domain list' do
    expect(subject.current).to be_a(Gman::DomainList)
  end

  it 'returns the resolver' do
    expect(subject.resolver).to be_a(Resolv::DNS)
  end

  context 'domain rejection' do
    it 'returns false' do
      expect(subject.reject('example.com', 'reasons')).to eql(false)
    end

    it 'returns the reason when asked' do
      # `with_env` is a helper defined outside this file.
      with_env 'RECONCILING', 'true' do
        expect(subject.reject('example.com', 'reasons')).to eql('reasons')
      end
    end
  end

  context 'manipulating the domain list' do
    context 'normalizing domains' do
      let(:domains) { { 'test' => ['www.EXAMPLE.com/'] } }
      before { subject.send :normalize_domains! }

      it 'normalizes the domains' do
        expect(domain_list.domains.first).to eql('example.com')
      end
    end

    context 'removing invalid domains' do
      let(:domains) { { 'test' => ['foo.github.io', 'example.com'] } }
      before { subject.send :ensure_validity! }

      it 'removes invalid domains' do
        expect(domain_list.count).to eql(1)
      end
    end
  end

  context 'with the current list stubbed' do
    # `stubbed_list_path` is a helper defined outside this file.
    let(:stubbed_list) { Gman::DomainList.new(path: stubbed_list_path) }
    let(:stubbed_file_contents) { File.read(stubbed_list_path) }
    before { subject.instance_variable_set '@current', stubbed_list }

    context 'writing' do
      # Snapshot the list before it is modified and write the snapshot
      # back to the stubbed path after each example.
      before { @current = subject.current.to_s }
      before { subject.send :add_to_current }
      after { File.write(stubbed_list_path, @current) }

      context 'adding domains' do
        let(:domains) do
          { 'test' => ['example.com'], 'test2' => ['github.com'] }
        end

        it 'adds the domains' do
          expected = "// test\nexample.com\n\n// test2\ngithub.com"
          expect(stubbed_file_contents).to match(expected)
        end
      end

      context 'importing' do
        let(:domains) do
          {
            'test' => ['www.example.com', 'foo.github.io'],
            'test2' => ['github.com', 'www.github.com', 'whitehouse.gov']
          }
        end
        before { subject.import(skip_resolve: true) }

        it 'imports' do
          expected = "// test\nexample.com\nfoo.github.io"
          expect(stubbed_file_contents).to match(expected)

          expected = "// test2\ngithub.com\nwhitehouse.gov"
          expect(stubbed_file_contents).to match(expected)
        end
      end
    end
  end

  context 'domain validation' do
    let(:domain) { '' }
    let(:valid?) { subject.send(:ensure_valid, domain) }

    context 'a valid domain' do
      let(:domain) { 'whitehouse.gov' }

      it 'is valid' do
        expect(valid?).to eql(true)
      end
    end

    # Kind of invalid domain => sample domain of that kind.
    {
      :empty => '',
      :blacklisted => 'egovlink.com',
      :invalid => 'foo.invalid',
      :academic => 'harvard.edu',
      :"rejex'd" => 'foo.github.io'
    }.each do |type, domain|
      context "a #{type} domain" do
        # Override the outer empty default so each example validates its
        # own table entry; inside the block, `domain` is the loop local.
        let(:domain) { domain }

        it 'is invalid' do
          expect(valid?).to eql(false)
        end
      end
    end
  end

  context 'duplicate domains' do
    let(:dupe?) { subject.send(:dupe?, domain) }
    let(:ensure_not_dupe) { subject.send(:ensure_not_dupe, domain) }

    context 'a unique domain' do
      let(:domain) { 'gman.com' }

      it 'is not a dupe' do
        expect(dupe?).to be_falsy
        expect(ensure_not_dupe).to be_truthy
      end
    end

    context 'a duplicate domain' do
      let(:domain) { 'gov' }

      it "knows it's a dupe" do
        expect(dupe?).to be_truthy
        expect(ensure_not_dupe).to be_falsy
      end

      context 'a subdomain' do
        let(:domain) { 'whitehouse.gov' }

        it "knows when a domain's a subdomain of an existing domain" do
          expect(dupe?).to be_truthy
          expect(ensure_not_dupe).to be_falsy
        end
      end
    end
  end

  context 'domain resolution' do
    let(:resolves?) { subject.domain_resolves?(domain) }
    let(:ensure_resolves) { subject.send(:ensure_resolves, domain) }

    context 'a valid domain' do
      let(:domain) { 'github.com' }

      it 'resolves' do
        expect(resolves?).to be_truthy
        expect(ensure_resolves).to be_truthy
      end
    end

    context 'an invalid domain' do
      let(:domain) { 'foo.invalid' }

      it "doesn't resolve" do
        expect(resolves?).to be_falsy
        expect(ensure_resolves).to be_falsy
      end
    end
  end

  context 'regex checks' do
    let(:ensure_regex) { subject.send(:ensure_regex, domain) }

    context 'valid domains' do
      let(:domain) { 'example.com' }

      it 'passes' do
        expect(ensure_regex).to be_truthy
      end
    end

    [
      'home.example.com', 'site.example.com', 'user.example.com',
      'foo.weebly.com', 'foo.wordpress.com', 'foo.govoffice.com',
      'foo.govoffice1.com', 'foo.homestead.com', 'foo.wix.com',
      'foo.blogspot.com', 'foo.tripod.com', 'foo.squarespace.com',
      'foo.github.io', 'ci.champaign.il.us'
    ].each do |domain|
      context "a #{domain} domain" do
        let(:domain) { domain }

        it 'rejects the domain' do
          expect(ensure_regex).to be_falsy
        end
      end
    end
  end

  context 'normalizing domains' do
    let(:normalized_domain) { subject.normalize_domain(domain) }

    [
      'http://example.com', 'www.example.com', 'example.com/',
      'example.com/foo', 'example.com/foo/', 'EXAMPLE.com'
    ].each do |domain|
      # One example group per input: a `let` redefined repeatedly on the
      # same group would leave only the last definition in effect.
      context "given #{domain}" do
        let(:domain) { domain }

        it 'normalizes the domain' do
          expect(normalized_domain).to eql('example.com')
        end
      end
    end
  end
end
|
@@ -0,0 +1,22 @@
|
|
1
|
+
# Table-driven checks of Gman::Locality.valid?: each expected result maps
# to the domains that should produce it.
RSpec.describe Gman::Locality do
  {
    true => ['foo.state.il.us', 'ci.foo.il.us'],
    false => ['state.foo.il.us', 'foo.ci.il.us', 'k12.il.us', 'ci.foo.zx.us']
  }.each do |validity, candidates|
    label = validity ? 'valid' : 'invalid'

    context "#{label} domains" do
      candidates.each do |candidate|
        context "the #{candidate} domain" do
          it "is #{label}" do
            expect(described_class.valid?(candidate)).to eql(validity)
          end
        end
      end
    end
  end
end
|
data/spec/gman_spec.rb
ADDED
@@ -0,0 +1,72 @@
|
|
1
|
+
# Top-level specs for Gman: validity of assorted domain/URL/email inputs
# (checked through both the class method and a fresh instance), locality
# detection, and the class-level list/path accessors.
RSpec.describe Gman do
  context 'valid domains' do
    ['foo.gov', 'http://foo.mil', 'foo@bar.gc.ca', 'foo.gov.au',
     'https://www.foo.gouv.fr', 'foo@ci.champaign.il.us',
     'foo.bar.baz.gov.au', 'foo@bar.gov.uk',
     'foo.fed.us', 'foo.state.il.us', 'state.il.us',
     'foo@af.mil', 'foo.gov.in'].each do |domain|
      it "knows #{domain.inspect} is a valid government domain" do
        expect(described_class.valid?(domain)).to eql(true)
        # Build the instance inside the example: a `subject` redefined on
        # every iteration of this group would only keep the last domain.
        expect(described_class.new(domain).valid?).to eql(true)
      end
    end
  end

  context 'invalid domains' do
    ['foo.bar.com', 'bar@foo.biz', 'http://www.foo.biz',
     'foo.uk', 'gov', 'foo@k12.champaign.il.us', 'foo@kii.gov.by',
     'foo', '', nil, ' ', 'foo.city.il.us', 'foo.ci.il.us',
     'foo.zx.us', 'foo@mail.gov.ua', 'foo@gwu.edu'].each do |domain|
      it "knows #{domain.inspect} is not a valid government domain" do
        expect(described_class.valid?(domain)).to eql(false)
        # Same per-example construction as in the valid-domain loop.
        expect(described_class.new(domain).valid?).to eql(false)
      end
    end
  end

  context 'localities' do
    subject { described_class.new(domain) }

    context 'when given github.gov' do
      let(:domain) { 'github.gov' }

      it "knows it's not a locality" do
        expect(subject.locality?).to eql(false)
      end
    end

    context 'when given foo.state.il.us' do
      let(:domain) { 'foo.state.il.us' }

      it "knows it's a locality" do
        expect(subject.locality?).to eql(true)
      end
    end
  end

  context 'class methods' do
    it 'returns the domain list' do
      expect(described_class.list).to be_a(Gman::DomainList)
    end

    it 'returns the academic list' do
      expect(described_class.academic_list).to be_a(Gman::DomainList)
    end

    it 'returns the config path' do
      expect(Dir.exist?(described_class.config_path)).to eql(true)
    end

    it 'returns the list path' do
      expect(File.exist?(described_class.list_path)).to eql(true)
    end

    it 'returns the academic list path' do
      expect(File.exist?(described_class.academic_list_path)).to eql(true)
    end
  end
end
|