gman 7.0.1 → 7.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +1 -0
- data/.rspec +2 -0
- data/.rubocop.yml +3 -0
- data/.ruby-version +1 -1
- data/config/domains.txt +8259 -42
- data/config/vendor/academic.txt +6 -7
- data/config/vendor/dotgovs.csv +5634 -5560
- data/contributing.json +32 -0
- data/gman.gemspec +3 -5
- data/lib/gman.rb +1 -1
- data/lib/gman/domain_list.rb +18 -6
- data/lib/gman/identifier.rb +2 -2
- data/lib/gman/importer.rb +1 -1
- data/lib/gman/version.rb +1 -1
- data/script/cibuild +1 -1
- data/script/dedupe +1 -1
- data/script/vendor-swot +1 -1
- data/spec/fixtures/domains.txt +4 -0
- data/{test → spec}/fixtures/obama.txt +0 -0
- data/spec/gman/bin_spec.rb +99 -0
- data/spec/gman/country_code_spec.rb +36 -0
- data/spec/gman/domain_list_spec.rb +108 -0
- data/spec/gman/domains_spec.rb +22 -0
- data/spec/gman/identifier_spec.rb +182 -0
- data/spec/gman/importer_spec.rb +227 -0
- data/spec/gman/locality_spec.rb +22 -0
- data/spec/gman_spec.rb +72 -0
- data/spec/spec_helper.rb +29 -0
- metadata +52 -83
- data/.rake_tasks +0 -0
- data/Rakefile +0 -22
- data/test/fixtures/domains.txt +0 -2
- data/test/helper.rb +0 -48
- data/test/test_gman.rb +0 -56
- data/test/test_gman_bin.rb +0 -75
- data/test/test_gman_country_codes.rb +0 -18
- data/test/test_gman_domain_list.rb +0 -112
- data/test/test_gman_domains.rb +0 -32
- data/test/test_gman_filter.rb +0 -17
- data/test/test_gman_identifier.rb +0 -106
- data/test/test_gman_importer.rb +0 -244
- data/test/test_gman_locality.rb +0 -10
@@ -0,0 +1,182 @@
|
|
1
|
+
# Specs for Gman's identification helpers: the parsed dotgov list and the
# type / state / city / agency lookups for locality and .gov domains.
RSpec.describe 'Gman identifier' do
  # Default domain; each nested context overrides it with the domain under test.
  let(:domain) { '' }
  subject { Gman.new(domain) }

  it 'parses the dotgov list' do
    expect(Gman.dotgov_list).to be_a(CSV::Table)
    expect(Gman.dotgov_list.first).to have_key('Domain Name')
  end

  context 'locality domains' do
    context 'a state domain' do
      let(:domain) { 'state.ak.us' }

      it "knows it's a state" do
        expect(subject).to be_a_state
        expect(subject.type).to eql(:state)
      end

      it 'knows the state' do
        expect(subject.state).to eql('AK')
      end

      it "knows it's not a dotgov" do
        expect(subject).to_not be_a_dotgov
      end

      it "knows it's not a city" do
        expect(subject).to_not be_a_city
      end

      it "knows it's not a county" do
        expect(subject).to_not be_a_county
      end
    end

    context 'a city domain' do
      let(:domain) { 'ci.champaign.il.us' }

      it "knows it's a city" do
        expect(subject).to be_a_city
        expect(subject.type).to eql(:city)
      end

      it 'knows the state' do
        expect(subject.state).to eql('IL')
      end

      it "knows it's not a dotgov" do
        expect(subject).to_not be_a_dotgov
      end

      it "knows it's not a state" do
        expect(subject).to_not be_a_state
      end

      it "knows it's not a county" do
        expect(subject).to_not be_a_county
      end
    end
  end

  # Sibling of 'locality domains' (previously nested inside it by mistake):
  # these examples cover .gov domains, not locality domains.
  context 'dotgovs' do
    context 'A federal dotgov' do
      let(:domain) { 'whitehouse.gov' }

      it "knows it's federal" do
        expect(subject).to be_federal
        expect(subject.type).to eql(:federal)
      end

      it "knows it's a dotgov" do
        expect(subject).to be_a_dotgov
      end

      it "knows it's not a city" do
        expect(subject).to_not be_a_city
      end

      it "knows it's not a state" do
        expect(subject).to_not be_a_state
      end

      it "knows it's not a county" do
        expect(subject).to_not be_a_county
      end

      it 'knows the state' do
        expect(subject.state).to eql('DC')
      end

      it 'knows the city' do
        expect(subject.city).to eql('Washington')
      end

      it 'knows the agency' do
        expect(subject.agency).to eql('Executive Office of the President')
      end
    end

    context 'a state .gov' do
      let(:domain) { 'illinois.gov' }

      it "knows it's a state" do
        expect(subject).to be_a_state
        expect(subject.type).to eql(:state)
      end

      it "knows it's a dotgov" do
        expect(subject).to be_a_dotgov
      end

      it "knows it's not a city" do
        expect(subject).to_not be_a_city
      end

      it "knows it's not federal" do
        expect(subject).to_not be_federal
      end

      it "knows it's not a county" do
        expect(subject).to_not be_a_county
      end

      it 'knows the state' do
        expect(subject.state).to eql('IL')
      end

      it 'knows the city' do
        expect(subject.city).to eql('Springfield')
      end
    end

    context 'a county .gov' do
      let(:domain) { 'ALLEGHENYCOUNTYPA.GOV' }

      it "knows it's a county" do
        expect(subject).to be_a_county
        expect(subject.type).to eql(:county)
      end

      it "knows it's a dotgov" do
        expect(subject).to be_a_dotgov
      end

      it "knows it's not a city" do
        expect(subject).to_not be_a_city
      end

      it "knows it's not federal" do
        expect(subject).to_not be_federal
      end

      it "knows it's not a state" do
        expect(subject).to_not be_a_state
      end

      it 'knows the state' do
        expect(subject.state).to eql('PA')
      end

      it 'knows the city' do
        expect(subject.city).to eql('Pittsburgh')
      end
    end
  end

  context "determining a domain's type" do
    # Expected type => domain under test.
    {
      :unknown            => 'cityofperu.org',
      :"Canada municipal" => 'acme.ca',
      :"Canada federal"   => 'canada.ca'
    }.each do |expected, domain|
      # One context per domain so each `let(:domain)` lives in its own group.
      context "Given the #{domain} domain" do
        let(:domain) { domain }

        it "knows the domain's type" do
          expect(subject.type).to eql(expected)
        end
      end
    end
  end
end
|
@@ -0,0 +1,227 @@
|
|
1
|
+
# Specs for Gman::Importer: list initialisation, rejection reporting, list
# manipulation, writing/importing against a stubbed list, and the individual
# validation, duplicate, resolution, regex and normalisation checks.
RSpec.describe Gman::Importer do
  let(:domains) { { 'test' => ['example.com'] } }
  let(:stdout) { StringIO.new }
  # Log into the in-memory IO above. The previous `@stdout` was an unset
  # instance variable (nil), which left the `stdout` helper unused.
  let(:logger) { Logger.new(stdout) }
  let(:domain_list) { subject.domain_list }
  subject { described_class.new(domains) }

  before do
    subject.instance_variable_set '@logger', logger
  end

  it 'inits the domain list' do
    expect(domain_list).to be_a(Gman::DomainList)
    expect(domain_list.count).to eql(1)
    expect(domain_list.domains.first).to eql('example.com')
  end

  it 'inits the logger' do
    expect(subject.logger).to be_a(Logger)
  end

  it 'returns the current domain list' do
    expect(subject.current).to be_a(Gman::DomainList)
  end

  it 'returns the resolver' do
    expect(subject.resolver).to be_a(Resolv::DNS)
  end

  context 'domain rejection' do
    it 'returns false' do
      expect(subject.reject('example.com', 'reasons')).to eql(false)
    end

    it 'returns the reason when asked' do
      with_env 'RECONCILING', 'true' do
        expect(subject.reject('example.com', 'reasons')).to eql('reasons')
      end
    end
  end

  context 'manipulating the domain list' do
    context 'normalizing domains' do
      let(:domains) { { 'test' => ['www.EXAMPLE.com/'] } }
      before { subject.send :normalize_domains! }

      it 'normalizes the domains' do
        expect(domain_list.domains.first).to eql('example.com')
      end
    end

    context 'removing invalid domains' do
      let(:domains) { { 'test' => ['foo.github.io', 'example.com'] } }
      before { subject.send :ensure_validity! }

      it 'removes invalid domains' do
        expect(domain_list.count).to eql(1)
      end
    end
  end

  context 'with the current list stubbed' do
    let(:stubbed_list) { Gman::DomainList.new(path: stubbed_list_path) }
    let(:stubbed_file_contents) { File.read(stubbed_list_path) }
    before { subject.instance_variable_set '@current', stubbed_list }

    context 'writing' do
      # Snapshot the list before each example and write it back afterwards so
      # the stubbed file is restored between examples.
      before { @current = subject.current.to_s }
      before { subject.send :add_to_current }
      after { File.write(stubbed_list_path, @current) }

      context 'adding domains' do
        let(:domains) do
          { 'test' => ['example.com'], 'test2' => ['github.com'] }
        end

        it 'adds the domains' do
          expected = "// test\nexample.com\n\n// test2\ngithub.com"
          expect(stubbed_file_contents).to match(expected)
        end
      end

      context 'importing' do
        let(:domains) do
          {
            'test'  => ['www.example.com', 'foo.github.io'],
            'test2' => ['github.com', 'www.github.com', 'whitehouse.gov']
          }
        end
        before { subject.import(skip_resolve: true) }

        it 'imports' do
          expected = "// test\nexample.com\nfoo.github.io"
          expect(stubbed_file_contents).to match(expected)

          expected = "// test2\ngithub.com\nwhitehouse.gov"
          expect(stubbed_file_contents).to match(expected)
        end
      end
    end
  end

  context 'domain validation' do
    let(:domain) { '' }
    let(:valid?) { subject.send(:ensure_valid, domain) }

    context 'a valid domain' do
      let(:domain) { 'whitehouse.gov' }

      it 'is valid' do
        expect(valid?).to eql(true)
      end
    end

    # Kind of invalid domain => sample domain.
    {
      :empty       => '',
      :blacklisted => 'egovlink.com',
      :invalid     => 'foo.invalid',
      :academic    => 'harvard.edu',
      :"rejex'd"   => 'foo.github.io'
    }.each do |type, domain|
      context "a #{type} domain" do
        # Feed the sample into the example. Previously the value was
        # discarded, so every case checked the empty default domain.
        let(:domain) { domain }

        it 'is invalid' do
          expect(valid?).to eql(false)
        end
      end
    end
  end

  context 'duplicate domains' do
    let(:dupe?) { subject.send(:dupe?, domain) }
    let(:ensure_not_dupe) { subject.send(:ensure_not_dupe, domain) }

    context 'a unique domain' do
      let(:domain) { 'gman.com' }

      it 'is not a dupe' do
        expect(dupe?).to be_falsy
        expect(ensure_not_dupe).to be_truthy
      end
    end

    context 'a duplicate domain' do
      let(:domain) { 'gov' }

      it "knows it's a dupe" do
        expect(dupe?).to be_truthy
        expect(ensure_not_dupe).to be_falsy
      end

      context 'a subdomain' do
        let(:domain) { 'whitehouse.gov' }

        it "knows when a domain's a subdomain of an existing domain" do
          expect(dupe?).to be_truthy
          expect(ensure_not_dupe).to be_falsy
        end
      end
    end
  end

  context 'domain resolution' do
    let(:resolves?) { subject.domain_resolves?(domain) }
    let(:ensure_resolves) { subject.send(:ensure_resolves, domain) }

    context 'a valid domain' do
      let(:domain) { 'github.com' }

      it 'resolves' do
        expect(resolves?).to be_truthy
        expect(ensure_resolves).to be_truthy
      end
    end

    context 'an invalid domain' do
      let(:domain) { 'foo.invalid' }

      it "doesn't resolve" do
        expect(resolves?).to be_falsy
        expect(ensure_resolves).to be_falsy
      end
    end
  end

  context 'regex checks' do
    let(:ensure_regex) { subject.send(:ensure_regex, domain) }

    context 'valid domains' do
      let(:domain) { 'example.com' }

      it 'passes' do
        expect(ensure_regex).to be_truthy
      end
    end

    [
      'home.example.com', 'site.example.com', 'user.example.com',
      'foo.weebly.com', 'foo.wordpress.com', 'foo.govoffice.com',
      'foo.govoffice1.com', 'foo.homestead.com', 'foo.wix.com',
      'foo.blogspot.com', 'foo.tripod.com', 'foo.squarespace.com',
      'foo.github.io', 'ci.champaign.il.us'
    ].each do |domain|
      context "a #{domain} domain" do
        let(:domain) { domain }

        it 'rejects the domain' do
          expect(ensure_regex).to be_falsy
        end
      end
    end
  end

  context 'normalizing domains' do
    let(:normalized_domain) { subject.normalize_domain(domain) }

    [
      'http://example.com', 'www.example.com', 'example.com/',
      'example.com/foo', 'example.com/foo/', 'EXAMPLE.com'
    ].each do |domain|
      # Each input gets its own example group. Defining `let(:domain)`
      # repeatedly in one group keeps only the last definition, so every
      # example would otherwise run against 'EXAMPLE.com'.
      context "the #{domain} domain" do
        let(:domain) { domain }

        it 'normalizes the domain' do
          expect(normalized_domain).to eql('example.com')
        end
      end
    end
  end
end
|
@@ -0,0 +1,22 @@
|
|
1
|
+
# Specs for Gman::Locality.valid?, checked against sample .us hostnames that
# are expected to be accepted and rejected.
RSpec.describe Gman::Locality do
  context 'valid domains' do
    accepted = %w[foo.state.il.us ci.foo.il.us]

    accepted.each do |host|
      context "the #{host} domain" do
        it 'is valid' do
          outcome = described_class.valid?(host)
          expect(outcome).to eql(true)
        end
      end
    end
  end

  context 'invalid domains' do
    rejected = %w[state.foo.il.us foo.ci.il.us k12.il.us ci.foo.zx.us]

    rejected.each do |host|
      context "the #{host} domain" do
        it 'is invalid' do
          outcome = described_class.valid?(host)
          expect(outcome).to eql(false)
        end
      end
    end
  end
end
|
data/spec/gman_spec.rb
ADDED
@@ -0,0 +1,72 @@
|
|
1
|
+
# Top-level specs for Gman: validity of sample domains (through both the
# class-level and instance-level `valid?`), locality detection, and the
# class-level list and path helpers.
RSpec.describe Gman do
  context 'valid domains' do
    ['foo.gov', 'http://foo.mil', 'foo@bar.gc.ca', 'foo.gov.au',
     'https://www.foo.gouv.fr', 'foo@ci.champaign.il.us',
     'foo.bar.baz.gov.au', 'foo@bar.gov.uk', 'foo.gov',
     'foo.fed.us', 'foo.state.il.us', 'state.il.us',
     'foo@af.mil', 'foo.gov.in'].each do |domain|
      # One example group per input. Redefining `subject` repeatedly in a
      # single group keeps only the last definition, so `subject.valid?`
      # would otherwise check just the final domain in the list.
      context "given #{domain.inspect}" do
        subject { described_class.new(domain) }

        it "knows #{domain.inspect} is a valid government domain" do
          expect(described_class.valid?(domain)).to eql(true)
          expect(subject.valid?).to eql(true)
        end
      end
    end
  end

  context 'invalid domains' do
    ['foo.bar.com', 'bar@foo.biz', 'http://www.foo.biz',
     'foo.uk', 'gov', 'foo@k12.champaign.il.us', 'foo@kii.gov.by',
     'foo', '', nil, ' ', 'foo.city.il.us', 'foo.ci.il.us',
     'foo.zx.us', 'foo@mail.gov.ua', 'foo@gwu.edu'].each do |domain|
      # Same per-input grouping as above so each `subject` is its own.
      context "given #{domain.inspect}" do
        subject { described_class.new(domain) }

        it "knows #{domain.inspect} is not a valid government domain" do
          expect(described_class.valid?(domain)).to eql(false)
          expect(subject.valid?).to eql(false)
        end
      end
    end
  end

  context 'localities' do
    subject { described_class.new(domain) }

    context 'when given github.gov' do
      let(:domain) { 'github.gov' }

      it "knows it's not a locality" do
        expect(subject.locality?).to eql(false)
      end
    end

    context 'when given foo.state.il.us' do
      let(:domain) { 'foo.state.il.us' }

      it "knows it's a locality" do
        expect(subject.locality?).to eql(true)
      end
    end
  end

  context 'class methods' do
    it 'returns the domain list' do
      expect(described_class.list).to be_a(Gman::DomainList)
    end

    it 'returns the academic list' do
      expect(described_class.academic_list).to be_a(Gman::DomainList)
    end

    it 'returns the config path' do
      expect(Dir.exist?(described_class.config_path)).to eql(true)
    end

    it 'returns the list path' do
      expect(File.exist?(described_class.list_path)).to eql(true)
    end

    it 'returns the academic list path' do
      expect(File.exist?(described_class.academic_list_path)).to eql(true)
    end
  end
end
|