gman 6.0.1 → 7.0.4

Sign up to get free protection for your applications and to get access to all the features.
Files changed (73) hide show
  1. checksums.yaml +5 -5
  2. data/.github/CODEOWNERS +3 -0
  3. data/.github/ISSUE_TEMPLATE/bug_report.md +28 -0
  4. data/.github/ISSUE_TEMPLATE/feature_request.md +21 -0
  5. data/.github/config.yml +23 -0
  6. data/.github/funding.yml +1 -0
  7. data/.github/no-response.yml +15 -0
  8. data/.github/release-drafter.yml +4 -0
  9. data/.github/settings.yml +33 -0
  10. data/.github/stale.yml +29 -0
  11. data/.gitignore +1 -0
  12. data/.rspec +2 -0
  13. data/.rubocop.yml +17 -5
  14. data/.rubocop_todo.yml +84 -0
  15. data/.ruby-version +1 -1
  16. data/Gemfile +2 -0
  17. data/bin/gman +6 -4
  18. data/bin/gman_filter +5 -7
  19. data/config/domains.txt +8446 -173
  20. data/config/vendor/academic.txt +8038 -0
  21. data/config/vendor/dotgovs.csv +5786 -5560
  22. data/docs/CODE_OF_CONDUCT.md +46 -0
  23. data/docs/CONTRIBUTING.md +92 -0
  24. data/{README.md → docs/README.md} +3 -3
  25. data/docs/SECURITY.md +3 -0
  26. data/docs/_config.yml +2 -0
  27. data/gman.gemspec +18 -17
  28. data/lib/gman.rb +25 -21
  29. data/lib/gman/country_codes.rb +17 -17
  30. data/lib/gman/domain_list.rb +123 -41
  31. data/lib/gman/identifier.rb +59 -21
  32. data/lib/gman/importer.rb +39 -40
  33. data/lib/gman/locality.rb +23 -21
  34. data/lib/gman/version.rb +3 -1
  35. data/script/add +2 -0
  36. data/script/alphabetize +2 -0
  37. data/script/cibuild +1 -1
  38. data/script/dedupe +2 -1
  39. data/script/profile +2 -1
  40. data/script/prune +5 -3
  41. data/script/reconcile-us +6 -3
  42. data/script/vendor +1 -1
  43. data/script/vendor-federal-de +3 -3
  44. data/script/vendor-municipal-de +3 -3
  45. data/script/vendor-nl +4 -1
  46. data/script/vendor-public-suffix +7 -6
  47. data/script/vendor-se +3 -3
  48. data/script/vendor-swot +43 -0
  49. data/script/vendor-us +8 -5
  50. data/spec/fixtures/domains.txt +4 -0
  51. data/{test → spec}/fixtures/obama.txt +0 -0
  52. data/spec/gman/bin_spec.rb +101 -0
  53. data/spec/gman/country_code_spec.rb +39 -0
  54. data/spec/gman/domain_list_spec.rb +110 -0
  55. data/spec/gman/domains_spec.rb +25 -0
  56. data/spec/gman/identifier_spec.rb +218 -0
  57. data/spec/gman/importer_spec.rb +236 -0
  58. data/spec/gman/locality_spec.rb +24 -0
  59. data/spec/gman_spec.rb +74 -0
  60. data/spec/spec_helper.rb +31 -0
  61. metadata +86 -73
  62. data/CONTRIBUTING.md +0 -22
  63. data/Rakefile +0 -22
  64. data/test/fixtures/domains.txt +0 -2
  65. data/test/helper.rb +0 -40
  66. data/test/test_gman.rb +0 -62
  67. data/test/test_gman_bin.rb +0 -75
  68. data/test/test_gman_country_codes.rb +0 -18
  69. data/test/test_gman_domains.rb +0 -33
  70. data/test/test_gman_filter.rb +0 -17
  71. data/test/test_gman_identifier.rb +0 -106
  72. data/test/test_gman_importer.rb +0 -250
  73. data/test/test_gman_locality.rb +0 -10
@@ -0,0 +1,25 @@
1
+ # frozen_string_literal: true
2
+
3
# Validates the bundled domain list group by group: every domain in a group
# must be accepted by Gman::Importer#valid_domain?.
RSpec.describe 'Gman domains' do
  # Groups that are not validated by this spec.
  skipped_groups = ['non-us gov', 'non-us mil', 'US Federal']

  let(:importer) { Gman::Importer.new({}) }
  # skip_resolve is true unless GMAN_RESOLVE_DOMAINS is exactly 'true'.
  let(:resolve_domains?) { ENV['GMAN_RESOLVE_DOMAINS'] == 'true' }
  let(:options) { { skip_dupe: true, skip_resolve: !resolve_domains? } }

  checked_groups = Gman.list.to_h.reject { |group, _| skipped_groups.include?(group) }

  checked_groups.each do |group, domains|
    context "the #{group} group" do
      it 'only contains valid domains' do
        rejected = []

        # Validate in four threads, collecting every domain that fails.
        Parallel.each(domains, in_threads: 4) do |candidate|
          rejected << candidate unless importer.valid_domain?(candidate, options)
        end

        expect(rejected).to be_empty
      end
    end
  end
end
@@ -0,0 +1,218 @@
1
+ # frozen_string_literal: true
2
+
3
# Exercises Gman's identification helpers (type, state, city, agency,
# organization and the be_a_* predicates) against representative domains.
RSpec.describe 'Gman identifier' do
  subject { Gman.new(domain) }

  # Overridden by each nested context with the domain under test.
  let(:domain) { '' }

  it 'parses the dotgov list' do
    expect(Gman.dotgov_list).to be_a(CSV::Table)
    expect(Gman.dotgov_list.first).to have_key('Domain Name')
  end

  # Domains of the form <name>.<state>.us
  context 'locality domains' do
    context 'a state domain' do
      let(:domain) { 'state.ak.us' }

      it "knows it's a state" do
        expect(subject).to be_a_state
        expect(subject.type).to be(:state)
      end

      it 'knows the state' do
        expect(subject.state).to eql('AK')
      end

      it "knows it's not a dotgov" do
        expect(subject).not_to be_a_dotgov
      end

      it "knows it's not a city" do
        expect(subject).not_to be_a_city
      end

      it "knows it's not a county" do
        expect(subject).not_to be_a_county
      end
    end

    context 'a city domain' do
      let(:domain) { 'ci.champaign.il.us' }

      it "knows it's a city" do
        expect(subject).to be_a_city
        expect(subject.type).to be(:city)
      end

      it 'knows the state' do
        expect(subject.state).to eql('IL')
      end

      it "knows it's not a dotgov" do
        expect(subject).not_to be_a_dotgov
      end

      it "knows it's not a state" do
        expect(subject).not_to be_a_state
      end

      it "knows it's not a county" do
        expect(subject).not_to be_a_county
      end
    end
  end

  # Kept as a sibling of 'locality domains' so that .gov examples are not
  # reported under the locality group.
  context 'dotgovs' do
    context 'A federal dotgov' do
      let(:domain) { 'whitehouse.gov' }

      it "knows it's federal" do
        expect(subject).to be_federal
        expect(subject.type).to be(:federal)
      end

      it "knows it's a dotgov" do
        expect(subject).to be_a_dotgov
      end

      it "knows it's not a city" do
        expect(subject).not_to be_a_city
      end

      it "knows it's not a state" do
        expect(subject).not_to be_a_state
      end

      it "knows it's not a county" do
        expect(subject).not_to be_a_county
      end

      it 'knows the state' do
        expect(subject.state).to eql('DC')
      end

      it 'knows the city' do
        expect(subject.city).to eql('Washington')
      end

      it 'knows the agency' do
        expect(subject.agency).to eql('Executive Office of the President')
      end

      it 'knows the organization' do
        expect(subject.organization).to eql('White House')
      end
    end

    context 'a state .gov' do
      let(:domain) { 'illinois.gov' }

      it "knows it's a state" do
        expect(subject).to be_a_state
        expect(subject.type).to be(:state)
      end

      it "knows it's a dotgov" do
        expect(subject).to be_a_dotgov
      end

      it "knows it's not a city" do
        expect(subject).not_to be_a_city
      end

      it "knows it's not federal" do
        expect(subject).not_to be_federal
      end

      it "knows it's not a county" do
        expect(subject).not_to be_a_county
      end

      it 'knows the state' do
        expect(subject.state).to eql('IL')
      end

      it 'knows the city' do
        expect(subject.city).to eql('Springfield')
      end
    end

    context 'a county .gov' do
      let(:domain) { 'ALLEGHENYCOUNTYPA.GOV' }

      it "knows it's a county" do
        expect(subject).to be_a_county
        expect(subject.type).to be(:county)
      end

      it "knows it's a dotgov" do
        expect(subject).to be_a_dotgov
      end

      it "knows it's not a city" do
        expect(subject).not_to be_a_city
      end

      it "knows it's not federal" do
        expect(subject).not_to be_federal
      end

      it "knows it's not a state" do
        expect(subject).not_to be_a_state
      end

      it 'knows the state' do
        expect(subject.state).to eql('PA')
      end

      it 'knows the city' do
        expect(subject.city).to eql('Pittsburgh')
      end
    end

    context 'a city .gov' do
      let(:domain) { 'ABERDEENMD.GOV' }

      it "knows it's a city" do
        expect(subject).to be_a_city
        expect(subject.type).to be(:city)
      end

      it 'knows the city' do
        expect(subject.city).to eql('Aberdeen')
      end

      it 'knows the state' do
        expect(subject.state).to eql('MD')
      end

      it "knows it's a dotgov" do
        expect(subject).to be_a_dotgov
      end

      it "knows it's not a state" do
        expect(subject).not_to be_a_state
      end

      it "knows it's not a county" do
        expect(subject).not_to be_a_county
      end
    end
  end

  context "determining a domain's type" do
    # Expected type (a Symbol) => domain that should be identified as that type.
    {
      unknown: 'cityofperu.org',
      "Canada municipal": 'acme.ca',
      "Canada federal": 'canada.ca'
    }.each do |expected, candidate|
      context "Given the #{candidate} domain" do
        let(:domain) { candidate }

        it "knows the domain's type" do
          expect(subject.type).to eql(expected)
        end
      end
    end
  end
end
@@ -0,0 +1,236 @@
1
+ # frozen_string_literal: true
2
+
3
# Specs for Gman::Importer: domain-list initialisation, rejection reporting,
# normalisation, validation, duplicate detection, DNS resolution and the
# regex-based rejections.
RSpec.describe Gman::Importer do
  subject { described_class.new(domains) }

  let(:domains) { { 'test' => ['example.com'] } }
  let(:stdout) { StringIO.new }
  # Logger backed by the in-memory `stdout` buffer defined above.
  let(:logger) { Logger.new(stdout) }
  let(:domain_list) { subject.domain_list }

  before do
    # Swap the importer's logger for the buffer-backed one.
    subject.instance_variable_set '@logger', logger
  end

  it 'inits the domain list' do
    expect(domain_list).to be_a(Gman::DomainList)
    expect(domain_list.count).to be(1)
    expect(domain_list.domains.first).to eql('example.com')
  end

  it 'inits the logger' do
    expect(subject.logger).to be_a(Logger)
  end

  it 'returns the current domain list' do
    expect(subject.current).to be_a(Gman::DomainList)
  end

  it 'returns the resolver' do
    expect(subject.resolver).to be_a(Resolv::DNS)
  end

  context 'domain rejection' do
    it 'returns false' do
      expect(subject.reject('example.com', 'reasons')).to be(false)
    end

    it 'returns the reason when asked' do
      # With RECONCILING set, reject is expected to hand back the reason.
      with_env 'RECONCILING', 'true' do
        expect(subject.reject('example.com', 'reasons')).to eql('reasons')
      end
    end
  end

  context 'manipulating the domain list' do
    context 'normalizing domains' do
      let(:domains) { { 'test' => ['www.EXAMPLE.com/'] } }

      before { subject.send :normalize_domains! }

      it 'normalizes the domains' do
        expect(domain_list.domains.first).to eql('example.com')
      end
    end

    context 'removing invalid domains' do
      let(:domains) { { 'test' => ['foo.github.io', 'example.com'] } }

      before { subject.send :ensure_validity! }

      it 'removes invalid domains' do
        expect(domain_list.count).to be(1)
      end
    end
  end

  context 'with the current list stubbed' do
    let(:stubbed_list) { Gman::DomainList.new(path: stubbed_list_path) }
    let(:stubbed_file_contents) { File.read(stubbed_list_path) }

    before { subject.instance_variable_set '@current', stubbed_list }

    context 'writing' do
      # Snapshot the stubbed list before it is modified ...
      before { @current = subject.current.to_s }

      before { subject.send :add_to_current }

      # ... and write the snapshot back once the example has run.
      after { File.write(stubbed_list_path, @current) }

      context 'adding domains' do
        let(:domains) do
          { 'test' => ['example.com'], 'test2' => ['github.com'] }
        end

        it 'adds the domains' do
          expected = "// test\nexample.com\n\n// test2\ngithub.com"
          expect(stubbed_file_contents).to match(expected)
        end
      end

      context 'importing' do
        let(:domains) do
          {
            'test' => ['www.example.com', 'foo.github.io'],
            'test2' => ['github.com', 'www.github.com', 'whitehouse.gov']
          }
        end

        before { subject.import(skip_resolve: true) }

        it 'imports' do
          expected = "// test\nexample.com\nfoo.github.io"
          expect(stubbed_file_contents).to match(expected)

          expected = "// test2\ngithub.com\nwhitehouse.gov"
          expect(stubbed_file_contents).to match(expected)
        end
      end
    end
  end

  context 'domain validation' do
    let(:domain) { '' }
    let(:valid?) { subject.send(:ensure_valid, domain) }

    context 'a valid domain' do
      let(:domain) { 'whitehouse.gov' }

      it 'is valid' do
        expect(valid?).to be(true)
      end
    end

    # Each entry is bound to `domain` in its own context so the example
    # checks the listed domain rather than the empty default.
    # NOTE(review): if ensure_valid does not apply the regex checks, the
    # "regex'd" case may need to go through valid_domain? instead - confirm
    # against Gman::Importer.
    {
      empty: '',
      blacklisted: 'egovlink.com',
      invalid: 'foo.invalid',
      academic: 'harvard.edu',
      "regex'd": 'foo.github.io'
    }.each do |type, invalid_domain|
      context "a #{type} domain" do
        let(:domain) { invalid_domain }

        it 'is invalid' do
          expect(valid?).to be(false)
        end
      end
    end
  end

  context 'duplicate domains' do
    let(:dupe?) { subject.send(:dupe?, domain) }
    let(:ensure_not_dupe) { subject.send(:ensure_not_dupe, domain) }

    context 'a unique domain' do
      let(:domain) { 'gman.com' }

      it 'is not a dupe' do
        expect(dupe?).to be_falsy
        expect(ensure_not_dupe).to be_truthy
      end
    end

    context 'a duplicate domain' do
      let(:domain) { 'gov' }

      it "knows it's a dupe" do
        expect(dupe?).to be_truthy
        expect(ensure_not_dupe).to be_falsy
      end

      context 'a subdomain' do
        let(:domain) { 'whitehouse.gov' }

        it "knows when a domain's a subdomain of an existing domain" do
          expect(dupe?).to be_truthy
          expect(ensure_not_dupe).to be_falsy
        end
      end
    end
  end

  context 'domain resolution' do
    let(:resolves?) { subject.domain_resolves?(domain) }
    let(:ensure_resolves) { subject.send(:ensure_resolves, domain) }

    context 'a valid domain' do
      let(:domain) { 'github.com' }

      it 'resolves' do
        expect(resolves?).to be_truthy
        expect(ensure_resolves).to be_truthy
      end
    end

    context 'an invalid domain' do
      let(:domain) { 'foo.invalid' }

      it "doesn't resolve" do
        expect(resolves?).to be_falsy
        expect(ensure_resolves).to be_falsy
      end
    end
  end

  context 'regex checks' do
    let(:ensure_regex) { subject.send(:ensure_regex, domain) }

    context 'valid domains' do
      let(:domain) { 'example.com' }

      it 'passes' do
        expect(ensure_regex).to be_truthy
      end
    end

    [
      'home.example.com', 'site.example.com', 'user.example.com',
      'foo.weebly.com', 'foo.wordpress.com', 'foo.govoffice.com',
      'foo.govoffice1.com', 'foo.homestead.com', 'foo.wix.com',
      'foo.blogspot.com', 'foo.tripod.com', 'foo.squarespace.com',
      'foo.github.io', 'ci.champaign.il.us'
    ].each do |rejected_domain|
      context "a #{rejected_domain} domain" do
        let(:domain) { rejected_domain }

        it 'rejects the domain' do
          expect(ensure_regex).to be_falsy
        end
      end
    end
  end

  context 'normalizing domains' do
    let(:normalized_domain) { subject.normalize_domain(domain) }

    # One context per input: redefining `let(:domain)` repeatedly in a single
    # example group would leave only the last definition in effect.
    [
      'http://example.com', 'www.example.com', 'example.com/',
      'example.com/foo', 'example.com/foo/', 'EXAMPLE.com'
    ].each do |raw_domain|
      context "given #{raw_domain}" do
        let(:domain) { raw_domain }

        it 'normalizes the domain' do
          expect(normalized_domain).to eql('example.com')
        end
      end
    end
  end
end