gman 6.0.1 → 7.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (73) hide show
  1. checksums.yaml +5 -5
  2. data/.github/CODEOWNERS +3 -0
  3. data/.github/ISSUE_TEMPLATE/bug_report.md +28 -0
  4. data/.github/ISSUE_TEMPLATE/feature_request.md +21 -0
  5. data/.github/config.yml +23 -0
  6. data/.github/funding.yml +1 -0
  7. data/.github/no-response.yml +15 -0
  8. data/.github/release-drafter.yml +4 -0
  9. data/.github/settings.yml +33 -0
  10. data/.github/stale.yml +29 -0
  11. data/.gitignore +1 -0
  12. data/.rspec +2 -0
  13. data/.rubocop.yml +17 -5
  14. data/.rubocop_todo.yml +84 -0
  15. data/.ruby-version +1 -1
  16. data/Gemfile +2 -0
  17. data/bin/gman +6 -4
  18. data/bin/gman_filter +5 -7
  19. data/config/domains.txt +8446 -173
  20. data/config/vendor/academic.txt +8038 -0
  21. data/config/vendor/dotgovs.csv +5786 -5560
  22. data/docs/CODE_OF_CONDUCT.md +46 -0
  23. data/docs/CONTRIBUTING.md +92 -0
  24. data/{README.md → docs/README.md} +3 -3
  25. data/docs/SECURITY.md +3 -0
  26. data/docs/_config.yml +2 -0
  27. data/gman.gemspec +18 -17
  28. data/lib/gman.rb +25 -21
  29. data/lib/gman/country_codes.rb +17 -17
  30. data/lib/gman/domain_list.rb +123 -41
  31. data/lib/gman/identifier.rb +59 -21
  32. data/lib/gman/importer.rb +39 -40
  33. data/lib/gman/locality.rb +23 -21
  34. data/lib/gman/version.rb +3 -1
  35. data/script/add +2 -0
  36. data/script/alphabetize +2 -0
  37. data/script/cibuild +1 -1
  38. data/script/dedupe +2 -1
  39. data/script/profile +2 -1
  40. data/script/prune +5 -3
  41. data/script/reconcile-us +6 -3
  42. data/script/vendor +1 -1
  43. data/script/vendor-federal-de +3 -3
  44. data/script/vendor-municipal-de +3 -3
  45. data/script/vendor-nl +4 -1
  46. data/script/vendor-public-suffix +7 -6
  47. data/script/vendor-se +3 -3
  48. data/script/vendor-swot +43 -0
  49. data/script/vendor-us +8 -5
  50. data/spec/fixtures/domains.txt +4 -0
  51. data/{test → spec}/fixtures/obama.txt +0 -0
  52. data/spec/gman/bin_spec.rb +101 -0
  53. data/spec/gman/country_code_spec.rb +39 -0
  54. data/spec/gman/domain_list_spec.rb +110 -0
  55. data/spec/gman/domains_spec.rb +25 -0
  56. data/spec/gman/identifier_spec.rb +218 -0
  57. data/spec/gman/importer_spec.rb +236 -0
  58. data/spec/gman/locality_spec.rb +24 -0
  59. data/spec/gman_spec.rb +74 -0
  60. data/spec/spec_helper.rb +31 -0
  61. metadata +86 -73
  62. data/CONTRIBUTING.md +0 -22
  63. data/Rakefile +0 -22
  64. data/test/fixtures/domains.txt +0 -2
  65. data/test/helper.rb +0 -40
  66. data/test/test_gman.rb +0 -62
  67. data/test/test_gman_bin.rb +0 -75
  68. data/test/test_gman_country_codes.rb +0 -18
  69. data/test/test_gman_domains.rb +0 -33
  70. data/test/test_gman_filter.rb +0 -17
  71. data/test/test_gman_identifier.rb +0 -106
  72. data/test/test_gman_importer.rb +0 -250
  73. data/test/test_gman_locality.rb +0 -10
@@ -0,0 +1,25 @@
1
+ # frozen_string_literal: true
2
+ # Runs each group from Gman.list through Gman::Importer#valid_domain?.
3
+ RSpec.describe 'Gman domains' do
4
+ let(:resolve_domains?) { ENV['GMAN_RESOLVE_DOMAINS'] == 'true' }
5
+ let(:importer) { Gman::Importer.new({}) }
6
+ let(:options) { { skip_dupe: true, skip_resolve: !resolve_domains? } }
7
+ # skip_resolve is true unless GMAN_RESOLVE_DOMAINS is exactly 'true'.
8
+ Gman.list.to_h.each do |group, domains|
9
+ next if ['non-us gov', 'non-us mil', 'US Federal'].include?(group)
10
+ # One context (with a single example) is generated per remaining group.
11
+ context "the #{group} group" do
12
+ it 'only contains valid domains' do
13
+ invalid_domains = []
14
+ # Check the group's domains on four threads, collecting every failure.
15
+ Parallel.each(domains, in_threads: 4) do |domain|
16
+ next if importer.valid_domain?(domain, options)
17
+
18
+ invalid_domains.push domain
19
+ end
20
+
21
+ expect(invalid_domains).to be_empty
22
+ end
23
+ end
24
+ end
25
+ end
@@ -0,0 +1,218 @@
1
+ # frozen_string_literal: true
2
+ # Exercises Gman's type predicates and state/city/agency/organization lookups.
3
+ RSpec.describe 'Gman identifier' do
4
+ subject { Gman.new(domain) }
5
+ # Default domain; the contexts below override it with the domain under test.
6
+ let(:domain) { '' }
7
+
8
+ it 'parses the dotgov list' do
9
+ expect(Gman.dotgov_list).to be_a(CSV::Table)
10
+ expect(Gman.dotgov_list.first).to have_key('Domain Name')
11
+ end
12
+
13
+ context 'locality domains' do
14
+ context 'a state domain' do
15
+ let(:domain) { 'state.ak.us' }
16
+
17
+ it "knows it's a state" do
18
+ expect(subject).to be_a_state
19
+ expect(subject.type).to be(:state)
20
+ end
21
+
22
+ it 'knows the state' do
23
+ expect(subject.state).to eql('AK')
24
+ end
25
+
26
+ it "knows it's not a dotgov" do
27
+ expect(subject).not_to be_a_dotgov
28
+ end
29
+
30
+ it "know's it's not a city" do
31
+ expect(subject).not_to be_a_city
32
+ end
33
+
34
+ it "know's it's not a county" do
35
+ expect(subject).not_to be_a_county
36
+ end
37
+ end
38
+
39
+ context 'a city domain' do
40
+ let(:domain) { 'ci.champaign.il.us' }
41
+
42
+ it "knows it's a city" do
43
+ expect(subject).to be_a_city
44
+ expect(subject.type).to be(:city)
45
+ end
46
+
47
+ it 'knows the state' do
48
+ expect(subject.state).to eql('IL')
49
+ end
50
+
51
+ it "knows it's not a dotgov" do
52
+ expect(subject).not_to be_a_dotgov
53
+ end
54
+
55
+ it "know's it's not a state" do
56
+ expect(subject).not_to be_a_state
57
+ end
58
+
59
+ it "know's it's not a county" do
60
+ expect(subject).not_to be_a_county
61
+ end
62
+ end
63
+ # NOTE(review): 'dotgovs' is nested inside 'locality domains' — confirm intended.
64
+ context 'dotgovs' do
65
+ context 'A federal dotgov' do
66
+ let(:domain) { 'whitehouse.gov' }
67
+
68
+ it "knows it's federal" do
69
+ expect(subject).to be_federal
70
+ expect(subject.type).to be(:federal)
71
+ end
72
+
73
+ it "knows it's a dotgov" do
74
+ expect(subject).to be_a_dotgov
75
+ end
76
+
77
+ it "knows it's not a city" do
78
+ expect(subject).not_to be_a_city
79
+ end
80
+
81
+ it "knows it's not a state" do
82
+ expect(subject).not_to be_a_state
83
+ end
84
+
85
+ it "knows it's not a county" do
86
+ expect(subject).not_to be_a_county
87
+ end
88
+
89
+ it 'knows the state' do
90
+ expect(subject.state).to eql('DC')
91
+ end
92
+
93
+ it 'knows the city' do
94
+ expect(subject.city).to eql('Washington')
95
+ end
96
+
97
+ it 'knows the agency' do
98
+ expect(subject.agency).to eql('Executive Office of the President')
99
+ end
100
+
101
+ it 'knows the organization' do
102
+ expect(subject.organization).to eql('White House')
103
+ end
104
+ end
105
+
106
+ context 'a state .gov' do
107
+ let(:domain) { 'illinois.gov' }
108
+
109
+ it "knows it's a state" do
110
+ expect(subject).to be_a_state
111
+ expect(subject.type).to be(:state)
112
+ end
113
+
114
+ it "knows it's a dotgov" do
115
+ expect(subject).to be_a_dotgov
116
+ end
117
+
118
+ it "knows it's not a city" do
119
+ expect(subject).not_to be_a_city
120
+ end
121
+
122
+ it "knows it's not federal" do
123
+ expect(subject).not_to be_federal
124
+ end
125
+
126
+ it "knows it's not a county" do
127
+ expect(subject).not_to be_a_county
128
+ end
129
+
130
+ it 'knows the state' do
131
+ expect(subject.state).to eql('IL')
132
+ end
133
+
134
+ it 'knows the city' do
135
+ expect(subject.city).to eql('Springfield')
136
+ end
137
+ end
138
+
139
+ context 'a county .gov' do
140
+ let(:domain) { 'ALLEGHENYCOUNTYPA.GOV' }
141
+
142
+ it "knows it's a county" do
143
+ expect(subject).to be_a_county
144
+ expect(subject.type).to be(:county)
145
+ end
146
+
147
+ it "knows it's a dotgov" do
148
+ expect(subject).to be_a_dotgov
149
+ end
150
+
151
+ it "knows it's not a city" do
152
+ expect(subject).not_to be_a_city
153
+ end
154
+
155
+ it "knows it's not federal" do
156
+ expect(subject).not_to be_federal
157
+ end
158
+
159
+ it "knows it's not a state" do
160
+ expect(subject).not_to be_a_state
161
+ end
162
+
163
+ it 'knows the state' do
164
+ expect(subject.state).to eql('PA')
165
+ end
166
+
167
+ it 'knows the city' do
168
+ expect(subject.city).to eql('Pittsburgh')
169
+ end
170
+ end
171
+
172
+ context 'a city .gov' do
173
+ let(:domain) { 'ABERDEENMD.GOV' }
174
+
175
+ it "knows it's a city" do
176
+ expect(subject).to be_a_city
177
+ expect(subject.type).to be(:city)
178
+ end
179
+
180
+ it 'knows the city' do
181
+ expect(subject.city).to eql('Aberdeen')
182
+ end
183
+
184
+ it 'knows the state' do
185
+ expect(subject.state).to eql('MD')
186
+ end
187
+
188
+ it "knows it's a dotgov" do
189
+ expect(subject).to be_a_dotgov
190
+ end
191
+
192
+ it "know's it's not a state" do
193
+ expect(subject).not_to be_a_state
194
+ end
195
+
196
+ it "know's it's not a county" do
197
+ expect(subject).not_to be_a_county
198
+ end
199
+ end
200
+ end
201
+ end
202
+ # Generates one context per expected-type/domain pair in the hash below.
203
+ context "determining a domain's type" do
204
+ {
205
+ unknown: 'cityofperu.org',
206
+ "Canada municipal": 'acme.ca',
207
+ "Canada federal": 'canada.ca'
208
+ }.each do |expected, domain|
209
+ context "Given the #{domain} domain" do
210
+ let(:domain) { domain }
211
+
212
+ it "know's the domain's type" do
213
+ expect(subject.type).to eql(expected)
214
+ end
215
+ end
216
+ end
217
+ end
218
+ end
@@ -0,0 +1,236 @@
1
+ # frozen_string_literal: true
2
+
3
+ RSpec.describe Gman::Importer do
4
+ subject { described_class.new(domains) }
5
+
6
+ let(:domains) { { 'test' => ['example.com'] } }
7
+ let(:stdout) { StringIO.new }
8
+ let(:logger) { Logger.new(@stdout) }
9
+ let(:domain_list) { subject.domain_list }
10
+
11
+ before do
12
+ subject.instance_variable_set '@logger', logger
13
+ end
14
+
15
+ it 'inits the domain list' do
16
+ expect(domain_list).to be_a(Gman::DomainList)
17
+ expect(domain_list.count).to be(1)
18
+ expect(domain_list.domains.first).to eql('example.com')
19
+ end
20
+
21
+ it 'inits the logger' do
22
+ expect(subject.logger).to be_a(Logger)
23
+ end
24
+
25
+ it 'returns the current domain list' do
26
+ expect(subject.current).to be_a(Gman::DomainList)
27
+ end
28
+
29
+ it 'returns the resolver' do
30
+ expect(subject.resolver).to be_a(Resolv::DNS)
31
+ end
32
+
33
+ context 'domain rejection' do
34
+ it 'returns false' do
35
+ expect(subject.reject('example.com', 'reasons')).to be(false)
36
+ end
37
+
38
+ it 'returns the reason why asked' do
39
+ with_env 'RECONCILING', 'true' do
40
+ expect(subject.reject('example.com', 'reasons')).to eql('reasons')
41
+ end
42
+ end
43
+ end
44
+
45
+ context 'manipulating the domain list' do
46
+ context 'normalizing domains' do
47
+ let(:domains) { { 'test' => ['www.EXAMPLE.com/'] } }
48
+
49
+ before { subject.send :normalize_domains! }
50
+
51
+ it 'normalizes the domains' do
52
+ expect(domain_list.domains.first).to eql('example.com')
53
+ end
54
+ end
55
+
56
+ context 'removing invalid domains' do
57
+ let(:domains) { { 'test' => ['foo.github.io', 'example.com'] } }
58
+
59
+ before { subject.send :ensure_validity! }
60
+
61
+ it 'removes invalid domains' do
62
+ expect(domain_list.count).to be(1)
63
+ end
64
+ end
65
+ end
66
+
67
+ context 'with the current list stubbed' do
68
+ let(:stubbed_list) { Gman::DomainList.new(path: stubbed_list_path) }
69
+ let(:stubbed_file_contents) { File.read(stubbed_list_path) }
70
+
71
+ before { subject.instance_variable_set '@current', stubbed_list }
72
+
73
+ context 'writing' do
74
+ before { @current = subject.current.to_s }
75
+
76
+ before { subject.send :add_to_current }
77
+
78
+ after { File.write(stubbed_list_path, @current) }
79
+
80
+ context 'adding domains' do
81
+ let(:domains) do
82
+ { 'test' => ['example.com'], 'test2' => ['github.com'] }
83
+ end
84
+
85
+ it 'adds the domains' do
86
+ expected = "// test\nexample.com\n\n// test2\ngithub.com"
87
+ expect(stubbed_file_contents).to match(expected)
88
+ end
89
+ end
90
+
91
+ context 'importing' do
92
+ let(:domains) do
93
+ {
94
+ 'test' => ['www.example.com', 'foo.github.io'],
95
+ 'test2' => ['github.com', 'www.github.com', 'whitehouse.gov']
96
+ }
97
+ end
98
+
99
+ before { subject.import(skip_resolve: true) }
100
+
101
+ it 'imports' do
102
+ expected = "// test\nexample.com\nfoo.github.io"
103
+ expect(stubbed_file_contents).to match(expected)
104
+
105
+ expected = "// test2\ngithub.com\nwhitehouse.gov"
106
+ expect(stubbed_file_contents).to match(expected)
107
+ end
108
+ end
109
+ end
110
+ end
111
+
112
+ context 'domain validation' do
113
+ let(:domain) { '' }
114
+ let(:valid?) { subject.send(:ensure_valid, domain) }
115
+
116
+ context 'a valid domain' do
117
+ let(:domain) { 'whitehouse.gov' }
118
+
119
+ it 'is valid' do
120
+ expect(valid?).to be(true)
121
+ end
122
+ end
123
+
124
+ {
125
+ empty: '',
126
+ blacklisted: 'egovlink.com',
127
+ invalid: 'foo.invalid',
128
+ academic: 'harvard.edu',
129
+ "rejex'd": 'foo.github.io'
130
+ }.each_key do |type|
131
+ context "a #{type} domain" do
132
+ it 'is invalid' do
133
+ expect(valid?).to be(false)
134
+ end
135
+ end
136
+ end
137
+ end
138
+
139
+ context 'duplicate domains' do
140
+ let(:dupe?) { subject.send(:dupe?, domain) }
141
+ let(:ensure_not_dupe) { subject.send(:ensure_not_dupe, domain) }
142
+
143
+ context 'a unique domain' do
144
+ let(:domain) { 'gman.com' }
145
+
146
+ it 'is not a dupe' do
147
+ expect(dupe?).to be_falsy
148
+ expect(ensure_not_dupe).to be_truthy
149
+ end
150
+ end
151
+
152
+ context 'a duplicate domain' do
153
+ let(:domain) { 'gov' }
154
+
155
+ it "knows it's a dupe" do
156
+ expect(dupe?).to be_truthy
157
+ expect(ensure_not_dupe).to be_falsy
158
+ end
159
+
160
+ context 'a subdomain' do
161
+ let(:domain) { 'whitehouse.gov' }
162
+
163
+ it "know when a domain's a subdomain of an existing domain" do
164
+ expect(dupe?).to be_truthy
165
+ expect(ensure_not_dupe).to be_falsy
166
+ end
167
+ end
168
+ end
169
+ end
170
+
171
+ context 'domain resolution' do
172
+ let(:resolves?) { subject.domain_resolves?(domain) }
173
+ let(:ensure_resolves) { subject.send(:ensure_resolves, domain) }
174
+
175
+ context 'a valid domain' do
176
+ let(:domain) { 'github.com' }
177
+
178
+ it 'resolves' do
179
+ expect(resolves?).to be_truthy
180
+ expect(ensure_resolves).to be_truthy
181
+ end
182
+ end
183
+
184
+ context 'an invalid domain' do
185
+ let(:domain) { 'foo.invalid' }
186
+
187
+ it "doesn't resolve" do
188
+ expect(resolves?).to be_falsy
189
+ expect(ensure_resolves).to be_falsy
190
+ end
191
+ end
192
+ end
193
+
194
+ context 'regex checks' do
195
+ let(:ensure_regex) { subject.send(:ensure_regex, domain) }
196
+
197
+ context 'valid domains' do
198
+ let(:domain) { 'example.com' }
199
+
200
+ it 'passes' do
201
+ expect(ensure_regex).to be_truthy
202
+ end
203
+ end
204
+
205
+ [
206
+ 'home.example.com', 'site.example.com', 'user.example.com',
207
+ 'foo.weebly.com', 'foo.wordpress.com', 'foo.govoffice.com',
208
+ 'foo.govoffice1.com', 'foo.homestead.com', 'foo.wix.com',
209
+ 'foo.blogspot.com', 'foo.tripod.com', 'foo.squarespace.com',
210
+ 'foo.github.io', 'ci.champaign.il.us'
211
+ ].each do |domain|
212
+ context "a #{domain} domain" do
213
+ let(:domain) { domain }
214
+
215
+ it 'rejects the domain' do
216
+ expect(ensure_regex).to be_falsy
217
+ end
218
+ end
219
+ end
220
+ end
221
+
222
+ context 'normalizing domains' do
223
+ let(:normalized_domain) { subject.normalize_domain(domain) }
224
+
225
+ [
226
+ 'http://example.com', 'www.example.com', 'example.com/',
227
+ 'example.com/foo', 'example.com/foo/', 'EXAMPLE.com'
228
+ ].each do |domain|
229
+ let(:domain) { domain }
230
+
231
+ it 'normalizes the domain' do
232
+ expect(normalized_domain).to eql('example.com')
233
+ end
234
+ end
235
+ end
236
+ end