gman 7.0.0 → 7.0.5

Sign up to get free protection for your applications and to get access to all the features.
Files changed (74) hide show
  1. checksums.yaml +5 -5
  2. data/.github/CODEOWNERS +3 -0
  3. data/.github/ISSUE_TEMPLATE/bug_report.md +28 -0
  4. data/.github/ISSUE_TEMPLATE/feature_request.md +21 -0
  5. data/.github/config.yml +23 -0
  6. data/.github/funding.yml +1 -0
  7. data/.github/no-response.yml +15 -0
  8. data/.github/release-drafter.yml +4 -0
  9. data/.github/settings.yml +33 -0
  10. data/.github/stale.yml +29 -0
  11. data/.gitignore +1 -0
  12. data/.rspec +2 -0
  13. data/.rubocop.yml +14 -5
  14. data/.rubocop_todo.yml +84 -0
  15. data/.ruby-version +1 -1
  16. data/Gemfile +2 -0
  17. data/bin/gman +6 -4
  18. data/bin/gman_filter +5 -7
  19. data/config/domains.txt +8454 -168
  20. data/config/vendor/academic.txt +6 -7
  21. data/config/vendor/dotgovs.csv +5786 -5560
  22. data/docs/CODE_OF_CONDUCT.md +46 -0
  23. data/docs/CONTRIBUTING.md +92 -0
  24. data/{README.md → docs/README.md} +3 -3
  25. data/docs/SECURITY.md +3 -0
  26. data/docs/_config.yml +2 -0
  27. data/gman.gemspec +18 -17
  28. data/lib/gman.rb +4 -2
  29. data/lib/gman/country_codes.rb +17 -17
  30. data/lib/gman/domain_list.rb +25 -9
  31. data/lib/gman/identifier.rb +57 -19
  32. data/lib/gman/importer.rb +31 -21
  33. data/lib/gman/locality.rb +8 -6
  34. data/lib/gman/version.rb +3 -1
  35. data/script/add +2 -0
  36. data/script/alphabetize +2 -0
  37. data/script/cibuild +1 -1
  38. data/script/dedupe +2 -1
  39. data/script/profile +2 -1
  40. data/script/prune +5 -3
  41. data/script/reconcile-us +6 -3
  42. data/script/vendor-federal-de +2 -1
  43. data/script/vendor-municipal-de +2 -1
  44. data/script/vendor-nl +2 -0
  45. data/script/vendor-public-suffix +6 -4
  46. data/script/vendor-se +2 -1
  47. data/script/vendor-swot +3 -1
  48. data/script/vendor-us +5 -3
  49. data/spec/fixtures/domains.txt +4 -0
  50. data/{test → spec}/fixtures/obama.txt +0 -0
  51. data/spec/gman/bin_spec.rb +101 -0
  52. data/spec/gman/country_code_spec.rb +39 -0
  53. data/spec/gman/domain_list_spec.rb +110 -0
  54. data/spec/gman/domains_spec.rb +25 -0
  55. data/spec/gman/identifier_spec.rb +218 -0
  56. data/spec/gman/importer_spec.rb +236 -0
  57. data/spec/gman/locality_spec.rb +24 -0
  58. data/spec/gman_spec.rb +74 -0
  59. data/spec/spec_helper.rb +31 -0
  60. metadata +89 -81
  61. data/.rake_tasks +0 -0
  62. data/CONTRIBUTING.md +0 -22
  63. data/Rakefile +0 -22
  64. data/test/fixtures/domains.txt +0 -2
  65. data/test/helper.rb +0 -48
  66. data/test/test_gman.rb +0 -56
  67. data/test/test_gman_bin.rb +0 -75
  68. data/test/test_gman_country_codes.rb +0 -18
  69. data/test/test_gman_domain_list.rb +0 -112
  70. data/test/test_gman_domains.rb +0 -32
  71. data/test/test_gman_filter.rb +0 -17
  72. data/test/test_gman_identifier.rb +0 -106
  73. data/test/test_gman_importer.rb +0 -244
  74. data/test/test_gman_locality.rb +0 -10
@@ -0,0 +1,101 @@
# frozen_string_literal: true

# End-to-end checks for the `gman` and `gman_filter` executables. Every example
# launches the selected binary through `bundle exec` and inspects the combined
# stdout/stderr text together with the exit status of the process.
RSpec.describe 'Gman bin' do
  let(:command) { 'gman' }
  let(:domain) { 'whitehouse.gov' }
  let(:args) { [domain] }
  let(:bin_path) do
    File.expand_path "../../bin/#{command}", File.dirname(__FILE__)
  end

  # Open3.capture2e returns a two-element array: [combined output, status].
  let(:response_parts) { Open3.capture2e('bundle', 'exec', bin_path, *args) }
  let(:output) { response_parts.first }
  let(:status) { response_parts.last }
  let(:exit_code) { status.exitstatus }

  # ANSI escape sequence for green text.
  let(:green_escape) { /\e\[32m/ }

  context 'a valid domain' do
    it 'parses the domain' do
      expect(output).to match('Domain : whitehouse.gov')
    end

    it "knows it's valid" do
      expect(output).to match('Valid government domain')
      expect(exit_code).to be(0)
    end

    it 'knows the type' do
      expect(output).to match(/federal/i)
    end

    # One example per detail that should appear verbatim in the output.
    {
      'agency' => 'Executive Office of the President',
      'country' => 'United States',
      'city' => 'Washington',
      'state' => 'DC'
    }.each do |detail, text|
      it "knows the #{detail}" do
        expect(output).to match(text)
      end
    end

    it 'colors by default' do
      expect(output).to match(green_escape)
    end

    context 'with colorization disabled' do
      let(:args) { [domain, '--no-color'] }

      it "doesn't color" do
        expect(output).not_to match(green_escape)
      end
    end
  end

  context 'with no args' do
    let(:args) { [] }

    it 'displays the help text' do
      expect(output).to match('USAGE')
    end
  end

  context 'an invalid domain' do
    let(:domain) { 'foo.invalid' }

    it 'knows the domain is invalid' do
      expect(output).to match('Invalid domain')
      expect(exit_code).to be(1)
    end
  end

  context 'a non-government domain' do
    let(:domain) { 'github.com' }

    it "knows it's not a government domain" do
      expect(output).to match('Not a government domain')
      expect(exit_code).to be(1)
    end
  end

  context 'filtering' do
    let(:command) { 'gman_filter' }
    let(:txt_path) do
      File.expand_path '../fixtures/obama.txt', File.dirname(__FILE__)
    end
    let(:args) { [txt_path] }

    # The addresses the filter is expected to print, one per line.
    let(:government_addresses) do
      %w[
        mr.senator@obama.senate.gov
        president@whitehouse.gov
        commander.in.chief@us.army.mil
      ]
    end

    it 'returns only government domains' do
      expected = government_addresses.map { |address| "#{address}\n" }.join

      expect(output).to eql(expected)
    end
  end
end
@@ -0,0 +1,39 @@
# frozen_string_literal: true

# Checks country lookups for a handful of representative domains. Each row
# lists a domain, the country name Gman should report (nil when no country
# object is expected) and the alpha2 code it should report.
RSpec.describe 'Gman Country Codes' do
  [
    ['whitehouse.gov', 'United States of America', 'us'],
    ['foo.gov.uk',
     'United Kingdom of Great Britain and Northern Ireland', 'gb'],
    ['army.mil', 'United States of America', 'us'],
    ['foo.gc.ca', 'Canada', 'ca'],
    ['foo.eu', nil, 'eu']
  ].each do |domain, expected_country, expected_alpha2|
    context "given #{domain.inspect}" do
      subject { Gman.new(domain) }

      let(:country) { subject.country }

      it 'knows the country' do
        if expected_country
          expect(country.name).to eql(expected_country)
        else
          expect(country).to be_nil
        end
      end

      it 'knows the alpha2' do
        expect(subject.alpha2).to eql(expected_alpha2)
      end
    end
  end
end
@@ -0,0 +1,110 @@
# frozen_string_literal: true

# Exercises Gman::DomainList through each of its three initializers (path,
# contents, data) and runs the same expectations against every variant.
RSpec.describe Gman::DomainList do
  let(:data) { subject.data }
  let(:canada) { data['Canada municipal'] }

  %i[path contents data].each do |type|
    context "when initialized by #{type}" do
      subject do
        case type
        when :path
          described_class.new(path: Gman.list_path)
        when :contents
          contents = File.read(Gman.list_path)
          described_class.new(contents: contents)
        when :data
          list_data = described_class.new(path: Gman.list_path).to_h
          described_class.new(data: list_data)
        end
      end

      it 'stores the init var' do
        expect(subject.send(type)).not_to be_nil
      end

      it 'returns the domain data' do
        expect(data).to have_key('Canada federal')
        expect(data.values.flatten).to include('gov')
      end

      it 'returns the list contents' do
        expect(subject.contents).to match(/^gov$/)
      end

      it 'knows the list path' do
        expect(subject.path).to eql(Gman.list_path)
      end

      it 'returns the PublicSuffix list' do
        expect(subject.public_suffix_list).to be_a(PublicSuffix::List)
      end

      it 'knows if a domain is valid' do
        expect(subject.valid?('whitehouse.gov')).to be(true)
      end

      it 'knows if a domain is invalid' do
        expect(subject.valid?('example.com')).to be(false)
      end

      it 'returns the domain groups' do
        expect(subject.groups).to include('Canada federal')
      end

      it 'returns the domains' do
        expect(subject.domains).to include('gov')
      end

      it 'returns the domain count' do
        expect(subject.count).to be_a(Integer)
        expect(subject.count).to be > 100
      end

      it 'alphabetizes the list' do
        # Reverse in place instead of shuffling: a shuffle can leave the first
        # element where it was, which made the precondition below fail at
        # random. Reversing puts the list out of order deterministically.
        canada.reverse!
        expect(canada.first).not_to eql('100milehouse.com')
        subject.alphabetize
        expect(canada.first).to eql('100milehouse.com')
      end

      it 'outputs public suffix format' do
        # `include` compares literally; a String handed to `match` is turned
        # into a Regexp, where every "." would match any character.
        expect(subject.to_s).to include("// Canada federal\ncanada.ca\n")
      end

      it "finds a domain's parent" do
        expect(subject.parent_domain('foo.gov.uk')).to eql('gov.uk')
      end

      context 'with the list path stubbed' do
        # NOTE(review): stubbed_list_path is not defined in this file;
        # presumably it comes from the shared spec helper - confirm.
        let(:stubbed_file_contents) { File.read(stubbed_list_path) }

        before do
          subject.instance_variable_set('@path', stubbed_list_path)
        end

        context 'with list data stubbed' do
          before do
            subject.data = { 'foo' => ['!mail.bar.gov', 'bar.gov', 'baz.net'] }
          end

          context 'alphabetizing' do
            before { subject.alphabetize }

            it 'puts exceptions last' do
              expect(subject.data['foo'].last).to eql('!mail.bar.gov')
            end
          end

          context 'writing' do
            before { subject.write }

            it 'writes the contents' do
              expect(stubbed_file_contents)
                .to include("// foo\nbar.gov\nbaz.net")
            end
          end
        end
      end
    end
  end
end
@@ -0,0 +1,25 @@
# frozen_string_literal: true

# Validates every domain in the bundled list, one example per group. Domains
# are only resolved over the network when GMAN_RESOLVE_DOMAINS is "true".
RSpec.describe 'Gman domains' do
  let(:resolve_domains?) { ENV['GMAN_RESOLVE_DOMAINS'] == 'true' }
  let(:importer) { Gman::Importer.new({}) }
  let(:options) { { skip_dupe: true, skip_resolve: !resolve_domains? } }

  Gman.list.to_h.each do |group, domains|
    next if ['non-us gov', 'non-us mil', 'US Federal'].include?(group)

    context "the #{group} group" do
      it 'only contains valid domains' do
        # Collect results through Parallel.map's return value so the worker
        # threads never append to a shared Array. Each slot holds the domain
        # when it is invalid and nil otherwise.
        results = Parallel.map(domains, in_threads: 4) do |domain|
          domain unless importer.valid_domain?(domain, options)
        end
        invalid_domains = results.compact

        expect(invalid_domains).to be_empty
      end
    end
  end
end
@@ -0,0 +1,218 @@
# frozen_string_literal: true

# Covers how Gman classifies a domain (state, city, county, federal, or a
# named list group) and the location/agency details it reports for it.
RSpec.describe 'Gman identifier' do
  subject { Gman.new(domain) }

  let(:domain) { '' }

  it 'parses the dotgov list' do
    expect(Gman.dotgov_list).to be_a(CSV::Table)
    expect(Gman.dotgov_list.first).to have_key('Domain Name')
  end

  context 'locality domains' do
    context 'a state domain' do
      let(:domain) { 'state.ak.us' }

      it "knows it's a state" do
        expect(subject).to be_a_state
        expect(subject.type).to be(:state)
      end

      it 'knows the state' do
        expect(subject.state).to eql('AK')
      end

      it "knows it's not a dotgov" do
        expect(subject).not_to be_a_dotgov
      end

      it "knows it's not a city" do
        expect(subject).not_to be_a_city
      end

      it "knows it's not a county" do
        expect(subject).not_to be_a_county
      end
    end

    context 'a city domain' do
      let(:domain) { 'ci.champaign.il.us' }

      it "knows it's a city" do
        expect(subject).to be_a_city
        expect(subject.type).to be(:city)
      end

      it 'knows the state' do
        expect(subject.state).to eql('IL')
      end

      it "knows it's not a dotgov" do
        expect(subject).not_to be_a_dotgov
      end

      it "knows it's not a state" do
        expect(subject).not_to be_a_state
      end

      it "knows it's not a county" do
        expect(subject).not_to be_a_county
      end
    end
  end

  # Kept as a sibling of 'locality domains': .gov domains are not locality
  # domains, so nesting them there produced misleading example names.
  context 'dotgovs' do
    context 'A federal dotgov' do
      let(:domain) { 'whitehouse.gov' }

      it "knows it's federal" do
        expect(subject).to be_federal
        expect(subject.type).to be(:federal)
      end

      it "knows it's a dotgov" do
        expect(subject).to be_a_dotgov
      end

      it "knows it's not a city" do
        expect(subject).not_to be_a_city
      end

      it "knows it's not a state" do
        expect(subject).not_to be_a_state
      end

      it "knows it's not a county" do
        expect(subject).not_to be_a_county
      end

      it 'knows the state' do
        expect(subject.state).to eql('DC')
      end

      it 'knows the city' do
        expect(subject.city).to eql('Washington')
      end

      it 'knows the agency' do
        expect(subject.agency).to eql('Executive Office of the President')
      end

      it 'knows the organization' do
        expect(subject.organization).to eql('White House')
      end
    end

    context 'a state .gov' do
      let(:domain) { 'illinois.gov' }

      it "knows it's a state" do
        expect(subject).to be_a_state
        expect(subject.type).to be(:state)
      end

      it "knows it's a dotgov" do
        expect(subject).to be_a_dotgov
      end

      it "knows it's not a city" do
        expect(subject).not_to be_a_city
      end

      it "knows it's not federal" do
        expect(subject).not_to be_federal
      end

      it "knows it's not a county" do
        expect(subject).not_to be_a_county
      end

      it 'knows the state' do
        expect(subject.state).to eql('IL')
      end

      it 'knows the city' do
        expect(subject.city).to eql('Springfield')
      end
    end

    context 'a county .gov' do
      let(:domain) { 'ALLEGHENYCOUNTYPA.GOV' }

      it "knows it's a county" do
        expect(subject).to be_a_county
        expect(subject.type).to be(:county)
      end

      it "knows it's a dotgov" do
        expect(subject).to be_a_dotgov
      end

      it "knows it's not a city" do
        expect(subject).not_to be_a_city
      end

      it "knows it's not federal" do
        expect(subject).not_to be_federal
      end

      it "knows it's not a state" do
        expect(subject).not_to be_a_state
      end

      it 'knows the state' do
        expect(subject.state).to eql('PA')
      end

      it 'knows the city' do
        expect(subject.city).to eql('Pittsburgh')
      end
    end

    context 'a city .gov' do
      let(:domain) { 'ABERDEENMD.GOV' }

      it "knows it's a city" do
        expect(subject).to be_a_city
        expect(subject.type).to be(:city)
      end

      it 'knows the city' do
        expect(subject.city).to eql('Aberdeen')
      end

      it 'knows the state' do
        expect(subject.state).to eql('MD')
      end

      it "knows it's a dotgov" do
        expect(subject).to be_a_dotgov
      end

      it "knows it's not a state" do
        expect(subject).not_to be_a_state
      end

      it "knows it's not a county" do
        expect(subject).not_to be_a_county
      end
    end
  end

  context "determining a domain's type" do
    # Expected type => domain to classify. The block parameters are named
    # differently from the `domain` helper so nothing is shadowed.
    {
      unknown: 'cityofperu.org',
      "Canada municipal": 'acme.ca',
      "Canada federal": 'canada.ca'
    }.each do |expected_type, candidate|
      context "Given the #{candidate} domain" do
        let(:domain) { candidate }

        it "knows the domain's type" do
          expect(subject.type).to eql(expected_type)
        end
      end
    end
  end
end