gman 7.0.0 → 7.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (74)
  1. checksums.yaml +5 -5
  2. data/.github/CODEOWNERS +3 -0
  3. data/.github/ISSUE_TEMPLATE/bug_report.md +28 -0
  4. data/.github/ISSUE_TEMPLATE/feature_request.md +21 -0
  5. data/.github/config.yml +23 -0
  6. data/.github/funding.yml +1 -0
  7. data/.github/no-response.yml +15 -0
  8. data/.github/release-drafter.yml +4 -0
  9. data/.github/settings.yml +33 -0
  10. data/.github/stale.yml +29 -0
  11. data/.gitignore +1 -0
  12. data/.rspec +2 -0
  13. data/.rubocop.yml +14 -5
  14. data/.rubocop_todo.yml +84 -0
  15. data/.ruby-version +1 -1
  16. data/Gemfile +2 -0
  17. data/bin/gman +6 -4
  18. data/bin/gman_filter +5 -7
  19. data/config/domains.txt +8454 -168
  20. data/config/vendor/academic.txt +6 -7
  21. data/config/vendor/dotgovs.csv +5786 -5560
  22. data/docs/CODE_OF_CONDUCT.md +46 -0
  23. data/docs/CONTRIBUTING.md +92 -0
  24. data/{README.md → docs/README.md} +3 -3
  25. data/docs/SECURITY.md +3 -0
  26. data/docs/_config.yml +2 -0
  27. data/gman.gemspec +18 -17
  28. data/lib/gman.rb +4 -2
  29. data/lib/gman/country_codes.rb +17 -17
  30. data/lib/gman/domain_list.rb +25 -9
  31. data/lib/gman/identifier.rb +57 -19
  32. data/lib/gman/importer.rb +31 -21
  33. data/lib/gman/locality.rb +8 -6
  34. data/lib/gman/version.rb +3 -1
  35. data/script/add +2 -0
  36. data/script/alphabetize +2 -0
  37. data/script/cibuild +1 -1
  38. data/script/dedupe +2 -1
  39. data/script/profile +2 -1
  40. data/script/prune +5 -3
  41. data/script/reconcile-us +6 -3
  42. data/script/vendor-federal-de +2 -1
  43. data/script/vendor-municipal-de +2 -1
  44. data/script/vendor-nl +2 -0
  45. data/script/vendor-public-suffix +6 -4
  46. data/script/vendor-se +2 -1
  47. data/script/vendor-swot +3 -1
  48. data/script/vendor-us +5 -3
  49. data/spec/fixtures/domains.txt +4 -0
  50. data/{test → spec}/fixtures/obama.txt +0 -0
  51. data/spec/gman/bin_spec.rb +101 -0
  52. data/spec/gman/country_code_spec.rb +39 -0
  53. data/spec/gman/domain_list_spec.rb +110 -0
  54. data/spec/gman/domains_spec.rb +25 -0
  55. data/spec/gman/identifier_spec.rb +218 -0
  56. data/spec/gman/importer_spec.rb +236 -0
  57. data/spec/gman/locality_spec.rb +24 -0
  58. data/spec/gman_spec.rb +74 -0
  59. data/spec/spec_helper.rb +31 -0
  60. metadata +89 -81
  61. data/.rake_tasks +0 -0
  62. data/CONTRIBUTING.md +0 -22
  63. data/Rakefile +0 -22
  64. data/test/fixtures/domains.txt +0 -2
  65. data/test/helper.rb +0 -48
  66. data/test/test_gman.rb +0 -56
  67. data/test/test_gman_bin.rb +0 -75
  68. data/test/test_gman_country_codes.rb +0 -18
  69. data/test/test_gman_domain_list.rb +0 -112
  70. data/test/test_gman_domains.rb +0 -32
  71. data/test/test_gman_filter.rb +0 -17
  72. data/test/test_gman_identifier.rb +0 -106
  73. data/test/test_gman_importer.rb +0 -244
  74. data/test/test_gman_locality.rb +0 -10
@@ -0,0 +1,101 @@
# frozen_string_literal: true

# Required here so the spec does not depend on another file having loaded
# Open3 first.
require 'open3'

# End-to-end specs for the executables under bin/. Each example shells out to
# the script through `bundle exec` and inspects the combined stdout/stderr
# text and the exit status of the process.
RSpec.describe 'Gman bin' do
  let(:domain) { 'whitehouse.gov' }
  let(:args) { [domain] }
  let(:command) { 'gman' }
  let(:bin_path) do
    File.expand_path "../../bin/#{command}", File.dirname(__FILE__)
  end
  # Open3.capture2e returns [combined_output, Process::Status].
  let(:response_parts) { Open3.capture2e('bundle', 'exec', bin_path, *args) }
  let(:output) { response_parts[0] }
  let(:status) { response_parts[1] }
  let(:exit_code) { status.exitstatus }

  # Literal expectations use `include` rather than `match`: `match` turns a
  # String into a Regexp, so the "." in a domain would match any character.
  context 'a valid domain' do
    it 'parses the domain' do
      expect(output).to include('Domain : whitehouse.gov')
    end

    it "knows it's valid" do
      expect(output).to include('Valid government domain')
      expect(exit_code).to be(0)
    end

    it 'knows the type' do
      expect(output).to match(/federal/i)
    end

    it 'knows the agency' do
      expect(output).to include('Executive Office of the President')
    end

    it 'knows the country' do
      expect(output).to include('United States')
    end

    it 'knows the city' do
      expect(output).to include('Washington')
    end

    it 'knows the state' do
      expect(output).to include('DC')
    end

    it 'colors by default' do
      # \e[32m is the ANSI escape sequence for green text.
      expect(output).to match(/\e\[32m/)
    end

    context 'with colorization disabled' do
      let(:args) { [domain, '--no-color'] }

      it "doesn't color" do
        expect(output).not_to match(/\e\[32m/)
      end
    end
  end

  context 'with no args' do
    let(:args) { [] }

    it 'displays the help text' do
      expect(output).to include('USAGE')
    end
  end

  context 'an invalid domain' do
    let(:domain) { 'foo.invalid' }

    it 'knows the domain is invalid' do
      expect(output).to include('Invalid domain')
      expect(exit_code).to be(1)
    end
  end

  context 'a non-government domain' do
    let(:domain) { 'github.com' }

    it "knows it's not a government domain" do
      expect(output).to include('Not a government domain')
      expect(exit_code).to be(1)
    end
  end

  context 'filtering' do
    let(:command) { 'gman_filter' }
    let(:txt_path) do
      File.expand_path '../fixtures/obama.txt', File.dirname(__FILE__)
    end
    let(:args) { [txt_path] }

    it 'returns only government domains' do
      expected = <<~EXPECTED
        mr.senator@obama.senate.gov
        president@whitehouse.gov
        commander.in.chief@us.army.mil
      EXPECTED

      expect(output).to eql(expected)
    end
  end
end
@@ -0,0 +1,39 @@
# frozen_string_literal: true

# Verifies the country and alpha2 code that Gman reports for sample domains.
RSpec.describe 'Gman Country Codes' do
  # domain => expected country name (nil when no country object is expected)
  # and expected alpha2 code.
  {
    'whitehouse.gov' => { name: 'United States of America', alpha2: 'us' },
    'foo.gov.uk' => {
      name: 'United Kingdom of Great Britain and Northern Ireland',
      alpha2: 'gb'
    },
    'army.mil' => { name: 'United States of America', alpha2: 'us' },
    'foo.gc.ca' => { name: 'Canada', alpha2: 'ca' },
    'foo.eu' => { name: nil, alpha2: 'eu' }
  }.each do |domain, expectation|
    context "given #{domain.inspect}" do
      subject { Gman.new(domain) }

      let(:country) { subject.country }

      it 'knows the country' do
        if expectation[:name]
          expect(country.name).to eql(expectation[:name])
        else
          expect(country).to be_nil
        end
      end

      it 'knows the alpha2' do
        expect(subject.alpha2).to eql(expectation[:alpha2])
      end
    end
  end
end
@@ -0,0 +1,110 @@
# frozen_string_literal: true

# Exercises Gman::DomainList through each of its three initializers
# (path:, contents:, data:) and runs the same examples against every one.
RSpec.describe Gman::DomainList do
  let(:data) { subject.data }
  let(:canada) { data['Canada municipal'] }

  %i[path contents data].each do |type|
    context "when initialized by #{type}" do
      subject do
        case type
        when :path
          described_class.new(path: Gman.list_path)
        when :contents
          described_class.new(contents: File.read(Gman.list_path))
        when :data
          # Named so it does not shadow the `data` let above.
          list_hash = described_class.new(path: Gman.list_path).to_h
          described_class.new(data: list_hash)
        end
      end

      it 'stores the init var' do
        expect(subject.send(type)).not_to be_nil
      end

      it 'returns the domain data' do
        expect(data).to have_key('Canada federal')
        expect(data.values.flatten).to include('gov')
      end

      it 'returns the list contents' do
        expect(subject.contents).to match(/^gov$/)
      end

      it 'knows the list path' do
        expect(subject.path).to eql(Gman.list_path)
      end

      it 'returns the PublicSuffix list' do
        expect(subject.public_suffix_list).to be_a(PublicSuffix::List)
      end

      it 'knows if a domain is valid' do
        expect(subject.valid?('whitehouse.gov')).to be(true)
      end

      it 'knows if a domain is invalid' do
        expect(subject.valid?('example.com')).to be(false)
      end

      it 'returns the domain groups' do
        expect(subject.groups).to include('Canada federal')
      end

      it 'returns the domains' do
        expect(subject.domains).to include('gov')
      end

      it 'returns the domain count' do
        expect(subject.count).to be_a(Integer)
        expect(subject.count).to be > 100
      end

      it 'alphabetizes the list' do
        canada.shuffle!
        # A shuffle can leave the first entry where it was; rotate so the
        # precondition below cannot fail by chance.
        canada.rotate! if canada.first == '100milehouse.com'
        expect(canada.first).not_to eql('100milehouse.com')
        subject.alphabetize
        expect(canada.first).to eql('100milehouse.com')
      end

      it 'outputs public suffix format' do
        # Regexp literal: inside a double-quoted String "\." is just ".",
        # which would match any character.
        expect(subject.to_s).to match(%r{// Canada federal\ncanada\.ca\n})
      end

      it "finds a domain's parent" do
        expect(subject.parent_domain('foo.gov.uk')).to eql('gov.uk')
      end

      context 'with the list path stubbed' do
        # NOTE(review): stubbed_list_path is not defined in this file;
        # presumably a shared helper (e.g. spec_helper) provides it - confirm.
        let(:stubbed_file_contents) { File.read(stubbed_list_path) }

        before do
          subject.instance_variable_set('@path', stubbed_list_path)
        end

        context 'with list data stubbed' do
          before do
            subject.data = { 'foo' => ['!mail.bar.gov', 'bar.gov', 'baz.net'] }
          end

          context 'alphabetizing' do
            before { subject.alphabetize }

            it 'puts exceptions last' do
              expect(subject.data['foo'].last).to eql('!mail.bar.gov')
            end
          end

          context 'writing' do
            before { subject.write }

            it 'writes the contents' do
              expect(stubbed_file_contents).to match(
                %r{// foo\nbar\.gov\nbaz\.net}
              )
            end
          end
        end
      end
    end
  end
end
@@ -0,0 +1,25 @@
# frozen_string_literal: true

# Runs every domain in the bundled list through the importer's validity
# check, one example per group. DNS resolution is only attempted when the
# GMAN_RESOLVE_DOMAINS environment variable is set to "true".
RSpec.describe 'Gman domains' do
  let(:resolve_domains?) { ENV['GMAN_RESOLVE_DOMAINS'] == 'true' }
  let(:importer) { Gman::Importer.new({}) }
  let(:options) { { skip_dupe: true, skip_resolve: !resolve_domains? } }

  # Groups that are not validated by this spec.
  skipped_groups = ['non-us gov', 'non-us mil', 'US Federal']
  checked_groups = Gman.list.to_h.reject do |name, _domains|
    skipped_groups.include?(name)
  end

  checked_groups.each do |group, domains|
    context "the #{group} group" do
      it 'only contains valid domains' do
        invalid_domains = []

        Parallel.each(domains, in_threads: 4) do |domain|
          valid = importer.valid_domain?(domain, options)
          invalid_domains << domain unless valid
        end

        expect(invalid_domains).to be_empty
      end
    end
  end
end
@@ -0,0 +1,218 @@
# frozen_string_literal: true

# Specs for the type/locality identification that Gman exposes for a domain:
# the state/city/county/federal predicates, #type, and the #state, #city,
# #agency and #organization lookups.
RSpec.describe 'Gman identifier' do
  subject { Gman.new(domain) }

  # Overridden by the nested contexts below.
  let(:domain) { '' }

  it 'parses the dotgov list' do
    expect(Gman.dotgov_list).to be_a(CSV::Table)
    expect(Gman.dotgov_list.first).to have_key('Domain Name')
  end

  context 'locality domains' do
    context 'a state domain' do
      let(:domain) { 'state.ak.us' }

      it "knows it's a state" do
        expect(subject).to be_a_state
        expect(subject.type).to be(:state)
      end

      it 'knows the state' do
        expect(subject.state).to eql('AK')
      end

      it "knows it's not a dotgov" do
        expect(subject).not_to be_a_dotgov
      end

      it "knows it's not a city" do
        expect(subject).not_to be_a_city
      end

      it "knows it's not a county" do
        expect(subject).not_to be_a_county
      end
    end

    context 'a city domain' do
      let(:domain) { 'ci.champaign.il.us' }

      it "knows it's a city" do
        expect(subject).to be_a_city
        expect(subject.type).to be(:city)
      end

      it 'knows the state' do
        expect(subject.state).to eql('IL')
      end

      it "knows it's not a dotgov" do
        expect(subject).not_to be_a_dotgov
      end

      it "knows it's not a state" do
        expect(subject).not_to be_a_state
      end

      it "knows it's not a county" do
        expect(subject).not_to be_a_county
      end
    end
  end

  # Sibling of 'locality domains' rather than nested inside it: the .gov
  # examples below are not locality (*.us) domains.
  context 'dotgovs' do
    context 'A federal dotgov' do
      let(:domain) { 'whitehouse.gov' }

      it "knows it's federal" do
        expect(subject).to be_federal
        expect(subject.type).to be(:federal)
      end

      it "knows it's a dotgov" do
        expect(subject).to be_a_dotgov
      end

      it "knows it's not a city" do
        expect(subject).not_to be_a_city
      end

      it "knows it's not a state" do
        expect(subject).not_to be_a_state
      end

      it "knows it's not a county" do
        expect(subject).not_to be_a_county
      end

      it 'knows the state' do
        expect(subject.state).to eql('DC')
      end

      it 'knows the city' do
        expect(subject.city).to eql('Washington')
      end

      it 'knows the agency' do
        expect(subject.agency).to eql('Executive Office of the President')
      end

      it 'knows the organization' do
        expect(subject.organization).to eql('White House')
      end
    end

    context 'a state .gov' do
      let(:domain) { 'illinois.gov' }

      it "knows it's a state" do
        expect(subject).to be_a_state
        expect(subject.type).to be(:state)
      end

      it "knows it's a dotgov" do
        expect(subject).to be_a_dotgov
      end

      it "knows it's not a city" do
        expect(subject).not_to be_a_city
      end

      it "knows it's not federal" do
        expect(subject).not_to be_federal
      end

      it "knows it's not a county" do
        expect(subject).not_to be_a_county
      end

      it 'knows the state' do
        expect(subject.state).to eql('IL')
      end

      it 'knows the city' do
        expect(subject.city).to eql('Springfield')
      end
    end

    context 'a county .gov' do
      let(:domain) { 'ALLEGHENYCOUNTYPA.GOV' }

      it "knows it's a county" do
        expect(subject).to be_a_county
        expect(subject.type).to be(:county)
      end

      it "knows it's a dotgov" do
        expect(subject).to be_a_dotgov
      end

      it "knows it's not a city" do
        expect(subject).not_to be_a_city
      end

      it "knows it's not federal" do
        expect(subject).not_to be_federal
      end

      it "knows it's not a state" do
        expect(subject).not_to be_a_state
      end

      it 'knows the state' do
        expect(subject.state).to eql('PA')
      end

      it 'knows the city' do
        expect(subject.city).to eql('Pittsburgh')
      end
    end

    context 'a city .gov' do
      let(:domain) { 'ABERDEENMD.GOV' }

      it "knows it's a city" do
        expect(subject).to be_a_city
        expect(subject.type).to be(:city)
      end

      it 'knows the city' do
        expect(subject.city).to eql('Aberdeen')
      end

      it 'knows the state' do
        expect(subject.state).to eql('MD')
      end

      it "knows it's a dotgov" do
        expect(subject).to be_a_dotgov
      end

      it "knows it's not a state" do
        expect(subject).not_to be_a_state
      end

      it "knows it's not a county" do
        expect(subject).not_to be_a_county
      end
    end
  end

  context "determining a domain's type" do
    # expected #type (a Symbol) => sample domain
    {
      unknown: 'cityofperu.org',
      'Canada municipal': 'acme.ca',
      'Canada federal': 'canada.ca'
    }.each do |expected, sample_domain|
      context "Given the #{sample_domain} domain" do
        let(:domain) { sample_domain }

        it "knows the domain's type" do
          expect(subject.type).to eql(expected)
        end
      end
    end
  end
end