gman 5.0.9 → 6.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,81 +1,75 @@
1
- require_relative "helper"
1
+ require_relative 'helper'
2
2
 
3
3
  class TestGmanBin < Minitest::Test
4
-
5
4
  def setup
6
- @output, @status = test_bin("whitehouse.gov")
5
+ @output, @status = test_bin('whitehouse.gov')
7
6
  end
8
7
 
9
- should "parse the domain" do
10
- output, status = test_bin("bar.gov")
11
- assert_match /Domain : bar.gov/, output
8
+ should 'parse the domain' do
9
+ output, = test_bin('bar.gov')
10
+ assert_match(/Domain : bar.gov/, output)
12
11
 
13
- output, status = test_bin("foo@bar.gov")
14
- assert_match /Domain : bar.gov/, output
12
+ output, = test_bin('foo@bar.gov')
13
+ assert_match(/Domain : bar.gov/, output)
15
14
 
16
- output, status = test_bin("http://bar.gov/foo")
17
- assert_match /Domain : bar.gov/, output
15
+ output, = test_bin('http://bar.gov/foo')
16
+ assert_match(/Domain : bar.gov/, output)
18
17
  end
19
18
 
20
- should "err on invalid domains" do
21
- output, status = test_bin("foo.invalid")
19
+ should 'err on invalid domains' do
20
+ output, status = test_bin('foo.invalid')
22
21
  assert_equal 1, status.exitstatus
23
- assert_match /Invalid domain/, output
22
+ assert_match(/Invalid domain/, output)
24
23
  end
25
24
 
26
- should "err on non-government domains" do
27
- output, status = test_bin("github.com")
25
+ should 'err on non-government domains' do
26
+ output, status = test_bin('github.com')
28
27
  assert_equal 1, status.exitstatus
29
- assert_match /Not a government domain/, output
28
+ assert_match(/Not a government domain/, output)
30
29
  end
31
30
 
32
- should "know the type" do
33
- assert_match /federal/, @output
31
+ should 'know the type' do
32
+ assert_match(/federal/, @output)
34
33
  assert_equal 0, @status.exitstatus
35
34
  end
36
35
 
37
- should "know the agency" do
38
- assert_match /Executive Office of the President/, @output
36
+ should 'know the agency' do
37
+ assert_match(/Executive Office of the President/, @output)
39
38
  assert_equal 0, @status.exitstatus
40
39
  end
41
40
 
42
- should "know the country" do
43
- assert_match /United States/, @output
41
+ should 'know the country' do
42
+ assert_match(/United States/, @output)
44
43
  assert_equal 0, @status.exitstatus
45
44
  end
46
45
 
47
- should "know the city" do
48
- assert_match /Washington/, @output
46
+ should 'know the city' do
47
+ assert_match(/Washington/, @output)
49
48
  assert_equal 0, @status.exitstatus
50
49
  end
51
50
 
52
- should "know the state" do
53
- assert_match /DC/, @output
51
+ should 'know the state' do
52
+ assert_match(/DC/, @output)
54
53
  assert_equal 0, @status.exitstatus
55
54
  end
56
55
 
57
- should "allow you to disable colorization" do
58
- output, status = test_bin("whitehouse.gov", "--no-color")
59
- refute_match /\e\[32m/, output
56
+ should 'allow you to disable colorization' do
57
+ output, = test_bin('whitehouse.gov', '--no-color')
58
+ refute_match(/\e\[32m/, output)
60
59
  end
61
60
 
62
- should "color by default" do
63
- assert_match /\e\[32m/, @output
61
+ should 'color by default' do
62
+ assert_match(/\e\[32m/, @output)
64
63
  end
65
64
 
66
- should "show help text" do
67
- output, status = test_bin
68
- assert_match /Usage/i, output
69
-
70
- output, status = test_bin("")
71
- assert_match /Usage/i, output
65
+ should 'show help text' do
66
+ output, = test_bin
67
+ assert_match(/Usage/i, output)
72
68
 
73
- output, status = test_bin("--no-color")
74
- assert_match /Usage/i, output
75
- end
69
+ output, = test_bin('')
70
+ assert_match(/Usage/i, output)
76
71
 
77
- should "know if a country is sanctioned" do
78
- output, status = test_bin "kim@pyongyang.gov.kp"
79
- assert_match /SANCTIONED/, output
72
+ output, = test_bin('--no-color')
73
+ assert_match(/Usage/i, output)
80
74
  end
81
75
  end
@@ -2,13 +2,17 @@ require File.join(File.dirname(__FILE__), 'helper')
2
2
 
3
3
  class TestGmanCountryCodes < Minitest::Test
4
4
  should "determine a domain's country" do
5
- assert_equal "United States of America", Gman.new("whitehouse.gov").country.name
6
- assert_equal "United States of America", Gman.new("army.mil").country.name
7
- assert_equal "United Kingdom of Great Britain and Northern Ireland", Gman.new("foo.gov.uk").country.name
8
- assert_equal "Canada", Gman.new("foo.gc.ca").country.name
5
+ name = Gman.new('whitehouse.gov').country.name
6
+ assert_equal 'United States of America', name
7
+
8
+ name = Gman.new('foo.gov.uk').country.name
9
+ assert_equal 'United Kingdom of Great Britain and Northern Ireland', name
10
+
11
+ assert_equal 'United States of America', Gman.new('army.mil').country.name
12
+ assert_equal 'Canada', Gman.new('foo.gc.ca').country.name
9
13
  end
10
14
 
11
- should "not err out on an unknown country code" do
12
- assert_equal nil, Gman.new("foo.eu").country
15
+ should 'not err out on an unknown country code' do
16
+ assert_equal nil, Gman.new('foo.eu').country
13
17
  end
14
18
  end
@@ -1,28 +1,33 @@
1
1
  require File.join(File.dirname(__FILE__), 'helper')
2
2
 
3
- class TestDomains < Minitest::Test
4
-
5
- WHITELIST = [ "non-us gov", "non-us mil", "US Federal"]
3
+ class TestGmanDomains < Minitest::Test
4
+ WHITELIST = ['non-us gov', 'non-us mil', 'US Federal'].freeze
6
5
 
7
6
  def resolve_domains?
8
- ENV["GMAN_RESOLVE_DOMAINS"] == "true"
7
+ ENV['GMAN_RESOLVE_DOMAINS'] == 'true'
9
8
  end
10
9
 
11
- should "only contains valid domains" do
10
+ should 'only contains valid domains' do
12
11
  importer = Gman::Importer.new({})
13
12
  if resolve_domains?
14
- importer.logger.info "Validating that all domains resolve. This may take a while..."
13
+ importer.logger.info <<-MSG
14
+ Validating that all domains resolve. This may take a while...
15
+ MSG
15
16
  else
16
- importer.logger.info "Skipping domain resolution. Run `GMAN_RESOLVE_DOMAINS=true rake test` to validate that domains resolve."
17
+ importer.logger.info 'Skipping domain resolution.' \
18
+ 'Run `GMAN_RESOLVE_DOMAINS=true rake test` '\
19
+ 'to validate that domains resolve.'
17
20
  end
18
21
 
19
22
  invalid = []
20
- Parallel.each(Gman::DomainList.current.list, :in_threads => 2) do |group, domains|
23
+ list = Gman::DomainList.current.list
24
+ Parallel.each(list, in_threads: 2) do |group, domains|
21
25
  next if WHITELIST.include?(group)
22
26
  invalid.push domains.reject { |domain|
23
- importer.valid_domain?(domain, :skip_dupe => true, :skip_resolve => !resolve_domains?)
27
+ options = { skip_dupe: true, skip_resolve: !resolve_domains? }
28
+ importer.valid_domain?(domain, options)
24
29
  }
25
30
  end
26
- assert_equal [], invalid.flatten.reject { |e| e.empty? }
31
+ assert_equal [], invalid.flatten.reject(&:empty?)
27
32
  end
28
33
  end
@@ -2,18 +2,16 @@ HERE = File.dirname(__FILE__)
2
2
  require File.join(HERE, 'helper')
3
3
 
4
4
  class TestGmanFilter < Minitest::Test
5
+ txt_path = fixture_path 'obama.txt'
6
+ exec_path = bin_path 'gman_filter'
5
7
 
6
- txt_path = File.join(HERE, "obama.txt")
7
- exec_path = File.join(HERE, "..", "bin", "gman_filter")
8
-
9
- should "remove non-gov/mil addresses" do
10
- filtered = `#{exec_path} < #{txt_path}`
8
+ should 'remove non-gov/mil addresses' do
9
+ output, _status = Open3.capture2e('bundle', 'exec', exec_path, txt_path)
11
10
  expected = %w(
12
11
  mr.senator@obama.senate.gov
13
12
  president@whitehouse.gov
14
13
  commander.in.chief@us.army.mil
15
14
  ).join("\n") + "\n"
16
- assert_equal filtered, expected
15
+ assert_equal output, expected
17
16
  end
18
-
19
17
  end
@@ -1,16 +1,16 @@
1
1
  require File.join(File.dirname(__FILE__), 'helper')
2
2
 
3
3
  class TestGmanIdentifier < Minitest::Test
4
- should "Parse the dotgov list" do
4
+ should 'Parse the dotgov list' do
5
5
  assert Gman.dotgov_list
6
6
  assert_equal CSV::Table, Gman.dotgov_list.class
7
7
  assert_equal CSV::Row, Gman.dotgov_list.first.class
8
- assert Gman.dotgov_list.first["Domain Name"]
8
+ assert Gman.dotgov_list.first['Domain Name']
9
9
  end
10
10
 
11
- context "locality domains" do
12
- should "detect state domains" do
13
- domain = Gman.new("state.ak.us")
11
+ context 'locality domains' do
12
+ should 'detect state domains' do
13
+ domain = Gman.new('state.ak.us')
14
14
  assert domain.state?
15
15
 
16
16
  refute domain.dotgov?
@@ -19,11 +19,11 @@ class TestGmanIdentifier < Minitest::Test
19
19
  refute domain.county?
20
20
 
21
21
  assert_equal :state, domain.type
22
- assert_equal "AK", domain.state
22
+ assert_equal 'AK', domain.state
23
23
  end
24
24
 
25
- should "detect city domains" do
26
- domain = Gman.new("ci.champaign.il.us")
25
+ should 'detect city domains' do
26
+ domain = Gman.new('ci.champaign.il.us')
27
27
  assert domain.city?
28
28
 
29
29
  refute domain.dotgov?
@@ -32,13 +32,13 @@ class TestGmanIdentifier < Minitest::Test
32
32
  refute domain.county?
33
33
 
34
34
  assert_equal :city, domain.type
35
- assert_equal "IL", domain.state
35
+ assert_equal 'IL', domain.state
36
36
  end
37
37
  end
38
38
 
39
- context "dotgovs" do
40
- should "detect federal dotgovs" do
41
- domain = Gman.new "whitehouse.gov"
39
+ context 'dotgovs' do
40
+ should 'detect federal dotgovs' do
41
+ domain = Gman.new 'whitehouse.gov'
42
42
  assert domain.federal?
43
43
  assert domain.dotgov?
44
44
 
@@ -47,13 +47,13 @@ class TestGmanIdentifier < Minitest::Test
47
47
  refute domain.county?
48
48
 
49
49
  assert_equal :federal, domain.type
50
- assert_equal "DC", domain.state
51
- assert_equal "Washington", domain.city
52
- assert_equal "Executive Office of the President", domain.agency
50
+ assert_equal 'DC', domain.state
51
+ assert_equal 'Washington', domain.city
52
+ assert_equal 'Executive Office of the President', domain.agency
53
53
  end
54
54
 
55
- should "detect state dotgovs" do
56
- domain = Gman.new "illinois.gov"
55
+ should 'detect state dotgovs' do
56
+ domain = Gman.new 'illinois.gov'
57
57
  assert domain.state?
58
58
  assert domain.dotgov?
59
59
 
@@ -62,12 +62,12 @@ class TestGmanIdentifier < Minitest::Test
62
62
  refute domain.county?
63
63
 
64
64
  assert_equal :state, domain.type
65
- assert_equal "IL", domain.state
66
- assert_equal "Springfield", domain.city
65
+ assert_equal 'IL', domain.state
66
+ assert_equal 'Springfield', domain.city
67
67
  end
68
68
 
69
- should "detect county dotgovs" do
70
- domain = Gman.new "ALLEGHENYCOUNTYPA.GOV"
69
+ should 'detect county dotgovs' do
70
+ domain = Gman.new 'ALLEGHENYCOUNTYPA.GOV'
71
71
  assert domain.county?
72
72
  assert domain.dotgov?
73
73
 
@@ -76,30 +76,31 @@ class TestGmanIdentifier < Minitest::Test
76
76
  refute domain.state?
77
77
 
78
78
  assert_equal :county, domain.type
79
- assert_equal "PA", domain.state
80
- assert_equal "Pittsburgh", domain.city
79
+ assert_equal 'PA', domain.state
80
+ assert_equal 'Pittsburgh', domain.city
81
81
  end
82
82
 
83
- should "detect the list category" do
84
- assert_equal "US Federal", Gman.new("whitehouse.gov").send("list_category")
83
+ should 'detect the list category' do
84
+ category = Gman.new('whitehouse.gov').send('list_category')
85
+ assert_equal 'US Federal', category
85
86
  end
86
87
  end
87
88
 
88
- context "non-dotgov domains" do
89
+ context 'non-dotgov domains' do
89
90
  should "determine a domain's group" do
90
- assert_equal "usagovIN", Gman.new("cityofperu.org").send("list_category")
91
- assert_equal :unknown, Gman.new("cityofperu.org").type
91
+ assert_equal 'usagovIN', Gman.new('cityofperu.org').send('list_category')
92
+ assert_equal :unknown, Gman.new('cityofperu.org').type
92
93
 
93
- assert_equal "Canada municipal", Gman.new("acme.ca").send("list_category")
94
- assert_equal :"Canada municipal", Gman.new("acme.ca").type
94
+ assert_equal 'Canada municipal', Gman.new('acme.ca').send('list_category')
95
+ assert_equal :"Canada municipal", Gman.new('acme.ca').type
95
96
 
96
- assert_equal "Canada federal", Gman.new("canada.ca").send("list_category")
97
- assert_equal :"Canada federal", Gman.new("canada.ca").type
97
+ assert_equal 'Canada federal', Gman.new('canada.ca').send('list_category')
98
+ assert_equal :"Canada federal", Gman.new('canada.ca').type
98
99
  end
99
100
 
100
- should "detect the state" do
101
- assert_equal "OR", Gman.new("ashland.or.us").state
102
- refute Gman.new("canada.ca").state
101
+ should 'detect the state' do
102
+ assert_equal 'OR', Gman.new('ashland.or.us').state
103
+ refute Gman.new('canada.ca').state
103
104
  end
104
105
  end
105
106
  end
@@ -0,0 +1,250 @@
1
+ require File.join(File.dirname(__FILE__), 'helper')
2
+
3
+ class TestGManImporter < Minitest::Test
4
+ def setup
5
+ @importer = Gman::Importer.new 'test' => ['example.com']
6
+ @stdout = StringIO.new
7
+ @importer.instance_variable_set '@logger', Logger.new(@stdout)
8
+
9
+ with_env 'GMAN_STUB_DOMAINS', 'true' do
10
+ @original_domain_list = File.open(Gman.list_path).read
11
+ end
12
+ end
13
+
14
+ def teardown
15
+ with_env 'GMAN_STUB_DOMAINS', 'true' do
16
+ File.write Gman.list_path, @original_domain_list
17
+ end
18
+ end
19
+
20
+ should 'init the domain list' do
21
+ assert_equal Gman::DomainList, @importer.domains.class
22
+ assert_equal 1, @importer.domains.domains.count
23
+ assert_equal 'example.com', @importer.domains.domains.first
24
+ end
25
+
26
+ should 'init the logger' do
27
+ assert_equal Logger, @importer.logger.class
28
+ end
29
+
30
+ should 'return the current domain list' do
31
+ assert_equal Gman::DomainList, @importer.current.class
32
+ end
33
+
34
+ should 'return the resolver' do
35
+ assert_equal Resolv::DNS, @importer.resolver.class
36
+ end
37
+
38
+ context 'domain rejection' do
39
+ should 'return false for a rejected domain' do
40
+ refute @importer.reject 'example.com', 'reasons'
41
+ end
42
+
43
+ should 'return the reason when asked' do
44
+ with_env 'RECONCILING', 'true' do
45
+ assert_equal 'reasons', @importer.reject('example.com', 'reasons')
46
+ end
47
+ end
48
+ end
49
+
50
+ context 'manipulating the domain list' do
51
+ should 'normalize domains within the domain list' do
52
+ importer = Gman::Importer.new 'test' => ['www.EXAMPLE.com/']
53
+ importer.send :normalize_domains!
54
+ assert_equal 'example.com', importer.domains.domains.first
55
+ end
56
+
57
+ should 'remove invalid domains from the domain list' do
58
+ importer = Gman::Importer.new 'test' => ['foo.github.io', 'example.com']
59
+ importer.instance_variable_set '@logger', Logger.new(@stdout)
60
+
61
+ assert_equal 2, importer.domains.domains.count
62
+ importer.send :ensure_validity!
63
+ assert_equal 1, importer.domains.domains.count
64
+ end
65
+
66
+ context 'writing the domain list' do
67
+ should 'add domains to the current domain list' do
68
+ with_env 'GMAN_STUB_DOMAINS', 'true' do
69
+ domains = { 'test' => ['example.com'], 'test2' => ['github.com'] }
70
+ importer = Gman::Importer.new domains
71
+ importer.send :add_to_current
72
+ expected = "// test\nexample.com\ngov\n\n// test2\ngithub.com"
73
+ assert_equal expected, File.open(Gman.list_path).read
74
+ end
75
+ end
76
+
77
+ should 'import' do
78
+ with_env 'GMAN_STUB_DOMAINS', 'true' do
79
+ domains = {
80
+ 'test' => ['www.example.com', 'goo.github.io'],
81
+ 'test2' => ['github.com', 'www.github.com', 'whitehouse.gov']
82
+ }
83
+
84
+ importer = Gman::Importer.new domains
85
+ importer.instance_variable_set '@logger', Logger.new(@stdout)
86
+ importer.import(skip_resolve: true)
87
+
88
+ expected = "// test\nexample.com\ngov\n\n// test2\ngithub.com"
89
+ assert_equal expected, File.open(Gman.list_path).read
90
+ end
91
+ end
92
+ end
93
+ end
94
+
95
+ context 'domain validation' do
96
+ should 'allow valid domains' do
97
+ assert @importer.send :ensure_valid, 'whitehouse.gov'
98
+ end
99
+
100
+ should 'reject empty domains' do
101
+ refute @importer.send :ensure_valid, ''
102
+ end
103
+
104
+ should 'reject blacklisted domains' do
105
+ refute @importer.send :ensure_valid, 'egovlink.com'
106
+ end
107
+
108
+ should 'reject invalid domains' do
109
+ refute @importer.send :ensure_valid, 'foo.invalid'
110
+ end
111
+
112
+ should 'reject academic domains' do
113
+ refute @importer.send :ensure_valid, 'harvard.edu'
114
+ end
115
+
116
+ should "reject regex'd domains" do
117
+ refute @importer.send :ensure_valid, 'foo.github.io'
118
+ end
119
+ end
120
+
121
+ context 'duplicate domains' do
122
+ should 'know a unique domain is not a dupe' do
123
+ refute @importer.send :dupe?, 'gman.com'
124
+ end
125
+
126
+ should "know when a domain's a dupe" do
127
+ assert @importer.send :dupe?, 'gov'
128
+ end
129
+
130
+ should "know when a domain's a subdomain of an existing domain" do
131
+ assert @importer.send :dupe?, 'whitehouse.gov'
132
+ end
133
+
134
+ should 'allow unique domains' do
135
+ assert @importer.send :ensure_not_dupe, 'gman.com'
136
+ end
137
+
138
+ should 'reject duplicate domains' do
139
+ refute @importer.send :ensure_not_dupe, 'gov'
140
+ end
141
+
142
+ should 'reject subdomains' do
143
+ refute @importer.send :ensure_not_dupe, 'whitehouse.gov'
144
+ end
145
+ end
146
+
147
+ context 'domain resolution' do
148
+ should 'know if a domain resolves' do
149
+ assert @importer.domain_resolves?('github.com')
150
+ assert @importer.send :ensure_resolves, 'github.com'
151
+ end
152
+
153
+ should "know if a domain doesn't resolve" do
154
+ refute @importer.domain_resolves?('foo.invalid')
155
+ refute @importer.send :ensure_resolves, 'foo.invalid'
156
+ end
157
+
158
+ should 'know if a domain has an IP' do
159
+ end
160
+
161
+ should 'know if a domain returns a given record' do
162
+ end
163
+ end
164
+
165
+ context 'regex checks' do
166
+ should 'pass valid domains' do
167
+ assert @importer.send :ensure_regex, 'example.com'
168
+ end
169
+
170
+ should 'reject domains that begin with home.' do
171
+ refute @importer.send :ensure_regex, 'home.example.com'
172
+ end
173
+
174
+ should 'reject domains that begin with user.' do
175
+ refute @importer.send :ensure_regex, 'user.example.com'
176
+ end
177
+
178
+ should 'reject domains that begin with site.' do
179
+ refute @importer.send :ensure_regex, 'user.example.com'
180
+ end
181
+
182
+ should 'reject weebly domains' do
183
+ refute @importer.send :ensure_regex, 'foo.weebly.com'
184
+ end
185
+
186
+ should 'reject wordpress domains' do
187
+ refute @importer.send :ensure_regex, 'foo.wordpress.com'
188
+ end
189
+
190
+ should 'reject govoffice domains' do
191
+ refute @importer.send :ensure_regex, 'foo.govoffice.com'
192
+ refute @importer.send :ensure_regex, 'foo.govoffice1.com'
193
+ end
194
+
195
+ should 'reject homestead domains' do
196
+ refute @importer.send :ensure_regex, 'foo.homestead.com'
197
+ end
198
+
199
+ should 'reject wix domains' do
200
+ refute @importer.send :ensure_regex, 'foo.wix.com'
201
+ end
202
+
203
+ should 'reject blogspot domains' do
204
+ refute @importer.send :ensure_regex, 'foo.blogspot.com'
205
+ end
206
+
207
+ should 'reject tripod domains' do
208
+ refute @importer.send :ensure_regex, 'foo.tripod.com'
209
+ end
210
+
211
+ should 'reject squarespace domains' do
212
+ refute @importer.send :ensure_regex, 'foo.squarespace.com'
213
+ end
214
+
215
+ should 'reject github.io domains' do
216
+ refute @importer.send :ensure_regex, 'foo.github.io'
217
+ end
218
+
219
+ should 'reject locality domains' do
220
+ refute @importer.send :ensure_regex, 'ci.champaign.il.us'
221
+ end
222
+ end
223
+
224
+ context 'normalizing domains' do
225
+ should 'normalize URLs to domains' do
226
+ expected = 'example.com'
227
+ assert_equal expected, @importer.normalize_domain('http://example.com')
228
+ end
229
+
230
+ should 'strip WWW' do
231
+ assert_equal 'example.com', @importer.normalize_domain('www.example.com')
232
+ end
233
+
234
+ should 'remove trailing slashes' do
235
+ assert_equal 'example.com', @importer.normalize_domain('example.com/')
236
+ end
237
+
238
+ should 'remove paths' do
239
+ assert_equal 'example.com', @importer.normalize_domain('example.com/foo')
240
+ end
241
+
242
+ should 'remove paths with trailing slashes' do
243
+ assert_equal 'example.com', @importer.normalize_domain('example.com/foo/')
244
+ end
245
+
246
+ should 'downcase' do
247
+ assert_equal 'example.com', @importer.normalize_domain('EXAMPLE.com')
248
+ end
249
+ end
250
+ end