gman 5.0.9 → 6.0.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,81 +1,75 @@
1
- require_relative "helper"
1
+ require_relative 'helper'
2
2
 
3
3
  class TestGmanBin < Minitest::Test
4
-
5
4
  def setup
6
- @output, @status = test_bin("whitehouse.gov")
5
+ @output, @status = test_bin('whitehouse.gov')
7
6
  end
8
7
 
9
- should "parse the domain" do
10
- output, status = test_bin("bar.gov")
11
- assert_match /Domain : bar.gov/, output
8
+ should 'parse the domain' do
9
+ output, = test_bin('bar.gov')
10
+ assert_match(/Domain : bar.gov/, output)
12
11
 
13
- output, status = test_bin("foo@bar.gov")
14
- assert_match /Domain : bar.gov/, output
12
+ output, = test_bin('foo@bar.gov')
13
+ assert_match(/Domain : bar.gov/, output)
15
14
 
16
- output, status = test_bin("http://bar.gov/foo")
17
- assert_match /Domain : bar.gov/, output
15
+ output, = test_bin('http://bar.gov/foo')
16
+ assert_match(/Domain : bar.gov/, output)
18
17
  end
19
18
 
20
- should "err on invalid domains" do
21
- output, status = test_bin("foo.invalid")
19
+ should 'err on invalid domains' do
20
+ output, status = test_bin('foo.invalid')
22
21
  assert_equal 1, status.exitstatus
23
- assert_match /Invalid domain/, output
22
+ assert_match(/Invalid domain/, output)
24
23
  end
25
24
 
26
- should "err on non-government domains" do
27
- output, status = test_bin("github.com")
25
+ should 'err on non-government domains' do
26
+ output, status = test_bin('github.com')
28
27
  assert_equal 1, status.exitstatus
29
- assert_match /Not a government domain/, output
28
+ assert_match(/Not a government domain/, output)
30
29
  end
31
30
 
32
- should "know the type" do
33
- assert_match /federal/, @output
31
+ should 'know the type' do
32
+ assert_match(/federal/, @output)
34
33
  assert_equal 0, @status.exitstatus
35
34
  end
36
35
 
37
- should "know the agency" do
38
- assert_match /Executive Office of the President/, @output
36
+ should 'know the agency' do
37
+ assert_match(/Executive Office of the President/, @output)
39
38
  assert_equal 0, @status.exitstatus
40
39
  end
41
40
 
42
- should "know the country" do
43
- assert_match /United States/, @output
41
+ should 'know the country' do
42
+ assert_match(/United States/, @output)
44
43
  assert_equal 0, @status.exitstatus
45
44
  end
46
45
 
47
- should "know the city" do
48
- assert_match /Washington/, @output
46
+ should 'know the city' do
47
+ assert_match(/Washington/, @output)
49
48
  assert_equal 0, @status.exitstatus
50
49
  end
51
50
 
52
- should "know the state" do
53
- assert_match /DC/, @output
51
+ should 'know the state' do
52
+ assert_match(/DC/, @output)
54
53
  assert_equal 0, @status.exitstatus
55
54
  end
56
55
 
57
- should "allow you to disable colorization" do
58
- output, status = test_bin("whitehouse.gov", "--no-color")
59
- refute_match /\e\[32m/, output
56
+ should 'allow you to disable colorization' do
57
+ output, = test_bin('whitehouse.gov', '--no-color')
58
+ refute_match(/\e\[32m/, output)
60
59
  end
61
60
 
62
- should "color by default" do
63
- assert_match /\e\[32m/, @output
61
+ should 'color by default' do
62
+ assert_match(/\e\[32m/, @output)
64
63
  end
65
64
 
66
- should "show help text" do
67
- output, status = test_bin
68
- assert_match /Usage/i, output
69
-
70
- output, status = test_bin("")
71
- assert_match /Usage/i, output
65
+ should 'show help text' do
66
+ output, = test_bin
67
+ assert_match(/Usage/i, output)
72
68
 
73
- output, status = test_bin("--no-color")
74
- assert_match /Usage/i, output
75
- end
69
+ output, = test_bin('')
70
+ assert_match(/Usage/i, output)
76
71
 
77
- should "know if a country is sanctioned" do
78
- output, status = test_bin "kim@pyongyang.gov.kp"
79
- assert_match /SANCTIONED/, output
72
+ output, = test_bin('--no-color')
73
+ assert_match(/Usage/i, output)
80
74
  end
81
75
  end
@@ -2,13 +2,17 @@ require File.join(File.dirname(__FILE__), 'helper')
2
2
 
3
3
  class TestGmanCountryCodes < Minitest::Test
4
4
  should "determine a domain's country" do
5
- assert_equal "United States of America", Gman.new("whitehouse.gov").country.name
6
- assert_equal "United States of America", Gman.new("army.mil").country.name
7
- assert_equal "United Kingdom of Great Britain and Northern Ireland", Gman.new("foo.gov.uk").country.name
8
- assert_equal "Canada", Gman.new("foo.gc.ca").country.name
5
+ name = Gman.new('whitehouse.gov').country.name
6
+ assert_equal 'United States of America', name
7
+
8
+ name = Gman.new('foo.gov.uk').country.name
9
+ assert_equal 'United Kingdom of Great Britain and Northern Ireland', name
10
+
11
+ assert_equal 'United States of America', Gman.new('army.mil').country.name
12
+ assert_equal 'Canada', Gman.new('foo.gc.ca').country.name
9
13
  end
10
14
 
11
- should "not err out on an unknown country code" do
12
- assert_equal nil, Gman.new("foo.eu").country
15
+ should 'not err out on an unknown country code' do
16
+ assert_equal nil, Gman.new('foo.eu').country
13
17
  end
14
18
  end
@@ -1,28 +1,33 @@
1
1
  require File.join(File.dirname(__FILE__), 'helper')
2
2
 
3
- class TestDomains < Minitest::Test
4
-
5
- WHITELIST = [ "non-us gov", "non-us mil", "US Federal"]
3
+ class TestGmanDomains < Minitest::Test
4
+ WHITELIST = ['non-us gov', 'non-us mil', 'US Federal'].freeze
6
5
 
7
6
  def resolve_domains?
8
- ENV["GMAN_RESOLVE_DOMAINS"] == "true"
7
+ ENV['GMAN_RESOLVE_DOMAINS'] == 'true'
9
8
  end
10
9
 
11
- should "only contains valid domains" do
10
+ should 'only contains valid domains' do
12
11
  importer = Gman::Importer.new({})
13
12
  if resolve_domains?
14
- importer.logger.info "Validating that all domains resolve. This may take a while..."
13
+ importer.logger.info <<-MSG
14
+ Validating that all domains resolve. This may take a while...
15
+ MSG
15
16
  else
16
- importer.logger.info "Skipping domain resolution. Run `GMAN_RESOLVE_DOMAINS=true rake test` to validate that domains resolve."
17
+ importer.logger.info 'Skipping domain resolution.' \
18
+ 'Run `GMAN_RESOLVE_DOMAINS=true rake test` '\
19
+ 'to validate that domains resolve.'
17
20
  end
18
21
 
19
22
  invalid = []
20
- Parallel.each(Gman::DomainList.current.list, :in_threads => 2) do |group, domains|
23
+ list = Gman::DomainList.current.list
24
+ Parallel.each(list, in_threads: 2) do |group, domains|
21
25
  next if WHITELIST.include?(group)
22
26
  invalid.push domains.reject { |domain|
23
- importer.valid_domain?(domain, :skip_dupe => true, :skip_resolve => !resolve_domains?)
27
+ options = { skip_dupe: true, skip_resolve: !resolve_domains? }
28
+ importer.valid_domain?(domain, options)
24
29
  }
25
30
  end
26
- assert_equal [], invalid.flatten.reject { |e| e.empty? }
31
+ assert_equal [], invalid.flatten.reject(&:empty?)
27
32
  end
28
33
  end
@@ -2,18 +2,16 @@ HERE = File.dirname(__FILE__)
2
2
  require File.join(HERE, 'helper')
3
3
 
4
4
  class TestGmanFilter < Minitest::Test
5
+ txt_path = fixture_path 'obama.txt'
6
+ exec_path = bin_path 'gman_filter'
5
7
 
6
- txt_path = File.join(HERE, "obama.txt")
7
- exec_path = File.join(HERE, "..", "bin", "gman_filter")
8
-
9
- should "remove non-gov/mil addresses" do
10
- filtered = `#{exec_path} < #{txt_path}`
8
+ should 'remove non-gov/mil addresses' do
9
+ output, _status = Open3.capture2e('bundle', 'exec', exec_path, txt_path)
11
10
  expected = %w(
12
11
  mr.senator@obama.senate.gov
13
12
  president@whitehouse.gov
14
13
  commander.in.chief@us.army.mil
15
14
  ).join("\n") + "\n"
16
- assert_equal filtered, expected
15
+ assert_equal output, expected
17
16
  end
18
-
19
17
  end
@@ -1,16 +1,16 @@
1
1
  require File.join(File.dirname(__FILE__), 'helper')
2
2
 
3
3
  class TestGmanIdentifier < Minitest::Test
4
- should "Parse the dotgov list" do
4
+ should 'Parse the dotgov list' do
5
5
  assert Gman.dotgov_list
6
6
  assert_equal CSV::Table, Gman.dotgov_list.class
7
7
  assert_equal CSV::Row, Gman.dotgov_list.first.class
8
- assert Gman.dotgov_list.first["Domain Name"]
8
+ assert Gman.dotgov_list.first['Domain Name']
9
9
  end
10
10
 
11
- context "locality domains" do
12
- should "detect state domains" do
13
- domain = Gman.new("state.ak.us")
11
+ context 'locality domains' do
12
+ should 'detect state domains' do
13
+ domain = Gman.new('state.ak.us')
14
14
  assert domain.state?
15
15
 
16
16
  refute domain.dotgov?
@@ -19,11 +19,11 @@ class TestGmanIdentifier < Minitest::Test
19
19
  refute domain.county?
20
20
 
21
21
  assert_equal :state, domain.type
22
- assert_equal "AK", domain.state
22
+ assert_equal 'AK', domain.state
23
23
  end
24
24
 
25
- should "detect city domains" do
26
- domain = Gman.new("ci.champaign.il.us")
25
+ should 'detect city domains' do
26
+ domain = Gman.new('ci.champaign.il.us')
27
27
  assert domain.city?
28
28
 
29
29
  refute domain.dotgov?
@@ -32,13 +32,13 @@ class TestGmanIdentifier < Minitest::Test
32
32
  refute domain.county?
33
33
 
34
34
  assert_equal :city, domain.type
35
- assert_equal "IL", domain.state
35
+ assert_equal 'IL', domain.state
36
36
  end
37
37
  end
38
38
 
39
- context "dotgovs" do
40
- should "detect federal dotgovs" do
41
- domain = Gman.new "whitehouse.gov"
39
+ context 'dotgovs' do
40
+ should 'detect federal dotgovs' do
41
+ domain = Gman.new 'whitehouse.gov'
42
42
  assert domain.federal?
43
43
  assert domain.dotgov?
44
44
 
@@ -47,13 +47,13 @@ class TestGmanIdentifier < Minitest::Test
47
47
  refute domain.county?
48
48
 
49
49
  assert_equal :federal, domain.type
50
- assert_equal "DC", domain.state
51
- assert_equal "Washington", domain.city
52
- assert_equal "Executive Office of the President", domain.agency
50
+ assert_equal 'DC', domain.state
51
+ assert_equal 'Washington', domain.city
52
+ assert_equal 'Executive Office of the President', domain.agency
53
53
  end
54
54
 
55
- should "detect state dotgovs" do
56
- domain = Gman.new "illinois.gov"
55
+ should 'detect state dotgovs' do
56
+ domain = Gman.new 'illinois.gov'
57
57
  assert domain.state?
58
58
  assert domain.dotgov?
59
59
 
@@ -62,12 +62,12 @@ class TestGmanIdentifier < Minitest::Test
62
62
  refute domain.county?
63
63
 
64
64
  assert_equal :state, domain.type
65
- assert_equal "IL", domain.state
66
- assert_equal "Springfield", domain.city
65
+ assert_equal 'IL', domain.state
66
+ assert_equal 'Springfield', domain.city
67
67
  end
68
68
 
69
- should "detect county dotgovs" do
70
- domain = Gman.new "ALLEGHENYCOUNTYPA.GOV"
69
+ should 'detect county dotgovs' do
70
+ domain = Gman.new 'ALLEGHENYCOUNTYPA.GOV'
71
71
  assert domain.county?
72
72
  assert domain.dotgov?
73
73
 
@@ -76,30 +76,31 @@ class TestGmanIdentifier < Minitest::Test
76
76
  refute domain.state?
77
77
 
78
78
  assert_equal :county, domain.type
79
- assert_equal "PA", domain.state
80
- assert_equal "Pittsburgh", domain.city
79
+ assert_equal 'PA', domain.state
80
+ assert_equal 'Pittsburgh', domain.city
81
81
  end
82
82
 
83
- should "detect the list category" do
84
- assert_equal "US Federal", Gman.new("whitehouse.gov").send("list_category")
83
+ should 'detect the list category' do
84
+ category = Gman.new('whitehouse.gov').send('list_category')
85
+ assert_equal 'US Federal', category
85
86
  end
86
87
  end
87
88
 
88
- context "non-dotgov domains" do
89
+ context 'non-dotgov domains' do
89
90
  should "determine a domain's group" do
90
- assert_equal "usagovIN", Gman.new("cityofperu.org").send("list_category")
91
- assert_equal :unknown, Gman.new("cityofperu.org").type
91
+ assert_equal 'usagovIN', Gman.new('cityofperu.org').send('list_category')
92
+ assert_equal :unknown, Gman.new('cityofperu.org').type
92
93
 
93
- assert_equal "Canada municipal", Gman.new("acme.ca").send("list_category")
94
- assert_equal :"Canada municipal", Gman.new("acme.ca").type
94
+ assert_equal 'Canada municipal', Gman.new('acme.ca').send('list_category')
95
+ assert_equal :"Canada municipal", Gman.new('acme.ca').type
95
96
 
96
- assert_equal "Canada federal", Gman.new("canada.ca").send("list_category")
97
- assert_equal :"Canada federal", Gman.new("canada.ca").type
97
+ assert_equal 'Canada federal', Gman.new('canada.ca').send('list_category')
98
+ assert_equal :"Canada federal", Gman.new('canada.ca').type
98
99
  end
99
100
 
100
- should "detect the state" do
101
- assert_equal "OR", Gman.new("ashland.or.us").state
102
- refute Gman.new("canada.ca").state
101
+ should 'detect the state' do
102
+ assert_equal 'OR', Gman.new('ashland.or.us').state
103
+ refute Gman.new('canada.ca').state
103
104
  end
104
105
  end
105
106
  end
@@ -0,0 +1,250 @@
1
+ require File.join(File.dirname(__FILE__), 'helper')
2
+
3
+ class TestGManImporter < Minitest::Test
4
+ def setup
5
+ @importer = Gman::Importer.new 'test' => ['example.com']
6
+ @stdout = StringIO.new
7
+ @importer.instance_variable_set '@logger', Logger.new(@stdout)
8
+
9
+ with_env 'GMAN_STUB_DOMAINS', 'true' do
10
+ @original_domain_list = File.open(Gman.list_path).read
11
+ end
12
+ end
13
+
14
+ def teardown
15
+ with_env 'GMAN_STUB_DOMAINS', 'true' do
16
+ File.write Gman.list_path, @original_domain_list
17
+ end
18
+ end
19
+
20
+ should 'init the domain list' do
21
+ assert_equal Gman::DomainList, @importer.domains.class
22
+ assert_equal 1, @importer.domains.domains.count
23
+ assert_equal 'example.com', @importer.domains.domains.first
24
+ end
25
+
26
+ should 'init the logger' do
27
+ assert_equal Logger, @importer.logger.class
28
+ end
29
+
30
+ should 'return the current domain list' do
31
+ assert_equal Gman::DomainList, @importer.current.class
32
+ end
33
+
34
+ should 'return the resolver' do
35
+ assert_equal Resolv::DNS, @importer.resolver.class
36
+ end
37
+
38
+ context 'domain rejection' do
39
+ should 'return false for a rejected domain' do
40
+ refute @importer.reject 'example.com', 'reasons'
41
+ end
42
+
43
+ should 'return the reason when asked' do
44
+ with_env 'RECONCILING', 'true' do
45
+ assert_equal 'reasons', @importer.reject('example.com', 'reasons')
46
+ end
47
+ end
48
+ end
49
+
50
+ context 'manipulating the domain list' do
51
+ should 'normalize domains within the domain list' do
52
+ importer = Gman::Importer.new 'test' => ['www.EXAMPLE.com/']
53
+ importer.send :normalize_domains!
54
+ assert_equal 'example.com', importer.domains.domains.first
55
+ end
56
+
57
+ should 'remove invalid domains from the domain list' do
58
+ importer = Gman::Importer.new 'test' => ['foo.github.io', 'example.com']
59
+ importer.instance_variable_set '@logger', Logger.new(@stdout)
60
+
61
+ assert_equal 2, importer.domains.domains.count
62
+ importer.send :ensure_validity!
63
+ assert_equal 1, importer.domains.domains.count
64
+ end
65
+
66
+ context 'writing the domain list' do
67
+ should 'add domains to the current domain list' do
68
+ with_env 'GMAN_STUB_DOMAINS', 'true' do
69
+ domains = { 'test' => ['example.com'], 'test2' => ['github.com'] }
70
+ importer = Gman::Importer.new domains
71
+ importer.send :add_to_current
72
+ expected = "// test\nexample.com\ngov\n\n// test2\ngithub.com"
73
+ assert_equal expected, File.open(Gman.list_path).read
74
+ end
75
+ end
76
+
77
+ should 'import' do
78
+ with_env 'GMAN_STUB_DOMAINS', 'true' do
79
+ domains = {
80
+ 'test' => ['www.example.com', 'goo.github.io'],
81
+ 'test2' => ['github.com', 'www.github.com', 'whitehouse.gov']
82
+ }
83
+
84
+ importer = Gman::Importer.new domains
85
+ importer.instance_variable_set '@logger', Logger.new(@stdout)
86
+ importer.import(skip_resolve: true)
87
+
88
+ expected = "// test\nexample.com\ngov\n\n// test2\ngithub.com"
89
+ assert_equal expected, File.open(Gman.list_path).read
90
+ end
91
+ end
92
+ end
93
+ end
94
+
95
+ context 'domain validation' do
96
+ should 'allow valid domains' do
97
+ assert @importer.send :ensure_valid, 'whitehouse.gov'
98
+ end
99
+
100
+ should 'reject empty domains' do
101
+ refute @importer.send :ensure_valid, ''
102
+ end
103
+
104
+ should 'reject blacklisted domains' do
105
+ refute @importer.send :ensure_valid, 'egovlink.com'
106
+ end
107
+
108
+ should 'reject invalid domains' do
109
+ refute @importer.send :ensure_valid, 'foo.invalid'
110
+ end
111
+
112
+ should 'reject academic domains' do
113
+ refute @importer.send :ensure_valid, 'harvard.edu'
114
+ end
115
+
116
+ should "reject regex'd domains" do
117
+ refute @importer.send :ensure_valid, 'foo.github.io'
118
+ end
119
+ end
120
+
121
+ context 'duplicate domains' do
122
+ should 'know a unique domain is not a dupe' do
123
+ refute @importer.send :dupe?, 'gman.com'
124
+ end
125
+
126
+ should "know when a domain's a dupe" do
127
+ assert @importer.send :dupe?, 'gov'
128
+ end
129
+
130
+ should "know when a domain's a subdomain of an existing domain" do
131
+ assert @importer.send :dupe?, 'whitehouse.gov'
132
+ end
133
+
134
+ should 'allow unique domains' do
135
+ assert @importer.send :ensure_not_dupe, 'gman.com'
136
+ end
137
+
138
+ should 'reject duplicate domains' do
139
+ refute @importer.send :ensure_not_dupe, 'gov'
140
+ end
141
+
142
+ should 'reject subdomains' do
143
+ refute @importer.send :ensure_not_dupe, 'whitehouse.gov'
144
+ end
145
+ end
146
+
147
+ context 'domain resolution' do
148
+ should 'know if a domain resolves' do
149
+ assert @importer.domain_resolves?('github.com')
150
+ assert @importer.send :ensure_resolves, 'github.com'
151
+ end
152
+
153
+ should "know if a domain doesn't resolve" do
154
+ refute @importer.domain_resolves?('foo.invalid')
155
+ refute @importer.send :ensure_resolves, 'foo.invalid'
156
+ end
157
+
158
+ should 'know if a domain has an IP' do
159
+ end
160
+
161
+ should 'know if a domain returns a given record' do
162
+ end
163
+ end
164
+
165
+ context 'regex checks' do
166
+ should 'pass valid domains' do
167
+ assert @importer.send :ensure_regex, 'example.com'
168
+ end
169
+
170
+ should 'reject domains that begin with home.' do
171
+ refute @importer.send :ensure_regex, 'home.example.com'
172
+ end
173
+
174
+ should 'reject domains that begin with user.' do
175
+ refute @importer.send :ensure_regex, 'user.example.com'
176
+ end
177
+
178
+ should 'reject domains that begin with site.' do
179
+ refute @importer.send :ensure_regex, 'user.example.com'
180
+ end
181
+
182
+ should 'reject weebly domains' do
183
+ refute @importer.send :ensure_regex, 'foo.weebly.com'
184
+ end
185
+
186
+ should 'reject wordpress domains' do
187
+ refute @importer.send :ensure_regex, 'foo.wordpress.com'
188
+ end
189
+
190
+ should 'reject govoffice domains' do
191
+ refute @importer.send :ensure_regex, 'foo.govoffice.com'
192
+ refute @importer.send :ensure_regex, 'foo.govoffice1.com'
193
+ end
194
+
195
+ should 'reject homestead domains' do
196
+ refute @importer.send :ensure_regex, 'foo.homestead.com'
197
+ end
198
+
199
+ should 'reject wix domains' do
200
+ refute @importer.send :ensure_regex, 'foo.wix.com'
201
+ end
202
+
203
+ should 'reject blogspot domains' do
204
+ refute @importer.send :ensure_regex, 'foo.blogspot.com'
205
+ end
206
+
207
+ should 'reject tripod domains' do
208
+ refute @importer.send :ensure_regex, 'foo.tripod.com'
209
+ end
210
+
211
+ should 'reject squarespace domains' do
212
+ refute @importer.send :ensure_regex, 'foo.squarespace.com'
213
+ end
214
+
215
+ should 'reject github.io domains' do
216
+ refute @importer.send :ensure_regex, 'foo.github.io'
217
+ end
218
+
219
+ should 'reject locality domains' do
220
+ refute @importer.send :ensure_regex, 'ci.champaign.il.us'
221
+ end
222
+ end
223
+
224
+ context 'normalizing domains' do
225
+ should 'normalize URLs to domains' do
226
+ expected = 'example.com'
227
+ assert_equal expected, @importer.normalize_domain('http://example.com')
228
+ end
229
+
230
+ should 'strip WWW' do
231
+ assert_equal 'example.com', @importer.normalize_domain('www.example.com')
232
+ end
233
+
234
+ should 'remove trailing slashes' do
235
+ assert_equal 'example.com', @importer.normalize_domain('example.com/')
236
+ end
237
+
238
+ should 'remove paths' do
239
+ assert_equal 'example.com', @importer.normalize_domain('example.com/foo')
240
+ end
241
+
242
+ should 'remove paths with trailing slashes' do
243
+ assert_equal 'example.com', @importer.normalize_domain('example.com/foo/')
244
+ end
245
+
246
+ should 'downcase' do
247
+ assert_equal 'example.com', @importer.normalize_domain('EXAMPLE.com')
248
+ end
249
+ end
250
+ end