gman 7.0.1 → 7.0.2

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,18 +0,0 @@
1
- require File.join(File.dirname(__FILE__), 'helper')
2
-
3
- class TestGmanCountryCodes < Minitest::Test
4
- should "determine a domain's country" do
5
- name = Gman.new('whitehouse.gov').country.name
6
- assert_equal 'United States of America', name
7
-
8
- name = Gman.new('foo.gov.uk').country.name
9
- assert_equal 'United Kingdom of Great Britain and Northern Ireland', name
10
-
11
- assert_equal 'United States of America', Gman.new('army.mil').country.name
12
- assert_equal 'Canada', Gman.new('foo.gc.ca').country.name
13
- end
14
-
15
- should 'not err out on an unknown country code' do
16
- assert_equal nil, Gman.new('foo.eu').country
17
- end
18
- end
@@ -1,112 +0,0 @@
1
- require File.join(File.dirname(__FILE__), 'helper')
2
-
3
- class TestGmanDomainList < Minitest::Test
4
- INIT_TYPES = [:path, :contents, :data].freeze
5
-
6
- def setup
7
- @original_domain_list = File.read(stubbed_list_path)
8
- end
9
-
10
- def teardown
11
- File.write stubbed_list_path, @original_domain_list
12
- end
13
-
14
- def domain_list(type)
15
- case type
16
- when :path
17
- Gman::DomainList.new(path: Gman.list_path)
18
- when :contents
19
- contents = File.read(Gman.list_path)
20
- Gman::DomainList.new(contents: contents)
21
- when :data
22
- data = Gman::DomainList.new(path: Gman.list_path).to_h
23
- Gman::DomainList.new(data: data)
24
- end
25
- end
26
-
27
- INIT_TYPES.each do |type|
28
- context "when initalized with #{type}" do
29
- should 'store the init vars' do
30
- refute domain_list(type).public_send(type).nil?
31
- end
32
-
33
- should 'return the domain data' do
34
- list = domain_list(type)
35
- assert list.data.key? 'Canada federal'
36
- assert list.data.any? { |_key, values| values.include? 'gov' }
37
- end
38
-
39
- should 'return the list contents' do
40
- list = domain_list(type)
41
- assert_match(/^gov$/, list.contents)
42
- end
43
-
44
- should 'return the list path' do
45
- list = domain_list(type)
46
- assert_equal list.path, Gman.list_path
47
- end
48
-
49
- should 'return the public suffix parsed list' do
50
- list = domain_list(type)
51
- assert list.public_suffix_list.class == PublicSuffix::List
52
- end
53
-
54
- should 'know if a domain is valid' do
55
- list = domain_list(type)
56
- assert list.valid? 'whitehouse.gov'
57
- end
58
-
59
- should 'know if a domain is invalid' do
60
- list = domain_list(type)
61
- refute list.valid? 'example.com'
62
- end
63
-
64
- should 'return the domain groups' do
65
- list = domain_list(type)
66
- assert list.groups.include?('Canada federal')
67
- end
68
-
69
- should 'return the domains' do
70
- list = domain_list(type)
71
- assert list.domains.include?('gov')
72
- end
73
-
74
- should 'return the domain count' do
75
- list = domain_list(type)
76
- assert list.count.is_a?(Integer)
77
- assert list.count > 100
78
- end
79
-
80
- should 'alphabetize the list' do
81
- list = domain_list(type)
82
- list.data['Canada municipal'].shuffle!
83
- assert list.data['Canada municipal'].first != '100milehouse.com'
84
- list.alphabetize
85
- assert list.data['Canada municipal'].first == '100milehouse.com'
86
- end
87
-
88
- should 'write the list' do
89
- list = domain_list(type)
90
- list.instance_variable_set('@path', stubbed_list_path)
91
- list.data = { 'foo' => ['bar.gov', 'baz.net'] }
92
- list.write
93
- contents = File.read(stubbed_list_path)
94
- assert_match %r{^// foo$}, contents
95
- expected = "// foo\nbar.gov\nbaz.net"
96
- assert contents.include?(expected)
97
- end
98
-
99
- should 'output the list in public_suffix format' do
100
- list = domain_list(type)
101
- string = list.to_s
102
- assert_match %r{^// Canada federal$}, string
103
- assert string.include? "// Canada federal\ncanada\.ca\n"
104
- end
105
-
106
- should "find a domain's parent" do
107
- list = domain_list(type)
108
- assert_equal 'gov.uk', list.parent_domain('foo.gov.uk')
109
- end
110
- end
111
- end
112
- end
@@ -1,32 +0,0 @@
1
- require File.join(File.dirname(__FILE__), 'helper')
2
-
3
- class TestGmanDomains < Minitest::Test
4
- WHITELIST = ['non-us gov', 'non-us mil', 'US Federal'].freeze
5
-
6
- def resolve_domains?
7
- ENV['GMAN_RESOLVE_DOMAINS'] == 'true'
8
- end
9
-
10
- should 'only contains valid domains' do
11
- importer = Gman::Importer.new({})
12
- if resolve_domains?
13
- importer.logger.info <<-MSG
14
- Validating that all domains resolve. This may take a while...
15
- MSG
16
- else
17
- importer.logger.info 'Skipping domain resolution.' \
18
- 'Run `GMAN_RESOLVE_DOMAINS=true rake test` '\
19
- 'to validate that domains resolve.'
20
- end
21
-
22
- invalid = []
23
- options = { skip_dupe: true, skip_resolve: !resolve_domains? }
24
- Gman.list.to_h.each do |group, domains|
25
- next if WHITELIST.include?(group)
26
- Parallel.each(domains, in_threads: 4) do |domain|
27
- invalid.push(domain) unless importer.valid_domain?(domain, options)
28
- end
29
- end
30
- assert_equal [], invalid.flatten.reject(&:empty?)
31
- end
32
- end
@@ -1,17 +0,0 @@
1
- HERE = File.dirname(__FILE__)
2
- require File.join(HERE, 'helper')
3
-
4
- class TestGmanFilter < Minitest::Test
5
- txt_path = fixture_path 'obama.txt'
6
- exec_path = bin_path 'gman_filter'
7
-
8
- should 'remove non-gov/mil addresses' do
9
- output, _status = Open3.capture2e('bundle', 'exec', exec_path, txt_path)
10
- expected = %w(
11
- mr.senator@obama.senate.gov
12
- president@whitehouse.gov
13
- commander.in.chief@us.army.mil
14
- ).join("\n") + "\n"
15
- assert_equal output, expected
16
- end
17
- end
@@ -1,106 +0,0 @@
1
- require File.join(File.dirname(__FILE__), 'helper')
2
-
3
- class TestGmanIdentifier < Minitest::Test
4
- should 'Parse the dotgov list' do
5
- assert Gman.dotgov_list
6
- assert_equal CSV::Table, Gman.dotgov_list.class
7
- assert_equal CSV::Row, Gman.dotgov_list.first.class
8
- assert Gman.dotgov_list.first['Domain Name']
9
- end
10
-
11
- context 'locality domains' do
12
- should 'detect state domains' do
13
- domain = Gman.new('state.ak.us')
14
- assert domain.state?
15
-
16
- refute domain.dotgov?
17
- refute domain.city?
18
- refute domain.federal?
19
- refute domain.county?
20
-
21
- assert_equal :state, domain.type
22
- assert_equal 'AK', domain.state
23
- end
24
-
25
- should 'detect city domains' do
26
- domain = Gman.new('ci.champaign.il.us')
27
- assert domain.city?
28
-
29
- refute domain.dotgov?
30
- refute domain.state?
31
- refute domain.federal?
32
- refute domain.county?
33
-
34
- assert_equal :city, domain.type
35
- assert_equal 'IL', domain.state
36
- end
37
- end
38
-
39
- context 'dotgovs' do
40
- should 'detect federal dotgovs' do
41
- domain = Gman.new 'whitehouse.gov'
42
- assert domain.federal?
43
- assert domain.dotgov?
44
-
45
- refute domain.city?
46
- refute domain.state?
47
- refute domain.county?
48
-
49
- assert_equal :federal, domain.type
50
- assert_equal 'DC', domain.state
51
- assert_equal 'Washington', domain.city
52
- assert_equal 'Executive Office of the President', domain.agency
53
- end
54
-
55
- should 'detect state dotgovs' do
56
- domain = Gman.new 'illinois.gov'
57
- assert domain.state?
58
- assert domain.dotgov?
59
-
60
- refute domain.city?
61
- refute domain.federal?
62
- refute domain.county?
63
-
64
- assert_equal :state, domain.type
65
- assert_equal 'IL', domain.state
66
- assert_equal 'Springfield', domain.city
67
- end
68
-
69
- should 'detect county dotgovs' do
70
- domain = Gman.new 'ALLEGHENYCOUNTYPA.GOV'
71
- assert domain.county?
72
- assert domain.dotgov?
73
-
74
- refute domain.city?
75
- refute domain.federal?
76
- refute domain.state?
77
-
78
- assert_equal :county, domain.type
79
- assert_equal 'PA', domain.state
80
- assert_equal 'Pittsburgh', domain.city
81
- end
82
-
83
- should 'detect the list category' do
84
- category = Gman.new('whitehouse.gov').send('list_category')
85
- assert_equal 'US Federal', category
86
- end
87
- end
88
-
89
- context 'non-dotgov domains' do
90
- should "determine a domain's group" do
91
- assert_equal 'usagovIN', Gman.new('cityofperu.org').send('list_category')
92
- assert_equal :unknown, Gman.new('cityofperu.org').type
93
-
94
- assert_equal 'Canada municipal', Gman.new('acme.ca').send('list_category')
95
- assert_equal :"Canada municipal", Gman.new('acme.ca').type
96
-
97
- assert_equal 'Canada federal', Gman.new('canada.ca').send('list_category')
98
- assert_equal :"Canada federal", Gman.new('canada.ca').type
99
- end
100
-
101
- should 'detect the state' do
102
- assert_equal 'OR', Gman.new('ashland.or.us').state
103
- refute Gman.new('canada.ca').state
104
- end
105
- end
106
- end
@@ -1,244 +0,0 @@
1
- require File.join(File.dirname(__FILE__), 'helper')
2
-
3
- class TestGManImporter < Minitest::Test
4
- def setup
5
- @importer = Gman::Importer.new 'test' => ['example.com']
6
- @stdout = StringIO.new
7
- @importer.instance_variable_set '@logger', Logger.new(@stdout)
8
-
9
- @original_domain_list = File.read(stubbed_list_path)
10
- end
11
-
12
- def teardown
13
- File.write stubbed_list_path, @original_domain_list
14
- end
15
-
16
- should 'init the domain list' do
17
- assert_equal Gman::DomainList, @importer.domain_list.class
18
- assert_equal 1, @importer.domain_list.count
19
- assert_equal 'example.com', @importer.domain_list.domains.first
20
- end
21
-
22
- should 'init the logger' do
23
- assert_equal Logger, @importer.logger.class
24
- end
25
-
26
- should 'return the current domain list' do
27
- assert_equal Gman::DomainList, @importer.current.class
28
- end
29
-
30
- should 'return the resolver' do
31
- assert_equal Resolv::DNS, @importer.resolver.class
32
- end
33
-
34
- context 'domain rejection' do
35
- should 'return false for a rejected domain' do
36
- refute @importer.reject 'example.com', 'reasons'
37
- end
38
-
39
- should 'return the reason when asked' do
40
- with_env 'RECONCILING', 'true' do
41
- assert_equal 'reasons', @importer.reject('example.com', 'reasons')
42
- end
43
- end
44
- end
45
-
46
- context 'manipulating the domain list' do
47
- should 'normalize domains within the domain list' do
48
- importer = Gman::Importer.new 'test' => ['www.EXAMPLE.com/']
49
- importer.send :normalize_domains!
50
- assert_equal 'example.com', importer.domain_list.domains.first
51
- end
52
-
53
- should 'remove invalid domains from the domain list' do
54
- importer = Gman::Importer.new 'test' => ['foo.github.io', 'example.com']
55
- importer.instance_variable_set '@logger', Logger.new(@stdout)
56
-
57
- assert_equal 2, importer.domain_list.count
58
- importer.send :ensure_validity!
59
- assert_equal 1, importer.domain_list.count
60
- end
61
-
62
- context 'writing the domain list' do
63
- should 'add domains to the current domain list' do
64
- domains = { 'test' => ['example.com'], 'test2' => ['github.com'] }
65
- importer = Gman::Importer.new domains
66
- importer.instance_variable_set '@current', stubbed_list
67
- importer.send :add_to_current
68
- expected = "// test\nexample.com\ngov\n\n// test2\ngithub.com"
69
- assert_equal expected, File.open(stubbed_list_path).read
70
- end
71
-
72
- should 'import' do
73
- domains = {
74
- 'test' => ['www.example.com', 'goo.github.io'],
75
- 'test2' => ['github.com', 'www.github.com', 'whitehouse.gov']
76
- }
77
-
78
- importer = Gman::Importer.new domains
79
- importer.instance_variable_set '@current', stubbed_list
80
- importer.instance_variable_set '@logger', Logger.new(@stdout)
81
- importer.import(skip_resolve: true)
82
-
83
- expected = "// test\nexample.com\ngov\n\n// test2\ngithub.com"
84
- assert_equal expected, File.open(stubbed_list_path).read
85
- end
86
- end
87
- end
88
-
89
- context 'domain validation' do
90
- should 'allow valid domains' do
91
- assert @importer.send :ensure_valid, 'whitehouse.gov'
92
- end
93
-
94
- should 'reject empty domains' do
95
- refute @importer.send :ensure_valid, ''
96
- end
97
-
98
- should 'reject blacklisted domains' do
99
- refute @importer.send :ensure_valid, 'egovlink.com'
100
- end
101
-
102
- should 'reject invalid domains' do
103
- refute @importer.send :ensure_valid, 'foo.invalid'
104
- end
105
-
106
- should 'reject academic domains' do
107
- refute @importer.send :ensure_valid, 'harvard.edu'
108
- end
109
-
110
- should "reject regex'd domains" do
111
- refute @importer.send :ensure_valid, 'foo.github.io'
112
- end
113
- end
114
-
115
- context 'duplicate domains' do
116
- should 'know a unique domain is not a dupe' do
117
- refute @importer.send :dupe?, 'gman.com'
118
- end
119
-
120
- should "know when a domain's a dupe" do
121
- assert @importer.send :dupe?, 'gov'
122
- end
123
-
124
- should "know when a domain's a subdomain of an existing domain" do
125
- assert @importer.send :dupe?, 'whitehouse.gov'
126
- end
127
-
128
- should 'allow unique domains' do
129
- assert @importer.send :ensure_not_dupe, 'gman.com'
130
- end
131
-
132
- should 'reject duplicate domains' do
133
- refute @importer.send :ensure_not_dupe, 'gov'
134
- end
135
-
136
- should 'reject subdomains' do
137
- refute @importer.send :ensure_not_dupe, 'whitehouse.gov'
138
- end
139
- end
140
-
141
- context 'domain resolution' do
142
- should 'know if a domain resolves' do
143
- assert @importer.domain_resolves?('github.com')
144
- assert @importer.send :ensure_resolves, 'github.com'
145
- end
146
-
147
- should "know if a domain doesn't resolve" do
148
- refute @importer.domain_resolves?('foo.invalid')
149
- refute @importer.send :ensure_resolves, 'foo.invalid'
150
- end
151
-
152
- should 'know if a domain has an IP' do
153
- end
154
-
155
- should 'know if a domain returns a given record' do
156
- end
157
- end
158
-
159
- context 'regex checks' do
160
- should 'pass valid domains' do
161
- assert @importer.send :ensure_regex, 'example.com'
162
- end
163
-
164
- should 'reject domains that begin with home.' do
165
- refute @importer.send :ensure_regex, 'home.example.com'
166
- end
167
-
168
- should 'reject domains that begin with user.' do
169
- refute @importer.send :ensure_regex, 'user.example.com'
170
- end
171
-
172
- should 'reject domains that begin with site.' do
173
- refute @importer.send :ensure_regex, 'user.example.com'
174
- end
175
-
176
- should 'reject weebly domains' do
177
- refute @importer.send :ensure_regex, 'foo.weebly.com'
178
- end
179
-
180
- should 'reject wordpress domains' do
181
- refute @importer.send :ensure_regex, 'foo.wordpress.com'
182
- end
183
-
184
- should 'reject govoffice domains' do
185
- refute @importer.send :ensure_regex, 'foo.govoffice.com'
186
- refute @importer.send :ensure_regex, 'foo.govoffice1.com'
187
- end
188
-
189
- should 'reject homestead domains' do
190
- refute @importer.send :ensure_regex, 'foo.homestead.com'
191
- end
192
-
193
- should 'reject wix domains' do
194
- refute @importer.send :ensure_regex, 'foo.wix.com'
195
- end
196
-
197
- should 'reject blogspot domains' do
198
- refute @importer.send :ensure_regex, 'foo.blogspot.com'
199
- end
200
-
201
- should 'reject tripod domains' do
202
- refute @importer.send :ensure_regex, 'foo.tripod.com'
203
- end
204
-
205
- should 'reject squarespace domains' do
206
- refute @importer.send :ensure_regex, 'foo.squarespace.com'
207
- end
208
-
209
- should 'reject github.io domains' do
210
- refute @importer.send :ensure_regex, 'foo.github.io'
211
- end
212
-
213
- should 'reject locality domains' do
214
- refute @importer.send :ensure_regex, 'ci.champaign.il.us'
215
- end
216
- end
217
-
218
- context 'normalizing domains' do
219
- should 'normalize URLs to domains' do
220
- expected = 'example.com'
221
- assert_equal expected, @importer.normalize_domain('http://example.com')
222
- end
223
-
224
- should 'strip WWW' do
225
- assert_equal 'example.com', @importer.normalize_domain('www.example.com')
226
- end
227
-
228
- should 'remove trailing slashes' do
229
- assert_equal 'example.com', @importer.normalize_domain('example.com/')
230
- end
231
-
232
- should 'remove paths' do
233
- assert_equal 'example.com', @importer.normalize_domain('example.com/foo')
234
- end
235
-
236
- should 'remove paths with trailing slashes' do
237
- assert_equal 'example.com', @importer.normalize_domain('example.com/foo/')
238
- end
239
-
240
- should 'downcase' do
241
- assert_equal 'example.com', @importer.normalize_domain('EXAMPLE.com')
242
- end
243
- end
244
- end