gman 7.0.1 → 7.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,18 +0,0 @@
1
- require File.join(File.dirname(__FILE__), 'helper')
2
-
3
- class TestGmanCountryCodes < Minitest::Test
4
- should "determine a domain's country" do
5
- name = Gman.new('whitehouse.gov').country.name
6
- assert_equal 'United States of America', name
7
-
8
- name = Gman.new('foo.gov.uk').country.name
9
- assert_equal 'United Kingdom of Great Britain and Northern Ireland', name
10
-
11
- assert_equal 'United States of America', Gman.new('army.mil').country.name
12
- assert_equal 'Canada', Gman.new('foo.gc.ca').country.name
13
- end
14
-
15
- should 'not err out on an unknown country code' do
16
- assert_equal nil, Gman.new('foo.eu').country
17
- end
18
- end
@@ -1,112 +0,0 @@
1
- require File.join(File.dirname(__FILE__), 'helper')
2
-
3
- class TestGmanDomainList < Minitest::Test
4
- INIT_TYPES = [:path, :contents, :data].freeze
5
-
6
- def setup
7
- @original_domain_list = File.read(stubbed_list_path)
8
- end
9
-
10
- def teardown
11
- File.write stubbed_list_path, @original_domain_list
12
- end
13
-
14
- def domain_list(type)
15
- case type
16
- when :path
17
- Gman::DomainList.new(path: Gman.list_path)
18
- when :contents
19
- contents = File.read(Gman.list_path)
20
- Gman::DomainList.new(contents: contents)
21
- when :data
22
- data = Gman::DomainList.new(path: Gman.list_path).to_h
23
- Gman::DomainList.new(data: data)
24
- end
25
- end
26
-
27
- INIT_TYPES.each do |type|
28
- context "when initalized with #{type}" do
29
- should 'store the init vars' do
30
- refute domain_list(type).public_send(type).nil?
31
- end
32
-
33
- should 'return the domain data' do
34
- list = domain_list(type)
35
- assert list.data.key? 'Canada federal'
36
- assert list.data.any? { |_key, values| values.include? 'gov' }
37
- end
38
-
39
- should 'return the list contents' do
40
- list = domain_list(type)
41
- assert_match(/^gov$/, list.contents)
42
- end
43
-
44
- should 'return the list path' do
45
- list = domain_list(type)
46
- assert_equal list.path, Gman.list_path
47
- end
48
-
49
- should 'return the public suffix parsed list' do
50
- list = domain_list(type)
51
- assert list.public_suffix_list.class == PublicSuffix::List
52
- end
53
-
54
- should 'know if a domain is valid' do
55
- list = domain_list(type)
56
- assert list.valid? 'whitehouse.gov'
57
- end
58
-
59
- should 'know if a domain is invalid' do
60
- list = domain_list(type)
61
- refute list.valid? 'example.com'
62
- end
63
-
64
- should 'return the domain groups' do
65
- list = domain_list(type)
66
- assert list.groups.include?('Canada federal')
67
- end
68
-
69
- should 'return the domains' do
70
- list = domain_list(type)
71
- assert list.domains.include?('gov')
72
- end
73
-
74
- should 'return the domain count' do
75
- list = domain_list(type)
76
- assert list.count.is_a?(Integer)
77
- assert list.count > 100
78
- end
79
-
80
- should 'alphabetize the list' do
81
- list = domain_list(type)
82
- list.data['Canada municipal'].shuffle!
83
- assert list.data['Canada municipal'].first != '100milehouse.com'
84
- list.alphabetize
85
- assert list.data['Canada municipal'].first == '100milehouse.com'
86
- end
87
-
88
- should 'write the list' do
89
- list = domain_list(type)
90
- list.instance_variable_set('@path', stubbed_list_path)
91
- list.data = { 'foo' => ['bar.gov', 'baz.net'] }
92
- list.write
93
- contents = File.read(stubbed_list_path)
94
- assert_match %r{^// foo$}, contents
95
- expected = "// foo\nbar.gov\nbaz.net"
96
- assert contents.include?(expected)
97
- end
98
-
99
- should 'output the list in public_suffix format' do
100
- list = domain_list(type)
101
- string = list.to_s
102
- assert_match %r{^// Canada federal$}, string
103
- assert string.include? "// Canada federal\ncanada\.ca\n"
104
- end
105
-
106
- should "find a domain's parent" do
107
- list = domain_list(type)
108
- assert_equal 'gov.uk', list.parent_domain('foo.gov.uk')
109
- end
110
- end
111
- end
112
- end
@@ -1,32 +0,0 @@
1
- require File.join(File.dirname(__FILE__), 'helper')
2
-
3
- class TestGmanDomains < Minitest::Test
4
- WHITELIST = ['non-us gov', 'non-us mil', 'US Federal'].freeze
5
-
6
- def resolve_domains?
7
- ENV['GMAN_RESOLVE_DOMAINS'] == 'true'
8
- end
9
-
10
- should 'only contains valid domains' do
11
- importer = Gman::Importer.new({})
12
- if resolve_domains?
13
- importer.logger.info <<-MSG
14
- Validating that all domains resolve. This may take a while...
15
- MSG
16
- else
17
- importer.logger.info 'Skipping domain resolution.' \
18
- 'Run `GMAN_RESOLVE_DOMAINS=true rake test` '\
19
- 'to validate that domains resolve.'
20
- end
21
-
22
- invalid = []
23
- options = { skip_dupe: true, skip_resolve: !resolve_domains? }
24
- Gman.list.to_h.each do |group, domains|
25
- next if WHITELIST.include?(group)
26
- Parallel.each(domains, in_threads: 4) do |domain|
27
- invalid.push(domain) unless importer.valid_domain?(domain, options)
28
- end
29
- end
30
- assert_equal [], invalid.flatten.reject(&:empty?)
31
- end
32
- end
@@ -1,17 +0,0 @@
1
- HERE = File.dirname(__FILE__)
2
- require File.join(HERE, 'helper')
3
-
4
- class TestGmanFilter < Minitest::Test
5
- txt_path = fixture_path 'obama.txt'
6
- exec_path = bin_path 'gman_filter'
7
-
8
- should 'remove non-gov/mil addresses' do
9
- output, _status = Open3.capture2e('bundle', 'exec', exec_path, txt_path)
10
- expected = %w(
11
- mr.senator@obama.senate.gov
12
- president@whitehouse.gov
13
- commander.in.chief@us.army.mil
14
- ).join("\n") + "\n"
15
- assert_equal output, expected
16
- end
17
- end
@@ -1,106 +0,0 @@
1
- require File.join(File.dirname(__FILE__), 'helper')
2
-
3
- class TestGmanIdentifier < Minitest::Test
4
- should 'Parse the dotgov list' do
5
- assert Gman.dotgov_list
6
- assert_equal CSV::Table, Gman.dotgov_list.class
7
- assert_equal CSV::Row, Gman.dotgov_list.first.class
8
- assert Gman.dotgov_list.first['Domain Name']
9
- end
10
-
11
- context 'locality domains' do
12
- should 'detect state domains' do
13
- domain = Gman.new('state.ak.us')
14
- assert domain.state?
15
-
16
- refute domain.dotgov?
17
- refute domain.city?
18
- refute domain.federal?
19
- refute domain.county?
20
-
21
- assert_equal :state, domain.type
22
- assert_equal 'AK', domain.state
23
- end
24
-
25
- should 'detect city domains' do
26
- domain = Gman.new('ci.champaign.il.us')
27
- assert domain.city?
28
-
29
- refute domain.dotgov?
30
- refute domain.state?
31
- refute domain.federal?
32
- refute domain.county?
33
-
34
- assert_equal :city, domain.type
35
- assert_equal 'IL', domain.state
36
- end
37
- end
38
-
39
- context 'dotgovs' do
40
- should 'detect federal dotgovs' do
41
- domain = Gman.new 'whitehouse.gov'
42
- assert domain.federal?
43
- assert domain.dotgov?
44
-
45
- refute domain.city?
46
- refute domain.state?
47
- refute domain.county?
48
-
49
- assert_equal :federal, domain.type
50
- assert_equal 'DC', domain.state
51
- assert_equal 'Washington', domain.city
52
- assert_equal 'Executive Office of the President', domain.agency
53
- end
54
-
55
- should 'detect state dotgovs' do
56
- domain = Gman.new 'illinois.gov'
57
- assert domain.state?
58
- assert domain.dotgov?
59
-
60
- refute domain.city?
61
- refute domain.federal?
62
- refute domain.county?
63
-
64
- assert_equal :state, domain.type
65
- assert_equal 'IL', domain.state
66
- assert_equal 'Springfield', domain.city
67
- end
68
-
69
- should 'detect county dotgovs' do
70
- domain = Gman.new 'ALLEGHENYCOUNTYPA.GOV'
71
- assert domain.county?
72
- assert domain.dotgov?
73
-
74
- refute domain.city?
75
- refute domain.federal?
76
- refute domain.state?
77
-
78
- assert_equal :county, domain.type
79
- assert_equal 'PA', domain.state
80
- assert_equal 'Pittsburgh', domain.city
81
- end
82
-
83
- should 'detect the list category' do
84
- category = Gman.new('whitehouse.gov').send('list_category')
85
- assert_equal 'US Federal', category
86
- end
87
- end
88
-
89
- context 'non-dotgov domains' do
90
- should "determine a domain's group" do
91
- assert_equal 'usagovIN', Gman.new('cityofperu.org').send('list_category')
92
- assert_equal :unknown, Gman.new('cityofperu.org').type
93
-
94
- assert_equal 'Canada municipal', Gman.new('acme.ca').send('list_category')
95
- assert_equal :"Canada municipal", Gman.new('acme.ca').type
96
-
97
- assert_equal 'Canada federal', Gman.new('canada.ca').send('list_category')
98
- assert_equal :"Canada federal", Gman.new('canada.ca').type
99
- end
100
-
101
- should 'detect the state' do
102
- assert_equal 'OR', Gman.new('ashland.or.us').state
103
- refute Gman.new('canada.ca').state
104
- end
105
- end
106
- end
@@ -1,244 +0,0 @@
1
- require File.join(File.dirname(__FILE__), 'helper')
2
-
3
- class TestGManImporter < Minitest::Test
4
- def setup
5
- @importer = Gman::Importer.new 'test' => ['example.com']
6
- @stdout = StringIO.new
7
- @importer.instance_variable_set '@logger', Logger.new(@stdout)
8
-
9
- @original_domain_list = File.read(stubbed_list_path)
10
- end
11
-
12
- def teardown
13
- File.write stubbed_list_path, @original_domain_list
14
- end
15
-
16
- should 'init the domain list' do
17
- assert_equal Gman::DomainList, @importer.domain_list.class
18
- assert_equal 1, @importer.domain_list.count
19
- assert_equal 'example.com', @importer.domain_list.domains.first
20
- end
21
-
22
- should 'init the logger' do
23
- assert_equal Logger, @importer.logger.class
24
- end
25
-
26
- should 'return the current domain list' do
27
- assert_equal Gman::DomainList, @importer.current.class
28
- end
29
-
30
- should 'return the resolver' do
31
- assert_equal Resolv::DNS, @importer.resolver.class
32
- end
33
-
34
- context 'domain rejection' do
35
- should 'return false for a rejected domain' do
36
- refute @importer.reject 'example.com', 'reasons'
37
- end
38
-
39
- should 'return the reason when asked' do
40
- with_env 'RECONCILING', 'true' do
41
- assert_equal 'reasons', @importer.reject('example.com', 'reasons')
42
- end
43
- end
44
- end
45
-
46
- context 'manipulating the domain list' do
47
- should 'normalize domains within the domain list' do
48
- importer = Gman::Importer.new 'test' => ['www.EXAMPLE.com/']
49
- importer.send :normalize_domains!
50
- assert_equal 'example.com', importer.domain_list.domains.first
51
- end
52
-
53
- should 'remove invalid domains from the domain list' do
54
- importer = Gman::Importer.new 'test' => ['foo.github.io', 'example.com']
55
- importer.instance_variable_set '@logger', Logger.new(@stdout)
56
-
57
- assert_equal 2, importer.domain_list.count
58
- importer.send :ensure_validity!
59
- assert_equal 1, importer.domain_list.count
60
- end
61
-
62
- context 'writing the domain list' do
63
- should 'add domains to the current domain list' do
64
- domains = { 'test' => ['example.com'], 'test2' => ['github.com'] }
65
- importer = Gman::Importer.new domains
66
- importer.instance_variable_set '@current', stubbed_list
67
- importer.send :add_to_current
68
- expected = "// test\nexample.com\ngov\n\n// test2\ngithub.com"
69
- assert_equal expected, File.open(stubbed_list_path).read
70
- end
71
-
72
- should 'import' do
73
- domains = {
74
- 'test' => ['www.example.com', 'goo.github.io'],
75
- 'test2' => ['github.com', 'www.github.com', 'whitehouse.gov']
76
- }
77
-
78
- importer = Gman::Importer.new domains
79
- importer.instance_variable_set '@current', stubbed_list
80
- importer.instance_variable_set '@logger', Logger.new(@stdout)
81
- importer.import(skip_resolve: true)
82
-
83
- expected = "// test\nexample.com\ngov\n\n// test2\ngithub.com"
84
- assert_equal expected, File.open(stubbed_list_path).read
85
- end
86
- end
87
- end
88
-
89
- context 'domain validation' do
90
- should 'allow valid domains' do
91
- assert @importer.send :ensure_valid, 'whitehouse.gov'
92
- end
93
-
94
- should 'reject empty domains' do
95
- refute @importer.send :ensure_valid, ''
96
- end
97
-
98
- should 'reject blacklisted domains' do
99
- refute @importer.send :ensure_valid, 'egovlink.com'
100
- end
101
-
102
- should 'reject invalid domains' do
103
- refute @importer.send :ensure_valid, 'foo.invalid'
104
- end
105
-
106
- should 'reject academic domains' do
107
- refute @importer.send :ensure_valid, 'harvard.edu'
108
- end
109
-
110
- should "reject regex'd domains" do
111
- refute @importer.send :ensure_valid, 'foo.github.io'
112
- end
113
- end
114
-
115
- context 'duplicate domains' do
116
- should 'know a unique domain is not a dupe' do
117
- refute @importer.send :dupe?, 'gman.com'
118
- end
119
-
120
- should "know when a domain's a dupe" do
121
- assert @importer.send :dupe?, 'gov'
122
- end
123
-
124
- should "know when a domain's a subdomain of an existing domain" do
125
- assert @importer.send :dupe?, 'whitehouse.gov'
126
- end
127
-
128
- should 'allow unique domains' do
129
- assert @importer.send :ensure_not_dupe, 'gman.com'
130
- end
131
-
132
- should 'reject duplicate domains' do
133
- refute @importer.send :ensure_not_dupe, 'gov'
134
- end
135
-
136
- should 'reject subdomains' do
137
- refute @importer.send :ensure_not_dupe, 'whitehouse.gov'
138
- end
139
- end
140
-
141
- context 'domain resolution' do
142
- should 'know if a domain resolves' do
143
- assert @importer.domain_resolves?('github.com')
144
- assert @importer.send :ensure_resolves, 'github.com'
145
- end
146
-
147
- should "know if a domain doesn't resolve" do
148
- refute @importer.domain_resolves?('foo.invalid')
149
- refute @importer.send :ensure_resolves, 'foo.invalid'
150
- end
151
-
152
- should 'know if a domain has an IP' do
153
- end
154
-
155
- should 'know if a domain returns a given record' do
156
- end
157
- end
158
-
159
- context 'regex checks' do
160
- should 'pass valid domains' do
161
- assert @importer.send :ensure_regex, 'example.com'
162
- end
163
-
164
- should 'reject domains that begin with home.' do
165
- refute @importer.send :ensure_regex, 'home.example.com'
166
- end
167
-
168
- should 'reject domains that begin with user.' do
169
- refute @importer.send :ensure_regex, 'user.example.com'
170
- end
171
-
172
- should 'reject domains that begin with site.' do
173
- refute @importer.send :ensure_regex, 'user.example.com'
174
- end
175
-
176
- should 'reject weebly domains' do
177
- refute @importer.send :ensure_regex, 'foo.weebly.com'
178
- end
179
-
180
- should 'reject wordpress domains' do
181
- refute @importer.send :ensure_regex, 'foo.wordpress.com'
182
- end
183
-
184
- should 'reject govoffice domains' do
185
- refute @importer.send :ensure_regex, 'foo.govoffice.com'
186
- refute @importer.send :ensure_regex, 'foo.govoffice1.com'
187
- end
188
-
189
- should 'reject homestead domains' do
190
- refute @importer.send :ensure_regex, 'foo.homestead.com'
191
- end
192
-
193
- should 'reject wix domains' do
194
- refute @importer.send :ensure_regex, 'foo.wix.com'
195
- end
196
-
197
- should 'reject blogspot domains' do
198
- refute @importer.send :ensure_regex, 'foo.blogspot.com'
199
- end
200
-
201
- should 'reject tripod domains' do
202
- refute @importer.send :ensure_regex, 'foo.tripod.com'
203
- end
204
-
205
- should 'reject squarespace domains' do
206
- refute @importer.send :ensure_regex, 'foo.squarespace.com'
207
- end
208
-
209
- should 'reject github.io domains' do
210
- refute @importer.send :ensure_regex, 'foo.github.io'
211
- end
212
-
213
- should 'reject locality domains' do
214
- refute @importer.send :ensure_regex, 'ci.champaign.il.us'
215
- end
216
- end
217
-
218
- context 'normalizing domains' do
219
- should 'normalize URLs to domains' do
220
- expected = 'example.com'
221
- assert_equal expected, @importer.normalize_domain('http://example.com')
222
- end
223
-
224
- should 'strip WWW' do
225
- assert_equal 'example.com', @importer.normalize_domain('www.example.com')
226
- end
227
-
228
- should 'remove trailing slashes' do
229
- assert_equal 'example.com', @importer.normalize_domain('example.com/')
230
- end
231
-
232
- should 'remove paths' do
233
- assert_equal 'example.com', @importer.normalize_domain('example.com/foo')
234
- end
235
-
236
- should 'remove paths with trailing slashes' do
237
- assert_equal 'example.com', @importer.normalize_domain('example.com/foo/')
238
- end
239
-
240
- should 'downcase' do
241
- assert_equal 'example.com', @importer.normalize_domain('EXAMPLE.com')
242
- end
243
- end
244
- end