gman 7.0.0 → 7.0.5
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/.github/CODEOWNERS +3 -0
- data/.github/ISSUE_TEMPLATE/bug_report.md +28 -0
- data/.github/ISSUE_TEMPLATE/feature_request.md +21 -0
- data/.github/config.yml +23 -0
- data/.github/funding.yml +1 -0
- data/.github/no-response.yml +15 -0
- data/.github/release-drafter.yml +4 -0
- data/.github/settings.yml +33 -0
- data/.github/stale.yml +29 -0
- data/.gitignore +1 -0
- data/.rspec +2 -0
- data/.rubocop.yml +14 -5
- data/.rubocop_todo.yml +84 -0
- data/.ruby-version +1 -1
- data/Gemfile +2 -0
- data/bin/gman +6 -4
- data/bin/gman_filter +5 -7
- data/config/domains.txt +8454 -168
- data/config/vendor/academic.txt +6 -7
- data/config/vendor/dotgovs.csv +5786 -5560
- data/docs/CODE_OF_CONDUCT.md +46 -0
- data/docs/CONTRIBUTING.md +92 -0
- data/{README.md → docs/README.md} +3 -3
- data/docs/SECURITY.md +3 -0
- data/docs/_config.yml +2 -0
- data/gman.gemspec +18 -17
- data/lib/gman.rb +4 -2
- data/lib/gman/country_codes.rb +17 -17
- data/lib/gman/domain_list.rb +25 -9
- data/lib/gman/identifier.rb +57 -19
- data/lib/gman/importer.rb +31 -21
- data/lib/gman/locality.rb +8 -6
- data/lib/gman/version.rb +3 -1
- data/script/add +2 -0
- data/script/alphabetize +2 -0
- data/script/cibuild +1 -1
- data/script/dedupe +2 -1
- data/script/profile +2 -1
- data/script/prune +5 -3
- data/script/reconcile-us +6 -3
- data/script/vendor-federal-de +2 -1
- data/script/vendor-municipal-de +2 -1
- data/script/vendor-nl +2 -0
- data/script/vendor-public-suffix +6 -4
- data/script/vendor-se +2 -1
- data/script/vendor-swot +3 -1
- data/script/vendor-us +5 -3
- data/spec/fixtures/domains.txt +4 -0
- data/{test → spec}/fixtures/obama.txt +0 -0
- data/spec/gman/bin_spec.rb +101 -0
- data/spec/gman/country_code_spec.rb +39 -0
- data/spec/gman/domain_list_spec.rb +110 -0
- data/spec/gman/domains_spec.rb +25 -0
- data/spec/gman/identifier_spec.rb +218 -0
- data/spec/gman/importer_spec.rb +236 -0
- data/spec/gman/locality_spec.rb +24 -0
- data/spec/gman_spec.rb +74 -0
- data/spec/spec_helper.rb +31 -0
- metadata +89 -81
- data/.rake_tasks +0 -0
- data/CONTRIBUTING.md +0 -22
- data/Rakefile +0 -22
- data/test/fixtures/domains.txt +0 -2
- data/test/helper.rb +0 -48
- data/test/test_gman.rb +0 -56
- data/test/test_gman_bin.rb +0 -75
- data/test/test_gman_country_codes.rb +0 -18
- data/test/test_gman_domain_list.rb +0 -112
- data/test/test_gman_domains.rb +0 -32
- data/test/test_gman_filter.rb +0 -17
- data/test/test_gman_identifier.rb +0 -106
- data/test/test_gman_importer.rb +0 -244
- data/test/test_gman_locality.rb +0 -10
data/test/test_gman_importer.rb
DELETED
@@ -1,244 +0,0 @@
|
|
1
|
-
require File.join(File.dirname(__FILE__), 'helper')
|
2
|
-
|
3
|
-
class TestGManImporter < Minitest::Test
|
4
|
-
def setup
|
5
|
-
@importer = Gman::Importer.new 'test' => ['example.com']
|
6
|
-
@stdout = StringIO.new
|
7
|
-
@importer.instance_variable_set '@logger', Logger.new(@stdout)
|
8
|
-
|
9
|
-
@original_domain_list = File.read(stubbed_list_path)
|
10
|
-
end
|
11
|
-
|
12
|
-
def teardown
|
13
|
-
File.write stubbed_list_path, @original_domain_list
|
14
|
-
end
|
15
|
-
|
16
|
-
should 'init the domain list' do
|
17
|
-
assert_equal Gman::DomainList, @importer.domain_list.class
|
18
|
-
assert_equal 1, @importer.domain_list.count
|
19
|
-
assert_equal 'example.com', @importer.domain_list.domains.first
|
20
|
-
end
|
21
|
-
|
22
|
-
should 'init the logger' do
|
23
|
-
assert_equal Logger, @importer.logger.class
|
24
|
-
end
|
25
|
-
|
26
|
-
should 'return the current domain list' do
|
27
|
-
assert_equal Gman::DomainList, @importer.current.class
|
28
|
-
end
|
29
|
-
|
30
|
-
should 'return the resolver' do
|
31
|
-
assert_equal Resolv::DNS, @importer.resolver.class
|
32
|
-
end
|
33
|
-
|
34
|
-
context 'domain rejection' do
|
35
|
-
should 'return false for a rejected domain' do
|
36
|
-
refute @importer.reject 'example.com', 'reasons'
|
37
|
-
end
|
38
|
-
|
39
|
-
should 'return the reason when asked' do
|
40
|
-
with_env 'RECONCILING', 'true' do
|
41
|
-
assert_equal 'reasons', @importer.reject('example.com', 'reasons')
|
42
|
-
end
|
43
|
-
end
|
44
|
-
end
|
45
|
-
|
46
|
-
context 'manipulating the domain list' do
|
47
|
-
should 'normalize domains within the domain list' do
|
48
|
-
importer = Gman::Importer.new 'test' => ['www.EXAMPLE.com/']
|
49
|
-
importer.send :normalize_domains!
|
50
|
-
assert_equal 'example.com', importer.domain_list.domains.first
|
51
|
-
end
|
52
|
-
|
53
|
-
should 'remove invalid domains from the domain list' do
|
54
|
-
importer = Gman::Importer.new 'test' => ['foo.github.io', 'example.com']
|
55
|
-
importer.instance_variable_set '@logger', Logger.new(@stdout)
|
56
|
-
|
57
|
-
assert_equal 2, importer.domain_list.count
|
58
|
-
importer.send :ensure_validity!
|
59
|
-
assert_equal 1, importer.domain_list.count
|
60
|
-
end
|
61
|
-
|
62
|
-
context 'writing the domain list' do
|
63
|
-
should 'add domains to the current domain list' do
|
64
|
-
domains = { 'test' => ['example.com'], 'test2' => ['github.com'] }
|
65
|
-
importer = Gman::Importer.new domains
|
66
|
-
importer.instance_variable_set '@current', stubbed_list
|
67
|
-
importer.send :add_to_current
|
68
|
-
expected = "// test\nexample.com\ngov\n\n// test2\ngithub.com"
|
69
|
-
assert_equal expected, File.open(stubbed_list_path).read
|
70
|
-
end
|
71
|
-
|
72
|
-
should 'import' do
|
73
|
-
domains = {
|
74
|
-
'test' => ['www.example.com', 'goo.github.io'],
|
75
|
-
'test2' => ['github.com', 'www.github.com', 'whitehouse.gov']
|
76
|
-
}
|
77
|
-
|
78
|
-
importer = Gman::Importer.new domains
|
79
|
-
importer.instance_variable_set '@current', stubbed_list
|
80
|
-
importer.instance_variable_set '@logger', Logger.new(@stdout)
|
81
|
-
importer.import(skip_resolve: true)
|
82
|
-
|
83
|
-
expected = "// test\nexample.com\ngov\n\n// test2\ngithub.com"
|
84
|
-
assert_equal expected, File.open(stubbed_list_path).read
|
85
|
-
end
|
86
|
-
end
|
87
|
-
end
|
88
|
-
|
89
|
-
context 'domain validation' do
|
90
|
-
should 'allow valid domains' do
|
91
|
-
assert @importer.send :ensure_valid, 'whitehouse.gov'
|
92
|
-
end
|
93
|
-
|
94
|
-
should 'reject empty domains' do
|
95
|
-
refute @importer.send :ensure_valid, ''
|
96
|
-
end
|
97
|
-
|
98
|
-
should 'reject blacklisted domains' do
|
99
|
-
refute @importer.send :ensure_valid, 'egovlink.com'
|
100
|
-
end
|
101
|
-
|
102
|
-
should 'reject invalid domains' do
|
103
|
-
refute @importer.send :ensure_valid, 'foo.invalid'
|
104
|
-
end
|
105
|
-
|
106
|
-
should 'reject academic domains' do
|
107
|
-
refute @importer.send :ensure_valid, 'harvard.edu'
|
108
|
-
end
|
109
|
-
|
110
|
-
should "reject regex'd domains" do
|
111
|
-
refute @importer.send :ensure_valid, 'foo.github.io'
|
112
|
-
end
|
113
|
-
end
|
114
|
-
|
115
|
-
context 'duplicate domains' do
|
116
|
-
should 'know a unique domain is not a dupe' do
|
117
|
-
refute @importer.send :dupe?, 'gman.com'
|
118
|
-
end
|
119
|
-
|
120
|
-
should "know when a domain's a dupe" do
|
121
|
-
assert @importer.send :dupe?, 'gov'
|
122
|
-
end
|
123
|
-
|
124
|
-
should "know when a domain's a subdomain of an existing domain" do
|
125
|
-
assert @importer.send :dupe?, 'whitehouse.gov'
|
126
|
-
end
|
127
|
-
|
128
|
-
should 'allow unique domains' do
|
129
|
-
assert @importer.send :ensure_not_dupe, 'gman.com'
|
130
|
-
end
|
131
|
-
|
132
|
-
should 'reject duplicate domains' do
|
133
|
-
refute @importer.send :ensure_not_dupe, 'gov'
|
134
|
-
end
|
135
|
-
|
136
|
-
should 'reject subdomains' do
|
137
|
-
refute @importer.send :ensure_not_dupe, 'whitehouse.gov'
|
138
|
-
end
|
139
|
-
end
|
140
|
-
|
141
|
-
context 'domain resolution' do
|
142
|
-
should 'know if a domain resolves' do
|
143
|
-
assert @importer.domain_resolves?('github.com')
|
144
|
-
assert @importer.send :ensure_resolves, 'github.com'
|
145
|
-
end
|
146
|
-
|
147
|
-
should "know if a domain doesn't resolve" do
|
148
|
-
refute @importer.domain_resolves?('foo.invalid')
|
149
|
-
refute @importer.send :ensure_resolves, 'foo.invalid'
|
150
|
-
end
|
151
|
-
|
152
|
-
should 'know if a domain has an IP' do
|
153
|
-
end
|
154
|
-
|
155
|
-
should 'know if a domain returns a given record' do
|
156
|
-
end
|
157
|
-
end
|
158
|
-
|
159
|
-
context 'regex checks' do
|
160
|
-
should 'pass valid domains' do
|
161
|
-
assert @importer.send :ensure_regex, 'example.com'
|
162
|
-
end
|
163
|
-
|
164
|
-
should 'reject domains that begin with home.' do
|
165
|
-
refute @importer.send :ensure_regex, 'home.example.com'
|
166
|
-
end
|
167
|
-
|
168
|
-
should 'reject domains that begin with user.' do
|
169
|
-
refute @importer.send :ensure_regex, 'user.example.com'
|
170
|
-
end
|
171
|
-
|
172
|
-
should 'reject domains that begin with site.' do
|
173
|
-
refute @importer.send :ensure_regex, 'user.example.com'
|
174
|
-
end
|
175
|
-
|
176
|
-
should 'reject weebly domains' do
|
177
|
-
refute @importer.send :ensure_regex, 'foo.weebly.com'
|
178
|
-
end
|
179
|
-
|
180
|
-
should 'reject wordpress domains' do
|
181
|
-
refute @importer.send :ensure_regex, 'foo.wordpress.com'
|
182
|
-
end
|
183
|
-
|
184
|
-
should 'reject govoffice domains' do
|
185
|
-
refute @importer.send :ensure_regex, 'foo.govoffice.com'
|
186
|
-
refute @importer.send :ensure_regex, 'foo.govoffice1.com'
|
187
|
-
end
|
188
|
-
|
189
|
-
should 'reject homestead domains' do
|
190
|
-
refute @importer.send :ensure_regex, 'foo.homestead.com'
|
191
|
-
end
|
192
|
-
|
193
|
-
should 'reject wix domains' do
|
194
|
-
refute @importer.send :ensure_regex, 'foo.wix.com'
|
195
|
-
end
|
196
|
-
|
197
|
-
should 'reject blogspot domains' do
|
198
|
-
refute @importer.send :ensure_regex, 'foo.blogspot.com'
|
199
|
-
end
|
200
|
-
|
201
|
-
should 'reject tripod domains' do
|
202
|
-
refute @importer.send :ensure_regex, 'foo.tripod.com'
|
203
|
-
end
|
204
|
-
|
205
|
-
should 'reject squarespace domains' do
|
206
|
-
refute @importer.send :ensure_regex, 'foo.squarespace.com'
|
207
|
-
end
|
208
|
-
|
209
|
-
should 'reject github.io domains' do
|
210
|
-
refute @importer.send :ensure_regex, 'foo.github.io'
|
211
|
-
end
|
212
|
-
|
213
|
-
should 'reject locality domains' do
|
214
|
-
refute @importer.send :ensure_regex, 'ci.champaign.il.us'
|
215
|
-
end
|
216
|
-
end
|
217
|
-
|
218
|
-
context 'normalizing domains' do
|
219
|
-
should 'normalize URLs to domains' do
|
220
|
-
expected = 'example.com'
|
221
|
-
assert_equal expected, @importer.normalize_domain('http://example.com')
|
222
|
-
end
|
223
|
-
|
224
|
-
should 'strip WWW' do
|
225
|
-
assert_equal 'example.com', @importer.normalize_domain('www.example.com')
|
226
|
-
end
|
227
|
-
|
228
|
-
should 'remove trailing slashes' do
|
229
|
-
assert_equal 'example.com', @importer.normalize_domain('example.com/')
|
230
|
-
end
|
231
|
-
|
232
|
-
should 'remove paths' do
|
233
|
-
assert_equal 'example.com', @importer.normalize_domain('example.com/foo')
|
234
|
-
end
|
235
|
-
|
236
|
-
should 'remove paths with trailing slashes' do
|
237
|
-
assert_equal 'example.com', @importer.normalize_domain('example.com/foo/')
|
238
|
-
end
|
239
|
-
|
240
|
-
should 'downcase' do
|
241
|
-
assert_equal 'example.com', @importer.normalize_domain('EXAMPLE.com')
|
242
|
-
end
|
243
|
-
end
|
244
|
-
end
|
data/test/test_gman_locality.rb
DELETED
@@ -1,10 +0,0 @@
|
|
1
|
-
require File.join(File.dirname(__FILE__), 'helper')
|
2
|
-
|
3
|
-
class TestGmanLocality < Minitest::Test
|
4
|
-
should 'parse the alpha2' do
|
5
|
-
assert_equal 'us', Gman.new('whitehouse.gov').alpha2
|
6
|
-
assert_equal 'us', Gman.new('army.mil').alpha2
|
7
|
-
assert_equal 'gb', Gman.new('foo.gov.uk').alpha2
|
8
|
-
assert_equal 'ca', Gman.new('gov.ca').alpha2
|
9
|
-
end
|
10
|
-
end
|