gman 7.0.0 → 7.0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/.github/CODEOWNERS +3 -0
- data/.github/ISSUE_TEMPLATE/bug_report.md +28 -0
- data/.github/ISSUE_TEMPLATE/feature_request.md +21 -0
- data/.github/config.yml +23 -0
- data/.github/funding.yml +1 -0
- data/.github/no-response.yml +15 -0
- data/.github/release-drafter.yml +4 -0
- data/.github/settings.yml +33 -0
- data/.github/stale.yml +29 -0
- data/.gitignore +1 -0
- data/.rspec +2 -0
- data/.rubocop.yml +14 -5
- data/.rubocop_todo.yml +84 -0
- data/.ruby-version +1 -1
- data/Gemfile +2 -0
- data/bin/gman +6 -4
- data/bin/gman_filter +5 -7
- data/config/domains.txt +8454 -168
- data/config/vendor/academic.txt +6 -7
- data/config/vendor/dotgovs.csv +5786 -5560
- data/docs/CODE_OF_CONDUCT.md +46 -0
- data/docs/CONTRIBUTING.md +92 -0
- data/{README.md → docs/README.md} +3 -3
- data/docs/SECURITY.md +3 -0
- data/docs/_config.yml +2 -0
- data/gman.gemspec +18 -17
- data/lib/gman.rb +4 -2
- data/lib/gman/country_codes.rb +17 -17
- data/lib/gman/domain_list.rb +25 -9
- data/lib/gman/identifier.rb +57 -19
- data/lib/gman/importer.rb +31 -21
- data/lib/gman/locality.rb +8 -6
- data/lib/gman/version.rb +3 -1
- data/script/add +2 -0
- data/script/alphabetize +2 -0
- data/script/cibuild +1 -1
- data/script/dedupe +2 -1
- data/script/profile +2 -1
- data/script/prune +5 -3
- data/script/reconcile-us +6 -3
- data/script/vendor-federal-de +2 -1
- data/script/vendor-municipal-de +2 -1
- data/script/vendor-nl +2 -0
- data/script/vendor-public-suffix +6 -4
- data/script/vendor-se +2 -1
- data/script/vendor-swot +3 -1
- data/script/vendor-us +5 -3
- data/spec/fixtures/domains.txt +4 -0
- data/{test → spec}/fixtures/obama.txt +0 -0
- data/spec/gman/bin_spec.rb +101 -0
- data/spec/gman/country_code_spec.rb +39 -0
- data/spec/gman/domain_list_spec.rb +110 -0
- data/spec/gman/domains_spec.rb +25 -0
- data/spec/gman/identifier_spec.rb +218 -0
- data/spec/gman/importer_spec.rb +236 -0
- data/spec/gman/locality_spec.rb +24 -0
- data/spec/gman_spec.rb +74 -0
- data/spec/spec_helper.rb +31 -0
- metadata +89 -81
- data/.rake_tasks +0 -0
- data/CONTRIBUTING.md +0 -22
- data/Rakefile +0 -22
- data/test/fixtures/domains.txt +0 -2
- data/test/helper.rb +0 -48
- data/test/test_gman.rb +0 -56
- data/test/test_gman_bin.rb +0 -75
- data/test/test_gman_country_codes.rb +0 -18
- data/test/test_gman_domain_list.rb +0 -112
- data/test/test_gman_domains.rb +0 -32
- data/test/test_gman_filter.rb +0 -17
- data/test/test_gman_identifier.rb +0 -106
- data/test/test_gman_importer.rb +0 -244
- data/test/test_gman_locality.rb +0 -10
data/test/test_gman_importer.rb
DELETED
@@ -1,244 +0,0 @@
|
|
1
|
-
require File.join(File.dirname(__FILE__), 'helper')
|
2
|
-
|
3
|
-
class TestGManImporter < Minitest::Test
|
4
|
-
def setup
|
5
|
-
@importer = Gman::Importer.new 'test' => ['example.com']
|
6
|
-
@stdout = StringIO.new
|
7
|
-
@importer.instance_variable_set '@logger', Logger.new(@stdout)
|
8
|
-
|
9
|
-
@original_domain_list = File.read(stubbed_list_path)
|
10
|
-
end
|
11
|
-
|
12
|
-
def teardown
|
13
|
-
File.write stubbed_list_path, @original_domain_list
|
14
|
-
end
|
15
|
-
|
16
|
-
should 'init the domain list' do
|
17
|
-
assert_equal Gman::DomainList, @importer.domain_list.class
|
18
|
-
assert_equal 1, @importer.domain_list.count
|
19
|
-
assert_equal 'example.com', @importer.domain_list.domains.first
|
20
|
-
end
|
21
|
-
|
22
|
-
should 'init the logger' do
|
23
|
-
assert_equal Logger, @importer.logger.class
|
24
|
-
end
|
25
|
-
|
26
|
-
should 'return the current domain list' do
|
27
|
-
assert_equal Gman::DomainList, @importer.current.class
|
28
|
-
end
|
29
|
-
|
30
|
-
should 'return the resolver' do
|
31
|
-
assert_equal Resolv::DNS, @importer.resolver.class
|
32
|
-
end
|
33
|
-
|
34
|
-
context 'domain rejection' do
|
35
|
-
should 'return false for a rejected domain' do
|
36
|
-
refute @importer.reject 'example.com', 'reasons'
|
37
|
-
end
|
38
|
-
|
39
|
-
should 'return the reason when asked' do
|
40
|
-
with_env 'RECONCILING', 'true' do
|
41
|
-
assert_equal 'reasons', @importer.reject('example.com', 'reasons')
|
42
|
-
end
|
43
|
-
end
|
44
|
-
end
|
45
|
-
|
46
|
-
context 'manipulating the domain list' do
|
47
|
-
should 'normalize domains within the domain list' do
|
48
|
-
importer = Gman::Importer.new 'test' => ['www.EXAMPLE.com/']
|
49
|
-
importer.send :normalize_domains!
|
50
|
-
assert_equal 'example.com', importer.domain_list.domains.first
|
51
|
-
end
|
52
|
-
|
53
|
-
should 'remove invalid domains from the domain list' do
|
54
|
-
importer = Gman::Importer.new 'test' => ['foo.github.io', 'example.com']
|
55
|
-
importer.instance_variable_set '@logger', Logger.new(@stdout)
|
56
|
-
|
57
|
-
assert_equal 2, importer.domain_list.count
|
58
|
-
importer.send :ensure_validity!
|
59
|
-
assert_equal 1, importer.domain_list.count
|
60
|
-
end
|
61
|
-
|
62
|
-
context 'writing the domain list' do
|
63
|
-
should 'add domains to the current domain list' do
|
64
|
-
domains = { 'test' => ['example.com'], 'test2' => ['github.com'] }
|
65
|
-
importer = Gman::Importer.new domains
|
66
|
-
importer.instance_variable_set '@current', stubbed_list
|
67
|
-
importer.send :add_to_current
|
68
|
-
expected = "// test\nexample.com\ngov\n\n// test2\ngithub.com"
|
69
|
-
assert_equal expected, File.open(stubbed_list_path).read
|
70
|
-
end
|
71
|
-
|
72
|
-
should 'import' do
|
73
|
-
domains = {
|
74
|
-
'test' => ['www.example.com', 'goo.github.io'],
|
75
|
-
'test2' => ['github.com', 'www.github.com', 'whitehouse.gov']
|
76
|
-
}
|
77
|
-
|
78
|
-
importer = Gman::Importer.new domains
|
79
|
-
importer.instance_variable_set '@current', stubbed_list
|
80
|
-
importer.instance_variable_set '@logger', Logger.new(@stdout)
|
81
|
-
importer.import(skip_resolve: true)
|
82
|
-
|
83
|
-
expected = "// test\nexample.com\ngov\n\n// test2\ngithub.com"
|
84
|
-
assert_equal expected, File.open(stubbed_list_path).read
|
85
|
-
end
|
86
|
-
end
|
87
|
-
end
|
88
|
-
|
89
|
-
context 'domain validation' do
|
90
|
-
should 'allow valid domains' do
|
91
|
-
assert @importer.send :ensure_valid, 'whitehouse.gov'
|
92
|
-
end
|
93
|
-
|
94
|
-
should 'reject empty domains' do
|
95
|
-
refute @importer.send :ensure_valid, ''
|
96
|
-
end
|
97
|
-
|
98
|
-
should 'reject blacklisted domains' do
|
99
|
-
refute @importer.send :ensure_valid, 'egovlink.com'
|
100
|
-
end
|
101
|
-
|
102
|
-
should 'reject invalid domains' do
|
103
|
-
refute @importer.send :ensure_valid, 'foo.invalid'
|
104
|
-
end
|
105
|
-
|
106
|
-
should 'reject academic domains' do
|
107
|
-
refute @importer.send :ensure_valid, 'harvard.edu'
|
108
|
-
end
|
109
|
-
|
110
|
-
should "reject regex'd domains" do
|
111
|
-
refute @importer.send :ensure_valid, 'foo.github.io'
|
112
|
-
end
|
113
|
-
end
|
114
|
-
|
115
|
-
context 'duplicate domains' do
|
116
|
-
should 'know a unique domain is not a dupe' do
|
117
|
-
refute @importer.send :dupe?, 'gman.com'
|
118
|
-
end
|
119
|
-
|
120
|
-
should "know when a domain's a dupe" do
|
121
|
-
assert @importer.send :dupe?, 'gov'
|
122
|
-
end
|
123
|
-
|
124
|
-
should "know when a domain's a subdomain of an existing domain" do
|
125
|
-
assert @importer.send :dupe?, 'whitehouse.gov'
|
126
|
-
end
|
127
|
-
|
128
|
-
should 'allow unique domains' do
|
129
|
-
assert @importer.send :ensure_not_dupe, 'gman.com'
|
130
|
-
end
|
131
|
-
|
132
|
-
should 'reject duplicate domains' do
|
133
|
-
refute @importer.send :ensure_not_dupe, 'gov'
|
134
|
-
end
|
135
|
-
|
136
|
-
should 'reject subdomains' do
|
137
|
-
refute @importer.send :ensure_not_dupe, 'whitehouse.gov'
|
138
|
-
end
|
139
|
-
end
|
140
|
-
|
141
|
-
context 'domain resolution' do
|
142
|
-
should 'know if a domain resolves' do
|
143
|
-
assert @importer.domain_resolves?('github.com')
|
144
|
-
assert @importer.send :ensure_resolves, 'github.com'
|
145
|
-
end
|
146
|
-
|
147
|
-
should "know if a domain doesn't resolve" do
|
148
|
-
refute @importer.domain_resolves?('foo.invalid')
|
149
|
-
refute @importer.send :ensure_resolves, 'foo.invalid'
|
150
|
-
end
|
151
|
-
|
152
|
-
should 'know if a domain has an IP' do
|
153
|
-
end
|
154
|
-
|
155
|
-
should 'know if a domain returns a given record' do
|
156
|
-
end
|
157
|
-
end
|
158
|
-
|
159
|
-
context 'regex checks' do
|
160
|
-
should 'pass valid domains' do
|
161
|
-
assert @importer.send :ensure_regex, 'example.com'
|
162
|
-
end
|
163
|
-
|
164
|
-
should 'reject domains that begin with home.' do
|
165
|
-
refute @importer.send :ensure_regex, 'home.example.com'
|
166
|
-
end
|
167
|
-
|
168
|
-
should 'reject domains that begin with user.' do
|
169
|
-
refute @importer.send :ensure_regex, 'user.example.com'
|
170
|
-
end
|
171
|
-
|
172
|
-
should 'reject domains that begin with site.' do
|
173
|
-
refute @importer.send :ensure_regex, 'user.example.com'
|
174
|
-
end
|
175
|
-
|
176
|
-
should 'reject weebly domains' do
|
177
|
-
refute @importer.send :ensure_regex, 'foo.weebly.com'
|
178
|
-
end
|
179
|
-
|
180
|
-
should 'reject wordpress domains' do
|
181
|
-
refute @importer.send :ensure_regex, 'foo.wordpress.com'
|
182
|
-
end
|
183
|
-
|
184
|
-
should 'reject govoffice domains' do
|
185
|
-
refute @importer.send :ensure_regex, 'foo.govoffice.com'
|
186
|
-
refute @importer.send :ensure_regex, 'foo.govoffice1.com'
|
187
|
-
end
|
188
|
-
|
189
|
-
should 'reject homestead domains' do
|
190
|
-
refute @importer.send :ensure_regex, 'foo.homestead.com'
|
191
|
-
end
|
192
|
-
|
193
|
-
should 'reject wix domains' do
|
194
|
-
refute @importer.send :ensure_regex, 'foo.wix.com'
|
195
|
-
end
|
196
|
-
|
197
|
-
should 'reject blogspot domains' do
|
198
|
-
refute @importer.send :ensure_regex, 'foo.blogspot.com'
|
199
|
-
end
|
200
|
-
|
201
|
-
should 'reject tripod domains' do
|
202
|
-
refute @importer.send :ensure_regex, 'foo.tripod.com'
|
203
|
-
end
|
204
|
-
|
205
|
-
should 'reject squarespace domains' do
|
206
|
-
refute @importer.send :ensure_regex, 'foo.squarespace.com'
|
207
|
-
end
|
208
|
-
|
209
|
-
should 'reject github.io domains' do
|
210
|
-
refute @importer.send :ensure_regex, 'foo.github.io'
|
211
|
-
end
|
212
|
-
|
213
|
-
should 'reject locality domains' do
|
214
|
-
refute @importer.send :ensure_regex, 'ci.champaign.il.us'
|
215
|
-
end
|
216
|
-
end
|
217
|
-
|
218
|
-
context 'normalizing domains' do
|
219
|
-
should 'normalize URLs to domains' do
|
220
|
-
expected = 'example.com'
|
221
|
-
assert_equal expected, @importer.normalize_domain('http://example.com')
|
222
|
-
end
|
223
|
-
|
224
|
-
should 'strip WWW' do
|
225
|
-
assert_equal 'example.com', @importer.normalize_domain('www.example.com')
|
226
|
-
end
|
227
|
-
|
228
|
-
should 'remove trailing slashes' do
|
229
|
-
assert_equal 'example.com', @importer.normalize_domain('example.com/')
|
230
|
-
end
|
231
|
-
|
232
|
-
should 'remove paths' do
|
233
|
-
assert_equal 'example.com', @importer.normalize_domain('example.com/foo')
|
234
|
-
end
|
235
|
-
|
236
|
-
should 'remove paths with trailing slashes' do
|
237
|
-
assert_equal 'example.com', @importer.normalize_domain('example.com/foo/')
|
238
|
-
end
|
239
|
-
|
240
|
-
should 'downcase' do
|
241
|
-
assert_equal 'example.com', @importer.normalize_domain('EXAMPLE.com')
|
242
|
-
end
|
243
|
-
end
|
244
|
-
end
|
data/test/test_gman_locality.rb
DELETED
@@ -1,10 +0,0 @@
|
|
1
|
-
require File.join(File.dirname(__FILE__), 'helper')
|
2
|
-
|
3
|
-
class TestGmanLocality < Minitest::Test
|
4
|
-
should 'parse the alpha2' do
|
5
|
-
assert_equal 'us', Gman.new('whitehouse.gov').alpha2
|
6
|
-
assert_equal 'us', Gman.new('army.mil').alpha2
|
7
|
-
assert_equal 'gb', Gman.new('foo.gov.uk').alpha2
|
8
|
-
assert_equal 'ca', Gman.new('gov.ca').alpha2
|
9
|
-
end
|
10
|
-
end
|