gman 6.0.1 → 7.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/.github/CODEOWNERS +3 -0
- data/.github/ISSUE_TEMPLATE/bug_report.md +28 -0
- data/.github/ISSUE_TEMPLATE/feature_request.md +21 -0
- data/.github/config.yml +23 -0
- data/.github/funding.yml +1 -0
- data/.github/no-response.yml +15 -0
- data/.github/release-drafter.yml +4 -0
- data/.github/settings.yml +33 -0
- data/.github/stale.yml +29 -0
- data/.gitignore +1 -0
- data/.rspec +2 -0
- data/.rubocop.yml +17 -5
- data/.rubocop_todo.yml +84 -0
- data/.ruby-version +1 -1
- data/Gemfile +2 -0
- data/bin/gman +6 -4
- data/bin/gman_filter +5 -7
- data/config/domains.txt +8446 -173
- data/config/vendor/academic.txt +8038 -0
- data/config/vendor/dotgovs.csv +5786 -5560
- data/docs/CODE_OF_CONDUCT.md +46 -0
- data/docs/CONTRIBUTING.md +92 -0
- data/{README.md → docs/README.md} +3 -3
- data/docs/SECURITY.md +3 -0
- data/docs/_config.yml +2 -0
- data/gman.gemspec +18 -17
- data/lib/gman.rb +25 -21
- data/lib/gman/country_codes.rb +17 -17
- data/lib/gman/domain_list.rb +123 -41
- data/lib/gman/identifier.rb +59 -21
- data/lib/gman/importer.rb +39 -40
- data/lib/gman/locality.rb +23 -21
- data/lib/gman/version.rb +3 -1
- data/script/add +2 -0
- data/script/alphabetize +2 -0
- data/script/cibuild +1 -1
- data/script/dedupe +2 -1
- data/script/profile +2 -1
- data/script/prune +5 -3
- data/script/reconcile-us +6 -3
- data/script/vendor +1 -1
- data/script/vendor-federal-de +3 -3
- data/script/vendor-municipal-de +3 -3
- data/script/vendor-nl +4 -1
- data/script/vendor-public-suffix +7 -6
- data/script/vendor-se +3 -3
- data/script/vendor-swot +43 -0
- data/script/vendor-us +8 -5
- data/spec/fixtures/domains.txt +4 -0
- data/{test → spec}/fixtures/obama.txt +0 -0
- data/spec/gman/bin_spec.rb +101 -0
- data/spec/gman/country_code_spec.rb +39 -0
- data/spec/gman/domain_list_spec.rb +110 -0
- data/spec/gman/domains_spec.rb +25 -0
- data/spec/gman/identifier_spec.rb +218 -0
- data/spec/gman/importer_spec.rb +236 -0
- data/spec/gman/locality_spec.rb +24 -0
- data/spec/gman_spec.rb +74 -0
- data/spec/spec_helper.rb +31 -0
- metadata +86 -73
- data/CONTRIBUTING.md +0 -22
- data/Rakefile +0 -22
- data/test/fixtures/domains.txt +0 -2
- data/test/helper.rb +0 -40
- data/test/test_gman.rb +0 -62
- data/test/test_gman_bin.rb +0 -75
- data/test/test_gman_country_codes.rb +0 -18
- data/test/test_gman_domains.rb +0 -33
- data/test/test_gman_filter.rb +0 -17
- data/test/test_gman_identifier.rb +0 -106
- data/test/test_gman_importer.rb +0 -250
- data/test/test_gman_locality.rb +0 -10
data/lib/gman/identifier.rb
CHANGED
@@ -1,9 +1,40 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
class Gman
|
4
|
+
# Defines an instance method that delegates to a hash's key
|
5
|
+
#
|
6
|
+
# hash_method - a symbol representing the instance method to delegate to. The
|
7
|
+
# instance method should return a hash or respond to #[]
|
8
|
+
# key - the key to call within the hash
|
9
|
+
# method - (optional) the instance method the key should be aliased to.
|
10
|
+
# If not specified, defaults to the hash key
|
11
|
+
# default - (optional) value to return if value is nil (defaults to nil)
|
12
|
+
#
|
13
|
+
# Returns a symbol representing the instance method
|
14
|
+
def self.def_hash_delegator(hash_method, key, method = nil, default = nil)
|
15
|
+
method ||= key.to_s.downcase.sub(' ', '_')
|
16
|
+
define_method(method) do
|
17
|
+
hash = send(hash_method)
|
18
|
+
if hash.respond_to? :[]
|
19
|
+
hash[key.to_s] || default
|
20
|
+
else
|
21
|
+
default
|
22
|
+
end
|
23
|
+
end
|
24
|
+
end
|
25
|
+
|
26
|
+
def_hash_delegator :dotgov_listing, :Agency
|
27
|
+
def_hash_delegator :dotgov_listing, :Organization
|
28
|
+
def_hash_delegator :dotgov_listing, :City
|
29
|
+
def_hash_delegator :dotgov_listing, :"Domain Type"
|
30
|
+
private :domain_type
|
31
|
+
|
2
32
|
def type
|
3
|
-
[
|
33
|
+
%i[state district cog city federal county].each do |type|
|
4
34
|
return type if send "#{type}?"
|
5
35
|
end
|
6
36
|
return if list_category.nil?
|
37
|
+
|
7
38
|
if list_category.include?('usagov')
|
8
39
|
:unknown
|
9
40
|
else
|
@@ -14,7 +45,7 @@ class Gman
|
|
14
45
|
def state
|
15
46
|
if matches
|
16
47
|
matches[4].upcase
|
17
|
-
elsif dotgov_listing
|
48
|
+
elsif dotgov_listing['State']
|
18
49
|
dotgov_listing['State']
|
19
50
|
elsif list_category
|
20
51
|
matches = list_category.match(/usagov([A-Z]{2})/)
|
@@ -22,27 +53,23 @@ class Gman
|
|
22
53
|
end
|
23
54
|
end
|
24
55
|
|
25
|
-
def city
|
26
|
-
dotgov_listing['City'] if dotgov_listing
|
27
|
-
end
|
28
|
-
|
29
|
-
def agency
|
30
|
-
dotgov_listing['Agency'] if federal?
|
31
|
-
end
|
32
|
-
|
33
56
|
def dotgov?
|
34
57
|
domain.tld == 'gov'
|
35
58
|
end
|
36
59
|
|
37
60
|
def federal?
|
38
|
-
|
61
|
+
return false unless dotgov_listing
|
62
|
+
|
63
|
+
domain_type =~ /^Federal Agency/i
|
39
64
|
end
|
40
65
|
|
41
66
|
def city?
|
42
67
|
if matches
|
43
|
-
%w
|
68
|
+
%w[ci town vil].include?(matches[3])
|
44
69
|
elsif dotgov_listing
|
45
|
-
|
70
|
+
domain_type == 'City'
|
71
|
+
else
|
72
|
+
false
|
46
73
|
end
|
47
74
|
end
|
48
75
|
|
@@ -50,7 +77,9 @@ class Gman
|
|
50
77
|
if matches
|
51
78
|
matches[3] == 'co'
|
52
79
|
elsif dotgov_listing
|
53
|
-
|
80
|
+
domain_type == 'County'
|
81
|
+
else
|
82
|
+
false
|
54
83
|
end
|
55
84
|
end
|
56
85
|
|
@@ -58,40 +87,49 @@ class Gman
|
|
58
87
|
if matches
|
59
88
|
matches[1] == 'state'
|
60
89
|
elsif dotgov_listing
|
61
|
-
|
90
|
+
domain_type == 'State/Local Govt'
|
91
|
+
else
|
92
|
+
false
|
62
93
|
end
|
63
94
|
end
|
64
95
|
|
65
96
|
def district?
|
66
|
-
|
97
|
+
return false unless matches
|
98
|
+
|
99
|
+
matches[1] == 'dst'
|
67
100
|
end
|
68
101
|
|
69
102
|
def cog?
|
70
|
-
|
103
|
+
return false unless matches
|
104
|
+
|
105
|
+
matches[1] == 'cog'
|
71
106
|
end
|
72
107
|
|
73
108
|
private
|
74
109
|
|
75
110
|
def list_category
|
76
111
|
@list_category ||= begin
|
77
|
-
match = Gman.list.find(domain.to_s)
|
112
|
+
match = Gman.list.public_suffix_list.find(domain.to_s)
|
78
113
|
return unless match
|
79
|
-
|
80
|
-
|
114
|
+
|
115
|
+
regex = %r{// ([^\n]+)\n?[^/]*\n#{Regexp.escape(match.value)}\n}im
|
116
|
+
matches = Gman.list.contents.match(regex)
|
81
117
|
matches[1] if matches
|
82
118
|
end
|
83
119
|
end
|
84
120
|
|
85
121
|
def matches
|
86
122
|
return @matches if defined? @matches
|
123
|
+
|
87
124
|
@matches = domain.to_s.match(Locality::REGEX)
|
88
125
|
end
|
89
126
|
|
90
127
|
def dotgov_listing
|
91
128
|
return @dotgov_listing if defined? @dotgov_listing
|
92
129
|
return unless dotgov?
|
130
|
+
|
93
131
|
@dotgov_listing = Gman.dotgov_list.find do |listing|
|
94
|
-
listing['Domain Name'].casecmp("#{domain.sld}.gov")
|
132
|
+
listing['Domain Name'].casecmp("#{domain.sld}.gov").zero?
|
95
133
|
end
|
96
134
|
end
|
97
135
|
|
data/lib/gman/importer.rb
CHANGED
@@ -1,18 +1,21 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
# Utility functions for parsing and manipulating public-suffix domain lists
|
2
4
|
# Only used in development and not loaded by default
|
3
5
|
require 'yaml'
|
4
6
|
require 'open-uri'
|
5
7
|
require 'resolv'
|
6
8
|
require 'logger'
|
9
|
+
require 'swot'
|
7
10
|
require_relative '../gman'
|
8
11
|
require_relative './domain_list'
|
9
12
|
|
10
13
|
class Gman
|
11
14
|
class Importer
|
12
|
-
attr_accessor :
|
15
|
+
attr_accessor :domain_list
|
13
16
|
|
14
17
|
# Known false positives from vendored lists
|
15
|
-
BLACKLIST = %w
|
18
|
+
BLACKLIST = %w[
|
16
19
|
business.centurytel.net
|
17
20
|
chesnee.net
|
18
21
|
citlink.net
|
@@ -38,31 +41,32 @@ class Gman
|
|
38
41
|
wctc.net
|
39
42
|
webconnections.net
|
40
43
|
webpages.charter.net
|
41
|
-
|
44
|
+
].freeze
|
42
45
|
|
43
46
|
REGEX_CHECKS = {
|
44
|
-
'home. regex'
|
45
|
-
'user. regex'
|
46
|
-
'sites. regex'
|
47
|
-
'weebly'
|
48
|
-
'wordpress'
|
49
|
-
'govoffice'
|
50
|
-
'homestead'
|
51
|
-
'wix.com'
|
52
|
-
'blogspot.com'
|
53
|
-
'tripod.com'
|
47
|
+
'home. regex' => /^home\./,
|
48
|
+
'user. regex' => /^users?\./,
|
49
|
+
'sites. regex' => /^sites?\./,
|
50
|
+
'weebly' => /weebly\.com$/,
|
51
|
+
'wordpress' => /wordpress\.com$/,
|
52
|
+
'govoffice' => /govoffice\d?\.com$/,
|
53
|
+
'homestead' => /homestead\.com$/,
|
54
|
+
'wix.com' => /wix\.com$/,
|
55
|
+
'blogspot.com' => /blogspot\.com$/,
|
56
|
+
'tripod.com' => /tripod\.com$/,
|
54
57
|
'squarespace.com' => /squarespace\.com$/,
|
55
|
-
'github.io'
|
56
|
-
'tumblr'
|
57
|
-
'locality'
|
58
|
+
'github.io' => /github\.io$/,
|
59
|
+
'tumblr' => /tumblr\.com$/,
|
60
|
+
'locality' => Gman::Locality::REGEX,
|
61
|
+
'french edu' => /^ac-.*?\.fr/
|
58
62
|
}.freeze
|
59
63
|
|
60
64
|
def initialize(domains)
|
61
|
-
@
|
65
|
+
@domain_list = DomainList.new(data: domains)
|
62
66
|
end
|
63
67
|
|
64
68
|
def logger
|
65
|
-
@logger ||= Logger.new(
|
69
|
+
@logger ||= Logger.new($stdout)
|
66
70
|
end
|
67
71
|
|
68
72
|
def normalize_domain(domain)
|
@@ -71,9 +75,10 @@ class Gman
|
|
71
75
|
end
|
72
76
|
|
73
77
|
def valid_domain?(domain, options = {})
|
74
|
-
return false unless ensure_valid(domain)
|
75
78
|
return false if !options[:skip_dupe] && !ensure_not_dupe(domain)
|
79
|
+
return false unless ensure_valid(domain)
|
76
80
|
return false if !options[:skip_resolve] && !ensure_resolves(domain)
|
81
|
+
|
77
82
|
true
|
78
83
|
end
|
79
84
|
|
@@ -81,6 +86,7 @@ class Gman
|
|
81
86
|
# rather than a bool and silence log output
|
82
87
|
def reject(domain, reason)
|
83
88
|
return reason if ENV['RECONCILING']
|
89
|
+
|
84
90
|
logger.info "👎 `#{domain}`: #{reason}"
|
85
91
|
false
|
86
92
|
end
|
@@ -89,30 +95,26 @@ class Gman
|
|
89
95
|
@current ||= DomainList.current
|
90
96
|
end
|
91
97
|
|
92
|
-
def import(options)
|
98
|
+
def import(options = {})
|
93
99
|
logger.info "Current: #{Gman::DomainList.current.count} domains"
|
94
|
-
logger.info "Adding: #{
|
100
|
+
logger.info "Adding: #{domain_list.count} domains"
|
95
101
|
|
96
102
|
normalize_domains!
|
97
103
|
ensure_validity!(options)
|
98
104
|
|
99
|
-
if domains.count == 0
|
100
|
-
logger.info 'Nothing to add. Aborting'
|
101
|
-
exit 0
|
102
|
-
end
|
103
|
-
|
104
105
|
add_to_current
|
105
106
|
logger.info "New: #{current.count} domains"
|
106
107
|
end
|
107
108
|
|
108
109
|
def resolver
|
109
|
-
@resolver ||= Resolv::DNS.new(nameserver: ['
|
110
|
+
@resolver ||= Resolv::DNS.new(nameserver: ['1.1.1.1', '8.8.8.8'])
|
110
111
|
end
|
111
112
|
|
112
113
|
# Verifies that the given domain has an MX record, and thus is valid
|
113
114
|
def domain_resolves?(domain)
|
114
115
|
domain = Addressable::URI.new(host: domain).normalize.host
|
115
116
|
return true if ip?(domain)
|
117
|
+
|
116
118
|
returns_record?(domain, 'NS') || returns_record?(domain, 'MX')
|
117
119
|
end
|
118
120
|
|
@@ -120,16 +122,17 @@ class Gman
|
|
120
122
|
|
121
123
|
def ensure_regex(domain)
|
122
124
|
REGEX_CHECKS.each do |msg, regex|
|
123
|
-
return reject(domain, msg) if domain
|
125
|
+
return reject(domain, msg) if domain&.match?(regex)
|
124
126
|
end
|
125
127
|
true
|
126
128
|
end
|
127
129
|
|
128
130
|
def ensure_valid(domain)
|
129
131
|
return false if domain.empty?
|
132
|
+
|
130
133
|
if BLACKLIST.include?(domain)
|
131
134
|
reject(domain, 'blacklist')
|
132
|
-
elsif !PublicSuffix.valid?(".#{domain}")
|
135
|
+
elsif !PublicSuffix.valid?("foo.#{domain}")
|
133
136
|
reject(domain, 'invalid')
|
134
137
|
elsif Swot.is_academic?(domain)
|
135
138
|
reject(domain, 'academic')
|
@@ -140,11 +143,13 @@ class Gman
|
|
140
143
|
|
141
144
|
def ensure_resolves(domain)
|
142
145
|
return reject(domain, 'unresolvable') unless domain_resolves?(domain)
|
146
|
+
|
143
147
|
true
|
144
148
|
end
|
145
149
|
|
146
150
|
def ensure_not_dupe(domain)
|
147
151
|
return true unless dupe?(domain)
|
152
|
+
|
148
153
|
if current.domains.include?(domain)
|
149
154
|
reject(domain, 'duplicate')
|
150
155
|
else
|
@@ -158,22 +163,22 @@ class Gman
|
|
158
163
|
end
|
159
164
|
|
160
165
|
def normalize_domains!
|
161
|
-
|
166
|
+
domain_list.to_h.each_value do |domains|
|
162
167
|
domains.map! { |domain| normalize_domain(domain) }
|
163
168
|
domains.uniq!
|
164
169
|
end
|
165
170
|
end
|
166
171
|
|
167
172
|
def ensure_validity!(options = {})
|
168
|
-
|
173
|
+
domain_list.data.each_value do |domains|
|
169
174
|
domains.select! { |domain| valid_domain?(domain, options) }
|
170
175
|
end
|
171
176
|
end
|
172
177
|
|
173
178
|
def add_to_current
|
174
|
-
|
175
|
-
current.
|
176
|
-
current.
|
179
|
+
domain_list.data.each do |group, domains|
|
180
|
+
current.data[group] ||= []
|
181
|
+
current.data[group].concat domains
|
177
182
|
end
|
178
183
|
current.write
|
179
184
|
end
|
@@ -192,9 +197,3 @@ class Gman
|
|
192
197
|
end
|
193
198
|
end
|
194
199
|
end
|
195
|
-
|
196
|
-
class Gman
|
197
|
-
def self.import(hash, options = {})
|
198
|
-
Gman::Importer.new(hash).import(options)
|
199
|
-
end
|
200
|
-
end
|
data/lib/gman/locality.rb
CHANGED
@@ -1,17 +1,31 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
class Gman
|
4
|
+
# Second level .us domains for states and locality
|
5
|
+
# See http://en.wikipedia.org/wiki/.us
|
6
|
+
#
|
7
|
+
# Examples:
|
8
|
+
# * foo.state.il.us
|
9
|
+
# * ci.foo.il.us
|
10
|
+
#
|
11
|
+
# Not:
|
12
|
+
# * state.foo.il.us
|
13
|
+
# * foo.ci.il.us
|
14
|
+
# * k12.il.us
|
15
|
+
# * ci.foo.zx.us
|
2
16
|
class Locality
|
3
|
-
AFFINITY_NAMESPACES = %w
|
17
|
+
AFFINITY_NAMESPACES = %w[state dst cog].freeze
|
4
18
|
|
5
|
-
STATES = %w
|
19
|
+
STATES = %w[
|
6
20
|
ak al ar az ca co ct dc de fl ga hi ia id il in ks ky
|
7
21
|
la ma md me mi mn mo ms mt nc nd ne nh nj nm nv ny oh
|
8
22
|
ok or pa ri sc sd tn tx um ut va vt wa wi wv wy
|
9
|
-
|
23
|
+
].freeze
|
10
24
|
|
11
|
-
LOCALITY_DOMAINS = %w
|
25
|
+
LOCALITY_DOMAINS = %w[
|
12
26
|
ci co borough boro city county
|
13
27
|
parish town twp vi vil village
|
14
|
-
|
28
|
+
].freeze
|
15
29
|
|
16
30
|
REGEX = /
|
17
31
|
(
|
@@ -19,22 +33,10 @@ class Gman
|
|
19
33
|
|
|
20
34
|
(#{Regexp.union(LOCALITY_DOMAINS)})\.[a-z-]+
|
21
35
|
)\.(#{Regexp.union(STATES)})\.us
|
22
|
-
/x
|
23
|
-
end
|
36
|
+
/x.freeze
|
24
37
|
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
# Examples:
|
29
|
-
# * foo.state.il.us
|
30
|
-
# * ci.foo.il.us
|
31
|
-
#
|
32
|
-
# Not:
|
33
|
-
# * state.foo.il.us
|
34
|
-
# * foo.ci.il.us
|
35
|
-
# * k12.il.us
|
36
|
-
# * ci.foo.zx.us
|
37
|
-
def locality?
|
38
|
-
!domain.to_s.match(Locality::REGEX).nil?
|
38
|
+
def self.valid?(domain)
|
39
|
+
!domain.to_s.match(Locality::REGEX).nil?
|
40
|
+
end
|
39
41
|
end
|
40
42
|
end
|
data/lib/gman/version.rb
CHANGED
data/script/add
CHANGED
data/script/alphabetize
CHANGED
data/script/cibuild
CHANGED
data/script/dedupe
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
#! /usr/bin/env ruby
|
2
|
+
# frozen_string_literal: true
|
2
3
|
|
3
4
|
require 'yaml'
|
4
5
|
require 'open-uri'
|
@@ -12,7 +13,7 @@ puts "Current list contains #{current.count} domains..."
|
|
12
13
|
|
13
14
|
dupe = current.count - current.domains.uniq.count
|
14
15
|
puts "Found #{dupe} duplicate domains"
|
15
|
-
exit 0 if dupe
|
16
|
+
exit 0 if dupe.zero?
|
16
17
|
|
17
18
|
dupes = current.domains.select { |domain| current.domains.count(domain) > 1 }
|
18
19
|
|