gman 5.0.9 → 6.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +21 -0
- data/.ruby-version +1 -1
- data/Gemfile +1 -0
- data/README.md +16 -22
- data/Rakefile +3 -3
- data/bin/gman +10 -11
- data/bin/gman_filter +7 -7
- data/config/domains.txt +19 -19
- data/config/vendor/dotgovs.csv +398 -355
- data/gman.gemspec +34 -27
- data/lib/gman.rb +29 -23
- data/lib/gman/country_codes.rb +14 -15
- data/lib/gman/domain_list.rb +34 -25
- data/lib/gman/identifier.rb +39 -43
- data/lib/gman/importer.rb +111 -61
- data/lib/gman/locality.rb +22 -10
- data/lib/gman/version.rb +1 -1
- data/script/add +2 -2
- data/script/alphabetize +2 -2
- data/script/cibuild +2 -0
- data/script/dedupe +2 -2
- data/script/profile +5 -2
- data/script/prune +7 -7
- data/script/reconcile-us +26 -21
- data/script/vendor-federal-de +5 -5
- data/script/vendor-municipal-de +5 -5
- data/script/vendor-nl +12 -4
- data/script/vendor-public-suffix +8 -8
- data/script/vendor-se +8 -6
- data/script/vendor-us +7 -7
- data/test/fixtures/domains.txt +2 -0
- data/test/{obama.txt → fixtures/obama.txt} +0 -0
- data/test/helper.rb +19 -5
- data/test/test_gman.rb +43 -38
- data/test/test_gman_bin.rb +37 -43
- data/test/test_gman_country_codes.rb +10 -6
- data/test/test_gman_domains.rb +15 -10
- data/test/test_gman_filter.rb +5 -7
- data/test/test_gman_identifier.rb +36 -35
- data/test/test_gman_importer.rb +250 -0
- data/test/test_gman_locality.rb +5 -5
- metadata +28 -10
- data/lib/gman/sanctions.rb +0 -29
- data/test/test_gman_sanctions.rb +0 -20
data/gman.gemspec
CHANGED
@@ -1,36 +1,43 @@
|
|
1
|
-
require File.expand_path
|
1
|
+
require File.expand_path './lib/gman/version', File.dirname(__FILE__)
|
2
2
|
|
3
3
|
Gem::Specification.new do |s|
|
4
|
-
s.name =
|
5
|
-
s.summary =
|
6
|
-
|
4
|
+
s.name = 'gman'
|
5
|
+
s.summary = <<-EOF
|
6
|
+
Check if a given domain or email address belong to a governemnt entity
|
7
|
+
EOF
|
8
|
+
s.description = <<-EOF
|
9
|
+
A ruby gem to check if the owner of a given email address is working for
|
10
|
+
THE MAN.
|
11
|
+
EOF
|
7
12
|
s.version = Gman::VERSION
|
8
|
-
s.authors = [
|
9
|
-
s.email =
|
10
|
-
s.homepage =
|
11
|
-
s.licenses = [
|
13
|
+
s.authors = ['Ben Balter']
|
14
|
+
s.email = 'ben.balter@github.com'
|
15
|
+
s.homepage = 'https://github.com/benbalter/gman'
|
16
|
+
s.licenses = ['MIT']
|
12
17
|
|
13
|
-
s.files
|
14
|
-
s.test_files
|
15
|
-
s.executables
|
16
|
-
|
18
|
+
s.files = `git ls-files`.split("\n")
|
19
|
+
s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
|
20
|
+
s.executables = `git ls-files -- bin/*`.split("\n").map do |f|
|
21
|
+
File.basename(f)
|
22
|
+
end
|
23
|
+
s.require_paths = ['lib']
|
17
24
|
|
18
|
-
s.require_paths = [
|
25
|
+
s.require_paths = ['lib']
|
19
26
|
s.required_ruby_version = '~> 2.0'
|
20
27
|
|
21
|
-
s.add_dependency(
|
22
|
-
s.add_dependency(
|
23
|
-
s.add_dependency(
|
24
|
-
s.add_dependency(
|
25
|
-
|
26
|
-
s.add_development_dependency( "rake", "~> 10.4" )
|
27
|
-
s.add_development_dependency( "shoulda", "~> 3.5" )
|
28
|
-
s.add_development_dependency( "rdoc", "~> 4.2" )
|
29
|
-
s.add_development_dependency( "bundler", "~> 1.10" )
|
30
|
-
s.add_development_dependency( "pry", "~> 0.10" )
|
31
|
-
s.add_development_dependency( "parallel", "~> 1.6" )
|
32
|
-
s.add_development_dependency( "mechanize", "~> 2.7" )
|
33
|
-
s.add_development_dependency( "addressable", "~> 2.3" )
|
34
|
-
s.add_development_dependency( "ruby-prof", "~> 0.15" )
|
28
|
+
s.add_dependency('swot', '~> 1.0')
|
29
|
+
s.add_dependency('iso_country_codes', '~> 0.6')
|
30
|
+
s.add_dependency('naughty_or_nice', '~> 2.0')
|
31
|
+
s.add_dependency('colored', '~> 1.2')
|
35
32
|
|
33
|
+
s.add_development_dependency('rake', '~> 10.4')
|
34
|
+
s.add_development_dependency('shoulda', '~> 3.5')
|
35
|
+
s.add_development_dependency('rdoc', '~> 4.2')
|
36
|
+
s.add_development_dependency('bundler', '~> 1.10')
|
37
|
+
s.add_development_dependency('pry', '~> 0.10')
|
38
|
+
s.add_development_dependency('parallel', '~> 1.6')
|
39
|
+
s.add_development_dependency('mechanize', '~> 2.7')
|
40
|
+
s.add_development_dependency('addressable', '~> 2.3')
|
41
|
+
s.add_development_dependency('ruby-prof', '~> 0.15')
|
42
|
+
s.add_development_dependency('rubocop', '~> 0.37')
|
36
43
|
end
|
data/lib/gman.rb
CHANGED
@@ -6,30 +6,33 @@ require_relative 'gman/version'
|
|
6
6
|
require_relative 'gman/country_codes'
|
7
7
|
require_relative 'gman/locality'
|
8
8
|
require_relative 'gman/identifier'
|
9
|
-
require_relative 'gman/sanctions'
|
10
9
|
|
11
10
|
class Gman
|
12
|
-
|
13
11
|
include NaughtyOrNice
|
14
12
|
|
15
13
|
class << self
|
16
14
|
# returns an instance of our custom public suffix list
|
17
|
-
# list behaves like PublicSuffix::List
|
15
|
+
# list behaves like PublicSuffix::List
|
16
|
+
# but is limited to our whitelisted domains
|
18
17
|
def list
|
19
|
-
|
18
|
+
@list ||= PublicSuffix::List.parse(list_contents)
|
20
19
|
end
|
21
20
|
|
22
21
|
def config_path
|
23
|
-
File.
|
22
|
+
File.expand_path '../config', File.dirname(__FILE__)
|
24
23
|
end
|
25
24
|
|
26
25
|
# Returns the absolute path to the domain list
|
27
26
|
def list_path
|
28
|
-
|
27
|
+
if ENV['GMAN_STUB_DOMAINS']
|
28
|
+
File.expand_path '../test/fixtures/domains.txt', File.dirname(__FILE__)
|
29
|
+
else
|
30
|
+
File.expand_path 'domains.txt', config_path
|
31
|
+
end
|
29
32
|
end
|
30
33
|
|
31
34
|
def list_contents
|
32
|
-
|
35
|
+
@list_contents ||= File.new(list_path, 'r:utf-8').read
|
33
36
|
end
|
34
37
|
end
|
35
38
|
|
@@ -37,25 +40,28 @@ class Gman
|
|
37
40
|
#
|
38
41
|
# Returns boolean true if a government domain
|
39
42
|
def valid?
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
43
|
+
@valid ||= begin
|
44
|
+
return false unless valid_domain?
|
45
|
+
return false if academic?
|
46
|
+
locality? || public_suffix_valid?
|
47
|
+
end
|
48
|
+
end
|
45
49
|
|
46
|
-
|
47
|
-
return false if Swot::is_academic?(domain)
|
50
|
+
private
|
48
51
|
|
49
|
-
|
50
|
-
|
52
|
+
def valid_domain?
|
53
|
+
domain && domain.valid? && !academic?
|
54
|
+
end
|
51
55
|
|
52
|
-
|
53
|
-
|
56
|
+
def academic?
|
57
|
+
domain && Swot.is_academic?(domain)
|
58
|
+
end
|
54
59
|
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
+
# domain is on the domain list and
|
61
|
+
# domain is not explicitly blacklisted and
|
62
|
+
# domain matches a standard public suffix list rule
|
63
|
+
def public_suffix_valid?
|
64
|
+
rule = Gman.list.find(to_s)
|
65
|
+
!rule.nil? && rule.type != :exception && rule.allow?(".#{domain}")
|
60
66
|
end
|
61
67
|
end
|
data/lib/gman/country_codes.rb
CHANGED
@@ -1,21 +1,20 @@
|
|
1
1
|
class Gman
|
2
|
-
|
3
2
|
# Map last part of TLD to alpha2 country code
|
4
3
|
ALPHA2_MAP = {
|
5
|
-
:
|
6
|
-
:
|
7
|
-
:
|
8
|
-
:
|
9
|
-
:
|
10
|
-
:
|
11
|
-
:
|
12
|
-
:
|
13
|
-
:
|
14
|
-
:
|
15
|
-
:
|
16
|
-
:
|
17
|
-
:
|
18
|
-
}
|
4
|
+
ac: 'sh',
|
5
|
+
uk: 'gb',
|
6
|
+
su: 'ru',
|
7
|
+
tp: 'tl',
|
8
|
+
yu: 'rs',
|
9
|
+
gov: 'us',
|
10
|
+
mil: 'us',
|
11
|
+
org: 'us',
|
12
|
+
com: 'us',
|
13
|
+
net: 'us',
|
14
|
+
edu: 'us',
|
15
|
+
travel: 'us',
|
16
|
+
info: 'us'
|
17
|
+
}.freeze
|
19
18
|
|
20
19
|
# Returns the two-character alpha country code represented by the domain
|
21
20
|
#
|
data/lib/gman/domain_list.rb
CHANGED
@@ -1,13 +1,12 @@
|
|
1
1
|
class Gman
|
2
2
|
class DomainList
|
3
|
-
|
4
3
|
attr_accessor :list
|
5
|
-
|
4
|
+
alias to_h list
|
6
5
|
|
7
|
-
COMMENT_REGEX =
|
6
|
+
COMMENT_REGEX = %r{//[/\s]*(.*)$}i
|
8
7
|
|
9
8
|
def initialize(list)
|
10
|
-
@list = list.reject { |
|
9
|
+
@list = list.reject { |_group, domains| domains.compact.empty? }
|
11
10
|
end
|
12
11
|
|
13
12
|
def groups
|
@@ -15,7 +14,7 @@ class Gman
|
|
15
14
|
end
|
16
15
|
|
17
16
|
def domains
|
18
|
-
list.values.flatten.sort.uniq
|
17
|
+
list.values.flatten.compact.sort.uniq
|
19
18
|
end
|
20
19
|
|
21
20
|
def count
|
@@ -23,18 +22,18 @@ class Gman
|
|
23
22
|
end
|
24
23
|
|
25
24
|
def alphabetize
|
26
|
-
@list = @list.sort_by { |k,
|
27
|
-
@list.each { |
|
25
|
+
@list = @list.sort_by { |k, _v| k.downcase }.to_h
|
26
|
+
@list.each { |_group, domains| domains.sort!.uniq! }
|
28
27
|
end
|
29
28
|
|
30
29
|
def write
|
30
|
+
alphabetize
|
31
31
|
File.write(Gman.list_path, to_public_suffix)
|
32
32
|
end
|
33
33
|
|
34
34
|
def to_public_suffix
|
35
|
-
current_group =
|
36
|
-
|
37
|
-
list.sort_by { |group, domains| group.downcase }.each do |group, domains|
|
35
|
+
current_group = output = ''
|
36
|
+
list.sort_by { |group, _domains| group.downcase }.each do |group, domains|
|
38
37
|
if group != current_group
|
39
38
|
output << "\n\n" unless current_group.empty? # first entry
|
40
39
|
output << "// #{group}\n"
|
@@ -46,7 +45,7 @@ class Gman
|
|
46
45
|
end
|
47
46
|
|
48
47
|
def self.current
|
49
|
-
current = File.open(Gman
|
48
|
+
current = File.open(Gman.list_path).read
|
50
49
|
DomainList.from_public_suffix(current)
|
51
50
|
end
|
52
51
|
|
@@ -56,23 +55,33 @@ class Gman
|
|
56
55
|
DomainList.new(hash)
|
57
56
|
end
|
58
57
|
|
59
|
-
|
58
|
+
def parent_domain(domain)
|
59
|
+
domains.find { |c| domain =~ /\.#{Regexp.escape(c)}$/ }
|
60
|
+
end
|
61
|
+
|
62
|
+
class << self
|
63
|
+
private
|
60
64
|
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
domain_hash[group].push line.downcase
|
65
|
+
# Given an array of comments/domains in public suffix format
|
66
|
+
# Converts to a hash in the form of :group => [domain1, domain2...]
|
67
|
+
def array_to_hash(domains)
|
68
|
+
domain_hash = {}
|
69
|
+
group = ''
|
70
|
+
domains.each do |line|
|
71
|
+
if line =~ COMMENT_REGEX
|
72
|
+
group = COMMENT_REGEX.match(line)[1]
|
73
|
+
else
|
74
|
+
safe_push(domain_hash, group, line.downcase)
|
75
|
+
end
|
73
76
|
end
|
77
|
+
domain_hash
|
78
|
+
end
|
79
|
+
|
80
|
+
def safe_push(hash, key, value)
|
81
|
+
return if value.empty?
|
82
|
+
hash[key] ||= []
|
83
|
+
hash[key].push value
|
74
84
|
end
|
75
|
-
domain_hash
|
76
85
|
end
|
77
86
|
end
|
78
87
|
end
|
data/lib/gman/identifier.rb
CHANGED
@@ -1,21 +1,10 @@
|
|
1
1
|
class Gman
|
2
|
-
|
3
2
|
def type
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
:cog
|
10
|
-
elsif city?
|
11
|
-
:city
|
12
|
-
elsif federal?
|
13
|
-
:federal
|
14
|
-
elsif county?
|
15
|
-
:county
|
16
|
-
elsif list_category.nil?
|
17
|
-
nil
|
18
|
-
elsif list_category.include?("usagov")
|
3
|
+
[:state, :district, :cog, :city, :federal, :county].each do |type|
|
4
|
+
return type if send "#{type}?"
|
5
|
+
end
|
6
|
+
return if list_category.nil?
|
7
|
+
if list_category.include?('usagov')
|
19
8
|
:unknown
|
20
9
|
else
|
21
10
|
list_category.to_sym
|
@@ -26,7 +15,7 @@ class Gman
|
|
26
15
|
if matches
|
27
16
|
matches[4].upcase
|
28
17
|
elsif dotgov_listing
|
29
|
-
dotgov_listing[
|
18
|
+
dotgov_listing['State']
|
30
19
|
elsif list_category
|
31
20
|
matches = list_category.match(/usagov([A-Z]{2})/)
|
32
21
|
matches[1] if matches
|
@@ -34,80 +23,87 @@ class Gman
|
|
34
23
|
end
|
35
24
|
|
36
25
|
def city
|
37
|
-
dotgov_listing[
|
26
|
+
dotgov_listing['City'] if dotgov_listing
|
38
27
|
end
|
39
28
|
|
40
29
|
def agency
|
41
|
-
dotgov_listing[
|
30
|
+
dotgov_listing['Agency'] if federal?
|
42
31
|
end
|
43
32
|
|
44
33
|
def dotgov?
|
45
|
-
domain.tld ==
|
34
|
+
domain.tld == 'gov'
|
46
35
|
end
|
47
36
|
|
48
37
|
def federal?
|
49
|
-
dotgov_listing && dotgov_listing[
|
38
|
+
dotgov_listing && dotgov_listing['Domain Type'] == 'Federal Agency'
|
50
39
|
end
|
51
40
|
|
52
41
|
def city?
|
53
42
|
if matches
|
54
|
-
%w
|
43
|
+
%w(ci town vil).include?(matches[3])
|
55
44
|
elsif dotgov_listing
|
56
|
-
dotgov_listing[
|
45
|
+
dotgov_listing['Domain Type'] == 'City'
|
57
46
|
end
|
58
47
|
end
|
59
48
|
|
60
49
|
def county?
|
61
50
|
if matches
|
62
|
-
matches[3] ==
|
51
|
+
matches[3] == 'co'
|
63
52
|
elsif dotgov_listing
|
64
|
-
dotgov_listing[
|
53
|
+
dotgov_listing['Domain Type'] == 'County'
|
65
54
|
end
|
66
55
|
end
|
67
56
|
|
68
57
|
def state?
|
69
58
|
if matches
|
70
|
-
matches[1] ==
|
59
|
+
matches[1] == 'state'
|
71
60
|
elsif dotgov_listing
|
72
|
-
dotgov_listing[
|
61
|
+
dotgov_listing['Domain Type'] == 'State/Local Govt'
|
73
62
|
end
|
74
63
|
end
|
75
64
|
|
76
65
|
def district?
|
77
|
-
|
66
|
+
matches && matches[1] == 'dst'
|
78
67
|
end
|
79
68
|
|
80
69
|
def cog?
|
81
|
-
|
70
|
+
matches && matches[1] == 'cog'
|
82
71
|
end
|
83
72
|
|
84
73
|
private
|
85
74
|
|
86
75
|
def list_category
|
87
76
|
@list_category ||= begin
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
77
|
+
match = Gman.list.find(domain.to_s)
|
78
|
+
return unless match
|
79
|
+
regex = %r{// ([^\n]+)\n?[^/]*\n#{Regexp.escape(match.name)}\n}im
|
80
|
+
matches = Gman.list_contents.match(regex)
|
81
|
+
matches[1] if matches
|
93
82
|
end
|
94
83
|
end
|
95
84
|
|
96
85
|
def matches
|
97
86
|
return @matches if defined? @matches
|
98
|
-
@matches = domain.to_s.match(
|
87
|
+
@matches = domain.to_s.match(Locality::REGEX)
|
99
88
|
end
|
100
89
|
|
101
|
-
def
|
102
|
-
|
90
|
+
def dotgov_listing
|
91
|
+
return @dotgov_listing if defined? @dotgov_listing
|
92
|
+
return unless dotgov?
|
93
|
+
@dotgov_listing = Gman.dotgov_list.find do |listing|
|
94
|
+
listing['Domain Name'].casecmp("#{domain.sld}.gov") == 0
|
95
|
+
end
|
103
96
|
end
|
104
97
|
|
105
|
-
|
106
|
-
|
107
|
-
|
98
|
+
class << self
|
99
|
+
def dotgov_list
|
100
|
+
@dotgov_list ||= CSV.read(dotgov_list_path, headers: true)
|
101
|
+
end
|
108
102
|
|
109
|
-
|
110
|
-
|
111
|
-
|
103
|
+
private
|
104
|
+
|
105
|
+
def dotgov_list_path
|
106
|
+
File.join Gman.config_path, 'vendor/dotgovs.csv'
|
107
|
+
end
|
112
108
|
end
|
113
109
|
end
|
data/lib/gman/importer.rb
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
# Utility functions for parsing and manipulating public-suffix
|
1
|
+
# Utility functions for parsing and manipulating public-suffix domain lists
|
2
2
|
# Only used in development and not loaded by default
|
3
3
|
require 'yaml'
|
4
4
|
require 'open-uri'
|
@@ -9,11 +9,10 @@ require_relative './domain_list'
|
|
9
9
|
|
10
10
|
class Gman
|
11
11
|
class Importer
|
12
|
-
|
13
12
|
attr_accessor :domains
|
14
13
|
|
15
14
|
# Known false positives from vendored lists
|
16
|
-
BLACKLIST = %w
|
15
|
+
BLACKLIST = %w(
|
17
16
|
business.centurytel.net
|
18
17
|
chesnee.net
|
19
18
|
citlink.net
|
@@ -39,7 +38,24 @@ class Gman
|
|
39
38
|
wctc.net
|
40
39
|
webconnections.net
|
41
40
|
webpages.charter.net
|
42
|
-
|
41
|
+
).freeze
|
42
|
+
|
43
|
+
REGEX_CHECKS = {
|
44
|
+
'home. regex' => /^home\./,
|
45
|
+
'user. regex' => /^users?\./,
|
46
|
+
'sites. regex' => /^sites?\./,
|
47
|
+
'weebly' => /weebly\.com$/,
|
48
|
+
'wordpress' => /wordpress\.com$/,
|
49
|
+
'govoffice' => /govoffice\d?\.com$/,
|
50
|
+
'homestead' => /homestead\.com$/,
|
51
|
+
'wix.com' => /wix\.com$/,
|
52
|
+
'blogspot.com' => /blogspot\.com$/,
|
53
|
+
'tripod.com' => /tripod\.com$/,
|
54
|
+
'squarespace.com' => /squarespace\.com$/,
|
55
|
+
'github.io' => /github\.io$/,
|
56
|
+
'tumblr' => /tumblr\.com$/,
|
57
|
+
'locality' => Gman::Locality::REGEX
|
58
|
+
}.freeze
|
43
59
|
|
44
60
|
def initialize(domains)
|
45
61
|
@domains = DomainList.new(domains)
|
@@ -50,40 +66,21 @@ class Gman
|
|
50
66
|
end
|
51
67
|
|
52
68
|
def normalize_domain(domain)
|
53
|
-
domain.
|
69
|
+
domain = Gman.new(domain).to_s
|
70
|
+
domain.to_s.downcase.strip.gsub(/^www./, '').gsub(%r{/$}, '')
|
54
71
|
end
|
55
72
|
|
56
|
-
def valid_domain?(domain, options={})
|
57
|
-
return false
|
58
|
-
return
|
59
|
-
return
|
60
|
-
return reject(domain, "sites. regex") if domain =~ /^sites?\./
|
61
|
-
return reject(domain, "weebly") if domain =~ /weebly\.com$/
|
62
|
-
return reject(domain, "wordpress") if domain =~ /wordpress\.com$/
|
63
|
-
return reject(domain, "govoffice") if domain =~ /govoffice\d?\.com$/
|
64
|
-
return reject(domain, "homestead") if domain =~ /homestead\.com$/
|
65
|
-
return reject(domain, "wix.com") if domain =~ /wix\.com$/
|
66
|
-
return reject(domain, "blogspot.com") if domain =~ /blogspot\.com$/
|
67
|
-
return reject(domain, "tripod.com") if domain =~ /tripod\.com$/
|
68
|
-
return reject(domain, "squarespace.com") if domain =~ /squarespace\.com$/
|
69
|
-
return reject(domain, "github.io") if domain =~ /github\.io$/
|
70
|
-
return reject(domain, "locality") if domain =~ Gman::LOCALITY_REGEX
|
71
|
-
return reject(domain, "blacklist") if BLACKLIST.include?(domain)
|
72
|
-
return reject(domain, "duplicate") if !options[:skip_dupe] && current.domains.include?(domain)
|
73
|
-
return reject(domain, "invalid") unless PublicSuffix.valid?(".#{domain}")
|
74
|
-
return reject(domain, "academic") if Swot::is_academic?(domain)
|
75
|
-
|
76
|
-
if !options[:skip_dupe] && subdomain = current.domains.find { |c| domain =~ /\.#{Regexp.escape(c)}$/}
|
77
|
-
return reject(domain, "subdomain of #{subdomain}")
|
78
|
-
end
|
79
|
-
|
80
|
-
return reject(domain, "unresolvable") if !options[:skip_resolve] && !domain_resolves?(domain)
|
73
|
+
def valid_domain?(domain, options = {})
|
74
|
+
return false unless ensure_valid(domain)
|
75
|
+
return false if !options[:skip_dupe] && !ensure_not_dupe(domain)
|
76
|
+
return false if !options[:skip_resolve] && !ensure_resolves(domain)
|
81
77
|
true
|
82
78
|
end
|
83
79
|
|
84
|
-
# if RECONCILING=true, return the reason,
|
80
|
+
# if RECONCILING=true, return the reason,
|
81
|
+
# rather than a bool and silence log output
|
85
82
|
def reject(domain, reason)
|
86
|
-
return reason if ENV[
|
83
|
+
return reason if ENV['RECONCILING']
|
87
84
|
logger.info "👎 `#{domain}`: #{reason}"
|
88
85
|
false
|
89
86
|
end
|
@@ -92,59 +89,112 @@ class Gman
|
|
92
89
|
@current ||= DomainList.current
|
93
90
|
end
|
94
91
|
|
95
|
-
def import(options
|
92
|
+
def import(options)
|
96
93
|
logger.info "Current: #{Gman::DomainList.current.count} domains"
|
97
94
|
logger.info "Adding: #{domains.count} domains"
|
98
95
|
|
99
|
-
|
100
|
-
|
101
|
-
domains.map! { |domain| normalize_domain(domain) }
|
102
|
-
domains.select! { |domain| valid_domain?(domain, options) }
|
103
|
-
end
|
104
|
-
|
105
|
-
logger.info "Filtered to: #{domains.count} domains"
|
96
|
+
normalize_domains!
|
97
|
+
ensure_validity!(options)
|
106
98
|
|
107
99
|
if domains.count == 0
|
108
|
-
logger.info
|
100
|
+
logger.info 'Nothing to add. Aborting'
|
109
101
|
exit 0
|
110
102
|
end
|
111
103
|
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
104
|
+
add_to_current
|
105
|
+
logger.info "New: #{current.count} domains"
|
106
|
+
end
|
107
|
+
|
108
|
+
def resolver
|
109
|
+
@resolver ||= Resolv::DNS.new(nameserver: ['8.8.8.8', '8.8.4.4'])
|
110
|
+
end
|
111
|
+
|
112
|
+
# Verifies that the given domain resolves (to an address, or has an NS or MX record), and thus is valid
|
113
|
+
def domain_resolves?(domain)
|
114
|
+
domain = Addressable::URI.new(host: domain).normalize.host
|
115
|
+
return true if ip?(domain)
|
116
|
+
returns_record?(domain, 'NS') || returns_record?(domain, 'MX')
|
117
|
+
end
|
118
|
+
|
119
|
+
private
|
120
|
+
|
121
|
+
def ensure_regex(domain)
|
122
|
+
REGEX_CHECKS.each do |msg, regex|
|
123
|
+
return reject(domain, msg) if domain =~ regex
|
117
124
|
end
|
125
|
+
true
|
126
|
+
end
|
118
127
|
|
119
|
-
|
128
|
+
def ensure_valid(domain)
|
129
|
+
return false if domain.empty?
|
130
|
+
if BLACKLIST.include?(domain)
|
131
|
+
reject(domain, 'blacklist')
|
132
|
+
elsif !PublicSuffix.valid?(".#{domain}")
|
133
|
+
reject(domain, 'invalid')
|
134
|
+
elsif Swot.is_academic?(domain)
|
135
|
+
reject(domain, 'academic')
|
136
|
+
else
|
137
|
+
ensure_regex(domain)
|
138
|
+
end
|
139
|
+
end
|
120
140
|
|
121
|
-
|
122
|
-
|
123
|
-
|
141
|
+
def ensure_resolves(domain)
|
142
|
+
return reject(domain, 'unresolvable') unless domain_resolves?(domain)
|
143
|
+
true
|
124
144
|
end
|
125
145
|
|
126
|
-
def
|
127
|
-
|
146
|
+
def ensure_not_dupe(domain)
|
147
|
+
return true unless dupe?(domain)
|
148
|
+
if current.domains.include?(domain)
|
149
|
+
reject(domain, 'duplicate')
|
150
|
+
else
|
151
|
+
parent = current.parent_domain(domain)
|
152
|
+
reject(domain, "subdomain of #{parent}")
|
153
|
+
end
|
154
|
+
end
|
155
|
+
|
156
|
+
def dupe?(domain)
|
157
|
+
current.domains.include?(domain) || current.parent_domain(domain)
|
158
|
+
end
|
159
|
+
|
160
|
+
def normalize_domains!
|
161
|
+
domains.list.each do |_group, domains|
|
162
|
+
domains.map! { |domain| normalize_domain(domain) }
|
163
|
+
domains.uniq!
|
164
|
+
end
|
165
|
+
end
|
166
|
+
|
167
|
+
def ensure_validity!(options = {})
|
168
|
+
domains.list.each do |_group, domains|
|
169
|
+
domains.select! { |domain| valid_domain?(domain, options) }
|
170
|
+
end
|
171
|
+
end
|
172
|
+
|
173
|
+
def add_to_current
|
174
|
+
domains.list.each do |group, domains|
|
175
|
+
current.list[group] ||= []
|
176
|
+
current.list[group].concat domains
|
177
|
+
end
|
178
|
+
current.write
|
128
179
|
end
|
129
180
|
|
130
|
-
def
|
131
|
-
|
181
|
+
def ip?(domain)
|
182
|
+
resolver.getaddress(domain)
|
132
183
|
rescue Resolv::ResolvError
|
133
184
|
false
|
134
185
|
end
|
135
186
|
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
resolve_without_errors { resolver.getresource(domain, Resolv::DNS::Resource::IN::MX) }
|
187
|
+
def returns_record?(domain, type)
|
188
|
+
type = Object.const_get "Resolv::DNS::Resource::IN::#{type}"
|
189
|
+
resolver.getresource(domain, type)
|
190
|
+
rescue Resolv::ResolvError
|
191
|
+
false
|
142
192
|
end
|
143
193
|
end
|
144
194
|
end
|
145
195
|
|
146
196
|
class Gman
|
147
|
-
def self.import(hash, options={})
|
197
|
+
def self.import(hash, options = {})
|
148
198
|
Gman::Importer.new(hash).import(options)
|
149
199
|
end
|
150
200
|
end
|