gman 5.0.9 → 6.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.rubocop.yml +21 -0
- data/.ruby-version +1 -1
- data/Gemfile +1 -0
- data/README.md +16 -22
- data/Rakefile +3 -3
- data/bin/gman +10 -11
- data/bin/gman_filter +7 -7
- data/config/domains.txt +19 -19
- data/config/vendor/dotgovs.csv +398 -355
- data/gman.gemspec +34 -27
- data/lib/gman.rb +29 -23
- data/lib/gman/country_codes.rb +14 -15
- data/lib/gman/domain_list.rb +34 -25
- data/lib/gman/identifier.rb +39 -43
- data/lib/gman/importer.rb +111 -61
- data/lib/gman/locality.rb +22 -10
- data/lib/gman/version.rb +1 -1
- data/script/add +2 -2
- data/script/alphabetize +2 -2
- data/script/cibuild +2 -0
- data/script/dedupe +2 -2
- data/script/profile +5 -2
- data/script/prune +7 -7
- data/script/reconcile-us +26 -21
- data/script/vendor-federal-de +5 -5
- data/script/vendor-municipal-de +5 -5
- data/script/vendor-nl +12 -4
- data/script/vendor-public-suffix +8 -8
- data/script/vendor-se +8 -6
- data/script/vendor-us +7 -7
- data/test/fixtures/domains.txt +2 -0
- data/test/{obama.txt → fixtures/obama.txt} +0 -0
- data/test/helper.rb +19 -5
- data/test/test_gman.rb +43 -38
- data/test/test_gman_bin.rb +37 -43
- data/test/test_gman_country_codes.rb +10 -6
- data/test/test_gman_domains.rb +15 -10
- data/test/test_gman_filter.rb +5 -7
- data/test/test_gman_identifier.rb +36 -35
- data/test/test_gman_importer.rb +250 -0
- data/test/test_gman_locality.rb +5 -5
- metadata +28 -10
- data/lib/gman/sanctions.rb +0 -29
- data/test/test_gman_sanctions.rb +0 -20
data/gman.gemspec
CHANGED
@@ -1,36 +1,43 @@
|
|
1
|
-
require File.expand_path
|
1
|
+
require File.expand_path './lib/gman/version', File.dirname(__FILE__)
|
2
2
|
|
3
3
|
Gem::Specification.new do |s|
|
4
|
-
s.name =
|
5
|
-
s.summary =
|
6
|
-
|
4
|
+
s.name = 'gman'
|
5
|
+
s.summary = <<-EOF
|
6
|
+
Check if a given domain or email address belongs to a government entity
|
7
|
+
EOF
|
8
|
+
s.description = <<-EOF
|
9
|
+
A ruby gem to check if the owner of a given email address is working for
|
10
|
+
THE MAN.
|
11
|
+
EOF
|
7
12
|
s.version = Gman::VERSION
|
8
|
-
s.authors = [
|
9
|
-
s.email =
|
10
|
-
s.homepage =
|
11
|
-
s.licenses = [
|
13
|
+
s.authors = ['Ben Balter']
|
14
|
+
s.email = 'ben.balter@github.com'
|
15
|
+
s.homepage = 'https://github.com/benbalter/gman'
|
16
|
+
s.licenses = ['MIT']
|
12
17
|
|
13
|
-
s.files
|
14
|
-
s.test_files
|
15
|
-
s.executables
|
16
|
-
|
18
|
+
s.files = `git ls-files`.split("\n")
|
19
|
+
s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
|
20
|
+
s.executables = `git ls-files -- bin/*`.split("\n").map do |f|
|
21
|
+
File.basename(f)
|
22
|
+
end
|
23
|
+
s.require_paths = ['lib']
|
17
24
|
|
18
|
-
s.require_paths = [
|
25
|
+
s.require_paths = ['lib']
|
19
26
|
s.required_ruby_version = '~> 2.0'
|
20
27
|
|
21
|
-
s.add_dependency(
|
22
|
-
s.add_dependency(
|
23
|
-
s.add_dependency(
|
24
|
-
s.add_dependency(
|
25
|
-
|
26
|
-
s.add_development_dependency( "rake", "~> 10.4" )
|
27
|
-
s.add_development_dependency( "shoulda", "~> 3.5" )
|
28
|
-
s.add_development_dependency( "rdoc", "~> 4.2" )
|
29
|
-
s.add_development_dependency( "bundler", "~> 1.10" )
|
30
|
-
s.add_development_dependency( "pry", "~> 0.10" )
|
31
|
-
s.add_development_dependency( "parallel", "~> 1.6" )
|
32
|
-
s.add_development_dependency( "mechanize", "~> 2.7" )
|
33
|
-
s.add_development_dependency( "addressable", "~> 2.3" )
|
34
|
-
s.add_development_dependency( "ruby-prof", "~> 0.15" )
|
28
|
+
s.add_dependency('swot', '~> 1.0')
|
29
|
+
s.add_dependency('iso_country_codes', '~> 0.6')
|
30
|
+
s.add_dependency('naughty_or_nice', '~> 2.0')
|
31
|
+
s.add_dependency('colored', '~> 1.2')
|
35
32
|
|
33
|
+
s.add_development_dependency('rake', '~> 10.4')
|
34
|
+
s.add_development_dependency('shoulda', '~> 3.5')
|
35
|
+
s.add_development_dependency('rdoc', '~> 4.2')
|
36
|
+
s.add_development_dependency('bundler', '~> 1.10')
|
37
|
+
s.add_development_dependency('pry', '~> 0.10')
|
38
|
+
s.add_development_dependency('parallel', '~> 1.6')
|
39
|
+
s.add_development_dependency('mechanize', '~> 2.7')
|
40
|
+
s.add_development_dependency('addressable', '~> 2.3')
|
41
|
+
s.add_development_dependency('ruby-prof', '~> 0.15')
|
42
|
+
s.add_development_dependency('rubocop', '~> 0.37')
|
36
43
|
end
|
data/lib/gman.rb
CHANGED
@@ -6,30 +6,33 @@ require_relative 'gman/version'
|
|
6
6
|
require_relative 'gman/country_codes'
|
7
7
|
require_relative 'gman/locality'
|
8
8
|
require_relative 'gman/identifier'
|
9
|
-
require_relative 'gman/sanctions'
|
10
9
|
|
11
10
|
class Gman
|
12
|
-
|
13
11
|
include NaughtyOrNice
|
14
12
|
|
15
13
|
class << self
|
16
14
|
# returns an instance of our custom public suffix list
|
17
|
-
# list behaves like PublicSuffix::List
|
15
|
+
# list behaves like PublicSuffix::List
|
16
|
+
# but is limited to our whitelisted domains
|
18
17
|
def list
|
19
|
-
|
18
|
+
@list ||= PublicSuffix::List.parse(list_contents)
|
20
19
|
end
|
21
20
|
|
22
21
|
def config_path
|
23
|
-
File.
|
22
|
+
File.expand_path '../config', File.dirname(__FILE__)
|
24
23
|
end
|
25
24
|
|
26
25
|
# Returns the absolute path to the domain list
|
27
26
|
def list_path
|
28
|
-
|
27
|
+
if ENV['GMAN_STUB_DOMAINS']
|
28
|
+
File.expand_path '../test/fixtures/domains.txt', File.dirname(__FILE__)
|
29
|
+
else
|
30
|
+
File.expand_path 'domains.txt', config_path
|
31
|
+
end
|
29
32
|
end
|
30
33
|
|
31
34
|
def list_contents
|
32
|
-
|
35
|
+
@list_contents ||= File.new(list_path, 'r:utf-8').read
|
33
36
|
end
|
34
37
|
end
|
35
38
|
|
@@ -37,25 +40,28 @@ class Gman
|
|
37
40
|
#
|
38
41
|
# Returns boolean true if a government domain
|
39
42
|
def valid?
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
43
|
+
@valid ||= begin
|
44
|
+
return false unless valid_domain?
|
45
|
+
return false if academic?
|
46
|
+
locality? || public_suffix_valid?
|
47
|
+
end
|
48
|
+
end
|
45
49
|
|
46
|
-
|
47
|
-
return false if Swot::is_academic?(domain)
|
50
|
+
private
|
48
51
|
|
49
|
-
|
50
|
-
|
52
|
+
def valid_domain?
|
53
|
+
domain && domain.valid? && !academic?
|
54
|
+
end
|
51
55
|
|
52
|
-
|
53
|
-
|
56
|
+
def academic?
|
57
|
+
domain && Swot.is_academic?(domain)
|
58
|
+
end
|
54
59
|
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
+
# domain is on the domain list and
|
61
|
+
# domain is not explicitly blacklisted and
|
62
|
+
# domain matches a standard public suffix list rule
|
63
|
+
def public_suffix_valid?
|
64
|
+
rule = Gman.list.find(to_s)
|
65
|
+
!rule.nil? && rule.type != :exception && rule.allow?(".#{domain}")
|
60
66
|
end
|
61
67
|
end
|
data/lib/gman/country_codes.rb
CHANGED
@@ -1,21 +1,20 @@
|
|
1
1
|
class Gman
|
2
|
-
|
3
2
|
# Map last part of TLD to alpha2 country code
|
4
3
|
ALPHA2_MAP = {
|
5
|
-
:
|
6
|
-
:
|
7
|
-
:
|
8
|
-
:
|
9
|
-
:
|
10
|
-
:
|
11
|
-
:
|
12
|
-
:
|
13
|
-
:
|
14
|
-
:
|
15
|
-
:
|
16
|
-
:
|
17
|
-
:
|
18
|
-
}
|
4
|
+
ac: 'sh',
|
5
|
+
uk: 'gb',
|
6
|
+
su: 'ru',
|
7
|
+
tp: 'tl',
|
8
|
+
yu: 'rs',
|
9
|
+
gov: 'us',
|
10
|
+
mil: 'us',
|
11
|
+
org: 'us',
|
12
|
+
com: 'us',
|
13
|
+
net: 'us',
|
14
|
+
edu: 'us',
|
15
|
+
travel: 'us',
|
16
|
+
info: 'us'
|
17
|
+
}.freeze
|
19
18
|
|
20
19
|
# Returns the two-character alpha country code represented by the domain
|
21
20
|
#
|
data/lib/gman/domain_list.rb
CHANGED
@@ -1,13 +1,12 @@
|
|
1
1
|
class Gman
|
2
2
|
class DomainList
|
3
|
-
|
4
3
|
attr_accessor :list
|
5
|
-
|
4
|
+
alias to_h list
|
6
5
|
|
7
|
-
COMMENT_REGEX =
|
6
|
+
COMMENT_REGEX = %r{//[/\s]*(.*)$}i
|
8
7
|
|
9
8
|
def initialize(list)
|
10
|
-
@list = list.reject { |
|
9
|
+
@list = list.reject { |_group, domains| domains.compact.empty? }
|
11
10
|
end
|
12
11
|
|
13
12
|
def groups
|
@@ -15,7 +14,7 @@ class Gman
|
|
15
14
|
end
|
16
15
|
|
17
16
|
def domains
|
18
|
-
list.values.flatten.sort.uniq
|
17
|
+
list.values.flatten.compact.sort.uniq
|
19
18
|
end
|
20
19
|
|
21
20
|
def count
|
@@ -23,18 +22,18 @@ class Gman
|
|
23
22
|
end
|
24
23
|
|
25
24
|
def alphabetize
|
26
|
-
@list = @list.sort_by { |k,
|
27
|
-
@list.each { |
|
25
|
+
@list = @list.sort_by { |k, _v| k.downcase }.to_h
|
26
|
+
@list.each { |_group, domains| domains.sort!.uniq! }
|
28
27
|
end
|
29
28
|
|
30
29
|
def write
|
30
|
+
alphabetize
|
31
31
|
File.write(Gman.list_path, to_public_suffix)
|
32
32
|
end
|
33
33
|
|
34
34
|
def to_public_suffix
|
35
|
-
current_group =
|
36
|
-
|
37
|
-
list.sort_by { |group, domains| group.downcase }.each do |group, domains|
|
35
|
+
current_group = output = ''
|
36
|
+
list.sort_by { |group, _domains| group.downcase }.each do |group, domains|
|
38
37
|
if group != current_group
|
39
38
|
output << "\n\n" unless current_group.empty? # first entry
|
40
39
|
output << "// #{group}\n"
|
@@ -46,7 +45,7 @@ class Gman
|
|
46
45
|
end
|
47
46
|
|
48
47
|
def self.current
|
49
|
-
current = File.open(Gman
|
48
|
+
current = File.open(Gman.list_path).read
|
50
49
|
DomainList.from_public_suffix(current)
|
51
50
|
end
|
52
51
|
|
@@ -56,23 +55,33 @@ class Gman
|
|
56
55
|
DomainList.new(hash)
|
57
56
|
end
|
58
57
|
|
59
|
-
|
58
|
+
def parent_domain(domain)
|
59
|
+
domains.find { |c| domain =~ /\.#{Regexp.escape(c)}$/ }
|
60
|
+
end
|
61
|
+
|
62
|
+
class << self
|
63
|
+
private
|
60
64
|
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
domain_hash[group].push line.downcase
|
65
|
+
# Given an array of comments/domains in public suffix format
|
66
|
+
# Converts to a hash in the form of :group => [domain1, domain2...]
|
67
|
+
def array_to_hash(domains)
|
68
|
+
domain_hash = {}
|
69
|
+
group = ''
|
70
|
+
domains.each do |line|
|
71
|
+
if line =~ COMMENT_REGEX
|
72
|
+
group = COMMENT_REGEX.match(line)[1]
|
73
|
+
else
|
74
|
+
safe_push(domain_hash, group, line.downcase)
|
75
|
+
end
|
73
76
|
end
|
77
|
+
domain_hash
|
78
|
+
end
|
79
|
+
|
80
|
+
def safe_push(hash, key, value)
|
81
|
+
return if value.empty?
|
82
|
+
hash[key] ||= []
|
83
|
+
hash[key].push value
|
74
84
|
end
|
75
|
-
domain_hash
|
76
85
|
end
|
77
86
|
end
|
78
87
|
end
|
data/lib/gman/identifier.rb
CHANGED
@@ -1,21 +1,10 @@
|
|
1
1
|
class Gman
|
2
|
-
|
3
2
|
def type
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
:cog
|
10
|
-
elsif city?
|
11
|
-
:city
|
12
|
-
elsif federal?
|
13
|
-
:federal
|
14
|
-
elsif county?
|
15
|
-
:county
|
16
|
-
elsif list_category.nil?
|
17
|
-
nil
|
18
|
-
elsif list_category.include?("usagov")
|
3
|
+
[:state, :district, :cog, :city, :federal, :county].each do |type|
|
4
|
+
return type if send "#{type}?"
|
5
|
+
end
|
6
|
+
return if list_category.nil?
|
7
|
+
if list_category.include?('usagov')
|
19
8
|
:unknown
|
20
9
|
else
|
21
10
|
list_category.to_sym
|
@@ -26,7 +15,7 @@ class Gman
|
|
26
15
|
if matches
|
27
16
|
matches[4].upcase
|
28
17
|
elsif dotgov_listing
|
29
|
-
dotgov_listing[
|
18
|
+
dotgov_listing['State']
|
30
19
|
elsif list_category
|
31
20
|
matches = list_category.match(/usagov([A-Z]{2})/)
|
32
21
|
matches[1] if matches
|
@@ -34,80 +23,87 @@ class Gman
|
|
34
23
|
end
|
35
24
|
|
36
25
|
def city
|
37
|
-
dotgov_listing[
|
26
|
+
dotgov_listing['City'] if dotgov_listing
|
38
27
|
end
|
39
28
|
|
40
29
|
def agency
|
41
|
-
dotgov_listing[
|
30
|
+
dotgov_listing['Agency'] if federal?
|
42
31
|
end
|
43
32
|
|
44
33
|
def dotgov?
|
45
|
-
domain.tld ==
|
34
|
+
domain.tld == 'gov'
|
46
35
|
end
|
47
36
|
|
48
37
|
def federal?
|
49
|
-
dotgov_listing && dotgov_listing[
|
38
|
+
dotgov_listing && dotgov_listing['Domain Type'] == 'Federal Agency'
|
50
39
|
end
|
51
40
|
|
52
41
|
def city?
|
53
42
|
if matches
|
54
|
-
%w
|
43
|
+
%w(ci town vil).include?(matches[3])
|
55
44
|
elsif dotgov_listing
|
56
|
-
dotgov_listing[
|
45
|
+
dotgov_listing['Domain Type'] == 'City'
|
57
46
|
end
|
58
47
|
end
|
59
48
|
|
60
49
|
def county?
|
61
50
|
if matches
|
62
|
-
matches[3] ==
|
51
|
+
matches[3] == 'co'
|
63
52
|
elsif dotgov_listing
|
64
|
-
dotgov_listing[
|
53
|
+
dotgov_listing['Domain Type'] == 'County'
|
65
54
|
end
|
66
55
|
end
|
67
56
|
|
68
57
|
def state?
|
69
58
|
if matches
|
70
|
-
matches[1] ==
|
59
|
+
matches[1] == 'state'
|
71
60
|
elsif dotgov_listing
|
72
|
-
dotgov_listing[
|
61
|
+
dotgov_listing['Domain Type'] == 'State/Local Govt'
|
73
62
|
end
|
74
63
|
end
|
75
64
|
|
76
65
|
def district?
|
77
|
-
|
66
|
+
matches && matches[1] == 'dst'
|
78
67
|
end
|
79
68
|
|
80
69
|
def cog?
|
81
|
-
|
70
|
+
matches && matches[1] == 'cog'
|
82
71
|
end
|
83
72
|
|
84
73
|
private
|
85
74
|
|
86
75
|
def list_category
|
87
76
|
@list_category ||= begin
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
77
|
+
match = Gman.list.find(domain.to_s)
|
78
|
+
return unless match
|
79
|
+
regex = %r{// ([^\n]+)\n?[^/]*\n#{Regexp.escape(match.name)}\n}im
|
80
|
+
matches = Gman.list_contents.match(regex)
|
81
|
+
matches[1] if matches
|
93
82
|
end
|
94
83
|
end
|
95
84
|
|
96
85
|
def matches
|
97
86
|
return @matches if defined? @matches
|
98
|
-
@matches = domain.to_s.match(
|
87
|
+
@matches = domain.to_s.match(Locality::REGEX)
|
99
88
|
end
|
100
89
|
|
101
|
-
def
|
102
|
-
|
90
|
+
def dotgov_listing
|
91
|
+
return @dotgov_listing if defined? @dotgov_listing
|
92
|
+
return unless dotgov?
|
93
|
+
@dotgov_listing = Gman.dotgov_list.find do |listing|
|
94
|
+
listing['Domain Name'].casecmp("#{domain.sld}.gov") == 0
|
95
|
+
end
|
103
96
|
end
|
104
97
|
|
105
|
-
|
106
|
-
|
107
|
-
|
98
|
+
class << self
|
99
|
+
def dotgov_list
|
100
|
+
@dotgov_list ||= CSV.read(dotgov_list_path, headers: true)
|
101
|
+
end
|
108
102
|
|
109
|
-
|
110
|
-
|
111
|
-
|
103
|
+
private
|
104
|
+
|
105
|
+
def dotgov_list_path
|
106
|
+
File.join Gman.config_path, 'vendor/dotgovs.csv'
|
107
|
+
end
|
112
108
|
end
|
113
109
|
end
|
data/lib/gman/importer.rb
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
# Utility functions for parsing and manipulating public-suffix
|
1
|
+
# Utility functions for parsing and manipulating public-suffix domain lists
|
2
2
|
# Only used in development and not loaded by default
|
3
3
|
require 'yaml'
|
4
4
|
require 'open-uri'
|
@@ -9,11 +9,10 @@ require_relative './domain_list'
|
|
9
9
|
|
10
10
|
class Gman
|
11
11
|
class Importer
|
12
|
-
|
13
12
|
attr_accessor :domains
|
14
13
|
|
15
14
|
# Known false positives from vendored lists
|
16
|
-
BLACKLIST = %w
|
15
|
+
BLACKLIST = %w(
|
17
16
|
business.centurytel.net
|
18
17
|
chesnee.net
|
19
18
|
citlink.net
|
@@ -39,7 +38,24 @@ class Gman
|
|
39
38
|
wctc.net
|
40
39
|
webconnections.net
|
41
40
|
webpages.charter.net
|
42
|
-
|
41
|
+
).freeze
|
42
|
+
|
43
|
+
REGEX_CHECKS = {
|
44
|
+
'home. regex' => /^home\./,
|
45
|
+
'user. regex' => /^users?\./,
|
46
|
+
'sites. regex' => /^sites?\./,
|
47
|
+
'weebly' => /weebly\.com$/,
|
48
|
+
'wordpress' => /wordpress\.com$/,
|
49
|
+
'govoffice' => /govoffice\d?\.com$/,
|
50
|
+
'homestead' => /homestead\.com$/,
|
51
|
+
'wix.com' => /wix\.com$/,
|
52
|
+
'blogspot.com' => /blogspot\.com$/,
|
53
|
+
'tripod.com' => /tripod\.com$/,
|
54
|
+
'squarespace.com' => /squarespace\.com$/,
|
55
|
+
'github.io' => /github\.io$/,
|
56
|
+
'tumblr' => /tumblr\.com$/,
|
57
|
+
'locality' => Gman::Locality::REGEX
|
58
|
+
}.freeze
|
43
59
|
|
44
60
|
def initialize(domains)
|
45
61
|
@domains = DomainList.new(domains)
|
@@ -50,40 +66,21 @@ class Gman
|
|
50
66
|
end
|
51
67
|
|
52
68
|
def normalize_domain(domain)
|
53
|
-
domain.
|
69
|
+
domain = Gman.new(domain).to_s
|
70
|
+
domain.to_s.downcase.strip.gsub(/^www./, '').gsub(%r{/$}, '')
|
54
71
|
end
|
55
72
|
|
56
|
-
def valid_domain?(domain, options={})
|
57
|
-
return false
|
58
|
-
return
|
59
|
-
return
|
60
|
-
return reject(domain, "sites. regex") if domain =~ /^sites?\./
|
61
|
-
return reject(domain, "weebly") if domain =~ /weebly\.com$/
|
62
|
-
return reject(domain, "wordpress") if domain =~ /wordpress\.com$/
|
63
|
-
return reject(domain, "govoffice") if domain =~ /govoffice\d?\.com$/
|
64
|
-
return reject(domain, "homestead") if domain =~ /homestead\.com$/
|
65
|
-
return reject(domain, "wix.com") if domain =~ /wix\.com$/
|
66
|
-
return reject(domain, "blogspot.com") if domain =~ /blogspot\.com$/
|
67
|
-
return reject(domain, "tripod.com") if domain =~ /tripod\.com$/
|
68
|
-
return reject(domain, "squarespace.com") if domain =~ /squarespace\.com$/
|
69
|
-
return reject(domain, "github.io") if domain =~ /github\.io$/
|
70
|
-
return reject(domain, "locality") if domain =~ Gman::LOCALITY_REGEX
|
71
|
-
return reject(domain, "blacklist") if BLACKLIST.include?(domain)
|
72
|
-
return reject(domain, "duplicate") if !options[:skip_dupe] && current.domains.include?(domain)
|
73
|
-
return reject(domain, "invalid") unless PublicSuffix.valid?(".#{domain}")
|
74
|
-
return reject(domain, "academic") if Swot::is_academic?(domain)
|
75
|
-
|
76
|
-
if !options[:skip_dupe] && subdomain = current.domains.find { |c| domain =~ /\.#{Regexp.escape(c)}$/}
|
77
|
-
return reject(domain, "subdomain of #{subdomain}")
|
78
|
-
end
|
79
|
-
|
80
|
-
return reject(domain, "unresolvable") if !options[:skip_resolve] && !domain_resolves?(domain)
|
73
|
+
def valid_domain?(domain, options = {})
|
74
|
+
return false unless ensure_valid(domain)
|
75
|
+
return false if !options[:skip_dupe] && !ensure_not_dupe(domain)
|
76
|
+
return false if !options[:skip_resolve] && !ensure_resolves(domain)
|
81
77
|
true
|
82
78
|
end
|
83
79
|
|
84
|
-
# if RECONCILING=true, return the reason,
|
80
|
+
# if RECONCILING=true, return the reason,
|
81
|
+
# rather than a bool and silence log output
|
85
82
|
def reject(domain, reason)
|
86
|
-
return reason if ENV[
|
83
|
+
return reason if ENV['RECONCILING']
|
87
84
|
logger.info "👎 `#{domain}`: #{reason}"
|
88
85
|
false
|
89
86
|
end
|
@@ -92,59 +89,112 @@ class Gman
|
|
92
89
|
@current ||= DomainList.current
|
93
90
|
end
|
94
91
|
|
95
|
-
def import(options
|
92
|
+
def import(options)
|
96
93
|
logger.info "Current: #{Gman::DomainList.current.count} domains"
|
97
94
|
logger.info "Adding: #{domains.count} domains"
|
98
95
|
|
99
|
-
|
100
|
-
|
101
|
-
domains.map! { |domain| normalize_domain(domain) }
|
102
|
-
domains.select! { |domain| valid_domain?(domain, options) }
|
103
|
-
end
|
104
|
-
|
105
|
-
logger.info "Filtered to: #{domains.count} domains"
|
96
|
+
normalize_domains!
|
97
|
+
ensure_validity!(options)
|
106
98
|
|
107
99
|
if domains.count == 0
|
108
|
-
logger.info
|
100
|
+
logger.info 'Nothing to add. Aborting'
|
109
101
|
exit 0
|
110
102
|
end
|
111
103
|
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
104
|
+
add_to_current
|
105
|
+
logger.info "New: #{current.count} domains"
|
106
|
+
end
|
107
|
+
|
108
|
+
def resolver
|
109
|
+
@resolver ||= Resolv::DNS.new(nameserver: ['8.8.8.8', '8.8.4.4'])
|
110
|
+
end
|
111
|
+
|
112
|
+
# Verifies that the given domain resolves to an address or has an NS or MX record, and thus is valid
|
113
|
+
def domain_resolves?(domain)
|
114
|
+
domain = Addressable::URI.new(host: domain).normalize.host
|
115
|
+
return true if ip?(domain)
|
116
|
+
returns_record?(domain, 'NS') || returns_record?(domain, 'MX')
|
117
|
+
end
|
118
|
+
|
119
|
+
private
|
120
|
+
|
121
|
+
def ensure_regex(domain)
|
122
|
+
REGEX_CHECKS.each do |msg, regex|
|
123
|
+
return reject(domain, msg) if domain =~ regex
|
117
124
|
end
|
125
|
+
true
|
126
|
+
end
|
118
127
|
|
119
|
-
|
128
|
+
def ensure_valid(domain)
|
129
|
+
return false if domain.empty?
|
130
|
+
if BLACKLIST.include?(domain)
|
131
|
+
reject(domain, 'blacklist')
|
132
|
+
elsif !PublicSuffix.valid?(".#{domain}")
|
133
|
+
reject(domain, 'invalid')
|
134
|
+
elsif Swot.is_academic?(domain)
|
135
|
+
reject(domain, 'academic')
|
136
|
+
else
|
137
|
+
ensure_regex(domain)
|
138
|
+
end
|
139
|
+
end
|
120
140
|
|
121
|
-
|
122
|
-
|
123
|
-
|
141
|
+
def ensure_resolves(domain)
|
142
|
+
return reject(domain, 'unresolvable') unless domain_resolves?(domain)
|
143
|
+
true
|
124
144
|
end
|
125
145
|
|
126
|
-
def
|
127
|
-
|
146
|
+
def ensure_not_dupe(domain)
|
147
|
+
return true unless dupe?(domain)
|
148
|
+
if current.domains.include?(domain)
|
149
|
+
reject(domain, 'duplicate')
|
150
|
+
else
|
151
|
+
parent = current.parent_domain(domain)
|
152
|
+
reject(domain, "subdomain of #{parent}")
|
153
|
+
end
|
154
|
+
end
|
155
|
+
|
156
|
+
def dupe?(domain)
|
157
|
+
current.domains.include?(domain) || current.parent_domain(domain)
|
158
|
+
end
|
159
|
+
|
160
|
+
def normalize_domains!
|
161
|
+
domains.list.each do |_group, domains|
|
162
|
+
domains.map! { |domain| normalize_domain(domain) }
|
163
|
+
domains.uniq!
|
164
|
+
end
|
165
|
+
end
|
166
|
+
|
167
|
+
def ensure_validity!(options = {})
|
168
|
+
domains.list.each do |_group, domains|
|
169
|
+
domains.select! { |domain| valid_domain?(domain, options) }
|
170
|
+
end
|
171
|
+
end
|
172
|
+
|
173
|
+
def add_to_current
|
174
|
+
domains.list.each do |group, domains|
|
175
|
+
current.list[group] ||= []
|
176
|
+
current.list[group].concat domains
|
177
|
+
end
|
178
|
+
current.write
|
128
179
|
end
|
129
180
|
|
130
|
-
def
|
131
|
-
|
181
|
+
def ip?(domain)
|
182
|
+
resolver.getaddress(domain)
|
132
183
|
rescue Resolv::ResolvError
|
133
184
|
false
|
134
185
|
end
|
135
186
|
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
resolve_without_errors { resolver.getresource(domain, Resolv::DNS::Resource::IN::MX) }
|
187
|
+
def returns_record?(domain, type)
|
188
|
+
type = Object.const_get "Resolv::DNS::Resource::IN::#{type}"
|
189
|
+
resolver.getresource(domain, type)
|
190
|
+
rescue Resolv::ResolvError
|
191
|
+
false
|
142
192
|
end
|
143
193
|
end
|
144
194
|
end
|
145
195
|
|
146
196
|
class Gman
|
147
|
-
def self.import(hash, options={})
|
197
|
+
def self.import(hash, options = {})
|
148
198
|
Gman::Importer.new(hash).import(options)
|
149
199
|
end
|
150
200
|
end
|