gman 6.0.1 → 7.0.4

Sign up to get free protection for your applications and to get access to all the features.
Files changed (73) hide show
  1. checksums.yaml +5 -5
  2. data/.github/CODEOWNERS +3 -0
  3. data/.github/ISSUE_TEMPLATE/bug_report.md +28 -0
  4. data/.github/ISSUE_TEMPLATE/feature_request.md +21 -0
  5. data/.github/config.yml +23 -0
  6. data/.github/funding.yml +1 -0
  7. data/.github/no-response.yml +15 -0
  8. data/.github/release-drafter.yml +4 -0
  9. data/.github/settings.yml +33 -0
  10. data/.github/stale.yml +29 -0
  11. data/.gitignore +1 -0
  12. data/.rspec +2 -0
  13. data/.rubocop.yml +17 -5
  14. data/.rubocop_todo.yml +84 -0
  15. data/.ruby-version +1 -1
  16. data/Gemfile +2 -0
  17. data/bin/gman +6 -4
  18. data/bin/gman_filter +5 -7
  19. data/config/domains.txt +8446 -173
  20. data/config/vendor/academic.txt +8038 -0
  21. data/config/vendor/dotgovs.csv +5786 -5560
  22. data/docs/CODE_OF_CONDUCT.md +46 -0
  23. data/docs/CONTRIBUTING.md +92 -0
  24. data/{README.md → docs/README.md} +3 -3
  25. data/docs/SECURITY.md +3 -0
  26. data/docs/_config.yml +2 -0
  27. data/gman.gemspec +18 -17
  28. data/lib/gman.rb +25 -21
  29. data/lib/gman/country_codes.rb +17 -17
  30. data/lib/gman/domain_list.rb +123 -41
  31. data/lib/gman/identifier.rb +59 -21
  32. data/lib/gman/importer.rb +39 -40
  33. data/lib/gman/locality.rb +23 -21
  34. data/lib/gman/version.rb +3 -1
  35. data/script/add +2 -0
  36. data/script/alphabetize +2 -0
  37. data/script/cibuild +1 -1
  38. data/script/dedupe +2 -1
  39. data/script/profile +2 -1
  40. data/script/prune +5 -3
  41. data/script/reconcile-us +6 -3
  42. data/script/vendor +1 -1
  43. data/script/vendor-federal-de +3 -3
  44. data/script/vendor-municipal-de +3 -3
  45. data/script/vendor-nl +4 -1
  46. data/script/vendor-public-suffix +7 -6
  47. data/script/vendor-se +3 -3
  48. data/script/vendor-swot +43 -0
  49. data/script/vendor-us +8 -5
  50. data/spec/fixtures/domains.txt +4 -0
  51. data/{test → spec}/fixtures/obama.txt +0 -0
  52. data/spec/gman/bin_spec.rb +101 -0
  53. data/spec/gman/country_code_spec.rb +39 -0
  54. data/spec/gman/domain_list_spec.rb +110 -0
  55. data/spec/gman/domains_spec.rb +25 -0
  56. data/spec/gman/identifier_spec.rb +218 -0
  57. data/spec/gman/importer_spec.rb +236 -0
  58. data/spec/gman/locality_spec.rb +24 -0
  59. data/spec/gman_spec.rb +74 -0
  60. data/spec/spec_helper.rb +31 -0
  61. metadata +86 -73
  62. data/CONTRIBUTING.md +0 -22
  63. data/Rakefile +0 -22
  64. data/test/fixtures/domains.txt +0 -2
  65. data/test/helper.rb +0 -40
  66. data/test/test_gman.rb +0 -62
  67. data/test/test_gman_bin.rb +0 -75
  68. data/test/test_gman_country_codes.rb +0 -18
  69. data/test/test_gman_domains.rb +0 -33
  70. data/test/test_gman_filter.rb +0 -17
  71. data/test/test_gman_identifier.rb +0 -106
  72. data/test/test_gman_importer.rb +0 -250
  73. data/test/test_gman_locality.rb +0 -10
@@ -1,9 +1,40 @@
1
+ # frozen_string_literal: true
2
+
1
3
  class Gman
4
+ # Defines an instance method that delegates to a hash's key
5
+ #
6
+ # hash_method - a symbol representing the instance method to delegate to. The
7
+ # instance method should return a hash or respond to #[]
8
+ # key - the key to call within the hash
9
+ # method - (optional) the instance method the key should be aliased to.
10
+ # If not specified, defaults to the hash key
11
+ # default - (optional) value to return if value is nil (defaults to nil)
12
+ #
13
+ # Returns a symbol representing the instance method
14
+ def self.def_hash_delegator(hash_method, key, method = nil, default = nil)
15
+ method ||= key.to_s.downcase.sub(' ', '_')
16
+ define_method(method) do
17
+ hash = send(hash_method)
18
+ if hash.respond_to? :[]
19
+ hash[key.to_s] || default
20
+ else
21
+ default
22
+ end
23
+ end
24
+ end
25
+
26
+ def_hash_delegator :dotgov_listing, :Agency
27
+ def_hash_delegator :dotgov_listing, :Organization
28
+ def_hash_delegator :dotgov_listing, :City
29
+ def_hash_delegator :dotgov_listing, :"Domain Type"
30
+ private :domain_type
31
+
2
32
  def type
3
- [:state, :district, :cog, :city, :federal, :county].each do |type|
33
+ %i[state district cog city federal county].each do |type|
4
34
  return type if send "#{type}?"
5
35
  end
6
36
  return if list_category.nil?
37
+
7
38
  if list_category.include?('usagov')
8
39
  :unknown
9
40
  else
@@ -14,7 +45,7 @@ class Gman
14
45
  def state
15
46
  if matches
16
47
  matches[4].upcase
17
- elsif dotgov_listing
48
+ elsif dotgov_listing['State']
18
49
  dotgov_listing['State']
19
50
  elsif list_category
20
51
  matches = list_category.match(/usagov([A-Z]{2})/)
@@ -22,27 +53,23 @@ class Gman
22
53
  end
23
54
  end
24
55
 
25
- def city
26
- dotgov_listing['City'] if dotgov_listing
27
- end
28
-
29
- def agency
30
- dotgov_listing['Agency'] if federal?
31
- end
32
-
33
56
  def dotgov?
34
57
  domain.tld == 'gov'
35
58
  end
36
59
 
37
60
  def federal?
38
- dotgov_listing && dotgov_listing['Domain Type'] == 'Federal Agency'
61
+ return false unless dotgov_listing
62
+
63
+ domain_type =~ /^Federal Agency/i
39
64
  end
40
65
 
41
66
  def city?
42
67
  if matches
43
- %w(ci town vil).include?(matches[3])
68
+ %w[ci town vil].include?(matches[3])
44
69
  elsif dotgov_listing
45
- dotgov_listing['Domain Type'] == 'City'
70
+ domain_type == 'City'
71
+ else
72
+ false
46
73
  end
47
74
  end
48
75
 
@@ -50,7 +77,9 @@ class Gman
50
77
  if matches
51
78
  matches[3] == 'co'
52
79
  elsif dotgov_listing
53
- dotgov_listing['Domain Type'] == 'County'
80
+ domain_type == 'County'
81
+ else
82
+ false
54
83
  end
55
84
  end
56
85
 
@@ -58,40 +87,49 @@ class Gman
58
87
  if matches
59
88
  matches[1] == 'state'
60
89
  elsif dotgov_listing
61
- dotgov_listing['Domain Type'] == 'State/Local Govt'
90
+ domain_type == 'State/Local Govt'
91
+ else
92
+ false
62
93
  end
63
94
  end
64
95
 
65
96
  def district?
66
- matches && matches[1] == 'dst'
97
+ return false unless matches
98
+
99
+ matches[1] == 'dst'
67
100
  end
68
101
 
69
102
  def cog?
70
- matches && matches[1] == 'cog'
103
+ return false unless matches
104
+
105
+ matches[1] == 'cog'
71
106
  end
72
107
 
73
108
  private
74
109
 
75
110
  def list_category
76
111
  @list_category ||= begin
77
- match = Gman.list.find(domain.to_s)
112
+ match = Gman.list.public_suffix_list.find(domain.to_s)
78
113
  return unless match
79
- regex = %r{// ([^\n]+)\n?[^/]*\n#{Regexp.escape(match.name)}\n}im
80
- matches = Gman.list_contents.match(regex)
114
+
115
+ regex = %r{// ([^\n]+)\n?[^/]*\n#{Regexp.escape(match.value)}\n}im
116
+ matches = Gman.list.contents.match(regex)
81
117
  matches[1] if matches
82
118
  end
83
119
  end
84
120
 
85
121
  def matches
86
122
  return @matches if defined? @matches
123
+
87
124
  @matches = domain.to_s.match(Locality::REGEX)
88
125
  end
89
126
 
90
127
  def dotgov_listing
91
128
  return @dotgov_listing if defined? @dotgov_listing
92
129
  return unless dotgov?
130
+
93
131
  @dotgov_listing = Gman.dotgov_list.find do |listing|
94
- listing['Domain Name'].casecmp("#{domain.sld}.gov") == 0
132
+ listing['Domain Name'].casecmp("#{domain.sld}.gov").zero?
95
133
  end
96
134
  end
97
135
 
@@ -1,18 +1,21 @@
1
+ # frozen_string_literal: true
2
+
1
3
  # Utility functions for parsing and manipulating public-suffix domain lists
2
4
  # Only used in development and not loaded by default
3
5
  require 'yaml'
4
6
  require 'open-uri'
5
7
  require 'resolv'
6
8
  require 'logger'
9
+ require 'swot'
7
10
  require_relative '../gman'
8
11
  require_relative './domain_list'
9
12
 
10
13
  class Gman
11
14
  class Importer
12
- attr_accessor :domains
15
+ attr_accessor :domain_list
13
16
 
14
17
  # Known false positives from vendored lists
15
- BLACKLIST = %w(
18
+ BLACKLIST = %w[
16
19
  business.centurytel.net
17
20
  chesnee.net
18
21
  citlink.net
@@ -38,31 +41,32 @@ class Gman
38
41
  wctc.net
39
42
  webconnections.net
40
43
  webpages.charter.net
41
- ).freeze
44
+ ].freeze
42
45
 
43
46
  REGEX_CHECKS = {
44
- 'home. regex' => /^home\./,
45
- 'user. regex' => /^users?\./,
46
- 'sites. regex' => /^sites?\./,
47
- 'weebly' => /weebly\.com$/,
48
- 'wordpress' => /wordpress\.com$/,
49
- 'govoffice' => /govoffice\d?\.com$/,
50
- 'homestead' => /homestead\.com$/,
51
- 'wix.com' => /wix\.com$/,
52
- 'blogspot.com' => /blogspot\.com$/,
53
- 'tripod.com' => /tripod\.com$/,
47
+ 'home. regex' => /^home\./,
48
+ 'user. regex' => /^users?\./,
49
+ 'sites. regex' => /^sites?\./,
50
+ 'weebly' => /weebly\.com$/,
51
+ 'wordpress' => /wordpress\.com$/,
52
+ 'govoffice' => /govoffice\d?\.com$/,
53
+ 'homestead' => /homestead\.com$/,
54
+ 'wix.com' => /wix\.com$/,
55
+ 'blogspot.com' => /blogspot\.com$/,
56
+ 'tripod.com' => /tripod\.com$/,
54
57
  'squarespace.com' => /squarespace\.com$/,
55
- 'github.io' => /github\.io$/,
56
- 'tumblr' => /tumblr\.com$/,
57
- 'locality' => Gman::Locality::REGEX
58
+ 'github.io' => /github\.io$/,
59
+ 'tumblr' => /tumblr\.com$/,
60
+ 'locality' => Gman::Locality::REGEX,
61
+ 'french edu' => /^ac-.*?\.fr/
58
62
  }.freeze
59
63
 
60
64
  def initialize(domains)
61
- @domains = DomainList.new(domains)
65
+ @domain_list = DomainList.new(data: domains)
62
66
  end
63
67
 
64
68
  def logger
65
- @logger ||= Logger.new(STDOUT)
69
+ @logger ||= Logger.new($stdout)
66
70
  end
67
71
 
68
72
  def normalize_domain(domain)
@@ -71,9 +75,10 @@ class Gman
71
75
  end
72
76
 
73
77
  def valid_domain?(domain, options = {})
74
- return false unless ensure_valid(domain)
75
78
  return false if !options[:skip_dupe] && !ensure_not_dupe(domain)
79
+ return false unless ensure_valid(domain)
76
80
  return false if !options[:skip_resolve] && !ensure_resolves(domain)
81
+
77
82
  true
78
83
  end
79
84
 
@@ -81,6 +86,7 @@ class Gman
81
86
  # rather than a bool and silence log output
82
87
  def reject(domain, reason)
83
88
  return reason if ENV['RECONCILING']
89
+
84
90
  logger.info "👎 `#{domain}`: #{reason}"
85
91
  false
86
92
  end
@@ -89,30 +95,26 @@ class Gman
89
95
  @current ||= DomainList.current
90
96
  end
91
97
 
92
- def import(options)
98
+ def import(options = {})
93
99
  logger.info "Current: #{Gman::DomainList.current.count} domains"
94
- logger.info "Adding: #{domains.count} domains"
100
+ logger.info "Adding: #{domain_list.count} domains"
95
101
 
96
102
  normalize_domains!
97
103
  ensure_validity!(options)
98
104
 
99
- if domains.count == 0
100
- logger.info 'Nothing to add. Aborting'
101
- exit 0
102
- end
103
-
104
105
  add_to_current
105
106
  logger.info "New: #{current.count} domains"
106
107
  end
107
108
 
108
109
  def resolver
109
- @resolver ||= Resolv::DNS.new(nameserver: ['8.8.8.8', '8.8.4.4'])
110
+ @resolver ||= Resolv::DNS.new(nameserver: ['1.1.1.1', '8.8.8.8'])
110
111
  end
111
112
 
112
113
  # Verifies that the given domain has an MX record, and thus is valid
113
114
  def domain_resolves?(domain)
114
115
  domain = Addressable::URI.new(host: domain).normalize.host
115
116
  return true if ip?(domain)
117
+
116
118
  returns_record?(domain, 'NS') || returns_record?(domain, 'MX')
117
119
  end
118
120
 
@@ -120,16 +122,17 @@ class Gman
120
122
 
121
123
  def ensure_regex(domain)
122
124
  REGEX_CHECKS.each do |msg, regex|
123
- return reject(domain, msg) if domain =~ regex
125
+ return reject(domain, msg) if domain&.match?(regex)
124
126
  end
125
127
  true
126
128
  end
127
129
 
128
130
  def ensure_valid(domain)
129
131
  return false if domain.empty?
132
+
130
133
  if BLACKLIST.include?(domain)
131
134
  reject(domain, 'blacklist')
132
- elsif !PublicSuffix.valid?(".#{domain}")
135
+ elsif !PublicSuffix.valid?("foo.#{domain}")
133
136
  reject(domain, 'invalid')
134
137
  elsif Swot.is_academic?(domain)
135
138
  reject(domain, 'academic')
@@ -140,11 +143,13 @@ class Gman
140
143
 
141
144
  def ensure_resolves(domain)
142
145
  return reject(domain, 'unresolvable') unless domain_resolves?(domain)
146
+
143
147
  true
144
148
  end
145
149
 
146
150
  def ensure_not_dupe(domain)
147
151
  return true unless dupe?(domain)
152
+
148
153
  if current.domains.include?(domain)
149
154
  reject(domain, 'duplicate')
150
155
  else
@@ -158,22 +163,22 @@ class Gman
158
163
  end
159
164
 
160
165
  def normalize_domains!
161
- domains.list.each do |_group, domains|
166
+ domain_list.to_h.each_value do |domains|
162
167
  domains.map! { |domain| normalize_domain(domain) }
163
168
  domains.uniq!
164
169
  end
165
170
  end
166
171
 
167
172
  def ensure_validity!(options = {})
168
- domains.list.each do |_group, domains|
173
+ domain_list.data.each_value do |domains|
169
174
  domains.select! { |domain| valid_domain?(domain, options) }
170
175
  end
171
176
  end
172
177
 
173
178
  def add_to_current
174
- domains.list.each do |group, domains|
175
- current.list[group] ||= []
176
- current.list[group].concat domains
179
+ domain_list.data.each do |group, domains|
180
+ current.data[group] ||= []
181
+ current.data[group].concat domains
177
182
  end
178
183
  current.write
179
184
  end
@@ -192,9 +197,3 @@ class Gman
192
197
  end
193
198
  end
194
199
  end
195
-
196
- class Gman
197
- def self.import(hash, options = {})
198
- Gman::Importer.new(hash).import(options)
199
- end
200
- end
@@ -1,17 +1,31 @@
1
+ # frozen_string_literal: true
2
+
1
3
  class Gman
4
+ # Second level .us domains for states and locality
5
+ # See http://en.wikipedia.org/wiki/.us
6
+ #
7
+ # Examples:
8
+ # * foo.state.il.us
9
+ # * ci.foo.il.us
10
+ #
11
+ # Not:
12
+ # * state.foo.il.us
13
+ # * foo.ci.il.us
14
+ # * k12.il.us
15
+ # * ci.foo.zx.us
2
16
  class Locality
3
- AFFINITY_NAMESPACES = %w(state dst cog).freeze
17
+ AFFINITY_NAMESPACES = %w[state dst cog].freeze
4
18
 
5
- STATES = %w(
19
+ STATES = %w[
6
20
  ak al ar az ca co ct dc de fl ga hi ia id il in ks ky
7
21
  la ma md me mi mn mo ms mt nc nd ne nh nj nm nv ny oh
8
22
  ok or pa ri sc sd tn tx um ut va vt wa wi wv wy
9
- ).freeze
23
+ ].freeze
10
24
 
11
- LOCALITY_DOMAINS = %w(
25
+ LOCALITY_DOMAINS = %w[
12
26
  ci co borough boro city county
13
27
  parish town twp vi vil village
14
- ).freeze
28
+ ].freeze
15
29
 
16
30
  REGEX = /
17
31
  (
@@ -19,22 +33,10 @@ class Gman
19
33
  |
20
34
  (#{Regexp.union(LOCALITY_DOMAINS)})\.[a-z-]+
21
35
  )\.(#{Regexp.union(STATES)})\.us
22
- /x
23
- end
36
+ /x.freeze
24
37
 
25
- # Second level .us domains for states and locality
26
- # See http://en.wikipedia.org/wiki/.us
27
- #
28
- # Examples:
29
- # * foo.state.il.us
30
- # * ci.foo.il.us
31
- #
32
- # Not:
33
- # * state.foo.il.us
34
- # * foo.ci.il.us
35
- # * k12.il.us
36
- # * ci.foo.zx.us
37
- def locality?
38
- !domain.to_s.match(Locality::REGEX).nil?
38
+ def self.valid?(domain)
39
+ !domain.to_s.match(Locality::REGEX).nil?
40
+ end
39
41
  end
40
42
  end
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  class Gman
2
- VERSION = '6.0.1'.freeze
4
+ VERSION = '7.0.4'
3
5
  end
data/script/add CHANGED
@@ -1,4 +1,6 @@
1
1
  #! /usr/bin/env ruby
2
+ # frozen_string_literal: true
3
+
2
4
  #
3
5
  # Add one or more domains to a given group, running the standard import checks
4
6
  #
@@ -1,4 +1,6 @@
1
1
  #! /usr/bin/env ruby
2
+ # frozen_string_literal: true
3
+
2
4
  #
3
5
  # Alphabetizes entries in the domains.txt file
4
6
  #
@@ -2,7 +2,7 @@
2
2
 
3
3
  set -ex
4
4
 
5
- bundle exec rake test
5
+ bundle exec rspec
6
6
  bundle exec rubocop -D -S -a
7
7
  bundle exec script/dedupe
8
8
  bundle exec gem build gman.gemspec
@@ -1,4 +1,5 @@
1
1
  #! /usr/bin/env ruby
2
+ # frozen_string_literal: true
2
3
 
3
4
  require 'yaml'
4
5
  require 'open-uri'
@@ -12,7 +13,7 @@ puts "Current list contains #{current.count} domains..."
12
13
 
13
14
  dupe = current.count - current.domains.uniq.count
14
15
  puts "Found #{dupe} duplicate domains"
15
- exit 0 if dupe == 0
16
+ exit 0 if dupe.zero?
16
17
 
17
18
  dupes = current.domains.select { |domain| current.domains.count(domain) > 1 }
18
19