gman 6.0.1 → 7.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (73)
  1. checksums.yaml +5 -5
  2. data/.github/CODEOWNERS +3 -0
  3. data/.github/ISSUE_TEMPLATE/bug_report.md +28 -0
  4. data/.github/ISSUE_TEMPLATE/feature_request.md +21 -0
  5. data/.github/config.yml +23 -0
  6. data/.github/funding.yml +1 -0
  7. data/.github/no-response.yml +15 -0
  8. data/.github/release-drafter.yml +4 -0
  9. data/.github/settings.yml +33 -0
  10. data/.github/stale.yml +29 -0
  11. data/.gitignore +1 -0
  12. data/.rspec +2 -0
  13. data/.rubocop.yml +17 -5
  14. data/.rubocop_todo.yml +84 -0
  15. data/.ruby-version +1 -1
  16. data/Gemfile +2 -0
  17. data/bin/gman +6 -4
  18. data/bin/gman_filter +5 -7
  19. data/config/domains.txt +8446 -173
  20. data/config/vendor/academic.txt +8038 -0
  21. data/config/vendor/dotgovs.csv +5786 -5560
  22. data/docs/CODE_OF_CONDUCT.md +46 -0
  23. data/docs/CONTRIBUTING.md +92 -0
  24. data/{README.md → docs/README.md} +3 -3
  25. data/docs/SECURITY.md +3 -0
  26. data/docs/_config.yml +2 -0
  27. data/gman.gemspec +18 -17
  28. data/lib/gman.rb +25 -21
  29. data/lib/gman/country_codes.rb +17 -17
  30. data/lib/gman/domain_list.rb +123 -41
  31. data/lib/gman/identifier.rb +59 -21
  32. data/lib/gman/importer.rb +39 -40
  33. data/lib/gman/locality.rb +23 -21
  34. data/lib/gman/version.rb +3 -1
  35. data/script/add +2 -0
  36. data/script/alphabetize +2 -0
  37. data/script/cibuild +1 -1
  38. data/script/dedupe +2 -1
  39. data/script/profile +2 -1
  40. data/script/prune +5 -3
  41. data/script/reconcile-us +6 -3
  42. data/script/vendor +1 -1
  43. data/script/vendor-federal-de +3 -3
  44. data/script/vendor-municipal-de +3 -3
  45. data/script/vendor-nl +4 -1
  46. data/script/vendor-public-suffix +7 -6
  47. data/script/vendor-se +3 -3
  48. data/script/vendor-swot +43 -0
  49. data/script/vendor-us +8 -5
  50. data/spec/fixtures/domains.txt +4 -0
  51. data/{test → spec}/fixtures/obama.txt +0 -0
  52. data/spec/gman/bin_spec.rb +101 -0
  53. data/spec/gman/country_code_spec.rb +39 -0
  54. data/spec/gman/domain_list_spec.rb +110 -0
  55. data/spec/gman/domains_spec.rb +25 -0
  56. data/spec/gman/identifier_spec.rb +218 -0
  57. data/spec/gman/importer_spec.rb +236 -0
  58. data/spec/gman/locality_spec.rb +24 -0
  59. data/spec/gman_spec.rb +74 -0
  60. data/spec/spec_helper.rb +31 -0
  61. metadata +86 -73
  62. data/CONTRIBUTING.md +0 -22
  63. data/Rakefile +0 -22
  64. data/test/fixtures/domains.txt +0 -2
  65. data/test/helper.rb +0 -40
  66. data/test/test_gman.rb +0 -62
  67. data/test/test_gman_bin.rb +0 -75
  68. data/test/test_gman_country_codes.rb +0 -18
  69. data/test/test_gman_domains.rb +0 -33
  70. data/test/test_gman_filter.rb +0 -17
  71. data/test/test_gman_identifier.rb +0 -106
  72. data/test/test_gman_importer.rb +0 -250
  73. data/test/test_gman_locality.rb +0 -10
@@ -1,9 +1,40 @@
1
+ # frozen_string_literal: true
2
+
1
3
  class Gman
4
+ # Defines an instance method that delegates to a hash's key
5
+ #
6
+ # hash_method - a symbol representing the instance method to delegate to. The
7
+ # instance method should return a hash or respond to #[]
8
+ # key - the key to call within the hash
9
+ # method - (optional) the instance method the key should be aliased to.
10
+ # If not specified, defaults to the hash key
11
+ # default - (optional) value to return if value is nil (defaults to nil)
12
+ #
13
+ # Returns a symbol representing the instance method
14
+ def self.def_hash_delegator(hash_method, key, method = nil, default = nil)
15
+ method ||= key.to_s.downcase.sub(' ', '_')
16
+ define_method(method) do
17
+ hash = send(hash_method)
18
+ if hash.respond_to? :[]
19
+ hash[key.to_s] || default
20
+ else
21
+ default
22
+ end
23
+ end
24
+ end
25
+
26
+ def_hash_delegator :dotgov_listing, :Agency
27
+ def_hash_delegator :dotgov_listing, :Organization
28
+ def_hash_delegator :dotgov_listing, :City
29
+ def_hash_delegator :dotgov_listing, :"Domain Type"
30
+ private :domain_type
31
+
2
32
  def type
3
- [:state, :district, :cog, :city, :federal, :county].each do |type|
33
+ %i[state district cog city federal county].each do |type|
4
34
  return type if send "#{type}?"
5
35
  end
6
36
  return if list_category.nil?
37
+
7
38
  if list_category.include?('usagov')
8
39
  :unknown
9
40
  else
@@ -14,7 +45,7 @@ class Gman
14
45
  def state
15
46
  if matches
16
47
  matches[4].upcase
17
- elsif dotgov_listing
48
+ elsif dotgov_listing['State']
18
49
  dotgov_listing['State']
19
50
  elsif list_category
20
51
  matches = list_category.match(/usagov([A-Z]{2})/)
@@ -22,27 +53,23 @@ class Gman
22
53
  end
23
54
  end
24
55
 
25
- def city
26
- dotgov_listing['City'] if dotgov_listing
27
- end
28
-
29
- def agency
30
- dotgov_listing['Agency'] if federal?
31
- end
32
-
33
56
  def dotgov?
34
57
  domain.tld == 'gov'
35
58
  end
36
59
 
37
60
  def federal?
38
- dotgov_listing && dotgov_listing['Domain Type'] == 'Federal Agency'
61
+ return false unless dotgov_listing
62
+
63
+ domain_type =~ /^Federal Agency/i
39
64
  end
40
65
 
41
66
  def city?
42
67
  if matches
43
- %w(ci town vil).include?(matches[3])
68
+ %w[ci town vil].include?(matches[3])
44
69
  elsif dotgov_listing
45
- dotgov_listing['Domain Type'] == 'City'
70
+ domain_type == 'City'
71
+ else
72
+ false
46
73
  end
47
74
  end
48
75
 
@@ -50,7 +77,9 @@ class Gman
50
77
  if matches
51
78
  matches[3] == 'co'
52
79
  elsif dotgov_listing
53
- dotgov_listing['Domain Type'] == 'County'
80
+ domain_type == 'County'
81
+ else
82
+ false
54
83
  end
55
84
  end
56
85
 
@@ -58,40 +87,49 @@ class Gman
58
87
  if matches
59
88
  matches[1] == 'state'
60
89
  elsif dotgov_listing
61
- dotgov_listing['Domain Type'] == 'State/Local Govt'
90
+ domain_type == 'State/Local Govt'
91
+ else
92
+ false
62
93
  end
63
94
  end
64
95
 
65
96
  def district?
66
- matches && matches[1] == 'dst'
97
+ return false unless matches
98
+
99
+ matches[1] == 'dst'
67
100
  end
68
101
 
69
102
  def cog?
70
- matches && matches[1] == 'cog'
103
+ return false unless matches
104
+
105
+ matches[1] == 'cog'
71
106
  end
72
107
 
73
108
  private
74
109
 
75
110
  def list_category
76
111
  @list_category ||= begin
77
- match = Gman.list.find(domain.to_s)
112
+ match = Gman.list.public_suffix_list.find(domain.to_s)
78
113
  return unless match
79
- regex = %r{// ([^\n]+)\n?[^/]*\n#{Regexp.escape(match.name)}\n}im
80
- matches = Gman.list_contents.match(regex)
114
+
115
+ regex = %r{// ([^\n]+)\n?[^/]*\n#{Regexp.escape(match.value)}\n}im
116
+ matches = Gman.list.contents.match(regex)
81
117
  matches[1] if matches
82
118
  end
83
119
  end
84
120
 
85
121
  def matches
86
122
  return @matches if defined? @matches
123
+
87
124
  @matches = domain.to_s.match(Locality::REGEX)
88
125
  end
89
126
 
90
127
  def dotgov_listing
91
128
  return @dotgov_listing if defined? @dotgov_listing
92
129
  return unless dotgov?
130
+
93
131
  @dotgov_listing = Gman.dotgov_list.find do |listing|
94
- listing['Domain Name'].casecmp("#{domain.sld}.gov") == 0
132
+ listing['Domain Name'].casecmp("#{domain.sld}.gov").zero?
95
133
  end
96
134
  end
97
135
 
@@ -1,18 +1,21 @@
1
+ # frozen_string_literal: true
2
+
1
3
  # Utility functions for parsing and manipulating public-suffix domain lists
2
4
  # Only used in development and not loaded by default
3
5
  require 'yaml'
4
6
  require 'open-uri'
5
7
  require 'resolv'
6
8
  require 'logger'
9
+ require 'swot'
7
10
  require_relative '../gman'
8
11
  require_relative './domain_list'
9
12
 
10
13
  class Gman
11
14
  class Importer
12
- attr_accessor :domains
15
+ attr_accessor :domain_list
13
16
 
14
17
  # Known false positives from vendored lists
15
- BLACKLIST = %w(
18
+ BLACKLIST = %w[
16
19
  business.centurytel.net
17
20
  chesnee.net
18
21
  citlink.net
@@ -38,31 +41,32 @@ class Gman
38
41
  wctc.net
39
42
  webconnections.net
40
43
  webpages.charter.net
41
- ).freeze
44
+ ].freeze
42
45
 
43
46
  REGEX_CHECKS = {
44
- 'home. regex' => /^home\./,
45
- 'user. regex' => /^users?\./,
46
- 'sites. regex' => /^sites?\./,
47
- 'weebly' => /weebly\.com$/,
48
- 'wordpress' => /wordpress\.com$/,
49
- 'govoffice' => /govoffice\d?\.com$/,
50
- 'homestead' => /homestead\.com$/,
51
- 'wix.com' => /wix\.com$/,
52
- 'blogspot.com' => /blogspot\.com$/,
53
- 'tripod.com' => /tripod\.com$/,
47
+ 'home. regex' => /^home\./,
48
+ 'user. regex' => /^users?\./,
49
+ 'sites. regex' => /^sites?\./,
50
+ 'weebly' => /weebly\.com$/,
51
+ 'wordpress' => /wordpress\.com$/,
52
+ 'govoffice' => /govoffice\d?\.com$/,
53
+ 'homestead' => /homestead\.com$/,
54
+ 'wix.com' => /wix\.com$/,
55
+ 'blogspot.com' => /blogspot\.com$/,
56
+ 'tripod.com' => /tripod\.com$/,
54
57
  'squarespace.com' => /squarespace\.com$/,
55
- 'github.io' => /github\.io$/,
56
- 'tumblr' => /tumblr\.com$/,
57
- 'locality' => Gman::Locality::REGEX
58
+ 'github.io' => /github\.io$/,
59
+ 'tumblr' => /tumblr\.com$/,
60
+ 'locality' => Gman::Locality::REGEX,
61
+ 'french edu' => /^ac-.*?\.fr/
58
62
  }.freeze
59
63
 
60
64
  def initialize(domains)
61
- @domains = DomainList.new(domains)
65
+ @domain_list = DomainList.new(data: domains)
62
66
  end
63
67
 
64
68
  def logger
65
- @logger ||= Logger.new(STDOUT)
69
+ @logger ||= Logger.new($stdout)
66
70
  end
67
71
 
68
72
  def normalize_domain(domain)
@@ -71,9 +75,10 @@ class Gman
71
75
  end
72
76
 
73
77
  def valid_domain?(domain, options = {})
74
- return false unless ensure_valid(domain)
75
78
  return false if !options[:skip_dupe] && !ensure_not_dupe(domain)
79
+ return false unless ensure_valid(domain)
76
80
  return false if !options[:skip_resolve] && !ensure_resolves(domain)
81
+
77
82
  true
78
83
  end
79
84
 
@@ -81,6 +86,7 @@ class Gman
81
86
  # rather than a bool and silence log output
82
87
  def reject(domain, reason)
83
88
  return reason if ENV['RECONCILING']
89
+
84
90
  logger.info "👎 `#{domain}`: #{reason}"
85
91
  false
86
92
  end
@@ -89,30 +95,26 @@ class Gman
89
95
  @current ||= DomainList.current
90
96
  end
91
97
 
92
- def import(options)
98
+ def import(options = {})
93
99
  logger.info "Current: #{Gman::DomainList.current.count} domains"
94
- logger.info "Adding: #{domains.count} domains"
100
+ logger.info "Adding: #{domain_list.count} domains"
95
101
 
96
102
  normalize_domains!
97
103
  ensure_validity!(options)
98
104
 
99
- if domains.count == 0
100
- logger.info 'Nothing to add. Aborting'
101
- exit 0
102
- end
103
-
104
105
  add_to_current
105
106
  logger.info "New: #{current.count} domains"
106
107
  end
107
108
 
108
109
  def resolver
109
- @resolver ||= Resolv::DNS.new(nameserver: ['8.8.8.8', '8.8.4.4'])
110
+ @resolver ||= Resolv::DNS.new(nameserver: ['1.1.1.1', '8.8.8.8'])
110
111
  end
111
112
 
112
113
  # Verifies that the given domain has an MX record, and thus is valid
113
114
  def domain_resolves?(domain)
114
115
  domain = Addressable::URI.new(host: domain).normalize.host
115
116
  return true if ip?(domain)
117
+
116
118
  returns_record?(domain, 'NS') || returns_record?(domain, 'MX')
117
119
  end
118
120
 
@@ -120,16 +122,17 @@ class Gman
120
122
 
121
123
  def ensure_regex(domain)
122
124
  REGEX_CHECKS.each do |msg, regex|
123
- return reject(domain, msg) if domain =~ regex
125
+ return reject(domain, msg) if domain&.match?(regex)
124
126
  end
125
127
  true
126
128
  end
127
129
 
128
130
  def ensure_valid(domain)
129
131
  return false if domain.empty?
132
+
130
133
  if BLACKLIST.include?(domain)
131
134
  reject(domain, 'blacklist')
132
- elsif !PublicSuffix.valid?(".#{domain}")
135
+ elsif !PublicSuffix.valid?("foo.#{domain}")
133
136
  reject(domain, 'invalid')
134
137
  elsif Swot.is_academic?(domain)
135
138
  reject(domain, 'academic')
@@ -140,11 +143,13 @@ class Gman
140
143
 
141
144
  def ensure_resolves(domain)
142
145
  return reject(domain, 'unresolvable') unless domain_resolves?(domain)
146
+
143
147
  true
144
148
  end
145
149
 
146
150
  def ensure_not_dupe(domain)
147
151
  return true unless dupe?(domain)
152
+
148
153
  if current.domains.include?(domain)
149
154
  reject(domain, 'duplicate')
150
155
  else
@@ -158,22 +163,22 @@ class Gman
158
163
  end
159
164
 
160
165
  def normalize_domains!
161
- domains.list.each do |_group, domains|
166
+ domain_list.to_h.each_value do |domains|
162
167
  domains.map! { |domain| normalize_domain(domain) }
163
168
  domains.uniq!
164
169
  end
165
170
  end
166
171
 
167
172
  def ensure_validity!(options = {})
168
- domains.list.each do |_group, domains|
173
+ domain_list.data.each_value do |domains|
169
174
  domains.select! { |domain| valid_domain?(domain, options) }
170
175
  end
171
176
  end
172
177
 
173
178
  def add_to_current
174
- domains.list.each do |group, domains|
175
- current.list[group] ||= []
176
- current.list[group].concat domains
179
+ domain_list.data.each do |group, domains|
180
+ current.data[group] ||= []
181
+ current.data[group].concat domains
177
182
  end
178
183
  current.write
179
184
  end
@@ -192,9 +197,3 @@ class Gman
192
197
  end
193
198
  end
194
199
  end
195
-
196
- class Gman
197
- def self.import(hash, options = {})
198
- Gman::Importer.new(hash).import(options)
199
- end
200
- end
@@ -1,17 +1,31 @@
1
+ # frozen_string_literal: true
2
+
1
3
  class Gman
4
+ # Second level .us domains for states and locality
5
+ # See http://en.wikipedia.org/wiki/.us
6
+ #
7
+ # Examples:
8
+ # * foo.state.il.us
9
+ # * ci.foo.il.us
10
+ #
11
+ # Not:
12
+ # * state.foo.il.us
13
+ # * foo.ci.il.us
14
+ # * k12.il.us
15
+ # * ci.foo.zx.us
2
16
  class Locality
3
- AFFINITY_NAMESPACES = %w(state dst cog).freeze
17
+ AFFINITY_NAMESPACES = %w[state dst cog].freeze
4
18
 
5
- STATES = %w(
19
+ STATES = %w[
6
20
  ak al ar az ca co ct dc de fl ga hi ia id il in ks ky
7
21
  la ma md me mi mn mo ms mt nc nd ne nh nj nm nv ny oh
8
22
  ok or pa ri sc sd tn tx um ut va vt wa wi wv wy
9
- ).freeze
23
+ ].freeze
10
24
 
11
- LOCALITY_DOMAINS = %w(
25
+ LOCALITY_DOMAINS = %w[
12
26
  ci co borough boro city county
13
27
  parish town twp vi vil village
14
- ).freeze
28
+ ].freeze
15
29
 
16
30
  REGEX = /
17
31
  (
@@ -19,22 +33,10 @@ class Gman
19
33
  |
20
34
  (#{Regexp.union(LOCALITY_DOMAINS)})\.[a-z-]+
21
35
  )\.(#{Regexp.union(STATES)})\.us
22
- /x
23
- end
36
+ /x.freeze
24
37
 
25
- # Second level .us domains for states and locality
26
- # See http://en.wikipedia.org/wiki/.us
27
- #
28
- # Examples:
29
- # * foo.state.il.us
30
- # * ci.foo.il.us
31
- #
32
- # Not:
33
- # * state.foo.il.us
34
- # * foo.ci.il.us
35
- # * k12.il.us
36
- # * ci.foo.zx.us
37
- def locality?
38
- !domain.to_s.match(Locality::REGEX).nil?
38
+ def self.valid?(domain)
39
+ !domain.to_s.match(Locality::REGEX).nil?
40
+ end
39
41
  end
40
42
  end
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  class Gman
2
- VERSION = '6.0.1'.freeze
4
+ VERSION = '7.0.4'
3
5
  end
data/script/add CHANGED
@@ -1,4 +1,6 @@
1
1
  #! /usr/bin/env ruby
2
+ # frozen_string_literal: true
3
+
2
4
  #
3
5
  # Add one or more domains to a given group, running the standard import checks
4
6
  #
@@ -1,4 +1,6 @@
1
1
  #! /usr/bin/env ruby
2
+ # frozen_string_literal: true
3
+
2
4
  #
3
5
  # Alphabetizes entries in the domains.txt file
4
6
  #
@@ -2,7 +2,7 @@
2
2
 
3
3
  set -ex
4
4
 
5
- bundle exec rake test
5
+ bundle exec rspec
6
6
  bundle exec rubocop -D -S -a
7
7
  bundle exec script/dedupe
8
8
  bundle exec gem build gman.gemspec
@@ -1,4 +1,5 @@
1
1
  #! /usr/bin/env ruby
2
+ # frozen_string_literal: true
2
3
 
3
4
  require 'yaml'
4
5
  require 'open-uri'
@@ -12,7 +13,7 @@ puts "Current list contains #{current.count} domains..."
12
13
 
13
14
  dupe = current.count - current.domains.uniq.count
14
15
  puts "Found #{dupe} duplicate domains"
15
- exit 0 if dupe == 0
16
+ exit 0 if dupe.zero?
16
17
 
17
18
  dupes = current.domains.select { |domain| current.domains.count(domain) > 1 }
18
19