RubyGems - gman - Versions diffs - 7.0.0 → 7.0.5 - Mend

gman 7.0.0 → 7.0.5

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (74)

checksums.yaml +5 -5
data/.github/CODEOWNERS +3 -0
data/.github/ISSUE_TEMPLATE/bug_report.md +28 -0
data/.github/ISSUE_TEMPLATE/feature_request.md +21 -0
data/.github/config.yml +23 -0
data/.github/funding.yml +1 -0
data/.github/no-response.yml +15 -0
data/.github/release-drafter.yml +4 -0
data/.github/settings.yml +33 -0
data/.github/stale.yml +29 -0
data/.gitignore +1 -0
data/.rspec +2 -0
data/.rubocop.yml +14 -5
data/.rubocop_todo.yml +84 -0
data/.ruby-version +1 -1
data/Gemfile +2 -0
data/bin/gman +6 -4
data/bin/gman_filter +5 -7
data/config/domains.txt +8454 -168
data/config/vendor/academic.txt +6 -7
data/config/vendor/dotgovs.csv +5786 -5560
data/docs/CODE_OF_CONDUCT.md +46 -0
data/docs/CONTRIBUTING.md +92 -0
data/{README.md → docs/README.md} +3 -3
data/docs/SECURITY.md +3 -0
data/docs/_config.yml +2 -0
data/gman.gemspec +18 -17
data/lib/gman.rb +4 -2
data/lib/gman/country_codes.rb +17 -17
data/lib/gman/domain_list.rb +25 -9
data/lib/gman/identifier.rb +57 -19
data/lib/gman/importer.rb +31 -21
data/lib/gman/locality.rb +8 -6
data/lib/gman/version.rb +3 -1
data/script/add +2 -0
data/script/alphabetize +2 -0
data/script/cibuild +1 -1
data/script/dedupe +2 -1
data/script/profile +2 -1
data/script/prune +5 -3
data/script/reconcile-us +6 -3
data/script/vendor-federal-de +2 -1
data/script/vendor-municipal-de +2 -1
data/script/vendor-nl +2 -0
data/script/vendor-public-suffix +6 -4
data/script/vendor-se +2 -1
data/script/vendor-swot +3 -1
data/script/vendor-us +5 -3
data/spec/fixtures/domains.txt +4 -0
data/{test → spec}/fixtures/obama.txt +0 -0
data/spec/gman/bin_spec.rb +101 -0
data/spec/gman/country_code_spec.rb +39 -0
data/spec/gman/domain_list_spec.rb +110 -0
data/spec/gman/domains_spec.rb +25 -0
data/spec/gman/identifier_spec.rb +218 -0
data/spec/gman/importer_spec.rb +236 -0
data/spec/gman/locality_spec.rb +24 -0
data/spec/gman_spec.rb +74 -0
data/spec/spec_helper.rb +31 -0
metadata +89 -81
data/.rake_tasks +0 -0
data/CONTRIBUTING.md +0 -22
data/Rakefile +0 -22
data/test/fixtures/domains.txt +0 -2
data/test/helper.rb +0 -48
data/test/test_gman.rb +0 -56
data/test/test_gman_bin.rb +0 -75
data/test/test_gman_country_codes.rb +0 -18
data/test/test_gman_domain_list.rb +0 -112
data/test/test_gman_domains.rb +0 -32
data/test/test_gman_filter.rb +0 -17
data/test/test_gman_identifier.rb +0 -106
data/test/test_gman_importer.rb +0 -244
data/test/test_gman_locality.rb +0 -10

data/lib/gman/importer.rb CHANGED

@@ -1,9 +1,12 @@
+# frozen_string_literal: true
 # Utility functions for parsing and manipulating public-suffix domain lists
 # Only used in development and not loaded by default
 require 'yaml'
 require 'open-uri'
 require 'resolv'
 require 'logger'
+require 'swot'
 require_relative '../gman'
 require_relative './domain_list'
@@ -12,7 +15,7 @@ class Gman
     attr_accessor :domain_list
     # Known false positives from vendored lists
-    BLACKLIST = %w(
+    BLACKLIST = %w[
       business.centurytel.net
       chesnee.net
       citlink.net
@@ -38,23 +41,24 @@ class Gman
       wctc.net
       webconnections.net
       webpages.charter.net
-    ).freeze
+    ].freeze
     REGEX_CHECKS = {
-      'home. regex'     => /^home\./,
-      'user. regex'     => /^users?\./,
-      'sites. regex'    => /^sites?\./,
-      'weebly'          => /weebly\.com$/,
-      'wordpress'       => /wordpress\.com$/,
-      'govoffice'       => /govoffice\d?\.com$/,
-      'homestead'       => /homestead\.com$/,
-      'wix.com'         => /wix\.com$/,
-      'blogspot.com'    => /blogspot\.com$/,
-      'tripod.com'      => /tripod\.com$/,
+      'home. regex' => /^home\./,
+      'user. regex' => /^users?\./,
+      'sites. regex' => /^sites?\./,
+      'weebly' => /weebly\.com$/,
+      'wordpress' => /wordpress\.com$/,
+      'govoffice' => /govoffice\d?\.com$/,
+      'homestead' => /homestead\.com$/,
+      'wix.com' => /wix\.com$/,
+      'blogspot.com' => /blogspot\.com$/,
+      'tripod.com' => /tripod\.com$/,
       'squarespace.com' => /squarespace\.com$/,
-      'github.io'       => /github\.io$/,
-      'tumblr'          => /tumblr\.com$/,
-      'locality'        => Gman::Locality::REGEX
+      'github.io' => /github\.io$/,
+      'tumblr' => /tumblr\.com$/,
+      'locality' => Gman::Locality::REGEX,
+      'french edu' => /^ac-.*?\.fr/
     }.freeze
     def initialize(domains)
@@ -62,7 +66,7 @@ class Gman
     end
     def logger
-      @logger ||= Logger.new(STDOUT)
+      @logger ||= Logger.new($stdout)
     end
     def normalize_domain(domain)
@@ -74,6 +78,7 @@ class Gman
       return false if !options[:skip_dupe] && !ensure_not_dupe(domain)
       return false unless ensure_valid(domain)
       return false if !options[:skip_resolve] && !ensure_resolves(domain)
       true
     end
@@ -81,6 +86,7 @@ class Gman
     # rather than a bool and silence log output
     def reject(domain, reason)
       return reason if ENV['RECONCILING']
       logger.info "👎 `#{domain}`: #{reason}"
       false
     end
@@ -101,13 +107,14 @@ class Gman
     end
     def resolver
-      @resolver ||= Resolv::DNS.new(nameserver: ['8.8.8.8', '8.8.4.4'])
+      @resolver ||= Resolv::DNS.new(nameserver: ['1.1.1.1', '8.8.8.8'])
     end
     # Verifies that the given domain is an IP address or has an NS or MX record, and thus is valid
     def domain_resolves?(domain)
       domain = Addressable::URI.new(host: domain).normalize.host
       return true if ip?(domain)
       returns_record?(domain, 'NS') || returns_record?(domain, 'MX')
     end
@@ -115,16 +122,17 @@ class Gman
     def ensure_regex(domain)
       REGEX_CHECKS.each do |msg, regex|
-        return reject(domain, msg) if domain =~ regex
+        return reject(domain, msg) if domain&.match?(regex)
       end
       true
     end
     def ensure_valid(domain)
       return false if domain.empty?
       if BLACKLIST.include?(domain)
         reject(domain, 'blacklist')
-      elsif !PublicSuffix.valid?(".#{domain}")
+      elsif !PublicSuffix.valid?("foo.#{domain}")
         reject(domain, 'invalid')
       elsif Swot.is_academic?(domain)
         reject(domain, 'academic')
@@ -135,11 +143,13 @@ class Gman
     def ensure_resolves(domain)
       return reject(domain, 'unresolvable') unless domain_resolves?(domain)
       true
     end
     def ensure_not_dupe(domain)
       return true unless dupe?(domain)
       if current.domains.include?(domain)
         reject(domain, 'duplicate')
       else
@@ -153,14 +163,14 @@ class Gman
     end
     def normalize_domains!
-      domain_list.to_h.each do |_group, domains|
+      domain_list.to_h.each_value do |domains|
         domains.map! { |domain| normalize_domain(domain) }
         domains.uniq!
       end
     end
     def ensure_validity!(options = {})
-      domain_list.data.each do |_group, domains|
+      domain_list.data.each_value do |domains|
         domains.select! { |domain| valid_domain?(domain, options) }
       end
     end

data/lib/gman/locality.rb CHANGED

@@ -1,3 +1,5 @@
+# frozen_string_literal: true
 class Gman
   # Second level .us domains for states and locality
   # See http://en.wikipedia.org/wiki/.us
@@ -12,18 +14,18 @@ class Gman
   #  * k12.il.us
   #  * ci.foo.zx.us
   class Locality
-    AFFINITY_NAMESPACES = %w(state dst cog).freeze
+    AFFINITY_NAMESPACES = %w[state dst cog].freeze
-    STATES = %w(
+    STATES = %w[
       ak al ar az ca co ct dc de fl ga hi ia id il in ks ky
       la ma md me mi mn mo ms mt nc nd ne nh nj nm nv ny oh
       ok or pa ri sc sd tn tx um ut va vt wa wi wv wy
-    ).freeze
+    ].freeze
-    LOCALITY_DOMAINS = %w(
+    LOCALITY_DOMAINS = %w[
       ci co borough boro city county
       parish town twp vi vil village
-    ).freeze
+    ].freeze
     REGEX = /
       (
@@ -31,7 +33,7 @@ class Gman
       |
         (#{Regexp.union(LOCALITY_DOMAINS)})\.[a-z-]+
       )\.(#{Regexp.union(STATES)})\.us
-    /x
+    /x.freeze
     def self.valid?(domain)
       !domain.to_s.match(Locality::REGEX).nil?

data/lib/gman/version.rb CHANGED

@@ -1,3 +1,5 @@
+# frozen_string_literal: true
 class Gman
-  VERSION = '7.0.0'.freeze
+  VERSION = '7.0.5'
 end

data/script/add CHANGED

@@ -1,4 +1,6 @@
 #! /usr/bin/env ruby
+# frozen_string_literal: true
 #
 # Add one or more domains to a given group, running the standard import checks
 #

data/script/alphabetize CHANGED

@@ -1,4 +1,6 @@
 #! /usr/bin/env ruby
+# frozen_string_literal: true
 #
 # Alphabetizes entries in the domains.txt file
 #

data/script/cibuild CHANGED

@@ -2,7 +2,7 @@
 set -ex
-bundle exec rake test
+bundle exec rspec
 bundle exec rubocop -D -S -a
 bundle exec script/dedupe
 bundle exec gem build gman.gemspec

data/script/dedupe CHANGED

@@ -1,4 +1,5 @@
 #! /usr/bin/env ruby
+# frozen_string_literal: true
 require 'yaml'
 require 'open-uri'
@@ -12,7 +13,7 @@ puts "Current list contains #{current.count} domains..."
 dupe = current.count - current.domains.uniq.count
 puts "Found #{dupe} duplicate domains"
-exit 0 if dupe == 0
+exit 0 if dupe.zero?
 dupes = current.domains.select { |domain| current.domains.count(domain) > 1 }

data/script/profile CHANGED

@@ -1,4 +1,5 @@
 #! /usr/bin/env ruby
+# frozen_string_literal: true
 require 'ruby-prof'
 require './lib/gman'
@@ -17,4 +18,4 @@ end
 result = RubyProf.stop
 printer = RubyProf::FlatPrinter.new(result)
-printer.print(STDOUT)
+printer.print($stdout)

data/script/prune CHANGED

@@ -1,4 +1,6 @@
 #!/usr/bin/env ruby
+# frozen_string_literal: true
 # Given an array of domains, removes them from the list
 # Example usage: script/prune foo.invalid, bar.invalid, foo.bar.invalid
@@ -12,9 +14,9 @@ list = File.open('./config/domains.txt').read
 puts "Starting list: #{Gman::DomainList.current.count} domains"
 domains.each do |domain|
-  list.gsub!(/^#{domain}$\n/, '')
+  list.gsub!(/^#{Regexp.escape(domain)}$\n/, '')
 end
-puts "Ending list: #{Gman::DomainList.current.count} domains"
 File.write './config/domains.txt', list
+puts "Ending list: #{Gman::DomainList.current.count} domains"

data/script/reconcile-us CHANGED

@@ -1,4 +1,6 @@
 #!/usr/bin/env ruby
+# frozen_string_literal: true
 #
 # Reconciles the USA.gov-maintained list of US domains with domains.txt
 # to show domains listed in the USA.gov-maintained list that we reject and why
@@ -12,7 +14,7 @@ ENV['RECONCILING'] = 'true'
 blacklist = ['usagovQUASI']
 source = 'https://raw.githubusercontent.com/GSA/govt-urls/master/government-urls-hierarchical-list.txt'
-data = open(source).read
+data = URI.open(source).read
 data = data.split('_' * 74)
 data = data.last.strip
 data = data.split(/\r?\n/).reject(&:empty?)
@@ -20,7 +22,7 @@ data = data.split(/\r?\n/).reject(&:empty?)
 domains = {}
 group = ''
 data.each do |row|
-  if row =~ /^\w/
+  if /^\w/.match?(row)
     group = row
     domains[group] = []
   else
@@ -33,7 +35,7 @@ importer = Gman::Importer.new(domains)
 importer.logger.info "Starting with #{importer.domains.count} domains"
-importer.domains.list.each do |_group, d|
+importer.domains.list.each_value do |d|
   d.map! { |domain| Gman.new(domain).to_s }
   d.map! { |domain| importer.normalize_domain(domain) }
 end
@@ -44,6 +46,7 @@ importer.logger.info "Filtered down to #{count} normalized domains"
 missing = {}
 importer.domains.list.each do |g, usagovdomains|
   next unless importer.current.list[g]
   missing[g] = importer.current.list[g] - usagovdomains
 end

data/script/vendor-federal-de CHANGED

@@ -1,4 +1,5 @@
 #! /usr/bin/env ruby
+# frozen_string_literal: true
 require 'csv'
 require 'open-uri'
@@ -6,7 +7,7 @@ require './lib/gman'
 url = 'https://raw.githubusercontent.com/robbi5/german-gov-domains/master/data/domains.csv'
-domains = open(url).read.encode('UTF-8')
+domains = URI.open(url).read.encode('UTF-8')
 domains = CSV.parse(domains, headers: true)
 domains = domains.map { |row| row['Domain Name'] }

data/script/vendor-municipal-de CHANGED

@@ -1,4 +1,5 @@
 #! /usr/bin/env ruby
+# frozen_string_literal: true
 require 'csv'
 require 'open-uri'
@@ -6,7 +7,7 @@ require './lib/gman'
 url = 'http://www.mik.nrw.de/nc/themen-aufgaben/kommunales/kommunale-adressen.html?tx_szkommunaldb_pi1%5Bexport%5D=csv'
-csv = open(url).read.force_encoding('iso-8859-1').encode('UTF-8')
+csv = URI.open(url).read.force_encoding('iso-8859-1').encode('UTF-8')
 # For some reason, the header row is actually the last row
 # Pop the last line off the file and prepend it at the beginning

data/script/vendor-nl CHANGED

@@ -1,4 +1,6 @@
 #! /usr/bin/env ruby
+# frozen_string_literal: true
 # See https://github.com/github/government.github.com/pull/367#issuecomment-102108763
 require 'fileutils'

data/script/vendor-public-suffix CHANGED

@@ -1,4 +1,6 @@
 #!/usr/bin/env ruby
+# frozen_string_literal: true
 # Propagates an initial list of best-guess government domains
 require 'public_suffix'
@@ -6,21 +8,21 @@ require 'yaml'
 require_relative '../lib/gman'
 # https://gist.github.com/benbalter/6147066
-REGEX = /(\.g[ou]{1,2}(v|b|vt)|\.mil|\.gc|\.fed)(\.[a-z]{2})?$/i
+REGEX = /(\.g[ou]{1,2}(v|b|vt)|\.mil|\.gc|\.fed)(\.[a-z]{2})?$/i.freeze
 domains = []
 PublicSuffix::List.default.each do |rule|
   domain = nil
   if rule.parts.length == 1
-    domain = rule.parts.first if ".#{rule.value}" =~ REGEX
-  elsif ".#{rule.value}" =~ REGEX
+    domain = rule.parts.first if REGEX.match?(".#{rule.value}")
+  elsif REGEX.match?(".#{rule.value}")
     domain = rule.parts.pop(2).join('.')
   end
   domains.push domain unless domain.nil? || domains.include?(domain)
 end
-# Note: We want to skip resolution here, because a domain like `gov.sv` may be
+# NOTE: We want to skip resolution here, because a domain like `gov.sv` may be
 # a valid TLD, not have any top-level sites, and we'd still want it listed
 Gman::Importer.new('non-us gov' => domains).import(skip_resolve: true)

data/script/vendor-se CHANGED

@@ -1,4 +1,5 @@
 #! /usr/bin/env ruby
+# frozen_string_literal: true
 require 'mechanize'
 require 'csv'
@@ -14,7 +15,7 @@ response = agent.submit(form, submit_button)
 rows = CSV.parse(response.content, headers: true, col_sep: "\t")
 domains = rows.map do |row|
-  row['Webbadress'] unless row['Namn'] =~ /UNIVERSITET/
+  row['Webbadress'] unless /UNIVERSITET/.match?(row['Namn'])
 end
 Gman::Importer.new('Swedish Administrative Authorities' => domains).import

data/script/vendor-swot CHANGED

@@ -1,4 +1,6 @@
 #! /usr/bin/env ruby
+# frozen_string_literal: true
 #
 # Vendors the Swot-maintained list of academic domains into config/academic.txt
 # Source: https://github.com/leereilly/swot/
@@ -12,7 +14,7 @@
 #
 # Note: We do this, because as a bajillion individual files, Swot takes up 30MB
-require './lib/gman'
+require 'gman'
 require 'swot'
 # Generate array of all Swot domains

data/script/vendor-us CHANGED

@@ -1,4 +1,6 @@
 #! /usr/bin/env ruby
+# frozen_string_literal: true
 #
 # Vendors the USA.gov-maintained list of US domains into domains.txt
 # Source: https://github.com/GSA-OCSIT/govt-urls
@@ -13,10 +15,10 @@
 require './lib/gman'
 require 'open-uri'
-blacklist = %w(usagovQUASI usagovFEDgov)
+blacklist = %w[usagovQUASI usagovFEDgov]
 source = 'https://raw.githubusercontent.com/GSA/govt-urls/master/government-urls-hierarchical-list.txt'
-data = open(source).read
+data = URI.open(source).read
 data = data.split('_' * 74)
 data = data.last.strip
 data = data.split(/\r?\n/).reject(&:empty?)
@@ -24,7 +26,7 @@ data = data.split(/\r?\n/).reject(&:empty?)
 domains = {}
 group = ''
 data.each do |row|
-  if row =~ /^\w/
+  if /^\w/.match?(row)
     group = row
     domains[group] = []
   else

data/spec/fixtures/domains.txt ADDED

@@ -0,0 +1,4 @@
+// foo
+bar.gov
+baz.net
+!mail.bar.gov

data/{test → spec}/fixtures/obama.txt RENAMED

File without changes