RubyGems - html-proofer - Versions diffs - 3.19.4 → 4.0.0.rc1 - Mend

html-proofer 3.19.4 → 4.0.0.rc1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (40) hide show

checksums.yaml +4 -4
data/bin/htmlproofer +30 -57
data/lib/html-proofer.rb +1 -54
data/lib/html_proofer/attribute/url.rb +231 -0
data/lib/html_proofer/attribute.rb +15 -0
data/lib/html_proofer/cache.rb +234 -0
data/lib/html_proofer/check/favicon.rb +35 -0
data/lib/html_proofer/check/images.rb +62 -0
data/lib/html_proofer/check/links.rb +118 -0
data/lib/html_proofer/check/open_graph.rb +34 -0
data/lib/html_proofer/check/scripts.rb +38 -0
data/lib/html_proofer/check.rb +91 -0
data/lib/{html-proofer → html_proofer}/configuration.rb +30 -31
data/lib/html_proofer/element.rb +122 -0
data/lib/html_proofer/failure.rb +17 -0
data/lib/{html-proofer → html_proofer}/log.rb +0 -0
data/lib/html_proofer/reporter/cli.rb +29 -0
data/lib/html_proofer/reporter.rb +23 -0
data/lib/html_proofer/runner.rb +245 -0
data/lib/html_proofer/url_validator/external.rb +189 -0
data/lib/html_proofer/url_validator/internal.rb +86 -0
data/lib/html_proofer/url_validator.rb +16 -0
data/lib/{html-proofer → html_proofer}/utils.rb +5 -8
data/lib/{html-proofer → html_proofer}/version.rb +1 -1
data/lib/html_proofer/xpath_functions.rb +10 -0
data/lib/html_proofer.rb +56 -0
metadata +46 -27
data/lib/html-proofer/cache.rb +0 -194
data/lib/html-proofer/check/favicon.rb +0 -29
data/lib/html-proofer/check/html.rb +0 -37
data/lib/html-proofer/check/images.rb +0 -48
data/lib/html-proofer/check/links.rb +0 -182
data/lib/html-proofer/check/opengraph.rb +0 -46
data/lib/html-proofer/check/scripts.rb +0 -42
data/lib/html-proofer/check.rb +0 -75
data/lib/html-proofer/element.rb +0 -265
data/lib/html-proofer/issue.rb +0 -65
data/lib/html-proofer/middleware.rb +0 -82
data/lib/html-proofer/runner.rb +0 -249
data/lib/html-proofer/url_validator.rb +0 -237

metadata CHANGED Viewed

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: html-proofer
 version: !ruby/object:Gem::Version
-  version: 3.19.4
+  version: 4.0.0.rc1
 platform: ruby
 authors:
 - Garen Torikian
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2022-05-19 00:00:00.000000000 Z
+date: 2022-01-05 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: addressable
@@ -44,14 +44,14 @@ dependencies:
     requirements:
     - - "~>"
       - !ruby/object:Gem::Version
-        version: '1.13'
+        version: '1.12'
   type: :runtime
   prerelease: false
   version_requirements: !ruby/object:Gem::Requirement
     requirements:
     - - "~>"
       - !ruby/object:Gem::Version
-        version: '1.13'
+        version: '1.12'
 - !ruby/object:Gem::Dependency
   name: parallel
   requirement: !ruby/object:Gem::Requirement
@@ -108,6 +108,20 @@ dependencies:
     - - "~>"
       - !ruby/object:Gem::Version
         version: '2.0'
+- !ruby/object:Gem::Dependency
+  name: zeitwerk
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: '2.5'
+  type: :runtime
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: '2.5'
 - !ruby/object:Gem::Dependency
   name: awesome_print
   requirement: !ruby/object:Gem::Requirement
@@ -123,7 +137,7 @@ dependencies:
       - !ruby/object:Gem::Version
         version: '0'
 - !ruby/object:Gem::Dependency
-  name: pry-byebug
+  name: debug
   requirement: !ruby/object:Gem::Requirement
     requirements:
     - - ">="
@@ -193,7 +207,7 @@ dependencies:
       - !ruby/object:Gem::Version
         version: '0'
 - !ruby/object:Gem::Dependency
-  name: rubocop-performance
+  name: rubocop-rspec
   requirement: !ruby/object:Gem::Requirement
     requirements:
     - - ">="
@@ -258,28 +272,33 @@ extra_rdoc_files: []
 files:
 - bin/htmlproofer
 - lib/html-proofer.rb
-- lib/html-proofer/cache.rb
-- lib/html-proofer/check.rb
-- lib/html-proofer/check/favicon.rb
-- lib/html-proofer/check/html.rb
-- lib/html-proofer/check/images.rb
-- lib/html-proofer/check/links.rb
-- lib/html-proofer/check/opengraph.rb
-- lib/html-proofer/check/scripts.rb
-- lib/html-proofer/configuration.rb
-- lib/html-proofer/element.rb
-- lib/html-proofer/issue.rb
-- lib/html-proofer/log.rb
-- lib/html-proofer/middleware.rb
-- lib/html-proofer/runner.rb
-- lib/html-proofer/url_validator.rb
-- lib/html-proofer/utils.rb
-- lib/html-proofer/version.rb
+- lib/html_proofer.rb
+- lib/html_proofer/attribute.rb
+- lib/html_proofer/attribute/url.rb
+- lib/html_proofer/cache.rb
+- lib/html_proofer/check.rb
+- lib/html_proofer/check/favicon.rb
+- lib/html_proofer/check/images.rb
+- lib/html_proofer/check/links.rb
+- lib/html_proofer/check/open_graph.rb
+- lib/html_proofer/check/scripts.rb
+- lib/html_proofer/configuration.rb
+- lib/html_proofer/element.rb
+- lib/html_proofer/failure.rb
+- lib/html_proofer/log.rb
+- lib/html_proofer/reporter.rb
+- lib/html_proofer/reporter/cli.rb
+- lib/html_proofer/runner.rb
+- lib/html_proofer/url_validator.rb
+- lib/html_proofer/url_validator/external.rb
+- lib/html_proofer/url_validator/internal.rb
+- lib/html_proofer/utils.rb
+- lib/html_proofer/version.rb
+- lib/html_proofer/xpath_functions.rb
 homepage: https://github.com/gjtorikian/html-proofer
 licenses:
 - MIT
 metadata:
-  funding_uri: https://github.com/sponsors/gjtorikian/
   rubygems_mfa_required: 'true'
 post_install_message:
 rdoc_options: []
@@ -295,11 +314,11 @@ required_ruby_version: !ruby/object:Gem::Requirement
       version: '4.0'
 required_rubygems_version: !ruby/object:Gem::Requirement
   requirements:
-  - - ">="
+  - - ">"
     - !ruby/object:Gem::Version
-      version: '0'
+      version: 1.3.1
 requirements: []
-rubygems_version: 3.3.13
+rubygems_version: 3.3.3
 signing_key:
 specification_version: 4
 summary: A set of tests to validate your HTML output. These tests check if your image

data/lib/html-proofer/cache.rb DELETED Viewed

@@ -1,194 +0,0 @@
-# frozen_string_literal: true
-require_relative 'utils'
-require 'date'
-require 'json'
-require 'uri'
-module HTMLProofer
-  class Cache
-    include HTMLProofer::Utils
-    DEFAULT_STORAGE_DIR = File.join('tmp', '.htmlproofer')
-    DEFAULT_CACHE_FILE_NAME = 'cache.log'
-    URI_REGEXP = URI::DEFAULT_PARSER.make_regexp
-    attr_reader :exists, :cache_log, :storage_dir, :cache_file
-    def initialize(logger, options)
-      @logger = logger
-      @cache_log = {}
-      @cache_datetime = DateTime.now
-      @cache_time = @cache_datetime.to_time
-      if options.nil? || options.empty?
-        define_singleton_method('use_cache?') { false }
-      else
-        define_singleton_method('use_cache?') { true }
-        setup_cache!(options)
-        @parsed_timeframe = parsed_timeframe(options[:timeframe])
-      end
-    end
-    def within_timeframe?(time)
-      return false if time.nil?
-      (@parsed_timeframe..@cache_time).cover?(Time.parse(time))
-    end
-    def urls
-      @cache_log['urls'] || []
-    end
-    def size
-      @cache_log.length
-    end
-    def parsed_timeframe(timeframe)
-      time, date = timeframe.match(/(\d+)(\D)/).captures
-      time = time.to_i
-      case date
-      when 'M'
-        time_ago(time, :months)
-      when 'w'
-        time_ago(time, :weeks)
-      when 'd'
-        time_ago(time, :days)
-      when 'h'
-        time_ago(time, :hours)
-      else
-        raise ArgumentError, "#{date} is not a valid timeframe!"
-      end
-    end
-    def add(url, filenames, status, msg = '')
-      return unless use_cache?
-      data = {
-        time: @cache_time,
-        filenames: filenames,
-        status: status,
-        message: msg
-      }
-      @cache_log[clean_url(url)] = data
-    end
-    def detect_url_changes(found, type)
-      found_urls = found.keys.map { |url| clean_url(url) }
-      # if there were no urls, bail
-      return {} if found_urls.empty?
-      existing_urls = @cache_log.keys.map { |url| clean_url(url) }
-      # prepare to add new URLs detected
-      additions = found.reject do |url, _|
-        url = clean_url(url)
-        if existing_urls.include?(url)
-          true
-        else
-          @logger.log :debug, "Adding #{url} to cache check"
-          false
-        end
-      end
-      new_link_count = additions.length
-      new_link_text = pluralize(new_link_count, 'link', 'links')
-      @logger.log :info, "Adding #{new_link_text} to the cache..."
-      # remove from cache URLs that no longer exist
-      deletions = 0
-      @cache_log.delete_if do |url, _|
-        url = clean_url(url)
-        if found_urls.include?(url)
-          false
-        elsif url_matches_type?(url, type)
-          @logger.log :debug, "Removing #{url} from cache check"
-          deletions += 1
-          true
-        end
-      end
-      del_link_text = pluralize(deletions, 'link', 'links')
-      @logger.log :info, "Removing #{del_link_text} from the cache..."
-      additions
-    end
-    # TODO: Garbage performance--both the external and internal
-    # caches need access to this file. Write a proper versioned
-    # schema in the future
-    def write
-      File.write(cache_file, @cache_log.to_json)
-    end
-    def load?
-      @load.nil?
-    end
-    def retrieve_urls(urls, type)
-      urls_to_check = detect_url_changes(urls, type)
-      @cache_log.each_pair do |url, cache|
-        next if within_timeframe?(cache['time']) && cache['message'].empty? # these were successes to skip
-        if url_matches_type?(url, type)
-          urls_to_check[url] = cache['filenames'] # recheck expired links
-        end
-      end
-      urls_to_check
-    end
-    # FIXME: it seems that Typhoeus actually acts on escaped URLs,
-    # but there's no way to get at that information, and the cache
-    # stores unescaped URLs. Because of this, some links, such as
-    # github.com/search/issues?q=is:open+is:issue+fig are not matched
-    # as github.com/search/issues?q=is%3Aopen+is%3Aissue+fig
-    def unescape_url(url)
-      Addressable::URI.unescape(url)
-    end
-    def clean_url(url)
-      unescape_url(url)
-    end
-    def setup_cache!(options)
-      @storage_dir = options[:storage_dir] || DEFAULT_STORAGE_DIR
-      FileUtils.mkdir_p(storage_dir) unless Dir.exist?(storage_dir)
-      cache_file_name = options[:cache_file] || DEFAULT_CACHE_FILE_NAME
-      @cache_file = File.join(storage_dir, cache_file_name)
-      return unless File.exist?(@cache_file)
-      contents = File.read(@cache_file)
-      @cache_log = contents.empty? ? {} : JSON.parse(contents)
-    end
-    private
-    def time_ago(measurement, unit)
-      case unit
-      when :months
-        @cache_datetime >> -measurement
-      when :weeks
-        @cache_datetime - (measurement * 7)
-      when :days
-        @cache_datetime - measurement
-      when :hours
-        @cache_datetime - Rational(measurement / 24.0)
-      end.to_time
-    end
-    def url_matches_type?(url, type)
-      return true if type == :internal && url !~ URI_REGEXP
-      return true if type == :external && url =~ URI_REGEXP
-    end
-  end
-end

data/lib/html-proofer/check/favicon.rb DELETED Viewed

@@ -1,29 +0,0 @@
-# frozen_string_literal: true
-class FaviconCheck < ::HTMLProofer::Check
-  def run
-    found = false
-    @html.xpath('//link[not(ancestor::pre or ancestor::code)]').each do |node|
-      favicon = create_element(node)
-      next if favicon.ignore?
-      found = true if favicon.rel.split.last.eql? 'icon'
-      break if found
-    end
-    return if found
-    return if immediate_redirect?
-    add_issue('no favicon specified')
-  end
-  private
-  # allow any instant-redirect meta tag
-  def immediate_redirect?
-    @html.xpath("//meta[@http-equiv='refresh']").attribute('content').value.start_with? '0;'
-  rescue StandardError
-    false
-  end
-end

data/lib/html-proofer/check/html.rb DELETED Viewed

@@ -1,37 +0,0 @@
-# frozen_string_literal: true
-class HtmlCheck < ::HTMLProofer::Check
-  # tags embedded in scripts are used in templating languages: http://git.io/vOovv
-  SCRIPT_EMBEDS_MSG = /Element script embeds close tag/.freeze
-  INVALID_TAG_MSG = /Tag ([\w\-:]+) invalid/.freeze
-  INVALID_PREFIX = /Namespace prefix/.freeze
-  PARSE_ENTITY_REF = /htmlParseEntityRef: no name/.freeze
-  DOCTYPE_MSG = /Expected a doctype token/.freeze
-  EOF_IN_TAG = /End of input in tag/.freeze
-  MISMATCHED_TAGS = /That tag isn't allowed here/.freeze
-  def run
-    @html.errors.each do |error|
-      add_issue(error.message, line: error.line) if report?(error.message)
-    end
-  end
-  def report?(message)
-    case message
-    when SCRIPT_EMBEDS_MSG
-      options[:validation][:report_script_embeds]
-    when INVALID_TAG_MSG, INVALID_PREFIX
-      options[:validation][:report_invalid_tags]
-    when PARSE_ENTITY_REF
-      options[:validation][:report_missing_names]
-    when DOCTYPE_MSG
-      options[:validation][:report_missing_doctype]
-    when EOF_IN_TAG
-      options[:validation][:report_eof_tags]
-    when MISMATCHED_TAGS
-      options[:validation][:report_mismatched_tags]
-    else
-      true
-    end
-  end
-end

data/lib/html-proofer/check/images.rb DELETED Viewed

@@ -1,48 +0,0 @@
-# frozen_string_literal: true
-class ImageCheck < ::HTMLProofer::Check
-  SCREEN_SHOT_REGEX = /Screen(?: |%20)Shot(?: |%20)\d+-\d+-\d+(?: |%20)at(?: |%20)\d+.\d+.\d+/.freeze
-  def empty_alt_tag?
-    @img.alt.nil? || @img.alt.strip.empty?
-  end
-  def terrible_filename?
-    @img.url =~ SCREEN_SHOT_REGEX
-  end
-  def missing_src?
-    blank?(@img.url)
-  end
-  def run
-    @html.css('img').each do |node|
-      @img = create_element(node)
-      line = node.line
-      content = node.content
-      next if @img.ignore?
-      # screenshot filenames should return because of terrible names
-      if terrible_filename?
-        add_issue("image has a terrible filename (#{@img.url})", line: line, content: content)
-        next
-      end
-      # does the image exist?
-      if missing_src?
-        add_issue('image has no src or srcset attribute', line: line, content: content)
-      elsif @img.remote?
-        add_to_external_urls(@img.url)
-      elsif !@img.exists?
-        add_issue("internal image #{@img.url} does not exist", line: line, content: content)
-      end
-      add_issue("image #{@img.url} does not have an alt attribute", line: line, content: content) if empty_alt_tag? && !@img.ignore_empty_alt? && !@img.ignore_alt?
-      add_issue("image #{@img.url} uses the http scheme", line: line, content: content) if @img.check_img_http? && @img.scheme == 'http'
-    end
-    external_urls
-  end
-end

data/lib/html-proofer/check/links.rb DELETED Viewed

@@ -1,182 +0,0 @@
-# frozen_string_literal: true
-class LinkCheck < ::HTMLProofer::Check
-  include HTMLProofer::Utils
-  def missing_href?
-    return blank?(@link.src) if @node.name == 'source'
-    blank?(@link.href) && blank?(@link.name) && blank?(@link.id)
-  end
-  def placeholder?
-    (!blank?(@link.id) || !blank?(@link.name)) && @link.href.nil?
-  end
-  def run
-    @html.css('a, link, source').each do |node|
-      @link = create_element(node)
-      line = node.line
-      content = node.to_s
-      next if @link.ignore?
-      next if placeholder?
-      next if @link.allow_hash_href? && @link.href == '#'
-      # is it even a valid URL?
-      unless @link.valid?
-        add_issue("#{@link.href} is an invalid URL", line: line, content: content)
-        next
-      end
-      check_schemes(@link, line, content)
-      # is there even an href?
-      if missing_href?
-        next if @link.allow_missing_href?
-        # HTML5 allows dropping the href: http://git.io/vBX0z
-        next if @html.internal_subset.nil? || (@html.internal_subset.name == 'html' && @html.internal_subset.external_id.nil?)
-        add_issue('anchor has no href attribute', line: line, content: content)
-        next
-      end
-      # intentionally here because we still want valid? & missing_href? to execute
-      next if @link.non_http_remote?
-      if !@link.href&.start_with?('#') && !@link.internal? && @link.remote?
-        check_sri(line, content) if @link.check_sri? && node.name == 'link'
-        # we need to skip these for now; although the domain main be valid,
-        # curl/Typheous inaccurately return 404s for some links. cc https://git.io/vyCFx
-        next if @link.respond_to?(:rel) && @link.rel == 'dns-prefetch'
-        unless @link.path?
-          add_issue("#{@link.href} is an invalid URL", line: line, content: content)
-          next
-        end
-        add_to_external_urls(@link.href || @link.src)
-        next
-      elsif @link.internal?
-        add_to_internal_urls(@link.href, InternalLink.new(@link, @path, line, content))
-        add_issue("internally linking to #{@link.href}, which does not exist", line: line, content: content) if !@link.exists? && !@link.hash
-      end
-    end
-    external_urls
-  end
-  def check_internal_link(link, path, line, content)
-    # does the local directory have a trailing slash?
-    if link.unslashed_directory?(link.absolute_path)
-      add_issue("internally linking to a directory #{link.absolute_path} without trailing slash", path: path, line: line, content: content)
-      return false
-    end
-    return true unless link.hash
-    # verify the target hash
-    handle_hash(link, path, line, content)
-  end
-  def check_schemes(link, line, content)
-    case link.scheme
-    when 'mailto'
-      handle_mailto(link, line, content)
-    when 'tel'
-      handle_tel(link, line, content)
-    when 'http'
-      return unless @options[:enforce_https]
-      add_issue("#{link.href} is not an HTTPS link", line: line, content: content)
-    end
-  end
-  def handle_mailto(link, line, content)
-    if link.path.empty?
-      add_issue("#{link.href} contains no email address", line: line, content: content) unless link.ignore_empty_mailto?
-    elsif !link.path.include?('@')
-      add_issue("#{link.href} contains an invalid email address", line: line, content: content)
-    end
-  end
-  def handle_tel(link, line, content)
-    add_issue("#{link.href} contains no phone number", line: line, content: content) if link.path.empty?
-  end
-  def handle_hash(link, path, line, content)
-    if link.internal? && !hash_exists?(link.html, link.hash) # rubocop:disable Style/GuardClause
-      return add_issue("linking to internal hash ##{link.hash} that does not exist", path: path, line: line, content: content)
-    elsif link.external?
-      return external_link_check(link, line, content)
-    end
-    true
-  end
-  def external_link_check(link, line, content)
-    if link.exists? # rubocop:disable Style/GuardClause
-      target_html = create_nokogiri(link.absolute_path)
-      return add_issue("linking to #{link.href}, but #{link.hash} does not exist", line: line, content: content) unless hash_exists?(target_html, link.hash)
-    else
-      return add_issue("trying to find hash of #{link.href}, but #{link.absolute_path} does not exist", line: line, content: content)
-    end
-    true
-  end
-  def hash_exists?(html, href_hash)
-    decoded_href_hash = Addressable::URI.unescape(href_hash)
-    fragment_ids = [href_hash, decoded_href_hash]
-    # https://www.w3.org/TR/html5/single-page.html#scroll-to-fragid
-    fragment_ids.include?('top') || !find_fragments(html, fragment_ids).empty?
-  end
-  def find_fragments(html, fragment_ids)
-    xpaths = fragment_ids.flat_map do |frag_id|
-      escaped_frag_id = "'#{frag_id.split("'").join("', \"'\", '")}', ''"
-      [
-        "//*[case_sensitive_equals(@id, concat(#{escaped_frag_id}))]",
-        "//*[case_sensitive_equals(@name, concat(#{escaped_frag_id}))]"
-      ]
-    end
-    xpaths << XpathFunctions.new
-    html.xpath(*xpaths)
-  end
-  # Whitelist for affected elements from Subresource Integrity specification
-  # https://w3c.github.io/webappsec-subresource-integrity/#link-element-for-stylesheets
-  SRI_REL_TYPES = %(stylesheet)
-  def check_sri(line, content)
-    return unless SRI_REL_TYPES.include?(@link.rel)
-    if !defined?(@link.integrity) && !defined?(@link.crossorigin)
-      add_issue("SRI and CORS not provided in: #{@link.src}", line: line, content: content)
-    elsif !defined?(@link.integrity)
-      add_issue("Integrity is missing in: #{@link.src}", line: line, content: content)
-    elsif !defined?(@link.crossorigin)
-      add_issue("CORS not provided for external resource in: #{@link.src}", line: line, content: content)
-    end
-  end
-  class XpathFunctions
-    def case_sensitive_equals(node_set, str_to_match)
-      node_set.find_all { |node| node.to_s.== str_to_match.to_s }
-    end
-  end
-  class InternalLink
-    attr_reader :link, :href, :path, :line, :content
-    def initialize(link, path, line, content)
-      @link = link
-      @href = @link.href
-      @path = path
-      @line = line
-      @content = content
-    end
-  end
-end

data/lib/html-proofer/check/opengraph.rb DELETED Viewed

@@ -1,46 +0,0 @@
-# frozen_string_literal: true
-class OpenGraphElement < ::HTMLProofer::Element
-  attr_reader :src
-  def initialize(obj, check, logger)
-    super(obj, check, logger)
-    # Fake up src from the content attribute
-    instance_variable_set('@src', @content)
-    @src.insert 0, 'http:' if %r{^//}.match?(@src)
-  end
-end
-class OpenGraphCheck < ::HTMLProofer::Check
-  def missing_src?
-    !@opengraph.src
-  end
-  def empty_src?
-    blank?(@opengraph.src)
-  end
-  def run
-    @html.css('meta[property="og:url"], meta[property="og:image"]').each do |m|
-      @opengraph = OpenGraphElement.new(m, self, @logger)
-      next if @opengraph.ignore?
-      # does the opengraph exist?
-      if missing_src?
-        add_issue('open graph has no content attribute', line: m.line, content: m.content)
-      elsif empty_src?
-        add_issue('open graph content attribute is empty', line: m.line, content: m.content)
-      elsif !@opengraph.valid?
-        add_issue("#{@opengraph.src} is an invalid URL", line: m.line)
-      elsif @opengraph.remote?
-        add_to_external_urls(@opengraph.url)
-      else
-        add_issue("internal open graph #{@opengraph.url} does not exist", line: m.line, content: m.content) unless @opengraph.exists?
-      end
-    end
-    external_urls
-  end
-end