broken_link_finder 0.9.0 → 0.9.1
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +10 -0
- data/Gemfile +2 -0
- data/Gemfile.lock +3 -3
- data/README.md +2 -1
- data/Rakefile +13 -16
- data/benchmark.rb +2 -0
- data/bin/console +7 -7
- data/broken_link_finder.gemspec +28 -27
- data/exe/broken_link_finder +8 -7
- data/lib/broken_link_finder/finder.rb +36 -38
- data/lib/broken_link_finder/reporter.rb +12 -10
- data/lib/broken_link_finder/version.rb +3 -1
- data/lib/broken_link_finder/wgit_extensions.rb +5 -15
- data/lib/broken_link_finder.rb +9 -4
- data/load.rb +4 -2
- metadata +29 -30
- data/.travis.yml +0 -5
    
        checksums.yaml
    CHANGED
    
    | @@ -1,7 +1,7 @@ | |
| 1 1 | 
             
            ---
         | 
| 2 2 | 
             
            SHA256:
         | 
| 3 | 
            -
              metadata.gz:  | 
| 4 | 
            -
              data.tar.gz:  | 
| 3 | 
            +
              metadata.gz: 715744f5d7676d5d4ba2cddb80f0f8109f79a7f92689c3ff3088a52f307f5f1f
         | 
| 4 | 
            +
              data.tar.gz: 7026f6037f0d710d8dab3bc710ddf7b202594c25ac8a8522398e62af3f4e78dd
         | 
| 5 5 | 
             
            SHA512:
         | 
| 6 | 
            -
              metadata.gz:  | 
| 7 | 
            -
              data.tar.gz:  | 
| 6 | 
            +
              metadata.gz: 7844d0c6d2c39752a98dcb9a7eb455f6492012321be71ff40f49641b7adc3658f4f541a35afc9ca1a9d6ce330472c4f166db0895acc1e1d6ecad53f86af7f0ac
         | 
| 7 | 
            +
              data.tar.gz: f5224db527d4636e9006ea332813d9cd133ea221506aa7a45edecc6fd230f212e21f4db8d1757a070c753876d6b003329a381b0f9a48663c966318b9fb2d1c86
         | 
    
        data/CHANGELOG.md
    CHANGED
    
    | @@ -9,6 +9,16 @@ | |
| 9 9 | 
             
            - ...
         | 
| 10 10 | 
             
            ---
         | 
| 11 11 |  | 
| 12 | 
            +
            ## v0.9.1
         | 
| 13 | 
            +
            ### Added
         | 
| 14 | 
            +
            - `BrokenLinkFinder::Finder.crawl_site` alias: `crawl_r`.
         | 
| 15 | 
            +
            ### Changed/Removed
         | 
| 16 | 
            +
            - Upgraded `wgit` to v0.2.0.
         | 
| 17 | 
            +
            - Refactored the code base (no breaking changes).
         | 
| 18 | 
            +
            ### Fixed
         | 
| 19 | 
            +
            - ...
         | 
| 20 | 
            +
            ---
         | 
| 21 | 
            +
             | 
| 12 22 | 
             
            ## v0.9.0
         | 
| 13 23 | 
             
            ### Added
         | 
| 14 24 | 
             
            - The `version` command to the executable.
         | 
    
        data/Gemfile
    CHANGED
    
    
    
        data/Gemfile.lock
    CHANGED
    
    | @@ -1,10 +1,10 @@ | |
| 1 1 | 
             
            PATH
         | 
| 2 2 | 
             
              remote: .
         | 
| 3 3 | 
             
              specs:
         | 
| 4 | 
            -
                broken_link_finder (0.9.0)
         | 
| 4 | 
            +
                broken_link_finder (0.9.1)
         | 
| 5 5 | 
             
                  thor (= 0.20.3)
         | 
| 6 6 | 
             
                  thread (= 0.2)
         | 
| 7 | 
            -
                  wgit (= 0.0 | 
| 7 | 
            +
                  wgit (= 0.2.0)
         | 
| 8 8 |  | 
| 9 9 | 
             
            GEM
         | 
| 10 10 | 
             
              remote: https://rubygems.org/
         | 
| @@ -42,7 +42,7 @@ GEM | |
| 42 42 | 
             
                  addressable (>= 2.3.6)
         | 
| 43 43 | 
             
                  crack (>= 0.3.2)
         | 
| 44 44 | 
             
                  hashdiff
         | 
| 45 | 
            -
                wgit (0.0 | 
| 45 | 
            +
                wgit (0.2.0)
         | 
| 46 46 | 
             
                  addressable (~> 2.6.0)
         | 
| 47 47 | 
             
                  mongo (~> 2.9.0)
         | 
| 48 48 | 
             
                  nokogiri (~> 1.10.3)
         | 
    
        data/README.md
    CHANGED
    
    | @@ -122,7 +122,8 @@ The gem is available as open source under the terms of the [MIT License](http:// | |
| 122 122 | 
             
            After checking out the repo, run `bin/setup` to install dependencies. Then, run `bundle exec rake test` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
         | 
| 123 123 |  | 
| 124 124 | 
             
            To install this gem onto your local machine, run `bundle exec rake install`. To release a new gem version:
         | 
| 125 | 
            -
            - Update the version number in `version.rb` and add version to the `CHANGELOG`
         | 
| 125 | 
            +
            - Update the version number in `version.rb` and add the new version to the `CHANGELOG`
         | 
| 126 126 | 
             
            - Run `bundle install`
         | 
| 127 127 | 
             
            - Run `bundle exec rake test` ensuring all tests pass
         | 
| 128 | 
            +
            - Run `bundle exec rake compile` ensuring no warnings
         | 
| 128 129 | 
             
            - Run `bundle exec rake release[origin]`
         | 
    
        data/Rakefile
    CHANGED
    
    | @@ -1,33 +1,30 @@ | |
| 1 | 
            -
             | 
| 2 | 
            -
             | 
| 1 | 
            +
            # frozen_string_literal: true
         | 
| 2 | 
            +
             | 
| 3 | 
            +
            require 'bundler/gem_tasks'
         | 
| 4 | 
            +
            require 'rake/testtask'
         | 
| 3 5 |  | 
| 4 6 | 
             
            Rake::TestTask.new(:test) do |t|
         | 
| 5 | 
            -
              t.libs <<  | 
| 6 | 
            -
              t.libs <<  | 
| 7 | 
            +
              t.libs << 'test'
         | 
| 8 | 
            +
              t.libs << 'lib'
         | 
| 7 9 | 
             
              t.test_files = FileList['test/**/*_test.rb']
         | 
| 8 10 | 
             
            end
         | 
| 9 11 |  | 
| 10 | 
            -
            desc  | 
| 12 | 
            +
            desc 'Print help information'
         | 
| 11 13 | 
             
            task default: :help
         | 
| 12 14 |  | 
| 13 | 
            -
            desc  | 
| 15 | 
            +
            desc 'Print help information'
         | 
| 14 16 | 
             
            task :help do
         | 
| 15 | 
            -
              system  | 
| 16 | 
            -
            end
         | 
| 17 | 
            -
             | 
| 18 | 
            -
            desc "Run the setup script"
         | 
| 19 | 
            -
            task :setup do
         | 
| 20 | 
            -
              system "./bin/setup"
         | 
| 17 | 
            +
              system 'bundle exec rake -D'
         | 
| 21 18 | 
             
            end
         | 
| 22 19 |  | 
| 23 | 
            -
            desc  | 
| 20 | 
            +
            desc 'Run the development console'
         | 
| 24 21 | 
             
            task :console do
         | 
| 25 | 
            -
              system  | 
| 22 | 
            +
              system './bin/console'
         | 
| 26 23 | 
             
            end
         | 
| 27 24 |  | 
| 28 | 
            -
            desc  | 
| 25 | 
            +
            desc 'Compile all project Ruby files with warnings.'
         | 
| 29 26 | 
             
            task :compile do
         | 
| 30 | 
            -
              paths = Dir[ | 
| 27 | 
            +
              paths = Dir['**/*.rb', '**/*.gemspec', 'exe/broken_link_finder']
         | 
| 31 28 | 
             
              paths.each do |f|
         | 
| 32 29 | 
             
                puts "\nCompiling #{f}..."
         | 
| 33 30 | 
             
                puts `ruby -cw #{f}`
         | 
    
        data/benchmark.rb
    CHANGED
    
    
    
        data/bin/console
    CHANGED
    
    | @@ -1,15 +1,15 @@ | |
| 1 1 | 
             
            #!/usr/bin/env ruby
         | 
| 2 | 
            +
            # frozen_string_literal: true
         | 
| 2 3 |  | 
| 3 | 
            -
            require  | 
| 4 | 
            -
            require  | 
| 5 | 
            -
            require  | 
| 6 | 
            -
            require "broken_link_finder"
         | 
| 7 | 
            -
            require 'wgit/core_ext'
         | 
| 4 | 
            +
            require 'bundler/setup'
         | 
| 5 | 
            +
            require 'pry'
         | 
| 6 | 
            +
            require 'byebug'
         | 
| 8 7 | 
             
            require 'logger'
         | 
| 9 8 | 
             
            require 'httplog'
         | 
| 9 | 
            +
            require 'broken_link_finder'
         | 
| 10 10 |  | 
| 11 11 | 
             
            logger = Logger.new(STDOUT)
         | 
| 12 | 
            -
            logger.formatter = proc do | | 
| 12 | 
            +
            logger.formatter = proc do |_severity, _datetime, _progname, msg|
         | 
| 13 13 | 
             
              "#{msg}\n"
         | 
| 14 14 | 
             
            end
         | 
| 15 15 |  | 
| @@ -43,7 +43,7 @@ end | |
| 43 43 | 
             
            # You can add fixtures and/or initialization code here...
         | 
| 44 44 | 
             
            reload
         | 
| 45 45 |  | 
| 46 | 
            -
            url =  | 
| 46 | 
            +
            url = 'http://txti.es/'
         | 
| 47 47 | 
             
            by_page = Finder.new
         | 
| 48 48 | 
             
            by_link = Finder.new sort: :link
         | 
| 49 49 | 
             
            finder = by_page
         | 
    
        data/broken_link_finder.gemspec
    CHANGED
    
    | @@ -1,51 +1,52 @@ | |
| 1 | 
            -
            #  | 
| 2 | 
            -
             | 
| 1 | 
            +
            # frozen_string_literal: true
         | 
| 2 | 
            +
             | 
| 3 | 
            +
            lib = File.expand_path('lib', __dir__)
         | 
| 3 4 | 
             
            $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
         | 
| 4 5 | 
             
            require 'broken_link_finder/version'
         | 
| 5 6 |  | 
| 6 7 | 
             
            Gem::Specification.new do |spec|
         | 
| 7 | 
            -
              spec.name          =  | 
| 8 | 
            +
              spec.name          = 'broken_link_finder'
         | 
| 8 9 | 
             
              spec.version       = BrokenLinkFinder::VERSION
         | 
| 9 | 
            -
              spec.author        =  | 
| 10 | 
            -
              spec.email         =  | 
| 10 | 
            +
              spec.author        = 'Michael Telford'
         | 
| 11 | 
            +
              spec.email         = 'michael.telford@live.com'
         | 
| 11 12 |  | 
| 12 13 | 
             
              spec.summary       = "Finds a website's broken links and reports back to you with a summary."
         | 
| 13 14 | 
             
              spec.description   = "Finds a website's broken links using the 'wgit' gem and reports back to you with a summary."
         | 
| 14 | 
            -
              spec.homepage      =  | 
| 15 | 
            -
              spec.license       =  | 
| 15 | 
            +
              spec.homepage      = 'https://github.com/michaeltelford/broken-link-finder'
         | 
| 16 | 
            +
              spec.license       = 'MIT'
         | 
| 16 17 | 
             
              spec.metadata      = {
         | 
| 17 | 
            -
                 | 
| 18 | 
            +
                'source_code_uri' => 'https://github.com/michaeltelford/broken-link-finder'
         | 
| 18 19 | 
             
              }
         | 
| 19 20 |  | 
| 20 21 | 
             
              # Prevent pushing this gem to RubyGems.org. To allow pushes either set the 'allowed_push_host'
         | 
| 21 22 | 
             
              # to allow pushing to a single host or delete this section to allow pushing to any host.
         | 
| 22 23 | 
             
              if spec.respond_to?(:metadata)
         | 
| 23 | 
            -
                spec.metadata['allowed_push_host'] =  | 
| 24 | 
            +
                spec.metadata['allowed_push_host'] = 'https://rubygems.org'
         | 
| 24 25 | 
             
              else
         | 
| 25 | 
            -
                raise  | 
| 26 | 
            -
                   | 
| 26 | 
            +
                raise 'RubyGems 2.0 or newer is required to protect against ' \
         | 
| 27 | 
            +
                  'public gem pushes.'
         | 
| 27 28 | 
             
              end
         | 
| 28 29 |  | 
| 29 | 
            -
              spec.files | 
| 30 | 
            +
              spec.files = `git ls-files -z`.split("\x0").reject do |f|
         | 
| 30 31 | 
             
                f.match(%r{^(test|spec|features)/})
         | 
| 31 32 | 
             
              end
         | 
| 32 | 
            -
              spec.bindir        =  | 
| 33 | 
            -
              spec.executables   = [ | 
| 34 | 
            -
              spec.require_paths = [ | 
| 33 | 
            +
              spec.bindir        = 'exe'
         | 
| 34 | 
            +
              spec.executables   = ['broken_link_finder']
         | 
| 35 | 
            +
              spec.require_paths = ['lib']
         | 
| 35 36 | 
             
              spec.post_install_message = "Added the executable 'broken_link_finder' to $PATH"
         | 
| 36 37 |  | 
| 37 38 | 
             
              spec.required_ruby_version = '~> 2.5'
         | 
| 38 39 |  | 
| 39 | 
            -
              spec.add_development_dependency  | 
| 40 | 
            -
              spec.add_development_dependency  | 
| 41 | 
            -
              spec.add_development_dependency  | 
| 42 | 
            -
              spec.add_development_dependency  | 
| 43 | 
            -
              spec.add_development_dependency  | 
| 44 | 
            -
              spec.add_development_dependency  | 
| 45 | 
            -
              spec.add_development_dependency  | 
| 46 | 
            -
              spec.add_development_dependency  | 
| 47 | 
            -
             | 
| 48 | 
            -
              spec.add_runtime_dependency  | 
| 49 | 
            -
              spec.add_runtime_dependency  | 
| 50 | 
            -
              spec.add_runtime_dependency  | 
| 40 | 
            +
              spec.add_development_dependency 'bundler', '~> 2.0'
         | 
| 41 | 
            +
              spec.add_development_dependency 'byebug', '~> 11.0'
         | 
| 42 | 
            +
              spec.add_development_dependency 'httplog', '~> 1.3'
         | 
| 43 | 
            +
              spec.add_development_dependency 'memory_profiler', '~> 0.9'
         | 
| 44 | 
            +
              spec.add_development_dependency 'minitest', '~> 5.0'
         | 
| 45 | 
            +
              spec.add_development_dependency 'pry', '~> 0.12'
         | 
| 46 | 
            +
              spec.add_development_dependency 'rake', '~> 10.0'
         | 
| 47 | 
            +
              spec.add_development_dependency 'webmock', '~> 3.5'
         | 
| 48 | 
            +
             | 
| 49 | 
            +
              spec.add_runtime_dependency 'thor', '0.20.3'
         | 
| 50 | 
            +
              spec.add_runtime_dependency 'thread', '0.2'
         | 
| 51 | 
            +
              spec.add_runtime_dependency 'wgit', '0.2.0'
         | 
| 51 52 | 
             
            end
         | 
    
        data/exe/broken_link_finder
    CHANGED
    
    | @@ -1,6 +1,7 @@ | |
| 1 1 | 
             
            #!/usr/bin/env ruby
         | 
| 2 | 
            +
            # frozen_string_literal: true
         | 
| 2 3 |  | 
| 3 | 
            -
            $LOAD_PATH.unshift File.expand_path( | 
| 4 | 
            +
            $LOAD_PATH.unshift File.expand_path('../lib', __dir__)
         | 
| 4 5 | 
             
            require 'broken_link_finder'
         | 
| 5 6 | 
             
            require 'thor'
         | 
| 6 7 |  | 
| @@ -14,10 +15,10 @@ class BrokenLinkFinderCLI < Thor | |
| 14 15 | 
             
              def crawl(url)
         | 
| 15 16 | 
             
                url = "http://#{url}" unless url.start_with?('http')
         | 
| 16 17 |  | 
| 17 | 
            -
                sort_by | 
| 18 | 
            -
                max_threads | 
| 19 | 
            -
                broken_verbose | 
| 20 | 
            -
                ignored_verbose | 
| 18 | 
            +
                sort_by         = options[:sort_by_link] ? :link : :page
         | 
| 19 | 
            +
                max_threads     = options[:threads]
         | 
| 20 | 
            +
                broken_verbose  = !options[:concise]
         | 
| 21 | 
            +
                ignored_verbose = options[:verbose]
         | 
| 21 22 |  | 
| 22 23 | 
             
                finder = BrokenLinkFinder::Finder.new(sort: sort_by, max_threads: max_threads)
         | 
| 23 24 | 
             
                options[:recursive] ? finder.crawl_site(url) : finder.crawl_page(url)
         | 
| @@ -25,8 +26,8 @@ class BrokenLinkFinderCLI < Thor | |
| 25 26 | 
             
                  broken_verbose: broken_verbose,
         | 
| 26 27 | 
             
                  ignored_verbose: ignored_verbose
         | 
| 27 28 | 
             
                )
         | 
| 28 | 
            -
              rescue Exception =>  | 
| 29 | 
            -
                puts "An error has occurred: #{ | 
| 29 | 
            +
              rescue Exception => e
         | 
| 30 | 
            +
                puts "An error has occurred: #{e.message}"
         | 
| 30 31 | 
             
              end
         | 
| 31 32 |  | 
| 32 33 | 
             
              desc 'version', 'Display the currently installed version'
         | 
    
        data/lib/broken_link_finder/finder.rb
    CHANGED
    
    | @@ -1,10 +1,11 @@ | |
| 1 | 
            +
            # frozen_string_literal: true
         | 
| 2 | 
            +
             | 
| 1 3 | 
             
            require_relative 'reporter'
         | 
| 2 | 
            -
            require 'wgit'
         | 
| 3 4 | 
             
            require 'thread/pool'
         | 
| 4 5 | 
             
            require 'set'
         | 
| 5 6 |  | 
| 6 7 | 
             
            module BrokenLinkFinder
         | 
| 7 | 
            -
              DEFAULT_MAX_THREADS = 100 | 
| 8 | 
            +
              DEFAULT_MAX_THREADS = 100
         | 
| 8 9 |  | 
| 9 10 | 
             
              # Alias for BrokenLinkFinder::Finder.new.
         | 
| 10 11 | 
             
              def self.new(sort: :page, max_threads: DEFAULT_MAX_THREADS)
         | 
| @@ -16,9 +17,8 @@ module BrokenLinkFinder | |
| 16 17 |  | 
| 17 18 | 
             
                # Creates a new Finder instance.
         | 
| 18 19 | 
             
                def initialize(sort: :page, max_threads: BrokenLinkFinder::DEFAULT_MAX_THREADS)
         | 
| 19 | 
            -
                   | 
| 20 | 
            -
             | 
| 21 | 
            -
                  end
         | 
| 20 | 
            +
                  raise "Sort by either :page or :link, not #{sort}" \
         | 
| 21 | 
            +
                  unless %i[page link].include?(sort)
         | 
| 22 22 |  | 
| 23 23 | 
             
                  @sort        = sort
         | 
| 24 24 | 
             
                  @max_threads = max_threads
         | 
| @@ -43,7 +43,7 @@ module BrokenLinkFinder | |
| 43 43 | 
             
                def crawl_url(url)
         | 
| 44 44 | 
             
                  clear_links
         | 
| 45 45 |  | 
| 46 | 
            -
                  url =  | 
| 46 | 
            +
                  url = url.to_url
         | 
| 47 47 | 
             
                  doc = @crawler.crawl_url(url)
         | 
| 48 48 |  | 
| 49 49 | 
             
                  # Ensure the given page url is valid.
         | 
| @@ -65,8 +65,8 @@ module BrokenLinkFinder | |
| 65 65 | 
             
                def crawl_site(url)
         | 
| 66 66 | 
             
                  clear_links
         | 
| 67 67 |  | 
| 68 | 
            -
                  url | 
| 69 | 
            -
                  pool | 
| 68 | 
            +
                  url           = url.to_url
         | 
| 69 | 
            +
                  pool          = Thread.pool(@max_threads)
         | 
| 70 70 | 
             
                  crawled_pages = []
         | 
| 71 71 |  | 
| 72 72 | 
             
                  # Crawl the site's HTML web pages looking for links.
         | 
| @@ -95,14 +95,14 @@ module BrokenLinkFinder | |
| 95 95 | 
             
                # Returns true if there were broken links and vice versa.
         | 
| 96 96 | 
             
                def pretty_print_link_report(
         | 
| 97 97 | 
             
                  stream = STDOUT,
         | 
| 98 | 
            -
                  broken_verbose: | 
| 98 | 
            +
                  broken_verbose:  true,
         | 
| 99 99 | 
             
                  ignored_verbose: false
         | 
| 100 100 | 
             
                )
         | 
| 101 101 | 
             
                  reporter = BrokenLinkFinder::Reporter.new(
         | 
| 102 102 | 
             
                    stream, @sort, @broken_links, @ignored_links
         | 
| 103 103 | 
             
                  )
         | 
| 104 104 | 
             
                  reporter.pretty_print_link_report(
         | 
| 105 | 
            -
                    broken_verbose: | 
| 105 | 
            +
                    broken_verbose:  broken_verbose,
         | 
| 106 106 | 
             
                    ignored_verbose: ignored_verbose
         | 
| 107 107 | 
             
                  )
         | 
| 108 108 |  | 
| @@ -114,14 +114,14 @@ module BrokenLinkFinder | |
| 114 114 | 
             
                # Finds which links are unsupported or broken and records the details.
         | 
| 115 115 | 
             
                def find_broken_links(doc)
         | 
| 116 116 | 
             
                  # Report and reject any non supported links.
         | 
| 117 | 
            -
                  links = doc.all_links | 
| 118 | 
            -
             | 
| 119 | 
            -
             | 
| 120 | 
            -
             | 
| 121 | 
            -
             | 
| 122 | 
            -
             | 
| 123 | 
            -
             | 
| 124 | 
            -
             | 
| 117 | 
            +
                  links = doc.all_links
         | 
| 118 | 
            +
                             .reject do |link|
         | 
| 119 | 
            +
                                if link.is_absolute? && !link.start_with?('http')
         | 
| 120 | 
            +
                                  append_ignored_link(doc.url, link)
         | 
| 121 | 
            +
                                  true
         | 
| 122 | 
            +
                                end
         | 
| 123 | 
            +
                              end
         | 
| 124 | 
            +
                             .uniq
         | 
| 125 125 |  | 
| 126 126 | 
             
                  # Iterate over the supported links checking if they're broken or not.
         | 
| 127 127 | 
             
                  links.each do |link|
         | 
| @@ -138,8 +138,8 @@ module BrokenLinkFinder | |
| 138 138 | 
             
                    link_doc = @crawler.crawl_url(link_url)
         | 
| 139 139 |  | 
| 140 140 | 
             
                    # Determine if the crawled link is broken or not.
         | 
| 141 | 
            -
                    if | 
| 142 | 
            -
                        link_doc.nil?  | 
| 141 | 
            +
                    if  @crawler.last_response.is_a?(Net::HTTPNotFound) ||
         | 
| 142 | 
            +
                        link_doc.nil? ||
         | 
| 143 143 | 
             
                        has_broken_anchor(link_doc)
         | 
| 144 144 | 
             
                      append_broken_link(doc.url, link)
         | 
| 145 145 | 
             
                    else
         | 
| @@ -157,10 +157,10 @@ module BrokenLinkFinder | |
| 157 157 |  | 
| 158 158 | 
             
                # Returns true if the link is/contains a broken anchor.
         | 
| 159 159 | 
             
                def has_broken_anchor(doc)
         | 
| 160 | 
            -
                  raise  | 
| 160 | 
            +
                  raise 'link document is nil' unless doc
         | 
| 161 161 |  | 
| 162 162 | 
             
                  anchor = doc.url.anchor
         | 
| 163 | 
            -
                  return false if anchor.nil?  | 
| 163 | 
            +
                  return false if anchor.nil? || (anchor == '#')
         | 
| 164 164 |  | 
| 165 165 | 
             
                  anchor = anchor[1..-1] if anchor.start_with?('#')
         | 
| 166 166 | 
             
                  doc.xpath("//*[@id='#{anchor}']").empty?
         | 
| @@ -171,9 +171,7 @@ module BrokenLinkFinder | |
| 171 171 | 
             
                  key, value = get_key_value(url, link)
         | 
| 172 172 |  | 
| 173 173 | 
             
                  @lock.synchronize do
         | 
| 174 | 
            -
                    unless @broken_links[key]
         | 
| 175 | 
            -
                      @broken_links[key] = []
         | 
| 176 | 
            -
                    end
         | 
| 174 | 
            +
                    @broken_links[key] = [] unless @broken_links[key]
         | 
| 177 175 | 
             
                    @broken_links[key] << value
         | 
| 178 176 |  | 
| 179 177 | 
             
                    @all_broken_links  << link
         | 
| @@ -185,9 +183,7 @@ module BrokenLinkFinder | |
| 185 183 | 
             
                  key, value = get_key_value(url, link)
         | 
| 186 184 |  | 
| 187 185 | 
             
                  @lock.synchronize do
         | 
| 188 | 
            -
                    unless @ignored_links[key]
         | 
| 189 | 
            -
                      @ignored_links[key] = []
         | 
| 190 | 
            -
                    end
         | 
| 186 | 
            +
                    @ignored_links[key] = [] unless @ignored_links[key]
         | 
| 191 187 | 
             
                    @ignored_links[key] << value
         | 
| 192 188 | 
             
                  end
         | 
| 193 189 | 
             
                end
         | 
| @@ -195,9 +191,10 @@ module BrokenLinkFinder | |
| 195 191 | 
             
                # Returns the correct key value depending on the @sort type.
         | 
| 196 192 | 
             
                # @sort == :page ? [url, link] : [link, url]
         | 
| 197 193 | 
             
                def get_key_value(url, link)
         | 
| 198 | 
            -
                   | 
| 194 | 
            +
                  case @sort
         | 
| 195 | 
            +
                  when :page
         | 
| 199 196 | 
             
                    [url, link]
         | 
| 200 | 
            -
                   | 
| 197 | 
            +
                  when :link
         | 
| 201 198 | 
             
                    [link, url]
         | 
| 202 199 | 
             
                  else
         | 
| 203 200 | 
             
                    raise "Unsupported sort type: #{sort}"
         | 
| @@ -206,14 +203,14 @@ module BrokenLinkFinder | |
| 206 203 |  | 
| 207 204 | 
             
                # Sort keys and values alphabetically.
         | 
| 208 205 | 
             
                def sort_links
         | 
| 209 | 
            -
                  @broken_links.values.map | 
| 210 | 
            -
                  @ignored_links.values.map | 
| 206 | 
            +
                  @broken_links.values.map(&:uniq!)
         | 
| 207 | 
            +
                  @ignored_links.values.map(&:uniq!)
         | 
| 211 208 |  | 
| 212 | 
            -
                  @broken_links  = @broken_links.sort_by  { |k,  | 
| 213 | 
            -
                  @ignored_links = @ignored_links.sort_by { |k,  | 
| 209 | 
            +
                  @broken_links  = @broken_links.sort_by  { |k, _v| k }.to_h
         | 
| 210 | 
            +
                  @ignored_links = @ignored_links.sort_by { |k, _v| k }.to_h
         | 
| 214 211 |  | 
| 215 | 
            -
                  @broken_links.each  { | | 
| 216 | 
            -
                  @ignored_links.each { | | 
| 212 | 
            +
                  @broken_links.each  { |_k, v| v.sort! }
         | 
| 213 | 
            +
                  @ignored_links.each { |_k, v| v.sort! }
         | 
| 217 214 | 
             
                end
         | 
| 218 215 |  | 
| 219 216 | 
             
                # Sets and returns the total number of links crawled.
         | 
| @@ -221,7 +218,8 @@ module BrokenLinkFinder | |
| 221 218 | 
             
                  @total_links_crawled = @all_broken_links.size + @all_intact_links.size
         | 
| 222 219 | 
             
                end
         | 
| 223 220 |  | 
| 224 | 
            -
                 | 
| 225 | 
            -
                 | 
| 221 | 
            +
                alias crawl_page                crawl_url
         | 
| 222 | 
            +
                alias crawl_r                   crawl_site
         | 
| 223 | 
            +
                alias pretty_print_link_summary pretty_print_link_report
         | 
| 226 224 | 
             
              end
         | 
| 227 225 | 
             
            end
         | 
    
        data/lib/broken_link_finder/reporter.rb
    CHANGED
    
    | @@ -1,15 +1,16 @@ | |
| 1 | 
            +
            # frozen_string_literal: true
         | 
| 2 | 
            +
             | 
| 1 3 | 
             
            module BrokenLinkFinder
         | 
| 2 4 | 
             
              class Reporter
         | 
| 3 5 | 
             
                # The amount of pages/links to display when verbose is false.
         | 
| 4 | 
            -
                NUM_VALUES = 3 | 
| 6 | 
            +
                NUM_VALUES = 3
         | 
| 5 7 |  | 
| 6 8 | 
             
                # Creates a new Reporter instance.
         | 
| 7 9 | 
             
                # stream is any Object that responds to :puts.
         | 
| 8 10 | 
             
                def initialize(stream, sort, broken_links, ignored_links)
         | 
| 9 | 
            -
                  raise  | 
| 10 | 
            -
                   | 
| 11 | 
            -
             | 
| 12 | 
            -
                  end
         | 
| 11 | 
            +
                  raise 'stream must respond_to? :puts' unless stream.respond_to?(:puts)
         | 
| 12 | 
            +
                  raise "sort by either :page or :link, not #{sort}" \
         | 
| 13 | 
            +
                  unless %i[page link].include?(sort)
         | 
| 13 14 |  | 
| 14 15 | 
             
                  @stream         = stream
         | 
| 15 16 | 
             
                  @sort           = sort
         | 
| @@ -21,6 +22,7 @@ module BrokenLinkFinder | |
| 21 22 | 
             
                def pretty_print_link_report(broken_verbose: true, ignored_verbose: false)
         | 
| 22 23 | 
             
                  report_broken_links(verbose: broken_verbose)
         | 
| 23 24 | 
             
                  report_ignored_links(verbose: ignored_verbose)
         | 
| 25 | 
            +
             | 
| 24 26 | 
             
                  nil
         | 
| 25 27 | 
             
                end
         | 
| 26 28 |  | 
| @@ -29,7 +31,7 @@ module BrokenLinkFinder | |
| 29 31 | 
             
                # Report a summary of the broken links.
         | 
| 30 32 | 
             
                def report_broken_links(verbose: true)
         | 
| 31 33 | 
             
                  if @broken_links.empty?
         | 
| 32 | 
            -
                    print  | 
| 34 | 
            +
                    print 'Good news, there are no broken links!'
         | 
| 33 35 | 
             
                  else
         | 
| 34 36 | 
             
                    num_pages, num_links = get_hash_stats(@broken_links)
         | 
| 35 37 | 
             
                    print "Found #{num_links} broken link(s) across #{num_pages} page(s):"
         | 
| @@ -40,7 +42,7 @@ module BrokenLinkFinder | |
| 40 42 | 
             
                        "The broken link '#{key}' was found on the following pages:"
         | 
| 41 43 | 
             
                      nprint msg
         | 
| 42 44 |  | 
| 43 | 
            -
                      if verbose  | 
| 45 | 
            +
                      if verbose || (values.length <= NUM_VALUES)
         | 
| 44 46 | 
             
                        values.each { |value| print value }
         | 
| 45 47 | 
             
                      else # Only print N values and summarise the rest.
         | 
| 46 48 | 
             
                        NUM_VALUES.times { |i| print values[i] }
         | 
| @@ -64,7 +66,7 @@ module BrokenLinkFinder | |
| 64 66 | 
             
                        "The link '#{key}' was ignored on the following pages:"
         | 
| 65 67 | 
             
                      nprint msg
         | 
| 66 68 |  | 
| 67 | 
            -
                      if verbose  | 
| 69 | 
            +
                      if verbose || (values.length <= NUM_VALUES)
         | 
| 68 70 | 
             
                        values.each { |value| print value }
         | 
| 69 71 | 
             
                      else # Only print N values and summarise the rest.
         | 
| 70 72 | 
             
                        NUM_VALUES.times { |i| print values[i] }
         | 
| @@ -85,8 +87,8 @@ module BrokenLinkFinder | |
| 85 87 | 
             
                # combined values. The hash should be of the format: { 'str' => [...] }.
         | 
| 86 88 | 
             
                # Use like: `num_pages, num_links = get_hash_stats(links)`.
         | 
| 87 89 | 
             
                def get_hash_stats(hash)
         | 
| 88 | 
            -
                  num_keys | 
| 89 | 
            -
                  values | 
| 90 | 
            +
                  num_keys   = hash.keys.length
         | 
| 91 | 
            +
                  values     = hash.values.flatten
         | 
| 90 92 | 
             
                  num_values = sort_by_page? ? values.length : values.uniq.length
         | 
| 91 93 |  | 
| 92 94 | 
             
                  sort_by_page? ?
         | 
| @@ -1,21 +1,11 @@ | |
| 1 | 
            -
             | 
| 1 | 
            +
            # frozen_string_literal: true
         | 
| 2 2 |  | 
| 3 | 
            -
            # We  | 
| 3 | 
            +
            # We extract all the Document's links, not just the links to other webpages.
         | 
| 4 4 | 
             
            Wgit::Document.define_extension(
         | 
| 5 5 | 
             
              :all_links,
         | 
| 6 | 
            -
              '//*/@href | //*/@src',
         | 
| 6 | 
            +
              '//*/@href | //*/@src', # Any element with a href or src attribute.
         | 
| 7 7 | 
             
              singleton: false,
         | 
| 8 | 
            -
              text_content_only: true | 
| 8 | 
            +
              text_content_only: true
         | 
| 9 9 | 
             
            ) do |links|
         | 
| 10 | 
            -
               | 
| 11 | 
            -
                links = links.
         | 
| 12 | 
            -
                  map do |link|
         | 
| 13 | 
            -
                    Wgit::Url.new(link)
         | 
| 14 | 
            -
                  rescue
         | 
| 15 | 
            -
                    nil
         | 
| 16 | 
            -
                  end.
         | 
| 17 | 
            -
                  compact.
         | 
| 18 | 
            -
                  uniq
         | 
| 19 | 
            -
              end
         | 
| 20 | 
            -
              links
         | 
| 10 | 
            +
              links&.map(&:to_url)&.uniq
         | 
| 21 11 | 
             
            end
         | 
    
        data/lib/broken_link_finder.rb
    CHANGED
    
    | @@ -1,4 +1,9 @@ | |
| 1 | 
            -
             | 
| 2 | 
            -
             | 
| 3 | 
            -
             | 
| 4 | 
            -
             | 
| 1 | 
            +
            # frozen_string_literal: true
         | 
| 2 | 
            +
             | 
| 3 | 
            +
            require 'wgit'
         | 
| 4 | 
            +
            require 'wgit/core_ext'
         | 
| 5 | 
            +
             | 
| 6 | 
            +
            require_relative './broken_link_finder/wgit_extensions'
         | 
| 7 | 
            +
            require_relative './broken_link_finder/version'
         | 
| 8 | 
            +
            require_relative './broken_link_finder/reporter'
         | 
| 9 | 
            +
            require_relative './broken_link_finder/finder'
         | 
    
        data/load.rb
    CHANGED
    
    
    
        metadata
    CHANGED
    
    | @@ -1,14 +1,14 @@ | |
| 1 1 | 
             
            --- !ruby/object:Gem::Specification
         | 
| 2 2 | 
             
            name: broken_link_finder
         | 
| 3 3 | 
             
            version: !ruby/object:Gem::Version
         | 
| 4 | 
            -
              version: 0.9. | 
| 4 | 
            +
              version: 0.9.1
         | 
| 5 5 | 
             
            platform: ruby
         | 
| 6 6 | 
             
            authors:
         | 
| 7 7 | 
             
            - Michael Telford
         | 
| 8 8 | 
             
            autorequire: 
         | 
| 9 9 | 
             
            bindir: exe
         | 
| 10 10 | 
             
            cert_chain: []
         | 
| 11 | 
            -
            date: 2019- | 
| 11 | 
            +
            date: 2019-09-22 00:00:00.000000000 Z
         | 
| 12 12 | 
             
            dependencies:
         | 
| 13 13 | 
             
            - !ruby/object:Gem::Dependency
         | 
| 14 14 | 
             
              name: bundler
         | 
| @@ -25,117 +25,117 @@ dependencies: | |
| 25 25 | 
             
                  - !ruby/object:Gem::Version
         | 
| 26 26 | 
             
                    version: '2.0'
         | 
| 27 27 | 
             
            - !ruby/object:Gem::Dependency
         | 
| 28 | 
            -
              name:  | 
| 28 | 
            +
              name: byebug
         | 
| 29 29 | 
             
              requirement: !ruby/object:Gem::Requirement
         | 
| 30 30 | 
             
                requirements:
         | 
| 31 31 | 
             
                - - "~>"
         | 
| 32 32 | 
             
                  - !ruby/object:Gem::Version
         | 
| 33 | 
            -
                    version: ' | 
| 33 | 
            +
                    version: '11.0'
         | 
| 34 34 | 
             
              type: :development
         | 
| 35 35 | 
             
              prerelease: false
         | 
| 36 36 | 
             
              version_requirements: !ruby/object:Gem::Requirement
         | 
| 37 37 | 
             
                requirements:
         | 
| 38 38 | 
             
                - - "~>"
         | 
| 39 39 | 
             
                  - !ruby/object:Gem::Version
         | 
| 40 | 
            -
                    version: ' | 
| 40 | 
            +
                    version: '11.0'
         | 
| 41 41 | 
             
            - !ruby/object:Gem::Dependency
         | 
| 42 | 
            -
              name:  | 
| 42 | 
            +
              name: httplog
         | 
| 43 43 | 
             
              requirement: !ruby/object:Gem::Requirement
         | 
| 44 44 | 
             
                requirements:
         | 
| 45 45 | 
             
                - - "~>"
         | 
| 46 46 | 
             
                  - !ruby/object:Gem::Version
         | 
| 47 | 
            -
                    version: ' | 
| 47 | 
            +
                    version: '1.3'
         | 
| 48 48 | 
             
              type: :development
         | 
| 49 49 | 
             
              prerelease: false
         | 
| 50 50 | 
             
              version_requirements: !ruby/object:Gem::Requirement
         | 
| 51 51 | 
             
                requirements:
         | 
| 52 52 | 
             
                - - "~>"
         | 
| 53 53 | 
             
                  - !ruby/object:Gem::Version
         | 
| 54 | 
            -
                    version: ' | 
| 54 | 
            +
                    version: '1.3'
         | 
| 55 55 | 
             
            - !ruby/object:Gem::Dependency
         | 
| 56 | 
            -
              name:  | 
| 56 | 
            +
              name: memory_profiler
         | 
| 57 57 | 
             
              requirement: !ruby/object:Gem::Requirement
         | 
| 58 58 | 
             
                requirements:
         | 
| 59 59 | 
             
                - - "~>"
         | 
| 60 60 | 
             
                  - !ruby/object:Gem::Version
         | 
| 61 | 
            -
                    version: '0. | 
| 61 | 
            +
                    version: '0.9'
         | 
| 62 62 | 
             
              type: :development
         | 
| 63 63 | 
             
              prerelease: false
         | 
| 64 64 | 
             
              version_requirements: !ruby/object:Gem::Requirement
         | 
| 65 65 | 
             
                requirements:
         | 
| 66 66 | 
             
                - - "~>"
         | 
| 67 67 | 
             
                  - !ruby/object:Gem::Version
         | 
| 68 | 
            -
                    version: '0. | 
| 68 | 
            +
                    version: '0.9'
         | 
| 69 69 | 
             
            - !ruby/object:Gem::Dependency
         | 
| 70 | 
            -
              name:  | 
| 70 | 
            +
              name: minitest
         | 
| 71 71 | 
             
              requirement: !ruby/object:Gem::Requirement
         | 
| 72 72 | 
             
                requirements:
         | 
| 73 73 | 
             
                - - "~>"
         | 
| 74 74 | 
             
                  - !ruby/object:Gem::Version
         | 
| 75 | 
            -
                    version: ' | 
| 75 | 
            +
                    version: '5.0'
         | 
| 76 76 | 
             
              type: :development
         | 
| 77 77 | 
             
              prerelease: false
         | 
| 78 78 | 
             
              version_requirements: !ruby/object:Gem::Requirement
         | 
| 79 79 | 
             
                requirements:
         | 
| 80 80 | 
             
                - - "~>"
         | 
| 81 81 | 
             
                  - !ruby/object:Gem::Version
         | 
| 82 | 
            -
                    version: ' | 
| 82 | 
            +
                    version: '5.0'
         | 
| 83 83 | 
             
            - !ruby/object:Gem::Dependency
         | 
| 84 | 
            -
              name:  | 
| 84 | 
            +
              name: pry
         | 
| 85 85 | 
             
              requirement: !ruby/object:Gem::Requirement
         | 
| 86 86 | 
             
                requirements:
         | 
| 87 87 | 
             
                - - "~>"
         | 
| 88 88 | 
             
                  - !ruby/object:Gem::Version
         | 
| 89 | 
            -
                    version: ' | 
| 89 | 
            +
                    version: '0.12'
         | 
| 90 90 | 
             
              type: :development
         | 
| 91 91 | 
             
              prerelease: false
         | 
| 92 92 | 
             
              version_requirements: !ruby/object:Gem::Requirement
         | 
| 93 93 | 
             
                requirements:
         | 
| 94 94 | 
             
                - - "~>"
         | 
| 95 95 | 
             
                  - !ruby/object:Gem::Version
         | 
| 96 | 
            -
                    version: ' | 
| 96 | 
            +
                    version: '0.12'
         | 
| 97 97 | 
             
            - !ruby/object:Gem::Dependency
         | 
| 98 | 
            -
              name:  | 
| 98 | 
            +
              name: rake
         | 
| 99 99 | 
             
              requirement: !ruby/object:Gem::Requirement
         | 
| 100 100 | 
             
                requirements:
         | 
| 101 101 | 
             
                - - "~>"
         | 
| 102 102 | 
             
                  - !ruby/object:Gem::Version
         | 
| 103 | 
            -
                    version: ' | 
| 103 | 
            +
                    version: '10.0'
         | 
| 104 104 | 
             
              type: :development
         | 
| 105 105 | 
             
              prerelease: false
         | 
| 106 106 | 
             
              version_requirements: !ruby/object:Gem::Requirement
         | 
| 107 107 | 
             
                requirements:
         | 
| 108 108 | 
             
                - - "~>"
         | 
| 109 109 | 
             
                  - !ruby/object:Gem::Version
         | 
| 110 | 
            -
                    version: ' | 
| 110 | 
            +
                    version: '10.0'
         | 
| 111 111 | 
             
            - !ruby/object:Gem::Dependency
         | 
| 112 | 
            -
              name:  | 
| 112 | 
            +
              name: webmock
         | 
| 113 113 | 
             
              requirement: !ruby/object:Gem::Requirement
         | 
| 114 114 | 
             
                requirements:
         | 
| 115 115 | 
             
                - - "~>"
         | 
| 116 116 | 
             
                  - !ruby/object:Gem::Version
         | 
| 117 | 
            -
                    version: ' | 
| 117 | 
            +
                    version: '3.5'
         | 
| 118 118 | 
             
              type: :development
         | 
| 119 119 | 
             
              prerelease: false
         | 
| 120 120 | 
             
              version_requirements: !ruby/object:Gem::Requirement
         | 
| 121 121 | 
             
                requirements:
         | 
| 122 122 | 
             
                - - "~>"
         | 
| 123 123 | 
             
                  - !ruby/object:Gem::Version
         | 
| 124 | 
            -
                    version: ' | 
| 124 | 
            +
                    version: '3.5'
         | 
| 125 125 | 
             
            - !ruby/object:Gem::Dependency
         | 
| 126 | 
            -
              name:  | 
| 126 | 
            +
              name: thor
         | 
| 127 127 | 
             
              requirement: !ruby/object:Gem::Requirement
         | 
| 128 128 | 
             
                requirements:
         | 
| 129 129 | 
             
                - - '='
         | 
| 130 130 | 
             
                  - !ruby/object:Gem::Version
         | 
| 131 | 
            -
                    version: 0. | 
| 131 | 
            +
                    version: 0.20.3
         | 
| 132 132 | 
             
              type: :runtime
         | 
| 133 133 | 
             
              prerelease: false
         | 
| 134 134 | 
             
              version_requirements: !ruby/object:Gem::Requirement
         | 
| 135 135 | 
             
                requirements:
         | 
| 136 136 | 
             
                - - '='
         | 
| 137 137 | 
             
                  - !ruby/object:Gem::Version
         | 
| 138 | 
            -
                    version: 0. | 
| 138 | 
            +
                    version: 0.20.3
         | 
| 139 139 | 
             
            - !ruby/object:Gem::Dependency
         | 
| 140 140 | 
             
              name: thread
         | 
| 141 141 | 
             
              requirement: !ruby/object:Gem::Requirement
         | 
| @@ -151,19 +151,19 @@ dependencies: | |
| 151 151 | 
             
                  - !ruby/object:Gem::Version
         | 
| 152 152 | 
             
                    version: '0.2'
         | 
| 153 153 | 
             
            - !ruby/object:Gem::Dependency
         | 
| 154 | 
            -
              name:  | 
| 154 | 
            +
              name: wgit
         | 
| 155 155 | 
             
              requirement: !ruby/object:Gem::Requirement
         | 
| 156 156 | 
             
                requirements:
         | 
| 157 157 | 
             
                - - '='
         | 
| 158 158 | 
             
                  - !ruby/object:Gem::Version
         | 
| 159 | 
            -
                    version: 0. | 
| 159 | 
            +
                    version: 0.2.0
         | 
| 160 160 | 
             
              type: :runtime
         | 
| 161 161 | 
             
              prerelease: false
         | 
| 162 162 | 
             
              version_requirements: !ruby/object:Gem::Requirement
         | 
| 163 163 | 
             
                requirements:
         | 
| 164 164 | 
             
                - - '='
         | 
| 165 165 | 
             
                  - !ruby/object:Gem::Version
         | 
| 166 | 
            -
                    version: 0. | 
| 166 | 
            +
                    version: 0.2.0
         | 
| 167 167 | 
             
            description: Finds a website's broken links using the 'wgit' gem and reports back
         | 
| 168 168 | 
             
              to you with a summary.
         | 
| 169 169 | 
             
            email: michael.telford@live.com
         | 
| @@ -174,7 +174,6 @@ extra_rdoc_files: [] | |
| 174 174 | 
             
            files:
         | 
| 175 175 | 
             
            - ".gitignore"
         | 
| 176 176 | 
             
            - ".ruby-version"
         | 
| 177 | 
            -
            - ".travis.yml"
         | 
| 178 177 | 
             
            - CHANGELOG.md
         | 
| 179 178 | 
             
            - Gemfile
         | 
| 180 179 | 
             
            - Gemfile.lock
         |