httpspell 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +1 -0
- data/.rubocop.yml +30 -0
- data/Gemfile +4 -0
- data/Gemfile.lock +108 -0
- data/README.markdown +21 -0
- data/Rakefile +18 -0
- data/exe/httpspell +51 -0
- data/httpspell.gemspec +40 -0
- data/lib/httpspell/spellchecker.rb +25 -0
- data/lib/httpspell/spider.rb +62 -0
- data/lib/httpspell/version.rb +5 -0
- metadata +213 -0
    
        checksums.yaml
    ADDED
    
    | @@ -0,0 +1,7 @@ | |
| 1 | 
            +
            ---
         | 
| 2 | 
            +
            SHA256:
         | 
| 3 | 
            +
              metadata.gz: 0fe73f8f1ff3740d6e3ae3af685d3554879a2819fc9d7803c994a21dd3694d91
         | 
| 4 | 
            +
              data.tar.gz: 8354f5c3bdc325a073310aa534a6171164d3dfbe7a1c4154f77737f20108eb91
         | 
| 5 | 
            +
            SHA512:
         | 
| 6 | 
            +
              metadata.gz: '073693d2520238d10012e4c02057c4966ab8af80f1c9db868e5ae2a4b95e4ae59a7d0989c162f62649aa0d2194290da0bca1ac5e1186f8ff3569cca581d571ae'
         | 
| 7 | 
            +
              data.tar.gz: 8eb778ffa3bcc1f56e8362d160117f695ec5f3ca146592219f4ef43a160ea28b96c67c5a6edeba52fbdc6dc3413b9f4967243fc60431a81861506d2c46435b7b
         | 
    
        data/.gitignore
    ADDED
    
    | @@ -0,0 +1 @@ | |
| 1 | 
            +
            pkg
         | 
    
        data/.rubocop.yml
    ADDED
    
    | @@ -0,0 +1,30 @@ | |
| 1 | 
            +
            AllCops:
         | 
| 2 | 
            +
              TargetRubyVersion: 2.5.1
         | 
| 3 | 
            +
              Include:
         | 
| 4 | 
            +
                - '**/Gemfile'
         | 
| 5 | 
            +
                - '**/Rakefile'
         | 
| 6 | 
            +
                - '**/config.ru'
         | 
| 7 | 
            +
                - '**/*.rake'
         | 
| 8 | 
            +
              Exclude:
         | 
| 9 | 
            +
                - vendor/**/*
         | 
| 10 | 
            +
                - db/migrations/**/*
         | 
| 11 | 
            +
             | 
| 12 | 
            +
              DisplayCopNames:
         | 
| 13 | 
            +
                Enabled: true
         | 
| 14 | 
            +
             | 
| 15 | 
            +
              DisplayStyleGuide:
         | 
| 16 | 
            +
                Enabled: true
         | 
| 17 | 
            +
             | 
| 18 | 
            +
            Naming/FileName:
         | 
| 19 | 
            +
              Exclude:
         | 
| 20 | 
            +
              - Guardfile
         | 
| 21 | 
            +
             | 
| 22 | 
            +
            Metrics/BlockLength:
         | 
| 23 | 
            +
              Exclude:
         | 
| 24 | 
            +
                - spec/**/*
         | 
| 25 | 
            +
             | 
| 26 | 
            +
            Metrics/LineLength:
         | 
| 27 | 
            +
              Max: 160
         | 
| 28 | 
            +
             | 
| 29 | 
            +
            Style/Documentation:
         | 
| 30 | 
            +
              Enabled: false
         | 
    
        data/Gemfile
    ADDED
    
    
    
        data/Gemfile.lock
    ADDED
    
    | @@ -0,0 +1,108 @@ | |
| 1 | 
            +
            PATH
         | 
| 2 | 
            +
              remote: .
         | 
| 3 | 
            +
              specs:
         | 
| 4 | 
            +
                httpspell (1.0.0)
         | 
| 5 | 
            +
                  addressable
         | 
| 6 | 
            +
                  nokogiri
         | 
| 7 | 
            +
             | 
| 8 | 
            +
            GEM
         | 
| 9 | 
            +
              remote: https://rubygems.org/
         | 
| 10 | 
            +
              specs:
         | 
| 11 | 
            +
                addressable (2.5.2)
         | 
| 12 | 
            +
                  public_suffix (>= 2.0.2, < 4.0)
         | 
| 13 | 
            +
                ast (2.4.0)
         | 
| 14 | 
            +
                byebug (10.0.2)
         | 
| 15 | 
            +
                coderay (1.1.2)
         | 
| 16 | 
            +
                diff-lcs (1.3)
         | 
| 17 | 
            +
                ffi (1.9.23)
         | 
| 18 | 
            +
                formatador (0.2.5)
         | 
| 19 | 
            +
                guard (2.14.2)
         | 
| 20 | 
            +
                  formatador (>= 0.2.4)
         | 
| 21 | 
            +
                  listen (>= 2.7, < 4.0)
         | 
| 22 | 
            +
                  lumberjack (>= 1.0.12, < 2.0)
         | 
| 23 | 
            +
                  nenv (~> 0.1)
         | 
| 24 | 
            +
                  notiffany (~> 0.0)
         | 
| 25 | 
            +
                  pry (>= 0.9.12)
         | 
| 26 | 
            +
                  shellany (~> 0.0)
         | 
| 27 | 
            +
                  thor (>= 0.18.1)
         | 
| 28 | 
            +
                guard-bundler (2.1.0)
         | 
| 29 | 
            +
                  bundler (~> 1.0)
         | 
| 30 | 
            +
                  guard (~> 2.2)
         | 
| 31 | 
            +
                  guard-compat (~> 1.1)
         | 
| 32 | 
            +
                guard-compat (1.2.1)
         | 
| 33 | 
            +
                guard-rspec (4.7.3)
         | 
| 34 | 
            +
                  guard (~> 2.1)
         | 
| 35 | 
            +
                  guard-compat (~> 1.1)
         | 
| 36 | 
            +
                  rspec (>= 2.99.0, < 4.0)
         | 
| 37 | 
            +
                listen (3.1.5)
         | 
| 38 | 
            +
                  rb-fsevent (~> 0.9, >= 0.9.4)
         | 
| 39 | 
            +
                  rb-inotify (~> 0.9, >= 0.9.7)
         | 
| 40 | 
            +
                  ruby_dep (~> 1.2)
         | 
| 41 | 
            +
                lumberjack (1.0.13)
         | 
| 42 | 
            +
                method_source (0.9.0)
         | 
| 43 | 
            +
                mini_portile2 (2.3.0)
         | 
| 44 | 
            +
                nenv (0.3.0)
         | 
| 45 | 
            +
                nokogiri (1.8.2)
         | 
| 46 | 
            +
                  mini_portile2 (~> 2.3.0)
         | 
| 47 | 
            +
                notiffany (0.1.1)
         | 
| 48 | 
            +
                  nenv (~> 0.1)
         | 
| 49 | 
            +
                  shellany (~> 0.0)
         | 
| 50 | 
            +
                parallel (1.12.1)
         | 
| 51 | 
            +
                parser (2.5.1.0)
         | 
| 52 | 
            +
                  ast (~> 2.4.0)
         | 
| 53 | 
            +
                powerpack (0.1.1)
         | 
| 54 | 
            +
                pry (0.11.3)
         | 
| 55 | 
            +
                  coderay (~> 1.1.0)
         | 
| 56 | 
            +
                  method_source (~> 0.9.0)
         | 
| 57 | 
            +
                pry-byebug (3.6.0)
         | 
| 58 | 
            +
                  byebug (~> 10.0)
         | 
| 59 | 
            +
                  pry (~> 0.10)
         | 
| 60 | 
            +
                public_suffix (3.0.2)
         | 
| 61 | 
            +
                rainbow (3.0.0)
         | 
| 62 | 
            +
                rake (12.3.1)
         | 
| 63 | 
            +
                rb-fsevent (0.10.3)
         | 
| 64 | 
            +
                rb-inotify (0.9.10)
         | 
| 65 | 
            +
                  ffi (>= 0.5.0, < 2)
         | 
| 66 | 
            +
                rspec (3.7.0)
         | 
| 67 | 
            +
                  rspec-core (~> 3.7.0)
         | 
| 68 | 
            +
                  rspec-expectations (~> 3.7.0)
         | 
| 69 | 
            +
                  rspec-mocks (~> 3.7.0)
         | 
| 70 | 
            +
                rspec-core (3.7.1)
         | 
| 71 | 
            +
                  rspec-support (~> 3.7.0)
         | 
| 72 | 
            +
                rspec-expectations (3.7.0)
         | 
| 73 | 
            +
                  diff-lcs (>= 1.2.0, < 2.0)
         | 
| 74 | 
            +
                  rspec-support (~> 3.7.0)
         | 
| 75 | 
            +
                rspec-mocks (3.7.0)
         | 
| 76 | 
            +
                  diff-lcs (>= 1.2.0, < 2.0)
         | 
| 77 | 
            +
                  rspec-support (~> 3.7.0)
         | 
| 78 | 
            +
                rspec-support (3.7.1)
         | 
| 79 | 
            +
                rubocop (0.56.0)
         | 
| 80 | 
            +
                  parallel (~> 1.10)
         | 
| 81 | 
            +
                  parser (>= 2.5)
         | 
| 82 | 
            +
                  powerpack (~> 0.1)
         | 
| 83 | 
            +
                  rainbow (>= 2.2.2, < 4.0)
         | 
| 84 | 
            +
                  ruby-progressbar (~> 1.7)
         | 
| 85 | 
            +
                  unicode-display_width (~> 1.0, >= 1.0.1)
         | 
| 86 | 
            +
                ruby-progressbar (1.9.0)
         | 
| 87 | 
            +
                ruby_dep (1.5.0)
         | 
| 88 | 
            +
                shellany (0.0.1)
         | 
| 89 | 
            +
                thor (0.20.0)
         | 
| 90 | 
            +
                unicode-display_width (1.3.3)
         | 
| 91 | 
            +
             | 
| 92 | 
            +
            PLATFORMS
         | 
| 93 | 
            +
              ruby
         | 
| 94 | 
            +
             | 
| 95 | 
            +
            DEPENDENCIES
         | 
| 96 | 
            +
              bundler
         | 
| 97 | 
            +
              guard
         | 
| 98 | 
            +
              guard-bundler
         | 
| 99 | 
            +
              guard-rspec
         | 
| 100 | 
            +
              httpspell!
         | 
| 101 | 
            +
              pry
         | 
| 102 | 
            +
              pry-byebug
         | 
| 103 | 
            +
              rake
         | 
| 104 | 
            +
              rspec
         | 
| 105 | 
            +
              rubocop
         | 
| 106 | 
            +
             | 
| 107 | 
            +
            BUNDLED WITH
         | 
| 108 | 
            +
               1.16.1
         | 
    
        data/README.markdown
    ADDED
    
    | @@ -0,0 +1,21 @@ | |
| 1 | 
            +
            # `httpspell`
         | 
| 2 | 
            +
             | 
| 3 | 
            +
            This is a spellchecker that recursively fetches HTML pages, converts them to plain text (using [pandoc](http://pandoc.org/)), and spellchecks them with [hunspell](https://hunspell.github.io/). Unknown words will be printed to `stdout`, which makes the tool a good candidate for CI pipelines where you might want to take action when a spelling error is found on a web page.
         | 
| 4 | 
            +
             | 
| 5 | 
            +
            Words that are not in the dictionary for the given language (inferred from the `lang` attribute of the HTML document's root element) can be added to a personal dictionary, which will mark the word as correctly spelled.
         | 
| 6 | 
            +
             | 
| 7 | 
            +
            # What is *not* checked
         | 
| 8 | 
            +
             | 
| 9 | 
            +
            * When spidering a site, `httpspell` will skip all responses with a `content-type` header other than `text/html`.
         | 
| 10 | 
            +
            * Before converting, `httpspell` removes the following nodes from the HTML DOM as they are not a good target for spellchecking:
         | 
| 11 | 
            +
              - `code`
         | 
| 12 | 
            +
              - `pre`
         | 
| 13 | 
            +
              - Elements with `spellcheck='false'` (the HTML5 `spellcheck` attribute is how a page declares whether an element should be spellchecked)
         | 
| 14 | 
            +
             | 
| 15 | 
            +
            # Misc
         | 
| 16 | 
            +
             | 
| 17 | 
            +
            If you produce content with kramdown (e.g. using Jekyll), setting `spellcheck='false'` for an element is as simple as adding this line *after* the element (e.g. a heading):
         | 
| 18 | 
            +
             | 
| 19 | 
            +
            ```
         | 
| 20 | 
            +
            {: spellcheck="false"}
         | 
| 21 | 
            +
            ```
         | 
    
        data/Rakefile
    ADDED
    
    | @@ -0,0 +1,18 @@ | |
| 1 | 
            +
            # frozen_string_literal: true
         | 
| 2 | 
            +
             | 
| 3 | 
            +
            require 'rspec/core/rake_task'
         | 
| 4 | 
            +
            require 'bundler/gem_tasks'
         | 
| 5 | 
            +
            require 'rubocop/rake_task'
         | 
| 6 | 
            +
             | 
| 7 | 
            +
            RuboCop::RakeTask.new
         | 
| 8 | 
            +
             | 
| 9 | 
            +
            task default: ['spec:all']
         | 
| 10 | 
            +
             | 
| 11 | 
            +
            namespace :spec do
         | 
| 12 | 
            +
              desc 'Run all specs'
         | 
| 13 | 
            +
              task all: ['rubocop:auto_correct', :unit]
         | 
| 14 | 
            +
             | 
| 15 | 
            +
              RSpec::Core::RakeTask.new(:unit) do |t|
         | 
| 16 | 
            +
                t.pattern = 'spec/unit/**/*_spec.rb'
         | 
| 17 | 
            +
              end
         | 
| 18 | 
            +
            end
         | 
    
        data/exe/httpspell
    ADDED
    
    | @@ -0,0 +1,51 @@ | |
| 1 | 
            +
            #!/usr/bin/env ruby
            # frozen_string_literal: true

            # Command-line entry point for httpspell.
            #
            # Usage: httpspell [-p FILE] URL
            #
            # Spiders the site starting at URL, spellchecks every HTML page found and
            # prints unknown words to stdout (one per line). Diagnostics go to stderr.

            # $ERROR_INFO is used below, so load 'English' here instead of relying on
            # httpspell/spider to have required it.
            require 'English'
            require 'optparse'
            require 'httpspell/spider'
            require 'httpspell/spellchecker'

            # Language used when a page does not declare a usable `lang` attribute.
            default_lang = 'de-DE'
            personal_dictionary_path = nil

            begin
              OptionParser.new do |parser|
                parser.banner.prepend <<~BANNER
                  Spellchecks a website via HTTP.

                BANNER

                parser.on('-p', '--personal-dictionary=FILE', 'path to the personal dictionary file') do |p|
                  personal_dictionary_path = p
                end

                # TODO: --recursive, defaults to false
                # TODO wget has some additional options for recursive behavior that should be reviewed
              end.parse!
            rescue StandardError
              warn "Error - #{$ERROR_INFO}"
              exit 1
            end

            if ARGV.size != 1
              warn "Expected exactly one argument, but received #{ARGV.size}."
              exit 1
            end

            spell_checker = HttpSpell::SpellChecker.new(personal_dictionary_path)

            HttpSpell::Spider.new(ARGV.first).start do |url, doc|
              # The language comes from the root element's `lang` attribute. A missing
              # root element, a missing attribute and an empty/blank attribute all fall
              # back to the default; a blank value is truthy in Ruby and would otherwise
              # be handed to hunspell as an empty dictionary name.
              root = doc.root
              declared_lang = root ? root['lang'].to_s.strip : ''
              lang = declared_lang.empty? ? default_lang : declared_lang

              # Remove sections that are not to be spellchecked
              doc.css('pre').each(&:unlink)
              doc.css('code').each(&:unlink)
              doc.css('[spellcheck=false]').each(&:unlink)

              # TODO: Find sections with a lang attribute and handle them separately
              unknown_words = spell_checker.check(doc.to_s, lang)

              unless unknown_words.empty?
                warn "#{unknown_words.size} unknown words at #{url}:"
                puts unknown_words
              end
            end
         | 
    
        data/httpspell.gemspec
    ADDED
    
    | @@ -0,0 +1,40 @@ | |
| 1 | 
            +
            # frozen_string_literal: true
         | 
| 2 | 
            +
             | 
| 3 | 
            +
            lib = File.expand_path('lib', __dir__)
         | 
| 4 | 
            +
            $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
         | 
| 5 | 
            +
            require 'httpspell/version'
         | 
| 6 | 
            +
             | 
| 7 | 
            +
            # rubocop:disable Metrics/BlockLength
         | 
| 8 | 
            +
            Gem::Specification.new do |spec|
         | 
| 9 | 
            +
              spec.name          = 'httpspell'
         | 
| 10 | 
            +
              spec.version       = HttpSpell::VERSION
         | 
| 11 | 
            +
              spec.authors       = ['Steffen Uhlig']
         | 
| 12 | 
            +
              spec.email         = ['steffen@familie-uhlig.net']
         | 
| 13 | 
            +
             | 
| 14 | 
            +
              spec.summary       = 'HTTP spellchecker'
         | 
| 15 | 
            +
              spec.description   = %(httpspell is a spellchecker that recursively fetches
         | 
| 16 | 
            +
                HTML pages, converts them to plain text using pandoc, and
         | 
| 17 | 
            +
                spellchecks them with hunspell.)
         | 
| 18 | 
            +
              spec.license       = 'MIT'
         | 
| 19 | 
            +
             | 
| 20 | 
            +
              spec.files         = `git ls-files -z`.split("\x0").reject do |f|
         | 
| 21 | 
            +
                f.match(%r{^(test|spec|features)/})
         | 
| 22 | 
            +
              end
         | 
| 23 | 
            +
              spec.bindir        = 'exe'
         | 
| 24 | 
            +
              spec.executables   = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
         | 
| 25 | 
            +
              spec.require_paths = ['lib']
         | 
| 26 | 
            +
             | 
| 27 | 
            +
              spec.add_dependency 'addressable'
         | 
| 28 | 
            +
              spec.add_dependency 'nokogiri'
         | 
| 29 | 
            +
             | 
| 30 | 
            +
              spec.add_development_dependency 'bundler'
         | 
| 31 | 
            +
              spec.add_development_dependency 'guard'
         | 
| 32 | 
            +
              spec.add_development_dependency 'guard-bundler'
         | 
| 33 | 
            +
              spec.add_development_dependency 'guard-rspec'
         | 
| 34 | 
            +
              spec.add_development_dependency 'pry'
         | 
| 35 | 
            +
              spec.add_development_dependency 'pry-byebug'
         | 
| 36 | 
            +
              spec.add_development_dependency 'rake'
         | 
| 37 | 
            +
              spec.add_development_dependency 'rspec'
         | 
| 38 | 
            +
              spec.add_development_dependency 'rubocop'
         | 
| 39 | 
            +
            end
         | 
| 40 | 
            +
            # rubocop:enable Metrics/BlockLength
         | 
| @@ -0,0 +1,25 @@ | |
| 1 | 
            +
            # Open3 is used below; require it here so this file does not depend on
            # another file having loaded it first.
            require 'open3'

            module HttpSpell
              # Spellchecks an HTML document by piping it through `pandoc` (HTML to plain
              # text) and `hunspell` (list of unknown words).
              class SpellChecker
                # Converts the HTML document read from stdin to plain text on stdout.
                PANDOC_COMMAND = %w[pandoc --from html --to plain].freeze

                # @param personal_dictionary_path [String, nil] path passed to hunspell's
                #   `-p` option; when nil, no personal dictionary is passed.
                def initialize(personal_dictionary_path = nil)
                  @personal_dictionary_path = personal_dictionary_path
                end

                # Returns the unique unknown words found in +doc+.
                #
                # @param doc [String] HTML source of the document
                # @param lang [String] language identifier (e.g. "en-US"); dashes are
                #   translated to underscores before it is passed to hunspell's `-d`
                # @return [Array<String>] unknown words, without duplicates
                def check(doc, lang)
                  # Both commands are passed as argv arrays, so no shell is involved.
                  # `lang` originates from the fetched page and must never be
                  # interpolated into a shell command line.
                  Open3.pipeline_rw(PANDOC_COMMAND, hunspell_command(lang)) do |stdin, stdout, _wait_thrs|
                    stdin.puts(doc)
                    stdin.close
                    stdout.read.split.uniq
                  end
                end

                private

                # Builds the hunspell argv: list (-l) unknown words of UTF-8 input using
                # the dictionary for +lang+ and, if configured, the personal dictionary.
                def hunspell_command(lang)
                  command = ['hunspell', '-d', translate(lang)]
                  command.push('-p', @personal_dictionary_path.to_s) if @personal_dictionary_path
                  command.push('-i', 'UTF-8', '-l')
                end

                # The W3C [recommends](https://www.w3.org/International/questions/qa-html-language-declarations)
                # to specify language using identifiers as per [RFC 5646](https://tools.ietf.org/html/rfc5646)
                # which uses dashes. Hunspell, however, uses underscores. This method translates RFC-style identifiers
                # to hunspell-style.
                def translate(lang)
                  lang.tr('-', '_')
                end
              end
            end
         | 
| @@ -0,0 +1,62 @@ | |
| 1 | 
            +
            require 'nokogiri'
         | 
| 2 | 
            +
            require 'open-uri'
         | 
| 3 | 
            +
            require 'open3'
         | 
| 4 | 
            +
            require 'addressable/uri'
         | 
| 5 | 
            +
            require 'English'
         | 
| 6 | 
            +
             | 
| 7 | 
            +
            module HttpSpell
              # rubocop:disable Metrics/AbcSize
              # rubocop:disable Metrics/MethodLength

              # Crawls HTML pages starting at a given URL and yields each fetched page
              # to the caller. Only links whose string form starts with the base URL are
              # followed.
              class Spider
                # todo: URLs still waiting to be fetched.
                # done: URLs that were fetched (including non-HTML responses).
                attr_reader :todo, :done

                # @param starting_point [String] URL of the first page to fetch
                # @param base_url [String] prefix a link must start with to be followed;
                #   defaults to the starting point
                def initialize(starting_point, base_url = starting_point)
                  @todo = []
                  @done = []
                  # URLs that could not be fetched; kept so they are not queued again.
                  @failed = []
                  todo << Addressable::URI.parse(starting_point)
                  @base_url = Addressable::URI.parse(base_url)
                end

                # Fetches queued URLs until the queue is empty.
                #
                # @yield [url, doc] the URL and the parsed Nokogiri document of every
                #   fetched `text/html` page. Errors raised by the block are reported on
                #   stderr and do not stop the crawl.
                def start
                  while todo.any?
                    url = todo.pop

                    begin
                      extracted = links(url) do |u, d|
                        yield u, d if block_given?
                      rescue StandardError
                        warn "Callback error for #{url}: #{$ERROR_INFO}"
                      end

                      done.append(url)
                      # Array#- does not remove duplicates from the receiver, so a page
                      # linking to the same new URL more than once would queue (and
                      # later fetch) it several times without the uniq.
                      todo.concat(extracted.uniq - done - todo - @failed)
                    rescue StandardError
                      warn "Could not fetch #{url}: #{$ERROR_INFO}"
                      # Remember the failure; otherwise every other page linking to
                      # this URL would queue it again.
                      @failed << url
                    end
                  end
                end

                private

                # Fetches +uri+ and returns the links on the page that are within the
                # base URL. Returns an empty array (and does not yield) when the
                # response is not `text/html`.
                #
                # @yield [uri, doc] the fetched URI and its parsed document
                # @return [Array<Addressable::URI>] links found on the page
                def links(uri)
                  # We are using open-uri, which follows redirects and also provides the content-type.
                  response = URI(uri).read
                  return [] unless response.content_type == 'text/html'

                  doc = Nokogiri::HTML(response)

                  # Links are collected before yielding, so a caller that modifies the
                  # document in its block cannot affect which links are found.
                  found = doc.css('a[href]').map do |anchor|
                    link = Addressable::URI.parse(anchor['href'])
                    link = uri.join(link) if link.relative?
                    # The fragment is never sent to the server; dropping it keeps
                    # "page#a" and "page#b" from being fetched as separate pages.
                    link = link.omit(:fragment)
                    next unless link.to_s.start_with?(@base_url.to_s)

                    link
                  rescue StandardError
                    # warn returns nil, so a link that cannot be processed is dropped
                    # by the compact below.
                    warn $ERROR_INFO
                  end.compact

                  yield uri, doc if block_given?
                  found
                end
              end
              # rubocop:enable Metrics/AbcSize
              # rubocop:enable Metrics/MethodLength
            end
         | 
    
        metadata
    ADDED
    
    | @@ -0,0 +1,213 @@ | |
| 1 | 
            +
            --- !ruby/object:Gem::Specification
         | 
| 2 | 
            +
            name: httpspell
         | 
| 3 | 
            +
            version: !ruby/object:Gem::Version
         | 
| 4 | 
            +
              version: 1.0.0
         | 
| 5 | 
            +
            platform: ruby
         | 
| 6 | 
            +
            authors:
         | 
| 7 | 
            +
            - Steffen Uhlig
         | 
| 8 | 
            +
            autorequire: 
         | 
| 9 | 
            +
            bindir: exe
         | 
| 10 | 
            +
            cert_chain: []
         | 
| 11 | 
            +
            date: 2018-05-30 00:00:00.000000000 Z
         | 
| 12 | 
            +
            dependencies:
         | 
| 13 | 
            +
            - !ruby/object:Gem::Dependency
         | 
| 14 | 
            +
              name: addressable
         | 
| 15 | 
            +
              requirement: !ruby/object:Gem::Requirement
         | 
| 16 | 
            +
                requirements:
         | 
| 17 | 
            +
                - - ">="
         | 
| 18 | 
            +
                  - !ruby/object:Gem::Version
         | 
| 19 | 
            +
                    version: '0'
         | 
| 20 | 
            +
              type: :runtime
         | 
| 21 | 
            +
              prerelease: false
         | 
| 22 | 
            +
              version_requirements: !ruby/object:Gem::Requirement
         | 
| 23 | 
            +
                requirements:
         | 
| 24 | 
            +
                - - ">="
         | 
| 25 | 
            +
                  - !ruby/object:Gem::Version
         | 
| 26 | 
            +
                    version: '0'
         | 
| 27 | 
            +
            - !ruby/object:Gem::Dependency
         | 
| 28 | 
            +
              name: nokogiri
         | 
| 29 | 
            +
              requirement: !ruby/object:Gem::Requirement
         | 
| 30 | 
            +
                requirements:
         | 
| 31 | 
            +
                - - ">="
         | 
| 32 | 
            +
                  - !ruby/object:Gem::Version
         | 
| 33 | 
            +
                    version: '0'
         | 
| 34 | 
            +
              type: :runtime
         | 
| 35 | 
            +
              prerelease: false
         | 
| 36 | 
            +
              version_requirements: !ruby/object:Gem::Requirement
         | 
| 37 | 
            +
                requirements:
         | 
| 38 | 
            +
                - - ">="
         | 
| 39 | 
            +
                  - !ruby/object:Gem::Version
         | 
| 40 | 
            +
                    version: '0'
         | 
| 41 | 
            +
            - !ruby/object:Gem::Dependency
         | 
| 42 | 
            +
              name: bundler
         | 
| 43 | 
            +
              requirement: !ruby/object:Gem::Requirement
         | 
| 44 | 
            +
                requirements:
         | 
| 45 | 
            +
                - - ">="
         | 
| 46 | 
            +
                  - !ruby/object:Gem::Version
         | 
| 47 | 
            +
                    version: '0'
         | 
| 48 | 
            +
              type: :development
         | 
| 49 | 
            +
              prerelease: false
         | 
| 50 | 
            +
              version_requirements: !ruby/object:Gem::Requirement
         | 
| 51 | 
            +
                requirements:
         | 
| 52 | 
            +
                - - ">="
         | 
| 53 | 
            +
                  - !ruby/object:Gem::Version
         | 
| 54 | 
            +
                    version: '0'
         | 
| 55 | 
            +
            - !ruby/object:Gem::Dependency
         | 
| 56 | 
            +
              name: guard
         | 
| 57 | 
            +
              requirement: !ruby/object:Gem::Requirement
         | 
| 58 | 
            +
                requirements:
         | 
| 59 | 
            +
                - - ">="
         | 
| 60 | 
            +
                  - !ruby/object:Gem::Version
         | 
| 61 | 
            +
                    version: '0'
         | 
| 62 | 
            +
              type: :development
         | 
| 63 | 
            +
              prerelease: false
         | 
| 64 | 
            +
              version_requirements: !ruby/object:Gem::Requirement
         | 
| 65 | 
            +
                requirements:
         | 
| 66 | 
            +
                - - ">="
         | 
| 67 | 
            +
                  - !ruby/object:Gem::Version
         | 
| 68 | 
            +
                    version: '0'
         | 
| 69 | 
            +
            - !ruby/object:Gem::Dependency
         | 
| 70 | 
            +
              name: guard-bundler
         | 
| 71 | 
            +
              requirement: !ruby/object:Gem::Requirement
         | 
| 72 | 
            +
                requirements:
         | 
| 73 | 
            +
                - - ">="
         | 
| 74 | 
            +
                  - !ruby/object:Gem::Version
         | 
| 75 | 
            +
                    version: '0'
         | 
| 76 | 
            +
              type: :development
         | 
| 77 | 
            +
              prerelease: false
         | 
| 78 | 
            +
              version_requirements: !ruby/object:Gem::Requirement
         | 
| 79 | 
            +
                requirements:
         | 
| 80 | 
            +
                - - ">="
         | 
| 81 | 
            +
                  - !ruby/object:Gem::Version
         | 
| 82 | 
            +
                    version: '0'
         | 
| 83 | 
            +
            - !ruby/object:Gem::Dependency
         | 
| 84 | 
            +
              name: guard-rspec
         | 
| 85 | 
            +
              requirement: !ruby/object:Gem::Requirement
         | 
| 86 | 
            +
                requirements:
         | 
| 87 | 
            +
                - - ">="
         | 
| 88 | 
            +
                  - !ruby/object:Gem::Version
         | 
| 89 | 
            +
                    version: '0'
         | 
| 90 | 
            +
              type: :development
         | 
| 91 | 
            +
              prerelease: false
         | 
| 92 | 
            +
              version_requirements: !ruby/object:Gem::Requirement
         | 
| 93 | 
            +
                requirements:
         | 
| 94 | 
            +
                - - ">="
         | 
| 95 | 
            +
                  - !ruby/object:Gem::Version
         | 
| 96 | 
            +
                    version: '0'
         | 
| 97 | 
            +
            - !ruby/object:Gem::Dependency
         | 
| 98 | 
            +
              name: pry
         | 
| 99 | 
            +
              requirement: !ruby/object:Gem::Requirement
         | 
| 100 | 
            +
                requirements:
         | 
| 101 | 
            +
                - - ">="
         | 
| 102 | 
            +
                  - !ruby/object:Gem::Version
         | 
| 103 | 
            +
                    version: '0'
         | 
| 104 | 
            +
              type: :development
         | 
| 105 | 
            +
              prerelease: false
         | 
| 106 | 
            +
              version_requirements: !ruby/object:Gem::Requirement
         | 
| 107 | 
            +
                requirements:
         | 
| 108 | 
            +
                - - ">="
         | 
| 109 | 
            +
                  - !ruby/object:Gem::Version
         | 
| 110 | 
            +
                    version: '0'
         | 
| 111 | 
            +
            - !ruby/object:Gem::Dependency
         | 
| 112 | 
            +
              name: pry-byebug
         | 
| 113 | 
            +
              requirement: !ruby/object:Gem::Requirement
         | 
| 114 | 
            +
                requirements:
         | 
| 115 | 
            +
                - - ">="
         | 
| 116 | 
            +
                  - !ruby/object:Gem::Version
         | 
| 117 | 
            +
                    version: '0'
         | 
| 118 | 
            +
              type: :development
         | 
| 119 | 
            +
              prerelease: false
         | 
| 120 | 
            +
              version_requirements: !ruby/object:Gem::Requirement
         | 
| 121 | 
            +
                requirements:
         | 
| 122 | 
            +
                - - ">="
         | 
| 123 | 
            +
                  - !ruby/object:Gem::Version
         | 
| 124 | 
            +
                    version: '0'
         | 
| 125 | 
            +
            - !ruby/object:Gem::Dependency
         | 
| 126 | 
            +
              name: rake
         | 
| 127 | 
            +
              requirement: !ruby/object:Gem::Requirement
         | 
| 128 | 
            +
                requirements:
         | 
| 129 | 
            +
                - - ">="
         | 
| 130 | 
            +
                  - !ruby/object:Gem::Version
         | 
| 131 | 
            +
                    version: '0'
         | 
| 132 | 
            +
              type: :development
         | 
| 133 | 
            +
              prerelease: false
         | 
| 134 | 
            +
              version_requirements: !ruby/object:Gem::Requirement
         | 
| 135 | 
            +
                requirements:
         | 
| 136 | 
            +
                - - ">="
         | 
| 137 | 
            +
                  - !ruby/object:Gem::Version
         | 
| 138 | 
            +
                    version: '0'
         | 
| 139 | 
            +
            - !ruby/object:Gem::Dependency
         | 
| 140 | 
            +
              name: rspec
         | 
| 141 | 
            +
              requirement: !ruby/object:Gem::Requirement
         | 
| 142 | 
            +
                requirements:
         | 
| 143 | 
            +
                - - ">="
         | 
| 144 | 
            +
                  - !ruby/object:Gem::Version
         | 
| 145 | 
            +
                    version: '0'
         | 
| 146 | 
            +
              type: :development
         | 
| 147 | 
            +
              prerelease: false
         | 
| 148 | 
            +
              version_requirements: !ruby/object:Gem::Requirement
         | 
| 149 | 
            +
                requirements:
         | 
| 150 | 
            +
                - - ">="
         | 
| 151 | 
            +
                  - !ruby/object:Gem::Version
         | 
| 152 | 
            +
                    version: '0'
         | 
| 153 | 
            +
            - !ruby/object:Gem::Dependency
         | 
| 154 | 
            +
              name: rubocop
         | 
| 155 | 
            +
              requirement: !ruby/object:Gem::Requirement
         | 
| 156 | 
            +
                requirements:
         | 
| 157 | 
            +
                - - ">="
         | 
| 158 | 
            +
                  - !ruby/object:Gem::Version
         | 
| 159 | 
            +
                    version: '0'
         | 
| 160 | 
            +
              type: :development
         | 
| 161 | 
            +
              prerelease: false
         | 
| 162 | 
            +
              version_requirements: !ruby/object:Gem::Requirement
         | 
| 163 | 
            +
                requirements:
         | 
| 164 | 
            +
                - - ">="
         | 
| 165 | 
            +
                  - !ruby/object:Gem::Version
         | 
| 166 | 
            +
                    version: '0'
         | 
| 167 | 
            +
            description: |-
         | 
| 168 | 
            +
              httpspell is a spellchecker that recursively fetches
         | 
| 169 | 
            +
                  HTML pages, converts them to plain text using pandoc, and
         | 
| 170 | 
            +
                  spellchecks them with hunspell.
         | 
| 171 | 
            +
            email:
         | 
| 172 | 
            +
            - steffen@familie-uhlig.net
         | 
| 173 | 
            +
            executables:
         | 
| 174 | 
            +
            - httpspell
         | 
| 175 | 
            +
            extensions: []
         | 
| 176 | 
            +
            extra_rdoc_files: []
         | 
| 177 | 
            +
            files:
         | 
| 178 | 
            +
            - ".gitignore"
         | 
| 179 | 
            +
            - ".rubocop.yml"
         | 
| 180 | 
            +
            - Gemfile
         | 
| 181 | 
            +
            - Gemfile.lock
         | 
| 182 | 
            +
            - README.markdown
         | 
| 183 | 
            +
            - Rakefile
         | 
| 184 | 
            +
            - exe/httpspell
         | 
| 185 | 
            +
            - httpspell.gemspec
         | 
| 186 | 
            +
            - lib/httpspell/spellchecker.rb
         | 
| 187 | 
            +
            - lib/httpspell/spider.rb
         | 
| 188 | 
            +
            - lib/httpspell/version.rb
         | 
| 189 | 
            +
            homepage: 
         | 
| 190 | 
            +
            licenses:
         | 
| 191 | 
            +
            - MIT
         | 
| 192 | 
            +
            metadata: {}
         | 
| 193 | 
            +
            post_install_message: 
         | 
| 194 | 
            +
            rdoc_options: []
         | 
| 195 | 
            +
            require_paths:
         | 
| 196 | 
            +
            - lib
         | 
| 197 | 
            +
            required_ruby_version: !ruby/object:Gem::Requirement
         | 
| 198 | 
            +
              requirements:
         | 
| 199 | 
            +
              - - ">="
         | 
| 200 | 
            +
                - !ruby/object:Gem::Version
         | 
| 201 | 
            +
                  version: '0'
         | 
| 202 | 
            +
            required_rubygems_version: !ruby/object:Gem::Requirement
         | 
| 203 | 
            +
              requirements:
         | 
| 204 | 
            +
              - - ">="
         | 
| 205 | 
            +
                - !ruby/object:Gem::Version
         | 
| 206 | 
            +
                  version: '0'
         | 
| 207 | 
            +
            requirements: []
         | 
| 208 | 
            +
            rubyforge_project: 
         | 
| 209 | 
            +
            rubygems_version: 2.7.6
         | 
| 210 | 
            +
            signing_key: 
         | 
| 211 | 
            +
            specification_version: 4
         | 
| 212 | 
            +
            summary: HTTP spellchecker
         | 
| 213 | 
            +
            test_files: []
         |