site_checker 0.1.1 → 0.2.0.pre
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.rbenv-version +1 -0
- data/.rspec +1 -0
- data/History.md +9 -0
- data/LICENSE +29 -0
- data/README.md +102 -0
- data/gem_tasks/rspec.rake +6 -0
- data/gem_tasks/yard.rake +6 -0
- data/lib/site_checker/dsl.rb +17 -0
- data/lib/site_checker/io/content_from_file_system.rb +43 -0
- data/lib/site_checker/io/content_from_web.rb +36 -0
- data/lib/site_checker/link.rb +60 -0
- data/lib/site_checker/link_collector.rb +153 -0
- data/lib/site_checker/parse/page.rb +82 -0
- data/lib/site_checker.rb +90 -206
- data/site_checker.gemspec +24 -0
- data/spec/dsl_spec.rb +37 -0
- data/spec/integration_spec.rb +191 -0
- data/spec/site_checker/io/content_from_file_system_spec.rb +61 -0
- data/spec/site_checker/io/content_from_web_spec.rb +46 -0
- data/spec/site_checker/io/io_spec_helper.rb +22 -0
- data/spec/site_checker/link_collector_spec.rb +41 -0
- data/spec/site_checker/link_spec.rb +94 -0
- data/spec/site_checker/parse/page_spec.rb +71 -0
- data/spec/site_checker/parse/parse_spec_helper.rb +8 -0
- data/spec/spec_helper.rb +10 -0
- metadata +134 -66
    
        data/lib/site_checker.rb
    CHANGED
    
    | @@ -1,224 +1,108 @@ | |
| 1 | 
            -
            require 'nokogiri'
         | 
| 2 1 | 
             
            require 'open-uri'
         | 
| 2 | 
            +
            require 'nokogiri'
         | 
| 3 3 |  | 
| 4 | 
            -
             | 
| 5 | 
            -
             | 
| 6 | 
            -
             | 
| 7 | 
            -
             | 
| 8 | 
            -
             | 
| 9 | 
            -
             | 
| 10 | 
            -
             | 
| 11 | 
            -
             | 
| 12 | 
            -
             | 
| 13 | 
            -
             | 
| 14 | 
            -
             | 
| 15 | 
            -
             | 
| 16 | 
            -
                 | 
| 17 | 
            -
                 | 
| 18 | 
            -
             | 
| 19 | 
            -
                
         | 
| 20 | 
            -
                 | 
| 21 | 
            -
             | 
| 22 | 
            -
                 | 
| 23 | 
            -
                 | 
| 24 | 
            -
             | 
| 25 | 
            -
             | 
| 26 | 
            -
             | 
| 27 | 
            -
                 | 
| 28 | 
            -
             | 
| 29 | 
            -
             | 
| 30 | 
            -
             | 
| 31 | 
            -
                 | 
| 32 | 
            -
             | 
| 33 | 
            -
             | 
| 34 | 
            -
             | 
| 35 | 
            -
                 | 
| 36 | 
            -
             | 
| 37 | 
            -
             | 
| 38 | 
            -
             | 
| 39 | 
            -
                 | 
| 40 | 
            -
             | 
| 41 | 
            -
             | 
| 42 | 
            -
              private
         | 
| 43 | 
            -
              def process_local_page(url, parent_url)
         | 
| 44 | 
            -
                links = collect_links(url, parent_url)
         | 
| 45 | 
            -
             | 
| 46 | 
            -
                filter_out_working_anchors!(links)
         | 
| 47 | 
            -
                report_and_remove_anchors!(links, parent_url)
         | 
| 48 | 
            -
             | 
| 49 | 
            -
                links.each do |link, kind|
         | 
| 50 | 
            -
                  if kind != :anchor
         | 
| 51 | 
            -
                    visit(kind, url, link) unless visited?(kind, link)
         | 
| 52 | 
            -
                  else
         | 
| 53 | 
            -
                  end
         | 
| 4 | 
            +
            require 'site_checker/io/content_from_file_system'
         | 
| 5 | 
            +
            require 'site_checker/io/content_from_web'
         | 
| 6 | 
            +
            require 'site_checker/parse/page'
         | 
| 7 | 
            +
            require 'site_checker/link'
         | 
| 8 | 
            +
            require 'site_checker/link_collector'
         | 
| 9 | 
            +
            require 'site_checker/dsl'
         | 
| 10 | 
            +
             | 
| 11 | 
            +
            module SiteChecker
         | 
| 12 | 
            +
              class << self
         | 
| 13 | 
            +
                attr_accessor :ignore_list
         | 
| 14 | 
            +
                attr_accessor :visit_references
         | 
| 15 | 
            +
                attr_accessor :max_recursion_depth
         | 
| 16 | 
            +
                attr_accessor :dsl_enabled
         | 
| 17 | 
            +
                attr_reader   :link_collector
         | 
| 18 | 
            +
             | 
| 19 | 
            +
                ##
         | 
| 20 | 
            +
                # The following configuration options, which can be used together, are available:
         | 
| 21 | 
            +
                #
         | 
| 22 | 
            +
                # - ignoring certain links:
         | 
| 23 | 
            +
                #
         | 
| 24 | 
            +
                #     SiteChecker.configure do |config|
         | 
| 25 | 
            +
                #       config.ignore_list = ["/", "/atom.xml"]
         | 
| 26 | 
            +
                #     end
         | 
| 27 | 
            +
                #
         | 
| 28 | 
            +
                # - visit the external references as well:
         | 
| 29 | 
            +
                #
         | 
| 30 | 
            +
                #     SiteChecker.configure do |config|
         | 
| 31 | 
            +
                #       config.visit_references = true
         | 
| 32 | 
            +
                #     end
         | 
| 33 | 
            +
                #
         | 
| 34 | 
            +
                # - set the depth of the recursion:
         | 
| 35 | 
            +
                #
         | 
| 36 | 
            +
                #     SiteChecker.configure do |config|
         | 
| 37 | 
            +
                #       config.max_recursion_depth = 3
         | 
| 38 | 
            +
                #     end
         | 
| 39 | 
            +
                def configure
         | 
| 40 | 
            +
                  yield self
         | 
| 54 41 | 
             
                end
         | 
| 55 | 
            -
              end
         | 
| 56 | 
            -
             | 
| 57 | 
            -
              def register_visit(kind, link)
         | 
| 58 | 
            -
                @visits[kind] = [] unless @visits.has_key?(kind)
         | 
| 59 | 
            -
                @visits[kind] << link
         | 
| 60 | 
            -
              end
         | 
| 61 42 |  | 
| 62 | 
            -
             | 
| 63 | 
            -
                 | 
| 64 | 
            -
                 | 
| 65 | 
            -
             | 
| 66 | 
            -
             | 
| 67 | 
            -
             | 
| 68 | 
            -
             | 
| 69 | 
            -
                   | 
| 70 | 
            -
             | 
| 71 | 
            -
                  else
         | 
| 72 | 
            -
                    unless stop_recursion?
         | 
| 73 | 
            -
                      @recursion_depth += 1
         | 
| 74 | 
            -
                      process_local_page(link, parent_url)
         | 
| 75 | 
            -
                      @recursion_depth -= 1
         | 
| 76 | 
            -
                    end
         | 
| 77 | 
            -
                  end
         | 
| 78 | 
            -
              end
         | 
| 79 | 
            -
             | 
| 80 | 
            -
              def open_reference(kind, link, parent_url)
         | 
| 81 | 
            -
                content = nil
         | 
| 82 | 
            -
                begin
         | 
| 83 | 
            -
                  if kind == :local_page
         | 
| 84 | 
            -
                    if URI(@root).absolute?
         | 
| 85 | 
            -
                      content = open(link)
         | 
| 86 | 
            -
                    else
         | 
| 87 | 
            -
                      link = add_index_html(link)
         | 
| 88 | 
            -
                      content = File.open(link).read
         | 
| 89 | 
            -
                    end
         | 
| 90 | 
            -
                  elsif kind == :local_image
         | 
| 91 | 
            -
                    if URI(@root).absolute?
         | 
| 92 | 
            -
                      open(link)
         | 
| 93 | 
            -
                    else
         | 
| 94 | 
            -
                      File.open(link)
         | 
| 95 | 
            -
                    end
         | 
| 96 | 
            -
                  elsif @visit_references
         | 
| 97 | 
            -
                    open(link)
         | 
| 98 | 
            -
                  end
         | 
| 99 | 
            -
                rescue OpenURI::HTTPError => e
         | 
| 100 | 
            -
                  new_problem(strip_root(parent_url), "#{strip_root(link)} (#{e.message.strip})")
         | 
| 101 | 
            -
                rescue Errno::ENOENT => e
         | 
| 102 | 
            -
                  link = remove_index_html(link) if kind == :local_page
         | 
| 103 | 
            -
                  new_problem(strip_root(parent_url), "#{strip_root(link)} (404 Not Found)")
         | 
| 104 | 
            -
                rescue => e
         | 
| 105 | 
            -
                  new_problem(strip_root(parent_url), "#{strip_root(link)} (#{e.message.strip})")
         | 
| 43 | 
            +
                ##
         | 
| 44 | 
            +
                # Recursively visits the provided url looking for reference problems.
         | 
| 45 | 
            +
                #
         | 
| 46 | 
            +
                # @param [String] url where the processing starts
         | 
| 47 | 
            +
                # @param [String] root the root URL of the site
         | 
| 48 | 
            +
                #
         | 
| 49 | 
            +
                def check(url, root)
         | 
| 50 | 
            +
                  create_instance
         | 
| 51 | 
            +
                  @link_collector.check(url, root)
         | 
| 106 52 | 
             
                end
         | 
| 107 | 
            -
                content
         | 
| 108 | 
            -
              end
         | 
| 109 53 |  | 
| 110 | 
            -
             | 
| 111 | 
            -
                 | 
| 112 | 
            -
             | 
| 113 | 
            -
             | 
| 114 | 
            -
             | 
| 115 | 
            -
                 | 
| 116 | 
            -
             | 
| 117 | 
            -
                  new_problem(strip_root(parent_url), "#{strip_root(anchor)} (404 Not Found)")
         | 
| 118 | 
            -
                  links.delete(anchor)
         | 
| 54 | 
            +
                ##
         | 
| 55 | 
            +
                # Returns the Array of the visited local pages.
         | 
| 56 | 
            +
                #
         | 
| 57 | 
            +
                # @return [Array] list of the visited local pages
         | 
| 58 | 
            +
                #
         | 
| 59 | 
            +
                def local_pages
         | 
| 60 | 
            +
                  @link_collector.local_pages
         | 
| 119 61 | 
             
                end
         | 
| 120 | 
            -
              end
         | 
| 121 | 
            -
             | 
| 122 | 
            -
              def has_anchor?(links, link)
         | 
| 123 | 
            -
                anchor = link.gsub(/^.+#/, "")
         | 
| 124 | 
            -
                links.has_key?(anchor) && links[anchor] == :anchor
         | 
| 125 | 
            -
              end
         | 
| 126 | 
            -
             | 
| 127 | 
            -
             | 
| 128 | 
            -
              def absolute_reference?(link)
         | 
| 129 | 
            -
                link.start_with?(@root)
         | 
| 130 | 
            -
              end
         | 
| 131 62 |  | 
| 132 | 
            -
             | 
| 133 | 
            -
                 | 
| 134 | 
            -
             | 
| 135 | 
            -
             | 
| 136 | 
            -
             | 
| 137 | 
            -
                 | 
| 138 | 
            -
             | 
| 139 | 
            -
                if content
         | 
| 140 | 
            -
                  doc = Nokogiri(content)
         | 
| 141 | 
            -
                  doc.xpath("//img").reject {|img| ignored?(img['src'])}.each do |img|
         | 
| 142 | 
            -
                    link_kind = detect_link_and_kind(img['src'], url, :remote_image, :local_image)
         | 
| 143 | 
            -
                    links.merge!(link_kind) unless link_kind.empty?    
         | 
| 144 | 
            -
                  end
         | 
| 145 | 
            -
                  doc.xpath("//a").reject {|a| ignored?(a['href'])}.each do |a|
         | 
| 146 | 
            -
                    link_kind = detect_link_and_kind(a['href'], url, :remote_page, :local_page)
         | 
| 147 | 
            -
                    links.merge!(link_kind) unless link_kind.empty?
         | 
| 148 | 
            -
                  end
         | 
| 149 | 
            -
                
         | 
| 150 | 
            -
                  doc.xpath("//a").reject {|a| !a['id']}.each do |a|
         | 
| 151 | 
            -
                    links.merge!({a['id'] => :anchor})
         | 
| 152 | 
            -
                  end
         | 
| 63 | 
            +
                ##
         | 
| 64 | 
            +
                # Returns the Array of the visited remote (external) pages.
         | 
| 65 | 
            +
                #
         | 
| 66 | 
            +
                # @return [Array] list of the visited remote pages
         | 
| 67 | 
            +
                #
         | 
| 68 | 
            +
                def remote_pages
         | 
| 69 | 
            +
                  @link_collector.remote_pages
         | 
| 153 70 | 
             
                end
         | 
| 154 | 
            -
                links
         | 
| 155 | 
            -
              end
         | 
| 156 71 |  | 
| 157 | 
            -
             | 
| 158 | 
            -
                 | 
| 159 | 
            -
                 | 
| 160 | 
            -
                 | 
| 161 | 
            -
             | 
| 162 | 
            -
                 | 
| 163 | 
            -
                   | 
| 164 | 
            -
                    link_kind[link.to_s] = external_kind
         | 
| 165 | 
            -
                  else
         | 
| 166 | 
            -
                    link_kind[create_absolute_reference(link.to_s)] = local_kind
         | 
| 167 | 
            -
                  end
         | 
| 72 | 
            +
                ##
         | 
| 73 | 
            +
                # Returns the Array of the visited local images.
         | 
| 74 | 
            +
                #
         | 
| 75 | 
            +
                # @return [Array] list of the visited local images
         | 
| 76 | 
            +
                #
         | 
| 77 | 
            +
                def local_images
         | 
| 78 | 
            +
                  @link_collector.local_images
         | 
| 168 79 | 
             
                end
         | 
| 169 | 
            -
                link_kind
         | 
| 170 | 
            -
              end
         | 
| 171 | 
            -
             | 
| 172 | 
            -
              def strip_trailing_slash(link)
         | 
| 173 | 
            -
                link.gsub(/\/$/, "")
         | 
| 174 | 
            -
              end
         | 
| 175 80 |  | 
| 176 | 
            -
             | 
| 177 | 
            -
                 | 
| 178 | 
            -
             | 
| 179 | 
            -
                 | 
| 180 | 
            -
             | 
| 81 | 
            +
                ##
         | 
| 82 | 
            +
                # Returns the Array of the visited remote (external) images.
         | 
| 83 | 
            +
                #
         | 
| 84 | 
            +
                # @return [Array] list of the visited remote images
         | 
| 85 | 
            +
                #
         | 
| 86 | 
            +
                def remote_images
         | 
| 87 | 
            +
                  @link_collector.remote_images
         | 
| 181 88 | 
             
                end
         | 
| 182 | 
            -
              end
         | 
| 183 | 
            -
             | 
| 184 | 
            -
              def add_index_html(path)
         | 
| 185 | 
            -
                path.end_with?(".html") ? path : File.join(path, "index.html") 
         | 
| 186 | 
            -
              end
         | 
| 187 | 
            -
             | 
| 188 | 
            -
              def remove_index_html(path)
         | 
| 189 | 
            -
                path.gsub(/\/index.html$/, "")
         | 
| 190 | 
            -
              end
         | 
| 191 89 |  | 
| 192 | 
            -
             | 
| 193 | 
            -
                 | 
| 194 | 
            -
                 | 
| 195 | 
            -
             | 
| 196 | 
            -
                 | 
| 197 | 
            -
             | 
| 90 | 
            +
                ##
         | 
| 91 | 
            +
                # Returns the Hash (:parent_url => [Array of problematic links]) of the problems.
         | 
| 92 | 
            +
                #
         | 
| 93 | 
            +
                # @return [Hash] the result of the check
         | 
| 94 | 
            +
                #
         | 
| 95 | 
            +
                def problems
         | 
| 96 | 
            +
                  @link_collector.problems
         | 
| 198 97 | 
             
                end
         | 
| 199 | 
            -
              end
         | 
| 200 | 
            -
             | 
| 201 | 
            -
              def new_problem(url, message)
         | 
| 202 | 
            -
                url = @root if url.empty?
         | 
| 203 | 
            -
                @problems[url] = [] unless problems.has_key?(url)
         | 
| 204 | 
            -
                @problems[url] << message
         | 
| 205 | 
            -
              end
         | 
| 206 98 |  | 
| 207 | 
            -
             | 
| 208 | 
            -
                 | 
| 209 | 
            -
                  @ | 
| 210 | 
            -
             | 
| 211 | 
            -
             | 
| 212 | 
            -
             | 
| 213 | 
            -
             | 
| 214 | 
            -
              
         | 
| 215 | 
            -
              def stop_recursion?
         | 
| 216 | 
            -
                if @max_recursion_depth == -1
         | 
| 217 | 
            -
                  false
         | 
| 218 | 
            -
                elsif @max_recursion_depth > @recursion_depth
         | 
| 219 | 
            -
                  false
         | 
| 220 | 
            -
                else
         | 
| 221 | 
            -
                  true
         | 
| 99 | 
            +
                private
         | 
| 100 | 
            +
                def create_instance
         | 
| 101 | 
            +
                  @link_collector = SiteChecker::LinkCollector.new do |config|
         | 
| 102 | 
            +
                    config.visit_references = @visit_references if @visit_references
         | 
| 103 | 
            +
                    config.ignore_list = @ignore_list if @ignore_list
         | 
| 104 | 
            +
                    config.max_recursion_depth = @max_recursion_depth if @max_recursion_depth
         | 
| 105 | 
            +
                  end
         | 
| 222 106 | 
             
                end
         | 
| 223 107 | 
             
              end
         | 
| 224 | 
            -
            end
         | 
| 108 | 
            +
            end
         | 
| @@ -0,0 +1,24 @@ | |
| 1 | 
            +
            # -*- encoding: utf-8 -*-
         | 
| 2 | 
            +
            Gem::Specification.new do |s|
         | 
| 3 | 
            +
              s.name        = 'site_checker'
         | 
| 4 | 
            +
              s.version     = '0.2.0.pre'
         | 
| 5 | 
            +
              s.date        = '2012-12-22'
         | 
| 6 | 
            +
              s.summary     = "site_checker-#{s.version}"
         | 
| 7 | 
            +
              s.description = "A simple tool for checking references on your website"
         | 
| 8 | 
            +
              s.authors     = ["Zsolt Fabok"]
         | 
| 9 | 
            +
              s.email       = 'me@zsoltfabok.com'
         | 
| 10 | 
            +
              s.homepage    = 'https://github.com/ZsoltFabok/site_checker'
         | 
| 11 | 
            +
              s.license     = 'BSD'
         | 
| 12 | 
            +
             | 
| 13 | 
            +
              s.files         = `git ls-files`.split("\n").reject {|path| path =~ /\.gitignore$/ || path =~ /file$/ }
         | 
| 14 | 
            +
              s.test_files    = `git ls-files -- {test,spec,features}/*`.split("\n")
         | 
| 15 | 
            +
              s.executables   = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
         | 
| 16 | 
            +
              s.require_paths = ["lib"]
         | 
| 17 | 
            +
             | 
| 18 | 
            +
              s.add_development_dependency('rspec'  , '2.12.0')
         | 
| 19 | 
            +
              s.add_development_dependency('webmock', '1.9.0')
         | 
| 20 | 
            +
              s.add_development_dependency('rake'   , '10.0.3')
         | 
| 21 | 
            +
              s.add_development_dependency('yard'   , '0.8.3')
         | 
| 22 | 
            +
             | 
| 23 | 
            +
              s.add_runtime_dependency('nokogiri', '1.5.6')
         | 
| 24 | 
            +
            end
         | 
    
        data/spec/dsl_spec.rb
    ADDED
    
    | @@ -0,0 +1,37 @@ | |
| 1 | 
            +
            require 'spec_helper'
         | 
| 2 | 
            +
            require 'site_checker/io/io_spec_helper'
         | 
| 3 | 
            +
             | 
| 4 | 
            +
            describe "DSL" do
         | 
| 5 | 
            +
              include IoSpecHelper
         | 
| 6 | 
            +
             | 
| 7 | 
            +
              before(:each) do
         | 
| 8 | 
            +
                @test_url = "http://localhost:4000"
         | 
| 9 | 
            +
                @root = "http://localhost:4000"
         | 
| 10 | 
            +
              end
         | 
| 11 | 
            +
             | 
| 12 | 
            +
              it "should forward all the method calls if DSL is enabled" do
         | 
| 13 | 
            +
                 SiteChecker.configure do |config|
         | 
| 14 | 
            +
                  config.dsl_enabled = true
         | 
| 15 | 
            +
                end
         | 
| 16 | 
            +
             | 
| 17 | 
            +
                local_pages   = mock()
         | 
| 18 | 
            +
                local_images  = mock()
         | 
| 19 | 
            +
                remote_pages  = mock()
         | 
| 20 | 
            +
                remote_images = mock()
         | 
| 21 | 
            +
                problems      = mock()
         | 
| 22 | 
            +
             | 
| 23 | 
            +
                SiteChecker.should_receive(:check).with(@test_url, @root)
         | 
| 24 | 
            +
                SiteChecker.should_receive(:local_pages).and_return(local_pages)
         | 
| 25 | 
            +
                SiteChecker.should_receive(:remote_pages).and_return(remote_pages)
         | 
| 26 | 
            +
                SiteChecker.should_receive(:local_images).and_return(local_images)
         | 
| 27 | 
            +
                SiteChecker.should_receive(:remote_images).and_return(remote_images)
         | 
| 28 | 
            +
                SiteChecker.should_receive(:problems).and_return(problems)
         | 
| 29 | 
            +
             | 
| 30 | 
            +
                check_site(@test_url, @root)
         | 
| 31 | 
            +
                collected_local_pages.should eql(local_pages)
         | 
| 32 | 
            +
                collected_remote_pages.should eql(remote_pages)
         | 
| 33 | 
            +
                collected_local_images.should eql(local_images)
         | 
| 34 | 
            +
                collected_remote_images.should eql(remote_images)
         | 
| 35 | 
            +
                collected_problems.should eql(problems)
         | 
| 36 | 
            +
              end
         | 
| 37 | 
            +
            end
         | 
| @@ -0,0 +1,191 @@ | |
| 1 | 
            +
            require 'spec_helper'
         | 
| 2 | 
            +
            require 'site_checker/io/io_spec_helper'
         | 
| 3 | 
            +
             | 
| 4 | 
            +
            describe "Integration" do
         | 
| 5 | 
            +
              include IoSpecHelper
         | 
| 6 | 
            +
             | 
| 7 | 
            +
              before(:each) do
         | 
| 8 | 
            +
                SiteChecker.configure do |config|
         | 
| 9 | 
            +
                  config.visit_references = true
         | 
| 10 | 
            +
                end
         | 
| 11 | 
            +
              end
         | 
| 12 | 
            +
             | 
| 13 | 
            +
              describe "server based checking" do
         | 
| 14 | 
            +
                before(:each) do
         | 
| 15 | 
            +
                  @test_url = "http://localhost:4000"
         | 
| 16 | 
            +
                  @root = "http://localhost:4000"
         | 
| 17 | 
            +
                end
         | 
| 18 | 
            +
             | 
| 19 | 
            +
                it "should visit the page" do
         | 
| 20 | 
            +
                  content = "<html></html>"
         | 
| 21 | 
            +
                  webmock(@test_url, 200, content)
         | 
| 22 | 
            +
                  SiteChecker.check(@test_url, @root)
         | 
| 23 | 
            +
                  SiteChecker.local_pages.should eql([@test_url])
         | 
| 24 | 
            +
                  SiteChecker.problems.should be_empty
         | 
| 25 | 
            +
                end
         | 
| 26 | 
            +
             | 
| 27 | 
            +
                it "should check the link to an external page" do
         | 
| 28 | 
            +
                  content = "<html>text<a href=\"http://external.org/\"/></html>"
         | 
| 29 | 
            +
                  webmock(@test_url, 200, content)
         | 
| 30 | 
            +
                  webmock("http://external.org", 200, "")
         | 
| 31 | 
            +
                  SiteChecker.check(@test_url, @root)
         | 
| 32 | 
            +
                  SiteChecker.remote_pages.should eql(["http://external.org/" ])
         | 
| 33 | 
            +
                  SiteChecker.problems.should be_empty
         | 
| 34 | 
            +
                end
         | 
| 35 | 
            +
             | 
| 36 | 
            +
                it "should not check the link to an external page if the reference checking is turned off" do
         | 
| 37 | 
            +
                  SiteChecker.configure do |config|
         | 
| 38 | 
            +
                    config.visit_references = false
         | 
| 39 | 
            +
                  end
         | 
| 40 | 
            +
                  content = "<html>text<a href=\"http://external.org/\"/></html>"
         | 
| 41 | 
            +
                  webmock(@test_url, 200, content)
         | 
| 42 | 
            +
                  SiteChecker.check(@test_url, @root)
         | 
| 43 | 
            +
                  SiteChecker.problems.should be_empty
         | 
| 44 | 
            +
                end
         | 
| 45 | 
            +
             | 
| 46 | 
            +
                it "should report a problem if the external link is dead" do
         | 
| 47 | 
            +
                  content = "<html>text<a href=\"http://external.org/\"/></html>"
         | 
| 48 | 
            +
                  webmock(@test_url, 200, content)
         | 
| 49 | 
            +
                  webmock("http://external.org", 404, "")
         | 
| 50 | 
            +
                  SiteChecker.check(@test_url, @root)
         | 
| 51 | 
            +
                  SiteChecker.problems.should eql({@test_url => ["http://external.org/ (404)"]})
         | 
| 52 | 
            +
                end
         | 
| 53 | 
            +
             | 
| 54 | 
            +
                it "should check the link to an external image" do
         | 
| 55 | 
            +
                  content = "<html>text<img src=\"http://external.org/a.png\"/></html>"
         | 
| 56 | 
            +
                  webmock(@test_url, 200, content)
         | 
| 57 | 
            +
                  webmock("http://external.org/a.png", 200, "")
         | 
| 58 | 
            +
                  SiteChecker.check(@test_url, @root)
         | 
| 59 | 
            +
                  SiteChecker.problems.should be_empty
         | 
| 60 | 
            +
                end
         | 
| 61 | 
            +
             | 
| 62 | 
            +
                it "should check the link to a local image" do
         | 
| 63 | 
            +
                  content = "<html>text<img src=\"/a.png\"/></html>"
         | 
| 64 | 
            +
                  webmock(@test_url, 200, content)
         | 
| 65 | 
            +
                  webmock("#{@test_url}/a.png", 200, "")
         | 
| 66 | 
            +
                  SiteChecker.check(@test_url, @root)
         | 
| 67 | 
            +
                  SiteChecker.problems.should be_empty
         | 
| 68 | 
            +
                end
         | 
| 69 | 
            +
             | 
| 70 | 
            +
                # An external image answering 404 is reported against the page
                # that references it.
                it "should report a problem if the image cannot be found" do
                  image_url = "http://external.org/a.png"
                  page = %(<html>text<img src="#{image_url}"/></html>)

                  webmock(@test_url, 200, page)
                  webmock(image_url, 404, "")

                  SiteChecker.check(@test_url, @root)

                  expected = { @test_url => ["#{image_url} (404)"] }
                  SiteChecker.problems.should eql(expected)
                end
         | 
| 77 | 
            +
             | 
| 78 | 
            +
                # A link to a page of the checked site written as a full URL is
                # flagged as "(absolute path)" even though the target answers 200.
                it "should report a problem for a local page with absolute path" do
                  target = "#{@test_url}/another"
                  page = %(<html>text<a href="#{target}"/></html>)

                  webmock(@test_url, 200, page)
                  webmock(target, 200, "")

                  SiteChecker.check(@test_url, @root)

                  expected = { @test_url => ["#{target} (absolute path)"] }
                  SiteChecker.problems.should eql(expected)
                end
         | 
| 85 | 
            +
             | 
| 86 | 
            +
                # An image of the checked site referenced by its full URL is
                # flagged as "(absolute path)" even though it answers 200.
                it "should report a problem for a local image with absolute path" do
                  image_url = "#{@test_url}/a.png"
                  page = %(<html>text<img src="#{image_url}"/></html>)

                  webmock(image_url, 200, "")
                  webmock(@test_url, 200, page)

                  SiteChecker.check(@test_url, @root)

                  expected = { @test_url => ["#{image_url} (absolute path)"] }
                  SiteChecker.problems.should eql(expected)
                end
         | 
| 93 | 
            +
             | 
| 94 | 
            +
                # Links listed in the configured ignore_list produce no problems
                # even though no response is registered for them here.
                it "should filter out certain links" do
                  SiteChecker.configure { |config| config.ignore_list = %w[/atom.xml /] }

                  page = '<html>text<a href="/atom.xml"/><br/><a href="/"/></html>'
                  webmock(@test_url, 200, page)

                  SiteChecker.check(@test_url, @root)
                  SiteChecker.problems.should be_empty
                end
         | 
| 103 | 
            +
             | 
| 104 | 
            +
                # "#goto" has a matching id="goto" element on the same page, so
                # nothing is reported.
                it "should not report a valid internal anchor" do
                  page = '<html><a href="#goto">goto</a>text<a id="goto"></a></html>'
                  webmock(@test_url, 200, page)

                  SiteChecker.check(@test_url, @root)
                  SiteChecker.problems.should be_empty
                end
         | 
| 110 | 
            +
             | 
| 111 | 
            +
                # The page links to "#goto" but only defines id="got", so the
                # anchor is reported as not found.
                it "should report an invalid internal anchor" do
                  page = '<html><a href="#goto">goto</a>text<a id="got"></a></html>'
                  webmock(@test_url, 200, page)

                  SiteChecker.check(@test_url, @root)

                  expected = { @test_url => ["#goto (404 Not Found)"] }
                  SiteChecker.problems.should eql(expected)
                end
         | 
| 117 | 
            +
             | 
| 118 | 
            +
                # A link with a fragment on another host: only the bare external
                # URL (without "#goto") has a response registered, and no problem
                # is expected.
                it "should follow an external anchor to the external page" do
                  external_url = "http://example.org"
                  page = %(<html><a href="#{external_url}#goto">goto</a></html>)

                  webmock(@test_url, 200, page)
                  webmock(external_url, 200, page)

                  SiteChecker.check(@test_url, @root)
                  SiteChecker.problems.should be_empty
                end
         | 
| 125 | 
            +
             | 
| 126 | 
            +
                # A root-relative link to a sub page that answers 200 yields no
                # problems.
                it "should go down one level down for an internal page" do
                  page = '<html>text<a href="/one-level-down"/></html>'
                  sub_page_url = "#{@root}/one-level-down"

                  webmock(@test_url, 200, page)
                  webmock(sub_page_url, 200, "<html></html>")

                  SiteChecker.check(@test_url, @root)
                  SiteChecker.problems.should be_empty
                end
         | 
| 133 | 
            +
             | 
| 134 | 
            +
                # A root-relative link whose target answers 404 is reported under
                # the referencing page, using the link text as written.
                it "should report a problem with a linked local page" do
                  page = '<html>text<a href="/one-level-down"/></html>'
                  sub_page_url = "#{@root}/one-level-down"

                  webmock(@test_url, 200, page)
                  webmock(sub_page_url, 404, "<html></html>")

                  SiteChecker.check(@test_url, @root)

                  expected = { @test_url => ["/one-level-down (404)"] }
                  SiteChecker.problems.should eql(expected)
                end
         | 
| 141 | 
            +
              end
         | 
| 142 | 
            +
             | 
| 143 | 
            +
              # Integration examples that run SiteChecker against files written
              # under fs_test_path instead of stubbed HTTP responses.
              # fs_test_path, clean_fs_test_path and filesystemmock are spec
              # helpers defined outside this section.
              describe "file system based checking" do
                # Every example starts from a cleaned fixture directory and uses
                # it as the site root.
                before(:each) do
                  @root = fs_test_path
                  clean_fs_test_path
                end

                it "should find a referenced page" do
                  # @root is already assigned by the before hook; no need to set
                  # it again here.
                  content = "<html>text<a href=\"/one-level-down\"/></html>"
                  filesystemmock("index.html", content)
                  filesystemmock("/one-level-down/index.html", content)
                  SiteChecker.check(fs_test_path, @root)
                  SiteChecker.local_pages.should eql([fs_test_path, "/one-level-down"])
                  SiteChecker.problems.should be_empty
                end

                it "should report a problem when the local page cannot be found" do
                  # Only index.html exists; the linked page is never created.
                  content = "<html>text<a href=\"/one-level-down\"/></html>"
                  filesystemmock("index.html", content)
                  SiteChecker.check(fs_test_path, @root)
                  SiteChecker.problems.should eql({fs_test_path => ["/one-level-down (404 Not Found)"]})
                end

                it "should use the local images" do
                  content = "<html>text<img src=\"/a.png\"/></html>"
                  filesystemmock("index.html", content)
                  filesystemmock("a.png", "")
                  SiteChecker.check(fs_test_path, @root)
                  SiteChecker.local_images.should eql(["/a.png"])
                  SiteChecker.problems.should be_empty
                end

                it "should report a problem when the local image cannot be found" do
                  # The referenced image file is deliberately not created.
                  content = "<html>text<img src=\"/a.png\"/></html>"
                  filesystemmock("index.html", content)
                  SiteChecker.check(fs_test_path, @root)
                  SiteChecker.problems.should eql({fs_test_path => ["/a.png (404 Not Found)"]})
                end

                it "should be able to handle anchors in other files" do
                  # index.html links to the "goto" anchor that lives in
                  # other/index.html.
                  content = "<html><a href=\"/other#goto\">goto</a>text<a id=\"goto\"></a></html>"
                  content2 = "<html><a id=\"goto\">goto</a>"
                  filesystemmock("index.html", content)
                  filesystemmock("other/index.html", content2)
                  SiteChecker.check(fs_test_path, @root)
                  SiteChecker.problems.should be_empty
                end
              end
         | 
| 191 | 
            +
            end
         | 
| @@ -0,0 +1,61 @@ | |
| 1 | 
            +
            require 'spec_helper'
         | 
| 2 | 
            +
             | 
| 3 | 
            +
            # Unit examples for ContentFromFileSystem#get. File.open is replaced
            # by message expectations, so nothing is read from disk.
            describe SiteChecker::IO::ContentFromFileSystem do
              context "#get" do
                before(:each) do
                  @root = "/home/test/web/public"
                  @link = SiteChecker::Link.create({:url => "link", :kind => :page, :location => :local})
                  @file = mock(File)
                  @content = mock()
                  # First constructor argument is false here and true in the
                  # opt-in example below.
                  @content_reader = SiteChecker::IO::ContentFromFileSystem.new(false, @root)
                end

                it "should return the content of a link using the local index.html" do
                  File.should_receive(:open).with("#{@root}/#{@link.url}/index.html") {@file}
                  @file.should_receive(:read) {@content}
                  @content_reader.get(@link).should eql(@content)
                end

                it "should return the content of a link which points to a real .html file" do
                  @link.url = "/about.html"
                  File.should_receive(:open).with("#{@root}/about.html") {@file}
                  @file.should_receive(:read) {@content}
                  @content_reader.get(@link).should eql(@content)
                end

                it "should return the content of a link with anchor" do
                  # The "#something" fragment must not be part of the opened path.
                  @link.url = "/about#something"
                  File.should_receive(:open).with("#{@root}/about/index.html") {@file}
                  @file.should_receive(:read) {@content}
                  @content_reader.get(@link).should eql(@content)
                end

                it "should raise error if the link is broken" do
                  File.should_receive(:open).with("#{@root}/#{@link.url}/index.html").and_raise(Errno::ENOENT)
                  expect {@content_reader.get(@link)}.to raise_error(RuntimeError, "(404 Not Found)")
                end

                it "should check the existence of a local image" do
                  @link.kind = :image
                  @link.url = "img/image1"
                  File.should_receive(:open).with("#{@root}/#{@link.url}") {@file}
                  @file.should_not_receive(:read)
                  # The message expectations above are the assertions. The
                  # original trailing `.should` had no matcher and verified
                  # nothing, so it is dropped.
                  @content_reader.get(@link)
                end

                it "should not open a remote reference if opt-out" do
                  @link.location = :remote
                  File.should_not_receive(:open)
                  @content_reader.get(@link)
                end

                it "should open a remote reference if opt-in" do
                  @content_reader = SiteChecker::IO::ContentFromFileSystem.new(true, @root)
                  @link.location = :remote
                  @link.url = "http://example.org"
                  File.should_not_receive(:open)
                  # The reader's own #open must be called rather than File.open.
                  @content_reader.should_receive(:open)
                  @content_reader.get(@link)
                end
              end
            end
         | 
| @@ -0,0 +1,46 @@ | |
| 1 | 
            +
            require 'spec_helper'
         | 
| 2 | 
            +
            require_relative 'io_spec_helper'
         | 
| 3 | 
            +
             | 
| 4 | 
            +
            # Unit examples for ContentFromWeb#get. The reader's #open is
            # replaced by message expectations, so no HTTP request is made.
            describe SiteChecker::IO::ContentFromWeb do
              include IoSpecHelper

              context "#get" do
                before(:each) do
                  @root = "http://localhost:4000"
                  @link = SiteChecker::Link.create({:url => "link", :kind => :page, :location => :local})
                  @content = mock()
                  # First constructor argument is false here and true in the
                  # opt-in example below.
                  @content_reader = SiteChecker::IO::ContentFromWeb.new(false, @root)
                end

                it "should return the content of a link" do
                  @content_reader.should_receive(:open).with(URI("#{@root}/#{@link.url}")).and_return(@content)
                  @content_reader.get(@link).should eql(@content)
                end

                it "should raise error if the link is broken" do
                  @content_reader.should_receive(:open).with(URI("#{@root}/#{@link.url}")).
                    and_raise(OpenURI::HTTPError.new("404 Not Found", nil))
                  expect {@content_reader.get(@link)}.to raise_error(RuntimeError, "(404 Not Found)")
                end

                it "should check the existence of an image" do
                  @link.kind = :image
                  @link.url = "img/image1"
                  @content_reader.should_receive(:open).with(URI("#{@root}/#{@link.url}"))
                  @content_reader.get(@link)
                end

                it "should not open a remote reference if opt-out" do
                  @link.location = :remote
                  # No argument constraint: any call to #open must fail this
                  # example, not only a call with the root-prefixed URL.
                  @content_reader.should_not_receive(:open)
                  @content_reader.get(@link)
                end

                it "should open a remote reference if opt-in" do
                  @content_reader = SiteChecker::IO::ContentFromWeb.new(true, @root)
                  @link.location = :remote
                  @link.url = "http://example.org"
                  # A remote link is opened by its own URL, without the root prefix.
                  @content_reader.should_receive(:open).with(URI(@link.url))
                  @content_reader.get(@link)
                end
              end
            end
         |