jekyll-link-checker 0.1.1 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/exe/jekyll-link-checker +7 -3
- data/lib/link-checker.rb +143 -36
- data/lib/version.rb +1 -1
- metadata +30 -2
    
        checksums.yaml
    CHANGED
    
    | @@ -1,7 +1,7 @@ | |
| 1 1 | 
             
            ---
         | 
| 2 2 | 
             
            SHA256:
         | 
| 3 | 
            -
              metadata.gz:  | 
| 4 | 
            -
              data.tar.gz:  | 
| 3 | 
            +
              metadata.gz: 95ce2357aa0eaea04dbc5695f935918cfb98b8a76786dfb2c9317579e8801a4f
         | 
| 4 | 
            +
              data.tar.gz: 2e1cbde04eaa0f36daf75027f14a45364a57b203409d30abfd353d82a1242235
         | 
| 5 5 | 
             
            SHA512:
         | 
| 6 | 
            -
              metadata.gz:  | 
| 7 | 
            -
              data.tar.gz:  | 
| 6 | 
            +
              metadata.gz: d77c8b80e72b2ff133b2188eed42c60fb07e174997097d9b7f4c558833bbe3db75b2c91c308a566f3f6fe757056c0a6c1913942062d886180ea552d15ea48348
         | 
| 7 | 
            +
              data.tar.gz: 7b2714ae7ebe65845a225d882091d3c34a0aa62a4e5104cc577296a759d37a0fde82369e85c0d420d6383e7690be774f3185490a4d835ad44b06a99dce03fbc0
         | 
    
        data/exe/jekyll-link-checker
    CHANGED
    
    | @@ -39,15 +39,19 @@ parser = OptionParser.new do |opts| | |
| 39 39 | 
             
              opts.on("-S SKIP_LIST", "--skip-list-file SKIP_LIST", "File containing a list of links not to check. There must be one link per line.") do |skip_list|
         | 
| 40 40 | 
             
                link_checker.update_skip_list(skip_list)
         | 
| 41 41 | 
             
              end
         | 
| 42 | 
            -
              opts.on("-m MODE", "--mode MODE", "try-head (default): Tries to do a HEAD request and then a GET request if HEAD didn't return a success status | 
| 43 | 
            -
                                                "head-only: Only tries to do a HEAD request | 
| 42 | 
            +
              opts.on("-m MODE", "--mode MODE", "try-head (default): Tries to do a HEAD request and then a GET request if HEAD didn't return a success status",
         | 
| 43 | 
            +
                                                "head-only: Only tries to do a HEAD request",
         | 
| 44 44 | 
             
                                                "get-only: Only tries to do a GET request") do |mode|
         | 
| 45 45 | 
             
                link_checker.mode = mode
         | 
| 46 46 | 
             
              end
         | 
| 47 | 
            +
              opts.on("-i", "--[no-]ignore-fragments", "Whether to ignore the fragments " \
         | 
| 48 | 
            +
                                                         "in links. Defaults to false.") do |ignore_fragments|
         | 
| 49 | 
            +
                link_checker.ignore_fragments = ignore_fragments
         | 
| 50 | 
            +
              end
         | 
| 47 51 | 
             
              opts.on("-f", "--fail-fast", "Exits the program on the first invalid link") do |_|
         | 
| 48 52 | 
             
                link_checker.fail_fast = true
         | 
| 49 53 | 
             
              end
         | 
| 50 | 
            -
              opts.on("-a", "--[no] | 
| 54 | 
            +
              opts.on("-a", "--[no-]abort", "Abort the program on the first invalid link") do |abort_on_failure|
         | 
| 51 55 | 
             
                link_checker.abort_on_failure = abort_on_failure
         | 
| 52 56 | 
             
              end
         | 
| 53 57 | 
             
              opts.on("-V", "--verbose", "Run with verbose output") do |_|
         | 
    
        data/lib/link-checker.rb
    CHANGED
    
    | @@ -6,7 +6,7 @@ require "faraday-cookie_jar" | |
| 6 6 | 
             
            require "addressable"
         | 
| 7 7 | 
             
            require "pathname"
         | 
| 8 8 |  | 
| 9 | 
            -
            # Checks all the links of a
         | 
| 9 | 
            +
            # Checks all the links of a static website to make sure they're all valid
         | 
| 10 10 | 
             
            class LinkChecker
         | 
| 11 11 | 
             
              HEADERS = {
         | 
| 12 12 | 
             
                "User-Agent" => "Mozilla/5.0 (Windows NT 6.1) " \
         | 
| @@ -23,7 +23,8 @@ class LinkChecker | |
| 23 23 | 
             
                "Cache-Control" => "no-cache"
         | 
| 24 24 | 
             
              }.freeze
         | 
| 25 25 |  | 
| 26 | 
            -
              HREF = /href="([ | 
| 26 | 
            +
              HREF = /href="([^"\n]+)"/.freeze
         | 
| 27 | 
            +
              ID = /id="([^"\n]+)"/.freeze
         | 
| 27 28 | 
             
              HTML = %w[.html .htm].freeze
         | 
| 28 29 | 
             
              SCHEMES = %w[https http].freeze
         | 
| 29 30 |  | 
| @@ -32,7 +33,7 @@ class LinkChecker | |
| 32 33 | 
             
              DEFAULT_MODE = "try_head"
         | 
| 33 34 |  | 
| 34 35 | 
             
              attr_accessor :hostname, :baseurl, :site_folder, :skip_list, :mode, :verbose,
         | 
| 35 | 
            -
                            :fail_fast, :abort_on_failure
         | 
| 36 | 
            +
                            :ignore_fragments, :fail_fast, :abort_on_failure
         | 
| 36 37 | 
             
              attr_writer :files, :html_files, :links
         | 
| 37 38 |  | 
| 38 39 | 
             
              # Set default values for all the properties
         | 
| @@ -42,6 +43,7 @@ class LinkChecker | |
| 42 43 | 
             
                @site_folder = opts[:site_folder] || DEFAULT_SITE_FOLDER
         | 
| 43 44 |  | 
| 44 45 | 
             
                update_skip_list(opts[:skip_list] || [])
         | 
| 46 | 
            +
                @ignore_fragments = opts[:ignore_fragments]
         | 
| 45 47 |  | 
| 46 48 | 
             
                @mode = opts[:mode] || DEFAULT_MODE
         | 
| 47 49 | 
             
                @verbose = opts[:verbose]
         | 
| @@ -67,6 +69,7 @@ class LinkChecker | |
| 67 69 | 
             
                link_checker_config = config["link-checker"]
         | 
| 68 70 | 
             
                if link_checker_config
         | 
| 69 71 | 
             
                  opts[:skip_list] = link_checker_config["skip-list"]
         | 
| 72 | 
            +
                  opts[:ignore_fragments] = link_checker_config["ignore-fragments"]
         | 
| 70 73 | 
             
                  opts[:mode] = link_checker_config["mode"]
         | 
| 71 74 | 
             
                  opts[:verbose] = link_checker_config["verbose"]
         | 
| 72 75 | 
             
                  opts[:fail_fast] = link_checker_config["fail-fast"]
         | 
| @@ -116,21 +119,17 @@ class LinkChecker | |
| 116 119 | 
             
                error_count = 0
         | 
| 117 120 | 
             
                i = 0
         | 
| 118 121 | 
             
                prev_msg_size = 0
         | 
| 119 | 
            -
                links.each do | | 
| 122 | 
            +
                links.each do |uri, fragments|
         | 
| 120 123 | 
             
                  i += 1
         | 
| 121 124 | 
             
                  if verbose
         | 
| 122 125 | 
             
                    prev_msg_size.times { print " " }
         | 
| 123 | 
            -
                    msg = "#{ | 
| 126 | 
            +
                    msg = "#{uri} #{i}/#{links.size}"
         | 
| 124 127 | 
             
                    print "\r#{msg}\r"
         | 
| 125 128 | 
             
                    prev_msg_size = msg.size
         | 
| 126 129 | 
             
                  end
         | 
| 127 130 |  | 
| 128 131 | 
             
                  # Skip the link if it's in the skip list
         | 
| 129 | 
            -
                  next if @skip_list.include?( | 
| 130 | 
            -
             | 
| 131 | 
            -
                  # Parse the uri
         | 
| 132 | 
            -
                  uri = Addressable::URI.parse(link)
         | 
| 133 | 
            -
                  next if uri.site&.end_with?(":")
         | 
| 132 | 
            +
                  next if @skip_list.include?(uri.to_s)
         | 
| 134 133 |  | 
| 135 134 | 
             
                  error = false
         | 
| 136 135 |  | 
| @@ -138,16 +137,52 @@ class LinkChecker | |
| 138 137 | 
             
                  if uri.hostname.nil? || uri.hostname == hostname
         | 
| 139 138 | 
             
                    uri.path.chomp!("/")
         | 
| 140 139 |  | 
| 141 | 
            -
                    # If the uri | 
| 142 | 
            -
                     | 
| 143 | 
            -
             | 
| 144 | 
            -
                       | 
| 140 | 
            +
                    # If the uri's path is valid
         | 
| 141 | 
            +
                    valid_fragments = valid_links[uri.path]
         | 
| 142 | 
            +
                    if valid_fragments
         | 
| 143 | 
            +
                      fragments.each do |fragment, files|
         | 
| 144 | 
            +
                        # Skip the base fragment
         | 
| 145 | 
            +
                        next unless fragment
         | 
| 146 | 
            +
             | 
| 147 | 
            +
                        next if valid_fragments.include?(fragment)
         | 
| 148 | 
            +
             | 
| 149 | 
            +
                        error = true
         | 
| 150 | 
            +
                        puts "Invalid fragment '#{fragment}' in link '#{uri}' " \
         | 
| 151 | 
            +
                             "is present in:"
         | 
| 152 | 
            +
                        files.each { |file| puts "\t#{file}" }
         | 
| 153 | 
            +
                      end
         | 
| 154 | 
            +
                    else
         | 
| 145 155 | 
             
                      error = true
         | 
| 156 | 
            +
                      puts "Invalid internal link '#{link}' is present in:"
         | 
| 157 | 
            +
                      fragments.flat_map { |_, files| files }.uniq
         | 
| 158 | 
            +
                               .each { |file| puts "\t#{file}" }
         | 
| 146 159 | 
             
                    end
         | 
| 147 | 
            -
                  elsif  | 
| 148 | 
            -
                    status = make_request(conn,  | 
| 160 | 
            +
                  elsif fragments.keys == [nil]
         | 
| 161 | 
            +
                    status = make_request(conn, uri)
         | 
| 149 162 | 
             
                    error = !status_allowed?(status)
         | 
| 150 | 
            -
                     | 
| 163 | 
            +
                    if error
         | 
| 164 | 
            +
                      puts "Request to #{link} returned #{status} present in"
         | 
| 165 | 
            +
                      fragments[nil].each { |file| puts "\t#{file}" }
         | 
| 166 | 
            +
                    end
         | 
| 167 | 
            +
                  else
         | 
| 168 | 
            +
                    response = get_request(conn, uri)
         | 
| 169 | 
            +
                    status = response.status
         | 
| 170 | 
            +
                    if status == 200
         | 
| 171 | 
            +
                      valid_fragments = uniq_string_matches(response.body, ID)
         | 
| 172 | 
            +
                      fragments.each do |fragment, files|
         | 
| 173 | 
            +
                        unless valid_fragments.include?(fragment)
         | 
| 174 | 
            +
                          puts "Invalid link to fragment '#{fragment}' present in: "
         | 
| 175 | 
            +
                          files.each { |file| puts "\t#{file}" }
         | 
| 176 | 
            +
                        end
         | 
| 177 | 
            +
                      end
         | 
| 178 | 
            +
                    else
         | 
| 179 | 
            +
                      error = true
         | 
| 180 | 
            +
                      puts "Request to #{link} in #{files} returned #{status}"
         | 
| 181 | 
            +
                      error = true
         | 
| 182 | 
            +
                      puts "Invalid internal link '#{link}' is present in:"
         | 
| 183 | 
            +
                      fragments.flat_map { |_, files| files }.uniq
         | 
| 184 | 
            +
                               .each { |file| puts "\t#{file}" }
         | 
| 185 | 
            +
                    end
         | 
| 151 186 | 
             
                  end
         | 
| 152 187 |  | 
| 153 188 | 
             
                  next unless error
         | 
| @@ -181,40 +216,75 @@ class LinkChecker | |
| 181 216 | 
             
              end
         | 
| 182 217 |  | 
| 183 218 | 
             
              # Find all the valid links for the site
         | 
| 219 | 
            +
              # The value returned by this method is formatted like so:
         | 
| 220 | 
            +
              # {
         | 
| 221 | 
            +
              #   "path": [
         | 
| 222 | 
            +
              #     fragment
         | 
| 223 | 
            +
              #   ]
         | 
| 224 | 
            +
              # }
         | 
| 184 225 | 
             
              def valid_links
         | 
| 185 226 | 
             
                return @valid_links if @valid_links
         | 
| 186 227 |  | 
| 187 228 | 
             
                @valid_links = files.map do |file|
         | 
| 188 | 
            -
                   | 
| 189 | 
            -
                   | 
| 190 | 
            -
             | 
| 191 | 
            -
             | 
| 192 | 
            -
                   | 
| 193 | 
            -
                  path
         | 
| 229 | 
            +
                  fragments = []
         | 
| 230 | 
            +
                  fragments = uniq_file_matches(file, ID) if html?(file) &&
         | 
| 231 | 
            +
                                                             !@ignore_fragments
         | 
| 232 | 
            +
             | 
| 233 | 
            +
                  [file_url(file), fragments]
         | 
| 194 234 | 
             
                end
         | 
| 235 | 
            +
                @valid_links = @valid_links.to_h
         | 
| 195 236 | 
             
              end
         | 
| 196 237 |  | 
| 197 238 | 
             
              # Find all HTML files
         | 
| 198 239 | 
             
              def html_files
         | 
| 199 240 | 
             
                return @html_files if @html_files
         | 
| 200 241 |  | 
| 201 | 
            -
                @html_files = files.filter { |file|  | 
| 242 | 
            +
                @html_files = files.filter { |file| html?(file) }
         | 
| 202 243 | 
             
              end
         | 
| 203 244 |  | 
| 204 245 | 
             
              # Find all links in html_files
         | 
| 246 | 
            +
              # The value returned by this method is formatted like so:
         | 
| 247 | 
            +
              # {
         | 
| 248 | 
            +
              #   uri without fragment: {
         | 
| 249 | 
            +
              #     uri's fragment: Set [
         | 
| 250 | 
            +
              #       "file containing this link"
         | 
| 251 | 
            +
              #     ]
         | 
| 252 | 
            +
              #   }
         | 
| 253 | 
            +
              # }
         | 
| 205 254 | 
             
              def links
         | 
| 206 255 | 
             
                return @links if @links
         | 
| 207 256 |  | 
| 208 257 | 
             
                @links = {}
         | 
| 209 258 | 
             
                html_files.each do |file|
         | 
| 210 | 
            -
                   | 
| 211 | 
            -
             | 
| 212 | 
            -
             | 
| 213 | 
            -
             | 
| 214 | 
            -
                     | 
| 215 | 
            -
             | 
| 216 | 
            -
                     | 
| 217 | 
            -
                     | 
| 259 | 
            +
                  file_path = file_url(file)
         | 
| 260 | 
            +
             | 
| 261 | 
            +
                  # For each link in the file
         | 
| 262 | 
            +
                  uniq_file_matches(file, HREF).each do |link|
         | 
| 263 | 
            +
                    uri = Addressable::URI.parse(link)
         | 
| 264 | 
            +
             | 
| 265 | 
            +
                    # Skip email and phone number URIs
         | 
| 266 | 
            +
                    next if uri.site&.end_with?(":")
         | 
| 267 | 
            +
                    # Skip the URIs with unknown schemes
         | 
| 268 | 
            +
                    next unless uri.scheme.nil? || SCHEMES.include?(uri.scheme)
         | 
| 269 | 
            +
             | 
| 270 | 
            +
                    # Set the URI's path to the file's valid link if the link is a
         | 
| 271 | 
            +
                    # fragment of the current file
         | 
| 272 | 
            +
                    uri.path = file_path if link.start_with?("#")
         | 
| 273 | 
            +
                    uri.path = uri.path.dup
         | 
| 274 | 
            +
             | 
| 275 | 
            +
                    # Remove the fragment from the URI and put it in a local variable
         | 
| 276 | 
            +
                    fragment = uri.fragment.nil? || uri.fragment.empty? ? nil : uri.fragment
         | 
| 277 | 
            +
                    uri.fragment = nil
         | 
| 278 | 
            +
             | 
| 279 | 
            +
                    fragment = nil if @ignore_fragments
         | 
| 280 | 
            +
             | 
| 281 | 
            +
                    # Get the link for the URI
         | 
| 282 | 
            +
                    uri_fragments = @links[uri] ||= {}
         | 
| 283 | 
            +
             | 
| 284 | 
            +
                    # Get the files for the fragment
         | 
| 285 | 
            +
                    fragment_files = uri_fragments[fragment] ||= Set.new
         | 
| 286 | 
            +
             | 
| 287 | 
            +
                    fragment_files << file
         | 
| 218 288 | 
             
                  end
         | 
| 219 289 | 
             
                end
         | 
| 220 290 | 
             
                @links
         | 
| @@ -234,16 +304,53 @@ class LinkChecker | |
| 234 304 | 
             
              # Make a request on the connection for the URL
         | 
| 235 305 | 
             
              def make_request(conn, url)
         | 
| 236 306 | 
             
                if @mode != "get_only"
         | 
| 237 | 
            -
                   | 
| 238 | 
            -
                  return  | 
| 239 | 
            -
                                            status_allowed?( | 
| 307 | 
            +
                  response_status = head_request(conn, url).status
         | 
| 308 | 
            +
                  return response_status if mode == "head_only" ||
         | 
| 309 | 
            +
                                            status_allowed?(response_status)
         | 
| 240 310 | 
             
                end
         | 
| 241 311 |  | 
| 242 | 
            -
                conn | 
| 312 | 
            +
                get_request(conn, url).status
         | 
| 313 | 
            +
              end
         | 
| 314 | 
            +
             | 
| 315 | 
            +
              # Make a get request on the connection for the URL
         | 
| 316 | 
            +
              def get_request(conn, url)
         | 
| 317 | 
            +
                conn.get(url, {}, HEADERS)
         | 
| 318 | 
            +
              end
         | 
| 319 | 
            +
             | 
| 320 | 
            +
              # Make a head request on the connection for the URL
         | 
| 321 | 
            +
              def head_request(conn, url)
         | 
| 322 | 
            +
                conn.head(url, {}, HEADERS)
         | 
| 243 323 | 
             
              end
         | 
| 244 324 |  | 
| 245 325 | 
             
              # Returns whether the status is successful
         | 
| 246 326 | 
             
              def status_allowed?(status)
         | 
| 247 327 | 
             
                status >= 200 && status < 300
         | 
| 248 328 | 
             
              end
         | 
| 329 | 
            +
             | 
| 330 | 
            +
              # Finds all the matches in a file for a given regex
         | 
| 331 | 
            +
              def uniq_file_matches(path, regex)
         | 
| 332 | 
            +
                uniq_string_matches(File.open(path).read, regex)
         | 
| 333 | 
            +
              end
         | 
| 334 | 
            +
             | 
| 335 | 
            +
              # Finds all the matches in a String for a given regex
         | 
| 336 | 
            +
              def uniq_string_matches(str, regex)
         | 
| 337 | 
            +
                str.scan(regex)
         | 
| 338 | 
            +
                   .map { |matches| matches[0].strip }
         | 
| 339 | 
            +
                   .uniq
         | 
| 340 | 
            +
              end
         | 
| 341 | 
            +
             | 
| 342 | 
            +
              # Determines whether the file is an HTML file based on its extension
         | 
| 343 | 
            +
              def html?(path)
         | 
| 344 | 
            +
                HTML.include?(File.extname(path))
         | 
| 345 | 
            +
              end
         | 
| 346 | 
            +
             | 
| 347 | 
            +
              # Gets the url of a file in the static site based on its path
         | 
| 348 | 
            +
              def file_url(path)
         | 
| 349 | 
            +
                path = Pathname.new(path)
         | 
| 350 | 
            +
                path = path.relative_path_from(@site_folder)
         | 
| 351 | 
            +
                path = "/" + path.to_s
         | 
| 352 | 
            +
                path.chomp!("index.html")
         | 
| 353 | 
            +
                path.chomp!("/")
         | 
| 354 | 
            +
                path
         | 
| 355 | 
            +
              end
         | 
| 249 356 | 
             
            end
         | 
    
        data/lib/version.rb
    CHANGED
    
    
    
        metadata
    CHANGED
    
    | @@ -1,14 +1,14 @@ | |
| 1 1 | 
             
            --- !ruby/object:Gem::Specification
         | 
| 2 2 | 
             
            name: jekyll-link-checker
         | 
| 3 3 | 
             
            version: !ruby/object:Gem::Version
         | 
| 4 | 
            -
              version: 0. | 
| 4 | 
            +
              version: 0.2.0
         | 
| 5 5 | 
             
            platform: ruby
         | 
| 6 6 | 
             
            authors:
         | 
| 7 7 | 
             
            - Zakary Kamal Ismail
         | 
| 8 8 | 
             
            autorequire: 
         | 
| 9 9 | 
             
            bindir: exe
         | 
| 10 10 | 
             
            cert_chain: []
         | 
| 11 | 
            -
            date: 2019-09- | 
| 11 | 
            +
            date: 2019-09-21 00:00:00.000000000 Z
         | 
| 12 12 | 
             
            dependencies:
         | 
| 13 13 | 
             
            - !ruby/object:Gem::Dependency
         | 
| 14 14 | 
             
              name: faraday
         | 
| @@ -72,6 +72,34 @@ dependencies: | |
| 72 72 | 
             
                - - "<"
         | 
| 73 73 | 
             
                  - !ruby/object:Gem::Version
         | 
| 74 74 | 
             
                    version: '5.0'
         | 
| 75 | 
            +
            - !ruby/object:Gem::Dependency
         | 
| 76 | 
            +
              name: rspec
         | 
| 77 | 
            +
              requirement: !ruby/object:Gem::Requirement
         | 
| 78 | 
            +
                requirements:
         | 
| 79 | 
            +
                - - ">="
         | 
| 80 | 
            +
                  - !ruby/object:Gem::Version
         | 
| 81 | 
            +
                    version: '0'
         | 
| 82 | 
            +
              type: :development
         | 
| 83 | 
            +
              prerelease: false
         | 
| 84 | 
            +
              version_requirements: !ruby/object:Gem::Requirement
         | 
| 85 | 
            +
                requirements:
         | 
| 86 | 
            +
                - - ">="
         | 
| 87 | 
            +
                  - !ruby/object:Gem::Version
         | 
| 88 | 
            +
                    version: '0'
         | 
| 89 | 
            +
            - !ruby/object:Gem::Dependency
         | 
| 90 | 
            +
              name: rubocop
         | 
| 91 | 
            +
              requirement: !ruby/object:Gem::Requirement
         | 
| 92 | 
            +
                requirements:
         | 
| 93 | 
            +
                - - ">="
         | 
| 94 | 
            +
                  - !ruby/object:Gem::Version
         | 
| 95 | 
            +
                    version: '0'
         | 
| 96 | 
            +
              type: :development
         | 
| 97 | 
            +
              prerelease: false
         | 
| 98 | 
            +
              version_requirements: !ruby/object:Gem::Requirement
         | 
| 99 | 
            +
                requirements:
         | 
| 100 | 
            +
                - - ">="
         | 
| 101 | 
            +
                  - !ruby/object:Gem::Version
         | 
| 102 | 
            +
                    version: '0'
         | 
| 75 103 | 
             
            description: Verifies that all the links in a Jekyll website are valid.It can also
         | 
| 76 104 | 
             
              work with any static site generator.
         | 
| 77 105 | 
             
            email: zakary.kamal.fs@outlook.com
         |