RubyGems - rdoc_link_checker - Versions diffs - 0.4.0 → 0.5.0 - Mend

rdoc_link_checker 0.4.0 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (5) hide show

checksums.yaml +4 -4
data/README.md +16 -6
data/lib/rdoc_link_checker/version.rb +1 -1
data/lib/rdoc_link_checker.rb +70 -86
metadata +2 -2

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: cde4b6bb3a511ccee4b20f792707c85c8705b8a828754909f63f88c533d5efe4
-  data.tar.gz: ff3dfefb26e94258e32a2c5f19bc0b894591611a4dff9612741e34e641ca8130
+  metadata.gz: 9e8d4cbf42017efc4d297c70e05d689b891cfc792ebd3285e1fcaf077a9f4303
+  data.tar.gz: ed4434d61b90db5a1d9db0b9a56ee98da5717d398864c1723cd7557d0e11867b
 SHA512:
-  metadata.gz: c577bf0a97429715c606ee45986258a77bf5028a5e2af47f76e9e2776c5cf66c52045fbc6d1e28fc940e13c178b13ba5914aa6b952d2e24d13b6a36147675d8b
-  data.tar.gz: 9bdcfb203468e9de9d0f94d02a2e0e4ac6140e1e33745012272c36afd469631204a9da410061383a7b7be434df08aaed48c5553bbb6b7578fae115a8fb41f1ec
+  metadata.gz: 724b89d6bb0b6cbf320bb00f29ccdbf467ef945aea04a31077fb8a2378007ddaec91dded922249b0ee806d3d91cf32af666670e1f016631d61f2ea546a8f0f1a
+  data.tar.gz: f95baa7b5b9d8e42028834a0bf298b19e4fffdcc4bd8c1229f55381ab3aeb27ae4561757f27309b14053fddf40c1e4aa008b8aaefe662323e2c2f503b2783942

data/README.md CHANGED Viewed

@@ -1,4 +1,4 @@
-# RDoc Link Checker
+# RDocLinkChecker
 A gem to find broken links in HTML files generated by Ruby RDoc.
@@ -6,13 +6,23 @@ Reports a link as broken if:
 - The target page given by +href+ is not found.
 - The target page is found, but the fragment given by +href+
-  is not a link target on that page;
+  is not a link target (element with attribute <tt>id</tt>) on that page;
   this usually causes a browser to open at the top of the page
   instead of at the given fragment.
-Note that some browsers are forgiving, and will open the target
-page at a link target similar to the given fragment;
-for example, fragment ```#bar``` may be opened at an element
-with id ```foobar```.
+  Some browsers are forgiving, and will open the target
+  page at a link target similar to the given fragment;
+  for example, fragment ```#bar``` may be opened at an element
+  with id ```foobar```.
 See the [help text](doc/help.txt).
+<b>Note</b>: An RDoc bug that was fixed recently
+(PR https://github.com/ruby/rdoc/pull/1002)
+caused many (make that many, many) broken links in the TOC section
+https://docs.ruby-lang.org/en/master/table_of_contents.html#classes.
+Unless you have a recent Ruby version installed (one that has the bug fix),
+the RDocLinkChecker will find and report all those broken links.
+<b>Workaround</b>:  Use option <tt>--no_toc</tt>, which suppresses checking
+for those links.

data/lib/rdoc_link_checker/version.rb CHANGED Viewed

@@ -1,5 +1,5 @@
 # frozen_string_literal: true
 class RDocLinkChecker
-  VERSION = "0.4.0"
+  VERSION = "0.5.0"
 end

data/lib/rdoc_link_checker.rb CHANGED Viewed

@@ -13,6 +13,8 @@ class RDocLinkChecker
   attr_accessor :html_dirpath, :onsite_only, :no_toc
+  attr_accessor :source_paths, :pages
   def initialize(
     html_dirpath,
     onsite_only: false,
@@ -21,26 +23,25 @@ class RDocLinkChecker
     self.html_dirpath = html_dirpath
     self.onsite_only = onsite_only
     self.no_toc = no_toc
-    @pages = {}
+    self.pages = {}
     @counts = {
       source_pages: 0,
       target_pages: 0,
       links_checked: 0,
       links_broken: 0,
     }
-    @verbose = false
   end
   def check
     # All work is done in the HTML directory,
     # and that is where Report.htm will be put.
     Dir.chdir(html_dirpath) do |dir|
-      @counts[:start_time] = Time.now
+      @counts[:start_time] = Time.new
       gather_source_paths
       create_source_pages
       create_target_pages
       verify_links
-      @counts[:end_time] = Time.now
+      @counts[:end_time] = Time.new
       report
     end
   end
@@ -48,36 +49,24 @@ class RDocLinkChecker
   # Gather paths to source HTML pages.
   def gather_source_paths
     paths = []
-    puts 'Gathering source paths' if @verbose
     paths = Find.find('.').select {|path| path.end_with?('.html') }
     # Remove leading './'.
-    @source_paths = paths.map{|path| path.sub(%r[^\./], '')}
-    @source_paths.delete('table_of_contents.html') if no_toc
-    if @verbose
-      @source_paths.each_with_index do |source_path, i|
-        puts '- %4d %s' % [i, source_path]
-      end
-    end
-    @counts[:source_pages] = @source_paths.size
-    puts "Gathered #{@source_paths.size} source paths" if @verbose
+    self.source_paths = paths.map{|path| path.sub(%r[^\./], '')}
+    @counts[:source_pages] = source_paths.size
   end
   # Create a source \Page object for each source path.
   # Gather its links and ids.
   def create_source_pages
-    puts "Creating #{@source_paths.size} source pages" if @verbose
-    @source_paths.sort.each_with_index do |source_path, i|
-      progress_s = RDocLinkChecker.progress_s(i + 1, @source_paths.size)
-      puts "Creating source page #{source_path} #{progress_s}" if @verbose
-      source_page = Page.new(source_path, @verbose, @pages, @counts, onsite_only)
-      @pages[source_path] = source_page
+    source_paths.sort.each_with_index do |source_path, i|
+      progress_s = RDocLinkChecker.progress_s(i + 1, source_paths.size)
+      source_page = Page.new(:source, source_path, onsite_only, pages: pages, counts: @counts)
+      pages[source_path] = source_page
       source_text = File.read(source_path)
       doc = Nokogiri::HTML(source_text)
-      source_page.gather_links(doc)
+      source_page.gather_links(doc) unless no_toc
       source_page.gather_ids(doc)
-      puts "Created source page #{progress_s}" if @verbose
     end
-    puts "Created #{@pages.size} source pages" if @verbose
   end
   # Create a target \Page object for each link
@@ -85,40 +74,30 @@ class RDocLinkChecker
   def create_target_pages
     doc = nil
     target_page_count = 0
-    @source_paths = @pages.keys
-    @source_paths.each do |source_path|
+    source_paths = pages.keys
+    source_paths.each do |source_path|
       # Need for relative links to work.
       dirname = File.dirname(source_path)
       Dir.chdir(dirname) do
-        source_page = @pages[source_path]
-        puts "Creating target pages for #{source_page.links.size} links in #{source_path}" if @verbose
+        source_page = pages[source_path]
         source_page.links.each_with_index do |link, i|
           next if link.path.nil?
-          link.puts(i) if @verbose
           target_path = link.real_path
-          if @pages[target_path]
-            puts "Page #{target_path} already created" if @verbose
-            target_page = @pages[target_path]
+          if pages[target_path]
+            target_page = pages[target_path]
           else
+            target_page_count += 1
+            target_page = Page.new(:target, target_path, onsite_only, pages: pages, counts: @counts)
+            pages[target_path] = target_page
             if File.readable?(link.path)
-              puts "Creating target page #{target_path}" if @verbose
-              target_page_count += 1
-              target_page = Page.new(target_path, @verbose, @pages, @counts, onsite_only)
-              @pages[target_path] = target_page
               target_text = File.read(link.path)
               doc = Nokogiri::HTML(target_text)
               target_page.gather_ids(doc)
-              puts "Created target page #{target_path}" if @verbose
             elsif RDocLinkChecker.checkable?(link.path)
-              puts "Creating target page #{target_path}" if @verbose
-              target_page_count += 1
-              target_page = Page.new(target_path, @verbose, @pages, @counts, onsite_only)
-              @pages[target_path] = target_page
-              puts "Created target page #{target_path}" if @verbose
               link.exception = fetch(link.path, target_page)
               link.valid_p = false if link.exception
             else
-              puts "File not readable or checkable: #{target_path}" if @verbose
+              # File not readable or checkable.
             end
           end
           next if target_page.nil?
@@ -127,40 +106,33 @@ class RDocLinkChecker
             target_page.gather_ids(doc)
           end
         end
-        puts "Created target pages for #{source_page.links.size} links in #{source_path}" if @verbose
       end
     end
-    puts "Created #{target_page_count} target pages" if @verbose
     @counts[:target_pages] = target_page_count
   end
   # Verify that each link target exists.
   def verify_links
-    linking_pages = @pages.select do |path, page|
+    linking_pages = pages.select do |path, page|
       !page.links.empty?
     end
-    puts "Checking links on #{linking_pages.size} pages" if @verbose
     link_count = 0
     broken_count = 0
     linking_pages.each_pair do |path, page|
-      puts "Checking #{page.links.size} links on page #{path}" if @verbose
       link_count += page.links.size
       page.links.each_with_index do |link, i|
         if link.valid_p.nil? # Don't disturb if already set to false.
-          target_page = @pages[link.real_path]
+          target_page = pages[link.real_path]
           if target_page
             target_id = link.fragment
             link.valid_p = target_id.nil? || target_page.ids.include?(target_id)
           else
-            link_valid_p = false
+            link.valid_p = false
           end
         end
-        link.puts(i) if @verbose
         broken_count += 1 unless link.valid_p
       end
-      puts "Checked #{page.links.size} links on page #{path}" if @verbose
     end
-    puts "Checked #{link_count} links on #{linking_pages.size} pages" if @verbose
     @counts[:links_checked] = link_count
     @counts[:links_broken] = broken_count
   end
@@ -168,21 +140,16 @@ class RDocLinkChecker
   # Fetch the page from the web and gather its ids into the target page.
   # Returns exception or nil.
   def fetch(url, target_page)
-    puts "Begin fetch target page #{url}" if @verbose
-    puts "Getting return code for #{url}" if @verbose
     code = 0
     exception = nil
     begin
       response =  Net::HTTP.get_response(URI(url))
       code = response.code.to_i
       target_page.code = code
-      puts "Returned #{code} (#{response.class})" if @verbose
     rescue => x
-      puts "Raised #{x.class} #{x.message}" if @verbose
       raise unless x.class.name.match(/^(Net|SocketError|IO::TimeoutError|Errno::)/)
       exception = RDocLinkChecker::HttpResponseError.new(url, x)
     end
-    puts "Got return code #{code} for #{url} " if @verbose
     # Don't load if bad code, or no response, or if not html.
     if !code_bad?(code)
       if content_type_html?(response)
@@ -190,7 +157,6 @@ class RDocLinkChecker
         target_page.gather_ids(doc)
       end
     end
-    puts "End fetch target page #{url}" if @verbose
     exception
   end
@@ -262,7 +228,7 @@ EOT
     add_summary(body)
     add_broken_links(body)
-    add_offsite_links(body) unless onsite_only
+    # add_offsite_links(body) unless onsite_only
     report_file_path = 'Report.htm' # _Not_ .html.
     doc.write(File.new(report_file_path, 'w'), 2)
   end
@@ -282,7 +248,7 @@ EOT
       row = {sym => :label, value => :good}
       data.push(row)
     end
-    table2(body, data, 'Parameters')
+    table2(body, data, 'parameters', 'Parameters')
     body.add_element(Element.new('p'))
     # Times table.
@@ -291,7 +257,7 @@ EOT
     minutes = (elapsed_time / 60) % 60
     hours = (elapsed_time/3600)
     elapsed_time_s = "%2.2d:%2.2d:%2.2d" % [hours, minutes, seconds]
-    format = "%Y-%m-%d-%a-%H:%M:%S"
+    format = "%Y-%m-%d-%a-%H:%M:%SZ"
     start_time_s = @counts[:start_time].strftime(format)
     end_time_s = @counts[:end_time].strftime(format)
     data = [
@@ -299,7 +265,7 @@ EOT
       {'End Time' => :label, end_time_s => :good},
       {'Elapsed Time' => :label, elapsed_time_s => :good},
     ]
-    table2(body, data, 'Times')
+    table2(body, data, 'times', 'Times')
     body.add_element(Element.new('p'))
     # Counts.
@@ -309,7 +275,7 @@ EOT
       {'Links Checked' => :label, @counts[:links_checked] => :good},
       {'Links Broken' => :label, @counts[:links_broken] => :bad},
     ]
-    table2(body, data, 'Counts')
+    table2(body, data, 'counts', 'Counts')
     body.add_element(Element.new('p'))
   end
@@ -324,6 +290,7 @@ EOT
       return
     end
+    # Legend.
     ul = body.add_element(Element.new('ul'))
     li = ul.add_element(Element.new('li'))
     li.text = 'Href: the href of the anchor element.'
@@ -345,17 +312,23 @@ Fragment: the fragment of the link.
 If the fragment is reddish, fragment was not found.
 EOT
-    @pages.each_pair do |path, page|
+    pages.each_pair do |path, page|
       broken_links = page.links.select {|link| !link.valid_p }
       next if broken_links.empty?
-      h3 = body.add_element(Element.new('h3'))
+      page_div = body.add_element(Element.new('div'))
+      page_div.add_attribute('class', 'broken_page')
+      page_div.add_attribute('path', path)
+      page_div.add_attribute('count', broken_links.count)
+      h3 = page_div.add_element(Element.new('h3'))
       a = Element.new('a')
-      a.text = path
+      a.text = "#{path} (#{broken_links.count})"
       a.add_attribute('href', path)
       h3.add_element(a)
       broken_links.each do |link|
+        link_div = page_div.add_element(Element.new('div'))
+        link_div.add_attribute('class', 'broken_link')
         data = []
         # Text, URL, fragment
         a = Element.new('a')
@@ -372,8 +345,9 @@ EOT
           data.push({'Exception' => :label, link.exception.class => :bad})
           data.push({'Message' => :label, link.exception.message => :bad})
         end
-        table2(body, data)
-        body.add_element(Element.new('p'))
+        id = link.exception ? 'bad_url' : 'bad_fragment'
+        table2(link_div, data, id)
+        page_div.add_element(Element.new('p'))
       end
     end
@@ -382,12 +356,14 @@ EOT
   def add_offsite_links(body)
     h2 = body.add_element(Element.new('h2'))
     h2.text = 'Off-Site Links by Source Page'
-    @pages.each_pair do |path, page|
+    none = true
+    pages.each_pair do |path, page|
       offsite_links = page.links.select do |link|
         RDocLinkChecker.offsite?(link.href)
       end
       next if offsite_links.empty?
+      none = false
       h3 = body.add_element(Element.new('h3'))
       a = Element.new('a')
       a.text = path
@@ -407,6 +383,10 @@ EOT
         body.add_element(Element.new('p'))
       end
     end
+    if none
+      p = body.add_element(Element.new('p'))
+      p.text = 'None.'
+    end
   end
   Classes = {
@@ -416,9 +396,10 @@ EOT
     bad: 'data center bad',
   }
-  def table2(parent, data, title = nil)
+  def table2(parent, data, id, title = nil)
     data = data.dup
     table = parent.add_element(Element.new('table'))
+    table.add_attribute('id', id)
     if title
       tr = table.add_element(Element.new('tr)'))
       th = tr.add_element(Element.new('th'))
@@ -491,19 +472,18 @@ EOT
   # Class to represent a page.
   class Page
-    attr_accessor :path, :type, :verbose, :pages, :counts, :code, :links, :ids, :dirname, :onsite_only
+    attr_accessor :path, :type, :pages, :counts, :code, :links, :ids, :dirname, :onsite_only
     # Returns a new \Page object:
     #
     # - +path+: a path relative to the HTML directory (if on-site)
     #   or a URL (if off-site).
-    # - +verbose+: whether to put progress message to $stdout.
     # - +pages+: hash of path/page pairs.
     # - +counts+: hash of counts.
     #
-    def initialize(path, verbose, pages, counts, onsite_only)
+    def initialize(type, path, onsite_only, pages: {}, counts: {})
       self.path = path
-      self.verbose = verbose
+      self.type = type
       self.pages = pages
       self.counts = counts
       self.onsite_only = onsite_only
@@ -514,12 +494,20 @@ EOT
       self.dirname = self.dirname == '.' ? '' : dirname
     end
+    def to_h
+      {
+        path: path,
+        type: type,
+        dirname: dirname,
+        code: code
+      }
+    end
     # Gather links for the page:
     #
     # - +doc+: Nokogiri document to be parsed for links.
     #
     def gather_links(doc)
-      puts 'Gathering links' if @verbose
       i = 0
       # The links are in the anchors.
       doc.search('a').each do |a|
@@ -536,10 +524,8 @@ EOT
         next if link.path.nil? || link.path.empty?
         links.push(link)
-        link.puts(i) if @verbose
         i += 1
       end
-      puts "Gathered #{i} links" if @verbose
     end
     # Gather ids for the page.
@@ -570,7 +556,6 @@ EOT
       # - h*
       #
       # We can add more as needed (i.e., if/when we have actual broken links).
-      puts 'Gathering potential link targets' if @verbose
       # body element has 'top', which is a link target.
       body = doc.at('//body')
@@ -605,13 +590,6 @@ EOT
           ids.push(id) if id
         end
       end
-      if @verbose
-        ids.each_with_index do |id, i|
-          puts '%4d %s' % [i, id]
-        end
-      end
-      puts "Gathered #{ids.size} potential link targets" if @verbose
     end
   end
@@ -627,7 +605,6 @@ EOT
     # - +text+: attribute text from anchor element.
     # - +dirname+: directory path of the linking page.
     #
-    # TODO: accept the anchor element, instead of its href and text.
     def initialize(href, text, dirname)
       self.href = href
       self.text = text
@@ -640,6 +617,13 @@ EOT
       self.exception = nil
     end
+    def to_h
+      {
+        href: href,
+        text: text,
+      }
+    end
     # Return the real (not relative) path of the link.
     def make_real_path(dirname, path)
       # Trim single dot.

metadata CHANGED Viewed

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: rdoc_link_checker
 version: !ruby/object:Gem::Version
-  version: 0.4.0
+  version: 0.5.0
 platform: ruby
 authors:
 - Burdette Lamar
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2023-05-20 00:00:00.000000000 Z
+date: 2023-05-24 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: bundler