RubyGems - rdoc_link_checker - Versions diffs - 0.2.0 → 0.3.0 - Mend

rdoc_link_checker 0.2.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7) hide show

checksums.yaml +4 -4
data/README.md +32 -3
data/bin/rdoc_link_checker +32 -15
data/doc/help.txt +16 -0
data/lib/rdoc_link_checker/version.rb +1 -1
data/lib/rdoc_link_checker.rb +676 -3
metadata +3 -2

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: dc87ba9f414e2d9c949f1eb2f85c20ff6ad3a165f0bed638211ea7a84ac16d69
-  data.tar.gz: e801fe955f57f847e7d172540a23a6acfc94597e878ee8dad15eb772da155d32
+  metadata.gz: 3a9ffa7ca1ef044fdc73a544cd7fb31dce91b0fc0a2f0937d071a8ce1d033ac6
+  data.tar.gz: 7e93a01cbb75c2db88050e1ab10aee8e549316d4ae7e06ab80dec38e3af5f9e6
 SHA512:
-  metadata.gz: 7487c3faaed27b9e8a3d3c9922dd650891eb7aeb1e9bd50704006037c9c4dd234bacc95482dcd2247ef9222e4288f2b2e789485bcd044d113a565d0fac733f96
-  data.tar.gz: ebcb50284bb5f96f184d1f0047ed2a21994474c6c3e135dc09532a72ee88fe1fb25fa50e6253d7599966d73287543f7543bde72a34ab536d73b0b165c88a1670
+  metadata.gz: ffd501ff86f9bda291348d6a9aa0a982a4ad1b7b5020d963c69a388343a34427b2e9f5282732da8249b1fc2604482938dd96a610db236cd537f37f9abee7d075
+  data.tar.gz: 5ee92b068a8cb0feea303e4bf8f669bac3f587086a213cd9324cdd6d66e07c22bf6639c42d5119e25d63ad312bac3211bc2675424fec7c4ad76b0ca1f4770c32

data/README.md CHANGED Viewed

@@ -1,6 +1,35 @@
 # RDoc Link Checker
-Not ready for prime time.  Just wanted to reserve the name.
+A gem to find broken links in HTML files generated by Ruby RDoc.
-Development is active,
-so should be up in a few days (say, by the end of May 2023).
+Reports a link as broken if:
+- The target page given by `href` is not found.
+- The target page is found, but the fragment given by `href`
+  is not a link target on that page;
+  this usually causes a browser to open at the top of the page
+  instead of at the given fragment.
+Note that some browsers are forgiving, and will open the target
+page at a link target similar to the given fragment;
+for example, fragment ```bar``` may be opened at an element
+with id ```foobar```.
+```
+Usage:
+rdoc_link_checker html_dirpath options
+The argument is the path to a directory containing a tree
+of RDoc-generated HTML files, such as those generated by command
+rdoc --visibility=private --op html . # Note the trailing dot.
+Options:
+--onsite_only   Check link targets only on pages in the file tree at <html_dirpath>,
+                and not those on other local pages or those on the web.
+--no_toc        Do not check links on the TOC page (table_of_contents.html).
+--version       Print the version and exit.
+--help          Print this help and exit.
+The output is file <html_dirpath>/Report.htm, which reports broken links.
+```

data/bin/rdoc_link_checker CHANGED Viewed

@@ -5,31 +5,33 @@ require_relative '../lib/rdoc_link_checker'
 options = GetoptLong.new(
   ['--html_dirpath', '-d', GetoptLong::REQUIRED_ARGUMENT],
-  ['--version', '-v', GetoptLong::NO_ARGUMENT],
-  ['--help', '-h', GetoptLong::NO_ARGUMENT]
+  ['--onsite_only', '-l', GetoptLong::NO_ARGUMENT],
+  ['--no_toc',      '-n', GetoptLong::NO_ARGUMENT],
+  ['--version',     '-v', GetoptLong::NO_ARGUMENT],
+  ['--help',        '-h', GetoptLong::NO_ARGUMENT]
 )
-message = nil
-case ARGV.size
-when 0
-  message = "Expected one argument; got none."
-when 1
-  # Okay.
-else
-  message = "Expected one argument, not #{ARGV.inspect}."
-end
-raise ArgumentError.new(message) if message
 def help
-  puts 'Boo!'
+  path = File.absolute_path(__FILE__)
+  dirname = File.dirname(File.dirname(path))
+  filepath = File.join(dirname, 'doc', 'help.txt')
+  puts File.read(filepath)
+  exit
 end
 def version
   puts RDocLinkChecker::VERSION
+  exit
 end
+onsite_only = false
+no_toc = false
 options.each do |option, argument|
   case option
+  when '--onsite_only'
+    onsite_only = true
+  when '--no_toc'
+    no_toc = true
   when '--help'
     help
   when '--version'
@@ -37,5 +39,20 @@ options.each do |option, argument|
   end
 end
+message = nil
+case ARGV.size
+when 0
+  message = "Expected one argument; got none."
+when 1
+  # Okay.
+else
+  message = "Expected one argument, not #{ARGV.inspect}."
+end
+raise ArgumentError.new(message) if message
 html_dirpath = ARGV[0]
-RDocLinkChecker.new(html_dirpath)
+RDocLinkChecker.new(
+  html_dirpath,
+  onsite_only: onsite_only,
+  no_toc: no_toc
+).check

data/doc/help.txt ADDED Viewed

@@ -0,0 +1,16 @@
+Usage:
+  rdoc_link_checker html_dirpath options
+The argument is the path to a directory containing a tree
+of RDoc-generated HTML files, such as those generated by command
+  rdoc --visibility=private --op html . # Note the trailing dot.
+Options:
+  --onsite_only   Check link targets only on pages in the file tree at <html_dirpath>,
+                  and not those on other local pages or those on the web.
+  --no_toc        Do not check links on the TOC page (table_of_contents.html).
+  --version       Print the version and exit.
+  --help          Print this help and exit.
+The output is file <html_dirpath>/Report.htm, which reports broken links.

data/lib/rdoc_link_checker/version.rb CHANGED Viewed

@@ -1,5 +1,5 @@
 # frozen_string_literal: true
 class RDocLinkChecker
-  VERSION = "0.2.0"
+  VERSION = "0.3.0"
 end

data/lib/rdoc_link_checker.rb CHANGED Viewed

@@ -1,13 +1,686 @@
 # frozen_string_literal: true
+require 'nokogiri'
+require 'rexml/document'
+require 'find'
+require 'net/http'
 require_relative 'rdoc_link_checker/version'
 class RDocLinkChecker
-  def initialize(html_dirpath)
-    puts html_dirpath
+  include REXML
+  attr_accessor :html_dirpath, :onsite_only, :no_toc
+  def initialize(
+    html_dirpath,
+    onsite_only: false,
+    no_toc: false
+  )
+    self.html_dirpath = html_dirpath
+    self.onsite_only = onsite_only
+    self.no_toc = no_toc
+    @pages = {}
+    @counts = {
+      source_pages: 0,
+      target_pages: 0,
+      links_checked: 0,
+      links_broken: 0,
+    }
+    @verbose = false
+  end
+  def check
+    # All work is done in the HTML directory,
+    # and that is where Report.htm will be put.
+    Dir.chdir(html_dirpath) do |dir|
+      @counts[:start_time] = Time.now
+      gather_source_paths
+      create_source_pages
+      create_target_pages
+      verify_links
+      @counts[:end_time] = Time.now
+      report
+    end
+  end
+  # Gather paths to source HTML pages.
+  def gather_source_paths
+    paths = []
+    puts 'Gathering source paths' if @verbose
+    paths = Find.find('.').select {|path| path.end_with?('.html') }
+    # Remove leading './'.
+    @source_paths = paths.map{|path| path.sub(%r[^\./], '')}
+    @source_paths.delete('table_of_contents.html') if no_toc
+    if @verbose
+      @source_paths.each_with_index do |source_path, i|
+        puts '- %4d %s' % [i, source_path]
+      end
+    end
+    @counts[:source_pages] = @source_paths.size
+    puts "Gathered #{@source_paths.size} source paths" if @verbose
+  end
+  # Create a source \Page object for each source path.
+  # Gather its links and ids.
+  def create_source_pages
+    puts "Creating #{@source_paths.size} source pages" if @verbose
+    @source_paths.sort.each_with_index do |source_path, i|
+      progress_s = RDocLinkChecker.progress_s(i + 1, @source_paths.size)
+      puts "Creating source page #{source_path} #{progress_s}" if @verbose
+      source_page = Page.new(source_path, @verbose, @pages, @counts, onsite_only)
+      @pages[source_path] = source_page
+      source_text = File.read(source_path)
+      doc = Nokogiri::HTML(source_text)
+      source_page.gather_links(doc)
+      source_page.gather_ids(doc)
+      puts "Created source page #{progress_s}" if @verbose
+    end
+    puts "Created #{@pages.size} source pages" if @verbose
+  end
+  # Create a target \Page object for each link
+  # (unless already created as a source page).
+  def create_target_pages
+    doc = nil
+    target_page_count = 0
+    @source_paths = @pages.keys
+    @source_paths.each do |source_path|
+      # Need for relative links to work.
+      dirname = File.dirname(source_path)
+      Dir.chdir(dirname) do
+        source_page = @pages[source_path]
+        puts "Creating target pages for #{source_page.links.size} links in #{source_path}" if @verbose
+        source_page.links.each_with_index do |link, i|
+          next if link.path.nil?
+          link.puts(i) if @verbose
+          target_path = link.real_path
+          if @pages[target_path]
+            puts "Page #{target_path} already created" if @verbose
+            target_page = @pages[target_path]
+          else
+            if File.readable?(link.path)
+              puts "Creating target page #{target_path}" if @verbose
+              target_page_count += 1
+              target_page = Page.new(target_path, @verbose, @pages, @counts, onsite_only)
+              @pages[target_path] = target_page
+              target_text = File.read(link.path)
+              doc = Nokogiri::HTML(target_text)
+              target_page.gather_ids(doc)
+              puts "Created target page #{target_path}" if @verbose
+            elsif RDocLinkChecker.checkable?(link.path)
+              puts "Creating target page #{target_path}" if @verbose
+              target_page_count += 1
+              target_page = Page.new(target_path, @verbose, @pages, @counts, onsite_only)
+              @pages[target_path] = target_page
+              puts "Created target page #{target_path}" if @verbose
+              link.exception = fetch(link.path, target_page)
+              link.valid_p = false if link.exception
+            else
+              puts "File not readable or checkable: #{target_path}" if @verbose
+            end
+          end
+          next if target_page.nil?
+          if link.has_fragment? && target_page.ids.empty?
+            doc || doc = Nokogiri::HTML(target_text)
+            target_page.gather_ids(doc)
+          end
+        end
+        puts "Created target pages for #{source_page.links.size} links in #{source_path}" if @verbose
+      end
+    end
+    puts "Created #{target_page_count} target pages" if @verbose
+    @counts[:target_pages] = target_page_count
+  end
+  # Verify every gathered link and record the totals.
+  #
+  # A link whose validity is still undecided (+valid_p+ is +nil+) is valid
+  # when its target page is known and either the link has no fragment
+  # or the fragment is among the ids gathered for that page.
+  # A link already marked invalid (for example, by a failed fetch) is left as is.
+  #
+  # Stores the number of links examined in <tt>@counts[:links_checked]</tt>
+  # and the number found broken in <tt>@counts[:links_broken]</tt>.
+  def verify_links
+    linking_pages = @pages.reject { |_path, page| page.links.empty? }
+    puts "Checking links on #{linking_pages.size} pages" if @verbose
+    link_count = 0
+    broken_count = 0
+    linking_pages.each_pair do |path, page|
+      puts "Checking #{page.links.size} links on page #{path}" if @verbose
+      link_count += page.links.size
+      page.links.each_with_index do |link, i|
+        if link.valid_p.nil? # Don't disturb if already set to false.
+          target_page = @pages[link.real_path]
+          link.valid_p =
+            if target_page
+              link.fragment.nil? || target_page.ids.include?(link.fragment)
+            else
+              # No page was created for the target, so the link is broken.
+              false
+            end
+        end
+        link.puts(i) if @verbose
+        broken_count += 1 unless link.valid_p
+      end
+      puts "Checked #{page.links.size} links on page #{path}" if @verbose
+    end
+    puts "Checked #{link_count} links on #{linking_pages.size} pages" if @verbose
+    @counts[:links_checked] = link_count
+    @counts[:links_broken] = broken_count
+  end
+  # Fetch the page at +url+ from the web and gather its ids into +target_page+.
+  #
+  # Sets <tt>target_page.code</tt> to the HTTP status code when a response arrives.
+  # Ids are gathered only when the status code is good
+  # and the response content type mentions html.
+  #
+  # Returns +nil+ if the page is available; otherwise returns an object
+  # describing the failure:
+  #
+  # - HttpResponseError if Net::HTTP raised a network-related exception.
+  # - HttpStatusCodeError if the server answered with a bad status code.
+  #
+  # An exception that is not network-related is re-raised.
+  def fetch(url, target_page)
+    puts "Begin fetch target page #{url}" if @verbose
+    puts "Getting return code for #{url}" if @verbose
+    code = 0
+    response = nil
+    exception = nil
+    begin
+      response = Net::HTTP.get_response(URI(url))
+      code = response.code.to_i
+      target_page.code = code
+      puts "Returned #{code} (#{response.class})" if @verbose
+    rescue => x
+      puts "Raised #{x.class} #{x.message}" if @verbose
+      raise unless x.class.name.match(/^(Net|SocketError|IO::TimeoutError|Errno::)/)
+      exception = RDocLinkChecker::HttpResponseError.new(url, x)
+    end
+    puts "Got return code #{code} for #{url} " if @verbose
+    if code_bad?(code)
+      # A bad status (such as 404) means the page was not found;
+      # report it so that the caller marks the link as broken.
+      # Keep the network exception, if any, as the more specific cause.
+      exception ||= RDocLinkChecker::HttpStatusCodeError.new(url, code)
+    elsif content_type_html?(response)
+      # Only an HTML body can contain link targets.
+      doc = Nokogiri::HTML(response.body)
+      target_page.gather_ids(doc)
+    end
+    puts "End fetch target page #{url}" if @verbose
+    exception
+  end
+  # Returns whether the code is bad (zero or >= 400).
+  def code_bad?(code)
+    return false if code.nil?
+    (code == 0) || (code >= 400)
+  end
+  # Returns whether the response body should be HTML.
+  def content_type_html?(response)
+    return false unless response
+    return false unless response['Content-Type']
+    response['Content-Type'].match('html')
+  end
+  # Returns whether the path is offsite.
+  def self.offsite?(path)
+    path.start_with?('http')
+  end
+  # Returns the string fragment for the given path or URL, or +nil+ if there is none.
+  def self.get_fragment(s)
+    a = s.split('#', 2)
+    a.size == 2 ? a[1] : nil
+  end
+  # Returns a progress string giving a fraction and percentage.
+  def self.progress_s(i, total)
+    fraction_s = "#{i}/#{total}"
+    percent_i = (i*100.0/total).round
+    "(#{fraction_s}, #{percent_i}%)"
+  end
+  # Returns whether the path is checkable.
+  def self.checkable?(path)
+    return false unless path
+    begin
+      uri = URI(path)
+      return ['http', 'https', nil].include?(uri.scheme)
+    rescue
+      return false
+    end
+  end
+  # Generate the report and write it to file Report.htm
+  # (a relative path, so it lands in the current working directory).
+  #
+  # The report contains the summary, the broken links, and,
+  # unless +onsite_only+ is set, the off-site links.
+  def report
+    doc = Document.new('')
+    root = doc.add_element(Element.new('root'))
+    head = root.add_element(Element.new('head'))
+    title = head.add_element(Element.new('title'))
+    title.text = 'RDocLinkChecker Report'
+    style = head.add_element(Element.new('style'))
+    style.text = <<EOT
+*        { font-family: sans-serif }
+.data    { font-family: courier }
+.center  { text-align: center }
+.good    { color: rgb(  0,  97,   0); background-color: rgb(198, 239, 206) } /* Greenish */
+.iffy    { color: rgb(156, 101,   0); background-color: rgb(255, 235, 156) } /* Yellowish */
+.bad     { color: rgb(156,   0,   6); background-color: rgb(255, 199, 206) } /* Reddish */
+.neutral { color: rgb(  0,   0,   0); background-color: rgb(217, 217, 214) } /* Grayish */
+EOT
+    body = root.add_element(Element.new('body'))
+    h1 = body.add_element(Element.new('h1'))
+    h1.text = 'RDocLinkChecker Report'
+    add_summary(body)
+    add_broken_links(body)
+    add_offsite_links(body) unless onsite_only
+    report_file_path = 'Report.htm' # _Not_ .html.
+    # The block form closes (and therefore flushes) the file,
+    # even if writing the document raises.
+    File.open(report_file_path, 'w') do |file|
+      doc.write(file, 2)
+    end
+  end
+  def add_summary(body)
+    h2 = body.add_element(Element.new('h2'))
+    h2.text = 'Summary'
+    # Parameters table.
+    data = []
+    [
+      :html_dirpath,
+      :onsite_only,
+      :no_toc
+    ].each do |sym|
+      value = send(sym).inspect
+      row = {sym => :label, value => :good}
+      data.push(row)
+    end
+    table2(body, data, 'Parameters')
+    body.add_element(Element.new('p'))
+    # Times table.
+    elapsed_time = @counts[:end_time] - @counts[:start_time]
+    seconds = elapsed_time % 60
+    minutes = (elapsed_time / 60) % 60
+    hours = (elapsed_time/3600)
+    elapsed_time_s = "%2.2d:%2.2d:%2.2d" % [hours, minutes, seconds]
+    format = "%Y-%m-%d-%a-%H:%M:%S"
+    start_time_s = @counts[:start_time].strftime(format)
+    end_time_s = @counts[:end_time].strftime(format)
+    data = [
+      {'Start Time' => :label, start_time_s => :good},
+      {'End Time' => :label, end_time_s => :good},
+      {'Elapsed Time' => :label, elapsed_time_s => :good},
+    ]
+    table2(body, data, 'Times')
+    body.add_element(Element.new('p'))
+    # Counts.
+    data = [
+      {'Source Pages' => :label, @counts[:source_pages] => :good},
+      {'Target Pages' => :label, @counts[:target_pages] => :good},
+      {'Links Checked' => :label, @counts[:links_checked] => :good},
+      {'Links Broken' => :label, @counts[:links_broken] => :bad},
+    ]
+    table2(body, data, 'Counts')
+    body.add_element(Element.new('p'))
+  end
+  def add_broken_links(body)
+    h2 = body.add_element(Element.new('h2'))
+    h2.text = 'Broken Links by Source Page'
+    if @counts[:links_broken] == 0
+      p = body.add_element('p')
+      p.text = 'None.'
+      return
+    end
+    ul = body.add_element(Element.new('ul'))
+    li = ul.add_element(Element.new('li'))
+    li.text = 'Href: the href of the anchor element.'
+    li = ul.add_element(Element.new('li'))
+    li.text = 'Text: the text of the anchor element.'
+    li = ul.add_element(Element.new('li'))
+    li.text = 'Path: the URL or path of the link (not including the fragment):'
+    ul2 = li.add_element(Element.new('ul'))
+    li2 = ul2.add_element(Element.new('li'))
+    li2.text = 'For an on-site link, an abbreviated path is given.'
+    li2 = ul2.add_element(Element.new('li'))
+    li2.text = <<EOT
+For an off-site link, the full URL is given.
+If the path is reddish, the page was not found.
+EOT
+    li = ul.add_element(Element.new('li'))
+    li.text = <<EOT
+Fragment: the fragment of the link.
+If the fragment is reddish, fragment was not found.
+EOT
+    @pages.each_pair do |path, page|
+      broken_links = page.links.select {|link| !link.valid_p }
+      next if broken_links.empty?
+      h3 = body.add_element(Element.new('h3'))
+      a = Element.new('a')
+      a.text = path
+      a.add_attribute('href', path)
+      h3.add_element(a)
+      broken_links.each do |link|
+        data = []
+        # Text, URL, fragment
+        a = Element.new('a')
+        a.text = link.href
+        a.add_attribute('href', link.href)
+        data.push({'Href' => :label, a => :bad})
+        data.push({'Text' => :label, link.text => :good})
+        fragment_p = !link.fragment.nil?
+        class_ = fragment_p ? :good : :bad
+        data.push({'Path' => :label, link.real_path => class_})
+        class_ = fragment_p ? :bad : :good
+        data.push({'Fragment' => :label, link.fragment => class_})
+        if link.exception
+          data.push({'Exception' => :label, link.exception.class => :bad})
+          data.push({'Message' => :label, link.exception.message => :bad})
+        end
+        table2(body, data)
+        body.add_element(Element.new('p'))
+      end
+    end
+  end
+  def add_offsite_links(body)
+    h2 = body.add_element(Element.new('h2'))
+    h2.text = 'Off-Site Links by Source Page'
+    @pages.each_pair do |path, page|
+      offsite_links = page.links.select do |link|
+        RDocLinkChecker.offsite?(link.href)
+      end
+      next if offsite_links.empty?
+      h3 = body.add_element(Element.new('h3'))
+      a = Element.new('a')
+      a.text = path
+      a.add_attribute('href', path)
+      h3.add_element(a)
+      offsite_links.each do |link|
+        data = []
+        # Text, URL, fragment
+        a = Element.new('a')
+        a.text = link.href
+        a.add_attribute('href', link.href)
+        class_ = link.valid_p ? :good : :bad
+        data.push({'Href' => :label, a => class_})
+        data.push({'Text' => :label, link.text => :good})
+        table2(body, data)
+        body.add_element(Element.new('p'))
+      end
+    end
+  end
+  Classes = {
+    label: 'label center neutral',
+    good: 'data center good',
+    iffy: 'data center iffy',
+    bad: 'data center bad',
+  }
+  # Append a two-column table to +parent+:
+  #
+  # - +parent+: the REXML element that receives the new table element.
+  # - +data+: array of row hashes; each hash has two entries, in order:
+  #   label => label class, then value => value class,
+  #   where each class is a key in Classes.
+  #   A value that is a REXML element is added as a child element;
+  #   anything else is set as the cell text.
+  # - +title+: optional heading (string or REXML element) placed
+  #   in a first row that spans both columns.
+  #
+  def table2(parent, data, title = nil)
+    table = parent.add_element(Element.new('table'))
+    if title
+      tr = table.add_element(Element.new('tr'))
+      th = tr.add_element(Element.new('th'))
+      # Attribute values are text; pass a string rather than an integer.
+      th.add_attribute('colspan', '2')
+      if title.kind_of?(REXML::Element)
+        th.add_element(title)
+      else
+        th.text = title
+      end
+    end
+    data.each do |row_h|
+      # A two-entry hash flattens to [label, label_class, value, value_class].
+      label, label_class, value, value_class = row_h.flatten
+      tr = table.add_element(Element.new('tr'))
+      td = tr.add_element(Element.new('td'))
+      td.text = label
+      td.add_attribute('class', Classes[label_class])
+      td = tr.add_element(Element.new('td'))
+      if value.kind_of?(REXML::Element)
+        td.add_element(value)
+      else
+        td.text = value
+      end
+      td.add_attribute('class', Classes[value_class])
+    end
+  end
+  class Error; end
+  class HttpResponseError < Error
+    attr_accessor :url, :x
+    def initialize(url, x)
+      self.url = url
+      self.x = x
+    end
+    def message
+      <<EOT
+#{self.class.name}:
+An exception was raised when checking page availability with Net::HTTP:
+  Url: #{url}
+  Class: #{x.class}
+  Message: #{x.message}
+EOT
+    end
+  end
+  class HttpStatusCodeError < Error
+    attr_accessor :url, :code
+    def initialize(url, code)
+      self.url = url
+      self.code = code
+    end
+    def message
+      <<EOT
+#{self.class.name}:
+  The return code for the page was not 200:
+    Url: #{url}
+    Return code: #{code}
+EOT
+    end
+  end
+  # Class to represent a page.
+  class Page
+    attr_accessor :path, :type, :verbose, :pages, :counts, :code, :links, :ids, :dirname, :onsite_only
+    # Returns a new \Page object:
+    #
+    # - +path+: a path relative to the HTML directory (if on-site)
+    #   or a URL (if off-site).
+    # - +verbose+: whether to put progress message to $stdout.
+    # - +pages+: hash of path/page pairs.
+    # - +counts+: hash of counts.
+    # - +onsite_only+: whether off-site links are to be skipped when gathering links.
+    #
+    def initialize(path, verbose, pages, counts, onsite_only)
+      self.path = path
+      self.verbose = verbose
+      self.pages = pages
+      self.counts = counts
+      self.onsite_only = onsite_only
+      self.code = nil
+      self.links = []
+      self.ids = []
+      self.dirname = File.dirname(path)
+      self.dirname = self.dirname == '.' ? '' : dirname
+    end
+    # Gather links for the page:
+    #
+    # - +doc+: Nokogiri document to be parsed for links.
+    #
+    def gather_links(doc)
+      puts 'Gathering links' if @verbose
+      i = 0
+      # The links are in the anchors.
+      doc.search('a').each do |a|
+        # Ignore pilcrow (paragraph character) and up-arrow.
+        next if a.text == "\u00B6"
+        next if a.text == "\u2191"
+        href = a.attr('href')
+        next if href.nil? or href.empty?
+        next if RDocLinkChecker.offsite?(href) && onsite_only
+        next unless RDocLinkChecker.checkable?(href)
+        link = Link.new(href, a.text, dirname)
+        next if link.path.nil? || link.path.empty?
+        links.push(link)
+        link.puts(i) if @verbose
+        i += 1
+      end
+      puts "Gathered #{i} links" if @verbose
+    end
+    # Gather ids (potential link targets) for the page into +ids+.
+    #
+    # - +doc+: Nokogiri document to be parsed.
+    #
+    # Does nothing if ids were already gathered
+    # (some pages are both source and target).
+    def gather_ids(doc)
+      return unless ids.empty?
+      # For off-site, gather all ids, regardless of element.
+      if RDocLinkChecker.offsite?(path)
+        doc.xpath("//*[@id]").each do |element|
+          ids.push(element.attr('id'))
+        end
+        return
+      end
+      # We're on-site, which means that the page is RDoc-generated
+      # and we know what to expect.
+      # In theory, an author can link to any element that has an attribute :id.
+      # In practice, gathering all such elements is very time-consuming.
+      # These are the elements currently linked to:
+      #
+      # - body
+      # - a
+      # - div
+      # - dt
+      # - h*
+      #
+      # We can add more as needed (i.e., if/when we have actual broken links).
+      puts 'Gathering potential link targets' if @verbose
+      body = doc.at('//body')
+      # A document without a body has no link targets.
+      return if body.nil?
+      # body element has 'top', which is a link target.
+      # Use attr (a String), not attribute (an attribute object),
+      # so that the id can be compared with a link's fragment.
+      id = body.attr('id')
+      ids.push(id) if id
+      # Some ids are in the as (anchors).
+      body.search('a').each do |a|
+        id = a.attr('id')
+        ids.push(id) if id
+      end
+      # Method ids are in divs, but gather only method-detail divs.
+      body.search('div').each do |div|
+        class_ = div.attr('class')
+        next if class_.nil?
+        next unless class_.include?('method-')
+        id = div.attr('id')
+        ids.push(id) if id
+      end
+      # Constant ids are in dts.
+      body.search('dt').each do |dt|
+        id = dt.attr('id')
+        ids.push(id) if id
+      end
+      # Label ids are in headings.
+      %w[h1 h2 h3 h4 h5 h6].each do |tag|
+        body.search(tag).each do |h|
+          id = h.attr('id')
+          ids.push(id) if id
+        end
+      end
+      if @verbose
+        ids.each_with_index do |id, i|
+          puts '%4d %s' % [i, id]
+        end
+      end
+      puts "Gathered #{ids.size} potential link targets" if @verbose
+    end
   end
-  class Error < StandardError; end
+  # Class to represent a link.
+  class Link
+    attr_accessor :href, :text, :dirname, :path, :fragment, :valid_p, :real_path, :exception
+    # Returns a new \Link object:
+    #
+    # - +href+: attribute href from anchor element.
+    # - +text+: attribute text from anchor element.
+    # - +dirname+: directory path of the linking page.
+    #
+    # TODO: accept the anchor element, instead of its href and text.
+    def initialize(href, text, dirname)
+      self.href = href
+      self.text = text
+      self.dirname = dirname
+      path, fragment = href.split('#', 2)
+      self.path = path
+      self.fragment = fragment
+      self.valid_p = nil
+      self.real_path = make_real_path(dirname, path)
+      self.exception = nil
+    end
+    # Return the real (not relative) path of the link.
+    #
+    # - +dirname+: directory of the linking page, relative to the HTML directory
+    #   (empty or +nil+ for a page at the top level).
+    # - +path+: the link's path (its href without the fragment).
+    #
+    # Rules, in order:
+    #
+    # - A +nil+ path is returned as is.
+    # - A path beginning with <tt>./</tt> is returned with that prefix trimmed.
+    # - If +dirname+ is empty, the path is returned as is.
+    # - A URL (a path with a scheme, such as <tt>https:</tt>) is returned as is.
+    # - Each leading <tt>../</tt> removes one trailing directory from +dirname+;
+    #   the remainder of the path is joined to what is left.
+    # - Otherwise the path is joined to +dirname+.
+    #
+    def make_real_path(dirname, path)
+      return path if path.nil?
+      # Trim single dot.
+      return path.sub('./', '') if path.start_with?('./')
+      return path if dirname.nil? || dirname.empty?
+      # A URL is already absolute; never join it to a directory.
+      return path if path.match?(%r{\A[A-Za-z][A-Za-z0-9+.-]*:})
+      dirs = dirname.split('/')
+      # May have one or more leading '../'; each is 3 characters long.
+      # The dots are literal here: only a real '../' counts as a level.
+      up_prefix = path[%r{\A(?:\.\./)+}] || ''
+      levels = up_prefix.length / 3
+      if levels == 0
+        dirs.empty? ? path : File.join(dirname, path)
+      else
+        # Remove leading '../' elements.
+        remainder = path[up_prefix.length..-1]
+        # Remove the corresponding parts of dirname.
+        dirs.pop(levels)
+        dirs.empty? ? remainder : File.join(dirs.join('/'), remainder)
+      end
+    end
+    # Returns whether the link has a fragment.
+    def has_fragment?
+      fragment ? true : false
+    end
+    # Puts link info onto $stdout.
+    def puts(i)
+      $stdout.puts <<EOT
+Link #{i}:
+  Href:      #{href}
+  Text:      #{text}
+  Path:      #{path}
+  Fragment:  #{fragment}
+  Valid:     #{valid_p}
+  Real path: #{real_path}
+  Dirname:   #{dirname}
+EOT
+    end
+  end
 end

metadata CHANGED Viewed

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: rdoc_link_checker
 version: !ruby/object:Gem::Version
-  version: 0.2.0
+  version: 0.3.0
 platform: ruby
 authors:
 - burdettelamar
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2023-05-19 00:00:00.000000000 Z
+date: 2023-05-20 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: bundler
@@ -66,6 +66,7 @@ files:
 - README.md
 - Rakefile
 - bin/rdoc_link_checker
+- doc/help.txt
 - lib/rdoc_link_checker.rb
 - lib/rdoc_link_checker/version.rb
 - rdoc_link_checker.gemspec