rdoc_link_checker 0.4.0 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: cde4b6bb3a511ccee4b20f792707c85c8705b8a828754909f63f88c533d5efe4
4
- data.tar.gz: ff3dfefb26e94258e32a2c5f19bc0b894591611a4dff9612741e34e641ca8130
3
+ metadata.gz: 9e8d4cbf42017efc4d297c70e05d689b891cfc792ebd3285e1fcaf077a9f4303
4
+ data.tar.gz: ed4434d61b90db5a1d9db0b9a56ee98da5717d398864c1723cd7557d0e11867b
5
5
  SHA512:
6
- metadata.gz: c577bf0a97429715c606ee45986258a77bf5028a5e2af47f76e9e2776c5cf66c52045fbc6d1e28fc940e13c178b13ba5914aa6b952d2e24d13b6a36147675d8b
7
- data.tar.gz: 9bdcfb203468e9de9d0f94d02a2e0e4ac6140e1e33745012272c36afd469631204a9da410061383a7b7be434df08aaed48c5553bbb6b7578fae115a8fb41f1ec
6
+ metadata.gz: 724b89d6bb0b6cbf320bb00f29ccdbf467ef945aea04a31077fb8a2378007ddaec91dded922249b0ee806d3d91cf32af666670e1f016631d61f2ea546a8f0f1a
7
+ data.tar.gz: f95baa7b5b9d8e42028834a0bf298b19e4fffdcc4bd8c1229f55381ab3aeb27ae4561757f27309b14053fddf40c1e4aa008b8aaefe662323e2c2f503b2783942
data/README.md CHANGED
@@ -1,4 +1,4 @@
1
- # RDoc Link Checker
1
+ # RDocLinkChecker
2
2
 
3
3
  A gem to find broken links in HTML files generated by Ruby RDoc.
4
4
 
@@ -6,13 +6,23 @@ Reports a link as broken if:
6
6
 
7
7
  - The target page given by +href+ is not found.
8
8
  - The target page is found, but the fragment given by +href+
9
- is not a link target on that page;
9
+ is not a link target (element with attribute <tt>id</tt>) on that page;
10
10
  this usually causes a browser to open at the top of the page
11
11
  instead of at the given fragment.
12
12
 
13
- Note that some browsers are forgiving, and will open the target
14
- page at a link target similar to the given fragment;
15
- for example, fragment ```#bar``` may be opened at an element
16
- with id ```foobar```.
13
+ Some browsers are forgiving, and will open the target
14
+ page at a link target similar to the given fragment;
15
+ for example, fragment ```#bar``` may be opened at an element
16
+ with id ```foobar```.
17
17
 
18
18
  See the [help text](doc/help.txt).
19
+
20
+ <b>Note</b>: An RDoc bug that was fixed recently
21
+ (PR https://github.com/ruby/rdoc/pull/1002)
22
+ caused many (make that many, many) broken links in the TOC section
23
+ https://docs.ruby-lang.org/en/master/table_of_contents.html#classes.
24
+ Unless you have a recent Ruby version installed (one that has the bug fix),
25
+ the RDocLinkChecker will find and report all those broken links.
26
+
27
+ <b>Workaround</b>: Use option <tt>--no_toc</tt>, which suppresses checking
28
+ for those links.
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  class RDocLinkChecker
4
- VERSION = "0.4.0"
4
+ VERSION = "0.5.0"
5
5
  end
@@ -13,6 +13,8 @@ class RDocLinkChecker
13
13
 
14
14
  attr_accessor :html_dirpath, :onsite_only, :no_toc
15
15
 
16
+ attr_accessor :source_paths, :pages
17
+
16
18
  def initialize(
17
19
  html_dirpath,
18
20
  onsite_only: false,
@@ -21,26 +23,25 @@ class RDocLinkChecker
21
23
  self.html_dirpath = html_dirpath
22
24
  self.onsite_only = onsite_only
23
25
  self.no_toc = no_toc
24
- @pages = {}
26
+ self.pages = {}
25
27
  @counts = {
26
28
  source_pages: 0,
27
29
  target_pages: 0,
28
30
  links_checked: 0,
29
31
  links_broken: 0,
30
32
  }
31
- @verbose = false
32
33
  end
33
34
 
34
35
  def check
35
36
  # All work is done in the HTML directory,
36
37
  # and that is where Report.htm will be put.
37
38
  Dir.chdir(html_dirpath) do |dir|
38
- @counts[:start_time] = Time.now
39
+ @counts[:start_time] = Time.new
39
40
  gather_source_paths
40
41
  create_source_pages
41
42
  create_target_pages
42
43
  verify_links
43
- @counts[:end_time] = Time.now
44
+ @counts[:end_time] = Time.new
44
45
  report
45
46
  end
46
47
  end
@@ -48,36 +49,24 @@ class RDocLinkChecker
48
49
  # Gather paths to source HTML pages.
49
50
  def gather_source_paths
50
51
  paths = []
51
- puts 'Gathering source paths' if @verbose
52
52
  paths = Find.find('.').select {|path| path.end_with?('.html') }
53
53
  # Remove leading './'.
54
- @source_paths = paths.map{|path| path.sub(%r[^\./], '')}
55
- @source_paths.delete('table_of_contents.html') if no_toc
56
- if @verbose
57
- @source_paths.each_with_index do |source_path, i|
58
- puts '- %4d %s' % [i, source_path]
59
- end
60
- end
61
- @counts[:source_pages] = @source_paths.size
62
- puts "Gathered #{@source_paths.size} source paths" if @verbose
54
+ self.source_paths = paths.map{|path| path.sub(%r[^\./], '')}
55
+ @counts[:source_pages] = source_paths.size
63
56
  end
64
57
 
65
58
  # Create a source \Page object for each source path.
66
59
  # Gather its links and ids.
67
60
  def create_source_pages
68
- puts "Creating #{@source_paths.size} source pages" if @verbose
69
- @source_paths.sort.each_with_index do |source_path, i|
70
- progress_s = RDocLinkChecker.progress_s(i + 1, @source_paths.size)
71
- puts "Creating source page #{source_path} #{progress_s}" if @verbose
72
- source_page = Page.new(source_path, @verbose, @pages, @counts, onsite_only)
73
- @pages[source_path] = source_page
61
+ source_paths.sort.each_with_index do |source_path, i|
62
+ progress_s = RDocLinkChecker.progress_s(i + 1, source_paths.size)
63
+ source_page = Page.new(:source, source_path, onsite_only, pages: pages, counts: @counts)
64
+ pages[source_path] = source_page
74
65
  source_text = File.read(source_path)
75
66
  doc = Nokogiri::HTML(source_text)
76
- source_page.gather_links(doc)
67
+ source_page.gather_links(doc) unless no_toc
77
68
  source_page.gather_ids(doc)
78
- puts "Created source page #{progress_s}" if @verbose
79
69
  end
80
- puts "Created #{@pages.size} source pages" if @verbose
81
70
  end
82
71
 
83
72
  # Create a target \Page object for each link
@@ -85,40 +74,30 @@ class RDocLinkChecker
85
74
  def create_target_pages
86
75
  doc = nil
87
76
  target_page_count = 0
88
- @source_paths = @pages.keys
89
- @source_paths.each do |source_path|
77
+ source_paths = pages.keys
78
+ source_paths.each do |source_path|
90
79
  # Need for relative links to work.
91
80
  dirname = File.dirname(source_path)
92
81
  Dir.chdir(dirname) do
93
- source_page = @pages[source_path]
94
- puts "Creating target pages for #{source_page.links.size} links in #{source_path}" if @verbose
82
+ source_page = pages[source_path]
95
83
  source_page.links.each_with_index do |link, i|
96
84
  next if link.path.nil?
97
- link.puts(i) if @verbose
98
85
  target_path = link.real_path
99
- if @pages[target_path]
100
- puts "Page #{target_path} already created" if @verbose
101
- target_page = @pages[target_path]
86
+ if pages[target_path]
87
+ target_page = pages[target_path]
102
88
  else
89
+ target_page_count += 1
90
+ target_page = Page.new(:target, target_path, onsite_only, pages: pages, counts: @counts)
91
+ pages[target_path] = target_page
103
92
  if File.readable?(link.path)
104
- puts "Creating target page #{target_path}" if @verbose
105
- target_page_count += 1
106
- target_page = Page.new(target_path, @verbose, @pages, @counts, onsite_only)
107
- @pages[target_path] = target_page
108
93
  target_text = File.read(link.path)
109
94
  doc = Nokogiri::HTML(target_text)
110
95
  target_page.gather_ids(doc)
111
- puts "Created target page #{target_path}" if @verbose
112
96
  elsif RDocLinkChecker.checkable?(link.path)
113
- puts "Creating target page #{target_path}" if @verbose
114
- target_page_count += 1
115
- target_page = Page.new(target_path, @verbose, @pages, @counts, onsite_only)
116
- @pages[target_path] = target_page
117
- puts "Created target page #{target_path}" if @verbose
118
97
  link.exception = fetch(link.path, target_page)
119
98
  link.valid_p = false if link.exception
120
99
  else
121
- puts "File not readable or checkable: #{target_path}" if @verbose
100
+ # File not readable or checkable.
122
101
  end
123
102
  end
124
103
  next if target_page.nil?
@@ -127,40 +106,33 @@ class RDocLinkChecker
127
106
  target_page.gather_ids(doc)
128
107
  end
129
108
  end
130
- puts "Created target pages for #{source_page.links.size} links in #{source_path}" if @verbose
131
109
  end
132
110
  end
133
- puts "Created #{target_page_count} target pages" if @verbose
134
111
  @counts[:target_pages] = target_page_count
135
112
  end
136
113
 
137
114
  # Verify that each link target exists.
138
115
  def verify_links
139
- linking_pages = @pages.select do |path, page|
116
+ linking_pages = pages.select do |path, page|
140
117
  !page.links.empty?
141
118
  end
142
- puts "Checking links on #{linking_pages.size} pages" if @verbose
143
119
  link_count = 0
144
120
  broken_count = 0
145
121
  linking_pages.each_pair do |path, page|
146
- puts "Checking #{page.links.size} links on page #{path}" if @verbose
147
122
  link_count += page.links.size
148
123
  page.links.each_with_index do |link, i|
149
124
  if link.valid_p.nil? # Don't disturb if already set to false.
150
- target_page = @pages[link.real_path]
125
+ target_page = pages[link.real_path]
151
126
  if target_page
152
127
  target_id = link.fragment
153
128
  link.valid_p = target_id.nil? || target_page.ids.include?(target_id)
154
129
  else
155
- link_valid_p = false
130
+ link.valid_p = false
156
131
  end
157
132
  end
158
- link.puts(i) if @verbose
159
133
  broken_count += 1 unless link.valid_p
160
134
  end
161
- puts "Checked #{page.links.size} links on page #{path}" if @verbose
162
135
  end
163
- puts "Checked #{link_count} links on #{linking_pages.size} pages" if @verbose
164
136
  @counts[:links_checked] = link_count
165
137
  @counts[:links_broken] = broken_count
166
138
  end
@@ -168,21 +140,16 @@ class RDocLinkChecker
168
140
  # Fetch the page from the web and gather its ids into the target page.
169
141
  # Returns exception or nil.
170
142
  def fetch(url, target_page)
171
- puts "Begin fetch target page #{url}" if @verbose
172
- puts "Getting return code for #{url}" if @verbose
173
143
  code = 0
174
144
  exception = nil
175
145
  begin
176
146
  response = Net::HTTP.get_response(URI(url))
177
147
  code = response.code.to_i
178
148
  target_page.code = code
179
- puts "Returned #{code} (#{response.class})" if @verbose
180
149
  rescue => x
181
- puts "Raised #{x.class} #{x.message}" if @verbose
182
150
  raise unless x.class.name.match(/^(Net|SocketError|IO::TimeoutError|Errno::)/)
183
151
  exception = RDocLinkChecker::HttpResponseError.new(url, x)
184
152
  end
185
- puts "Got return code #{code} for #{url} " if @verbose
186
153
  # Don't load if bad code, or no response, or if not html.
187
154
  if !code_bad?(code)
188
155
  if content_type_html?(response)
@@ -190,7 +157,6 @@ class RDocLinkChecker
190
157
  target_page.gather_ids(doc)
191
158
  end
192
159
  end
193
- puts "End fetch target page #{url}" if @verbose
194
160
  exception
195
161
  end
196
162
 
@@ -262,7 +228,7 @@ EOT
262
228
 
263
229
  add_summary(body)
264
230
  add_broken_links(body)
265
- add_offsite_links(body) unless onsite_only
231
+ # add_offsite_links(body) unless onsite_only
266
232
  report_file_path = 'Report.htm' # _Not_ .html.
267
233
  doc.write(File.new(report_file_path, 'w'), 2)
268
234
  end
@@ -282,7 +248,7 @@ EOT
282
248
  row = {sym => :label, value => :good}
283
249
  data.push(row)
284
250
  end
285
- table2(body, data, 'Parameters')
251
+ table2(body, data, 'parameters', 'Parameters')
286
252
  body.add_element(Element.new('p'))
287
253
 
288
254
  # Times table.
@@ -291,7 +257,7 @@ EOT
291
257
  minutes = (elapsed_time / 60) % 60
292
258
  hours = (elapsed_time/3600)
293
259
  elapsed_time_s = "%2.2d:%2.2d:%2.2d" % [hours, minutes, seconds]
294
- format = "%Y-%m-%d-%a-%H:%M:%S"
260
+ format = "%Y-%m-%d-%a-%H:%M:%SZ"
295
261
  start_time_s = @counts[:start_time].strftime(format)
296
262
  end_time_s = @counts[:end_time].strftime(format)
297
263
  data = [
@@ -299,7 +265,7 @@ EOT
299
265
  {'End Time' => :label, end_time_s => :good},
300
266
  {'Elapsed Time' => :label, elapsed_time_s => :good},
301
267
  ]
302
- table2(body, data, 'Times')
268
+ table2(body, data, 'times', 'Times')
303
269
  body.add_element(Element.new('p'))
304
270
 
305
271
  # Counts.
@@ -309,7 +275,7 @@ EOT
309
275
  {'Links Checked' => :label, @counts[:links_checked] => :good},
310
276
  {'Links Broken' => :label, @counts[:links_broken] => :bad},
311
277
  ]
312
- table2(body, data, 'Counts')
278
+ table2(body, data, 'counts', 'Counts')
313
279
  body.add_element(Element.new('p'))
314
280
 
315
281
  end
@@ -324,6 +290,7 @@ EOT
324
290
  return
325
291
  end
326
292
 
293
+ # Legend.
327
294
  ul = body.add_element(Element.new('ul'))
328
295
  li = ul.add_element(Element.new('li'))
329
296
  li.text = 'Href: the href of the anchor element.'
@@ -345,17 +312,23 @@ Fragment: the fragment of the link.
345
312
  If the fragment is reddish, fragment was not found.
346
313
  EOT
347
314
 
348
- @pages.each_pair do |path, page|
315
+ pages.each_pair do |path, page|
349
316
  broken_links = page.links.select {|link| !link.valid_p }
350
317
  next if broken_links.empty?
351
318
 
352
- h3 = body.add_element(Element.new('h3'))
319
+ page_div = body.add_element(Element.new('div'))
320
+ page_div.add_attribute('class', 'broken_page')
321
+ page_div.add_attribute('path', path)
322
+ page_div.add_attribute('count', broken_links.count)
323
+ h3 = page_div.add_element(Element.new('h3'))
353
324
  a = Element.new('a')
354
- a.text = path
325
+ a.text = "#{path} (#{broken_links.count})"
355
326
  a.add_attribute('href', path)
356
327
  h3.add_element(a)
357
328
 
358
329
  broken_links.each do |link|
330
+ link_div = page_div.add_element(Element.new('div'))
331
+ link_div.add_attribute('class', 'broken_link')
359
332
  data = []
360
333
  # Text, URL, fragment
361
334
  a = Element.new('a')
@@ -372,8 +345,9 @@ EOT
372
345
  data.push({'Exception' => :label, link.exception.class => :bad})
373
346
  data.push({'Message' => :label, link.exception.message => :bad})
374
347
  end
375
- table2(body, data)
376
- body.add_element(Element.new('p'))
348
+ id = link.exception ? 'bad_url' : 'bad_fragment'
349
+ table2(link_div, data, id)
350
+ page_div.add_element(Element.new('p'))
377
351
  end
378
352
  end
379
353
 
@@ -382,12 +356,14 @@ EOT
382
356
  def add_offsite_links(body)
383
357
  h2 = body.add_element(Element.new('h2'))
384
358
  h2.text = 'Off-Site Links by Source Page'
385
- @pages.each_pair do |path, page|
359
+ none = true
360
+ pages.each_pair do |path, page|
386
361
  offsite_links = page.links.select do |link|
387
362
  RDocLinkChecker.offsite?(link.href)
388
363
  end
389
364
  next if offsite_links.empty?
390
365
 
366
+ none = false
391
367
  h3 = body.add_element(Element.new('h3'))
392
368
  a = Element.new('a')
393
369
  a.text = path
@@ -407,6 +383,10 @@ EOT
407
383
  body.add_element(Element.new('p'))
408
384
  end
409
385
  end
386
+ if none
387
+ p = body.add_element(Element.new('p'))
388
+ p.text = 'None.'
389
+ end
410
390
  end
411
391
 
412
392
  Classes = {
@@ -416,9 +396,10 @@ EOT
416
396
  bad: 'data center bad',
417
397
  }
418
398
 
419
- def table2(parent, data, title = nil)
399
+ def table2(parent, data, id, title = nil)
420
400
  data = data.dup
421
401
  table = parent.add_element(Element.new('table'))
402
+ table.add_attribute('id', id)
422
403
  if title
423
404
  tr = table.add_element(Element.new('tr)'))
424
405
  th = tr.add_element(Element.new('th'))
@@ -491,19 +472,18 @@ EOT
491
472
  # Class to represent a page.
492
473
  class Page
493
474
 
494
- attr_accessor :path, :type, :verbose, :pages, :counts, :code, :links, :ids, :dirname, :onsite_only
475
+ attr_accessor :path, :type, :pages, :counts, :code, :links, :ids, :dirname, :onsite_only
495
476
 
496
477
  # Returns a new \Page object:
497
478
  #
498
479
  # - +path+: a path relative to the HTML directory (if on-site)
499
480
  # or a URL (if off-site).
500
- # - +verbose+: whether to put progress message to $stdout.
501
481
  # - +pages+: hash of path/page pairs.
502
482
  # - +counts+: hash of counts.
503
483
  #
504
- def initialize(path, verbose, pages, counts, onsite_only)
484
+ def initialize(type, path, onsite_only, pages: {}, counts: {})
505
485
  self.path = path
506
- self.verbose = verbose
486
+ self.type = type
507
487
  self.pages = pages
508
488
  self.counts = counts
509
489
  self.onsite_only = onsite_only
@@ -514,12 +494,20 @@ EOT
514
494
  self.dirname = self.dirname == '.' ? '' : dirname
515
495
  end
516
496
 
497
+ def to_h
498
+ {
499
+ path: path,
500
+ type: type,
501
+ dirname: dirname,
502
+ code: code
503
+ }
504
+ end
505
+
517
506
  # Gather links for the page:
518
507
  #
519
508
  # - +doc+: Nokogiri document to be parsed for links.
520
509
  #
521
510
  def gather_links(doc)
522
- puts 'Gathering links' if @verbose
523
511
  i = 0
524
512
  # The links are in the anchors.
525
513
  doc.search('a').each do |a|
@@ -536,10 +524,8 @@ EOT
536
524
  next if link.path.nil? || link.path.empty?
537
525
 
538
526
  links.push(link)
539
- link.puts(i) if @verbose
540
527
  i += 1
541
528
  end
542
- puts "Gathered #{i} links" if @verbose
543
529
  end
544
530
 
545
531
  # Gather ids for the page.
@@ -570,7 +556,6 @@ EOT
570
556
  # - h*
571
557
  #
572
558
  # We can add more as needed (i.e., if/when we have actual broken links).
573
- puts 'Gathering potential link targets' if @verbose
574
559
 
575
560
  # body element has 'top', which is a link target.
576
561
  body = doc.at('//body')
@@ -605,13 +590,6 @@ EOT
605
590
  ids.push(id) if id
606
591
  end
607
592
  end
608
- if @verbose
609
- ids.each_with_index do |id, i|
610
- puts '%4d %s' % [i, id]
611
- end
612
- end
613
- puts "Gathered #{ids.size} potential link targets" if @verbose
614
-
615
593
  end
616
594
 
617
595
  end
@@ -627,7 +605,6 @@ EOT
627
605
  # - +text+: attribute text from anchor element.
628
606
  # - +dirname+: directory path of the linking page.
629
607
  #
630
- # TODO: accept the anchor element, instead of its href and text.
631
608
  def initialize(href, text, dirname)
632
609
  self.href = href
633
610
  self.text = text
@@ -640,6 +617,13 @@ EOT
640
617
  self.exception = nil
641
618
  end
642
619
 
620
+ def to_h
621
+ {
622
+ href: href,
623
+ text: text,
624
+ }
625
+ end
626
+
643
627
  # Return the real (not relative) path of the link.
644
628
  def make_real_path(dirname, path)
645
629
  # Trim single dot.
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rdoc_link_checker
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.0
4
+ version: 0.5.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Burdette Lamar
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2023-05-20 00:00:00.000000000 Z
11
+ date: 2023-05-24 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler