rdoc_link_checker 0.4.0 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: cde4b6bb3a511ccee4b20f792707c85c8705b8a828754909f63f88c533d5efe4
4
- data.tar.gz: ff3dfefb26e94258e32a2c5f19bc0b894591611a4dff9612741e34e641ca8130
3
+ metadata.gz: 77d49154c855b923e70e92faf6e9206627a1e1793d3f93e9a6bf24c137c203f5
4
+ data.tar.gz: c93421e52ad4f531ba57ab1cfd47e4996016621ec7f281b30a50ca4c76136c23
5
5
  SHA512:
6
- metadata.gz: c577bf0a97429715c606ee45986258a77bf5028a5e2af47f76e9e2776c5cf66c52045fbc6d1e28fc940e13c178b13ba5914aa6b952d2e24d13b6a36147675d8b
7
- data.tar.gz: 9bdcfb203468e9de9d0f94d02a2e0e4ac6140e1e33745012272c36afd469631204a9da410061383a7b7be434df08aaed48c5553bbb6b7578fae115a8fb41f1ec
6
+ metadata.gz: f7e745ab474bec8a54e49ffc259a3f91a58231166f8c723c352e2235dba33fe1ef2db56b9f918d784b0f0caca6a145aacad5b669cd3c49b3d90fd6179684d2ff
7
+ data.tar.gz: da062b580efb28b8fce02ab8b30f58270b4e08aabeab86a8441b7de68cd172a5fe13aa59274d6da2219e35cfd1bc0817308237f71d570db144a0a5ccb892e3f7
data/README.md CHANGED
@@ -1,4 +1,4 @@
1
- # RDoc Link Checker
1
+ # RDocLinkChecker
2
2
 
3
3
  A gem to find broken links in HTML files generated by Ruby RDoc.
4
4
 
@@ -6,13 +6,23 @@ Reports a link as broken if:
6
6
 
7
7
  - The target page given by +href+ is not found.
8
8
  - The target page is found, but the fragment given by +href+
9
- is not a link target on that page;
9
+ is not a link target (element with attribute <tt>id</tt>) on that page;
10
10
  this usually causes a browser to open at the top of the page
11
11
  instead of at the given fragment.
12
12
 
13
- Note that some browsers are forgiving, and will open the target
14
- page at a link target similar to the given fragment;
15
- for example, fragment ```#bar``` may be opened at an element
16
- with id ```foobar```.
13
+ Some browsers are forgiving, and will open the target
14
+ page at a link target similar to the given fragment;
15
+ for example, fragment ```#bar``` may be opened at an element
16
+ with id ```foobar```.
17
17
 
18
18
  See the [help text](doc/help.txt).
19
+
20
+ <b>Note</b>: An RDoc bug that was fixed recently
21
+ (PR https://github.com/ruby/rdoc/pull/1002)
22
+ caused many (make that many, many) broken links in the TOC section
23
+ https://docs.ruby-lang.org/en/master/table_of_contents.html#classes.
24
+ Unless you have a recent Ruby version installed (one that has the bug fix),
25
+ the RDocLinkChecker will find and report all those broken links.
26
+
27
+ <b>Workaround</b>: Use option <tt>--no_toc</tt>, which suppresses checking
28
+ for those links.
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  class RDocLinkChecker
4
- VERSION = "0.4.0"
4
+ VERSION = "0.6.0"
5
5
  end
@@ -13,6 +13,8 @@ class RDocLinkChecker
13
13
 
14
14
  attr_accessor :html_dirpath, :onsite_only, :no_toc
15
15
 
16
+ attr_accessor :source_paths, :pages
17
+
16
18
  def initialize(
17
19
  html_dirpath,
18
20
  onsite_only: false,
@@ -21,26 +23,25 @@ class RDocLinkChecker
21
23
  self.html_dirpath = html_dirpath
22
24
  self.onsite_only = onsite_only
23
25
  self.no_toc = no_toc
24
- @pages = {}
26
+ self.pages = {}
25
27
  @counts = {
26
28
  source_pages: 0,
27
29
  target_pages: 0,
28
30
  links_checked: 0,
29
31
  links_broken: 0,
30
32
  }
31
- @verbose = false
32
33
  end
33
34
 
34
35
  def check
35
36
  # All work is done in the HTML directory,
36
37
  # and that is where Report.htm will be put.
37
38
  Dir.chdir(html_dirpath) do |dir|
38
- @counts[:start_time] = Time.now
39
+ @counts[:start_time] = Time.new
39
40
  gather_source_paths
40
41
  create_source_pages
41
42
  create_target_pages
42
43
  verify_links
43
- @counts[:end_time] = Time.now
44
+ @counts[:end_time] = Time.new
44
45
  report
45
46
  end
46
47
  end
@@ -48,36 +49,28 @@ class RDocLinkChecker
48
49
  # Gather paths to source HTML pages.
49
50
  def gather_source_paths
50
51
  paths = []
51
- puts 'Gathering source paths' if @verbose
52
52
  paths = Find.find('.').select {|path| path.end_with?('.html') }
53
53
  # Remove leading './'.
54
- @source_paths = paths.map{|path| path.sub(%r[^\./], '')}
55
- @source_paths.delete('table_of_contents.html') if no_toc
56
- if @verbose
57
- @source_paths.each_with_index do |source_path, i|
58
- puts '- %4d %s' % [i, source_path]
59
- end
60
- end
61
- @counts[:source_pages] = @source_paths.size
62
- puts "Gathered #{@source_paths.size} source paths" if @verbose
54
+ self.source_paths = paths.map{|path| path.sub(%r[^\./], '')}
55
+ @counts[:source_pages] = source_paths.size
63
56
  end
64
57
 
65
58
  # Create a source \Page object for each source path.
66
59
  # Gather its links and ids.
67
60
  def create_source_pages
68
- puts "Creating #{@source_paths.size} source pages" if @verbose
69
- @source_paths.sort.each_with_index do |source_path, i|
70
- progress_s = RDocLinkChecker.progress_s(i + 1, @source_paths.size)
71
- puts "Creating source page #{source_path} #{progress_s}" if @verbose
72
- source_page = Page.new(source_path, @verbose, @pages, @counts, onsite_only)
73
- @pages[source_path] = source_page
61
+ source_paths.sort.each_with_index do |source_path, i|
62
+ progress_s = RDocLinkChecker.progress_s(i + 1, source_paths.size)
63
+ source_page = Page.new(:source, source_path, onsite_only, pages: pages, counts: @counts)
64
+ pages[source_path] = source_page
74
65
  source_text = File.read(source_path)
75
66
  doc = Nokogiri::HTML(source_text)
76
- source_page.gather_links(doc)
67
+ if source_path == 'table_of_contents.html'
68
+ source_page.gather_links(doc) unless no_toc
69
+ else
70
+ source_page.gather_links(doc)
71
+ end
77
72
  source_page.gather_ids(doc)
78
- puts "Created source page #{progress_s}" if @verbose
79
73
  end
80
- puts "Created #{@pages.size} source pages" if @verbose
81
74
  end
82
75
 
83
76
  # Create a target \Page object for each link
@@ -85,40 +78,30 @@ class RDocLinkChecker
85
78
  def create_target_pages
86
79
  doc = nil
87
80
  target_page_count = 0
88
- @source_paths = @pages.keys
89
- @source_paths.each do |source_path|
81
+ source_paths = pages.keys
82
+ source_paths.each do |source_path|
90
83
  # Need for relative links to work.
91
84
  dirname = File.dirname(source_path)
92
85
  Dir.chdir(dirname) do
93
- source_page = @pages[source_path]
94
- puts "Creating target pages for #{source_page.links.size} links in #{source_path}" if @verbose
86
+ source_page = pages[source_path]
95
87
  source_page.links.each_with_index do |link, i|
96
88
  next if link.path.nil?
97
- link.puts(i) if @verbose
98
89
  target_path = link.real_path
99
- if @pages[target_path]
100
- puts "Page #{target_path} already created" if @verbose
101
- target_page = @pages[target_path]
90
+ if pages[target_path]
91
+ target_page = pages[target_path]
102
92
  else
93
+ target_page_count += 1
94
+ target_page = Page.new(:target, target_path, onsite_only, pages: pages, counts: @counts)
95
+ pages[target_path] = target_page
103
96
  if File.readable?(link.path)
104
- puts "Creating target page #{target_path}" if @verbose
105
- target_page_count += 1
106
- target_page = Page.new(target_path, @verbose, @pages, @counts, onsite_only)
107
- @pages[target_path] = target_page
108
97
  target_text = File.read(link.path)
109
98
  doc = Nokogiri::HTML(target_text)
110
99
  target_page.gather_ids(doc)
111
- puts "Created target page #{target_path}" if @verbose
112
100
  elsif RDocLinkChecker.checkable?(link.path)
113
- puts "Creating target page #{target_path}" if @verbose
114
- target_page_count += 1
115
- target_page = Page.new(target_path, @verbose, @pages, @counts, onsite_only)
116
- @pages[target_path] = target_page
117
- puts "Created target page #{target_path}" if @verbose
118
101
  link.exception = fetch(link.path, target_page)
119
102
  link.valid_p = false if link.exception
120
103
  else
121
- puts "File not readable or checkable: #{target_path}" if @verbose
104
+ # File not readable or checkable.
122
105
  end
123
106
  end
124
107
  next if target_page.nil?
@@ -127,40 +110,33 @@ class RDocLinkChecker
127
110
  target_page.gather_ids(doc)
128
111
  end
129
112
  end
130
- puts "Created target pages for #{source_page.links.size} links in #{source_path}" if @verbose
131
113
  end
132
114
  end
133
- puts "Created #{target_page_count} target pages" if @verbose
134
115
  @counts[:target_pages] = target_page_count
135
116
  end
136
117
 
137
118
  # Verify that each link target exists.
138
119
  def verify_links
139
- linking_pages = @pages.select do |path, page|
120
+ linking_pages = pages.select do |path, page|
140
121
  !page.links.empty?
141
122
  end
142
- puts "Checking links on #{linking_pages.size} pages" if @verbose
143
123
  link_count = 0
144
124
  broken_count = 0
145
125
  linking_pages.each_pair do |path, page|
146
- puts "Checking #{page.links.size} links on page #{path}" if @verbose
147
126
  link_count += page.links.size
148
127
  page.links.each_with_index do |link, i|
149
128
  if link.valid_p.nil? # Don't disturb if already set to false.
150
- target_page = @pages[link.real_path]
129
+ target_page = pages[link.real_path]
151
130
  if target_page
152
131
  target_id = link.fragment
153
132
  link.valid_p = target_id.nil? || target_page.ids.include?(target_id)
154
133
  else
155
- link_valid_p = false
134
+ link.valid_p = false
156
135
  end
157
136
  end
158
- link.puts(i) if @verbose
159
137
  broken_count += 1 unless link.valid_p
160
138
  end
161
- puts "Checked #{page.links.size} links on page #{path}" if @verbose
162
139
  end
163
- puts "Checked #{link_count} links on #{linking_pages.size} pages" if @verbose
164
140
  @counts[:links_checked] = link_count
165
141
  @counts[:links_broken] = broken_count
166
142
  end
@@ -168,21 +144,16 @@ class RDocLinkChecker
168
144
  # Fetch the page from the web and gather its ids into the target page.
169
145
  # Returns exception or nil.
170
146
  def fetch(url, target_page)
171
- puts "Begin fetch target page #{url}" if @verbose
172
- puts "Getting return code for #{url}" if @verbose
173
147
  code = 0
174
148
  exception = nil
175
149
  begin
176
150
  response = Net::HTTP.get_response(URI(url))
177
151
  code = response.code.to_i
178
152
  target_page.code = code
179
- puts "Returned #{code} (#{response.class})" if @verbose
180
153
  rescue => x
181
- puts "Raised #{x.class} #{x.message}" if @verbose
182
154
  raise unless x.class.name.match(/^(Net|SocketError|IO::TimeoutError|Errno::)/)
183
155
  exception = RDocLinkChecker::HttpResponseError.new(url, x)
184
156
  end
185
- puts "Got return code #{code} for #{url} " if @verbose
186
157
  # Don't load if bad code, or no response, or if not html.
187
158
  if !code_bad?(code)
188
159
  if content_type_html?(response)
@@ -190,7 +161,6 @@ class RDocLinkChecker
190
161
  target_page.gather_ids(doc)
191
162
  end
192
163
  end
193
- puts "End fetch target page #{url}" if @verbose
194
164
  exception
195
165
  end
196
166
 
@@ -262,7 +232,7 @@ EOT
262
232
 
263
233
  add_summary(body)
264
234
  add_broken_links(body)
265
- add_offsite_links(body) unless onsite_only
235
+ # add_offsite_links(body) unless onsite_only
266
236
  report_file_path = 'Report.htm' # _Not_ .html.
267
237
  doc.write(File.new(report_file_path, 'w'), 2)
268
238
  end
@@ -282,7 +252,7 @@ EOT
282
252
  row = {sym => :label, value => :good}
283
253
  data.push(row)
284
254
  end
285
- table2(body, data, 'Parameters')
255
+ table2(body, data, 'parameters', 'Parameters')
286
256
  body.add_element(Element.new('p'))
287
257
 
288
258
  # Times table.
@@ -291,7 +261,7 @@ EOT
291
261
  minutes = (elapsed_time / 60) % 60
292
262
  hours = (elapsed_time/3600)
293
263
  elapsed_time_s = "%2.2d:%2.2d:%2.2d" % [hours, minutes, seconds]
294
- format = "%Y-%m-%d-%a-%H:%M:%S"
264
+ format = "%Y-%m-%d-%a-%H:%M:%SZ"
295
265
  start_time_s = @counts[:start_time].strftime(format)
296
266
  end_time_s = @counts[:end_time].strftime(format)
297
267
  data = [
@@ -299,7 +269,7 @@ EOT
299
269
  {'End Time' => :label, end_time_s => :good},
300
270
  {'Elapsed Time' => :label, elapsed_time_s => :good},
301
271
  ]
302
- table2(body, data, 'Times')
272
+ table2(body, data, 'times', 'Times')
303
273
  body.add_element(Element.new('p'))
304
274
 
305
275
  # Counts.
@@ -309,7 +279,7 @@ EOT
309
279
  {'Links Checked' => :label, @counts[:links_checked] => :good},
310
280
  {'Links Broken' => :label, @counts[:links_broken] => :bad},
311
281
  ]
312
- table2(body, data, 'Counts')
282
+ table2(body, data, 'counts', 'Counts')
313
283
  body.add_element(Element.new('p'))
314
284
 
315
285
  end
@@ -324,6 +294,7 @@ EOT
324
294
  return
325
295
  end
326
296
 
297
+ # Legend.
327
298
  ul = body.add_element(Element.new('ul'))
328
299
  li = ul.add_element(Element.new('li'))
329
300
  li.text = 'Href: the href of the anchor element.'
@@ -345,17 +316,23 @@ Fragment: the fragment of the link.
345
316
  If the fragment is reddish, fragment was not found.
346
317
  EOT
347
318
 
348
- @pages.each_pair do |path, page|
319
+ pages.each_pair do |path, page|
349
320
  broken_links = page.links.select {|link| !link.valid_p }
350
321
  next if broken_links.empty?
351
322
 
352
- h3 = body.add_element(Element.new('h3'))
323
+ page_div = body.add_element(Element.new('div'))
324
+ page_div.add_attribute('class', 'broken_page')
325
+ page_div.add_attribute('path', path)
326
+ page_div.add_attribute('count', broken_links.count)
327
+ h3 = page_div.add_element(Element.new('h3'))
353
328
  a = Element.new('a')
354
- a.text = path
329
+ a.text = "#{path} (#{broken_links.count})"
355
330
  a.add_attribute('href', path)
356
331
  h3.add_element(a)
357
332
 
358
333
  broken_links.each do |link|
334
+ link_div = page_div.add_element(Element.new('div'))
335
+ link_div.add_attribute('class', 'broken_link')
359
336
  data = []
360
337
  # Text, URL, fragment
361
338
  a = Element.new('a')
@@ -372,8 +349,9 @@ EOT
372
349
  data.push({'Exception' => :label, link.exception.class => :bad})
373
350
  data.push({'Message' => :label, link.exception.message => :bad})
374
351
  end
375
- table2(body, data)
376
- body.add_element(Element.new('p'))
352
+ id = link.exception ? 'bad_url' : 'bad_fragment'
353
+ table2(link_div, data, id)
354
+ page_div.add_element(Element.new('p'))
377
355
  end
378
356
  end
379
357
 
@@ -382,12 +360,14 @@ EOT
382
360
  def add_offsite_links(body)
383
361
  h2 = body.add_element(Element.new('h2'))
384
362
  h2.text = 'Off-Site Links by Source Page'
385
- @pages.each_pair do |path, page|
363
+ none = true
364
+ pages.each_pair do |path, page|
386
365
  offsite_links = page.links.select do |link|
387
366
  RDocLinkChecker.offsite?(link.href)
388
367
  end
389
368
  next if offsite_links.empty?
390
369
 
370
+ none = false
391
371
  h3 = body.add_element(Element.new('h3'))
392
372
  a = Element.new('a')
393
373
  a.text = path
@@ -407,6 +387,10 @@ EOT
407
387
  body.add_element(Element.new('p'))
408
388
  end
409
389
  end
390
+ if none
391
+ p = body.add_element(Element.new('p'))
392
+ p.text = 'None.'
393
+ end
410
394
  end
411
395
 
412
396
  Classes = {
@@ -416,9 +400,10 @@ EOT
416
400
  bad: 'data center bad',
417
401
  }
418
402
 
419
- def table2(parent, data, title = nil)
403
+ def table2(parent, data, id, title = nil)
420
404
  data = data.dup
421
405
  table = parent.add_element(Element.new('table'))
406
+ table.add_attribute('id', id)
422
407
  if title
423
408
  tr = table.add_element(Element.new('tr)'))
424
409
  th = tr.add_element(Element.new('th'))
@@ -491,19 +476,18 @@ EOT
491
476
  # Class to represent a page.
492
477
  class Page
493
478
 
494
- attr_accessor :path, :type, :verbose, :pages, :counts, :code, :links, :ids, :dirname, :onsite_only
479
+ attr_accessor :path, :type, :pages, :counts, :code, :links, :ids, :dirname, :onsite_only
495
480
 
496
481
  # Returns a new \Page object:
497
482
  #
498
483
  # - +path+: a path relative to the HTML directory (if on-site)
499
484
  # or a URL (if off-site).
500
- # - +verbose+: whether to put progress message to $stdout.
501
485
  # - +pages+: hash of path/page pairs.
502
486
  # - +counts+: hash of counts.
503
487
  #
504
- def initialize(path, verbose, pages, counts, onsite_only)
488
+ def initialize(type, path, onsite_only, pages: {}, counts: {})
505
489
  self.path = path
506
- self.verbose = verbose
490
+ self.type = type
507
491
  self.pages = pages
508
492
  self.counts = counts
509
493
  self.onsite_only = onsite_only
@@ -514,12 +498,20 @@ EOT
514
498
  self.dirname = self.dirname == '.' ? '' : dirname
515
499
  end
516
500
 
501
+ def to_h
502
+ {
503
+ path: path,
504
+ type: type,
505
+ dirname: dirname,
506
+ code: code
507
+ }
508
+ end
509
+
517
510
  # Gather links for the page:
518
511
  #
519
512
  # - +doc+: Nokogiri document to be parsed for links.
520
513
  #
521
514
  def gather_links(doc)
522
- puts 'Gathering links' if @verbose
523
515
  i = 0
524
516
  # The links are in the anchors.
525
517
  doc.search('a').each do |a|
@@ -536,10 +528,8 @@ EOT
536
528
  next if link.path.nil? || link.path.empty?
537
529
 
538
530
  links.push(link)
539
- link.puts(i) if @verbose
540
531
  i += 1
541
532
  end
542
- puts "Gathered #{i} links" if @verbose
543
533
  end
544
534
 
545
535
  # Gather ids for the page.
@@ -570,7 +560,6 @@ EOT
570
560
  # - h*
571
561
  #
572
562
  # We can add more as needed (i.e., if/when we have actual broken links).
573
- puts 'Gathering potential link targets' if @verbose
574
563
 
575
564
  # body element has 'top', which is a link target.
576
565
  body = doc.at('//body')
@@ -605,13 +594,6 @@ EOT
605
594
  ids.push(id) if id
606
595
  end
607
596
  end
608
- if @verbose
609
- ids.each_with_index do |id, i|
610
- puts '%4d %s' % [i, id]
611
- end
612
- end
613
- puts "Gathered #{ids.size} potential link targets" if @verbose
614
-
615
597
  end
616
598
 
617
599
  end
@@ -627,7 +609,6 @@ EOT
627
609
  # - +text+: attribute text from anchor element.
628
610
  # - +dirname+: directory path of the linking page.
629
611
  #
630
- # TODO: accept the anchor element, instead of its href and text.
631
612
  def initialize(href, text, dirname)
632
613
  self.href = href
633
614
  self.text = text
@@ -640,6 +621,13 @@ EOT
640
621
  self.exception = nil
641
622
  end
642
623
 
624
+ def to_h
625
+ {
626
+ href: href,
627
+ text: text,
628
+ }
629
+ end
630
+
643
631
  # Return the real (not relative) path of the link.
644
632
  def make_real_path(dirname, path)
645
633
  # Trim single dot.
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rdoc_link_checker
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.0
4
+ version: 0.6.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Burdette Lamar
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2023-05-20 00:00:00.000000000 Z
11
+ date: 2023-05-25 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler