broken_link_finder 0.12.1 → 0.12.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 88b1e96f1de644a1a3c06ba7cc0ee1b53f75a3de6686b343e55028e8fa69da9f
4
- data.tar.gz: e399ca05a4b0b9b2c0644b2846fa9dc6be6acd664e1bdc58758eb9ca7a5543cd
3
+ metadata.gz: 4e60e87e2f9f1ae05d4ebe58066169e389994fba88ea3393a9dc5c3d04df20b6
4
+ data.tar.gz: 8019a1671fb811d6bf67feefb044f77be18dc04b0904b1b4e43cb258ee0d5fa3
5
5
  SHA512:
6
- metadata.gz: 57a1604358b0297b66604d1fc5a60a9d1bda05aa9bd5f6b91135ddc2aec4a6eb703c00ef4d905ac156170b190bf500481ce56cf6319f07e8b57447cca4c6a210
7
- data.tar.gz: f4b88e66c9c4fcd2bcbca2fe882abdede7c531e1d5e752a2ac986e39cf51d87714852dcb6e7e8e4870b623d54b468cc8f3ec88c253e7182c1fe89c0af91366a4
6
+ metadata.gz: 6b16be9cbbd35ea468ff879a833b8a20cc6557b6bf1f2c0c79ae7243fc8a75d0f1b2d19ff9030dc112816871b232f85cbe49e344ee0ada405062b731961912a8
7
+ data.tar.gz: cf4f43c4d5369f218e1700b941fd5a4b3681aaafc8131e1394b734b72d343b6ee743690f15c115f711ccf5e133fe91777224fd943b04ae89e46e3ca06f09e424
data/.ruby-version CHANGED
@@ -1 +1 @@
1
- 3.0.2
1
+ 3.3.0
data/CHANGELOG.md CHANGED
@@ -9,6 +9,26 @@
9
9
  - ...
10
10
  ---
11
11
 
12
+ ## v0.12.3
13
+ ### Added
14
+ - Added `BrokenLinkFinder::Finder#manager` getter method.
15
+ ### Changed/Removed
16
+ - Updated production dependencies including `wgit` (to `v0.12.0`).
17
+ - Updated `Wgit::Url`s to look like Strings when inspected.
18
+ ### Fixed
19
+ - ...
20
+ ---
21
+
22
+ ## v0.12.2
23
+ ### Added
24
+ - Updated to Ruby 3.3 and updated production dependencies including Wgit (v0.11.0)
25
+ - Added `--js` and `--js-delay` flag options to the executable. This allows JS parsing to update a page's DOM before it gets crawled.
26
+ ### Changed/Removed
27
+ - ...
28
+ ### Fixed
29
+ - ...
30
+ ---
31
+
12
32
  ## v0.12.1
13
33
  ### Added
14
34
  - Support for Ruby 3.
data/Gemfile.lock CHANGED
@@ -1,66 +1,70 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- broken_link_finder (0.12.1)
5
- thor (~> 0.20)
4
+ broken_link_finder (0.12.3)
5
+ thor (~> 1.3)
6
6
  thread (~> 0.2)
7
- wgit (~> 0.10)
7
+ wgit (~> 0.12)
8
8
 
9
9
  GEM
10
10
  remote: https://rubygems.org/
11
11
  specs:
12
- addressable (2.8.0)
13
- public_suffix (>= 2.0.2, < 5.0)
14
- bson (4.12.1)
12
+ addressable (2.8.7)
13
+ public_suffix (>= 2.0.2, < 7.0)
14
+ base64 (0.2.0)
15
+ bigdecimal (3.1.8)
16
+ bson (5.0.1)
15
17
  byebug (11.1.3)
16
- cliver (0.3.2)
17
18
  coderay (1.1.3)
18
- concurrent-ruby (1.1.9)
19
- crack (0.4.5)
19
+ concurrent-ruby (1.3.4)
20
+ crack (1.0.0)
21
+ bigdecimal
20
22
  rexml
21
- ethon (0.15.0)
23
+ ethon (0.16.0)
22
24
  ffi (>= 1.15.0)
23
- ferrum (0.11)
25
+ ferrum (0.15)
24
26
  addressable (~> 2.5)
25
- cliver (~> 0.3)
26
27
  concurrent-ruby (~> 1.1)
27
- websocket-driver (>= 0.6, < 0.8)
28
- ffi (1.15.4)
29
- hashdiff (1.0.1)
28
+ webrick (~> 1.7)
29
+ websocket-driver (~> 0.7)
30
+ ffi (1.17.0)
31
+ hashdiff (1.1.1)
30
32
  maxitest (3.7.0)
31
33
  minitest (>= 5.0.0, < 5.15.0)
32
- method_source (1.0.0)
33
- mini_portile2 (2.6.1)
34
+ method_source (1.1.0)
35
+ mini_portile2 (2.8.7)
34
36
  minitest (5.14.4)
35
- mongo (2.17.0)
36
- bson (>= 4.8.2, < 5.0.0)
37
- nokogiri (1.12.5)
38
- mini_portile2 (~> 2.6.1)
37
+ mongo (2.21.0)
38
+ bson (>= 4.14.1, < 6.0.0)
39
+ nokogiri (1.16.7)
40
+ mini_portile2 (~> 2.8.2)
39
41
  racc (~> 1.4)
40
- pry (0.14.1)
42
+ pry (0.14.2)
41
43
  coderay (~> 1.1)
42
44
  method_source (~> 1.0)
43
- public_suffix (4.0.6)
44
- racc (1.6.0)
45
- rake (13.0.6)
46
- rexml (3.2.5)
47
- thor (0.20.3)
45
+ public_suffix (6.0.1)
46
+ racc (1.8.1)
47
+ rake (13.2.1)
48
+ rexml (3.3.9)
49
+ thor (1.3.2)
48
50
  thread (0.2.2)
49
- typhoeus (1.4.0)
51
+ typhoeus (1.4.1)
50
52
  ethon (>= 0.9.0)
51
- webmock (3.14.0)
53
+ webmock (3.24.0)
52
54
  addressable (>= 2.8.0)
53
55
  crack (>= 0.3.2)
54
56
  hashdiff (>= 0.4.0, < 2.0.0)
55
- websocket-driver (0.7.5)
57
+ webrick (1.8.2)
58
+ websocket-driver (0.7.6)
56
59
  websocket-extensions (>= 0.1.0)
57
60
  websocket-extensions (0.1.5)
58
- wgit (0.10.2)
59
- addressable (~> 2.6)
60
- ferrum (~> 0.8)
61
- mongo (~> 2.9)
62
- nokogiri (~> 1.10)
63
- typhoeus (~> 1.3)
61
+ wgit (0.12.0)
62
+ addressable (~> 2.8)
63
+ base64 (~> 0.2)
64
+ ferrum (~> 0.14)
65
+ mongo (~> 2.19)
66
+ nokogiri (~> 1.15)
67
+ typhoeus (~> 1.4)
64
68
 
65
69
  PLATFORMS
66
70
  ruby
@@ -75,7 +79,7 @@ DEPENDENCIES
75
79
  webmock (~> 3.6)
76
80
 
77
81
  RUBY VERSION
78
- ruby 3.0.2p107
82
+ ruby 3.3.0p0
79
83
 
80
84
  BUNDLED WITH
81
- 2.2.22
85
+ 2.5.3
@@ -47,7 +47,7 @@ Gem::Specification.new do |spec|
47
47
  spec.add_development_dependency 'rake', '~> 13.0'
48
48
  spec.add_development_dependency 'webmock', '~> 3.6'
49
49
 
50
- spec.add_runtime_dependency 'thor', '~> 0.20'
50
+ spec.add_runtime_dependency 'thor', '~> 1.3'
51
51
  spec.add_runtime_dependency 'thread', '~> 0.2'
52
- spec.add_runtime_dependency 'wgit', '~> 0.10'
52
+ spec.add_runtime_dependency 'wgit', '~> 0.12'
53
53
  end
@@ -9,7 +9,9 @@ class BrokenLinkFinderCLI < Thor
9
9
  desc 'crawl [URL]', 'Find broken links at the URL'
10
10
  option :recursive, type: :boolean, aliases: [:r], default: false, desc: 'Crawl the entire site.'
11
11
  option :threads, type: :numeric, aliases: [:t], default: BrokenLinkFinder::DEFAULT_MAX_THREADS, desc: 'Max number of threads to use when crawling recursively; 1 thread per web page.'
12
- option :xpath, type: :string, aliases: [:x], default: BrokenLinkFinder::DEFAULT_LINK_XPATH
12
+ option :xpath, type: :string, aliases: [:x], default: BrokenLinkFinder::DEFAULT_LINK_XPATH, desc: 'The xpath to extract links with, before checking if broken'
13
+ option :js, type: :boolean, default: false, desc: 'Run the Javascript on a page before crawling the HTML, requires Chrome/Chromium to be installed to $PATH'
14
+ option :js_delay, type: :numeric, default: 1, desc: "The seconds of delay time given to a page's Javascript for it to update the DOM, requires the --js flag"
13
15
  option :html, type: :boolean, aliases: [:h], default: false, desc: 'Produce a HTML report (instead of text)'
14
16
  option :sort_by_link, type: :boolean, aliases: [:l], default: false, desc: 'Makes report more concise if there are more pages crawled than broken links found. Use with -r on medium/large sites.'
15
17
  option :verbose, type: :boolean, aliases: [:v], default: false, desc: 'Display all ignored links.'
@@ -22,15 +24,17 @@ class BrokenLinkFinderCLI < Thor
22
24
  max_threads = options[:threads]
23
25
  broken_verbose = !options[:concise]
24
26
  ignored_verbose = options[:verbose]
27
+ parse_js = options[:js]
28
+ parse_js_delay = options[:js_delay]
25
29
 
26
30
  BrokenLinkFinder.link_xpath = options[:xpath]
27
- finder = BrokenLinkFinder::Finder.new(sort: sort_by, max_threads: max_threads)
31
+ finder = BrokenLinkFinder::Finder.new(sort: sort_by, max_threads:) do |crawler|
32
+ crawler.parse_javascript = parse_js
33
+ crawler.parse_javascript_delay = parse_js_delay
34
+ end
35
+
28
36
  options[:recursive] ? finder.crawl_site(url) : finder.crawl_page(url)
29
- finder.report(
30
- type: report_type,
31
- broken_verbose: broken_verbose,
32
- ignored_verbose: ignored_verbose
33
- )
37
+ finder.report(type: report_type, broken_verbose:, ignored_verbose:)
34
38
 
35
39
  exit 0
36
40
  rescue StandardError => e
@@ -5,8 +5,8 @@ module BrokenLinkFinder
5
5
  SERVER_WAIT_TIME = 0.5 # Used by Finder#retry_broken_links.
6
6
 
7
7
  # Alias for BrokenLinkFinder::Finder.new.
8
- def self.new(sort: :page, max_threads: DEFAULT_MAX_THREADS)
9
- Finder.new(sort: sort, max_threads: max_threads)
8
+ def self.new(sort: :page, max_threads: DEFAULT_MAX_THREADS, &block)
9
+ Finder.new(sort: sort, max_threads: max_threads, &block)
10
10
  end
11
11
 
12
12
  # Class responsible for finding broken links on a page or site.
@@ -17,8 +17,14 @@ module BrokenLinkFinder
17
17
  # The max number of threads created during #crawl_site - one thread per page.
18
18
  attr_reader :max_threads
19
19
 
20
+ # The underlying Wgit::Crawler used by this instance of Finder.
21
+ attr_reader :crawler
22
+
23
+ # The underlying link manager used by this instance of Finder.
24
+ attr_reader :manager
25
+
20
26
  # Returns a new Finder instance.
21
- def initialize(sort: :page, max_threads: DEFAULT_MAX_THREADS)
27
+ def initialize(sort: :page, max_threads: DEFAULT_MAX_THREADS, &block)
22
28
  raise "Sort by either :page or :link, not #{sort}" \
23
29
  unless %i[page link].include?(sort)
24
30
 
@@ -26,6 +32,8 @@ module BrokenLinkFinder
26
32
  @max_threads = max_threads
27
33
  @crawler = Wgit::Crawler.new
28
34
  @manager = BrokenLinkFinder::LinkManager.new(@sort)
35
+
36
+ yield @crawler if block_given?
29
37
  end
30
38
 
31
39
  # Returns the current broken links.
@@ -56,7 +64,7 @@ module BrokenLinkFinder
56
64
  doc = @crawler.crawl(url.dup)
57
65
 
58
66
  # Ensure the given page url is valid.
59
- raise "Invalid or broken URL: #{url}" unless doc
67
+ raise "Invalid or broken URL: #{url}" if doc.empty?
60
68
 
61
69
  # Get all page links and determine which are broken.
62
70
  find_broken_links(doc)
@@ -207,7 +215,7 @@ module BrokenLinkFinder
207
215
 
208
216
  # Return if the crawled link is broken or not.
209
217
  def link_broken?(doc)
210
- doc.nil? || @crawler.last_response.not_found? || has_broken_anchor(doc)
218
+ doc.empty? || @crawler.last_response.not_found? || has_broken_anchor(doc)
211
219
  end
212
220
 
213
221
  # Returns true if the link is/contains a broken anchor/fragment.
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module BrokenLinkFinder
4
- VERSION = '0.12.1'
4
+ VERSION = '0.12.3'
5
5
  end
@@ -1,5 +1,12 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ # Make Wgit::Urls look like Strings when inspected.
4
+ class Wgit::Url
5
+ def inspect
6
+ to_s.inspect
7
+ end
8
+ end
9
+
3
10
  # Define a method on each doc for recording unparsable links.
4
11
  # Unparsable links are recorded as broken links by Finder.
5
12
  class Wgit::Document
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: broken_link_finder
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.12.1
4
+ version: 0.12.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Michael Telford
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2021-11-22 00:00:00.000000000 Z
11
+ date: 2024-10-30 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -100,14 +100,14 @@ dependencies:
100
100
  requirements:
101
101
  - - "~>"
102
102
  - !ruby/object:Gem::Version
103
- version: '0.20'
103
+ version: '1.3'
104
104
  type: :runtime
105
105
  prerelease: false
106
106
  version_requirements: !ruby/object:Gem::Requirement
107
107
  requirements:
108
108
  - - "~>"
109
109
  - !ruby/object:Gem::Version
110
- version: '0.20'
110
+ version: '1.3'
111
111
  - !ruby/object:Gem::Dependency
112
112
  name: thread
113
113
  requirement: !ruby/object:Gem::Requirement
@@ -128,14 +128,14 @@ dependencies:
128
128
  requirements:
129
129
  - - "~>"
130
130
  - !ruby/object:Gem::Version
131
- version: '0.10'
131
+ version: '0.12'
132
132
  type: :runtime
133
133
  prerelease: false
134
134
  version_requirements: !ruby/object:Gem::Requirement
135
135
  requirements:
136
136
  - - "~>"
137
137
  - !ruby/object:Gem::Version
138
- version: '0.10'
138
+ version: '0.12'
139
139
  description: Finds a website's broken links using the 'wgit' gem and reports back
140
140
  to you with a summary.
141
141
  email: michael.telford@live.com
@@ -194,7 +194,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
194
194
  - !ruby/object:Gem::Version
195
195
  version: '0'
196
196
  requirements: []
197
- rubygems_version: 3.2.22
197
+ rubygems_version: 3.5.22
198
198
  signing_key:
199
199
  specification_version: 4
200
200
  summary: Finds a website's broken links and reports back to you with a summary.