broken_link_finder 0.12.1 → 0.12.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 88b1e96f1de644a1a3c06ba7cc0ee1b53f75a3de6686b343e55028e8fa69da9f
4
- data.tar.gz: e399ca05a4b0b9b2c0644b2846fa9dc6be6acd664e1bdc58758eb9ca7a5543cd
3
+ metadata.gz: 2279e54526bbf0b0a68072c060e480bc5efe3e8cd9deefd028789716c736cf77
4
+ data.tar.gz: 667e1e86207dd74d594e5e57411076cc21713f15f549adb5a0c904ed5633902a
5
5
  SHA512:
6
- metadata.gz: 57a1604358b0297b66604d1fc5a60a9d1bda05aa9bd5f6b91135ddc2aec4a6eb703c00ef4d905ac156170b190bf500481ce56cf6319f07e8b57447cca4c6a210
7
- data.tar.gz: f4b88e66c9c4fcd2bcbca2fe882abdede7c531e1d5e752a2ac986e39cf51d87714852dcb6e7e8e4870b623d54b468cc8f3ec88c253e7182c1fe89c0af91366a4
6
+ metadata.gz: 3dcc51f65cfb9b869f77f5becb0861b617b2b5424ad0890ff238f18be1f8cc5d70173dc9dd3a98d18754986dc8a2c578e66c7db533fbddcf8673a1ca8c1c90dd
7
+ data.tar.gz: 7a4f7ee5cd856a2713bfa89f250aaf5bf0458818604cdc89ecd683af23337b2bb7b695b51b3f7ce134f3d696e9e447b2c1eaff97ca41b6f683dc85e9ee0dfb83
data/.ruby-version CHANGED
@@ -1 +1 @@
1
- 3.0.2
1
+ 3.3.0
data/CHANGELOG.md CHANGED
@@ -9,6 +9,16 @@
9
9
  - ...
10
10
  ---
11
11
 
12
+ ## v0.12.2
13
+ ### Added
14
+ - Updated to Ruby 3.3 and updated production dependencies including Wgit (v0.11)
15
+ - Added `--js` and `--js-delay` flag options to the executable. This allows JS parsing to update a page's DOM before it gets crawled.
16
+ ### Changed/Removed
17
+ - ...
18
+ ### Fixed
19
+ - ...
20
+ ---
21
+
12
22
  ## v0.12.1
13
23
  ### Added
14
24
  - Support for Ruby 3.
data/Gemfile.lock CHANGED
@@ -1,66 +1,66 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- broken_link_finder (0.12.1)
5
- thor (~> 0.20)
4
+ broken_link_finder (0.12.2)
5
+ thor (~> 1.3)
6
6
  thread (~> 0.2)
7
- wgit (~> 0.10)
7
+ wgit (~> 0.11)
8
8
 
9
9
  GEM
10
10
  remote: https://rubygems.org/
11
11
  specs:
12
- addressable (2.8.0)
13
- public_suffix (>= 2.0.2, < 5.0)
14
- bson (4.12.1)
12
+ addressable (2.8.6)
13
+ public_suffix (>= 2.0.2, < 6.0)
14
+ bson (4.15.0)
15
15
  byebug (11.1.3)
16
- cliver (0.3.2)
17
16
  coderay (1.1.3)
18
- concurrent-ruby (1.1.9)
17
+ concurrent-ruby (1.2.2)
19
18
  crack (0.4.5)
20
19
  rexml
21
- ethon (0.15.0)
20
+ ethon (0.16.0)
22
21
  ffi (>= 1.15.0)
23
- ferrum (0.11)
22
+ ferrum (0.14)
24
23
  addressable (~> 2.5)
25
- cliver (~> 0.3)
26
24
  concurrent-ruby (~> 1.1)
25
+ webrick (~> 1.7)
27
26
  websocket-driver (>= 0.6, < 0.8)
28
- ffi (1.15.4)
29
- hashdiff (1.0.1)
27
+ ffi (1.16.3)
28
+ hashdiff (1.1.0)
30
29
  maxitest (3.7.0)
31
30
  minitest (>= 5.0.0, < 5.15.0)
32
31
  method_source (1.0.0)
33
- mini_portile2 (2.6.1)
32
+ mini_portile2 (2.8.5)
34
33
  minitest (5.14.4)
35
- mongo (2.17.0)
36
- bson (>= 4.8.2, < 5.0.0)
37
- nokogiri (1.12.5)
38
- mini_portile2 (~> 2.6.1)
34
+ mongo (2.19.3)
35
+ bson (>= 4.14.1, < 5.0.0)
36
+ nokogiri (1.16.0)
37
+ mini_portile2 (~> 2.8.2)
39
38
  racc (~> 1.4)
40
- pry (0.14.1)
39
+ pry (0.14.2)
41
40
  coderay (~> 1.1)
42
41
  method_source (~> 1.0)
43
- public_suffix (4.0.6)
44
- racc (1.6.0)
45
- rake (13.0.6)
46
- rexml (3.2.5)
47
- thor (0.20.3)
42
+ public_suffix (5.0.4)
43
+ racc (1.7.3)
44
+ rake (13.1.0)
45
+ rexml (3.2.6)
46
+ thor (1.3.0)
48
47
  thread (0.2.2)
49
- typhoeus (1.4.0)
48
+ typhoeus (1.4.1)
50
49
  ethon (>= 0.9.0)
51
- webmock (3.14.0)
50
+ webmock (3.19.1)
52
51
  addressable (>= 2.8.0)
53
52
  crack (>= 0.3.2)
54
53
  hashdiff (>= 0.4.0, < 2.0.0)
55
- websocket-driver (0.7.5)
54
+ webrick (1.8.1)
55
+ websocket-driver (0.7.6)
56
56
  websocket-extensions (>= 0.1.0)
57
57
  websocket-extensions (0.1.5)
58
- wgit (0.10.2)
59
- addressable (~> 2.6)
60
- ferrum (~> 0.8)
61
- mongo (~> 2.9)
62
- nokogiri (~> 1.10)
63
- typhoeus (~> 1.3)
58
+ wgit (0.11.0)
59
+ addressable (~> 2.8)
60
+ ferrum (~> 0.14)
61
+ mongo (~> 2.19)
62
+ nokogiri (~> 1.15)
63
+ typhoeus (~> 1.4)
64
64
 
65
65
  PLATFORMS
66
66
  ruby
@@ -75,7 +75,7 @@ DEPENDENCIES
75
75
  webmock (~> 3.6)
76
76
 
77
77
  RUBY VERSION
78
- ruby 3.0.2p107
78
+ ruby 3.3.0p0
79
79
 
80
80
  BUNDLED WITH
81
- 2.2.22
81
+ 2.5.3
@@ -47,7 +47,7 @@ Gem::Specification.new do |spec|
47
47
  spec.add_development_dependency 'rake', '~> 13.0'
48
48
  spec.add_development_dependency 'webmock', '~> 3.6'
49
49
 
50
- spec.add_runtime_dependency 'thor', '~> 0.20'
50
+ spec.add_runtime_dependency 'thor', '~> 1.3'
51
51
  spec.add_runtime_dependency 'thread', '~> 0.2'
52
- spec.add_runtime_dependency 'wgit', '~> 0.10'
52
+ spec.add_runtime_dependency 'wgit', '~> 0.11'
53
53
  end
@@ -9,7 +9,9 @@ class BrokenLinkFinderCLI < Thor
9
9
  desc 'crawl [URL]', 'Find broken links at the URL'
10
10
  option :recursive, type: :boolean, aliases: [:r], default: false, desc: 'Crawl the entire site.'
11
11
  option :threads, type: :numeric, aliases: [:t], default: BrokenLinkFinder::DEFAULT_MAX_THREADS, desc: 'Max number of threads to use when crawling recursively; 1 thread per web page.'
12
- option :xpath, type: :string, aliases: [:x], default: BrokenLinkFinder::DEFAULT_LINK_XPATH
12
+ option :xpath, type: :string, aliases: [:x], default: BrokenLinkFinder::DEFAULT_LINK_XPATH, desc: 'The xpath to extract links with, before checking if broken'
13
+ option :js, type: :boolean, default: false, desc: 'Run the Javascript on a page before crawling the HTML, requires Chrome/Chromium to be installed to $PATH'
14
+ option :js_delay, type: :numeric, default: 1, desc: "The seconds of delay time given to a page's Javascript for it to update the DOM, requires the --js flag"
13
15
  option :html, type: :boolean, aliases: [:h], default: false, desc: 'Produce a HTML report (instead of text)'
14
16
  option :sort_by_link, type: :boolean, aliases: [:l], default: false, desc: 'Makes report more concise if there are more pages crawled than broken links found. Use with -r on medium/large sites.'
15
17
  option :verbose, type: :boolean, aliases: [:v], default: false, desc: 'Display all ignored links.'
@@ -22,15 +24,17 @@ class BrokenLinkFinderCLI < Thor
22
24
  max_threads = options[:threads]
23
25
  broken_verbose = !options[:concise]
24
26
  ignored_verbose = options[:verbose]
27
+ parse_js = options[:js]
28
+ parse_js_delay = options[:js_delay]
25
29
 
26
30
  BrokenLinkFinder.link_xpath = options[:xpath]
27
- finder = BrokenLinkFinder::Finder.new(sort: sort_by, max_threads: max_threads)
31
+ finder = BrokenLinkFinder::Finder.new(sort: sort_by, max_threads:) do |crawler|
32
+ crawler.parse_javascript = parse_js
33
+ crawler.parse_javascript_delay = parse_js_delay
34
+ end
35
+
28
36
  options[:recursive] ? finder.crawl_site(url) : finder.crawl_page(url)
29
- finder.report(
30
- type: report_type,
31
- broken_verbose: broken_verbose,
32
- ignored_verbose: ignored_verbose
33
- )
37
+ finder.report(type: report_type, broken_verbose:, ignored_verbose:)
34
38
 
35
39
  exit 0
36
40
  rescue StandardError => e
@@ -5,8 +5,8 @@ module BrokenLinkFinder
5
5
  SERVER_WAIT_TIME = 0.5 # Used by Finder#retry_broken_links.
6
6
 
7
7
  # Alias for BrokenLinkFinder::Finder.new.
8
- def self.new(sort: :page, max_threads: DEFAULT_MAX_THREADS)
9
- Finder.new(sort: sort, max_threads: max_threads)
8
+ def self.new(sort: :page, max_threads: DEFAULT_MAX_THREADS, &block)
9
+ Finder.new(sort: sort, max_threads: max_threads, &block)
10
10
  end
11
11
 
12
12
  # Class responsible for finding broken links on a page or site.
@@ -17,8 +17,11 @@ module BrokenLinkFinder
17
17
  # The max number of threads created during #crawl_site - one thread per page.
18
18
  attr_reader :max_threads
19
19
 
20
+ # The underlying Wgit::Crawler used by this instance of Finder.
21
+ attr_reader :crawler
22
+
20
23
  # Returns a new Finder instance.
21
- def initialize(sort: :page, max_threads: DEFAULT_MAX_THREADS)
24
+ def initialize(sort: :page, max_threads: DEFAULT_MAX_THREADS, &block)
22
25
  raise "Sort by either :page or :link, not #{sort}" \
23
26
  unless %i[page link].include?(sort)
24
27
 
@@ -26,6 +29,8 @@ module BrokenLinkFinder
26
29
  @max_threads = max_threads
27
30
  @crawler = Wgit::Crawler.new
28
31
  @manager = BrokenLinkFinder::LinkManager.new(@sort)
32
+
33
+ yield @crawler if block_given?
29
34
  end
30
35
 
31
36
  # Returns the current broken links.
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module BrokenLinkFinder
4
- VERSION = '0.12.1'
4
+ VERSION = '0.12.2'
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: broken_link_finder
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.12.1
4
+ version: 0.12.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Michael Telford
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2021-11-22 00:00:00.000000000 Z
11
+ date: 2024-01-19 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -100,14 +100,14 @@ dependencies:
100
100
  requirements:
101
101
  - - "~>"
102
102
  - !ruby/object:Gem::Version
103
- version: '0.20'
103
+ version: '1.3'
104
104
  type: :runtime
105
105
  prerelease: false
106
106
  version_requirements: !ruby/object:Gem::Requirement
107
107
  requirements:
108
108
  - - "~>"
109
109
  - !ruby/object:Gem::Version
110
- version: '0.20'
110
+ version: '1.3'
111
111
  - !ruby/object:Gem::Dependency
112
112
  name: thread
113
113
  requirement: !ruby/object:Gem::Requirement
@@ -128,14 +128,14 @@ dependencies:
128
128
  requirements:
129
129
  - - "~>"
130
130
  - !ruby/object:Gem::Version
131
- version: '0.10'
131
+ version: '0.11'
132
132
  type: :runtime
133
133
  prerelease: false
134
134
  version_requirements: !ruby/object:Gem::Requirement
135
135
  requirements:
136
136
  - - "~>"
137
137
  - !ruby/object:Gem::Version
138
- version: '0.10'
138
+ version: '0.11'
139
139
  description: Finds a website's broken links using the 'wgit' gem and reports back
140
140
  to you with a summary.
141
141
  email: michael.telford@live.com
@@ -194,7 +194,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
194
194
  - !ruby/object:Gem::Version
195
195
  version: '0'
196
196
  requirements: []
197
- rubygems_version: 3.2.22
197
+ rubygems_version: 3.5.3
198
198
  signing_key:
199
199
  specification_version: 4
200
200
  summary: Finds a website's broken links and reports back to you with a summary.