broken_link_finder 0.12.0 → 0.12.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 24ca9c7a6071b07f5ab3132c9c79c4628570c9c3e157b77a27a05cdc0578ac6e
4
- data.tar.gz: 6668eb430c8296e1439f56c242e7e08a27733605d724ec1c5cfa638dcfaa8b52
3
+ metadata.gz: 2279e54526bbf0b0a68072c060e480bc5efe3e8cd9deefd028789716c736cf77
4
+ data.tar.gz: 667e1e86207dd74d594e5e57411076cc21713f15f549adb5a0c904ed5633902a
5
5
  SHA512:
6
- metadata.gz: 1d1cdc47ade4651b8bc2df01212364ba938ee73269bf53e7278519ecd374247291c932abfa73a031973403ed55d360bc9d14b5c60ba312aca4b32837b5064294
7
- data.tar.gz: f56308da4b9d7a4a39afd43808f77d2b6f2fbbf00f17502d2d889de504bcc82ee1858fb673333b11693a65ad73a4f5fb65a97b15955443e8268b1e0ab08b4e51
6
+ metadata.gz: 3dcc51f65cfb9b869f77f5becb0861b617b2b5424ad0890ff238f18be1f8cc5d70173dc9dd3a98d18754986dc8a2c578e66c7db533fbddcf8673a1ca8c1c90dd
7
+ data.tar.gz: 7a4f7ee5cd856a2713bfa89f250aaf5bf0458818604cdc89ecd683af23337b2bb7b695b51b3f7ce134f3d696e9e447b2c1eaff97ca41b6f683dc85e9ee0dfb83
data/.ruby-version CHANGED
@@ -1 +1 @@
1
- 2.7.0
1
+ 3.3.0
data/CHANGELOG.md CHANGED
@@ -9,6 +9,25 @@
9
9
  - ...
10
10
  ---
11
11
 
12
+ ## v0.12.2
13
+ ### Added
14
+ - Updated to Ruby 3.3 and updated production dependencies including Wgit (v0.11)
15
+ - Added `--js` and `--js-delay` flag options to the executable. This allows JS parsing to update a page's DOM before it gets crawled.
16
+ ### Changed/Removed
17
+ - ...
18
+ ### Fixed
19
+ - ...
20
+ ---
21
+
22
+ ## v0.12.1
23
+ ### Added
24
+ - Support for Ruby 3.
25
+ ### Changed/Removed
26
+ - Removed support for Ruby 2.5 (as it's too old).
27
+ ### Fixed
28
+ - ...
29
+ ---
30
+
12
31
  ## v0.12.0
13
32
  ### Added
14
33
  - `BrokenLinkFinder::link_xpath` and `link_xpath=` methods so you can customise how links are extracted from each crawled page using the API.
data/Gemfile CHANGED
@@ -2,7 +2,7 @@
2
2
 
3
3
  source 'https://rubygems.org'
4
4
 
5
- ruby '~> 2.5'
5
+ ruby '>= 2.6', '< 4'
6
6
 
7
7
  # Specify your gem's dependencies in broken_link_finder.gemspec
8
8
  gemspec
data/Gemfile.lock CHANGED
@@ -1,66 +1,66 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- broken_link_finder (0.12.0)
5
- thor (~> 0.20)
4
+ broken_link_finder (0.12.2)
5
+ thor (~> 1.3)
6
6
  thread (~> 0.2)
7
- wgit (~> 0.10)
7
+ wgit (~> 0.11)
8
8
 
9
9
  GEM
10
10
  remote: https://rubygems.org/
11
11
  specs:
12
- addressable (2.7.0)
13
- public_suffix (>= 2.0.2, < 5.0)
14
- bson (4.12.0)
12
+ addressable (2.8.6)
13
+ public_suffix (>= 2.0.2, < 6.0)
14
+ bson (4.15.0)
15
15
  byebug (11.1.3)
16
- cliver (0.3.2)
17
16
  coderay (1.1.3)
18
- concurrent-ruby (1.1.8)
17
+ concurrent-ruby (1.2.2)
19
18
  crack (0.4.5)
20
19
  rexml
21
- ethon (0.12.0)
22
- ffi (>= 1.3.0)
23
- ferrum (0.11)
20
+ ethon (0.16.0)
21
+ ffi (>= 1.15.0)
22
+ ferrum (0.14)
24
23
  addressable (~> 2.5)
25
- cliver (~> 0.3)
26
24
  concurrent-ruby (~> 1.1)
25
+ webrick (~> 1.7)
27
26
  websocket-driver (>= 0.6, < 0.8)
28
- ffi (1.15.0)
29
- hashdiff (1.0.1)
30
- maxitest (3.6.0)
31
- minitest (>= 5.0.0, < 5.14.0)
27
+ ffi (1.16.3)
28
+ hashdiff (1.1.0)
29
+ maxitest (3.7.0)
30
+ minitest (>= 5.0.0, < 5.15.0)
32
31
  method_source (1.0.0)
33
- mini_portile2 (2.5.0)
34
- minitest (5.13.0)
35
- mongo (2.14.0)
36
- bson (>= 4.8.2, < 5.0.0)
37
- nokogiri (1.11.2)
38
- mini_portile2 (~> 2.5.0)
32
+ mini_portile2 (2.8.5)
33
+ minitest (5.14.4)
34
+ mongo (2.19.3)
35
+ bson (>= 4.14.1, < 5.0.0)
36
+ nokogiri (1.16.0)
37
+ mini_portile2 (~> 2.8.2)
39
38
  racc (~> 1.4)
40
- pry (0.14.0)
39
+ pry (0.14.2)
41
40
  coderay (~> 1.1)
42
41
  method_source (~> 1.0)
43
- public_suffix (4.0.6)
44
- racc (1.5.2)
45
- rake (13.0.3)
46
- rexml (3.2.4)
47
- thor (0.20.3)
42
+ public_suffix (5.0.4)
43
+ racc (1.7.3)
44
+ rake (13.1.0)
45
+ rexml (3.2.6)
46
+ thor (1.3.0)
48
47
  thread (0.2.2)
49
- typhoeus (1.4.0)
48
+ typhoeus (1.4.1)
50
49
  ethon (>= 0.9.0)
51
- webmock (3.12.2)
52
- addressable (>= 2.3.6)
50
+ webmock (3.19.1)
51
+ addressable (>= 2.8.0)
53
52
  crack (>= 0.3.2)
54
53
  hashdiff (>= 0.4.0, < 2.0.0)
55
- websocket-driver (0.7.3)
54
+ webrick (1.8.1)
55
+ websocket-driver (0.7.6)
56
56
  websocket-extensions (>= 0.1.0)
57
57
  websocket-extensions (0.1.5)
58
- wgit (0.10.0)
59
- addressable (~> 2.6)
60
- ferrum (~> 0.8)
61
- mongo (~> 2.9)
62
- nokogiri (~> 1.10)
63
- typhoeus (~> 1.3)
58
+ wgit (0.11.0)
59
+ addressable (~> 2.8)
60
+ ferrum (~> 0.14)
61
+ mongo (~> 2.19)
62
+ nokogiri (~> 1.15)
63
+ typhoeus (~> 1.4)
64
64
 
65
65
  PLATFORMS
66
66
  ruby
@@ -75,7 +75,7 @@ DEPENDENCIES
75
75
  webmock (~> 3.6)
76
76
 
77
77
  RUBY VERSION
78
- ruby 2.7.0p0
78
+ ruby 3.3.0p0
79
79
 
80
80
  BUNDLED WITH
81
- 2.1.4
81
+ 2.5.3
data/README.md CHANGED
@@ -35,7 +35,7 @@ Only MRI Ruby is tested and supported, but `broken_link_finder` may work with ot
35
35
 
36
36
  Currently, the required MRI Ruby version is:
37
37
 
38
- `~> 2.5` (a.k.a.) `>= 2.5 && < 3`
38
+ `ruby '>= 2.6', '< 4'`
39
39
 
40
40
  ### Using Bundler
41
41
 
@@ -38,7 +38,7 @@ Gem::Specification.new do |spec|
38
38
  spec.require_paths = ['lib']
39
39
  spec.post_install_message = "Added the executable 'broken_link_finder' to $PATH"
40
40
 
41
- spec.required_ruby_version = '~> 2.5'
41
+ spec.required_ruby_version = '>= 2.6', '< 4'
42
42
 
43
43
  spec.add_development_dependency 'bundler', '~> 2.0'
44
44
  spec.add_development_dependency 'byebug', '~> 11.0'
@@ -47,7 +47,7 @@ Gem::Specification.new do |spec|
47
47
  spec.add_development_dependency 'rake', '~> 13.0'
48
48
  spec.add_development_dependency 'webmock', '~> 3.6'
49
49
 
50
- spec.add_runtime_dependency 'thor', '~> 0.20'
50
+ spec.add_runtime_dependency 'thor', '~> 1.3'
51
51
  spec.add_runtime_dependency 'thread', '~> 0.2'
52
- spec.add_runtime_dependency 'wgit', '~> 0.10'
52
+ spec.add_runtime_dependency 'wgit', '~> 0.11'
53
53
  end
@@ -9,7 +9,9 @@ class BrokenLinkFinderCLI < Thor
9
9
  desc 'crawl [URL]', 'Find broken links at the URL'
10
10
  option :recursive, type: :boolean, aliases: [:r], default: false, desc: 'Crawl the entire site.'
11
11
  option :threads, type: :numeric, aliases: [:t], default: BrokenLinkFinder::DEFAULT_MAX_THREADS, desc: 'Max number of threads to use when crawling recursively; 1 thread per web page.'
12
- option :xpath, type: :string, aliases: [:x], default: BrokenLinkFinder::DEFAULT_LINK_XPATH
12
+ option :xpath, type: :string, aliases: [:x], default: BrokenLinkFinder::DEFAULT_LINK_XPATH, desc: 'The xpath to extract links with, before checking if broken'
13
+ option :js, type: :boolean, default: false, desc: 'Run the Javascript on a page before crawling the HTML, requires Chrome/Chromium to be installed to $PATH'
14
+ option :js_delay, type: :numeric, default: 1, desc: "The seconds of delay time given to a page's Javascript for it to update the DOM, requires the --js flag"
13
15
  option :html, type: :boolean, aliases: [:h], default: false, desc: 'Produce a HTML report (instead of text)'
14
16
  option :sort_by_link, type: :boolean, aliases: [:l], default: false, desc: 'Makes report more concise if there are more pages crawled than broken links found. Use with -r on medium/large sites.'
15
17
  option :verbose, type: :boolean, aliases: [:v], default: false, desc: 'Display all ignored links.'
@@ -22,15 +24,17 @@ class BrokenLinkFinderCLI < Thor
22
24
  max_threads = options[:threads]
23
25
  broken_verbose = !options[:concise]
24
26
  ignored_verbose = options[:verbose]
27
+ parse_js = options[:js]
28
+ parse_js_delay = options[:js_delay]
25
29
 
26
30
  BrokenLinkFinder.link_xpath = options[:xpath]
27
- finder = BrokenLinkFinder::Finder.new(sort: sort_by, max_threads: max_threads)
31
+ finder = BrokenLinkFinder::Finder.new(sort: sort_by, max_threads:) do |crawler|
32
+ crawler.parse_javascript = parse_js
33
+ crawler.parse_javascript_delay = parse_js_delay
34
+ end
35
+
28
36
  options[:recursive] ? finder.crawl_site(url) : finder.crawl_page(url)
29
- finder.report(
30
- type: report_type,
31
- broken_verbose: broken_verbose,
32
- ignored_verbose: ignored_verbose
33
- )
37
+ finder.report(type: report_type, broken_verbose:, ignored_verbose:)
34
38
 
35
39
  exit 0
36
40
  rescue StandardError => e
@@ -5,8 +5,8 @@ module BrokenLinkFinder
5
5
  SERVER_WAIT_TIME = 0.5 # Used by Finder#retry_broken_links.
6
6
 
7
7
  # Alias for BrokenLinkFinder::Finder.new.
8
- def self.new(sort: :page, max_threads: DEFAULT_MAX_THREADS)
9
- Finder.new(sort: sort, max_threads: max_threads)
8
+ def self.new(sort: :page, max_threads: DEFAULT_MAX_THREADS, &block)
9
+ Finder.new(sort: sort, max_threads: max_threads, &block)
10
10
  end
11
11
 
12
12
  # Class responsible for finding broken links on a page or site.
@@ -17,8 +17,11 @@ module BrokenLinkFinder
17
17
  # The max number of threads created during #crawl_site - one thread per page.
18
18
  attr_reader :max_threads
19
19
 
20
+ # The underlying Wgit::Crawler used by this instance of Finder.
21
+ attr_reader :crawler
22
+
20
23
  # Returns a new Finder instance.
21
- def initialize(sort: :page, max_threads: DEFAULT_MAX_THREADS)
24
+ def initialize(sort: :page, max_threads: DEFAULT_MAX_THREADS, &block)
22
25
  raise "Sort by either :page or :link, not #{sort}" \
23
26
  unless %i[page link].include?(sort)
24
27
 
@@ -26,6 +29,8 @@ module BrokenLinkFinder
26
29
  @max_threads = max_threads
27
30
  @crawler = Wgit::Crawler.new
28
31
  @manager = BrokenLinkFinder::LinkManager.new(@sort)
32
+
33
+ yield @crawler if block_given?
29
34
  end
30
35
 
31
36
  # Returns the current broken links.
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module BrokenLinkFinder
4
- VERSION = '0.12.0'
4
+ VERSION = '0.12.2'
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: broken_link_finder
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.12.0
4
+ version: 0.12.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Michael Telford
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2021-04-20 00:00:00.000000000 Z
11
+ date: 2024-01-19 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -100,14 +100,14 @@ dependencies:
100
100
  requirements:
101
101
  - - "~>"
102
102
  - !ruby/object:Gem::Version
103
- version: '0.20'
103
+ version: '1.3'
104
104
  type: :runtime
105
105
  prerelease: false
106
106
  version_requirements: !ruby/object:Gem::Requirement
107
107
  requirements:
108
108
  - - "~>"
109
109
  - !ruby/object:Gem::Version
110
- version: '0.20'
110
+ version: '1.3'
111
111
  - !ruby/object:Gem::Dependency
112
112
  name: thread
113
113
  requirement: !ruby/object:Gem::Requirement
@@ -128,14 +128,14 @@ dependencies:
128
128
  requirements:
129
129
  - - "~>"
130
130
  - !ruby/object:Gem::Version
131
- version: '0.10'
131
+ version: '0.11'
132
132
  type: :runtime
133
133
  prerelease: false
134
134
  version_requirements: !ruby/object:Gem::Requirement
135
135
  requirements:
136
136
  - - "~>"
137
137
  - !ruby/object:Gem::Version
138
- version: '0.10'
138
+ version: '0.11'
139
139
  description: Finds a website's broken links using the 'wgit' gem and reports back
140
140
  to you with a summary.
141
141
  email: michael.telford@live.com
@@ -182,16 +182,19 @@ require_paths:
182
182
  - lib
183
183
  required_ruby_version: !ruby/object:Gem::Requirement
184
184
  requirements:
185
- - - "~>"
185
+ - - ">="
186
+ - !ruby/object:Gem::Version
187
+ version: '2.6'
188
+ - - "<"
186
189
  - !ruby/object:Gem::Version
187
- version: '2.5'
190
+ version: '4'
188
191
  required_rubygems_version: !ruby/object:Gem::Requirement
189
192
  requirements:
190
193
  - - ">="
191
194
  - !ruby/object:Gem::Version
192
195
  version: '0'
193
196
  requirements: []
194
- rubygems_version: 3.1.2
197
+ rubygems_version: 3.5.3
195
198
  signing_key:
196
199
  specification_version: 4
197
200
  summary: Finds a website's broken links and reports back to you with a summary.