broken_link_finder 0.12.1 → 0.12.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 88b1e96f1de644a1a3c06ba7cc0ee1b53f75a3de6686b343e55028e8fa69da9f
4
- data.tar.gz: e399ca05a4b0b9b2c0644b2846fa9dc6be6acd664e1bdc58758eb9ca7a5543cd
3
+ metadata.gz: 2279e54526bbf0b0a68072c060e480bc5efe3e8cd9deefd028789716c736cf77
4
+ data.tar.gz: 667e1e86207dd74d594e5e57411076cc21713f15f549adb5a0c904ed5633902a
5
5
  SHA512:
6
- metadata.gz: 57a1604358b0297b66604d1fc5a60a9d1bda05aa9bd5f6b91135ddc2aec4a6eb703c00ef4d905ac156170b190bf500481ce56cf6319f07e8b57447cca4c6a210
7
- data.tar.gz: f4b88e66c9c4fcd2bcbca2fe882abdede7c531e1d5e752a2ac986e39cf51d87714852dcb6e7e8e4870b623d54b468cc8f3ec88c253e7182c1fe89c0af91366a4
6
+ metadata.gz: 3dcc51f65cfb9b869f77f5becb0861b617b2b5424ad0890ff238f18be1f8cc5d70173dc9dd3a98d18754986dc8a2c578e66c7db533fbddcf8673a1ca8c1c90dd
7
+ data.tar.gz: 7a4f7ee5cd856a2713bfa89f250aaf5bf0458818604cdc89ecd683af23337b2bb7b695b51b3f7ce134f3d696e9e447b2c1eaff97ca41b6f683dc85e9ee0dfb83
data/.ruby-version CHANGED
@@ -1 +1 @@
1
- 3.0.2
1
+ 3.3.0
data/CHANGELOG.md CHANGED
@@ -9,6 +9,16 @@
9
9
  - ...
10
10
  ---
11
11
 
12
+ ## v0.12.2
13
+ ### Added
14
+ - Updated to Ruby 3.3 and updated production dependencies including Wgit (v0.11)
15
+ - Added `--js` and `--js-delay` flag options to the executable. This allows JS parsing to update a page's DOM before it gets crawled.
16
+ ### Changed/Removed
17
+ - ...
18
+ ### Fixed
19
+ - ...
20
+ ---
21
+
12
22
  ## v0.12.1
13
23
  ### Added
14
24
  - Support for Ruby 3.
data/Gemfile.lock CHANGED
@@ -1,66 +1,66 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- broken_link_finder (0.12.1)
5
- thor (~> 0.20)
4
+ broken_link_finder (0.12.2)
5
+ thor (~> 1.3)
6
6
  thread (~> 0.2)
7
- wgit (~> 0.10)
7
+ wgit (~> 0.11)
8
8
 
9
9
  GEM
10
10
  remote: https://rubygems.org/
11
11
  specs:
12
- addressable (2.8.0)
13
- public_suffix (>= 2.0.2, < 5.0)
14
- bson (4.12.1)
12
+ addressable (2.8.6)
13
+ public_suffix (>= 2.0.2, < 6.0)
14
+ bson (4.15.0)
15
15
  byebug (11.1.3)
16
- cliver (0.3.2)
17
16
  coderay (1.1.3)
18
- concurrent-ruby (1.1.9)
17
+ concurrent-ruby (1.2.2)
19
18
  crack (0.4.5)
20
19
  rexml
21
- ethon (0.15.0)
20
+ ethon (0.16.0)
22
21
  ffi (>= 1.15.0)
23
- ferrum (0.11)
22
+ ferrum (0.14)
24
23
  addressable (~> 2.5)
25
- cliver (~> 0.3)
26
24
  concurrent-ruby (~> 1.1)
25
+ webrick (~> 1.7)
27
26
  websocket-driver (>= 0.6, < 0.8)
28
- ffi (1.15.4)
29
- hashdiff (1.0.1)
27
+ ffi (1.16.3)
28
+ hashdiff (1.1.0)
30
29
  maxitest (3.7.0)
31
30
  minitest (>= 5.0.0, < 5.15.0)
32
31
  method_source (1.0.0)
33
- mini_portile2 (2.6.1)
32
+ mini_portile2 (2.8.5)
34
33
  minitest (5.14.4)
35
- mongo (2.17.0)
36
- bson (>= 4.8.2, < 5.0.0)
37
- nokogiri (1.12.5)
38
- mini_portile2 (~> 2.6.1)
34
+ mongo (2.19.3)
35
+ bson (>= 4.14.1, < 5.0.0)
36
+ nokogiri (1.16.0)
37
+ mini_portile2 (~> 2.8.2)
39
38
  racc (~> 1.4)
40
- pry (0.14.1)
39
+ pry (0.14.2)
41
40
  coderay (~> 1.1)
42
41
  method_source (~> 1.0)
43
- public_suffix (4.0.6)
44
- racc (1.6.0)
45
- rake (13.0.6)
46
- rexml (3.2.5)
47
- thor (0.20.3)
42
+ public_suffix (5.0.4)
43
+ racc (1.7.3)
44
+ rake (13.1.0)
45
+ rexml (3.2.6)
46
+ thor (1.3.0)
48
47
  thread (0.2.2)
49
- typhoeus (1.4.0)
48
+ typhoeus (1.4.1)
50
49
  ethon (>= 0.9.0)
51
- webmock (3.14.0)
50
+ webmock (3.19.1)
52
51
  addressable (>= 2.8.0)
53
52
  crack (>= 0.3.2)
54
53
  hashdiff (>= 0.4.0, < 2.0.0)
55
- websocket-driver (0.7.5)
54
+ webrick (1.8.1)
55
+ websocket-driver (0.7.6)
56
56
  websocket-extensions (>= 0.1.0)
57
57
  websocket-extensions (0.1.5)
58
- wgit (0.10.2)
59
- addressable (~> 2.6)
60
- ferrum (~> 0.8)
61
- mongo (~> 2.9)
62
- nokogiri (~> 1.10)
63
- typhoeus (~> 1.3)
58
+ wgit (0.11.0)
59
+ addressable (~> 2.8)
60
+ ferrum (~> 0.14)
61
+ mongo (~> 2.19)
62
+ nokogiri (~> 1.15)
63
+ typhoeus (~> 1.4)
64
64
 
65
65
  PLATFORMS
66
66
  ruby
@@ -75,7 +75,7 @@ DEPENDENCIES
75
75
  webmock (~> 3.6)
76
76
 
77
77
  RUBY VERSION
78
- ruby 3.0.2p107
78
+ ruby 3.3.0p0
79
79
 
80
80
  BUNDLED WITH
81
- 2.2.22
81
+ 2.5.3
@@ -47,7 +47,7 @@ Gem::Specification.new do |spec|
47
47
  spec.add_development_dependency 'rake', '~> 13.0'
48
48
  spec.add_development_dependency 'webmock', '~> 3.6'
49
49
 
50
- spec.add_runtime_dependency 'thor', '~> 0.20'
50
+ spec.add_runtime_dependency 'thor', '~> 1.3'
51
51
  spec.add_runtime_dependency 'thread', '~> 0.2'
52
- spec.add_runtime_dependency 'wgit', '~> 0.10'
52
+ spec.add_runtime_dependency 'wgit', '~> 0.11'
53
53
  end
@@ -9,7 +9,9 @@ class BrokenLinkFinderCLI < Thor
9
9
  desc 'crawl [URL]', 'Find broken links at the URL'
10
10
  option :recursive, type: :boolean, aliases: [:r], default: false, desc: 'Crawl the entire site.'
11
11
  option :threads, type: :numeric, aliases: [:t], default: BrokenLinkFinder::DEFAULT_MAX_THREADS, desc: 'Max number of threads to use when crawling recursively; 1 thread per web page.'
12
- option :xpath, type: :string, aliases: [:x], default: BrokenLinkFinder::DEFAULT_LINK_XPATH
12
+ option :xpath, type: :string, aliases: [:x], default: BrokenLinkFinder::DEFAULT_LINK_XPATH, desc: 'The xpath to extract links with, before checking if broken'
13
+ option :js, type: :boolean, default: false, desc: 'Run the Javascript on a page before crawling the HTML, requires Chrome/Chromium to be installed to $PATH'
14
+ option :js_delay, type: :numeric, default: 1, desc: "The seconds of delay time given to a page's Javascript for it to update the DOM, requires the --js flag"
13
15
  option :html, type: :boolean, aliases: [:h], default: false, desc: 'Produce a HTML report (instead of text)'
14
16
  option :sort_by_link, type: :boolean, aliases: [:l], default: false, desc: 'Makes report more concise if there are more pages crawled than broken links found. Use with -r on medium/large sites.'
15
17
  option :verbose, type: :boolean, aliases: [:v], default: false, desc: 'Display all ignored links.'
@@ -22,15 +24,17 @@ class BrokenLinkFinderCLI < Thor
22
24
  max_threads = options[:threads]
23
25
  broken_verbose = !options[:concise]
24
26
  ignored_verbose = options[:verbose]
27
+ parse_js = options[:js]
28
+ parse_js_delay = options[:js_delay]
25
29
 
26
30
  BrokenLinkFinder.link_xpath = options[:xpath]
27
- finder = BrokenLinkFinder::Finder.new(sort: sort_by, max_threads: max_threads)
31
+ finder = BrokenLinkFinder::Finder.new(sort: sort_by, max_threads:) do |crawler|
32
+ crawler.parse_javascript = parse_js
33
+ crawler.parse_javascript_delay = parse_js_delay
34
+ end
35
+
28
36
  options[:recursive] ? finder.crawl_site(url) : finder.crawl_page(url)
29
- finder.report(
30
- type: report_type,
31
- broken_verbose: broken_verbose,
32
- ignored_verbose: ignored_verbose
33
- )
37
+ finder.report(type: report_type, broken_verbose:, ignored_verbose:)
34
38
 
35
39
  exit 0
36
40
  rescue StandardError => e
@@ -5,8 +5,8 @@ module BrokenLinkFinder
5
5
  SERVER_WAIT_TIME = 0.5 # Used by Finder#retry_broken_links.
6
6
 
7
7
  # Alias for BrokenLinkFinder::Finder.new.
8
- def self.new(sort: :page, max_threads: DEFAULT_MAX_THREADS)
9
- Finder.new(sort: sort, max_threads: max_threads)
8
+ def self.new(sort: :page, max_threads: DEFAULT_MAX_THREADS, &block)
9
+ Finder.new(sort: sort, max_threads: max_threads, &block)
10
10
  end
11
11
 
12
12
  # Class responsible for finding broken links on a page or site.
@@ -17,8 +17,11 @@ module BrokenLinkFinder
17
17
  # The max number of threads created during #crawl_site - one thread per page.
18
18
  attr_reader :max_threads
19
19
 
20
+ # The underlying Wgit::Crawler used by this instance of Finder.
21
+ attr_reader :crawler
22
+
20
23
  # Returns a new Finder instance.
21
- def initialize(sort: :page, max_threads: DEFAULT_MAX_THREADS)
24
+ def initialize(sort: :page, max_threads: DEFAULT_MAX_THREADS, &block)
22
25
  raise "Sort by either :page or :link, not #{sort}" \
23
26
  unless %i[page link].include?(sort)
24
27
 
@@ -26,6 +29,8 @@ module BrokenLinkFinder
26
29
  @max_threads = max_threads
27
30
  @crawler = Wgit::Crawler.new
28
31
  @manager = BrokenLinkFinder::LinkManager.new(@sort)
32
+
33
+ yield @crawler if block_given?
29
34
  end
30
35
 
31
36
  # Returns the current broken links.
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module BrokenLinkFinder
4
- VERSION = '0.12.1'
4
+ VERSION = '0.12.2'
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: broken_link_finder
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.12.1
4
+ version: 0.12.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Michael Telford
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2021-11-22 00:00:00.000000000 Z
11
+ date: 2024-01-19 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -100,14 +100,14 @@ dependencies:
100
100
  requirements:
101
101
  - - "~>"
102
102
  - !ruby/object:Gem::Version
103
- version: '0.20'
103
+ version: '1.3'
104
104
  type: :runtime
105
105
  prerelease: false
106
106
  version_requirements: !ruby/object:Gem::Requirement
107
107
  requirements:
108
108
  - - "~>"
109
109
  - !ruby/object:Gem::Version
110
- version: '0.20'
110
+ version: '1.3'
111
111
  - !ruby/object:Gem::Dependency
112
112
  name: thread
113
113
  requirement: !ruby/object:Gem::Requirement
@@ -128,14 +128,14 @@ dependencies:
128
128
  requirements:
129
129
  - - "~>"
130
130
  - !ruby/object:Gem::Version
131
- version: '0.10'
131
+ version: '0.11'
132
132
  type: :runtime
133
133
  prerelease: false
134
134
  version_requirements: !ruby/object:Gem::Requirement
135
135
  requirements:
136
136
  - - "~>"
137
137
  - !ruby/object:Gem::Version
138
- version: '0.10'
138
+ version: '0.11'
139
139
  description: Finds a website's broken links using the 'wgit' gem and reports back
140
140
  to you with a summary.
141
141
  email: michael.telford@live.com
@@ -194,7 +194,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
194
194
  - !ruby/object:Gem::Version
195
195
  version: '0'
196
196
  requirements: []
197
- rubygems_version: 3.2.22
197
+ rubygems_version: 3.5.3
198
198
  signing_key:
199
199
  specification_version: 4
200
200
  summary: Finds a website's broken links and reports back to you with a summary.