broken_link_finder 0.12.1 → 0.12.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.ruby-version +1 -1
- data/CHANGELOG.md +10 -0
- data/Gemfile.lock +35 -35
- data/broken_link_finder.gemspec +2 -2
- data/exe/broken_link_finder +11 -7
- data/lib/broken_link_finder/finder.rb +8 -3
- data/lib/broken_link_finder/version.rb +1 -1
- metadata +7 -7
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 2279e54526bbf0b0a68072c060e480bc5efe3e8cd9deefd028789716c736cf77
|
4
|
+
data.tar.gz: 667e1e86207dd74d594e5e57411076cc21713f15f549adb5a0c904ed5633902a
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 3dcc51f65cfb9b869f77f5becb0861b617b2b5424ad0890ff238f18be1f8cc5d70173dc9dd3a98d18754986dc8a2c578e66c7db533fbddcf8673a1ca8c1c90dd
|
7
|
+
data.tar.gz: 7a4f7ee5cd856a2713bfa89f250aaf5bf0458818604cdc89ecd683af23337b2bb7b695b51b3f7ce134f3d696e9e447b2c1eaff97ca41b6f683dc85e9ee0dfb83
|
data/.ruby-version
CHANGED
@@ -1 +1 @@
|
|
1
|
-
3.0
|
1
|
+
3.3.0
|
data/CHANGELOG.md
CHANGED
@@ -9,6 +9,16 @@
|
|
9
9
|
- ...
|
10
10
|
---
|
11
11
|
|
12
|
+
## v0.12.2
|
13
|
+
### Added
|
14
|
+
- Updated to Ruby 3.3 and updated production dependencies including Wgit (v0.11)
|
15
|
+
- Added `--js` and `--js-delay` flag options to the executable. This allows JS parsing to update a page's DOM before it gets crawled.
|
16
|
+
### Changed/Removed
|
17
|
+
- ...
|
18
|
+
### Fixed
|
19
|
+
- ...
|
20
|
+
---
|
21
|
+
|
12
22
|
## v0.12.1
|
13
23
|
### Added
|
14
24
|
- Support for Ruby 3.
|
data/Gemfile.lock
CHANGED
@@ -1,66 +1,66 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
broken_link_finder (0.12.1)
|
5
|
-
thor (~>
|
4
|
+
broken_link_finder (0.12.2)
|
5
|
+
thor (~> 1.3)
|
6
6
|
thread (~> 0.2)
|
7
|
-
wgit (~> 0.
|
7
|
+
wgit (~> 0.11)
|
8
8
|
|
9
9
|
GEM
|
10
10
|
remote: https://rubygems.org/
|
11
11
|
specs:
|
12
|
-
addressable (2.8.
|
13
|
-
public_suffix (>= 2.0.2, <
|
14
|
-
bson (4.
|
12
|
+
addressable (2.8.6)
|
13
|
+
public_suffix (>= 2.0.2, < 6.0)
|
14
|
+
bson (4.15.0)
|
15
15
|
byebug (11.1.3)
|
16
|
-
cliver (0.3.2)
|
17
16
|
coderay (1.1.3)
|
18
|
-
concurrent-ruby (1.
|
17
|
+
concurrent-ruby (1.2.2)
|
19
18
|
crack (0.4.5)
|
20
19
|
rexml
|
21
|
-
ethon (0.
|
20
|
+
ethon (0.16.0)
|
22
21
|
ffi (>= 1.15.0)
|
23
|
-
ferrum (0.
|
22
|
+
ferrum (0.14)
|
24
23
|
addressable (~> 2.5)
|
25
|
-
cliver (~> 0.3)
|
26
24
|
concurrent-ruby (~> 1.1)
|
25
|
+
webrick (~> 1.7)
|
27
26
|
websocket-driver (>= 0.6, < 0.8)
|
28
|
-
ffi (1.
|
29
|
-
hashdiff (1.0
|
27
|
+
ffi (1.16.3)
|
28
|
+
hashdiff (1.1.0)
|
30
29
|
maxitest (3.7.0)
|
31
30
|
minitest (>= 5.0.0, < 5.15.0)
|
32
31
|
method_source (1.0.0)
|
33
|
-
mini_portile2 (2.
|
32
|
+
mini_portile2 (2.8.5)
|
34
33
|
minitest (5.14.4)
|
35
|
-
mongo (2.
|
36
|
-
bson (>= 4.
|
37
|
-
nokogiri (1.
|
38
|
-
mini_portile2 (~> 2.
|
34
|
+
mongo (2.19.3)
|
35
|
+
bson (>= 4.14.1, < 5.0.0)
|
36
|
+
nokogiri (1.16.0)
|
37
|
+
mini_portile2 (~> 2.8.2)
|
39
38
|
racc (~> 1.4)
|
40
|
-
pry (0.14.
|
39
|
+
pry (0.14.2)
|
41
40
|
coderay (~> 1.1)
|
42
41
|
method_source (~> 1.0)
|
43
|
-
public_suffix (
|
44
|
-
racc (1.
|
45
|
-
rake (13.0
|
46
|
-
rexml (3.2.
|
47
|
-
thor (
|
42
|
+
public_suffix (5.0.4)
|
43
|
+
racc (1.7.3)
|
44
|
+
rake (13.1.0)
|
45
|
+
rexml (3.2.6)
|
46
|
+
thor (1.3.0)
|
48
47
|
thread (0.2.2)
|
49
|
-
typhoeus (1.4.
|
48
|
+
typhoeus (1.4.1)
|
50
49
|
ethon (>= 0.9.0)
|
51
|
-
webmock (3.
|
50
|
+
webmock (3.19.1)
|
52
51
|
addressable (>= 2.8.0)
|
53
52
|
crack (>= 0.3.2)
|
54
53
|
hashdiff (>= 0.4.0, < 2.0.0)
|
55
|
-
|
54
|
+
webrick (1.8.1)
|
55
|
+
websocket-driver (0.7.6)
|
56
56
|
websocket-extensions (>= 0.1.0)
|
57
57
|
websocket-extensions (0.1.5)
|
58
|
-
wgit (0.
|
59
|
-
addressable (~> 2.
|
60
|
-
ferrum (~> 0.
|
61
|
-
mongo (~> 2.
|
62
|
-
nokogiri (~> 1.
|
63
|
-
typhoeus (~> 1.
|
58
|
+
wgit (0.11.0)
|
59
|
+
addressable (~> 2.8)
|
60
|
+
ferrum (~> 0.14)
|
61
|
+
mongo (~> 2.19)
|
62
|
+
nokogiri (~> 1.15)
|
63
|
+
typhoeus (~> 1.4)
|
64
64
|
|
65
65
|
PLATFORMS
|
66
66
|
ruby
|
@@ -75,7 +75,7 @@ DEPENDENCIES
|
|
75
75
|
webmock (~> 3.6)
|
76
76
|
|
77
77
|
RUBY VERSION
|
78
|
-
ruby 3.
|
78
|
+
ruby 3.3.0p0
|
79
79
|
|
80
80
|
BUNDLED WITH
|
81
|
-
2.
|
81
|
+
2.5.3
|
data/broken_link_finder.gemspec
CHANGED
@@ -47,7 +47,7 @@ Gem::Specification.new do |spec|
|
|
47
47
|
spec.add_development_dependency 'rake', '~> 13.0'
|
48
48
|
spec.add_development_dependency 'webmock', '~> 3.6'
|
49
49
|
|
50
|
-
spec.add_runtime_dependency 'thor', '~>
|
50
|
+
spec.add_runtime_dependency 'thor', '~> 1.3'
|
51
51
|
spec.add_runtime_dependency 'thread', '~> 0.2'
|
52
|
-
spec.add_runtime_dependency 'wgit', '~> 0.
|
52
|
+
spec.add_runtime_dependency 'wgit', '~> 0.11'
|
53
53
|
end
|
data/exe/broken_link_finder
CHANGED
@@ -9,7 +9,9 @@ class BrokenLinkFinderCLI < Thor
|
|
9
9
|
desc 'crawl [URL]', 'Find broken links at the URL'
|
10
10
|
option :recursive, type: :boolean, aliases: [:r], default: false, desc: 'Crawl the entire site.'
|
11
11
|
option :threads, type: :numeric, aliases: [:t], default: BrokenLinkFinder::DEFAULT_MAX_THREADS, desc: 'Max number of threads to use when crawling recursively; 1 thread per web page.'
|
12
|
-
option :xpath, type: :string, aliases: [:x], default: BrokenLinkFinder::DEFAULT_LINK_XPATH
|
12
|
+
option :xpath, type: :string, aliases: [:x], default: BrokenLinkFinder::DEFAULT_LINK_XPATH, desc: 'The xpath to extract links with, before checking if broken'
|
13
|
+
option :js, type: :boolean, default: false, desc: 'Run the Javascript on a page before crawling the HTML, requires Chrome/Chromium to be installed to $PATH'
|
14
|
+
option :js_delay, type: :numeric, default: 1, desc: "The seconds of delay time given to a page's Javascript for it to update the DOM, requires the --js flag"
|
13
15
|
option :html, type: :boolean, aliases: [:h], default: false, desc: 'Produce a HTML report (instead of text)'
|
14
16
|
option :sort_by_link, type: :boolean, aliases: [:l], default: false, desc: 'Makes report more concise if there are more pages crawled than broken links found. Use with -r on medium/large sites.'
|
15
17
|
option :verbose, type: :boolean, aliases: [:v], default: false, desc: 'Display all ignored links.'
|
@@ -22,15 +24,17 @@ class BrokenLinkFinderCLI < Thor
|
|
22
24
|
max_threads = options[:threads]
|
23
25
|
broken_verbose = !options[:concise]
|
24
26
|
ignored_verbose = options[:verbose]
|
27
|
+
parse_js = options[:js]
|
28
|
+
parse_js_delay = options[:js_delay]
|
25
29
|
|
26
30
|
BrokenLinkFinder.link_xpath = options[:xpath]
|
27
|
-
finder = BrokenLinkFinder::Finder.new(sort: sort_by, max_threads:
|
31
|
+
finder = BrokenLinkFinder::Finder.new(sort: sort_by, max_threads:) do |crawler|
|
32
|
+
crawler.parse_javascript = parse_js
|
33
|
+
crawler.parse_javascript_delay = parse_js_delay
|
34
|
+
end
|
35
|
+
|
28
36
|
options[:recursive] ? finder.crawl_site(url) : finder.crawl_page(url)
|
29
|
-
finder.report(
|
30
|
-
type: report_type,
|
31
|
-
broken_verbose: broken_verbose,
|
32
|
-
ignored_verbose: ignored_verbose
|
33
|
-
)
|
37
|
+
finder.report(type: report_type, broken_verbose:, ignored_verbose:)
|
34
38
|
|
35
39
|
exit 0
|
36
40
|
rescue StandardError => e
|
data/lib/broken_link_finder/finder.rb
CHANGED
@@ -5,8 +5,8 @@ module BrokenLinkFinder
|
|
5
5
|
SERVER_WAIT_TIME = 0.5 # Used by Finder#retry_broken_links.
|
6
6
|
|
7
7
|
# Alias for BrokenLinkFinder::Finder.new.
|
8
|
-
def self.new(sort: :page, max_threads: DEFAULT_MAX_THREADS)
|
9
|
-
Finder.new(sort: sort, max_threads: max_threads)
|
8
|
+
def self.new(sort: :page, max_threads: DEFAULT_MAX_THREADS, &block)
|
9
|
+
Finder.new(sort: sort, max_threads: max_threads, &block)
|
10
10
|
end
|
11
11
|
|
12
12
|
# Class responsible for finding broken links on a page or site.
|
@@ -17,8 +17,11 @@ module BrokenLinkFinder
|
|
17
17
|
# The max number of threads created during #crawl_site - one thread per page.
|
18
18
|
attr_reader :max_threads
|
19
19
|
|
20
|
+
# The underlying Wgit::Crawler used by this instance of Finder.
|
21
|
+
attr_reader :crawler
|
22
|
+
|
20
23
|
# Returns a new Finder instance.
|
21
|
-
def initialize(sort: :page, max_threads: DEFAULT_MAX_THREADS)
|
24
|
+
def initialize(sort: :page, max_threads: DEFAULT_MAX_THREADS, &block)
|
22
25
|
raise "Sort by either :page or :link, not #{sort}" \
|
23
26
|
unless %i[page link].include?(sort)
|
24
27
|
|
@@ -26,6 +29,8 @@ module BrokenLinkFinder
|
|
26
29
|
@max_threads = max_threads
|
27
30
|
@crawler = Wgit::Crawler.new
|
28
31
|
@manager = BrokenLinkFinder::LinkManager.new(@sort)
|
32
|
+
|
33
|
+
yield @crawler if block_given?
|
29
34
|
end
|
30
35
|
|
31
36
|
# Returns the current broken links.
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: broken_link_finder
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.12.1
|
4
|
+
version: 0.12.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Michael Telford
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2024-01-19 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -100,14 +100,14 @@ dependencies:
|
|
100
100
|
requirements:
|
101
101
|
- - "~>"
|
102
102
|
- !ruby/object:Gem::Version
|
103
|
-
version: '
|
103
|
+
version: '1.3'
|
104
104
|
type: :runtime
|
105
105
|
prerelease: false
|
106
106
|
version_requirements: !ruby/object:Gem::Requirement
|
107
107
|
requirements:
|
108
108
|
- - "~>"
|
109
109
|
- !ruby/object:Gem::Version
|
110
|
-
version: '
|
110
|
+
version: '1.3'
|
111
111
|
- !ruby/object:Gem::Dependency
|
112
112
|
name: thread
|
113
113
|
requirement: !ruby/object:Gem::Requirement
|
@@ -128,14 +128,14 @@ dependencies:
|
|
128
128
|
requirements:
|
129
129
|
- - "~>"
|
130
130
|
- !ruby/object:Gem::Version
|
131
|
-
version: '0.
|
131
|
+
version: '0.11'
|
132
132
|
type: :runtime
|
133
133
|
prerelease: false
|
134
134
|
version_requirements: !ruby/object:Gem::Requirement
|
135
135
|
requirements:
|
136
136
|
- - "~>"
|
137
137
|
- !ruby/object:Gem::Version
|
138
|
-
version: '0.
|
138
|
+
version: '0.11'
|
139
139
|
description: Finds a website's broken links using the 'wgit' gem and reports back
|
140
140
|
to you with a summary.
|
141
141
|
email: michael.telford@live.com
|
@@ -194,7 +194,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
194
194
|
- !ruby/object:Gem::Version
|
195
195
|
version: '0'
|
196
196
|
requirements: []
|
197
|
-
rubygems_version: 3.
|
197
|
+
rubygems_version: 3.5.3
|
198
198
|
signing_key:
|
199
199
|
specification_version: 4
|
200
200
|
summary: Finds a website's broken links and reports back to you with a summary.
|