broken_link_finder 0.12.1 → 0.12.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.ruby-version +1 -1
- data/CHANGELOG.md +10 -0
- data/Gemfile.lock +35 -35
- data/broken_link_finder.gemspec +2 -2
- data/exe/broken_link_finder +11 -7
- data/lib/broken_link_finder/finder.rb +8 -3
- data/lib/broken_link_finder/version.rb +1 -1
- metadata +7 -7
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 2279e54526bbf0b0a68072c060e480bc5efe3e8cd9deefd028789716c736cf77
|
|
4
|
+
data.tar.gz: 667e1e86207dd74d594e5e57411076cc21713f15f549adb5a0c904ed5633902a
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 3dcc51f65cfb9b869f77f5becb0861b617b2b5424ad0890ff238f18be1f8cc5d70173dc9dd3a98d18754986dc8a2c578e66c7db533fbddcf8673a1ca8c1c90dd
|
|
7
|
+
data.tar.gz: 7a4f7ee5cd856a2713bfa89f250aaf5bf0458818604cdc89ecd683af23337b2bb7b695b51b3f7ce134f3d696e9e447b2c1eaff97ca41b6f683dc85e9ee0dfb83
|
data/.ruby-version
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
3.0
|
|
1
|
+
3.3.0
|
data/CHANGELOG.md
CHANGED
|
@@ -9,6 +9,16 @@
|
|
|
9
9
|
- ...
|
|
10
10
|
---
|
|
11
11
|
|
|
12
|
+
## v0.12.2
|
|
13
|
+
### Added
|
|
14
|
+
- Updated to Ruby 3.3 and updated production dependencies including Wgit (v0.11)
|
|
15
|
+
- Added `--js` and `--js-delay` flag options to the executable. This allows JS parsing to update a page's DOM before it gets crawled.
|
|
16
|
+
### Changed/Removed
|
|
17
|
+
- ...
|
|
18
|
+
### Fixed
|
|
19
|
+
- ...
|
|
20
|
+
---
|
|
21
|
+
|
|
12
22
|
## v0.12.1
|
|
13
23
|
### Added
|
|
14
24
|
- Support for Ruby 3.
|
data/Gemfile.lock
CHANGED
|
@@ -1,66 +1,66 @@
|
|
|
1
1
|
PATH
|
|
2
2
|
remote: .
|
|
3
3
|
specs:
|
|
4
|
-
broken_link_finder (0.12.1)
|
|
5
|
-
thor (~>
|
|
4
|
+
broken_link_finder (0.12.2)
|
|
5
|
+
thor (~> 1.3)
|
|
6
6
|
thread (~> 0.2)
|
|
7
|
-
wgit (~> 0.
|
|
7
|
+
wgit (~> 0.11)
|
|
8
8
|
|
|
9
9
|
GEM
|
|
10
10
|
remote: https://rubygems.org/
|
|
11
11
|
specs:
|
|
12
|
-
addressable (2.8.
|
|
13
|
-
public_suffix (>= 2.0.2, <
|
|
14
|
-
bson (4.
|
|
12
|
+
addressable (2.8.6)
|
|
13
|
+
public_suffix (>= 2.0.2, < 6.0)
|
|
14
|
+
bson (4.15.0)
|
|
15
15
|
byebug (11.1.3)
|
|
16
|
-
cliver (0.3.2)
|
|
17
16
|
coderay (1.1.3)
|
|
18
|
-
concurrent-ruby (1.
|
|
17
|
+
concurrent-ruby (1.2.2)
|
|
19
18
|
crack (0.4.5)
|
|
20
19
|
rexml
|
|
21
|
-
ethon (0.
|
|
20
|
+
ethon (0.16.0)
|
|
22
21
|
ffi (>= 1.15.0)
|
|
23
|
-
ferrum (0.
|
|
22
|
+
ferrum (0.14)
|
|
24
23
|
addressable (~> 2.5)
|
|
25
|
-
cliver (~> 0.3)
|
|
26
24
|
concurrent-ruby (~> 1.1)
|
|
25
|
+
webrick (~> 1.7)
|
|
27
26
|
websocket-driver (>= 0.6, < 0.8)
|
|
28
|
-
ffi (1.
|
|
29
|
-
hashdiff (1.0
|
|
27
|
+
ffi (1.16.3)
|
|
28
|
+
hashdiff (1.1.0)
|
|
30
29
|
maxitest (3.7.0)
|
|
31
30
|
minitest (>= 5.0.0, < 5.15.0)
|
|
32
31
|
method_source (1.0.0)
|
|
33
|
-
mini_portile2 (2.
|
|
32
|
+
mini_portile2 (2.8.5)
|
|
34
33
|
minitest (5.14.4)
|
|
35
|
-
mongo (2.
|
|
36
|
-
bson (>= 4.
|
|
37
|
-
nokogiri (1.
|
|
38
|
-
mini_portile2 (~> 2.
|
|
34
|
+
mongo (2.19.3)
|
|
35
|
+
bson (>= 4.14.1, < 5.0.0)
|
|
36
|
+
nokogiri (1.16.0)
|
|
37
|
+
mini_portile2 (~> 2.8.2)
|
|
39
38
|
racc (~> 1.4)
|
|
40
|
-
pry (0.14.
|
|
39
|
+
pry (0.14.2)
|
|
41
40
|
coderay (~> 1.1)
|
|
42
41
|
method_source (~> 1.0)
|
|
43
|
-
public_suffix (
|
|
44
|
-
racc (1.
|
|
45
|
-
rake (13.0
|
|
46
|
-
rexml (3.2.
|
|
47
|
-
thor (
|
|
42
|
+
public_suffix (5.0.4)
|
|
43
|
+
racc (1.7.3)
|
|
44
|
+
rake (13.1.0)
|
|
45
|
+
rexml (3.2.6)
|
|
46
|
+
thor (1.3.0)
|
|
48
47
|
thread (0.2.2)
|
|
49
|
-
typhoeus (1.4.
|
|
48
|
+
typhoeus (1.4.1)
|
|
50
49
|
ethon (>= 0.9.0)
|
|
51
|
-
webmock (3.
|
|
50
|
+
webmock (3.19.1)
|
|
52
51
|
addressable (>= 2.8.0)
|
|
53
52
|
crack (>= 0.3.2)
|
|
54
53
|
hashdiff (>= 0.4.0, < 2.0.0)
|
|
55
|
-
|
|
54
|
+
webrick (1.8.1)
|
|
55
|
+
websocket-driver (0.7.6)
|
|
56
56
|
websocket-extensions (>= 0.1.0)
|
|
57
57
|
websocket-extensions (0.1.5)
|
|
58
|
-
wgit (0.
|
|
59
|
-
addressable (~> 2.
|
|
60
|
-
ferrum (~> 0.
|
|
61
|
-
mongo (~> 2.
|
|
62
|
-
nokogiri (~> 1.
|
|
63
|
-
typhoeus (~> 1.
|
|
58
|
+
wgit (0.11.0)
|
|
59
|
+
addressable (~> 2.8)
|
|
60
|
+
ferrum (~> 0.14)
|
|
61
|
+
mongo (~> 2.19)
|
|
62
|
+
nokogiri (~> 1.15)
|
|
63
|
+
typhoeus (~> 1.4)
|
|
64
64
|
|
|
65
65
|
PLATFORMS
|
|
66
66
|
ruby
|
|
@@ -75,7 +75,7 @@ DEPENDENCIES
|
|
|
75
75
|
webmock (~> 3.6)
|
|
76
76
|
|
|
77
77
|
RUBY VERSION
|
|
78
|
-
ruby 3.
|
|
78
|
+
ruby 3.3.0p0
|
|
79
79
|
|
|
80
80
|
BUNDLED WITH
|
|
81
|
-
2.
|
|
81
|
+
2.5.3
|
data/broken_link_finder.gemspec
CHANGED
|
@@ -47,7 +47,7 @@ Gem::Specification.new do |spec|
|
|
|
47
47
|
spec.add_development_dependency 'rake', '~> 13.0'
|
|
48
48
|
spec.add_development_dependency 'webmock', '~> 3.6'
|
|
49
49
|
|
|
50
|
-
spec.add_runtime_dependency 'thor', '~>
|
|
50
|
+
spec.add_runtime_dependency 'thor', '~> 1.3'
|
|
51
51
|
spec.add_runtime_dependency 'thread', '~> 0.2'
|
|
52
|
-
spec.add_runtime_dependency 'wgit', '~> 0.
|
|
52
|
+
spec.add_runtime_dependency 'wgit', '~> 0.11'
|
|
53
53
|
end
|
data/exe/broken_link_finder
CHANGED
|
@@ -9,7 +9,9 @@ class BrokenLinkFinderCLI < Thor
|
|
|
9
9
|
desc 'crawl [URL]', 'Find broken links at the URL'
|
|
10
10
|
option :recursive, type: :boolean, aliases: [:r], default: false, desc: 'Crawl the entire site.'
|
|
11
11
|
option :threads, type: :numeric, aliases: [:t], default: BrokenLinkFinder::DEFAULT_MAX_THREADS, desc: 'Max number of threads to use when crawling recursively; 1 thread per web page.'
|
|
12
|
-
option :xpath, type: :string, aliases: [:x], default: BrokenLinkFinder::DEFAULT_LINK_XPATH
|
|
12
|
+
option :xpath, type: :string, aliases: [:x], default: BrokenLinkFinder::DEFAULT_LINK_XPATH, desc: 'The xpath to extract links with, before checking if broken'
|
|
13
|
+
option :js, type: :boolean, default: false, desc: 'Run the Javascript on a page before crawling the HTML, requires Chrome/Chromium to be installed to $PATH'
|
|
14
|
+
option :js_delay, type: :numeric, default: 1, desc: "The seconds of delay time given to a page's Javascript for it to update the DOM, requires the --js flag"
|
|
13
15
|
option :html, type: :boolean, aliases: [:h], default: false, desc: 'Produce a HTML report (instead of text)'
|
|
14
16
|
option :sort_by_link, type: :boolean, aliases: [:l], default: false, desc: 'Makes report more concise if there are more pages crawled than broken links found. Use with -r on medium/large sites.'
|
|
15
17
|
option :verbose, type: :boolean, aliases: [:v], default: false, desc: 'Display all ignored links.'
|
|
@@ -22,15 +24,17 @@ class BrokenLinkFinderCLI < Thor
|
|
|
22
24
|
max_threads = options[:threads]
|
|
23
25
|
broken_verbose = !options[:concise]
|
|
24
26
|
ignored_verbose = options[:verbose]
|
|
27
|
+
parse_js = options[:js]
|
|
28
|
+
parse_js_delay = options[:js_delay]
|
|
25
29
|
|
|
26
30
|
BrokenLinkFinder.link_xpath = options[:xpath]
|
|
27
|
-
finder = BrokenLinkFinder::Finder.new(sort: sort_by, max_threads:
|
|
31
|
+
finder = BrokenLinkFinder::Finder.new(sort: sort_by, max_threads:) do |crawler|
|
|
32
|
+
crawler.parse_javascript = parse_js
|
|
33
|
+
crawler.parse_javascript_delay = parse_js_delay
|
|
34
|
+
end
|
|
35
|
+
|
|
28
36
|
options[:recursive] ? finder.crawl_site(url) : finder.crawl_page(url)
|
|
29
|
-
finder.report(
|
|
30
|
-
type: report_type,
|
|
31
|
-
broken_verbose: broken_verbose,
|
|
32
|
-
ignored_verbose: ignored_verbose
|
|
33
|
-
)
|
|
37
|
+
finder.report(type: report_type, broken_verbose:, ignored_verbose:)
|
|
34
38
|
|
|
35
39
|
exit 0
|
|
36
40
|
rescue StandardError => e
|
|
@@ -5,8 +5,8 @@ module BrokenLinkFinder
|
|
|
5
5
|
SERVER_WAIT_TIME = 0.5 # Used by Finder#retry_broken_links.
|
|
6
6
|
|
|
7
7
|
# Alias for BrokenLinkFinder::Finder.new.
|
|
8
|
-
def self.new(sort: :page, max_threads: DEFAULT_MAX_THREADS)
|
|
9
|
-
Finder.new(sort: sort, max_threads: max_threads)
|
|
8
|
+
def self.new(sort: :page, max_threads: DEFAULT_MAX_THREADS, &block)
|
|
9
|
+
Finder.new(sort: sort, max_threads: max_threads, &block)
|
|
10
10
|
end
|
|
11
11
|
|
|
12
12
|
# Class responsible for finding broken links on a page or site.
|
|
@@ -17,8 +17,11 @@ module BrokenLinkFinder
|
|
|
17
17
|
# The max number of threads created during #crawl_site - one thread per page.
|
|
18
18
|
attr_reader :max_threads
|
|
19
19
|
|
|
20
|
+
# The underlying Wgit::Crawler used by this instance of Finder.
|
|
21
|
+
attr_reader :crawler
|
|
22
|
+
|
|
20
23
|
# Returns a new Finder instance.
|
|
21
|
-
def initialize(sort: :page, max_threads: DEFAULT_MAX_THREADS)
|
|
24
|
+
def initialize(sort: :page, max_threads: DEFAULT_MAX_THREADS, &block)
|
|
22
25
|
raise "Sort by either :page or :link, not #{sort}" \
|
|
23
26
|
unless %i[page link].include?(sort)
|
|
24
27
|
|
|
@@ -26,6 +29,8 @@ module BrokenLinkFinder
|
|
|
26
29
|
@max_threads = max_threads
|
|
27
30
|
@crawler = Wgit::Crawler.new
|
|
28
31
|
@manager = BrokenLinkFinder::LinkManager.new(@sort)
|
|
32
|
+
|
|
33
|
+
yield @crawler if block_given?
|
|
29
34
|
end
|
|
30
35
|
|
|
31
36
|
# Returns the current broken links.
|
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: broken_link_finder
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.12.1
|
|
4
|
+
version: 0.12.2
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Michael Telford
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: exe
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date:
|
|
11
|
+
date: 2024-01-19 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: bundler
|
|
@@ -100,14 +100,14 @@ dependencies:
|
|
|
100
100
|
requirements:
|
|
101
101
|
- - "~>"
|
|
102
102
|
- !ruby/object:Gem::Version
|
|
103
|
-
version: '
|
|
103
|
+
version: '1.3'
|
|
104
104
|
type: :runtime
|
|
105
105
|
prerelease: false
|
|
106
106
|
version_requirements: !ruby/object:Gem::Requirement
|
|
107
107
|
requirements:
|
|
108
108
|
- - "~>"
|
|
109
109
|
- !ruby/object:Gem::Version
|
|
110
|
-
version: '
|
|
110
|
+
version: '1.3'
|
|
111
111
|
- !ruby/object:Gem::Dependency
|
|
112
112
|
name: thread
|
|
113
113
|
requirement: !ruby/object:Gem::Requirement
|
|
@@ -128,14 +128,14 @@ dependencies:
|
|
|
128
128
|
requirements:
|
|
129
129
|
- - "~>"
|
|
130
130
|
- !ruby/object:Gem::Version
|
|
131
|
-
version: '0.
|
|
131
|
+
version: '0.11'
|
|
132
132
|
type: :runtime
|
|
133
133
|
prerelease: false
|
|
134
134
|
version_requirements: !ruby/object:Gem::Requirement
|
|
135
135
|
requirements:
|
|
136
136
|
- - "~>"
|
|
137
137
|
- !ruby/object:Gem::Version
|
|
138
|
-
version: '0.
|
|
138
|
+
version: '0.11'
|
|
139
139
|
description: Finds a website's broken links using the 'wgit' gem and reports back
|
|
140
140
|
to you with a summary.
|
|
141
141
|
email: michael.telford@live.com
|
|
@@ -194,7 +194,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
|
194
194
|
- !ruby/object:Gem::Version
|
|
195
195
|
version: '0'
|
|
196
196
|
requirements: []
|
|
197
|
-
rubygems_version: 3.
|
|
197
|
+
rubygems_version: 3.5.3
|
|
198
198
|
signing_key:
|
|
199
199
|
specification_version: 4
|
|
200
200
|
summary: Finds a website's broken links and reports back to you with a summary.
|