broken_link_finder 0.9.0 → 0.9.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +10 -0
- data/Gemfile +2 -0
- data/Gemfile.lock +3 -3
- data/README.md +2 -1
- data/Rakefile +13 -16
- data/benchmark.rb +2 -0
- data/bin/console +7 -7
- data/broken_link_finder.gemspec +28 -27
- data/exe/broken_link_finder +8 -7
- data/lib/broken_link_finder/finder.rb +36 -38
- data/lib/broken_link_finder/reporter.rb +12 -10
- data/lib/broken_link_finder/version.rb +3 -1
- data/lib/broken_link_finder/wgit_extensions.rb +5 -15
- data/lib/broken_link_finder.rb +9 -4
- data/load.rb +4 -2
- metadata +29 -30
- data/.travis.yml +0 -5
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 715744f5d7676d5d4ba2cddb80f0f8109f79a7f92689c3ff3088a52f307f5f1f
|
4
|
+
data.tar.gz: 7026f6037f0d710d8dab3bc710ddf7b202594c25ac8a8522398e62af3f4e78dd
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 7844d0c6d2c39752a98dcb9a7eb455f6492012321be71ff40f49641b7adc3658f4f541a35afc9ca1a9d6ce330472c4f166db0895acc1e1d6ecad53f86af7f0ac
|
7
|
+
data.tar.gz: f5224db527d4636e9006ea332813d9cd133ea221506aa7a45edecc6fd230f212e21f4db8d1757a070c753876d6b003329a381b0f9a48663c966318b9fb2d1c86
|
data/CHANGELOG.md
CHANGED
@@ -9,6 +9,16 @@
|
|
9
9
|
- ...
|
10
10
|
---
|
11
11
|
|
12
|
+
## v0.9.1
|
13
|
+
### Added
|
14
|
+
- `BrokenLinkFinder::Finder.crawl_site` alias: `crawl_r`.
|
15
|
+
### Changed/Removed
|
16
|
+
- Upgraded `wgit` to v0.2.0.
|
17
|
+
- Refactored the code base (no breaking changes).
|
18
|
+
### Fixed
|
19
|
+
- ...
|
20
|
+
---
|
21
|
+
|
12
22
|
## v0.9.0
|
13
23
|
### Added
|
14
24
|
- The `version` command to the executable.
|
data/Gemfile
CHANGED
data/Gemfile.lock
CHANGED
@@ -1,10 +1,10 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
broken_link_finder (0.9.0)
|
4
|
+
broken_link_finder (0.9.1)
|
5
5
|
thor (= 0.20.3)
|
6
6
|
thread (= 0.2)
|
7
|
-
wgit (= 0.0
|
7
|
+
wgit (= 0.2.0)
|
8
8
|
|
9
9
|
GEM
|
10
10
|
remote: https://rubygems.org/
|
@@ -42,7 +42,7 @@ GEM
|
|
42
42
|
addressable (>= 2.3.6)
|
43
43
|
crack (>= 0.3.2)
|
44
44
|
hashdiff
|
45
|
-
wgit (0.0
|
45
|
+
wgit (0.2.0)
|
46
46
|
addressable (~> 2.6.0)
|
47
47
|
mongo (~> 2.9.0)
|
48
48
|
nokogiri (~> 1.10.3)
|
data/README.md
CHANGED
@@ -122,7 +122,8 @@ The gem is available as open source under the terms of the [MIT License](http://
|
|
122
122
|
After checking out the repo, run `bin/setup` to install dependencies. Then, run `bundle exec rake test` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
|
123
123
|
|
124
124
|
To install this gem onto your local machine, run `bundle exec rake install`. To release a new gem version:
|
125
|
-
- Update the version number in `version.rb` and add version to the `CHANGELOG`
|
125
|
+
- Update the version number in `version.rb` and add the new version to the `CHANGELOG`
|
126
126
|
- Run `bundle install`
|
127
127
|
- Run `bundle exec rake test` ensuring all tests pass
|
128
|
+
- Run `bundle exec rake compile` ensuring no warnings
|
128
129
|
- Run `bundle exec rake release[origin]`
|
data/Rakefile
CHANGED
@@ -1,33 +1,30 @@
|
|
1
|
-
|
2
|
-
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'bundler/gem_tasks'
|
4
|
+
require 'rake/testtask'
|
3
5
|
|
4
6
|
Rake::TestTask.new(:test) do |t|
|
5
|
-
t.libs <<
|
6
|
-
t.libs <<
|
7
|
+
t.libs << 'test'
|
8
|
+
t.libs << 'lib'
|
7
9
|
t.test_files = FileList['test/**/*_test.rb']
|
8
10
|
end
|
9
11
|
|
10
|
-
desc
|
12
|
+
desc 'Print help information'
|
11
13
|
task default: :help
|
12
14
|
|
13
|
-
desc
|
15
|
+
desc 'Print help information'
|
14
16
|
task :help do
|
15
|
-
system
|
16
|
-
end
|
17
|
-
|
18
|
-
desc "Run the setup script"
|
19
|
-
task :setup do
|
20
|
-
system "./bin/setup"
|
17
|
+
system 'bundle exec rake -D'
|
21
18
|
end
|
22
19
|
|
23
|
-
desc
|
20
|
+
desc 'Run the development console'
|
24
21
|
task :console do
|
25
|
-
system
|
22
|
+
system './bin/console'
|
26
23
|
end
|
27
24
|
|
28
|
-
desc
|
25
|
+
desc 'Compile all project Ruby files with warnings.'
|
29
26
|
task :compile do
|
30
|
-
paths = Dir[
|
27
|
+
paths = Dir['**/*.rb', '**/*.gemspec', 'exe/broken_link_finder']
|
31
28
|
paths.each do |f|
|
32
29
|
puts "\nCompiling #{f}..."
|
33
30
|
puts `ruby -cw #{f}`
|
data/benchmark.rb
CHANGED
data/bin/console
CHANGED
@@ -1,15 +1,15 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
|
+
# frozen_string_literal: true
|
2
3
|
|
3
|
-
require
|
4
|
-
require
|
5
|
-
require
|
6
|
-
require "broken_link_finder"
|
7
|
-
require 'wgit/core_ext'
|
4
|
+
require 'bundler/setup'
|
5
|
+
require 'pry'
|
6
|
+
require 'byebug'
|
8
7
|
require 'logger'
|
9
8
|
require 'httplog'
|
9
|
+
require 'broken_link_finder'
|
10
10
|
|
11
11
|
logger = Logger.new(STDOUT)
|
12
|
-
logger.formatter = proc do |
|
12
|
+
logger.formatter = proc do |_severity, _datetime, _progname, msg|
|
13
13
|
"#{msg}\n"
|
14
14
|
end
|
15
15
|
|
@@ -43,7 +43,7 @@ end
|
|
43
43
|
# You can add fixtures and/or initialization code here...
|
44
44
|
reload
|
45
45
|
|
46
|
-
url =
|
46
|
+
url = 'http://txti.es/'
|
47
47
|
by_page = Finder.new
|
48
48
|
by_link = Finder.new sort: :link
|
49
49
|
finder = by_page
|
data/broken_link_finder.gemspec
CHANGED
@@ -1,51 +1,52 @@
|
|
1
|
-
#
|
2
|
-
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
lib = File.expand_path('lib', __dir__)
|
3
4
|
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
4
5
|
require 'broken_link_finder/version'
|
5
6
|
|
6
7
|
Gem::Specification.new do |spec|
|
7
|
-
spec.name =
|
8
|
+
spec.name = 'broken_link_finder'
|
8
9
|
spec.version = BrokenLinkFinder::VERSION
|
9
|
-
spec.author =
|
10
|
-
spec.email =
|
10
|
+
spec.author = 'Michael Telford'
|
11
|
+
spec.email = 'michael.telford@live.com'
|
11
12
|
|
12
13
|
spec.summary = "Finds a website's broken links and reports back to you with a summary."
|
13
14
|
spec.description = "Finds a website's broken links using the 'wgit' gem and reports back to you with a summary."
|
14
|
-
spec.homepage =
|
15
|
-
spec.license =
|
15
|
+
spec.homepage = 'https://github.com/michaeltelford/broken-link-finder'
|
16
|
+
spec.license = 'MIT'
|
16
17
|
spec.metadata = {
|
17
|
-
|
18
|
+
'source_code_uri' => 'https://github.com/michaeltelford/broken-link-finder'
|
18
19
|
}
|
19
20
|
|
20
21
|
# Prevent pushing this gem to RubyGems.org. To allow pushes either set the 'allowed_push_host'
|
21
22
|
# to allow pushing to a single host or delete this section to allow pushing to any host.
|
22
23
|
if spec.respond_to?(:metadata)
|
23
|
-
spec.metadata['allowed_push_host'] =
|
24
|
+
spec.metadata['allowed_push_host'] = 'https://rubygems.org'
|
24
25
|
else
|
25
|
-
raise
|
26
|
-
|
26
|
+
raise 'RubyGems 2.0 or newer is required to protect against ' \
|
27
|
+
'public gem pushes.'
|
27
28
|
end
|
28
29
|
|
29
|
-
spec.files
|
30
|
+
spec.files = `git ls-files -z`.split("\x0").reject do |f|
|
30
31
|
f.match(%r{^(test|spec|features)/})
|
31
32
|
end
|
32
|
-
spec.bindir =
|
33
|
-
spec.executables = [
|
34
|
-
spec.require_paths = [
|
33
|
+
spec.bindir = 'exe'
|
34
|
+
spec.executables = ['broken_link_finder']
|
35
|
+
spec.require_paths = ['lib']
|
35
36
|
spec.post_install_message = "Added the executable 'broken_link_finder' to $PATH"
|
36
37
|
|
37
38
|
spec.required_ruby_version = '~> 2.5'
|
38
39
|
|
39
|
-
spec.add_development_dependency
|
40
|
-
spec.add_development_dependency
|
41
|
-
spec.add_development_dependency
|
42
|
-
spec.add_development_dependency
|
43
|
-
spec.add_development_dependency
|
44
|
-
spec.add_development_dependency
|
45
|
-
spec.add_development_dependency
|
46
|
-
spec.add_development_dependency
|
47
|
-
|
48
|
-
spec.add_runtime_dependency
|
49
|
-
spec.add_runtime_dependency
|
50
|
-
spec.add_runtime_dependency
|
40
|
+
spec.add_development_dependency 'bundler', '~> 2.0'
|
41
|
+
spec.add_development_dependency 'byebug', '~> 11.0'
|
42
|
+
spec.add_development_dependency 'httplog', '~> 1.3'
|
43
|
+
spec.add_development_dependency 'memory_profiler', '~> 0.9'
|
44
|
+
spec.add_development_dependency 'minitest', '~> 5.0'
|
45
|
+
spec.add_development_dependency 'pry', '~> 0.12'
|
46
|
+
spec.add_development_dependency 'rake', '~> 10.0'
|
47
|
+
spec.add_development_dependency 'webmock', '~> 3.5'
|
48
|
+
|
49
|
+
spec.add_runtime_dependency 'thor', '0.20.3'
|
50
|
+
spec.add_runtime_dependency 'thread', '0.2'
|
51
|
+
spec.add_runtime_dependency 'wgit', '0.2.0'
|
51
52
|
end
|
data/exe/broken_link_finder
CHANGED
@@ -1,6 +1,7 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
|
+
# frozen_string_literal: true
|
2
3
|
|
3
|
-
$LOAD_PATH.unshift File.expand_path(
|
4
|
+
$LOAD_PATH.unshift File.expand_path('../lib', __dir__)
|
4
5
|
require 'broken_link_finder'
|
5
6
|
require 'thor'
|
6
7
|
|
@@ -14,10 +15,10 @@ class BrokenLinkFinderCLI < Thor
|
|
14
15
|
def crawl(url)
|
15
16
|
url = "http://#{url}" unless url.start_with?('http')
|
16
17
|
|
17
|
-
sort_by
|
18
|
-
max_threads
|
19
|
-
broken_verbose
|
20
|
-
ignored_verbose
|
18
|
+
sort_by = options[:sort_by_link] ? :link : :page
|
19
|
+
max_threads = options[:threads]
|
20
|
+
broken_verbose = !options[:concise]
|
21
|
+
ignored_verbose = options[:verbose]
|
21
22
|
|
22
23
|
finder = BrokenLinkFinder::Finder.new(sort: sort_by, max_threads: max_threads)
|
23
24
|
options[:recursive] ? finder.crawl_site(url) : finder.crawl_page(url)
|
@@ -25,8 +26,8 @@ class BrokenLinkFinderCLI < Thor
|
|
25
26
|
broken_verbose: broken_verbose,
|
26
27
|
ignored_verbose: ignored_verbose
|
27
28
|
)
|
28
|
-
rescue Exception =>
|
29
|
-
puts "An error has occurred: #{
|
29
|
+
rescue Exception => e
|
30
|
+
puts "An error has occurred: #{e.message}"
|
30
31
|
end
|
31
32
|
|
32
33
|
desc 'version', 'Display the currently installed version'
|
@@ -1,10 +1,11 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require_relative 'reporter'
|
2
|
-
require 'wgit'
|
3
4
|
require 'thread/pool'
|
4
5
|
require 'set'
|
5
6
|
|
6
7
|
module BrokenLinkFinder
|
7
|
-
DEFAULT_MAX_THREADS = 100
|
8
|
+
DEFAULT_MAX_THREADS = 100
|
8
9
|
|
9
10
|
# Alias for BrokenLinkFinder::Finder.new.
|
10
11
|
def self.new(sort: :page, max_threads: DEFAULT_MAX_THREADS)
|
@@ -16,9 +17,8 @@ module BrokenLinkFinder
|
|
16
17
|
|
17
18
|
# Creates a new Finder instance.
|
18
19
|
def initialize(sort: :page, max_threads: BrokenLinkFinder::DEFAULT_MAX_THREADS)
|
19
|
-
|
20
|
-
|
21
|
-
end
|
20
|
+
raise "Sort by either :page or :link, not #{sort}" \
|
21
|
+
unless %i[page link].include?(sort)
|
22
22
|
|
23
23
|
@sort = sort
|
24
24
|
@max_threads = max_threads
|
@@ -43,7 +43,7 @@ module BrokenLinkFinder
|
|
43
43
|
def crawl_url(url)
|
44
44
|
clear_links
|
45
45
|
|
46
|
-
url =
|
46
|
+
url = url.to_url
|
47
47
|
doc = @crawler.crawl_url(url)
|
48
48
|
|
49
49
|
# Ensure the given page url is valid.
|
@@ -65,8 +65,8 @@ module BrokenLinkFinder
|
|
65
65
|
def crawl_site(url)
|
66
66
|
clear_links
|
67
67
|
|
68
|
-
url
|
69
|
-
pool
|
68
|
+
url = url.to_url
|
69
|
+
pool = Thread.pool(@max_threads)
|
70
70
|
crawled_pages = []
|
71
71
|
|
72
72
|
# Crawl the site's HTML web pages looking for links.
|
@@ -95,14 +95,14 @@ module BrokenLinkFinder
|
|
95
95
|
# Returns true if there were broken links and vice versa.
|
96
96
|
def pretty_print_link_report(
|
97
97
|
stream = STDOUT,
|
98
|
-
broken_verbose:
|
98
|
+
broken_verbose: true,
|
99
99
|
ignored_verbose: false
|
100
100
|
)
|
101
101
|
reporter = BrokenLinkFinder::Reporter.new(
|
102
102
|
stream, @sort, @broken_links, @ignored_links
|
103
103
|
)
|
104
104
|
reporter.pretty_print_link_report(
|
105
|
-
broken_verbose:
|
105
|
+
broken_verbose: broken_verbose,
|
106
106
|
ignored_verbose: ignored_verbose
|
107
107
|
)
|
108
108
|
|
@@ -114,14 +114,14 @@ module BrokenLinkFinder
|
|
114
114
|
# Finds which links are unsupported or broken and records the details.
|
115
115
|
def find_broken_links(doc)
|
116
116
|
# Report and reject any non supported links.
|
117
|
-
links = doc.all_links
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
117
|
+
links = doc.all_links
|
118
|
+
.reject do |link|
|
119
|
+
if link.is_absolute? && !link.start_with?('http')
|
120
|
+
append_ignored_link(doc.url, link)
|
121
|
+
true
|
122
|
+
end
|
123
|
+
end
|
124
|
+
.uniq
|
125
125
|
|
126
126
|
# Iterate over the supported links checking if they're broken or not.
|
127
127
|
links.each do |link|
|
@@ -138,8 +138,8 @@ module BrokenLinkFinder
|
|
138
138
|
link_doc = @crawler.crawl_url(link_url)
|
139
139
|
|
140
140
|
# Determine if the crawled link is broken or not.
|
141
|
-
if
|
142
|
-
link_doc.nil?
|
141
|
+
if @crawler.last_response.is_a?(Net::HTTPNotFound) ||
|
142
|
+
link_doc.nil? ||
|
143
143
|
has_broken_anchor(link_doc)
|
144
144
|
append_broken_link(doc.url, link)
|
145
145
|
else
|
@@ -157,10 +157,10 @@ module BrokenLinkFinder
|
|
157
157
|
|
158
158
|
# Returns true if the link is/contains a broken anchor.
|
159
159
|
def has_broken_anchor(doc)
|
160
|
-
raise
|
160
|
+
raise 'link document is nil' unless doc
|
161
161
|
|
162
162
|
anchor = doc.url.anchor
|
163
|
-
return false if anchor.nil?
|
163
|
+
return false if anchor.nil? || (anchor == '#')
|
164
164
|
|
165
165
|
anchor = anchor[1..-1] if anchor.start_with?('#')
|
166
166
|
doc.xpath("//*[@id='#{anchor}']").empty?
|
@@ -171,9 +171,7 @@ module BrokenLinkFinder
|
|
171
171
|
key, value = get_key_value(url, link)
|
172
172
|
|
173
173
|
@lock.synchronize do
|
174
|
-
unless @broken_links[key]
|
175
|
-
@broken_links[key] = []
|
176
|
-
end
|
174
|
+
@broken_links[key] = [] unless @broken_links[key]
|
177
175
|
@broken_links[key] << value
|
178
176
|
|
179
177
|
@all_broken_links << link
|
@@ -185,9 +183,7 @@ module BrokenLinkFinder
|
|
185
183
|
key, value = get_key_value(url, link)
|
186
184
|
|
187
185
|
@lock.synchronize do
|
188
|
-
unless @ignored_links[key]
|
189
|
-
@ignored_links[key] = []
|
190
|
-
end
|
186
|
+
@ignored_links[key] = [] unless @ignored_links[key]
|
191
187
|
@ignored_links[key] << value
|
192
188
|
end
|
193
189
|
end
|
@@ -195,9 +191,10 @@ module BrokenLinkFinder
|
|
195
191
|
# Returns the correct key value depending on the @sort type.
|
196
192
|
# @sort == :page ? [url, link] : [link, url]
|
197
193
|
def get_key_value(url, link)
|
198
|
-
|
194
|
+
case @sort
|
195
|
+
when :page
|
199
196
|
[url, link]
|
200
|
-
|
197
|
+
when :link
|
201
198
|
[link, url]
|
202
199
|
else
|
203
200
|
raise "Unsupported sort type: #{sort}"
|
@@ -206,14 +203,14 @@ module BrokenLinkFinder
|
|
206
203
|
|
207
204
|
# Sort keys and values alphabetically.
|
208
205
|
def sort_links
|
209
|
-
@broken_links.values.map
|
210
|
-
@ignored_links.values.map
|
206
|
+
@broken_links.values.map(&:uniq!)
|
207
|
+
@ignored_links.values.map(&:uniq!)
|
211
208
|
|
212
|
-
@broken_links = @broken_links.sort_by { |k,
|
213
|
-
@ignored_links = @ignored_links.sort_by { |k,
|
209
|
+
@broken_links = @broken_links.sort_by { |k, _v| k }.to_h
|
210
|
+
@ignored_links = @ignored_links.sort_by { |k, _v| k }.to_h
|
214
211
|
|
215
|
-
@broken_links.each { |
|
216
|
-
@ignored_links.each { |
|
212
|
+
@broken_links.each { |_k, v| v.sort! }
|
213
|
+
@ignored_links.each { |_k, v| v.sort! }
|
217
214
|
end
|
218
215
|
|
219
216
|
# Sets and returns the total number of links crawled.
|
@@ -221,7 +218,8 @@ module BrokenLinkFinder
|
|
221
218
|
@total_links_crawled = @all_broken_links.size + @all_intact_links.size
|
222
219
|
end
|
223
220
|
|
224
|
-
|
225
|
-
|
221
|
+
alias crawl_page crawl_url
|
222
|
+
alias crawl_r crawl_site
|
223
|
+
alias pretty_print_link_summary pretty_print_link_report
|
226
224
|
end
|
227
225
|
end
|
@@ -1,15 +1,16 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module BrokenLinkFinder
|
2
4
|
class Reporter
|
3
5
|
# The amount of pages/links to display when verbose is false.
|
4
|
-
NUM_VALUES = 3
|
6
|
+
NUM_VALUES = 3
|
5
7
|
|
6
8
|
# Creates a new Reporter instance.
|
7
9
|
# stream is any Object that responds to :puts.
|
8
10
|
def initialize(stream, sort, broken_links, ignored_links)
|
9
|
-
raise
|
10
|
-
|
11
|
-
|
12
|
-
end
|
11
|
+
raise 'stream must respond_to? :puts' unless stream.respond_to?(:puts)
|
12
|
+
raise "sort by either :page or :link, not #{sort}" \
|
13
|
+
unless %i[page link].include?(sort)
|
13
14
|
|
14
15
|
@stream = stream
|
15
16
|
@sort = sort
|
@@ -21,6 +22,7 @@ module BrokenLinkFinder
|
|
21
22
|
def pretty_print_link_report(broken_verbose: true, ignored_verbose: false)
|
22
23
|
report_broken_links(verbose: broken_verbose)
|
23
24
|
report_ignored_links(verbose: ignored_verbose)
|
25
|
+
|
24
26
|
nil
|
25
27
|
end
|
26
28
|
|
@@ -29,7 +31,7 @@ module BrokenLinkFinder
|
|
29
31
|
# Report a summary of the broken links.
|
30
32
|
def report_broken_links(verbose: true)
|
31
33
|
if @broken_links.empty?
|
32
|
-
print
|
34
|
+
print 'Good news, there are no broken links!'
|
33
35
|
else
|
34
36
|
num_pages, num_links = get_hash_stats(@broken_links)
|
35
37
|
print "Found #{num_links} broken link(s) across #{num_pages} page(s):"
|
@@ -40,7 +42,7 @@ module BrokenLinkFinder
|
|
40
42
|
"The broken link '#{key}' was found on the following pages:"
|
41
43
|
nprint msg
|
42
44
|
|
43
|
-
if verbose
|
45
|
+
if verbose || (values.length <= NUM_VALUES)
|
44
46
|
values.each { |value| print value }
|
45
47
|
else # Only print N values and summarise the rest.
|
46
48
|
NUM_VALUES.times { |i| print values[i] }
|
@@ -64,7 +66,7 @@ module BrokenLinkFinder
|
|
64
66
|
"The link '#{key}' was ignored on the following pages:"
|
65
67
|
nprint msg
|
66
68
|
|
67
|
-
if verbose
|
69
|
+
if verbose || (values.length <= NUM_VALUES)
|
68
70
|
values.each { |value| print value }
|
69
71
|
else # Only print N values and summarise the rest.
|
70
72
|
NUM_VALUES.times { |i| print values[i] }
|
@@ -85,8 +87,8 @@ module BrokenLinkFinder
|
|
85
87
|
# combined values. The hash should be of the format: { 'str' => [...] }.
|
86
88
|
# Use like: `num_pages, num_links = get_hash_stats(links)`.
|
87
89
|
def get_hash_stats(hash)
|
88
|
-
num_keys
|
89
|
-
values
|
90
|
+
num_keys = hash.keys.length
|
91
|
+
values = hash.values.flatten
|
90
92
|
num_values = sort_by_page? ? values.length : values.uniq.length
|
91
93
|
|
92
94
|
sort_by_page? ?
|
@@ -1,21 +1,11 @@
|
|
1
|
-
|
1
|
+
# frozen_string_literal: true
|
2
2
|
|
3
|
-
# We
|
3
|
+
# We extract all the Document's links, not just the links to other webpages.
|
4
4
|
Wgit::Document.define_extension(
|
5
5
|
:all_links,
|
6
|
-
'//*/@href | //*/@src',
|
6
|
+
'//*/@href | //*/@src', # Any element with a href or src attribute.
|
7
7
|
singleton: false,
|
8
|
-
text_content_only: true
|
8
|
+
text_content_only: true
|
9
9
|
) do |links|
|
10
|
-
|
11
|
-
links = links.
|
12
|
-
map do |link|
|
13
|
-
Wgit::Url.new(link)
|
14
|
-
rescue
|
15
|
-
nil
|
16
|
-
end.
|
17
|
-
compact.
|
18
|
-
uniq
|
19
|
-
end
|
20
|
-
links
|
10
|
+
links&.map(&:to_url)&.uniq
|
21
11
|
end
|
data/lib/broken_link_finder.rb
CHANGED
@@ -1,4 +1,9 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
|
4
|
-
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'wgit'
|
4
|
+
require 'wgit/core_ext'
|
5
|
+
|
6
|
+
require_relative './broken_link_finder/wgit_extensions'
|
7
|
+
require_relative './broken_link_finder/version'
|
8
|
+
require_relative './broken_link_finder/reporter'
|
9
|
+
require_relative './broken_link_finder/finder'
|
data/load.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: broken_link_finder
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.9.0
|
4
|
+
version: 0.9.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Michael Telford
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2019-
|
11
|
+
date: 2019-09-22 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -25,117 +25,117 @@ dependencies:
|
|
25
25
|
- !ruby/object:Gem::Version
|
26
26
|
version: '2.0'
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
|
-
name:
|
28
|
+
name: byebug
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
30
30
|
requirements:
|
31
31
|
- - "~>"
|
32
32
|
- !ruby/object:Gem::Version
|
33
|
-
version: '
|
33
|
+
version: '11.0'
|
34
34
|
type: :development
|
35
35
|
prerelease: false
|
36
36
|
version_requirements: !ruby/object:Gem::Requirement
|
37
37
|
requirements:
|
38
38
|
- - "~>"
|
39
39
|
- !ruby/object:Gem::Version
|
40
|
-
version: '
|
40
|
+
version: '11.0'
|
41
41
|
- !ruby/object:Gem::Dependency
|
42
|
-
name:
|
42
|
+
name: httplog
|
43
43
|
requirement: !ruby/object:Gem::Requirement
|
44
44
|
requirements:
|
45
45
|
- - "~>"
|
46
46
|
- !ruby/object:Gem::Version
|
47
|
-
version: '
|
47
|
+
version: '1.3'
|
48
48
|
type: :development
|
49
49
|
prerelease: false
|
50
50
|
version_requirements: !ruby/object:Gem::Requirement
|
51
51
|
requirements:
|
52
52
|
- - "~>"
|
53
53
|
- !ruby/object:Gem::Version
|
54
|
-
version: '
|
54
|
+
version: '1.3'
|
55
55
|
- !ruby/object:Gem::Dependency
|
56
|
-
name:
|
56
|
+
name: memory_profiler
|
57
57
|
requirement: !ruby/object:Gem::Requirement
|
58
58
|
requirements:
|
59
59
|
- - "~>"
|
60
60
|
- !ruby/object:Gem::Version
|
61
|
-
version: '0.
|
61
|
+
version: '0.9'
|
62
62
|
type: :development
|
63
63
|
prerelease: false
|
64
64
|
version_requirements: !ruby/object:Gem::Requirement
|
65
65
|
requirements:
|
66
66
|
- - "~>"
|
67
67
|
- !ruby/object:Gem::Version
|
68
|
-
version: '0.
|
68
|
+
version: '0.9'
|
69
69
|
- !ruby/object:Gem::Dependency
|
70
|
-
name:
|
70
|
+
name: minitest
|
71
71
|
requirement: !ruby/object:Gem::Requirement
|
72
72
|
requirements:
|
73
73
|
- - "~>"
|
74
74
|
- !ruby/object:Gem::Version
|
75
|
-
version: '
|
75
|
+
version: '5.0'
|
76
76
|
type: :development
|
77
77
|
prerelease: false
|
78
78
|
version_requirements: !ruby/object:Gem::Requirement
|
79
79
|
requirements:
|
80
80
|
- - "~>"
|
81
81
|
- !ruby/object:Gem::Version
|
82
|
-
version: '
|
82
|
+
version: '5.0'
|
83
83
|
- !ruby/object:Gem::Dependency
|
84
|
-
name:
|
84
|
+
name: pry
|
85
85
|
requirement: !ruby/object:Gem::Requirement
|
86
86
|
requirements:
|
87
87
|
- - "~>"
|
88
88
|
- !ruby/object:Gem::Version
|
89
|
-
version: '
|
89
|
+
version: '0.12'
|
90
90
|
type: :development
|
91
91
|
prerelease: false
|
92
92
|
version_requirements: !ruby/object:Gem::Requirement
|
93
93
|
requirements:
|
94
94
|
- - "~>"
|
95
95
|
- !ruby/object:Gem::Version
|
96
|
-
version: '
|
96
|
+
version: '0.12'
|
97
97
|
- !ruby/object:Gem::Dependency
|
98
|
-
name:
|
98
|
+
name: rake
|
99
99
|
requirement: !ruby/object:Gem::Requirement
|
100
100
|
requirements:
|
101
101
|
- - "~>"
|
102
102
|
- !ruby/object:Gem::Version
|
103
|
-
version: '
|
103
|
+
version: '10.0'
|
104
104
|
type: :development
|
105
105
|
prerelease: false
|
106
106
|
version_requirements: !ruby/object:Gem::Requirement
|
107
107
|
requirements:
|
108
108
|
- - "~>"
|
109
109
|
- !ruby/object:Gem::Version
|
110
|
-
version: '
|
110
|
+
version: '10.0'
|
111
111
|
- !ruby/object:Gem::Dependency
|
112
|
-
name:
|
112
|
+
name: webmock
|
113
113
|
requirement: !ruby/object:Gem::Requirement
|
114
114
|
requirements:
|
115
115
|
- - "~>"
|
116
116
|
- !ruby/object:Gem::Version
|
117
|
-
version: '
|
117
|
+
version: '3.5'
|
118
118
|
type: :development
|
119
119
|
prerelease: false
|
120
120
|
version_requirements: !ruby/object:Gem::Requirement
|
121
121
|
requirements:
|
122
122
|
- - "~>"
|
123
123
|
- !ruby/object:Gem::Version
|
124
|
-
version: '
|
124
|
+
version: '3.5'
|
125
125
|
- !ruby/object:Gem::Dependency
|
126
|
-
name:
|
126
|
+
name: thor
|
127
127
|
requirement: !ruby/object:Gem::Requirement
|
128
128
|
requirements:
|
129
129
|
- - '='
|
130
130
|
- !ruby/object:Gem::Version
|
131
|
-
version: 0.
|
131
|
+
version: 0.20.3
|
132
132
|
type: :runtime
|
133
133
|
prerelease: false
|
134
134
|
version_requirements: !ruby/object:Gem::Requirement
|
135
135
|
requirements:
|
136
136
|
- - '='
|
137
137
|
- !ruby/object:Gem::Version
|
138
|
-
version: 0.
|
138
|
+
version: 0.20.3
|
139
139
|
- !ruby/object:Gem::Dependency
|
140
140
|
name: thread
|
141
141
|
requirement: !ruby/object:Gem::Requirement
|
@@ -151,19 +151,19 @@ dependencies:
|
|
151
151
|
- !ruby/object:Gem::Version
|
152
152
|
version: '0.2'
|
153
153
|
- !ruby/object:Gem::Dependency
|
154
|
-
name:
|
154
|
+
name: wgit
|
155
155
|
requirement: !ruby/object:Gem::Requirement
|
156
156
|
requirements:
|
157
157
|
- - '='
|
158
158
|
- !ruby/object:Gem::Version
|
159
|
-
version: 0.
|
159
|
+
version: 0.2.0
|
160
160
|
type: :runtime
|
161
161
|
prerelease: false
|
162
162
|
version_requirements: !ruby/object:Gem::Requirement
|
163
163
|
requirements:
|
164
164
|
- - '='
|
165
165
|
- !ruby/object:Gem::Version
|
166
|
-
version: 0.
|
166
|
+
version: 0.2.0
|
167
167
|
description: Finds a website's broken links using the 'wgit' gem and reports back
|
168
168
|
to you with a summary.
|
169
169
|
email: michael.telford@live.com
|
@@ -174,7 +174,6 @@ extra_rdoc_files: []
|
|
174
174
|
files:
|
175
175
|
- ".gitignore"
|
176
176
|
- ".ruby-version"
|
177
|
-
- ".travis.yml"
|
178
177
|
- CHANGELOG.md
|
179
178
|
- Gemfile
|
180
179
|
- Gemfile.lock
|