broken_link_finder 0.9.0 → 0.9.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +10 -0
- data/Gemfile +2 -0
- data/Gemfile.lock +3 -3
- data/README.md +2 -1
- data/Rakefile +13 -16
- data/benchmark.rb +2 -0
- data/bin/console +7 -7
- data/broken_link_finder.gemspec +28 -27
- data/exe/broken_link_finder +8 -7
- data/lib/broken_link_finder/finder.rb +36 -38
- data/lib/broken_link_finder/reporter.rb +12 -10
- data/lib/broken_link_finder/version.rb +3 -1
- data/lib/broken_link_finder/wgit_extensions.rb +5 -15
- data/lib/broken_link_finder.rb +9 -4
- data/load.rb +4 -2
- metadata +29 -30
- data/.travis.yml +0 -5
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 715744f5d7676d5d4ba2cddb80f0f8109f79a7f92689c3ff3088a52f307f5f1f
|
4
|
+
data.tar.gz: 7026f6037f0d710d8dab3bc710ddf7b202594c25ac8a8522398e62af3f4e78dd
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 7844d0c6d2c39752a98dcb9a7eb455f6492012321be71ff40f49641b7adc3658f4f541a35afc9ca1a9d6ce330472c4f166db0895acc1e1d6ecad53f86af7f0ac
|
7
|
+
data.tar.gz: f5224db527d4636e9006ea332813d9cd133ea221506aa7a45edecc6fd230f212e21f4db8d1757a070c753876d6b003329a381b0f9a48663c966318b9fb2d1c86
|
data/CHANGELOG.md
CHANGED
@@ -9,6 +9,16 @@
|
|
9
9
|
- ...
|
10
10
|
---
|
11
11
|
|
12
|
+
## v0.9.1
|
13
|
+
### Added
|
14
|
+
- `BrokenLinkFinder::Finder.crawl_site` alias: `crawl_r`.
|
15
|
+
### Changed/Removed
|
16
|
+
- Upgraded `wgit` to v0.2.0.
|
17
|
+
- Refactored the code base (no breaking changes).
|
18
|
+
### Fixed
|
19
|
+
- ...
|
20
|
+
---
|
21
|
+
|
12
22
|
## v0.9.0
|
13
23
|
### Added
|
14
24
|
- The `version` command to the executable.
|
data/Gemfile
CHANGED
data/Gemfile.lock
CHANGED
@@ -1,10 +1,10 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
broken_link_finder (0.9.
|
4
|
+
broken_link_finder (0.9.1)
|
5
5
|
thor (= 0.20.3)
|
6
6
|
thread (= 0.2)
|
7
|
-
wgit (= 0.0
|
7
|
+
wgit (= 0.2.0)
|
8
8
|
|
9
9
|
GEM
|
10
10
|
remote: https://rubygems.org/
|
@@ -42,7 +42,7 @@ GEM
|
|
42
42
|
addressable (>= 2.3.6)
|
43
43
|
crack (>= 0.3.2)
|
44
44
|
hashdiff
|
45
|
-
wgit (0.0
|
45
|
+
wgit (0.2.0)
|
46
46
|
addressable (~> 2.6.0)
|
47
47
|
mongo (~> 2.9.0)
|
48
48
|
nokogiri (~> 1.10.3)
|
data/README.md
CHANGED
@@ -122,7 +122,8 @@ The gem is available as open source under the terms of the [MIT License](http://
|
|
122
122
|
After checking out the repo, run `bin/setup` to install dependencies. Then, run `bundle exec rake test` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
|
123
123
|
|
124
124
|
To install this gem onto your local machine, run `bundle exec rake install`. To release a new gem version:
|
125
|
-
- Update the version number in `version.rb` and add version to the `CHANGELOG`
|
125
|
+
- Update the version number in `version.rb` and add the new version to the `CHANGELOG`
|
126
126
|
- Run `bundle install`
|
127
127
|
- Run `bundle exec rake test` ensuring all tests pass
|
128
|
+
- Run `bundle exec rake compile` ensuring no warnings
|
128
129
|
- Run `bundle exec rake release[origin]`
|
data/Rakefile
CHANGED
@@ -1,33 +1,30 @@
|
|
1
|
-
|
2
|
-
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'bundler/gem_tasks'
|
4
|
+
require 'rake/testtask'
|
3
5
|
|
4
6
|
Rake::TestTask.new(:test) do |t|
|
5
|
-
t.libs <<
|
6
|
-
t.libs <<
|
7
|
+
t.libs << 'test'
|
8
|
+
t.libs << 'lib'
|
7
9
|
t.test_files = FileList['test/**/*_test.rb']
|
8
10
|
end
|
9
11
|
|
10
|
-
desc
|
12
|
+
desc 'Print help information'
|
11
13
|
task default: :help
|
12
14
|
|
13
|
-
desc
|
15
|
+
desc 'Print help information'
|
14
16
|
task :help do
|
15
|
-
system
|
16
|
-
end
|
17
|
-
|
18
|
-
desc "Run the setup script"
|
19
|
-
task :setup do
|
20
|
-
system "./bin/setup"
|
17
|
+
system 'bundle exec rake -D'
|
21
18
|
end
|
22
19
|
|
23
|
-
desc
|
20
|
+
desc 'Run the development console'
|
24
21
|
task :console do
|
25
|
-
system
|
22
|
+
system './bin/console'
|
26
23
|
end
|
27
24
|
|
28
|
-
desc
|
25
|
+
desc 'Compile all project Ruby files with warnings.'
|
29
26
|
task :compile do
|
30
|
-
paths = Dir[
|
27
|
+
paths = Dir['**/*.rb', '**/*.gemspec', 'exe/broken_link_finder']
|
31
28
|
paths.each do |f|
|
32
29
|
puts "\nCompiling #{f}..."
|
33
30
|
puts `ruby -cw #{f}`
|
data/benchmark.rb
CHANGED
data/bin/console
CHANGED
@@ -1,15 +1,15 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
|
+
# frozen_string_literal: true
|
2
3
|
|
3
|
-
require
|
4
|
-
require
|
5
|
-
require
|
6
|
-
require "broken_link_finder"
|
7
|
-
require 'wgit/core_ext'
|
4
|
+
require 'bundler/setup'
|
5
|
+
require 'pry'
|
6
|
+
require 'byebug'
|
8
7
|
require 'logger'
|
9
8
|
require 'httplog'
|
9
|
+
require 'broken_link_finder'
|
10
10
|
|
11
11
|
logger = Logger.new(STDOUT)
|
12
|
-
logger.formatter = proc do |
|
12
|
+
logger.formatter = proc do |_severity, _datetime, _progname, msg|
|
13
13
|
"#{msg}\n"
|
14
14
|
end
|
15
15
|
|
@@ -43,7 +43,7 @@ end
|
|
43
43
|
# You can add fixtures and/or initialization code here...
|
44
44
|
reload
|
45
45
|
|
46
|
-
url =
|
46
|
+
url = 'http://txti.es/'
|
47
47
|
by_page = Finder.new
|
48
48
|
by_link = Finder.new sort: :link
|
49
49
|
finder = by_page
|
data/broken_link_finder.gemspec
CHANGED
@@ -1,51 +1,52 @@
|
|
1
|
-
#
|
2
|
-
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
lib = File.expand_path('lib', __dir__)
|
3
4
|
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
4
5
|
require 'broken_link_finder/version'
|
5
6
|
|
6
7
|
Gem::Specification.new do |spec|
|
7
|
-
spec.name =
|
8
|
+
spec.name = 'broken_link_finder'
|
8
9
|
spec.version = BrokenLinkFinder::VERSION
|
9
|
-
spec.author =
|
10
|
-
spec.email =
|
10
|
+
spec.author = 'Michael Telford'
|
11
|
+
spec.email = 'michael.telford@live.com'
|
11
12
|
|
12
13
|
spec.summary = "Finds a website's broken links and reports back to you with a summary."
|
13
14
|
spec.description = "Finds a website's broken links using the 'wgit' gem and reports back to you with a summary."
|
14
|
-
spec.homepage =
|
15
|
-
spec.license =
|
15
|
+
spec.homepage = 'https://github.com/michaeltelford/broken-link-finder'
|
16
|
+
spec.license = 'MIT'
|
16
17
|
spec.metadata = {
|
17
|
-
|
18
|
+
'source_code_uri' => 'https://github.com/michaeltelford/broken-link-finder'
|
18
19
|
}
|
19
20
|
|
20
21
|
# Prevent pushing this gem to RubyGems.org. To allow pushes either set the 'allowed_push_host'
|
21
22
|
# to allow pushing to a single host or delete this section to allow pushing to any host.
|
22
23
|
if spec.respond_to?(:metadata)
|
23
|
-
spec.metadata['allowed_push_host'] =
|
24
|
+
spec.metadata['allowed_push_host'] = 'https://rubygems.org'
|
24
25
|
else
|
25
|
-
raise
|
26
|
-
|
26
|
+
raise 'RubyGems 2.0 or newer is required to protect against ' \
|
27
|
+
'public gem pushes.'
|
27
28
|
end
|
28
29
|
|
29
|
-
spec.files
|
30
|
+
spec.files = `git ls-files -z`.split("\x0").reject do |f|
|
30
31
|
f.match(%r{^(test|spec|features)/})
|
31
32
|
end
|
32
|
-
spec.bindir =
|
33
|
-
spec.executables = [
|
34
|
-
spec.require_paths = [
|
33
|
+
spec.bindir = 'exe'
|
34
|
+
spec.executables = ['broken_link_finder']
|
35
|
+
spec.require_paths = ['lib']
|
35
36
|
spec.post_install_message = "Added the executable 'broken_link_finder' to $PATH"
|
36
37
|
|
37
38
|
spec.required_ruby_version = '~> 2.5'
|
38
39
|
|
39
|
-
spec.add_development_dependency
|
40
|
-
spec.add_development_dependency
|
41
|
-
spec.add_development_dependency
|
42
|
-
spec.add_development_dependency
|
43
|
-
spec.add_development_dependency
|
44
|
-
spec.add_development_dependency
|
45
|
-
spec.add_development_dependency
|
46
|
-
spec.add_development_dependency
|
47
|
-
|
48
|
-
spec.add_runtime_dependency
|
49
|
-
spec.add_runtime_dependency
|
50
|
-
spec.add_runtime_dependency
|
40
|
+
spec.add_development_dependency 'bundler', '~> 2.0'
|
41
|
+
spec.add_development_dependency 'byebug', '~> 11.0'
|
42
|
+
spec.add_development_dependency 'httplog', '~> 1.3'
|
43
|
+
spec.add_development_dependency 'memory_profiler', '~> 0.9'
|
44
|
+
spec.add_development_dependency 'minitest', '~> 5.0'
|
45
|
+
spec.add_development_dependency 'pry', '~> 0.12'
|
46
|
+
spec.add_development_dependency 'rake', '~> 10.0'
|
47
|
+
spec.add_development_dependency 'webmock', '~> 3.5'
|
48
|
+
|
49
|
+
spec.add_runtime_dependency 'thor', '0.20.3'
|
50
|
+
spec.add_runtime_dependency 'thread', '0.2'
|
51
|
+
spec.add_runtime_dependency 'wgit', '0.2.0'
|
51
52
|
end
|
data/exe/broken_link_finder
CHANGED
@@ -1,6 +1,7 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
|
+
# frozen_string_literal: true
|
2
3
|
|
3
|
-
$LOAD_PATH.unshift File.expand_path(
|
4
|
+
$LOAD_PATH.unshift File.expand_path('../lib', __dir__)
|
4
5
|
require 'broken_link_finder'
|
5
6
|
require 'thor'
|
6
7
|
|
@@ -14,10 +15,10 @@ class BrokenLinkFinderCLI < Thor
|
|
14
15
|
def crawl(url)
|
15
16
|
url = "http://#{url}" unless url.start_with?('http')
|
16
17
|
|
17
|
-
sort_by
|
18
|
-
max_threads
|
19
|
-
broken_verbose
|
20
|
-
ignored_verbose
|
18
|
+
sort_by = options[:sort_by_link] ? :link : :page
|
19
|
+
max_threads = options[:threads]
|
20
|
+
broken_verbose = !options[:concise]
|
21
|
+
ignored_verbose = options[:verbose]
|
21
22
|
|
22
23
|
finder = BrokenLinkFinder::Finder.new(sort: sort_by, max_threads: max_threads)
|
23
24
|
options[:recursive] ? finder.crawl_site(url) : finder.crawl_page(url)
|
@@ -25,8 +26,8 @@ class BrokenLinkFinderCLI < Thor
|
|
25
26
|
broken_verbose: broken_verbose,
|
26
27
|
ignored_verbose: ignored_verbose
|
27
28
|
)
|
28
|
-
rescue Exception =>
|
29
|
-
puts "An error has occurred: #{
|
29
|
+
rescue Exception => e
|
30
|
+
puts "An error has occurred: #{e.message}"
|
30
31
|
end
|
31
32
|
|
32
33
|
desc 'version', 'Display the currently installed version'
|
@@ -1,10 +1,11 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require_relative 'reporter'
|
2
|
-
require 'wgit'
|
3
4
|
require 'thread/pool'
|
4
5
|
require 'set'
|
5
6
|
|
6
7
|
module BrokenLinkFinder
|
7
|
-
DEFAULT_MAX_THREADS = 100
|
8
|
+
DEFAULT_MAX_THREADS = 100
|
8
9
|
|
9
10
|
# Alias for BrokenLinkFinder::Finder.new.
|
10
11
|
def self.new(sort: :page, max_threads: DEFAULT_MAX_THREADS)
|
@@ -16,9 +17,8 @@ module BrokenLinkFinder
|
|
16
17
|
|
17
18
|
# Creates a new Finder instance.
|
18
19
|
def initialize(sort: :page, max_threads: BrokenLinkFinder::DEFAULT_MAX_THREADS)
|
19
|
-
|
20
|
-
|
21
|
-
end
|
20
|
+
raise "Sort by either :page or :link, not #{sort}" \
|
21
|
+
unless %i[page link].include?(sort)
|
22
22
|
|
23
23
|
@sort = sort
|
24
24
|
@max_threads = max_threads
|
@@ -43,7 +43,7 @@ module BrokenLinkFinder
|
|
43
43
|
def crawl_url(url)
|
44
44
|
clear_links
|
45
45
|
|
46
|
-
url =
|
46
|
+
url = url.to_url
|
47
47
|
doc = @crawler.crawl_url(url)
|
48
48
|
|
49
49
|
# Ensure the given page url is valid.
|
@@ -65,8 +65,8 @@ module BrokenLinkFinder
|
|
65
65
|
def crawl_site(url)
|
66
66
|
clear_links
|
67
67
|
|
68
|
-
url
|
69
|
-
pool
|
68
|
+
url = url.to_url
|
69
|
+
pool = Thread.pool(@max_threads)
|
70
70
|
crawled_pages = []
|
71
71
|
|
72
72
|
# Crawl the site's HTML web pages looking for links.
|
@@ -95,14 +95,14 @@ module BrokenLinkFinder
|
|
95
95
|
# Returns true if there were broken links and vice versa.
|
96
96
|
def pretty_print_link_report(
|
97
97
|
stream = STDOUT,
|
98
|
-
broken_verbose:
|
98
|
+
broken_verbose: true,
|
99
99
|
ignored_verbose: false
|
100
100
|
)
|
101
101
|
reporter = BrokenLinkFinder::Reporter.new(
|
102
102
|
stream, @sort, @broken_links, @ignored_links
|
103
103
|
)
|
104
104
|
reporter.pretty_print_link_report(
|
105
|
-
broken_verbose:
|
105
|
+
broken_verbose: broken_verbose,
|
106
106
|
ignored_verbose: ignored_verbose
|
107
107
|
)
|
108
108
|
|
@@ -114,14 +114,14 @@ module BrokenLinkFinder
|
|
114
114
|
# Finds which links are unsupported or broken and records the details.
|
115
115
|
def find_broken_links(doc)
|
116
116
|
# Report and reject any non supported links.
|
117
|
-
links = doc.all_links
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
117
|
+
links = doc.all_links
|
118
|
+
.reject do |link|
|
119
|
+
if link.is_absolute? && !link.start_with?('http')
|
120
|
+
append_ignored_link(doc.url, link)
|
121
|
+
true
|
122
|
+
end
|
123
|
+
end
|
124
|
+
.uniq
|
125
125
|
|
126
126
|
# Iterate over the supported links checking if they're broken or not.
|
127
127
|
links.each do |link|
|
@@ -138,8 +138,8 @@ module BrokenLinkFinder
|
|
138
138
|
link_doc = @crawler.crawl_url(link_url)
|
139
139
|
|
140
140
|
# Determine if the crawled link is broken or not.
|
141
|
-
if
|
142
|
-
link_doc.nil?
|
141
|
+
if @crawler.last_response.is_a?(Net::HTTPNotFound) ||
|
142
|
+
link_doc.nil? ||
|
143
143
|
has_broken_anchor(link_doc)
|
144
144
|
append_broken_link(doc.url, link)
|
145
145
|
else
|
@@ -157,10 +157,10 @@ module BrokenLinkFinder
|
|
157
157
|
|
158
158
|
# Returns true if the link is/contains a broken anchor.
|
159
159
|
def has_broken_anchor(doc)
|
160
|
-
raise
|
160
|
+
raise 'link document is nil' unless doc
|
161
161
|
|
162
162
|
anchor = doc.url.anchor
|
163
|
-
return false if anchor.nil?
|
163
|
+
return false if anchor.nil? || (anchor == '#')
|
164
164
|
|
165
165
|
anchor = anchor[1..-1] if anchor.start_with?('#')
|
166
166
|
doc.xpath("//*[@id='#{anchor}']").empty?
|
@@ -171,9 +171,7 @@ module BrokenLinkFinder
|
|
171
171
|
key, value = get_key_value(url, link)
|
172
172
|
|
173
173
|
@lock.synchronize do
|
174
|
-
unless @broken_links[key]
|
175
|
-
@broken_links[key] = []
|
176
|
-
end
|
174
|
+
@broken_links[key] = [] unless @broken_links[key]
|
177
175
|
@broken_links[key] << value
|
178
176
|
|
179
177
|
@all_broken_links << link
|
@@ -185,9 +183,7 @@ module BrokenLinkFinder
|
|
185
183
|
key, value = get_key_value(url, link)
|
186
184
|
|
187
185
|
@lock.synchronize do
|
188
|
-
unless @ignored_links[key]
|
189
|
-
@ignored_links[key] = []
|
190
|
-
end
|
186
|
+
@ignored_links[key] = [] unless @ignored_links[key]
|
191
187
|
@ignored_links[key] << value
|
192
188
|
end
|
193
189
|
end
|
@@ -195,9 +191,10 @@ module BrokenLinkFinder
|
|
195
191
|
# Returns the correct key value depending on the @sort type.
|
196
192
|
# @sort == :page ? [url, link] : [link, url]
|
197
193
|
def get_key_value(url, link)
|
198
|
-
|
194
|
+
case @sort
|
195
|
+
when :page
|
199
196
|
[url, link]
|
200
|
-
|
197
|
+
when :link
|
201
198
|
[link, url]
|
202
199
|
else
|
203
200
|
raise "Unsupported sort type: #{sort}"
|
@@ -206,14 +203,14 @@ module BrokenLinkFinder
|
|
206
203
|
|
207
204
|
# Sort keys and values alphabetically.
|
208
205
|
def sort_links
|
209
|
-
@broken_links.values.map
|
210
|
-
@ignored_links.values.map
|
206
|
+
@broken_links.values.map(&:uniq!)
|
207
|
+
@ignored_links.values.map(&:uniq!)
|
211
208
|
|
212
|
-
@broken_links = @broken_links.sort_by { |k,
|
213
|
-
@ignored_links = @ignored_links.sort_by { |k,
|
209
|
+
@broken_links = @broken_links.sort_by { |k, _v| k }.to_h
|
210
|
+
@ignored_links = @ignored_links.sort_by { |k, _v| k }.to_h
|
214
211
|
|
215
|
-
@broken_links.each { |
|
216
|
-
@ignored_links.each { |
|
212
|
+
@broken_links.each { |_k, v| v.sort! }
|
213
|
+
@ignored_links.each { |_k, v| v.sort! }
|
217
214
|
end
|
218
215
|
|
219
216
|
# Sets and returns the total number of links crawled.
|
@@ -221,7 +218,8 @@ module BrokenLinkFinder
|
|
221
218
|
@total_links_crawled = @all_broken_links.size + @all_intact_links.size
|
222
219
|
end
|
223
220
|
|
224
|
-
|
225
|
-
|
221
|
+
alias crawl_page crawl_url
|
222
|
+
alias crawl_r crawl_site
|
223
|
+
alias pretty_print_link_summary pretty_print_link_report
|
226
224
|
end
|
227
225
|
end
|
@@ -1,15 +1,16 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module BrokenLinkFinder
|
2
4
|
class Reporter
|
3
5
|
# The amount of pages/links to display when verbose is false.
|
4
|
-
NUM_VALUES = 3
|
6
|
+
NUM_VALUES = 3
|
5
7
|
|
6
8
|
# Creates a new Reporter instance.
|
7
9
|
# stream is any Object that responds to :puts.
|
8
10
|
def initialize(stream, sort, broken_links, ignored_links)
|
9
|
-
raise
|
10
|
-
|
11
|
-
|
12
|
-
end
|
11
|
+
raise 'stream must respond_to? :puts' unless stream.respond_to?(:puts)
|
12
|
+
raise "sort by either :page or :link, not #{sort}" \
|
13
|
+
unless %i[page link].include?(sort)
|
13
14
|
|
14
15
|
@stream = stream
|
15
16
|
@sort = sort
|
@@ -21,6 +22,7 @@ module BrokenLinkFinder
|
|
21
22
|
def pretty_print_link_report(broken_verbose: true, ignored_verbose: false)
|
22
23
|
report_broken_links(verbose: broken_verbose)
|
23
24
|
report_ignored_links(verbose: ignored_verbose)
|
25
|
+
|
24
26
|
nil
|
25
27
|
end
|
26
28
|
|
@@ -29,7 +31,7 @@ module BrokenLinkFinder
|
|
29
31
|
# Report a summary of the broken links.
|
30
32
|
def report_broken_links(verbose: true)
|
31
33
|
if @broken_links.empty?
|
32
|
-
print
|
34
|
+
print 'Good news, there are no broken links!'
|
33
35
|
else
|
34
36
|
num_pages, num_links = get_hash_stats(@broken_links)
|
35
37
|
print "Found #{num_links} broken link(s) across #{num_pages} page(s):"
|
@@ -40,7 +42,7 @@ module BrokenLinkFinder
|
|
40
42
|
"The broken link '#{key}' was found on the following pages:"
|
41
43
|
nprint msg
|
42
44
|
|
43
|
-
if verbose
|
45
|
+
if verbose || (values.length <= NUM_VALUES)
|
44
46
|
values.each { |value| print value }
|
45
47
|
else # Only print N values and summarise the rest.
|
46
48
|
NUM_VALUES.times { |i| print values[i] }
|
@@ -64,7 +66,7 @@ module BrokenLinkFinder
|
|
64
66
|
"The link '#{key}' was ignored on the following pages:"
|
65
67
|
nprint msg
|
66
68
|
|
67
|
-
if verbose
|
69
|
+
if verbose || (values.length <= NUM_VALUES)
|
68
70
|
values.each { |value| print value }
|
69
71
|
else # Only print N values and summarise the rest.
|
70
72
|
NUM_VALUES.times { |i| print values[i] }
|
@@ -85,8 +87,8 @@ module BrokenLinkFinder
|
|
85
87
|
# combined values. The hash should be of the format: { 'str' => [...] }.
|
86
88
|
# Use like: `num_pages, num_links = get_hash_stats(links)`.
|
87
89
|
def get_hash_stats(hash)
|
88
|
-
num_keys
|
89
|
-
values
|
90
|
+
num_keys = hash.keys.length
|
91
|
+
values = hash.values.flatten
|
90
92
|
num_values = sort_by_page? ? values.length : values.uniq.length
|
91
93
|
|
92
94
|
sort_by_page? ?
|
@@ -1,21 +1,11 @@
|
|
1
|
-
|
1
|
+
# frozen_string_literal: true
|
2
2
|
|
3
|
-
# We
|
3
|
+
# We extract all the Document's links, not just the links to other webpages.
|
4
4
|
Wgit::Document.define_extension(
|
5
5
|
:all_links,
|
6
|
-
'//*/@href | //*/@src',
|
6
|
+
'//*/@href | //*/@src', # Any element with a href or src attribute.
|
7
7
|
singleton: false,
|
8
|
-
text_content_only: true
|
8
|
+
text_content_only: true
|
9
9
|
) do |links|
|
10
|
-
|
11
|
-
links = links.
|
12
|
-
map do |link|
|
13
|
-
Wgit::Url.new(link)
|
14
|
-
rescue
|
15
|
-
nil
|
16
|
-
end.
|
17
|
-
compact.
|
18
|
-
uniq
|
19
|
-
end
|
20
|
-
links
|
10
|
+
links&.map(&:to_url)&.uniq
|
21
11
|
end
|
data/lib/broken_link_finder.rb
CHANGED
@@ -1,4 +1,9 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
|
4
|
-
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'wgit'
|
4
|
+
require 'wgit/core_ext'
|
5
|
+
|
6
|
+
require_relative './broken_link_finder/wgit_extensions'
|
7
|
+
require_relative './broken_link_finder/version'
|
8
|
+
require_relative './broken_link_finder/reporter'
|
9
|
+
require_relative './broken_link_finder/finder'
|
data/load.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: broken_link_finder
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.9.
|
4
|
+
version: 0.9.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Michael Telford
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2019-
|
11
|
+
date: 2019-09-22 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -25,117 +25,117 @@ dependencies:
|
|
25
25
|
- !ruby/object:Gem::Version
|
26
26
|
version: '2.0'
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
|
-
name:
|
28
|
+
name: byebug
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
30
30
|
requirements:
|
31
31
|
- - "~>"
|
32
32
|
- !ruby/object:Gem::Version
|
33
|
-
version: '
|
33
|
+
version: '11.0'
|
34
34
|
type: :development
|
35
35
|
prerelease: false
|
36
36
|
version_requirements: !ruby/object:Gem::Requirement
|
37
37
|
requirements:
|
38
38
|
- - "~>"
|
39
39
|
- !ruby/object:Gem::Version
|
40
|
-
version: '
|
40
|
+
version: '11.0'
|
41
41
|
- !ruby/object:Gem::Dependency
|
42
|
-
name:
|
42
|
+
name: httplog
|
43
43
|
requirement: !ruby/object:Gem::Requirement
|
44
44
|
requirements:
|
45
45
|
- - "~>"
|
46
46
|
- !ruby/object:Gem::Version
|
47
|
-
version: '
|
47
|
+
version: '1.3'
|
48
48
|
type: :development
|
49
49
|
prerelease: false
|
50
50
|
version_requirements: !ruby/object:Gem::Requirement
|
51
51
|
requirements:
|
52
52
|
- - "~>"
|
53
53
|
- !ruby/object:Gem::Version
|
54
|
-
version: '
|
54
|
+
version: '1.3'
|
55
55
|
- !ruby/object:Gem::Dependency
|
56
|
-
name:
|
56
|
+
name: memory_profiler
|
57
57
|
requirement: !ruby/object:Gem::Requirement
|
58
58
|
requirements:
|
59
59
|
- - "~>"
|
60
60
|
- !ruby/object:Gem::Version
|
61
|
-
version: '0.
|
61
|
+
version: '0.9'
|
62
62
|
type: :development
|
63
63
|
prerelease: false
|
64
64
|
version_requirements: !ruby/object:Gem::Requirement
|
65
65
|
requirements:
|
66
66
|
- - "~>"
|
67
67
|
- !ruby/object:Gem::Version
|
68
|
-
version: '0.
|
68
|
+
version: '0.9'
|
69
69
|
- !ruby/object:Gem::Dependency
|
70
|
-
name:
|
70
|
+
name: minitest
|
71
71
|
requirement: !ruby/object:Gem::Requirement
|
72
72
|
requirements:
|
73
73
|
- - "~>"
|
74
74
|
- !ruby/object:Gem::Version
|
75
|
-
version: '
|
75
|
+
version: '5.0'
|
76
76
|
type: :development
|
77
77
|
prerelease: false
|
78
78
|
version_requirements: !ruby/object:Gem::Requirement
|
79
79
|
requirements:
|
80
80
|
- - "~>"
|
81
81
|
- !ruby/object:Gem::Version
|
82
|
-
version: '
|
82
|
+
version: '5.0'
|
83
83
|
- !ruby/object:Gem::Dependency
|
84
|
-
name:
|
84
|
+
name: pry
|
85
85
|
requirement: !ruby/object:Gem::Requirement
|
86
86
|
requirements:
|
87
87
|
- - "~>"
|
88
88
|
- !ruby/object:Gem::Version
|
89
|
-
version: '
|
89
|
+
version: '0.12'
|
90
90
|
type: :development
|
91
91
|
prerelease: false
|
92
92
|
version_requirements: !ruby/object:Gem::Requirement
|
93
93
|
requirements:
|
94
94
|
- - "~>"
|
95
95
|
- !ruby/object:Gem::Version
|
96
|
-
version: '
|
96
|
+
version: '0.12'
|
97
97
|
- !ruby/object:Gem::Dependency
|
98
|
-
name:
|
98
|
+
name: rake
|
99
99
|
requirement: !ruby/object:Gem::Requirement
|
100
100
|
requirements:
|
101
101
|
- - "~>"
|
102
102
|
- !ruby/object:Gem::Version
|
103
|
-
version: '
|
103
|
+
version: '10.0'
|
104
104
|
type: :development
|
105
105
|
prerelease: false
|
106
106
|
version_requirements: !ruby/object:Gem::Requirement
|
107
107
|
requirements:
|
108
108
|
- - "~>"
|
109
109
|
- !ruby/object:Gem::Version
|
110
|
-
version: '
|
110
|
+
version: '10.0'
|
111
111
|
- !ruby/object:Gem::Dependency
|
112
|
-
name:
|
112
|
+
name: webmock
|
113
113
|
requirement: !ruby/object:Gem::Requirement
|
114
114
|
requirements:
|
115
115
|
- - "~>"
|
116
116
|
- !ruby/object:Gem::Version
|
117
|
-
version: '
|
117
|
+
version: '3.5'
|
118
118
|
type: :development
|
119
119
|
prerelease: false
|
120
120
|
version_requirements: !ruby/object:Gem::Requirement
|
121
121
|
requirements:
|
122
122
|
- - "~>"
|
123
123
|
- !ruby/object:Gem::Version
|
124
|
-
version: '
|
124
|
+
version: '3.5'
|
125
125
|
- !ruby/object:Gem::Dependency
|
126
|
-
name:
|
126
|
+
name: thor
|
127
127
|
requirement: !ruby/object:Gem::Requirement
|
128
128
|
requirements:
|
129
129
|
- - '='
|
130
130
|
- !ruby/object:Gem::Version
|
131
|
-
version: 0.
|
131
|
+
version: 0.20.3
|
132
132
|
type: :runtime
|
133
133
|
prerelease: false
|
134
134
|
version_requirements: !ruby/object:Gem::Requirement
|
135
135
|
requirements:
|
136
136
|
- - '='
|
137
137
|
- !ruby/object:Gem::Version
|
138
|
-
version: 0.
|
138
|
+
version: 0.20.3
|
139
139
|
- !ruby/object:Gem::Dependency
|
140
140
|
name: thread
|
141
141
|
requirement: !ruby/object:Gem::Requirement
|
@@ -151,19 +151,19 @@ dependencies:
|
|
151
151
|
- !ruby/object:Gem::Version
|
152
152
|
version: '0.2'
|
153
153
|
- !ruby/object:Gem::Dependency
|
154
|
-
name:
|
154
|
+
name: wgit
|
155
155
|
requirement: !ruby/object:Gem::Requirement
|
156
156
|
requirements:
|
157
157
|
- - '='
|
158
158
|
- !ruby/object:Gem::Version
|
159
|
-
version: 0.
|
159
|
+
version: 0.2.0
|
160
160
|
type: :runtime
|
161
161
|
prerelease: false
|
162
162
|
version_requirements: !ruby/object:Gem::Requirement
|
163
163
|
requirements:
|
164
164
|
- - '='
|
165
165
|
- !ruby/object:Gem::Version
|
166
|
-
version: 0.
|
166
|
+
version: 0.2.0
|
167
167
|
description: Finds a website's broken links using the 'wgit' gem and reports back
|
168
168
|
to you with a summary.
|
169
169
|
email: michael.telford@live.com
|
@@ -174,7 +174,6 @@ extra_rdoc_files: []
|
|
174
174
|
files:
|
175
175
|
- ".gitignore"
|
176
176
|
- ".ruby-version"
|
177
|
-
- ".travis.yml"
|
178
177
|
- CHANGELOG.md
|
179
178
|
- Gemfile
|
180
179
|
- Gemfile.lock
|