broken_link_finder 0.4.1 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile.lock +4 -4
- data/README.md +8 -1
- data/Rakefile +16 -1
- data/broken_link_finder.gemspec +2 -2
- data/lib/broken_link_finder/finder.rb +14 -3
- data/lib/broken_link_finder/version.rb +1 -1
- metadata +4 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 364ec155bda54b8757fbce6425c8978d8d17cd91618d5650dc14f8a63e712a2f
|
4
|
+
data.tar.gz: b5c0e405f159aaed54725042105c519e1e9d0e085bf40aad99b114200ec8713a
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: eaf1aa2ea2b4f9177561291a8b729e91610f9b2fbd8541ceb2fa96f039a667ffd546dd758b9e4edb4fba9cad25c55368cb4af95ffbe9d0fdc20c546e3d4e5f0b
|
7
|
+
data.tar.gz: 8c6d407f74d900553782d7aedcdc4027c14e436bdb73b2888830827da9e305456e653ad9398c8752e01c8aae381871eb239aee5d7394ea995c5b68b0cbd38404
|
data/Gemfile.lock
CHANGED
@@ -1,10 +1,10 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
broken_link_finder (0.4.1)
|
4
|
+
broken_link_finder (0.5.0)
|
5
5
|
thor (= 0.20.3)
|
6
6
|
thread (= 0.2)
|
7
|
-
wgit (= 0.0.
|
7
|
+
wgit (= 0.0.12)
|
8
8
|
|
9
9
|
GEM
|
10
10
|
remote: https://rubygems.org/
|
@@ -36,9 +36,9 @@ GEM
|
|
36
36
|
addressable (>= 2.3.6)
|
37
37
|
crack (>= 0.3.2)
|
38
38
|
hashdiff
|
39
|
-
wgit (0.0.
|
39
|
+
wgit (0.0.12)
|
40
40
|
mongo (~> 2.8.0)
|
41
|
-
nokogiri (~> 1.10)
|
41
|
+
nokogiri (~> 1.10.3)
|
42
42
|
|
43
43
|
PLATFORMS
|
44
44
|
ruby
|
data/README.md
CHANGED
@@ -4,6 +4,14 @@ Does what it says on the tin. Finds a website's broken links.
|
|
4
4
|
|
5
5
|
Simply point it at a website and it will crawl all of its webpages searching for and identifying any broken links. You will then be presented with a nice concise summary of the broken links found.
|
6
6
|
|
7
|
+
## How It Works
|
8
|
+
|
9
|
+
Any page element with an `href` or `src` attribute is considered a link. For each link on a given page, any of the following conditions (checked in order) means the link is considered broken:
|
10
|
+
|
11
|
+
1) A response status code of `404 Not Found` is returned.
|
12
|
+
2) An empty HTML response body is returned.
|
13
|
+
3) The HTML response body doesn't contain an element ID matching that of the link's anchor e.g. `http://server.com#about` must contain an element with an ID of `about` or the link is considered broken.
|
14
|
+
|
7
15
|
## Made Possible By
|
8
16
|
|
9
17
|
This repository utilises the awesome `wgit` Ruby gem. See its [repository](https://github.com/michaeltelford/wgit) for more details.
|
@@ -74,7 +82,6 @@ http://imgur.com
|
|
74
82
|
|
75
83
|
## TODO
|
76
84
|
|
77
|
-
- Improve the intelligence of the finder. Currently a custom 'Not Found' webpage will not be discovered as a broken link; it should.
|
78
85
|
- Add logger functionality (especially useful in the console during development).
|
79
86
|
|
80
87
|
## Development
|
data/Rakefile
CHANGED
@@ -7,4 +7,19 @@ Rake::TestTask.new(:test) do |t|
|
|
7
7
|
t.test_files = FileList['test/**/*_test.rb']
|
8
8
|
end
|
9
9
|
|
10
|
-
|
10
|
+
desc "Print help information"
|
11
|
+
task default: :help
|
12
|
+
|
13
|
+
desc "Print help information"
|
14
|
+
task :help do
|
15
|
+
system "bundle exec rake -D"
|
16
|
+
end
|
17
|
+
|
18
|
+
desc "Compile all project Ruby files with warnings."
|
19
|
+
task :compile do
|
20
|
+
paths = Dir["**/*.rb", "**/*.gemspec", 'exe/broken_link_finder']
|
21
|
+
paths.each do |f|
|
22
|
+
puts "\nCompiling #{f}..."
|
23
|
+
puts `ruby -cw #{f}`
|
24
|
+
end
|
25
|
+
end
|
data/broken_link_finder.gemspec
CHANGED
@@ -13,7 +13,7 @@ Gem::Specification.new do |spec|
|
|
13
13
|
spec.description = "Finds a website's broken links using the 'wgit' gem and reports back to you with a summary."
|
14
14
|
spec.homepage = "https://github.com/michaeltelford/broken-link-finder"
|
15
15
|
spec.license = "MIT"
|
16
|
-
spec.metadata = {
|
16
|
+
spec.metadata = {
|
17
17
|
"source_code_uri" => "https://github.com/michaeltelford/broken-link-finder",
|
18
18
|
}
|
19
19
|
|
@@ -43,7 +43,7 @@ Gem::Specification.new do |spec|
|
|
43
43
|
spec.add_development_dependency "byebug", "~> 11.0"
|
44
44
|
spec.add_development_dependency "webmock", "~> 3.5"
|
45
45
|
|
46
|
-
spec.add_runtime_dependency "wgit", "0.0.
|
46
|
+
spec.add_runtime_dependency "wgit", "0.0.12"
|
47
47
|
spec.add_runtime_dependency "thread", "0.2"
|
48
48
|
spec.add_runtime_dependency "thor", "0.20.3"
|
49
49
|
end
|
@@ -67,7 +67,7 @@ module BrokenLinkFinder
|
|
67
67
|
|
68
68
|
# Pretty prints the contents of broken_links into a stream e.g. Kernel
|
69
69
|
# (STDOUT) or a file - anything that respond_to? :puts.
|
70
|
-
# Returns true if there were broken links and vice versa.
|
70
|
+
# Returns true if there were broken links and vice versa.
|
71
71
|
def pretty_print_broken_links(stream = Kernel)
|
72
72
|
raise "stream must respond_to? :puts" unless stream.respond_to? :puts
|
73
73
|
|
@@ -96,13 +96,24 @@ broken links...")
|
|
96
96
|
def find_broken_links(doc)
|
97
97
|
links = doc.internal_full_links + doc.external_links
|
98
98
|
links.each do |link|
|
99
|
-
|
100
|
-
if
|
99
|
+
link_doc = @crawler.crawl_url(link)
|
100
|
+
if @crawler.last_response.is_a?(Net::HTTPNotFound) or
|
101
|
+
link_doc.nil? or
|
102
|
+
has_broken_anchor(link_doc)
|
101
103
|
append_broken_link(doc.url, link)
|
102
104
|
end
|
103
105
|
end
|
104
106
|
end
|
105
107
|
|
108
|
+
# Returns true if the link is/contains a broken anchor.
|
109
|
+
def has_broken_anchor(doc)
|
110
|
+
raise "link document is nil" unless doc
|
111
|
+
return false unless doc.url.anchor
|
112
|
+
|
113
|
+
anchor = doc.url.anchor[1..-1] # Remove the # prefix.
|
114
|
+
doc.xpath("//*[@id='#{anchor}']").empty?
|
115
|
+
end
|
116
|
+
|
106
117
|
# Append url => [link] to @broken_links.
|
107
118
|
def append_broken_link(url, link)
|
108
119
|
@lock.synchronize do
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: broken_link_finder
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.4.1
|
4
|
+
version: 0.5.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Michael Telford
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2019-07-
|
11
|
+
date: 2019-07-21 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -100,14 +100,14 @@ dependencies:
|
|
100
100
|
requirements:
|
101
101
|
- - '='
|
102
102
|
- !ruby/object:Gem::Version
|
103
|
-
version: 0.0.
|
103
|
+
version: 0.0.12
|
104
104
|
type: :runtime
|
105
105
|
prerelease: false
|
106
106
|
version_requirements: !ruby/object:Gem::Requirement
|
107
107
|
requirements:
|
108
108
|
- - '='
|
109
109
|
- !ruby/object:Gem::Version
|
110
|
-
version: 0.0.
|
110
|
+
version: 0.0.12
|
111
111
|
- !ruby/object:Gem::Dependency
|
112
112
|
name: thread
|
113
113
|
requirement: !ruby/object:Gem::Requirement
|