broken_link_finder 0.4.1 → 0.5.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile.lock +4 -4
- data/README.md +8 -1
- data/Rakefile +16 -1
- data/broken_link_finder.gemspec +2 -2
- data/lib/broken_link_finder/finder.rb +14 -3
- data/lib/broken_link_finder/version.rb +1 -1
- metadata +4 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 364ec155bda54b8757fbce6425c8978d8d17cd91618d5650dc14f8a63e712a2f
|
4
|
+
data.tar.gz: b5c0e405f159aaed54725042105c519e1e9d0e085bf40aad99b114200ec8713a
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: eaf1aa2ea2b4f9177561291a8b729e91610f9b2fbd8541ceb2fa96f039a667ffd546dd758b9e4edb4fba9cad25c55368cb4af95ffbe9d0fdc20c546e3d4e5f0b
|
7
|
+
data.tar.gz: 8c6d407f74d900553782d7aedcdc4027c14e436bdb73b2888830827da9e305456e653ad9398c8752e01c8aae381871eb239aee5d7394ea995c5b68b0cbd38404
|
data/Gemfile.lock
CHANGED
@@ -1,10 +1,10 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
broken_link_finder (0.4.1)
|
4
|
+
broken_link_finder (0.5.0)
|
5
5
|
thor (= 0.20.3)
|
6
6
|
thread (= 0.2)
|
7
|
-
wgit (= 0.0.
|
7
|
+
wgit (= 0.0.12)
|
8
8
|
|
9
9
|
GEM
|
10
10
|
remote: https://rubygems.org/
|
@@ -36,9 +36,9 @@ GEM
|
|
36
36
|
addressable (>= 2.3.6)
|
37
37
|
crack (>= 0.3.2)
|
38
38
|
hashdiff
|
39
|
-
wgit (0.0.
|
39
|
+
wgit (0.0.12)
|
40
40
|
mongo (~> 2.8.0)
|
41
|
-
nokogiri (~> 1.10)
|
41
|
+
nokogiri (~> 1.10.3)
|
42
42
|
|
43
43
|
PLATFORMS
|
44
44
|
ruby
|
data/README.md
CHANGED
@@ -4,6 +4,14 @@ Does what it says on the tin. Finds a website's broken links.
|
|
4
4
|
|
5
5
|
Simply point it at a website and it will crawl all of its webpages searching for and identifying any broken links. You will then be presented with a nice concise summary of the broken links found.
|
6
6
|
|
7
|
+
## How It Works
|
8
|
+
|
9
|
+
Any page element with an `href` or `src` attribute is considered a link. For each link on a given page, the link is considered broken if any of the following conditions (checked in order) is met:
|
10
|
+
|
11
|
+
1) A response status code of `404 Not Found` is returned.
|
12
|
+
2) An empty HTML response body is returned.
|
13
|
+
3) The HTML response body doesn't contain an element ID matching that of the link's anchor, e.g. the page at `http://server.com#about` must contain an element with an ID of `about`; otherwise the link is considered broken.
|
14
|
+
|
7
15
|
## Made Possible By
|
8
16
|
|
9
17
|
This repository utilises the awesome `wgit` Ruby gem. See its [repository](https://github.com/michaeltelford/wgit) for more details.
|
@@ -74,7 +82,6 @@ http://imgur.com
|
|
74
82
|
|
75
83
|
## TODO
|
76
84
|
|
77
|
-
- Improve the intelligence of the finder. Currently a custom 'Not Found' webpage will not be discovered as a broken link; it should.
|
78
85
|
- Add logger functionality (especially useful in the console during development).
|
79
86
|
|
80
87
|
## Development
|
data/Rakefile
CHANGED
@@ -7,4 +7,19 @@ Rake::TestTask.new(:test) do |t|
|
|
7
7
|
t.test_files = FileList['test/**/*_test.rb']
|
8
8
|
end
|
9
9
|
|
10
|
-
|
10
|
+
desc "Print help information"
|
11
|
+
task default: :help
|
12
|
+
|
13
|
+
desc "Print help information"
|
14
|
+
task :help do
|
15
|
+
system "bundle exec rake -D"
|
16
|
+
end
|
17
|
+
|
18
|
+
desc "Compile all project Ruby files with warnings."
|
19
|
+
task :compile do
|
20
|
+
paths = Dir["**/*.rb", "**/*.gemspec", 'exe/broken_link_finder']
|
21
|
+
paths.each do |f|
|
22
|
+
puts "\nCompiling #{f}..."
|
23
|
+
puts `ruby -cw #{f}`
|
24
|
+
end
|
25
|
+
end
|
data/broken_link_finder.gemspec
CHANGED
@@ -13,7 +13,7 @@ Gem::Specification.new do |spec|
|
|
13
13
|
spec.description = "Finds a website's broken links using the 'wgit' gem and reports back to you with a summary."
|
14
14
|
spec.homepage = "https://github.com/michaeltelford/broken-link-finder"
|
15
15
|
spec.license = "MIT"
|
16
|
-
spec.metadata = {
|
16
|
+
spec.metadata = {
|
17
17
|
"source_code_uri" => "https://github.com/michaeltelford/broken-link-finder",
|
18
18
|
}
|
19
19
|
|
@@ -43,7 +43,7 @@ Gem::Specification.new do |spec|
|
|
43
43
|
spec.add_development_dependency "byebug", "~> 11.0"
|
44
44
|
spec.add_development_dependency "webmock", "~> 3.5"
|
45
45
|
|
46
|
-
spec.add_runtime_dependency "wgit", "0.0.
|
46
|
+
spec.add_runtime_dependency "wgit", "0.0.12"
|
47
47
|
spec.add_runtime_dependency "thread", "0.2"
|
48
48
|
spec.add_runtime_dependency "thor", "0.20.3"
|
49
49
|
end
|
@@ -67,7 +67,7 @@ module BrokenLinkFinder
|
|
67
67
|
|
68
68
|
# Pretty prints the contents of broken_links into a stream e.g. Kernel
|
69
69
|
# (STDOUT) or a file - anything that respond_to? :puts.
|
70
|
-
# Returns true if there were broken links and vice versa.
|
70
|
+
# Returns true if there were broken links and vice versa.
|
71
71
|
def pretty_print_broken_links(stream = Kernel)
|
72
72
|
raise "stream must respond_to? :puts" unless stream.respond_to? :puts
|
73
73
|
|
@@ -96,13 +96,24 @@ broken links...")
|
|
96
96
|
def find_broken_links(doc)
|
97
97
|
links = doc.internal_full_links + doc.external_links
|
98
98
|
links.each do |link|
|
99
|
-
|
100
|
-
if
|
99
|
+
link_doc = @crawler.crawl_url(link)
|
100
|
+
if @crawler.last_response.is_a?(Net::HTTPNotFound) or
|
101
|
+
link_doc.nil? or
|
102
|
+
has_broken_anchor(link_doc)
|
101
103
|
append_broken_link(doc.url, link)
|
102
104
|
end
|
103
105
|
end
|
104
106
|
end
|
105
107
|
|
108
|
+
# Returns true if the link is/contains a broken anchor.
|
109
|
+
def has_broken_anchor(doc)
|
110
|
+
raise "link document is nil" unless doc
|
111
|
+
return false unless doc.url.anchor
|
112
|
+
|
113
|
+
anchor = doc.url.anchor[1..-1] # Remove the # prefix.
|
114
|
+
doc.xpath("//*[@id='#{anchor}']").empty?
|
115
|
+
end
|
116
|
+
|
106
117
|
# Append url => [link] to @broken_links.
|
107
118
|
def append_broken_link(url, link)
|
108
119
|
@lock.synchronize do
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: broken_link_finder
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.4.1
|
4
|
+
version: 0.5.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Michael Telford
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2019-07-
|
11
|
+
date: 2019-07-21 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -100,14 +100,14 @@ dependencies:
|
|
100
100
|
requirements:
|
101
101
|
- - '='
|
102
102
|
- !ruby/object:Gem::Version
|
103
|
-
version: 0.0.
|
103
|
+
version: 0.0.12
|
104
104
|
type: :runtime
|
105
105
|
prerelease: false
|
106
106
|
version_requirements: !ruby/object:Gem::Requirement
|
107
107
|
requirements:
|
108
108
|
- - '='
|
109
109
|
- !ruby/object:Gem::Version
|
110
|
-
version: 0.0.
|
110
|
+
version: 0.0.12
|
111
111
|
- !ruby/object:Gem::Dependency
|
112
112
|
name: thread
|
113
113
|
requirement: !ruby/object:Gem::Requirement
|