broken_link_finder 0.9.3 → 0.9.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +9 -0
- data/Gemfile.lock +5 -5
- data/README.md +10 -6
- data/broken_link_finder.gemspec +1 -1
- data/lib/broken_link_finder/finder.rb +11 -17
- data/lib/broken_link_finder/version.rb +1 -1
- metadata +4 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: cb0cc981acce272911be9d8a3ed36dd49e0f621eee3e9fd71893020da1600945
|
4
|
+
data.tar.gz: 3b368404cf3b2da83445212c44e43f32ad7d1fc5119c8980aeaa04540ebce2c9
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 92ffd946b60411dba032ac30b8a96820dea262520ab92e1f2d64c48477d4c4ca6e22fe41d221fb421423565f9d61883b48017c1c5af651c1bb71ba96eacf490c
|
7
|
+
data.tar.gz: 17455ab4cf7cb3ab0df9763b98cc844b1c1c07ed702c600cc623e263119a8f071b8a9f55519a021662db64e6bffda91d1c5d439ed8594ae87d830da19acf3529
|
data/CHANGELOG.md
CHANGED
data/Gemfile.lock
CHANGED
@@ -1,17 +1,17 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
broken_link_finder (0.9.
|
4
|
+
broken_link_finder (0.9.4)
|
5
5
|
thor (~> 0.20.3)
|
6
6
|
thread (~> 0.2.0)
|
7
|
-
wgit (~> 0.
|
7
|
+
wgit (~> 0.5.0)
|
8
8
|
|
9
9
|
GEM
|
10
10
|
remote: https://rubygems.org/
|
11
11
|
specs:
|
12
12
|
addressable (2.6.0)
|
13
13
|
public_suffix (>= 2.0.2, < 4.0)
|
14
|
-
bson (4.
|
14
|
+
bson (4.6.0)
|
15
15
|
byebug (11.0.1)
|
16
16
|
coderay (1.1.2)
|
17
17
|
crack (0.4.3)
|
@@ -27,7 +27,7 @@ GEM
|
|
27
27
|
minitest (5.12.2)
|
28
28
|
mongo (2.9.2)
|
29
29
|
bson (>= 4.4.2, < 5.0.0)
|
30
|
-
nokogiri (1.10.
|
30
|
+
nokogiri (1.10.5)
|
31
31
|
mini_portile2 (~> 2.4.0)
|
32
32
|
pry (0.12.2)
|
33
33
|
coderay (~> 1.1.0)
|
@@ -43,7 +43,7 @@ GEM
|
|
43
43
|
addressable (>= 2.3.6)
|
44
44
|
crack (>= 0.3.2)
|
45
45
|
hashdiff (>= 0.4.0, < 2.0.0)
|
46
|
-
wgit (0.
|
46
|
+
wgit (0.5.0)
|
47
47
|
addressable (~> 2.6.0)
|
48
48
|
mongo (~> 2.9.0)
|
49
49
|
nokogiri (~> 1.10.3)
|
data/README.md
CHANGED
@@ -2,7 +2,9 @@
|
|
2
2
|
|
3
3
|
Does what it says on the tin; Finds a website's broken links.
|
4
4
|
|
5
|
-
Simply point it at a website and it will crawl all of its webpages searching for and identifying any broken links. You will then be presented with a
|
5
|
+
Simply point it at a website and it will crawl all of its webpages searching for and identifying any broken links. You will then be presented with a concise summary of the broken links found.
|
6
|
+
|
7
|
+
Because `libcurl` is used under the hood, Broken Link Finder is fast!
|
6
8
|
|
7
9
|
## How It Works
|
8
10
|
|
@@ -10,7 +12,7 @@ Any HTML page element with a `href` or `src` attribute is considered a link. For
|
|
10
12
|
|
11
13
|
- An empty HTML response body is returned.
|
12
14
|
- A response status code of `404 Not Found` is returned.
|
13
|
-
- The HTML response body doesn't contain an element ID matching that of the link's
|
15
|
+
- The HTML response body doesn't contain an element ID matching that of the link's fragment e.g. `http://server.com#about` must contain an element with `id="about"` or the link is considered broken.
|
14
16
|
- The link redirects more than 5 times consecutively.
|
15
17
|
|
16
18
|
**Note**: Not all link types are supported.
|
@@ -73,9 +75,9 @@ Below is a simple script which crawls a website and outputs its broken links to
|
|
73
75
|
require 'broken_link_finder'
|
74
76
|
|
75
77
|
finder = BrokenLinkFinder.new
|
76
|
-
finder.crawl_site 'http://txti.es'
|
77
|
-
finder.pretty_print_link_report
|
78
|
-
|
78
|
+
finder.crawl_site 'http://txti.es' # Or use Finder#crawl_page for a single webpage.
|
79
|
+
finder.pretty_print_link_report # Or use Finder#broken_links and Finder#ignored_links
|
80
|
+
# for direct access to the link Hashes.
|
79
81
|
```
|
80
82
|
|
81
83
|
Then execute the script with:
|
@@ -126,9 +128,11 @@ After checking out the repo, run `bin/setup` to install dependencies. Then, run
|
|
126
128
|
To install this gem onto your local machine, run `bundle exec rake install`.
|
127
129
|
|
128
130
|
To release a new gem version:
|
131
|
+
- Update the deps in the `*.gemspec` if necessary
|
129
132
|
- Update the version number in `version.rb` and add the new version to the `CHANGELOG`
|
130
133
|
- Run `bundle install`
|
131
134
|
- Run `bundle exec rake test` ensuring all tests pass
|
132
135
|
- Run `bundle exec rake compile` ensuring no warnings
|
133
|
-
- Run `bundle exec rake install && rbenv rehash`
|
136
|
+
- Run `bundle exec rake install && rbenv rehash`
|
137
|
+
- Manually test the executable
|
134
138
|
- Run `bundle exec rake release[origin]`
|
data/broken_link_finder.gemspec
CHANGED
data/lib/broken_link_finder/finder.rb
CHANGED
@@ -112,8 +112,8 @@ module BrokenLinkFinder
|
|
112
112
|
private
|
113
113
|
|
114
114
|
# Finds which links are unsupported or broken and records the details.
|
115
|
-
def find_broken_links(
|
116
|
-
links = get_supported_links(
|
115
|
+
def find_broken_links(page)
|
116
|
+
links = get_supported_links(page)
|
117
117
|
|
118
118
|
# Iterate over the supported links checking if they're broken or not.
|
119
119
|
links.each do |link|
|
@@ -121,18 +121,18 @@ module BrokenLinkFinder
|
|
121
121
|
next if @all_intact_links.include?(link)
|
122
122
|
|
123
123
|
if @all_broken_links.include?(link)
|
124
|
-
append_broken_link(
|
124
|
+
append_broken_link(page.url, link)
|
125
125
|
next
|
126
126
|
end
|
127
127
|
|
128
128
|
# The link hasn't been processed before so we crawl it.
|
129
|
-
link_doc = crawl_link(
|
129
|
+
link_doc = crawl_link(page, link)
|
130
130
|
|
131
131
|
# Determine if the crawled link is broken or not.
|
132
132
|
if link_doc.nil? ||
|
133
|
-
@crawler.last_response.
|
133
|
+
@crawler.last_response.not_found? ||
|
134
134
|
has_broken_anchor(link_doc)
|
135
|
-
append_broken_link(
|
135
|
+
append_broken_link(page.url, link)
|
136
136
|
else
|
137
137
|
@lock.synchronize { @all_intact_links << link }
|
138
138
|
end
|
@@ -155,24 +155,18 @@ module BrokenLinkFinder
|
|
155
155
|
|
156
156
|
# Makes the link absolute and crawls it, returning its Wgit::Document.
|
157
157
|
def crawl_link(doc, link)
|
158
|
-
link =
|
158
|
+
link = link.prefix_base(doc)
|
159
159
|
@crawler.crawl(link)
|
160
160
|
end
|
161
161
|
|
162
|
-
# Returns the link
|
163
|
-
def get_absolute_link(doc, link)
|
164
|
-
link.is_relative? ? doc.base_url(link: link).concat(link) : link
|
165
|
-
end
|
166
|
-
|
167
|
-
# Returns true if the link is/contains a broken anchor.
|
162
|
+
# Returns true if the link is/contains a broken anchor/fragment.
|
168
163
|
def has_broken_anchor(doc)
|
169
164
|
raise 'link document is nil' unless doc
|
170
165
|
|
171
|
-
|
172
|
-
return false if
|
166
|
+
fragment = doc.url.fragment
|
167
|
+
return false if fragment.nil? || fragment.empty?
|
173
168
|
|
174
|
-
|
175
|
-
doc.xpath("//*[@id='#{anchor}']").empty?
|
169
|
+
doc.xpath("//*[@id='#{fragment}']").empty?
|
176
170
|
end
|
177
171
|
|
178
172
|
# Append key => [value] to @broken_links.
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: broken_link_finder
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.9.
|
4
|
+
version: 0.9.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Michael Telford
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2019-
|
11
|
+
date: 2019-11-02 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -128,14 +128,14 @@ dependencies:
|
|
128
128
|
requirements:
|
129
129
|
- - "~>"
|
130
130
|
- !ruby/object:Gem::Version
|
131
|
-
version: 0.
|
131
|
+
version: 0.5.0
|
132
132
|
type: :runtime
|
133
133
|
prerelease: false
|
134
134
|
version_requirements: !ruby/object:Gem::Requirement
|
135
135
|
requirements:
|
136
136
|
- - "~>"
|
137
137
|
- !ruby/object:Gem::Version
|
138
|
-
version: 0.
|
138
|
+
version: 0.5.0
|
139
139
|
description: Finds a website's broken links using the 'wgit' gem and reports back
|
140
140
|
to you with a summary.
|
141
141
|
email: michael.telford@live.com
|