broken_link_finder 0.9.3 → 0.9.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +9 -0
- data/Gemfile.lock +5 -5
- data/README.md +10 -6
- data/broken_link_finder.gemspec +1 -1
- data/lib/broken_link_finder/finder.rb +11 -17
- data/lib/broken_link_finder/version.rb +1 -1
- metadata +4 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: cb0cc981acce272911be9d8a3ed36dd49e0f621eee3e9fd71893020da1600945
|
4
|
+
data.tar.gz: 3b368404cf3b2da83445212c44e43f32ad7d1fc5119c8980aeaa04540ebce2c9
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 92ffd946b60411dba032ac30b8a96820dea262520ab92e1f2d64c48477d4c4ca6e22fe41d221fb421423565f9d61883b48017c1c5af651c1bb71ba96eacf490c
|
7
|
+
data.tar.gz: 17455ab4cf7cb3ab0df9763b98cc844b1c1c07ed702c600cc623e263119a8f071b8a9f55519a021662db64e6bffda91d1c5d439ed8594ae87d830da19acf3529
|
data/CHANGELOG.md
CHANGED
data/Gemfile.lock
CHANGED
@@ -1,17 +1,17 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
broken_link_finder (0.9.
|
4
|
+
broken_link_finder (0.9.4)
|
5
5
|
thor (~> 0.20.3)
|
6
6
|
thread (~> 0.2.0)
|
7
|
-
wgit (~> 0.
|
7
|
+
wgit (~> 0.5.0)
|
8
8
|
|
9
9
|
GEM
|
10
10
|
remote: https://rubygems.org/
|
11
11
|
specs:
|
12
12
|
addressable (2.6.0)
|
13
13
|
public_suffix (>= 2.0.2, < 4.0)
|
14
|
-
bson (4.
|
14
|
+
bson (4.6.0)
|
15
15
|
byebug (11.0.1)
|
16
16
|
coderay (1.1.2)
|
17
17
|
crack (0.4.3)
|
@@ -27,7 +27,7 @@ GEM
|
|
27
27
|
minitest (5.12.2)
|
28
28
|
mongo (2.9.2)
|
29
29
|
bson (>= 4.4.2, < 5.0.0)
|
30
|
-
nokogiri (1.10.
|
30
|
+
nokogiri (1.10.5)
|
31
31
|
mini_portile2 (~> 2.4.0)
|
32
32
|
pry (0.12.2)
|
33
33
|
coderay (~> 1.1.0)
|
@@ -43,7 +43,7 @@ GEM
|
|
43
43
|
addressable (>= 2.3.6)
|
44
44
|
crack (>= 0.3.2)
|
45
45
|
hashdiff (>= 0.4.0, < 2.0.0)
|
46
|
-
wgit (0.
|
46
|
+
wgit (0.5.0)
|
47
47
|
addressable (~> 2.6.0)
|
48
48
|
mongo (~> 2.9.0)
|
49
49
|
nokogiri (~> 1.10.3)
|
data/README.md
CHANGED
@@ -2,7 +2,9 @@
|
|
2
2
|
|
3
3
|
Does what it says on the tin; Finds a website's broken links.
|
4
4
|
|
5
|
-
Simply point it at a website and it will crawl all of its webpages searching for and identifying any broken links. You will then be presented with a
|
5
|
+
Simply point it at a website and it will crawl all of its webpages searching for and identifying any broken links. You will then be presented with a concise summary of the broken links found.
|
6
|
+
|
7
|
+
Because `libcurl` is used under the hood, Broken Link Finder is fast!
|
6
8
|
|
7
9
|
## How It Works
|
8
10
|
|
@@ -10,7 +12,7 @@ Any HTML page element with a `href` or `src` attribute is considered a link. For
|
|
10
12
|
|
11
13
|
- An empty HTML response body is returned.
|
12
14
|
- A response status code of `404 Not Found` is returned.
|
13
|
-
- The HTML response body doesn't contain an element ID matching that of the link's
|
15
|
+
- The HTML response body doesn't contain an element ID matching that of the link's fragment e.g. `http://server.com#about` must contain an element with `id="about"` or the link is considered broken.
|
14
16
|
- The link redirects more than 5 times consecutively.
|
15
17
|
|
16
18
|
**Note**: Not all link types are supported.
|
@@ -73,9 +75,9 @@ Below is a simple script which crawls a website and outputs its broken links to
|
|
73
75
|
require 'broken_link_finder'
|
74
76
|
|
75
77
|
finder = BrokenLinkFinder.new
|
76
|
-
finder.crawl_site 'http://txti.es'
|
77
|
-
finder.pretty_print_link_report
|
78
|
-
|
78
|
+
finder.crawl_site 'http://txti.es' # Or use Finder#crawl_page for a single webpage.
|
79
|
+
finder.pretty_print_link_report # Or use Finder#broken_links and Finder#ignored_links
|
80
|
+
# for direct access to the link Hashes.
|
79
81
|
```
|
80
82
|
|
81
83
|
Then execute the script with:
|
@@ -126,9 +128,11 @@ After checking out the repo, run `bin/setup` to install dependencies. Then, run
|
|
126
128
|
To install this gem onto your local machine, run `bundle exec rake install`.
|
127
129
|
|
128
130
|
To release a new gem version:
|
131
|
+
- Update the deps in the `*.gemspec` if necessary
|
129
132
|
- Update the version number in `version.rb` and add the new version to the `CHANGELOG`
|
130
133
|
- Run `bundle install`
|
131
134
|
- Run `bundle exec rake test` ensuring all tests pass
|
132
135
|
- Run `bundle exec rake compile` ensuring no warnings
|
133
|
-
- Run `bundle exec rake install && rbenv rehash`
|
136
|
+
- Run `bundle exec rake install && rbenv rehash`
|
137
|
+
- Manually test the executable
|
134
138
|
- Run `bundle exec rake release[origin]`
|
data/broken_link_finder.gemspec
CHANGED
@@ -112,8 +112,8 @@ module BrokenLinkFinder
|
|
112
112
|
private
|
113
113
|
|
114
114
|
# Finds which links are unsupported or broken and records the details.
|
115
|
-
def find_broken_links(
|
116
|
-
links = get_supported_links(
|
115
|
+
def find_broken_links(page)
|
116
|
+
links = get_supported_links(page)
|
117
117
|
|
118
118
|
# Iterate over the supported links checking if they're broken or not.
|
119
119
|
links.each do |link|
|
@@ -121,18 +121,18 @@ module BrokenLinkFinder
|
|
121
121
|
next if @all_intact_links.include?(link)
|
122
122
|
|
123
123
|
if @all_broken_links.include?(link)
|
124
|
-
append_broken_link(
|
124
|
+
append_broken_link(page.url, link)
|
125
125
|
next
|
126
126
|
end
|
127
127
|
|
128
128
|
# The link hasn't been processed before so we crawl it.
|
129
|
-
link_doc = crawl_link(
|
129
|
+
link_doc = crawl_link(page, link)
|
130
130
|
|
131
131
|
# Determine if the crawled link is broken or not.
|
132
132
|
if link_doc.nil? ||
|
133
|
-
@crawler.last_response.
|
133
|
+
@crawler.last_response.not_found? ||
|
134
134
|
has_broken_anchor(link_doc)
|
135
|
-
append_broken_link(
|
135
|
+
append_broken_link(page.url, link)
|
136
136
|
else
|
137
137
|
@lock.synchronize { @all_intact_links << link }
|
138
138
|
end
|
@@ -155,24 +155,18 @@ module BrokenLinkFinder
|
|
155
155
|
|
156
156
|
# Makes the link absolute and crawls it, returning its Wgit::Document.
|
157
157
|
def crawl_link(doc, link)
|
158
|
-
link =
|
158
|
+
link = link.prefix_base(doc)
|
159
159
|
@crawler.crawl(link)
|
160
160
|
end
|
161
161
|
|
162
|
-
# Returns the link
|
163
|
-
def get_absolute_link(doc, link)
|
164
|
-
link.is_relative? ? doc.base_url(link: link).concat(link) : link
|
165
|
-
end
|
166
|
-
|
167
|
-
# Returns true if the link is/contains a broken anchor.
|
162
|
+
# Returns true if the link is/contains a broken anchor/fragment.
|
168
163
|
def has_broken_anchor(doc)
|
169
164
|
raise 'link document is nil' unless doc
|
170
165
|
|
171
|
-
|
172
|
-
return false if
|
166
|
+
fragment = doc.url.fragment
|
167
|
+
return false if fragment.nil? || fragment.empty?
|
173
168
|
|
174
|
-
|
175
|
-
doc.xpath("//*[@id='#{anchor}']").empty?
|
169
|
+
doc.xpath("//*[@id='#{fragment}']").empty?
|
176
170
|
end
|
177
171
|
|
178
172
|
# Append key => [value] to @broken_links.
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: broken_link_finder
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.9.
|
4
|
+
version: 0.9.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Michael Telford
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2019-
|
11
|
+
date: 2019-11-02 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -128,14 +128,14 @@ dependencies:
|
|
128
128
|
requirements:
|
129
129
|
- - "~>"
|
130
130
|
- !ruby/object:Gem::Version
|
131
|
-
version: 0.
|
131
|
+
version: 0.5.0
|
132
132
|
type: :runtime
|
133
133
|
prerelease: false
|
134
134
|
version_requirements: !ruby/object:Gem::Requirement
|
135
135
|
requirements:
|
136
136
|
- - "~>"
|
137
137
|
- !ruby/object:Gem::Version
|
138
|
-
version: 0.
|
138
|
+
version: 0.5.0
|
139
139
|
description: Finds a website's broken links using the 'wgit' gem and reports back
|
140
140
|
to you with a summary.
|
141
141
|
email: michael.telford@live.com
|