broken_link_finder 0.2.0 → 0.2.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile.lock +3 -3
- data/README.md +7 -9
- data/broken_link_finder.gemspec +1 -1
- data/lib/broken_link_finder/finder.rb +6 -7
- data/lib/broken_link_finder/version.rb +1 -1
- metadata +5 -5
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 4b9617c6016a37c3afeec665d3f5d70e8574e8cf28100f4c661ee95fd7f7ccee
|
4
|
+
data.tar.gz: cf517abd6b554f3cdf01b535dfd4417dd226fdde8edae4556190b0da69cd3bda
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 691714c983790ff8d8a004e3c92f1edae1a50e44e4771f123d177904364c65679c034580a3b182209a18b2baa83277b88c571e0cddcf131c638fcc106cc031a5
|
7
|
+
data.tar.gz: '0093753cea2740b13b5b8fda6dde5046e76ce4e8e7cf93e3be7ae99cf7bd6e6aa164218105fe81b4a35ddf4622717f01d69455be9dc65b738e7ae32c33341c32'
|
data/Gemfile.lock
CHANGED
@@ -1,8 +1,8 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
broken_link_finder (0.2.0)
|
5
|
-
wgit
|
4
|
+
broken_link_finder (0.2.1)
|
5
|
+
wgit (> 0)
|
6
6
|
|
7
7
|
GEM
|
8
8
|
remote: https://rubygems.org/
|
@@ -32,7 +32,7 @@ GEM
|
|
32
32
|
addressable (>= 2.3.6)
|
33
33
|
crack (>= 0.3.2)
|
34
34
|
hashdiff
|
35
|
-
wgit (0.0.
|
35
|
+
wgit (0.0.9)
|
36
36
|
mongo (~> 2.6)
|
37
37
|
nokogiri (~> 1.10)
|
38
38
|
|
data/README.md
CHANGED
@@ -8,8 +8,6 @@ Simply point it at a website and it will crawl all of its webpages searching for
|
|
8
8
|
|
9
9
|
This repository utilises the awesome `wgit` Ruby gem. See its [repository](https://github.com/michaeltelford/wgit) for more details.
|
10
10
|
|
11
|
-
The only gotcha is that `wgit` doesn't currently follow redirects meaning they will appear as broken links in the results.
|
12
|
-
|
13
11
|
## Installation
|
14
12
|
|
15
13
|
Add this line to your application's Gemfile:
|
@@ -28,7 +26,7 @@ Or install it yourself as:
|
|
28
26
|
|
29
27
|
## Usage
|
30
28
|
|
31
|
-
Below is a sample script which crawls a website and outputs its broken links to STDOUT.
|
29
|
+
Below is a simple script which crawls a website and outputs its broken links to STDOUT.
|
32
30
|
|
33
31
|
> main.rb
|
34
32
|
|
@@ -36,8 +34,8 @@ Below is a sample script which crawls a website and outputs its broken links to
|
|
36
34
|
require 'broken_link_finder'
|
37
35
|
|
38
36
|
finder = BrokenLinkFinder::Finder.new
|
39
|
-
finder.crawl_site "http://txti.es" # Also, see Finder#
|
40
|
-
finder.pretty_print_broken_links
|
37
|
+
finder.crawl_site "http://txti.es" # Also, see Finder#crawl_page for a single webpage.
|
38
|
+
finder.pretty_print_broken_links # Also, see Finder#broken_links for a Hash.
|
41
39
|
```
|
42
40
|
|
43
41
|
Then execute the script with:
|
@@ -62,19 +60,19 @@ http://imgur.com
|
|
62
60
|
|
63
61
|
## TODO
|
64
62
|
|
63
|
+
- Speed boost.
|
65
64
|
- Create a `broken_link_finder` executable.
|
66
65
|
- Add logger functionality (especially useful in the console during development).
|
67
|
-
- Update the `wgit` gem as soon as redirects are implemented.
|
68
66
|
|
69
67
|
## Development
|
70
68
|
|
71
|
-
After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake test` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
|
69
|
+
After checking out the repo, run `bin/setup` to install dependencies. Then, run `bundle exec rake test` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
|
72
70
|
|
73
|
-
To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org).
|
71
|
+
To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release[origin]`, which will create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org).
|
74
72
|
|
75
73
|
## Contributing
|
76
74
|
|
77
|
-
Bug reports and pull requests are welcome on GitHub
|
75
|
+
Bug reports and pull requests are welcome on [GitHub](https://github.com/michaeltelford/broken-link-finder).
|
78
76
|
|
79
77
|
## License
|
80
78
|
|
data/lib/broken_link_finder/finder.rb
CHANGED
@@ -20,8 +20,8 @@ module BrokenLinkFinder
|
|
20
20
|
def crawl_site(url)
|
21
21
|
clear_broken_links
|
22
22
|
url = Wgit::Url.new(url)
|
23
|
-
crawled_pages = []
|
24
23
|
|
24
|
+
crawled_pages = []
|
25
25
|
@crawler.crawl_site(url) do |doc|
|
26
26
|
# Ensure the given website url is valid.
|
27
27
|
raise "Invalid URL: #{url}" if doc.url == url and doc.empty?
|
@@ -32,8 +32,7 @@ module BrokenLinkFinder
|
|
32
32
|
|
33
33
|
# Get all page links and determine which are broken.
|
34
34
|
next unless doc
|
35
|
-
|
36
|
-
find_broken_links(doc.url, links)
|
35
|
+
find_broken_links(doc)
|
37
36
|
end
|
38
37
|
|
39
38
|
!@broken_links.empty?
|
@@ -50,8 +49,7 @@ module BrokenLinkFinder
|
|
50
49
|
raise "Invalid URL: #{url}" unless doc
|
51
50
|
|
52
51
|
# Get all page links and determine which are broken.
|
53
|
-
|
54
|
-
find_broken_links(url, links)
|
52
|
+
find_broken_links(doc)
|
55
53
|
|
56
54
|
!@broken_links.empty?
|
57
55
|
end
|
@@ -84,11 +82,12 @@ broken links...")
|
|
84
82
|
private
|
85
83
|
|
86
84
|
# Finds which links are broken and append the details to @broken_links.
|
87
|
-
def find_broken_links(url, links)
|
85
|
+
def find_broken_links(doc)
|
86
|
+
links = doc.internal_full_links + doc.external_links
|
88
87
|
links.each do |link|
|
89
88
|
ok = @crawler.crawl_url(link)
|
90
89
|
if not ok # a.k.a. if the link is broken...
|
91
|
-
append_broken_link(url, link)
|
90
|
+
append_broken_link(doc.url, link)
|
92
91
|
end
|
93
92
|
end
|
94
93
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: broken_link_finder
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.0
|
4
|
+
version: 0.2.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Michael Telford
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2019-
|
11
|
+
date: 2019-06-04 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -98,14 +98,14 @@ dependencies:
|
|
98
98
|
name: wgit
|
99
99
|
requirement: !ruby/object:Gem::Requirement
|
100
100
|
requirements:
|
101
|
-
- - "
|
101
|
+
- - ">"
|
102
102
|
- !ruby/object:Gem::Version
|
103
103
|
version: '0'
|
104
104
|
type: :runtime
|
105
105
|
prerelease: false
|
106
106
|
version_requirements: !ruby/object:Gem::Requirement
|
107
107
|
requirements:
|
108
|
-
- - "
|
108
|
+
- - ">"
|
109
109
|
- !ruby/object:Gem::Version
|
110
110
|
version: '0'
|
111
111
|
description: Finds a website's broken links using the 'wgit' gem and reports back
|
@@ -152,7 +152,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
152
152
|
version: '0'
|
153
153
|
requirements: []
|
154
154
|
rubyforge_project:
|
155
|
-
rubygems_version: 2.7.
|
155
|
+
rubygems_version: 2.7.6
|
156
156
|
signing_key:
|
157
157
|
specification_version: 4
|
158
158
|
summary: Finds a website's broken links and reports back to you with a summary.
|