broken_link_finder 0.2.0 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile.lock +3 -3
- data/README.md +7 -9
- data/broken_link_finder.gemspec +1 -1
- data/lib/broken_link_finder/finder.rb +6 -7
- data/lib/broken_link_finder/version.rb +1 -1
- metadata +5 -5
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 4b9617c6016a37c3afeec665d3f5d70e8574e8cf28100f4c661ee95fd7f7ccee
|
4
|
+
data.tar.gz: cf517abd6b554f3cdf01b535dfd4417dd226fdde8edae4556190b0da69cd3bda
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 691714c983790ff8d8a004e3c92f1edae1a50e44e4771f123d177904364c65679c034580a3b182209a18b2baa83277b88c571e0cddcf131c638fcc106cc031a5
|
7
|
+
data.tar.gz: '0093753cea2740b13b5b8fda6dde5046e76ce4e8e7cf93e3be7ae99cf7bd6e6aa164218105fe81b4a35ddf4622717f01d69455be9dc65b738e7ae32c33341c32'
|
data/Gemfile.lock
CHANGED
@@ -1,8 +1,8 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
broken_link_finder (0.2.0)
|
5
|
-
wgit
|
4
|
+
broken_link_finder (0.2.1)
|
5
|
+
wgit (> 0)
|
6
6
|
|
7
7
|
GEM
|
8
8
|
remote: https://rubygems.org/
|
@@ -32,7 +32,7 @@ GEM
|
|
32
32
|
addressable (>= 2.3.6)
|
33
33
|
crack (>= 0.3.2)
|
34
34
|
hashdiff
|
35
|
-
wgit (0.0.
|
35
|
+
wgit (0.0.9)
|
36
36
|
mongo (~> 2.6)
|
37
37
|
nokogiri (~> 1.10)
|
38
38
|
|
data/README.md
CHANGED
@@ -8,8 +8,6 @@ Simply point it at a website and it will crawl all of its webpages searching for
|
|
8
8
|
|
9
9
|
This repository utilises the awesome `wgit` Ruby gem. See its [repository](https://github.com/michaeltelford/wgit) for more details.
|
10
10
|
|
11
|
-
The only gotcha is that `wgit` doesn't currently follow redirects meaning they will appear as broken links in the results.
|
12
|
-
|
13
11
|
## Installation
|
14
12
|
|
15
13
|
Add this line to your application's Gemfile:
|
@@ -28,7 +26,7 @@ Or install it yourself as:
|
|
28
26
|
|
29
27
|
## Usage
|
30
28
|
|
31
|
-
Below is a sample script which crawls a website and outputs its broken links to STDOUT.
|
29
|
+
Below is a simple script which crawls a website and outputs its broken links to STDOUT.
|
32
30
|
|
33
31
|
> main.rb
|
34
32
|
|
@@ -36,8 +34,8 @@ Below is a sample script which crawls a website and outputs its broken links to
|
|
36
34
|
require 'broken_link_finder'
|
37
35
|
|
38
36
|
finder = BrokenLinkFinder::Finder.new
|
39
|
-
finder.crawl_site "http://txti.es" # Also, see Finder#
|
40
|
-
finder.pretty_print_broken_links
|
37
|
+
finder.crawl_site "http://txti.es" # Also, see Finder#crawl_page for a single webpage.
|
38
|
+
finder.pretty_print_broken_links # Also, see Finder#broken_links for a Hash.
|
41
39
|
```
|
42
40
|
|
43
41
|
Then execute the script with:
|
@@ -62,19 +60,19 @@ http://imgur.com
|
|
62
60
|
|
63
61
|
## TODO
|
64
62
|
|
63
|
+
- Speed boost.
|
65
64
|
- Create a `broken_link_finder` executable.
|
66
65
|
- Add logger functionality (especially useful in the console during development).
|
67
|
-
- Update the `wgit` gem as soon as redirects are implemented.
|
68
66
|
|
69
67
|
## Development
|
70
68
|
|
71
|
-
After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake test` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
|
69
|
+
After checking out the repo, run `bin/setup` to install dependencies. Then, run `bundle exec rake test` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
|
72
70
|
|
73
|
-
To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org).
|
71
|
+
To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release[origin]`, which will create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org).
|
74
72
|
|
75
73
|
## Contributing
|
76
74
|
|
77
|
-
Bug reports and pull requests are welcome on GitHub
|
75
|
+
Bug reports and pull requests are welcome on [GitHub](https://github.com/michaeltelford/broken-link-finder).
|
78
76
|
|
79
77
|
## License
|
80
78
|
|
data/lib/broken_link_finder/finder.rb
CHANGED
@@ -20,8 +20,8 @@ module BrokenLinkFinder
|
|
20
20
|
def crawl_site(url)
|
21
21
|
clear_broken_links
|
22
22
|
url = Wgit::Url.new(url)
|
23
|
-
crawled_pages = []
|
24
23
|
|
24
|
+
crawled_pages = []
|
25
25
|
@crawler.crawl_site(url) do |doc|
|
26
26
|
# Ensure the given website url is valid.
|
27
27
|
raise "Invalid URL: #{url}" if doc.url == url and doc.empty?
|
@@ -32,8 +32,7 @@ module BrokenLinkFinder
|
|
32
32
|
|
33
33
|
# Get all page links and determine which are broken.
|
34
34
|
next unless doc
|
35
|
-
|
36
|
-
find_broken_links(doc.url, links)
|
35
|
+
find_broken_links(doc)
|
37
36
|
end
|
38
37
|
|
39
38
|
!@broken_links.empty?
|
@@ -50,8 +49,7 @@ module BrokenLinkFinder
|
|
50
49
|
raise "Invalid URL: #{url}" unless doc
|
51
50
|
|
52
51
|
# Get all page links and determine which are broken.
|
53
|
-
|
54
|
-
find_broken_links(url, links)
|
52
|
+
find_broken_links(doc)
|
55
53
|
|
56
54
|
!@broken_links.empty?
|
57
55
|
end
|
@@ -84,11 +82,12 @@ broken links...")
|
|
84
82
|
private
|
85
83
|
|
86
84
|
# Finds which links are broken and append the details to @broken_links.
|
87
|
-
def find_broken_links(url, links)
|
85
|
+
def find_broken_links(doc)
|
86
|
+
links = doc.internal_full_links + doc.external_links
|
88
87
|
links.each do |link|
|
89
88
|
ok = @crawler.crawl_url(link)
|
90
89
|
if not ok # a.k.a. if the link is broken...
|
91
|
-
append_broken_link(url, link)
|
90
|
+
append_broken_link(doc.url, link)
|
92
91
|
end
|
93
92
|
end
|
94
93
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: broken_link_finder
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.0
|
4
|
+
version: 0.2.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Michael Telford
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2019-
|
11
|
+
date: 2019-06-04 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -98,14 +98,14 @@ dependencies:
|
|
98
98
|
name: wgit
|
99
99
|
requirement: !ruby/object:Gem::Requirement
|
100
100
|
requirements:
|
101
|
-
- - "
|
101
|
+
- - ">"
|
102
102
|
- !ruby/object:Gem::Version
|
103
103
|
version: '0'
|
104
104
|
type: :runtime
|
105
105
|
prerelease: false
|
106
106
|
version_requirements: !ruby/object:Gem::Requirement
|
107
107
|
requirements:
|
108
|
-
- - "
|
108
|
+
- - ">"
|
109
109
|
- !ruby/object:Gem::Version
|
110
110
|
version: '0'
|
111
111
|
description: Finds a website's broken links using the 'wgit' gem and reports back
|
@@ -152,7 +152,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
152
152
|
version: '0'
|
153
153
|
requirements: []
|
154
154
|
rubyforge_project:
|
155
|
-
rubygems_version: 2.7.
|
155
|
+
rubygems_version: 2.7.6
|
156
156
|
signing_key:
|
157
157
|
specification_version: 4
|
158
158
|
summary: Finds a website's broken links and reports back to you with a summary.
|