broken_link_finder 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 87ed607a6fa9669142546f0af4d62c11fb8767ae25c54f4c27373cc823706368
4
+ data.tar.gz: f90f38d18482eed566995288b669cfc71c0d66fe177e44850e8732bd7a6532b5
5
+ SHA512:
6
+ metadata.gz: 7450e7671c5559de801e3c8f8eae7c01ae0c37bfb7c1a476f95639ea987139d7638fc14655eb4b3948f87b3021c1716e3842a902bc09c46413e1a16529cdf870
7
+ data.tar.gz: 3b89341b5c35de1ed61b1ab39d324d5f7c33d5d69597f87286710921f4aff7b20ce3368b5889d446895ea8cc7b1ad2ecc9630ced62310fef56ef743457a2dcb1
@@ -0,0 +1,12 @@
1
+ /.bundle/
2
+ /.yardoc
3
+ /_yardoc/
4
+ /coverage/
5
+ /doc/
6
+ /pkg/
7
+ /spec/reports/
8
+ /tmp/
9
+ **/spike.rb
10
+ main.rb
11
+ .DS_Store
12
+ .byebug_history
@@ -0,0 +1 @@
1
+ 2.5.3
@@ -0,0 +1,5 @@
1
+ sudo: false
2
+ language: ruby
3
+ rvm:
4
+ - 2.2.2
5
+ before_install: gem install bundler -v 1.13.6
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in broken_link_finder.gemspec
4
+ gemspec
@@ -0,0 +1,52 @@
1
+ PATH
2
+ remote: .
3
+ specs:
4
+ broken_link_finder (0.2.0)
5
+ wgit
6
+
7
+ GEM
8
+ remote: https://rubygems.org/
9
+ specs:
10
+ addressable (2.6.0)
11
+ public_suffix (>= 2.0.2, < 4.0)
12
+ bson (4.5.0)
13
+ byebug (11.0.1)
14
+ coderay (1.1.2)
15
+ crack (0.4.3)
16
+ safe_yaml (~> 1.0.0)
17
+ hashdiff (0.4.0)
18
+ method_source (0.9.2)
19
+ mini_portile2 (2.4.0)
20
+ minitest (5.11.3)
21
+ mongo (2.8.0)
22
+ bson (>= 4.4.2, < 5.0.0)
23
+ nokogiri (1.10.3)
24
+ mini_portile2 (~> 2.4.0)
25
+ pry (0.12.2)
26
+ coderay (~> 1.1.0)
27
+ method_source (~> 0.9.0)
28
+ public_suffix (3.1.0)
29
+ rake (10.5.0)
30
+ safe_yaml (1.0.5)
31
+ webmock (3.5.1)
32
+ addressable (>= 2.3.6)
33
+ crack (>= 0.3.2)
34
+ hashdiff
35
+ wgit (0.0.8)
36
+ mongo (~> 2.6)
37
+ nokogiri (~> 1.10)
38
+
39
+ PLATFORMS
40
+ ruby
41
+
42
+ DEPENDENCIES
43
+ broken_link_finder!
44
+ bundler (~> 2.0)
45
+ byebug (~> 11.0)
46
+ minitest (~> 5.0)
47
+ pry (~> 0.12)
48
+ rake (~> 10.0)
49
+ webmock (~> 3.5)
50
+
51
+ BUNDLED WITH
52
+ 2.0.1
@@ -0,0 +1,21 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2017 Michael Telford
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in
13
+ all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ THE SOFTWARE.
@@ -0,0 +1,81 @@
1
+ # Broken Link Finder
2
+
3
+ Does what it says on the tin. Finds a website's broken links.
4
+
5
+ Simply point it at a website and it will crawl all of its webpages searching for and identifying any broken links. You will then be presented with a nice concise summary of the broken links found.
6
+
7
+ ## Made Possible By
8
+
9
+ This repository utilises the awesome `wgit` Ruby gem. See its [repository](https://github.com/michaeltelford/wgit) for more details.
10
+
11
+ The only gotcha is that `wgit` doesn't currently follow redirects, meaning they will appear as broken links in the results.
12
+
13
+ ## Installation
14
+
15
+ Add this line to your application's Gemfile:
16
+
17
+ ```ruby
18
+ gem 'broken_link_finder'
19
+ ```
20
+
21
+ And then execute:
22
+
23
+ $ bundle
24
+
25
+ Or install it yourself as:
26
+
27
+ $ gem install broken_link_finder
28
+
29
+ ## Usage
30
+
31
+ Below is a sample script which crawls a website and outputs its broken links to a file.
32
+
33
+ > main.rb
34
+
35
+ ```ruby
36
+ require 'broken_link_finder'
37
+
38
+ finder = BrokenLinkFinder::Finder.new
39
+ finder.crawl_site "http://txti.es" # Also, see Finder#crawl_url for a single webpage.
40
+ finder.pretty_print_broken_links
41
+ ```
42
+
43
+ Then execute the script with:
44
+
45
+ $ ruby main.rb
46
+
47
+ The output should look something like:
48
+
49
+ ```text
50
+ Below is a breakdown of the different pages and their broken links...
51
+
52
+ The following broken links exist in http://txti.es/about:
53
+ http://twitter.com/thebarrytone
54
+ http://twitter.com/nwbld
55
+ http://twitter.com/txties
56
+ https://www.paypal.com/cgi-bin/webscr?cmd=_s-xclick&hosted_button_id=84L4BDS86FBUU
57
+
58
+ The following broken links exist in http://txti.es/how:
59
+ http://en.wikipedia.org/wiki/Markdown
60
+ http://imgur.com
61
+ ```
62
+
63
+ ## TODO
64
+
65
+ - Create a `broken_link_finder` executable.
66
+ - Add logger functionality (especially useful in the console during development).
67
+ - Update the `wgit` gem as soon as redirects are implemented.
68
+
69
+ ## Development
70
+
71
+ After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake test` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
72
+
73
+ To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org).
74
+
75
+ ## Contributing
76
+
77
+ Bug reports and pull requests are welcome on GitHub [here](https://github.com/michaeltelford/broken-link-finder).
78
+
79
+ ## License
80
+
81
+ The gem is available as open source under the terms of the [MIT License](http://opensource.org/licenses/MIT).
@@ -0,0 +1,10 @@
1
+ require "bundler/gem_tasks"
2
+ require "rake/testtask"
3
+
4
+ Rake::TestTask.new(:test) do |t|
5
+ t.libs << "test"
6
+ t.libs << "lib"
7
+ t.test_files = FileList['test/**/*_test.rb']
8
+ end
9
+
10
+ task :default => :test
@@ -0,0 +1,23 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require "bundler/setup"
4
+ require "pry"
5
+ require "broken_link_finder"
6
+ require 'wgit/core_ext'
7
+
8
+ # Call reload to load all recent code changes.
9
+ def reload
10
+ original_verbose = $VERBOSE
11
+ $VERBOSE = nil # Temporarily suppress ruby warnings.
12
+ load 'load.rb'
13
+ include BrokenLinkFinder
14
+ $VERBOSE = original_verbose # Restore ruby warnings.
15
+ true
16
+ end
17
+
18
+ # You can add fixtures and/or initialization code here...
19
+ reload
20
+ url = "http://txti.es/"
21
+ finder = Finder.new
22
+
23
+ binding.pry
@@ -0,0 +1,8 @@
1
+ #!/usr/bin/env bash
2
+ set -euo pipefail
3
+ IFS=$'\n\t'
4
+ set -vx
5
+
6
+ bundle install
7
+
8
+ # Do any other automated setup that you need to do here
@@ -0,0 +1,46 @@
1
+ # coding: utf-8
2
+ lib = File.expand_path('../lib', __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+ require 'broken_link_finder/version'
5
+
6
+ Gem::Specification.new do |spec|
7
+ spec.name = "broken_link_finder"
8
+ spec.version = BrokenLinkFinder::VERSION
9
+ spec.author = "Michael Telford"
10
+ spec.email = "michael.telford@live.com"
11
+
12
+ spec.summary = "Finds a website's broken links and reports back to you with a summary."
13
+ spec.description = "Finds a website's broken links using the 'wgit' gem and reports back to you with a summary."
14
+ spec.homepage = "https://github.com/michaeltelford/broken-link-finder"
15
+ spec.license = "MIT"
16
+ spec.metadata = {
17
+ "source_code_uri" => "https://github.com/michaeltelford/broken-link-finder",
18
+ }
19
+
20
+ # Restrict pushing this gem to RubyGems.org only. To push elsewhere, either change 'allowed_push_host'
21
+ # to a different single host or delete this section to allow pushing to any host.
22
+ if spec.respond_to?(:metadata)
23
+ spec.metadata['allowed_push_host'] = "https://rubygems.org"
24
+ else
25
+ raise "RubyGems 2.0 or newer is required to protect against " \
26
+ "public gem pushes."
27
+ end
28
+
29
+ spec.files = `git ls-files -z`.split("\x0").reject do |f|
30
+ f.match(%r{^(test|spec|features)/})
31
+ end
32
+ spec.bindir = "bin"
33
+ # spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
34
+ spec.require_paths = ["lib"]
35
+
36
+ spec.required_ruby_version = '~> 2.5' # Allows any ruby >= 2.5 and < 3.0
37
+
38
+ spec.add_development_dependency "bundler", "~> 2.0"
39
+ spec.add_development_dependency "rake", "~> 10.0"
40
+ spec.add_development_dependency "minitest", "~> 5.0"
41
+ spec.add_development_dependency "pry", "~> 0.12"
42
+ spec.add_development_dependency "byebug", "~> 11.0"
43
+ spec.add_development_dependency "webmock", "~> 3.5"
44
+
45
+ spec.add_runtime_dependency "wgit"
46
+ end
@@ -0,0 +1,2 @@
1
+ require_relative "./broken_link_finder/version"
2
+ require_relative "./broken_link_finder/finder"
@@ -0,0 +1,106 @@
1
+ require 'wgit'
2
+
3
+ module BrokenLinkFinder
4
+ class Finder
5
+ attr_reader :broken_links
6
+
7
+ # Create a new Finder instance.
8
+ def initialize
9
+ @broken_links = {}
10
+ @crawler = Wgit::Crawler.new
11
+ end
12
+
13
+ # Clear/empty the @broken_links Hash.
14
+ def clear_broken_links
15
+ @broken_links = {}
16
+ end
17
+
18
+ # Finds broken links within an entire site and appends them to the
19
+ # @broken_links Hash.
20
+ def crawl_site(url)
21
+ clear_broken_links
22
+ url = Wgit::Url.new(url)
23
+ crawled_pages = []
24
+
25
+ @crawler.crawl_site(url) do |doc|
26
+ # Ensure the given website url is valid.
27
+ raise "Invalid URL: #{url}" if doc.url == url and doc.empty?
28
+
29
+ # Ensure we only process each page once.
30
+ next if crawled_pages.include?(doc.url)
31
+ crawled_pages << doc.url
32
+
33
+ # Get all page links and determine which are broken.
34
+ next unless doc
35
+ links = doc.internal_full_links + doc.external_links
36
+ find_broken_links(doc.url, links)
37
+ end
38
+
39
+ !@broken_links.empty?
40
+ end
41
+
42
+ # Finds broken links within a single page and appends them to the
43
+ # @broken_links Hash.
44
+ def crawl_url(url)
45
+ clear_broken_links
46
+ url = Wgit::Url.new(url)
47
+
48
+ # Ensure the given page url is valid.
49
+ doc = @crawler.crawl_url(url)
50
+ raise "Invalid URL: #{url}" unless doc
51
+
52
+ # Get all page links and determine which are broken.
53
+ links = doc.internal_full_links + doc.external_links
54
+ find_broken_links(url, links)
55
+
56
+ !@broken_links.empty?
57
+ end
58
+
59
+ # Pretty prints the contents of broken_links into a stream e.g. Kernel
60
+ # (STDOUT) or a file.
61
+ # Returns true if there were broken links, false otherwise.
62
+ def pretty_print_broken_links(stream = Kernel)
63
+ raise "stream must respond_to? :puts" unless stream.respond_to? :puts
64
+
65
+ if (@broken_links.empty?)
66
+ stream.puts("Good news, there are no broken links!")
67
+ false
68
+ else
69
+ stream.puts("Below is a breakdown of the different pages and their \
70
+ broken links...")
71
+ stream.puts("")
72
+
73
+ @broken_links.each do |page, links|
74
+ stream.puts("The following broken links exist in #{page}:")
75
+ links.each do |link|
76
+ stream.puts(link)
77
+ end
78
+ stream.puts("")
79
+ end
80
+ true
81
+ end
82
+ end
83
+
84
+ private
85
+
86
+ # Finds which links are broken and appends the details to @broken_links.
87
+ def find_broken_links(url, links)
88
+ links.each do |link|
89
+ ok = @crawler.crawl_url(link)
90
+ if not ok # a.k.a. if the link is broken...
91
+ append_broken_link(url, link)
92
+ end
93
+ end
94
+ end
95
+
96
+ # Append url => [link] to @broken_links.
97
+ def append_broken_link(url, link)
98
+ unless @broken_links[url]
99
+ @broken_links[url] = []
100
+ end
101
+ @broken_links[url] << link
102
+ end
103
+
104
+ alias_method :crawl_page, :crawl_url
105
+ end
106
+ end
@@ -0,0 +1,3 @@
1
+ module BrokenLinkFinder
2
+ VERSION = "0.2.0"
3
+ end
data/load.rb ADDED
@@ -0,0 +1,2 @@
1
+ load "./lib/broken_link_finder/version.rb"
2
+ load "./lib/broken_link_finder/finder.rb"
metadata ADDED
@@ -0,0 +1,159 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: broken_link_finder
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.2.0
5
+ platform: ruby
6
+ authors:
7
+ - Michael Telford
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2019-05-31 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: bundler
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '2.0'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '2.0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rake
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '10.0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '10.0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: minitest
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: '5.0'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: '5.0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: pry
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - "~>"
60
+ - !ruby/object:Gem::Version
61
+ version: '0.12'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - "~>"
67
+ - !ruby/object:Gem::Version
68
+ version: '0.12'
69
+ - !ruby/object:Gem::Dependency
70
+ name: byebug
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - "~>"
74
+ - !ruby/object:Gem::Version
75
+ version: '11.0'
76
+ type: :development
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - "~>"
81
+ - !ruby/object:Gem::Version
82
+ version: '11.0'
83
+ - !ruby/object:Gem::Dependency
84
+ name: webmock
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - "~>"
88
+ - !ruby/object:Gem::Version
89
+ version: '3.5'
90
+ type: :development
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - "~>"
95
+ - !ruby/object:Gem::Version
96
+ version: '3.5'
97
+ - !ruby/object:Gem::Dependency
98
+ name: wgit
99
+ requirement: !ruby/object:Gem::Requirement
100
+ requirements:
101
+ - - ">="
102
+ - !ruby/object:Gem::Version
103
+ version: '0'
104
+ type: :runtime
105
+ prerelease: false
106
+ version_requirements: !ruby/object:Gem::Requirement
107
+ requirements:
108
+ - - ">="
109
+ - !ruby/object:Gem::Version
110
+ version: '0'
111
+ description: Finds a website's broken links using the 'wgit' gem and reports back
112
+ to you with a summary.
113
+ email: michael.telford@live.com
114
+ executables: []
115
+ extensions: []
116
+ extra_rdoc_files: []
117
+ files:
118
+ - ".gitignore"
119
+ - ".ruby-version"
120
+ - ".travis.yml"
121
+ - Gemfile
122
+ - Gemfile.lock
123
+ - LICENSE.txt
124
+ - README.md
125
+ - Rakefile
126
+ - bin/console
127
+ - bin/setup
128
+ - broken_link_finder.gemspec
129
+ - lib/broken_link_finder.rb
130
+ - lib/broken_link_finder/finder.rb
131
+ - lib/broken_link_finder/version.rb
132
+ - load.rb
133
+ homepage: https://github.com/michaeltelford/broken-link-finder
134
+ licenses:
135
+ - MIT
136
+ metadata:
137
+ source_code_uri: https://github.com/michaeltelford/broken-link-finder
138
+ allowed_push_host: https://rubygems.org
139
+ post_install_message:
140
+ rdoc_options: []
141
+ require_paths:
142
+ - lib
143
+ required_ruby_version: !ruby/object:Gem::Requirement
144
+ requirements:
145
+ - - "~>"
146
+ - !ruby/object:Gem::Version
147
+ version: '2.5'
148
+ required_rubygems_version: !ruby/object:Gem::Requirement
149
+ requirements:
150
+ - - ">="
151
+ - !ruby/object:Gem::Version
152
+ version: '0'
153
+ requirements: []
154
+ rubyforge_project:
155
+ rubygems_version: 2.7.8
156
+ signing_key:
157
+ specification_version: 4
158
+ summary: Finds a website's broken links and reports back to you with a summary.
159
+ test_files: []