broken_link_finder 0.4.1 → 0.5.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 134dfae975f01be474a18d86cfa173a8509a6548f3d48343b7c4dc26de40dc53
4
- data.tar.gz: bbf9d2d32bc016637c9dfb97ab7a74f8fe6d50ea21f4998d167ed226926ad12d
3
+ metadata.gz: 364ec155bda54b8757fbce6425c8978d8d17cd91618d5650dc14f8a63e712a2f
4
+ data.tar.gz: b5c0e405f159aaed54725042105c519e1e9d0e085bf40aad99b114200ec8713a
5
5
  SHA512:
6
- metadata.gz: 9698dda0c3a1b513c425060a29207df1e6b56c32aff9fbfd3ada97d4e57fcb77ee893073c4cd4a1af40d425bba9c6a47fbe23730b87f2cd85f3088f1afab1581
7
- data.tar.gz: 581f2acd266a151b567708f6fe7e522972367fbcf4db0f6e746105483766a8ba2d3bfc699434961c4a6291d71a722dc11f47ea299a80074aed89fe46388cca4d
6
+ metadata.gz: eaf1aa2ea2b4f9177561291a8b729e91610f9b2fbd8541ceb2fa96f039a667ffd546dd758b9e4edb4fba9cad25c55368cb4af95ffbe9d0fdc20c546e3d4e5f0b
7
+ data.tar.gz: 8c6d407f74d900553782d7aedcdc4027c14e436bdb73b2888830827da9e305456e653ad9398c8752e01c8aae381871eb239aee5d7394ea995c5b68b0cbd38404
data/Gemfile.lock CHANGED
@@ -1,10 +1,10 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- broken_link_finder (0.4.1)
4
+ broken_link_finder (0.5.0)
5
5
  thor (= 0.20.3)
6
6
  thread (= 0.2)
7
- wgit (= 0.0.10)
7
+ wgit (= 0.0.12)
8
8
 
9
9
  GEM
10
10
  remote: https://rubygems.org/
@@ -36,9 +36,9 @@ GEM
36
36
  addressable (>= 2.3.6)
37
37
  crack (>= 0.3.2)
38
38
  hashdiff
39
- wgit (0.0.10)
39
+ wgit (0.0.12)
40
40
  mongo (~> 2.8.0)
41
- nokogiri (~> 1.10)
41
+ nokogiri (~> 1.10.3)
42
42
 
43
43
  PLATFORMS
44
44
  ruby
data/README.md CHANGED
@@ -4,6 +4,14 @@ Does what it says on the tin. Finds a website's broken links.
4
4
 
5
5
  Simply point it at a website and it will crawl all of its webpages searching for and identifying any broken links. You will then be presented with a nice concise summary of the broken links found.
6
6
 
7
+ ## How It Works
8
+
9
+ Any page element with an `href` or `src` attribute is considered a link. For each link on a given page, the link is considered broken if any of the following conditions (checked in order) holds:
10
+
11
+ 1) A response status code of `404 Not Found` is returned.
12
+ 2) An empty HTML response body is returned.
13
+ 3) The HTML response body doesn't contain an element whose ID matches the link's anchor, e.g. the page at `http://server.com#about` must contain an element with an ID of `about` or the link is considered broken.
14
+
7
15
  ## Made Possible By
8
16
 
9
17
  This repository utilises the awesome `wgit` Ruby gem. See its [repository](https://github.com/michaeltelford/wgit) for more details.
@@ -74,7 +82,6 @@ http://imgur.com
74
82
 
75
83
  ## TODO
76
84
 
77
- - Improve the intelligence of the finder. Currently a custom 'Not Found' webpage will not be discovered as a broken link; it should.
78
85
  - Add logger functionality (especially useful in the console during development).
79
86
 
80
87
  ## Development
data/Rakefile CHANGED
@@ -7,4 +7,19 @@ Rake::TestTask.new(:test) do |t|
7
7
  t.test_files = FileList['test/**/*_test.rb']
8
8
  end
9
9
 
10
- task :default => :test
10
+ desc "Print help information"
11
+ task default: :help
12
+
13
+ desc "Print help information"
14
+ task :help do
15
+ system "bundle exec rake -D"
16
+ end
17
+
18
+ desc "Compile all project Ruby files with warnings."
19
+ task :compile do
20
+ paths = Dir["**/*.rb", "**/*.gemspec", 'exe/broken_link_finder']
21
+ paths.each do |f|
22
+ puts "\nCompiling #{f}..."
23
+ puts `ruby -cw #{f}`
24
+ end
25
+ end
@@ -13,7 +13,7 @@ Gem::Specification.new do |spec|
13
13
  spec.description = "Finds a website's broken links using the 'wgit' gem and reports back to you with a summary."
14
14
  spec.homepage = "https://github.com/michaeltelford/broken-link-finder"
15
15
  spec.license = "MIT"
16
- spec.metadata = {
16
+ spec.metadata = {
17
17
  "source_code_uri" => "https://github.com/michaeltelford/broken-link-finder",
18
18
  }
19
19
 
@@ -43,7 +43,7 @@ Gem::Specification.new do |spec|
43
43
  spec.add_development_dependency "byebug", "~> 11.0"
44
44
  spec.add_development_dependency "webmock", "~> 3.5"
45
45
 
46
- spec.add_runtime_dependency "wgit", "0.0.10"
46
+ spec.add_runtime_dependency "wgit", "0.0.12"
47
47
  spec.add_runtime_dependency "thread", "0.2"
48
48
  spec.add_runtime_dependency "thor", "0.20.3"
49
49
  end
@@ -67,7 +67,7 @@ module BrokenLinkFinder
67
67
 
68
68
  # Pretty prints the contents of broken_links into a stream e.g. Kernel
69
69
  # (STDOUT) or a file - anything that respond_to? :puts.
70
- # Returns true if there were broken links and vice versa.
70
+ # Returns true if there were broken links and vice versa.
71
71
  def pretty_print_broken_links(stream = Kernel)
72
72
  raise "stream must respond_to? :puts" unless stream.respond_to? :puts
73
73
 
@@ -96,13 +96,24 @@ broken links...")
96
96
  def find_broken_links(doc)
97
97
  links = doc.internal_full_links + doc.external_links
98
98
  links.each do |link|
99
- ok = @crawler.crawl_url(link)
100
- if not ok # a.k.a. if the link is broken...
99
+ link_doc = @crawler.crawl_url(link)
100
+ if @crawler.last_response.is_a?(Net::HTTPNotFound) or
101
+ link_doc.nil? or
102
+ has_broken_anchor(link_doc)
101
103
  append_broken_link(doc.url, link)
102
104
  end
103
105
  end
104
106
  end
105
107
 
108
+ # Returns true if the link is/contains a broken anchor.
109
+ def has_broken_anchor(doc)
110
+ raise "link document is nil" unless doc
111
+ return false unless doc.url.anchor
112
+
113
+ anchor = doc.url.anchor[1..-1] # Remove the # prefix.
114
+ doc.xpath("//*[@id='#{anchor}']").empty?
115
+ end
116
+
106
117
  # Append url => [link] to @broken_links.
107
118
  def append_broken_link(url, link)
108
119
  @lock.synchronize do
@@ -1,3 +1,3 @@
1
1
  module BrokenLinkFinder
2
- VERSION = "0.4.1"
2
+ VERSION = "0.5.0"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: broken_link_finder
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.1
4
+ version: 0.5.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Michael Telford
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2019-07-12 00:00:00.000000000 Z
11
+ date: 2019-07-21 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -100,14 +100,14 @@ dependencies:
100
100
  requirements:
101
101
  - - '='
102
102
  - !ruby/object:Gem::Version
103
- version: 0.0.10
103
+ version: 0.0.12
104
104
  type: :runtime
105
105
  prerelease: false
106
106
  version_requirements: !ruby/object:Gem::Requirement
107
107
  requirements:
108
108
  - - '='
109
109
  - !ruby/object:Gem::Version
110
- version: 0.0.10
110
+ version: 0.0.12
111
111
  - !ruby/object:Gem::Dependency
112
112
  name: thread
113
113
  requirement: !ruby/object:Gem::Requirement