broken_link_finder 0.4.1 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 134dfae975f01be474a18d86cfa173a8509a6548f3d48343b7c4dc26de40dc53
4
- data.tar.gz: bbf9d2d32bc016637c9dfb97ab7a74f8fe6d50ea21f4998d167ed226926ad12d
3
+ metadata.gz: 364ec155bda54b8757fbce6425c8978d8d17cd91618d5650dc14f8a63e712a2f
4
+ data.tar.gz: b5c0e405f159aaed54725042105c519e1e9d0e085bf40aad99b114200ec8713a
5
5
  SHA512:
6
- metadata.gz: 9698dda0c3a1b513c425060a29207df1e6b56c32aff9fbfd3ada97d4e57fcb77ee893073c4cd4a1af40d425bba9c6a47fbe23730b87f2cd85f3088f1afab1581
7
- data.tar.gz: 581f2acd266a151b567708f6fe7e522972367fbcf4db0f6e746105483766a8ba2d3bfc699434961c4a6291d71a722dc11f47ea299a80074aed89fe46388cca4d
6
+ metadata.gz: eaf1aa2ea2b4f9177561291a8b729e91610f9b2fbd8541ceb2fa96f039a667ffd546dd758b9e4edb4fba9cad25c55368cb4af95ffbe9d0fdc20c546e3d4e5f0b
7
+ data.tar.gz: 8c6d407f74d900553782d7aedcdc4027c14e436bdb73b2888830827da9e305456e653ad9398c8752e01c8aae381871eb239aee5d7394ea995c5b68b0cbd38404
data/Gemfile.lock CHANGED
@@ -1,10 +1,10 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- broken_link_finder (0.4.1)
4
+ broken_link_finder (0.5.0)
5
5
  thor (= 0.20.3)
6
6
  thread (= 0.2)
7
- wgit (= 0.0.10)
7
+ wgit (= 0.0.12)
8
8
 
9
9
  GEM
10
10
  remote: https://rubygems.org/
@@ -36,9 +36,9 @@ GEM
36
36
  addressable (>= 2.3.6)
37
37
  crack (>= 0.3.2)
38
38
  hashdiff
39
- wgit (0.0.10)
39
+ wgit (0.0.12)
40
40
  mongo (~> 2.8.0)
41
- nokogiri (~> 1.10)
41
+ nokogiri (~> 1.10.3)
42
42
 
43
43
  PLATFORMS
44
44
  ruby
data/README.md CHANGED
@@ -4,6 +4,14 @@ Does what it says on the tin. Finds a website's broken links.
4
4
 
5
5
  Simply point it at a website and it will crawl all of its webpages searching for and identifying any broken links. You will then be presented with a nice concise summary of the broken links found.
6
6
 
7
+ ## How It Works
8
+
9
+ Any page element with an `href` or `src` attribute is considered a link. For each link on a given page, the link is considered broken if any of the following conditions (checked in order) is met:
10
+
11
+ 1) A response status code of `404 Not Found` is returned.
12
+ 2) An empty HTML response body is returned.
13
+ 3) The HTML response body doesn't contain an element with an ID matching the link's anchor, e.g. the page returned for `http://server.com#about` must contain an element with an ID of `about`, otherwise the link is considered broken.
14
+
7
15
  ## Made Possible By
8
16
 
9
17
  This repository utilises the awesome `wgit` Ruby gem. See its [repository](https://github.com/michaeltelford/wgit) for more details.
@@ -74,7 +82,6 @@ http://imgur.com
74
82
 
75
83
  ## TODO
76
84
 
77
- - Improve the intelligence of the finder. Currently a custom 'Not Found' webpage will not be discovered as a broken link; it should.
78
85
  - Add logger functionality (especially useful in the console during development).
79
86
 
80
87
  ## Development
data/Rakefile CHANGED
@@ -7,4 +7,19 @@ Rake::TestTask.new(:test) do |t|
7
7
  t.test_files = FileList['test/**/*_test.rb']
8
8
  end
9
9
 
10
- task :default => :test
10
+ desc "Print help information"
11
+ task default: :help
12
+
13
+ desc "Print help information"
14
+ task :help do
15
+ system "bundle exec rake -D"
16
+ end
17
+
18
+ desc "Compile all project Ruby files with warnings."
19
+ task :compile do
20
+ paths = Dir["**/*.rb", "**/*.gemspec", 'exe/broken_link_finder']
21
+ paths.each do |f|
22
+ puts "\nCompiling #{f}..."
23
+ puts `ruby -cw #{f}`
24
+ end
25
+ end
@@ -13,7 +13,7 @@ Gem::Specification.new do |spec|
13
13
  spec.description = "Finds a website's broken links using the 'wgit' gem and reports back to you with a summary."
14
14
  spec.homepage = "https://github.com/michaeltelford/broken-link-finder"
15
15
  spec.license = "MIT"
16
- spec.metadata = {
16
+ spec.metadata = {
17
17
  "source_code_uri" => "https://github.com/michaeltelford/broken-link-finder",
18
18
  }
19
19
 
@@ -43,7 +43,7 @@ Gem::Specification.new do |spec|
43
43
  spec.add_development_dependency "byebug", "~> 11.0"
44
44
  spec.add_development_dependency "webmock", "~> 3.5"
45
45
 
46
- spec.add_runtime_dependency "wgit", "0.0.10"
46
+ spec.add_runtime_dependency "wgit", "0.0.12"
47
47
  spec.add_runtime_dependency "thread", "0.2"
48
48
  spec.add_runtime_dependency "thor", "0.20.3"
49
49
  end
@@ -67,7 +67,7 @@ module BrokenLinkFinder
67
67
 
68
68
  # Pretty prints the contents of broken_links into a stream e.g. Kernel
69
69
  # (STDOUT) or a file - anything that respond_to? :puts.
70
- # Returns true if there were broken links and vice versa.
70
+ # Returns true if there were broken links and vice versa.
71
71
  def pretty_print_broken_links(stream = Kernel)
72
72
  raise "stream must respond_to? :puts" unless stream.respond_to? :puts
73
73
 
@@ -96,13 +96,24 @@ broken links...")
96
96
  def find_broken_links(doc)
97
97
  links = doc.internal_full_links + doc.external_links
98
98
  links.each do |link|
99
- ok = @crawler.crawl_url(link)
100
- if not ok # a.k.a. if the link is broken...
99
+ link_doc = @crawler.crawl_url(link)
100
+ if @crawler.last_response.is_a?(Net::HTTPNotFound) or
101
+ link_doc.nil? or
102
+ has_broken_anchor(link_doc)
101
103
  append_broken_link(doc.url, link)
102
104
  end
103
105
  end
104
106
  end
105
107
 
108
+ # Returns true if the link is/contains a broken anchor.
109
+ def has_broken_anchor(doc)
110
+ raise "link document is nil" unless doc
111
+ return false unless doc.url.anchor
112
+
113
+ anchor = doc.url.anchor[1..-1] # Remove the # prefix.
114
+ doc.xpath("//*[@id='#{anchor}']").empty?
115
+ end
116
+
106
117
  # Append url => [link] to @broken_links.
107
118
  def append_broken_link(url, link)
108
119
  @lock.synchronize do
@@ -1,3 +1,3 @@
1
1
  module BrokenLinkFinder
2
- VERSION = "0.4.1"
2
+ VERSION = "0.5.0"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: broken_link_finder
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.1
4
+ version: 0.5.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Michael Telford
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2019-07-12 00:00:00.000000000 Z
11
+ date: 2019-07-21 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -100,14 +100,14 @@ dependencies:
100
100
  requirements:
101
101
  - - '='
102
102
  - !ruby/object:Gem::Version
103
- version: 0.0.10
103
+ version: 0.0.12
104
104
  type: :runtime
105
105
  prerelease: false
106
106
  version_requirements: !ruby/object:Gem::Requirement
107
107
  requirements:
108
108
  - - '='
109
109
  - !ruby/object:Gem::Version
110
- version: 0.0.10
110
+ version: 0.0.12
111
111
  - !ruby/object:Gem::Dependency
112
112
  name: thread
113
113
  requirement: !ruby/object:Gem::Requirement