robinson 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (4) hide show
  1. data/Gemfile +5 -0
  2. data/bin/robinson +3 -0
  3. data/lib/robinson.rb +157 -0
  4. metadata +70 -0
data/Gemfile ADDED
@@ -0,0 +1,5 @@
1
+ # A sample Gemfile
2
+ source "https://rubygems.org"
3
+
4
+ gem "smart_colored"
5
+ gem "anemone"
data/bin/robinson ADDED
@@ -0,0 +1,3 @@
1
+ #!/usr/bin/env ruby
2
+ require 'robinson'
3
+ robinson_main(ARGV)
data/lib/robinson.rb ADDED
@@ -0,0 +1,157 @@
1
+ #!/bin/ruby
2
+
3
# Parses the command-line arguments of the link checker, validates them and
# starts the crawl.
#
# Expected form: <host>[:<port>] [--ignoring <ignorepath> [...]]
class Invocation
  # Flag that introduces the list of page paths to skip.
  IGNORING_FLAG = '--ignoring'

  # @param args [Array<String>] raw command-line arguments (e.g. ARGV)
  def initialize(args)
    @address = args.first || ''
    flag_index = args.index(IGNORING_FLAG)
    @ignoring_pages = args.include?(IGNORING_FLAG)
    # Only what follows the flag is an ignored path. Taking everything from a
    # fixed position would silently ignore pages when the flag is missing.
    @ignored_pages = flag_index ? args.drop(flag_index + 1) : []
    # Anything between the address and the flag (or the end) is unaccounted for.
    @unexpected = args.take(flag_index || args.size).drop(1)
  end

  # Validates the arguments (printing usage and exiting with status 1 when
  # they are wrong) and then crawls the website.
  def execute
    check_args
    Robinson.crawl @address, @ignored_pages
  end

  private

  # Rejects every malformed invocation through #usage, which does not return.
  def check_args
    usage('only accepts website server host[:port], not paths') if @address.include?('/')
    # A leading "-" means a flag was given where the host should be.
    if @address.empty? || @address.start_with?('-')
      usage('you need to pass in the website server host[:port]')
    end
    usage("did not expect these arguments: #{@unexpected.join(' ')}") unless @unexpected.empty?
    return unless @ignoring_pages

    usage('you need to specify the paths of the pages to ignore') if @ignored_pages.empty?
    return if @ignored_pages.all? { |path| path.start_with?('/') }

    usage("the ignored pages' paths must all start with / character")
  end

  # Prints an optional complaint followed by the usage text, then exits with
  # status 1.
  #
  # @param weirdness [String] description of what was wrong with the arguments
  def usage(weirdness = '')
    puts "\nSorry, #{weirdness}\n\n" unless weirdness.empty?
    puts "Usage: ./robinson <host>[:<port>] [--ignoring <ignorepath> [...]]"
    puts " e.g. ./robinson www.example.com"
    puts " e.g. ./robinson localhost:8080 --ignoring /blogfeed /external_content"
    exit 1
  end
end
37
+
38
+
39
+
40
+ require 'anemone'
41
+ require 'smart_colored'
42
+
43
# Wraps a URI so it can be compared with a website address given as
# "host" or "host:port".
class Link
  # Port assumed for an address that does not name one.
  DEFAULT_PORT = 80

  # @param uri [URI] the link to inspect; must respond to #host and #port
  def initialize(uri)
    @uri = uri
  end

  # @param address [String] website address, "host" or "host:port"
  # @return [Boolean] true when the link points at the same host and port
  def on_website?(address)
    host_and_port == host_and_port_of(address)
  end

  private

  # Normalizes the address to lower-case "host:port". Hostnames are
  # case-insensitive, so both sides of the comparison are downcased.
  def host_and_port_of(address)
    normalized = address.downcase
    normalized.include?(':') ? normalized : "#{normalized}:#{DEFAULT_PORT}"
  end

  # Lower-case "host:port" of the wrapped URI. Interpolation (rather than +)
  # keeps a URI without a host, such as mailto:, from raising; it simply
  # never matches a website address.
  def host_and_port
    "#{@uri.host.to_s.downcase}:#{@uri.port}"
  end
end
59
+
60
# Baseline reporter: prints every visited page and always reports success.
class Reporter
  # Called for each link found on a page; the base reporter stays silent.
  def on_see_link(uri); end

  # Called for each visited page; delegates the printing to the page itself.
  def on_visit(page)
    page.puts
  end

  # Process exit status to finish with; the base reporter never fails.
  def exit_code
    0
  end
end
70
+
71
# Reporter that additionally prints every link it is told about.
class NoisyReporter < Reporter
  # Writes a "seen: <uri>" line for each link found on a page.
  def on_see_link(uri)
    $stdout.puts "seen: #{uri}"
  end
end
76
+
77
# Reporter that remembers which visited pages were broken and turns that
# into a summary and a process exit status.
class InvestigativeReporter < Reporter
  # Largest exit status a process can report. Larger values wrap around
  # (256 would be seen as 0, i.e. success), so the count is capped here.
  MAX_EXIT_STATUS = 255

  def initialize
    @broken = []
    @ok = []
  end

  # Files the page under broken or ok, then prints it.
  def on_visit(page)
    (page.broken? ? @broken : @ok) << page
    page.puts
  end

  # Prints the summary and returns the exit status: 0 when nothing is broken,
  # otherwise the number of broken pages (capped at MAX_EXIT_STATUS).
  def exit_code
    @broken.empty? ? success : failure
  end

  # Prints the all-clear summary.
  #
  # @return [Integer] 0
  def success
    puts "\nAll links (#{@ok.size}) check out OK."
    0
  end

  # Prints the broken pages again as a summary. The "out of" figure is the
  # total number of pages visited, not just the healthy ones.
  #
  # @return [Integer] number of broken pages, capped at MAX_EXIT_STATUS
  def failure
    puts "\nBroken links (#{@broken.size} out of #{@broken.size + @ok.size}):"
    @broken.each(&:puts)
    [@broken.size, MAX_EXIT_STATUS].min
  end
end
103
+
104
# Presentation wrapper around a crawled page: decides whether the page is
# broken and prints a coloured one-line report for it.
class Page
  # @param anemone_page the crawled page; must respond to #code, #url and #referer
  def initialize(anemone_page)
    @page = anemone_page
  end

  # Prints the report line to standard output: red for a broken page, green
  # otherwise. A page without a response code is reported as
  # 'no http response'.
  def puts
    detail = unfetchable? ? 'no http response' : @page.code
    if broken?
      $stdout.puts(pagelog("BROKEN!!", detail).colored.red)
    else
      $stdout.puts(pagelog("checked", detail).colored.green)
    end
  end

  # Builds the report line for this page.
  #
  # @param what [String] verdict label placed at the start of the line
  # @param code the response code (or a description of it) to show
  # @return [String]
  def pagelog(what, code)
    "#{what}: #{@page.url} - #{code} (referer: #{@page.referer})"
  end

  # @return [Boolean] true when the page has no response code at all
  def unfetchable?
    @page.code.nil?
  end

  # @return [Boolean] true when the page has no response code or a code of
  #   400 and above
  def broken?
    unfetchable? || @page.code >= 400
  end
end
130
+
131
# Entry point of the link checker: crawls a website over http and exits the
# process with the reporter's verdict.
class Robinson
  class << self
    # Crawls http://<address>, following only links that are on the same
    # website and whose path is not listed in ignored_paths. Every link seen
    # and every page visited is passed to the reporter. Does not return: the
    # process exits with the reporter's exit code.
    #
    # @param address [String] website server as host or host:port
    # @param ignored_paths [Array<String>] page paths that must not be followed
    # @param reporter [Reporter] receives the crawl events
    def crawl(address, ignored_paths = [], reporter = InvestigativeReporter.new)
      puts "Website server to check: '#{address}', ignoring paths '#{ignored_paths.join(', ')}' - NB. only internal links will be checked"
      Anemone.crawl("http://#{address}") do |anemone|
        anemone.focus_crawl do |page|
          page.links.each { |seen| reporter.on_see_link(seen) }
          # Drop external links first, then the explicitly ignored paths.
          page.links.reject do |uri|
            !Link.new(uri).on_website?(address) || ignored_paths.include?(uri.path)
          end
        end
        anemone.on_every_page do |anemone_page|
          reporter.on_visit(Page.new(anemone_page))
        end
      end
      status = reporter.exit_code
      puts "finished (#{status})"
      exit(status)
    end
  end
end
152
+
153
# Command-line entry point: builds an Invocation from the given arguments
# and runs it.
#
# @param arguments [Array<String>] command-line arguments (e.g. ARGV)
def robinson_main(arguments)
  invocation = Invocation.new(arguments)
  invocation.execute
end
156
+
157
+
metadata ADDED
@@ -0,0 +1,70 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: robinson
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - damned
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2013-03-11 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: anemone
16
+ requirement: &18383740 !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ! '>='
20
+ - !ruby/object:Gem::Version
21
+ version: '0'
22
+ type: :runtime
23
+ prerelease: false
24
+ version_requirements: *18383740
25
+ - !ruby/object:Gem::Dependency
26
+ name: smart_colored
27
+ requirement: &18382440 !ruby/object:Gem::Requirement
28
+ none: false
29
+ requirements:
30
+ - - ! '>='
31
+ - !ruby/object:Gem::Version
32
+ version: '0'
33
+ type: :runtime
34
+ prerelease: false
35
+ version_requirements: *18382440
36
+ description:
37
+ email:
38
+ executables:
39
+ - robinson
40
+ extensions: []
41
+ extra_rdoc_files: []
42
+ files:
43
+ - Gemfile
44
+ - lib/robinson.rb
45
+ - bin/robinson
46
+ homepage:
47
+ licenses: []
48
+ post_install_message:
49
+ rdoc_options: []
50
+ require_paths:
51
+ - lib
52
+ required_ruby_version: !ruby/object:Gem::Requirement
53
+ none: false
54
+ requirements:
55
+ - - ! '>='
56
+ - !ruby/object:Gem::Version
57
+ version: '0'
58
+ required_rubygems_version: !ruby/object:Gem::Requirement
59
+ none: false
60
+ requirements:
61
+ - - ! '>='
62
+ - !ruby/object:Gem::Version
63
+ version: '0'
64
+ requirements: []
65
+ rubyforge_project:
66
+ rubygems_version: 1.8.11
67
+ signing_key:
68
+ specification_version: 3
69
+ summary: A working website link checker
70
+ test_files: []