ulink-checker 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- data/Gemfile +7 -0
- data/bin/ulink-checker +161 -0
- data/lib/duration.rb +18 -0
- data/lib/link_checker_override.rb +49 -0
- data/readme.md +22 -0
- data/ulink-checker.gemspec +19 -0
- metadata +67 -0
data/Gemfile
ADDED
data/bin/ulink-checker
ADDED
@@ -0,0 +1,161 @@
|
|
1
|
+
#!/usr/bin/env ruby -W0
|
2
|
+
|
3
|
+
$LOAD_PATH.unshift File.dirname(__FILE__)
|
4
|
+
|
5
|
+
require 'rubygems'
|
6
|
+
require 'nokogiri'
|
7
|
+
require 'net/http'
|
8
|
+
require 'rainbow'
|
9
|
+
require 'link_checker'
|
10
|
+
require 'optparse'
|
11
|
+
|
12
|
+
require 'link_checker_override.rb'
|
13
|
+
require 'duration.rb'
|
14
|
+
|
15
|
+
# Tallies of link-check outcomes, one counter per category shown in the summary.
errors = fails = oks = warnings = redirects = 0

# Wall-clock start of the run; the summary reports the elapsed time from here.
starttime = Time.now

# Holds the parsed XML document once the input file has been read.
doc = nil
|
23
|
+
|
24
|
+
|
25
|
+
# ======================================== read commandline parameters
|
26
|
+
# Parses the command line (ARGV) and returns the selected options.
#
# Recognised switches:
#   -u URL,  --url URL    check XML at URL (accepted, but not implemented yet)
#   -f FILE, --file FILE  check XML read from FILE
#   -h,      --help       print the usage text and exit
#
# Exits with status 0 after printing the usage text when no arguments are
# given or when help is requested. Exits with status 1 after printing a
# message and the usage text when the command line cannot be parsed, when
# both a URL and a file are given, or when a URL is given (unsupported).
#
# Recognised switches are removed from ARGV as a side effect of parse!.
#
# Returns a Hash with the keys :url and :file; a value is "" when the
# corresponding switch was not supplied.
def processCommandLine
  options = { :url => "", :file => "" }

  optparse = OptionParser.new do |opts|
    opts.banner = "ulink-checker v0.0.1"

    #options[:followXIInclude] = false
    #opts.on( '-x', '--xiinclude', 'Follow' ) do
    #  options[:getSecurityLevels] = true
    #end

    opts.on('-u URL', '--url URL', 'check XML at URL (not implemented yet)') do |url|
      options[:url] = url
    end

    opts.on('-f FILE', '--file FILE', 'check XML from file') do |file|
      options[:file] = file
    end

    opts.on('-h', '--help', 'Display this screen') do
      puts opts
      exit
    end
  end

  # Nothing to do without arguments: show the usage text.
  if ARGV.empty?
    puts optparse
    exit
  end

  begin
    optparse.parse!
  rescue OptionParser::ParseError => error
    puts "ERROR PARSING COMMANDLINE:"
    puts "\t#{error}"
    puts
    puts optparse
    exit 1
  end

  # Can't specify both url and file, only one. This has to be tested before
  # the "URLs are not supported" check below, otherwise it is never reached.
  if options[:url] != "" && options[:file] != ""
    puts "ERROR: can only specify one of file or URL."
    puts
    puts optparse
    exit 1
  end

  # If they specify a URL then too bad!
  if options[:url] != ""
    puts "SORRY: URLs are not supported yet"
    puts
    puts optparse
    exit 1
  end

  options
end
|
87
|
+
|
88
|
+
options = processCommandLine

# ========================================

# An input file is required. Without this guard the document was never
# parsed and the xpath call below failed on nil with NoMethodError.
if options[:file] == ""
  puts "ERROR: no input file given, use -f FILE."
  exit 1
end

unless File.readable?(options[:file])
  puts "ERROR: cannot read #{options[:file]}"
  exit 1
end

puts "Checking URLs in #{options[:file]} ...\n\n"

# NOTE(review): dtdload + noent make the parser load DTDs and substitute
# entities, and xinclude pulls in referenced files; only run this on
# documents you trust.
doc = File.open(options[:file]) do |fp|
  Nokogiri::XML(fp) do |conf|
    conf.strict.dtdload.noent.nocdata.xinclude
  end
end

# xpath the urls out and check them one by one
doc.xpath("//ulink").each do |url|

  url_string = url.xpath("./@url").to_s
  result = nil

  begin
    result = LinkChecker.check_uri(URI.parse(url_string))
  rescue
    # Anything that cannot be parsed or requested ends up here; mailto links
    # are reported as warnings, everything else as an error.
    if url_string.start_with?("mailto:")
      print "WARNING".color("#800040")+"\t\t"
      warnings += 1
    else
      print "ERROR".color("#800040")+"\t\t"
      errors += 1
    end
  end

  # result stays nil when the check raised, so outcome is "NilClass" then.
  outcome = result.class.to_s

  case outcome
  when "LinkChecker::Error"
    print "FAIL".foreground(:red).inverse.blink+"\t\t"
    fails += 1
  when "LinkChecker::Good"
    print "OK".foreground(:green)+"\t\t"
    oks += 1
  when "LinkChecker::Redirect"
    print "REDIRECT".color("#ff8000")+"\t"
    redirects += 1
  end

  print " #{url_string.foreground(:blue).underline}\n"

  if outcome == "LinkChecker::Redirect"
    print "\t\t -> "+result.final_destination_uri_string.foreground(:blue).underline+"\n"
  end

  # Show where in the document the link lives for everything that is not OK.
  if outcome != "LinkChecker::Good"
    print "\t\t at "+url.path.to_s+"\n"
  end

end

endtime = Time.now

puts "\n"
puts "SUMMARY".underline
# ARGV no longer holds the file name here: parse! removed the switches.
puts "Source:\t\t\t#{options[:file]}"
puts "Total URLs:\t\t#{errors+fails+oks+warnings+redirects}"
puts "Time taken:\t\t#{(endtime-starttime).duration}"
puts "URLs OK:\t\t#{oks}"
puts "URL Errors:\t\t#{errors}"
puts "URLs not found:\t\t#{fails}"
puts "URLs with warnings:\t#{warnings}"
puts "URLs that redirect:\t#{redirects}"

print "\n"
|
161
|
+
|
data/lib/duration.rb
ADDED
@@ -0,0 +1,18 @@
|
|
1
|
+
# Adds a human-readable duration formatter to every Numeric.
class Numeric
  # Formats this number, read as a count of seconds, as a short English
  # phrase showing the two most significant units (a single unit below one
  # minute).
  #
  # The value is truncated to whole seconds with to_int first. A negative
  # whole-second value is formatted from its magnitude with a leading "-";
  # previously every branch failed for such values and nil was returned.
  #
  # Examples:
  #   5.duration      # => "5 seconds"
  #   125.duration    # => "2 minutes and 5 seconds"
  #   90000.duration  # => "1 days and 1 hours"
  #   (-90).duration  # => "-1 minutes and 30 seconds"
  #
  # Returns a String.
  def duration
    secs = to_int
    return "-#{(-secs).duration}" if secs < 0

    mins  = secs / 60
    hours = mins / 60
    days  = hours / 24

    if days > 0
      "#{days} days and #{hours % 24} hours"
    elsif hours > 0
      "#{hours} hours and #{mins % 60} minutes"
    elsif mins > 0
      "#{mins} minutes and #{secs % 60} seconds"
    else
      "#{secs} seconds"
    end
  end
end
|
@@ -0,0 +1,49 @@
|
|
1
|
+
require 'net/http'
|
2
|
+
require 'net/https'
|
3
|
+
require 'uri'
|
4
|
+
|
5
|
+
# Reopens LinkChecker to replace its check_uri class method. The Good,
# Redirect and Error result classes used below are not defined here; they
# are expected to be provided by the library that defines LinkChecker.
class LinkChecker

  # Issues a GET request for +uri+ and classifies the response.
  #
  # uri            - a parsed URI (host, port, scheme and path are read).
  # redirected     - true when this call follows a redirect; a reachable
  #                  target is then reported as Redirect instead of Good.
  # redirects_left - how many further redirects may be followed before the
  #                  link is reported as an Error (guards against loops).
  #
  # Returns
  #   Good     for 2xx, 401 and 403 responses reached directly,
  #   Redirect for the same responses reached through a redirect,
  #   Error    for any other response, for a redirect without a Location
  #            header, and when the redirect budget is used up.
  #
  # Connection and URI errors are not rescued here and reach the caller.
  def self.check_uri(uri, redirected=false, redirects_left=10)
    http = Net::HTTP.new(uri.host, uri.port)
    http.use_ssl = true if uri.scheme == "https"
    # NOTE(review): certificate verification is switched off, so an invalid
    # or forged certificate does not make a link fail.
    http.verify_mode = OpenSSL::SSL::VERIFY_NONE if http.use_ssl?

    http.start do
      # request_uri keeps the query string, which uri.path alone drops.
      # URI classes without request_uri fall back to the plain path.
      target = if uri.respond_to?(:request_uri)
                 uri.request_uri
               else
                 uri.path.empty? ? '/' : uri.path
               end

      http.request_get(target) do |response|
        case response

        # 401 and 403 are treated like success; presumably because the
        # server answered for the resource and only access is restricted.
        when Net::HTTPSuccess, Net::HTTPForbidden, Net::HTTPUnauthorized
          if redirected
            return Redirect.new(:final_destination_uri_string => uri.to_s)
          else
            return Good.new(:uri_string => uri.to_s)
          end

        when Net::HTTPRedirection
          location = response['location']
          if location.nil? || redirects_left <= 0
            return Error.new(:uri_string => uri.to_s, :error => response)
          end
          # Location may be relative; merge resolves it against this URI.
          return check_uri(uri.merge(location), true, redirects_left - 1)

        else
          return Error.new(:uri_string => uri.to_s, :error => response)
        end
      end
    end
  end

end
|
data/readme.md
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
Scans a specified DocBook XML file and checks the URL of each `<ulink>` tag. `<xi:include>` elements are supported.
|
2
|
+
|
3
|
+
Reports:
|
4
|
+
|
5
|
+
* ERROR: poorly formed URLs
|
6
|
+
* FAIL: URLs that fail to resolve (404 etc)
|
7
|
+
* WARNING: mailto links
|
8
|
+
* REDIRECT: when the URL gets redirected
|
9
|
+
* OK: if the URL works fine
|
10
|
+
|
11
|
+
## setup
|
12
|
+
|
13
|
+
1. `$ bundle update`
|
14
|
+
|
15
|
+
## run
|
16
|
+
|
17
|
+
1. `./bin/ulink-checker -f somedocbook.xml`
|
18
|
+
2. See the amazing technicolour output
|
19
|
+
|
20
|
+
## why isn't it a gem
|
21
|
+
|
22
|
+
Because I haven't done it yet.
|
@@ -0,0 +1,19 @@
|
|
1
|
+
# Gem specification for ulink-checker: a command-line tool that checks the
# URLs found in DocBook <ulink> tags.
Gem::Specification.new do |s|
  s.name        = 'ulink-checker'
  s.version     = '0.0.1'
  s.date        = '2013-03-12'
  s.summary     = "checks urls in docbook ulink tags"
  s.description = "checks urls in docbook ulink tags"
  s.authors     = ["darrin Mison"]
  s.email       = 'dmison@me.com'
  s.homepage    = 'https://github.com/dmison/ulink-checker'

  s.executables << 'ulink-checker'
  s.files = [
    "Gemfile",
    "readme.md",
    "ulink-checker.gemspec",
    "bin/ulink-checker",
    "lib/duration.rb",
    "lib/link_checker_override.rb"]

  # Libraries required by bin/ulink-checker. Without these declarations an
  # installed gem fails at its first require.
  s.add_runtime_dependency 'nokogiri'
  # The executable calls colour methods directly on String (color,
  # foreground, underline, ...).
  # NOTE(review): pinned to 1.x on the assumption that Rainbow 2.0 no longer
  # patches String by default - confirm against the Rainbow changelog.
  s.add_runtime_dependency 'rainbow', '~> 1.1'
  # NOTE(review): assumed to be the gem behind require 'link_checker' - confirm.
  s.add_runtime_dependency 'link-checker'
end
|
metadata
ADDED
@@ -0,0 +1,67 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: ulink-checker
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
prerelease: false
|
5
|
+
segments:
|
6
|
+
- 0
|
7
|
+
- 0
|
8
|
+
- 1
|
9
|
+
version: 0.0.1
|
10
|
+
platform: ruby
|
11
|
+
authors:
|
12
|
+
- darrin Mison
|
13
|
+
autorequire:
|
14
|
+
bindir: bin
|
15
|
+
cert_chain: []
|
16
|
+
|
17
|
+
date: 2013-03-12 00:00:00 +10:00
|
18
|
+
default_executable:
|
19
|
+
dependencies: []
|
20
|
+
|
21
|
+
description: checks urls in docbook ulink tags
|
22
|
+
email: dmison@me.com
|
23
|
+
executables:
|
24
|
+
- ulink-checker
|
25
|
+
extensions: []
|
26
|
+
|
27
|
+
extra_rdoc_files: []
|
28
|
+
|
29
|
+
files:
|
30
|
+
- Gemfile
|
31
|
+
- readme.md
|
32
|
+
- ulink-checker.gemspec
|
33
|
+
- bin/ulink-checker
|
34
|
+
- lib/duration.rb
|
35
|
+
- lib/link_checker_override.rb
|
36
|
+
has_rdoc: true
|
37
|
+
homepage: https://github.com/dmison/ulink-checker
|
38
|
+
licenses: []
|
39
|
+
|
40
|
+
post_install_message:
|
41
|
+
rdoc_options: []
|
42
|
+
|
43
|
+
require_paths:
|
44
|
+
- lib
|
45
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
46
|
+
requirements:
|
47
|
+
- - ">="
|
48
|
+
- !ruby/object:Gem::Version
|
49
|
+
segments:
|
50
|
+
- 0
|
51
|
+
version: "0"
|
52
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
53
|
+
requirements:
|
54
|
+
- - ">="
|
55
|
+
- !ruby/object:Gem::Version
|
56
|
+
segments:
|
57
|
+
- 0
|
58
|
+
version: "0"
|
59
|
+
requirements: []
|
60
|
+
|
61
|
+
rubyforge_project:
|
62
|
+
rubygems_version: 1.3.6
|
63
|
+
signing_key:
|
64
|
+
specification_version: 3
|
65
|
+
summary: checks urls in docbook ulink tags
|
66
|
+
test_files: []
|
67
|
+
|