ulink-checker 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Gemfile +7 -0
- data/bin/ulink-checker +161 -0
- data/lib/duration.rb +18 -0
- data/lib/link_checker_override.rb +49 -0
- data/readme.md +22 -0
- data/ulink-checker.gemspec +19 -0
- metadata +67 -0
data/Gemfile
ADDED
data/bin/ulink-checker
ADDED
@@ -0,0 +1,161 @@
|
|
1
|
+
#!/usr/bin/env ruby -W0
|
2
|
+
|
3
|
+
$LOAD_PATH.unshift File.dirname(__FILE__)
|
4
|
+
|
5
|
+
require 'rubygems'
|
6
|
+
require 'nokogiri'
|
7
|
+
require 'net/http'
|
8
|
+
require 'rainbow'
|
9
|
+
require 'link_checker'
|
10
|
+
require 'optparse'
|
11
|
+
|
12
|
+
require 'link_checker_override.rb'
|
13
|
+
require 'duration.rb'
|
14
|
+
|
15
|
+
# Per-category tallies, incremented while the links are checked and printed
# in the final summary.
errors = fails = oks = warnings = redirects = 0

# Wall-clock start of the run, used for the "Time taken" summary line.
starttime = Time.now

# Parsed XML document; assigned once the input file has been read.
doc = nil
|
23
|
+
|
24
|
+
|
25
|
+
# ======================================== read commandline parameters
|
26
|
+
# Parses the command line (ARGV) and returns the selected options.
#
# Recognised switches:
#   -u URL,  --url URL    XML source URL (parsed, but rejected below because
#                         URL sources are not implemented yet)
#   -f FILE, --file FILE  path of the XML file to check
#   -h,      --help       print the usage text and exit
#
# Side effects: optparse.parse! removes the recognised switches from ARGV.
# Prints the usage text and exits when ARGV is empty (status 0), and exits
# with status 1 when parsing fails, when both a URL and a file are given, or
# when a URL is given at all.
#
# Returns a Hash with the keys :url and :file; an option that was not
# supplied holds the empty string.
def processCommandLine
  options = { :url => "", :file => "" }

  optparse = OptionParser.new do |opts|
    opts.banner = "ulink-checker v0.0.1"

    #options[:followXIInclude] = false
    #opts.on( '-x', '--xiinclude', 'Follow' ) do
    #  options[:getSecurityLevels] = true
    #end

    opts.on( '-u URL', '--url URL', 'check XML at URL (not implemented yet)' ) do | url |
      options[:url] = url
    end

    opts.on( '-f FILE', '--file FILE', 'check XML from file' ) do | file |
      options[:file] = file
    end

    opts.on( '-h', '--help', 'Display this screen' ) do
      puts opts
      exit
    end
  end

  # No arguments at all: show the usage text instead of doing nothing.
  if ARGV.empty?
    puts optparse
    exit
  end

  begin
    optparse.parse!
  rescue OptionParser::ParseError => error
    puts "ERROR PARSING COMMANDLINE:"
    puts "\t#{error}"
    puts
    puts optparse
    exit 1
  end

  # Can't specify both url and file, only one. This has to be tested before
  # the "URLs are not supported" check, otherwise it can never be reached.
  if options[:url] != "" && options[:file] != ""
    puts "ERROR: can only specify one of file or URL."
    puts
    puts optparse
    exit 1
  end

  # if they specify URL then too bad!
  if options[:url] != ""
    puts "SORRY: URLs are not supported yet"
    puts
    puts optparse
    exit 1
  end

  options
end
|
87
|
+
|
88
|
+
options = processCommandLine

# ========================================

# Without an input file there is nothing to parse (for example when the file
# was passed as a bare argument instead of via -f/--file). Stop here rather
# than crashing on a nil document further down.
if options[:file] == ""
  puts "ERROR: no input file given, use -f FILE."
  exit 1
end

puts "Checking URLs in #{options[:file]} ...\n\n"

# NOTE(review): dtdload/noent/xinclude make the parser load DTDs, substitute
# entities and process XIncludes, so only run this on documents you trust.
File.open(options[:file]) do |fp|
  doc = Nokogiri::XML(fp) do |conf|
    conf.strict.dtdload.noent.nocdata.xinclude
  end
end

# xpath the urls out
urls = doc.xpath("//ulink")

urls.each do | url |

  url_string = url.xpath("./@url").to_s
  result = nil

  begin
    result = LinkChecker.check_uri(URI.parse(url_string))
  rescue StandardError
    # The check raised (unparsable URI, connection failure, ...), so result
    # stays nil. mailto: links are expected to end up here and only count as
    # warnings; everything else is an error.
    if url_string.start_with?("mailto:")
      print "WARNING".color("#800040")+"\t\t"
      warnings += 1
    else
      print "ERROR".color("#800040")+"\t\t"
      errors += 1
    end
  end

  # The outcome is identified by the exact class name of the result object.
  outcome = result.class.to_s

  if outcome == "LinkChecker::Error"
    print "FAIL".foreground(:red).inverse.blink+"\t\t"
    fails += 1
  end
  if outcome == "LinkChecker::Good"
    print "OK".foreground(:green)+"\t\t"
    oks += 1
  end
  if outcome == "LinkChecker::Redirect"
    print "REDIRECT".color("#ff8000")+"\t"
    redirects += 1
  end

  print " #{url_string.foreground(:blue).underline}\n"

  if outcome == "LinkChecker::Redirect"
    print "\t\t -> "+result.final_destination_uri_string.foreground(:blue).underline+"\n"
  end
  # Anything that is not plainly OK also gets the XPath of the offending node.
  if outcome != "LinkChecker::Good"
    print "\t\t at "+url.path.to_s+"\n"
  end

end

endtime = Time.now

puts "\n"
puts "SUMMARY".underline
# options[:file] rather than ARGV[0]: optparse.parse! has already removed the
# switches from ARGV, so ARGV[0] no longer names the input file.
puts "Source:\t\t\t#{options[:file]}"
puts "Total URLs:\t\t#{errors+fails+oks+warnings+redirects}"
puts "Time taken:\t\t#{(endtime-starttime).duration}"
puts "URLs OK:\t\t#{oks}"
puts "URL Errors:\t\t#{errors}"
puts "URLs not found:\t\t#{fails}"
puts "URLs with warnings:\t#{warnings}"
puts "URLs that redirect:\t#{redirects}"

print "\n"
|
161
|
+
|
data/lib/duration.rb
ADDED
@@ -0,0 +1,18 @@
|
|
1
|
+
# Extends Numeric with a human-readable rendering of a number of seconds.
class Numeric
  # Formats the receiver, interpreted as a number of seconds, using its two
  # most significant units, e.g. "1 day and 1 hour" or "2 minutes and 5 seconds".
  #
  # The value is truncated to an integer with to_int first. Units are
  # singular when their count is 1. Negative values are formatted from their
  # absolute value with a leading "-" instead of yielding nil.
  #
  # Returns a String.
  def duration
    total = to_int
    return "-#{(-total).duration}" if total < 0

    # "1 hour", "2 hours", "0 hours"
    label = lambda { |count, unit| "#{count} #{unit}#{'s' unless count == 1}" }

    # Each divmod peels off the next larger unit and keeps the remainder.
    mins, secs = total.divmod(60)
    hours, mins = mins.divmod(60)
    days, hours = hours.divmod(24)

    if days > 0
      "#{label.call(days, 'day')} and #{label.call(hours, 'hour')}"
    elsif hours > 0
      "#{label.call(hours, 'hour')} and #{label.call(mins, 'minute')}"
    elsif mins > 0
      "#{label.call(mins, 'minute')} and #{label.call(secs, 'second')}"
    else
      label.call(secs, 'second')
    end
  end
end
|
@@ -0,0 +1,49 @@
|
|
1
|
+
require 'net/http'
|
2
|
+
require 'net/https'
|
3
|
+
require 'uri'
|
4
|
+
|
5
|
+
class LinkChecker
|
6
|
+
|
7
|
+
def self.check_uri(uri, redirected=false)
|
8
|
+
http = Net::HTTP.new(uri.host, uri.port)
|
9
|
+
http.use_ssl = true if uri.scheme == "https"
|
10
|
+
http.verify_mode = OpenSSL::SSL::VERIFY_NONE if http.use_ssl?
|
11
|
+
|
12
|
+
http.start do
|
13
|
+
path = (uri.path.empty?) ? '/' : uri.path
|
14
|
+
http.request_get(path) do |response|
|
15
|
+
#print "\n#{response.class}(#{response.code}) "
|
16
|
+
case response
|
17
|
+
|
18
|
+
when Net::HTTPSuccess
|
19
|
+
if redirected
|
20
|
+
return Redirect.new(:final_destination_uri_string => uri.to_s)
|
21
|
+
else
|
22
|
+
return Good.new(:uri_string => uri.to_s)
|
23
|
+
end
|
24
|
+
|
25
|
+
when Net::HTTPForbidden
|
26
|
+
if redirected
|
27
|
+
return Redirect.new(:final_destination_uri_string => uri.to_s)
|
28
|
+
else
|
29
|
+
return Good.new(:uri_string => uri.to_s)
|
30
|
+
end
|
31
|
+
|
32
|
+
when Net::HTTPUnauthorized
|
33
|
+
if redirected
|
34
|
+
return Redirect.new(:final_destination_uri_string => uri.to_s)
|
35
|
+
else
|
36
|
+
return Good.new(:uri_string => uri.to_s)
|
37
|
+
end
|
38
|
+
|
39
|
+
|
40
|
+
when Net::HTTPRedirection then
|
41
|
+
return self.check_uri(URI(response['location']), true)
|
42
|
+
else
|
43
|
+
return Error.new(:uri_string => uri.to_s, :error => response)
|
44
|
+
end
|
45
|
+
end
|
46
|
+
end
|
47
|
+
end
|
48
|
+
|
49
|
+
end
|
data/readme.md
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
Scans a specified Docbook XML file and checks the URL of each `<ulink>` tag. `<xi:include>` elements are supported.
|
2
|
+
|
3
|
+
Reports:
|
4
|
+
|
5
|
+
* ERROR: poorly formed URLs
|
6
|
+
* FAIL: URLs that fail to resolve (404 etc)
|
7
|
+
* WARNING: mailto links
|
8
|
+
* REDIRECT: when the URL gets redirected
|
9
|
+
* OK: if the URL works fine
|
10
|
+
|
11
|
+
## setup
|
12
|
+
|
13
|
+
1. `$ bundle update`
|
14
|
+
|
15
|
+
## run
|
16
|
+
|
17
|
+
1. `ulink-checker -f somedocbook.xml` (the input file must be passed with `-f`/`--file`)
|
18
|
+
2. See the amazing technicolour output
|
19
|
+
|
20
|
+
## why isn't it a gem
|
21
|
+
|
22
|
+
Because I haven't done it yet.
|
@@ -0,0 +1,19 @@
|
|
1
|
+
# Gem specification for ulink-checker: package metadata, the files that are
# shipped, the installed executable and the runtime dependencies.
Gem::Specification.new do |s|
  s.name        = 'ulink-checker'
  s.version     = '0.0.1'
  s.date        = '2013-03-12'
  s.summary     = "checks urls in docbook ulink tags"
  s.description = "checks urls in docbook ulink tags"
  s.authors     = ["darrin Mison"]
  s.email       = 'dmison@me.com'
  s.executables << 'ulink-checker'
  s.files       = [
    "Gemfile",
    "readme.md",
    "ulink-checker.gemspec",
    "bin/ulink-checker",
    "lib/duration.rb",
    "lib/link_checker_override.rb"]
  s.homepage    =
    'https://github.com/dmison/ulink-checker'

  # bin/ulink-checker requires these libraries at startup; declaring them lets
  # `gem install ulink-checker` pull them in.
  s.add_runtime_dependency 'nokogiri'
  # The executable calls colour methods directly on String objects.
  # NOTE(review): presumably this needs the rainbow 1.x series - confirm range.
  s.add_runtime_dependency 'rainbow', '~> 1.1'
  # NOTE(review): `require 'link_checker'` is presumably provided by the
  # link-checker gem - confirm the gem name against the Gemfile.
  s.add_runtime_dependency 'link-checker'
end
|
metadata
ADDED
@@ -0,0 +1,67 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: ulink-checker
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
prerelease: false
|
5
|
+
segments:
|
6
|
+
- 0
|
7
|
+
- 0
|
8
|
+
- 1
|
9
|
+
version: 0.0.1
|
10
|
+
platform: ruby
|
11
|
+
authors:
|
12
|
+
- darrin Mison
|
13
|
+
autorequire:
|
14
|
+
bindir: bin
|
15
|
+
cert_chain: []
|
16
|
+
|
17
|
+
date: 2013-03-12 00:00:00 +10:00
|
18
|
+
default_executable:
|
19
|
+
dependencies: []
|
20
|
+
|
21
|
+
description: checks urls in docbook ulink tags
|
22
|
+
email: dmison@me.com
|
23
|
+
executables:
|
24
|
+
- ulink-checker
|
25
|
+
extensions: []
|
26
|
+
|
27
|
+
extra_rdoc_files: []
|
28
|
+
|
29
|
+
files:
|
30
|
+
- Gemfile
|
31
|
+
- readme.md
|
32
|
+
- ulink-checker.gemspec
|
33
|
+
- bin/ulink-checker
|
34
|
+
- lib/duration.rb
|
35
|
+
- lib/link_checker_override.rb
|
36
|
+
has_rdoc: true
|
37
|
+
homepage: https://github.com/dmison/ulink-checker
|
38
|
+
licenses: []
|
39
|
+
|
40
|
+
post_install_message:
|
41
|
+
rdoc_options: []
|
42
|
+
|
43
|
+
require_paths:
|
44
|
+
- lib
|
45
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
46
|
+
requirements:
|
47
|
+
- - ">="
|
48
|
+
- !ruby/object:Gem::Version
|
49
|
+
segments:
|
50
|
+
- 0
|
51
|
+
version: "0"
|
52
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
53
|
+
requirements:
|
54
|
+
- - ">="
|
55
|
+
- !ruby/object:Gem::Version
|
56
|
+
segments:
|
57
|
+
- 0
|
58
|
+
version: "0"
|
59
|
+
requirements: []
|
60
|
+
|
61
|
+
rubyforge_project:
|
62
|
+
rubygems_version: 1.3.6
|
63
|
+
signing_key:
|
64
|
+
specification_version: 3
|
65
|
+
summary: checks urls in docbook ulink tags
|
66
|
+
test_files: []
|
67
|
+
|