ccls-html_test 0.3.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.rdoc +142 -0
- data/Rakefile +59 -0
- data/VERSION +1 -0
- data/ccls-html_test.gemspec +57 -0
- data/lib/DTD/xhtml-lat1.ent +196 -0
- data/lib/DTD/xhtml-special.ent +80 -0
- data/lib/DTD/xhtml-symbol.ent +237 -0
- data/lib/DTD/xhtml.soc +14 -0
- data/lib/DTD/xhtml1-frameset.dtd +1235 -0
- data/lib/DTD/xhtml1-strict.dtd +978 -0
- data/lib/DTD/xhtml1-transitional.dtd +1201 -0
- data/lib/DTD/xhtml1.dcl +192 -0
- data/lib/assertions.rb +57 -0
- data/lib/ccls-html_test.rb +1 -0
- data/lib/html_test.rb +98 -0
- data/lib/link_validator.rb +175 -0
- data/lib/url_checker.rb +147 -0
- data/lib/url_selector.rb +57 -0
- data/lib/validate_filter.rb +62 -0
- data/lib/validator.rb +127 -0
- data/rails/init.rb +1 -0
- data/script/rails +6 -0
- data/script/validate +47 -0
- metadata +69 -0
data/lib/url_checker.rb
ADDED
@@ -0,0 +1,147 @@
|
|
1
|
+
module Html
  module Test
    # Raised when a URL found in a response cannot be resolved to a public
    # file, a routed action or a template.
    class InvalidUrl < RuntimeError; end

    # Checks that the URLs referenced by a controller response (anchors,
    # images, form actions and redirect targets) resolve inside the
    # application: either to a file under public/ or to a route whose
    # controller has a matching action or template.
    class UrlChecker
      attr_accessor :request, :response, :params

      include Html::Test::UrlSelector

      # controller - any object responding to #request, #response and #params.
      def initialize(controller)
        self.request = controller.request
        self.response = controller.response
        self.params = controller.params
      end

      # Checks every anchor, image and form URL found in the response body.
      # Raises Html::Test::InvalidUrl for the first URL that does not resolve.
      def check_urls_resolve
        urls_to_check.each do |url|
          check_url_resolves(url)
        end
      end

      # Checks the Location header of a 302 response.
      # Raises Html::Test::InvalidUrl if the redirect target does not resolve.
      def check_redirects_resolve
        redirect_url = response.headers['Location']
        # The status may be an Integer (Rails 3) or a String such as
        # "302 Found"; to_s lets the regexp match in both cases.
        if response.status.to_s =~ /302/ && redirect_url.present?
          check_url_resolves(redirect_url)
        end
      end

      private

      def urls_to_check
        anchor_urls + image_urls + form_urls
      end

      # Skips special and external URLs, then requires the remaining URL to be
      # either a public file or a routed action/template.
      def check_url_resolves(url)
        return if skip_url?(url, root_url) || external_http?(url, root_url)
        url = strip_anchor(remove_query(make_absolute(url)))
        return if public_file_exists?(url)
        check_action_exists(url)
      end

      def root_url
        request.protocol + request.host_with_port
      end

      # True if the URL maps to a file (optionally with an .html suffix, the
      # way page caching stores it) under the Rails public directory.
      def public_file_exists?(url)
        public_path = File.join(rails_public_path, url)
        # File.exists? is deprecated and was removed in Ruby 3.2.
        File.exist?(public_path) || File.exist?(public_path + ".html")
      end

      def rails_public_path
        File.join(Rails.root, "public")
      end

      # Make URLs absolute paths, i.e. relative to the site root
      def make_absolute(url)
        url = remove_host(url) if has_protocol?(url)
        return url if url =~ %r{^/}
        current_url = request.url || url_from_params
        # request.url includes protocol, host and query string. Reduce it to
        # a bare path first, otherwise the result is not a site-root path.
        current_url = remove_host(current_url) if has_protocol?(current_url)
        current_url = strip_anchor(remove_query(current_url))
        # "#frag" and "?query" references point at the current page itself,
        # not at its parent directory.
        return current_url + url if url =~ /\A[?#]/
        current_url = File.dirname(current_url) if current_url !~ %r{/$}
        File.join(current_url, url)
      end

      # Strips "protocol://host" and returns the remaining path, or "/" when
      # nothing follows the host.
      def remove_host(url)
        url_no_host = url[%r{^[a-z]+://[^/]+(/.+)$}, 1]
        url_no_host.blank? ? "/" : url_no_host
      end

      # Drops everything from the first "?" onwards. Always returns a String
      # (possibly empty), never nil.
      def remove_query(url)
        url.sub(/\?.*\z/m, '')
      end

      # Drops everything from the first "#" onwards. Always returns a String
      # (possibly empty), never nil.
      def strip_anchor(url)
        url.sub(/#.*\z/m, '')
      end

      # Each URL is required to have at least one HTTP method for which there is a route with an action
      def check_action_exists(url)
        unless routes_from_url(url).any? { |route| route_has_action?(route) }
          raise_invalid_url(url, "No action or template")
        end
      end

      def route_has_action?(route)
        controller = "#{route[:controller].camelize}Controller".constantize
        # public_instance_methods returns Symbols on Ruby >= 1.9, so
        # include?(String) never matched there. public_method_defined?
        # accepts a String or a Symbol.
        controller.public_method_defined?(route[:action]) || template_file_exists?(route, controller)
      end

      # True if any view directory of the controller contains a template
      # named after the route's action (any extension).
      def template_file_exists?(route, controller)
        # Workaround for Rails 1.2 that doesn't have the view_paths method
        template_dirs = controller.respond_to?(:view_paths) ?
          controller.view_paths : [controller.view_root]
        template_dirs.each do |template_dir|
          template_file = File.join(template_dir, controller.controller_path, "#{route[:action]}.*")
          return true if !Dir.glob(template_file).empty?
        end
        false
      end

      # This is a special case where on my site I had a catch all route for 404s. If you have
      # such a route, you can override this method and check for it, i.e. you could do something
      # like this:
      #
      # if params[:action] == "rescue_404"
      #   raise Html::Test::InvalidUrl.new("Action rescue_404 invoked for url '#{url}'")
      # end
      def check_not_404(url, params)
        # This method is unimplemented by default
      end

      # Returns the recognized route params for every HTTP method that has a
      # route for the URL. Raises Html::Test::InvalidUrl when none has.
      def routes_from_url(url)
        routes = [:get, :post, :put, :delete].map do |method|
          begin
            # Need to specify the method here for RESTful resource routes to work, i.e.
            # for /posts/1 to be recognized as the show action etc.

            # ActionController::Routing::Routes does not exist in Rails 3!
            # fortunately, I don't use "check_urls" so not an issue right now.

            params = ::ActionController::Routing::Routes.recognize_path(url, {:method => method})
            check_not_404(url, params)
            params
          rescue
            # Could not find a route with that method. An error raised by
            # check_not_404 also lands here, so a catch-all 404 route for one
            # method only discards that method, not the whole URL.
            nil
          end
        end.compact
        routes.present? ? routes : raise_invalid_url(url, "Cannot find a route")
      end

      def url_from_params(options = params)
        return "/" if params.empty?
        options[:controller] ||= params[:controller]
        ::ActionController::Routing::Routes.generate_extras(symbolize_hash(options))[0]
      end

      # Convert all keys in the hash to symbols. Not sure why this is needed.
      def symbolize_hash(hash)
        hash.keys.inject({}) { |h, k| h[k.to_sym] = hash[k]; h }
      end

      def raise_invalid_url(url, message)
        raise(Html::Test::InvalidUrl.new("#{message} for url '#{url}' request_uri='#{request.url}' body='#{response.body}'"))
      end
    end
  end
end
|
data/lib/url_selector.rb
ADDED
@@ -0,0 +1,57 @@
|
|
1
|
+
module Html
  module Test
    # Helpers for pulling URLs out of a response body and classifying them.
    # The including object must expose the response either through a public
    # #response method or through an @response instance variable.
    module UrlSelector
      # True for URLs that should not be checked at all: blank ones, ones
      # with a non-HTTP protocol, and javascript:/mailto:/"#" pseudo links.
      # root_url is accepted for interface compatibility and is not used.
      def skip_url?(url, root_url = nil)
        url.blank? || unsupported_protocol?(url) || special_url?(url)
      end

      # \A and \z anchor the whole string. ^ and $ would also match after a
      # newline embedded in the attribute value.
      def special_url?(url)
        [/\Ajavascript:/, /\Amailto:/, /\A\#\z/].any? { |pattern| url =~ pattern }
      end

      # True if url is an http(s) URL that does not belong to root_url.
      # Without a root_url every http(s) URL counts as external.
      def external_http?(url, root_url = nil)
        return false unless http_protocol?(url)
        return true unless root_url

        site = root_url.sub(%r{/+\z}, '')
        return true unless url.start_with?(site)

        # A bare prefix match is not enough: "http://example.com.evil.org"
        # and "http://example.com:8080" both start with "http://example.com".
        # The root must be followed by the end of the URL or by a path,
        # query or fragment delimiter.
        remainder = url[site.length..-1]
        !(remainder.empty? || remainder =~ %r{\A[/?#]})
      end

      def http_protocol?(url)
        url =~ %r{\Ahttp(?:s)?://} ? true : false
      end

      def has_protocol?(url)
        url =~ %r{\A[a-z]+://} ? true : false
      end

      def unsupported_protocol?(url)
        has_protocol?(url) && !http_protocol?(url)
      end

      # href attribute of every <a> element (nil where the attribute is absent).
      def anchor_urls
        select("a").map { |l| l.attributes['href'] }
      end

      # src attribute of every <img> element (nil where the attribute is absent).
      def image_urls
        select("img").map { |i| i.attributes['src'] }
      end

      # action attribute of every <form> element (nil where the attribute is absent).
      def form_urls
        select("form").map { |i| i.attributes['action'] }
      end

      def response_body
        self.respond_to?(:response) ? response.body : @response.body
      end

      # Parses the response body and returns the nodes matching the selector.
      def select(pattern)
        HTML::Selector.new(pattern).select(HTML::Document.new(response_body).root)
      end
    end
  end
end
|
data/lib/validate_filter.rb
ADDED
@@ -0,0 +1,62 @@
|
|
1
|
+
module Html
  module Test
    # Validates the body of a controller response with the validators
    # configured on the controller class, remembering which URLs have
    # already been validated.
    class ValidateFilter
      include ::Test::Unit::Assertions
      include ::Html::Test::Assertions

      attr_accessor :request, :response, :params, :validators

      class << self
        # False whenever Validator.revalidate_all is set; otherwise whatever
        # is recorded for the URL (true, or nil if it was never validated).
        def already_validated?(url)
          return false if Html::Test::Validator.revalidate_all
          validated_urls[url]
        end

        # Records the URL as validated.
        def mark_url_validated(url)
          validated_urls[url] = true
        end

        # Lazily created class-level hash of url => true.
        def validated_urls
          @validated_urls ||= {}
        end

        # Used in testing (of html_test_extension plugin)
        # to remove the validated_urls hash
        # so can test with the same url.
        def clear_validated_urls
          @validated_urls = {}
        end
      end

      # controller - object responding to #request, #response and #params
      # whose class responds to #validators.
      def initialize(controller)
        self.request = controller.request
        self.response = controller.response
        self.params = controller.params
        self.validators = controller.class.validators
      end

      # Runs assert_validates on the stripped response body unless the page
      # should not be validated or its URL was validated before, then marks
      # the URL as validated.
      def validate_page
        # request.request_uri no longer exists in Rails 3, hence request.url.
        url = request.url
        return unless should_validate?
        return if ValidateFilter.already_validated?(url)

        assert_validates(validators, response.body.strip, url)
        ValidateFilter.mark_url_validated(url)
      end

      # Override this method if you only want to validate a subset of pages
      def should_validate?
        # In Rails 3 response.status is a Fixnum (so it is converted with
        # to_s before matching) and response.headers['Content-Type'] is
        # blank (so response.content_type is consulted instead).
        status_ok = response.status.to_s =~ /200/
        status_ok && (response.content_type =~ /text\/html/i || response.body =~ /<html/)
      end
    end
  end
end
|
data/lib/validator.rb
ADDED
@@ -0,0 +1,127 @@
|
|
1
|
+
require 'tempfile'
|
2
|
+
require 'net/http'
|
3
|
+
require 'fileutils'
|
4
|
+
|
5
|
+
module Html
  module Test
    # Runs HTML documents through tidy, the W3C validator service or the
    # xmllint command line tool. Each *_errors method returns nil when the
    # document validates and an error message String otherwise.
    class Validator

      # verbose = true shows "validating ..."
      # verbose = false shows NOTHING
      @@verbose = true
      cattr_accessor :verbose
      #
      # revalidate_all = true will validate every call to a url
      # revalidate_all = false will only validate the first call to a url
      @@revalidate_all = true
      cattr_accessor :revalidate_all

      # Regexps; tidy errors matching any of them are dropped.
      @@tidy_ignore_list = []
      cattr_accessor :tidy_ignore_list

      # For local validation you might change this to http://localhost/validator/htdocs/check
      DEFAULT_W3C_URL = "http://validator.w3.org/check"
      @@w3c_url = DEFAULT_W3C_URL
      cattr_accessor :w3c_url

      # Whether the W3C validator should show the HTML document being validated in
      # its response. Set to 0 to disable.
      @@w3c_show_source = "1"
      cattr_accessor :w3c_show_source

      DEFAULT_DTD = File.join(File.dirname(__FILE__), 'DTD', 'xhtml1-strict.dtd')

      # Path to DTD file that the xmllint validator uses
      def self.dtd(document)
        DEFAULT_DTD
      end

      # Validate an HTML document string using tidy.
      # Code excerpted from the rails_tidy plugin
      def self.tidy_errors(body)
        tidy = RailsTidy.tidy_factory
        begin
          tidy.clean(body)
          errors = tidy.errors.reject { |e| tidy_ignore_list.any? { |p| e =~ p } }.join("\n")
        ensure
          # Release the tidy instance even when clean raises.
          tidy.release
        end
        errors.blank? ? nil : errors
      end

      # Validate an HTML document string by going to the online W3C validator.
      # Credit for the original code goes to Scott Baron (htonl)
      def self.w3c_errors(body)
        response = Net::HTTP.post_form(URI.parse(w3c_url),
                                       {'ss'=>w3c_show_source, 'fragment'=>body})
        status = response['x-w3c-validator-status']
        return nil if status == 'Valid'

        # Make the relative stylesheet import absolute so the saved page
        # still renders when opened from disk.
        html = response.body.to_s.sub(%r{@import "\./base\.css"}, %Q{@import "#{File.dirname(w3c_url)}/base.css"})
        response_file = find_unique_path(File.join(tmp_dir, "w3c_response.html"))

        # I was getting many errors like ... (in ruby 1.9.3 and rails 3)
        # Encoding::UndefinedConversionError: "\xE2" from ASCII-8BIT to UTF-8
        # adding force_encoding('UTF-8') seems to fix this.
        # File.open rather than Kernel#open: the argument is always a path.
        File.open(response_file, "w") { |f| f.puts(html.force_encoding('UTF-8')) }

        "W3C status #{status}. Response from W3C was written to the file #{response_file}"
      end

      # Validate an HTML document string using the xmllint command line validator tool.
      # Returns nil if validation passes and an error message otherwise.
      # Original code taken from the book "Enterprise Integration with Ruby"
      def self.xmllint_errors(body)
        error_file = create_tmp_file("xmllint_error")
        dtd_path = dtd(body)
        if dtd_path =~ /^doctype$/i
          # Use the DOCTYPE declaration
          doc_file = create_tmp_file("xmllint", body)
          args = ["--noout", "--valid", doc_file]
        else
          # Override the DOCTYPE declaration
          doc_file = create_tmp_file("xmllint", body.sub(/<!DOCTYPE[^>]+>/m, ""))
          args = ["--noout", "--dtdvalid", dtd_path, doc_file]
        end

        # Run xmllint without a shell and let Ruby do the redirection. The
        # former "cmd &> file" form is bash-only: a POSIX sh such as dash
        # reads it as "cmd &" followed by "> file", which backgrounds the
        # command and always reports exit status 0. Passing the arguments
        # as a list also keeps paths containing spaces intact.
        succeeded = system("xmllint", *args, [:out, :err] => [error_file, "w"])
        return nil if succeeded

        command = (["xmllint"] + args).join(" ")
        failure_doc = File.join(tmp_dir, "xmllint_last_response.html")
        FileUtils.cp doc_file, failure_doc
        "command='#{command}'. HTML document at '#{failure_doc}'. " + IO.read(error_file)
      end

      # The helpers below have always been public: a bare `private` does not
      # apply to methods defined with `def self.`. They are left public so
      # existing callers keep working.

      # Returns path if nothing exists there yet, otherwise the first of
      # "name-2.ext", "name-3.ext", ... in the same directory that is free.
      def self.find_unique_path(path)
        dir = File.dirname(path)
        ext = File.extname(path)
        base = File.basename(path, ext)
        candidate = path
        counter = 2
        # File.exists? is deprecated and was removed in Ruby 3.2.
        while File.exist?(candidate)
          candidate = File.join(dir, "#{base}-#{counter}#{ext}")
          counter += 1
        end
        candidate
      end

      # Writes contents to a new temporary file and returns its path.
      # NOTE(review): only the path is returned, so the Tempfile object is
      # unreferenced afterwards; Tempfile removes its file when the object
      # is garbage collected, so the path may not outlive the caller for
      # long. Confirm whether that is acceptable.
      def self.create_tmp_file(name, contents = "")
        tmp_file = Tempfile.new(name)
        tmp_file.puts(contents)
        tmp_file.close
        tmp_file.path
      end

      def self.tmp_dir
        Dir.tmpdir
      end
    end
  end
end
|
data/rails/init.rb
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
# Load the plugin only in the test environment.
# Rails.env is preferred; the RAILS_ENV constant (deprecated in Rails 3.0 and
# removed in later releases) is only consulted when Rails.env is unavailable.
# When neither is defined nothing is loaded, instead of raising NameError.
html_test_env =
  if defined?(Rails) && Rails.respond_to?(:env)
    Rails.env.to_s
  elsif defined?(RAILS_ENV)
    RAILS_ENV
  end
require 'ccls-html_test' if html_test_env == 'test'
|
data/script/rails
ADDED
@@ -0,0 +1,6 @@
|
|
1
|
+
#!/usr/bin/env ruby
# Entry point that the "rails" command runs automatically when it is invoked
# from the application root with Rails 3 gems installed.

# Both paths are resolved relative to the directory holding this script.
script_dir = File.dirname(__FILE__)

APP_PATH = File.expand_path('../config/application', script_dir)
require File.expand_path('../config/boot', script_dir)
require 'rails/commands'
|
data/script/validate
ADDED
@@ -0,0 +1,47 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
# == Synopsis
|
3
|
+
# This script validates an HTML page via HTTP and checks for broken
|
4
|
+
# links and images. Links are followed one step away from the start page.
|
5
|
+
# HTML validation is also done for pages linked internally from the start page
|
6
|
+
# (i.e. relative URLs).
|
7
|
+
#
|
8
|
+
# == Usage
|
9
|
+
# vendor/plugins/html_test/script/validate http://my.url.com [options]
|
10
|
+
#
|
11
|
+
# Options:
|
12
|
+
#
|
13
|
+
# --no-follow:: Don't follow any anchor or images URLs on
|
14
|
+
# the start page (i.e. only one page is requested).
|
15
|
+
#
|
16
|
+
# --validators validator_list:: A comma separated list of validators to use
|
17
|
+
# for HTML validation. Supported validators:
|
18
|
+
# tidy, xmllint, w3c. Default validator is
|
19
|
+
# tidy if it is installed, otherwise w3c.
|
20
|
+
#
|
21
|
+
# --dtd dtd_path:: Path to the DTD file to use for xmllint
|
22
|
+
# validation. By default xmllint will use
|
23
|
+
# the XHTML 1.0 strict DTD. Set dtd_path
|
24
|
+
# to "doctype" to make xmllint use the
|
25
|
+
# DTD specified in the DOCTYPE tag
|
26
|
+
# (can be slow).
|
27
|
+
#
|
28
|
+
# --skip skip_patterns:: A comma separated list of regexp patterns for
|
29
|
+
# URLs to not visit. Using the pattern '.*'
|
30
|
+
# is equivalent to the --no-follow option.
|
31
|
+
#
|
32
|
+
# --only only_pattern:: Only visit URLs matching given regexp pattern
|
33
|
+
#
|
34
|
+
# --no-external:: Do not visit external URLs, i.e. URLs with
|
35
|
+
# different domain than the start page.
|
36
|
+
#
|
37
|
+
# --quiet:: Don't output anything unless there is a failure.
|
38
|
+
|
39
|
+
require 'optparse'
require 'uri'

# rdoc/usage is not shipped with Ruby 1.9 and later, where requiring it
# raises LoadError. Treat it as optional; print_usage_and_exit below has a
# fallback.
begin
  require 'rdoc/usage'
rescue LoadError
  nil
end

require File.join(File.dirname(__FILE__), "..", "..", "..", "..", "test", "test_helper")
require File.join(File.dirname(__FILE__), "..", "lib", "html_test")

# Prints the usage text and exits. Uses RDoc.usage when it is available;
# otherwise prints the leading comment block of this script (the lines
# after the shebang that start with "#") and exits.
def print_usage_and_exit
  if defined?(RDoc) && RDoc.respond_to?(:usage)
    RDoc.usage
  else
    lines = File.readlines(__FILE__)
    lines.shift if lines.first.to_s.start_with?('#!')
    lines.take_while { |line| line.start_with?('#') }.each do |line|
      puts line.sub(/\A# ?/, '')
    end
    exit
  end
end

# Any error while parsing the command line shows the usage text instead of
# a stack trace.
begin
  options = Html::Test::LinkValidator.parse_command_line(ARGV)
rescue StandardError
  print_usage_and_exit
end
Html::Test::LinkValidator.new(ARGV[0], options)
|