ccls-html_test 0.3.5
Sign up to get free protection for your applications and to get access to all the features.
- data/README.rdoc +142 -0
- data/Rakefile +59 -0
- data/VERSION +1 -0
- data/ccls-html_test.gemspec +57 -0
- data/lib/DTD/xhtml-lat1.ent +196 -0
- data/lib/DTD/xhtml-special.ent +80 -0
- data/lib/DTD/xhtml-symbol.ent +237 -0
- data/lib/DTD/xhtml.soc +14 -0
- data/lib/DTD/xhtml1-frameset.dtd +1235 -0
- data/lib/DTD/xhtml1-strict.dtd +978 -0
- data/lib/DTD/xhtml1-transitional.dtd +1201 -0
- data/lib/DTD/xhtml1.dcl +192 -0
- data/lib/assertions.rb +57 -0
- data/lib/ccls-html_test.rb +1 -0
- data/lib/html_test.rb +98 -0
- data/lib/link_validator.rb +175 -0
- data/lib/url_checker.rb +147 -0
- data/lib/url_selector.rb +57 -0
- data/lib/validate_filter.rb +62 -0
- data/lib/validator.rb +127 -0
- data/rails/init.rb +1 -0
- data/script/rails +6 -0
- data/script/validate +47 -0
- metadata +69 -0
data/lib/url_checker.rb
ADDED
@@ -0,0 +1,147 @@
|
|
1
|
+
module Html
|
2
|
+
module Test
|
3
|
+
class InvalidUrl < RuntimeError; end
|
4
|
+
|
5
|
+
class UrlChecker
|
6
|
+
attr_accessor :request, :response, :params
|
7
|
+
|
8
|
+
include Html::Test::UrlSelector
|
9
|
+
|
10
|
+
def initialize(controller)
|
11
|
+
self.request = controller.request
|
12
|
+
self.response = controller.response
|
13
|
+
self.params = controller.params
|
14
|
+
end
|
15
|
+
|
16
|
+
def check_urls_resolve
|
17
|
+
urls_to_check.each do |url|
|
18
|
+
check_url_resolves(url)
|
19
|
+
end
|
20
|
+
end
|
21
|
+
|
22
|
+
# Checks that the target of a redirect response resolves.
#
# Does nothing unless the response status matches 302 and a Location header
# is present; otherwise the Location URL is passed to check_url_resolves.
#
# The status is converted with to_s before matching: in Rails 3 it is an
# Integer, and matching an Integer against a regexp is always nil, which
# silently disabled this check.
def check_redirects_resolve
  redirect_url = response.headers['Location']
  return unless response.status.to_s =~ /302/ && redirect_url.present?

  check_url_resolves(redirect_url)
end
|
28
|
+
|
29
|
+
private
|
30
|
+
def urls_to_check
|
31
|
+
anchor_urls + image_urls + form_urls
|
32
|
+
end
|
33
|
+
|
34
|
+
def check_url_resolves(url)
|
35
|
+
return if skip_url?(url, root_url) || external_http?(url, root_url)
|
36
|
+
url = strip_anchor(remove_query(make_absolute(url)))
|
37
|
+
return if public_file_exists?(url)
|
38
|
+
check_action_exists(url)
|
39
|
+
end
|
40
|
+
|
41
|
+
def root_url
|
42
|
+
request.protocol + request.host_with_port
|
43
|
+
end
|
44
|
+
|
45
|
+
# Returns true when +url+ maps to a file below the Rails public directory,
# either directly or with an ".html" suffix appended.
#
# Uses File.exist?; the File.exists? alias is deprecated and no longer
# available in Ruby 3.2+.
def public_file_exists?(url)
  public_path = File.join(rails_public_path, url)
  File.exist?(public_path) || File.exist?("#{public_path}.html")
end
|
49
|
+
|
50
|
+
def rails_public_path
|
51
|
+
File.join(Rails.root, "public")
|
52
|
+
end
|
53
|
+
|
54
|
+
# Make URLs absolute paths, i.e. relative to the site root.
#
# * A URL with a protocol has its protocol and host removed.
# * A URL that already starts with "/" is returned unchanged.
# * Any other URL is resolved against the directory of the current request.
#
# request.url is a full URL (protocol, host and possibly a query string), so
# the host and query are stripped from it before it is used as the base.
# Previously they leaked into the result, which was then not a site-root
# relative path.
def make_absolute(url)
  url = remove_host(url) if has_protocol?(url)
  return url if url.start_with?("/")

  current_url = request.url || url_from_params
  current_url = remove_host(current_url) if has_protocol?(current_url)
  # Drop the query first so that slashes inside it cannot confuse File.dirname.
  current_url = remove_query(current_url)
  current_url = File.dirname(current_url) unless current_url.end_with?("/")
  File.join(current_url, url)
end
|
62
|
+
|
63
|
+
def remove_host(url)
|
64
|
+
url_no_host = url[%r{^[a-z]+://[^/]+(/.+)$}, 1]
|
65
|
+
url_no_host.blank? ? "/" : url_no_host
|
66
|
+
end
|
67
|
+
|
68
|
+
# Returns +url+ without its query string, i.e. with the first "?" and
# everything after it removed. A URL without "?" is returned as-is (as a copy).
#
# A URL that starts with "?" yields "" rather than nil; the previous pattern
# required at least one character before the "?" and returned nil otherwise.
def remove_query(url)
  url.sub(/\?.*\z/m, "")
end
|
71
|
+
|
72
|
+
# Returns +url+ without its fragment, i.e. with the first "#" and everything
# after it removed. A URL without "#" is returned as-is (as a copy).
#
# A URL that starts with "#" yields "" rather than nil; the previous pattern
# required at least one character before the "#" and returned nil otherwise.
def strip_anchor(url)
  url.sub(/\#.*\z/m, "")
end
|
75
|
+
|
76
|
+
# Each URL is required to have at least one HTTP method for which there is a route with an action
|
77
|
+
def check_action_exists(url)
|
78
|
+
unless routes_from_url(url).any? { |route| route_has_action?(route) }
|
79
|
+
raise_invalid_url(url, "No action or template")
|
80
|
+
end
|
81
|
+
end
|
82
|
+
|
83
|
+
# Returns true when the controller named by route[:controller] has a public
# instance method named route[:action], or when a template file exists for
# that action (see template_file_exists?).
#
# public_method_defined? accepts a String or a Symbol. The previous
# public_instance_methods.include?(route[:action]) compared a String action
# against the Symbols returned on Ruby 1.9+, so it never matched.
def route_has_action?(route)
  controller = "#{route[:controller].camelize}Controller".constantize
  action = route[:action].to_s
  (!action.empty? && controller.public_method_defined?(action)) ||
    template_file_exists?(route, controller)
end
|
87
|
+
|
88
|
+
def template_file_exists?(route, controller)
|
89
|
+
# Workaround for Rails 1.2 that doesn't have the view_paths method
|
90
|
+
template_dirs = controller.respond_to?(:view_paths) ?
|
91
|
+
controller.view_paths : [controller.view_root]
|
92
|
+
template_dirs.each do |template_dir|
|
93
|
+
template_file = File.join(template_dir, controller.controller_path, "#{route[:action]}.*")
|
94
|
+
return true if !Dir.glob(template_file).empty?
|
95
|
+
end
|
96
|
+
false
|
97
|
+
end
|
98
|
+
|
99
|
+
# This is a special case where on my site I had a catch all route for 404s. If you have
|
100
|
+
# such a route, you can override this method and check for it, i.e. you could do something
|
101
|
+
# like this:
|
102
|
+
#
|
103
|
+
# if params[:action] == "rescue_404"
|
104
|
+
# raise Html::Test::InvalidUrl.new("Action rescue_404 invoked for url '#{url}'")
|
105
|
+
# end
|
106
|
+
def check_not_404(url, params)
|
107
|
+
# This method is unimplemented by default
|
108
|
+
end
|
109
|
+
|
110
|
+
# Returns the route parameter hashes recognized for +url+, trying the GET,
# POST, PUT and DELETE methods in turn. Calls raise_invalid_url when no
# method yields a route.
#
# The method must be passed to recognize_path for RESTful resource routes to
# work, i.e. for /posts/1 to be recognized as the show action etc.
#
# Any error raised while recognizing a route (including one raised by
# check_not_404) only discards that HTTP method; a URL is valid as long as at
# least one method has a route.
def routes_from_url(url)
  routes = [:get, :post, :put, :delete].map do |method|
    begin
      params = route_set.recognize_path(url, {:method => method})
      check_not_404(url, params)
      params
    rescue
      # Could not find a route with that method
      nil
    end
  end.compact
  routes.empty? ? raise_invalid_url(url, "Cannot find a route") : routes
end

# Returns the application's route set. Rails 3+ exposes it as
# Rails.application.routes; ActionController::Routing::Routes only exists in
# older Rails versions and is used as the fallback.
def route_set
  if defined?(::Rails) && ::Rails.respond_to?(:application) && ::Rails.application
    ::Rails.application.routes
  else
    ::ActionController::Routing::Routes
  end
end
|
130
|
+
|
131
|
+
def url_from_params(options = params)
|
132
|
+
return "/" if params.empty?
|
133
|
+
options[:controller] ||= params[:controller]
|
134
|
+
::ActionController::Routing::Routes.generate_extras(symbolize_hash(options))[0]
|
135
|
+
end
|
136
|
+
|
137
|
+
# Convert all keys in the hash to symbols. Not sure why this is needed.
|
138
|
+
def symbolize_hash(hash)
|
139
|
+
hash.keys.inject({}) { |h, k| h[k.to_sym] = hash[k]; h }
|
140
|
+
end
|
141
|
+
|
142
|
+
def raise_invalid_url(url, message)
|
143
|
+
raise(Html::Test::InvalidUrl.new("#{message} for url '#{url}' request_uri='#{request.url}' body='#{response.body}'"))
|
144
|
+
end
|
145
|
+
end
|
146
|
+
end
|
147
|
+
end
|
data/lib/url_selector.rb
ADDED
@@ -0,0 +1,57 @@
|
|
1
|
+
module Html
|
2
|
+
module Test
|
3
|
+
module UrlSelector
|
4
|
+
def skip_url?(url, root_url = nil)
|
5
|
+
if url.blank? || unsupported_protocol?(url) || special_url?(url)
|
6
|
+
true
|
7
|
+
else
|
8
|
+
false
|
9
|
+
end
|
10
|
+
end
|
11
|
+
|
12
|
+
# Returns true for URLs that are never checked: "javascript:" and "mailto:"
# URLs and the bare "#" placeholder link.
#
# URI schemes are case-insensitive, so "JavaScript:..." and "MAILTO:..." are
# matched as well. The patterns are anchored to the whole string with \A/\z
# instead of the per-line ^/$.
def special_url?(url)
  [/\Ajavascript:/i, /\Amailto:/i, /\A\#\z/].any? { |pattern| url =~ pattern }
end
|
15
|
+
|
16
|
+
# Returns true when +url+ is an http(s) URL pointing outside the current site.
#
# Without +root_url+ every http(s) URL counts as external. With +root_url+
# (protocol + host, optionally with port) the URL is internal only when the
# root is followed by the end of the URL, "/", "?" or "#". A plain prefix
# comparison would also accept "http://example.com.evil.org" or a different
# port ("http://example.com:8080") as internal for root "http://example.com".
#
# Uses the core String#start_with? rather than the ActiveSupport
# starts_with? alias.
def external_http?(url, root_url = nil)
  return false unless http_protocol?(url)
  return true unless root_url && url.start_with?(root_url)
  # An empty root or one ending in "/" already delimits the host part.
  return false if root_url.empty? || root_url.end_with?("/")

  rest = url[root_url.length..-1]
  !(rest.empty? || rest.start_with?("/", "?", "#"))
end
|
23
|
+
|
24
|
+
def http_protocol?(url)
|
25
|
+
url =~ %r{^http(?:s)?://} ? true : false
|
26
|
+
end
|
27
|
+
|
28
|
+
def has_protocol?(url)
|
29
|
+
url =~ %r{^[a-z]+://} ? true : false
|
30
|
+
end
|
31
|
+
|
32
|
+
def unsupported_protocol?(url)
|
33
|
+
has_protocol?(url) && !http_protocol?(url)
|
34
|
+
end
|
35
|
+
|
36
|
+
def anchor_urls
|
37
|
+
select("a").map { |l| l.attributes['href'] }
|
38
|
+
end
|
39
|
+
|
40
|
+
def image_urls
|
41
|
+
select("img").map { |i| i.attributes['src'] }
|
42
|
+
end
|
43
|
+
|
44
|
+
def form_urls
|
45
|
+
select("form").map { |i| i.attributes['action'] }
|
46
|
+
end
|
47
|
+
|
48
|
+
def response_body
|
49
|
+
self.respond_to?(:response) ? response.body : @response.body
|
50
|
+
end
|
51
|
+
|
52
|
+
def select(pattern)
|
53
|
+
HTML::Selector.new(pattern).select(HTML::Document.new(response_body).root)
|
54
|
+
end
|
55
|
+
end
|
56
|
+
end
|
57
|
+
end
|
@@ -0,0 +1,62 @@
|
|
1
|
+
module Html
|
2
|
+
module Test
|
3
|
+
class ValidateFilter
|
4
|
+
attr_accessor :request, :response, :params, :validators
|
5
|
+
|
6
|
+
include ::Test::Unit::Assertions
|
7
|
+
include ::Html::Test::Assertions
|
8
|
+
|
9
|
+
def initialize(controller)
|
10
|
+
self.request = controller.request
|
11
|
+
self.response = controller.response
|
12
|
+
self.params = controller.params
|
13
|
+
self.validators = controller.class.validators
|
14
|
+
end
|
15
|
+
|
16
|
+
def validate_page
|
17
|
+
# url = request.request_uri
|
18
|
+
# no more request_uri in Rails 3
|
19
|
+
url = request.url
|
20
|
+
return if (!should_validate? || ValidateFilter.already_validated?(url))
|
21
|
+
# assert_validates(validators, response.body.strip, url, :verbose => true)
|
22
|
+
assert_validates(validators, response.body.strip, url )
|
23
|
+
ValidateFilter.mark_url_validated(url)
|
24
|
+
end
|
25
|
+
|
26
|
+
def self.already_validated?(url)
|
27
|
+
if Html::Test::Validator.revalidate_all
|
28
|
+
false
|
29
|
+
else
|
30
|
+
validated_urls[url]
|
31
|
+
end
|
32
|
+
end
|
33
|
+
|
34
|
+
def self.mark_url_validated(url)
|
35
|
+
validated_urls[url] = true
|
36
|
+
end
|
37
|
+
|
38
|
+
def self.validated_urls
|
39
|
+
@validated_urls ||= {}
|
40
|
+
end
|
41
|
+
|
42
|
+
# Override this method if you only want to validate a subset of pages
|
43
|
+
def should_validate?
|
44
|
+
# response.status =~ /200/ &&
|
45
|
+
# (response.headers['Content-Type'] =~ /text\/html/i || response.body =~ /<html/)
|
46
|
+
# In rails 3,
|
47
|
+
# response.status is a Fixnum which would return nil in this match
|
48
|
+
# and response.headers['Content-Type'] is blank
|
49
|
+
response.status.to_s =~ /200/ &&
|
50
|
+
(response.content_type =~ /text\/html/i || response.body =~ /<html/)
|
51
|
+
end
|
52
|
+
|
53
|
+
# Used in testing (of html_test_extension plugin)
|
54
|
+
# to remove the validated_urls hash
|
55
|
+
# so can test with the same url.
|
56
|
+
def self.clear_validated_urls
|
57
|
+
@validated_urls = {}
|
58
|
+
end
|
59
|
+
|
60
|
+
end
|
61
|
+
end
|
62
|
+
end
|
data/lib/validator.rb
ADDED
@@ -0,0 +1,127 @@
|
|
1
|
+
require 'tempfile'
|
2
|
+
require 'net/http'
|
3
|
+
require 'fileutils'
|
4
|
+
|
5
|
+
module Html
|
6
|
+
module Test
|
7
|
+
class Validator
|
8
|
+
|
9
|
+
# verbose = true shows "validating ..."
|
10
|
+
# verbose = false shows NOTHING
|
11
|
+
@@verbose = true
|
12
|
+
cattr_accessor :verbose
|
13
|
+
#
|
14
|
+
# revalidate_all = true will validate every call to a url
|
15
|
+
# revalidate_all = false will only validate the first call to a url
|
16
|
+
@@revalidate_all = true
|
17
|
+
cattr_accessor :revalidate_all
|
18
|
+
|
19
|
+
@@tidy_ignore_list = []
|
20
|
+
cattr_accessor :tidy_ignore_list
|
21
|
+
|
22
|
+
# For local validation you might change this to http://localhost/validator/htdocs/check
|
23
|
+
DEFAULT_W3C_URL = "http://validator.w3.org/check"
|
24
|
+
@@w3c_url = DEFAULT_W3C_URL
|
25
|
+
cattr_accessor :w3c_url
|
26
|
+
|
27
|
+
# Whether the W3C validator should show the HTML document being validated in
|
28
|
+
# its response. Set to 0 to disable.
|
29
|
+
@@w3c_show_source = "1"
|
30
|
+
cattr_accessor :w3c_show_source
|
31
|
+
|
32
|
+
DEFAULT_DTD = File.join(File.dirname(__FILE__), 'DTD', 'xhtml1-strict.dtd')
|
33
|
+
|
34
|
+
# Path to DTD file that the xmllint validator uses
|
35
|
+
def self.dtd(document)
|
36
|
+
DEFAULT_DTD
|
37
|
+
end
|
38
|
+
|
39
|
+
# Validate an HTML document string using tidy.
# Code excerpted from the rails_tidy plugin
#
# Returns nil when tidy reports no errors, or when every reported error
# matches a pattern in tidy_ignore_list; otherwise returns the remaining
# errors joined with newlines.
#
# The tidy instance is released in an ensure clause so that it is not leaked
# when cleaning raises, and the error list is filtered with reject instead of
# being mutated in place.
def self.tidy_errors(body)
  tidy = RailsTidy.tidy_factory
  begin
    tidy.clean(body)
    errors = tidy.errors.reject { |e| tidy_ignore_list.any? { |p| e =~ p } }.join("\n")
    errors.strip.empty? ? nil : errors
  ensure
    tidy.release
  end
end
|
49
|
+
|
50
|
+
# Validate an HTML document string by going to the online W3C validator.
|
51
|
+
# Credit for the original code goes to Scott Baron (htonl)
|
52
|
+
def self.w3c_errors(body)
|
53
|
+
response = Net::HTTP.post_form(URI.parse(w3c_url),
|
54
|
+
{'ss'=>w3c_show_source, 'fragment'=>body})
|
55
|
+
status = response['x-w3c-validator-status']
|
56
|
+
if status != 'Valid'
|
57
|
+
# Reference in the stylesheets
|
58
|
+
response.body.sub!(%r{@import "./base.css"}, %Q{@import "#{File.dirname(w3c_url)}/base.css"})
|
59
|
+
response_file = find_unique_path(File.join(tmp_dir, "w3c_response.html"))
|
60
|
+
|
61
|
+
# open(response_file, "w" ) { |f| f.puts(response.body) }
|
62
|
+
# I was getting many errors like ... (in ruby 1.9.3 and rails 3)
|
63
|
+
# Encoding::UndefinedConversionError: "\xE2" from ASCII-8BIT to UTF-8
|
64
|
+
# adding force_encoding('UTF-8') seems to fix this.
|
65
|
+
open(response_file, "w" ) { |f| f.puts(response.body.force_encoding('UTF-8')) }
|
66
|
+
|
67
|
+
"W3C status #{status}. Response from W3C was written to the file #{response_file}"
|
68
|
+
else
|
69
|
+
nil
|
70
|
+
end
|
71
|
+
end
|
72
|
+
|
73
|
+
# Validate an HTML document string using the xmllint command line validator tool.
# Returns nil if validation passes and an error message otherwise.
# Original code taken from the book "Enterprise Integration with Ruby"
#
# When dtd(body) is "doctype" (case-insensitive) the document's own DOCTYPE
# declaration is used; otherwise the DOCTYPE is stripped and the document is
# validated against the DTD file returned by dtd(body).
#
# xmllint is started directly with an argument list and its stdout/stderr are
# redirected to the error file by Ruby. The previous implementation built a
# shell string ending in "&> file"; "&>" is a bash extension, and a POSIX
# /bin/sh treats it as "run in background, then truncate file", so the output
# was not captured and the exit status was not xmllint's. Passing arguments
# as a list also avoids shell interpretation of the file paths.
def self.xmllint_errors(body)
  error_file = create_tmp_file("xmllint_error")
  dtd_path = dtd(body)
  if dtd_path =~ /^doctype$/i
    # Use the DOCTYPE declaration
    doc_file = create_tmp_file("xmllint", body)
    args = ["xmllint", "--noout", "--valid", doc_file]
  else
    # Override the DOCTYPE declaration
    doc_file = create_tmp_file("xmllint", body.sub(/<!DOCTYPE[^>]+>/m, ""))
    args = ["xmllint", "--noout", "--dtdvalid", dtd_path, doc_file]
  end

  # system returns true on exit status 0, false on a non-zero status and nil
  # when the command could not be executed at all.
  succeeded = system(*args, [:out, :err] => [error_file, "w"])
  return nil if succeeded

  command = "#{args.join(' ')} &> #{error_file}"
  failure_doc = File.join(tmp_dir, "xmllint_last_response.html")
  FileUtils.cp doc_file, failure_doc
  not_run = succeeded.nil? ? "xmllint could not be executed. " : ""
  "command='#{command}'. HTML document at '#{failure_doc}'. " + not_run + IO.read(error_file)
end
|
99
|
+
|
100
|
+
private
|
101
|
+
# Returns +path+ if no file exists there. Otherwise appends "-2", "-3", ...
# to the file name (before the extension) until an unused path is found,
# e.g. "w3c_response.html" -> "w3c_response-2.html".
#
# Uses File.exist?; the File.exists? alias is deprecated and no longer
# available in Ruby 3.2+.
def self.find_unique_path(path)
  dir = File.dirname(path)
  ext = File.extname(path)
  stem = File.basename(path, ext)
  counter = 2
  while File.exist?(path)
    path = File.join(dir, "#{stem}-#{counter}#{ext}")
    counter += 1
  end
  path
end
|
114
|
+
|
115
|
+
# Writes +contents+ (followed by a newline, via puts) to a new temporary file
# whose name starts with +name+ and returns the file's path.
#
# A Tempfile's file is deleted when the Tempfile object is garbage collected.
# Since only the path is returned, the object is kept in @tmp_files so the
# file cannot disappear before the caller uses the path; the files are then
# removed when the process exits.
def self.create_tmp_file(name, contents = "")
  tmp_file = Tempfile.new(name)
  tmp_file.puts(contents)
  tmp_file.close
  (@tmp_files ||= []) << tmp_file
  tmp_file.path
end
|
121
|
+
|
122
|
+
def self.tmp_dir
|
123
|
+
Dir::tmpdir
|
124
|
+
end
|
125
|
+
end
|
126
|
+
end
|
127
|
+
end
|
data/rails/init.rb
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
# Load the plugin in the test environment only. Rails.env is preferred because
# the RAILS_ENV constant is deprecated (and later removed) in Rails 3.x; the
# constant is still used as a fallback when Rails.env is not available.
html_test_env = (defined?(Rails) && Rails.respond_to?(:env)) ? Rails.env.to_s : RAILS_ENV
require 'ccls-html_test' if html_test_env == 'test'
|
data/script/rails
ADDED
@@ -0,0 +1,6 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
# This command will automatically be run when you run "rails" with Rails 3 gems installed from the root of your application.
|
3
|
+
|
4
|
+
APP_PATH = File.expand_path('../../config/application', __FILE__)
|
5
|
+
require File.expand_path('../../config/boot', __FILE__)
|
6
|
+
require 'rails/commands'
|
data/script/validate
ADDED
@@ -0,0 +1,47 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
# == Synopsis
|
3
|
+
# This script validates an HTML page via HTTP and checks for broken
|
4
|
+
# links and images. Links are followed one step away from the start page.
|
5
|
+
# HTML validation is also done for pages linked internally from the start page
|
6
|
+
# (i.e. relative URLs).
|
7
|
+
#
|
8
|
+
# == Usage
|
9
|
+
# vendor/plugins/html_test/script/validate http://my.url.com [options]
|
10
|
+
#
|
11
|
+
# Options:
|
12
|
+
#
|
13
|
+
# --no-follow:: Don't follow any anchor or image URLs on
|
14
|
+
# the start page (i.e. only one page is requested).
|
15
|
+
#
|
16
|
+
# --validators validator_list:: A comma separated list of validators to use
|
17
|
+
# for HTML validation. Supported validators:
|
18
|
+
# tidy, xmllint, w3c. Default validator is
|
19
|
+
# tidy if it is installed, otherwise w3c.
|
20
|
+
#
|
21
|
+
# --dtd dtd_path:: Path to the DTD file to use for xmllint
|
22
|
+
# validation. By default xmllint will use
|
23
|
+
# the XHTML 1.0 strict DTD. Set dtd_path
|
24
|
+
# to "doctype" to make xmllint use the
|
25
|
+
# DTD specified in the DOCTYPE tag
|
26
|
+
# (can be slow).
|
27
|
+
#
|
28
|
+
# --skip skip_patterns:: A comma separated list of regexp patterns for
|
29
|
+
# URLs to not visit. Using the pattern '.*'
|
30
|
+
# is equivalent to the --no-follow option.
|
31
|
+
#
|
32
|
+
# --only only_pattern:: Only visit URLs matching given regexp pattern
|
33
|
+
#
|
34
|
+
# --no-external:: Do not visit external URLs, i.e. URLs with
|
35
|
+
# different domain than the start page.
|
36
|
+
#
|
37
|
+
# --quiet:: Don't output anything unless there is a failure.
|
38
|
+
|
39
|
+
require 'optparse'
|
40
|
+
require 'rdoc/usage'
|
41
|
+
require 'uri'
|
42
|
+
|
43
|
+
require File.join(File.dirname(__FILE__), "..", "..", "..", "..", "test", "test_helper")
|
44
|
+
require File.join(File.dirname(__FILE__), "..", "lib", "html_test")
|
45
|
+
|
46
|
+
options = Html::Test::LinkValidator.parse_command_line(ARGV) rescue RDoc::usage
|
47
|
+
Html::Test::LinkValidator.new(ARGV[0], options)
|