ccls-html_test 0.3.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,147 @@
1
module Html
  module Test
    # Raised when a URL cannot be resolved to a public file, a route, or a
    # controller action/template.
    class InvalidUrl < RuntimeError; end

    # Checks that the URLs found in a controller's response (anchor hrefs, image
    # sources, form actions and the redirect Location header) resolve to either
    # a file below the application's public directory or to a route whose
    # controller has a matching action or template.
    class UrlChecker
      attr_accessor :request, :response, :params

      include Html::Test::UrlSelector

      # HTTP methods tried, in order, when looking for a route that recognizes a URL.
      ROUTE_METHODS = [:get, :post, :put, :delete].freeze

      # controller - object responding to #request, #response and #params.
      def initialize(controller)
        self.request = controller.request
        self.response = controller.response
        self.params = controller.params
      end

      # Checks every anchor, image and form URL in the response body.
      # Raises Html::Test::InvalidUrl for the first URL that does not resolve.
      def check_urls_resolve
        urls_to_check.each { |url| check_url_resolves(url) }
      end

      # Checks the Location header of a 302 response, if there is one.
      # Raises Html::Test::InvalidUrl if the redirect target does not resolve.
      def check_redirects_resolve
        redirect_url = response.headers['Location']
        # The status may be an Integer (302) or a String ("302 Found"); Integer#=~
        # never matches a regexp, so compare the string form.
        return unless response.status.to_s =~ /302/ && redirect_url.present?
        check_url_resolves(redirect_url)
      end

      private

      def urls_to_check
        anchor_urls + image_urls + form_urls
      end

      # Resolves a single URL. Skipped and external URLs are ignored; everything
      # else must be a public file or a routed action/template.
      def check_url_resolves(url)
        return if skip_url?(url, root_url) || external_http?(url, root_url)
        url = strip_anchor(remove_query(make_absolute(url)))
        return if public_file_exists?(url)
        check_action_exists(url)
      end

      # Scheme plus host (and port) of the current request, e.g. "http://test.host".
      def root_url
        request.protocol + request.host_with_port
      end

      # True if the URL maps to a regular file below public/, either directly,
      # with ".html" appended, or as an index.html inside a directory.
      # File.file? is used rather than an existence check so that a bare
      # directory (e.g. "/" -> public/) does not count as a servable file.
      def public_file_exists?(url)
        public_path = File.join(rails_public_path, url)
        candidates = [public_path, public_path + ".html", File.join(public_path, "index.html")]
        candidates.any? { |candidate| File.file?(candidate) }
      end

      def rails_public_path
        File.join(Rails.root.to_s, "public")
      end

      # Make URLs absolute paths, i.e. relative to the site root
      def make_absolute(url)
        url = remove_host(url) if has_protocol?(url)
        return url if url =~ %r{\A/}
        current_url = request.url || url_from_params
        # request.url carries scheme, host and possibly a query string; reduce it
        # to a bare path before using it as the base for a relative URL.
        current_url = remove_host(current_url) if has_protocol?(current_url)
        current_url = remove_query(current_url)
        current_url = File.dirname(current_url) unless current_url =~ %r{/\z}
        File.join(current_url, url)
      end

      # Strips "scheme://host" from the URL, returning "/" when no path remains.
      def remove_host(url)
        url_no_host = url[%r{^[a-z]+://[^/]+(/.+)$}, 1]
        url_no_host.blank? ? "/" : url_no_host
      end

      # Returns everything before the first "?" (an empty string if the URL
      # starts with "?"), never nil.
      def remove_query(url)
        url[/\A[^?]*/]
      end

      # Returns everything before the first "#" (an empty string if the URL
      # starts with "#"), never nil.
      def strip_anchor(url)
        url[/\A[^#]*/]
      end

      # Each URL is required to have at least one HTTP method for which there is a route with an action
      def check_action_exists(url)
        return if routes_from_url(url).any? { |route| route_has_action?(route) }
        raise_invalid_url(url, "No action or template")
      end

      # True if the route's controller defines the action as a public method or
      # has a template for it.
      def route_has_action?(route)
        controller = "#{route[:controller].camelize}Controller".constantize
        # public_instance_methods returns Symbols on Ruby 1.9+, so a String action
        # name never matched with include?; public_method_defined? accepts both.
        controller.public_method_defined?(route[:action]) || template_file_exists?(route, controller)
      end

      # True if any of the controller's view directories contains a template
      # named after the route's action (any extension).
      def template_file_exists?(route, controller)
        # Workaround for Rails 1.2 that doesn't have the view_paths method
        template_dirs = controller.respond_to?(:view_paths) ? controller.view_paths : [controller.view_root]
        template_dirs.any? do |template_dir|
          pattern = File.join(template_dir.to_s, controller.controller_path, "#{route[:action]}.*")
          !Dir.glob(pattern).empty?
        end
      end

      # This is a special case where on my site I had a catch all route for 404s. If you have
      # such a route, you can override this method and check for it, i.e. you could do something
      # like this:
      #
      # if params[:action] == "rescue_404"
      #   raise Html::Test::InvalidUrl.new("Action rescue_404 invoked for url '#{url}'")
      # end
      #
      # An exception raised here only discards the route for the HTTP method being
      # tried (see routes_from_url); the URL fails if no method yields a route.
      def check_not_404(url, params)
        # This method is unimplemented by default
      end

      # Returns the recognized route params for every HTTP method that has a
      # route for the URL. Raises Html::Test::InvalidUrl if there are none.
      def routes_from_url(url)
        routes = ROUTE_METHODS.map do |method|
          begin
            # Need to specify the method here for RESTful resource routes to work, i.e.
            # for /posts/1 to be recognized as the show action etc.
            recognized = route_set.recognize_path(url, :method => method)
            check_not_404(url, recognized)
            recognized
          rescue StandardError
            # Could not find a route with that method
            nil
          end
        end.compact
        routes.present? ? routes : raise_invalid_url(url, "Cannot find a route")
      end

      # Builds a path from the given options (the request params by default).
      # The caller's hash is not modified.
      def url_from_params(options = params)
        return "/" if params.empty?
        route_options = symbolize_hash(options)
        route_options[:controller] ||= params[:controller]
        route_set.generate_extras(route_options)[0]
      end

      # The application's route set. ActionController::Routing::Routes does not
      # exist in Rails 3, so prefer Rails.application.routes when it is available.
      def route_set
        if defined?(::Rails) && ::Rails.respond_to?(:application) && ::Rails.application
          ::Rails.application.routes
        else
          ::ActionController::Routing::Routes
        end
      end

      # Convert all keys in the hash to symbols. Not sure why this is needed.
      def symbolize_hash(hash)
        hash.keys.inject({}) { |symbolized, key| symbolized[key.to_sym] = hash[key]; symbolized }
      end

      def raise_invalid_url(url, message)
        raise Html::Test::InvalidUrl,
              "#{message} for url '#{url}' request_uri='#{request.url}' body='#{response.body}'"
      end
    end
  end
end
@@ -0,0 +1,57 @@
1
module Html
  module Test
    # Helpers for extracting URLs from an HTML response and classifying them.
    # Including classes must provide the response, either through a #response
    # method or a @response instance variable (see #response_body).
    module UrlSelector
      # True for URLs that should not be checked at all: blank URLs, URLs with a
      # non-HTTP protocol, and javascript:/mailto:/"#" URLs.
      # root_url is accepted for signature compatibility and is not used.
      def skip_url?(url, root_url = nil)
        url.blank? || unsupported_protocol?(url) || special_url?(url)
      end

      def special_url?(url)
        [/^javascript:/, /^mailto:/, /^\#$/].any? { |pattern| url =~ pattern }
      end

      # True if the URL is an http(s) URL pointing outside root_url.
      # Without a root_url every http(s) URL counts as external.
      #
      # root_url is expected to be "scheme://host[:port]". The match is made on a
      # boundary, so "http://example.com.evil.org" and "http://example.com:8080"
      # are external to "http://example.com"; a plain prefix test accepted both.
      def external_http?(url, root_url = nil)
        return false unless http_protocol?(url)
        return true unless root_url
        !url_within_root?(url, root_url)
      end

      def http_protocol?(url)
        url =~ %r{^http(?:s)?://} ? true : false
      end

      def has_protocol?(url)
        url =~ %r{^[a-z]+://} ? true : false
      end

      def unsupported_protocol?(url)
        has_protocol?(url) && !http_protocol?(url)
      end

      # href attribute of every <a> element (nil for anchors without one).
      def anchor_urls
        select("a").map { |link| link.attributes['href'] }
      end

      # src attribute of every <img> element.
      def image_urls
        select("img").map { |image| image.attributes['src'] }
      end

      # action attribute of every <form> element.
      def form_urls
        select("form").map { |form| form.attributes['action'] }
      end

      def response_body
        self.respond_to?(:response) ? response.body : @response.body
      end

      # Elements of the response body matching the given selector pattern.
      def select(pattern)
        HTML::Selector.new(pattern).select(HTML::Document.new(response_body).root)
      end

      private

      # True if url equals root_url or continues it with a path, query or fragment.
      def url_within_root?(url, root_url)
        root = root_url.to_s.chomp('/')
        return false unless url.start_with?(root)
        remainder = url[root.length..-1]
        remainder.empty? || remainder =~ %r{\A[/?#]} ? true : false
      end
    end
  end
end
@@ -0,0 +1,62 @@
1
module Html
  module Test
    # Runs the controller class's validators against a response body and
    # records, per URL, which pages have already been validated.
    class ValidateFilter
      include ::Test::Unit::Assertions
      include ::Html::Test::Assertions

      attr_accessor :request, :response, :params, :validators

      class << self
        # False whenever Html::Test::Validator.revalidate_all is set; otherwise
        # the recorded value for the URL (nil if it was never validated).
        def already_validated?(url)
          Html::Test::Validator.revalidate_all ? false : validated_urls[url]
        end

        def mark_url_validated(url)
          validated_urls[url] = true
        end

        # Hash of URL => true, created on first use.
        def validated_urls
          @validated_urls ||= {}
        end

        # Used in testing (of html_test_extension plugin)
        # to remove the validated_urls hash
        # so can test with the same url.
        def clear_validated_urls
          @validated_urls = {}
        end
      end

      # controller - object responding to #request, #response and #params whose
      # class responds to .validators.
      def initialize(controller)
        self.request = controller.request
        self.response = controller.response
        self.params = controller.params
        self.validators = controller.class.validators
      end

      # Validates the stripped response body unless the page should not be
      # validated or its URL has already been validated.
      def validate_page
        # request.request_uri no longer exists in Rails 3, hence request.url
        url = request.url
        return unless should_validate?
        return if ValidateFilter.already_validated?(url)

        # pass :verbose => true as a fourth argument for verbose output
        assert_validates(validators, response.body.strip, url)
        ValidateFilter.mark_url_validated(url)
      end

      # Override this method if you only want to validate a subset of pages
      def should_validate?
        # In Rails 3 response.status is a Fixnum, which never matches a regexp,
        # and response.headers['Content-Type'] is blank; hence to_s and
        # content_type.
        status_ok = response.status.to_s =~ /200/
        status_ok && (response.content_type =~ /text\/html/i || response.body =~ /<html/)
      end
    end
  end
end
data/lib/validator.rb ADDED
@@ -0,0 +1,127 @@
1
require 'fileutils'
require 'net/http'
require 'open3'
require 'tempfile'
require 'tmpdir'
4
+
5
module Html
  module Test
    # HTML validation back ends: tidy, the W3C validator service and xmllint.
    # Each *_errors method returns nil when the document validates and an error
    # message String otherwise.
    class Validator

      # verbose = true shows "validating ..."
      # verbose = false shows NOTHING
      @@verbose = true
      cattr_accessor :verbose
      #
      # revalidate_all = true will validate every call to a url
      # revalidate_all = false will only validate the first call to a url
      @@revalidate_all = true
      cattr_accessor :revalidate_all

      # Regexps; tidy errors matching any of them are ignored.
      @@tidy_ignore_list = []
      cattr_accessor :tidy_ignore_list

      # For local validation you might change this to http://localhost/validator/htdocs/check
      DEFAULT_W3C_URL = "http://validator.w3.org/check"
      @@w3c_url = DEFAULT_W3C_URL
      cattr_accessor :w3c_url

      # Whether the W3C validator should show the HTML document being validated in
      # its response. Set to 0 to disable.
      @@w3c_show_source = "1"
      cattr_accessor :w3c_show_source

      DEFAULT_DTD = File.join(File.dirname(__FILE__), 'DTD', 'xhtml1-strict.dtd')

      # Path to DTD file that the xmllint validator uses
      def self.dtd(document)
        DEFAULT_DTD
      end

      # Validate an HTML document string using tidy.
      # Code excerpted from the rails_tidy plugin
      def self.tidy_errors(body)
        tidy = RailsTidy.tidy_factory
        begin
          tidy.clean(body)
          relevant = tidy.errors.reject do |error|
            tidy_ignore_list.any? { |pattern| error =~ pattern }
          end
          errors = relevant.join("\n")
        ensure
          # Release the tidy object even when cleaning raises.
          tidy.release
        end
        errors.blank? ? nil : errors
      end

      # Validate an HTML document string by going to the online W3C validator.
      # Credit for the original code goes to Scott Baron (htonl)
      def self.w3c_errors(body)
        response = Net::HTTP.post_form(URI.parse(w3c_url),
                                       'ss' => w3c_show_source, 'fragment' => body)
        status = response['x-w3c-validator-status']
        return nil if status == 'Valid'

        # Reference in the stylesheets
        html = response.body.to_s.sub(%r{@import "./base.css"},
                                      %Q{@import "#{File.dirname(w3c_url)}/base.css"})
        response_file = find_unique_path(File.join(tmp_dir, "w3c_response.html"))

        # Without force_encoding, Ruby 1.9.3 / Rails 3 raised errors such as
        # Encoding::UndefinedConversionError: "\xE2" from ASCII-8BIT to UTF-8
        File.open(response_file, "w") { |file| file.puts(html.force_encoding('UTF-8')) }

        "W3C status #{status}. Response from W3C was written to the file #{response_file}"
      end

      # Validate an HTML document string using the xmllint command line validator tool.
      # Returns nil if validation passes and an error message otherwise.
      # Original code taken from the book "Enterprise Integration with Ruby"
      def self.xmllint_errors(body)
        dtd_path = dtd(body)
        if dtd_path =~ /^doctype$/i
          # Use the DOCTYPE declaration
          document = body
          validation_args = ["--valid"]
        else
          # Override the DOCTYPE declaration
          document = body.sub(/<!DOCTYPE[^>]+>/m, "")
          validation_args = ["--dtdvalid", dtd_path]
        end

        # Keep the Tempfile referenced while xmllint runs: an unreferenced
        # Tempfile can be unlinked by its finalizer before the command reads it.
        doc = Tempfile.new("xmllint")
        begin
          doc.puts(document)
          doc.close
          argv = ["xmllint", "--noout"] + validation_args + [doc.path]
          output, succeeded = run_xmllint(argv)
          return nil if succeeded

          failure_doc = File.join(tmp_dir, "xmllint_last_response.html")
          FileUtils.cp doc.path, failure_doc
          "command='#{argv.join(' ')}'. HTML document at '#{failure_doc}'. " + output
        ensure
          doc.close!
        end
      end

      # Runs the command given as an argument array, without a shell, so no
      # shell-specific redirection (such as bash's "&>") or quoting is involved.
      # Returns [combined stdout/stderr output, true if the exit status was 0].
      # A command that cannot be started is reported as a failure with the
      # system error message as output.
      def self.run_xmllint(argv)
        output, status = Open3.capture2e(*argv)
        [output, status.success?]
      rescue SystemCallError => e
        [e.message, false]
      end

      # NOTE: the helpers below are class methods, which a bare `private` does not
      # affect; they remain publicly callable, as before.

      # Returns path if nothing exists there, otherwise the first of
      # "name-2.ext", "name-3.ext", ... in the same directory that does not exist.
      def self.find_unique_path(path)
        ext = File.extname(path)
        name_no_ext = File.basename(path, ext)
        dir = File.dirname(path)
        counter = 2
        while File.exist?(path)
          path = File.join(dir, "#{name_no_ext}-#{counter}#{ext}")
          counter += 1
        end
        path
      end

      # Writes contents to a new temporary file and returns its path.
      # NOTE(review): the Tempfile object is not retained, so the file may be
      # removed once it is garbage collected; use the path promptly.
      def self.create_tmp_file(name, contents = "")
        tmp_file = Tempfile.new(name)
        tmp_file.puts(contents)
        tmp_file.close
        tmp_file.path
      end

      def self.tmp_dir
        Dir.tmpdir
      end
    end
  end
end
data/rails/init.rb ADDED
@@ -0,0 +1 @@
1
# Load the html_test extensions only in the test environment.
# The RAILS_ENV constant is not defined by every Rails version, so ask
# Rails.env first and fall back to the constant, then to the environment variable.
html_test_env =
  if defined?(::Rails) && ::Rails.respond_to?(:env)
    ::Rails.env.to_s
  elsif defined?(::RAILS_ENV)
    ::RAILS_ENV
  else
    ENV['RAILS_ENV']
  end
require 'ccls-html_test' if html_test_env == 'test'
data/script/rails ADDED
@@ -0,0 +1,6 @@
1
#!/usr/bin/env ruby
# Run automatically by the "rails" command when it is invoked from the root of
# the application with Rails 3 gems installed.

script_dir = File.dirname(__FILE__)

# Both paths live in the config directory next to this script's directory.
APP_PATH = File.expand_path(File.join('..', 'config', 'application'), script_dir)
require File.expand_path(File.join('..', 'config', 'boot'), script_dir)
require 'rails/commands'
data/script/validate ADDED
@@ -0,0 +1,47 @@
1
#!/usr/bin/env ruby
# == Synopsis
# This script validates an HTML page via HTTP and checks for broken
# links and images. Links are followed one step away from the start page.
# HTML validation is also done for pages linked internally from the start page
# (i.e. relative URLs).
#
# == Usage
#   vendor/plugins/html_test/script/validate http://my.url.com [options]
#
# Options:
#
# --no-follow::                  Don't follow any anchor or images URLs on
#                                the start page (i.e. only one page is requested).
#
# --validators validator_list::  A comma separated list of validators to use
#                                for HTML validation. Supported validators:
#                                tidy, xmllint, w3c. Default validator is
#                                tidy if it is installed, otherwise w3c.
#
# --dtd dtd_path::               Path to the DTD file to use for xmllint
#                                validation. By default xmllint will use
#                                the XHTML 1.0 strict DTD. Set dtd_path
#                                to "doctype" to make xmllint use the
#                                DTD specified in the DOCTYPE tag
#                                (can be slow).
#
# --skip skip_patterns::         A comma separated list of regexp patterns for
#                                URLs to not visit. Using the pattern '.*'
#                                is equivalent to the --no-follow option.
#
# --only only_pattern::          Only visit URLs matching given regexp pattern
#
# --no-external::                Do not visit external URLs, i.e. URLs with
#                                different domain than the start page.
#
# --quiet::                      Don't output anything unless there is a failure.

require 'optparse'
require 'uri'

require File.join(File.dirname(__FILE__), "..", "..", "..", "..", "test", "test_helper")
require File.join(File.dirname(__FILE__), "..", "lib", "html_test")

# Prints the comment header at the top of this script (everything after the
# shebang up to the first non-comment line) and exits with the given status.
# This replaces RDoc::usage: the rdoc/usage library is not available on
# Ruby 1.9 and later, so requiring it made the script fail to load.
def print_usage_and_exit(status = 1)
  header = File.readlines(__FILE__).take_while { |line| line.start_with?('#') }
  header.reject! { |line| line.start_with?('#!') }
  puts header.map { |line| line.sub(/\A# ?/, '') }
  exit(status)
end

begin
  options = Html::Test::LinkValidator.parse_command_line(ARGV)
rescue StandardError
  # Any failure to parse the command line shows the usage text and exits.
  print_usage_and_exit
end
Html::Test::LinkValidator.new(ARGV[0], options)