ccls-html_test 0.3.5
Sign up to get free protection for your applications and to get access to all the features.
- data/README.rdoc +142 -0
- data/Rakefile +59 -0
- data/VERSION +1 -0
- data/ccls-html_test.gemspec +57 -0
- data/lib/DTD/xhtml-lat1.ent +196 -0
- data/lib/DTD/xhtml-special.ent +80 -0
- data/lib/DTD/xhtml-symbol.ent +237 -0
- data/lib/DTD/xhtml.soc +14 -0
- data/lib/DTD/xhtml1-frameset.dtd +1235 -0
- data/lib/DTD/xhtml1-strict.dtd +978 -0
- data/lib/DTD/xhtml1-transitional.dtd +1201 -0
- data/lib/DTD/xhtml1.dcl +192 -0
- data/lib/assertions.rb +57 -0
- data/lib/ccls-html_test.rb +1 -0
- data/lib/html_test.rb +98 -0
- data/lib/link_validator.rb +175 -0
- data/lib/url_checker.rb +147 -0
- data/lib/url_selector.rb +57 -0
- data/lib/validate_filter.rb +62 -0
- data/lib/validator.rb +127 -0
- data/rails/init.rb +1 -0
- data/script/rails +6 -0
- data/script/validate +47 -0
- metadata +69 -0
data/lib/DTD/xhtml1.dcl
ADDED
@@ -0,0 +1,192 @@
|
|
1
|
+
<!SGML "ISO 8879:1986 (WWW)"
|
2
|
+
|
3
|
+
-- SGML Declaration for XML 1.0 --
|
4
|
+
|
5
|
+
-- from:
|
6
|
+
Final text of revised Web SGML Adaptations Annex (TC2) to ISO 8879:1986
|
7
|
+
ISO/IEC JTC1/SC34 N0029: 1998-12-06
|
8
|
+
Annex L.2 (informative): SGML Declaration for XML
|
9
|
+
|
10
|
+
changes made to accommodate validation are noted with 'VALID:'
|
11
|
+
--
|
12
|
+
|
13
|
+
CHARSET
|
14
|
+
BASESET
|
15
|
+
"ISO Registration Number 177//CHARSET
|
16
|
+
ISO/IEC 10646-1:1993 UCS-4 with implementation
|
17
|
+
level 3//ESC 2/5 2/15 4/6"
|
18
|
+
DESCSET
|
19
|
+
0 9 UNUSED
|
20
|
+
9 2 9
|
21
|
+
11 2 UNUSED
|
22
|
+
13 1 13
|
23
|
+
14 18 UNUSED
|
24
|
+
32 95 32
|
25
|
+
127 1 UNUSED
|
26
|
+
128 32 UNUSED
|
27
|
+
160 55136 160
|
28
|
+
55296 2048 UNUSED -- surrogates --
|
29
|
+
57344 8190 57344
|
30
|
+
65534 2 UNUSED -- FFFE and FFFF --
|
31
|
+
65536 1048576 65536
|
32
|
+
|
33
|
+
CAPACITY NONE -- Capacities are not restricted in XML --
|
34
|
+
|
35
|
+
SCOPE DOCUMENT
|
36
|
+
|
37
|
+
SYNTAX
|
38
|
+
SHUNCHAR NONE
|
39
|
+
BASESET "ISO Registration Number 177//CHARSET
|
40
|
+
ISO/IEC 10646-1:1993 UCS-4 with implementation
|
41
|
+
level 3//ESC 2/5 2/15 4/6"
|
42
|
+
DESCSET
|
43
|
+
0 1114112 0
|
44
|
+
FUNCTION
|
45
|
+
RE 13
|
46
|
+
RS 10
|
47
|
+
SPACE 32
|
48
|
+
TAB SEPCHAR 9
|
49
|
+
NAMING
|
50
|
+
LCNMSTRT ""
|
51
|
+
UCNMSTRT ""
|
52
|
+
NAMESTRT
|
53
|
+
58 95 192-214 216-246 248-305 308-318 321-328
|
54
|
+
330-382 384-451 461-496 500-501 506-535 592-680
|
55
|
+
699-705 902 904-906 908 910-929 931-974 976-982
|
56
|
+
986 988 990 992 994-1011 1025-1036 1038-1103
|
57
|
+
1105-1116 1118-1153 1168-1220 1223-1224
|
58
|
+
1227-1228 1232-1259 1262-1269 1272-1273
|
59
|
+
1329-1366 1369 1377-1414 1488-1514 1520-1522
|
60
|
+
1569-1594 1601-1610 1649-1719 1722-1726
|
61
|
+
1728-1742 1744-1747 1749 1765-1766 2309-2361
|
62
|
+
2365 2392-2401 2437-2444 2447-2448 2451-2472
|
63
|
+
2474-2480 2482 2486-2489 2524-2525 2527-2529
|
64
|
+
2544-2545 2565-2570 2575-2576 2579-2600
|
65
|
+
2602-2608 2610-2611 2613-2614 2616-2617
|
66
|
+
2649-2652 2654 2674-2676 2693-2699 2701
|
67
|
+
2703-2705 2707-2728 2730-2736 2738-2739
|
68
|
+
2741-2745 2749 2784 2821-2828 2831-2832
|
69
|
+
2835-2856 2858-2864 2866-2867 2870-2873 2877
|
70
|
+
2908-2909 2911-2913 2949-2954 2958-2960
|
71
|
+
2962-2965 2969-2970 2972 2974-2975 2979-2980
|
72
|
+
2984-2986 2990-2997 2999-3001 3077-3084
|
73
|
+
3086-3088 3090-3112 3114-3123 3125-3129
|
74
|
+
3168-3169 3205-3212 3214-3216 3218-3240
|
75
|
+
3242-3251 3253-3257 3294 3296-3297 3333-3340
|
76
|
+
3342-3344 3346-3368 3370-3385 3424-3425
|
77
|
+
3585-3630 3632 3634-3635 3648-3653 3713-3714
|
78
|
+
3716 3719-3720 3722 3725 3732-3735 3737-3743
|
79
|
+
3745-3747 3749 3751 3754-3755 3757-3758 3760
|
80
|
+
3762-3763 3773 3776-3780 3904-3911 3913-3945
|
81
|
+
4256-4293 4304-4342 4352 4354-4355 4357-4359
|
82
|
+
4361 4363-4364 4366-4370 4412 4414 4416 4428
|
83
|
+
4430 4432 4436-4437 4441 4447-4449 4451 4453
|
84
|
+
4455 4457 4461-4462 4466-4467 4469 4510 4520
|
85
|
+
4523 4526-4527 4535-4536 4538 4540-4546 4587
|
86
|
+
4592 4601 7680-7835 7840-7929 7936-7957
|
87
|
+
7960-7965 7968-8005 8008-8013 8016-8023 8025
|
88
|
+
8027 8029 8031-8061 8064-8116 8118-8124 8126
|
89
|
+
8130-8132 8134-8140 8144-8147 8150-8155
|
90
|
+
8160-8172 8178-8180 8182-8188 8486 8490-8491
|
91
|
+
8494 8576-8578 12295 12321-12329 12353-12436
|
92
|
+
12449-12538 12549-12588 19968-40869 44032-55203
|
93
|
+
|
94
|
+
LCNMCHAR ""
|
95
|
+
UCNMCHAR ""
|
96
|
+
NAMECHAR
|
97
|
+
45-46 183 720-721 768-837 864-865 903 1155-1158
|
98
|
+
1425-1441 1443-1465 1467-1469 1471 1473-1474
|
99
|
+
1476 1600 1611-1618 1632-1641 1648 1750-1764
|
100
|
+
1767-1768 1770-1773 1776-1785 2305-2307 2364
|
101
|
+
2366-2381 2385-2388 2402-2403 2406-2415
|
102
|
+
2433-2435 2492 2494-2500 2503-2504 2507-2509
|
103
|
+
2519 2530-2531 2534-2543 2562 2620 2622-2626
|
104
|
+
2631-2632 2635-2637 2662-2673 2689-2691 2748
|
105
|
+
2750-2757 2759-2761 2763-2765 2790-2799
|
106
|
+
2817-2819 2876 2878-2883 2887-2888 2891-2893
|
107
|
+
2902-2903 2918-2927 2946-2947 3006-3010
|
108
|
+
3014-3016 3018-3021 3031 3047-3055 3073-3075
|
109
|
+
3134-3140 3142-3144 3146-3149 3157-3158
|
110
|
+
3174-3183 3202-3203 3262-3268 3270-3272
|
111
|
+
3274-3277 3285-3286 3302-3311 3330-3331
|
112
|
+
3390-3395 3398-3400 3402-3405 3415 3430-3439
|
113
|
+
3633 3636-3642 3654-3662 3664-3673 3761
|
114
|
+
3764-3769 3771-3772 3782 3784-3789 3792-3801
|
115
|
+
3864-3865 3872-3881 3893 3895 3897 3902-3903
|
116
|
+
3953-3972 3974-3979 3984-3989 3991 3993-4013
|
117
|
+
4017-4023 4025 8400-8412 8417 12293 12330-12335
|
118
|
+
12337-12341 12441-12442 12445-12446 12540-12542
|
119
|
+
|
120
|
+
NAMECASE
|
121
|
+
GENERAL NO
|
122
|
+
ENTITY NO
|
123
|
+
DELIM
|
124
|
+
GENERAL SGMLREF
|
125
|
+
HCRO "&#x"
|
126
|
+
-- Ampersand followed by "#x" (without quotes) --
|
127
|
+
NESTC "/"
|
128
|
+
NET ">"
|
129
|
+
PIC "?>"
|
130
|
+
SHORTREF NONE
|
131
|
+
|
132
|
+
NAMES
|
133
|
+
SGMLREF
|
134
|
+
|
135
|
+
QUANTITY
|
136
|
+
NONE -- Quantities are not restricted in XML --
|
137
|
+
|
138
|
+
ENTITIES
|
139
|
+
"amp" 38
|
140
|
+
"lt" 60
|
141
|
+
"gt" 62
|
142
|
+
"quot" 34
|
143
|
+
"apos" 39
|
144
|
+
|
145
|
+
FEATURES
|
146
|
+
MINIMIZE
|
147
|
+
DATATAG NO
|
148
|
+
OMITTAG NO
|
149
|
+
RANK NO
|
150
|
+
SHORTTAG
|
151
|
+
STARTTAG
|
152
|
+
EMPTY NO
|
153
|
+
UNCLOSED NO
|
154
|
+
NETENABL IMMEDNET
|
155
|
+
ENDTAG
|
156
|
+
EMPTY NO
|
157
|
+
UNCLOSED NO
|
158
|
+
ATTRIB
|
159
|
+
DEFAULT YES
|
160
|
+
OMITNAME NO
|
161
|
+
VALUE NO
|
162
|
+
EMPTYNRM YES
|
163
|
+
IMPLYDEF
|
164
|
+
ATTLIST NO -- VALID: was YES --
|
165
|
+
DOCTYPE NO
|
166
|
+
ELEMENT NO -- VALID: was YES --
|
167
|
+
ENTITY NO
|
168
|
+
NOTATION NO -- VALID: was YES --
|
169
|
+
LINK
|
170
|
+
SIMPLE NO
|
171
|
+
IMPLICIT NO
|
172
|
+
EXPLICIT NO
|
173
|
+
OTHER
|
174
|
+
CONCUR NO
|
175
|
+
SUBDOC NO
|
176
|
+
FORMAL NO
|
177
|
+
URN NO
|
178
|
+
KEEPRSRE YES
|
179
|
+
VALIDITY TYPE -- VALID: was NOASSERT --
|
180
|
+
ENTITIES
|
181
|
+
REF ANY
|
182
|
+
INTEGRAL YES
|
183
|
+
|
184
|
+
APPINFO NONE
|
185
|
+
|
186
|
+
SEEALSO "ISO 8879//NOTATION Extensible Markup Language (XML) 1.0//EN"
|
187
|
+
>
|
188
|
+
<!-- Id: $Id: xml1.dcl,v 4.3 2001/04/08 10:30:18 altheim Exp $ SMI
|
189
|
+
Revisions:
|
190
|
+
#1999-04-09 changes for XML validation
|
191
|
+
#2001-04-08 updated ISO registration number for UCS-4
|
192
|
+
-->
|
data/lib/assertions.rb
ADDED
@@ -0,0 +1,57 @@
|
|
1
|
+
module Html
  module Test
    # Assertions that run markup through the checkers exposed by
    # Html::Test::Validator. The including class must provide +assert+ and,
    # when no body is passed explicitly, an <tt>@response</tt> that responds
    # to +body+.
    module Assertions
      # Asserts that +body+ (default: <tt>@response.body</tt>) passes the
      # :tidy validator.
      def assert_tidy(body = nil)
        assert_validates(:tidy, body)
      end

      # Asserts that +body+ (default: <tt>@response.body</tt>) passes the
      # :w3c validator.
      def assert_w3c(body = nil)
        assert_validates(:w3c, body)
      end

      # Asserts that +body+ (default: <tt>@response.body</tt>) passes the
      # :xmllint validator.
      def assert_xmllint(body = nil)
        assert_validates(:xmllint, body)
      end

      # Runs +body+ through each validator named in +types+ and fails the
      # assertion for every validator that reports an error.
      #
      # types::   a single validator name or an Array of names; defaults to
      #           [:tidy, :w3c, :xmllint]. For each name +t+ the class method
      #           <tt>Html::Test::Validator.<t>_errors(body)</tt> is called and
      #           a nil result is treated as success.
      # body::    markup to validate; defaults to <tt>@response.body</tt>.
      # url::     optional URL, used only in progress and failure messages.
      # options:: <tt>:verbose</tt> overrides Html::Test::Validator.verbose
      #           when it is non-nil (an explicit +false+ silences output).
      def assert_validates(types = nil, body = nil, url = nil, options = {})
        verbose = options[:verbose].nil? ? Html::Test::Validator.verbose : options[:verbose]
        body ||= @response.body
        types ||= [:tidy, :w3c, :xmllint]
        types = [types] unless types.is_a?(Array)

        types.each do |t|
          html_test_log("validating #{url} with #{t} ... ") if verbose
          error = Html::Test::Validator.send("#{t}_errors", body)

          if error.nil?
            html_test_log("OK\n") if verbose
            next
          end

          html_test_log("FAILURE\n") if verbose
          assert_message = "Validator #{t} failed"
          assert_message += " for url #{url}" if url
          assert_message += " with message '#{error}'"

          # Why would I want to print this in the log file???
          # Rails.logger.error(assert_message + " for response body:\n #{with_line_counts(body)}")
          assert(error.nil?, assert_message)
        end
      end

      private

      # Prints +message+ to standard output. Kept for includers that call it
      # directly; assert_validates uses html_test_log instead.
      def log(message)
        print(message)
      end

      # Progress output for assert_validates. It has its own name because an
      # including class may define an unrelated +log+ method with a different
      # arity, which would otherwise make verbose validation raise
      # ArgumentError.
      def html_test_log(message)
        print(message)
      end

      # Returns +body+ with a right-aligned, 1-based line number in front of
      # every line, framed above and below by a 40-dash separator line.
      def with_line_counts(body)
        separator = ("-" * 40) + $/
        body_counts = separator.dup
        # String no longer responds to each_with_index in ruby 1.9.3, so walk
        # the lines explicitly and terminate each numbered line ourselves.
        body.each_line.with_index do |line, i|
          body_counts << sprintf("%4u %s", i + 1, line.chomp) << $/
        end
        body_counts << separator
        body_counts
      end
    end
  end
end
|
@@ -0,0 +1 @@
|
|
1
|
+
require 'html_test'
|
data/lib/html_test.rb
ADDED
@@ -0,0 +1,98 @@
|
|
1
|
+
#if !defined?(RAILS_ENV) || RAILS_ENV == 'test'
|
2
|
+
# Work out which environment we are running in. Rails.env is preferred; the
# legacy RAILS_ENV constant is only consulted when Rails.env is unavailable.
# The result is nil when no Rails environment can be detected (standalone use).
html_test_env =
  if defined?(Rails) && Rails.respond_to?(:env)
    Rails.env.to_s
  elsif defined?(RAILS_ENV)
    RAILS_ENV.to_s
  end

# Everything below patches test and controller classes and probes the network,
# so it only runs standalone or in the test environment.
if html_test_env.nil? || html_test_env == 'test'

  require 'net/http'
  require 'uri'

  %w(validator assertions url_selector url_checker link_validator validate_filter).each do |file|
    require File.join(File.dirname(__FILE__), file)
  end

  # Make the validation assertions available in unit/functional tests ...
  class Test::Unit::TestCase
    include Html::Test::Assertions
  end

  # ... and in integration test sessions.
  module ActionController
    module Integration #:nodoc:
      class Session
        include Html::Test::Assertions
      end
    end
  end

  # ActionController::Base
  require 'action_controller'
  class ActionController::Base
    # When true, every response is passed to Html::Test::ValidateFilter.
    @@validate_all = false
    cattr_accessor :validate_all

    # Validator names stored for the validation filter (default: [:tidy]).
    @@validators = [:tidy]
    cattr_accessor :validators

    # When true, Html::Test::UrlChecker#check_urls_resolve runs after each action.
    @@check_urls = false
    cattr_accessor :check_urls

    # When true, Html::Test::UrlChecker#check_redirects_resolve runs after each action.
    @@check_redirects = false
    cattr_accessor :check_redirects

    after_filter :validate_page
    after_filter :check_urls_resolve
    after_filter :check_redirects_resolve

    private

    # after_filter: validates the rendered page when validate_all is enabled.
    def validate_page
      return unless validate_all
      Html::Test::ValidateFilter.new(self).validate_page
    end

    # after_filter: runs the URL checker when check_urls is enabled.
    def check_urls_resolve
      return unless check_urls
      Html::Test::UrlChecker.new(self).check_urls_resolve
    end

    # after_filter: runs the redirect checker when check_redirects is enabled.
    def check_redirects_resolve
      return unless check_redirects
      Html::Test::UrlChecker.new(self).check_redirects_resolve
    end
  end

  # Probe for a reachable W3C validator, preferring a local installation over
  # the URL already configured on Html::Test::Validator. The first candidate
  # that answers with HTTP 200 becomes Html::Test::Validator.w3c_url.
  validate = false
  candidate_urls = ["http://localhost/w3c-validator/check",
                    Html::Test::Validator.w3c_url]

  candidate_urls.each do |candidate|
    begin
      # Passing a URI (rather than a hand-split host and path) keeps an
      # explicit port and the query string intact.
      response = Net::HTTP.get_response(URI.parse(candidate))
      next unless response.code == '200'

      Html::Test::Validator.w3c_url = candidate
      validate = true
      break
    rescue StandardError
      # Best-effort probe: an unreachable or malformed candidate simply
      # means we try the next one.
    end
  end

  if validate
    # In Rails 3, ApplicationController not defined yet
    # ApplicationController.validate_all = true
    ActionController::Base.validate_all = true
    # default is :tidy, but it doesn't really validate.
    # I've purposely not closed tags and it doesn't complain.
    # :w3c is ridiculously slow! even when used locally
    # ApplicationController.validators = [:w3c]
    # In Rails 3, ApplicationController not defined yet
    ActionController::Base.validators = [:w3c]
    #ApplicationController.validators = [:tidy, :w3c]
    Html::Test::Validator.verbose = false
    Html::Test::Validator.revalidate_all = true
    Html::Test::Validator.tidy_ignore_list =
      [/<table> lacks "summary" attribute/]
    puts "Validating all html with #{Html::Test::Validator.w3c_url}"
  else
    puts "NOT validating html at all"
  end

end # if html_test_env.nil? || html_test_env == 'test'
|
@@ -0,0 +1,175 @@
|
|
1
|
+
require 'test/unit'
|
2
|
+
require 'net/http'
|
3
|
+
require 'uri'
|
4
|
+
|
5
|
+
require 'optparse' # OptionParser is used by LinkValidator.parse_command_line

module Html
  module Test
    # Fetches a start URL, validates its markup, and then requests every
    # anchor and image URL found on that page. Only the start page's links are
    # followed (no deeper crawl). The whole run happens in the constructor,
    # which raises a RuntimeError if any URL failed.
    #
    # NOTE(review): anchor_urls, image_urls, skip_url? and external_http? are
    # not defined here; presumably they come from UrlSelector and read
    # @response - confirm against url_selector.rb.
    class LinkValidator
      include ::Test::Unit::Assertions
      include ::Html::Test::Assertions
      include UrlSelector

      attr_accessor :start_url
      attr_writer :options, :log

      # Option hash for this run (see #initialize for the recognised keys).
      def options
        @options ||= {}
      end

      # Accumulated text of everything passed to #output. This zero-argument
      # reader takes precedence over the private log(message) helper of
      # Html::Test::Assertions.
      def log
        @log ||= ""
      end

      # Validates +url+ and the URLs it links to.
      #
      # url::     absolute start URL; any "#fragment" is ignored.
      # options:: overrides for the defaults below
      #           :validators      - validator names (default "tidy" when the
      #                              RailsTidy constant exists, else "w3c")
      #           :skip_patterns   - Regexps; matching URLs are not requested
      #           :only_pattern    - Regexp; when set, only matching URLs are requested
      #           :follow_external - request external links (default true)
      #           :follow_links    - request links on the start page (default true)
      #           :quiet           - suppress non-error output (default false)
      #           :dtd             - assigned to Html::Test::Validator.dtd
      #
      # Raises RuntimeError listing the failed URLs when any request or
      # validation failed.
      def initialize(url, options = {})
        self.start_url = strip_anchor(url)

        # Default values for options
        self.options.update(
          :validators      => Object.const_defined?(:RailsTidy) ? %w(tidy) : %w(w3c),
          :skip_patterns   => [],
          :follow_external => true,
          :follow_links    => true,
          :quiet           => false
        )
        Html::Test::Validator.dtd = options[:dtd] if options[:dtd]

        self.options.merge!(options)

        validate_links
      end

      # Generates an options hash from the command line options.
      #
      # options_array:: ARGV-style array whose first element must be a URL.
      #                 Recognised switches are removed from the array in place.
      #
      # Raises RuntimeError when the first element does not look like a URL.
      def self.parse_command_line(options_array)
        if options_array[0] !~ /:\/\//
          raise "First argument must be URL, i.e. http://my.url.com"
        end

        options = {}

        opts = OptionParser.new
        opts.on("--no-follow") { options[:follow_links] = false }
        opts.on("--validators validators") do |validators|
          options[:validators] = validators.split(",")
        end
        opts.on('--dtd DTD') { |dtd| options[:dtd] = dtd }
        opts.on("--skip skip_patterns") do |skip_patterns|
          options[:skip_patterns] = skip_patterns.split(",").map { |p| Regexp.new(p) }
        end
        opts.on("--only only_pattern") do |only_pattern|
          options[:only_pattern] = Regexp.new(only_pattern)
        end
        opts.on('--no-external') { options[:follow_external] = false }
        opts.on('--quiet') { options[:quiet] = true }

        opts.parse!(options_array)

        options
      end

      protected

      # Requests the start page and, when :follow_links is set, the URLs it
      # references; raises if anything failed. Failures are reported even
      # when links are not followed, so a broken start page is never silent.
      def validate_links
        @url_history = []
        @failed_urls = []

        get(start_url)
        check_linked_urls if options[:follow_links]

        return if @failed_urls.empty?
        raise "The following URLs had failures:\n#{@failed_urls.join("\n")}."
      end

      # Requests each link and image on the start page, honouring the skip,
      # only and external-link options. Internal pages are validated;
      # external ones are only checked for a successful response.
      def check_linked_urls
        (anchor_urls + image_urls).each do |url|
          if skip_url?(url)
            output "skipping url #{url}\n"
            next
          end
          next if options[:skip_patterns].any? { |pattern| url =~ pattern }
          next if visited?(url)
          next if options[:only_pattern] && url !~ options[:only_pattern]

          if !external_http?(url)
            get(url)
          elsif options[:follow_external]
            get(url, false)
          else
            output "skipping external link #{url}\n"
          end
        end
      end

      # Requests +path+ (relative or absolute), records it in the history and
      # optionally validates the response markup. Any failure is reported via
      # #output_error and the URL is added to @failed_urls instead of raising.
      def get(path, validate = true)
        url = canonical_url(path)
        output "GET #{url}"
        @response = Net::HTTP.get_response(URI.parse(url))
        output " ... #{@response.code}"
        @url_history << url
        unless @response.code == '200' || @response.code.start_with?('3')
          raise "Invalid response code #{@response.code} for url #{url}"
        end
        validate_response if validate
        output "\n"
      rescue SystemExit, SignalException, NoMemoryError
        # Never swallow Ctrl-C, exit requests or out-of-memory conditions.
        raise
      rescue Exception => e
        # Exception (not StandardError) is rescued on purpose: some test
        # frameworks raise assertion failures that are not StandardErrors.
        output_error "Exception thrown while getting url #{qualify_url(path)}:" +
          " #{e} #{e.backtrace.join("\n ")}\n"
        @failed_urls << qualify_url(path)
      end

      # Runs every configured validator against the last response, but only
      # when it was served as text/html.
      def validate_response
        return unless @response && @response.header['content-type'] =~ /text\/html/i

        options[:validators].each do |type|
          assert_validates(type)
          output " ... #{type}"
        end
      end

      # True when the canonical form of +path+ has already been requested.
      def visited?(path)
        @url_history.include?(canonical_url(path))
      end

      # Returns +url+ without its "#fragment" part (a bare trailing "#" is
      # removed as well).
      def strip_anchor(url)
        url[/\A[^#]*/]
      end

      # Turns +url+ into an absolute URL relative to the start URL.
      def qualify_url(url)
        # Already absolute: starts with a scheme. Anchoring the match keeps a
        # relative URL that merely contains "://" in its query string relative.
        return url if url =~ %r{\A[a-z][a-z0-9+.\-]*://}i

        if url =~ /^\//
          # Relative site root
          root_url + url
        else
          # Relative start dir
          start_dir + "/" + url
        end
      end

      # Absolute form of +url+ with the fragment removed; used as history key.
      def canonical_url(url)
        strip_anchor(qualify_url(url))
      end

      # Scheme and host part of the start URL, e.g. "http://site.com".
      def root_url
        start_url[%r{^([a-z]+://[^/]+)}, 1]
      end

      # Directory of the start URL, without a trailing slash.
      def start_dir
        return start_url if start_url == root_url # When start_url=http://site.com
        start_url[%r{^(.+?)/[^/]*$}, 1] # Strip last slash and everything after it
      end

      # Like #output, but the message is printed even in quiet mode.
      def output_error(message)
        output(message, :error)
      end

      # Appends +message+ to the log and prints it unless :quiet is set
      # (messages of type :error are always printed).
      def output(message, type = :info)
        log << message
        print message if type == :error || !options[:quiet]
      end
    end
  end
end
|