raakt 0.4 → 0.5
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/raakt.rb +346 -274
- data/tests/areadoc1.htm +12 -0
- data/tests/areadoc2.htm +12 -0
- data/tests/areadoc3.htm +12 -0
- data/tests/bdoc.htm +9 -0
- data/tests/charset_nocharset_specified.htm +8 -0
- data/tests/charset_utf8.htm +9 -0
- data/tests/embeddoc1.htm +9 -0
- data/tests/formdoc1.htm +14 -0
- data/tests/formdoc2.htm +15 -0
- data/tests/formdoc3.htm +22 -0
- data/tests/full_berg.htm +441 -0
- data/tests/inputimgdoc1.htm +14 -0
- data/tests/langinfodoc2.htm +8 -0
- data/tests/nestedtabledoc.htm +205 -0
- data/tests/raakt_test.rb +204 -144
- metadata +21 -7
- data/lib/raakt (kopia).rb +0 -495
data/lib/raakt.rb
CHANGED
@@ -1,101 +1,110 @@
|
|
1
|
-
#
|
2
|
-
#
|
1
|
+
# == The Ruby Accessibility Analysis Kit (RAAKT)
|
2
|
+
# :title: Ruby Accessibility Analysis Kit (RAAKT)
|
3
|
+
# Author:: Peter Krantz (http://www.peterkrantz.com/)
|
4
|
+
# License:: See LICENSE file
|
3
5
|
#
|
4
|
-
#
|
5
|
-
|
6
|
+
# RAAKT is a toolkit to find accessibility issues in HTML documents. RAAKT can be used as part of an automatic test procedure or as a standalone module for mass validation of all pages in a site.
|
7
|
+
#
|
8
|
+
# The ambition has been to provide tests that can be fully automated. Currently, none of the included tests should fail for any web page.
|
9
|
+
#
|
10
|
+
# Many of the tests included here map to tests defined in the Unified Web Evaluation Methodology (UWEM[http://www.wabcluster.org/uwem/tests/]). See note for each test to find the corresponding UWEM test.
|
11
|
+
#
|
12
|
+
# == Output
|
13
|
+
# RAAKT output is in the form of an array of Raakt::ErrorMessage objects.
|
14
|
+
#
|
15
|
+
# == Contributions
|
16
|
+
# Thanks to Derek Perrault for refactoring RAAKT to use Hpricot[http://code.whytheluckystiff.net/hpricot/] while at the same time making the code more readable.
|
17
|
+
#
|
18
|
+
# == Example usage
|
19
|
+
# See the examples folder for a small script that shows how to retrieve a remote web page and perform an accessibility test on it.
|
6
20
|
module Raakt
|
7
|
-
require '
|
21
|
+
require 'hpricot'
|
8
22
|
|
9
23
|
MESSAGES = {
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
24
|
+
:missing_title => "The title element is missing. Provide a descriptive title for your document.",
|
25
|
+
:empty_title => "The title element is empty. Provide a descriptive title for your document.",
|
26
|
+
:missing_alt => "Missing alt attribute for image (with src '%s').",
|
27
|
+
:missing_heading => "Missing first level heading (h1). Provide at least one first level heading describing document content.",
|
28
|
+
:wrong_h_structure => "Document heading structure is wrong.",
|
29
|
+
:first_h_not_h1 => "The first heading is not h1.",
|
30
|
+
:has_nested_tables => "You have one or more nested tables.",
|
31
|
+
:missing_semantics => "You have used %s for visual formatting. Use CSS instead.",
|
32
|
+
:has_flicker => "You have used <blink> and/or <marquee>. These may create accessibility issues and should be avoided.",
|
33
|
+
:missing_lang_info => "Document language information is missing. Use the lang attribute on the html element.",
|
34
|
+
:missing_th => "Missing table headings (th) for table #%s.",
|
35
|
+
:ambiguous_link_text => "One or more links have the same link text ('%s'). Make sure each link is unambiguous.",
|
36
|
+
:field_missing_label => "A field (with id/name '%s') is missing a corresponding label element. Make sure a label exists for all visible fields.",
|
37
|
+
:missing_frame_title => "Missing title attribute for frame with url %s",
|
38
|
+
:has_meta_refresh => "Client side redirect (meta refresh) detected. Use server side redirection instead.",
|
39
|
+
:charset_mismatch => "The character set specified in the HTTP headers does not match that specified in the markup.",
|
40
|
+
:embed_used => "You have used the embed element. It does not provide a way to express a text representation.",
|
41
|
+
:wrong_lang_code => "You have used a language code ('%s') not recognized in the ISO 639 standard.",
|
42
|
+
:fieldset_missing_legend => "Missing legend element for fieldset #%s.",
|
43
|
+
:missing_input_alt => "Missing alt attribute for image button with id/name '%s'.",
|
44
|
+
:missing_input_alt_text => "Missing alt text for image button with id/name '%s'.",
|
45
|
+
:missing_area_alt => "Missing alt attribute for area with id/name '%s'.",
|
46
|
+
:missing_area_alt_text => "Missing alt text for area with id/name '%s'."
|
25
47
|
}
|
26
48
|
|
27
|
-
VERSION = "0.
|
49
|
+
VERSION = "0.5"
|
28
50
|
|
29
51
|
class ErrorMessage
|
30
52
|
|
31
53
|
attr_reader :eid, :text, :note
|
32
|
-
|
54
|
+
|
33
55
|
def initialize(eid, note=nil)
|
34
56
|
@eid = eid
|
57
|
+
|
35
58
|
if note
|
36
|
-
@text = MESSAGES[eid].sub(/%s/, note)
|
59
|
+
@text = MESSAGES[@eid].sub(/%s/, note)
|
37
60
|
else
|
38
|
-
@text = MESSAGES[eid]
|
61
|
+
@text = MESSAGES[@eid]
|
39
62
|
end
|
40
63
|
@note = note
|
41
64
|
end
|
42
|
-
|
65
|
+
|
43
66
|
def to_s
|
44
|
-
@eid
|
67
|
+
"#{@eid}: #{@text}"
|
45
68
|
end
|
46
|
-
end
|
47
|
-
|
48
69
|
|
70
|
+
# Return single error message as an xml element.
|
71
|
+
def to_xml
|
72
|
+
"<message id=\"#{@eid}\">#{@text}</message>"
|
73
|
+
end
|
74
|
+
end
|
49
75
|
|
50
76
|
|
51
77
|
|
52
78
|
class Test
|
53
79
|
|
54
|
-
attr_accessor :
|
55
|
-
|
56
|
-
def initialize(html=nil)
|
80
|
+
attr_accessor :html, :headers, :user_agent, :ignore_bi
|
81
|
+
|
82
|
+
def initialize(html=nil, headers=nil)
|
57
83
|
@html = html
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
def feed(html)
|
63
|
-
@html = html || ""
|
64
|
-
if @html.length > 0
|
65
|
-
@soup = BeautifulSoup.new(@html)
|
66
|
-
else
|
67
|
-
raise "You called feed with no data. There is nothing to check."
|
68
|
-
end
|
84
|
+
@headers = headers
|
85
|
+
self.doc = @html if html
|
86
|
+
self.headers = @headers if headers
|
87
|
+
@ignore_bi = false
|
69
88
|
end
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
def feedurl(url)
|
74
|
-
if url.length == 0
|
75
|
-
raise "You called feedurl with a blank url. There is nothing to check."
|
76
|
-
end
|
77
|
-
|
78
|
-
#Clean the url and make sure protocol and trailing slash is available
|
79
|
-
url = "http://" + url unless url[0..3] == "http"
|
80
|
-
|
81
|
-
require 'open-uri'
|
82
89
|
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
if @html.length == 0
|
88
|
-
raise "Could not fetch html from the url #{url}. There is nothing to check."
|
89
|
-
else
|
90
|
-
@soup = BeautifulSoup.new(@html)
|
91
|
-
end
|
92
|
-
|
90
|
+
# Set the HTML used in the test.
|
91
|
+
def doc=(html)
|
92
|
+
Hpricot.buffer_size = 262144 #Allow for asp.net bastard-sized viewstate attributes...
|
93
|
+
@doc = Hpricot(html)
|
93
94
|
end
|
94
95
|
|
95
|
-
|
96
|
-
|
96
|
+
# Set HTTP headers to be used in the test. Headers are necessary for some tests (e.g. to check encoding).
|
97
|
+
def headers=(headers)
|
98
|
+
if headers
|
99
|
+
@headers = downcase_hash_keys(headers)
|
100
|
+
else
|
101
|
+
@headers = nil
|
102
|
+
end
|
103
|
+
end
|
104
|
+
|
105
|
+
|
106
|
+
# Call all check methods.
|
97
107
|
def all
|
98
|
-
#Call all check methods
|
99
108
|
messages = []
|
100
109
|
|
101
110
|
self.methods.each do |method|
|
@@ -107,69 +116,121 @@ module Raakt
|
|
107
116
|
return messages
|
108
117
|
end
|
109
118
|
|
119
|
+
|
120
|
+
# Verify that all fieldset elements have a legend child element. See UWEM 1.0 Test 12.3_HTML_01.
|
121
|
+
def check_fieldset_legend
|
122
|
+
messages = []
|
123
|
+
fieldsets = (@doc/"fieldset")
|
124
|
+
fieldset_instance = 1
|
125
|
+
for fieldset in fieldsets
|
126
|
+
if (fieldset/"legend").empty?
|
127
|
+
messages << ErrorMessage.new(:fieldset_missing_legend, fieldset_instance.to_s)
|
128
|
+
end
|
129
|
+
fieldset_instance += 1
|
130
|
+
end
|
131
|
+
messages
|
132
|
+
end
|
133
|
+
|
134
|
+
|
135
|
+
# Verify that the embed element isn't used. See UWEM 1.0 Test 1.1_HTML_06.
|
136
|
+
def check_embed
|
137
|
+
return [ErrorMessage.new(:embed_used)] unless (@doc/'embed').empty?
|
138
|
+
[]
|
139
|
+
end
|
140
|
+
|
110
141
|
|
142
|
+
# Verify that the character set specified in HTTP headers matches that specified in the HTML meta element.
|
143
|
+
def check_character_set
|
144
|
+
messages = []
|
145
|
+
header_charset = meta_charset = ""
|
146
|
+
if @headers and @headers.length > 0 then
|
147
|
+
if @headers.has_key?("content-type")
|
148
|
+
header_charset = parse_charset(@headers["content-type"].to_s)
|
149
|
+
end
|
150
|
+
|
151
|
+
#get meta element charset
|
152
|
+
meta_elements = @doc.search("//meta[@http-equiv]")
|
153
|
+
for element in meta_elements do
|
154
|
+
if element["http-equiv"].downcase == "content-type" then
|
155
|
+
meta_charset = parse_charset(element["content"])
|
156
|
+
end
|
157
|
+
end
|
158
|
+
|
159
|
+
if header_charset.length > 0 and meta_charset.length > 0
|
160
|
+
unless meta_charset == header_charset
|
161
|
+
messages << ErrorMessage.new(:charset_mismatch)
|
162
|
+
end
|
163
|
+
end
|
164
|
+
end
|
165
|
+
|
166
|
+
return messages
|
167
|
+
|
168
|
+
end
|
169
|
+
|
170
|
+
|
171
|
+
# Verify that all input type=image elements have an alt attribute.
|
172
|
+
def check_input_type_img
|
173
|
+
#Covers UWEM 1.0 Test 1.1_HTML_01
|
174
|
+
|
175
|
+
messages = []
|
176
|
+
image_input_buttons = @doc.search("input").select { |element| element['type'] =~ /image/i }
|
177
|
+
image_input_buttons.map { |element|
|
178
|
+
unless element['alt']
|
179
|
+
messages << ErrorMessage.new(:missing_input_alt, element['name'] || element['id'] || "")
|
180
|
+
else
|
181
|
+
if element['alt'].length == 0
|
182
|
+
messages << ErrorMessage.new(:missing_input_alt_text, element['name'] || element['id'] || "")
|
183
|
+
end
|
184
|
+
end
|
185
|
+
}
|
186
|
+
|
187
|
+
messages
|
188
|
+
end
|
189
|
+
|
190
|
+
|
191
|
+
# Verify that all img elements have an alt attribute.
|
111
192
|
def check_images
|
112
|
-
|
113
|
-
|
114
|
-
messages = []
|
115
|
-
|
116
|
-
for image in images:
|
117
|
-
if image["alt"] == nil:
|
118
|
-
img_src = image["src"] || ""
|
119
|
-
messages << ErrorMessage.new("missingalt", img_src)
|
120
|
-
end
|
121
|
-
end
|
122
|
-
|
123
|
-
return messages
|
193
|
+
no_alt_images = (@doc/"img:not([@alt])")
|
194
|
+
no_alt_images.map { |img| ErrorMessage.new(:missing_alt, img['src']) }
|
124
195
|
end
|
125
196
|
|
126
197
|
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
return messages
|
141
|
-
end
|
198
|
+
# Verify that all area elements have a non-empty alt attribute. See UWEM 1.0 Test 1.1_HTML_01 (together with check_images)
|
199
|
+
def check_areas
|
200
|
+
messages = []
|
201
|
+
area_elements = (@doc/"area")
|
202
|
+
area_elements.map { |element|
|
203
|
+
unless element['alt']
|
204
|
+
messages << ErrorMessage.new(:missing_area_alt, element['name'] || element['id'] || "unknown")
|
205
|
+
else
|
206
|
+
if element['alt'].length == 0
|
207
|
+
messages << ErrorMessage.new(:missing_area_alt_text, element['name'] || element['id'] || "unknown")
|
208
|
+
end
|
209
|
+
end
|
210
|
+
}
|
142
211
|
|
143
|
-
|
144
|
-
def check_has_heading
|
145
|
-
messages = []
|
146
|
-
|
147
|
-
if @soup.find_all("h1").length == 0
|
148
|
-
messages << ErrorMessage.new("missingheading")
|
149
|
-
end
|
150
|
-
|
151
|
-
return messages
|
212
|
+
messages
|
152
213
|
end
|
153
214
|
|
154
215
|
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
headings.push(@soup.find_all("h6")) if @soup.find_all("h6").length > 0
|
163
|
-
|
164
|
-
return headings.flatten
|
216
|
+
|
217
|
+
# Verify that the document has a non-empty title element.
|
218
|
+
def check_title
|
219
|
+
title = @doc.at('title')
|
220
|
+
return [ErrorMessage.new(:missing_title)] unless title
|
221
|
+
return [ErrorMessage.new(:empty_title)] if normalize_text(title.inner_html).empty?
|
222
|
+
[]
|
165
223
|
end
|
166
224
|
|
167
|
-
|
168
|
-
|
169
|
-
|
225
|
+
|
226
|
+
# Verify that the document has at least one h1 element.
|
227
|
+
def check_has_heading
|
228
|
+
return [ErrorMessage.new(:missing_heading)] if (@doc/'h1').empty?
|
229
|
+
[]
|
170
230
|
end
|
171
231
|
|
172
|
-
|
232
|
+
|
233
|
+
# Verify that heading elements (h1-h6) appear in the correct order (no levels skipped). See UWEM 1.0 Test 3.5_HTML_03.
|
173
234
|
def check_document_structure
|
174
235
|
messages = []
|
175
236
|
currentitem = 0
|
@@ -178,11 +239,11 @@ module Raakt
|
|
178
239
|
for heading in docheadings
|
179
240
|
if currentitem == 0
|
180
241
|
if level(heading.name) != 1
|
181
|
-
messages << ErrorMessage.new(
|
242
|
+
messages << ErrorMessage.new(:first_h_not_h1, "h" + heading.name[1,1])
|
182
243
|
end
|
183
244
|
else
|
184
245
|
if level(heading.name) - level(docheadings[currentitem - 1].name) > 1
|
185
|
-
messages << ErrorMessage.new(
|
246
|
+
messages << ErrorMessage.new(:wrong_h_structure)
|
186
247
|
break
|
187
248
|
end
|
188
249
|
end
|
@@ -191,122 +252,125 @@ module Raakt
|
|
191
252
|
|
192
253
|
end
|
193
254
|
|
194
|
-
|
255
|
+
messages
|
195
256
|
end
|
196
257
|
|
197
258
|
|
259
|
+
# Verify that the document does not have any nested tables. This is indicative of a table-based layout.
|
198
260
|
def check_for_nested_tables
|
199
261
|
|
200
262
|
messages = []
|
201
|
-
tables = @
|
263
|
+
tables = (@doc/"table")
|
202
264
|
|
203
265
|
for table in tables
|
204
|
-
|
205
|
-
messages << ErrorMessage.new(
|
206
|
-
break
|
266
|
+
unless (table/"table").empty?
|
267
|
+
return messages << ErrorMessage.new(:has_nested_tables)
|
207
268
|
end
|
208
269
|
end
|
209
270
|
|
210
|
-
|
271
|
+
messages
|
211
272
|
end
|
212
273
|
|
213
274
|
|
275
|
+
# Verify that all tables have at least one table header (th) element.
|
214
276
|
def check_tables
|
215
|
-
|
216
277
|
messages = []
|
217
|
-
tables = @
|
218
|
-
hasth = false
|
278
|
+
tables = (@doc/"table")
|
219
279
|
currenttable = 1
|
220
280
|
|
221
281
|
for table in tables
|
222
|
-
|
223
|
-
|
224
|
-
|
225
|
-
hasth = true
|
226
|
-
end
|
227
|
-
end
|
228
|
-
end
|
229
|
-
|
230
|
-
if table.tr
|
231
|
-
if table.tr.th
|
232
|
-
hasth = true
|
233
|
-
end
|
234
|
-
end
|
235
|
-
|
236
|
-
unless hasth
|
237
|
-
messages << ErrorMessage.new("missingth", currenttable.to_s)
|
238
|
-
end
|
282
|
+
hasth = false
|
283
|
+
hasth = true unless (table/">tr>th").empty?
|
284
|
+
hasth = true unless (table/">thead>tr>th").empty?
|
239
285
|
|
286
|
+
messages << ErrorMessage.new(:missing_th, currenttable.to_s) unless hasth
|
287
|
+
|
240
288
|
currenttable += 1
|
241
289
|
end
|
242
290
|
|
243
|
-
|
291
|
+
messages
|
244
292
|
end
|
245
293
|
|
246
294
|
|
295
|
+
|
296
|
+
# Verify that no formatting elements have been used. See UWEM 1.0 Test 7.2_HTML_01 and Test 7.3_HTML_01.
|
247
297
|
def check_for_formatting_elements
|
248
298
|
|
249
|
-
|
250
|
-
formatting_items = @soup.find_all(%w(font b i u tt small big strike s))
|
251
|
-
flicker_items = @soup.find_all(["blink", "marquee"])
|
252
|
-
|
253
|
-
formatting_items.each do |element|
|
254
|
-
messages << ErrorMessage.new("missingsemantics", "<#{element.name}>")
|
255
|
-
end
|
299
|
+
messages = []
|
256
300
|
|
257
|
-
|
258
|
-
|
259
|
-
|
260
|
-
|
261
|
-
|
301
|
+
formatting_elements = %w(font b i u tt small big strike s)
|
302
|
+
formatting_elements = %w(font u tt small big strike s) if @ignore_bi
|
303
|
+
|
304
|
+
formatting_items = (@doc/formatting_elements.join('|'))
|
305
|
+
|
306
|
+
unless formatting_items.empty?
|
307
|
+
messages << ErrorMessage.new(:missing_semantics, "#{formatting_items.join(', ')}")
|
308
|
+
end
|
309
|
+
|
310
|
+
flicker_elements = %w(blink marquee)
|
311
|
+
flicker_items = (@doc/flicker_elements.join('|'))
|
312
|
+
|
313
|
+
unless flicker_items.empty?
|
314
|
+
messages << ErrorMessage.new(:has_flicker)
|
315
|
+
end
|
316
|
+
|
317
|
+
messages
|
262
318
|
end
|
263
319
|
|
264
320
|
|
321
|
+
# Verify that the root document html element has a lang attribute.
|
265
322
|
def check_for_language_info
|
266
|
-
messages = []
|
267
|
-
|
268
|
-
|
269
|
-
|
270
|
-
|
271
|
-
|
272
|
-
|
273
|
-
|
274
|
-
|
275
|
-
|
276
|
-
return messages
|
323
|
+
messages = []
|
324
|
+
unless (@doc/'html[@lang]').empty?
|
325
|
+
lang_code = (@doc/"html").first["lang"].to_s
|
326
|
+
if lang_code.length < 2
|
327
|
+
messages << ErrorMessage.new(:missing_lang_info)
|
328
|
+
end
|
329
|
+
else
|
330
|
+
messages << ErrorMessage.new(:missing_lang_info)
|
331
|
+
end
|
332
|
+
messages
|
277
333
|
end
|
334
|
+
|
335
|
+
|
336
|
+
# Verify that the html element has a valid lang code.
|
337
|
+
def check_valid_language_code
|
338
|
+
messages = []
|
339
|
+
unless (@doc/"html[@lang]").empty?
|
340
|
+
#load list of valid language codes
|
341
|
+
iso_lang_codes = []
|
342
|
+
IO.foreach(File.dirname(__FILE__) + "/iso_language_codes.txt") { |code| iso_lang_codes << code.chomp }
|
343
|
+
|
344
|
+
doc_main_lang_code = (@doc/"html").first["lang"].to_s.downcase
|
345
|
+
unless iso_lang_codes.include?(doc_main_lang_code[0..1])
|
346
|
+
messages << ErrorMessage.new(:wrong_lang_code, doc_main_lang_code)
|
347
|
+
end
|
348
|
+
end
|
349
|
+
|
350
|
+
messages
|
351
|
+
end
|
278
352
|
|
279
353
|
|
354
|
+
# Verify that no link texts are ambiguous. A typical example is the presence of multiple "Read more" links.
|
280
355
|
def check_link_text
|
281
|
-
messages = []
|
282
356
|
links = get_links
|
283
|
-
linktexts = links.collect { |el| el[3] }
|
284
357
|
|
285
|
-
|
286
|
-
|
287
|
-
for link_b in links
|
288
|
-
if link_a[0] != link_b[0]
|
289
|
-
if is_ambiguous_link(link_a, link_b)
|
290
|
-
#add message if not added already for link text
|
291
|
-
unless find_errormsg_with_text(messages, link_a[3])
|
292
|
-
messages << ErrorMessage.new("ambiguouslinktext", link_a[3])
|
293
|
-
end
|
294
|
-
end
|
295
|
-
end
|
296
|
-
end
|
358
|
+
link = links.find do |link|
|
359
|
+
links.find { |cmp_link| is_ambiguous_link(link, cmp_link) }
|
297
360
|
end
|
298
361
|
|
299
|
-
return
|
362
|
+
return [] unless link
|
363
|
+
[ErrorMessage.new(:ambiguous_link_text, get_link_text(link))]
|
300
364
|
end
|
301
365
|
|
302
366
|
|
367
|
+
# Verify that all form fields have a corresponding label element. See UWEM 1.0 Test 12.4_HTML_02.
|
303
368
|
def check_form
|
304
369
|
messages = []
|
305
370
|
labels = get_labels
|
306
371
|
fields = get_editable_fields
|
307
372
|
|
308
373
|
#make sure all fields have associated labels
|
309
|
-
|
310
374
|
label_for_ids = []
|
311
375
|
for label in labels
|
312
376
|
if label["for"]
|
@@ -320,94 +384,80 @@ module Raakt
|
|
320
384
|
field_id = (field["id"] || "")
|
321
385
|
field_identifier = (field["id"] || field["name"] || "unknown")
|
322
386
|
if not label_for_ids.include?(field_id)
|
323
|
-
messages << ErrorMessage.new(
|
387
|
+
messages << ErrorMessage.new(:field_missing_label, field_identifier)
|
324
388
|
end
|
325
389
|
end
|
326
390
|
|
327
|
-
|
391
|
+
messages
|
328
392
|
end
|
329
393
|
|
330
394
|
|
395
|
+
# Verify that all frame elements have a title attribute.
|
331
396
|
def check_frames
|
332
|
-
|
333
|
-
|
334
|
-
messages = []
|
335
|
-
if is_frameset
|
336
|
-
frames = @soup.find_all("frame")
|
337
|
-
frame_title = ""
|
397
|
+
# Covers UWEM Test 12.1_HTML_01
|
398
|
+
return [] unless is_frameset
|
338
399
|
|
339
|
-
|
340
|
-
|
341
|
-
|
342
|
-
|
343
|
-
end
|
344
|
-
end
|
345
|
-
end
|
346
|
-
|
347
|
-
return messages
|
400
|
+
(@doc/"frame").find_all do |frame|
|
401
|
+
frame_title = frame['title'] || ''
|
402
|
+
normalize_text(frame_title).empty?
|
403
|
+
end.map { |frame| ErrorMessage.new(:missing_frame_title, frame['src']) }
|
348
404
|
end
|
349
405
|
|
350
406
|
|
407
|
+
# Verify that the document does not use meta-refresh to redirect the user away after a period of time.
|
351
408
|
def check_refresh
|
409
|
+
meta_elements = (@doc/'meta')
|
352
410
|
|
353
|
-
|
354
|
-
|
355
|
-
|
356
|
-
for element in meta_elements
|
357
|
-
if element["http-equiv"] == "refresh"
|
358
|
-
messages << ErrorMessage.new("hasmetarefresh")
|
359
|
-
end
|
360
|
-
end
|
361
|
-
|
362
|
-
return messages
|
411
|
+
meta_elements.find_all do |element|
|
412
|
+
element["http-equiv"] == "refresh"
|
413
|
+
end.map { ErrorMessage.new(:has_meta_refresh) }
|
363
414
|
end
|
364
415
|
|
365
416
|
|
366
417
|
#Utility methods
|
367
|
-
|
368
|
-
def
|
369
|
-
|
370
|
-
|
371
|
-
|
372
|
-
#3. The title text is identical (if present)
|
373
|
-
if link_a[1] != link_b[1] and
|
374
|
-
normalize_text(link_a[2]) == normalize_text(link_b[2]) and
|
375
|
-
normalize_text(link_a[3]) == normalize_text(link_b[3]) then
|
376
|
-
return true
|
418
|
+
|
419
|
+
def headings
|
420
|
+
headings = []
|
421
|
+
1.upto(6) do |i|
|
422
|
+
headings.push((@doc/"h#{i}")) if (@doc/"h#{i}").length > 0
|
377
423
|
end
|
378
|
-
|
379
|
-
return false
|
424
|
+
headings.flatten
|
380
425
|
end
|
426
|
+
|
381
427
|
|
428
|
+
def level(heading)
|
429
|
+
Integer(heading[1].chr)
|
430
|
+
end
|
431
|
+
|
382
432
|
|
383
|
-
|
384
|
-
|
385
|
-
|
386
|
-
|
387
|
-
|
388
|
-
|
433
|
+
def downcase_hash_keys(a_hash)
|
434
|
+
downcased_hash = {}
|
435
|
+
a_hash.collect {|key,value| downcased_hash[key.downcase] = value}
|
436
|
+
return downcased_hash
|
437
|
+
end
|
438
|
+
|
439
|
+
def parse_charset(contenttype)
|
440
|
+
# get charset identifier from content type string
|
441
|
+
if contenttype=~/charset=(.*)\w?/ then
|
442
|
+
return $1.downcase.strip
|
443
|
+
end
|
444
|
+
|
445
|
+
return ""
|
446
|
+
end
|
447
|
+
|
448
|
+
|
449
|
+
def is_ambiguous_link(link_a, link_b)
|
450
|
+
return false if links_point_to_same_resource?(link_a, link_b)
|
451
|
+
return true if link_text_identical?(link_a, link_b) &&
|
452
|
+
link_title_identical?(link_a, link_b)
|
389
453
|
|
390
|
-
|
454
|
+
false
|
391
455
|
end
|
392
456
|
|
393
|
-
|
394
457
|
def get_links
|
395
|
-
|
396
|
-
links = []
|
397
|
-
currentlink = 0
|
398
|
-
|
399
|
-
for element in linkelements
|
400
|
-
title = normalize_text((element['title'] || "").strip)
|
401
|
-
linktext = normalize_text((elements_to_text(element) || "").strip)
|
402
|
-
url = element['href']
|
403
|
-
links << [currentlink, url, title, linktext]
|
404
|
-
currentlink += 1
|
405
|
-
end
|
406
|
-
|
407
|
-
return links
|
458
|
+
(@doc/'a')
|
408
459
|
end
|
409
460
|
|
410
|
-
|
411
461
|
def langinfo(element)
|
412
462
|
langval = ""
|
413
463
|
|
@@ -423,39 +473,33 @@ module Raakt
|
|
423
473
|
end
|
424
474
|
|
425
475
|
|
426
|
-
def
|
427
|
-
|
476
|
+
def alt_to_text(element)
|
477
|
+
if element.kind_of?(Hpricot::Elem) then
|
478
|
+
element.has_attribute?("alt") ? element['alt'] : ""
|
479
|
+
else
|
480
|
+
""
|
481
|
+
end
|
428
482
|
end
|
429
|
-
|
430
|
-
|
483
|
+
|
431
484
|
def elements_to_text(element)
|
432
|
-
|
433
|
-
|
434
|
-
|
435
|
-
if el.class.to_s == 'NavigableString'
|
436
|
-
retval += el
|
437
|
-
else
|
438
|
-
if el.name == "img"
|
439
|
-
retval += img_to_text(el)
|
440
|
-
else
|
441
|
-
retval += elements_to_text(el)
|
442
|
-
end
|
443
|
-
end
|
485
|
+
str = ''
|
486
|
+
element.traverse_all_element do |elem|
|
487
|
+
elem.kind_of?(Hpricot::Text) ? str += "#{elem}" : str += alt_to_text(elem)
|
444
488
|
end
|
445
489
|
|
446
|
-
|
490
|
+
str
|
447
491
|
end
|
448
492
|
|
449
493
|
|
450
494
|
def normalize_text(text)
|
451
|
-
text
|
452
|
-
retval = text.gsub(/ /,
|
453
|
-
retval = retval.gsub(/ /,
|
454
|
-
retval = retval.gsub(/\n/,
|
455
|
-
retval = retval.gsub(/\r/,
|
456
|
-
retval = retval.gsub(/\t/,
|
495
|
+
text ||= ''
|
496
|
+
retval = text.gsub(/ /, ' ')
|
497
|
+
retval = retval.gsub(/ /, ' ')
|
498
|
+
retval = retval.gsub(/\n/, '')
|
499
|
+
retval = retval.gsub(/\r/, '')
|
500
|
+
retval = retval.gsub(/\t/, '')
|
457
501
|
while / /.match(retval) do
|
458
|
-
retval = retval.gsub(/ /,
|
502
|
+
retval = retval.gsub(/ /, ' ')
|
459
503
|
end
|
460
504
|
|
461
505
|
retval = retval.strip
|
@@ -465,12 +509,12 @@ module Raakt
|
|
465
509
|
|
466
510
|
|
467
511
|
def get_labels
|
468
|
-
|
512
|
+
@doc/'label'
|
469
513
|
end
|
470
514
|
|
471
515
|
|
472
516
|
def get_editable_fields
|
473
|
-
allfields = @
|
517
|
+
allfields = (@doc/"textarea|select|input")
|
474
518
|
fields = []
|
475
519
|
field_type = ""
|
476
520
|
|
@@ -487,9 +531,37 @@ module Raakt
|
|
487
531
|
|
488
532
|
|
489
533
|
def is_frameset
|
490
|
-
|
534
|
+
(@doc/"frameset").length > 0
|
491
535
|
end
|
492
536
|
|
537
|
+
|
538
|
+
def link_text_identical?(link_a, link_b)
|
539
|
+
get_link_text(link_a) == get_link_text(link_b)
|
540
|
+
end
|
541
|
+
|
542
|
+
def link_title_identical?(link_a, link_b)
|
543
|
+
get_link_title(link_a) == get_link_title(link_b)
|
544
|
+
end
|
545
|
+
|
546
|
+
def links_point_to_same_resource?(link_a, link_b)
|
547
|
+
(link_a == link_b) ||
|
548
|
+
(get_link_url(link_a) == get_link_url(link_b))
|
549
|
+
end
|
550
|
+
|
551
|
+
def get_link_text(link)
|
552
|
+
text = (elements_to_text(link) || '').strip
|
553
|
+
normalize_text(text)
|
554
|
+
end
|
555
|
+
|
556
|
+
def get_link_url(link)
|
557
|
+
link['href']
|
558
|
+
end
|
559
|
+
|
560
|
+
def get_link_title(link)
|
561
|
+
text = (link['title'] || '').strip
|
562
|
+
normalize_text(text)
|
563
|
+
end
|
564
|
+
|
493
565
|
end
|
494
566
|
|
495
|
-
end
|
567
|
+
end
|