raakt 0.4 → 0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/raakt.rb +346 -274
- data/tests/areadoc1.htm +12 -0
- data/tests/areadoc2.htm +12 -0
- data/tests/areadoc3.htm +12 -0
- data/tests/bdoc.htm +9 -0
- data/tests/charset_nocharset_specified.htm +8 -0
- data/tests/charset_utf8.htm +9 -0
- data/tests/embeddoc1.htm +9 -0
- data/tests/formdoc1.htm +14 -0
- data/tests/formdoc2.htm +15 -0
- data/tests/formdoc3.htm +22 -0
- data/tests/full_berg.htm +441 -0
- data/tests/inputimgdoc1.htm +14 -0
- data/tests/langinfodoc2.htm +8 -0
- data/tests/nestedtabledoc.htm +205 -0
- data/tests/raakt_test.rb +204 -144
- metadata +21 -7
- data/lib/raakt (kopia).rb +0 -495
data/lib/raakt.rb
CHANGED
@@ -1,101 +1,110 @@
|
|
1
|
-
#
|
2
|
-
#
|
1
|
+
# == The Ruby Accessibility Analysis Kit (RAAKT)
|
2
|
+
# :title: Ruby Accessibility Analysis Kit (RAAKT)
|
3
|
+
# Author:: Peter Krantz (http://www.peterkrantz.com/)
|
4
|
+
# License:: See LICENSE file
|
3
5
|
#
|
4
|
-
#
|
5
|
-
|
6
|
+
# RAAKT is a toolkit to find accessibility issues in HTML documents. RAAKT can be used as part of an automatic test procedure or as a standalone module for mass validation of all pages in a site.
|
7
|
+
#
|
8
|
+
# The ambition has been to provide tests that can be fully automated. Currently, none of the included tests should fail for any web page.
|
9
|
+
#
|
10
|
+
# Many of the tests included here map to tests defined in the Unified Web Evaluation Methodology (UWEM[http://www.wabcluster.org/uwem/tests/]). See note for each test to find the corresponding UWEM test.
|
11
|
+
#
|
12
|
+
# == Output
|
13
|
+
# RAAKT output is in the form of an array of Raakt::ErrorMessage objects.
|
14
|
+
#
|
15
|
+
# == Contributions
|
16
|
+
# Thanks to Derek Perrault for refactoring RAAKT to use Hpricot[http://code.whytheluckystiff.net/hpricot/] while at the same time making the code more readable.
|
17
|
+
#
|
18
|
+
# == Example usage
|
19
|
+
# See the examples folder for a small script that shows how to retrieve a remote web page and perform an accessibility test on it.
|
6
20
|
module Raakt
|
7
|
-
require '
|
21
|
+
require 'hpricot'
|
8
22
|
|
9
23
|
MESSAGES = {
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
24
|
+
:missing_title => "The title element is missing. Provide a descriptive title for your document.",
|
25
|
+
:empty_title => "The title element is empty. Provide a descriptive title for your document.",
|
26
|
+
:missing_alt => "Missing alt attribute for image (with src '%s').",
|
27
|
+
:missing_heading => "Missing first level heading (h1). Provide at least one first level heading describing document content.",
|
28
|
+
:wrong_h_structure => "Document heading structure is wrong.",
|
29
|
+
:first_h_not_h1 => "The first heading is not h1.",
|
30
|
+
:has_nested_tables => "You have one or more nested tables.",
|
31
|
+
:missing_semantics => "You have used %s for visual formatting. Use CSS instead.",
|
32
|
+
:has_flicker => "You have used <blink> and/or <marquee>. These may create accessibility issues and should be avoided.",
|
33
|
+
:missing_lang_info => "Document language information is missing. Use the lang attribute on the html element.",
|
34
|
+
:missing_th => "Missing table headings (th) for table #%s.",
|
35
|
+
:ambiguous_link_text => "One or more links have the same link text ('%s'). Make sure each link is unambiguous.",
|
36
|
+
:field_missing_label => "A field (with id/name '%s') is missing a corresponding label element. Make sure a label exists for all visible fields.",
|
37
|
+
:missing_frame_title => "Missing title attribute for frame with url %s",
|
38
|
+
:has_meta_refresh => "Client side redirect (meta refresh) detected. Use server side redirection instead.",
|
39
|
+
:charset_mismatch => "The character set specified in the HTTP headers does not match that specified in the markup.",
|
40
|
+
:embed_used => "You have used the embed element. It does not provide a way to express a text representation.",
|
41
|
+
:wrong_lang_code => "You have used a language code ('%s') not recognized in the ISO 639 standard.",
|
42
|
+
:fieldset_missing_legend => "Missing legend element for fieldset #%s.",
|
43
|
+
:missing_input_alt => "Missing alt attribute for image button with id/name '%s'.",
|
44
|
+
:missing_input_alt_text => "Missing alt text for image button with id/name '%s'.",
|
45
|
+
:missing_area_alt => "Missing alt attribute for area with id/name '%s'.",
|
46
|
+
:missing_area_alt_text => "Missing alt text for area with id/name '%s'."
|
25
47
|
}
|
26
48
|
|
27
|
-
VERSION = "0.
|
49
|
+
VERSION = "0.5"
|
28
50
|
|
29
51
|
# A single accessibility finding produced by one of the checks.
#
# eid::  key into MESSAGES identifying the kind of problem.
# text:: message template from MESSAGES; when a note is given, the first
#        "%s" placeholder is replaced by it.
# note:: optional detail supplied by the check (nil when absent).
class ErrorMessage

  # Characters that must be replaced before text is embedded in XML output.
  XML_ESCAPES = {
    '&' => '&amp;',
    '<' => '&lt;',
    '>' => '&gt;',
    '"' => '&quot;'
  }.freeze

  attr_reader :eid, :text, :note

  # Build the message for +eid+, optionally filling in +note+.
  def initialize(eid, note=nil)
    @eid = eid
    @note = note

    if note
      # Block form of sub inserts the note verbatim. With a replacement
      # *string*, sequences such as "\1" or "\\" inside the note would be
      # treated as back-references and silently altered.
      @text = MESSAGES[@eid].sub(/%s/) { note.to_s }
    else
      @text = MESSAGES[@eid]
    end
  end

  # "<eid>: <text>" representation.
  def to_s
    "#{@eid}: #{@text}"
  end

  # Return single error message as an xml element. Markup characters in the
  # id and text are escaped so that templates mentioning tags (for example
  # "<blink>") or notes containing "&" still yield well-formed XML.
  def to_xml
    "<message id=\"#{xml_escape(@eid)}\">#{xml_escape(@text)}</message>"
  end

  private

  # Replace &, <, > and " in +value+ (converted with to_s) by XML entities.
  def xml_escape(value)
    value.to_s.gsub(/[&<>"]/) { |char| XML_ESCAPES[char] }
  end
end
|
49
75
|
|
50
76
|
|
51
77
|
|
52
78
|
class Test
|
53
79
|
|
54
|
-
attr_accessor :
|
55
|
-
|
56
|
-
def initialize(html=nil)
|
80
|
+
attr_accessor :html, :headers, :user_agent, :ignore_bi
|
81
|
+
|
82
|
+
# Create a test object, optionally primed with markup and response headers.
#
# html::    markup to analyse; handed to doc= when given.
# headers:: header hash; handed to headers= when given.
def initialize(html=nil, headers=nil)
  @html = html
  @headers = headers
  @ignore_bi = false

  # The setters do the real work (parsing / key normalisation); they are only
  # invoked for values that were actually supplied.
  self.doc = html if html
  self.headers = headers if headers
end
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
def feedurl(url)
|
74
|
-
if url.length == 0
|
75
|
-
raise "You called feedurl with a blank url. There is nothing to check."
|
76
|
-
end
|
77
|
-
|
78
|
-
#Clean the url and make sure protocol and trailing slash is available
|
79
|
-
url = "http://" + url unless url[0..3] == "http"
|
80
|
-
|
81
|
-
require 'open-uri'
|
82
89
|
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
if @html.length == 0
|
88
|
-
raise "Could not fetch html from the url #{url}. There is nothing to check."
|
89
|
-
else
|
90
|
-
@soup = BeautifulSoup.new(@html)
|
91
|
-
end
|
92
|
-
|
90
|
+
# Set the HTML used in the test.
|
91
|
+
def doc=(html)
  # Enlarge Hpricot's buffer (256 KiB) before parsing, to allow for very
  # large attribute values such as ASP.NET viewstate fields.
  Hpricot.buffer_size = 262144
  parsed = Hpricot(html)
  @doc = parsed
end
|
94
95
|
|
95
|
-
|
96
|
-
|
96
|
+
# Set HTML headers to be used in the test. Headers are necessary for some tests (e.g. to check encoding).
|
97
|
+
def headers=(headers)
  # Keys are stored downcased so later lookups can use lower-case names;
  # a nil/false argument clears the stored headers.
  @headers = headers ? downcase_hash_keys(headers) : nil
end
|
104
|
+
|
105
|
+
|
106
|
+
# Call all check methods.
|
97
107
|
def all
|
98
|
-
#Call all check methods
|
99
108
|
messages = []
|
100
109
|
|
101
110
|
self.methods.each do |method|
|
@@ -107,69 +116,121 @@ module Raakt
|
|
107
116
|
return messages
|
108
117
|
end
|
109
118
|
|
119
|
+
|
120
|
+
# Verify that all fieldset elements have a legend child element. See UWEM 1.0 Test 12.3_HTML_01.
|
121
|
+
def check_fieldset_legend
  messages = []

  # Fieldsets are reported by their 1-based position in the document.
  (@doc/"fieldset").each_with_index do |fieldset, index|
    next unless (fieldset/"legend").empty?
    messages << ErrorMessage.new(:fieldset_missing_legend, (index + 1).to_s)
  end

  messages
end
|
133
|
+
|
134
|
+
|
135
|
+
# Verify that the embed element isn't used. See UWEM 1.0 Test 1.1_HTML_06.
|
136
|
+
def check_embed
  # One message regardless of how many embed elements are present.
  embeds = @doc/'embed'
  embeds.empty? ? [] : [ErrorMessage.new(:embed_used)]
end
|
140
|
+
|
110
141
|
|
142
|
+
# Verify that the character set specified in the HTTP headers matches the one specified in the HTML meta element.
|
143
|
+
def check_character_set
  # Without headers there is nothing to compare the markup against.
  return [] unless @headers && @headers.length > 0

  header_charset = ""
  if @headers.has_key?("content-type")
    header_charset = parse_charset(@headers["content-type"].to_s)
  end

  # get meta element charset (the last matching meta element wins)
  meta_charset = ""
  @doc.search("//meta[@http-equiv]").each do |meta|
    next unless meta["http-equiv"].downcase == "content-type"
    meta_charset = parse_charset(meta["content"])
  end

  # Only report when both sides declare a charset and they differ.
  return [] if header_charset.length == 0 || meta_charset.length == 0
  return [] if meta_charset == header_charset

  [ErrorMessage.new(:charset_mismatch)]
end
|
169
|
+
|
170
|
+
|
171
|
+
# Verify that all input type=image elements have an alt attribute.
|
172
|
+
def check_input_type_img
  #Covers UWEM 1.0 Test 1.1_HTML_01
  messages = []

  @doc.search("input").each do |input|
    next unless input['type'] =~ /image/i

    alt = input['alt']
    if !alt
      # No alt attribute at all.
      messages << ErrorMessage.new(:missing_input_alt, input['name'] || input['id'] || "")
    elsif alt.length == 0
      # Attribute present but empty.
      messages << ErrorMessage.new(:missing_input_alt_text, input['name'] || input['id'] || "")
    end
  end

  messages
end
|
189
|
+
|
190
|
+
|
191
|
+
# Verify that all img elements have an alt attribute.
|
111
192
|
def check_images
  # Returns one :missing_alt message per img element lacking an alt attribute.
  no_alt_images = (@doc/"img:not([@alt])")

  no_alt_images.map do |img|
    # Fall back to an empty string when src is absent: ErrorMessage only fills
    # the "%s" placeholder for a non-nil note, so a nil src would leave a
    # literal "%s" in the message text.
    ErrorMessage.new(:missing_alt, img['src'] || '')
  end
end
|
125
196
|
|
126
197
|
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
return messages
|
141
|
-
end
|
198
|
+
# Verify that all area elements have a non-empty alt attribute. See UWEM 1.0 Test 1.1_HTML_01 (together with check_images)
|
199
|
+
def check_areas
  messages = []

  (@doc/"area").each do |area|
    alt = area['alt']
    if !alt
      # No alt attribute at all.
      messages << ErrorMessage.new(:missing_area_alt, area['name'] || area['id'] || "unknown")
    elsif alt.length == 0
      # Attribute present but empty.
      messages << ErrorMessage.new(:missing_area_alt_text, area['name'] || area['id'] || "unknown")
    end
  end

  messages
end
|
153
214
|
|
154
215
|
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
headings.push(@soup.find_all("h6")) if @soup.find_all("h6").length > 0
|
163
|
-
|
164
|
-
return headings.flatten
|
216
|
+
|
217
|
+
# Verify that the document has a non-empty title element.
|
218
|
+
def check_title
  title_element = @doc.at('title')

  if !title_element
    [ErrorMessage.new(:missing_title)]
  elsif normalize_text(title_element.inner_html).empty?
    # A title containing only whitespace counts as empty.
    [ErrorMessage.new(:empty_title)]
  else
    []
  end
end
|
166
224
|
|
167
|
-
|
168
|
-
|
169
|
-
|
225
|
+
|
226
|
+
# Verify that the document has at least one h1 element.
|
227
|
+
def check_has_heading
  first_level_headings = @doc/'h1'
  first_level_headings.empty? ? [ErrorMessage.new(:missing_heading)] : []
end
|
171
231
|
|
172
|
-
|
232
|
+
|
233
|
+
# Verify that heading elements (h1-h6) appear in the correct order (no levels skipped). See UWEM 1.0 Test 3.5_HTML_03.
|
173
234
|
def check_document_structure
|
174
235
|
messages = []
|
175
236
|
currentitem = 0
|
@@ -178,11 +239,11 @@ module Raakt
|
|
178
239
|
for heading in docheadings
|
179
240
|
if currentitem == 0
|
180
241
|
if level(heading.name) != 1
|
181
|
-
messages << ErrorMessage.new(
|
242
|
+
messages << ErrorMessage.new(:first_h_not_h1, "h" + heading.name[1,1])
|
182
243
|
end
|
183
244
|
else
|
184
245
|
if level(heading.name) - level(docheadings[currentitem - 1].name) > 1
|
185
|
-
messages << ErrorMessage.new(
|
246
|
+
messages << ErrorMessage.new(:wrong_h_structure)
|
186
247
|
break
|
187
248
|
end
|
188
249
|
end
|
@@ -191,122 +252,125 @@ module Raakt
|
|
191
252
|
|
192
253
|
end
|
193
254
|
|
194
|
-
|
255
|
+
messages
|
195
256
|
end
|
196
257
|
|
197
258
|
|
259
|
+
# Verify that the document does not have any nested tables. Nested tables are indicative of a table-based layout.
|
198
260
|
def check_for_nested_tables
  # A single message is enough: stop at the first table containing a table.
  nested = (@doc/"table").any? { |table| !(table/"table").empty? }
  nested ? [ErrorMessage.new(:has_nested_tables)] : []
end
|
212
273
|
|
213
274
|
|
275
|
+
# Verify that all tables have at least one table header (th) element.
|
214
276
|
def check_tables
  # Places where a th counts as a heading of the table itself (not of a nested
  # table): directly in a row, or in a row of an explicit thead/tbody/tfoot.
  # Previously only ">tr>th" and ">thead>tr>th" were checked, so a th inside an
  # explicitly written tbody or tfoot was reported as missing.
  header_paths = [">tr>th", ">thead>tr>th", ">tbody>tr>th", ">tfoot>tr>th"]

  messages = []
  # Tables are reported by their 1-based position in the document.
  (@doc/"table").each_with_index do |table, index|
    hasth = header_paths.any? { |path| !(table/path).empty? }
    messages << ErrorMessage.new(:missing_th, (index + 1).to_s) unless hasth
  end

  messages
end
|
245
293
|
|
246
294
|
|
295
|
+
|
296
|
+
# Verify that no formatting elements have been used. See UWEM 1.0 Test 7.2_HTML_01 and Test 7.3_HTML_01.
|
247
297
|
def check_for_formatting_elements
  messages = []

  # b and i are left out of the search when @ignore_bi is set.
  formatting_elements = if @ignore_bi
    %w(font u tt small big strike s)
  else
    %w(font b i u tt small big strike s)
  end

  formatting_selector = formatting_elements.join('|')
  formatting_items = @doc/formatting_selector
  unless formatting_items.empty?
    messages << ErrorMessage.new(:missing_semantics, formatting_items.join(', '))
  end

  flicker_selector = %w(blink marquee).join('|')
  flicker_items = @doc/flicker_selector
  messages << ErrorMessage.new(:has_flicker) unless flicker_items.empty?

  messages
end
|
263
319
|
|
264
320
|
|
321
|
+
# Verify that the root html element of the document has a lang attribute.
|
265
322
|
def check_for_language_info
  has_lang_attribute = !(@doc/'html[@lang]').empty?

  # A lang value shorter than two characters is treated like a missing one.
  if has_lang_attribute && (@doc/"html").first["lang"].to_s.length >= 2
    []
  else
    [ErrorMessage.new(:missing_lang_info)]
  end
end
|
334
|
+
|
335
|
+
|
336
|
+
# Verify that the html element has a valid lang code.
|
337
|
+
def check_valid_language_code
  # Nothing to validate when the html element carries no lang attribute.
  return [] if (@doc/"html[@lang]").empty?

  #load list of valid language codes (one code per line)
  codes_path = File.dirname(__FILE__) + "/iso_language_codes.txt"
  iso_lang_codes = []
  IO.foreach(codes_path) { |line| iso_lang_codes << line.chomp }

  doc_main_lang_code = (@doc/"html").first["lang"].to_s.downcase

  # Only the first two characters are compared against the list.
  return [] if iso_lang_codes.include?(doc_main_lang_code[0..1])

  [ErrorMessage.new(:wrong_lang_code, doc_main_lang_code)]
end
|
278
352
|
|
279
353
|
|
354
|
+
# Verify that no link texts are ambiguous. A typical example is the presence of multiple "Read more" links.
|
280
355
|
def check_link_text
  links = get_links

  # First link that is ambiguous with respect to any link in the document;
  # at most one message is produced.
  ambiguous = links.find do |candidate|
    links.any? { |other| is_ambiguous_link(candidate, other) }
  end

  if ambiguous
    [ErrorMessage.new(:ambiguous_link_text, get_link_text(ambiguous))]
  else
    []
  end
end
|
301
365
|
|
302
366
|
|
367
|
+
# Verify that all form fields have a corresponding label element. See UWEM 1.0 Test 12.4_HTML_02.
|
303
368
|
def check_form
|
304
369
|
messages = []
|
305
370
|
labels = get_labels
|
306
371
|
fields = get_editable_fields
|
307
372
|
|
308
373
|
#make sure all fields have associated labels
|
309
|
-
|
310
374
|
label_for_ids = []
|
311
375
|
for label in labels
|
312
376
|
if label["for"]
|
@@ -320,94 +384,80 @@ module Raakt
|
|
320
384
|
field_id = (field["id"] || "")
|
321
385
|
field_identifier = (field["id"] || field["name"] || "unknown")
|
322
386
|
if not label_for_ids.include?(field_id)
|
323
|
-
messages << ErrorMessage.new(
|
387
|
+
messages << ErrorMessage.new(:field_missing_label, field_identifier)
|
324
388
|
end
|
325
389
|
end
|
326
390
|
|
327
|
-
|
391
|
+
messages
|
328
392
|
end
|
329
393
|
|
330
394
|
|
395
|
+
# Verify that all frame elements have a title attribute.
|
331
396
|
def check_frames
  # Covers UWEM Test 12.1_HTML_01
  # Only framesets are inspected; other documents yield no messages.
  return [] unless is_frameset

  # Frames whose title is absent or blank after normalisation.
  untitled_frames = (@doc/"frame").select do |frame|
    normalize_text(frame['title'] || '').empty?
  end

  untitled_frames.map do |frame|
    # Fall back to an empty string when src is absent: ErrorMessage only fills
    # the "%s" placeholder for a non-nil note, so a nil src would leave a
    # literal "%s" in the message text.
    ErrorMessage.new(:missing_frame_title, frame['src'] || '')
  end
end
|
349
405
|
|
350
406
|
|
407
|
+
# Verify that the document does not use meta-refresh to redirect the user away after a period of time.
|
351
408
|
def check_refresh
  # Returns one :has_meta_refresh message per meta refresh element.
  refresh_elements = (@doc/'meta').select do |element|
    # Compare case-insensitively, as check_character_set does for
    # http-equiv values: markup commonly uses "Refresh" or "REFRESH".
    # to_s covers meta elements that have no http-equiv attribute.
    element["http-equiv"].to_s.downcase == "refresh"
  end

  refresh_elements.map { ErrorMessage.new(:has_meta_refresh) }
end
|
364
415
|
|
365
416
|
|
366
417
|
#Utility methods
|
367
|
-
|
368
|
-
def
|
369
|
-
|
370
|
-
|
371
|
-
|
372
|
-
#3. The title text is identical (if present)
|
373
|
-
if link_a[1] != link_b[1] and
|
374
|
-
normalize_text(link_a[2]) == normalize_text(link_b[2]) and
|
375
|
-
normalize_text(link_a[3]) == normalize_text(link_b[3]) then
|
376
|
-
return true
|
418
|
+
|
419
|
+
# Collect the h1..h6 elements of the document into one flat array.
#
# NOTE(review): results are appended level by level (all h1 matches, then all
# h2 matches, ...), not in document order - confirm this is what callers that
# compare consecutive headings expect.
def headings
  per_level = []

  (1..6).each do |number|
    found = @doc/"h#{number}"
    per_level.push(found) if found.length > 0
  end

  per_level.flatten
end
|
426
|
+
|
381
427
|
|
428
|
+
# Numeric level of a heading element name, taken from its second character
# (e.g. "h2" gives 2).
def level(heading)
  digit = heading[1].chr
  Integer(digit)
end
|
431
|
+
|
382
432
|
|
383
|
-
|
384
|
-
|
385
|
-
|
386
|
-
|
387
|
-
|
388
|
-
|
433
|
+
# Return a new hash with every key converted to a lower-case String; values
# are kept as they are and +a_hash+ itself is not modified.
#
# Keys are passed through to_s first so that symbol keys end up as the same
# lower-case strings that lookups such as "content-type" use.
def downcase_hash_keys(a_hash)
  downcased_hash = {}
  a_hash.each { |key, value| downcased_hash[key.to_s.downcase] = value }
  downcased_hash
end
|
438
|
+
|
439
|
+
# Extract the charset identifier from a content type string such as
# "text/html; charset=UTF-8".
#
# contenttype:: header or meta content value; nil is treated as "".
#
# Returns the charset in lower case, or "" when none is present.
def parse_charset(contenttype)
  # The value stops at whitespace, ";" or a quote, so trailing parameters
  # ("charset=utf-8; boundary=x") and quoted values (charset="utf-8") do not
  # leak into the result. The parameter name is matched case-insensitively.
  if contenttype.to_s =~ /charset\s*=\s*["']?([^\s;"']+)/i
    return $1.downcase
  end

  ""
end
|
447
|
+
|
448
|
+
|
449
|
+
# Two links are ambiguous when they point to different resources but carry
# identical link text and identical title text.
def is_ambiguous_link(link_a, link_b)
  if links_point_to_same_resource?(link_a, link_b)
    false
  elsif link_text_identical?(link_a, link_b) && link_title_identical?(link_a, link_b)
    true
  else
    false
  end
end
|
392
456
|
|
393
|
-
|
394
457
|
# All a elements of the document.
def get_links
  anchors = @doc/'a'
  anchors
end
|
409
460
|
|
410
|
-
|
411
461
|
def langinfo(element)
|
412
462
|
langval = ""
|
413
463
|
|
@@ -423,39 +473,33 @@ module Raakt
|
|
423
473
|
end
|
424
474
|
|
425
475
|
|
426
|
-
def
|
427
|
-
|
476
|
+
# Text alternative of +element+: the value of its alt attribute when it is an
# Hpricot::Elem that has one, otherwise "".
def alt_to_text(element)
  return "" unless element.kind_of?(Hpricot::Elem)
  return "" unless element.has_attribute?("alt")

  element['alt']
end
|
429
|
-
|
430
|
-
|
483
|
+
|
431
484
|
# Concatenate the textual content found while traversing +element+: text
# nodes contribute their string form, every other node contributes whatever
# alt_to_text returns for it.
def elements_to_text(element)
  collected = ''

  element.traverse_all_element do |node|
    fragment = node.kind_of?(Hpricot::Text) ? "#{node}" : alt_to_text(node)
    collected += fragment
  end

  collected
end
|
448
492
|
|
449
493
|
|
450
494
|
def normalize_text(text)
|
451
|
-
text
|
452
|
-
retval = text.gsub(/ /,
|
453
|
-
retval = retval.gsub(/ /,
|
454
|
-
retval = retval.gsub(/\n/,
|
455
|
-
retval = retval.gsub(/\r/,
|
456
|
-
retval = retval.gsub(/\t/,
|
495
|
+
text ||= ''
|
496
|
+
retval = text.gsub(/ /, ' ')
|
497
|
+
retval = retval.gsub(/ /, ' ')
|
498
|
+
retval = retval.gsub(/\n/, '')
|
499
|
+
retval = retval.gsub(/\r/, '')
|
500
|
+
retval = retval.gsub(/\t/, '')
|
457
501
|
while / /.match(retval) do
|
458
|
-
retval = retval.gsub(/ /,
|
502
|
+
retval = retval.gsub(/ /, ' ')
|
459
503
|
end
|
460
504
|
|
461
505
|
retval = retval.strip
|
@@ -465,12 +509,12 @@ module Raakt
|
|
465
509
|
|
466
510
|
|
467
511
|
# All label elements of the document.
def get_labels
  labels = @doc/'label'
  labels
end
|
470
514
|
|
471
515
|
|
472
516
|
def get_editable_fields
|
473
|
-
allfields = @
|
517
|
+
allfields = (@doc/"textarea|select|input")
|
474
518
|
fields = []
|
475
519
|
field_type = ""
|
476
520
|
|
@@ -487,9 +531,37 @@ module Raakt
|
|
487
531
|
|
488
532
|
|
489
533
|
# True when the document contains at least one frameset element.
def is_frameset
  framesets = @doc/"frameset"
  !framesets.empty?
end
|
492
536
|
|
537
|
+
|
538
|
+
# True when both links have the same link text as returned by get_link_text.
def link_text_identical?(link_a, link_b)
  text_a = get_link_text(link_a)
  text_b = get_link_text(link_b)
  text_a == text_b
end
|
541
|
+
|
542
|
+
# True when both links have the same title text as returned by get_link_title.
def link_title_identical?(link_a, link_b)
  title_a = get_link_title(link_a)
  title_b = get_link_title(link_b)
  title_a == title_b
end
|
545
|
+
|
546
|
+
# True when the two arguments are equal links, or when their urls (see
# get_link_url) are equal.
def links_point_to_same_resource?(link_a, link_b)
  return true if link_a == link_b

  get_link_url(link_a) == get_link_url(link_b)
end
|
550
|
+
|
551
|
+
# Normalised text content of +link+ (element text plus alt texts, as
# assembled by elements_to_text).
def get_link_text(link)
  raw_text = elements_to_text(link) || ''
  normalize_text(raw_text.strip)
end
|
555
|
+
|
556
|
+
# The href attribute value of +link+ (nil when the attribute is absent).
def get_link_url(link)
  href = link['href']
  href
end
|
559
|
+
|
560
|
+
# Normalised title attribute of +link+; a missing title is treated as ''.
def get_link_title(link)
  raw_title = link['title'] || ''
  normalize_text(raw_title.strip)
end
|
564
|
+
|
493
565
|
end
|
494
566
|
|
495
|
-
end
|
567
|
+
end
|