raakt 0.3 → 0.4

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,495 @@
1
+ # :title: Ruby Accessibility Analysis Kit
2
+ # =Ruby Accessibility Analysis Kit
3
+ #
4
+ # See README for a full explanation of this library.
5
+
6
+ module Raakt
7
+ require 'rubyful_soup'
8
+
9
# Message templates keyed by error id. A "%s" placeholder, where present,
# is replaced by ErrorMessage with the note passed for that error.
# The hash is frozen so the shared table cannot be modified by accident.
MESSAGES = {
  "missingtitle" => "The title element is missing. Provide a descriptive title for your document.",
  "emptytitle" => "The title element is empty. Provide a descriptive title for your document.",
  "missingalt" => "Missing alt attribute for image (with src '%s').",
  "missingheading" => "Missing first level heading (h1). Provide at least one first level heading describing document content.",
  "wronghstructure" => "Document heading structure is wrong.",
  "firsthnoth1" => "The first heading is not h1.",
  "hasnestedtables" => "You have one or more nested tables.",
  "missingsemantics"=> "You have used <font>, <b> or <i> for visual formatting. Use CSS instead.",
  "hasflicker" => "You have used <blink> or <marquee>. These may create accessibility issues and should be avoided.",
  "missinglanginfo" => "Document language information is missing. Use the lang attribute on the html element.",
  "missingth" => "Missing table headings (th) for table #%s.",
  "ambiguouslinktext" => "One or more links have the same link text ('%s'). Make sure each link is unambiguous.",
  "fieldmissinglabel" => "A field (with id/name '%s') is missing a corresponding label element. Make sure a label exists for all visible fields.",
  "missingframetitle" => "Missing title attribute for frame with url %s",
  "hasmetarefresh" => "Client side redirect (meta refresh) detected. Use server side redirection instead."
}.freeze

# Library version; interpolated into the default User-Agent string of Test.
VERSION = "0.31".freeze
28
+
29
# A single accessibility finding.
#
# eid::  the error id, a key of MESSAGES
# text:: the message template for eid, with its "%s" placeholder (if any)
#        replaced by the note
# note:: the optional detail passed by the check (e.g. an image src), or nil
class ErrorMessage

  attr_reader :eid, :text, :note

  # Builds the message for +eid+. When +note+ is given, the first "%s" in
  # the template is replaced by it; templates without a placeholder are
  # left unchanged.
  def initialize(eid, note=nil)
    @eid = eid
    @note = note
    template = MESSAGES[eid]

    # The block form of sub inserts the note literally. A plain replacement
    # string would interpret backslash sequences such as "\0" or "\1"
    # inside the note (possible in a src or link text).
    @text = note ? template.sub("%s") { note.to_s } : template
  end

  # Returns "<eid>: <text>".
  def to_s
    "#{@eid}: #{@text}"
  end
end
47
+
48
+
49
+
50
+
51
+
52
# Runs accessibility checks against an HTML document.
#
# The document is parsed with BeautifulSoup and kept in +soup+. Every
# check_* method returns an array of ErrorMessage objects (empty when the
# check passes); +all+ runs every check_* method and concatenates the results.
class Test

  attr_accessor :soup, :html, :user_agent

  # html:: optional HTML source; parsed immediately when given.
  def initialize(html=nil)
    @html = html
    @soup = BeautifulSoup.new(@html) if html
    @user_agent = "Mozilla/5.0 (RAAKT v#{VERSION}; http://raakt.rubyforge.org; The Ruby Accessibility Analysis Kit)"
  end

  # Replaces the current document with +html+ and parses it.
  # Raises RuntimeError when +html+ is nil or empty.
  def feed(html)
    @html = html || ""
    raise "You called feed with no data. There is nothing to check." if @html.length == 0

    @soup = BeautifulSoup.new(@html)
  end

  # Fetches +url+ (sending user_agent as the User-Agent header) and parses
  # the response body. "http://" is prepended when the url does not start
  # with "http". Raises RuntimeError for a blank url or an empty response.
  def feedurl(url)
    if url.length == 0
      raise "You called feedurl with a blank url. There is nothing to check."
    end

    #Clean the url and make sure a protocol is available
    url = "http://" + url unless url[0..3] == "http"

    require 'open-uri'

    headers = { "User-Agent" => @user_agent }
    store_body = lambda { |f| @html = f.read || "" }

    # Kernel#open stopped opening URLs in Ruby 3.0; use URI.open where it
    # is public and fall back to Kernel#open on older rubies.
    if URI.respond_to?(:open)
      URI.open(url, headers, &store_body)
    else
      open(url, headers, &store_body)
    end

    if @html.length == 0
      raise "Could not fetch html from the url #{url}. There is nothing to check."
    end

    @soup = BeautifulSoup.new(@html)
  end

  # Calls every method whose name starts with "check_" and returns all
  # resulting messages in one array.
  def all
    # grep matches both String (1.8) and Symbol (1.9+) method names.
    methods.grep(/\Acheck_/).inject([]) do |messages, name|
      messages + send(name)
    end
  end

  # Reports every img element that has no alt attribute.
  def check_images
    without_alt = @soup.find_all("img").select { |image| image["alt"].nil? }
    without_alt.map { |image| ErrorMessage.new("missingalt", image["src"] || "") }
  end

  # Reports a missing title element, or one whose normalized text is empty.
  def check_title
    title = @soup.find("title")

    return [ErrorMessage.new("missingtitle")] unless title
    return [ErrorMessage.new("emptytitle")] if normalize_text(title.string).length == 0

    []
  end

  # Reports a document without any h1 element.
  def check_has_heading
    @soup.find_all("h1").length == 0 ? [ErrorMessage.new("missingheading")] : []
  end

  # Returns all h1..h6 elements as a flat array.
  # NOTE(review): the result is grouped by level (all h1, then all h2, ...),
  # not in document order. check_document_structure consumes this ordering,
  # so it is kept as is; confirm whether document order was intended.
  def headings
    %w(h1 h2 h3 h4 h5 h6).map { |tag| @soup.find_all(tag) }.flatten
  end

  # Returns the numeric level of a heading name, e.g. "h2" -> 2.
  def level(heading)
    Integer(heading[1,1])
  end

  # Reports when the first heading is not h1, or when a heading is more
  # than one level deeper than the one preceding it in +headings+.
  # Stops at the first structure error.
  def check_document_structure
    messages = []
    docheadings = headings

    docheadings.each_with_index do |heading, index|
      if index == 0
        if level(heading.name) != 1
          messages << ErrorMessage.new("firsthnoth1", "h" + heading.name[1,1])
        end
      elsif level(heading.name) - level(docheadings[index - 1].name) > 1
        messages << ErrorMessage.new("wronghstructure")
        break
      end
    end

    messages
  end

  # Reports (once) when any table contains another table.
  def check_for_nested_tables
    nested = @soup.find_all("table").any? do |table|
      table.find_all("table").length > 0
    end

    nested ? [ErrorMessage.new("hasnestedtables")] : []
  end

  # Reports each table that has no th in its thead row or in its first row.
  # The note is the table's 1-based position among the found tables.
  def check_tables
    messages = []

    @soup.find_all("table").each_with_index do |table, index|
      # Evaluated per table: a th found in an earlier table must not
      # suppress the message for later tables.
      head_row = table.thead && table.thead.tr
      has_th = (head_row && head_row.th) || (table.tr && table.tr.th)

      messages << ErrorMessage.new("missingth", (index + 1).to_s) unless has_th
    end

    messages
  end

  # Reports use of presentational elements (font, b, i) and of
  # blink/marquee, at most one message for each group.
  def check_for_formatting_elements
    messages = []
    formatting_items = @soup.find_all(["font", "b", "i"])
    flicker_items = @soup.find_all(["blink", "marquee"])

    messages << ErrorMessage.new("missingsemantics") if formatting_items.length > 0
    messages << ErrorMessage.new("hasflicker") if flicker_items.length > 0

    messages
  end

  # Reports when the html element has no lang value longer than one
  # character.
  def check_for_language_info
    lang = langinfo(@soup.find("html")) || ""

    lang.length > 1 ? [] : [ErrorMessage.new("missinglanginfo")]
  end

  # Reports link texts shared by links that point to different urls
  # (see is_ambiguous_link). Each link text is reported at most once.
  def check_link_text
    messages = []
    links = get_links

    links.each do |link_a|
      next if find_errormsg_with_text(messages, link_a[3])

      ambiguous = links.any? do |link_b|
        link_a[0] != link_b[0] && is_ambiguous_link(link_a, link_b)
      end

      messages << ErrorMessage.new("ambiguouslinktext", link_a[3]) if ambiguous
    end

    messages
  end

  # Reports every editable field whose id is not referenced by the "for"
  # attribute of some label. Fields without an id are always reported
  # unless a label has an empty "for" attribute.
  def check_form
    messages = []
    label_for_ids = get_labels.map { |label| label["for"] }.compact

    get_editable_fields.each do |field|
      next if label_for_ids.include?(field["id"] || "")

      field_identifier = field["id"] || field["name"] || "unknown"
      messages << ErrorMessage.new("fieldmissinglabel", field_identifier)
    end

    messages
  end

  # In frameset documents, reports each frame whose title attribute is
  # missing or blank.
  def check_frames
    return [] unless is_frameset

    untitled = @soup.find_all("frame").select do |frame|
      normalize_text(frame["title"]).length == 0
    end

    # A frame without src gets an empty note so the "%s" placeholder is
    # still filled in.
    untitled.map { |frame| ErrorMessage.new("missingframetitle", frame["src"] || "") }
  end

  # Reports each meta element declaring a client side refresh.
  def check_refresh
    refreshes = @soup.find_all("meta").select do |element|
      # http-equiv values are matched case-insensitively ("Refresh" is the
      # common spelling).
      element["http-equiv"].to_s.strip.downcase == "refresh"
    end

    refreshes.map { ErrorMessage.new("hasmetarefresh") }
  end

  #Utility methods

  # Links are arrays of [index, url, title, linktext] (see get_links).
  # Link A and B are ambiguous if:
  # 1. The url differs
  # 2. The title text is identical
  # 3. The link text is identical
  def is_ambiguous_link(link_a, link_b)
    link_a[1] != link_b[1] &&
      normalize_text(link_a[2]) == normalize_text(link_b[2]) &&
      normalize_text(link_a[3]) == normalize_text(link_b[3])
  end

  # Returns the first message in +messages+ whose note equals +text+, or nil.
  def find_errormsg_with_text(messages, text)
    messages.find { |errormessage| errormessage.note == text }
  end

  # Returns one [index, href, normalized title, normalized link text]
  # array per a element.
  def get_links
    links = []

    @soup.find_all("a").each_with_index do |element, index|
      title = normalize_text((element['title'] || "").strip)
      linktext = normalize_text(elements_to_text(element).strip)
      links << [index, element['href'], title, linktext]
    end

    links
  end

  # Returns the lang attribute of +element+ ("" when absent), or nil when
  # +element+ is not an instance of a class named 'Tag'.
  def langinfo(element)
    return nil unless element.class.to_s == 'Tag'

    element['lang'] || ""
  end

  # Text equivalent of an image: its alt attribute, or "" when absent.
  def img_to_text(imgtag)
    imgtag['alt'] || ""
  end

  # Concatenates the text content of +element+, recursing into child
  # elements and using the alt text for images.
  def elements_to_text(element)
    parts = element.contents.map do |el|
      if el.class.to_s == 'NavigableString'
        el
      elsif el.name == "img"
        img_to_text(el)
      else
        elements_to_text(el)
      end
    end

    parts.join
  end

  # Returns +text+ (nil is treated as "") with &nbsp; / &#160; turned into
  # spaces, newlines, carriage returns and tabs removed, runs of spaces
  # collapsed to one, and surrounding whitespace stripped.
  def normalize_text(text)
    cleaned = (text || "").gsub(/&nbsp;|&#160;/, " ").gsub(/[\n\r\t]/, "")

    # squeeze collapses every run of spaces in a single pass and always
    # terminates.
    cleaned.squeeze(" ").strip
  end

  # Returns all label elements.
  def get_labels
    @soup.find_all("label")
  end

  # Returns textarea, select and input elements, excluding inputs of type
  # button, submit, hidden or image.
  def get_editable_fields
    @soup.find_all(["textarea", "select", "input"]).reject do |field|
      ["button", "submit", "hidden", "image"].include?(field["type"] || "")
    end
  end

  # True when the document contains a frameset element.
  def is_frameset
    !@soup.find("frameset").nil?
  end

end
494
+
495
+ end
data/lib/raakt.rb CHANGED
@@ -14,7 +14,7 @@ module Raakt
14
14
  "wronghstructure" => "Document heading structure is wrong.",
15
15
  "firsthnoth1" => "The first heading is not h1.",
16
16
  "hasnestedtables" => "You have one or more nested tables.",
17
- "missingsemantics"=> "You have used <font>, <b> or <i> for visual formatting. Use CSS instead.",
17
+ "missingsemantics"=> "You have used %s for visual formatting. Use CSS instead.",
18
18
  "hasflicker" => "You have used <blink> or <marquee>. These may create accessibility issues and should be avoided.",
19
19
  "missinglanginfo" => "Document language information is missing. Use the lang attribute on the html element.",
20
20
  "missingth" => "Missing table headings (th) for table #%s.",
@@ -24,7 +24,7 @@ module Raakt
24
24
  "hasmetarefresh" => "Client side redirect (meta refresh) detected. Use server side redirection instead."
25
25
  }
26
26
 
27
- VERSION = "0.3"
27
+ VERSION = "0.4"
28
28
 
29
29
  class ErrorMessage
30
30
 
@@ -51,11 +51,12 @@ module Raakt
51
51
 
52
52
  class Test
53
53
 
54
- attr_accessor :soup, :html
54
+ attr_accessor :soup, :html, :user_agent
55
55
 
56
56
  def initialize(html=nil)
57
57
  @html = html
58
58
  @soup = BeautifulSoup.new(@html) if html
59
+ @user_agent = "Mozilla/5.0 (RAAKT v#{VERSION}; http://raakt.rubyforge.org; The Ruby Accessibility Analysis Kit)"
59
60
  end
60
61
 
61
62
  def feed(html)
@@ -67,8 +68,34 @@ module Raakt
67
68
  end
68
69
  end
69
70
 
71
+
72
+
73
+ def feedurl(url)
74
+ if url.length == 0
75
+ raise "You called feedurl with a blank url. There is nothing to check."
76
+ end
77
+
78
+ #Clean the url and make sure protocol and trailing slash is available
79
+ url = "http://" + url unless url[0..3] == "http"
80
+
81
+ require 'open-uri'
82
+
83
+ open(url, "User-Agent" => @user_agent) { |f|
84
+ @html = f.read || ""
85
+ }
86
+
87
+ if @html.length == 0
88
+ raise "Could not fetch html from the url #{url}. There is nothing to check."
89
+ else
90
+ @soup = BeautifulSoup.new(@html)
91
+ end
92
+
93
+ end
94
+
95
+
96
+
70
97
  def all
71
- #Do all checks
98
+ #Call all check methods
72
99
  messages = []
73
100
 
74
101
  self.methods.each do |method|
@@ -220,11 +247,11 @@ module Raakt
220
247
  def check_for_formatting_elements
221
248
 
222
249
  messages = []
223
- formatting_items = @soup.find_all(["font", "b", "i"])
250
+ formatting_items = @soup.find_all(%w(font b i u tt small big strike s))
224
251
  flicker_items = @soup.find_all(["blink", "marquee"])
225
252
 
226
- if formatting_items.length > 0
227
- messages << ErrorMessage.new("missingsemantics")
253
+ formatting_items.each do |element|
254
+ messages << ErrorMessage.new("missingsemantics", "<#{element.name}>")
228
255
  end
229
256
 
230
257
  if flicker_items.length > 0
@@ -14,5 +14,6 @@
14
14
  <table><tr><td>Test table</td></tr></table>
15
15
  <H1>Third h1 heading</H1>
16
16
  <marquee>Marquees are so nineties!</marquee>
17
+ <center>Centering should be done with CSS</center>
17
18
  </body>
18
19
  </html>
@@ -0,0 +1,17 @@
1
+ <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
2
+ "http://www.w3.org/TR/html4/strict.dtd">
3
+ <html lang="zh-tw" dir="ltr">
4
+ <head>
5
+ <?xml version="1.0" encoding="UTF-8"?>
6
+ <title>This is the title</title>
7
+ <link rel="schema.DC" href="http://purl.org/DC/elements/1.0/" />
8
+ </head>
9
+ <body>
10
+ <TABLE><tr><td>
11
+ <p>This is a minimal <a href="http://www.w3.org/TR/html/">HTML 4</a>
12
+ document.</p>
13
+ </td>
14
+ </tr>
15
+ </table>
16
+ </body>
17
+ </html>
data/tests/raakt_test.rb CHANGED
@@ -6,13 +6,32 @@ class RaaktTest < Test::Unit::TestCase
6
6
 
7
7
  def setup
8
8
  @raakt = Raakt::Test.new
9
+
10
+ #Use includeremote option to run unit tests that fetch test data from http://raakt.rubyforge.org
11
+ @remote_tests = false
12
+ if ARGV[0] == "includeremote"
13
+ @remote_tests = true
14
+ end
9
15
  end
10
16
 
11
17
  def test_all
12
18
  @raakt.feed(data_full_google)
13
- assert_equal 7, @raakt.all.length
19
+ assert_equal 15, @raakt.all.length
20
+ end
21
+
22
+
23
+ def test_feedurl
24
+ if @remote_tests
25
+ @raakt.feedurl("http://raakt.rubyforge.org/tests/langdoc2-iso88591.htm")
26
+ assert_equal 477, @raakt.html.length
27
+
28
+ #a redirect
29
+ else
30
+ assert true
31
+ end
14
32
  end
15
33
 
34
+
16
35
  def test_check_images
17
36
  @raakt.feed(data_imagedoc1)
18
37
  assert_equal 1, @raakt.check_images.length
@@ -62,12 +81,15 @@ class RaaktTest < Test::Unit::TestCase
62
81
  assert_equal 0, @raakt.headings.length
63
82
  end
64
83
 
84
+
65
85
  def test_level
66
86
  assert_equal 1, @raakt.level("h1")
67
87
  assert_equal 2, @raakt.level("h2")
68
88
  assert_equal 6, @raakt.level("h6")
69
89
  end
70
90
 
91
+
92
+
71
93
  def test_check_has_heading
72
94
  @raakt.feed(data_empty)
73
95
  assert_equal 1, @raakt.check_has_heading.length
@@ -84,6 +106,7 @@ class RaaktTest < Test::Unit::TestCase
84
106
  assert_equal "missingheading", @raakt.check_has_heading[0].eid
85
107
  end
86
108
 
109
+
87
110
  def test_check_document_structure
88
111
 
89
112
  @raakt.feed(data_headingsdoc1)
@@ -126,6 +149,7 @@ class RaaktTest < Test::Unit::TestCase
126
149
  assert_equal 1, @raakt.check_for_nested_tables.length
127
150
  end
128
151
 
152
+
129
153
  def test_check_tables
130
154
  @raakt.feed(data_tabledoc4)
131
155
  assert_equal 0, @raakt.check_tables.length
@@ -155,6 +179,16 @@ class RaaktTest < Test::Unit::TestCase
155
179
  @raakt.feed(data_tablelayoutdoc)
156
180
  assert_equal 1, @raakt.check_for_language_info.length
157
181
 
182
+ @raakt.feed(data_langinfodoc1)
183
+ assert_equal 0, @raakt.check_for_language_info.length
184
+
185
+ if @remote_tests
186
+ @raakt.feedurl("http://raakt.rubyforge.org/tests/langdoc2-iso88591.htm")
187
+ assert_equal 1, @raakt.check_for_language_info.length
188
+
189
+ @raakt.feedurl("http://raakt.rubyforge.org/tests/langdoc1-utf8.htm")
190
+ assert_equal 0, @raakt.check_for_language_info.length
191
+ end
158
192
  end
159
193
 
160
194
 
@@ -295,9 +329,10 @@ class RaaktTest < Test::Unit::TestCase
295
329
  def test_check_for_formatting_elements
296
330
  @raakt.feed(data_invalidelements1)
297
331
  invaliderrs = @raakt.check_for_formatting_elements
298
- assert_equal 2, invaliderrs.length
332
+ assert_equal 3, invaliderrs.length
299
333
  assert_equal "missingsemantics", invaliderrs[0].eid
300
- assert_equal "hasflicker", invaliderrs[1].eid
334
+ assert_equal "missingsemantics", invaliderrs[1].eid
335
+ assert_equal "hasflicker", invaliderrs[2].eid
301
336
 
302
337
  @raakt.feed(data_xhtmldoc1)
303
338
  assert_equal 0, @raakt.check_for_formatting_elements.length
metadata CHANGED
@@ -3,13 +3,13 @@ rubygems_version: 0.8.11
3
3
  specification_version: 1
4
4
  name: raakt
5
5
  version: !ruby/object:Gem::Version
6
- version: "0.3"
7
- date: 2006-07-13 00:00:00 +02:00
6
+ version: "0.4"
7
+ date: 2006-08-17 00:00:00 +02:00
8
8
  summary: A toolkit to find accessibility issues in HTML documents.
9
9
  require_paths:
10
10
  - lib
11
11
  email: peter.krantzNODAMNSPAM@gmail.com
12
- homepage: http://peterkrantz.com/projects/raakt
12
+ homepage: http://raakt.rubyforge.org
13
13
  rubyforge_project: raakt
14
14
  description:
15
15
  autorequire: raakt
@@ -28,6 +28,7 @@ cert_chain:
28
28
  authors:
29
29
  - Peter Krantz
30
30
  files:
31
+ - lib/raakt (kopia).rb
31
32
  - lib/raakt.rb
32
33
  - tests/empty.htm
33
34
  - tests/emptytitledoc.htm
@@ -55,6 +56,7 @@ files:
55
56
  - tests/invalidhtmldoc1.htm
56
57
  - tests/invalidhtmldoc2.htm
57
58
  - tests/invalidxhtmldoc1.htm
59
+ - tests/langinfodoc1.htm
58
60
  - tests/linkdoc1.htm
59
61
  - tests/linkdoc2.htm
60
62
  - tests/linkdoc3.htm