raakt 0.3 → 0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,495 @@
1
+ # :title: Ruby Accessibility Analysis Kit
2
+ # =Ruby Accessibility Analysis Kit
3
+ #
4
+ # See README for a full explanation of this library.
5
+
6
+ module Raakt
7
+ require 'rubyful_soup'
8
+
9
# Message templates, keyed by error id. A "%s" in a template marks the
# place where ErrorMessage inserts its optional note.
MESSAGES = {
  "missingtitle"      => "The title element is missing. Provide a descriptive title for your document.",
  "emptytitle"        => "The title element is empty. Provide a descriptive title for your document.",
  "missingalt"        => "Missing alt attribute for image (with src '%s').",
  "missingheading"    => "Missing first level heading (h1). Provide at least one first level heading describing document content.",
  "wronghstructure"   => "Document heading structure is wrong.",
  "firsthnoth1"       => "The first heading is not h1.",
  "hasnestedtables"   => "You have one or more nested tables.",
  "missingsemantics"  => "You have used <font>, <b> or <i> for visual formatting. Use CSS instead.",
  "hasflicker"        => "You have used <blink> or <marquee>. These may create accessibility issues and should be avoided.",
  "missinglanginfo"   => "Document language information is missing. Use the lang attribute on the html element.",
  "missingth"         => "Missing table headings (th) for table #%s.",
  "ambiguouslinktext" => "One or more links have the same link text ('%s'). Make sure each link is unambiguous.",
  "fieldmissinglabel" => "A field (with id/name '%s') is missing a corresponding label element. Make sure a label exists for all visible fields.",
  "missingframetitle" => "Missing title attribute for frame with url %s",
  "hasmetarefresh"    => "Client side redirect (meta refresh) detected. Use server side redirection instead."
}
26
+
27
+ VERSION = "0.31"
28
+
29
# A single accessibility finding produced by one of the checks.
#
# eid::  key into MESSAGES identifying the kind of problem.
# text:: the MESSAGES template for +eid+, with its first "%s" replaced by
#        +note+ when a note was given.
# note:: optional detail supplied by the check, or nil.
class ErrorMessage

  attr_reader :eid, :text, :note

  # eid::  error id; its template is looked up in MESSAGES.
  # note:: optional detail inserted at the first "%s" of the template.
  def initialize(eid, note=nil)
    @eid = eid
    @note = note
    template = MESSAGES[eid]
    # Use the block form of sub: a replacement *string* is scanned for
    # backslash sequences, so a note such as 'img\1.gif' would lose its
    # "\1" (read as a reference to a capture group that does not exist).
    @text = note ? template.sub(/%s/) { note } : template
  end

  # Returns the finding formatted as "<eid>: <text>".
  def to_s
    @eid + ": " + @text
  end
end
47
+
48
+
49
+
50
+
51
+
52
+ class Test
53
+
54
+ attr_accessor :soup, :html, :user_agent
55
+
56
# Creates a checker, optionally preloaded with a document.
#
# html:: markup to analyse; when omitted no soup is built until feed or
#        feedurl is called.
#
# Also sets the default User-Agent string sent by feedurl.
def initialize(html=nil)
  @user_agent = "Mozilla/5.0 (RAAKT v#{VERSION}; http://raakt.rubyforge.org; The Ruby Accessibility Analysis Kit)"
  @html = html
  return unless html
  @soup = BeautifulSoup.new(@html)
end
61
+
62
# Loads +html+ as the document to check.
#
# html:: markup string; nil is treated as an empty string.
#
# Raises RuntimeError when there is no markup. Otherwise parses the
# markup into @soup and returns the soup.
def feed(html)
  @html = html || ""
  raise "You called feed with no data. There is nothing to check." unless @html.length > 0
  @soup = BeautifulSoup.new(@html)
end
70
+
71
+
72
+
73
# Fetches the document at +url+ and loads it as the document to check.
#
# url:: address of the page; "http://" is prepended when the url does not
#       already start with an http:// or https:// scheme.
#
# The request carries the configured user_agent. Raises RuntimeError when
# the url is nil/blank or the response body is empty; errors raised by
# open-uri propagate unchanged. Returns the parsed soup.
def feedurl(url)
  if url.nil? || url.length == 0
    raise "You called feedurl with a blank url. There is nothing to check."
  end

  # Make sure a protocol is present. Test for the full scheme prefix: a
  # bare "http" prefix also matches hosts such as "httpbin.org" and would
  # leave them without a protocol.
  url = "http://" + url unless url =~ /\Ahttps?:\/\//i

  require 'open-uri'

  # Kernel#open no longer opens URLs on Ruby 3.0+, so use URI.open where
  # open-uri provides it and fall back to Kernel#open on older rubies.
  reader = lambda { |f| @html = f.read || "" }
  if URI.respond_to?(:open)
    URI.open(url, "User-Agent" => @user_agent, &reader)
  else
    open(url, "User-Agent" => @user_agent, &reader)
  end

  if @html.length == 0
    raise "Could not fetch html from the url #{url}. There is nothing to check."
  else
    @soup = BeautifulSoup.new(@html)
  end
end
94
+
95
+
96
+
97
# Runs every method of this object whose name starts with "check_" and
# returns all of their messages concatenated into one array.
def all
  checks = self.methods.select { |name| name.to_s[0, 6] == "check_" }
  checks.inject([]) { |collected, name| collected + self.send(name) }
end
109
+
110
+
111
# Reports every img element that has no alt attribute.
#
# Returns an array with one "missingalt" ErrorMessage per offending
# image; the note is the image's src, or "" when src is absent. An alt
# attribute that is present but empty is accepted.
def check_images
  messages = []

  # Plain block iteration: the 1.8-only "for x in y:" / "if cond:" colon
  # forms are a syntax error on Ruby 1.9 and later.
  @soup.find_all("img").each do |image|
    next unless image["alt"].nil?
    messages << ErrorMessage.new("missingalt", image["src"] || "")
  end

  messages
end
125
+
126
+
127
# Verifies that the document has a non-empty title element.
#
# Returns ["missingtitle"] when no title element is found, ["emptytitle"]
# when its normalized text is empty, and [] otherwise (each entry being an
# ErrorMessage).
def check_title
  title = @soup.find("title")
  return [ErrorMessage.new("missingtitle")] unless title

  return [ErrorMessage.new("emptytitle")] if normalize_text(title.string).length == 0

  []
end
142
+
143
+
144
# Verifies that the document contains at least one h1 element.
#
# Returns an array holding a single "missingheading" ErrorMessage when no
# h1 is found, otherwise an empty array.
def check_has_heading
  return [] if @soup.find_all("h1").length > 0

  [ErrorMessage.new("missingheading")]
end
153
+
154
+
155
# Collects the document's h1..h6 elements into one flat array.
#
# The result is grouped by level (all h1 first, then all h2, and so on),
# because each level is looked up separately.
def headings
  found = []

  %w(h1 h2 h3 h4 h5 h6).each do |tag|
    matches = @soup.find_all(tag)
    found.push(matches) if matches.length > 0
  end

  found.flatten
end
166
+
167
+
168
# Returns the numeric level of a heading name, e.g. "h2" => 2.
#
# heading:: element name; its second character is converted with Integer,
#           which raises ArgumentError when that is not a number.
def level(heading)
  digit = heading[1, 1]
  Integer(digit)
end
171
+
172
+
173
# Checks the sequence of headings returned by +headings+.
#
# Adds "firsthnoth1" (note: the heading's name, e.g. "h2") when the first
# heading is not level 1. Adds "wronghstructure" and stops at the first
# heading whose level is more than one greater than its predecessor's.
# Returns the collected ErrorMessages.
def check_document_structure
  messages = []
  docheadings = headings

  docheadings.each_with_index do |heading, position|
    if position == 0
      unless level(heading.name) == 1
        messages << ErrorMessage.new("firsthnoth1", "h" + heading.name[1,1])
      end
    elsif level(heading.name) - level(docheadings[position - 1].name) > 1
      messages << ErrorMessage.new("wronghstructure")
      break
    end
  end

  messages
end
196
+
197
+
198
# Detects tables that contain another table.
#
# Returns an array holding one "hasnestedtables" ErrorMessage as soon as
# any table with a table inside it is found, otherwise an empty array.
def check_for_nested_tables
  nested = @soup.find_all("table").any? do |table|
    table.find_all("table").length > 0
  end

  nested ? [ErrorMessage.new("hasnestedtables")] : []
end
212
+
213
+
214
# Reports tables that have no table heading (th) cell.
#
# A table passes when table.thead.tr.th or table.tr.th yields an element.
# Returns one "missingth" ErrorMessage per failing table; the note is the
# table's 1-based position among the tables found, as a string.
def check_tables
  messages = []

  @soup.find_all("table").each_with_index do |table, index|
    # Evaluate each table on its own. A flag shared across iterations
    # would stay true after the first table with a th and hide every
    # later table that lacks one.
    head_row = table.thead && table.thead.tr
    body_row = table.tr
    has_th = (head_row && head_row.th) || (body_row && body_row.th)

    messages << ErrorMessage.new("missingth", (index + 1).to_s) unless has_th
  end

  messages
end
245
+
246
+
247
# Looks for presentational and flickering elements.
#
# Adds one "missingsemantics" ErrorMessage when any font, b or i element
# exists and one "hasflicker" ErrorMessage when any blink or marquee
# element exists. Returns the messages in that order.
def check_for_formatting_elements
  formatting_count = @soup.find_all(["font", "b", "i"]).length
  flicker_count = @soup.find_all(["blink", "marquee"]).length

  messages = []
  messages << ErrorMessage.new("missingsemantics") unless formatting_count == 0
  messages << ErrorMessage.new("hasflicker") unless flicker_count == 0
  messages
end
263
+
264
+
265
# Verifies that the html element declares a language.
#
# The value reported by langinfo for the html element must be longer than
# one character; otherwise a single "missinglanginfo" ErrorMessage is
# returned. Returns an empty array when the check passes.
def check_for_language_info
  lang = langinfo(@soup.find("html")) || ""

  lang.length > 1 ? [] : [ErrorMessage.new("missinglanginfo")]
end
278
+
279
+
280
# Reports link texts that are shared by links considered ambiguous.
#
# Every pair of distinct links from get_links is passed to
# is_ambiguous_link. For each ambiguous link text one "ambiguouslinktext"
# ErrorMessage is added (note: the link text); a text that already has a
# message is not reported again. Returns the collected messages.
def check_link_text
  messages = []
  links = get_links

  links.each do |link_a|
    links.each do |link_b|
      next if link_a[0] == link_b[0]                          # same link
      next unless is_ambiguous_link(link_a, link_b)
      next if find_errormsg_with_text(messages, link_a[3])    # already reported

      messages << ErrorMessage.new("ambiguouslinktext", link_a[3])
    end
  end

  messages
end
301
+
302
+
303
# Verifies that every editable field is referenced by a label.
#
# A field passes when its id equals the "for" attribute of some label
# from get_labels. For every other field from get_editable_fields a
# "fieldmissinglabel" ErrorMessage is added; the note is the field's id,
# else its name, else "unknown". Returns the collected messages.
def check_form
  labels = get_labels
  fields = get_editable_fields

  # ids that at least one label points at
  label_for_ids = []
  labels.each { |label| label_for_ids << label["for"] if label["for"] }

  messages = []
  fields.each do |field|
    next if label_for_ids.include?(field["id"] || "")

    identifier = field["id"] || field["name"] || "unknown"
    messages << ErrorMessage.new("fieldmissinglabel", identifier)
  end

  messages
end
329
+
330
+
331
# Verifies that every frame in a frameset document has a title.
#
# Does nothing unless is_frameset is true. For each frame whose title is
# missing or blank after normalize_text, a "missingframetitle"
# ErrorMessage is added; the note is the frame's src, or "" when the
# frame has no src. Returns the collected messages.
def check_frames
  messages = []
  return messages unless is_frameset

  @soup.find_all("frame").each do |frame|
    next unless normalize_text(frame["title"] || "").length == 0

    # Default a missing src to "" (as check_images does); a nil note
    # would leave the raw "%s" placeholder in the message text.
    messages << ErrorMessage.new("missingframetitle", frame["src"] || "")
  end

  messages
end
349
+
350
+
351
# Detects client side redirects declared with meta refresh.
#
# Returns one "hasmetarefresh" ErrorMessage for every meta element whose
# http-equiv attribute is "refresh" in any letter case.
def check_refresh
  messages = []

  @soup.find_all("meta").each do |element|
    # Compare case-insensitively: pages commonly write "Refresh" or
    # "REFRESH", which an exact match against "refresh" misses.
    if (element["http-equiv"] || "").to_s.downcase == "refresh"
      messages << ErrorMessage.new("hasmetarefresh")
    end
  end

  messages
end
364
+
365
+
366
+ #Utility methods
367
+
368
# Decides whether two link records are ambiguous.
#
# link_a, link_b:: arrays of the form [index, url, title, linktext].
#
# Returns true only when the urls differ while both the normalized titles
# and the normalized link texts are equal; otherwise false.
def is_ambiguous_link(link_a, link_b)
  return false if link_a[1] == link_b[1]
  return false unless normalize_text(link_a[2]) == normalize_text(link_b[2])

  normalize_text(link_a[3]) == normalize_text(link_b[3])
end
381
+
382
+
383
# Returns the first entry of +messages+ whose note equals +text+, or nil
# when no entry matches.
def find_errormsg_with_text(messages, text)
  messages.find { |candidate| candidate.note == text }
end
392
+
393
+
394
# Builds one record per anchor (a) element.
#
# Each record is [index, url, title, linktext]: the anchor's position in
# the result of find_all, its href, its normalized title attribute and
# its normalized text as produced by elements_to_text.
def get_links
  links = []

  @soup.find_all("a").each_with_index do |element, position|
    title = normalize_text((element['title'] || "").strip)
    linktext = normalize_text((elements_to_text(element) || "").strip)
    links << [position, element['href'], title, linktext]
  end

  links
end
409
+
410
+
411
# Returns the lang attribute of +element+.
#
# Gives nil when +element+ is not an instance of a class named "Tag", and
# "" when the tag has no lang attribute.
def langinfo(element)
  return nil unless element.class.to_s == 'Tag'

  element['lang'] || ""
end
424
+
425
+
426
# Returns the text equivalent of an image: its alt attribute, or "" when
# the attribute is absent.
def img_to_text(imgtag)
  alt = imgtag['alt']
  alt ? alt : ""
end
429
+
430
+
431
# Flattens the contents of +element+ into a single string.
#
# Children whose class is named "NavigableString" are appended as they
# are, img children contribute img_to_text, and any other child is
# flattened recursively. Returns "" for an element without contents.
def elements_to_text(element)
  element.contents.inject("") do |text, node|
    if node.class.to_s == 'NavigableString'
      text + node
    elsif node.name == "img"
      text + img_to_text(node)
    else
      text + elements_to_text(node)
    end
  end
end
448
+
449
+
450
# Normalizes text for comparison.
#
# text:: string to clean; nil is treated as "".
#
# Replaces the entities &nbsp; and &#160; with a space, removes newlines,
# carriage returns and tabs, collapses runs of spaces into one space and
# strips leading/trailing whitespace. Returns the cleaned string.
def normalize_text(text)
  retval = (text || "").gsub(/&nbsp;|&#160;/, " ")
  retval = retval.gsub(/[\n\r\t]/, "")

  # Collapse runs in a single pass. Looping while a single space matches
  # and replacing it with a single space never terminates once the text
  # contains any space.
  retval.gsub(/ {2,}/, " ").strip
end
465
+
466
+
467
# Returns every label element found in the document.
def get_labels
  @soup.find_all("label")
end
470
+
471
+
472
# Returns the form fields that are expected to have a label.
#
# All textarea, select and input elements are considered; inputs whose
# type is button, submit, reset, hidden or image (in any letter case) are
# left out. Elements without a type attribute are kept.
def get_editable_fields
  # Buttons carry their own caption and hidden inputs are not shown, so
  # none of these need a label. "reset" belongs here with the other button
  # types, and the type is lower-cased so type="HIDDEN" is excluded too.
  unlabelled_types = ["button", "submit", "reset", "hidden", "image"]
  fields = []

  @soup.find_all(["textarea", "select", "input"]).each do |field|
    field_type = (field["type"] || "").to_s.downcase
    fields << field unless unlabelled_types.include?(field_type)
  end

  fields
end
487
+
488
+
489
# Returns true when the document contains a frameset element, false
# otherwise.
def is_frameset
  !@soup.find("frameset").nil?
end
492
+
493
+ end
494
+
495
+ end
data/lib/raakt.rb CHANGED
@@ -14,7 +14,7 @@ module Raakt
14
14
  "wronghstructure" => "Document heading structure is wrong.",
15
15
  "firsthnoth1" => "The first heading is not h1.",
16
16
  "hasnestedtables" => "You have one or more nested tables.",
17
- "missingsemantics"=> "You have used <font>, <b> or <i> for visual formatting. Use CSS instead.",
17
+ "missingsemantics"=> "You have used %s for visual formatting. Use CSS instead.",
18
18
  "hasflicker" => "You have used <blink> or <marquee>. These may create accessibility issues and should be avoided.",
19
19
  "missinglanginfo" => "Document language information is missing. Use the lang attribute on the html element.",
20
20
  "missingth" => "Missing table headings (th) for table #%s.",
@@ -24,7 +24,7 @@ module Raakt
24
24
  "hasmetarefresh" => "Client side redirect (meta refresh) detected. Use server side redirection instead."
25
25
  }
26
26
 
27
- VERSION = "0.3"
27
+ VERSION = "0.4"
28
28
 
29
29
  class ErrorMessage
30
30
 
@@ -51,11 +51,12 @@ module Raakt
51
51
 
52
52
  class Test
53
53
 
54
- attr_accessor :soup, :html
54
+ attr_accessor :soup, :html, :user_agent
55
55
 
56
56
  def initialize(html=nil)
57
57
  @html = html
58
58
  @soup = BeautifulSoup.new(@html) if html
59
+ @user_agent = "Mozilla/5.0 (RAAKT v#{VERSION}; http://raakt.rubyforge.org; The Ruby Accessibility Analysis Kit)"
59
60
  end
60
61
 
61
62
  def feed(html)
@@ -67,8 +68,34 @@ module Raakt
67
68
  end
68
69
  end
69
70
 
71
+
72
+
73
+ def feedurl(url)
74
+ if url.length == 0
75
+ raise "You called feedurl with a blank url. There is nothing to check."
76
+ end
77
+
78
+ #Clean the url and make sure protocol and trailing slash is available
79
+ url = "http://" + url unless url[0..3] == "http"
80
+
81
+ require 'open-uri'
82
+
83
+ open(url, "User-Agent" => @user_agent) { |f|
84
+ @html = f.read || ""
85
+ }
86
+
87
+ if @html.length == 0
88
+ raise "Could not fetch html from the url #{url}. There is nothing to check."
89
+ else
90
+ @soup = BeautifulSoup.new(@html)
91
+ end
92
+
93
+ end
94
+
95
+
96
+
70
97
  def all
71
- #Do all checks
98
+ #Call all check methods
72
99
  messages = []
73
100
 
74
101
  self.methods.each do |method|
@@ -220,11 +247,11 @@ module Raakt
220
247
  def check_for_formatting_elements
221
248
 
222
249
  messages = []
223
- formatting_items = @soup.find_all(["font", "b", "i"])
250
+ formatting_items = @soup.find_all(%w(font b i u tt small big strike s))
224
251
  flicker_items = @soup.find_all(["blink", "marquee"])
225
252
 
226
- if formatting_items.length > 0
227
- messages << ErrorMessage.new("missingsemantics")
253
+ formatting_items.each do |element|
254
+ messages << ErrorMessage.new("missingsemantics", "<#{element.name}>")
228
255
  end
229
256
 
230
257
  if flicker_items.length > 0
@@ -14,5 +14,6 @@
14
14
  <table><tr><td>Test table</td></tr></table>
15
15
  <H1>Third h1 heading</H1>
16
16
  <marquee>Marquees are so nineties!</marquee>
17
+ <center>Centering should be done with CSS</center>
17
18
  </body>
18
19
  </html>
@@ -0,0 +1,17 @@
1
+ <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
2
+ "http://www.w3.org/TR/html4/strict.dtd">
3
+ <html lang="zh-tw" dir="ltr">
4
+ <head>
5
+ <?xml version="1.0" encoding="UTF-8"?>
6
+ <title>This is the title</title>
7
+ <link rel="schema.DC" href="http://purl.org/DC/elements/1.0/" />
8
+ </head>
9
+ <body>
10
+ <TABLE><tr><td>
11
+ <p>This is a minimal <a href="http://www.w3.org/TR/html/">HTML 4</a>
12
+ document.</p>
13
+ </td>
14
+ </tr>
15
+ </table>
16
+ </body>
17
+ </html>
data/tests/raakt_test.rb CHANGED
@@ -6,13 +6,32 @@ class RaaktTest < Test::Unit::TestCase
6
6
 
7
7
  def setup
8
8
  @raakt = Raakt::Test.new
9
+
10
+ #Use includeremote option to run unit tests that fetch test data from http://raakt.rubyforge.org
11
+ @remote_tests = false
12
+ if ARGV[0] == "includeremote"
13
+ @remote_tests = true
14
+ end
9
15
  end
10
16
 
11
17
  def test_all
12
18
  @raakt.feed(data_full_google)
13
- assert_equal 7, @raakt.all.length
19
+ assert_equal 15, @raakt.all.length
20
+ end
21
+
22
+
23
+ def test_feedurl
24
+ if @remote_tests
25
+ @raakt.feedurl("http://raakt.rubyforge.org/tests/langdoc2-iso88591.htm")
26
+ assert_equal 477, @raakt.html.length
27
+
28
+ #a redirect
29
+ else
30
+ assert true
31
+ end
14
32
  end
15
33
 
34
+
16
35
  def test_check_images
17
36
  @raakt.feed(data_imagedoc1)
18
37
  assert_equal 1, @raakt.check_images.length
@@ -62,12 +81,15 @@ class RaaktTest < Test::Unit::TestCase
62
81
  assert_equal 0, @raakt.headings.length
63
82
  end
64
83
 
84
+
65
85
  def test_level
66
86
  assert_equal 1, @raakt.level("h1")
67
87
  assert_equal 2, @raakt.level("h2")
68
88
  assert_equal 6, @raakt.level("h6")
69
89
  end
70
90
 
91
+
92
+
71
93
  def test_check_has_heading
72
94
  @raakt.feed(data_empty)
73
95
  assert_equal 1, @raakt.check_has_heading.length
@@ -84,6 +106,7 @@ class RaaktTest < Test::Unit::TestCase
84
106
  assert_equal "missingheading", @raakt.check_has_heading[0].eid
85
107
  end
86
108
 
109
+
87
110
  def test_check_document_structure
88
111
 
89
112
  @raakt.feed(data_headingsdoc1)
@@ -126,6 +149,7 @@ class RaaktTest < Test::Unit::TestCase
126
149
  assert_equal 1, @raakt.check_for_nested_tables.length
127
150
  end
128
151
 
152
+
129
153
  def test_check_tables
130
154
  @raakt.feed(data_tabledoc4)
131
155
  assert_equal 0, @raakt.check_tables.length
@@ -155,6 +179,16 @@ class RaaktTest < Test::Unit::TestCase
155
179
  @raakt.feed(data_tablelayoutdoc)
156
180
  assert_equal 1, @raakt.check_for_language_info.length
157
181
 
182
+ @raakt.feed(data_langinfodoc1)
183
+ assert_equal 0, @raakt.check_for_language_info.length
184
+
185
+ if @remote_tests
186
+ @raakt.feedurl("http://raakt.rubyforge.org/tests/langdoc2-iso88591.htm")
187
+ assert_equal 1, @raakt.check_for_language_info.length
188
+
189
+ @raakt.feedurl("http://raakt.rubyforge.org/tests/langdoc1-utf8.htm")
190
+ assert_equal 0, @raakt.check_for_language_info.length
191
+ end
158
192
  end
159
193
 
160
194
 
@@ -295,9 +329,10 @@ class RaaktTest < Test::Unit::TestCase
295
329
  def test_check_for_formatting_elements
296
330
  @raakt.feed(data_invalidelements1)
297
331
  invaliderrs = @raakt.check_for_formatting_elements
298
- assert_equal 2, invaliderrs.length
332
+ assert_equal 3, invaliderrs.length
299
333
  assert_equal "missingsemantics", invaliderrs[0].eid
300
- assert_equal "hasflicker", invaliderrs[1].eid
334
+ assert_equal "missingsemantics", invaliderrs[1].eid
335
+ assert_equal "hasflicker", invaliderrs[2].eid
301
336
 
302
337
  @raakt.feed(data_xhtmldoc1)
303
338
  assert_equal 0, @raakt.check_for_formatting_elements.length
metadata CHANGED
@@ -3,13 +3,13 @@ rubygems_version: 0.8.11
3
3
  specification_version: 1
4
4
  name: raakt
5
5
  version: !ruby/object:Gem::Version
6
- version: "0.3"
7
- date: 2006-07-13 00:00:00 +02:00
6
+ version: "0.4"
7
+ date: 2006-08-17 00:00:00 +02:00
8
8
  summary: A toolkit to find accessibility issues in HTML documents.
9
9
  require_paths:
10
10
  - lib
11
11
  email: peter.krantzNODAMNSPAM@gmail.com
12
- homepage: http://peterkrantz.com/projects/raakt
12
+ homepage: http://raakt.rubyforge.org
13
13
  rubyforge_project: raakt
14
14
  description:
15
15
  autorequire: raakt
@@ -28,6 +28,7 @@ cert_chain:
28
28
  authors:
29
29
  - Peter Krantz
30
30
  files:
31
+ - lib/raakt (kopia).rb
31
32
  - lib/raakt.rb
32
33
  - tests/empty.htm
33
34
  - tests/emptytitledoc.htm
@@ -55,6 +56,7 @@ files:
55
56
  - tests/invalidhtmldoc1.htm
56
57
  - tests/invalidhtmldoc2.htm
57
58
  - tests/invalidxhtmldoc1.htm
59
+ - tests/langinfodoc1.htm
58
60
  - tests/linkdoc1.htm
59
61
  - tests/linkdoc2.htm
60
62
  - tests/linkdoc3.htm