RubyGems - raakt - Versions diffs - 0.4 → 0.5 - Mend

raakt 0.4 → 0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (18) hide show

data/lib/raakt.rb +346 -274
data/tests/areadoc1.htm +12 -0
data/tests/areadoc2.htm +12 -0
data/tests/areadoc3.htm +12 -0
data/tests/bdoc.htm +9 -0
data/tests/charset_nocharset_specified.htm +8 -0
data/tests/charset_utf8.htm +9 -0
data/tests/embeddoc1.htm +9 -0
data/tests/formdoc1.htm +14 -0
data/tests/formdoc2.htm +15 -0
data/tests/formdoc3.htm +22 -0
data/tests/full_berg.htm +441 -0
data/tests/inputimgdoc1.htm +14 -0
data/tests/langinfodoc2.htm +8 -0
data/tests/nestedtabledoc.htm +205 -0
data/tests/raakt_test.rb +204 -144
metadata +21 -7
data/lib/raakt (kopia).rb +0 -495

data/lib/raakt.rb CHANGED Viewed

@@ -1,101 +1,110 @@
-# :title: Ruby Accessibility Analysis Kit
-# =Ruby Accessibility Analysis Kit
+# == The Ruby Accessibility Analysis Kit (RAAKT)
+# :title: Ruby Accessibility Analysis Kit (RAAKT)
+# Author::    Peter Krantz (http://www.peterkrantz.com/)
+# License::   See LICENSE file
 #
-# See README for a full explanation of this library.
+# RAAKT is a toolkit to find accessibility issues in HTML documents. RAAKT can be used as part of an automated test procedure or as a standalone module for mass validation of all pages in a site.
+#
+# The ambition has been to provide tests that can be fully automated. Currently, none of the included tests should fail for any web page.
+#
+# Many of the tests included here map to tests defined in the Unified Web Evaluation Methodology (UWEM[http://www.wabcluster.org/uwem/tests/]). See note for each test to find the corresponding UWEM test.
+#
+# == Output
+# RAAKT output is in the form of an array of Raakt::ErrorMessage objects.
+#
+# == Contributions
+# Thanks to Derek Perrault for refactoring RAAKT to use Hpricot[http://code.whytheluckystiff.net/hpricot/] while at the same time making the code more readable.
+#
+# == Example usage
+# See the examples folder for a small script that shows how to retrieve a remote web page and perform an accessibility test on it.
 module Raakt
-  require 'rubyful_soup'
+  require 'hpricot'
   MESSAGES = {
-    "missingtitle"    => "The title element is missing. Provide a descriptive title for your document.",
-    "emptytitle"      => "The title element is empty. Provide a descriptive title for your document.",
-    "missingalt"      => "Missing alt attribute for image (with src '%s').",
-    "missingheading"  => "Missing first level heading (h1). Provide at least one first level heading describing document content.",
-    "wronghstructure" => "Document heading structure is wrong.",
-    "firsthnoth1"     => "The first heading is not h1.",
-    "hasnestedtables" => "You have one or more nested tables.",
-    "missingsemantics"=> "You have used %s for visual formatting. Use CSS instead.",
-    "hasflicker"      => "You have used <blink> or <marquee>. These may create accessibility issues and should be avoided.",
-    "missinglanginfo" => "Document language information is missing. Use the lang attribute on the html element.",
-    "missingth"       => "Missing table headings (th) for table #%s.",
-    "ambiguouslinktext" => "One or more links have the same link text ('%s'). Make sure each link is unambiguous.",
-    "fieldmissinglabel" => "A field (with id/name '%s') is missing a corresponding label element. Make sure a label exists for all visible fields.",
-    "missingframetitle" => "Missing title attribute for frame with url %s",
-    "hasmetarefresh"  => "Client side redirect (meta refresh) detected. Use server side redirection instead."
+    :missing_title       => "The title element is missing. Provide a descriptive title for your document.",
+    :empty_title         => "The title element is empty. Provide a descriptive title for your document.",
+    :missing_alt         => "Missing alt attribute for image (with src '%s').",
+    :missing_heading     => "Missing first level heading (h1). Provide at least one first level heading describing document content.",
+    :wrong_h_structure   => "Document heading structure is wrong.",
+    :first_h_not_h1      => "The first heading is not h1.",
+    :has_nested_tables   => "You have one or more nested tables.",
+    :missing_semantics   => "You have used %s for visual formatting. Use CSS instead.",
+    :has_flicker         => "You have used <blink> and/or <marquee>. These may create accessibility issues and should be avoided.",
+    :missing_lang_info   => "Document language information is missing. Use the lang attribute on the html element.",
+    :missing_th          => "Missing table headings (th) for table #%s.",
+    :ambiguous_link_text => "One or more links have the same link text ('%s'). Make sure each link is unambiguous.",
+    :field_missing_label => "A field (with id/name '%s') is missing a corresponding label element. Make sure a label exists for all visible fields.",
+    :missing_frame_title => "Missing title attribute for frame with url %s",
+    :has_meta_refresh    => "Client side redirect (meta refresh) detected. Use server side redirection instead.",
+	:charset_mismatch	 => "The character set specified in the HTTP headers does not match that specified in the markup.",
+	:embed_used			 => "You have used the embed element. It does not provide a way to express a text representation.",
+	:wrong_lang_code	 => "You have used a language code ('%s') not recognized in the ISO 639 standard.",
+	:fieldset_missing_legend => "Missing legend element for fieldset #%s.",
+	:missing_input_alt	 => "Missing alt attribute for image button with id/name '%s'.",
+	:missing_input_alt_text	 => "Missing alt text for image button with id/name '%s'.",
+	:missing_area_alt	 => "Missing alt attribute for area with id/name '%s'.",
+	:missing_area_alt_text	 => "Missing alt text for area with id/name '%s'."
   }
-  VERSION = "0.4"
+  VERSION = "0.5"
   class ErrorMessage
     attr_reader :eid, :text, :note
     def initialize(eid, note=nil)
       @eid = eid
       if note
-        @text = MESSAGES[eid].sub(/%s/, note)
+        @text = MESSAGES[@eid].sub(/%s/, note)
       else
-        @text = MESSAGES[eid]
+        @text = MESSAGES[@eid]
       end
       @note = note
     end
     def to_s
-      @eid + ": " + @text
+      "#{@eid}: #{@text}"
     end
-  end
+	# Return single error message as an xml element.
+  	def to_xml
+  		"<message id=\"#{@eid}\">#{@text}</message>"
+  	end
+  end
   class Test
-    attr_accessor :soup, :html, :user_agent
-    def initialize(html=nil)
+    attr_accessor :html, :headers, :user_agent, :ignore_bi
+    def initialize(html=nil, headers=nil)
       @html = html
-      @soup = BeautifulSoup.new(@html) if html
-      @user_agent = "Mozilla/5.0 (RAAKT v#{VERSION}; http://raakt.rubyforge.org; The Ruby Accessibility Analysis Kit)"
-    end
-    def feed(html)
-      @html = html || ""
-      if @html.length > 0
-        @soup = BeautifulSoup.new(@html)
-      else
-        raise "You called feed with no data. There is nothing to check."
-      end
+	  @headers = headers
+      self.doc = @html if html
+	  self.headers = @headers if headers
+	  @ignore_bi = false
     end
-    def feedurl(url)
-      if url.length == 0
-        raise "You called feedurl with a blank url. There is nothing to check."
-      end
-      #Clean the url and make sure protocol and trailing slash is available
-      url = "http://" + url unless url[0..3] == "http"
-      require 'open-uri'
-      open(url, "User-Agent" => @user_agent) { |f|
-        @html = f.read || ""
-      }
-      if @html.length == 0
-        raise "Could not fetch html from the url #{url}. There is nothing to check."
-      else
-        @soup = BeautifulSoup.new(@html)
-      end
+	# Set the HTML used in the test.
+    def doc=(html)
+	  Hpricot.buffer_size = 262144 #Allow for asp.net bastard-sized viewstate attributes...
+      @doc = Hpricot(html)
     end
+    # Set the HTTP headers to be used in the test. Headers are necessary for some tests (e.g. to check encoding).
+    def headers=(headers)
+		if headers
+      		@headers = downcase_hash_keys(headers)
+		else
+			@headers = nil
+		end
+    end
+	# Call all check methods.
     def all
-      #Call all check methods
       messages = []
       self.methods.each do |method|
@@ -107,69 +116,121 @@ module Raakt
       return messages
     end
+	# Verify that all fieldset elements have a legend child element. See UWEM 1.0 Test 12.3_HTML_01.
+	def check_fieldset_legend
+		messages = []
+		fieldsets = (@doc/"fieldset")
+		fieldset_instance = 1
+		for fieldset in fieldsets
+			if (fieldset/"legend").empty?
+				messages << ErrorMessage.new(:fieldset_missing_legend, fieldset_instance.to_s)
+			end
+			fieldset_instance += 1
+		end
+		messages
+	end
+	# Verify that the embed element isn't used. See UWEM 1.0 Test 1.1_HTML_06.
+	def check_embed
+		return [ErrorMessage.new(:embed_used)] unless (@doc/'embed').empty?
+		[]
+	end
+	# Verify that the character set specified in the HTTP headers matches that specified in the HTML meta element.
+	def check_character_set
+		messages = []
+		header_charset = meta_charset = ""
+		if @headers and @headers.length > 0 then
+			if @headers.has_key?("content-type")
+				header_charset = parse_charset(@headers["content-type"].to_s)
+			end
+			#get meta element charset
+			meta_elements = @doc.search("//meta[@http-equiv]")
+			for element in meta_elements do
+				if element["http-equiv"].downcase == "content-type" then
+					meta_charset = parse_charset(element["content"])
+				end
+			end
+			if header_charset.length > 0 and meta_charset.length > 0
+				unless meta_charset == header_charset
+					messages << ErrorMessage.new(:charset_mismatch)
+				end
+			end
+		end
+		return messages
+	end
+	# Verify that all input type=image elements have an alt attribute.
+	def check_input_type_img
+		#Covers UWEM 1.0 Test 1.1_HTML_01
+		messages = []
+		image_input_buttons = @doc.search("input").select { |element| element['type'] =~ /image/i }
+		image_input_buttons.map { |element|
+			unless element['alt']
+				messages << ErrorMessage.new(:missing_input_alt, element['name'] || element['id'] || "")
+			else
+				if element['alt'].length == 0
+					messages << ErrorMessage.new(:missing_input_alt_text, element['name'] || element['id'] || "")
+				end
+			end
+		}
+		messages
+	end
+	# Verify that all img elements have an alt attribute.
     def check_images
-      #soup = BeautifulSoup.new(html)
-      images = @soup.find_all("img")
-      messages = []
-      for image in images:
-          if image["alt"] == nil:
-            img_src = image["src"] || ""
-            messages << ErrorMessage.new("missingalt", img_src)
-          end
-      end
-      return messages
+      no_alt_images = (@doc/"img:not([@alt])")
+      no_alt_images.map { |img| ErrorMessage.new(:missing_alt, img['src']) }
     end
-    def check_title
-      title = @soup.find("title")
-      messages = []
-      if title
-        titletext = normalize_text(title.string)
-        if titletext.length == 0
-          messages << ErrorMessage.new("emptytitle")
-        end
-      else
-        messages << ErrorMessage.new("missingtitle")
-      end
-      return messages
-    end
+	# Verify that all area elements have a non-empty alt attribute. See UWEM 1.0 Test 1.1_HTML_01 (together with check_images)
+    def check_areas
+		messages = []
+		area_elements = (@doc/"area")
+		area_elements.map { |element|
+			unless element['alt']
+				messages << ErrorMessage.new(:missing_area_alt, element['name'] || element['id'] || "unknown")
+			else
+				if element['alt'].length == 0
+					messages << ErrorMessage.new(:missing_area_alt_text, element['name'] || element['id'] || "unknown")
+				end
+			end
+		}
-    def check_has_heading
-      messages = []
-      if @soup.find_all("h1").length == 0
-        messages << ErrorMessage.new("missingheading")
-      end
-      return messages
+		messages
     end
-    def headings
-      headings = []
-      headings.push(@soup.find_all("h1")) if @soup.find_all("h1").length > 0
-      headings.push(@soup.find_all("h2")) if @soup.find_all("h2").length > 0
-      headings.push(@soup.find_all("h3")) if @soup.find_all("h3").length > 0
-      headings.push(@soup.find_all("h4")) if @soup.find_all("h4").length > 0
-      headings.push(@soup.find_all("h5")) if @soup.find_all("h5").length > 0
-      headings.push(@soup.find_all("h6")) if @soup.find_all("h6").length > 0
-      return headings.flatten
+	# Verify that the document has a non-empty title element.
+    def check_title
+      title = @doc.at('title')
+      return [ErrorMessage.new(:missing_title)] unless title
+      return [ErrorMessage.new(:empty_title)] if normalize_text(title.inner_html).empty?
+      []
     end
-    def level(heading)
-      Integer(heading[1,1])
+	# Verify that the document has at least one h1 element.
+    def check_has_heading
+      return [ErrorMessage.new(:missing_heading)] if (@doc/'h1').empty?
+      []
     end
+	# Verify that heading elements (h1-h6) appear in the correct order (no levels skipped). See UWEM 1.0 Test 3.5_HTML_03.
     def check_document_structure
       messages = []
       currentitem = 0
@@ -178,11 +239,11 @@ module Raakt
       for heading in docheadings
         if currentitem == 0
           if level(heading.name) != 1
-            messages << ErrorMessage.new("firsthnoth1", "h" + heading.name[1,1])
+            messages << ErrorMessage.new(:first_h_not_h1, "h" + heading.name[1,1])
           end
         else
           if level(heading.name) - level(docheadings[currentitem - 1].name) > 1
-            messages << ErrorMessage.new("wronghstructure")
+            messages << ErrorMessage.new(:wrong_h_structure)
             break
           end
         end
@@ -191,122 +252,125 @@ module Raakt
       end
-      return messages
+      messages
     end
+	# Verify that the document does not have any nested tables. This is indicative of a table-based layout.
     def check_for_nested_tables
       messages = []
-      tables = @soup.find_all("table")
+      tables = (@doc/"table")
       for table in tables
-        if table.find_all("table").length > 0
-          messages << ErrorMessage.new("hasnestedtables")
-          break
+        unless (table/"table").empty?
+          return messages << ErrorMessage.new(:has_nested_tables)
         end
       end
-      return messages
+      messages
     end
+	# Verify that all tables have at least one table header (th) element.
     def check_tables
       messages = []
-      tables = @soup.find_all("table")
-      hasth = false
+      tables = (@doc/"table")
       currenttable = 1
       for table in tables
-        if table.thead
-          if table.thead.tr
-            if table.thead.tr.th
-              hasth = true
-            end
-          end
-        end
-        if table.tr
-          if table.tr.th
-            hasth = true
-          end
-        end
-        unless hasth
-          messages << ErrorMessage.new("missingth", currenttable.to_s)
-        end
+      	hasth = false
+        hasth = true unless (table/">tr>th").empty?
+        hasth = true unless (table/">thead>tr>th").empty?
+        messages << ErrorMessage.new(:missing_th, currenttable.to_s) unless hasth
         currenttable += 1
       end
-      return messages
+      messages
     end
+	# Verify that no formatting elements have been used. See UWEM 1.0 Test 7.2_HTML_01 and Test 7.3_HTML_01.
     def check_for_formatting_elements
-      messages = []
-      formatting_items = @soup.find_all(%w(font b i u tt small big strike s))
-      flicker_items = @soup.find_all(["blink", "marquee"])
-      formatting_items.each do |element|
-        messages << ErrorMessage.new("missingsemantics", "<#{element.name}>")
-      end
+      	messages = []
-      if flicker_items.length > 0
-          messages << ErrorMessage.new("hasflicker")
-      end
-      return messages
+	  	formatting_elements = %w(font b i u tt small big strike s)
+		formatting_elements = %w(font u tt small big strike s) if @ignore_bi
+	    formatting_items = (@doc/formatting_elements.join('|'))
+      	unless formatting_items.empty?
+        	messages << ErrorMessage.new(:missing_semantics, "#{formatting_items.join(', ')}")
+	    end
+	    flicker_elements = %w(blink marquee)
+	    flicker_items = (@doc/flicker_elements.join('|'))
+      	unless flicker_items.empty?
+        	messages << ErrorMessage.new(:has_flicker)
+      	end
+      	messages
     end
+	# Verify that the root document's html element has a lang attribute.
     def check_for_language_info
-      messages = []
-      htmlelement = @soup.find("html")
-      lang = langinfo(htmlelement) || ""
-      unless lang.length > 1
-        messages << ErrorMessage.new("missinglanginfo")
-      end
-      return messages
+      messages = []
+	  unless (@doc/'html[@lang]').empty?
+	  	lang_code = (@doc/"html").first["lang"].to_s
+	  	if lang_code.length < 2
+      		messages << ErrorMessage.new(:missing_lang_info)
+	  	end
+	  else
+      	messages << ErrorMessage.new(:missing_lang_info)
+	  end
+	  messages
     end
+	# Verify that the html element has a valid lang code.
+	def check_valid_language_code
+	  messages = []
+	  unless (@doc/"html[@lang]").empty?
+		#load list of valid language codes
+		iso_lang_codes = []
+		IO.foreach(File.dirname(__FILE__) + "/iso_language_codes.txt") { |code| iso_lang_codes << code.chomp }
+		doc_main_lang_code = (@doc/"html").first["lang"].to_s.downcase
+		unless iso_lang_codes.include?(doc_main_lang_code[0..1])
+			messages << ErrorMessage.new(:wrong_lang_code, doc_main_lang_code)
+		end
+	  end
+	  messages
+	end
+	# Verify that no link texts are ambiguous. A typical example is the presence of multiple "Read more" links.
     def check_link_text
-      messages = []
       links = get_links
-      linktexts = links.collect { |el| el[3] }
-      for link_a in links
-        #compare to other links in collection
-        for link_b in links
-          if link_a[0] != link_b[0]
-            if is_ambiguous_link(link_a, link_b)
-              #add message if not added already for link text
-              unless find_errormsg_with_text(messages, link_a[3])
-                messages << ErrorMessage.new("ambiguouslinktext", link_a[3])
-              end
-            end
-          end
-        end
+      link = links.find do |link|
+        links.find { |cmp_link| is_ambiguous_link(link, cmp_link) }
       end
-      return messages
+      return [] unless link
+      [ErrorMessage.new(:ambiguous_link_text, get_link_text(link))]
     end
+	# Verify that all form fields have a corresponding label element. See UWEM 1.0 Test 12.4_HTML_02.
     def check_form
       messages = []
       labels = get_labels
       fields = get_editable_fields
       #make sure all fields have associated labels
       label_for_ids = []
       for label in labels
         if label["for"]
@@ -320,94 +384,80 @@ module Raakt
         field_id = (field["id"] || "")
         field_identifier = (field["id"] || field["name"] || "unknown")
         if not label_for_ids.include?(field_id)
-          messages << ErrorMessage.new("fieldmissinglabel", field_identifier)
+          messages << ErrorMessage.new(:field_missing_label, field_identifier)
         end
       end
-      return messages
+      messages
     end
+	# Verify that all frame elements have a title attribute.
     def check_frames
-      #Verify frame titles
-      messages = []
-      if is_frameset
-        frames = @soup.find_all("frame")
-        frame_title = ""
+	  # Covers UWEM Test 12.1_HTML_01
+      return [] unless is_frameset
-        for frame in frames
-          frame_title = frame["title"] || ""
-          if normalize_text(frame_title).length == 0
-            messages << ErrorMessage.new("missingframetitle", frame["src"])
-          end
-        end
-      end
-      return messages
+      (@doc/"frame").find_all do |frame|
+        frame_title = frame['title'] || ''
+        normalize_text(frame_title).empty?
+      end.map { |frame| ErrorMessage.new(:missing_frame_title, frame['src']) }
     end
+	# Verify that the document does not use meta-refresh to redirect the user away after a period of time.
     def check_refresh
+      meta_elements = (@doc/'meta')
-      messages = []
-      meta_elements = @soup.find_all("meta")
-      for element in meta_elements
-        if element["http-equiv"] == "refresh"
-          messages << ErrorMessage.new("hasmetarefresh")
-        end
-      end
-      return messages
+      meta_elements.find_all do |element|
+        element["http-equiv"] == "refresh"
+      end.map { ErrorMessage.new(:has_meta_refresh) }
     end
     #Utility methods
-    def is_ambiguous_link(link_a, link_b)
-      #Link A and B are ambiguous if:
-      #1. The url differs
-      #2. The link text is identical
-      #3. The title text is identical (if present)
-      if link_a[1] != link_b[1] and
-         normalize_text(link_a[2]) == normalize_text(link_b[2]) and
-         normalize_text(link_a[3]) == normalize_text(link_b[3]) then
-         return true
+    def headings
+      headings = []
+      1.upto(6) do |i|
+        headings.push((@doc/"h#{i}")) if (@doc/"h#{i}").length > 0
       end
-      return false
+      headings.flatten
     end
+    def level(heading)
+      Integer(heading[1].chr)
+    end
-    def find_errormsg_with_text(messages, text)
-      for errormessage in messages
-        if errormessage.note == text
-          return errormessage
-        end
-      end
+	def downcase_hash_keys(a_hash)
+		downcased_hash = {}
+		a_hash.collect {|key,value| downcased_hash[key.downcase] = value}
+		return downcased_hash
+	end
+	def parse_charset(contenttype)
+		# get charset identifier from content type string
+		if contenttype=~/charset=(.*)\w?/ then
+			return $1.downcase.strip
+		end
+		return ""
+	end
+    def is_ambiguous_link(link_a, link_b)
+      return false if links_point_to_same_resource?(link_a, link_b)
+      return true if link_text_identical?(link_a, link_b) &&
+                     link_title_identical?(link_a, link_b)
-      return nil
+      false
     end
     def get_links
-      linkelements = @soup.find_all("a")
-      links = []
-      currentlink = 0
-      for element in linkelements
-        title = normalize_text((element['title'] || "").strip)
-        linktext = normalize_text((elements_to_text(element) || "").strip)
-        url = element['href']
-        links << [currentlink, url, title, linktext]
-        currentlink += 1
-      end
-      return links
+      (@doc/'a')
     end
     def langinfo(element)
       langval = ""
@@ -423,39 +473,33 @@ module Raakt
     end
-    def img_to_text(imgtag)
-      return (imgtag['alt'] || "")
+    def alt_to_text(element)
+		if element.kind_of?(Hpricot::Elem) then
+      		element.has_attribute?("alt") ? element['alt'] : ""
+		else
+			""
+		end
     end
     def elements_to_text(element)
-      retval = ""
-      for el in element.contents
-        if el.class.to_s == 'NavigableString'
-          retval += el
-        else
-          if el.name == "img"
-            retval += img_to_text(el)
-          else
-            retval += elements_to_text(el)
-          end
-        end
+      str = ''
+      element.traverse_all_element do |elem|
+        elem.kind_of?(Hpricot::Text) ? str += "#{elem}" : str += alt_to_text(elem)
       end
-      return retval
+      str
     end
     def normalize_text(text)
-      text = (text || "")
-      retval = text.gsub(/&nbsp;/, " ")
-      retval = retval.gsub(/&#160;/, " ")
-      retval = retval.gsub(/\n/, "")
-      retval = retval.gsub(/\r/, "")
-      retval = retval.gsub(/\t/, "")
+      text ||= ''
+      retval = text.gsub(/&nbsp;/, ' ')
+      retval = retval.gsub(/&#160;/, ' ')
+      retval = retval.gsub(/\n/, '')
+      retval = retval.gsub(/\r/, '')
+      retval = retval.gsub(/\t/, '')
       while /  /.match(retval) do
-        retval = retval.gsub(/  /, " ")
+        retval = retval.gsub(/  /, ' ')
       end
       retval = retval.strip
@@ -465,12 +509,12 @@ module Raakt
     def get_labels
-      return @soup.find_all("label")
+      @doc/'label'
     end
     def get_editable_fields
-      allfields = @soup.find_all(["textarea", "select", "input"])
+      allfields = (@doc/"textarea|select|input")
       fields = []
       field_type = ""
@@ -487,9 +531,37 @@ module Raakt
     def is_frameset
-      return (@soup.find("frameset") != nil)
+      (@doc/"frameset").length > 0
     end
+    def link_text_identical?(link_a, link_b)
+      get_link_text(link_a) == get_link_text(link_b)
+    end
+    def link_title_identical?(link_a, link_b)
+      get_link_title(link_a) == get_link_title(link_b)
+    end
+    def links_point_to_same_resource?(link_a, link_b)
+      (link_a == link_b) ||
+      (get_link_url(link_a) == get_link_url(link_b))
+    end
+    def get_link_text(link)
+      text = (elements_to_text(link) || '').strip
+      normalize_text(text)
+    end
+    def get_link_url(link)
+      link['href']
+    end
+    def get_link_title(link)
+      text = (link['title'] || '').strip
+      normalize_text(text)
+    end
   end
-end
+end