RubyGems - diamond-mechanize - Versions diffs - 2.1 - Mend

diamond-mechanize 2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (154) hide show

data/CHANGELOG.rdoc +718 -0
data/EXAMPLES.rdoc +187 -0
data/FAQ.rdoc +11 -0
data/GUIDE.rdoc +163 -0
data/LICENSE.rdoc +20 -0
data/Manifest.txt +159 -0
data/README.rdoc +64 -0
data/Rakefile +49 -0
data/lib/mechanize.rb +1079 -0
data/lib/mechanize/content_type_error.rb +13 -0
data/lib/mechanize/cookie.rb +232 -0
data/lib/mechanize/cookie_jar.rb +194 -0
data/lib/mechanize/download.rb +59 -0
data/lib/mechanize/element_matcher.rb +36 -0
data/lib/mechanize/file.rb +65 -0
data/lib/mechanize/file_connection.rb +17 -0
data/lib/mechanize/file_request.rb +26 -0
data/lib/mechanize/file_response.rb +74 -0
data/lib/mechanize/file_saver.rb +39 -0
data/lib/mechanize/form.rb +543 -0
data/lib/mechanize/form/button.rb +6 -0
data/lib/mechanize/form/check_box.rb +12 -0
data/lib/mechanize/form/field.rb +54 -0
data/lib/mechanize/form/file_upload.rb +21 -0
data/lib/mechanize/form/hidden.rb +3 -0
data/lib/mechanize/form/image_button.rb +19 -0
data/lib/mechanize/form/keygen.rb +34 -0
data/lib/mechanize/form/multi_select_list.rb +94 -0
data/lib/mechanize/form/option.rb +50 -0
data/lib/mechanize/form/radio_button.rb +55 -0
data/lib/mechanize/form/reset.rb +3 -0
data/lib/mechanize/form/select_list.rb +44 -0
data/lib/mechanize/form/submit.rb +3 -0
data/lib/mechanize/form/text.rb +3 -0
data/lib/mechanize/form/textarea.rb +3 -0
data/lib/mechanize/headers.rb +23 -0
data/lib/mechanize/history.rb +82 -0
data/lib/mechanize/http.rb +8 -0
data/lib/mechanize/http/agent.rb +1004 -0
data/lib/mechanize/http/auth_challenge.rb +59 -0
data/lib/mechanize/http/auth_realm.rb +31 -0
data/lib/mechanize/http/content_disposition_parser.rb +188 -0
data/lib/mechanize/http/www_authenticate_parser.rb +155 -0
data/lib/mechanize/monkey_patch.rb +16 -0
data/lib/mechanize/page.rb +440 -0
data/lib/mechanize/page/base.rb +7 -0
data/lib/mechanize/page/frame.rb +27 -0
data/lib/mechanize/page/image.rb +30 -0
data/lib/mechanize/page/label.rb +20 -0
data/lib/mechanize/page/link.rb +98 -0
data/lib/mechanize/page/meta_refresh.rb +68 -0
data/lib/mechanize/parser.rb +173 -0
data/lib/mechanize/pluggable_parsers.rb +144 -0
data/lib/mechanize/redirect_limit_reached_error.rb +19 -0
data/lib/mechanize/redirect_not_get_or_head_error.rb +21 -0
data/lib/mechanize/response_code_error.rb +21 -0
data/lib/mechanize/response_read_error.rb +27 -0
data/lib/mechanize/robots_disallowed_error.rb +28 -0
data/lib/mechanize/test_case.rb +663 -0
data/lib/mechanize/unauthorized_error.rb +3 -0
data/lib/mechanize/unsupported_scheme_error.rb +6 -0
data/lib/mechanize/util.rb +101 -0
data/test/data/htpasswd +1 -0
data/test/data/server.crt +16 -0
data/test/data/server.csr +12 -0
data/test/data/server.key +15 -0
data/test/data/server.pem +15 -0
data/test/htdocs/alt_text.html +10 -0
data/test/htdocs/bad_form_test.html +9 -0
data/test/htdocs/button.jpg +0 -0
data/test/htdocs/canonical_uri.html +9 -0
data/test/htdocs/dir with spaces/foo.html +1 -0
data/test/htdocs/empty_form.html +6 -0
data/test/htdocs/file_upload.html +26 -0
data/test/htdocs/find_link.html +41 -0
data/test/htdocs/form_multi_select.html +16 -0
data/test/htdocs/form_multival.html +37 -0
data/test/htdocs/form_no_action.html +18 -0
data/test/htdocs/form_no_input_name.html +16 -0
data/test/htdocs/form_order_test.html +11 -0
data/test/htdocs/form_select.html +16 -0
data/test/htdocs/form_set_fields.html +14 -0
data/test/htdocs/form_test.html +188 -0
data/test/htdocs/frame_referer_test.html +10 -0
data/test/htdocs/frame_test.html +30 -0
data/test/htdocs/google.html +13 -0
data/test/htdocs/index.html +6 -0
data/test/htdocs/link with space.html +5 -0
data/test/htdocs/meta_cookie.html +11 -0
data/test/htdocs/no_title_test.html +6 -0
data/test/htdocs/noindex.html +9 -0
data/test/htdocs/rails_3_encoding_hack_form_test.html +27 -0
data/test/htdocs/relative/tc_relative_links.html +21 -0
data/test/htdocs/robots.html +8 -0
data/test/htdocs/robots.txt +2 -0
data/test/htdocs/tc_bad_charset.html +9 -0
data/test/htdocs/tc_bad_links.html +5 -0
data/test/htdocs/tc_base_link.html +8 -0
data/test/htdocs/tc_blank_form.html +11 -0
data/test/htdocs/tc_charset.html +6 -0
data/test/htdocs/tc_checkboxes.html +19 -0
data/test/htdocs/tc_encoded_links.html +5 -0
data/test/htdocs/tc_field_precedence.html +11 -0
data/test/htdocs/tc_follow_meta.html +8 -0
data/test/htdocs/tc_form_action.html +48 -0
data/test/htdocs/tc_links.html +19 -0
data/test/htdocs/tc_meta_in_body.html +9 -0
data/test/htdocs/tc_pretty_print.html +17 -0
data/test/htdocs/tc_referer.html +16 -0
data/test/htdocs/tc_relative_links.html +19 -0
data/test/htdocs/tc_textarea.html +23 -0
data/test/htdocs/test_click.html +11 -0
data/test/htdocs/unusual______.html +5 -0
data/test/test_mechanize.rb +1164 -0
data/test/test_mechanize_cookie.rb +451 -0
data/test/test_mechanize_cookie_jar.rb +483 -0
data/test/test_mechanize_download.rb +43 -0
data/test/test_mechanize_file.rb +61 -0
data/test/test_mechanize_file_connection.rb +21 -0
data/test/test_mechanize_file_request.rb +19 -0
data/test/test_mechanize_file_saver.rb +21 -0
data/test/test_mechanize_form.rb +875 -0
data/test/test_mechanize_form_check_box.rb +38 -0
data/test/test_mechanize_form_encoding.rb +114 -0
data/test/test_mechanize_form_field.rb +63 -0
data/test/test_mechanize_form_file_upload.rb +20 -0
data/test/test_mechanize_form_image_button.rb +12 -0
data/test/test_mechanize_form_keygen.rb +32 -0
data/test/test_mechanize_form_multi_select_list.rb +84 -0
data/test/test_mechanize_form_option.rb +55 -0
data/test/test_mechanize_form_radio_button.rb +78 -0
data/test/test_mechanize_form_select_list.rb +76 -0
data/test/test_mechanize_form_textarea.rb +52 -0
data/test/test_mechanize_headers.rb +35 -0
data/test/test_mechanize_history.rb +103 -0
data/test/test_mechanize_http_agent.rb +1225 -0
data/test/test_mechanize_http_auth_challenge.rb +39 -0
data/test/test_mechanize_http_auth_realm.rb +49 -0
data/test/test_mechanize_http_content_disposition_parser.rb +118 -0
data/test/test_mechanize_http_www_authenticate_parser.rb +146 -0
data/test/test_mechanize_link.rb +80 -0
data/test/test_mechanize_page.rb +118 -0
data/test/test_mechanize_page_encoding.rb +182 -0
data/test/test_mechanize_page_frame.rb +16 -0
data/test/test_mechanize_page_link.rb +390 -0
data/test/test_mechanize_page_meta_refresh.rb +127 -0
data/test/test_mechanize_parser.rb +289 -0
data/test/test_mechanize_pluggable_parser.rb +52 -0
data/test/test_mechanize_redirect_limit_reached_error.rb +24 -0
data/test/test_mechanize_redirect_not_get_or_head_error.rb +14 -0
data/test/test_mechanize_subclass.rb +22 -0
data/test/test_mechanize_util.rb +103 -0
data/test/test_multi_select.rb +119 -0
metadata +216 -0

data/lib/mechanize/page/image.rb ADDED

@@ -0,0 +1,30 @@
+##
+# An image element on an HTML page
+class Mechanize::Page::Image
+  attr_reader :node
+  attr_reader :page
+  def initialize(node, page)
+    @node = node
+    @page = page
+  end
+  def src
+    @node['src']
+  end
+  def url
+    case src
+    when %r{^https?://}
+      src
+    else
+      if page.bases[0]
+        (page.bases[0].href + src).to_s
+      else
+        (page.uri + src).to_s
+      end
+    end
+  end
+end

data/lib/mechanize/page/label.rb ADDED

@@ -0,0 +1,20 @@
+##
+# A form label on an HTML page
+class Mechanize::Page::Label
+  attr_reader :node
+  attr_reader :text
+  attr_reader :page
+  alias :to_s :text
+  def initialize(node, page)
+    @node = node
+    @text = node.inner_text
+    @page = page
+  end
+  def for
+    (id = @node['for']) && page.search("##{id}") || nil
+  end
+end

data/lib/mechanize/page/link.rb ADDED

@@ -0,0 +1,98 @@
+##
+# This class encapsulates links.  It contains the text and the URI for
+# 'a' tags parsed out of an HTML page.  If the link contains an image,
+# the alt text will be used for that image.
+#
+# For example, the text for the following links with both be 'Hello World':
+#
+#   <a href="http://example">Hello World</a>
+#   <a href="http://example"><img src="test.jpg" alt="Hello World"></a>
+class Mechanize::Page::Link
+  attr_reader :node
+  attr_reader :href
+  attr_reader :attributes
+  attr_reader :page
+  alias :referer :page
+  def initialize(node, mech, page)
+    @node       = node
+    @attributes = node
+    @href       = node['href']
+    @mech       = mech
+    @page       = page
+    @text       = nil
+    @uri        = nil
+  end
+  # Click on this link
+  def click
+    @mech.click self
+  end
+  # This method is a shorthand to get link's DOM id.
+  # Common usage:
+  #   page.link_with(:dom_id => "links_exact_id")
+  def dom_id
+    node['id']
+  end
+  # This method is a shorthand to get a link's DOM class
+  # Common usage:
+  #   page.link_with(:dom_class => "links_exact_class")
+  def dom_class
+    node['class']
+  end
+  def pretty_print(q) # :nodoc:
+    q.object_group(self) {
+      q.breakable; q.pp text
+      q.breakable; q.pp href
+    }
+  end
+  alias inspect pretty_inspect # :nodoc:
+  # A list of words in the rel attribute, all lower-cased.
+  def rel
+    @rel ||= (val = attributes['rel']) ? val.downcase.split(' ') : []
+  end
+  # Test if the rel attribute includes +kind+.
+  def rel? kind
+    rel.include? kind
+  end
+  # The text content of this link
+  def text
+    return @text if @text
+    @text = @node.inner_text
+    # If there is no text, try to find an image and use it's alt text
+    if (@text.nil? or @text.empty?) and imgs = @node.search('img') then
+      @text = imgs.map do |e|
+        e['alt']
+      end.join
+    end
+    @text
+  end
+  alias :to_s :text
+  # A URI for the #href for this link.  The link is first parsed as a raw
+  # link.  If that fails parsing an escaped link is attepmted.
+  def uri
+    @uri ||= if @href then
+               begin
+                 URI.parse @href
+               rescue URI::InvalidURIError
+                 URI.parse WEBrick::HTTPUtils.escape @href
+               end
+             end
+  end
+end

data/lib/mechanize/page/meta_refresh.rb ADDED

@@ -0,0 +1,68 @@
+##
+# This class encapsulates a meta element with a refresh http-equiv.  Mechanize
+# treats meta refresh elements just like 'a' tags.  MetaRefresh objects will
+# contain links, but most likely will have no text.
+class Mechanize::Page::MetaRefresh < Mechanize::Page::Link
+  ##
+  # Time to wait before next refresh
+  attr_reader :delay
+  ##
+  # This MetaRefresh links did not contain a url= in the content attribute and
+  # links to itself.
+  attr_reader :link_self
+  ##
+  # Matches the content attribute of a meta refresh element.  After the match:
+  #
+  #   $1:: delay
+  #   $3:: url
+  CONTENT_REGEXP = /^\s*(\d+\.?\d*)(;|;\s*url=\s*['"]?(\S*?)['"]?)?\s*$/i
+  ##
+  # Parses the delay and url from the content attribute of a meta refresh
+  # element.  Parse requires the uri of the current page to infer a url when
+  # no url is specified.
+  #
+  # Returns an array of [delay, url]. (both in string)
+  #
+  # Returns nil if the delay and url cannot be parsed.
+  def self.parse content, base_uri
+    return unless content =~ CONTENT_REGEXP
+    link_self = $3.nil? || $3.empty?
+    delay, refresh_uri = $1, $3
+    dest = base_uri
+    dest += refresh_uri if refresh_uri
+    return delay, dest, link_self
+  end
+  def self.from_node node, page, uri
+    http_equiv = node['http-equiv']
+    return unless http_equiv and http_equiv.downcase == 'refresh'
+    delay, uri, link_self = parse node['content'], uri
+    return unless delay
+    new node, page, delay, uri.to_s, link_self
+  end
+  def initialize node, page, delay, href, link_self = false
+    super node, page.mech, page
+    @delay     = delay =~ /\./ ? delay.to_f : delay.to_i
+    @href      = href
+    @link_self = link_self
+  end
+end

data/lib/mechanize/parser.rb ADDED

@@ -0,0 +1,173 @@
+##
+# The parser module provides standard methods for accessing the headers and
+# content of a response that are shared across pluggable parsers.
+module Mechanize::Parser
+  extend Forwardable
+  special_filenames = Regexp.union %w[
+    AUX
+    COM1
+    COM2
+    COM3
+    COM4
+    COM5
+    COM6
+    COM7
+    COM8
+    COM9
+    CON
+    LPT1
+    LPT2
+    LPT3
+    LPT4
+    LPT5
+    LPT6
+    LPT7
+    LPT8
+    LPT9
+    NUL
+    PRN
+  ]
+  ##
+  # Special filenames that must be escaped
+  SPECIAL_FILENAMES = /\A#{special_filenames}/i
+  ##
+  # The URI this file was retrieved from
+  attr_accessor :uri
+  ##
+  # The Mechanize::Headers for this file
+  attr_accessor :response
+  alias header response
+  ##
+  # The HTTP response code
+  attr_accessor :code
+  ##
+  # :method: [](header)
+  #
+  # Access HTTP +header+ by name
+  def_delegator :header, :[], :[]
+  ##
+  # :method: []=(header, value)
+  #
+  # Set HTTP +header+ to +value+
+  def_delegator :header, :[]=, :[]=
+  ##
+  # :method: key?(header)
+  #
+  # Is the named +header+ present?
+  def_delegator :header, :key?, :key?
+  ##
+  # :method: each
+  #
+  # Enumerate HTTP headers
+  def_delegator :header, :each, :each
+  ##
+  # :method: each
+  #
+  # Enumerate HTTP headers in capitalized (canonical) form
+  def_delegator :header, :canonical_each, :canonical_each
+  ##
+  # Extracts the filename from a Content-Disposition header in the #response
+  # or from the URI.  If +full_path+ is true the filename will include the
+  # host name and path to the resource, otherwise a filename in the current
+  # directory is given.
+  def extract_filename full_path = @full_path
+    handled = false
+    if @uri then
+      uri = @uri
+      uri += 'index.html' if uri.path.end_with? '/'
+      path     = uri.path.split(/\//)
+      filename = path.pop || 'index.html'
+    else
+      path     = []
+      filename = 'index.html'
+    end
+    # Set the filename
+    if disposition = @response['content-disposition'] then
+      content_disposition =
+        Mechanize::HTTP::ContentDispositionParser.parse disposition
+      if content_disposition then
+        filename = content_disposition.filename
+        filename = filename.split(/[\\\/]/).last
+        handled = true
+      end
+    end
+    if not handled and @uri then
+      filename << '.html' unless filename =~ /\./
+      filename << "?#{@uri.query}" if @uri.query
+    end
+    if SPECIAL_FILENAMES =~ filename then
+      filename = "_#{filename}"
+    end
+    filename = filename.tr "\x00-\x20<>:\"/\\|?*", '_'
+    @filename = if full_path then
+                  File.join @uri.host, path, filename
+                else
+                  filename
+                end
+  end
+  ##
+  # Creates a Mechanize::Header from the Net::HTTPResponse +response+.
+  #
+  # This allows the Net::HTTPResponse to be garbage collected sooner.
+  def fill_header response
+    @response = Mechanize::Headers.new
+    response.each { |k,v|
+      @response[k] = v
+    } if response
+    @response
+  end
+  ##
+  # Finds a free filename based on +filename+, but is not race-free
+  def find_free_name filename
+    filename = @filename unless filename
+    number = 1
+    while File.exist? filename do
+      filename = "#{@filename}.#{number}"
+      number += 1
+    end
+    filename
+  end
+end

data/lib/mechanize/pluggable_parsers.rb ADDED

@@ -0,0 +1,144 @@
+require 'mechanize/file'
+require 'mechanize/file_saver'
+require 'mechanize/page'
+##
+# This class is used to register and maintain pluggable parsers for Mechanize
+# to use.
+#
+# Mechanize allows different parsers for different content types.  Mechanize
+# uses PluggableParser to determine which parser to use for any content type.
+# To use your own pluggable parser or to change the default pluggable parsers,
+# register them with this class.
+#
+# The default parser for unregistered content types is Mechanize::File.
+#
+# The module Mechanize::Parser provides basic functionality for any content
+# type, so you may use it in custom parsers you write.  For small files you
+# wish to perform in-memory operations on, you should subclass
+# Mechanize::File.  For large files you should subclass Mechanize::Download as
+# the content is only loaded into memory in small chunks.
+#
+# == Example
+#
+# To create your own parser, just create a class that takes four parameters in
+# the constructor.  Here is an example of registering a pluggable parser that
+# handles CSV files:
+#
+#   require 'csv'
+#
+#   class CSVParser < Mechanize::File
+#     attr_reader :csv
+#
+#     def initialize uri = nil, response = nil, body = nil, code = nil
+#       super uri, response, body, code
+#       @csv = CSV.parse body
+#     end
+#   end
+#
+#   agent = Mechanize.new
+#   agent.pluggable_parser.csv = CSVParser
+#   agent.get('http://example.com/test.csv')  # => CSVParser
+#
+# Now any response with a content type of 'text/csv' will initialize a
+# CSVParser and return that object to the caller.
+#
+# To register a pluggable parser for a content type that PluggableParser has
+# no shorthand method for, use the hash syntax:
+#
+#   agent.pluggable_parser['text/something'] = SomeClass
+#
+# To set the default parser, use #default:
+#
+#   agent.pluggable_parser.default = Mechanize::Download
+#
+# Now all unknown content types will be saved to disk and not loaded into
+# memory.
+class Mechanize::PluggableParser
+  CONTENT_TYPES = {
+    :html  => 'text/html',
+    :wap   => 'application/vnd.wap.xhtml+xml',
+    :xhtml => 'application/xhtml+xml',
+    :pdf   => 'application/pdf',
+    :csv   => 'text/csv',
+    :xml   => 'text/xml',
+  }
+  attr_accessor :default
+  def initialize
+    @parsers = {
+      CONTENT_TYPES[:html]  => Mechanize::Page,
+      CONTENT_TYPES[:xhtml] => Mechanize::Page,
+      CONTENT_TYPES[:wap]   => Mechanize::Page,
+    }
+    @default = Mechanize::File
+  end
+  ##
+  # Returns the parser registered for the given +content_type+
+  def parser(content_type)
+    content_type.nil? ? default : @parsers[content_type] || default
+  end
+  def register_parser(content_type, klass) # :nodoc:
+    @parsers[content_type] = klass
+  end
+  ##
+  # Registers +klass+ as the parser for text/html and application/xhtml+xml
+  # content
+  def html=(klass)
+    register_parser(CONTENT_TYPES[:html], klass)
+    register_parser(CONTENT_TYPES[:xhtml], klass)
+  end
+  ##
+  # Registers +klass+ as the parser for application/xhtml+xml content
+  def xhtml=(klass)
+    register_parser(CONTENT_TYPES[:xhtml], klass)
+  end
+  ##
+  # Registers +klass+ as the parser for application/pdf content
+  def pdf=(klass)
+    register_parser(CONTENT_TYPES[:pdf], klass)
+  end
+  ##
+  # Registers +klass+ as the parser for text/csv content
+  def csv=(klass)
+    register_parser(CONTENT_TYPES[:csv], klass)
+  end
+  ##
+  # Registers +klass+ as the parser for text/xml content
+  def xml=(klass)
+    register_parser(CONTENT_TYPES[:xml], klass)
+  end
+  ##
+  # Retrieves the parser for +content_type+ content
+  def [](content_type)
+    @parsers[content_type]
+  end
+  ##
+  # Sets the parser for +content_type+ content to +klass+
+  def []=(content_type, klass)
+    @parsers[content_type] = klass
+  end
+end