RubyGems - diamond-mechanize - Versions diffs - 2.2 → 2.3 - Mend

diamond-mechanize 2.2 → 2.3

Files changed (147) hide show

data/Rakefile +49 -0
data/lib/mechanize.rb +1079 -0
data/lib/mechanize/content_type_error.rb +13 -0
data/lib/mechanize/cookie.rb +232 -0
data/lib/mechanize/cookie_jar.rb +194 -0
data/lib/mechanize/download.rb +59 -0
data/lib/mechanize/element_matcher.rb +36 -0
data/lib/mechanize/file.rb +65 -0
data/lib/mechanize/file_connection.rb +17 -0
data/lib/mechanize/file_request.rb +26 -0
data/lib/mechanize/file_response.rb +74 -0
data/lib/mechanize/file_saver.rb +39 -0
data/lib/mechanize/form.rb +543 -0
data/lib/mechanize/form/button.rb +6 -0
data/lib/mechanize/form/check_box.rb +12 -0
data/lib/mechanize/form/field.rb +54 -0
data/lib/mechanize/form/file_upload.rb +21 -0
data/lib/mechanize/form/hidden.rb +3 -0
data/lib/mechanize/form/image_button.rb +19 -0
data/lib/mechanize/form/keygen.rb +34 -0
data/lib/mechanize/form/multi_select_list.rb +94 -0
data/lib/mechanize/form/option.rb +50 -0
data/lib/mechanize/form/radio_button.rb +55 -0
data/lib/mechanize/form/reset.rb +3 -0
data/lib/mechanize/form/select_list.rb +44 -0
data/lib/mechanize/form/submit.rb +3 -0
data/lib/mechanize/form/text.rb +3 -0
data/lib/mechanize/form/textarea.rb +3 -0
data/lib/mechanize/headers.rb +23 -0
data/lib/mechanize/history.rb +82 -0
data/lib/mechanize/http.rb +8 -0
data/lib/mechanize/http/agent.rb +1004 -0
data/lib/mechanize/http/auth_challenge.rb +59 -0
data/lib/mechanize/http/auth_realm.rb +31 -0
data/lib/mechanize/http/content_disposition_parser.rb +188 -0
data/lib/mechanize/http/www_authenticate_parser.rb +155 -0
data/lib/mechanize/monkey_patch.rb +16 -0
data/lib/mechanize/page.rb +440 -0
data/lib/mechanize/page/base.rb +7 -0
data/lib/mechanize/page/frame.rb +27 -0
data/lib/mechanize/page/image.rb +30 -0
data/lib/mechanize/page/label.rb +20 -0
data/lib/mechanize/page/link.rb +98 -0
data/lib/mechanize/page/meta_refresh.rb +68 -0
data/lib/mechanize/parser.rb +173 -0
data/lib/mechanize/pluggable_parsers.rb +144 -0
data/lib/mechanize/redirect_limit_reached_error.rb +19 -0
data/lib/mechanize/redirect_not_get_or_head_error.rb +21 -0
data/lib/mechanize/response_code_error.rb +21 -0
data/lib/mechanize/response_read_error.rb +27 -0
data/lib/mechanize/robots_disallowed_error.rb +28 -0
data/lib/mechanize/test_case.rb +663 -0
data/lib/mechanize/unauthorized_error.rb +3 -0
data/lib/mechanize/unsupported_scheme_error.rb +6 -0
data/lib/mechanize/util.rb +101 -0
data/test/data/htpasswd +1 -0
data/test/data/server.crt +16 -0
data/test/data/server.csr +12 -0
data/test/data/server.key +15 -0
data/test/data/server.pem +15 -0
data/test/htdocs/alt_text.html +10 -0
data/test/htdocs/bad_form_test.html +9 -0
data/test/htdocs/button.jpg +0 -0
data/test/htdocs/canonical_uri.html +9 -0
data/test/htdocs/dir with spaces/foo.html +1 -0
data/test/htdocs/empty_form.html +6 -0
data/test/htdocs/file_upload.html +26 -0
data/test/htdocs/find_link.html +41 -0
data/test/htdocs/form_multi_select.html +16 -0
data/test/htdocs/form_multival.html +37 -0
data/test/htdocs/form_no_action.html +18 -0
data/test/htdocs/form_no_input_name.html +16 -0
data/test/htdocs/form_order_test.html +11 -0
data/test/htdocs/form_select.html +16 -0
data/test/htdocs/form_set_fields.html +14 -0
data/test/htdocs/form_test.html +188 -0
data/test/htdocs/frame_referer_test.html +10 -0
data/test/htdocs/frame_test.html +30 -0
data/test/htdocs/google.html +13 -0
data/test/htdocs/index.html +6 -0
data/test/htdocs/link with space.html +5 -0
data/test/htdocs/meta_cookie.html +11 -0
data/test/htdocs/no_title_test.html +6 -0
data/test/htdocs/noindex.html +9 -0
data/test/htdocs/rails_3_encoding_hack_form_test.html +27 -0
data/test/htdocs/relative/tc_relative_links.html +21 -0
data/test/htdocs/robots.html +8 -0
data/test/htdocs/robots.txt +2 -0
data/test/htdocs/tc_bad_charset.html +9 -0
data/test/htdocs/tc_bad_links.html +5 -0
data/test/htdocs/tc_base_link.html +8 -0
data/test/htdocs/tc_blank_form.html +11 -0
data/test/htdocs/tc_charset.html +6 -0
data/test/htdocs/tc_checkboxes.html +19 -0
data/test/htdocs/tc_encoded_links.html +5 -0
data/test/htdocs/tc_field_precedence.html +11 -0
data/test/htdocs/tc_follow_meta.html +8 -0
data/test/htdocs/tc_form_action.html +48 -0
data/test/htdocs/tc_links.html +19 -0
data/test/htdocs/tc_meta_in_body.html +9 -0
data/test/htdocs/tc_pretty_print.html +17 -0
data/test/htdocs/tc_referer.html +16 -0
data/test/htdocs/tc_relative_links.html +19 -0
data/test/htdocs/tc_textarea.html +23 -0
data/test/htdocs/test_click.html +11 -0
data/test/htdocs/unusual______.html +5 -0
data/test/test_mechanize.rb +1164 -0
data/test/test_mechanize_cookie.rb +451 -0
data/test/test_mechanize_cookie_jar.rb +483 -0
data/test/test_mechanize_download.rb +43 -0
data/test/test_mechanize_file.rb +61 -0
data/test/test_mechanize_file_connection.rb +21 -0
data/test/test_mechanize_file_request.rb +19 -0
data/test/test_mechanize_file_saver.rb +21 -0
data/test/test_mechanize_form.rb +875 -0
data/test/test_mechanize_form_check_box.rb +38 -0
data/test/test_mechanize_form_encoding.rb +114 -0
data/test/test_mechanize_form_field.rb +63 -0
data/test/test_mechanize_form_file_upload.rb +20 -0
data/test/test_mechanize_form_image_button.rb +12 -0
data/test/test_mechanize_form_keygen.rb +32 -0
data/test/test_mechanize_form_multi_select_list.rb +84 -0
data/test/test_mechanize_form_option.rb +55 -0
data/test/test_mechanize_form_radio_button.rb +78 -0
data/test/test_mechanize_form_select_list.rb +76 -0
data/test/test_mechanize_form_textarea.rb +52 -0
data/test/test_mechanize_headers.rb +35 -0
data/test/test_mechanize_history.rb +103 -0
data/test/test_mechanize_http_agent.rb +1225 -0
data/test/test_mechanize_http_auth_challenge.rb +39 -0
data/test/test_mechanize_http_auth_realm.rb +49 -0
data/test/test_mechanize_http_content_disposition_parser.rb +118 -0
data/test/test_mechanize_http_www_authenticate_parser.rb +146 -0
data/test/test_mechanize_link.rb +80 -0
data/test/test_mechanize_page.rb +118 -0
data/test/test_mechanize_page_encoding.rb +182 -0
data/test/test_mechanize_page_frame.rb +16 -0
data/test/test_mechanize_page_link.rb +390 -0
data/test/test_mechanize_page_meta_refresh.rb +127 -0
data/test/test_mechanize_parser.rb +289 -0
data/test/test_mechanize_pluggable_parser.rb +52 -0
data/test/test_mechanize_redirect_limit_reached_error.rb +24 -0
data/test/test_mechanize_redirect_not_get_or_head_error.rb +14 -0
data/test/test_mechanize_subclass.rb +22 -0
data/test/test_mechanize_util.rb +103 -0
data/test/test_multi_select.rb +119 -0
metadata +148 -71

@@ -0,0 +1,30 @@
+##
+# An image element on an HTML page
+class Mechanize::Page::Image
+  attr_reader :node
+  attr_reader :page
+  def initialize(node, page)
+    @node = node
+    @page = page
+  end
+  def src
+    @node['src']
+  end
+  def url
+    case src
+    when %r{^https?://}
+      src
+    else
+      if page.bases[0]
+        (page.bases[0].href + src).to_s
+      else
+        (page.uri + src).to_s
+      end
+    end
+  end
+end

data/lib/mechanize/page/label.rb ADDED

@@ -0,0 +1,20 @@
+##
+# A form label on an HTML page
+class Mechanize::Page::Label
+  attr_reader :node
+  attr_reader :text
+  attr_reader :page
+  alias :to_s :text
+  def initialize(node, page)
+    @node = node
+    @text = node.inner_text
+    @page = page
+  end
+  def for
+    (id = @node['for']) && page.search("##{id}") || nil
+  end
+end

data/lib/mechanize/page/link.rb ADDED

@@ -0,0 +1,98 @@
+##
+# This class encapsulates links.  It contains the text and the URI for
+# 'a' tags parsed out of an HTML page.  If the link contains an image,
+# the alt text will be used for that image.
+#
+# For example, the text for the following links with both be 'Hello World':
+#
+#   <a href="http://example">Hello World</a>
+#   <a href="http://example"><img src="test.jpg" alt="Hello World"></a>
+class Mechanize::Page::Link
+  attr_reader :node
+  attr_reader :href
+  attr_reader :attributes
+  attr_reader :page
+  alias :referer :page
+  def initialize(node, mech, page)
+    @node       = node
+    @attributes = node
+    @href       = node['href']
+    @mech       = mech
+    @page       = page
+    @text       = nil
+    @uri        = nil
+  end
+  # Click on this link
+  def click
+    @mech.click self
+  end
+  # This method is a shorthand to get link's DOM id.
+  # Common usage:
+  #   page.link_with(:dom_id => "links_exact_id")
+  def dom_id
+    node['id']
+  end
+  # This method is a shorthand to get a link's DOM class
+  # Common usage:
+  #   page.link_with(:dom_class => "links_exact_class")
+  def dom_class
+    node['class']
+  end
+  def pretty_print(q) # :nodoc:
+    q.object_group(self) {
+      q.breakable; q.pp text
+      q.breakable; q.pp href
+    }
+  end
+  alias inspect pretty_inspect # :nodoc:
+  # A list of words in the rel attribute, all lower-cased.
+  def rel
+    @rel ||= (val = attributes['rel']) ? val.downcase.split(' ') : []
+  end
+  # Test if the rel attribute includes +kind+.
+  def rel? kind
+    rel.include? kind
+  end
+  # The text content of this link
+  def text
+    return @text if @text
+    @text = @node.inner_text
+    # If there is no text, try to find an image and use it's alt text
+    if (@text.nil? or @text.empty?) and imgs = @node.search('img') then
+      @text = imgs.map do |e|
+        e['alt']
+      end.join
+    end
+    @text
+  end
+  alias :to_s :text
+  # A URI for the #href for this link.  The link is first parsed as a raw
+  # link.  If that fails parsing an escaped link is attepmted.
+  def uri
+    @uri ||= if @href then
+               begin
+                 URI.parse @href
+               rescue URI::InvalidURIError
+                 URI.parse WEBrick::HTTPUtils.escape @href
+               end
+             end
+  end
+end

data/lib/mechanize/page/meta_refresh.rb ADDED

@@ -0,0 +1,68 @@
+##
+# This class encapsulates a meta element with a refresh http-equiv.  Mechanize
+# treats meta refresh elements just like 'a' tags.  MetaRefresh objects will
+# contain links, but most likely will have no text.
+class Mechanize::Page::MetaRefresh < Mechanize::Page::Link
+  ##
+  # Time to wait before next refresh
+  attr_reader :delay
+  ##
+  # This MetaRefresh links did not contain a url= in the content attribute and
+  # links to itself.
+  attr_reader :link_self
+  ##
+  # Matches the content attribute of a meta refresh element.  After the match:
+  #
+  #   $1:: delay
+  #   $3:: url
+  CONTENT_REGEXP = /^\s*(\d+\.?\d*)(;|;\s*url=\s*['"]?(\S*?)['"]?)?\s*$/i
+  ##
+  # Parses the delay and url from the content attribute of a meta refresh
+  # element.  Parse requires the uri of the current page to infer a url when
+  # no url is specified.
+  #
+  # Returns an array of [delay, url]. (both in string)
+  #
+  # Returns nil if the delay and url cannot be parsed.
+  def self.parse content, base_uri
+    return unless content =~ CONTENT_REGEXP
+    link_self = $3.nil? || $3.empty?
+    delay, refresh_uri = $1, $3
+    dest = base_uri
+    dest += refresh_uri if refresh_uri
+    return delay, dest, link_self
+  end
+  def self.from_node node, page, uri
+    http_equiv = node['http-equiv']
+    return unless http_equiv and http_equiv.downcase == 'refresh'
+    delay, uri, link_self = parse node['content'], uri
+    return unless delay
+    new node, page, delay, uri.to_s, link_self
+  end
+  def initialize node, page, delay, href, link_self = false
+    super node, page.mech, page
+    @delay     = delay =~ /\./ ? delay.to_f : delay.to_i
+    @href      = href
+    @link_self = link_self
+  end
+end

data/lib/mechanize/parser.rb ADDED

@@ -0,0 +1,173 @@
+##
+# The parser module provides standard methods for accessing the headers and
+# content of a response that are shared across pluggable parsers.
+module Mechanize::Parser
+  extend Forwardable
+  special_filenames = Regexp.union %w[
+    AUX
+    COM1
+    COM2
+    COM3
+    COM4
+    COM5
+    COM6
+    COM7
+    COM8
+    COM9
+    CON
+    LPT1
+    LPT2
+    LPT3
+    LPT4
+    LPT5
+    LPT6
+    LPT7
+    LPT8
+    LPT9
+    NUL
+    PRN
+  ]
+  ##
+  # Special filenames that must be escaped
+  SPECIAL_FILENAMES = /\A#{special_filenames}/i
+  ##
+  # The URI this file was retrieved from
+  attr_accessor :uri
+  ##
+  # The Mechanize::Headers for this file
+  attr_accessor :response
+  alias header response
+  ##
+  # The HTTP response code
+  attr_accessor :code
+  ##
+  # :method: [](header)
+  #
+  # Access HTTP +header+ by name
+  def_delegator :header, :[], :[]
+  ##
+  # :method: []=(header, value)
+  #
+  # Set HTTP +header+ to +value+
+  def_delegator :header, :[]=, :[]=
+  ##
+  # :method: key?(header)
+  #
+  # Is the named +header+ present?
+  def_delegator :header, :key?, :key?
+  ##
+  # :method: each
+  #
+  # Enumerate HTTP headers
+  def_delegator :header, :each, :each
+  ##
+  # :method: each
+  #
+  # Enumerate HTTP headers in capitalized (canonical) form
+  def_delegator :header, :canonical_each, :canonical_each
+  ##
+  # Extracts the filename from a Content-Disposition header in the #response
+  # or from the URI.  If +full_path+ is true the filename will include the
+  # host name and path to the resource, otherwise a filename in the current
+  # directory is given.
+  def extract_filename full_path = @full_path
+    handled = false
+    if @uri then
+      uri = @uri
+      uri += 'index.html' if uri.path.end_with? '/'
+      path     = uri.path.split(/\//)
+      filename = path.pop || 'index.html'
+    else
+      path     = []
+      filename = 'index.html'
+    end
+    # Set the filename
+    if disposition = @response['content-disposition'] then
+      content_disposition =
+        Mechanize::HTTP::ContentDispositionParser.parse disposition
+      if content_disposition then
+        filename = content_disposition.filename
+        filename = filename.split(/[\\\/]/).last
+        handled = true
+      end
+    end
+    if not handled and @uri then
+      filename << '.html' unless filename =~ /\./
+      filename << "?#{@uri.query}" if @uri.query
+    end
+    if SPECIAL_FILENAMES =~ filename then
+      filename = "_#{filename}"
+    end
+    filename = filename.tr "\x00-\x20<>:\"/\\|?*", '_'
+    @filename = if full_path then
+                  File.join @uri.host, path, filename
+                else
+                  filename
+                end
+  end
+  ##
+  # Creates a Mechanize::Header from the Net::HTTPResponse +response+.
+  #
+  # This allows the Net::HTTPResponse to be garbage collected sooner.
+  def fill_header response
+    @response = Mechanize::Headers.new
+    response.each { |k,v|
+      @response[k] = v
+    } if response
+    @response
+  end
+  ##
+  # Finds a free filename based on +filename+, but is not race-free
+  def find_free_name filename
+    filename = @filename unless filename
+    number = 1
+    while File.exist? filename do
+      filename = "#{@filename}.#{number}"
+      number += 1
+    end
+    filename
+  end
+end

data/lib/mechanize/pluggable_parsers.rb ADDED

@@ -0,0 +1,144 @@
+require 'mechanize/file'
+require 'mechanize/file_saver'
+require 'mechanize/page'
+##
+# This class is used to register and maintain pluggable parsers for Mechanize
+# to use.
+#
+# Mechanize allows different parsers for different content types.  Mechanize
+# uses PluggableParser to determine which parser to use for any content type.
+# To use your own pluggable parser or to change the default pluggable parsers,
+# register them with this class.
+#
+# The default parser for unregistered content types is Mechanize::File.
+#
+# The module Mechanize::Parser provides basic functionality for any content
+# type, so you may use it in custom parsers you write.  For small files you
+# wish to perform in-memory operations on, you should subclass
+# Mechanize::File.  For large files you should subclass Mechanize::Download as
+# the content is only loaded into memory in small chunks.
+#
+# == Example
+#
+# To create your own parser, just create a class that takes four parameters in
+# the constructor.  Here is an example of registering a pluggable parser that
+# handles CSV files:
+#
+#   require 'csv'
+#
+#   class CSVParser < Mechanize::File
+#     attr_reader :csv
+#
+#     def initialize uri = nil, response = nil, body = nil, code = nil
+#       super uri, response, body, code
+#       @csv = CSV.parse body
+#     end
+#   end
+#
+#   agent = Mechanize.new
+#   agent.pluggable_parser.csv = CSVParser
+#   agent.get('http://example.com/test.csv')  # => CSVParser
+#
+# Now any response with a content type of 'text/csv' will initialize a
+# CSVParser and return that object to the caller.
+#
+# To register a pluggable parser for a content type that pluggable parser does
+# not know about, use the hash syntax:
+#
+#   agent.pluggable_parser['text/something'] = SomeClass
+#
+# To set the default parser, use #default:
+#
+#   agent.pluggable_parser.default = Mechanize::Download
+#
+# Now all unknown content types will be saved to disk and not loaded into
+# memory.
+class Mechanize::PluggableParser
+  CONTENT_TYPES = {
+    :html  => 'text/html',
+    :wap   => 'application/vnd.wap.xhtml+xml',
+    :xhtml => 'application/xhtml+xml',
+    :pdf   => 'application/pdf',
+    :csv   => 'text/csv',
+    :xml   => 'text/xml',
+  }
+  attr_accessor :default
+  def initialize
+    @parsers = {
+      CONTENT_TYPES[:html]  => Mechanize::Page,
+      CONTENT_TYPES[:xhtml] => Mechanize::Page,
+      CONTENT_TYPES[:wap]   => Mechanize::Page,
+    }
+    @default = Mechanize::File
+  end
+  ##
+  # Returns the parser registered for the given +content_type+
+  def parser(content_type)
+    content_type.nil? ? default : @parsers[content_type] || default
+  end
+  def register_parser(content_type, klass) # :nodoc:
+    @parsers[content_type] = klass
+  end
+  ##
+  # Registers +klass+ as the parser for text/html and application/xhtml+xml
+  # content
+  def html=(klass)
+    register_parser(CONTENT_TYPES[:html], klass)
+    register_parser(CONTENT_TYPES[:xhtml], klass)
+  end
+  ##
+  # Registers +klass+ as the parser for application/xhtml+xml content
+  def xhtml=(klass)
+    register_parser(CONTENT_TYPES[:xhtml], klass)
+  end
+  ##
+  # Registers +klass+ as the parser for application/pdf content
+  def pdf=(klass)
+    register_parser(CONTENT_TYPES[:pdf], klass)
+  end
+  ##
+  # Registers +klass+ as the parser for text/csv content
+  def csv=(klass)
+    register_parser(CONTENT_TYPES[:csv], klass)
+  end
+  ##
+  # Registers +klass+ as the parser for text/xml content
+  def xml=(klass)
+    register_parser(CONTENT_TYPES[:xml], klass)
+  end
+  ##
+  # Retrieves the parser for +content_type+ content
+  def [](content_type)
+    @parsers[content_type]
+  end
+  ##
+  # Sets the parser for +content_type+ content to +klass+
+  def []=(content_type, klass)
+    @parsers[content_type] = klass
+  end
+end