RubyGems - mechanize - Versions diffs - 0.4.7 → 0.5.0 - Mend

mechanize 0.4.7 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of mechanize might be problematic. Click here for more details.

Files changed (48) hide show

data/CHANGELOG +17 -0
data/EXAMPLES +23 -44
data/NOTES +49 -0
data/lib/mechanize.rb +95 -80
data/lib/mechanize/cookie.rb +147 -148
data/lib/mechanize/cookie.rb.rej +16 -0
data/lib/mechanize/errors.rb +29 -0
data/lib/mechanize/form.rb +211 -186
data/lib/mechanize/form_elements.rb +31 -71
data/lib/mechanize/list.rb +34 -0
data/lib/mechanize/mech_version.rb +3 -1
data/lib/mechanize/module.rb +1 -1
data/lib/mechanize/page.rb +162 -180
data/lib/mechanize/page_elements.rb +53 -40
data/lib/mechanize/parsing.rb +11 -3
data/lib/mechanize/pluggable_parsers.rb +147 -0
data/test/data/server.crt +14 -0
data/test/data/server.csr +11 -0
data/test/data/server.key +18 -0
data/test/data/server.pem +15 -0
data/test/htdocs/no_title_test.html +6 -0
data/test/parse.rb +39 -0
data/test/proxy.rb +30 -0
data/test/server.rb +2 -0
data/test/servlets.rb +8 -0
data/test/ssl_server.rb +49 -0
data/test/tc_authenticate.rb +8 -6
data/test/tc_cookie_class.rb +28 -18
data/test/tc_cookie_jar.rb +88 -27
data/test/tc_cookies.rb +41 -44
data/test/tc_errors.rb +9 -23
data/test/tc_forms.rb +36 -32
data/test/tc_frames.rb +6 -4
data/test/tc_links.rb +7 -6
data/test/tc_mech.rb +43 -46
data/test/tc_page.rb +24 -0
data/test/tc_pluggable_parser.rb +103 -0
data/test/tc_post_form.rb +41 -0
data/test/tc_proxy.rb +25 -0
data/test/tc_response_code.rb +13 -10
data/test/tc_save_file.rb +25 -0
data/test/tc_ssl_server.rb +27 -0
data/test/tc_upload.rb +8 -6
data/test/tc_watches.rb +5 -2
data/test/test_includes.rb +3 -3
data/test/ts_mech.rb +11 -2
metadata +100 -86
data/test/tc_filter.rb +0 -34

data/lib/mechanize/page_elements.rb CHANGED Viewed

@@ -1,51 +1,64 @@
 module WWW
-  class Link
-    attr_reader :node
-    attr_reader :href
-    attr_reader :text
-    alias :to_s :text
-    def initialize(node)
-      @node = node
-      @href = node.attributes['href']
-      @text = node.all_text
+  class Mechanize
+    # This class encapsulates links.  It contains the text and the URI for
+    # 'a' tags parsed out of an HTML page.  If the link contains an image,
+    # the alt text will be used for that image.
+    #
+    # For example, the text for the following links will both be 'Hello World':
+    #
+    # <a href="http://rubyforge.org">Hello World</a>
+    # <a href="http://rubyforge.org"><img src="test.jpg" alt="Hello World"></a>
+    class Link
+      attr_reader :node
+      attr_reader :href
+      attr_reader :text
+      alias :to_s :text
+      def initialize(node)
+        @node = node
+        @href = node.attributes['href']
+        @text = node.all_text
-      # If there is no text, try to find an image and use it's alt text
-      if (@text.nil? || @text.length == 0) && @node.has_elements?
-        @text = ''
-        @node.each_element { |e|
-          if e.name == 'img'
-            @text << (e.has_attributes? ? e.attributes['alt'] || '' : '')
-          end
-        }
+        # If there is no text, try to find an image and use its alt text
+        if (@text.nil? || @text.length == 0) && @node.has_elements?
+          @text = ''
+          @node.each_element { |e|
+            if e.name == 'img'
+              @text << (e.has_attributes? ? e.attributes['alt'] || '' : '')
+            end
+          }
+        end
       end
-    end
-    def uri
-      URI.parse(@href)
-    end
+      def uri
+        URI.parse(@href)
+      end
-    def inspect
-      "'#{@text}' -> #{@href}\n"
+      def inspect
+        "'#{@text}' -> #{@href}"
+      end
     end
-  end
-  class Meta < Link
-  end
-  class Frame
-    attr_reader :node
-    attr_reader :name
-    attr_reader :src
-    def initialize(node)
-      @node = node
-      @name = node.attributes['name']
-      @src  = node.attributes['src']
+    # This class encapsulates a Meta tag.  Mechanize treats meta tags just
+    # like 'a' tags.  Meta objects will contain links, but most likely will
+    # have no text.
+    class Meta < Link
     end
-    def inspect
-      "'#{@name}' -> #{@src}\n"
+    # This class encapsulates a 'frame' tag.  Frame objects can be treated
+    # just like Link objects.  They contain src, the link they refer to,
+    # and name, the name of the frame.  'src' and 'name' are aliased to 'href'
+    # and 'text' respectively so that a Frame object can be treated just
+    # like a Link.
+    class Frame < Link
+      alias :src :href
+      alias :name :text
+      def initialize(node)
+        @node = node
+        @text = node.attributes['name']
+        @href = node.attributes['src']
+      end
     end
   end
 end

data/lib/mechanize/parsing.rb CHANGED Viewed

@@ -19,6 +19,14 @@ end
 module REXML::Node
+# Aliasing functions to get rid of warnings.  Remove when support for 1.8.2
+# is dropped.
+if RUBY_VERSION > "1.8.2"
+  alias :old_each_recursive       :each_recursive
+  alias :old_find_first_recursive :find_first_recursive
+  alias :old_index_in_parent      :index_in_parent
+end
   # Visit all subnodes of +self+ recursively
   def each_recursive(&block) # :yields: node
@@ -54,7 +62,7 @@ module REXML::Node
   #   node == node.parent.elements[node.index_in_parent]
   def index_in_parent
-    parent.elements.index(self)
+    parent.index(self)+1
   end
   # Recursively collects all text strings starting into an array.
@@ -161,9 +169,9 @@ def extract_from_table(root_node, headers, header_tags = %w(td th))
   # for each row we collect the elements at the same positions as the header_nodes.
   # this is what we finally return from the method.
-  (header_row.index_in_parent+1 .. table.elements.size).collect do |inx|
+  (header_row.index_in_parent .. table.elements.size).collect do |inx|
     row = table.elements[inx]
-    header_nodes.collect { |n| row.elements[ n.index_in_parent ].text }
+    header_nodes.collect { |n| row.elements[ n.parent.elements.index(n) ].text }
   end
 end

data/lib/mechanize/pluggable_parsers.rb ADDED Viewed

@@ -0,0 +1,147 @@
+module WWW
+  class Mechanize
+    # = Synopsis
+    # This is the default (and base) class for the Pluggable Parsers.  If
+    # Mechanize cannot find an appropriate class to use for the content type,
+    # this class will be used.  For example, if you download a JPG, Mechanize
+    # will not know how to parse it, so this class will be instantiated.
+    #
+    # This is a good class to use as the base class for building your own
+    # pluggable parsers.
+    #
+    # == Example
+    #  require 'rubygems'
+    #  require 'mechanize'
+    #
+    #  agent = WWW::Mechanize.new
+    #  agent.get('http://example.com/foo.jpg').class  #=> WWW::Mechanize::File
+    #
+    class File
+      attr_accessor :uri, :response, :body, :code
+      alias :content :body
+      def initialize(uri=nil, response=nil, body=nil, code=nil)
+        @uri, @response, @body, @code = uri, response, body, code
+      end
+      # Use this method to save the content of this object to filename
+      def save_as(filename)
+        ::File::open(filename, "w") { |f|
+          f.write body
+        }
+      end
+    end
+    # = Synopsis
+    # This is a pluggable parser that automatically saves every file
+    # it encounters.  It saves the files as a tree, reflecting the
+    # host and file path.
+    #
+    # == Example to save all PDFs
+    #  require 'rubygems'
+    #  require 'mechanize'
+    #
+    #  agent = WWW::Mechanize.new
+    #  agent.pluggable_parser.pdf = WWW::Mechanize::FileSaver
+    #  agent.get('http://example.com/foo.pdf')
+    #
+    class FileSaver < File
+      def initialize(uri=nil, response=nil, body=nil, code=nil)
+        @uri, @response, @body, @code = uri, response, body, code
+        path = uri.path == '/' ? '/index.html' : uri.path
+        path =~ /^(.*)\/([^\/]*)$/
+        filename = $2
+        path = "#{uri.host}#{$1}"
+        FileUtils.mkdir_p(path)
+        save_as("#{path}/#{filename}")
+      end
+    end
+    # = Synopsis
+    # This class is used to register and maintain pluggable parsers for
+    # Mechanize to use.
+    #
+    # A Pluggable Parser is a parser that Mechanize uses for any particular
+    # content type.  Mechanize will ask PluggableParser for the class it
+    # should initialize given any content type.  This class allows users to
+    # register their own pluggable parsers, or modify existing pluggable
+    # parsers.
+    #
+    # PluggableParser returns a WWW::Mechanize::File object for content types
+    # that it does not know how to handle.  WWW::Mechanize::File provides
+    # basic functionality for any content type, so it is a good class to
+    # extend when building your own parsers.
+    # == Example
+    # To create your own parser, just create a class that takes four
+    # parameters in the constructor.  Here is an example of registering
+    # a pluggable parser that handles CSV files:
+    #  class CSVParser < WWW::Mechanize::File
+    #    attr_reader :csv
+    #    def initialize(uri=nil, response=nil, body=nil, code=nil)
+    #      super(uri, response, body, code)
+    #      @csv = CSV.parse(body)
+    #    end
+    #  end
+    #  agent = WWW::Mechanize.new
+    #  agent.pluggable_parser.csv = CSVParser
+    #  agent.get('http://example.com/test.csv')  # => CSVParser
+    # Now any page that returns the content type of 'text/csv' will initialize
+    # a CSVParser and return that object to the caller.
+    #
+    # To register a pluggable parser for a content type that pluggable parser
+    # does not know about, just use the hash syntax:
+    #  agent.pluggable_parser['text/something'] = SomeClass
+    #
+    # To set the default parser, just use the 'default' method:
+    #  agent.pluggable_parser.default = SomeClass
+    # Now all unknown content types will be instances of SomeClass.
+    class PluggableParser
+      CONTENT_TYPES = {
+        :html => 'text/html',
+        :pdf  => 'application/pdf',
+        :csv  => 'text/csv',
+        :xml  => 'text/xml',
+      }
+      attr_accessor :default
+      def initialize
+        @parsers = { CONTENT_TYPES[:html] => Page }
+        @default = File
+      end
+      def parser(content_type)
+        content_type.nil? ? default : @parsers[content_type] || default
+      end
+      def register_parser(content_type, klass)
+        @parsers[content_type] = klass
+      end
+      def html=(klass)
+        register_parser(CONTENT_TYPES[:html], klass)
+      end
+      def pdf=(klass)
+        register_parser(CONTENT_TYPES[:pdf], klass)
+      end
+      def csv=(klass)
+        register_parser(CONTENT_TYPES[:csv], klass)
+      end
+      def xml=(klass)
+        register_parser(CONTENT_TYPES[:xml], klass)
+      end
+      def [](content_type)
+        @parsers[content_type]
+      end
+      def []=(content_type, klass)
+        @parsers[content_type] = klass
+      end
+    end
+  end
+end

data/test/data/server.crt ADDED Viewed

@@ -0,0 +1,14 @@
+-----BEGIN CERTIFICATE-----
+MIICLzCCAZgCCQDS5ue63ULFQDANBgkqhkiG9w0BAQUFADBcMQswCQYDVQQGEwJV
+UzETMBEGA1UECBMKV2FzaGluZ3RvbjEQMA4GA1UEBxMHU2VhdHRsZTESMBAGA1UE
+ChMJTWVjaGFuaXplMRIwEAYDVQQDEwlsb2NhbGhvc3QwHhcNMDYwNjI5MjEzMjIy
+WhcNMDYwNzI5MjEzMjIyWjBcMQswCQYDVQQGEwJVUzETMBEGA1UECBMKV2FzaGlu
+Z3RvbjEQMA4GA1UEBxMHU2VhdHRsZTESMBAGA1UEChMJTWVjaGFuaXplMRIwEAYD
+VQQDEwlsb2NhbGhvc3QwgZ8wDQYJKoZIhvcNAQEBBQADgY0AMIGJAoGBAL3qHTIQ
+32nJtgwvL86UYhO3W8WPAPWEmY1FgsAxboWDdoHr/klGCKuPXR5tUovymD26/G3Z
+yAN+ev7IJUIA6E++jCIQ9v7l22NOJyN/7bS9gNXFfKeNWJXSd0D6DWypgPURHVhi
+A4viRFYiv/Q1XDU/UxNTqu2/OoQo/KjWEGQvAgMBAAEwDQYJKoZIhvcNAQEFBQAD
+gYEAZhacqH+7sknkjnptrBkYjbmmlj8STYXwKs9+xWsUW1NSW01jT61e7qlqOTR3
+26tUms1aq4OTBovGSBboNKI2NqWSHD0stdudjPMyNj0eZBJVLlaYiS7/1AqV6fM/
+OGmX/Alaaa3fTytbuocHtQfm9ue18dTzabfIw2Wp6Hscm/Q=
+-----END CERTIFICATE-----

data/test/data/server.csr ADDED Viewed

@@ -0,0 +1,11 @@
+-----BEGIN CERTIFICATE REQUEST-----
+MIIBnDCCAQUCAQAwXDELMAkGA1UEBhMCVVMxEzARBgNVBAgTCldhc2hpbmd0b24x
+EDAOBgNVBAcTB1NlYXR0bGUxEjAQBgNVBAoTCU1lY2hhbml6ZTESMBAGA1UEAxMJ
+bG9jYWxob3N0MIGfMA0GCSqGSIb3DQEBAQUAA4GNADCBiQKBgQC96h0yEN9pybYM
+Ly/OlGITt1vFjwD1hJmNRYLAMW6Fg3aB6/5JRgirj10ebVKL8pg9uvxt2cgDfnr+
+yCVCAOhPvowiEPb+5dtjTicjf+20vYDVxXynjViV0ndA+g1sqYD1ER1YYgOL4kRW
+Ir/0NVw1P1MTU6rtvzqEKPyo1hBkLwIDAQABoAAwDQYJKoZIhvcNAQEFBQADgYEA
+BdaUO9CUvFe6RIXPxJfeHnU39SDrzXAgQ4zoi9EwbJO1rs/cid3qcF6sjGgtSLgF
+qJqpJplLa0wezecjHtDKAIwchNYrv+MrchWCKWlVtxYdCX6kjn796Tpjl0w7CUfm
+mYhE04+mqjhS3SMMCiIyxnM/zGDiMmxsxyhUF+WUppo=
+-----END CERTIFICATE REQUEST-----

data/test/data/server.key ADDED Viewed

@@ -0,0 +1,18 @@
+-----BEGIN RSA PRIVATE KEY-----
+Proc-Type: 4,ENCRYPTED
+DEK-Info: DES-EDE3-CBC,A32B4789D9FBDA52
+Y937xENBSIMLD1Vkh95DjHW30UZc00xkpLa/JrPQPVQWY8gyFX47bbvE2SkyPp9D
+vsriBwTOfNB7PZpRWo5DRQMK+ZRWvfUstkvXqbG/dQfg8hWdwUbTiaiZC8pTqdVv
+qupHruzu6LLzlWqvd03lb/TlcOveFnvgeqwICJ23LMvwexJWzvOaY2+q7YKq1XSg
+/rpMTxPmXrx8+QRfI7bHkJk/aPaGFMvzgckf3pSFSARWm/1n7cYgiBTYCbgQOHEP
+99JG0WbmFQF0/Rfb8+V5eCeIEBvSatoBjTs/Mb61Ah4bNo1DIgeSvKAShfumu/Tb
+rB9wa164/eHfA0u65gKF8HS7rGzP994KEfzGQ62zRm2V1UW0aOz+3/CaCe/0bkAo
+Wy4V72psOZ5QhUH3yuyoCDze/wiuoBz3zBX3JZzQ9Za4Z7u2eQP7qhh8Dz4IOYU0
+lhM67QhIRkgGQqgXM80NbSi6VKF1EwAF3BaRIhEnjRC9spUwmKfMvxxJDjRU1YVC
+X7cSZp2budXcr13jf8g3+fSKN1vD3byliJwdnVtB9a+t1gStxJ5FSuQ2A/I0JszY
+eL2kNvb8GwakOxV1PL4T8UIobHVsaP+evrbk3iwloI2CfCya3t9T5JYubCoo6rK3
+/0PAxOR6x+HzRBDc/5cMA6WUNWrGK/QQPKmGFGyZW8oSYTCEmeiKxFSHNCv38bVq
+11TlZ5Lj1/+jYbz88pN4qkVYNIN7tqUUOxIowc3atZLBQzn21toJcRygv9abkRSh
+XmgzQYK6N3laTSr1R7KsmIA90+yDXTo44064gOu6VaTMeOJoMegetA==
+-----END RSA PRIVATE KEY-----

data/test/data/server.pem ADDED Viewed

@@ -0,0 +1,15 @@
+-----BEGIN RSA PRIVATE KEY-----
+MIICXAIBAAKBgQC96h0yEN9pybYMLy/OlGITt1vFjwD1hJmNRYLAMW6Fg3aB6/5J
+Rgirj10ebVKL8pg9uvxt2cgDfnr+yCVCAOhPvowiEPb+5dtjTicjf+20vYDVxXyn
+jViV0ndA+g1sqYD1ER1YYgOL4kRWIr/0NVw1P1MTU6rtvzqEKPyo1hBkLwIDAQAB
+AoGAJP/XcEmZPSLvNLB8v+JouS6f0n1p0mti/wLr1kXLInfaPrWbdOsQBVyI58Sk
+sF8zCdPEGiKztPqPO9QCbjl/CLPn0AmQ/L+MAc/4RnKdB/noC2l9zRBs6A/vxPuR
+8H8fYDgfVZTITaWqaFZMIScpQpVjDf50hR5REsvHtnC3vwECQQDynzUxX5bJjOPr
+MMwFG9fWyWmi/GP6xktTluvI2t4BfQVuHO/ds+N1X+Jhpv0gSmW3HB2NRoxpyW7W
+rjSnqbQFAkEAyGLmwCHd1Y39OLwl26HZ3/qvJs66oCqAdCH/oakDFg2Sxkx8ANMj
+VU/1/Fs4x95JdsdAz/c8YnhwfguuuA/BowJAK8fS32/yU7hawGvzxNA7aIS7zS9i
+EzSJDzJipcHo4bffrPkHx6fYWavNu/VYI+WI6TvnkUCzwbsZhx+0XLTgyQJAXOuv
+wOxzZ7Hd+E8W5w/Ybkpaqg428EqFIlUZqycqbTGCHD4fSmqd+dOIZ6sNW3Eyc8fV
+wBoPgEIFOA3GyUdVLQJBAMI/b3c0zYYXTDYeU/nGmqheQlT8MRqN9a2am3Zq74wO
+xUZTU/SbjWH/qor1EUtF8is0MxPoh9Gmx2Gt3QDbnM4=
+-----END RSA PRIVATE KEY-----

data/test/htdocs/no_title_test.html ADDED Viewed

@@ -0,0 +1,6 @@
+<html>
+  <title></title>
+  <body>
+    No title in the title tag
+  </body>
+</html>

data/test/parse.rb ADDED Viewed

@@ -0,0 +1,39 @@
+require 'rubygems'
+require 'web/htmltools/xmltree'
+parser = HTMLTree::XMLParser.new
+parser.feed(DATA.read.chomp)
+root = parser.document
+root.each_recursive { |node|
+  name = node.name.downcase
+  case name
+  when 'form'
+    node.each_recursive { |n|
+      puts n.name.downcase
+    }
+  end
+}
+__END__
+<html>
+<body>
+  <table>
+    <tr>
+      <td>
+        <form name="foo">
+        <table>
+          <tr><td><h1>Header</h1></td></tr>
+          <tr>
+            <td>
+              <input type="text" name="hey" value="" />
+            </td>
+          </tr>
+        </table>
+        </form>
+      </td>
+    </tr>
+  </table>
+</body>
+</html>

data/test/proxy.rb ADDED Viewed

@@ -0,0 +1,30 @@
+# This is a simple proxy that assumes the destination server will
+# close the connection after sending data, otherwise it will get blocked
+# on reads.
+require 'rubygems'
+require 'eventmachine'
+require 'socket'
+module HttpProxy
+  include Socket::Constants
+  def receive_data(data)
+    if data =~ /Host: (.*)$/
+      (host, port) = $1.chomp.split(/:/)
+      port ||= 80
+      socket = Socket.new( AF_INET, SOCK_STREAM, 0 )
+      puts port.to_i
+      puts host
+      sockaddr = Socket.pack_sockaddr_in( port.to_i, host )
+      socket.connect(sockaddr)
+      socket.write(data)
+      results = socket.read
+      send_data results
+    end
+  end
+end
+EventMachine::run {
+  EventMachine::start_server "127.0.0.1", 2001, HttpProxy
+}

data/test/server.rb CHANGED Viewed

@@ -1,4 +1,5 @@
 require 'webrick'
+require 'base64'
 require 'servlets'
 require 'logger'
@@ -20,6 +21,7 @@ s.mount("/form post", FormTest)
 s.mount("/response_code", ResponseCodeTest)
 s.mount("/file_upload", FileUploadTest)
 s.mount("/bad_content_type", BadContentTypeTest)
+s.mount("/content_type_test", ContentTypeTest)
 htpasswd = WEBrick::HTTPAuth::Htpasswd.new(base_dir + '/data/htpasswd')
 auth = WEBrick::HTTPAuth::BasicAuth.new(