RubyGems - mechanize - Versions diffs - 0.6.11 → 0.7.0 - Mend

mechanize 0.6.11 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of mechanize might be problematic. Click here for more details.

Files changed (91) hide show

data/CHANGELOG.txt +8 -0
data/Manifest.txt +31 -22
data/lib/mechanize.rb +2 -652
data/lib/www/mechanize.rb +635 -0
data/lib/www/mechanize/content_type_error.rb +16 -0
data/lib/www/mechanize/cookie.rb +64 -0
data/lib/{mechanize/cookie.rb → www/mechanize/cookie_jar.rb} +0 -60
data/lib/www/mechanize/file.rb +73 -0
data/lib/www/mechanize/file_saver.rb +39 -0
data/lib/{mechanize → www/mechanize}/form.rb +119 -137
data/lib/www/mechanize/form/button.rb +8 -0
data/lib/www/mechanize/form/check_box.rb +13 -0
data/lib/www/mechanize/form/field.rb +28 -0
data/lib/www/mechanize/form/file_upload.rb +24 -0
data/lib/www/mechanize/form/image_button.rb +23 -0
data/lib/www/mechanize/form/multi_select_list.rb +69 -0
data/lib/www/mechanize/form/option.rb +51 -0
data/lib/www/mechanize/form/radio_button.rb +38 -0
data/lib/www/mechanize/form/select_list.rb +41 -0
data/lib/www/mechanize/headers.rb +12 -0
data/lib/{mechanize → www/mechanize}/history.rb +0 -0
data/lib/{mechanize → www/mechanize}/inspect.rb +21 -28
data/lib/{mechanize → www/mechanize}/list.rb +0 -0
data/lib/{mechanize → www/mechanize}/monkey_patch.rb +19 -0
data/lib/www/mechanize/page.rb +121 -0
data/lib/www/mechanize/page/base.rb +10 -0
data/lib/www/mechanize/page/frame.rb +22 -0
data/lib/www/mechanize/page/link.rb +50 -0
data/lib/www/mechanize/page/meta.rb +10 -0
data/lib/www/mechanize/pluggable_parsers.rb +93 -0
data/lib/{mechanize/errors.rb → www/mechanize/response_code_error.rb} +1 -13
data/test/{test_includes.rb → helper.rb} +4 -18
data/test/{test_servlets.rb → servlets.rb} +0 -0
data/test/tc_authenticate.rb +1 -8
data/test/tc_bad_links.rb +3 -10
data/test/tc_blank_form.rb +1 -8
data/test/tc_checkboxes.rb +1 -8
data/test/tc_cookie_class.rb +1 -6
data/test/tc_cookie_jar.rb +1 -7
data/test/tc_cookies.rb +10 -17
data/test/tc_encoded_links.rb +5 -12
data/test/tc_errors.rb +4 -11
data/test/tc_follow_meta.rb +1 -8
data/test/tc_form_action.rb +6 -14
data/test/tc_form_as_hash.rb +1 -9
data/test/tc_form_button.rb +5 -8
data/test/tc_form_no_inputname.rb +1 -8
data/test/tc_forms.rb +16 -24
data/test/tc_frames.rb +3 -10
data/test/tc_gzipping.rb +2 -9
data/test/tc_history.rb +5 -12
data/test/tc_html_unscape_forms.rb +8 -15
data/test/tc_if_modified_since.rb +1 -6
data/test/tc_keep_alive.rb +1 -8
data/test/tc_links.rb +12 -19
data/test/tc_mech.rb +26 -34
data/test/{test_mechanize_file.rb → tc_mechanize_file.rb} +1 -6
data/test/tc_multi_select.rb +10 -17
data/test/tc_no_attributes.rb +1 -8
data/test/tc_page.rb +3 -10
data/test/tc_pluggable_parser.rb +8 -15
data/test/tc_post_form.rb +3 -10
data/test/tc_pretty_print.rb +3 -10
data/test/tc_radiobutton.rb +2 -9
data/test/tc_referer.rb +13 -20
data/test/tc_relative_links.rb +1 -8
data/test/tc_response_code.rb +14 -21
data/test/tc_save_file.rb +1 -9
data/test/tc_select.rb +3 -10
data/test/tc_select_all.rb +2 -10
data/test/tc_select_none.rb +2 -10
data/test/tc_select_noopts.rb +2 -9
data/test/tc_set_fields.rb +2 -9
data/test/tc_ssl_server.rb +5 -12
data/test/tc_subclass.rb +2 -9
data/test/tc_textarea.rb +2 -9
data/test/tc_upload.rb +2 -9
data/test/test_all.rb +4 -43
metadata +96 -80
data/lib/mechanize/form_elements.rb +0 -254
data/lib/mechanize/net-overrides/net/http.rb +0 -2107
data/lib/mechanize/net-overrides/net/https.rb +0 -172
data/lib/mechanize/net-overrides/net/protocol.rb +0 -380
data/lib/mechanize/page.rb +0 -138
data/lib/mechanize/page_elements.rb +0 -77
data/lib/mechanize/parsers/rexml_page.rb +0 -35
data/lib/mechanize/pluggable_parsers.rb +0 -204
data/lib/mechanize/rexml.rb +0 -236
data/setup.rb +0 -1585
data/test/tc_proxy.rb +0 -25
data/test/tc_watches.rb +0 -32

data/lib/www/mechanize/page.rb ADDED Viewed

@@ -0,0 +1,121 @@
+require 'fileutils'
+require 'hpricot'
+require 'forwardable'
+require 'www/mechanize/page/link'
+require 'www/mechanize/page/meta'
+require 'www/mechanize/page/base'
+require 'www/mechanize/page/frame'
+require 'www/mechanize/headers'
+module WWW
+  class Mechanize
+    # = Synopsis
+    # This class encapsulates an HTML page.  If Mechanize finds a content
+    # type of 'text/html', this class will be instantiated and returned.
+    #
+    # == Example
+    #  require 'rubygems'
+    #  require 'mechanize'
+    #
+    #  agent = WWW::Mechanize.new
+    #  agent.get('http://google.com/').class  #=> WWW::Mechanize::Page
+    #
+    class Page < WWW::Mechanize::File
+      extend Forwardable
+      attr_accessor :mech
+      def initialize(uri=nil, response=nil, body=nil, code=nil, mech=nil)
+        super(uri, response, body, code)
+        @mech           ||= mech
+        raise Mechanize::ContentTypeError.new(response['content-type']) unless
+            content_type() =~ /^text\/html/
+        @parser = @links = @forms = @meta = @bases = @frames = @iframes = nil
+      end
+      def title
+        @title ||= if parser && search('title').text.length > 0
+          search('title').text
+        end
+      end
+      def parser
+        @parser ||= body && response ? Hpricot.parse(body) : nil
+      end
+      alias :root :parser
+      # Get the content type
+      def content_type
+        response['content-type']
+      end
+      # Search through the page like Hpricot
+      def_delegator :parser, :search, :search
+      def_delegator :parser, :/, :/
+      def_delegator :parser, :at, :at
+      # Find a form with +name+.  Form will be yielded if a block is given.
+      def form(name)
+        f = forms.name(name).first
+        yield f if block_given?
+        f
+      end
+      def links
+        @links ||= WWW::Mechanize::List.new(
+          %w{ a area }.map do |tag|
+            search(tag).map do |node|
+              Link.new(node, @mech, self)
+            end
+          end.flatten
+        )
+      end
+      def forms
+        @forms ||= WWW::Mechanize::List.new(
+          search('form').map do |html_form|
+            form = Form.new(html_form, @mech, self)
+            form.action ||= @uri
+            form
+          end
+        )
+      end
+      def meta
+        @meta ||= WWW::Mechanize::List.new(
+          search('meta').map do |node|
+            next unless node['http-equiv'] && node['content']
+            (equiv, content) = node['http-equiv'], node['content']
+            if equiv && equiv.downcase == 'refresh'
+              if content && content =~ /^\d+\s*;\s*url\s*=\s*'?([^\s']+)/i
+                node['href'] = $1
+                Meta.new(node, @mech, self)
+              end
+            end
+          end.compact
+        )
+      end
+      def bases
+        @bases ||= WWW::Mechanize::List.new(
+          search('base').map { |node| Base.new(node, @mech, self) }
+        )
+      end
+      def frames
+        @frames ||= WWW::Mechanize::List.new(
+          search('frame').map { |node| Frame.new(node, @mech, self) }
+        )
+      end
+      def iframes
+        @iframes ||= WWW::Mechanize::List.new(
+          search('iframe').map { |node| Frame.new(node, @mech, self) }
+        )
+      end
+    end
+  end
+end

data/lib/www/mechanize/page/base.rb ADDED Viewed

@@ -0,0 +1,10 @@
+module WWW
+  class Mechanize
+    class Page < WWW::Mechanize::File
+      # This class encapsulates a Base tag.  Mechanize treats base tags just
+      # like 'a' tags.  Base objects will contain links, but most likely will
+      # have no text.
+      class Base < Link; end
+    end
+  end
+end

data/lib/www/mechanize/page/frame.rb ADDED Viewed

@@ -0,0 +1,22 @@
+module WWW
+  class Mechanize
+    class Page < WWW::Mechanize::File
+      # This class encapsulates a 'frame' tag.  Frame objects can be treated
+      # just like Link objects.  They contain src, the link they refer to,
+      # name, the name of the frame.  'src' and 'name' are aliased to 'href'
+      # and 'text' respectively so that a Frame object can be treated just
+      # like a Link.
+      class Frame < Link
+        alias :src :href
+        alias :name :text
+        def initialize(node, mech, referer)
+          super(node, mech, referer)
+          @node = node
+          @text = node['name']
+          @href = node['src']
+        end
+      end
+    end
+  end
+end

data/lib/www/mechanize/page/link.rb ADDED Viewed

@@ -0,0 +1,50 @@
+module WWW
+  class Mechanize
+    class Page < WWW::Mechanize::File
+      # This class encapsulates links.  It contains the text and the URI for
+      # 'a' tags parsed out of an HTML page.  If the link has no text of its own
+      # but contains an image, the image's alt text is used as the link text.
+      #
+      # For example, the text for the following links will both be 'Hello World':
+      #
+      # <a href="http://rubyforge.org">Hello World</a>
+      # <a href="http://rubyforge.org"><img src="test.jpg" alt="Hello World"></a>
+      class Link
+        attr_reader :node
+        attr_reader :href
+        attr_reader :text
+        attr_reader :attributes
+        attr_reader :page
+        alias :to_s :text
+        alias :referer :page
+        def initialize(node, mech, page)
+          @node = node
+          @href = node['href']
+          @text = node.inner_text
+          @page = page
+          @mech = mech
+          @attributes = node
+          # If there is no text, try to find an image and use its alt text
+          if (@text.nil? || @text.length == 0) && (node/'img').length > 0
+            @text = ''
+            (node/'img').each do |e|
+              @text << ( e['alt'] || '')
+            end
+          end
+        end
+        def uri
+          URI.parse(@href)
+        end
+        # Click on this link
+        def click
+          @mech.click self
+        end
+      end
+    end
+  end
+end

data/lib/www/mechanize/page/meta.rb ADDED Viewed

@@ -0,0 +1,10 @@
+module WWW
+  class Mechanize
+    class Page < WWW::Mechanize::File
+      # This class encapsulates a Meta tag.  Mechanize treats meta tags just
+      # like 'a' tags.  Meta objects will contain links, but most likely will
+      # have no text.
+      class Meta < Link; end
+    end
+  end
+end

data/lib/www/mechanize/pluggable_parsers.rb ADDED Viewed

@@ -0,0 +1,93 @@
+require 'www/mechanize/file'
+require 'www/mechanize/file_saver'
+require 'www/mechanize/page'
+module WWW
+  class Mechanize
+    # = Synopsis
+    # This class is used to register and maintain pluggable parsers for
+    # Mechanize to use.
+    #
+    # A Pluggable Parser is a parser that Mechanize uses for any particular
+    # content type.  Mechanize will ask PluggableParser for the class it
+    # should initialize given any content type.  This class allows users to
+    # register their own pluggable parsers, or modify existing pluggable
+    # parsers.
+    #
+    # PluggableParser returns a WWW::Mechanize::File object for content types
+    # that it does not know how to handle.  WWW::Mechanize::File provides
+    # basic functionality for any content type, so it is a good class to
+    # extend when building your own parsers.
+    # == Example
+    # To create your own parser, just create a class that takes four
+    # parameters in the constructor.  Here is an example of registering
+    # a pluggable parser that handles CSV files:
+    #  class CSVParser < WWW::Mechanize::File
+    #    attr_reader :csv
+    #    def initialize(uri=nil, response=nil, body=nil, code=nil)
+    #      super(uri, response, body, code)
+    #      @csv = CSV.parse(body)
+    #    end
+    #  end
+    #  agent = WWW::Mechanize.new
+    #  agent.pluggable_parser.csv = CSVParser
+    #  agent.get('http://example.com/test.csv')  # => CSVParser
+    # Now any page that returns the content type of 'text/csv' will initialize
+    # a CSVParser and return that object to the caller.
+    #
+    # To register a pluggable parser for a content type that pluggable parser
+    # does not know about, just use the hash syntax:
+    #  agent.pluggable_parser['text/something'] = SomeClass
+    #
+    # To set the default parser, just use the 'default' method:
+    #  agent.pluggable_parser.default = SomeClass
+    # Now all unknown content types will be instances of SomeClass.
+    class PluggableParser
+      CONTENT_TYPES = {
+        :html => 'text/html',
+        :pdf  => 'application/pdf',
+        :csv  => 'text/csv',
+        :xml  => 'text/xml',
+      }
+      attr_accessor :default
+      def initialize
+        @parsers = { CONTENT_TYPES[:html] => Page }
+        @default = File
+      end
+      def parser(content_type)
+        content_type.nil? ? default : @parsers[content_type] || default
+      end
+      def register_parser(content_type, klass)
+        @parsers[content_type] = klass
+      end
+      def html=(klass)
+        register_parser(CONTENT_TYPES[:html], klass)
+      end
+      def pdf=(klass)
+        register_parser(CONTENT_TYPES[:pdf], klass)
+      end
+      def csv=(klass)
+        register_parser(CONTENT_TYPES[:csv], klass)
+      end
+      def xml=(klass)
+        register_parser(CONTENT_TYPES[:xml], klass)
+      end
+      def [](content_type)
+        @parsers[content_type]
+      end
+      def []=(content_type, klass)
+        @parsers[content_type] = klass
+      end
+    end
+  end
+end

data/lib/{mechanize/errors.rb → www/mechanize/response_code_error.rb} RENAMED Viewed

@@ -1,18 +1,5 @@
 module WWW
   class Mechanize
-    # =Synopsis
-    # This class contains an error for when a pluggable parser tries to
-    # parse a content type that it does not know how to handle.  For example
-    # if WWW::Mechanize::Page were to try to parse a PDF, a ContentTypeError
-    # would be thrown.
-    class ContentTypeError < RuntimeError
-      attr_reader :content_type
-      def initialize(content_type)
-        @content_type = content_type
-      end
-    end
     # =Synopsis
     # This error is thrown when Mechanize encounters a response code it does
     # not know how to handle.  Currently, this exception will be thrown
@@ -35,3 +22,4 @@ module WWW
     end
   end
 end

data/test/{test_includes.rb → helper.rb} RENAMED Viewed

@@ -1,15 +1,12 @@
-require 'net/http'
-require 'test_servlets'
+require 'test/unit'
+require 'rubygems'
+require 'mechanize'
 require 'webrick/httputils'
+require 'servlets'
 BASE_DIR = File.dirname(__FILE__)
 class Net::HTTP
-  #def self.new(*args)
-  #  obj = allocate
-  #  return obj
-  #end
   alias :old_do_start :do_start
   def do_start
@@ -108,14 +105,3 @@ class Response
     yield body
   end
 end
-module TestMethods
-  PORT      = 2000
-  PROXYPORT = 2001
-  SSLPORT   = 2002
-  def html_response
-    { 'content-type' => 'text/html' }
-  end
-end

data/test/{test_servlets.rb → servlets.rb} RENAMED Viewed

File without changes

data/test/tc_authenticate.rb CHANGED Viewed

@@ -1,13 +1,6 @@
-$:.unshift File.join(File.dirname(__FILE__), "..", "lib")
-require 'test/unit'
-require 'rubygems'
-require 'mechanize'
-require 'test_includes'
+require File.dirname(__FILE__) + "/helper"
 class BasicAuthTest < Test::Unit::TestCase
-  include TestMethods
   def setup
     @agent = WWW::Mechanize.new
   end

data/test/tc_bad_links.rb CHANGED Viewed

@@ -1,16 +1,9 @@
-$:.unshift File.join(File.dirname(__FILE__), "..", "lib")
-require 'test/unit'
-require 'rubygems'
-require 'mechanize'
-require 'test_includes'
+require File.dirname(__FILE__) + "/helper"
 class TestBadLinks < Test::Unit::TestCase
-  include TestMethods
   def setup
     @agent = WWW::Mechanize.new
-    @page = @agent.get("http://localhost:#{PORT}/tc_bad_links.html")
+    @page = @agent.get("http://localhost/tc_bad_links.html")
   end
   def test_space_in_link
@@ -24,7 +17,7 @@ class TestBadLinks < Test::Unit::TestCase
   def test_space_in_url
     page = nil
     assert_nothing_raised do
-      page = @agent.get("http://localhost:#{PORT}/tc_bad_links.html ")
+      page = @agent.get("http://localhost/tc_bad_links.html ")
     end
     assert_match(/tc_bad_links.html$/, @agent.history.last.uri.to_s)
     assert_equal(2, @agent.history.length)

data/test/tc_blank_form.rb CHANGED Viewed

@@ -1,13 +1,6 @@
-$:.unshift File.join(File.dirname(__FILE__), "..", "lib")
-require 'test/unit'
-require 'rubygems'
-require 'mechanize'
-require 'test_includes'
+require File.dirname(__FILE__) + "/helper"
 class BlankFormTest < Test::Unit::TestCase
-  include TestMethods
   def setup
     @agent = WWW::Mechanize.new
   end

data/test/tc_checkboxes.rb CHANGED Viewed

@@ -1,13 +1,6 @@
-$:.unshift File.join(File.dirname(__FILE__), "..", "lib")
-require 'test/unit'
-require 'rubygems'
-require 'mechanize'
-require 'test_includes'
+require File.dirname(__FILE__) + "/helper"
 class TestCheckBoxes < Test::Unit::TestCase
-  include TestMethods
   def setup
     @agent = WWW::Mechanize.new
     @page = @agent.get('http://localhost/tc_checkboxes.html')