RubyGems - mechanize - Versions diffs - 0.6.11 → 0.7.0 - Mend

mechanize 0.6.11 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of mechanize might be problematic. Click here for more details.

Files changed (91) hide show

data/CHANGELOG.txt +8 -0
data/Manifest.txt +31 -22
data/lib/mechanize.rb +2 -652
data/lib/www/mechanize.rb +635 -0
data/lib/www/mechanize/content_type_error.rb +16 -0
data/lib/www/mechanize/cookie.rb +64 -0
data/lib/{mechanize/cookie.rb → www/mechanize/cookie_jar.rb} +0 -60
data/lib/www/mechanize/file.rb +73 -0
data/lib/www/mechanize/file_saver.rb +39 -0
data/lib/{mechanize → www/mechanize}/form.rb +119 -137
data/lib/www/mechanize/form/button.rb +8 -0
data/lib/www/mechanize/form/check_box.rb +13 -0
data/lib/www/mechanize/form/field.rb +28 -0
data/lib/www/mechanize/form/file_upload.rb +24 -0
data/lib/www/mechanize/form/image_button.rb +23 -0
data/lib/www/mechanize/form/multi_select_list.rb +69 -0
data/lib/www/mechanize/form/option.rb +51 -0
data/lib/www/mechanize/form/radio_button.rb +38 -0
data/lib/www/mechanize/form/select_list.rb +41 -0
data/lib/www/mechanize/headers.rb +12 -0
data/lib/{mechanize → www/mechanize}/history.rb +0 -0
data/lib/{mechanize → www/mechanize}/inspect.rb +21 -28
data/lib/{mechanize → www/mechanize}/list.rb +0 -0
data/lib/{mechanize → www/mechanize}/monkey_patch.rb +19 -0
data/lib/www/mechanize/page.rb +121 -0
data/lib/www/mechanize/page/base.rb +10 -0
data/lib/www/mechanize/page/frame.rb +22 -0
data/lib/www/mechanize/page/link.rb +50 -0
data/lib/www/mechanize/page/meta.rb +10 -0
data/lib/www/mechanize/pluggable_parsers.rb +93 -0
data/lib/{mechanize/errors.rb → www/mechanize/response_code_error.rb} +1 -13
data/test/{test_includes.rb → helper.rb} +4 -18
data/test/{test_servlets.rb → servlets.rb} +0 -0
data/test/tc_authenticate.rb +1 -8
data/test/tc_bad_links.rb +3 -10
data/test/tc_blank_form.rb +1 -8
data/test/tc_checkboxes.rb +1 -8
data/test/tc_cookie_class.rb +1 -6
data/test/tc_cookie_jar.rb +1 -7
data/test/tc_cookies.rb +10 -17
data/test/tc_encoded_links.rb +5 -12
data/test/tc_errors.rb +4 -11
data/test/tc_follow_meta.rb +1 -8
data/test/tc_form_action.rb +6 -14
data/test/tc_form_as_hash.rb +1 -9
data/test/tc_form_button.rb +5 -8
data/test/tc_form_no_inputname.rb +1 -8
data/test/tc_forms.rb +16 -24
data/test/tc_frames.rb +3 -10
data/test/tc_gzipping.rb +2 -9
data/test/tc_history.rb +5 -12
data/test/tc_html_unscape_forms.rb +8 -15
data/test/tc_if_modified_since.rb +1 -6
data/test/tc_keep_alive.rb +1 -8
data/test/tc_links.rb +12 -19
data/test/tc_mech.rb +26 -34
data/test/{test_mechanize_file.rb → tc_mechanize_file.rb} +1 -6
data/test/tc_multi_select.rb +10 -17
data/test/tc_no_attributes.rb +1 -8
data/test/tc_page.rb +3 -10
data/test/tc_pluggable_parser.rb +8 -15
data/test/tc_post_form.rb +3 -10
data/test/tc_pretty_print.rb +3 -10
data/test/tc_radiobutton.rb +2 -9
data/test/tc_referer.rb +13 -20
data/test/tc_relative_links.rb +1 -8
data/test/tc_response_code.rb +14 -21
data/test/tc_save_file.rb +1 -9
data/test/tc_select.rb +3 -10
data/test/tc_select_all.rb +2 -10
data/test/tc_select_none.rb +2 -10
data/test/tc_select_noopts.rb +2 -9
data/test/tc_set_fields.rb +2 -9
data/test/tc_ssl_server.rb +5 -12
data/test/tc_subclass.rb +2 -9
data/test/tc_textarea.rb +2 -9
data/test/tc_upload.rb +2 -9
data/test/test_all.rb +4 -43
metadata +96 -80
data/lib/mechanize/form_elements.rb +0 -254
data/lib/mechanize/net-overrides/net/http.rb +0 -2107
data/lib/mechanize/net-overrides/net/https.rb +0 -172
data/lib/mechanize/net-overrides/net/protocol.rb +0 -380
data/lib/mechanize/page.rb +0 -138
data/lib/mechanize/page_elements.rb +0 -77
data/lib/mechanize/parsers/rexml_page.rb +0 -35
data/lib/mechanize/pluggable_parsers.rb +0 -204
data/lib/mechanize/rexml.rb +0 -236
data/setup.rb +0 -1585
data/test/tc_proxy.rb +0 -25
data/test/tc_watches.rb +0 -32

data/lib/mechanize/page.rb DELETED Viewed

@@ -1,138 +0,0 @@
-require 'fileutils'
-require 'hpricot'
-require 'forwardable'
-module WWW
-  class Mechanize
-    # = Synopsis
-    # This class encapsulates an HTML page.  If Mechanize finds a content
-    # type of 'text/html', this class will be instantiated and returned.
-    #
-    # == Example
-    #  require 'rubygems'
-    #  require 'mechanize'
-    #
-    #  agent = WWW::Mechanize.new
-    #  agent.get('http://google.com/').class  #=> WWW::Mechanize::Page
-    #
-    class Page < File
-      extend Forwardable
-      attr_reader :parser, :title, :watch_for_set
-      attr_reader :frames, :iframes, :links, :forms, :meta, :watches, :bases
-      attr_accessor :mech
-      alias :root :parser
-      def initialize(uri=nil, response=nil, body=nil, code=nil, mech=nil)
-        super(uri, response, body, code)
-        @watch_for_set  ||= {}
-        @mech           ||= mech
-        raise Mechanize::ContentTypeError.new(response['content-type']) unless
-            content_type() =~ /^text\/html/
-        # construct parser and feed with HTML
-        if body && response
-          @parser ||= Hpricot.parse(body)
-          parse_html
-        end
-      end
-      # Get the content type
-      def content_type
-        @response['content-type']
-      end
-      # Search through the page like Hpricot
-      def_delegator :@parser, :search, :search
-      def_delegator :@parser, :/, :/
-      def_delegator :@parser, :at, :at
-      def watch_for_set=(obj)
-        @watch_for_set = obj
-        parse_html if @body && @watch_for_set
-      end
-      # Find a form with +name+.  Form will be yielded if a block is given.
-      def form(name)
-        f = forms.name(name).first
-        yield f if block_given?
-        f
-      end
-      private
-      def parse_html
-        @forms    = WWW::Mechanize::List.new
-        @links    = WWW::Mechanize::List.new
-        @meta     = WWW::Mechanize::List.new
-        @frames   = WWW::Mechanize::List.new
-        @iframes  = WWW::Mechanize::List.new
-        @bases    = WWW::Mechanize::List.new
-        @watches  = {}
-        # Set the title
-        @title = if (@parser/'title').text.length > 0
-          (@parser/'title').text
-        end
-        # Find all 'base' tags
-        (@parser/'base').each do |node|
-          @bases << Base.new(node, @mech, self)
-        end
-        # Find all the form tags
-        (@parser/'form').each do |html_form|
-          form = Form.new(html_form, @mech, self)
-          form.action ||= @uri
-          @forms << form
-        end
-        # Find all the 'a' tags
-        (@parser/'a').each do |node|
-          @links << Link.new(node, @mech, self)
-        end
-        # Find all the 'area' tags
-        (@parser/'area').each do |node|
-          @links << Link.new(node, @mech, self)
-        end
-        # Find all 'meta' tags
-        (@parser/'meta').each do |node|
-          next unless node['http-equiv']
-          next unless node['content']
-          equiv   = node['http-equiv']
-          content = node['content']
-          if equiv != nil && equiv.downcase == 'refresh'
-            if content != nil && content =~ /^\d+\s*;\s*url\s*=\s*'?([^\s']+)/i
-              node['href'] = $1
-              @meta << Meta.new(node, @mech, self)
-            end
-          end
-        end
-        # Find all 'frame' tags
-        (@parser/'frame').each do |node|
-          @frames << Frame.new(node, @mech, self)
-        end
-        # Find all 'iframe' tags
-        (@parser/'iframe').each do |node|
-          @iframes << Frame.new(node, @mech, self)
-        end
-        # Find all watch tags
-        unless @watch_for_set.nil?
-          @watch_for_set.each do |key, klass|
-            (@parser/key).each do |node|
-              @watches[key] ||= []
-              @watches[key] << (klass ? klass.new(node) : node)
-            end
-          end
-        end
-      end
-    end
-  end
-end

data/lib/mechanize/page_elements.rb DELETED Viewed

@@ -1,77 +0,0 @@
-module WWW
-  class Mechanize
-    # This class encapsulates links.  It contains the text and the URI for
-    # 'a' tags parsed out of an HTML page.  If the link contains an image,
-    # the alt text will be used for that image.
-    #
-    # For example, the text for the following links will both be 'Hello World':
-    #
-    # <a href="http://rubyforge.org">Hello World</a>
-    # <a href="http://rubyforge.org"><img src="test.jpg" alt="Hello World"></a>
-    class Link
-      attr_reader :node
-      attr_reader :href
-      attr_reader :text
-      attr_reader :attributes
-      attr_reader :page
-      alias :to_s :text
-      alias :referer :page
-      def initialize(node, mech, page)
-        @node = node
-        @href = node['href']
-        @text = node.inner_text
-        @page = page
-        @mech = mech
-        @attributes = node
-        # If there is no text, try to find an image and use its alt text
-        if (@text.nil? || @text.length == 0) && (node/'img').length > 0
-          @text = ''
-          (node/'img').each do |e|
-            @text << ( e['alt'] || '')
-          end
-        end
-      end
-      def uri
-        URI.parse(@href)
-      end
-      # Click on this link
-      def click
-        @mech.click self
-      end
-    end
-    # This class encapsulates a Meta tag.  Mechanize treats meta tags just
-    # like 'a' tags.  Meta objects will contain links, but most likely will
-    # have no text.
-    class Meta < Link
-    end
-    # This class encapsulates a 'frame' tag.  Frame objects can be treated
-    # just like Link objects.  They contain src, the link they refer to, and
-    # name, the name of the frame.  'src' and 'name' are aliased to 'href'
-    # and 'text' respectively so that a Frame object can be treated just
-    # like a Link.
-    class Frame < Link
-      alias :src :href
-      alias :name :text
-      def initialize(node, mech, referer)
-        super(node, mech, referer)
-        @node = node
-        @text = node['name']
-        @href = node['src']
-      end
-    end
-    # This class encapsulates a Base tag.  Mechanize treats base tags just like
-    # 'a' tags.  Base objects will contain links, but most likely will have
-    # no text.
-    class Base < Link
-    end
-  end
-end

data/lib/mechanize/parsers/rexml_page.rb DELETED Viewed

@@ -1,35 +0,0 @@
-require 'web/htmltools/xmltree'
-require 'mechanize/rexml'
-class WWW::Mechanize::REXMLPage < WWW::Mechanize::Page
-  def initialize(uri=nil, response=nil, body=nil, code=nil, mech=nil)
-    @body           = body
-    @watch_for_set  = {}
-    @mech           = mech
-    # construct parser and feed with HTML
-    parser = HTMLTree::XMLParser.new
-    begin
-      parser.feed(@body)
-    rescue => ex
-      if ex.message =~ /attempted adding second root element to document/ and
-        # Put the whole document inside a single root element, which I
-        # simply name <root>, just to make the parser happy. It's no
-        # longer valid HTML, but without a single root element, it's not
-        # valid HTML either.
-        # TODO: leave a possible doctype definition outside this element.
-        parser = HTMLTree::XMLParser.new
-        parser.feed("<root>" + @body + "</root>")
-      else
-        raise
-      end
-    end
-    @root = parser.document
-    yield self if block_given?
-    super(uri, response, body, code)
-  end
-end

data/lib/mechanize/pluggable_parsers.rb DELETED Viewed

@@ -1,204 +0,0 @@
-module WWW
-  class Mechanize
-    # = Synopsis
-    # This is the default (and base) class for the Pluggable Parsers.  If
-    # Mechanize cannot find an appropriate class to use for the content type,
-    # this class will be used.  For example, if you download a JPG, Mechanize
-    # will not know how to parse it, so this class will be instantiated.
-    #
-    # This is a good class to use as the base class for building your own
-    # pluggable parsers.
-    #
-    # == Example
-    #  require 'rubygems'
-    #  require 'mechanize'
-    #
-    #  agent = WWW::Mechanize.new
-    #  agent.get('http://example.com/foo.jpg').class  #=> WWW::Mechanize::File
-    #
-    class File
-      attr_accessor :uri, :response, :body, :code, :filename
-      alias :header :response
-      alias :content :body
-      def initialize(uri=nil, response=nil, body=nil, code=nil)
-        @uri, @body, @code = uri, body, code
-        @response = Headers.new
-        # Copy the headers in to a hash to prevent memory leaks
-        if response
-          response.each { |k,v|
-            @response[k] = v
-          }
-        end
-        @filename = 'index.html'
-        # Set the filename
-        if disposition = @response['content-disposition']
-          disposition.split(/;\s*/).each do |pair|
-            k,v = pair.split(/=/, 2)
-            @filename = v if k.downcase == 'filename'
-          end
-        else
-          if @uri
-            @filename = @uri.path.split(/\//).last || 'index.html'
-            @filename << ".html" unless @filename =~ /\./
-          end
-        end
-        yield self if block_given?
-      end
-      # Use this method to save the content of this object to filename
-      def save_as(filename = nil)
-        if filename.nil?
-          filename = @filename
-          number = 1
-          while(::File.exists?(filename))
-            filename = "#{@filename}.#{number}"
-            number += 1
-          end
-        end
-        ::File::open(filename, "wb") { |f|
-          f.write body
-        }
-      end
-      alias :save :save_as
-    end
-    # = Synopsis
-    # This is a pluggable parser that automatically saves every file
-    # it encounters.  It saves the files as a tree, reflecting the
-    # host and file path.
-    #
-    # == Example to save all PDFs
-    #  require 'rubygems'
-    #  require 'mechanize'
-    #
-    #  agent = WWW::Mechanize.new
-    #  agent.pluggable_parser.pdf = WWW::Mechanize::FileSaver
-    #  agent.get('http://example.com/foo.pdf')
-    #
-    class FileSaver < File
-      attr_reader :filename
-      def initialize(uri=nil, response=nil, body=nil, code=nil)
-        super(uri, response, body, code)
-        path = uri.path.empty? ? 'index.html' : uri.path.gsub(/^[\/]*/, '')
-        path += 'index.html' if path =~ /\/$/
-        split_path = path.split(/\//)
-        filename = split_path.length > 0 ? split_path.pop : 'index.html'
-        joined_path = split_path.join(::File::SEPARATOR)
-        path = if joined_path.empty?
-          uri.host
-        else
-          "#{uri.host}#{::File::SEPARATOR}#{joined_path}"
-        end
-        @filename = "#{path}#{::File::SEPARATOR}#{filename}"
-        FileUtils.mkdir_p(path)
-        save_as(@filename)
-      end
-    end
-    # = Synopsis
-    # This class is used to register and maintain pluggable parsers for
-    # Mechanize to use.
-    #
-    # A Pluggable Parser is a parser that Mechanize uses for any particular
-    # content type.  Mechanize will ask PluggableParser for the class it
-    # should initialize given any content type.  This class allows users to
-    # register their own pluggable parsers, or modify existing pluggable
-    # parsers.
-    #
-    # PluggableParser returns a WWW::Mechanize::File object for content types
-    # that it does not know how to handle.  WWW::Mechanize::File provides
-    # basic functionality for any content type, so it is a good class to
-    # extend when building your own parsers.
-    # == Example
-    # To create your own parser, just create a class that takes four
-    # parameters in the constructor.  Here is an example of registering
-    # a pluggable parser that handles CSV files:
-    #  class CSVParser < WWW::Mechanize::File
-    #    attr_reader :csv
-    #    def initialize(uri=nil, response=nil, body=nil, code=nil)
-    #      super(uri, response, body, code)
-    #      @csv = CSV.parse(body)
-    #    end
-    #  end
-    #  agent = WWW::Mechanize.new
-    #  agent.pluggable_parser.csv = CSVParser
-    #  agent.get('http://example.com/test.csv')  # => CSVParser
-    # Now any page that returns the content type of 'text/csv' will initialize
-    # a CSVParser and return that object to the caller.
-    #
-    # To register a pluggable parser for a content type that pluggable parser
-    # does not know about, just use the hash syntax:
-    #  agent.pluggable_parser['text/something'] = SomeClass
-    #
-    # To set the default parser, just use the 'default' method:
-    #  agent.pluggable_parser.default = SomeClass
-    # Now all unknown content types will be instances of SomeClass.
-    class PluggableParser
-      CONTENT_TYPES = {
-        :html => 'text/html',
-        :pdf  => 'application/pdf',
-        :csv  => 'text/csv',
-        :xml  => 'text/xml',
-      }
-      attr_accessor :default
-      def initialize
-        @parsers = { CONTENT_TYPES[:html] => Page }
-        @default = File
-      end
-      def parser(content_type)
-        content_type.nil? ? default : @parsers[content_type] || default
-      end
-      def register_parser(content_type, klass)
-        @parsers[content_type] = klass
-      end
-      def html=(klass)
-        register_parser(CONTENT_TYPES[:html], klass)
-      end
-      def pdf=(klass)
-        register_parser(CONTENT_TYPES[:pdf], klass)
-      end
-      def csv=(klass)
-        register_parser(CONTENT_TYPES[:csv], klass)
-      end
-      def xml=(klass)
-        register_parser(CONTENT_TYPES[:xml], klass)
-      end
-      def [](content_type)
-        @parsers[content_type]
-      end
-      def []=(content_type, klass)
-        @parsers[content_type] = klass
-      end
-    end
-    class Headers < Hash
-      def [](key)
-        super(key.downcase)
-      end
-      def []=(key, value)
-        super(key.downcase, value)
-      end
-    end
-  end
-end