RubyGems - mechanize - Versions diffs - 0.4.7 → 0.5.0 - Mend

mechanize 0.4.7 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of mechanize might be problematic. Click here for more details.

Files changed (48) hide show

data/CHANGELOG +17 -0
data/EXAMPLES +23 -44
data/NOTES +49 -0
data/lib/mechanize.rb +95 -80
data/lib/mechanize/cookie.rb +147 -148
data/lib/mechanize/cookie.rb.rej +16 -0
data/lib/mechanize/errors.rb +29 -0
data/lib/mechanize/form.rb +211 -186
data/lib/mechanize/form_elements.rb +31 -71
data/lib/mechanize/list.rb +34 -0
data/lib/mechanize/mech_version.rb +3 -1
data/lib/mechanize/module.rb +1 -1
data/lib/mechanize/page.rb +162 -180
data/lib/mechanize/page_elements.rb +53 -40
data/lib/mechanize/parsing.rb +11 -3
data/lib/mechanize/pluggable_parsers.rb +147 -0
data/test/data/server.crt +14 -0
data/test/data/server.csr +11 -0
data/test/data/server.key +18 -0
data/test/data/server.pem +15 -0
data/test/htdocs/no_title_test.html +6 -0
data/test/parse.rb +39 -0
data/test/proxy.rb +30 -0
data/test/server.rb +2 -0
data/test/servlets.rb +8 -0
data/test/ssl_server.rb +49 -0
data/test/tc_authenticate.rb +8 -6
data/test/tc_cookie_class.rb +28 -18
data/test/tc_cookie_jar.rb +88 -27
data/test/tc_cookies.rb +41 -44
data/test/tc_errors.rb +9 -23
data/test/tc_forms.rb +36 -32
data/test/tc_frames.rb +6 -4
data/test/tc_links.rb +7 -6
data/test/tc_mech.rb +43 -46
data/test/tc_page.rb +24 -0
data/test/tc_pluggable_parser.rb +103 -0
data/test/tc_post_form.rb +41 -0
data/test/tc_proxy.rb +25 -0
data/test/tc_response_code.rb +13 -10
data/test/tc_save_file.rb +25 -0
data/test/tc_ssl_server.rb +27 -0
data/test/tc_upload.rb +8 -6
data/test/tc_watches.rb +5 -2
data/test/test_includes.rb +3 -3
data/test/ts_mech.rb +11 -2
metadata +100 -86
data/test/tc_filter.rb +0 -34

data/lib/mechanize/form_elements.rb CHANGED Viewed

@@ -1,4 +1,5 @@
 module WWW
+  class Mechanize
   # This class represents a field in a form.  It handles the following input
   # tags found in a form:
   # text, password, hidden, int, textarea
@@ -12,22 +13,8 @@ module WWW
       @name, @value = name, value
     end
-    # Returns an array of Field objects
-    # TODO: is this correct?
-    def self.extract_all_from(root_node)
-      fields = []
-      root_node.each_recursive {|node|
-        if (node.name.downcase == 'input' and
-           %w(text password hidden checkbox radio int).include?(node.attributes['type'].downcase)) or
-           %w(textarea option).include?(node.name.downcase)
-          fields << Field.new(node.attributes['name'], node.attributes['value'])
-        end
-      }
-      return fields
-    end
     def inspect
-      "#{name} = #{@value}\n"
+      "#{name} = #{@value}"
     end
   end
@@ -36,45 +23,26 @@ module WWW
   # to upload and WWW::FileUpload#mime_type= to the appropriate mime type
   # of the file.
   # See the example in EXAMPLES[link://files/EXAMPLES.html]
-  class FileUpload
-    # value is the file-name, not the file-content
-    attr_accessor :name
+  class FileUpload < Field
+    attr_accessor :name # Field name
+    attr_accessor :file_name # File name
+    attr_accessor :mime_type # Mime Type (Optional)
-    attr_accessor :file_name, :file_data, :mime_type
+    alias :file_data :value
+    alias :file_data= :value=
     def initialize(name, file_name)
-      @name, @file_name = name, file_name
+      @file_name = file_name
       @file_data = nil
+      super(name, @file_data)
     end
   end
   # This class represents a Submit button in a form.
-  class Button
-    attr_accessor :name, :value
-    def initialize(name, value)
-      @name, @value = name, value
-    end
+  class Button < Field
     def add_to_query(query)
       query << [@name, @value || ''] if @name
     end
-    # Returns an array of Button objects
-    def self.extract_all_from(root_node)
-      buttons = []
-      root_node.each_recursive {|node|
-        if node.name.downcase == 'input' and
-           ['submit'].include?(node.attributes['type'].downcase)
-          buttons << Button.new(node.attributes['name'], node.attributes['value'])
-        end
-      }
-      return buttons
-    end
-    def inspect
-      "#{name} = #{@value}\n"
-    end
   end
   # This class represents an image button in a form.  Use the x and y methods
@@ -82,6 +50,12 @@ module WWW
   class ImageButton < Button
     attr_accessor :x, :y
+    def initialize(name, value)
+      @x = nil
+      @y = nil
+      super(name, value)
+    end
     def add_to_query(query)
       if @name
         query << [@name, @value || '']
@@ -93,30 +67,18 @@ module WWW
   # This class represents a radio button found in a Form.  To activate the
   # RadioButton in the Form, set the checked method to true.
-  class RadioButton
-    attr_accessor :name, :value, :checked
+  class RadioButton < Field
+    attr_accessor :checked
     def initialize(name, value, checked)
-      @name, @value, @checked = name, value, checked
-    end
-    def inspect
-      "#{name} = #{@value}\n"
+      @checked = checked
+      super(name, value)
     end
   end
   # This class represents a check box found in a Form.  To activate the
   # CheckBox in the Form, set the checked method to true.
-  class CheckBox
-    attr_accessor :name, :value, :checked
-    def initialize(name, value, checked)
-      @name, @value, @checked = name, value, checked
-    end
-    def inspect
-      "#{name} = #{@value}\n"
-    end
+  class CheckBox < RadioButton
   end
   # This class represents a select list or drop down box in a Form.  Set the
@@ -124,13 +86,11 @@ module WWW
   # list of Option that were found.  After finding the correct option, set
   # the select lists value to the option value:
   #  selectlist.value = selectlist.options.first.value
-  class SelectList
-    attr_accessor :name, :options
-    attr_reader :value
+  class SelectList < Field
+    attr_accessor :options
     def initialize(name, node)
-      @name = name
-      @value = nil
+      value = nil
       @options = WWW::Mechanize::List.new
       # parse
@@ -138,19 +98,18 @@ module WWW
         if n.name.downcase == 'option'
           option = Option.new(n)
           @options << option
-          @value = option.value if option.selected
+          value = option.value if option.selected
         end
       }
-      @value = @options.first.value if (@value == nil && @options.first)
+      value = @options.first.value if (value == nil && @options.first)
+      super(name, value)
     end
+    alias :old_value= :value=
     def value=(value)
       @value = value.to_s
     end
-    def inspect
-      "#{name} = #{@value}\n"
-    end
   end
   # This class contains an option found within SelectList.  A
@@ -166,4 +125,5 @@ module WWW
       @selected = node.attributes['selected'] ? true : false
     end
   end
+  end
 end

data/lib/mechanize/list.rb CHANGED Viewed

@@ -1,10 +1,44 @@
 module WWW
   class Mechanize
+    # = Synopsis
+    # This class provides syntax sugar to help find things within Mechanize.
+    # Most calls in Mechanize that return arrays, like the 'links' method of
+    # WWW::Mechanize::Page, return a Mechanize::List.  This class lets you
+    # find things with a particular attribute on the found class.
+    #
+    # If you have an array with objects that respond to the method "name",
+    # and you want to find all objects where name equals 'foo', your code
+    # would look like this:
+    #
+    #  list.name('foo') # => Mechanize::List
+    #
+    # == A bit more information
+    # Mechanize::List will iterate through all of the objects it contains,
+    # testing to see if the object will respond to the "name" method.  If it
+    # does, it will test to see if calling the name method returns a value
+    # equal to the value passed in.
+    #
+    # Finding the list will return another list, so it is possible to chain
+    # calls with Mechanize::List.  For example:
+    #
+    #  list.name('foo').href('bar.html')
+    #
+    # This code will find all elements with name 'foo' and href 'bar.html'.
     class List < Array
+      # This method provides syntax sugar so that you can write expressions
+      # like this:
+      #  form.fields.with.name('foo').and.href('bar.html')
+      #
       def with
         self
       end
+      # This method will allow you to set the value of the first element
+      # in the list.  For example, finding an input field with name 'foo'
+      # and setting the value to 'bar'.
+      #
+      #  form.fields.name('foo').value = 'bar'
+      #
       def value=(arg)
         first().value=(arg)
       end

data/lib/mechanize/mech_version.rb CHANGED Viewed

@@ -1,5 +1,7 @@
 # DO NOT EDIT
 # This file is auto-generated by build scripts
 module WWW
-  MechVersion = '0.4.7'
+  class Mechanize
+    Version = '0.5.0'
+  end
 end

data/lib/mechanize/module.rb CHANGED Viewed

@@ -1,4 +1,4 @@
-class Module
+class Module # :nodoc:
   def attr_finder(*syms)
     syms.each do |sym|
       class_eval %{ def #{sym.to_s}(hash = nil)

data/lib/mechanize/page.rb CHANGED Viewed

@@ -1,192 +1,174 @@
-module WWW
-# = Synopsis
-# This class encapsulates a page.
-#
-# == Example
-#  require 'rubygems'
-#  require 'mechanize'
-#  require 'logger'
-#
-#  class Body
-#    def initialize(node)
-#      puts node.attributes['bgcolor']
-#    end
-#  end
-#
-#  agent = WWW::Mechanize.new { |a| a.log = Logger.new("mech.log") }
-#  agent.user_agent_alias = 'Mac Safari'
-#  page = agent.get("http://www.google.com/")
-#  page.watch_for_set = { 'body' => Body }
-#
-#  body = page.watches
-  class Page
-    attr_accessor :uri, :cookies, :response, :body, :code, :watch_for_set
-    attr_finder :frames, :iframes, :links, :forms, :meta, :watches
-    attr_reader :body_filter
+require 'fileutils'
-    alias :content :body
+module WWW
+  class Mechanize
+    # = Synopsis
+    # This class encapsulates an HTML page.  If Mechanize finds a content
+    # type of 'text/html', this class will be instantiated and returned.
+    #
+    # == Example
+    #  require 'rubygems'
+    #  require 'mechanize'
+    #
+    #  agent = WWW::Mechanize.new
+    #  agent.get('http://google.com/').class  #=> WWW::Mechanize::Page
+    #
+    class Page < File
+      attr_accessor :watch_for_set
+      attr_finder :frames, :iframes, :links, :forms, :meta, :watches
-    # Alias our finders so that we can lazily parse the html
-    alias :find_frames   :frames
-    alias :find_iframes  :iframes
-    alias :find_links    :links
-    alias :find_forms    :forms
-    alias :find_meta     :meta
-    alias :find_watches  :watches
-    def initialize(uri=nil, cookies=[], response=nil, body=nil, code=nil)
-      @uri, @cookies, @response, @body, @code = uri, cookies, response, body, code
-      @frames       = nil
-      @iframes      = nil
-      @links        = nil
-      @forms        = nil
-      @meta         = nil
-      @watches      = nil
-      @root         = nil
-      @body_filter  = lambda { |body| body }
-    end
-    # Set the body filter for the page.  The body should be a Proc object that
-    # returns what the body should be set to.  For example, replace all
-    # occurrences of 'foo' with 'bar':
-    #  page.body_filter = lambda { |body| body.gsub(/foo/, bar) }
-    def body_filter=(filter)
-      @body_filter = filter
-      parse_html()
-    end
+      # Alias our finders so that we can lazily parse the html
+      alias :find_frames   :frames
+      alias :find_iframes  :iframes
+      alias :find_links    :links
+      alias :find_forms    :forms
+      alias :find_meta     :meta
+      alias :find_watches  :watches
+      def initialize(uri=nil, response=nil, body=nil, code=nil)
+        super(uri, response, body, code)
+        @frames       = nil
+        @iframes      = nil
+        @links        = nil
+        @forms        = nil
+        @meta         = nil
+        @watches      = nil
+        @root         = nil
+        @title        = nil
+      end
+      # Get the response header
+      def header
+        @response
+      end
+      # Get the content type
+      def content_type
+        @response['Content-Type']
+      end
+      # Get a list of Form associated with this page.
+      def forms(*args)
+        parse_html() unless @forms
+        find_forms(*args)
+      end
+      # Get a list of Link associated with this page.
+      def links(*args)
+        parse_html() unless @links
+        find_links(*args)
+      end
+      # Get the root XML parse tree for this page.
+      def root
+        parse_html() unless @root
+        @root
+      end
+      # This method watches out for a particular tag, and will call back to the
+      # class specified for the tag in the watch_for_set method.  See the example
+      # in this class.
+      def watches(*args)
+        parse_html() unless @watches
+        find_watches(*args)
+      end
+      # Get a list of Meta links, usually used for refreshing the page.
+      def meta(*args)
+        parse_html() unless @meta
+        find_meta(*args)
+      end
-    # Get the response header
-    def header
-      @response.header
-    end
-    # Get the content type
-    def content_type
-      @response['Content-Type']
-    end
-    # Get a list of Form associated with this page.
-    def forms(*args)
-      parse_html() unless @forms
-      find_forms(*args)
-    end
-    # Get a list of Link associated with this page.
-    def links(*args)
-      parse_html() unless @links
-      find_links(*args)
-    end
-    # Get the root XML parse tree for this page.
-    def root
-      parse_html() unless @root
-      @root
-    end
-    # This method watches out for a particular tag, and will call back to the
-    # class specified for the tag in the watch_for_set method.  See the example
-    # in this class.
-    def watches(*args)
-      parse_html() unless @watches
-      find_watches(*args)
-    end
-    # Get a list of Meta links, usually used for refreshing the page.
-    def meta(*args)
-      parse_html() unless @meta
-      find_meta(*args)
-    end
+      # Get a list of Frame from the page
+      def frames(*args)
+        parse_html() unless @frames
+        find_frames(*args)
+      end
-    # Get a list of Frame from the page
-    def frames(*args)
-      parse_html() unless @frames
-      find_frames(*args)
-    end
+      # Get a list of IFrame from the page
+      def iframes(*args)
+        parse_html() unless @iframes
+        find_iframes(*args)
+      end
-    # Get a list of IFrame from the page
-    def iframes(*args)
-      parse_html() unless @iframes
-      find_iframes(*args)
-    end
-    def inspect
-      string = "[meta]\n"
-      meta.each { |l| string << l.inspect }
-      string << "[frames]\n"
-      frames.each { |l| string << l.inspect }
-      string << "[iframes]\n"
-      iframes.each { |l| string << l.inspect }
-      string << "[links]\n"
-      links.each { |l| string << l.inspect }
-      string << "[forms]\n"
-      forms.each { |l| string << l.inspect }
-      string
-    end
+      # Fetch the title of the page
+      def title
+        parse_html() unless @title
+        @title
+      end
+      def inspect
+        "Page: [#{title} '#{uri.to_s}']"
+      end
-    private
-    def parse_html
-      raise Mechanize::ContentTypeError.new(content_type()) unless
-        content_type() =~ /^text\/html/
-      # construct parser and feed with HTML
-      parser = HTMLTree::XMLParser.new
-      begin
-        parser.feed(body_filter.call(@body))
-      rescue => ex
-        if ex.message =~ /attempted adding second root element to document/ and
-          # Put the whole document inside a single root element, which I simply name
-          # <root>, just to make the parser happy. It's no longer valid HTML, but
-          # without a single root element, it's not valid HTML as well.
-          # TODO: leave a possible doctype definition outside this element.
-          parser = HTMLTree::XMLParser.new
-          parser.feed("<root>" + @body + "</root>")
-        else
-          raise
+      private
+      def parse_html
+        raise Mechanize::ContentTypeError.new(content_type()) unless
+          content_type() =~ /^text\/html/
+        # construct parser and feed with HTML
+        parser = HTMLTree::XMLParser.new
+        begin
+          parser.feed(@body)
+        rescue => ex
+          if ex.message =~ /attempted adding second root element to document/ and
+            # Put the whole document inside a single root element, which I
+            # simply name <root>, just to make the parser happy. It's no
+            # longer valid HTML, but without a single root element, it's not
+            # valid HTML as well.
+            # TODO: leave a possible doctype definition outside this element.
+            parser = HTMLTree::XMLParser.new
+            parser.feed("<root>" + @body + "</root>")
+          else
+            raise
+          end
         end
-      end
-      @root = parser.document
-      @forms    = WWW::Mechanize::List.new
-      @links    = WWW::Mechanize::List.new
-      @meta     = WWW::Mechanize::List.new
-      @frames   = WWW::Mechanize::List.new
-      @iframes  = WWW::Mechanize::List.new
-      @watches  = {}
-      @root.each_recursive {|node|
-        name = node.name.downcase
-        case name
-        when 'form'
-          form = Form.new(node)
-          form.action ||= @uri
-          @forms << form
-        when 'a'
-          @links << Link.new(node)
-        when 'meta'
-          equiv   = node.attributes['http-equiv']
-          content = node.attributes['content']
-          if equiv != nil && equiv.downcase == 'refresh'
-            if content != nil && content =~ /^\d+\s*;\s*url\s*=\s*(\S+)/i
-              node.attributes['href'] = $1
-              @meta << Meta.new(node)
+        @root = parser.document
+        @forms    = WWW::Mechanize::List.new
+        @links    = WWW::Mechanize::List.new
+        @meta     = WWW::Mechanize::List.new
+        @frames   = WWW::Mechanize::List.new
+        @iframes  = WWW::Mechanize::List.new
+        @watches  = {}
+        @root.each_recursive {|node|
+          name = node.name.downcase
+          case name
+          when 'form'
+            form = Form.new(node)
+            form.action ||= @uri
+            @forms << form
+          when 'title'
+            @title = node.text
+          when 'a'
+            @links << Link.new(node)
+          when 'meta'
+            equiv   = node.attributes['http-equiv']
+            content = node.attributes['content']
+            if equiv != nil && equiv.downcase == 'refresh'
+              if content != nil && content =~ /^\d+\s*;\s*url\s*=\s*(\S+)/i
+                node.attributes['href'] = $1
+                @meta << Meta.new(node)
+              end
+            end
+          when 'frame'
+            @frames << Frame.new(node)
+          when 'iframe'
+            @iframes << Frame.new(node)
+          else
+            if @watch_for_set and @watch_for_set.keys.include?( name )
+              @watches[name] = [] unless @watches[name]
+              klass = @watch_for_set[name]
+              @watches[name] << (klass ? klass.new(node) : node)
             end
           end
-        when 'frame'
-          @frames << Frame.new(node)
-        when 'iframe'
-          @iframes << Frame.new(node)
-        else
-          if @watch_for_set and @watch_for_set.keys.include?( name )
-            @watches[name] = [] unless @watches[name]
-            klass = @watch_for_set[name]
-            @watches[name] << (klass ? klass.new(node) : node)
-          end
-        end
-      }
+        }
+      end
     end
   end
 end