mechanize 0.4.7 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mechanize might be problematic. Click here for more details.
- data/CHANGELOG +17 -0
 - data/EXAMPLES +23 -44
 - data/NOTES +49 -0
 - data/lib/mechanize.rb +95 -80
 - data/lib/mechanize/cookie.rb +147 -148
 - data/lib/mechanize/cookie.rb.rej +16 -0
 - data/lib/mechanize/errors.rb +29 -0
 - data/lib/mechanize/form.rb +211 -186
 - data/lib/mechanize/form_elements.rb +31 -71
 - data/lib/mechanize/list.rb +34 -0
 - data/lib/mechanize/mech_version.rb +3 -1
 - data/lib/mechanize/module.rb +1 -1
 - data/lib/mechanize/page.rb +162 -180
 - data/lib/mechanize/page_elements.rb +53 -40
 - data/lib/mechanize/parsing.rb +11 -3
 - data/lib/mechanize/pluggable_parsers.rb +147 -0
 - data/test/data/server.crt +14 -0
 - data/test/data/server.csr +11 -0
 - data/test/data/server.key +18 -0
 - data/test/data/server.pem +15 -0
 - data/test/htdocs/no_title_test.html +6 -0
 - data/test/parse.rb +39 -0
 - data/test/proxy.rb +30 -0
 - data/test/server.rb +2 -0
 - data/test/servlets.rb +8 -0
 - data/test/ssl_server.rb +49 -0
 - data/test/tc_authenticate.rb +8 -6
 - data/test/tc_cookie_class.rb +28 -18
 - data/test/tc_cookie_jar.rb +88 -27
 - data/test/tc_cookies.rb +41 -44
 - data/test/tc_errors.rb +9 -23
 - data/test/tc_forms.rb +36 -32
 - data/test/tc_frames.rb +6 -4
 - data/test/tc_links.rb +7 -6
 - data/test/tc_mech.rb +43 -46
 - data/test/tc_page.rb +24 -0
 - data/test/tc_pluggable_parser.rb +103 -0
 - data/test/tc_post_form.rb +41 -0
 - data/test/tc_proxy.rb +25 -0
 - data/test/tc_response_code.rb +13 -10
 - data/test/tc_save_file.rb +25 -0
 - data/test/tc_ssl_server.rb +27 -0
 - data/test/tc_upload.rb +8 -6
 - data/test/tc_watches.rb +5 -2
 - data/test/test_includes.rb +3 -3
 - data/test/ts_mech.rb +11 -2
 - metadata +100 -86
 - data/test/tc_filter.rb +0 -34
 
    
        data/CHANGELOG
    CHANGED
    
    | 
         @@ -1,3 +1,20 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            = Mechanize CHANGELOG
         
     | 
| 
      
 2 
     | 
    
         
            +
             
     | 
| 
      
 3 
     | 
    
         
            +
            == 0.5.0
         
     | 
| 
      
 4 
     | 
    
         
            +
             
     | 
| 
      
 5 
     | 
    
         
            +
            * Added pluggable parsers. (Thanks to Eric Kolve for the idea)
         
     | 
| 
      
 6 
     | 
    
         
            +
            * Changed namespace so all classes are under WWW::Mechanize.
         
     | 
| 
      
 7 
     | 
    
         
            +
            * Updating Forms so that fields can be used as accessors (Thanks Gregory Brown)
         
     | 
| 
      
 8 
     | 
    
         
            +
            * Added WWW::Mechanize::File as default object used for unknown content types.
         
     | 
| 
      
 9 
     | 
    
         
            +
            * Added 'save_as' method to WWW::Mechanize::File, so any page can be saved.
         
     | 
| 
      
 10 
     | 
    
         
            +
            * Adding 'save_as' and 'load' to CookieJar so that cookies can be saved
         
     | 
| 
      
 11 
     | 
    
         
            +
              between sessions.
         
     | 
| 
      
 12 
     | 
    
         
            +
            * Added WWW::Mechanize::FileSaver pluggable parser to automatically save files.
         
     | 
| 
      
 13 
     | 
    
         
            +
            * Added WWW::Mechanize::Page#title for page titles
         
     | 
| 
      
 14 
     | 
    
         
            +
            * Added OpenSSL certificate support (Thanks Mike Dalessio)
         
     | 
| 
      
 15 
     | 
    
         
            +
            * Removed support for body filters in favor of pluggable parsers.
         
     | 
| 
      
 16 
     | 
    
         
            +
            * Fixed cookie bug adding a '/' when the url is missing one (Thanks Nick Dainty)
         
     | 
| 
      
 17 
     | 
    
         
            +
             
     | 
| 
       1 
18 
     | 
    
         
             
            == 0.4.7
         
     | 
| 
       2 
19 
     | 
    
         | 
| 
       3 
20 
     | 
    
         
             
            * Fixed bug with no action in forms.  Thanks to Adam Wiggins
         
     | 
    
        data/EXAMPLES
    CHANGED
    
    | 
         @@ -3,26 +3,25 @@ 
     | 
|
| 
       3 
3 
     | 
    
         
             
            == Google
         
     | 
| 
       4 
4 
     | 
    
         
             
              require 'rubygems'
         
     | 
| 
       5 
5 
     | 
    
         
             
              require 'mechanize'
         
     | 
| 
       6 
     | 
    
         
            -
              require 'logger'
         
     | 
| 
       7 
6 
     | 
    
         | 
| 
       8 
     | 
    
         
            -
              agent = WWW::Mechanize.new 
     | 
| 
      
 7 
     | 
    
         
            +
              agent = WWW::Mechanize.new
         
     | 
| 
       9 
8 
     | 
    
         
             
              agent.user_agent_alias = 'Mac Safari'
         
     | 
| 
       10 
9 
     | 
    
         
             
              page = agent.get("http://www.google.com/")
         
     | 
| 
       11 
10 
     | 
    
         
             
              search_form = page.forms.with.name("f").first
         
     | 
| 
       12 
     | 
    
         
            -
              search_form. 
     | 
| 
      
 11 
     | 
    
         
            +
              search_form.q = "Hello"
         
     | 
| 
       13 
12 
     | 
    
         
             
              search_results = agent.submit(search_form)
         
     | 
| 
       14 
13 
     | 
    
         
             
              puts search_results.body
         
     | 
| 
       15 
14 
     | 
    
         | 
| 
       16 
15 
     | 
    
         
             
            == Rubyforge
         
     | 
| 
       17 
16 
     | 
    
         
             
              require 'mechanize'
         
     | 
| 
       18 
17 
     | 
    
         | 
| 
       19 
     | 
    
         
            -
              agent = WWW::Mechanize.new 
     | 
| 
      
 18 
     | 
    
         
            +
              agent = WWW::Mechanize.new
         
     | 
| 
       20 
19 
     | 
    
         
             
              page = agent.get('http://rubyforge.org/')
         
     | 
| 
       21 
20 
     | 
    
         
             
              link = page.links.text(/Log In/).first
         
     | 
| 
       22 
21 
     | 
    
         
             
              page = agent.click(link)
         
     | 
| 
       23 
22 
     | 
    
         
             
              form = page.forms[1]
         
     | 
| 
       24 
     | 
    
         
            -
              form. 
     | 
| 
       25 
     | 
    
         
            -
              form. 
     | 
| 
      
 23 
     | 
    
         
            +
              form.form_loginname = ARGV[0]
         
     | 
| 
      
 24 
     | 
    
         
            +
              form.form_pw = ARGV[1]
         
     | 
| 
       26 
25 
     | 
    
         
             
              page = agent.submit(form, form.buttons.first)
         
     | 
| 
       27 
26 
     | 
    
         | 
| 
       28 
27 
     | 
    
         
             
              puts page.body
         
     | 
| 
         @@ -36,8 +35,8 @@ This example uploads one image as two different images to flickr. 
     | 
|
| 
       36 
35 
     | 
    
         
             
              agent = WWW::Mechanize.new
         
     | 
| 
       37 
36 
     | 
    
         
             
              page = agent.get('http://flickr.com/signin/flickr/')
         
     | 
| 
       38 
37 
     | 
    
         
             
              form = page.forms.first
         
     | 
| 
       39 
     | 
    
         
            -
              form. 
     | 
| 
       40 
     | 
    
         
            -
              form. 
     | 
| 
      
 38 
     | 
    
         
            +
              form.email = ARGV[0]
         
     | 
| 
      
 39 
     | 
    
         
            +
              form.password = ARGV[1]
         
     | 
| 
       41 
40 
     | 
    
         
             
              page = agent.submit(form)
         
     | 
| 
       42 
41 
     | 
    
         
             
              page = agent.click page.links.text('Upload').first
         
     | 
| 
       43 
42 
     | 
    
         
             
              form = page.forms.first
         
     | 
| 
         @@ -53,48 +52,28 @@ This example uploads one image as two different images to flickr. 
     | 
|
| 
       53 
52 
     | 
    
         | 
| 
       54 
53 
     | 
    
         
             
              agent.submit(form)
         
     | 
| 
       55 
54 
     | 
    
         | 
| 
       56 
     | 
    
         
            -
            ==  
     | 
| 
       57 
     | 
    
         
            -
             
     | 
| 
       58 
     | 
    
         
            -
             
     | 
| 
       59 
     | 
    
         
            -
            block returns.  The filter on WWW::Page#body_filter is a "per-page" filter,
         
     | 
| 
       60 
     | 
    
         
            -
            meaning that it is only applied to one page object.
         
     | 
| 
      
 55 
     | 
    
         
            +
            == Pluggable Parsers
         
     | 
| 
      
 56 
     | 
    
         
            +
            Let's say you want HTML pages to be parsed automatically with Rubyful Soup.
         
     | 
| 
      
 57 
     | 
    
         
            +
            This example shows you how:
         
     | 
| 
       61 
58 
     | 
    
         | 
| 
       62 
59 
     | 
    
         
             
              require 'rubygems'
         
     | 
| 
       63 
60 
     | 
    
         
             
              require 'mechanize'
         
     | 
| 
       64 
     | 
    
         
            -
              
         
     | 
| 
       65 
     | 
    
         
            -
              agent = WWW::Mechanize.new
         
     | 
| 
       66 
     | 
    
         
            -
              
         
     | 
| 
       67 
     | 
    
         
            -
              page = agent.get('http://google.com/')
         
     | 
| 
       68 
     | 
    
         
            -
              page.body_filter = lambda { |body|
         
     | 
| 
       69 
     | 
    
         
            -
                  body.gsub(/google/i, "Net::DAAP::Client")
         
     | 
| 
       70 
     | 
    
         
            -
              }
         
     | 
| 
       71 
     | 
    
         
            -
              puts page.body
         
     | 
| 
       72 
     | 
    
         
            -
              
         
     | 
| 
       73 
     | 
    
         
            -
              page = agent.get('http://google.com/')
         
     | 
| 
       74 
     | 
    
         
            -
              puts page.body
         
     | 
| 
      
 61 
     | 
    
         
            +
              require 'rubyful_soup'
         
     | 
| 
       75 
62 
     | 
    
         | 
| 
       76 
     | 
    
         
            -
             
     | 
| 
       77 
     | 
    
         
            -
             
     | 
| 
       78 
     | 
    
         
            -
             
     | 
| 
       79 
     | 
    
         
            -
             
     | 
| 
       80 
     | 
    
         
            -
             
     | 
| 
      
 63 
     | 
    
         
            +
              class SoupParser < WWW::Mechanize::Page
         
     | 
| 
      
 64 
     | 
    
         
            +
                attr_reader :soup
         
     | 
| 
      
 65 
     | 
    
         
            +
                def initialize(uri = nil, response = nil, body = nil, code = nil)
         
     | 
| 
      
 66 
     | 
    
         
            +
                  @soup = BeautifulSoup.new(body)
         
     | 
| 
      
 67 
     | 
    
         
            +
                  super(uri, response, body, code)
         
     | 
| 
      
 68 
     | 
    
         
            +
                end
         
     | 
| 
      
 69 
     | 
    
         
            +
              end
         
     | 
| 
       81 
70 
     | 
    
         | 
| 
       82 
     | 
    
         
            -
              require 'rubygems'
         
     | 
| 
       83 
     | 
    
         
            -
              require 'mechanize'
         
     | 
| 
       84 
     | 
    
         
            -
              
         
     | 
| 
       85 
71 
     | 
    
         
             
              agent = WWW::Mechanize.new
         
     | 
| 
       86 
     | 
    
         
            -
              
         
     | 
| 
       87 
     | 
    
         
            -
             
     | 
| 
       88 
     | 
    
         
            -
             
     | 
| 
       89 
     | 
    
         
            -
             
     | 
| 
       90 
     | 
    
         
            -
             
     | 
| 
       91 
     | 
    
         
            -
              
         
     | 
| 
       92 
     | 
    
         
            -
              page = agent.get('http://google.com/')
         
     | 
| 
       93 
     | 
    
         
            -
              page.links.each { |l| puts l.text }
         
     | 
| 
       94 
     | 
    
         
            -
              
         
     | 
| 
       95 
     | 
    
         
            -
              agent.body_filter = old_filter
         
     | 
| 
       96 
     | 
    
         
            -
              page = agent.get('http://google.com/')
         
     | 
| 
       97 
     | 
    
         
            -
              page.links.each { |l| puts l.text }
         
     | 
| 
      
 72 
     | 
    
         
            +
              agent.pluggable_parser.html = SoupParser
         
     | 
| 
      
 73 
     | 
    
         
            +
             
     | 
| 
      
 74 
     | 
    
         
            +
            Now all HTML pages will be parsed with the SoupParser class, and automatically
         
     | 
| 
      
 75 
     | 
    
         
            +
            give you access to a method called 'soup' where you can get access to the
         
     | 
| 
      
 76 
     | 
    
         
            +
            Beautiful Soup for that page.
         
     | 
| 
       98 
77 
     | 
    
         | 
| 
       99 
78 
     | 
    
         
             
            == Using a proxy
         
     | 
| 
       100 
79 
     | 
    
         | 
    
        data/NOTES
    CHANGED
    
    | 
         @@ -1,5 +1,54 @@ 
     | 
|
| 
       1 
1 
     | 
    
         
             
            = Mechanize Release Notes
         
     | 
| 
       2 
2 
     | 
    
         | 
| 
      
 3 
     | 
    
         
            +
            == 0.5.0
         
     | 
| 
      
 4 
     | 
    
         
            +
             
     | 
| 
      
 5 
     | 
    
         
            +
            Good News first:
         
     | 
| 
      
 6 
     | 
    
         
            +
             
     | 
| 
      
 7 
     | 
    
         
            +
            This release has many new great features!  Mechanize has been updated to
         
     | 
| 
      
 8 
     | 
    
         
            +
            handle any content type a web server returns using a system called "Pluggable
         
     | 
| 
      
 9 
     | 
    
         
            +
            Parsers".  Mechanize has always been able to handle any content type
         
     | 
| 
      
 10 
     | 
    
         
            +
            (sort of), but the pluggable parser system lets us cleanly handle any
         
     | 
| 
      
 11 
     | 
    
         
            +
            content type by instantiating a class for the content type returned from the
         
     | 
| 
      
 12 
     | 
    
         
            +
            server.  For example, when a web server returns type 'text/html', mechanize asks
         
     | 
| 
      
 13 
     | 
    
         
            +
            the pluggable parser for a class to instantiate for 'text/html'.  Mechanize
         
     | 
| 
      
 14 
     | 
    
         
            +
            then instantiates that class and returns it.  Users can define their own
         
     | 
| 
      
 15 
     | 
    
         
            +
            parsers, and register them with the Pluggable Parser so that mechanize will
         
     | 
| 
      
 16 
     | 
    
         
            +
            instantiate your class when the content type you specify is returned.  This
         
     | 
| 
      
 17 
     | 
    
         
            +
            allows you to easily preprocess your HTML, or even use other HTML parsers.
         
     | 
| 
      
 18 
     | 
    
         
            +
            Content types that the pluggable parser doesn't know how to handle will
         
     | 
| 
      
 19 
     | 
    
         
            +
            return WWW::Mechanize::File which has basic functionality like a 'save_as'
         
     | 
| 
      
 20 
     | 
    
         
            +
            method.  For more information, see the RDoc for
         
     | 
| 
      
 21 
     | 
    
         
            +
            WWW::Mechanize::PluggableParser.  Also see the EXAMPLES file.
         
     | 
| 
      
 22 
     | 
    
         
            +
             
     | 
| 
      
 23 
     | 
    
         
            +
            A 'save_as' method has been added so that any page downloaded can be easily
         
     | 
| 
      
 24 
     | 
    
         
            +
            saved to a file.
         
     | 
| 
      
 25 
     | 
    
         
            +
             
     | 
| 
      
 26 
     | 
    
         
            +
            The cookie jar for mechanize can now be saved to disk and loaded back up at
         
     | 
| 
      
 27 
     | 
    
         
            +
            another time.  If your script needs to save cookie state between executions,
         
     | 
| 
      
 28 
     | 
    
         
            +
            you can now use the 'save_as' and 'load' methods on WWW::Mechanize::CookieJar.
         
     | 
| 
      
 29 
     | 
    
         
            +
             
     | 
| 
      
 30 
     | 
    
         
            +
            Form fields can now be treated as accessors.  This means that if you have a
         
     | 
| 
      
 31 
     | 
    
         
            +
            form with the fields 'username' and 'password', you could manipulate them like
         
     | 
| 
      
 32 
     | 
    
         
            +
            this:
         
     | 
| 
      
 33 
     | 
    
         
            +
             
     | 
| 
      
 34 
     | 
    
         
            +
              form.username = 'test'
         
     | 
| 
      
 35 
     | 
    
         
            +
              form.password = 'testing'
         
     | 
| 
      
 36 
     | 
    
         
            +
              puts "username: #{form.username}"
         
     | 
| 
      
 37 
     | 
    
         
            +
              puts "password: #{form.password}"
         
     | 
| 
      
 38 
     | 
    
         
            +
             
     | 
| 
      
 39 
     | 
    
         
            +
            Form fields can still be accessed in the usual way in case there are multiple
         
     | 
| 
      
 40 
     | 
    
         
            +
            input fields with the same name.
         
     | 
| 
      
 41 
     | 
    
         
            +
             
     | 
| 
      
 42 
     | 
    
         
            +
            Bad news second:
         
     | 
| 
      
 43 
     | 
    
         
            +
             
     | 
| 
      
 44 
     | 
    
         
            +
            In this release, the name space has been altered to be more consistent.  Many
         
     | 
| 
      
 45 
     | 
    
         
            +
            classes used to be under WWW directly; they are now all under WWW::Mechanize.
         
     | 
| 
      
 46 
     | 
    
         
            +
            For example, in 0.4.7 Page was WWW::Page; in this release it is now
         
     | 
| 
      
 47 
     | 
    
         
            +
            WWW::Mechanize::Page.  This may break your code, but if you aren't using
         
     | 
| 
      
 48 
     | 
    
         
            +
            class names directly, everything should be fine.
         
     | 
| 
      
 49 
     | 
    
         
            +
             
     | 
| 
      
 50 
     | 
    
         
            +
            Body filters have been removed in favor of Pluggable Parsers.
         
     | 
| 
      
 51 
     | 
    
         
            +
             
     | 
| 
       3 
52 
     | 
    
         
             
            == 0.4.7
         
     | 
| 
       4 
53 
     | 
    
         | 
| 
       5 
54 
     | 
    
         
             
            This release of mechanize comes with a few bug fixes including fixing a
         
     | 
    
        data/lib/mechanize.rb
    CHANGED
    
    | 
         @@ -17,27 +17,20 @@ require 'net/https' 
     | 
|
| 
       17 
17 
     | 
    
         
             
            require 'uri'
         
     | 
| 
       18 
18 
     | 
    
         
             
            require 'logger'
         
     | 
| 
       19 
19 
     | 
    
         
             
            require 'webrick'
         
     | 
| 
       20 
     | 
    
         
            -
            require 'date'
         
     | 
| 
       21 
20 
     | 
    
         
             
            require 'web/htmltools/xmltree'   # narf
         
     | 
| 
       22 
21 
     | 
    
         
             
            require 'mechanize/module'
         
     | 
| 
       23 
     | 
    
         
            -
            require 'mechanize/ 
     | 
| 
       24 
     | 
    
         
            -
            require 'mechanize/parsing'
         
     | 
| 
      
 22 
     | 
    
         
            +
            require 'mechanize/mech_version'
         
     | 
| 
       25 
23 
     | 
    
         
             
            require 'mechanize/cookie'
         
     | 
| 
      
 24 
     | 
    
         
            +
            require 'mechanize/errors'
         
     | 
| 
      
 25 
     | 
    
         
            +
            require 'mechanize/pluggable_parsers'
         
     | 
| 
       26 
26 
     | 
    
         
             
            require 'mechanize/form'
         
     | 
| 
       27 
27 
     | 
    
         
             
            require 'mechanize/form_elements'
         
     | 
| 
      
 28 
     | 
    
         
            +
            require 'mechanize/list'
         
     | 
| 
       28 
29 
     | 
    
         
             
            require 'mechanize/page'
         
     | 
| 
       29 
30 
     | 
    
         
             
            require 'mechanize/page_elements'
         
     | 
| 
      
 31 
     | 
    
         
            +
            require 'mechanize/parsing'
         
     | 
| 
       30 
32 
     | 
    
         | 
| 
       31 
33 
     | 
    
         
             
            module WWW
         
     | 
| 
       32 
     | 
    
         
            -
              require 'mechanize/mech_version.rb'
         
     | 
| 
       33 
     | 
    
         
            -
             
     | 
| 
       34 
     | 
    
         
            -
            class ResponseCodeError < RuntimeError
         
     | 
| 
       35 
     | 
    
         
            -
              attr_reader :response_code
         
     | 
| 
       36 
     | 
    
         
            -
             
     | 
| 
       37 
     | 
    
         
            -
              def initialize(response_code)
         
     | 
| 
       38 
     | 
    
         
            -
                @response_code = response_code
         
     | 
| 
       39 
     | 
    
         
            -
              end
         
     | 
| 
       40 
     | 
    
         
            -
            end
         
     | 
| 
       41 
34 
     | 
    
         | 
| 
       42 
35 
     | 
    
         
             
            # = Synopsis
         
     | 
| 
       43 
36 
     | 
    
         
             
            # The Mechanize library is used for automating interaction with a website.  It
         
     | 
| 
         @@ -57,7 +50,6 @@ end 
     | 
|
| 
       57 
50 
     | 
    
         
             
            #  search_results = agent.submit(search_form)
         
     | 
| 
       58 
51 
     | 
    
         
             
            #  puts search_results.body
         
     | 
| 
       59 
52 
     | 
    
         
             
            class Mechanize
         
     | 
| 
       60 
     | 
    
         
            -
             
     | 
| 
       61 
53 
     | 
    
         
             
              AGENT_ALIASES = {
         
     | 
| 
       62 
54 
     | 
    
         
             
                'Windows IE 6' => 'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)',
         
     | 
| 
       63 
55 
     | 
    
         
             
                'Windows Mozilla' => 'Mozilla/5.0 (Windows; U; Windows NT 5.0; en-US; rv:1.4b) Gecko/20030516 Mozilla Firebird/0.6',
         
     | 
| 
         @@ -66,34 +58,51 @@ class Mechanize 
     | 
|
| 
       66 
58 
     | 
    
         
             
                'Mac Mozilla' => 'Mozilla/5.0 (Macintosh; U; PPC Mac OS X Mach-O; en-US; rv:1.4a) Gecko/20030401',
         
     | 
| 
       67 
59 
     | 
    
         
             
                'Linux Mozilla' => 'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.4) Gecko/20030624',
         
     | 
| 
       68 
60 
     | 
    
         
             
                'Linux Konqueror' => 'Mozilla/5.0 (compatible; Konqueror/3; Linux)',
         
     | 
| 
       69 
     | 
    
         
            -
                'Mechanize' => "WWW-Mechanize/#{ 
     | 
| 
      
 61 
     | 
    
         
            +
                'Mechanize' => "WWW-Mechanize/#{Version} (http://rubyforge.org/projects/mechanize/)"
         
     | 
| 
       70 
62 
     | 
    
         
             
              }
         
     | 
| 
       71 
63 
     | 
    
         | 
| 
       72 
     | 
    
         
            -
              attr_accessor :log
         
     | 
| 
       73 
     | 
    
         
            -
              attr_accessor :user_agent
         
     | 
| 
       74 
64 
     | 
    
         
             
              attr_accessor :cookie_jar
         
     | 
| 
      
 65 
     | 
    
         
            +
              attr_accessor :log
         
     | 
| 
      
 66 
     | 
    
         
            +
              attr_accessor :max_history
         
     | 
| 
       75 
67 
     | 
    
         
             
              attr_accessor :open_timeout, :read_timeout
         
     | 
| 
      
 68 
     | 
    
         
            +
              attr_accessor :user_agent
         
     | 
| 
       76 
69 
     | 
    
         
             
              attr_accessor :watch_for_set
         
     | 
| 
       77 
     | 
    
         
            -
              attr_accessor :max_history
         
     | 
| 
       78 
70 
     | 
    
         
             
              attr_accessor :ca_file
         
     | 
| 
       79 
     | 
    
         
            -
              attr_accessor : 
     | 
| 
      
 71 
     | 
    
         
            +
              attr_accessor :key
         
     | 
| 
      
 72 
     | 
    
         
            +
              attr_accessor :cert
         
     | 
| 
      
 73 
     | 
    
         
            +
              attr_accessor :pass
         
     | 
| 
      
 74 
     | 
    
         
            +
             
     | 
| 
       80 
75 
     | 
    
         
             
              attr_reader :history
         
     | 
| 
      
 76 
     | 
    
         
            +
              attr_reader :pluggable_parser
         
     | 
| 
       81 
77 
     | 
    
         | 
| 
       82 
78 
     | 
    
         
             
              def initialize
         
     | 
| 
       83 
     | 
    
         
            -
                 
     | 
| 
       84 
     | 
    
         
            -
                @ 
     | 
| 
       85 
     | 
    
         
            -
                @ 
     | 
| 
      
 79 
     | 
    
         
            +
                # attr_accessors
         
     | 
| 
      
 80 
     | 
    
         
            +
                @cookie_jar = CookieJar.new
         
     | 
| 
      
 81 
     | 
    
         
            +
                @log = Logger.new(nil)
         
     | 
| 
      
 82 
     | 
    
         
            +
                @max_history    = nil
         
     | 
| 
       86 
83 
     | 
    
         
             
                @open_timeout   = nil
         
     | 
| 
       87 
84 
     | 
    
         
             
                @read_timeout   = nil
         
     | 
| 
      
 85 
     | 
    
         
            +
                @user_agent     = AGENT_ALIASES['Mechanize']
         
     | 
| 
       88 
86 
     | 
    
         
             
                @watch_for_set  = nil
         
     | 
| 
       89 
     | 
    
         
            -
                @ 
     | 
| 
       90 
     | 
    
         
            -
                @ 
     | 
| 
       91 
     | 
    
         
            -
                @ 
     | 
| 
       92 
     | 
    
         
            -
                @ 
     | 
| 
      
 87 
     | 
    
         
            +
                @ca_file        = nil
         
     | 
| 
      
 88 
     | 
    
         
            +
                @cert           = nil # OpenSSL Certificate
         
     | 
| 
      
 89 
     | 
    
         
            +
                @key            = nil # OpenSSL Private Key
         
     | 
| 
      
 90 
     | 
    
         
            +
                @pass           = nil # OpenSSL Password
         
     | 
| 
      
 91 
     | 
    
         
            +
                
         
     | 
| 
      
 92 
     | 
    
         
            +
                # attr_readers
         
     | 
| 
      
 93 
     | 
    
         
            +
                @history        = []
         
     | 
| 
      
 94 
     | 
    
         
            +
                @pluggable_parser = PluggableParser.new
         
     | 
| 
      
 95 
     | 
    
         
            +
             
     | 
| 
      
 96 
     | 
    
         
            +
                # Basic Auth variables
         
     | 
| 
      
 97 
     | 
    
         
            +
                @user           = nil # Basic Auth User
         
     | 
| 
      
 98 
     | 
    
         
            +
                @password       = nil # Basic Auth Password
         
     | 
| 
      
 99 
     | 
    
         
            +
             
     | 
| 
      
 100 
     | 
    
         
            +
                # Proxy settings
         
     | 
| 
       93 
101 
     | 
    
         
             
                @proxy_addr     = nil
         
     | 
| 
      
 102 
     | 
    
         
            +
                @proxy_pass     = nil
         
     | 
| 
       94 
103 
     | 
    
         
             
                @proxy_port     = nil
         
     | 
| 
       95 
104 
     | 
    
         
             
                @proxy_user     = nil
         
     | 
| 
       96 
     | 
    
         
            -
             
     | 
| 
      
 105 
     | 
    
         
            +
             
     | 
| 
       97 
106 
     | 
    
         
             
                yield self if block_given?
         
     | 
| 
       98 
107 
     | 
    
         
             
              end
         
     | 
| 
       99 
108 
     | 
    
         | 
| 
         @@ -110,13 +119,7 @@ class Mechanize 
     | 
|
| 
       110 
119 
     | 
    
         | 
| 
       111 
120 
     | 
    
         
             
              # Returns a list of cookies stored in the cookie jar.
         
     | 
| 
       112 
121 
     | 
    
         
             
              def cookies
         
     | 
| 
       113 
     | 
    
         
            -
                 
     | 
| 
       114 
     | 
    
         
            -
                @cookie_jar.jar.each_key do |domain|
         
     | 
| 
       115 
     | 
    
         
            -
                  @cookie_jar.jar[domain].each_key do |name|
         
     | 
| 
       116 
     | 
    
         
            -
                    cookies << @cookie_jar.jar[domain][name]
         
     | 
| 
       117 
     | 
    
         
            -
                  end
         
     | 
| 
       118 
     | 
    
         
            -
                end
         
     | 
| 
       119 
     | 
    
         
            -
                cookies
         
     | 
| 
      
 122 
     | 
    
         
            +
                @cookie_jar.to_a
         
     | 
| 
       120 
123 
     | 
    
         
             
              end
         
     | 
| 
       121 
124 
     | 
    
         | 
| 
       122 
125 
     | 
    
         
             
              # Sets the user and password to be used for basic authentication.
         
     | 
| 
         @@ -125,12 +128,7 @@ class Mechanize 
     | 
|
| 
       125 
128 
     | 
    
         
             
                @password = password
         
     | 
| 
       126 
129 
     | 
    
         
             
              end
         
     | 
| 
       127 
130 
     | 
    
         | 
| 
       128 
     | 
    
         
            -
               
     | 
| 
       129 
     | 
    
         
            -
                $stderr.puts "This method will be deprecated, please change to 'basic_auth'"
         
     | 
| 
       130 
     | 
    
         
            -
                basic_auth(user, password)
         
     | 
| 
       131 
     | 
    
         
            -
              end
         
     | 
| 
       132 
     | 
    
         
            -
             
     | 
| 
       133 
     | 
    
         
            -
              # Fetches the URL passed in.
         
     | 
| 
      
 131 
     | 
    
         
            +
              # Fetches the URL passed in and returns a page.
         
     | 
| 
       134 
132 
     | 
    
         
             
              def get(url)
         
     | 
| 
       135 
133 
     | 
    
         
             
                cur_page = current_page() || Page.new
         
     | 
| 
       136 
134 
     | 
    
         | 
| 
         @@ -140,31 +138,14 @@ class Mechanize 
     | 
|
| 
       140 
138 
     | 
    
         
             
                page
         
     | 
| 
       141 
139 
     | 
    
         
             
              end
         
     | 
| 
       142 
140 
     | 
    
         | 
| 
       143 
     | 
    
         
            -
              # Fetch a file and return the contents
         
     | 
| 
      
 141 
     | 
    
         
            +
              # Fetch a file and return the contents of the file.
         
     | 
| 
       144 
142 
     | 
    
         
             
              def get_file(url)
         
     | 
| 
       145 
143 
     | 
    
         
             
                get(url).body
         
     | 
| 
       146 
144 
     | 
    
         
             
              end
         
     | 
| 
       147 
145 
     | 
    
         | 
| 
       148 
     | 
    
         
            -
              # Posts to the given URL wht the query parameters passed in.
         
     | 
| 
       149 
     | 
    
         
            -
              def post(url, query={})
         
     | 
| 
       150 
     | 
    
         
            -
                cur_page = current_page() || Page.new
         
     | 
| 
       151 
     | 
    
         
            -
             
     | 
| 
       152 
     | 
    
         
            -
                request_data = [WWW::Mechanize.build_query_string(query)]
         
     | 
| 
       153 
     | 
    
         
            -
             
     | 
| 
       154 
     | 
    
         
            -
                # this is called before the request is sent
         
     | 
| 
       155 
     | 
    
         
            -
                pre_request_hook = proc {|request|
         
     | 
| 
       156 
     | 
    
         
            -
                  log.debug("query: #{ query.inspect }")
         
     | 
| 
       157 
     | 
    
         
            -
                  request.add_field('Content-Type', 'application/x-www-form-urlencoded')
         
     | 
| 
       158 
     | 
    
         
            -
                  request.add_field('Content-Length', request_data[0].size.to_s)
         
     | 
| 
       159 
     | 
    
         
            -
                }
         
     | 
| 
       160 
146 
     | 
    
         | 
| 
       161 
     | 
    
         
            -
             
     | 
| 
       162 
     | 
    
         
            -
             
     | 
| 
       163 
     | 
    
         
            -
                add_to_history(page) 
         
     | 
| 
       164 
     | 
    
         
            -
                page
         
     | 
| 
       165 
     | 
    
         
            -
              end
         
     | 
| 
       166 
     | 
    
         
            -
             
     | 
| 
       167 
     | 
    
         
            -
              # Clicks the WWW::Link object passed in.
         
     | 
| 
      
 147 
     | 
    
         
            +
              # Clicks the WWW::Mechanize::Link object passed in and returns the
         
     | 
| 
      
 148 
     | 
    
         
            +
              # page fetched.
         
     | 
| 
       168 
149 
     | 
    
         
             
              def click(link)
         
     | 
| 
       169 
150 
     | 
    
         
             
                uri = to_absolute_uri(link.href)
         
     | 
| 
       170 
151 
     | 
    
         
             
                get(uri)
         
     | 
| 
         @@ -176,23 +157,48 @@ class Mechanize 
     | 
|
| 
       176 
157 
     | 
    
         
             
                @history.pop
         
     | 
| 
       177 
158 
     | 
    
         
             
              end
         
     | 
| 
       178 
159 
     | 
    
         | 
| 
      
 160 
     | 
    
         
            +
              # Posts to the given URL with the query parameters passed in.  Query
         
     | 
| 
      
 161 
     | 
    
         
            +
              # parameters can be passed as a hash, or as an array of arrays.
         
     | 
| 
      
 162 
     | 
    
         
            +
              # Example:
         
     | 
| 
      
 163 
     | 
    
         
            +
              #  agent.post('http://example.com/', "foo" => "bar")
         
     | 
| 
      
 164 
     | 
    
         
            +
              # or
         
     | 
| 
      
 165 
     | 
    
         
            +
              #  agent.post('http://example.com/', [ ["foo", "bar"] ])
         
     | 
| 
      
 166 
     | 
    
         
            +
              def post(url, query={})
         
     | 
| 
      
 167 
     | 
    
         
            +
                cur_page = current_page() || Page.new
         
     | 
| 
      
 168 
     | 
    
         
            +
             
     | 
| 
      
 169 
     | 
    
         
            +
                node = REXML::Element.new
         
     | 
| 
      
 170 
     | 
    
         
            +
                node.add_attribute('method', 'POST')
         
     | 
| 
      
 171 
     | 
    
         
            +
                node.add_attribute('enctype', 'application/x-www-form-urlencoded')
         
     | 
| 
      
 172 
     | 
    
         
            +
             
     | 
| 
      
 173 
     | 
    
         
            +
                form = Form.new(node)
         
     | 
| 
      
 174 
     | 
    
         
            +
                query.each { |k,v|
         
     | 
| 
      
 175 
     | 
    
         
            +
                  form.fields << Field.new(k,v)
         
     | 
| 
      
 176 
     | 
    
         
            +
                }
         
     | 
| 
      
 177 
     | 
    
         
            +
                post_form(url, form)
         
     | 
| 
      
 178 
     | 
    
         
            +
              end
         
     | 
| 
      
 179 
     | 
    
         
            +
             
     | 
| 
      
 180 
     | 
    
         
            +
              # Submit a form with an optional button.
         
     | 
| 
      
 181 
     | 
    
         
            +
              # Without a button:
         
     | 
| 
      
 182 
     | 
    
         
            +
              #  page = agent.get('http://example.com')
         
     | 
| 
      
 183 
     | 
    
         
            +
              #  agent.submit(page.forms.first)
         
     | 
| 
      
 184 
     | 
    
         
            +
              # With a button:
         
     | 
| 
      
 185 
     | 
    
         
            +
              #  agent.submit(page.forms.first, page.forms.first.buttons.first)
         
     | 
| 
       179 
186 
     | 
    
         
             
              def submit(form, button=nil)
         
     | 
| 
       180 
187 
     | 
    
         
             
                form.add_button_to_query(button) if button
         
     | 
| 
       181 
     | 
    
         
            -
                query = form.build_query
         
     | 
| 
       182 
     | 
    
         
            -
             
     | 
| 
       183 
188 
     | 
    
         
             
                uri = to_absolute_uri(form.action)
         
     | 
| 
       184 
189 
     | 
    
         
             
                case form.method.upcase
         
     | 
| 
       185 
190 
     | 
    
         
             
                when 'POST'
         
     | 
| 
       186 
191 
     | 
    
         
             
                  post_form(uri, form) 
         
     | 
| 
       187 
192 
     | 
    
         
             
                when 'GET'
         
     | 
| 
       188 
193 
     | 
    
         
             
                  if uri.query.nil?
         
     | 
| 
       189 
     | 
    
         
            -
                    uri.query = WWW::Mechanize.build_query_string( 
     | 
| 
      
 194 
     | 
    
         
            +
                    uri.query = WWW::Mechanize.build_query_string(form.build_query)
         
     | 
| 
       190 
195 
     | 
    
         
             
                  else
         
     | 
| 
       191 
     | 
    
         
            -
                    uri.query = uri.query + "&" + 
     | 
| 
      
 196 
     | 
    
         
            +
                    uri.query = uri.query + "&" +
         
     | 
| 
      
 197 
     | 
    
         
            +
                      WWW::Mechanize.build_query_string(form.build_query)
         
     | 
| 
       192 
198 
     | 
    
         
             
                  end
         
     | 
| 
       193 
199 
     | 
    
         
             
                  get(uri)
         
     | 
| 
       194 
200 
     | 
    
         
             
                else
         
     | 
| 
       195 
     | 
    
         
            -
                  raise  
     | 
| 
      
 201 
     | 
    
         
            +
                  raise "unsupported method: #{form.method.upcase}"
         
     | 
| 
       196 
202 
     | 
    
         
             
                end
         
     | 
| 
       197 
203 
     | 
    
         
             
              end
         
     | 
| 
       198 
204 
     | 
    
         | 
| 
         @@ -203,14 +209,14 @@ class Mechanize 
     | 
|
| 
       203 
209 
     | 
    
         | 
| 
       204 
210 
     | 
    
         
             
              # Returns whether or not a url has been visited
         
     | 
| 
       205 
211 
     | 
    
         
             
              def visited?(url)
         
     | 
| 
       206 
     | 
    
         
            -
                if url.is_a?( 
     | 
| 
      
 212 
     | 
    
         
            +
                if url.is_a?(Link)
         
     | 
| 
       207 
213 
     | 
    
         
             
                  url = url.uri
         
     | 
| 
       208 
214 
     | 
    
         
             
                end
         
     | 
| 
       209 
215 
     | 
    
         
             
                uri = to_absolute_uri(url)
         
     | 
| 
       210 
216 
     | 
    
         
             
                ! @history.find { |h| h.uri.to_s == uri.to_s }.nil?
         
     | 
| 
       211 
217 
     | 
    
         
             
              end
         
     | 
| 
       212 
218 
     | 
    
         | 
| 
       213 
     | 
    
         
            -
              alias page current_page
         
     | 
| 
      
 219 
     | 
    
         
            +
              alias :page :current_page
         
     | 
| 
       214 
220 
     | 
    
         | 
| 
       215 
221 
     | 
    
         
             
              private
         
     | 
| 
       216 
222 
     | 
    
         | 
| 
         @@ -269,10 +275,15 @@ class Mechanize 
     | 
|
| 
       269 
275 
     | 
    
         | 
| 
       270 
276 
     | 
    
         
             
                if uri.scheme == 'https'
         
     | 
| 
       271 
277 
     | 
    
         
             
                  http.use_ssl = true
         
     | 
| 
      
 278 
     | 
    
         
            +
                  http.verify_mode = OpenSSL::SSL::VERIFY_NONE
         
     | 
| 
       272 
279 
     | 
    
         
             
                  if @ca_file
         
     | 
| 
       273 
280 
     | 
    
         
             
                    http.ca_file = @ca_file
         
     | 
| 
       274 
281 
     | 
    
         
             
                    http.verify_mode = OpenSSL::SSL::VERIFY_PEER
         
     | 
| 
       275 
282 
     | 
    
         
             
                  end
         
     | 
| 
      
 283 
     | 
    
         
            +
                  if @cert && @key
         
     | 
| 
      
 284 
     | 
    
         
            +
                    http.cert = OpenSSL::X509::Certificate.new(::File.read(@cert))
         
     | 
| 
      
 285 
     | 
    
         
            +
                    http.key  = OpenSSL::PKey::RSA.new(::File.read(@key), @pass)
         
     | 
| 
      
 286 
     | 
    
         
            +
                  end
         
     | 
| 
       276 
287 
     | 
    
         
             
                end
         
     | 
| 
       277 
288 
     | 
    
         | 
| 
       278 
289 
     | 
    
         | 
| 
         @@ -334,15 +345,28 @@ class Mechanize 
     | 
|
| 
       334 
345 
     | 
    
         
             
                      log.debug("header: #{ k } : #{ v }")
         
     | 
| 
       335 
346 
     | 
    
         
             
                    }
         
     | 
| 
       336 
347 
     | 
    
         | 
| 
       337 
     | 
    
         
            -
                    page.response = response
         
     | 
| 
       338 
     | 
    
         
            -
                    page.code = response.code
         
     | 
| 
       339 
     | 
    
         
            -
             
     | 
| 
       340 
348 
     | 
    
         
             
                    response.read_body
         
     | 
| 
       341 
     | 
    
         
            -
             
     | 
| 
      
 349 
     | 
    
         
            +
             
     | 
| 
      
 350 
     | 
    
         
            +
                    content_type = nil
         
     | 
| 
      
 351 
     | 
    
         
            +
                    unless response['Content-Type'].nil?
         
     | 
| 
      
 352 
     | 
    
         
            +
                      data = response['Content-Type'].match(/^([^;]*)/)
         
     | 
| 
      
 353 
     | 
    
         
            +
                      content_type = data[1].downcase unless data.nil?
         
     | 
| 
      
 354 
     | 
    
         
            +
                    end
         
     | 
| 
      
 355 
     | 
    
         
            +
             
     | 
| 
      
 356 
     | 
    
         
            +
             
     | 
| 
      
 357 
     | 
    
         
            +
                    # Find our pluggable parser
         
     | 
| 
      
 358 
     | 
    
         
            +
                    page = @pluggable_parser.parser(content_type).new(
         
     | 
| 
      
 359 
     | 
    
         
            +
                      uri,
         
     | 
| 
      
 360 
     | 
    
         
            +
                      response,
         
     | 
| 
      
 361 
     | 
    
         
            +
                      response.body,
         
     | 
| 
      
 362 
     | 
    
         
            +
                      response.code
         
     | 
| 
      
 363 
     | 
    
         
            +
                    )
         
     | 
| 
       342 
364 
     | 
    
         | 
| 
       343 
365 
     | 
    
         
             
                    log.info("status: #{ page.code }")
         
     | 
| 
       344 
366 
     | 
    
         | 
| 
       345 
     | 
    
         
            -
                    page. 
     | 
| 
      
 367 
     | 
    
         
            +
                    if page.respond_to? :watch_for_set
         
     | 
| 
      
 368 
     | 
    
         
            +
                      page.watch_for_set = @watch_for_set
         
     | 
| 
      
 369 
     | 
    
         
            +
                    end
         
     | 
| 
       346 
370 
     | 
    
         | 
| 
       347 
371 
     | 
    
         
             
                    case page.code
         
     | 
| 
       348 
372 
     | 
    
         
             
                    when "200"
         
     | 
| 
         @@ -375,15 +399,6 @@ class Mechanize 
     | 
|
| 
       375 
399 
     | 
    
         
             
                  @history = @history[@history.size - @max_history, @max_history] 
         
     | 
| 
       376 
400 
     | 
    
         
             
                end
         
     | 
| 
       377 
401 
     | 
    
         
             
              end
         
     | 
| 
       378 
     | 
    
         
            -
             
     | 
| 
       379 
     | 
    
         
            -
              class ContentTypeError < RuntimeError
         
     | 
| 
       380 
     | 
    
         
            -
                attr_reader :content_type
         
     | 
| 
       381 
     | 
    
         
            -
              
         
     | 
| 
       382 
     | 
    
         
            -
                def initialize(content_type)
         
     | 
| 
       383 
     | 
    
         
            -
                  @content_type = content_type
         
     | 
| 
       384 
     | 
    
         
            -
                end
         
     | 
| 
       385 
     | 
    
         
            -
              end
         
     | 
| 
       386 
     | 
    
         
            -
             
     | 
| 
       387 
402 
     | 
    
         
             
            end
         
     | 
| 
       388 
403 
     | 
    
         | 
| 
       389 
404 
     | 
    
         
             
            end # module WWW
         
     |