mechanize 1.0.1.beta.20110107104205 → 2.0.pre.1

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of mechanize might be problematic. Click here for more details.

Files changed (89)
  1. data.tar.gz.sig +2 -0
  2. data/{lib/mechanize/chain/post_connect_hook.rb → .gemtest} +0 -0
  3. data/CHANGELOG.rdoc +51 -6
  4. data/EXAMPLES.rdoc +5 -3
  5. data/GUIDE.rdoc +72 -32
  6. data/LICENSE.rdoc +20 -340
  7. data/Manifest.txt +20 -27
  8. data/README.rdoc +12 -9
  9. data/Rakefile +5 -2
  10. data/examples/spider.rb +13 -2
  11. data/lib/mechanize.rb +545 -267
  12. data/lib/mechanize/content_type_error.rb +1 -1
  13. data/lib/mechanize/cookie.rb +72 -65
  14. data/lib/mechanize/cookie_jar.rb +197 -148
  15. data/lib/mechanize/element_matcher.rb +35 -0
  16. data/lib/mechanize/file.rb +3 -1
  17. data/lib/mechanize/file_connection.rb +17 -0
  18. data/lib/mechanize/file_request.rb +26 -0
  19. data/lib/mechanize/file_response.rb +61 -47
  20. data/lib/mechanize/form.rb +57 -58
  21. data/lib/mechanize/form/image_button.rb +2 -3
  22. data/lib/mechanize/form/multi_select_list.rb +71 -55
  23. data/lib/mechanize/form/select_list.rb +34 -62
  24. data/lib/mechanize/monkey_patch.rb +13 -11
  25. data/lib/mechanize/page.rb +277 -270
  26. data/lib/mechanize/page/image.rb +6 -2
  27. data/lib/mechanize/redirect_limit_reached_error.rb +1 -1
  28. data/lib/mechanize/redirect_not_get_or_head_error.rb +1 -1
  29. data/lib/mechanize/response_code_error.rb +3 -3
  30. data/lib/mechanize/unsupported_scheme_error.rb +1 -1
  31. data/lib/mechanize/uri_resolver.rb +82 -0
  32. data/lib/mechanize/util.rb +76 -60
  33. data/test/helper.rb +35 -5
  34. data/test/htdocs/dir with spaces/foo.html +1 -0
  35. data/test/htdocs/rails_3_encoding_hack_form_test.html +27 -0
  36. data/test/htdocs/tc_base_images.html +10 -0
  37. data/test/htdocs/tc_images.html +8 -0
  38. data/test/htdocs/test_click.html +11 -0
  39. data/test/servlets.rb +3 -2
  40. data/test/test_authenticate.rb +5 -5
  41. data/test/test_errors.rb +8 -8
  42. data/test/test_follow_meta.rb +4 -4
  43. data/test/test_form_as_hash.rb +4 -4
  44. data/test/test_forms.rb +3 -7
  45. data/test/test_hash_api.rb +2 -2
  46. data/test/test_headers.rb +1 -1
  47. data/test/test_images.rb +19 -0
  48. data/test/test_mech.rb +6 -6
  49. data/test/test_mechanize.rb +687 -0
  50. data/test/{test_cookie_class.rb → test_mechanize_cookie.rb} +52 -45
  51. data/test/test_mechanize_cookie_jar.rb +400 -0
  52. data/test/test_mechanize_file.rb +7 -1
  53. data/test/test_mechanize_file_request.rb +19 -0
  54. data/test/test_mechanize_file_response.rb +21 -0
  55. data/test/test_mechanize_form_image_button.rb +12 -0
  56. data/test/test_mechanize_page.rb +165 -0
  57. data/test/test_mechanize_uri_resolver.rb +29 -0
  58. data/test/{test_util.rb → test_mechanize_util.rb} +1 -1
  59. data/test/test_multi_select.rb +12 -0
  60. data/test/test_post_form.rb +7 -0
  61. data/test/test_redirect_verb_handling.rb +6 -6
  62. data/test/test_scheme.rb +0 -7
  63. data/test/test_verbs.rb +3 -3
  64. metadata +106 -72
  65. metadata.gz.sig +0 -0
  66. data/lib/mechanize/chain.rb +0 -36
  67. data/lib/mechanize/chain/auth_headers.rb +0 -78
  68. data/lib/mechanize/chain/body_decoding_handler.rb +0 -50
  69. data/lib/mechanize/chain/connection_resolver.rb +0 -28
  70. data/lib/mechanize/chain/custom_headers.rb +0 -21
  71. data/lib/mechanize/chain/handler.rb +0 -9
  72. data/lib/mechanize/chain/header_resolver.rb +0 -48
  73. data/lib/mechanize/chain/parameter_resolver.rb +0 -22
  74. data/lib/mechanize/chain/pre_connect_hook.rb +0 -20
  75. data/lib/mechanize/chain/request_resolver.rb +0 -31
  76. data/lib/mechanize/chain/response_body_parser.rb +0 -36
  77. data/lib/mechanize/chain/response_header_handler.rb +0 -34
  78. data/lib/mechanize/chain/response_reader.rb +0 -39
  79. data/lib/mechanize/chain/ssl_resolver.rb +0 -40
  80. data/lib/mechanize/chain/uri_resolver.rb +0 -75
  81. data/test/chain/test_argument_validator.rb +0 -14
  82. data/test/chain/test_auth_headers.rb +0 -25
  83. data/test/chain/test_custom_headers.rb +0 -18
  84. data/test/chain/test_header_resolver.rb +0 -27
  85. data/test/chain/test_parameter_resolver.rb +0 -35
  86. data/test/chain/test_request_resolver.rb +0 -29
  87. data/test/chain/test_response_reader.rb +0 -24
  88. data/test/test_cookie_jar.rb +0 -324
  89. data/test/test_page.rb +0 -124
data/Manifest.txt CHANGED
@@ -12,26 +12,13 @@ examples/proxy_req.rb
12
12
  examples/rubyforge.rb
13
13
  examples/spider.rb
14
14
  lib/mechanize.rb
15
- lib/mechanize/chain.rb
16
- lib/mechanize/chain/auth_headers.rb
17
- lib/mechanize/chain/body_decoding_handler.rb
18
- lib/mechanize/chain/connection_resolver.rb
19
- lib/mechanize/chain/custom_headers.rb
20
- lib/mechanize/chain/handler.rb
21
- lib/mechanize/chain/header_resolver.rb
22
- lib/mechanize/chain/parameter_resolver.rb
23
- lib/mechanize/chain/post_connect_hook.rb
24
- lib/mechanize/chain/pre_connect_hook.rb
25
- lib/mechanize/chain/request_resolver.rb
26
- lib/mechanize/chain/response_body_parser.rb
27
- lib/mechanize/chain/response_header_handler.rb
28
- lib/mechanize/chain/response_reader.rb
29
- lib/mechanize/chain/ssl_resolver.rb
30
- lib/mechanize/chain/uri_resolver.rb
31
15
  lib/mechanize/content_type_error.rb
32
16
  lib/mechanize/cookie.rb
33
17
  lib/mechanize/cookie_jar.rb
18
+ lib/mechanize/element_matcher.rb
34
19
  lib/mechanize/file.rb
20
+ lib/mechanize/file_connection.rb
21
+ lib/mechanize/file_request.rb
35
22
  lib/mechanize/file_response.rb
36
23
  lib/mechanize/file_saver.rb
37
24
  lib/mechanize/form.rb
@@ -60,14 +47,8 @@ lib/mechanize/redirect_limit_reached_error.rb
60
47
  lib/mechanize/redirect_not_get_or_head_error.rb
61
48
  lib/mechanize/response_code_error.rb
62
49
  lib/mechanize/unsupported_scheme_error.rb
50
+ lib/mechanize/uri_resolver.rb
63
51
  lib/mechanize/util.rb
64
- test/chain/test_argument_validator.rb
65
- test/chain/test_auth_headers.rb
66
- test/chain/test_custom_headers.rb
67
- test/chain/test_header_resolver.rb
68
- test/chain/test_parameter_resolver.rb
69
- test/chain/test_request_resolver.rb
70
- test/chain/test_response_reader.rb
71
52
  test/data/htpasswd
72
53
  test/data/server.crt
73
54
  test/data/server.csr
@@ -77,6 +58,7 @@ test/helper.rb
77
58
  test/htdocs/alt_text.html
78
59
  test/htdocs/bad_form_test.html
79
60
  test/htdocs/button.jpg
61
+ test/htdocs/dir with spaces/foo.html
80
62
  test/htdocs/empty_form.html
81
63
  test/htdocs/file_upload.html
82
64
  test/htdocs/find_link.html
@@ -97,9 +79,11 @@ test/htdocs/index.html
97
79
  test/htdocs/link with space.html
98
80
  test/htdocs/meta_cookie.html
99
81
  test/htdocs/no_title_test.html
82
+ test/htdocs/rails_3_encoding_hack_form_test.html
100
83
  test/htdocs/relative/tc_relative_links.html
101
84
  test/htdocs/tc_bad_charset.html
102
85
  test/htdocs/tc_bad_links.html
86
+ test/htdocs/tc_base_images.html
103
87
  test/htdocs/tc_base_link.html
104
88
  test/htdocs/tc_blank_form.html
105
89
  test/htdocs/tc_charset.html
@@ -108,6 +92,7 @@ test/htdocs/tc_encoded_links.html
108
92
  test/htdocs/tc_field_precedence.html
109
93
  test/htdocs/tc_follow_meta.html
110
94
  test/htdocs/tc_form_action.html
95
+ test/htdocs/tc_images.html
111
96
  test/htdocs/tc_links.html
112
97
  test/htdocs/tc_meta_in_body.html
113
98
  test/htdocs/tc_no_attributes.html
@@ -117,6 +102,7 @@ test/htdocs/tc_referer.html
117
102
  test/htdocs/tc_relative_links.html
118
103
  test/htdocs/tc_textarea.html
119
104
  test/htdocs/test_bad_encoding.html
105
+ test/htdocs/test_click.html
120
106
  test/htdocs/unusual______.html
121
107
  test/servlets.rb
122
108
  test/ssl_server.rb
@@ -125,8 +111,6 @@ test/test_bad_links.rb
125
111
  test/test_blank_form.rb
126
112
  test/test_checkboxes.rb
127
113
  test/test_content_type.rb
128
- test/test_cookie_class.rb
129
- test/test_cookie_jar.rb
130
114
  test/test_cookies.rb
131
115
  test/test_encoded_links.rb
132
116
  test/test_errors.rb
@@ -141,19 +125,29 @@ test/test_frames.rb
141
125
  test/test_get_headers.rb
142
126
  test/test_gzipping.rb
143
127
  test/test_hash_api.rb
128
+ test/test_headers.rb
144
129
  test/test_history.rb
145
130
  test/test_history_added.rb
146
131
  test/test_html_unscape_forms.rb
147
132
  test/test_if_modified_since.rb
133
+ test/test_images.rb
148
134
  test/test_links.rb
149
135
  test/test_mech.rb
150
136
  test/test_mech_proxy.rb
137
+ test/test_mechanize.rb
138
+ test/test_mechanize_cookie.rb
139
+ test/test_mechanize_cookie_jar.rb
151
140
  test/test_mechanize_file.rb
141
+ test/test_mechanize_file_request.rb
142
+ test/test_mechanize_file_response.rb
143
+ test/test_mechanize_form_image_button.rb
144
+ test/test_mechanize_page.rb
145
+ test/test_mechanize_uri_resolver.rb
146
+ test/test_mechanize_util.rb
152
147
  test/test_meta.rb
153
148
  test/test_multi_select.rb
154
149
  test/test_no_attributes.rb
155
150
  test/test_option.rb
156
- test/test_page.rb
157
151
  test/test_pluggable_parser.rb
158
152
  test/test_post_form.rb
159
153
  test/test_pretty_print.rb
@@ -175,5 +169,4 @@ test/test_ssl_server.rb
175
169
  test/test_subclass.rb
176
170
  test/test_textarea.rb
177
171
  test/test_upload.rb
178
- test/test_util.rb
179
172
  test/test_verbs.rb
data/README.rdoc CHANGED
@@ -5,7 +5,7 @@
5
5
 
6
6
  == DESCRIPTION
7
7
 
8
- The Mechanize library is used for automating interaction with websites.
8
+ The Mechanize library is used for automating interaction with websites.
9
9
  Mechanize automatically stores and sends cookies, follows redirects,
10
10
  can follow links, and submit forms. Form fields can be populated and
11
11
  submitted. Mechanize also keeps track of the sites that you have visited as
@@ -13,7 +13,7 @@ a history.
13
13
 
14
14
  == Dependencies
15
15
 
16
- * ruby 1.8.6
16
+ * ruby 1.8.7
17
17
  * nokogiri[http://nokogiri.rubyforge.org]
18
18
 
19
19
  == SUPPORT:
@@ -28,21 +28,24 @@ The bug tracker is available here:
28
28
 
29
29
  == Examples
30
30
 
31
- If you are just starting, check out the GUIDE.
31
+ If you are just starting, check out the GUIDE.
32
32
  Also, check out the EXAMPLES file.
33
33
 
34
34
  == Authors
35
35
 
36
- Copyright (c) 2005 by Michael Neumann (mneumann@ntecs.de)
36
+ Copyright (c) 2005 by Michael Neumann (mneumann@ntecs.de)
37
37
 
38
- Copyright (c) 2006-2010:
38
+ Copyright (c) 2006-2011:
39
39
 
40
40
  * {Aaron Patterson}[http://tenderlovemaking.com] (aaronp@rubyforge.org)
41
41
  * {Mike Dalessio}[http://mike.daless.io] (mike@csa.net)
42
42
 
43
+ Copyright (c) 2011:
44
+
45
+ * {Eric Hodel}[http://blog.segment7.net] (drbrain@segment7.net)
46
+
43
47
  This library comes with a shameless plug for employing me
44
- (Aaron[http://tenderlovemaking.com/]) programming
45
- Ruby, my favorite language!
48
+ (Aaron[http://tenderlovemaking.com/]) programming Ruby, my favorite language!
46
49
 
47
50
  == Acknowledgments
48
51
 
@@ -53,8 +56,8 @@ perl Mechanize which is available here[http://search.cpan.org/~petdance/WWW-Mech
53
56
  Thank you to Michael Neumann for starting the Ruby version. Thanks to everyone
54
57
  who's helped out in various ways. Finally, thank you to the people using this
55
58
  library!
56
-
59
+
57
60
  == License
58
61
 
59
- This library is distributed under the GPL. Please see the LICENSE file.
62
+ This library is distributed under the MIT license. Please see the LICENSE file.
60
63
 
data/Rakefile CHANGED
@@ -5,14 +5,17 @@ Hoe.plugin :gemspec
5
5
  Hoe.plugin :git
6
6
 
7
7
  Hoe.spec 'mechanize' do
8
+ developer 'Eric Hodel', 'drbrain@segment7.net'
8
9
  developer 'Aaron Patterson', 'aaronp@rubyforge.org'
9
10
  developer 'Mike Dalessio', 'mike.dalessio@gmail.com'
10
11
 
11
12
  self.readme_file = 'README.rdoc'
12
13
  self.history_file = 'CHANGELOG.rdoc'
13
14
  self.extra_rdoc_files += Dir['*.rdoc']
14
- self.extra_deps << ['nokogiri', '>= 1.2.1']
15
- self.extra_deps << ['net-http-persistent', '~> 1.1']
15
+ self.extra_deps << ['nokogiri', '~> 1.4']
16
+ self.extra_deps << ['net-http-persistent', '~> 1.6']
17
+ self.extra_deps << ['net-http-digest_auth', '~> 1.1', '>= 1.1.1']
18
+ self.spec_extras[:required_ruby_version] = '>= 1.8.7'
16
19
  end
17
20
 
18
21
  desc "Update SSL Certificate"
data/examples/spider.rb CHANGED
@@ -5,7 +5,18 @@ require 'mechanize'
5
5
 
6
6
  agent = Mechanize.new
7
7
  stack = agent.get(ARGV[0]).links
8
+
8
9
  while l = stack.pop
9
- next unless l.uri.host == agent.history.first.uri.host
10
- stack.push(*(agent.click(l).links)) unless agent.visited? l.href
10
+ host = l.uri.host
11
+ next unless host.nil? or host == agent.history.first.uri.host
12
+ next if agent.visited? l.href
13
+
14
+ puts "crawling #{l.uri}"
15
+ begin
16
+ page = agent.click(l)
17
+ next unless Mechanize::Page === page
18
+ stack.push(*page.links)
19
+ rescue Mechanize::ResponseCodeError
20
+ end
11
21
  end
22
+
data/lib/mechanize.rb CHANGED
@@ -1,34 +1,17 @@
1
- require 'openssl'
2
- require 'net/http/persistent'
3
- require 'uri'
4
- require 'webrick/httputils'
5
- require 'zlib'
6
- require 'stringio'
7
- require 'digest/md5'
8
1
  require 'fileutils'
9
- require 'nokogiri'
10
2
  require 'forwardable'
11
-
12
3
  require 'iconv' if RUBY_VERSION < '1.9.2'
13
-
14
- require 'nkf'
15
4
  require 'mutex_m'
5
+ require 'net/http/digest_auth'
6
+ require 'net/http/persistent'
7
+ require 'nkf'
8
+ require 'nokogiri'
9
+ require 'openssl'
10
+ require 'stringio'
11
+ require 'uri'
12
+ require 'webrick/httputils'
13
+ require 'zlib'
16
14
 
17
- require 'mechanize/util'
18
- require 'mechanize/content_type_error'
19
- require 'mechanize/response_code_error'
20
- require 'mechanize/unsupported_scheme_error'
21
- require 'mechanize/redirect_limit_reached_error'
22
- require 'mechanize/redirect_not_get_or_head_error'
23
- require 'mechanize/cookie'
24
- require 'mechanize/cookie_jar'
25
- require 'mechanize/history'
26
- require 'mechanize/form'
27
- require 'mechanize/pluggable_parsers'
28
- require 'mechanize/file_response'
29
- require 'mechanize/inspect'
30
- require 'mechanize/chain'
31
- require 'mechanize/monkey_patch'
32
15
 
33
16
  # = Synopsis
34
17
  # The Mechanize library is used for automating interaction with a website. It
@@ -48,12 +31,22 @@ require 'mechanize/monkey_patch'
48
31
  # search_results = agent.submit(search_form)
49
32
  # puts search_results.body
50
33
  class Mechanize
34
+
51
35
  ##
52
36
  # The version of Mechanize you are using.
53
- VERSION = '1.0.1.beta'
37
+ VERSION = '2.0'
38
+
39
+ class Error < RuntimeError
40
+ end
54
41
 
42
+ ruby_version = if RUBY_PATCHLEVEL >= 0 then
43
+ "#{RUBY_VERSION}p#{RUBY_PATCHLEVEL}"
44
+ else
45
+ "#{RUBY_VERSION}dev#{RUBY_REVISION}"
46
+ end
55
47
  ##
56
48
  # User Agent aliases
49
+
57
50
  AGENT_ALIASES = {
58
51
  'Windows IE 6' => 'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)',
59
52
  'Windows IE 7' => 'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; .NET CLR 1.1.4322; .NET CLR 2.0.50727)',
@@ -65,16 +58,35 @@ class Mechanize
65
58
  'Linux Firefox' => 'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.2.1) Gecko/20100122 firefox/3.6.1',
66
59
  'Linux Konqueror' => 'Mozilla/5.0 (compatible; Konqueror/3; Linux)',
67
60
  'iPhone' => 'Mozilla/5.0 (iPhone; U; CPU like Mac OS X; en) AppleWebKit/420+ (KHTML, like Gecko) Version/3.0 Mobile/1C28 Safari/419.3',
68
- 'Mechanize' => "WWW-Mechanize/#{VERSION} (http://rubyforge.org/projects/mechanize/)"
61
+ 'Mechanize' => "Mechanize/#{VERSION} Ruby/#{ruby_version} (http://github.com/tenderlove/mechanize/)"
69
62
  }
70
63
 
64
+ # A Mechanize::CookieJar which stores cookies
71
65
  attr_accessor :cookie_jar
72
- attr_accessor :open_timeout, :read_timeout
66
+
67
+ # Length of time to wait until a connection is opened in seconds
68
+ attr_accessor :open_timeout
69
+
70
+ # Length of time to attempt to read data from the server
71
+ attr_accessor :read_timeout
72
+
73
+ # The identification string for the client initiating a web request
73
74
  attr_accessor :user_agent
75
+
76
+ # The value of watch_for_set is passed to pluggable parsers for retrieved
77
+ # content
74
78
  attr_accessor :watch_for_set
79
+
80
+ # Path to an OpenSSL server certificate file
75
81
  attr_accessor :ca_file
82
+
83
+ # An OpenSSL private key or the path to a private key
76
84
  attr_accessor :key
85
+
86
+ # An OpenSSL client certificate or the path to a certificate file.
77
87
  attr_accessor :cert
88
+
89
+ # OpenSSL key password
78
90
  attr_accessor :pass
79
91
 
80
92
  # Controls how this agent deals with redirects. If it is set to
@@ -84,12 +96,25 @@ class Mechanize
84
96
  # redirects are followed.
85
97
  attr_accessor :redirect_ok
86
98
 
99
+ # Disables HTTP/1.1 gzip compression (enabled by default)
87
100
  attr_accessor :gzip_enabled
101
+
102
+ # HTTP/1.0 keep-alive time
88
103
  attr_accessor :keep_alive_time
104
+
105
+ # HTTP/1.1 keep-alives are always active. This does nothing.
89
106
  attr_accessor :keep_alive
107
+
108
+ # Disables If-Modified-Since conditional requests (enabled by default)
90
109
  attr_accessor :conditional_requests
110
+
111
+ # Follow HTML meta refresh
91
112
  attr_accessor :follow_meta_refresh
113
+
114
+ # A callback for additional certificate verification. See
115
+ # OpenSSL::SSL::SSLContext#verify_callback
92
116
  attr_accessor :verify_callback
117
+
93
118
  attr_accessor :history_added
94
119
  attr_accessor :scheme_handlers
95
120
  attr_accessor :redirection_limit
@@ -111,6 +136,16 @@ class Mechanize
111
136
  attr_reader :history
112
137
  attr_reader :pluggable_parser
113
138
 
139
+ # A list of hooks to call after retrieving a response. Hooks are called with
140
+ # the agent and the response returned.
141
+
142
+ attr_reader :post_connect_hooks
143
+
144
+ # A list of hooks to call before making a request. Hooks are called with
145
+ # the agent and the request to be performed.
146
+
147
+ attr_reader :pre_connect_hooks
148
+
114
149
  alias :follow_redirect? :redirect_ok
115
150
 
116
151
  @html_parser = Nokogiri::HTML
@@ -138,6 +173,7 @@ class Mechanize
138
173
  # callback for OpenSSL errors while verifying the server certificate
139
174
  # chain, can be used for debugging or to ignore errors by always
140
175
  # returning _true_
176
+ # specifying nil uses the default method that was valid when the SSL was created
141
177
  @verify_callback = nil
142
178
  @cert = nil # OpenSSL Certificate
143
179
  @key = nil # OpenSSL Private Key
@@ -153,6 +189,7 @@ class Mechanize
153
189
  @user = nil # Auth User
154
190
  @password = nil # Auth Password
155
191
  @digest = nil # DigestAuth Digest
192
+ @digest_auth = Net::HTTP::DigestAuth.new
156
193
  @auth_hash = {} # Keep track of urls for sending auth
157
194
  @request_headers= {} # A hash of request headers to be used
158
195
 
@@ -165,23 +202,27 @@ class Mechanize
165
202
  @keep_alive_time = 300
166
203
  @keep_alive = true
167
204
 
168
- @scheme_handlers = Hash.new { |h,k|
169
- h[k] = lambda { |link, page|
170
- raise UnsupportedSchemeError.new(k)
171
- }
172
- }
173
- @scheme_handlers['http'] = lambda { |link, page| link }
174
- @scheme_handlers['https'] = @scheme_handlers['http']
175
- @scheme_handlers['relative'] = @scheme_handlers['http']
176
- @scheme_handlers['file'] = @scheme_handlers['http']
205
+ # Proxy
206
+ @proxy_addr = nil
207
+ @proxy_port = nil
208
+ @proxy_user = nil
209
+ @proxy_pass = nil
210
+
211
+ @resolver = Mechanize::URIResolver.new
212
+ @scheme_handlers = @resolver.scheme_handlers
177
213
 
178
- @pre_connect_hook = Chain::PreConnectHook.new
179
- @post_connect_hook = Chain::PostConnectHook.new
214
+ @pre_connect_hooks = []
215
+ @post_connect_hooks = []
180
216
 
181
- set_http
182
217
  @html_parser = self.class.html_parser
183
218
 
184
219
  yield self if block_given?
220
+
221
+ if @proxy_addr and @proxy_pass then
222
+ set_proxy @proxy_addr, @proxy_port, @proxy_user, @proxy_pass
223
+ else
224
+ set_http
225
+ end
185
226
  end
186
227
 
187
228
  def max_history=(length); @history.max_size = length end
@@ -189,14 +230,6 @@ class Mechanize
189
230
  def log=(l); self.class.log = l end
190
231
  def log; self.class.log end
191
232
 
192
- def pre_connect_hooks
193
- @pre_connect_hook.hooks
194
- end
195
-
196
- def post_connect_hooks
197
- @post_connect_hook.hooks
198
- end
199
-
200
233
  # Sets the proxy address, port, user, and password
201
234
  # +addr+ should be a host, with no "http://"
202
235
  def set_proxy(addr, port, user = nil, pass = nil)
@@ -213,7 +246,8 @@ class Mechanize
213
246
  # Set the user agent for the Mechanize object.
214
247
  # See AGENT_ALIASES
215
248
  def user_agent_alias=(al)
216
- self.user_agent = AGENT_ALIASES[al] || raise("unknown agent alias")
249
+ @user_agent = AGENT_ALIASES[al] ||
250
+ raise(ArgumentError, "unknown agent alias")
217
251
  end
218
252
 
219
253
  # Returns a list of cookies stored in the cookie jar.
@@ -230,7 +264,7 @@ class Mechanize
230
264
 
231
265
  # Fetches the URL passed in and returns a page.
232
266
  def get(options, parameters = [], referer = nil)
233
- verb = :get
267
+ method = :get
234
268
 
235
269
  unless options.is_a? Hash
236
270
  url = options
@@ -239,11 +273,11 @@ class Mechanize
239
273
  parameters = []
240
274
  end
241
275
  else
242
- raise ArgumentError.new("url must be specified") unless url = options[:url]
276
+ raise ArgumentError, "url must be specified" unless url = options[:url]
243
277
  parameters = options[:params] || []
244
278
  referer = options[:referer]
245
279
  headers = options[:headers]
246
- verb = options[:verb] || verb
280
+ method = options[:verb] || method
247
281
  end
248
282
 
249
283
  unless referer
@@ -264,51 +298,41 @@ class Mechanize
264
298
  end
265
299
 
266
300
  # fetch the page
267
- page = fetch_page( :uri => url,
268
- :referer => referer,
269
- :headers => headers || {},
270
- :verb => verb,
271
- :params => parameters
272
- )
301
+ headers ||= {}
302
+ page = fetch_page url, method, headers, parameters, referer
273
303
  add_to_history(page)
274
304
  yield page if block_given?
275
305
  page
276
306
  end
277
307
 
278
- ####
279
- # PUT to +url+ with +entity+, and setting +options+:
308
+ ##
309
+ # PUT to +url+ with +entity+, and setting +headers+:
280
310
  #
281
- # put('http://tenderlovemaking.com/', 'new content', :headers => {'Content-Type' => 'text/plain'})
311
+ # put('http://example/', 'new content', {'Content-Type' => 'text/plain'})
282
312
  #
283
- def put(url, entity, options = {})
284
- request_with_entity(:put, url, entity, options)
313
+ def put(url, entity, headers = {})
314
+ request_with_entity(:put, url, entity, headers)
285
315
  end
286
316
 
287
- ####
288
- # DELETE to +url+ with +query_params+, and setting +options+:
317
+ ##
318
+ # DELETE to +url+ with +query_params+, and setting +headers+:
289
319
  #
290
- # delete('http://tenderlovemaking.com/', {'q' => 'foo'}, :headers => {})
320
+ # delete('http://example/', {'q' => 'foo'}, {})
291
321
  #
292
- def delete(url, query_params = {}, options = {})
293
- page = head(url, query_params, options.merge({:verb => :delete}))
322
+ def delete(uri, query_params = {}, headers = {})
323
+ page = fetch_page(uri, :delete, headers, query_params)
294
324
  add_to_history(page)
295
325
  page
296
326
  end
297
327
 
298
- ####
299
- # HEAD to +url+ with +query_params+, and setting +options+:
328
+ ##
329
+ # HEAD to +url+ with +query_params+, and setting +headers+:
300
330
  #
301
- # head('http://tenderlovemaking.com/', {'q' => 'foo'}, :headers => {})
331
+ # head('http://example/', {'q' => 'foo'}, {})
302
332
  #
303
- def head(url, query_params = {}, options = {})
304
- options = {
305
- :uri => url,
306
- :headers => {},
307
- :params => query_params,
308
- :verb => :head
309
- }.merge(options)
333
+ def head(uri, query_params = {}, headers = {})
310
334
  # fetch the page
311
- page = fetch_page(options)
335
+ page = fetch_page(uri, :head, headers, query_params)
312
336
  yield page if block_given?
313
337
  page
314
338
  end
@@ -360,7 +384,7 @@ class Mechanize
360
384
  # agent.post('http://example.com/', "<message>hello</message>", 'Content-Type' => 'application/xml')
361
385
  def post(url, query={}, headers={})
362
386
  if query.is_a?(String)
363
- return request_with_entity(:post, url, query, :headers => headers)
387
+ return request_with_entity(:post, url, query, headers)
364
388
  end
365
389
  node = {}
366
390
  # Create a fake form
@@ -371,7 +395,8 @@ class Mechanize
371
395
  node['enctype'] = 'application/x-www-form-urlencoded'
372
396
 
373
397
  form = Form.new(node)
374
- query.each { |k,v|
398
+
399
+ query.each { |k, v|
375
400
  if v.is_a?(IO)
376
401
  form.enctype = 'multipart/form-data'
377
402
  ul = Form::FileUpload.new({'name' => k.to_s},::File.basename(v.path))
@@ -402,31 +427,19 @@ class Mechanize
402
427
  :referer => form.page
403
428
  )
404
429
  else
405
- raise "unsupported method: #{form.method.upcase}"
430
+ raise ArgumentError, "unsupported method: #{form.method.upcase}"
406
431
  end
407
432
  end
408
433
 
409
- def request_with_entity(verb, url, entity, options={})
410
- cur_page = current_page || Page.new( nil, {'content-type'=>'text/html'})
411
-
412
- options = {
413
- :uri => url,
414
- :referer => cur_page,
415
- :headers => {},
416
- }.update(options)
434
+ def request_with_entity(verb, uri, entity, headers = {})
435
+ cur_page = current_page || Page.new(nil, {'content-type'=>'text/html'})
417
436
 
418
437
  headers = {
419
438
  'Content-Type' => 'application/octet-stream',
420
439
  'Content-Length' => entity.size.to_s,
421
- }.update(options[:headers])
422
-
423
- options.update({
424
- :verb => verb,
425
- :params => [entity],
426
- :headers => headers,
427
- })
440
+ }.update headers
428
441
 
429
- page = fetch_page(options)
442
+ page = fetch_page uri, verb, headers, [entity], cur_page
430
443
  add_to_history(page)
431
444
  page
432
445
  end
@@ -462,14 +475,347 @@ class Mechanize
462
475
 
463
476
  alias :page :current_page
464
477
 
478
+ def connection_for uri
479
+ case uri.scheme.downcase
480
+ when 'http', 'https' then
481
+ return @http
482
+ when 'file' then
483
+ return Mechanize::FileConnection.new
484
+ end
485
+ end
486
+
487
+ def enable_gzip request
488
+ request['accept-encoding'] = if @gzip_enabled
489
+ 'gzip,deflate,identity'
490
+ else
491
+ 'identity'
492
+ end
493
+ end
494
+
495
+ def http_request uri, method, params = nil
496
+ case uri.scheme.downcase
497
+ when 'http', 'https' then
498
+ klass = Net::HTTP.const_get(method.to_s.capitalize)
499
+
500
+ request ||= klass.new(uri.request_uri)
501
+ request.body = params.first if params
502
+
503
+ request
504
+ when 'file' then
505
+ Mechanize::FileRequest.new uri
506
+ end
507
+ end
508
+
509
+ ##
510
+ # Invokes hooks added to post_connect_hooks after a +response+ is returned.
511
+ # Yields the +agent+ and the +response+ returned to each hook.
512
+
513
+ def post_connect response # :yields: agent, response
514
+ @post_connect_hooks.each do |hook|
515
+ hook.call self, response
516
+ end
517
+ end
518
+
519
+ ##
520
+ # Invokes hooks added to pre_connect_hooks before a +request+ is made.
521
+ # Yields the +agent+ and the +request+ that will be performed to each hook.
522
+
523
+ def pre_connect request # :yields: agent, request
524
+ @pre_connect_hooks.each do |hook|
525
+ hook.call self, request
526
+ end
527
+ end
528
+
529
+ def request_auth request, uri
530
+ auth_type = @auth_hash[uri.host]
531
+
532
+ return unless auth_type
533
+
534
+ case auth_type
535
+ when :basic
536
+ request.basic_auth @user, @password
537
+ when :digest, :iis_digest
538
+ uri.user = @user
539
+ uri.password = @password
540
+
541
+ iis = auth_type == :iis_digest
542
+
543
+ auth = @digest_auth.auth_header uri, @digest, request.method, iis
544
+
545
+ request['Authorization'] = auth
546
+ end
547
+ end
548
+
549
+ def request_cookies request, uri
550
+ return if @cookie_jar.empty? uri
551
+
552
+ cookies = @cookie_jar.cookies uri
553
+
554
+ return if cookies.empty?
555
+
556
+ request.add_field 'Cookie', cookies.join('; ')
557
+ end
558
+
559
+ def request_host request, uri
560
+ port = [80, 443].include?(uri.port.to_i) ? nil : uri.port
561
+ host = uri.host
562
+
563
+ request['Host'] = [host, port].compact.join ':'
564
+ end
565
+
566
+ def request_language_charset request
567
+ request['accept-charset'] = 'ISO-8859-1,utf-8;q=0.7,*;q=0.7'
568
+ request['accept-language'] = 'en-us,en;q=0.5'
569
+ end
570
+
571
+ # Log specified headers for the request
572
+ def request_log request
573
+ return unless log
574
+
575
+ log.info("#{request.class}: #{request.path}")
576
+
577
+ request.each_header do |k, v|
578
+ log.debug("request-header: #{k} => #{v}")
579
+ end
580
+ end
581
+
582
+ def request_add_headers request, headers = {}
583
+ @request_headers.each do |k,v|
584
+ request[k] = v
585
+ end
586
+
587
+ headers.each do |field, value|
588
+ case field
589
+ when :etag then request["ETag"] = value
590
+ when :if_modified_since then request["If-Modified-Since"] = value
591
+ when Symbol then
592
+ raise ArgumentError, "unknown header symbol #{field}"
593
+ else
594
+ request[field] = value
595
+ end
596
+ end
597
+ end
598
+
599
+ def request_referer request, uri, referer
600
+ return unless referer
601
+ return if 'https' == referer.scheme.downcase and
602
+ 'https' != uri.scheme.downcase
603
+
604
+ request['Referer'] = referer
605
+ end
606
+
607
+ def request_user_agent request
608
+ request['User-Agent'] = @user_agent if @user_agent
609
+ end
610
+
611
+ def resolve_parameters uri, method, parameters
612
+ case method
613
+ when :head, :get, :delete, :trace then
614
+ if parameters and parameters.length > 0
615
+ uri.query ||= ''
616
+ uri.query << '&' if uri.query.length > 0
617
+ uri.query << Mechanize::Util.build_query_string(parameters)
618
+ end
619
+
620
+ return uri, nil
621
+ end
622
+
623
+ return uri, parameters
624
+ end
625
+
626
+ def response_cookies response, uri, page
627
+ if Mechanize::Page === page and page.body =~ /Set-Cookie/n
628
+ page.search('//head/meta[@http-equiv="Set-Cookie"]').each do |meta|
629
+ Mechanize::Cookie.parse(uri, meta['content']) { |c|
630
+ log.debug("saved cookie: #{c}") if log
631
+ @cookie_jar.add(uri, c)
632
+ }
633
+ end
634
+ end
635
+
636
+ header_cookies = response.get_fields 'Set-Cookie'
637
+
638
+ return unless header_cookies
639
+
640
+ header_cookies.each do |cookie|
641
+ Mechanize::Cookie.parse(uri, cookie) { |c|
642
+ log.debug("saved cookie: #{c}") if log
643
+ @cookie_jar.add(uri, c)
644
+ }
645
+ end
646
+ end
647
+
648
+ def response_follow_meta_refresh response, uri, page, redirects
649
+ return unless @follow_meta_refresh
650
+
651
+ redirect_uri = nil
652
+ referer = page
653
+
654
+ if page.respond_to?(:meta) and (redirect = page.meta.first)
655
+ redirect_uri = Mechanize::Util.uri_unescape redirect.uri.to_s
656
+ sleep redirect.node['delay'].to_f
657
+ referer = Page.new(nil, {'content-type'=>'text/html'})
658
+ elsif refresh = response['refresh']
659
+ delay, redirect_uri = Page::Meta.parse(refresh, uri)
660
+ raise Mechanize::Error, 'Invalid refresh http header' unless delay
661
+ raise RedirectLimitReachedError.new(page, redirects) if
662
+ redirects + 1 > redirection_limit
663
+ sleep delay.to_f
664
+ end
665
+
666
+ if redirect_uri
667
+ @history.push(page, page.uri)
668
+ fetch_page(redirect_uri, :get, {}, [], referer, redirects + 1)
669
+ end
670
+ end
671
+
672
+ def response_log response
673
+ return unless log
674
+
675
+ log.info("status: #{response.class} #{response.http_version} " \
676
+ "#{response.code} #{response.message}")
677
+
678
+ response.each_header do |k, v|
679
+ log.debug("response-header: #{k} => #{v}")
680
+ end
681
+ end
682
+
683
# Builds the parsed representation of a response.
#
# The media type is taken from the Content-Type header: everything after
# the first ';' is dropped, the rest is downcased, and only the part before
# the first ',' is kept.  That type (nil when the header is missing or
# empty) selects a parser class from @pluggable_parser.
#
# The parser is instantiated with +uri+, +response+, +body+ and the
# response code.  Inside the constructor block it is given this agent (when
# it has a +mech=+ writer) and @watch_for_set (when set and supported).
#
# Returns the new parser instance.
def response_parse response, body, uri
  header = response['Content-Type']

  media_type = header.split(';', 2).first unless header.nil?
  content_type = media_type.downcase.split(',', 2).first unless media_type.nil?

  # Find our pluggable parser
  klass = @pluggable_parser.parser(content_type)

  klass.new(uri, response, body, response.code) do |parsed|
    parsed.mech = self if parsed.respond_to? :mech=

    if @watch_for_set and parsed.respond_to?(:watch_for_set=)
      parsed.watch_for_set = @watch_for_set
    end
  end
end
701
+
702
# Reads the whole body of +response+ and returns it decoded according to
# the Content-Encoding response header.
#
# response:: the response to read; must provide +read_body+ (yielding
#            chunks), +content_length+ and header access through +[]+
# request::  the request that produced +response+; used only to skip the
#            length check for HEAD requests
#
# Returns the body String.  For a 'deflate' or 'gzip'/'x-gzip' encoding the
# body is decompressed; '' is returned when it cannot be decompressed, and
# nil when the compressed response carries no content at all.
#
# Raises Mechanize::ResponseCodeError for a Net::HTTPUnknownResponse,
# EOFError when a Content-Length header disagrees with the number of bytes
# read (not checked for HEAD requests and redirects), and Mechanize::Error
# for an unsupported Content-Encoding.
def response_read response, request
  body = StringIO.new
  body.set_encoding Encoding::BINARY if body.respond_to? :set_encoding
  total = 0

  response.read_body { |part|
    total += part.length
    body.write(part)
    log.debug("Read #{total} bytes") if log
  }

  body.rewind

  raise Mechanize::ResponseCodeError, response if
    Net::HTTPUnknownResponse === response

  content_length = response.content_length

  unless Net::HTTP::Head === request or Net::HTTPRedirection === response then
    raise EOFError, "Content-Length (#{content_length}) does not match " \
                    "response body length (#{body.length})" if
      content_length and content_length != body.length
  end

  # content_length is nil when the response has no Content-Length header
  # (e.g. chunked transfer); nil.to_i is 0, so comparing through to_i avoids
  # calling > on nil.
  has_content = content_length.to_i > 0 || body.length > 0

  case response['Content-Encoding']
  when nil, 'none', '7bit' then
    body.string
  when 'deflate' then
    log.debug('deflate body') if log

    if has_content then
      begin
        Zlib::Inflate.inflate body.string
      rescue Zlib::BufError, Zlib::DataError
        log.error('Unable to inflate page, retrying with raw deflate') if log
        begin
          # Negative window bits select a raw deflate stream (no zlib header).
          Zlib::Inflate.new(-Zlib::MAX_WBITS).inflate(body.string)
        rescue Zlib::BufError, Zlib::DataError
          log.error("unable to inflate page: #{$!}") if log
          ''
        end
      end
    end
  when 'gzip', 'x-gzip' then
    log.debug('gzip body') if log

    if has_content then
      begin
        zio = Zlib::GzipReader.new body
        zio.read
      rescue Zlib::BufError, Zlib::GzipFile::Error
        log.error('Unable to gunzip body, trying raw inflate') if log
        # An error raised inside a rescue clause is not handled by the
        # sibling rescue clauses, so the fallback needs its own handler.
        begin
          body.rewind
          body.read 10 # skip the first 10 bytes before inflating the rest
          Zlib::Inflate.new(-Zlib::MAX_WBITS).inflate(body.read)
        rescue Zlib::BufError, Zlib::DataError
          log.error("unable to gunzip page: #{$!}") if log
          ''
        end
      rescue Zlib::DataError
        log.error("unable to gunzip page: #{$!}") if log
        ''
      ensure
        zio.close if zio and not zio.closed?
      end
    end
  else
    raise Mechanize::Error,
          "Unsupported Content-Encoding: #{response['Content-Encoding']}"
  end
end
769
+
770
# Handles a redirection response according to @redirect_ok.
#
# response::  the redirect response; its 'Location' header is the target
# method::    the request method (a Symbol) of the redirected request
# page::      the page parsed from +response+
# redirects:: number of redirects followed so far
#
# @redirect_ok of true or :all follows every redirect, false or nil follows
# none, and :permanent follows only Net::HTTPMovedPermanently responses.
# When the redirect is not followed +page+ is returned unchanged.
#
# A followed redirect is re-requested with :head if the original method was
# :head and with :get otherwise; the resulting page is pushed onto @history
# under the URI of the redirecting page and returned.
#
# Raises RedirectLimitReachedError when following would exceed
# +redirection_limit+.
def response_redirect response, method, page, redirects
  case @redirect_ok
  when true, :all
    # shortcut
  when false, nil
    return page
  when :permanent
    # Test the response itself; this used to reference an undefined
    # +response_class+ and raised NameError.
    return page unless Net::HTTPMovedPermanently === response
  end

  log.info("follow redirect to: #{response['Location']}") if log

  from_uri = page.uri

  raise RedirectLimitReachedError.new(page, redirects) if
    redirects + 1 > redirection_limit

  redirect_method = method == :head ? :head : :get

  page = fetch_page(response['Location'].to_s, redirect_method, {}, [],
                    page, redirects + 1)

  @history.push(page, from_uri)

  return page
end
796
+
797
# Reacts to an unauthorized response by recording which authentication
# scheme the host asked for and repeating the request.
#
# Raises ResponseCodeError (built from +page+) when neither @user nor
# @password is set, or when @auth_hash already has an entry for the host.
#
# A 'www-authenticate' header matching /Digest/i records :digest for the
# host (:iis_digest when the 'server' header matches /Microsoft-IIS/) and
# stores the header value in @digest.  Any other value records :basic.
#
# Returns the result of re-fetching +uri+ with the request's own method and
# the given +headers+, +params+ and +referer+.
def response_authenticate(response, page, uri, request, headers, params,
                          referer)
  raise ResponseCodeError, page if !@user && !@password

  host = uri.host
  raise ResponseCodeError, page if @auth_hash.has_key?(host)

  challenge = response['www-authenticate']

  if challenge =~ /Digest/i
    iis = response['server'] =~ /Microsoft-IIS/
    @auth_hash[host] = iis ? :iis_digest : :digest
    @digest = challenge
  else
    @auth_hash[host] = :basic
  end

  verb = request.method.downcase.to_sym
  fetch_page(uri, verb, headers, params, referer)
end
814
+
465
815
  private
466
816
 
467
817
# Resolves +url+ against +referer+ (the current page by default) using
# @resolver and returns the result as a String.
def resolve(url, referer = current_page())
  resolved = @resolver.resolve(url, referer)
  resolved.to_s
end
474
820
 
475
821
  def set_http proxy = nil
@@ -481,184 +827,113 @@ class Mechanize
481
827
  @http.verify_callback = @verify_callback
482
828
 
483
829
  if @cert and @key then
484
- @http.certificate = OpenSSL::X509::Certificate.new ::File.read(@cert)
485
- @http.private_key = OpenSSL::PKey::RSA.new ::File.read(@key), @pass
830
+ cert = if OpenSSL::X509::Certificate === @cert then
831
+ @cert
832
+ else
833
+ OpenSSL::X509::Certificate.new ::File.read @cert
834
+ end
835
+
836
+ key = if OpenSSL::PKey::PKey === @key then
837
+ @key
838
+ else
839
+ OpenSSL::PKey::RSA.new ::File.read(@key), @pass
840
+ end
841
+
842
+ @http.certificate = cert
843
+ @http.private_key = key
486
844
  end
487
845
  end
488
846
 
489
# Submits +form+ to +uri+ with a POST request.
#
# uri::     the target of the request
# form::    the form to submit; must provide +page+, +request_data+ and
#           +enctype+
# headers:: extra request headers; they take precedence over the generated
#           Content-Type and Content-Length
#
# The referer is the form's page, else the current page, else a blank HTML
# page.  The fetched page is added to the history and returned.
def post_form(uri, form, headers = {})
  cur_page = form.page || current_page ||
             Page.new(nil, {'content-type'=>'text/html'})

  request_data = form.request_data

  log.debug("query: #{ request_data.inspect }") if log

  # Content-Length counts bytes, not characters; String#size under-reports
  # multibyte data.  Fall back to size where bytesize is unavailable.
  content_length = if request_data.respond_to? :bytesize then
                     request_data.bytesize
                   else
                     request_data.size
                   end

  headers = {
    'Content-Type'   => form.enctype,
    'Content-Length' => content_length.to_s,
  }.merge headers

  # fetch the page
  page = fetch_page uri, :post, headers, [request_data], cur_page
  add_to_history(page)
  page
end
509
865
 
510
866
  # uri is an absolute URI
511
# Performs a single request for +uri+ and returns the resulting page.
#
# uri::       the target; resolved against +referer+ through @resolver
# method::    the request method as a Symbol (:get by default)
# headers::   extra request headers
# params::    request parameters, passed through resolve_parameters
# referer::   the referring page (the current page by default); may be nil
# redirects:: number of redirects already followed for this request
#
# The request is prepared by the request_* helpers and the pre_connect
# hook, sent over the connection for +uri+, and its body is read inside the
# connection's block.  After the post_connect hook the response is parsed,
# its cookies are stored and a refresh, if any, is followed.
#
# Success and file responses return the parsed page, a not-modified
# response returns the visited page for +uri+ (falling back to the parsed
# page), and redirects and unauthorized responses are delegated to
# response_redirect and response_authenticate.  Any other response raises
# ResponseCodeError.
def fetch_page uri, method = :get, headers = {}, params = [],
               referer = current_page, redirects = 0
  referer_uri = referer.uri if referer

  uri = @resolver.resolve(uri, referer)
  uri, params = resolve_parameters(uri, method, params)

  request = http_request(uri, method, params)
  http = connection_for(uri)

  request_auth(request, uri)
  enable_gzip(request)

  request_language_charset(request)
  request_cookies(request, uri)
  request_host(request, uri)
  request_referer(request, uri, referer_uri)
  request_user_agent(request)
  request_add_headers(request, headers)

  pre_connect(request)

  # Add If-Modified-Since if page is in history
  if @conditional_requests
    cached = visited_page(uri)

    if cached and cached.response['Last-Modified']
      request['If-Modified-Since'] = cached.response['Last-Modified']
    end
  end

  # Specify timeouts if given
  http.open_timeout = @open_timeout if @open_timeout
  http.read_timeout = @read_timeout if @read_timeout

  request_log(request)

  response_body = nil

  # Send the request; the body is read while the block is running
  response = http.request(uri, request) do |raw|
    response_log(raw)
    response_body = response_read(raw, request)
    raw
  end

  post_connect(response)

  page = response_parse(response, response_body, uri)

  response_cookies(response, uri, page)

  refreshed = response_follow_meta_refresh(response, uri, page, redirects)
  return refreshed if refreshed

  case response
  when Net::HTTPSuccess, Mechanize::FileResponse
    return page
  when Net::HTTPNotModified
    log.debug("Got cached page") if log
    return visited_page(uri) || page
  when Net::HTTPRedirection
    return response_redirect(response, method, page, redirects)
  when Net::HTTPUnauthorized
    return response_authenticate(response, page, uri, request, headers,
                                 params, referer)
  end

  raise ResponseCodeError.new(page), "Unhandled response"
end
663
938
 
664
939
  def add_to_history(page)
@@ -667,20 +942,23 @@ class Mechanize
667
942
  end
668
943
  end
669
944
 
670
- module WWW
671
- def self.const_missing klass
672
- warn <<eomsg
673
- !!!!! DEPRECATION NOTICE !!!!!
674
- The WWW constant is deprecated, please switch to the new top-level Mechanize
675
- constant. WWW will be removed in Mechanize version 2.0
676
-
677
- You've referenced the WWW constant from #{caller.first}, please
678
- switch the "WWW" to "Mechanize". Thanks!
679
-
680
- Sincerely,
945
+ require 'mechanize/content_type_error'
946
+ require 'mechanize/cookie'
947
+ require 'mechanize/cookie_jar'
948
+ require 'mechanize/file'
949
+ require 'mechanize/file_connection'
950
+ require 'mechanize/file_request'
951
+ require 'mechanize/file_response'
952
+ require 'mechanize/form'
953
+ require 'mechanize/history'
954
+ require 'mechanize/page'
955
+ require 'mechanize/inspect'
956
+ require 'mechanize/monkey_patch'
957
+ require 'mechanize/pluggable_parsers'
958
+ require 'mechanize/redirect_limit_reached_error'
959
+ require 'mechanize/redirect_not_get_or_head_error'
960
+ require 'mechanize/response_code_error'
961
+ require 'mechanize/unsupported_scheme_error'
962
+ require 'mechanize/uri_resolver'
963
+ require 'mechanize/util'
681
964
 
682
- Pew Pew Pew
683
- eomsg
684
- Object.const_get(klass)
685
- end
686
- end