mechanize 0.7.6 → 0.7.7

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of mechanize might be problematic. Click here for more details.

Files changed (65) hide show
  1. data/EXAMPLES.txt +87 -40
  2. data/History.txt +21 -0
  3. data/Manifest.txt +51 -47
  4. data/lib/www/mechanize.rb +88 -22
  5. data/lib/www/mechanize/cookie.rb +1 -1
  6. data/lib/www/mechanize/form.rb +27 -14
  7. data/lib/www/mechanize/form/multi_select_list.rb +1 -1
  8. data/lib/www/mechanize/monkey_patch.rb +3 -0
  9. data/lib/www/mechanize/page.rb +20 -16
  10. data/lib/www/mechanize/page/link.rb +2 -2
  11. data/lib/www/mechanize/redirect_limit_reached_error.rb +18 -0
  12. data/test/helper.rb +14 -0
  13. data/test/htdocs/meta_cookie.html +11 -0
  14. data/test/servlets.rb +10 -0
  15. data/test/{tc_authenticate.rb → test_authenticate.rb} +0 -0
  16. data/test/{tc_bad_links.rb → test_bad_links.rb} +0 -0
  17. data/test/{tc_blank_form.rb → test_blank_form.rb} +0 -0
  18. data/test/{tc_checkboxes.rb → test_checkboxes.rb} +0 -0
  19. data/test/{tc_cookie_class.rb → test_cookie_class.rb} +9 -0
  20. data/test/{tc_cookie_jar.rb → test_cookie_jar.rb} +0 -0
  21. data/test/{tc_cookies.rb → test_cookies.rb} +6 -0
  22. data/test/{tc_encoded_links.rb → test_encoded_links.rb} +1 -1
  23. data/test/{tc_errors.rb → test_errors.rb} +0 -0
  24. data/test/{tc_follow_meta.rb → test_follow_meta.rb} +0 -0
  25. data/test/{tc_form_action.rb → test_form_action.rb} +1 -1
  26. data/test/{tc_form_as_hash.rb → test_form_as_hash.rb} +0 -0
  27. data/test/{tc_form_button.rb → test_form_button.rb} +0 -0
  28. data/test/{tc_form_no_inputname.rb → test_form_no_inputname.rb} +0 -0
  29. data/test/{tc_forms.rb → test_forms.rb} +0 -0
  30. data/test/{tc_frames.rb → test_frames.rb} +0 -0
  31. data/test/test_get_headers.rb +45 -0
  32. data/test/{tc_gzipping.rb → test_gzipping.rb} +0 -0
  33. data/test/test_hash_api.rb +42 -0
  34. data/test/{tc_history.rb → test_history.rb} +0 -0
  35. data/test/{tc_history_added.rb → test_history_added.rb} +0 -0
  36. data/test/{tc_html_unscape_forms.rb → test_html_unscape_forms.rb} +0 -0
  37. data/test/{tc_if_modified_since.rb → test_if_modified_since.rb} +0 -0
  38. data/test/{tc_keep_alive.rb → test_keep_alive.rb} +0 -0
  39. data/test/{tc_links.rb → test_links.rb} +0 -0
  40. data/test/{tc_mech.rb → test_mech.rb} +2 -2
  41. data/test/{tc_mechanize_file.rb → test_mechanize_file.rb} +0 -0
  42. data/test/{tc_multi_select.rb → test_multi_select.rb} +0 -0
  43. data/test/{tc_no_attributes.rb → test_no_attributes.rb} +0 -0
  44. data/test/{tc_option.rb → test_option.rb} +0 -0
  45. data/test/{tc_page.rb → test_page.rb} +17 -0
  46. data/test/{tc_pluggable_parser.rb → test_pluggable_parser.rb} +0 -0
  47. data/test/{tc_post_form.rb → test_post_form.rb} +0 -0
  48. data/test/{tc_pretty_print.rb → test_pretty_print.rb} +0 -0
  49. data/test/{tc_radiobutton.rb → test_radiobutton.rb} +0 -0
  50. data/test/test_redirect_limit_reached.rb +41 -0
  51. data/test/{tc_referer.rb → test_referer.rb} +0 -0
  52. data/test/{tc_relative_links.rb → test_relative_links.rb} +0 -0
  53. data/test/{tc_response_code.rb → test_response_code.rb} +0 -0
  54. data/test/{tc_save_file.rb → test_save_file.rb} +0 -0
  55. data/test/{tc_select.rb → test_select.rb} +0 -0
  56. data/test/{tc_select_all.rb → test_select_all.rb} +0 -0
  57. data/test/{tc_select_none.rb → test_select_none.rb} +0 -0
  58. data/test/{tc_select_noopts.rb → test_select_noopts.rb} +0 -0
  59. data/test/{tc_set_fields.rb → test_set_fields.rb} +8 -0
  60. data/test/{tc_ssl_server.rb → test_ssl_server.rb} +0 -0
  61. data/test/{tc_subclass.rb → test_subclass.rb} +0 -0
  62. data/test/{tc_textarea.rb → test_textarea.rb} +0 -0
  63. data/test/{tc_upload.rb → test_upload.rb} +11 -11
  64. metadata +106 -52
  65. data/test/test_all.rb +0 -5
data/EXAMPLES.txt CHANGED
@@ -4,53 +4,68 @@
4
4
  require 'rubygems'
5
5
  require 'mechanize'
6
6
 
7
- agent = WWW::Mechanize.new
8
- agent.user_agent_alias = 'Mac Safari'
9
- page = agent.get("http://www.google.com/")
10
- search_form = page.forms.with.name("f").first
11
- search_form.q = "Hello"
12
- search_results = agent.submit(search_form)
13
- puts search_results.body
7
+ a = WWW::Mechanize.new { |agent|
8
+ agent.user_agent_alias = 'Mac Safari'
9
+ }
10
+
11
+ a.get('http://google.com/') do |page|
12
+ search_result = page.form_with(:name => 'f') do |search|
13
+ search.q = 'Hello world'
14
+ end.submit
15
+
16
+ search_result.links.each do |link|
17
+ puts link.text
18
+ end
19
+ end
14
20
 
15
21
  == Rubyforge
16
- require 'mechanize'
22
+
23
+ a = WWW::Mechanize.new
24
+ a.get('http://rubyforge.org/') do |page|
25
+ # Click the login link
26
+ login_page = a.click(page.links.text(/Log In/))
17
27
 
18
- agent = WWW::Mechanize.new
19
- page = agent.get('http://rubyforge.org/')
20
- link = page.links.text(/Log In/)
21
- page = agent.click(link)
22
- form = page.forms[1]
23
- form.form_loginname = ARGV[0]
24
- form.form_pw = ARGV[1]
25
- page = agent.submit(form, form.buttons.first)
28
+ # Submit the login form
29
+ my_page = login_page.form_with(:action => '/account/login.php') do |f|
30
+ f.form_loginname = ARGV[0]
31
+ f.form_pw = ARGV[1]
32
+ end.click_button
26
33
 
27
- puts page.body
34
+ my_page.links.each do |link|
35
+ text = link.text.strip
36
+ next unless text.length > 0
37
+ puts text
38
+ end
39
+ end
28
40
 
29
41
  == File Upload
30
- This example uploads one image as two different images to flickr.
31
-
32
- require 'rubygems'
33
- require 'mechanize'
34
-
35
- agent = WWW::Mechanize.new
36
-
37
- # Get the flickr sign in page
38
- page = agent.get('http://flickr.com/signin/flickr/')
39
-
40
- # Fill out the login form
41
- form = page.forms.name('flickrloginform').first
42
- form.email = ARGV[0]
43
- form.password = ARGV[1]
44
- page = agent.submit(form)
45
-
46
- # Go to the upload page
47
- page = agent.click page.links.text('Upload')
48
-
49
- # Fill out the form
50
- form = page.forms.action('/photos_upload_process.gne').first
51
- form.file_uploads.name('file1').first.file_name = ARGV[2]
52
- agent.submit(form)
42
+ Upload a file to flickr.
43
+
44
+ a = WWW::Mechanize.new { |agent|
45
+ # Flickr refreshes after login
46
+ agent.follow_meta_refresh = true
47
+ }
48
+
49
+ a.get('http://flickr.com/') do |home_page|
50
+ signin_page = a.click(home_page.links.text(/Sign In/))
51
+
52
+ my_page = signin_page.form_with(:name => 'login_form') do |form|
53
+ form.login = ARGV[0]
54
+ form.passwd = ARGV[1]
55
+ end.submit
56
+
57
+ # Click the upload link
58
+ upload_page = a.click(my_page.links.text(/Upload/))
53
59
 
60
+ # We want the basic upload page.
61
+ upload_page = a.click(upload_page.links.text(/basic Uploader/))
62
+
63
+ # Upload the file
64
+ upload_page.form_with(:method => 'POST') do |upload_form|
65
+ upload_form.file_uploads.first.file_name = ARGV[2]
66
+ end.submit
67
+ end
68
+
54
69
  == Pluggable Parsers
55
70
  Let's say you want html pages to automatically be parsed with Rubyful Soup.
56
71
  This example shows you how:
@@ -122,3 +137,35 @@ This example also demonstrates subclassing Mechanize.
122
137
 
123
138
  TestMech.new.process
124
139
 
140
+ == Client Certificate Authentication (Mutual Auth)
141
+
142
+ In most cases a client certificate is created as an additional layer of security
143
+ for certain websites. The specific case that this was initially tested on was
144
+ for automating the download of archived images from a bank's (Wachovia) lockbox
145
+ system. Once the certificate is installed into your browser you will have to
146
+ export it and split the certificate and private key into separate files. Exported
147
+ files are usually in .p12 format (IE 7 & Firefox 2.0) which stands for PKCS #12.
148
+ You can convert them from p12 to pem format by using the following commands:
149
+
150
+ openssl.exe pkcs12 -in input_file.p12 -clcerts -out example.key -nocerts -nodes
151
+ openssl.exe pkcs12 -in input_file.p12 -clcerts -out example.cer -nokeys
152
+
153
+ require 'rubygems'
154
+ require 'mechanize'
155
+
156
+ # create Mechanize instance
157
+ agent = WWW::Mechanize.new
158
+
159
+ # set the path of the certificate file
160
+ agent.cert = 'example.cer'
161
+
162
+ # set the path of the private key file
163
+ agent.key = 'example.key'
164
+
165
+ # get the login form & fill it out with the username/password
166
+ login_form = @agent.get("http://example.com/login_page").form('Login')
167
+ login_form.Userid = 'TestUser'
168
+ login_form.Password = 'TestPassword'
169
+
170
+ # submit login form
171
+ agent.submit(login_form, login_form.buttons.first)
data/History.txt CHANGED
@@ -1,5 +1,26 @@
1
1
  = Mechanize CHANGELOG
2
2
 
3
+ === 0.7.7
4
+
5
+ * New Features:
6
+ * Page#form_with takes a +criteria+ hash.
7
+ * Page#form is changed to Page#form_with
8
+ * Mechanize#get takes custom http headers. Thanks Mike Dalessio!
9
+ * Form#click_button submits a form defaulting to the current button.
10
+ * Form#set_fields now takes a hash. Thanks Tobi!
11
+ * Mechanize#redirection_limit= for setting the max number of redirects.
12
+
13
+ * Bug Fixes:
14
+ * Added more examples. Thanks Robert Jackson.
15
+ * #20480 Making sure the Host header is set.
16
+ * #20672 Making sure cookies with weird semicolons work.
17
+ * Fixed bug with percent signs in urls.
18
+ http://d.hatena.ne.jp/kitamomonga/20080410/ruby_mechanize_percent_url_bug
19
+ * #21132 Not checking for EOF errors on redirect
20
+ * Fixed a weird gzipping error.
21
+ * #21233 Smarter multipart boundary. Thanks Todd Willey!
22
+ * #20097 Supporting meta tag cookies.
23
+
3
24
  === 0.7.6
4
25
 
5
26
  * New Features:
data/Manifest.txt CHANGED
@@ -40,6 +40,7 @@ lib/www/mechanize/page/frame.rb
40
40
  lib/www/mechanize/page/link.rb
41
41
  lib/www/mechanize/page/meta.rb
42
42
  lib/www/mechanize/pluggable_parsers.rb
43
+ lib/www/mechanize/redirect_limit_reached_error.rb
43
44
  lib/www/mechanize/response_code_error.rb
44
45
  lib/www/mechanize/unsupported_scheme_error.rb
45
46
  test/data/htpasswd
@@ -69,6 +70,7 @@ test/htdocs/google.html
69
70
  test/htdocs/iframe_test.html
70
71
  test/htdocs/index.html
71
72
  test/htdocs/link with space.html
73
+ test/htdocs/meta_cookie.html
72
74
  test/htdocs/no_title_test.html
73
75
  test/htdocs/relative/tc_relative_links.html
74
76
  test/htdocs/tc_bad_links.html
@@ -88,50 +90,52 @@ test/htdocs/tc_textarea.html
88
90
  test/htdocs/unusual______.html
89
91
  test/servlets.rb
90
92
  test/ssl_server.rb
91
- test/tc_authenticate.rb
92
- test/tc_bad_links.rb
93
- test/tc_blank_form.rb
94
- test/tc_checkboxes.rb
95
- test/tc_cookie_class.rb
96
- test/tc_cookie_jar.rb
97
- test/tc_cookies.rb
98
- test/tc_encoded_links.rb
99
- test/tc_errors.rb
100
- test/tc_follow_meta.rb
101
- test/tc_form_action.rb
102
- test/tc_form_as_hash.rb
103
- test/tc_form_button.rb
104
- test/tc_form_no_inputname.rb
105
- test/tc_forms.rb
106
- test/tc_frames.rb
107
- test/tc_gzipping.rb
108
- test/tc_history.rb
109
- test/tc_history_added.rb
110
- test/tc_html_unscape_forms.rb
111
- test/tc_if_modified_since.rb
112
- test/tc_keep_alive.rb
113
- test/tc_links.rb
114
- test/tc_mech.rb
115
- test/tc_mechanize_file.rb
116
- test/tc_multi_select.rb
117
- test/tc_no_attributes.rb
118
- test/tc_option.rb
119
- test/tc_page.rb
120
- test/tc_pluggable_parser.rb
121
- test/tc_post_form.rb
122
- test/tc_pretty_print.rb
123
- test/tc_radiobutton.rb
124
- test/tc_referer.rb
125
- test/tc_relative_links.rb
126
- test/tc_response_code.rb
127
- test/tc_save_file.rb
128
- test/tc_select.rb
129
- test/tc_select_all.rb
130
- test/tc_select_none.rb
131
- test/tc_select_noopts.rb
132
- test/tc_set_fields.rb
133
- test/tc_ssl_server.rb
134
- test/tc_subclass.rb
135
- test/tc_textarea.rb
136
- test/tc_upload.rb
137
- test/test_all.rb
93
+ test/test_authenticate.rb
94
+ test/test_bad_links.rb
95
+ test/test_blank_form.rb
96
+ test/test_checkboxes.rb
97
+ test/test_cookie_class.rb
98
+ test/test_cookie_jar.rb
99
+ test/test_cookies.rb
100
+ test/test_encoded_links.rb
101
+ test/test_errors.rb
102
+ test/test_follow_meta.rb
103
+ test/test_form_action.rb
104
+ test/test_form_as_hash.rb
105
+ test/test_form_button.rb
106
+ test/test_form_no_inputname.rb
107
+ test/test_forms.rb
108
+ test/test_frames.rb
109
+ test/test_get_headers.rb
110
+ test/test_gzipping.rb
111
+ test/test_hash_api.rb
112
+ test/test_history.rb
113
+ test/test_history_added.rb
114
+ test/test_html_unscape_forms.rb
115
+ test/test_if_modified_since.rb
116
+ test/test_keep_alive.rb
117
+ test/test_links.rb
118
+ test/test_mech.rb
119
+ test/test_mechanize_file.rb
120
+ test/test_multi_select.rb
121
+ test/test_no_attributes.rb
122
+ test/test_option.rb
123
+ test/test_page.rb
124
+ test/test_pluggable_parser.rb
125
+ test/test_post_form.rb
126
+ test/test_pretty_print.rb
127
+ test/test_radiobutton.rb
128
+ test/test_redirect_limit_reached.rb
129
+ test/test_referer.rb
130
+ test/test_relative_links.rb
131
+ test/test_response_code.rb
132
+ test/test_save_file.rb
133
+ test/test_select.rb
134
+ test/test_select_all.rb
135
+ test/test_select_none.rb
136
+ test/test_select_noopts.rb
137
+ test/test_set_fields.rb
138
+ test/test_ssl_server.rb
139
+ test/test_subclass.rb
140
+ test/test_textarea.rb
141
+ test/test_upload.rb
data/lib/www/mechanize.rb CHANGED
@@ -5,10 +5,14 @@ require 'webrick/httputils'
5
5
  require 'zlib'
6
6
  require 'stringio'
7
7
  require 'digest/md5'
8
+ require 'fileutils'
9
+ require 'hpricot'
10
+ require 'forwardable'
8
11
 
9
12
  require 'www/mechanize/content_type_error'
10
13
  require 'www/mechanize/response_code_error'
11
14
  require 'www/mechanize/unsupported_scheme_error'
15
+ require 'www/mechanize/redirect_limit_reached_error'
12
16
  require 'www/mechanize/cookie'
13
17
  require 'www/mechanize/cookie_jar'
14
18
  require 'www/mechanize/history'
@@ -39,7 +43,7 @@ module WWW
39
43
  class Mechanize
40
44
  ##
41
45
  # The version of Mechanize you are using.
42
- VERSION = '0.7.6'
46
+ VERSION = '0.7.7'
43
47
 
44
48
  ##
45
49
  # User Agent aliases
@@ -73,6 +77,7 @@ module WWW
73
77
  attr_accessor :verify_callback
74
78
  attr_accessor :history_added
75
79
  attr_accessor :scheme_handlers
80
+ attr_accessor :redirection_limit
76
81
 
77
82
  attr_reader :history
78
83
  attr_reader :pluggable_parser
@@ -81,6 +86,8 @@ module WWW
81
86
 
82
87
  @@nonce_count = -1
83
88
  CNONCE = Digest::MD5.hexdigest("%x" % (Time.now.to_i + rand(65535)))
89
+ @html_parser = Hpricot
90
+ class << self; attr_accessor :html_parser end
84
91
 
85
92
  def initialize
86
93
  # attr_accessors
@@ -122,6 +129,7 @@ module WWW
122
129
  @conditional_requests = true
123
130
 
124
131
  @follow_meta_refresh = false
132
+ @redirection_limit = 20
125
133
 
126
134
  # Connection Cache & Keep alive
127
135
  @connection_cache = {}
@@ -170,10 +178,18 @@ module WWW
170
178
  end
171
179
 
172
180
  # Fetches the URL passed in and returns a page.
173
- def get(url, parameters = [], referer = nil)
174
- unless parameters.respond_to?(:each) # FIXME: Remove this in 0.8.0
175
- referer = parameters
176
- parameters = []
181
+ def get(options, parameters = [], referer = nil)
182
+ unless options.is_a? Hash
183
+ url = options
184
+ unless parameters.respond_to?(:each) # FIXME: Remove this in 0.8.0
185
+ referer = parameters
186
+ parameters = []
187
+ end
188
+ else
189
+ raise ArgumentError.new("url must be specified") unless url = options[:url]
190
+ parameters = options[:params] || []
191
+ referer = options[:referer]
192
+ headers = options[:headers]
177
193
  end
178
194
 
179
195
  referer ||= current_page || Page.new(nil, {'content-type'=>'text/html'})
@@ -196,7 +212,7 @@ module WWW
196
212
 
197
213
  # fetch the page
198
214
  request = fetch_request(abs_uri)
199
- page = fetch_page(abs_uri, request, referer)
215
+ page = fetch_page(:uri => abs_uri, :request => request, :page => referer, :headers => headers)
200
216
  add_to_history(page)
201
217
  yield page if block_given?
202
218
  page
@@ -217,10 +233,9 @@ module WWW
217
233
  rescue
218
234
  nil
219
235
  end
220
- uri = to_absolute_uri(
221
- link.attributes['href'] || link.attributes['src'] || link.href,
222
- referer || current_page()
223
- )
236
+ href = link.respond_to?(:has_attribute?) ?
237
+ (link['href'] || link['src']) : link.href
238
+ uri = to_absolute_uri(href, referer || current_page())
224
239
  get(uri, referer)
225
240
  end
226
241
 
@@ -237,7 +252,11 @@ module WWW
237
252
  # or
238
253
  # agent.post('http://example.com/', [ ["foo", "bar"] ])
239
254
  def post(url, query={})
240
- node = Hpricot::Elem.new(Hpricot::STag.new('form'))
255
+ node = {}
256
+ # Create a fake form
257
+ class << node
258
+ def search(*args); []; end
259
+ end
241
260
  node['method'] = 'POST'
242
261
  node['enctype'] = 'application/x-www-form-urlencoded'
243
262
 
@@ -312,7 +331,7 @@ module WWW
312
331
  s.gsub(/&(\w+|#[0-9]+);/) { |match|
313
332
  number = case match
314
333
  when /&(\w+);/
315
- Hpricot::NamedCharacters[$1]
334
+ Mechanize.html_parser::NamedCharacters[$1]
316
335
  when /&#([0-9]+);/
317
336
  $1.to_i
318
337
  end
@@ -323,7 +342,13 @@ module WWW
323
342
  end
324
343
 
325
344
  protected
326
- def set_headers(uri, request, cur_page)
345
+ def set_headers(uri, request, options)
346
+ unless options.is_a? Hash
347
+ cur_page = options
348
+ else
349
+ raise ArgumentError.new("cur_page must be specified") unless cur_page = options[:page]
350
+ headers = options[:headers]
351
+ end
327
352
  if @keep_alive
328
353
  request.add_field('Connection', 'keep-alive')
329
354
  request.add_field('Keep-Alive', keep_alive_time.to_s)
@@ -332,6 +357,7 @@ module WWW
332
357
  end
333
358
  request.add_field('Accept-Encoding', 'gzip,identity')
334
359
  request.add_field('Accept-Language', 'en-us,en;q=0.5')
360
+ request.add_field('Host', uri.host)
335
361
  request.add_field('Accept-Charset', 'ISO-8859-1,utf-8;q=0.7,*;q=0.7')
336
362
 
337
363
  unless @cookie_jar.empty?(uri)
@@ -370,6 +396,18 @@ module WWW
370
396
  end
371
397
  end
372
398
 
399
+ if headers
400
+ headers.each do |k,v|
401
+ case k
402
+ when :etag then request.add_field("ETag", v)
403
+ when :if_modified_since then request.add_field("If-Modified-Since", v)
404
+ else
405
+ raise ArgumentError.new("unknown header symbol #{k}") if k.is_a? Symbol
406
+ request.add_field(k,v)
407
+ end
408
+ end
409
+ end
410
+
373
411
  request
374
412
  end
375
413
 
@@ -422,8 +460,8 @@ module WWW
422
460
 
423
461
  url = URI.parse(
424
462
  Mechanize.html_unescape(
425
- url.split(/%[0-9A-Fa-f]{2}|#/).zip(
426
- url.scan(/%[0-9A-Fa-f]{2}|#/)
463
+ url.split(/(?:%[0-9A-Fa-f]{2})+|#/).zip(
464
+ url.scan(/(?:%[0-9A-Fa-f]{2})+|#/)
427
465
  ).map { |x,y|
428
466
  "#{URI.escape(x)}#{y}"
429
467
  }.join('')
@@ -479,7 +517,17 @@ module WWW
479
517
  end
480
518
 
481
519
  # uri is an absolute URI
482
- def fetch_page(uri, request, cur_page=current_page(), request_data=[])
520
+ def fetch_page(options, request=nil, cur_page=current_page(), request_data=[], redirects = 0)
521
+ unless options.is_a? Hash
522
+ raise ArgumentError.new("uri must be specified") unless uri = options
523
+ raise ArgumentError.new("request must be specified") unless request
524
+ else
525
+ raise ArgumentError.new("uri must be specified") unless uri = options[:uri]
526
+ raise ArgumentError.new("request must be specified") unless request = options[:request]
527
+ cur_page = options[:page] || current_page()
528
+ request_data = options[:request_data] || []
529
+ headers = options[:headers]
530
+ end
483
531
  raise "unsupported scheme: #{uri.scheme}" unless ['http', 'https'].include?(uri.scheme.downcase)
484
532
 
485
533
  log.info("#{ request.class }: #{ request.path }") if log
@@ -538,7 +586,11 @@ module WWW
538
586
 
539
587
  http_obj.start unless http_obj.started?
540
588
 
541
- request = set_headers(uri, request, cur_page)
589
+ if headers
590
+ request = set_headers(uri, request, {:page => cur_page, :headers => headers})
591
+ else
592
+ request = set_headers(uri, request, cur_page)
593
+ end
542
594
 
543
595
  # Log specified headers for the request
544
596
  if log
@@ -551,6 +603,7 @@ module WWW
551
603
 
552
604
  # Send the request
553
605
  begin
606
+ res_klass = nil
554
607
  response = http_obj.request(request, *request_data) {|response|
555
608
 
556
609
  body = StringIO.new
@@ -560,8 +613,13 @@ module WWW
560
613
  body.write(part)
561
614
  log.debug("Read #{total} bytes") if log
562
615
  }
616
+
617
+ res_klass = Net::HTTPResponse::CODE_TO_OBJ[response.code.to_s]
618
+
563
619
  # Net::HTTP ignores EOFError if Content-length is given, so we emulate it here.
564
- raise EOFError if response.content_length() && response.content_length() != total
620
+ unless res_klass <= Net::HTTPRedirection
621
+ raise EOFError if response.content_length() && response.content_length() != total
622
+ end
565
623
  body.rewind
566
624
 
567
625
  response.each_header { |k,v|
@@ -582,7 +640,7 @@ module WWW
582
640
  if response['Content-Length'].to_i > 0 || body.length > 0
583
641
  begin
584
642
  Zlib::GzipReader.new(body).read
585
- rescue Zlib::BufError => e
643
+ rescue Zlib::BufError, Zlib::GzipFile::Error
586
644
  log.error('Caught a Zlib::BufError') if log
587
645
  body.rewind
588
646
  body.read(10)
@@ -631,6 +689,15 @@ module WWW
631
689
  end
632
690
  end
633
691
 
692
+ if page.is_a?(Page) && page.body =~ /Set-Cookie/
693
+ page.search('//meta[@http-equiv="Set-Cookie"]').each do |meta|
694
+ Cookie::parse(uri, meta['content'], log) { |c|
695
+ log.debug("saved cookie: #{c}") if log
696
+ @cookie_jar.add(uri, c)
697
+ }
698
+ end
699
+ end
700
+
634
701
  (response.get_fields('Set-Cookie')||[]).each do |cookie|
635
702
  Cookie::parse(uri, cookie, log) { |c|
636
703
  log.debug("saved cookie: #{c}") if log
@@ -640,8 +707,6 @@ module WWW
640
707
 
641
708
  log.info("status: #{ page.code }") if log
642
709
 
643
- res_klass = Net::HTTPResponse::CODE_TO_OBJ[page.code.to_s]
644
-
645
710
  if follow_meta_refresh && page.respond_to?(:meta) &&
646
711
  (redirect = page.meta.first)
647
712
  return redirect.click
@@ -657,7 +722,8 @@ module WWW
657
722
  log.info("follow redirect to: #{ response['Location'] }") if log
658
723
  from_uri = page.uri
659
724
  abs_uri = to_absolute_uri(response['Location'].to_s, page)
660
- page = fetch_page(abs_uri, fetch_request(abs_uri), page)
725
+ raise RedirectLimitReachedError.new(page, redirects) if redirects + 1 > redirection_limit
726
+ page = fetch_page(abs_uri, fetch_request(abs_uri), page, request_data, redirects + 1)
661
727
  @history.push(page, from_uri)
662
728
  return page
663
729
  elsif res_klass <= Net::HTTPUnauthorized