mechanize 2.4 → 2.5
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of mechanize might be problematic. Click here for more details.
- data.tar.gz.sig +0 -0
- data/CHANGELOG.rdoc +30 -4
- data/EXAMPLES.rdoc +2 -2
- data/GUIDE.rdoc +13 -13
- data/Manifest.txt +30 -0
- data/README.rdoc +19 -2
- data/lib/mechanize.rb +30 -7
- data/lib/mechanize/chunked_termination_error.rb +7 -0
- data/lib/mechanize/form.rb +7 -5
- data/lib/mechanize/form/radio_button.rb +14 -0
- data/lib/mechanize/http/agent.rb +54 -24
- data/lib/mechanize/http/auth_store.rb +6 -0
- data/lib/mechanize/http/content_disposition_parser.rb +1 -1
- data/lib/mechanize/monkey_patch.rb +3 -2
- data/lib/mechanize/page.rb +0 -2
- data/lib/mechanize/parser.rb +2 -2
- data/lib/mechanize/test_case.rb +70 -435
- data/lib/mechanize/test_case/.document +1 -0
- data/lib/mechanize/test_case/bad_chunking_servlet.rb +14 -0
- data/lib/mechanize/test_case/basic_auth_servlet.rb +24 -0
- data/lib/mechanize/test_case/content_type_servlet.rb +8 -0
- data/lib/mechanize/test_case/digest_auth_servlet.rb +33 -0
- data/lib/mechanize/test_case/file_upload_servlet.rb +20 -0
- data/lib/mechanize/test_case/form_servlet.rb +55 -0
- data/lib/mechanize/test_case/gzip_servlet.rb +32 -0
- data/lib/mechanize/test_case/header_servlet.rb +14 -0
- data/lib/mechanize/test_case/http_refresh_servlet.rb +9 -0
- data/lib/mechanize/test_case/infinite_redirect_servlet.rb +10 -0
- data/lib/mechanize/test_case/infinite_refresh_servlet.rb +10 -0
- data/lib/mechanize/test_case/many_cookies_as_string_servlet.rb +37 -0
- data/lib/mechanize/test_case/many_cookies_servlet.rb +33 -0
- data/lib/mechanize/test_case/modified_since_servlet.rb +21 -0
- data/lib/mechanize/test_case/ntlm_servlet.rb +30 -0
- data/lib/mechanize/test_case/one_cookie_no_spaces_servlet.rb +11 -0
- data/lib/mechanize/test_case/one_cookie_servlet.rb +11 -0
- data/lib/mechanize/test_case/quoted_value_cookie_servlet.rb +11 -0
- data/lib/mechanize/test_case/redirect_servlet.rb +13 -0
- data/lib/mechanize/test_case/referer_servlet.rb +12 -0
- data/lib/mechanize/test_case/refresh_with_empty_url.rb +15 -0
- data/lib/mechanize/test_case/refresh_without_url.rb +14 -0
- data/lib/mechanize/test_case/response_code_servlet.rb +15 -0
- data/lib/mechanize/test_case/send_cookies_servlet.rb +19 -0
- data/lib/mechanize/test_case/server.rb +36 -0
- data/lib/mechanize/test_case/servlets.rb +55 -0
- data/lib/mechanize/test_case/verb_servlet.rb +11 -0
- data/test/test_mechanize.rb +12 -12
- data/test/test_mechanize_file.rb +11 -0
- data/test/test_mechanize_file_response.rb +23 -0
- data/test/test_mechanize_form.rb +34 -0
- data/test/test_mechanize_form_radio_button.rb +19 -2
- data/test/test_mechanize_http_agent.rb +104 -26
- data/test/test_mechanize_http_auth_store.rb +23 -0
- data/test/test_mechanize_http_content_disposition_parser.rb +6 -0
- data/test/test_mechanize_page.rb +3 -5
- data/test/test_mechanize_page_link.rb +0 -32
- metadata +39 -8
- metadata.gz.sig +2 -2
data.tar.gz.sig
CHANGED
Binary file
|
data/CHANGELOG.rdoc
CHANGED
@@ -1,12 +1,38 @@
|
|
1
1
|
= Mechanize CHANGELOG
|
2
2
|
|
3
|
+
=== 2.5
|
4
|
+
|
5
|
+
* Minor enhancement
|
6
|
+
* Added Mechanize#ignore_bad_chunking for working around servers that don't
|
7
|
+
terminate chunked transfer-encoding properly. Enabling this may cause
|
8
|
+
data loss. Issue #116
|
9
|
+
* Removed content-type check from Mechanize::Page allowing forced parsing
|
10
|
+
of incorrect or missing content-types. Issue #221 by GarthSnyder
|
11
|
+
* Bug fixes
|
12
|
+
* Fixed typos in EXAMPLES and GUIDE. Pull Request #213 by Erkan Yilmaz.
|
13
|
+
* Fixed handling of a quoted content-disposition size. Pull Request #220 by
|
14
|
+
Jason Rust
|
15
|
+
* Mechanize now ignores a missing gzip footer like browsers do. Issue #224
|
16
|
+
by afhbl
|
17
|
+
* Mechanize handles saving of files with the same name better now. Pull
|
18
|
+
Request #223 by Godfrey Chan, Issue #219 by Jon Hart
|
19
|
+
* Mechanize now sends headers across redirects. Issue #215 by Chris Gahan
|
20
|
+
* Mechanize now raises Mechanize::ResponseReadError when the server does not
|
21
|
+
terminate chunked transfer-encoding properly. Issue #116
|
22
|
+
* Mechanize no longer raises an exception when multiple identical
|
23
|
+
radiobuttons are checked. Issue #214 by Matthias Guenther
|
24
|
+
* Fixed documentation for pre_connect_hooks and post_connect_hooks. Issue
|
25
|
+
#226 by Robert Poor
|
26
|
+
* Worked around ruby 1.8 run with -Ku and ISO-8859-1 encoded characters in
|
27
|
+
URIs. Issue #228 by Stanislav O.Pogrebnyak
|
28
|
+
|
3
29
|
=== 2.4
|
4
30
|
|
5
31
|
* Security fix:
|
6
32
|
|
7
33
|
Mechanize#auth and Mechanize#basic_auth allowed disclosure of passwords to
|
8
|
-
malicious servers and have been
|
9
|
-
|
34
|
+
malicious servers and have been deprecated.
|
35
|
+
|
10
36
|
In prior versions of mechanize only one set of HTTP authentication
|
11
37
|
credentials were allowed for all connections. If a mechanize instance
|
12
38
|
connected to more than one server then a malicious server detecting
|
@@ -14,8 +40,8 @@
|
|
14
40
|
username and password intended only for one server.
|
15
41
|
|
16
42
|
Mechanize#auth and Mechanize#basic_auth now warn when used.
|
17
|
-
|
18
|
-
To fix the warning switch to Mechanize#add_auth which requires
|
43
|
+
|
44
|
+
To fix the warning switch to Mechanize#add_auth which requires the URI
|
19
45
|
the credentials are intended for, the username and the password.
|
20
46
|
Optionally an HTTP authentication realm or NTLM domain may be provided.
|
21
47
|
|
data/EXAMPLES.rdoc
CHANGED
@@ -83,7 +83,7 @@ Upload a file to flickr.
|
|
83
83
|
|
84
84
|
== Pluggable Parsers
|
85
85
|
|
86
|
-
|
86
|
+
Let's say you want HTML pages to automatically be parsed with Rubyful Soup.
|
87
87
|
This example shows you how:
|
88
88
|
|
89
89
|
require 'rubygems'
|
@@ -119,7 +119,7 @@ Beautiful Soup for that page.
|
|
119
119
|
|
120
120
|
Mechanize#transact runs the given block and then resets the page history. I.e.
|
121
121
|
after the block has been executed, you're back at the original page; no need
|
122
|
-
count how many times to call the back method at the end of a loop (while
|
122
|
+
to count how many times to call the back method at the end of a loop (while
|
123
123
|
accounting for possible exceptions).
|
124
124
|
|
125
125
|
This example also demonstrates subclassing Mechanize.
|
data/GUIDE.rdoc
CHANGED
@@ -26,7 +26,7 @@ Mechanize stored any cookies that were set, and followed any redirects that
|
|
26
26
|
google may have sent. The agent gave us back a page that we can use to
|
27
27
|
scrape data, find links to click, or find forms to fill out.
|
28
28
|
|
29
|
-
Next,
|
29
|
+
Next, let's try finding some links to click.
|
30
30
|
|
31
31
|
== Finding Links
|
32
32
|
|
@@ -34,14 +34,14 @@ Mechanize returns a page object whenever you get a page, post, or submit a
|
|
34
34
|
form. When a page is fetched, the agent will parse the page and put a list
|
35
35
|
of links on the page object.
|
36
36
|
|
37
|
-
Now that we've fetched google's homepage,
|
37
|
+
Now that we've fetched google's homepage, let's try listing all of the links:
|
38
38
|
|
39
39
|
page.links.each do |link|
|
40
40
|
puts link.text
|
41
41
|
end
|
42
42
|
|
43
43
|
We can list the links, but Mechanize gives a few shortcuts to help us find a
|
44
|
-
link to click on.
|
44
|
+
link to click on. Let's say we wanted to click the link whose text is 'News'.
|
45
45
|
Normally, we would have to do this:
|
46
46
|
|
47
47
|
page = agent.page.links.find { |l| l.text == 'News' }.click
|
@@ -65,13 +65,13 @@ Or chain them together to find a link with certain text and certain href:
|
|
65
65
|
|
66
66
|
page.link_with(:text => 'News', :href => '/something')
|
67
67
|
|
68
|
-
These shortcuts that
|
68
|
+
These shortcuts that Mechanize provides are available on any list that you
|
69
69
|
can fetch like frames, iframes, or forms. Now that we know how to find and
|
70
|
-
click links,
|
70
|
+
click links, let's try something more complicated like filling out a form.
|
71
71
|
|
72
72
|
== Filling Out Forms
|
73
73
|
|
74
|
-
|
74
|
+
Let's continue with our google example. Here's the code we have so far:
|
75
75
|
require 'rubygems'
|
76
76
|
require 'mechanize'
|
77
77
|
|
@@ -83,17 +83,17 @@ that has a couple buttons and a few fields:
|
|
83
83
|
|
84
84
|
pp page
|
85
85
|
|
86
|
-
Now that we know the name of the form,
|
86
|
+
Now that we know the name of the form, let's fetch it off the page:
|
87
87
|
|
88
88
|
google_form = page.form('f')
|
89
89
|
|
90
90
|
Mechanize lets you access form input fields in a few different ways, but the
|
91
91
|
most convenient is that you can access input fields as accessors on the
|
92
|
-
object. So
|
92
|
+
object. So let's set the form field named 'q' on the form to 'ruby mechanize':
|
93
93
|
|
94
94
|
google_form.q = 'ruby mechanize'
|
95
95
|
|
96
|
-
To make sure that we set the value,
|
96
|
+
To make sure that we set the value, let's pretty print the form, and you should
|
97
97
|
see a line similar to this:
|
98
98
|
|
99
99
|
#<Mechanize::Field:0x1403488 @name="q", @value="ruby mechanize">
|
@@ -109,7 +109,7 @@ clicking the 'Google Search' button. If we had submitted the form without
|
|
109
109
|
a button, it would be like typing in the text field and hitting the return
|
110
110
|
button.
|
111
111
|
|
112
|
-
Lets take a look at the code all together:
|
112
|
+
Let's take a look at the code all together:
|
113
113
|
|
114
114
|
require 'rubygems'
|
115
115
|
require 'mechanize'
|
@@ -121,7 +121,7 @@ Lets take a look at the code all together:
|
|
121
121
|
page = agent.submit(google_form)
|
122
122
|
pp page
|
123
123
|
|
124
|
-
Before we go on to screen scraping,
|
124
|
+
Before we go on to screen scraping, let's take a look at forms a little more
|
125
125
|
in depth. Unless you want to skip ahead!
|
126
126
|
|
127
127
|
== Advanced Form Techniques
|
@@ -132,11 +132,11 @@ text input fields. Select fields are very similar to text fields, but they
|
|
132
132
|
have many options associated with them. If you select one option, mechanize
|
133
133
|
will de-select the other options (unless it is a multi select!).
|
134
134
|
|
135
|
-
For example,
|
135
|
+
For example, let's select an option on a list:
|
136
136
|
|
137
137
|
form.field_with(:name => 'list').options[0].select
|
138
138
|
|
139
|
-
Now
|
139
|
+
Now let's take a look at checkboxes and radio buttons. To select a checkbox,
|
140
140
|
just check it like this:
|
141
141
|
|
142
142
|
form.checkbox_with(:name => 'box').check
|
data/Manifest.txt
CHANGED
@@ -13,6 +13,7 @@ examples/rubyforge.rb
|
|
13
13
|
examples/spider.rb
|
14
14
|
examples/wikipedia_links_to_philosophy.rb
|
15
15
|
lib/mechanize.rb
|
16
|
+
lib/mechanize/chunked_termination_error.rb
|
16
17
|
lib/mechanize/content_type_error.rb
|
17
18
|
lib/mechanize/cookie.rb
|
18
19
|
lib/mechanize/cookie_jar.rb
|
@@ -66,6 +67,34 @@ lib/mechanize/response_code_error.rb
|
|
66
67
|
lib/mechanize/response_read_error.rb
|
67
68
|
lib/mechanize/robots_disallowed_error.rb
|
68
69
|
lib/mechanize/test_case.rb
|
70
|
+
lib/mechanize/test_case/.document
|
71
|
+
lib/mechanize/test_case/bad_chunking_servlet.rb
|
72
|
+
lib/mechanize/test_case/basic_auth_servlet.rb
|
73
|
+
lib/mechanize/test_case/content_type_servlet.rb
|
74
|
+
lib/mechanize/test_case/digest_auth_servlet.rb
|
75
|
+
lib/mechanize/test_case/file_upload_servlet.rb
|
76
|
+
lib/mechanize/test_case/form_servlet.rb
|
77
|
+
lib/mechanize/test_case/gzip_servlet.rb
|
78
|
+
lib/mechanize/test_case/header_servlet.rb
|
79
|
+
lib/mechanize/test_case/http_refresh_servlet.rb
|
80
|
+
lib/mechanize/test_case/infinite_redirect_servlet.rb
|
81
|
+
lib/mechanize/test_case/infinite_refresh_servlet.rb
|
82
|
+
lib/mechanize/test_case/many_cookies_as_string_servlet.rb
|
83
|
+
lib/mechanize/test_case/many_cookies_servlet.rb
|
84
|
+
lib/mechanize/test_case/modified_since_servlet.rb
|
85
|
+
lib/mechanize/test_case/ntlm_servlet.rb
|
86
|
+
lib/mechanize/test_case/one_cookie_no_spaces_servlet.rb
|
87
|
+
lib/mechanize/test_case/one_cookie_servlet.rb
|
88
|
+
lib/mechanize/test_case/quoted_value_cookie_servlet.rb
|
89
|
+
lib/mechanize/test_case/redirect_servlet.rb
|
90
|
+
lib/mechanize/test_case/referer_servlet.rb
|
91
|
+
lib/mechanize/test_case/refresh_with_empty_url.rb
|
92
|
+
lib/mechanize/test_case/refresh_without_url.rb
|
93
|
+
lib/mechanize/test_case/response_code_servlet.rb
|
94
|
+
lib/mechanize/test_case/send_cookies_servlet.rb
|
95
|
+
lib/mechanize/test_case/server.rb
|
96
|
+
lib/mechanize/test_case/servlets.rb
|
97
|
+
lib/mechanize/test_case/verb_servlet.rb
|
69
98
|
lib/mechanize/unauthorized_error.rb
|
70
99
|
lib/mechanize/unsupported_scheme_error.rb
|
71
100
|
lib/mechanize/util.rb
|
@@ -128,6 +157,7 @@ test/test_mechanize_download.rb
|
|
128
157
|
test/test_mechanize_file.rb
|
129
158
|
test/test_mechanize_file_connection.rb
|
130
159
|
test/test_mechanize_file_request.rb
|
160
|
+
test/test_mechanize_file_response.rb
|
131
161
|
test/test_mechanize_file_saver.rb
|
132
162
|
test/test_mechanize_form.rb
|
133
163
|
test/test_mechanize_form_check_box.rb
|
data/README.rdoc
CHANGED
@@ -28,8 +28,25 @@ The bug tracker is available here:
|
|
28
28
|
|
29
29
|
== Examples
|
30
30
|
|
31
|
-
If you are just starting, check out the GUIDE.
|
32
|
-
|
31
|
+
If you are just starting, check out the GUIDE. Also, check out the EXAMPLES
|
32
|
+
file.
|
33
|
+
|
34
|
+
== Developers
|
35
|
+
|
36
|
+
To run the tests for the first time:
|
37
|
+
|
38
|
+
gem install hoe rake
|
39
|
+
rake newb
|
40
|
+
|
41
|
+
This will install all the required dependencies for running the tests. For
|
42
|
+
subsequent test runs:
|
43
|
+
|
44
|
+
rake test
|
45
|
+
|
46
|
+
You can also use +autotest+ from the ZenTest gem to run tests.
|
47
|
+
|
48
|
+
See also Mechanize::TestCase to read about the built-in testing
|
49
|
+
infrastructure.
|
33
50
|
|
34
51
|
== Authors
|
35
52
|
|
data/lib/mechanize.rb
CHANGED
@@ -73,7 +73,7 @@ class Mechanize
|
|
73
73
|
##
|
74
74
|
# The version of Mechanize you are using.
|
75
75
|
|
76
|
-
VERSION = '2.4'
|
76
|
+
VERSION = '2.5'
|
77
77
|
|
78
78
|
##
|
79
79
|
# Base mechanize error class
|
@@ -263,16 +263,16 @@ class Mechanize
|
|
263
263
|
attr_accessor :history_added
|
264
264
|
|
265
265
|
##
|
266
|
-
# A list of hooks to call after retrieving a response.
|
267
|
-
# the agent and the response
|
266
|
+
# A list of hooks to call after retrieving a response. Hooks are called with
|
267
|
+
# the agent, the URI, the response, and the response body.
|
268
268
|
|
269
269
|
def post_connect_hooks
|
270
270
|
@agent.post_connect_hooks
|
271
271
|
end
|
272
272
|
|
273
273
|
##
|
274
|
-
# A list of hooks to call before
|
275
|
-
# the agent
|
274
|
+
# A list of hooks to call before retrieving a response. Hooks are called
|
275
|
+
# with the agent, the URI, the response, and the response body.
|
276
276
|
|
277
277
|
def pre_connect_hooks
|
278
278
|
@agent.pre_connect_hooks
|
@@ -752,6 +752,28 @@ Use of #auth and #basic_auth are deprecated due to a security vulnerability.
|
|
752
752
|
@agent.idle_timeout = idle_timeout
|
753
753
|
end
|
754
754
|
|
755
|
+
##
|
756
|
+
# When set to true mechanize will ignore an EOF during chunked transfer
|
757
|
+
# encoding so long as at least one byte was received. Be careful when
|
758
|
+
# enabling this as it may cause data loss.
|
759
|
+
#
|
760
|
+
# Net::HTTP does not inform mechanize of where in the chunked stream the EOF
|
761
|
+
# occurred. Usually it is after the last-chunk but before the terminating
|
762
|
+
# CRLF (invalid termination) but it may occur earlier. In the second case
|
763
|
+
# your response body may be incomplete.
|
764
|
+
|
765
|
+
def ignore_bad_chunking
|
766
|
+
@agent.ignore_bad_chunking
|
767
|
+
end
|
768
|
+
|
769
|
+
##
|
770
|
+
# When set to true mechanize will ignore an EOF during chunked transfer
|
771
|
+
# encoding. See ignore_bad_chunking for further details
|
772
|
+
|
773
|
+
def ignore_bad_chunking= ignore_bad_chunking
|
774
|
+
@agent.ignore_bad_chunking = ignore_bad_chunking
|
775
|
+
end
|
776
|
+
|
755
777
|
##
|
756
778
|
# Are HTTP/1.1 keep-alive connections enabled?
|
757
779
|
|
@@ -1219,6 +1241,8 @@ Use of #auth and #basic_auth are deprecated due to a security vulnerability.
|
|
1219
1241
|
|
1220
1242
|
end
|
1221
1243
|
|
1244
|
+
require 'mechanize/response_read_error'
|
1245
|
+
require 'mechanize/chunked_termination_error'
|
1222
1246
|
require 'mechanize/content_type_error'
|
1223
1247
|
require 'mechanize/cookie'
|
1224
1248
|
require 'mechanize/cookie_jar'
|
@@ -1244,9 +1268,8 @@ require 'mechanize/pluggable_parsers'
|
|
1244
1268
|
require 'mechanize/redirect_limit_reached_error'
|
1245
1269
|
require 'mechanize/redirect_not_get_or_head_error'
|
1246
1270
|
require 'mechanize/response_code_error'
|
1247
|
-
require 'mechanize/unauthorized_error'
|
1248
|
-
require 'mechanize/response_read_error'
|
1249
1271
|
require 'mechanize/robots_disallowed_error'
|
1272
|
+
require 'mechanize/unauthorized_error'
|
1250
1273
|
require 'mechanize/unsupported_scheme_error'
|
1251
1274
|
require 'mechanize/util'
|
1252
1275
|
|
data/lib/mechanize/form.rb
CHANGED
@@ -231,12 +231,14 @@ class Mechanize::Form
|
|
231
231
|
radio_groups.each_value do |g|
|
232
232
|
checked = g.select {|f| f.checked}
|
233
233
|
|
234
|
-
if checked.size
|
235
|
-
|
236
|
-
|
237
|
-
elsif checked.size > 1
|
234
|
+
if checked.uniq.size > 1 then
|
235
|
+
values = checked.map { |button| button.value }.join(', ').inspect
|
236
|
+
name = checked.first.name.inspect
|
238
237
|
raise Mechanize::Error,
|
239
|
-
"
|
238
|
+
"radiobuttons #{values} are checked in the #{name} group, " \
|
239
|
+
"only one is allowed"
|
240
|
+
else
|
241
|
+
successful_controls << checked.first unless checked.empty?
|
240
242
|
end
|
241
243
|
end
|
242
244
|
|
data/lib/mechanize/form/radio_button.rb
CHANGED
@@ -4,6 +4,7 @@
|
|
4
4
|
|
5
5
|
class Mechanize::Form::RadioButton < Mechanize::Form::Field
|
6
6
|
attr_accessor :checked
|
7
|
+
attr_reader :form
|
7
8
|
|
8
9
|
def initialize node, form
|
9
10
|
@checked = !!node['checked']
|
@@ -11,6 +12,15 @@ class Mechanize::Form::RadioButton < Mechanize::Form::Field
|
|
11
12
|
super(node)
|
12
13
|
end
|
13
14
|
|
15
|
+
def == other # :nodoc:
|
16
|
+
self.class === other and
|
17
|
+
other.form == @form and
|
18
|
+
other.name == @name and
|
19
|
+
other.value == @value
|
20
|
+
end
|
21
|
+
|
22
|
+
alias eql? == # :nodoc:
|
23
|
+
|
14
24
|
def check
|
15
25
|
uncheck_peers
|
16
26
|
@checked = true
|
@@ -26,6 +36,10 @@ class Mechanize::Form::RadioButton < Mechanize::Form::Field
|
|
26
36
|
checked ? uncheck : check
|
27
37
|
end
|
28
38
|
|
39
|
+
def hash # :nodoc:
|
40
|
+
@form.hash ^ @name.hash ^ @value.hash
|
41
|
+
end
|
42
|
+
|
29
43
|
def label
|
30
44
|
(id = self['id']) && @form.page.labels_hash[id] || nil
|
31
45
|
end
|
data/lib/mechanize/http/agent.rb
CHANGED
@@ -106,6 +106,11 @@ class Mechanize::HTTP::Agent
|
|
106
106
|
|
107
107
|
attr_reader :http # :nodoc:
|
108
108
|
|
109
|
+
# When set to true mechanize will ignore an EOF during chunked transfer
|
110
|
+
# encoding so long as at least one byte was received. Be careful when
|
111
|
+
# enabling this as it may cause data loss.
|
112
|
+
attr_accessor :ignore_bad_chunking
|
113
|
+
|
109
114
|
# Handlers for various URI schemes
|
110
115
|
attr_accessor :scheme_handlers
|
111
116
|
|
@@ -123,6 +128,7 @@ class Mechanize::HTTP::Agent
|
|
123
128
|
@follow_meta_refresh_self = false
|
124
129
|
@gzip_enabled = true
|
125
130
|
@history = Mechanize::History.new
|
131
|
+
@ignore_bad_chunking = false
|
126
132
|
@keep_alive = true
|
127
133
|
@max_file_buffer = 100_000 # 100kB for response bodies
|
128
134
|
@open_timeout = nil
|
@@ -248,13 +254,20 @@ class Mechanize::HTTP::Agent
|
|
248
254
|
response_body_io = nil
|
249
255
|
|
250
256
|
# Send the request
|
251
|
-
|
252
|
-
|
257
|
+
begin
|
258
|
+
response = connection.request(uri, request) { |res|
|
259
|
+
response_log res
|
253
260
|
|
254
|
-
|
261
|
+
response_body_io = response_read res, request, uri
|
255
262
|
|
256
|
-
|
257
|
-
|
263
|
+
res
|
264
|
+
}
|
265
|
+
rescue Mechanize::ChunkedTerminationError => e
|
266
|
+
raise unless @ignore_bad_chunking
|
267
|
+
|
268
|
+
response = e.response
|
269
|
+
response_body_io = e.body_io
|
270
|
+
end
|
258
271
|
|
259
272
|
hook_content_encoding response, uri, response_body_io
|
260
273
|
|
@@ -283,7 +296,7 @@ class Mechanize::HTTP::Agent
|
|
283
296
|
log.debug("Got cached page") if log
|
284
297
|
visited_page(uri) || page
|
285
298
|
when Net::HTTPRedirection
|
286
|
-
response_redirect response, method, page, redirects, referer
|
299
|
+
response_redirect response, method, page, redirects, headers, referer
|
287
300
|
when Net::HTTPUnauthorized
|
288
301
|
response_authenticate(response, page, uri, request, headers, params,
|
289
302
|
referer)
|
@@ -402,20 +415,23 @@ class Mechanize::HTTP::Agent
|
|
402
415
|
zio.finish
|
403
416
|
|
404
417
|
return out_io
|
405
|
-
rescue Zlib::Error
|
406
|
-
log.
|
418
|
+
rescue Zlib::Error => gz_error
|
419
|
+
log.warn "unable to gunzip response: #{gz_error} (#{gz_error.class})" if
|
420
|
+
log
|
407
421
|
|
408
422
|
body_io.rewind
|
409
423
|
body_io.read 10
|
410
424
|
|
411
425
|
begin
|
426
|
+
log.warn "trying raw inflate on response" if log
|
412
427
|
return inflate body_io, -Zlib::MAX_WBITS
|
413
428
|
rescue Zlib::Error => e
|
414
|
-
log.error
|
429
|
+
log.error "unable to inflate response: #{e} (#{e.class})" if log
|
415
430
|
raise
|
416
431
|
end
|
417
432
|
ensure
|
418
|
-
|
433
|
+
# do not close a second time if we failed the first time
|
434
|
+
zio.close if zio and not (zio.closed? or gz_error)
|
419
435
|
body_io.close unless body_io.closed?
|
420
436
|
end
|
421
437
|
|
@@ -583,7 +599,13 @@ class Mechanize::HTTP::Agent
|
|
583
599
|
if RUBY_VERSION >= "1.9.0"
|
584
600
|
Mechanize::Util.uri_escape(match)
|
585
601
|
else
|
586
|
-
|
602
|
+
begin
|
603
|
+
sprintf('%%%X', match.unpack($KCODE == 'UTF8' ? 'U' : 'C').first)
|
604
|
+
rescue ArgumentError
|
605
|
+
# workaround for ruby 1.8 with -Ku but ISO-8859-1 characters in
|
606
|
+
# URIs. See #227. I can't wait to drop 1.8 support
|
607
|
+
sprintf('%%%X', match.unpack('C').first)
|
608
|
+
end
|
587
609
|
end
|
588
610
|
}
|
589
611
|
|
@@ -845,9 +867,7 @@ class Mechanize::HTTP::Agent
|
|
845
867
|
content_length = response.content_length
|
846
868
|
|
847
869
|
if use_tempfile? content_length then
|
848
|
-
body_io =
|
849
|
-
body_io.unlink
|
850
|
-
body_io.binmode if defined? body_io.binmode
|
870
|
+
body_io = make_tempfile 'mechanize-raw'
|
851
871
|
else
|
852
872
|
body_io = StringIO.new
|
853
873
|
end
|
@@ -860,9 +880,7 @@ class Mechanize::HTTP::Agent
|
|
860
880
|
total += part.length
|
861
881
|
|
862
882
|
if StringIO === body_io and use_tempfile? total then
|
863
|
-
new_io =
|
864
|
-
new_io.unlink
|
865
|
-
new_io.binmode
|
883
|
+
new_io = make_tempfile 'mechanize-raw'
|
866
884
|
|
867
885
|
new_io.write body_io.string
|
868
886
|
|
@@ -872,6 +890,13 @@ class Mechanize::HTTP::Agent
|
|
872
890
|
body_io.write(part)
|
873
891
|
log.debug("Read #{part.length} bytes (#{total} total)") if log
|
874
892
|
}
|
893
|
+
rescue EOFError => e
|
894
|
+
# terminating CRLF might be missing, let the user check the document
|
895
|
+
raise unless response.chunked? and total.nonzero?
|
896
|
+
|
897
|
+
body_io.rewind
|
898
|
+
raise Mechanize::ChunkedTerminationError.new(e, response, body_io, uri,
|
899
|
+
@context)
|
875
900
|
rescue Net::HTTP::Persistent::Error => e
|
876
901
|
body_io.rewind
|
877
902
|
raise Mechanize::ResponseReadError.new(e, response, body_io, uri,
|
@@ -895,7 +920,8 @@ class Mechanize::HTTP::Agent
|
|
895
920
|
body_io
|
896
921
|
end
|
897
922
|
|
898
|
-
def response_redirect
|
923
|
+
def response_redirect(response, method, page, redirects, headers,
|
924
|
+
referer = current_page)
|
899
925
|
case @redirect_ok
|
900
926
|
when true, :all
|
901
927
|
# shortcut
|
@@ -915,7 +941,7 @@ class Mechanize::HTTP::Agent
|
|
915
941
|
@history.push(page, page.uri)
|
916
942
|
new_uri = resolve response['Location'].to_s, page
|
917
943
|
|
918
|
-
fetch new_uri, redirect_method,
|
944
|
+
fetch new_uri, redirect_method, headers, [], referer, redirects + 1
|
919
945
|
end
|
920
946
|
|
921
947
|
# :section: Robots
|
@@ -1089,10 +1115,7 @@ class Mechanize::HTTP::Agent
|
|
1089
1115
|
|
1090
1116
|
until input_io.eof? do
|
1091
1117
|
if StringIO === out_io and use_tempfile? out_io.size then
|
1092
|
-
new_io =
|
1093
|
-
new_io.unlink
|
1094
|
-
new_io.binmode
|
1095
|
-
|
1118
|
+
new_io = make_tempfile name
|
1096
1119
|
new_io.write out_io.string
|
1097
1120
|
out_io = new_io
|
1098
1121
|
end
|
@@ -1115,7 +1138,7 @@ class Mechanize::HTTP::Agent
|
|
1115
1138
|
inflate.inflate chunk
|
1116
1139
|
end
|
1117
1140
|
|
1118
|
-
|
1141
|
+
inflate.finish
|
1119
1142
|
|
1120
1143
|
out_io
|
1121
1144
|
ensure
|
@@ -1158,6 +1181,13 @@ class Mechanize::HTTP::Agent
|
|
1158
1181
|
@http.proxy = proxy_uri
|
1159
1182
|
end
|
1160
1183
|
|
1184
|
+
def make_tempfile name
|
1185
|
+
io = Tempfile.new name
|
1186
|
+
io.unlink
|
1187
|
+
io.binmode if io.respond_to? :binmode
|
1188
|
+
io
|
1189
|
+
end
|
1190
|
+
|
1161
1191
|
def use_tempfile? size
|
1162
1192
|
return false unless @max_file_buffer
|
1163
1193
|
return false unless size
|