mechanize 0.7.6 → 0.7.7
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of mechanize might be problematic. Click here for more details.
- data/EXAMPLES.txt +87 -40
- data/History.txt +21 -0
- data/Manifest.txt +51 -47
- data/lib/www/mechanize.rb +88 -22
- data/lib/www/mechanize/cookie.rb +1 -1
- data/lib/www/mechanize/form.rb +27 -14
- data/lib/www/mechanize/form/multi_select_list.rb +1 -1
- data/lib/www/mechanize/monkey_patch.rb +3 -0
- data/lib/www/mechanize/page.rb +20 -16
- data/lib/www/mechanize/page/link.rb +2 -2
- data/lib/www/mechanize/redirect_limit_reached_error.rb +18 -0
- data/test/helper.rb +14 -0
- data/test/htdocs/meta_cookie.html +11 -0
- data/test/servlets.rb +10 -0
- data/test/{tc_authenticate.rb → test_authenticate.rb} +0 -0
- data/test/{tc_bad_links.rb → test_bad_links.rb} +0 -0
- data/test/{tc_blank_form.rb → test_blank_form.rb} +0 -0
- data/test/{tc_checkboxes.rb → test_checkboxes.rb} +0 -0
- data/test/{tc_cookie_class.rb → test_cookie_class.rb} +9 -0
- data/test/{tc_cookie_jar.rb → test_cookie_jar.rb} +0 -0
- data/test/{tc_cookies.rb → test_cookies.rb} +6 -0
- data/test/{tc_encoded_links.rb → test_encoded_links.rb} +1 -1
- data/test/{tc_errors.rb → test_errors.rb} +0 -0
- data/test/{tc_follow_meta.rb → test_follow_meta.rb} +0 -0
- data/test/{tc_form_action.rb → test_form_action.rb} +1 -1
- data/test/{tc_form_as_hash.rb → test_form_as_hash.rb} +0 -0
- data/test/{tc_form_button.rb → test_form_button.rb} +0 -0
- data/test/{tc_form_no_inputname.rb → test_form_no_inputname.rb} +0 -0
- data/test/{tc_forms.rb → test_forms.rb} +0 -0
- data/test/{tc_frames.rb → test_frames.rb} +0 -0
- data/test/test_get_headers.rb +45 -0
- data/test/{tc_gzipping.rb → test_gzipping.rb} +0 -0
- data/test/test_hash_api.rb +42 -0
- data/test/{tc_history.rb → test_history.rb} +0 -0
- data/test/{tc_history_added.rb → test_history_added.rb} +0 -0
- data/test/{tc_html_unscape_forms.rb → test_html_unscape_forms.rb} +0 -0
- data/test/{tc_if_modified_since.rb → test_if_modified_since.rb} +0 -0
- data/test/{tc_keep_alive.rb → test_keep_alive.rb} +0 -0
- data/test/{tc_links.rb → test_links.rb} +0 -0
- data/test/{tc_mech.rb → test_mech.rb} +2 -2
- data/test/{tc_mechanize_file.rb → test_mechanize_file.rb} +0 -0
- data/test/{tc_multi_select.rb → test_multi_select.rb} +0 -0
- data/test/{tc_no_attributes.rb → test_no_attributes.rb} +0 -0
- data/test/{tc_option.rb → test_option.rb} +0 -0
- data/test/{tc_page.rb → test_page.rb} +17 -0
- data/test/{tc_pluggable_parser.rb → test_pluggable_parser.rb} +0 -0
- data/test/{tc_post_form.rb → test_post_form.rb} +0 -0
- data/test/{tc_pretty_print.rb → test_pretty_print.rb} +0 -0
- data/test/{tc_radiobutton.rb → test_radiobutton.rb} +0 -0
- data/test/test_redirect_limit_reached.rb +41 -0
- data/test/{tc_referer.rb → test_referer.rb} +0 -0
- data/test/{tc_relative_links.rb → test_relative_links.rb} +0 -0
- data/test/{tc_response_code.rb → test_response_code.rb} +0 -0
- data/test/{tc_save_file.rb → test_save_file.rb} +0 -0
- data/test/{tc_select.rb → test_select.rb} +0 -0
- data/test/{tc_select_all.rb → test_select_all.rb} +0 -0
- data/test/{tc_select_none.rb → test_select_none.rb} +0 -0
- data/test/{tc_select_noopts.rb → test_select_noopts.rb} +0 -0
- data/test/{tc_set_fields.rb → test_set_fields.rb} +8 -0
- data/test/{tc_ssl_server.rb → test_ssl_server.rb} +0 -0
- data/test/{tc_subclass.rb → test_subclass.rb} +0 -0
- data/test/{tc_textarea.rb → test_textarea.rb} +0 -0
- data/test/{tc_upload.rb → test_upload.rb} +11 -11
- metadata +106 -52
- data/test/test_all.rb +0 -5
data/EXAMPLES.txt
CHANGED
@@ -4,53 +4,68 @@
|
|
4
4
|
require 'rubygems'
|
5
5
|
require 'mechanize'
|
6
6
|
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
7
|
+
a = WWW::Mechanize.new { |agent|
|
8
|
+
agent.user_agent_alias = 'Mac Safari'
|
9
|
+
}
|
10
|
+
|
11
|
+
a.get('http://google.com/') do |page|
|
12
|
+
search_result = page.form_with(:name => 'f') do |search|
|
13
|
+
search.q = 'Hello world'
|
14
|
+
end.submit
|
15
|
+
|
16
|
+
search_result.links.each do |link|
|
17
|
+
puts link.text
|
18
|
+
end
|
19
|
+
end
|
14
20
|
|
15
21
|
== Rubyforge
|
16
|
-
|
22
|
+
|
23
|
+
a = WWW::Mechanize.new
|
24
|
+
a.get('http://rubyforge.org/') do |page|
|
25
|
+
# Click the login link
|
26
|
+
login_page = a.click(page.links.text(/Log In/))
|
17
27
|
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
form.form_loginname = ARGV[0]
|
24
|
-
form.form_pw = ARGV[1]
|
25
|
-
page = agent.submit(form, form.buttons.first)
|
28
|
+
# Submit the login form
|
29
|
+
my_page = login_page.form_with(:action => '/account/login.php') do |f|
|
30
|
+
f.form_loginname = ARGV[0]
|
31
|
+
f.form_pw = ARGV[1]
|
32
|
+
end.click_button
|
26
33
|
|
27
|
-
|
34
|
+
my_page.links.each do |link|
|
35
|
+
text = link.text.strip
|
36
|
+
next unless text.length > 0
|
37
|
+
puts text
|
38
|
+
end
|
39
|
+
end
|
28
40
|
|
29
41
|
== File Upload
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
page = agent.click page.links.text('Upload')
|
48
|
-
|
49
|
-
# Fill out the form
|
50
|
-
form = page.forms.action('/photos_upload_process.gne').first
|
51
|
-
form.file_uploads.name('file1').first.file_name = ARGV[2]
|
52
|
-
agent.submit(form)
|
42
|
+
Upload a file to flickr.
|
43
|
+
|
44
|
+
a = WWW::Mechanize.new { |agent|
|
45
|
+
# Flickr refreshes after login
|
46
|
+
agent.follow_meta_refresh = true
|
47
|
+
}
|
48
|
+
|
49
|
+
a.get('http://flickr.com/') do |home_page|
|
50
|
+
signin_page = a.click(home_page.links.text(/Sign In/))
|
51
|
+
|
52
|
+
my_page = signin_page.form_with(:name => 'login_form') do |form|
|
53
|
+
form.login = ARGV[0]
|
54
|
+
form.passwd = ARGV[1]
|
55
|
+
end.submit
|
56
|
+
|
57
|
+
# Click the upload link
|
58
|
+
upload_page = a.click(my_page.links.text(/Upload/))
|
53
59
|
|
60
|
+
# We want the basic upload page.
|
61
|
+
upload_page = a.click(upload_page.links.text(/basic Uploader/))
|
62
|
+
|
63
|
+
# Upload the file
|
64
|
+
upload_page.form_with(:method => 'POST') do |upload_form|
|
65
|
+
upload_form.file_uploads.first.file_name = ARGV[2]
|
66
|
+
end.submit
|
67
|
+
end
|
68
|
+
|
54
69
|
== Pluggable Parsers
|
55
70
|
Let's say you want HTML pages to automatically be parsed with Rubyful Soup.
|
56
71
|
This example shows you how:
|
@@ -122,3 +137,35 @@ This example also demonstrates subclassing Mechanize.
|
|
122
137
|
|
123
138
|
TestMech.new.process
|
124
139
|
|
140
|
+
== Client Certificate Authentication (Mutual Auth)
|
141
|
+
|
142
|
+
In most cases a client certificate is created as an additional layer of security
|
143
|
+
for certain websites. The specific case that this was initially tested on was
|
144
|
+
for automating the download of archived images from a bank's (Wachovia) lockbox
|
145
|
+
system. Once the certificate is installed into your browser you will have to
|
146
|
+
export it and split the certificate and private key into separate files. Exported
|
147
|
+
files are usually in .p12 format (IE 7 & Firefox 2.0) which stands for PKCS #12.
|
148
|
+
You can convert them from p12 to pem format by using the following commands:
|
149
|
+
|
150
|
+
openssl.exe pkcs12 -in input_file.p12 -clcerts -out example.key -nocerts -nodes
|
151
|
+
openssl.exe pkcs12 -in input_file.p12 -clcerts -out example.cer -nokeys
|
152
|
+
|
153
|
+
require 'rubygems'
|
154
|
+
require 'mechanize'
|
155
|
+
|
156
|
+
# create Mechanize instance
|
157
|
+
agent = WWW::Mechanize.new
|
158
|
+
|
159
|
+
# set the path of the certificate file
|
160
|
+
agent.cert = 'example.cer'
|
161
|
+
|
162
|
+
# set the path of the private key file
|
163
|
+
agent.key = 'example.key'
|
164
|
+
|
165
|
+
# get the login form & fill it out with the username/password
|
166
|
+
login_form = @agent.get("http://example.com/login_page").form('Login')
|
167
|
+
login_form.Userid = 'TestUser'
|
168
|
+
login_form.Password = 'TestPassword'
|
169
|
+
|
170
|
+
# submit login form
|
171
|
+
agent.submit(login_form, login_form.buttons.first)
|
data/History.txt
CHANGED
@@ -1,5 +1,26 @@
|
|
1
1
|
= Mechanize CHANGELOG
|
2
2
|
|
3
|
+
=== 0.7.7
|
4
|
+
|
5
|
+
* New Features:
|
6
|
+
* Page#form_with takes a +criteria+ hash.
|
7
|
+
* Page#form is changed to Page#form_with
|
8
|
+
* Mechanize#get takes custom http headers. Thanks Mike Dalessio!
|
9
|
+
* Form#click_button submits a form defaulting to the current button.
|
10
|
+
* Form#set_fields now takes a hash. Thanks Tobi!
|
11
|
+
* Mechanize#redirection_limit= for setting the max number of redirects.
|
12
|
+
|
13
|
+
* Bug Fixes:
|
14
|
+
* Added more examples. Thanks Robert Jackson.
|
15
|
+
* #20480 Making sure the Host header is set.
|
16
|
+
* #20672 Making sure cookies with weird semicolons work.
|
17
|
+
* Fixed bug with percent signs in urls.
|
18
|
+
http://d.hatena.ne.jp/kitamomonga/20080410/ruby_mechanize_percent_url_bug
|
19
|
+
* #21132 Not checking for EOF errors on redirect
|
20
|
+
* Fixed a weird gzipping error.
|
21
|
+
* #21233 Smarter multipart boundary. Thanks Todd Willey!
|
22
|
+
* #20097 Supporting meta tag cookies.
|
23
|
+
|
3
24
|
=== 0.7.6
|
4
25
|
|
5
26
|
* New Features:
|
data/Manifest.txt
CHANGED
@@ -40,6 +40,7 @@ lib/www/mechanize/page/frame.rb
|
|
40
40
|
lib/www/mechanize/page/link.rb
|
41
41
|
lib/www/mechanize/page/meta.rb
|
42
42
|
lib/www/mechanize/pluggable_parsers.rb
|
43
|
+
lib/www/mechanize/redirect_limit_reached_error.rb
|
43
44
|
lib/www/mechanize/response_code_error.rb
|
44
45
|
lib/www/mechanize/unsupported_scheme_error.rb
|
45
46
|
test/data/htpasswd
|
@@ -69,6 +70,7 @@ test/htdocs/google.html
|
|
69
70
|
test/htdocs/iframe_test.html
|
70
71
|
test/htdocs/index.html
|
71
72
|
test/htdocs/link with space.html
|
73
|
+
test/htdocs/meta_cookie.html
|
72
74
|
test/htdocs/no_title_test.html
|
73
75
|
test/htdocs/relative/tc_relative_links.html
|
74
76
|
test/htdocs/tc_bad_links.html
|
@@ -88,50 +90,52 @@ test/htdocs/tc_textarea.html
|
|
88
90
|
test/htdocs/unusual______.html
|
89
91
|
test/servlets.rb
|
90
92
|
test/ssl_server.rb
|
91
|
-
test/
|
92
|
-
test/
|
93
|
-
test/
|
94
|
-
test/
|
95
|
-
test/
|
96
|
-
test/
|
97
|
-
test/
|
98
|
-
test/
|
99
|
-
test/
|
100
|
-
test/
|
101
|
-
test/
|
102
|
-
test/
|
103
|
-
test/
|
104
|
-
test/
|
105
|
-
test/
|
106
|
-
test/
|
107
|
-
test/
|
108
|
-
test/
|
109
|
-
test/
|
110
|
-
test/
|
111
|
-
test/
|
112
|
-
test/
|
113
|
-
test/
|
114
|
-
test/
|
115
|
-
test/
|
116
|
-
test/
|
117
|
-
test/
|
118
|
-
test/
|
119
|
-
test/
|
120
|
-
test/
|
121
|
-
test/
|
122
|
-
test/
|
123
|
-
test/
|
124
|
-
test/
|
125
|
-
test/
|
126
|
-
test/
|
127
|
-
test/
|
128
|
-
test/
|
129
|
-
test/
|
130
|
-
test/
|
131
|
-
test/
|
132
|
-
test/
|
133
|
-
test/
|
134
|
-
test/
|
135
|
-
test/
|
136
|
-
test/
|
137
|
-
test/
|
93
|
+
test/test_authenticate.rb
|
94
|
+
test/test_bad_links.rb
|
95
|
+
test/test_blank_form.rb
|
96
|
+
test/test_checkboxes.rb
|
97
|
+
test/test_cookie_class.rb
|
98
|
+
test/test_cookie_jar.rb
|
99
|
+
test/test_cookies.rb
|
100
|
+
test/test_encoded_links.rb
|
101
|
+
test/test_errors.rb
|
102
|
+
test/test_follow_meta.rb
|
103
|
+
test/test_form_action.rb
|
104
|
+
test/test_form_as_hash.rb
|
105
|
+
test/test_form_button.rb
|
106
|
+
test/test_form_no_inputname.rb
|
107
|
+
test/test_forms.rb
|
108
|
+
test/test_frames.rb
|
109
|
+
test/test_get_headers.rb
|
110
|
+
test/test_gzipping.rb
|
111
|
+
test/test_hash_api.rb
|
112
|
+
test/test_history.rb
|
113
|
+
test/test_history_added.rb
|
114
|
+
test/test_html_unscape_forms.rb
|
115
|
+
test/test_if_modified_since.rb
|
116
|
+
test/test_keep_alive.rb
|
117
|
+
test/test_links.rb
|
118
|
+
test/test_mech.rb
|
119
|
+
test/test_mechanize_file.rb
|
120
|
+
test/test_multi_select.rb
|
121
|
+
test/test_no_attributes.rb
|
122
|
+
test/test_option.rb
|
123
|
+
test/test_page.rb
|
124
|
+
test/test_pluggable_parser.rb
|
125
|
+
test/test_post_form.rb
|
126
|
+
test/test_pretty_print.rb
|
127
|
+
test/test_radiobutton.rb
|
128
|
+
test/test_redirect_limit_reached.rb
|
129
|
+
test/test_referer.rb
|
130
|
+
test/test_relative_links.rb
|
131
|
+
test/test_response_code.rb
|
132
|
+
test/test_save_file.rb
|
133
|
+
test/test_select.rb
|
134
|
+
test/test_select_all.rb
|
135
|
+
test/test_select_none.rb
|
136
|
+
test/test_select_noopts.rb
|
137
|
+
test/test_set_fields.rb
|
138
|
+
test/test_ssl_server.rb
|
139
|
+
test/test_subclass.rb
|
140
|
+
test/test_textarea.rb
|
141
|
+
test/test_upload.rb
|
data/lib/www/mechanize.rb
CHANGED
@@ -5,10 +5,14 @@ require 'webrick/httputils'
|
|
5
5
|
require 'zlib'
|
6
6
|
require 'stringio'
|
7
7
|
require 'digest/md5'
|
8
|
+
require 'fileutils'
|
9
|
+
require 'hpricot'
|
10
|
+
require 'forwardable'
|
8
11
|
|
9
12
|
require 'www/mechanize/content_type_error'
|
10
13
|
require 'www/mechanize/response_code_error'
|
11
14
|
require 'www/mechanize/unsupported_scheme_error'
|
15
|
+
require 'www/mechanize/redirect_limit_reached_error'
|
12
16
|
require 'www/mechanize/cookie'
|
13
17
|
require 'www/mechanize/cookie_jar'
|
14
18
|
require 'www/mechanize/history'
|
@@ -39,7 +43,7 @@ module WWW
|
|
39
43
|
class Mechanize
|
40
44
|
##
|
41
45
|
# The version of Mechanize you are using.
|
42
|
-
VERSION = '0.7.
|
46
|
+
VERSION = '0.7.7'
|
43
47
|
|
44
48
|
##
|
45
49
|
# User Agent aliases
|
@@ -73,6 +77,7 @@ module WWW
|
|
73
77
|
attr_accessor :verify_callback
|
74
78
|
attr_accessor :history_added
|
75
79
|
attr_accessor :scheme_handlers
|
80
|
+
attr_accessor :redirection_limit
|
76
81
|
|
77
82
|
attr_reader :history
|
78
83
|
attr_reader :pluggable_parser
|
@@ -81,6 +86,8 @@ module WWW
|
|
81
86
|
|
82
87
|
@@nonce_count = -1
|
83
88
|
CNONCE = Digest::MD5.hexdigest("%x" % (Time.now.to_i + rand(65535)))
|
89
|
+
@html_parser = Hpricot
|
90
|
+
class << self; attr_accessor :html_parser end
|
84
91
|
|
85
92
|
def initialize
|
86
93
|
# attr_accessors
|
@@ -122,6 +129,7 @@ module WWW
|
|
122
129
|
@conditional_requests = true
|
123
130
|
|
124
131
|
@follow_meta_refresh = false
|
132
|
+
@redirection_limit = 20
|
125
133
|
|
126
134
|
# Connection Cache & Keep alive
|
127
135
|
@connection_cache = {}
|
@@ -170,10 +178,18 @@ module WWW
|
|
170
178
|
end
|
171
179
|
|
172
180
|
# Fetches the URL passed in and returns a page.
|
173
|
-
def get(
|
174
|
-
unless
|
175
|
-
|
176
|
-
parameters
|
181
|
+
def get(options, parameters = [], referer = nil)
|
182
|
+
unless options.is_a? Hash
|
183
|
+
url = options
|
184
|
+
unless parameters.respond_to?(:each) # FIXME: Remove this in 0.8.0
|
185
|
+
referer = parameters
|
186
|
+
parameters = []
|
187
|
+
end
|
188
|
+
else
|
189
|
+
raise ArgumentError.new("url must be specified") unless url = options[:url]
|
190
|
+
parameters = options[:params] || []
|
191
|
+
referer = options[:referer]
|
192
|
+
headers = options[:headers]
|
177
193
|
end
|
178
194
|
|
179
195
|
referer ||= current_page || Page.new(nil, {'content-type'=>'text/html'})
|
@@ -196,7 +212,7 @@ module WWW
|
|
196
212
|
|
197
213
|
# fetch the page
|
198
214
|
request = fetch_request(abs_uri)
|
199
|
-
page = fetch_page(abs_uri, request, referer)
|
215
|
+
page = fetch_page(:uri => abs_uri, :request => request, :page => referer, :headers => headers)
|
200
216
|
add_to_history(page)
|
201
217
|
yield page if block_given?
|
202
218
|
page
|
@@ -217,10 +233,9 @@ module WWW
|
|
217
233
|
rescue
|
218
234
|
nil
|
219
235
|
end
|
220
|
-
|
221
|
-
link
|
222
|
-
|
223
|
-
)
|
236
|
+
href = link.respond_to?(:has_attribute?) ?
|
237
|
+
(link['href'] || link['src']) : link.href
|
238
|
+
uri = to_absolute_uri(href, referer || current_page())
|
224
239
|
get(uri, referer)
|
225
240
|
end
|
226
241
|
|
@@ -237,7 +252,11 @@ module WWW
|
|
237
252
|
# or
|
238
253
|
# agent.post('http://example.com/', [ ["foo", "bar"] ])
|
239
254
|
def post(url, query={})
|
240
|
-
node =
|
255
|
+
node = {}
|
256
|
+
# Create a fake form
|
257
|
+
class << node
|
258
|
+
def search(*args); []; end
|
259
|
+
end
|
241
260
|
node['method'] = 'POST'
|
242
261
|
node['enctype'] = 'application/x-www-form-urlencoded'
|
243
262
|
|
@@ -312,7 +331,7 @@ module WWW
|
|
312
331
|
s.gsub(/&(\w+|#[0-9]+);/) { |match|
|
313
332
|
number = case match
|
314
333
|
when /&(\w+);/
|
315
|
-
|
334
|
+
Mechanize.html_parser::NamedCharacters[$1]
|
316
335
|
when /&#([0-9]+);/
|
317
336
|
$1.to_i
|
318
337
|
end
|
@@ -323,7 +342,13 @@ module WWW
|
|
323
342
|
end
|
324
343
|
|
325
344
|
protected
|
326
|
-
def set_headers(uri, request,
|
345
|
+
def set_headers(uri, request, options)
|
346
|
+
unless options.is_a? Hash
|
347
|
+
cur_page = options
|
348
|
+
else
|
349
|
+
raise ArgumentError.new("cur_page must be specified") unless cur_page = options[:page]
|
350
|
+
headers = options[:headers]
|
351
|
+
end
|
327
352
|
if @keep_alive
|
328
353
|
request.add_field('Connection', 'keep-alive')
|
329
354
|
request.add_field('Keep-Alive', keep_alive_time.to_s)
|
@@ -332,6 +357,7 @@ module WWW
|
|
332
357
|
end
|
333
358
|
request.add_field('Accept-Encoding', 'gzip,identity')
|
334
359
|
request.add_field('Accept-Language', 'en-us,en;q=0.5')
|
360
|
+
request.add_field('Host', uri.host)
|
335
361
|
request.add_field('Accept-Charset', 'ISO-8859-1,utf-8;q=0.7,*;q=0.7')
|
336
362
|
|
337
363
|
unless @cookie_jar.empty?(uri)
|
@@ -370,6 +396,18 @@ module WWW
|
|
370
396
|
end
|
371
397
|
end
|
372
398
|
|
399
|
+
if headers
|
400
|
+
headers.each do |k,v|
|
401
|
+
case k
|
402
|
+
when :etag then request.add_field("ETag", v)
|
403
|
+
when :if_modified_since then request.add_field("If-Modified-Since", v)
|
404
|
+
else
|
405
|
+
raise ArgumentError.new("unknown header symbol #{k}") if k.is_a? Symbol
|
406
|
+
request.add_field(k,v)
|
407
|
+
end
|
408
|
+
end
|
409
|
+
end
|
410
|
+
|
373
411
|
request
|
374
412
|
end
|
375
413
|
|
@@ -422,8 +460,8 @@ module WWW
|
|
422
460
|
|
423
461
|
url = URI.parse(
|
424
462
|
Mechanize.html_unescape(
|
425
|
-
url.split(
|
426
|
-
url.scan(
|
463
|
+
url.split(/(?:%[0-9A-Fa-f]{2})+|#/).zip(
|
464
|
+
url.scan(/(?:%[0-9A-Fa-f]{2})+|#/)
|
427
465
|
).map { |x,y|
|
428
466
|
"#{URI.escape(x)}#{y}"
|
429
467
|
}.join('')
|
@@ -479,7 +517,17 @@ module WWW
|
|
479
517
|
end
|
480
518
|
|
481
519
|
# uri is an absolute URI
|
482
|
-
def fetch_page(
|
520
|
+
def fetch_page(options, request=nil, cur_page=current_page(), request_data=[], redirects = 0)
|
521
|
+
unless options.is_a? Hash
|
522
|
+
raise ArgumentError.new("uri must be specified") unless uri = options
|
523
|
+
raise ArgumentError.new("request must be specified") unless request
|
524
|
+
else
|
525
|
+
raise ArgumentError.new("uri must be specified") unless uri = options[:uri]
|
526
|
+
raise ArgumentError.new("request must be specified") unless request = options[:request]
|
527
|
+
cur_page = options[:page] || current_page()
|
528
|
+
request_data = options[:request_data] || []
|
529
|
+
headers = options[:headers]
|
530
|
+
end
|
483
531
|
raise "unsupported scheme: #{uri.scheme}" unless ['http', 'https'].include?(uri.scheme.downcase)
|
484
532
|
|
485
533
|
log.info("#{ request.class }: #{ request.path }") if log
|
@@ -538,7 +586,11 @@ module WWW
|
|
538
586
|
|
539
587
|
http_obj.start unless http_obj.started?
|
540
588
|
|
541
|
-
|
589
|
+
if headers
|
590
|
+
request = set_headers(uri, request, {:page => cur_page, :headers => headers})
|
591
|
+
else
|
592
|
+
request = set_headers(uri, request, cur_page)
|
593
|
+
end
|
542
594
|
|
543
595
|
# Log specified headers for the request
|
544
596
|
if log
|
@@ -551,6 +603,7 @@ module WWW
|
|
551
603
|
|
552
604
|
# Send the request
|
553
605
|
begin
|
606
|
+
res_klass = nil
|
554
607
|
response = http_obj.request(request, *request_data) {|response|
|
555
608
|
|
556
609
|
body = StringIO.new
|
@@ -560,8 +613,13 @@ module WWW
|
|
560
613
|
body.write(part)
|
561
614
|
log.debug("Read #{total} bytes") if log
|
562
615
|
}
|
616
|
+
|
617
|
+
res_klass = Net::HTTPResponse::CODE_TO_OBJ[response.code.to_s]
|
618
|
+
|
563
619
|
# Net::HTTP ignores EOFError if Content-length is given, so we emulate it here.
|
564
|
-
|
620
|
+
unless res_klass <= Net::HTTPRedirection
|
621
|
+
raise EOFError if response.content_length() && response.content_length() != total
|
622
|
+
end
|
565
623
|
body.rewind
|
566
624
|
|
567
625
|
response.each_header { |k,v|
|
@@ -582,7 +640,7 @@ module WWW
|
|
582
640
|
if response['Content-Length'].to_i > 0 || body.length > 0
|
583
641
|
begin
|
584
642
|
Zlib::GzipReader.new(body).read
|
585
|
-
rescue Zlib::BufError
|
643
|
+
rescue Zlib::BufError, Zlib::GzipFile::Error
|
586
644
|
log.error('Caught a Zlib::BufError') if log
|
587
645
|
body.rewind
|
588
646
|
body.read(10)
|
@@ -631,6 +689,15 @@ module WWW
|
|
631
689
|
end
|
632
690
|
end
|
633
691
|
|
692
|
+
if page.is_a?(Page) && page.body =~ /Set-Cookie/
|
693
|
+
page.search('//meta[@http-equiv="Set-Cookie"]').each do |meta|
|
694
|
+
Cookie::parse(uri, meta['content'], log) { |c|
|
695
|
+
log.debug("saved cookie: #{c}") if log
|
696
|
+
@cookie_jar.add(uri, c)
|
697
|
+
}
|
698
|
+
end
|
699
|
+
end
|
700
|
+
|
634
701
|
(response.get_fields('Set-Cookie')||[]).each do |cookie|
|
635
702
|
Cookie::parse(uri, cookie, log) { |c|
|
636
703
|
log.debug("saved cookie: #{c}") if log
|
@@ -640,8 +707,6 @@ module WWW
|
|
640
707
|
|
641
708
|
log.info("status: #{ page.code }") if log
|
642
709
|
|
643
|
-
res_klass = Net::HTTPResponse::CODE_TO_OBJ[page.code.to_s]
|
644
|
-
|
645
710
|
if follow_meta_refresh && page.respond_to?(:meta) &&
|
646
711
|
(redirect = page.meta.first)
|
647
712
|
return redirect.click
|
@@ -657,7 +722,8 @@ module WWW
|
|
657
722
|
log.info("follow redirect to: #{ response['Location'] }") if log
|
658
723
|
from_uri = page.uri
|
659
724
|
abs_uri = to_absolute_uri(response['Location'].to_s, page)
|
660
|
-
|
725
|
+
raise RedirectLimitReachedError.new(page, redirects) if redirects + 1 > redirection_limit
|
726
|
+
page = fetch_page(abs_uri, fetch_request(abs_uri), page, request_data, redirects + 1)
|
661
727
|
@history.push(page, from_uri)
|
662
728
|
return page
|
663
729
|
elsif res_klass <= Net::HTTPUnauthorized
|