mechanize 0.7.8 → 0.8.0
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of mechanize might be problematic. Click here for more details.
- data/History.txt +14 -0
- data/Manifest.txt +30 -5
- data/README.txt +5 -5
- data/Rakefile +6 -0
- data/{eg → examples}/flickr_upload.rb +0 -0
- data/{eg → examples}/mech-dump.rb +0 -0
- data/{eg → examples}/proxy_req.rb +0 -0
- data/{eg → examples}/rubyforge.rb +0 -0
- data/{eg → examples}/spider.rb +0 -0
- data/lib/www/mechanize.rb +183 -404
- data/lib/www/mechanize/chain.rb +34 -0
- data/lib/www/mechanize/chain/auth_headers.rb +79 -0
- data/lib/www/mechanize/chain/body_decoding_handler.rb +43 -0
- data/lib/www/mechanize/chain/connection_resolver.rb +78 -0
- data/lib/www/mechanize/chain/custom_headers.rb +23 -0
- data/lib/www/mechanize/chain/handler.rb +9 -0
- data/lib/www/mechanize/chain/header_resolver.rb +47 -0
- data/lib/www/mechanize/chain/parameter_resolver.rb +23 -0
- data/lib/www/mechanize/chain/post_connect_hook.rb +0 -0
- data/lib/www/mechanize/chain/pre_connect_hook.rb +22 -0
- data/lib/www/mechanize/chain/request_resolver.rb +28 -0
- data/lib/www/mechanize/chain/response_body_parser.rb +40 -0
- data/lib/www/mechanize/chain/response_header_handler.rb +50 -0
- data/lib/www/mechanize/chain/response_reader.rb +41 -0
- data/lib/www/mechanize/chain/ssl_resolver.rb +36 -0
- data/lib/www/mechanize/chain/uri_resolver.rb +56 -0
- data/lib/www/mechanize/cookie.rb +1 -1
- data/lib/www/mechanize/file_response.rb +60 -0
- data/lib/www/mechanize/form.rb +12 -4
- data/lib/www/mechanize/form/field.rb +2 -2
- data/lib/www/mechanize/form/file_upload.rb +1 -1
- data/lib/www/mechanize/form/option.rb +1 -1
- data/lib/www/mechanize/list.rb +4 -0
- data/lib/www/mechanize/page.rb +20 -10
- data/lib/www/mechanize/util.rb +29 -0
- data/mechanize.gemspec +4 -4
- data/test/chain/test_argument_validator.rb +14 -0
- data/test/chain/test_custom_headers.rb +18 -0
- data/test/chain/test_parameter_resolver.rb +35 -0
- data/test/chain/test_request_resolver.rb +29 -0
- data/test/chain/test_response_reader.rb +24 -0
- data/test/helper.rb +3 -1
- data/test/servlets.rb +43 -0
- data/test/test_authenticate.rb +13 -12
- data/test/test_bad_links.rb +1 -1
- data/test/test_blank_form.rb +1 -1
- data/test/test_checkboxes.rb +1 -1
- data/test/test_content_type.rb +1 -1
- data/test/test_cookie_class.rb +1 -1
- data/test/test_cookie_jar.rb +1 -1
- data/test/test_cookies.rb +1 -1
- data/test/test_encoded_links.rb +1 -1
- data/test/test_errors.rb +1 -1
- data/test/test_follow_meta.rb +1 -1
- data/test/test_form_action.rb +1 -1
- data/test/test_form_as_hash.rb +1 -1
- data/test/test_form_button.rb +22 -17
- data/test/test_form_no_inputname.rb +1 -1
- data/test/test_forms.rb +2 -1
- data/test/test_frames.rb +1 -1
- data/test/test_get_headers.rb +1 -1
- data/test/test_gzipping.rb +1 -1
- data/test/test_hash_api.rb +17 -14
- data/test/test_history.rb +1 -1
- data/test/test_history_added.rb +1 -1
- data/test/test_html_unscape_forms.rb +1 -1
- data/test/test_if_modified_since.rb +1 -1
- data/test/test_keep_alive.rb +1 -1
- data/test/test_links.rb +1 -1
- data/test/test_mech.rb +18 -11
- data/test/test_mechanize_file.rb +1 -1
- data/test/test_multi_select.rb +1 -1
- data/test/test_no_attributes.rb +1 -1
- data/test/test_option.rb +2 -1
- data/test/test_page.rb +1 -1
- data/test/test_pluggable_parser.rb +1 -1
- data/test/test_post_form.rb +1 -1
- data/test/test_pretty_print.rb +1 -1
- data/test/test_radiobutton.rb +1 -1
- data/test/test_redirect_limit_reached.rb +1 -1
- data/test/test_referer.rb +1 -1
- data/test/test_relative_links.rb +1 -1
- data/test/test_response_code.rb +7 -1
- data/test/test_save_file.rb +1 -1
- data/test/test_scheme.rb +44 -0
- data/test/test_select.rb +1 -1
- data/test/test_select_all.rb +1 -1
- data/test/test_select_none.rb +1 -1
- data/test/test_select_noopts.rb +1 -1
- data/test/test_set_fields.rb +1 -1
- data/test/test_ssl_server.rb +1 -1
- data/test/test_subclass.rb +4 -11
- data/test/test_textarea.rb +1 -1
- data/test/test_upload.rb +1 -1
- data/test/test_verbs.rb +22 -0
- metadata +39 -7
data/History.txt
CHANGED
@@ -1,5 +1,19 @@
|
|
1
1
|
= Mechanize CHANGELOG
|
2
2
|
|
3
|
+
=== 0.8.0
|
4
|
+
|
5
|
+
* New Features:
|
6
|
+
* Lifecycle hooks. Mechanize#pre_connect_hooks, Mechanize#post_connect_hooks
|
7
|
+
* file:/// urls are now supported
|
8
|
+
* Added Mechanize::Page#link_with, frame_with for searching for links using
|
9
|
+
+criteria+.
|
10
|
+
* Implementing PUT, DELETE, and HEAD requests
|
11
|
+
|
12
|
+
* Bug Fixes:
|
13
|
+
* Fixed an infinite loop when content-length and body length don't match.
|
14
|
+
* Only setting headers once
|
15
|
+
* Adding IIS authentication support
|
16
|
+
|
3
17
|
=== 0.7.8
|
4
18
|
|
5
19
|
* Bug Fixes:
|
data/Manifest.txt
CHANGED
@@ -7,17 +7,34 @@ Manifest.txt
|
|
7
7
|
NOTES.txt
|
8
8
|
README.txt
|
9
9
|
Rakefile
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
10
|
+
examples/flickr_upload.rb
|
11
|
+
examples/mech-dump.rb
|
12
|
+
examples/proxy_req.rb
|
13
|
+
examples/rubyforge.rb
|
14
|
+
examples/spider.rb
|
15
15
|
lib/mechanize.rb
|
16
16
|
lib/www/mechanize.rb
|
17
|
+
lib/www/mechanize/chain.rb
|
18
|
+
lib/www/mechanize/chain/auth_headers.rb
|
19
|
+
lib/www/mechanize/chain/body_decoding_handler.rb
|
20
|
+
lib/www/mechanize/chain/connection_resolver.rb
|
21
|
+
lib/www/mechanize/chain/custom_headers.rb
|
22
|
+
lib/www/mechanize/chain/handler.rb
|
23
|
+
lib/www/mechanize/chain/header_resolver.rb
|
24
|
+
lib/www/mechanize/chain/parameter_resolver.rb
|
25
|
+
lib/www/mechanize/chain/post_connect_hook.rb
|
26
|
+
lib/www/mechanize/chain/pre_connect_hook.rb
|
27
|
+
lib/www/mechanize/chain/request_resolver.rb
|
28
|
+
lib/www/mechanize/chain/response_body_parser.rb
|
29
|
+
lib/www/mechanize/chain/response_header_handler.rb
|
30
|
+
lib/www/mechanize/chain/response_reader.rb
|
31
|
+
lib/www/mechanize/chain/ssl_resolver.rb
|
32
|
+
lib/www/mechanize/chain/uri_resolver.rb
|
17
33
|
lib/www/mechanize/content_type_error.rb
|
18
34
|
lib/www/mechanize/cookie.rb
|
19
35
|
lib/www/mechanize/cookie_jar.rb
|
20
36
|
lib/www/mechanize/file.rb
|
37
|
+
lib/www/mechanize/file_response.rb
|
21
38
|
lib/www/mechanize/file_saver.rb
|
22
39
|
lib/www/mechanize/form.rb
|
23
40
|
lib/www/mechanize/form/button.rb
|
@@ -43,7 +60,13 @@ lib/www/mechanize/pluggable_parsers.rb
|
|
43
60
|
lib/www/mechanize/redirect_limit_reached_error.rb
|
44
61
|
lib/www/mechanize/response_code_error.rb
|
45
62
|
lib/www/mechanize/unsupported_scheme_error.rb
|
63
|
+
lib/www/mechanize/util.rb
|
46
64
|
mechanize.gemspec
|
65
|
+
test/chain/test_argument_validator.rb
|
66
|
+
test/chain/test_custom_headers.rb
|
67
|
+
test/chain/test_parameter_resolver.rb
|
68
|
+
test/chain/test_request_resolver.rb
|
69
|
+
test/chain/test_response_reader.rb
|
47
70
|
test/data/htpasswd
|
48
71
|
test/data/server.crt
|
49
72
|
test/data/server.csr
|
@@ -132,6 +155,7 @@ test/test_referer.rb
|
|
132
155
|
test/test_relative_links.rb
|
133
156
|
test/test_response_code.rb
|
134
157
|
test/test_save_file.rb
|
158
|
+
test/test_scheme.rb
|
135
159
|
test/test_select.rb
|
136
160
|
test/test_select_all.rb
|
137
161
|
test/test_select_none.rb
|
@@ -141,3 +165,4 @@ test/test_ssl_server.rb
|
|
141
165
|
test/test_subclass.rb
|
142
166
|
test/test_textarea.rb
|
143
167
|
test/test_upload.rb
|
168
|
+
test/test_verbs.rb
|
data/README.txt
CHANGED
@@ -1,6 +1,7 @@
|
|
1
1
|
= WWW::Mechanize
|
2
2
|
|
3
3
|
http://mechanize.rubyforge.org/
|
4
|
+
http://github.com/tenderlove/mechanize/tree/master
|
4
5
|
|
5
6
|
== DESCRIPTION
|
6
7
|
|
@@ -15,8 +16,6 @@ a history.
|
|
15
16
|
* ruby 1.8.4
|
16
17
|
* hpricot[http://code.whytheluckystiff.net/hpricot/]
|
17
18
|
|
18
|
-
Note that the files in the net-overrides/ directory are taken from Ruby 1.9.0.
|
19
|
-
|
20
19
|
|
21
20
|
== Examples
|
22
21
|
|
@@ -25,11 +24,12 @@ Also, check out the EXAMPLES[link://files/EXAMPLES_txt.html] file.
|
|
25
24
|
|
26
25
|
== Authors
|
27
26
|
|
28
|
-
Original Code:
|
29
27
|
Copyright (c) 2005 by Michael Neumann (mneumann@ntecs.de)
|
30
28
|
|
31
|
-
|
32
|
-
|
29
|
+
Copyright (c) 2006-2008:
|
30
|
+
|
31
|
+
* {Aaron Patterson}[http://tenderlovemaking.com] (aaronp@rubyforge.org)
|
32
|
+
* Mike Dalessio (mike@csa.net)
|
33
33
|
|
34
34
|
This library comes with a shameless plug for employing me
|
35
35
|
(Aaron[http://tenderlovemaking.com/]) programming
|
data/Rakefile
CHANGED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
data/{eg → examples}/spider.rb
RENAMED
File without changes
|
data/lib/www/mechanize.rb
CHANGED
@@ -9,6 +9,7 @@ require 'fileutils'
|
|
9
9
|
require 'hpricot'
|
10
10
|
require 'forwardable'
|
11
11
|
|
12
|
+
require 'www/mechanize/util'
|
12
13
|
require 'www/mechanize/content_type_error'
|
13
14
|
require 'www/mechanize/response_code_error'
|
14
15
|
require 'www/mechanize/unsupported_scheme_error'
|
@@ -19,7 +20,9 @@ require 'www/mechanize/history'
|
|
19
20
|
require 'www/mechanize/list'
|
20
21
|
require 'www/mechanize/form'
|
21
22
|
require 'www/mechanize/pluggable_parsers'
|
23
|
+
require 'www/mechanize/file_response'
|
22
24
|
require 'www/mechanize/inspect'
|
25
|
+
require 'www/mechanize/chain'
|
23
26
|
require 'www/mechanize/monkey_patch'
|
24
27
|
|
25
28
|
module WWW
|
@@ -43,7 +46,7 @@ module WWW
|
|
43
46
|
class Mechanize
|
44
47
|
##
|
45
48
|
# The version of Mechanize you are using.
|
46
|
-
VERSION = '0.7.8'
|
49
|
+
VERSION = '0.8.0'
|
47
50
|
|
48
51
|
##
|
49
52
|
# User Agent aliases
|
@@ -61,7 +64,6 @@ module WWW
|
|
61
64
|
}
|
62
65
|
|
63
66
|
attr_accessor :cookie_jar
|
64
|
-
attr_accessor :log
|
65
67
|
attr_accessor :open_timeout, :read_timeout
|
66
68
|
attr_accessor :user_agent
|
67
69
|
attr_accessor :watch_for_set
|
@@ -84,10 +86,8 @@ module WWW
|
|
84
86
|
|
85
87
|
alias :follow_redirect? :redirect_ok
|
86
88
|
|
87
|
-
@@nonce_count = -1
|
88
|
-
CNONCE = Digest::MD5.hexdigest("%x" % (Time.now.to_i + rand(65535)))
|
89
89
|
@html_parser = Hpricot
|
90
|
-
class << self; attr_accessor :html_parser end
|
90
|
+
class << self; attr_accessor :html_parser, :log end
|
91
91
|
|
92
92
|
def initialize
|
93
93
|
# attr_accessors
|
@@ -118,7 +118,6 @@ module WWW
|
|
118
118
|
@password = nil # Auth Password
|
119
119
|
@digest = nil # DigestAuth Digest
|
120
120
|
@auth_hash = {} # Keep track of urls for sending auth
|
121
|
-
@digest_response = nil
|
122
121
|
|
123
122
|
# Proxy settings
|
124
123
|
@proxy_addr = nil
|
@@ -144,14 +143,29 @@ module WWW
|
|
144
143
|
@scheme_handlers['http'] = lambda { |link, page| link }
|
145
144
|
@scheme_handlers['https'] = @scheme_handlers['http']
|
146
145
|
@scheme_handlers['relative'] = @scheme_handlers['http']
|
146
|
+
@scheme_handlers['file'] = @scheme_handlers['http']
|
147
|
+
|
148
|
+
@pre_connect_hook = Chain::PreConnectHook.new
|
149
|
+
@post_connect_hook = Chain::PostConnectHook.new
|
147
150
|
|
148
151
|
yield self if block_given?
|
149
152
|
end
|
150
153
|
|
151
|
-
def max_history=(length); @history.max_size = length; end
|
152
|
-
def max_history; @history.max_size; end
|
154
|
+
def max_history=(length); @history.max_size = length end
|
155
|
+
def max_history; @history.max_size end
|
156
|
+
def log=(l); self.class.log = l end
|
157
|
+
def log; self.class.log end
|
158
|
+
|
159
|
+
def pre_connect_hooks
|
160
|
+
@pre_connect_hook.hooks
|
161
|
+
end
|
162
|
+
|
163
|
+
def post_connect_hooks
|
164
|
+
@post_connect_hook.hooks
|
165
|
+
end
|
153
166
|
|
154
167
|
# Sets the proxy address, port, user, and password
|
168
|
+
# +addr+ should be a host, with no "http://"
|
155
169
|
def set_proxy(addr, port, user = nil, pass = nil)
|
156
170
|
@proxy_addr, @proxy_port, @proxy_user, @proxy_pass = addr, port, user, pass
|
157
171
|
end
|
@@ -167,15 +181,12 @@ module WWW
|
|
167
181
|
@cookie_jar.to_a
|
168
182
|
end
|
169
183
|
|
170
|
-
# Sets the user and password to be used for basic authentication.
|
171
|
-
def basic_auth(user, password)
|
172
|
-
auth(user, password)
|
173
|
-
end
|
174
|
-
|
184
|
+
# Sets the user and password to be used for authentication.
|
175
185
|
def auth(user, password)
|
176
186
|
@user = user
|
177
187
|
@password = password
|
178
188
|
end
|
189
|
+
alias :basic_auth :auth
|
179
190
|
|
180
191
|
# Fetches the URL passed in and returns a page.
|
181
192
|
def get(options, parameters = [], referer = nil)
|
@@ -202,41 +213,67 @@ module WWW
|
|
202
213
|
Page.new(URI.parse(referer), {'content-type' => 'text/html'}) :
|
203
214
|
Page.new(referer, {'content-type' => 'text/html'})
|
204
215
|
end
|
205
|
-
abs_uri = to_absolute_uri(url, referer)
|
206
216
|
|
207
|
-
|
208
|
-
|
209
|
-
|
210
|
-
|
211
|
-
|
217
|
+
# fetch the page
|
218
|
+
page = fetch_page( :uri => url,
|
219
|
+
:referer => referer,
|
220
|
+
:headers => headers || {},
|
221
|
+
:params => parameters
|
222
|
+
)
|
223
|
+
add_to_history(page)
|
224
|
+
yield page if block_given?
|
225
|
+
page
|
226
|
+
end
|
212
227
|
|
228
|
+
####
|
229
|
+
# PUT to +url+ with +query_params+, and setting +options+:
|
230
|
+
#
|
231
|
+
# put('http://tenderlovemaking.com/', {'q' => 'foo'}, :headers => {})
|
232
|
+
#
|
233
|
+
def put(url, query_params = {}, options = {})
|
234
|
+
options = {
|
235
|
+
:uri => url,
|
236
|
+
:headers => {},
|
237
|
+
:params => query_params,
|
238
|
+
:verb => :put
|
239
|
+
}.merge(options)
|
213
240
|
# fetch the page
|
214
|
-
|
215
|
-
page = fetch_page(:uri => abs_uri, :request => request, :page => referer, :headers => headers)
|
241
|
+
page = fetch_page(options)
|
216
242
|
add_to_history(page)
|
217
243
|
yield page if block_given?
|
218
244
|
page
|
219
245
|
end
|
246
|
+
|
247
|
+
####
|
248
|
+
# DELETE to +url+ with +query_params+, and setting +options+:
|
249
|
+
#
|
250
|
+
# delete('http://tenderlovemaking.com/', {'q' => 'foo'}, :headers => {})
|
251
|
+
#
|
252
|
+
def delete(url, query_params = {}, options = {})
|
253
|
+
put(url, query_params, options.merge({:verb => :delete}))
|
254
|
+
end
|
255
|
+
|
256
|
+
####
|
257
|
+
# HEAD to +url+ with +query_params+, and setting +options+:
|
258
|
+
#
|
259
|
+
# head('http://tenderlovemaking.com/', {'q' => 'foo'}, :headers => {})
|
260
|
+
#
|
261
|
+
def head(url, query_params = {}, options = {})
|
262
|
+
put(url, query_params, options.merge({:verb => :head}))
|
263
|
+
end
|
220
264
|
|
221
265
|
# Fetch a file and return the contents of the file.
|
222
266
|
def get_file(url)
|
223
267
|
get(url).body
|
224
268
|
end
|
225
269
|
|
226
|
-
|
227
270
|
# Clicks the WWW::Mechanize::Link object passed in and returns the
|
228
271
|
# page fetched.
|
229
272
|
def click(link)
|
230
|
-
referer =
|
231
|
-
|
232
|
-
|
233
|
-
|
234
|
-
nil
|
235
|
-
end
|
236
|
-
href = link.respond_to?(:has_attribute?) ?
|
237
|
-
(link['href'] || link['src']) : link.href
|
238
|
-
uri = to_absolute_uri(href, referer || current_page())
|
239
|
-
get(uri, referer)
|
273
|
+
referer = link.page rescue referer = nil
|
274
|
+
href = link.respond_to?(:href) ? link.href :
|
275
|
+
(link['href'] || link['src'])
|
276
|
+
get(:url => href, :referer => (referer || current_page()))
|
240
277
|
end
|
241
278
|
|
242
279
|
# Equivalent to the browser back button. Returns the most recent page
|
@@ -282,13 +319,14 @@ module WWW
|
|
282
319
|
# agent.submit(page.forms.first, page.forms.first.buttons.first)
|
283
320
|
def submit(form, button=nil)
|
284
321
|
form.add_button_to_query(button) if button
|
285
|
-
uri = to_absolute_uri(form.action, form.page)
|
286
322
|
case form.method.upcase
|
287
323
|
when 'POST'
|
288
|
-
post_form(
|
324
|
+
post_form(form.action, form)
|
289
325
|
when 'GET'
|
290
|
-
|
291
|
-
|
326
|
+
get( :url => form.action.gsub(/\?[^\?]*$/, ''),
|
327
|
+
:params => form.build_query,
|
328
|
+
:referer => form.page
|
329
|
+
)
|
292
330
|
else
|
293
331
|
raise "unsupported method: #{form.method.upcase}"
|
294
332
|
end
|
@@ -309,7 +347,7 @@ module WWW
|
|
309
347
|
if url.respond_to? :href
|
310
348
|
url = url.href
|
311
349
|
end
|
312
|
-
@history.visited_page(
|
350
|
+
@history.visited_page(resolve(url))
|
313
351
|
end
|
314
352
|
|
315
353
|
# Runs given block, then resets the page history as it was before. self is
|
@@ -325,166 +363,14 @@ module WWW
|
|
325
363
|
|
326
364
|
alias :page :current_page
|
327
365
|
|
328
|
-
class << self
|
329
|
-
def html_unescape(s)
|
330
|
-
return s unless s
|
331
|
-
s.gsub(/&(\w+|#[0-9]+);/) { |match|
|
332
|
-
number = case match
|
333
|
-
when /&(\w+);/
|
334
|
-
Mechanize.html_parser::NamedCharacters[$1]
|
335
|
-
when /&#([0-9]+);/
|
336
|
-
$1.to_i
|
337
|
-
end
|
338
|
-
|
339
|
-
number ? ([number].pack('U') rescue match) : match
|
340
|
-
}
|
341
|
-
end
|
342
|
-
end
|
343
|
-
|
344
|
-
protected
|
345
|
-
def set_headers(uri, request, options)
|
346
|
-
unless options.is_a? Hash
|
347
|
-
cur_page = options
|
348
|
-
else
|
349
|
-
raise ArgumentError.new("cur_page must be specified") unless cur_page = options[:page]
|
350
|
-
headers = options[:headers]
|
351
|
-
end
|
352
|
-
if @keep_alive
|
353
|
-
request.add_field('Connection', 'keep-alive')
|
354
|
-
request.add_field('Keep-Alive', keep_alive_time.to_s)
|
355
|
-
else
|
356
|
-
request.add_field('Connection', 'close')
|
357
|
-
end
|
358
|
-
request.add_field('Accept-Encoding', 'gzip,identity')
|
359
|
-
request.add_field('Accept-Language', 'en-us,en;q=0.5')
|
360
|
-
request.add_field('Host', uri.host)
|
361
|
-
request.add_field('Accept-Charset', 'ISO-8859-1,utf-8;q=0.7,*;q=0.7')
|
362
|
-
|
363
|
-
unless @cookie_jar.empty?(uri)
|
364
|
-
cookies = @cookie_jar.cookies(uri)
|
365
|
-
cookie = cookies.length > 0 ? cookies.join("; ") : nil
|
366
|
-
if log
|
367
|
-
cookies.each do |c|
|
368
|
-
log.debug("using cookie: #{c}")
|
369
|
-
end
|
370
|
-
end
|
371
|
-
request.add_field('Cookie', cookie)
|
372
|
-
end
|
373
|
-
|
374
|
-
# Add Referer header to request
|
375
|
-
unless cur_page.uri.nil?
|
376
|
-
request.add_field('Referer', cur_page.uri.to_s)
|
377
|
-
end
|
378
|
-
|
379
|
-
# Add User-Agent header to request
|
380
|
-
request.add_field('User-Agent', @user_agent) if @user_agent
|
381
|
-
|
382
|
-
# Add If-Modified-Since if page is in history
|
383
|
-
if @conditional_requests
|
384
|
-
if( (page = visited_page(uri)) && page.response['Last-Modified'] )
|
385
|
-
request.add_field('If-Modified-Since', page.response['Last-Modified'])
|
386
|
-
end
|
387
|
-
end
|
388
|
-
|
389
|
-
if( @auth_hash[uri.host] )
|
390
|
-
case @auth_hash[uri.host]
|
391
|
-
when :basic
|
392
|
-
request.basic_auth(@user, @password)
|
393
|
-
when :digest
|
394
|
-
@digest_response = self.gen_auth_header(uri,request,@digest) if @digest
|
395
|
-
request.add_field('Authorization', @digest_response) if @digest_response
|
396
|
-
end
|
397
|
-
end
|
398
|
-
|
399
|
-
if headers
|
400
|
-
headers.each do |k,v|
|
401
|
-
case k
|
402
|
-
when :etag then request.add_field("ETag", v)
|
403
|
-
when :if_modified_since then request.add_field("If-Modified-Since", v)
|
404
|
-
else
|
405
|
-
raise ArgumentError.new("unknown header symbol #{k}") if k.is_a? Symbol
|
406
|
-
request.add_field(k,v)
|
407
|
-
end
|
408
|
-
end
|
409
|
-
end
|
410
|
-
|
411
|
-
request
|
412
|
-
end
|
413
|
-
|
414
|
-
def gen_auth_header(uri, request, auth_header, is_IIS = false)
|
415
|
-
@@nonce_count += 1
|
416
|
-
|
417
|
-
user = @digest_user
|
418
|
-
password = @digest_password
|
419
|
-
|
420
|
-
auth_header =~ /^(\w+) (.*)/
|
421
|
-
|
422
|
-
params = {}
|
423
|
-
$2.gsub(/(\w+)="(.*?)"/) { params[$1] = $2 }
|
424
|
-
|
425
|
-
a_1 = "#{@user}:#{params['realm']}:#{@password}"
|
426
|
-
a_2 = "#{request.method}:#{uri.path}"
|
427
|
-
request_digest = ''
|
428
|
-
request_digest << Digest::MD5.hexdigest(a_1)
|
429
|
-
request_digest << ':' << params['nonce']
|
430
|
-
request_digest << ':' << ('%08x' % @@nonce_count)
|
431
|
-
request_digest << ':' << CNONCE
|
432
|
-
request_digest << ':' << params['qop']
|
433
|
-
request_digest << ':' << Digest::MD5.hexdigest(a_2)
|
434
|
-
|
435
|
-
header = ''
|
436
|
-
header << "Digest username=\"#{@user}\", "
|
437
|
-
header << "realm=\"#{params['realm']}\", "
|
438
|
-
if is_IIS then
|
439
|
-
header << "qop=\"#{params['qop']}\", "
|
440
|
-
else
|
441
|
-
header << "qop=#{params['qop']}, "
|
442
|
-
end
|
443
|
-
header << "uri=\"#{uri.path}\", "
|
444
|
-
header << "algorithm=MD5, "
|
445
|
-
header << "nonce=\"#{params['nonce']}\", "
|
446
|
-
header << "nc=#{'%08x' % @@nonce_count}, "
|
447
|
-
header << "cnonce=\"#{CNONCE}\", "
|
448
|
-
header << "response=\"#{Digest::MD5.hexdigest(request_digest)}\""
|
449
|
-
|
450
|
-
return header
|
451
|
-
end
|
452
|
-
|
453
366
|
private
|
454
367
|
|
455
|
-
def
|
456
|
-
|
457
|
-
|
458
|
-
|
459
|
-
|
460
|
-
|
461
|
-
url = URI.parse(
|
462
|
-
Mechanize.html_unescape(
|
463
|
-
url.split(/(?:%[0-9A-Fa-f]{2})+|#/).zip(
|
464
|
-
url.scan(/(?:%[0-9A-Fa-f]{2})+|#/)
|
465
|
-
).map { |x,y|
|
466
|
-
"#{URI.escape(x)}#{y}"
|
467
|
-
}.join('')
|
468
|
-
)
|
469
|
-
)
|
470
|
-
end
|
471
|
-
|
472
|
-
url = @scheme_handlers[url.relative? ? 'relative' : url.scheme.downcase].call(url, cur_page)
|
473
|
-
url.path = '/' if url.path.length == 0
|
474
|
-
|
475
|
-
# construct an absolute uri
|
476
|
-
if url.relative?
|
477
|
-
raise 'no history. please specify an absolute URL' unless cur_page.uri
|
478
|
-
base = cur_page.respond_to?(:bases) ? cur_page.bases.last : nil
|
479
|
-
url = ((base && base.uri && base.uri.absolute?) ?
|
480
|
-
base.uri :
|
481
|
-
cur_page.uri) + url
|
482
|
-
url = cur_page.uri + url
|
483
|
-
# Strip initial "/.." bits from the path
|
484
|
-
url.path.sub!(/^(\/\.\.)+(?=\/)/, '')
|
485
|
-
end
|
486
|
-
|
487
|
-
return url
|
368
|
+
def resolve(url, referer = current_page())
|
369
|
+
hash = { :uri => url, :referer => referer }
|
370
|
+
chain = Chain.new([
|
371
|
+
Chain::URIResolver.new(@scheme_handlers)
|
372
|
+
]).handle(hash)
|
373
|
+
hash[:uri].to_s
|
488
374
|
end
|
489
375
|
|
490
376
|
def post_form(url, form)
|
@@ -493,218 +379,113 @@ module WWW
|
|
493
379
|
|
494
380
|
request_data = form.request_data
|
495
381
|
|
496
|
-
abs_url = to_absolute_uri(url, cur_page)
|
497
|
-
request = fetch_request(abs_url, :post)
|
498
|
-
request.add_field('Content-Type', form.enctype)
|
499
|
-
request.add_field('Content-Length', request_data.size.to_s)
|
500
|
-
|
501
382
|
log.debug("query: #{ request_data.inspect }") if log
|
502
383
|
|
503
384
|
# fetch the page
|
504
|
-
page = fetch_page(
|
385
|
+
page = fetch_page( :uri => url,
|
386
|
+
:referer => cur_page,
|
387
|
+
:verb => :post,
|
388
|
+
:params => [request_data],
|
389
|
+
:headers => {
|
390
|
+
'Content-Type' => form.enctype,
|
391
|
+
'Content-Length' => request_data.size.to_s,
|
392
|
+
})
|
505
393
|
add_to_history(page)
|
506
394
|
page
|
507
395
|
end
|
508
396
|
|
509
|
-
# Creates a new request object based on the scheme and type
|
510
|
-
def fetch_request(uri, type = :get)
|
511
|
-
raise "unsupported scheme: #{uri.scheme}" unless ['http', 'https'].include?(uri.scheme.downcase)
|
512
|
-
if type == :get
|
513
|
-
Net::HTTP::Get.new(uri.request_uri)
|
514
|
-
else
|
515
|
-
Net::HTTP::Post.new(uri.request_uri)
|
516
|
-
end
|
517
|
-
end
|
518
|
-
|
519
397
|
# uri is an absolute URI
|
520
|
-
def fetch_page(
|
521
|
-
|
522
|
-
|
523
|
-
|
524
|
-
|
525
|
-
|
526
|
-
|
527
|
-
|
528
|
-
|
529
|
-
|
530
|
-
|
531
|
-
|
532
|
-
|
533
|
-
|
534
|
-
|
535
|
-
|
536
|
-
|
537
|
-
|
538
|
-
|
539
|
-
|
540
|
-
|
541
|
-
|
542
|
-
|
543
|
-
|
544
|
-
|
545
|
-
|
546
|
-
|
547
|
-
|
548
|
-
|
549
|
-
|
550
|
-
|
551
|
-
|
552
|
-
|
553
|
-
|
554
|
-
|
555
|
-
|
556
|
-
|
557
|
-
|
558
|
-
|
559
|
-
|
560
|
-
|
561
|
-
|
562
|
-
|
563
|
-
|
564
|
-
|
565
|
-
|
566
|
-
|
567
|
-
|
568
|
-
|
569
|
-
|
570
|
-
|
571
|
-
|
572
|
-
# If we're keeping connections alive and the last request time is too
|
573
|
-
# long ago, stop the connection. Or, if the max requests left is 1,
|
574
|
-
# reset the connection.
|
575
|
-
if @keep_alive && http_obj.started?
|
576
|
-
opts = cache_obj[:keep_alive_options]
|
577
|
-
if((opts[:timeout] &&
|
578
|
-
Time.now.to_i - cache_obj[:last_request_time] > opts[:timeout].to_i) ||
|
579
|
-
opts[:max] && opts[:max].to_i == 1)
|
580
|
-
|
581
|
-
log.debug('Finishing stale connection') if log
|
582
|
-
http_obj.finish
|
583
|
-
|
584
|
-
end
|
585
|
-
end
|
586
|
-
|
398
|
+
def fetch_page(params)
|
399
|
+
options = {
|
400
|
+
:request => nil,
|
401
|
+
:response => nil,
|
402
|
+
:connection => nil,
|
403
|
+
:referer => current_page(),
|
404
|
+
:uri => nil,
|
405
|
+
:verb => :get,
|
406
|
+
:agent => self,
|
407
|
+
:redirects => 0,
|
408
|
+
:params => [],
|
409
|
+
:headers => {},
|
410
|
+
}.merge(params)
|
411
|
+
|
412
|
+
before_connect = Chain.new([
|
413
|
+
Chain::URIResolver.new(@scheme_handlers),
|
414
|
+
Chain::ParameterResolver.new,
|
415
|
+
Chain::RequestResolver.new,
|
416
|
+
Chain::ConnectionResolver.new(
|
417
|
+
@connection_cache,
|
418
|
+
@keep_alive,
|
419
|
+
@proxy_addr,
|
420
|
+
@proxy_port,
|
421
|
+
@proxy_user,
|
422
|
+
@proxy_pass
|
423
|
+
),
|
424
|
+
Chain::SSLResolver.new(@ca_file, @verify_callback, @cert, @key, @pass),
|
425
|
+
Chain::AuthHeaders.new(@auth_hash, @user, @password, @digest),
|
426
|
+
Chain::HeaderResolver.new( @keep_alive,
|
427
|
+
@keep_alive_time,
|
428
|
+
@cookie_jar,
|
429
|
+
@user_agent),
|
430
|
+
Chain::CustomHeaders.new,
|
431
|
+
@pre_connect_hook,
|
432
|
+
])
|
433
|
+
before_connect.handle(options)
|
434
|
+
|
435
|
+
uri = options[:uri]
|
436
|
+
request = options[:request]
|
437
|
+
cur_page = options[:referer]
|
438
|
+
request_data = options[:params]
|
439
|
+
redirects = options[:redirects]
|
440
|
+
http_obj = options[:connection]
|
441
|
+
|
442
|
+
# Add If-Modified-Since if page is in history
|
443
|
+
if( (page = visited_page(uri)) && cur_page.response['Last-Modified'] )
|
444
|
+
request['If-Modified-Since'] = cur_page.response['Last-Modified']
|
445
|
+
end if(@conditional_requests)
|
446
|
+
|
447
|
+
# Specify timeouts if given
|
448
|
+
http_obj.open_timeout = @open_timeout if @open_timeout
|
449
|
+
http_obj.read_timeout = @read_timeout if @read_timeout
|
587
450
|
http_obj.start unless http_obj.started?
|
588
|
-
|
589
|
-
if headers
|
590
|
-
request = set_headers(uri, request, {:page => cur_page, :headers => headers})
|
591
|
-
else
|
592
|
-
request = set_headers(uri, request, cur_page)
|
593
|
-
end
|
594
|
-
|
451
|
+
|
595
452
|
# Log specified headers for the request
|
596
|
-
if log
|
597
|
-
|
598
|
-
|
599
|
-
|
600
|
-
|
601
|
-
|
602
|
-
cache_obj[:last_request_time] = Time.now.to_i
|
603
|
-
|
453
|
+
log.info("#{ request.class }: #{ request.path }") if log
|
454
|
+
request.each_header do |k, v|
|
455
|
+
log.debug("request-header: #{ k } => #{ v }")
|
456
|
+
end if log
|
457
|
+
|
604
458
|
# Send the request
|
459
|
+
attempts = 0
|
605
460
|
begin
|
606
|
-
|
607
|
-
|
608
|
-
|
609
|
-
|
610
|
-
|
611
|
-
|
612
|
-
total += part.length
|
613
|
-
body.write(part)
|
614
|
-
log.debug("Read #{total} bytes") if log
|
615
|
-
}
|
616
|
-
|
617
|
-
res_klass = Net::HTTPResponse::CODE_TO_OBJ[response.code.to_s]
|
618
|
-
|
619
|
-
# Net::HTTP ignores EOFError if Content-length is given, so we emulate it here.
|
620
|
-
unless res_klass <= Net::HTTPRedirection
|
621
|
-
raise EOFError if response.content_length() && response.content_length() != total
|
622
|
-
end
|
623
|
-
body.rewind
|
624
|
-
|
625
|
-
response.each_header { |k,v|
|
626
|
-
log.debug("response-header: #{ k } => #{ v }")
|
627
|
-
} if log
|
628
|
-
|
629
|
-
content_type = nil
|
630
|
-
unless response['Content-Type'].nil?
|
631
|
-
data = response['Content-Type'].match(/^([^;]*)/)
|
632
|
-
content_type = data[1].downcase unless data.nil?
|
633
|
-
end
|
634
|
-
|
635
|
-
response_body =
|
636
|
-
if encoding = response['Content-Encoding']
|
637
|
-
case encoding.downcase
|
638
|
-
when 'gzip'
|
639
|
-
log.debug('gunzip body') if log
|
640
|
-
if response['Content-Length'].to_i > 0 || body.length > 0
|
641
|
-
begin
|
642
|
-
Zlib::GzipReader.new(body).read
|
643
|
-
rescue Zlib::BufError, Zlib::GzipFile::Error
|
644
|
-
log.error('Caught a Zlib::BufError') if log
|
645
|
-
body.rewind
|
646
|
-
body.read(10)
|
647
|
-
Zlib::Inflate.new(-Zlib::MAX_WBITS).inflate(body.read)
|
648
|
-
end
|
649
|
-
else
|
650
|
-
''
|
651
|
-
end
|
652
|
-
when 'x-gzip'
|
653
|
-
body.read
|
654
|
-
else
|
655
|
-
raise 'Unsupported content encoding'
|
656
|
-
end
|
657
|
-
else
|
658
|
-
body.read
|
659
|
-
end
|
660
|
-
|
661
|
-
# Find our pluggable parser
|
662
|
-
page = @pluggable_parser.parser(content_type).new(
|
663
|
-
uri,
|
664
|
-
response,
|
665
|
-
response_body,
|
666
|
-
response.code
|
667
|
-
) { |parser|
|
668
|
-
parser.mech = self if parser.respond_to? :mech=
|
669
|
-
if parser.respond_to?(:watch_for_set=) && @watch_for_set
|
670
|
-
parser.watch_for_set = @watch_for_set
|
671
|
-
end
|
672
|
-
}
|
673
|
-
|
461
|
+
response = http_obj.request(request, *request_data) { |response|
|
462
|
+
connection_chain = Chain.new([
|
463
|
+
Chain::ResponseReader.new(response),
|
464
|
+
Chain::BodyDecodingHandler.new,
|
465
|
+
])
|
466
|
+
connection_chain.handle(options)
|
674
467
|
}
|
675
|
-
rescue EOFError, Errno::ECONNRESET, Errno::EPIPE
|
468
|
+
rescue EOFError, Errno::ECONNRESET, Errno::EPIPE => x
|
676
469
|
log.error("Rescuing EOF error") if log
|
677
470
|
http_obj.finish
|
471
|
+
raise x if attempts >= 2
|
678
472
|
request.body = nil
|
679
473
|
http_obj.start
|
474
|
+
attempts += 1
|
680
475
|
retry
|
681
476
|
end
|
682
|
-
|
683
|
-
# If the server sends back keep alive options, save them
|
684
|
-
if keep_alive_info = response['keep-alive']
|
685
|
-
keep_alive_info.split(/,\s*/).each do |option|
|
686
|
-
k, v = option.split(/=/)
|
687
|
-
cache_obj[:keep_alive_options] ||= {}
|
688
|
-
cache_obj[:keep_alive_options][k.intern] = v
|
689
|
-
end
|
690
|
-
end
|
691
|
-
|
692
|
-
if page.is_a?(Page) && page.body =~ /Set-Cookie/
|
693
|
-
page.search('//meta[@http-equiv="Set-Cookie"]').each do |meta|
|
694
|
-
Cookie::parse(uri, meta['content'], log) { |c|
|
695
|
-
log.debug("saved cookie: #{c}") if log
|
696
|
-
@cookie_jar.add(uri, c)
|
697
|
-
}
|
698
|
-
end
|
699
|
-
end
|
700
477
|
|
701
|
-
|
702
|
-
|
703
|
-
|
704
|
-
|
705
|
-
|
706
|
-
|
707
|
-
|
478
|
+
after_connect = Chain.new([
|
479
|
+
@post_connect_hook,
|
480
|
+
Chain::ResponseBodyParser.new(@pluggable_parser, @watch_for_set),
|
481
|
+
Chain::ResponseHeaderHandler.new(@cookie_jar, @connection_cache),
|
482
|
+
])
|
483
|
+
after_connect.handle(options)
|
484
|
+
|
485
|
+
res_klass = options[:res_klass]
|
486
|
+
response_body = options[:response_body]
|
487
|
+
page = options[:page]
|
488
|
+
|
708
489
|
log.info("status: #{ page.code }") if log
|
709
490
|
|
710
491
|
if follow_meta_refresh && page.respond_to?(:meta) &&
|
@@ -721,9 +502,12 @@ module WWW
|
|
721
502
|
return page unless follow_redirect?
|
722
503
|
log.info("follow redirect to: #{ response['Location'] }") if log
|
723
504
|
from_uri = page.uri
|
724
|
-
abs_uri = to_absolute_uri(response['Location'].to_s, page)
|
725
505
|
raise RedirectLimitReachedError.new(page, redirects) if redirects + 1 > redirection_limit
|
726
|
-
page = fetch_page(
|
506
|
+
page = fetch_page( :uri => response['Location'].to_s,
|
507
|
+
:referer => page,
|
508
|
+
:params => [],
|
509
|
+
:redirects => redirects + 1
|
510
|
+
)
|
727
511
|
@history.push(page, from_uri)
|
728
512
|
return page
|
729
513
|
elsif res_klass <= Net::HTTPUnauthorized
|
@@ -731,31 +515,26 @@ module WWW
|
|
731
515
|
raise ResponseCodeError.new(page) if @auth_hash.has_key?(uri.host)
|
732
516
|
if response['www-authenticate'] =~ /Digest/i
|
733
517
|
@auth_hash[uri.host] = :digest
|
518
|
+
if response['server'] =~ /Microsoft-IIS/
|
519
|
+
@auth_hash[uri.host] = :iis_digest
|
520
|
+
end
|
734
521
|
@digest = response['www-authenticate']
|
735
522
|
else
|
736
523
|
@auth_hash[uri.host] = :basic
|
737
524
|
end
|
738
|
-
|
739
|
-
|
740
|
-
|
741
|
-
|
742
|
-
|
743
|
-
|
525
|
+
return fetch_page( :uri => uri,
|
526
|
+
:referer => cur_page,
|
527
|
+
:verb => request.method.downcase.to_sym,
|
528
|
+
:params => request_data,
|
529
|
+
:headers => request.to_hash
|
530
|
+
)
|
744
531
|
end
|
745
532
|
|
746
533
|
raise ResponseCodeError.new(page), "Unhandled response", caller
|
747
534
|
end
|
748
535
|
|
749
|
-
def self.build_query_string(parameters)
|
750
|
-
parameters.map { |k,v|
|
751
|
-
k &&
|
752
|
-
[WEBrick::HTTPUtils.escape_form(k.to_s),
|
753
|
-
WEBrick::HTTPUtils.escape_form(v.to_s)].join("=")
|
754
|
-
}.compact.join('&')
|
755
|
-
end
|
756
|
-
|
757
536
|
def add_to_history(page)
|
758
|
-
@history.push(page,
|
537
|
+
@history.push(page, resolve(page.uri))
|
759
538
|
history_added.call(page) if history_added
|
760
539
|
end
|
761
540
|
end
|