mechanize 1.0.1.beta.20110107104205 → 2.0.pre.1
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of mechanize might be problematic. Click here for more details.
- data.tar.gz.sig +2 -0
- data/{lib/mechanize/chain/post_connect_hook.rb → .gemtest} +0 -0
- data/CHANGELOG.rdoc +51 -6
- data/EXAMPLES.rdoc +5 -3
- data/GUIDE.rdoc +72 -32
- data/LICENSE.rdoc +20 -340
- data/Manifest.txt +20 -27
- data/README.rdoc +12 -9
- data/Rakefile +5 -2
- data/examples/spider.rb +13 -2
- data/lib/mechanize.rb +545 -267
- data/lib/mechanize/content_type_error.rb +1 -1
- data/lib/mechanize/cookie.rb +72 -65
- data/lib/mechanize/cookie_jar.rb +197 -148
- data/lib/mechanize/element_matcher.rb +35 -0
- data/lib/mechanize/file.rb +3 -1
- data/lib/mechanize/file_connection.rb +17 -0
- data/lib/mechanize/file_request.rb +26 -0
- data/lib/mechanize/file_response.rb +61 -47
- data/lib/mechanize/form.rb +57 -58
- data/lib/mechanize/form/image_button.rb +2 -3
- data/lib/mechanize/form/multi_select_list.rb +71 -55
- data/lib/mechanize/form/select_list.rb +34 -62
- data/lib/mechanize/monkey_patch.rb +13 -11
- data/lib/mechanize/page.rb +277 -270
- data/lib/mechanize/page/image.rb +6 -2
- data/lib/mechanize/redirect_limit_reached_error.rb +1 -1
- data/lib/mechanize/redirect_not_get_or_head_error.rb +1 -1
- data/lib/mechanize/response_code_error.rb +3 -3
- data/lib/mechanize/unsupported_scheme_error.rb +1 -1
- data/lib/mechanize/uri_resolver.rb +82 -0
- data/lib/mechanize/util.rb +76 -60
- data/test/helper.rb +35 -5
- data/test/htdocs/dir with spaces/foo.html +1 -0
- data/test/htdocs/rails_3_encoding_hack_form_test.html +27 -0
- data/test/htdocs/tc_base_images.html +10 -0
- data/test/htdocs/tc_images.html +8 -0
- data/test/htdocs/test_click.html +11 -0
- data/test/servlets.rb +3 -2
- data/test/test_authenticate.rb +5 -5
- data/test/test_errors.rb +8 -8
- data/test/test_follow_meta.rb +4 -4
- data/test/test_form_as_hash.rb +4 -4
- data/test/test_forms.rb +3 -7
- data/test/test_hash_api.rb +2 -2
- data/test/test_headers.rb +1 -1
- data/test/test_images.rb +19 -0
- data/test/test_mech.rb +6 -6
- data/test/test_mechanize.rb +687 -0
- data/test/{test_cookie_class.rb → test_mechanize_cookie.rb} +52 -45
- data/test/test_mechanize_cookie_jar.rb +400 -0
- data/test/test_mechanize_file.rb +7 -1
- data/test/test_mechanize_file_request.rb +19 -0
- data/test/test_mechanize_file_response.rb +21 -0
- data/test/test_mechanize_form_image_button.rb +12 -0
- data/test/test_mechanize_page.rb +165 -0
- data/test/test_mechanize_uri_resolver.rb +29 -0
- data/test/{test_util.rb → test_mechanize_util.rb} +1 -1
- data/test/test_multi_select.rb +12 -0
- data/test/test_post_form.rb +7 -0
- data/test/test_redirect_verb_handling.rb +6 -6
- data/test/test_scheme.rb +0 -7
- data/test/test_verbs.rb +3 -3
- metadata +106 -72
- metadata.gz.sig +0 -0
- data/lib/mechanize/chain.rb +0 -36
- data/lib/mechanize/chain/auth_headers.rb +0 -78
- data/lib/mechanize/chain/body_decoding_handler.rb +0 -50
- data/lib/mechanize/chain/connection_resolver.rb +0 -28
- data/lib/mechanize/chain/custom_headers.rb +0 -21
- data/lib/mechanize/chain/handler.rb +0 -9
- data/lib/mechanize/chain/header_resolver.rb +0 -48
- data/lib/mechanize/chain/parameter_resolver.rb +0 -22
- data/lib/mechanize/chain/pre_connect_hook.rb +0 -20
- data/lib/mechanize/chain/request_resolver.rb +0 -31
- data/lib/mechanize/chain/response_body_parser.rb +0 -36
- data/lib/mechanize/chain/response_header_handler.rb +0 -34
- data/lib/mechanize/chain/response_reader.rb +0 -39
- data/lib/mechanize/chain/ssl_resolver.rb +0 -40
- data/lib/mechanize/chain/uri_resolver.rb +0 -75
- data/test/chain/test_argument_validator.rb +0 -14
- data/test/chain/test_auth_headers.rb +0 -25
- data/test/chain/test_custom_headers.rb +0 -18
- data/test/chain/test_header_resolver.rb +0 -27
- data/test/chain/test_parameter_resolver.rb +0 -35
- data/test/chain/test_request_resolver.rb +0 -29
- data/test/chain/test_response_reader.rb +0 -24
- data/test/test_cookie_jar.rb +0 -324
- data/test/test_page.rb +0 -124
data/Manifest.txt
CHANGED
@@ -12,26 +12,13 @@ examples/proxy_req.rb
|
|
12
12
|
examples/rubyforge.rb
|
13
13
|
examples/spider.rb
|
14
14
|
lib/mechanize.rb
|
15
|
-
lib/mechanize/chain.rb
|
16
|
-
lib/mechanize/chain/auth_headers.rb
|
17
|
-
lib/mechanize/chain/body_decoding_handler.rb
|
18
|
-
lib/mechanize/chain/connection_resolver.rb
|
19
|
-
lib/mechanize/chain/custom_headers.rb
|
20
|
-
lib/mechanize/chain/handler.rb
|
21
|
-
lib/mechanize/chain/header_resolver.rb
|
22
|
-
lib/mechanize/chain/parameter_resolver.rb
|
23
|
-
lib/mechanize/chain/post_connect_hook.rb
|
24
|
-
lib/mechanize/chain/pre_connect_hook.rb
|
25
|
-
lib/mechanize/chain/request_resolver.rb
|
26
|
-
lib/mechanize/chain/response_body_parser.rb
|
27
|
-
lib/mechanize/chain/response_header_handler.rb
|
28
|
-
lib/mechanize/chain/response_reader.rb
|
29
|
-
lib/mechanize/chain/ssl_resolver.rb
|
30
|
-
lib/mechanize/chain/uri_resolver.rb
|
31
15
|
lib/mechanize/content_type_error.rb
|
32
16
|
lib/mechanize/cookie.rb
|
33
17
|
lib/mechanize/cookie_jar.rb
|
18
|
+
lib/mechanize/element_matcher.rb
|
34
19
|
lib/mechanize/file.rb
|
20
|
+
lib/mechanize/file_connection.rb
|
21
|
+
lib/mechanize/file_request.rb
|
35
22
|
lib/mechanize/file_response.rb
|
36
23
|
lib/mechanize/file_saver.rb
|
37
24
|
lib/mechanize/form.rb
|
@@ -60,14 +47,8 @@ lib/mechanize/redirect_limit_reached_error.rb
|
|
60
47
|
lib/mechanize/redirect_not_get_or_head_error.rb
|
61
48
|
lib/mechanize/response_code_error.rb
|
62
49
|
lib/mechanize/unsupported_scheme_error.rb
|
50
|
+
lib/mechanize/uri_resolver.rb
|
63
51
|
lib/mechanize/util.rb
|
64
|
-
test/chain/test_argument_validator.rb
|
65
|
-
test/chain/test_auth_headers.rb
|
66
|
-
test/chain/test_custom_headers.rb
|
67
|
-
test/chain/test_header_resolver.rb
|
68
|
-
test/chain/test_parameter_resolver.rb
|
69
|
-
test/chain/test_request_resolver.rb
|
70
|
-
test/chain/test_response_reader.rb
|
71
52
|
test/data/htpasswd
|
72
53
|
test/data/server.crt
|
73
54
|
test/data/server.csr
|
@@ -77,6 +58,7 @@ test/helper.rb
|
|
77
58
|
test/htdocs/alt_text.html
|
78
59
|
test/htdocs/bad_form_test.html
|
79
60
|
test/htdocs/button.jpg
|
61
|
+
test/htdocs/dir with spaces/foo.html
|
80
62
|
test/htdocs/empty_form.html
|
81
63
|
test/htdocs/file_upload.html
|
82
64
|
test/htdocs/find_link.html
|
@@ -97,9 +79,11 @@ test/htdocs/index.html
|
|
97
79
|
test/htdocs/link with space.html
|
98
80
|
test/htdocs/meta_cookie.html
|
99
81
|
test/htdocs/no_title_test.html
|
82
|
+
test/htdocs/rails_3_encoding_hack_form_test.html
|
100
83
|
test/htdocs/relative/tc_relative_links.html
|
101
84
|
test/htdocs/tc_bad_charset.html
|
102
85
|
test/htdocs/tc_bad_links.html
|
86
|
+
test/htdocs/tc_base_images.html
|
103
87
|
test/htdocs/tc_base_link.html
|
104
88
|
test/htdocs/tc_blank_form.html
|
105
89
|
test/htdocs/tc_charset.html
|
@@ -108,6 +92,7 @@ test/htdocs/tc_encoded_links.html
|
|
108
92
|
test/htdocs/tc_field_precedence.html
|
109
93
|
test/htdocs/tc_follow_meta.html
|
110
94
|
test/htdocs/tc_form_action.html
|
95
|
+
test/htdocs/tc_images.html
|
111
96
|
test/htdocs/tc_links.html
|
112
97
|
test/htdocs/tc_meta_in_body.html
|
113
98
|
test/htdocs/tc_no_attributes.html
|
@@ -117,6 +102,7 @@ test/htdocs/tc_referer.html
|
|
117
102
|
test/htdocs/tc_relative_links.html
|
118
103
|
test/htdocs/tc_textarea.html
|
119
104
|
test/htdocs/test_bad_encoding.html
|
105
|
+
test/htdocs/test_click.html
|
120
106
|
test/htdocs/unusual______.html
|
121
107
|
test/servlets.rb
|
122
108
|
test/ssl_server.rb
|
@@ -125,8 +111,6 @@ test/test_bad_links.rb
|
|
125
111
|
test/test_blank_form.rb
|
126
112
|
test/test_checkboxes.rb
|
127
113
|
test/test_content_type.rb
|
128
|
-
test/test_cookie_class.rb
|
129
|
-
test/test_cookie_jar.rb
|
130
114
|
test/test_cookies.rb
|
131
115
|
test/test_encoded_links.rb
|
132
116
|
test/test_errors.rb
|
@@ -141,19 +125,29 @@ test/test_frames.rb
|
|
141
125
|
test/test_get_headers.rb
|
142
126
|
test/test_gzipping.rb
|
143
127
|
test/test_hash_api.rb
|
128
|
+
test/test_headers.rb
|
144
129
|
test/test_history.rb
|
145
130
|
test/test_history_added.rb
|
146
131
|
test/test_html_unscape_forms.rb
|
147
132
|
test/test_if_modified_since.rb
|
133
|
+
test/test_images.rb
|
148
134
|
test/test_links.rb
|
149
135
|
test/test_mech.rb
|
150
136
|
test/test_mech_proxy.rb
|
137
|
+
test/test_mechanize.rb
|
138
|
+
test/test_mechanize_cookie.rb
|
139
|
+
test/test_mechanize_cookie_jar.rb
|
151
140
|
test/test_mechanize_file.rb
|
141
|
+
test/test_mechanize_file_request.rb
|
142
|
+
test/test_mechanize_file_response.rb
|
143
|
+
test/test_mechanize_form_image_button.rb
|
144
|
+
test/test_mechanize_page.rb
|
145
|
+
test/test_mechanize_uri_resolver.rb
|
146
|
+
test/test_mechanize_util.rb
|
152
147
|
test/test_meta.rb
|
153
148
|
test/test_multi_select.rb
|
154
149
|
test/test_no_attributes.rb
|
155
150
|
test/test_option.rb
|
156
|
-
test/test_page.rb
|
157
151
|
test/test_pluggable_parser.rb
|
158
152
|
test/test_post_form.rb
|
159
153
|
test/test_pretty_print.rb
|
@@ -175,5 +169,4 @@ test/test_ssl_server.rb
|
|
175
169
|
test/test_subclass.rb
|
176
170
|
test/test_textarea.rb
|
177
171
|
test/test_upload.rb
|
178
|
-
test/test_util.rb
|
179
172
|
test/test_verbs.rb
|
data/README.rdoc
CHANGED
@@ -5,7 +5,7 @@
|
|
5
5
|
|
6
6
|
== DESCRIPTION
|
7
7
|
|
8
|
-
The Mechanize library is used for automating interaction with websites.
|
8
|
+
The Mechanize library is used for automating interaction with websites.
|
9
9
|
Mechanize automatically stores and sends cookies, follows redirects,
|
10
10
|
can follow links, and submit forms. Form fields can be populated and
|
11
11
|
submitted. Mechanize also keeps track of the sites that you have visited as
|
@@ -13,7 +13,7 @@ a history.
|
|
13
13
|
|
14
14
|
== Dependencies
|
15
15
|
|
16
|
-
* ruby 1.8.
|
16
|
+
* ruby 1.8.7
|
17
17
|
* nokogiri[http://nokogiri.rubyforge.org]
|
18
18
|
|
19
19
|
== SUPPORT:
|
@@ -28,21 +28,24 @@ The bug tracker is available here:
|
|
28
28
|
|
29
29
|
== Examples
|
30
30
|
|
31
|
-
If you are just starting, check out the GUIDE.
|
31
|
+
If you are just starting, check out the GUIDE.
|
32
32
|
Also, check out the EXAMPLES file.
|
33
33
|
|
34
34
|
== Authors
|
35
35
|
|
36
|
-
Copyright (c) 2005 by Michael Neumann (mneumann@ntecs.de)
|
36
|
+
Copyright (c) 2005 by Michael Neumann (mneumann@ntecs.de)
|
37
37
|
|
38
|
-
Copyright (c) 2006-
|
38
|
+
Copyright (c) 2006-2011:
|
39
39
|
|
40
40
|
* {Aaron Patterson}[http://tenderlovemaking.com] (aaronp@rubyforge.org)
|
41
41
|
* {Mike Dalessio}[http://mike.daless.io] (mike@csa.net)
|
42
42
|
|
43
|
+
Copyright (c) 2011:
|
44
|
+
|
45
|
+
* {Eric Hodel}[http://blog.segment7.net] (drbrain@segment7.net)
|
46
|
+
|
43
47
|
This library comes with a shameless plug for employing me
|
44
|
-
(Aaron[http://tenderlovemaking.com/]) programming
|
45
|
-
Ruby, my favorite language!
|
48
|
+
(Aaron[http://tenderlovemaking.com/]) programming Ruby, my favorite language!
|
46
49
|
|
47
50
|
== Acknowledgments
|
48
51
|
|
@@ -53,8 +56,8 @@ perl Mechanize which is available here[http://search.cpan.org/~petdance/WWW-Mech
|
|
53
56
|
Thank you to Michael Neumann for starting the Ruby version. Thanks to everyone
|
54
57
|
who's helped out in various ways. Finally, thank you to the people using this
|
55
58
|
library!
|
56
|
-
|
59
|
+
|
57
60
|
== License
|
58
61
|
|
59
|
-
This library is distributed under the
|
62
|
+
This library is distributed under the MIT license. Please see the LICENSE file.
|
60
63
|
|
data/Rakefile
CHANGED
@@ -5,14 +5,17 @@ Hoe.plugin :gemspec
|
|
5
5
|
Hoe.plugin :git
|
6
6
|
|
7
7
|
Hoe.spec 'mechanize' do
|
8
|
+
developer 'Eric Hodel', 'drbrain@segment7.net'
|
8
9
|
developer 'Aaron Patterson', 'aaronp@rubyforge.org'
|
9
10
|
developer 'Mike Dalessio', 'mike.dalessio@gmail.com'
|
10
11
|
|
11
12
|
self.readme_file = 'README.rdoc'
|
12
13
|
self.history_file = 'CHANGELOG.rdoc'
|
13
14
|
self.extra_rdoc_files += Dir['*.rdoc']
|
14
|
-
self.extra_deps << ['nokogiri', '
|
15
|
-
self.extra_deps << ['net-http-persistent', '~> 1.
|
15
|
+
self.extra_deps << ['nokogiri', '~> 1.4']
|
16
|
+
self.extra_deps << ['net-http-persistent', '~> 1.6']
|
17
|
+
self.extra_deps << ['net-http-digest_auth', '~> 1.1', '>= 1.1.1']
|
18
|
+
self.spec_extras[:required_ruby_version] = '>= 1.8.7'
|
16
19
|
end
|
17
20
|
|
18
21
|
desc "Update SSL Certificate"
|
data/examples/spider.rb
CHANGED
@@ -5,7 +5,18 @@ require 'mechanize'
|
|
5
5
|
|
6
6
|
agent = Mechanize.new
|
7
7
|
stack = agent.get(ARGV[0]).links
|
8
|
+
|
8
9
|
while l = stack.pop
|
9
|
-
|
10
|
-
|
10
|
+
host = l.uri.host
|
11
|
+
next unless host.nil? or host == agent.history.first.uri.host
|
12
|
+
next if agent.visited? l.href
|
13
|
+
|
14
|
+
puts "crawling #{l.uri}"
|
15
|
+
begin
|
16
|
+
page = agent.click(l)
|
17
|
+
next unless Mechanize::Page === page
|
18
|
+
stack.push(*page.links)
|
19
|
+
rescue Mechanize::ResponseCodeError
|
20
|
+
end
|
11
21
|
end
|
22
|
+
|
data/lib/mechanize.rb
CHANGED
@@ -1,34 +1,17 @@
|
|
1
|
-
require 'openssl'
|
2
|
-
require 'net/http/persistent'
|
3
|
-
require 'uri'
|
4
|
-
require 'webrick/httputils'
|
5
|
-
require 'zlib'
|
6
|
-
require 'stringio'
|
7
|
-
require 'digest/md5'
|
8
1
|
require 'fileutils'
|
9
|
-
require 'nokogiri'
|
10
2
|
require 'forwardable'
|
11
|
-
|
12
3
|
require 'iconv' if RUBY_VERSION < '1.9.2'
|
13
|
-
|
14
|
-
require 'nkf'
|
15
4
|
require 'mutex_m'
|
5
|
+
require 'net/http/digest_auth'
|
6
|
+
require 'net/http/persistent'
|
7
|
+
require 'nkf'
|
8
|
+
require 'nokogiri'
|
9
|
+
require 'openssl'
|
10
|
+
require 'stringio'
|
11
|
+
require 'uri'
|
12
|
+
require 'webrick/httputils'
|
13
|
+
require 'zlib'
|
16
14
|
|
17
|
-
require 'mechanize/util'
|
18
|
-
require 'mechanize/content_type_error'
|
19
|
-
require 'mechanize/response_code_error'
|
20
|
-
require 'mechanize/unsupported_scheme_error'
|
21
|
-
require 'mechanize/redirect_limit_reached_error'
|
22
|
-
require 'mechanize/redirect_not_get_or_head_error'
|
23
|
-
require 'mechanize/cookie'
|
24
|
-
require 'mechanize/cookie_jar'
|
25
|
-
require 'mechanize/history'
|
26
|
-
require 'mechanize/form'
|
27
|
-
require 'mechanize/pluggable_parsers'
|
28
|
-
require 'mechanize/file_response'
|
29
|
-
require 'mechanize/inspect'
|
30
|
-
require 'mechanize/chain'
|
31
|
-
require 'mechanize/monkey_patch'
|
32
15
|
|
33
16
|
# = Synopsis
|
34
17
|
# The Mechanize library is used for automating interaction with a website. It
|
@@ -48,12 +31,22 @@ require 'mechanize/monkey_patch'
|
|
48
31
|
# search_results = agent.submit(search_form)
|
49
32
|
# puts search_results.body
|
50
33
|
class Mechanize
|
34
|
+
|
51
35
|
##
|
52
36
|
# The version of Mechanize you are using.
|
53
|
-
VERSION = '
|
37
|
+
VERSION = '2.0'
|
38
|
+
|
39
|
+
class Error < RuntimeError
|
40
|
+
end
|
54
41
|
|
42
|
+
ruby_version = if RUBY_PATCHLEVEL >= 0 then
|
43
|
+
"#{RUBY_VERSION}p#{RUBY_PATCHLEVEL}"
|
44
|
+
else
|
45
|
+
"#{RUBY_VERSION}dev#{RUBY_REVISION}"
|
46
|
+
end
|
55
47
|
##
|
56
48
|
# User Agent aliases
|
49
|
+
|
57
50
|
AGENT_ALIASES = {
|
58
51
|
'Windows IE 6' => 'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)',
|
59
52
|
'Windows IE 7' => 'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; .NET CLR 1.1.4322; .NET CLR 2.0.50727)',
|
@@ -65,16 +58,35 @@ class Mechanize
|
|
65
58
|
'Linux Firefox' => 'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.2.1) Gecko/20100122 firefox/3.6.1',
|
66
59
|
'Linux Konqueror' => 'Mozilla/5.0 (compatible; Konqueror/3; Linux)',
|
67
60
|
'iPhone' => 'Mozilla/5.0 (iPhone; U; CPU like Mac OS X; en) AppleWebKit/420+ (KHTML, like Gecko) Version/3.0 Mobile/1C28 Safari/419.3',
|
68
|
-
'Mechanize' => "
|
61
|
+
'Mechanize' => "Mechanize/#{VERSION} Ruby/#{ruby_version} (http://github.com/tenderlove/mechanize/)"
|
69
62
|
}
|
70
63
|
|
64
|
+
# A Mechanize::CookieJar which stores cookies
|
71
65
|
attr_accessor :cookie_jar
|
72
|
-
|
66
|
+
|
67
|
+
# Length of time to wait until a connection is opened in seconds
|
68
|
+
attr_accessor :open_timeout
|
69
|
+
|
70
|
+
# Length of time to attempt to read data from the server
|
71
|
+
attr_accessor :read_timeout
|
72
|
+
|
73
|
+
# The identification string for the client initiating a web request
|
73
74
|
attr_accessor :user_agent
|
75
|
+
|
76
|
+
# The value of watch_for_set is passed to pluggable parsers for retrieved
|
77
|
+
# content
|
74
78
|
attr_accessor :watch_for_set
|
79
|
+
|
80
|
+
# Path to an OpenSSL server certificate file
|
75
81
|
attr_accessor :ca_file
|
82
|
+
|
83
|
+
# An OpenSSL private key or the path to a private key
|
76
84
|
attr_accessor :key
|
85
|
+
|
86
|
+
# An OpenSSL client certificate or the path to a certificate file.
|
77
87
|
attr_accessor :cert
|
88
|
+
|
89
|
+
# OpenSSL key password
|
78
90
|
attr_accessor :pass
|
79
91
|
|
80
92
|
# Controls how this agent deals with redirects. If it is set to
|
@@ -84,12 +96,25 @@ class Mechanize
|
|
84
96
|
# redirects are followed.
|
85
97
|
attr_accessor :redirect_ok
|
86
98
|
|
99
|
+
# Disables HTTP/1.1 gzip compression (enabled by default)
|
87
100
|
attr_accessor :gzip_enabled
|
101
|
+
|
102
|
+
# HTTP/1.0 keep-alive time
|
88
103
|
attr_accessor :keep_alive_time
|
104
|
+
|
105
|
+
# HTTP/1.1 keep-alives are always active. This does nothing.
|
89
106
|
attr_accessor :keep_alive
|
107
|
+
|
108
|
+
# Disables If-Modified-Since conditional requests (enabled by default)
|
90
109
|
attr_accessor :conditional_requests
|
110
|
+
|
111
|
+
# Follow HTML meta refresh
|
91
112
|
attr_accessor :follow_meta_refresh
|
113
|
+
|
114
|
+
# A callback for additional certificate verification. See
|
115
|
+
# OpenSSL::SSL::SSLContext#verify_callback
|
92
116
|
attr_accessor :verify_callback
|
117
|
+
|
93
118
|
attr_accessor :history_added
|
94
119
|
attr_accessor :scheme_handlers
|
95
120
|
attr_accessor :redirection_limit
|
@@ -111,6 +136,16 @@ class Mechanize
|
|
111
136
|
attr_reader :history
|
112
137
|
attr_reader :pluggable_parser
|
113
138
|
|
139
|
+
# A list of hooks to call after retrieving a response. Hooks are called with
|
140
|
+
# the agent and the response returned.
|
141
|
+
|
142
|
+
attr_reader :post_connect_hooks
|
143
|
+
|
144
|
+
# A list of hooks to call before making a request. Hooks are called with
|
145
|
+
# the agent and the request to be performed.
|
146
|
+
|
147
|
+
attr_reader :pre_connect_hooks
|
148
|
+
|
114
149
|
alias :follow_redirect? :redirect_ok
|
115
150
|
|
116
151
|
@html_parser = Nokogiri::HTML
|
@@ -138,6 +173,7 @@ class Mechanize
|
|
138
173
|
# callback for OpenSSL errors while verifying the server certificate
|
139
174
|
# chain, can be used for debugging or to ignore errors by always
|
140
175
|
# returning _true_
|
176
|
+
# specifying nil uses the default method that was valid when the SSL was created
|
141
177
|
@verify_callback = nil
|
142
178
|
@cert = nil # OpenSSL Certificate
|
143
179
|
@key = nil # OpenSSL Private Key
|
@@ -153,6 +189,7 @@ class Mechanize
|
|
153
189
|
@user = nil # Auth User
|
154
190
|
@password = nil # Auth Password
|
155
191
|
@digest = nil # DigestAuth Digest
|
192
|
+
@digest_auth = Net::HTTP::DigestAuth.new
|
156
193
|
@auth_hash = {} # Keep track of urls for sending auth
|
157
194
|
@request_headers= {} # A hash of request headers to be used
|
158
195
|
|
@@ -165,23 +202,27 @@ class Mechanize
|
|
165
202
|
@keep_alive_time = 300
|
166
203
|
@keep_alive = true
|
167
204
|
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
|
173
|
-
|
174
|
-
@
|
175
|
-
@scheme_handlers
|
176
|
-
@scheme_handlers['file'] = @scheme_handlers['http']
|
205
|
+
# Proxy
|
206
|
+
@proxy_addr = nil
|
207
|
+
@proxy_port = nil
|
208
|
+
@proxy_user = nil
|
209
|
+
@proxy_pass = nil
|
210
|
+
|
211
|
+
@resolver = Mechanize::URIResolver.new
|
212
|
+
@scheme_handlers = @resolver.scheme_handlers
|
177
213
|
|
178
|
-
@
|
179
|
-
@
|
214
|
+
@pre_connect_hooks = []
|
215
|
+
@post_connect_hooks = []
|
180
216
|
|
181
|
-
set_http
|
182
217
|
@html_parser = self.class.html_parser
|
183
218
|
|
184
219
|
yield self if block_given?
|
220
|
+
|
221
|
+
if @proxy_addr and @proxy_pass then
|
222
|
+
set_proxy @proxy_addr, @proxy_port, @proxy_user, @proxy_pass
|
223
|
+
else
|
224
|
+
set_http
|
225
|
+
end
|
185
226
|
end
|
186
227
|
|
187
228
|
def max_history=(length); @history.max_size = length end
|
@@ -189,14 +230,6 @@ class Mechanize
|
|
189
230
|
def log=(l); self.class.log = l end
|
190
231
|
def log; self.class.log end
|
191
232
|
|
192
|
-
def pre_connect_hooks
|
193
|
-
@pre_connect_hook.hooks
|
194
|
-
end
|
195
|
-
|
196
|
-
def post_connect_hooks
|
197
|
-
@post_connect_hook.hooks
|
198
|
-
end
|
199
|
-
|
200
233
|
# Sets the proxy address, port, user, and password
|
201
234
|
# +addr+ should be a host, with no "http://"
|
202
235
|
def set_proxy(addr, port, user = nil, pass = nil)
|
@@ -213,7 +246,8 @@ class Mechanize
|
|
213
246
|
# Set the user agent for the Mechanize object.
|
214
247
|
# See AGENT_ALIASES
|
215
248
|
def user_agent_alias=(al)
|
216
|
-
|
249
|
+
@user_agent = AGENT_ALIASES[al] ||
|
250
|
+
raise(ArgumentError, "unknown agent alias")
|
217
251
|
end
|
218
252
|
|
219
253
|
# Returns a list of cookies stored in the cookie jar.
|
@@ -230,7 +264,7 @@ class Mechanize
|
|
230
264
|
|
231
265
|
# Fetches the URL passed in and returns a page.
|
232
266
|
def get(options, parameters = [], referer = nil)
|
233
|
-
|
267
|
+
method = :get
|
234
268
|
|
235
269
|
unless options.is_a? Hash
|
236
270
|
url = options
|
@@ -239,11 +273,11 @@ class Mechanize
|
|
239
273
|
parameters = []
|
240
274
|
end
|
241
275
|
else
|
242
|
-
raise ArgumentError
|
276
|
+
raise ArgumentError, "url must be specified" unless url = options[:url]
|
243
277
|
parameters = options[:params] || []
|
244
278
|
referer = options[:referer]
|
245
279
|
headers = options[:headers]
|
246
|
-
|
280
|
+
method = options[:verb] || method
|
247
281
|
end
|
248
282
|
|
249
283
|
unless referer
|
@@ -264,51 +298,41 @@ class Mechanize
|
|
264
298
|
end
|
265
299
|
|
266
300
|
# fetch the page
|
267
|
-
|
268
|
-
|
269
|
-
:headers => headers || {},
|
270
|
-
:verb => verb,
|
271
|
-
:params => parameters
|
272
|
-
)
|
301
|
+
headers ||= {}
|
302
|
+
page = fetch_page url, method, headers, parameters, referer
|
273
303
|
add_to_history(page)
|
274
304
|
yield page if block_given?
|
275
305
|
page
|
276
306
|
end
|
277
307
|
|
278
|
-
|
279
|
-
# PUT to +url+ with +entity+, and setting +
|
308
|
+
##
|
309
|
+
# PUT to +url+ with +entity+, and setting +headers+:
|
280
310
|
#
|
281
|
-
# put('http://
|
311
|
+
# put('http://example/', 'new content', {'Content-Type' => 'text/plain'})
|
282
312
|
#
|
283
|
-
def put(url, entity,
|
284
|
-
request_with_entity(:put, url, entity,
|
313
|
+
def put(url, entity, headers = {})
|
314
|
+
request_with_entity(:put, url, entity, headers)
|
285
315
|
end
|
286
316
|
|
287
|
-
|
288
|
-
# DELETE to +url+ with +query_params+, and setting +
|
317
|
+
##
|
318
|
+
# DELETE to +url+ with +query_params+, and setting +headers+:
|
289
319
|
#
|
290
|
-
# delete('http://
|
320
|
+
# delete('http://example/', {'q' => 'foo'}, {})
|
291
321
|
#
|
292
|
-
def delete(
|
293
|
-
page =
|
322
|
+
def delete(uri, query_params = {}, headers = {})
|
323
|
+
page = fetch_page(uri, :delete, headers, query_params)
|
294
324
|
add_to_history(page)
|
295
325
|
page
|
296
326
|
end
|
297
327
|
|
298
|
-
|
299
|
-
# HEAD to +url+ with +query_params+, and setting +
|
328
|
+
##
|
329
|
+
# HEAD to +url+ with +query_params+, and setting +headers+:
|
300
330
|
#
|
301
|
-
# head('http://
|
331
|
+
# head('http://example/', {'q' => 'foo'}, {})
|
302
332
|
#
|
303
|
-
def head(
|
304
|
-
options = {
|
305
|
-
:uri => url,
|
306
|
-
:headers => {},
|
307
|
-
:params => query_params,
|
308
|
-
:verb => :head
|
309
|
-
}.merge(options)
|
333
|
+
def head(uri, query_params = {}, headers = {})
|
310
334
|
# fetch the page
|
311
|
-
page = fetch_page(
|
335
|
+
page = fetch_page(uri, :head, headers, query_params)
|
312
336
|
yield page if block_given?
|
313
337
|
page
|
314
338
|
end
|
@@ -360,7 +384,7 @@ class Mechanize
|
|
360
384
|
# agent.post('http://example.com/', "<message>hello</message>", 'Content-Type' => 'application/xml')
|
361
385
|
def post(url, query={}, headers={})
|
362
386
|
if query.is_a?(String)
|
363
|
-
return request_with_entity(:post, url, query,
|
387
|
+
return request_with_entity(:post, url, query, headers)
|
364
388
|
end
|
365
389
|
node = {}
|
366
390
|
# Create a fake form
|
@@ -371,7 +395,8 @@ class Mechanize
|
|
371
395
|
node['enctype'] = 'application/x-www-form-urlencoded'
|
372
396
|
|
373
397
|
form = Form.new(node)
|
374
|
-
|
398
|
+
|
399
|
+
query.each { |k, v|
|
375
400
|
if v.is_a?(IO)
|
376
401
|
form.enctype = 'multipart/form-data'
|
377
402
|
ul = Form::FileUpload.new({'name' => k.to_s},::File.basename(v.path))
|
@@ -402,31 +427,19 @@ class Mechanize
|
|
402
427
|
:referer => form.page
|
403
428
|
)
|
404
429
|
else
|
405
|
-
raise "unsupported method: #{form.method.upcase}"
|
430
|
+
raise ArgumentError, "unsupported method: #{form.method.upcase}"
|
406
431
|
end
|
407
432
|
end
|
408
433
|
|
409
|
-
def request_with_entity(verb,
|
410
|
-
cur_page = current_page || Page.new(
|
411
|
-
|
412
|
-
options = {
|
413
|
-
:uri => url,
|
414
|
-
:referer => cur_page,
|
415
|
-
:headers => {},
|
416
|
-
}.update(options)
|
434
|
+
def request_with_entity(verb, uri, entity, headers = {})
|
435
|
+
cur_page = current_page || Page.new(nil, {'content-type'=>'text/html'})
|
417
436
|
|
418
437
|
headers = {
|
419
438
|
'Content-Type' => 'application/octet-stream',
|
420
439
|
'Content-Length' => entity.size.to_s,
|
421
|
-
}.update
|
422
|
-
|
423
|
-
options.update({
|
424
|
-
:verb => verb,
|
425
|
-
:params => [entity],
|
426
|
-
:headers => headers,
|
427
|
-
})
|
440
|
+
}.update headers
|
428
441
|
|
429
|
-
page = fetch_page
|
442
|
+
page = fetch_page uri, verb, headers, [entity], cur_page
|
430
443
|
add_to_history(page)
|
431
444
|
page
|
432
445
|
end
|
@@ -462,14 +475,347 @@ class Mechanize
|
|
462
475
|
|
463
476
|
alias :page :current_page
|
464
477
|
|
478
|
+
def connection_for uri
|
479
|
+
case uri.scheme.downcase
|
480
|
+
when 'http', 'https' then
|
481
|
+
return @http
|
482
|
+
when 'file' then
|
483
|
+
return Mechanize::FileConnection.new
|
484
|
+
end
|
485
|
+
end
|
486
|
+
|
487
|
+
def enable_gzip request
|
488
|
+
request['accept-encoding'] = if @gzip_enabled
|
489
|
+
'gzip,deflate,identity'
|
490
|
+
else
|
491
|
+
'identity'
|
492
|
+
end
|
493
|
+
end
|
494
|
+
|
495
|
+
def http_request uri, method, params = nil
|
496
|
+
case uri.scheme.downcase
|
497
|
+
when 'http', 'https' then
|
498
|
+
klass = Net::HTTP.const_get(method.to_s.capitalize)
|
499
|
+
|
500
|
+
request ||= klass.new(uri.request_uri)
|
501
|
+
request.body = params.first if params
|
502
|
+
|
503
|
+
request
|
504
|
+
when 'file' then
|
505
|
+
Mechanize::FileRequest.new uri
|
506
|
+
end
|
507
|
+
end
|
508
|
+
|
509
|
+
##
|
510
|
+
# Invokes hooks added to post_connect_hooks after a +response+ is returned.
|
511
|
+
# Yields the +agent+ and the +response+ returned to each hook.
|
512
|
+
|
513
|
+
def post_connect response # :yields: agent, response
|
514
|
+
@post_connect_hooks.each do |hook|
|
515
|
+
hook.call self, response
|
516
|
+
end
|
517
|
+
end
|
518
|
+
|
519
|
+
##
|
520
|
+
# Invokes hooks added to pre_connect_hooks before a +request+ is made.
|
521
|
+
# Yields the +agent+ and the +request+ that will be performed to each hook.
|
522
|
+
|
523
|
+
def pre_connect request # :yields: agent, request
|
524
|
+
@pre_connect_hooks.each do |hook|
|
525
|
+
hook.call self, request
|
526
|
+
end
|
527
|
+
end
|
528
|
+
|
529
|
+
def request_auth request, uri
|
530
|
+
auth_type = @auth_hash[uri.host]
|
531
|
+
|
532
|
+
return unless auth_type
|
533
|
+
|
534
|
+
case auth_type
|
535
|
+
when :basic
|
536
|
+
request.basic_auth @user, @password
|
537
|
+
when :digest, :iis_digest
|
538
|
+
uri.user = @user
|
539
|
+
uri.password = @password
|
540
|
+
|
541
|
+
iis = auth_type == :iis_digest
|
542
|
+
|
543
|
+
auth = @digest_auth.auth_header uri, @digest, request.method, iis
|
544
|
+
|
545
|
+
request['Authorization'] = auth
|
546
|
+
end
|
547
|
+
end
|
548
|
+
|
549
|
+
def request_cookies request, uri
|
550
|
+
return if @cookie_jar.empty? uri
|
551
|
+
|
552
|
+
cookies = @cookie_jar.cookies uri
|
553
|
+
|
554
|
+
return if cookies.empty?
|
555
|
+
|
556
|
+
request.add_field 'Cookie', cookies.join('; ')
|
557
|
+
end
|
558
|
+
|
559
|
+
def request_host request, uri
|
560
|
+
port = [80, 443].include?(uri.port.to_i) ? nil : uri.port
|
561
|
+
host = uri.host
|
562
|
+
|
563
|
+
request['Host'] = [host, port].compact.join ':'
|
564
|
+
end
|
565
|
+
|
566
|
+
def request_language_charset request
|
567
|
+
request['accept-charset'] = 'ISO-8859-1,utf-8;q=0.7,*;q=0.7'
|
568
|
+
request['accept-language'] = 'en-us,en;q=0.5'
|
569
|
+
end
|
570
|
+
|
571
|
+
# Log specified headers for the request
|
572
|
+
def request_log request
|
573
|
+
return unless log
|
574
|
+
|
575
|
+
log.info("#{request.class}: #{request.path}")
|
576
|
+
|
577
|
+
request.each_header do |k, v|
|
578
|
+
log.debug("request-header: #{k} => #{v}")
|
579
|
+
end
|
580
|
+
end
|
581
|
+
|
582
|
+
def request_add_headers request, headers = {}
|
583
|
+
@request_headers.each do |k,v|
|
584
|
+
request[k] = v
|
585
|
+
end
|
586
|
+
|
587
|
+
headers.each do |field, value|
|
588
|
+
case field
|
589
|
+
when :etag then request["ETag"] = value
|
590
|
+
when :if_modified_since then request["If-Modified-Since"] = value
|
591
|
+
when Symbol then
|
592
|
+
raise ArgumentError, "unknown header symbol #{field}"
|
593
|
+
else
|
594
|
+
request[field] = value
|
595
|
+
end
|
596
|
+
end
|
597
|
+
end
|
598
|
+
|
599
|
+
def request_referer request, uri, referer
|
600
|
+
return unless referer
|
601
|
+
return if 'https' == referer.scheme.downcase and
|
602
|
+
'https' != uri.scheme.downcase
|
603
|
+
|
604
|
+
request['Referer'] = referer
|
605
|
+
end
|
606
|
+
|
607
|
+
def request_user_agent request
|
608
|
+
request['User-Agent'] = @user_agent if @user_agent
|
609
|
+
end
|
610
|
+
|
611
|
+
def resolve_parameters uri, method, parameters
|
612
|
+
case method
|
613
|
+
when :head, :get, :delete, :trace then
|
614
|
+
if parameters and parameters.length > 0
|
615
|
+
uri.query ||= ''
|
616
|
+
uri.query << '&' if uri.query.length > 0
|
617
|
+
uri.query << Mechanize::Util.build_query_string(parameters)
|
618
|
+
end
|
619
|
+
|
620
|
+
return uri, nil
|
621
|
+
end
|
622
|
+
|
623
|
+
return uri, parameters
|
624
|
+
end
|
625
|
+
|
626
|
+
def response_cookies response, uri, page
|
627
|
+
if Mechanize::Page === page and page.body =~ /Set-Cookie/n
|
628
|
+
page.search('//head/meta[@http-equiv="Set-Cookie"]').each do |meta|
|
629
|
+
Mechanize::Cookie.parse(uri, meta['content']) { |c|
|
630
|
+
log.debug("saved cookie: #{c}") if log
|
631
|
+
@cookie_jar.add(uri, c)
|
632
|
+
}
|
633
|
+
end
|
634
|
+
end
|
635
|
+
|
636
|
+
header_cookies = response.get_fields 'Set-Cookie'
|
637
|
+
|
638
|
+
return unless header_cookies
|
639
|
+
|
640
|
+
header_cookies.each do |cookie|
|
641
|
+
Mechanize::Cookie.parse(uri, cookie) { |c|
|
642
|
+
log.debug("saved cookie: #{c}") if log
|
643
|
+
@cookie_jar.add(uri, c)
|
644
|
+
}
|
645
|
+
end
|
646
|
+
end
|
647
|
+
|
648
|
+
# Follows a refresh requested either by the first meta refresh of +page+
# or by the Refresh header of +response+.  Does nothing (returns nil)
# unless @follow_meta_refresh is set or when no refresh target is found.
#
# Both kinds of refresh count against redirection_limit, so a page that
# keeps refreshing to itself ends in a RedirectLimitReachedError instead
# of an endless fetch loop.
#
# Before following, the method sleeps for the requested delay and pushes
# +page+ onto @history.  Returns the page fetched from the refresh target.
#
# Raises Mechanize::Error when the Refresh header cannot be parsed and
# RedirectLimitReachedError when following would exceed the limit.
def response_follow_meta_refresh response, uri, page, redirects
  return unless @follow_meta_refresh

  redirect_uri = nil
  referer = page

  if page.respond_to?(:meta) and (redirect = page.meta.first)
    redirect_uri = Mechanize::Util.uri_unescape redirect.uri.to_s
    raise RedirectLimitReachedError.new(page, redirects) if
      redirects + 1 > redirection_limit
    sleep redirect.node['delay'].to_f
    # a meta refresh is followed with an empty page as referer
    referer = Page.new(nil, {'content-type'=>'text/html'})
  elsif refresh = response['refresh']
    delay, redirect_uri = Page::Meta.parse(refresh, uri)
    raise Mechanize::Error, 'Invalid refresh http header' unless delay
    raise RedirectLimitReachedError.new(page, redirects) if
      redirects + 1 > redirection_limit
    sleep delay.to_f
  end

  if redirect_uri
    @history.push(page, page.uri)
    fetch_page(redirect_uri, :get, {}, [], referer, redirects + 1)
  end
end
|
671
|
+
|
672
|
+
# Writes the status line of +response+ to the log at info level and every
# response header at debug level.  Does nothing when no logger is set.
def response_log response
  logger = log
  return unless logger

  status = "status: #{response.class} #{response.http_version} " \
           "#{response.code} #{response.message}"
  logger.info(status)

  response.each_header { |name, value|
    logger.debug("response-header: #{name} => #{value}")
  }
end
|
682
|
+
|
683
|
+
# Builds the parsed representation of a response.
#
# The media type is taken from the Content-Type header of +response+:
# everything after the first ';' is dropped, the rest is downcased and cut
# at the first ','.  The type (nil when there is no header) selects a
# parser class from @pluggable_parser, which is instantiated with +uri+,
# +response+, +body+ and the response code.
#
# Parsers that support it are handed this agent (mech=) and, when
# @watch_for_set is set, the watch list (watch_for_set=).
def response_parse response, body, uri
  header = response['Content-Type']
  content_type = nil

  unless header.nil?
    media, = header.split ';', 2
    content_type = media.downcase.split(',', 2).first unless media.nil?
  end

  # Find our pluggable parser
  klass = @pluggable_parser.parser(content_type)

  klass.new(uri, response, body, response.code) do |parser|
    parser.mech = self if parser.respond_to? :mech=

    if @watch_for_set and parser.respond_to?(:watch_for_set=)
      parser.watch_for_set = @watch_for_set
    end
  end
end
|
701
|
+
|
702
|
+
# Reads the body of +response+ and returns it decoded according to the
# Content-Encoding header.
#
# The body is collected chunk by chunk into a binary StringIO.  Unless
# +request+ is a HEAD request or +response+ is a redirection, a
# Content-Length header that disagrees with the number of bytes read
# raises EOFError.
#
# Supported encodings are none/7bit (body returned as is), deflate and
# gzip/x-gzip.  For deflate a failed zlib inflate is retried as a raw
# deflate stream; for gzip a failed read is retried as a raw deflate
# stream after skipping the first 10 bytes.  A body that still cannot be
# decoded yields ''.  A compressed response without any content yields nil.
#
# Responses without a Content-Length header (content_length is nil, as
# with chunked transfers) are handled: the header is treated as 0 when
# deciding whether there is anything to decompress.
#
# Raises Mechanize::ResponseCodeError for a Net::HTTPUnknownResponse and
# Mechanize::Error for an unsupported Content-Encoding.
def response_read response, request
  body = StringIO.new
  body.set_encoding Encoding::BINARY if body.respond_to? :set_encoding
  total = 0

  response.read_body { |part|
    total += part.length
    body.write(part)
    log.debug("Read #{total} bytes") if log
  }

  body.rewind

  raise Mechanize::ResponseCodeError, response if
    Net::HTTPUnknownResponse === response

  content_length = response.content_length

  unless Net::HTTP::Head === request or Net::HTTPRedirection === response then
    raise EOFError, "Content-Length (#{content_length}) does not match " \
                    "response body length (#{body.length})" if
      content_length and content_length != body.length
  end

  # content_length is nil when the header is absent; nil.to_i is 0, so the
  # decision then rests on the number of bytes actually read
  has_content = content_length.to_i > 0 || body.length > 0

  case response['Content-Encoding']
  when nil, 'none', '7bit' then
    body.string
  when 'deflate' then
    log.debug('deflate body') if log

    if has_content then
      begin
        Zlib::Inflate.inflate body.string
      rescue Zlib::BufError, Zlib::DataError
        log.error('Unable to inflate page, retrying with raw deflate') if log
        begin
          Zlib::Inflate.new(-Zlib::MAX_WBITS).inflate(body.string)
        rescue Zlib::BufError, Zlib::DataError
          log.error("unable to inflate page: #{$!}") if log
          ''
        end
      end
    end
  when 'gzip', 'x-gzip' then
    log.debug('gzip body') if log

    if has_content then
      begin
        zio = Zlib::GzipReader.new body
        zio.read
      rescue Zlib::BufError, Zlib::GzipFile::Error
        log.error('Unable to gunzip body, trying raw inflate') if log
        body.rewind
        body.read 10 # skip the first 10 bytes before inflating the rest
        Zlib::Inflate.new(-Zlib::MAX_WBITS).inflate(body.read)
      rescue Zlib::DataError
        log.error("unable to gunzip page: #{$!}") if log
        ''
      ensure
        zio.close if zio and not zio.closed?
      end
    end
  else
    raise Mechanize::Error,
          "Unsupported Content-Encoding: #{response['Content-Encoding']}"
  end
end
|
769
|
+
|
770
|
+
# Follows an HTTP redirect according to the @redirect_ok policy.
#
# * true or :all    - every redirect is followed
# * false or nil    - redirects are not followed; +page+ is returned
# * :permanent      - only Net::HTTPMovedPermanently responses are
#                     followed; for anything else +page+ is returned
#
# The Location header of +response+ is fetched with :head when the
# original +method+ was :head and with :get otherwise, using +page+ as
# the referer.  The fetched page is pushed onto @history under the URI of
# the redirecting page and returned.
#
# Raises RedirectLimitReachedError when following would exceed
# redirection_limit.
def response_redirect response, method, page, redirects
  case @redirect_ok
  when true, :all
    # shortcut
  when false, nil
    return page
  when :permanent
    # test the response itself; no local holds the response class here
    return page unless Net::HTTPMovedPermanently === response
  end

  log.info("follow redirect to: #{response['Location']}") if log

  from_uri = page.uri

  raise RedirectLimitReachedError.new(page, redirects) if
    redirects + 1 > redirection_limit

  redirect_method = method == :head ? :head : :get

  page = fetch_page(response['Location'].to_s, redirect_method, {}, [],
                    page, redirects + 1)

  @history.push(page, from_uri)

  return page
end
|
796
|
+
|
797
|
+
# Reacts to an authentication challenge by recording the scheme the host
# asked for and repeating the request.
#
# The scheme stored in @auth_hash for uri.host is :digest when the
# www-authenticate header mentions Digest (:iis_digest when the server
# header additionally names Microsoft-IIS) and :basic otherwise.  For
# digest schemes the challenge is kept in @digest.
#
# Raises ResponseCodeError when neither @user nor @password is set, or
# when a scheme was already recorded for the host.
#
# Returns the result of fetching +uri+ again with the method of +request+
# and the given +headers+, +params+ and +referer+.
def response_authenticate(response, page, uri, request, headers, params,
                          referer)
  raise ResponseCodeError, page unless @user || @password
  raise ResponseCodeError, page if @auth_hash.has_key?(uri.host)

  challenge = response['www-authenticate']

  scheme = if challenge =~ /Digest/i
             @digest = challenge
             response['server'] =~ /Microsoft-IIS/ ? :iis_digest : :digest
           else
             :basic
           end

  @auth_hash[uri.host] = scheme

  retry_method = request.method.downcase.to_sym
  fetch_page(uri, retry_method, headers, params, referer)
end
|
814
|
+
|
465
815
|
private
|
466
816
|
|
467
817
|
def resolve(url, referer = current_page())
|
468
|
-
|
469
|
-
Chain.new([
|
470
|
-
Chain::URIResolver.new(@scheme_handlers)
|
471
|
-
]).handle(hash)
|
472
|
-
hash[:uri].to_s
|
818
|
+
@resolver.resolve(url, referer).to_s
|
473
819
|
end
|
474
820
|
|
475
821
|
def set_http proxy = nil
|
@@ -481,184 +827,113 @@ class Mechanize
|
|
481
827
|
@http.verify_callback = @verify_callback
|
482
828
|
|
483
829
|
if @cert and @key then
|
484
|
-
|
485
|
-
|
830
|
+
cert = if OpenSSL::X509::Certificate === @cert then
|
831
|
+
@cert
|
832
|
+
else
|
833
|
+
OpenSSL::X509::Certificate.new ::File.read @cert
|
834
|
+
end
|
835
|
+
|
836
|
+
key = if OpenSSL::PKey::PKey === @key then
|
837
|
+
@key
|
838
|
+
else
|
839
|
+
OpenSSL::PKey::RSA.new ::File.read(@key), @pass
|
840
|
+
end
|
841
|
+
|
842
|
+
@http.certificate = cert
|
843
|
+
@http.private_key = key
|
486
844
|
end
|
487
845
|
end
|
488
846
|
|
489
|
-
def post_form(
|
847
|
+
def post_form(uri, form, headers = {})
|
490
848
|
cur_page = form.page || current_page ||
|
491
|
-
Page.new(
|
849
|
+
Page.new(nil, {'content-type'=>'text/html'})
|
492
850
|
|
493
851
|
request_data = form.request_data
|
494
852
|
|
495
853
|
log.debug("query: #{ request_data.inspect }") if log
|
496
854
|
|
855
|
+
headers = {
|
856
|
+
'Content-Type' => form.enctype,
|
857
|
+
'Content-Length' => request_data.size.to_s,
|
858
|
+
}.merge headers
|
859
|
+
|
497
860
|
# fetch the page
|
498
|
-
page = fetch_page
|
499
|
-
:referer => cur_page,
|
500
|
-
:verb => :post,
|
501
|
-
:params => [request_data],
|
502
|
-
:headers => {
|
503
|
-
'Content-Type' => form.enctype,
|
504
|
-
'Content-Length' => request_data.size.to_s,
|
505
|
-
}.merge(headers))
|
861
|
+
page = fetch_page uri, :post, headers, [request_data], cur_page
|
506
862
|
add_to_history(page)
|
507
863
|
page
|
508
864
|
end
|
509
865
|
|
510
866
|
# uri is an absolute URI
|
511
|
-
def fetch_page
|
512
|
-
|
513
|
-
|
514
|
-
|
515
|
-
|
516
|
-
|
517
|
-
|
518
|
-
|
519
|
-
|
520
|
-
|
521
|
-
|
522
|
-
|
523
|
-
|
524
|
-
|
525
|
-
|
526
|
-
|
527
|
-
|
528
|
-
|
529
|
-
|
530
|
-
|
531
|
-
|
532
|
-
|
533
|
-
|
534
|
-
|
535
|
-
@request_headers
|
536
|
-
),
|
537
|
-
Chain::CustomHeaders.new,
|
538
|
-
@pre_connect_hook,
|
539
|
-
], @http)
|
540
|
-
|
541
|
-
before_connect.handle(options)
|
542
|
-
|
543
|
-
uri = options[:uri]
|
544
|
-
request = options[:request]
|
545
|
-
cur_page = options[:referer]
|
546
|
-
request_data = options[:params]
|
547
|
-
redirects = options[:redirects]
|
548
|
-
http_obj = options[:connection]
|
867
|
+
def fetch_page uri, method = :get, headers = {}, params = [],
|
868
|
+
referer = current_page, redirects = 0
|
869
|
+
referer_uri = referer ? referer.uri : nil
|
870
|
+
|
871
|
+
uri = @resolver.resolve uri, referer
|
872
|
+
|
873
|
+
uri, params = resolve_parameters uri, method, params
|
874
|
+
|
875
|
+
request = http_request uri, method, params
|
876
|
+
|
877
|
+
connection = connection_for uri
|
878
|
+
|
879
|
+
request_auth request, uri
|
880
|
+
|
881
|
+
enable_gzip request
|
882
|
+
|
883
|
+
request_language_charset request
|
884
|
+
request_cookies request, uri
|
885
|
+
request_host request, uri
|
886
|
+
request_referer request, uri, referer_uri
|
887
|
+
request_user_agent request
|
888
|
+
request_add_headers request, headers
|
889
|
+
|
890
|
+
pre_connect request
|
549
891
|
|
550
892
|
# Add If-Modified-Since if page is in history
|
551
|
-
if
|
893
|
+
if (page = visited_page(uri)) and page.response['Last-Modified']
|
552
894
|
request['If-Modified-Since'] = page.response['Last-Modified']
|
553
895
|
end if(@conditional_requests)
|
554
896
|
|
555
897
|
# Specify timeouts if given
|
556
|
-
|
557
|
-
|
898
|
+
connection.open_timeout = @open_timeout if @open_timeout
|
899
|
+
connection.read_timeout = @read_timeout if @read_timeout
|
558
900
|
|
559
|
-
|
560
|
-
|
561
|
-
|
562
|
-
log.debug("request-header: #{ k } => #{ v }")
|
563
|
-
end if log
|
901
|
+
request_log request
|
902
|
+
|
903
|
+
response_body = nil
|
564
904
|
|
565
905
|
# Send the request
|
566
|
-
response =
|
567
|
-
|
568
|
-
|
569
|
-
|
570
|
-
|
571
|
-
|
906
|
+
response = connection.request(uri, request) { |res|
|
907
|
+
response_log res
|
908
|
+
|
909
|
+
response_body = response_read res, request
|
910
|
+
|
911
|
+
res
|
572
912
|
}
|
573
913
|
|
574
|
-
|
575
|
-
@post_connect_hook,
|
576
|
-
Chain::ResponseBodyParser.new(@pluggable_parser, @watch_for_set),
|
577
|
-
Chain::ResponseHeaderHandler.new(@cookie_jar),
|
578
|
-
])
|
579
|
-
after_connect.handle(options)
|
580
|
-
|
581
|
-
res_klass = options[:res_klass]
|
582
|
-
response_body = options[:response_body]
|
583
|
-
page = options[:page]
|
584
|
-
|
585
|
-
log.info("status: #{ page.code }") if log
|
586
|
-
|
587
|
-
if follow_meta_refresh
|
588
|
-
redirect_uri = nil
|
589
|
-
referer = page
|
590
|
-
if (page.respond_to?(:meta) && (redirect = page.meta.first))
|
591
|
-
redirect_uri = redirect.uri.to_s
|
592
|
-
sleep redirect.node['delay'].to_f
|
593
|
-
referer = Page.new(nil, {'content-type'=>'text/html'})
|
594
|
-
elsif refresh = response['refresh']
|
595
|
-
delay, redirect_uri = Page::Meta.parse(refresh, uri)
|
596
|
-
raise StandardError, "Invalid refresh http header" unless delay
|
597
|
-
if redirects + 1 > redirection_limit
|
598
|
-
raise RedirectLimitReachedError.new(page, redirects)
|
599
|
-
end
|
600
|
-
sleep delay.to_f
|
601
|
-
end
|
914
|
+
post_connect response
|
602
915
|
|
603
|
-
|
604
|
-
|
605
|
-
|
606
|
-
:uri => redirect_uri,
|
607
|
-
:referer => referer,
|
608
|
-
:params => [],
|
609
|
-
:verb => :get,
|
610
|
-
:redirects => redirects + 1
|
611
|
-
)
|
612
|
-
end
|
613
|
-
end
|
916
|
+
page = response_parse response, response_body, uri
|
917
|
+
|
918
|
+
response_cookies response, uri, page
|
614
919
|
|
615
|
-
|
920
|
+
meta = response_follow_meta_refresh response, uri, page, redirects
|
921
|
+
return meta if meta
|
616
922
|
|
617
|
-
|
923
|
+
case response
|
924
|
+
when Net::HTTPSuccess, Mechanize::FileResponse
|
925
|
+
page
|
926
|
+
when Net::HTTPNotModified
|
618
927
|
log.debug("Got cached page") if log
|
619
|
-
|
620
|
-
|
621
|
-
|
622
|
-
|
623
|
-
|
624
|
-
|
625
|
-
|
626
|
-
|
627
|
-
return page if res_klass != Net::HTTPMovedPermanently
|
628
|
-
end
|
629
|
-
log.info("follow redirect to: #{ response['Location'] }") if log
|
630
|
-
from_uri = page.uri
|
631
|
-
raise RedirectLimitReachedError.new(page, redirects) if redirects + 1 > redirection_limit
|
632
|
-
redirect_verb = options[:verb] == :head ? :head : :get
|
633
|
-
page = fetch_page( :uri => response['Location'].to_s,
|
634
|
-
:referer => page,
|
635
|
-
:params => [],
|
636
|
-
:verb => redirect_verb,
|
637
|
-
:redirects => redirects + 1
|
638
|
-
)
|
639
|
-
@history.push(page, from_uri)
|
640
|
-
return page
|
641
|
-
elsif res_klass <= Net::HTTPUnauthorized
|
642
|
-
raise ResponseCodeError.new(page) unless @user || @password
|
643
|
-
raise ResponseCodeError.new(page) if @auth_hash.has_key?(uri.host)
|
644
|
-
if response['www-authenticate'] =~ /Digest/i
|
645
|
-
@auth_hash[uri.host] = :digest
|
646
|
-
if response['server'] =~ /Microsoft-IIS/
|
647
|
-
@auth_hash[uri.host] = :iis_digest
|
648
|
-
end
|
649
|
-
@digest = response['www-authenticate']
|
650
|
-
else
|
651
|
-
@auth_hash[uri.host] = :basic
|
652
|
-
end
|
653
|
-
return fetch_page( :uri => uri,
|
654
|
-
:referer => cur_page,
|
655
|
-
:verb => request.method.downcase.to_sym,
|
656
|
-
:params => request_data,
|
657
|
-
:headers => options[:headers]
|
658
|
-
)
|
928
|
+
visited_page(uri) || page
|
929
|
+
when Net::HTTPRedirection
|
930
|
+
response_redirect response, method, page, redirects
|
931
|
+
when Net::HTTPUnauthorized
|
932
|
+
response_authenticate(response, page, uri, request, headers, params,
|
933
|
+
referer)
|
934
|
+
else
|
935
|
+
raise ResponseCodeError.new(page), "Unhandled response"
|
659
936
|
end
|
660
|
-
|
661
|
-
raise ResponseCodeError.new(page), "Unhandled response", caller
|
662
937
|
end
|
663
938
|
|
664
939
|
def add_to_history(page)
|
@@ -667,20 +942,23 @@ class Mechanize
|
|
667
942
|
end
|
668
943
|
end
|
669
944
|
|
670
|
-
|
671
|
-
|
672
|
-
|
673
|
-
|
674
|
-
|
675
|
-
|
676
|
-
|
677
|
-
|
678
|
-
|
679
|
-
|
680
|
-
|
945
|
+
require 'mechanize/content_type_error'
|
946
|
+
require 'mechanize/cookie'
|
947
|
+
require 'mechanize/cookie_jar'
|
948
|
+
require 'mechanize/file'
|
949
|
+
require 'mechanize/file_connection'
|
950
|
+
require 'mechanize/file_request'
|
951
|
+
require 'mechanize/file_response'
|
952
|
+
require 'mechanize/form'
|
953
|
+
require 'mechanize/history'
|
954
|
+
require 'mechanize/page'
|
955
|
+
require 'mechanize/inspect'
|
956
|
+
require 'mechanize/monkey_patch'
|
957
|
+
require 'mechanize/pluggable_parsers'
|
958
|
+
require 'mechanize/redirect_limit_reached_error'
|
959
|
+
require 'mechanize/redirect_not_get_or_head_error'
|
960
|
+
require 'mechanize/response_code_error'
|
961
|
+
require 'mechanize/unsupported_scheme_error'
|
962
|
+
require 'mechanize/uri_resolver'
|
963
|
+
require 'mechanize/util'
|
681
964
|
|
682
|
-
Pew Pew Pew
|
683
|
-
eomsg
|
684
|
-
Object.const_get(klass)
|
685
|
-
end
|
686
|
-
end
|