mechanize 2.0.1 → 2.1.pre.1
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of mechanize might be problematic. Click here for more details.
- data.tar.gz.sig +0 -0
- data/CHANGELOG.rdoc +82 -0
- data/EXAMPLES.rdoc +1 -1
- data/FAQ.rdoc +9 -9
- data/Manifest.txt +35 -48
- data/README.rdoc +2 -1
- data/Rakefile +16 -3
- data/lib/mechanize.rb +809 -392
- data/lib/mechanize/content_type_error.rb +10 -11
- data/lib/mechanize/cookie.rb +193 -60
- data/lib/mechanize/cookie_jar.rb +39 -86
- data/lib/mechanize/download.rb +59 -0
- data/lib/mechanize/element_matcher.rb +1 -0
- data/lib/mechanize/file.rb +61 -76
- data/lib/mechanize/file_saver.rb +37 -35
- data/lib/mechanize/form.rb +475 -410
- data/lib/mechanize/form/button.rb +4 -7
- data/lib/mechanize/form/check_box.rb +10 -9
- data/lib/mechanize/form/field.rb +52 -42
- data/lib/mechanize/form/file_upload.rb +17 -19
- data/lib/mechanize/form/hidden.rb +3 -0
- data/lib/mechanize/form/image_button.rb +15 -16
- data/lib/mechanize/form/keygen.rb +34 -0
- data/lib/mechanize/form/multi_select_list.rb +20 -9
- data/lib/mechanize/form/option.rb +48 -47
- data/lib/mechanize/form/radio_button.rb +52 -45
- data/lib/mechanize/form/reset.rb +3 -0
- data/lib/mechanize/form/select_list.rb +10 -6
- data/lib/mechanize/form/submit.rb +3 -0
- data/lib/mechanize/form/text.rb +3 -0
- data/lib/mechanize/form/textarea.rb +3 -0
- data/lib/mechanize/headers.rb +17 -19
- data/lib/mechanize/history.rb +60 -61
- data/lib/mechanize/http.rb +5 -0
- data/lib/mechanize/http/agent.rb +485 -218
- data/lib/mechanize/http/auth_challenge.rb +59 -0
- data/lib/mechanize/http/auth_realm.rb +31 -0
- data/lib/mechanize/http/content_disposition_parser.rb +188 -0
- data/lib/mechanize/http/www_authenticate_parser.rb +155 -0
- data/lib/mechanize/monkey_patch.rb +14 -35
- data/lib/mechanize/page.rb +34 -2
- data/lib/mechanize/page/base.rb +6 -7
- data/lib/mechanize/page/frame.rb +5 -5
- data/lib/mechanize/page/image.rb +23 -23
- data/lib/mechanize/page/label.rb +16 -16
- data/lib/mechanize/page/link.rb +16 -0
- data/lib/mechanize/page/meta_refresh.rb +19 -7
- data/lib/mechanize/parser.rb +173 -0
- data/lib/mechanize/pluggable_parsers.rb +126 -83
- data/lib/mechanize/redirect_limit_reached_error.rb +16 -13
- data/lib/mechanize/redirect_not_get_or_head_error.rb +18 -16
- data/lib/mechanize/response_code_error.rb +16 -17
- data/lib/mechanize/robots_disallowed_error.rb +22 -23
- data/lib/mechanize/test_case.rb +659 -0
- data/lib/mechanize/unauthorized_error.rb +3 -0
- data/lib/mechanize/unsupported_scheme_error.rb +4 -6
- data/lib/mechanize/util.rb +0 -12
- data/test/htdocs/form_order_test.html +11 -0
- data/test/htdocs/form_test.html +2 -2
- data/test/htdocs/tc_links.html +1 -0
- data/test/test_mechanize.rb +367 -59
- data/test/test_mechanize_cookie.rb +69 -4
- data/test/test_mechanize_cookie_jar.rb +200 -124
- data/test/test_mechanize_download.rb +43 -0
- data/test/test_mechanize_file.rb +53 -45
- data/test/{test_mechanize_file_response.rb → test_mechanize_file_connection.rb} +2 -2
- data/test/test_mechanize_file_request.rb +2 -2
- data/test/test_mechanize_file_saver.rb +21 -0
- data/test/test_mechanize_form.rb +345 -46
- data/test/test_mechanize_form_check_box.rb +5 -4
- data/test/test_mechanize_form_encoding.rb +10 -16
- data/test/test_mechanize_form_field.rb +45 -3
- data/test/test_mechanize_form_file_upload.rb +20 -0
- data/test/test_mechanize_form_image_button.rb +2 -2
- data/test/test_mechanize_form_keygen.rb +32 -0
- data/test/test_mechanize_form_multi_select_list.rb +84 -0
- data/test/test_mechanize_form_option.rb +55 -0
- data/test/test_mechanize_form_radio_button.rb +78 -0
- data/test/test_mechanize_form_select_list.rb +76 -0
- data/test/test_mechanize_form_textarea.rb +8 -7
- data/test/{test_headers.rb → test_mechanize_headers.rb} +4 -2
- data/test/test_mechanize_history.rb +103 -0
- data/test/test_mechanize_http_agent.rb +525 -17
- data/test/test_mechanize_http_auth_challenge.rb +39 -0
- data/test/test_mechanize_http_auth_realm.rb +49 -0
- data/test/test_mechanize_http_content_disposition_parser.rb +118 -0
- data/test/test_mechanize_http_www_authenticate_parser.rb +146 -0
- data/test/test_mechanize_link.rb +10 -14
- data/test/test_mechanize_page.rb +118 -0
- data/test/test_mechanize_page_encoding.rb +48 -13
- data/test/test_mechanize_page_frame.rb +16 -0
- data/test/test_mechanize_page_link.rb +27 -19
- data/test/test_mechanize_page_meta_refresh.rb +26 -14
- data/test/test_mechanize_parser.rb +289 -0
- data/test/test_mechanize_pluggable_parser.rb +52 -0
- data/test/test_mechanize_redirect_limit_reached_error.rb +24 -0
- data/test/test_mechanize_redirect_not_get_or_head_error.rb +3 -7
- data/test/test_mechanize_subclass.rb +2 -2
- data/test/test_mechanize_util.rb +24 -13
- data/test/test_multi_select.rb +23 -22
- metadata +145 -114
- metadata.gz.sig +0 -0
- data/lib/mechanize/inspect.rb +0 -88
- data/test/helper.rb +0 -175
- data/test/htdocs/form_select_all.html +0 -16
- data/test/htdocs/form_select_none.html +0 -17
- data/test/htdocs/form_select_noopts.html +0 -10
- data/test/htdocs/iframe_test.html +0 -16
- data/test/htdocs/nofollow.html +0 -9
- data/test/htdocs/norobots.html +0 -8
- data/test/htdocs/rel_nofollow.html +0 -8
- data/test/htdocs/tc_base_images.html +0 -10
- data/test/htdocs/tc_images.html +0 -8
- data/test/htdocs/tc_no_attributes.html +0 -16
- data/test/htdocs/tc_radiobuttons.html +0 -17
- data/test/htdocs/test_bad_encoding.html +0 -52
- data/test/servlets.rb +0 -402
- data/test/ssl_server.rb +0 -48
- data/test/test_cookies.rb +0 -129
- data/test/test_form_action.rb +0 -52
- data/test/test_form_as_hash.rb +0 -59
- data/test/test_form_button.rb +0 -46
- data/test/test_frames.rb +0 -34
- data/test/test_history.rb +0 -118
- data/test/test_history_added.rb +0 -16
- data/test/test_html_unscape_forms.rb +0 -46
- data/test/test_if_modified_since.rb +0 -20
- data/test/test_images.rb +0 -19
- data/test/test_no_attributes.rb +0 -13
- data/test/test_option.rb +0 -18
- data/test/test_pluggable_parser.rb +0 -136
- data/test/test_post_form.rb +0 -37
- data/test/test_pretty_print.rb +0 -22
- data/test/test_radiobutton.rb +0 -75
- data/test/test_redirect_limit_reached.rb +0 -39
- data/test/test_referer.rb +0 -81
- data/test/test_relative_links.rb +0 -40
- data/test/test_request.rb +0 -13
- data/test/test_response_code.rb +0 -53
- data/test/test_robots.rb +0 -72
- data/test/test_save_file.rb +0 -48
- data/test/test_scheme.rb +0 -48
- data/test/test_select.rb +0 -119
- data/test/test_select_all.rb +0 -15
- data/test/test_select_none.rb +0 -15
- data/test/test_select_noopts.rb +0 -18
- data/test/test_set_fields.rb +0 -44
- data/test/test_ssl_server.rb +0 -20
@@ -1,10 +1,13 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
1
|
+
# This class represents a select list or drop down box in a Form. Set the
|
2
|
+
# value for the list by calling SelectList#value=. SelectList contains a list
|
3
|
+
# of Option that were found. After finding the correct option, set the select
|
4
|
+
# lists value to the option value:
|
5
|
+
#
|
6
|
+
# selectlist.value = selectlist.options.first.value
|
7
|
+
#
|
8
|
+
# Options can also be selected by "clicking" or selecting them. See Option
|
7
9
|
class Mechanize::Form::SelectList < Mechanize::Form::MultiSelectList
|
10
|
+
|
8
11
|
def initialize node
|
9
12
|
super
|
10
13
|
if selected_options.length > 1
|
@@ -36,5 +39,6 @@ class Mechanize::Form::SelectList < Mechanize::Form::MultiSelectList
|
|
36
39
|
def query_value
|
37
40
|
value ? [[name, value]] : nil
|
38
41
|
end
|
42
|
+
|
39
43
|
end
|
40
44
|
|
data/lib/mechanize/headers.rb
CHANGED
@@ -1,25 +1,23 @@
|
|
1
|
-
class Mechanize
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
end
|
1
|
+
class Mechanize::Headers < Hash
|
2
|
+
def [](key)
|
3
|
+
super(key.downcase)
|
4
|
+
end
|
6
5
|
|
7
|
-
|
8
|
-
|
9
|
-
|
6
|
+
def []=(key, value)
|
7
|
+
super(key.downcase, value)
|
8
|
+
end
|
10
9
|
|
11
|
-
|
12
|
-
|
13
|
-
|
10
|
+
def key?(key)
|
11
|
+
super(key.downcase)
|
12
|
+
end
|
14
13
|
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
end
|
14
|
+
def canonical_each
|
15
|
+
block_given? or return enum_for(__method__)
|
16
|
+
each { |key, value|
|
17
|
+
key = key.capitalize
|
18
|
+
key.gsub!(/-([a-z])/) { "-#{$1.upcase}" }
|
19
|
+
yield [key, value]
|
20
|
+
}
|
23
21
|
end
|
24
22
|
end
|
25
23
|
|
data/lib/mechanize/history.rb
CHANGED
@@ -1,83 +1,82 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
# This class manages history for your mechanize object.
|
4
|
-
class History < Array
|
5
|
-
attr_accessor :max_size
|
6
|
-
|
7
|
-
def initialize(max_size = nil)
|
8
|
-
@max_size = max_size
|
9
|
-
@history_index = {}
|
10
|
-
end
|
1
|
+
##
|
2
|
+
# This class manages history for your mechanize object.
|
11
3
|
|
12
|
-
|
13
|
-
super
|
14
|
-
@history_index = orig.instance_variable_get(:@history_index).dup
|
15
|
-
end
|
4
|
+
class Mechanize::History < Array
|
16
5
|
|
17
|
-
|
18
|
-
uris = map { |page| page.uri }.join ', '
|
6
|
+
attr_accessor :max_size
|
19
7
|
|
20
|
-
|
21
|
-
|
8
|
+
def initialize(max_size = nil)
|
9
|
+
@max_size = max_size
|
10
|
+
@history_index = {}
|
11
|
+
end
|
22
12
|
|
23
|
-
|
24
|
-
|
13
|
+
def initialize_copy(orig)
|
14
|
+
super
|
15
|
+
@history_index = orig.instance_variable_get(:@history_index).dup
|
16
|
+
end
|
25
17
|
|
26
|
-
|
18
|
+
def inspect # :nodoc:
|
19
|
+
uris = map { |page| page.uri }.join ', '
|
27
20
|
|
28
|
-
|
29
|
-
|
30
|
-
self.shift
|
31
|
-
end
|
32
|
-
end
|
21
|
+
"[#{uris}]"
|
22
|
+
end
|
33
23
|
|
34
|
-
|
35
|
-
|
36
|
-
alias :<< :push
|
24
|
+
def push(page, uri = nil)
|
25
|
+
super page
|
37
26
|
|
38
|
-
|
39
|
-
|
27
|
+
index = uri ? uri : page.uri
|
28
|
+
@history_index[index.to_s] = page
|
40
29
|
|
41
|
-
|
30
|
+
shift while length > @max_size if @max_size
|
42
31
|
|
43
|
-
|
44
|
-
|
32
|
+
self
|
33
|
+
end
|
45
34
|
|
46
|
-
|
47
|
-
end
|
35
|
+
alias :<< :push
|
48
36
|
|
49
|
-
|
37
|
+
def visited? uri
|
38
|
+
page = @history_index[uri.to_s]
|
50
39
|
|
51
|
-
|
52
|
-
@history_index.clear
|
53
|
-
super
|
54
|
-
end
|
40
|
+
return page if page # HACK
|
55
41
|
|
56
|
-
|
57
|
-
|
58
|
-
page = self[0]
|
59
|
-
self[0] = nil
|
42
|
+
uri = uri.dup
|
43
|
+
uri.path = '/' if uri.path.empty?
|
60
44
|
|
61
|
-
|
45
|
+
@history_index[uri.to_s]
|
46
|
+
end
|
62
47
|
|
63
|
-
|
64
|
-
page
|
65
|
-
end
|
48
|
+
alias visited_page visited?
|
66
49
|
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
page
|
72
|
-
end
|
50
|
+
def clear
|
51
|
+
@history_index.clear
|
52
|
+
super
|
53
|
+
end
|
73
54
|
|
74
|
-
|
55
|
+
def shift
|
56
|
+
return nil if length == 0
|
57
|
+
page = self[0]
|
58
|
+
self[0] = nil
|
75
59
|
|
76
|
-
|
77
|
-
@history_index.each do |k,v|
|
78
|
-
@history_index.delete(k) if v == page
|
79
|
-
end
|
80
|
-
end
|
60
|
+
super
|
81
61
|
|
62
|
+
remove_from_index(page)
|
63
|
+
page
|
82
64
|
end
|
65
|
+
|
66
|
+
def pop
|
67
|
+
return nil if length == 0
|
68
|
+
page = super
|
69
|
+
remove_from_index(page)
|
70
|
+
page
|
71
|
+
end
|
72
|
+
|
73
|
+
private
|
74
|
+
|
75
|
+
def remove_from_index(page)
|
76
|
+
@history_index.each do |k,v|
|
77
|
+
@history_index.delete(k) if v == page
|
78
|
+
end
|
79
|
+
end
|
80
|
+
|
83
81
|
end
|
82
|
+
|
data/lib/mechanize/http.rb
CHANGED
data/lib/mechanize/http/agent.rb
CHANGED
@@ -1,38 +1,63 @@
|
|
1
|
+
require 'tempfile'
|
2
|
+
require 'net/ntlm'
|
3
|
+
require 'kconv'
|
4
|
+
require 'webrobots'
|
5
|
+
|
1
6
|
##
|
2
|
-
# An HTTP (and local disk access) user agent
|
7
|
+
# An HTTP (and local disk access) user agent. This class is an implementation
|
8
|
+
# detail and is subject to change at any time.
|
3
9
|
|
4
10
|
class Mechanize::HTTP::Agent
|
5
11
|
|
6
|
-
|
12
|
+
# :section: Headers
|
7
13
|
|
8
14
|
# Disables If-Modified-Since conditional requests (enabled by default)
|
9
15
|
attr_accessor :conditional_requests
|
10
|
-
attr_accessor :context
|
11
16
|
|
12
|
-
#
|
13
|
-
# of the head element will be followed.
|
14
|
-
attr_accessor :follow_meta_refresh
|
17
|
+
# Is gzip compression of requests enabled?
|
15
18
|
attr_accessor :gzip_enabled
|
16
|
-
attr_accessor :history
|
17
19
|
|
18
|
-
#
|
19
|
-
attr_accessor :
|
20
|
+
# A hash of request headers to be used for every request
|
21
|
+
attr_accessor :request_headers
|
20
22
|
|
21
|
-
|
22
|
-
attr_reader :
|
23
|
+
# The User-Agent header to send
|
24
|
+
attr_reader :user_agent
|
25
|
+
|
26
|
+
# :section: History
|
27
|
+
|
28
|
+
# history of requests made
|
29
|
+
attr_accessor :history
|
30
|
+
|
31
|
+
# :section: Hooks
|
23
32
|
|
24
33
|
# A list of hooks to call after retrieving a response. Hooks are called with
|
25
34
|
# the agent and the response returned.
|
26
|
-
|
27
35
|
attr_reader :post_connect_hooks
|
28
36
|
|
29
37
|
# A list of hooks to call before making a request. Hooks are called with
|
30
38
|
# the agent and the request to be performed.
|
31
|
-
|
32
39
|
attr_reader :pre_connect_hooks
|
33
40
|
|
34
|
-
#
|
35
|
-
|
41
|
+
# A list of hooks to call to handle the content-encoding of a request.
|
42
|
+
attr_reader :content_encoding_hooks
|
43
|
+
|
44
|
+
# :section: HTTP Authentication
|
45
|
+
|
46
|
+
attr_reader :authenticate_methods # :nodoc:
|
47
|
+
attr_reader :digest_challenges # :nodoc:
|
48
|
+
attr_accessor :user
|
49
|
+
attr_accessor :password
|
50
|
+
|
51
|
+
# :section: Redirection
|
52
|
+
|
53
|
+
# Follow HTML meta refresh and HTTP Refresh. If set to +:anywhere+ meta
|
54
|
+
# refresh tags outside of the head element will be followed.
|
55
|
+
attr_accessor :follow_meta_refresh
|
56
|
+
|
57
|
+
# Follow an HTML meta refresh that has no "url=" in the content attribute.
|
58
|
+
#
|
59
|
+
# Defaults to false to prevent infinite refresh loops.
|
60
|
+
attr_accessor :follow_meta_refresh_self
|
36
61
|
|
37
62
|
# Controls how this agent deals with redirects. The following values are
|
38
63
|
# allowed:
|
@@ -40,22 +65,17 @@ class Mechanize::HTTP::Agent
|
|
40
65
|
# :all, true:: All 3xx redirects are followed (default)
|
41
66
|
# :permanent:: Only 301 Moved Permanantly redirects are followed
|
42
67
|
# false:: No redirects are followed
|
43
|
-
|
44
68
|
attr_accessor :redirect_ok
|
45
|
-
attr_accessor :redirection_limit
|
46
69
|
|
47
|
-
#
|
70
|
+
# Maximum number of redirects to follow
|
71
|
+
attr_accessor :redirection_limit
|
48
72
|
|
49
|
-
|
73
|
+
# :section: Robots
|
50
74
|
|
51
75
|
# When true, this agent will consult the site's robots.txt for each access.
|
52
|
-
|
53
76
|
attr_reader :robots
|
54
77
|
|
55
|
-
|
56
|
-
|
57
|
-
attr_accessor :user
|
58
|
-
attr_reader :user_agent
|
78
|
+
# :section: SSL
|
59
79
|
|
60
80
|
# Path to an OpenSSL server certificate file
|
61
81
|
attr_accessor :ca_file
|
@@ -66,6 +86,9 @@ class Mechanize::HTTP::Agent
|
|
66
86
|
# An OpenSSL client certificate or the path to a certificate file.
|
67
87
|
attr_accessor :cert
|
68
88
|
|
89
|
+
# An SSL certificate store
|
90
|
+
attr_accessor :cert_store
|
91
|
+
|
69
92
|
# OpenSSL key password
|
70
93
|
attr_accessor :pass
|
71
94
|
|
@@ -77,38 +100,99 @@ class Mechanize::HTTP::Agent
|
|
77
100
|
# when the SSLContext was created
|
78
101
|
attr_accessor :verify_callback
|
79
102
|
|
103
|
+
# How to verify SSL connections. Defaults to VERIFY_PEER
|
104
|
+
attr_accessor :verify_mode
|
105
|
+
|
106
|
+
# :section: Timeouts
|
107
|
+
|
108
|
+
# Reset connections that have not been used in this many seconds
|
109
|
+
attr_reader :idle_timeout
|
110
|
+
|
111
|
+
# Set to false to disable HTTP/1.1 keep-alive requests
|
112
|
+
attr_accessor :keep_alive
|
113
|
+
|
114
|
+
# Length of time to wait until a connection is opened in seconds
|
115
|
+
attr_accessor :open_timeout
|
116
|
+
|
117
|
+
# Length of time to attempt to read data from the server
|
118
|
+
attr_accessor :read_timeout
|
119
|
+
|
120
|
+
# :section:
|
121
|
+
|
122
|
+
# The cookies for this agent
|
123
|
+
attr_accessor :cookie_jar
|
124
|
+
|
125
|
+
# URI for a proxy connection
|
126
|
+
attr_reader :proxy_uri
|
127
|
+
|
128
|
+
# Retry non-idempotent requests?
|
129
|
+
attr_reader :retry_change_requests
|
130
|
+
|
131
|
+
# Responses larger than this will be written to a Tempfile instead of stored
|
132
|
+
# in memory.
|
133
|
+
attr_accessor :max_file_buffer
|
134
|
+
|
135
|
+
# :section: Utility
|
136
|
+
|
137
|
+
# The context parses responses into pages
|
138
|
+
attr_accessor :context
|
139
|
+
|
80
140
|
attr_reader :http # :nodoc:
|
81
141
|
|
142
|
+
# Handlers for various URI schemes
|
143
|
+
attr_accessor :scheme_handlers
|
144
|
+
|
145
|
+
# :section:
|
146
|
+
|
147
|
+
# Creates a new Mechanize HTTP user agent. The user agent is an
|
148
|
+
# implementation detail of mechanize and its API may change at any time.
|
149
|
+
|
82
150
|
def initialize
|
83
|
-
@
|
84
|
-
@
|
85
|
-
@
|
86
|
-
@cookie_jar
|
87
|
-
@
|
151
|
+
@conditional_requests = true
|
152
|
+
@context = nil
|
153
|
+
@content_encoding_hooks = []
|
154
|
+
@cookie_jar = Mechanize::CookieJar.new
|
155
|
+
@follow_meta_refresh = false
|
156
|
+
@follow_meta_refresh_self = false
|
157
|
+
@gzip_enabled = true
|
158
|
+
@history = Mechanize::History.new
|
159
|
+
@idle_timeout = nil
|
160
|
+
@keep_alive = true
|
161
|
+
@keep_alive_time = 300
|
162
|
+
@max_file_buffer = 10240
|
163
|
+
@open_timeout = nil
|
164
|
+
@post_connect_hooks = []
|
165
|
+
@pre_connect_hooks = []
|
166
|
+
@proxy_uri = nil
|
167
|
+
@read_timeout = nil
|
168
|
+
@redirect_ok = true
|
169
|
+
@redirection_limit = 20
|
170
|
+
@request_headers = {}
|
171
|
+
@retry_change_requests = false
|
172
|
+
@robots = false
|
173
|
+
@user_agent = nil
|
174
|
+
@webrobots = nil
|
175
|
+
|
176
|
+
# HTTP Authentication
|
177
|
+
@authenticate_parser = Mechanize::HTTP::WWWAuthenticateParser.new
|
178
|
+
@authenticate_methods = Hash.new do |methods, uri|
|
179
|
+
methods[uri] = Hash.new do |realms, auth_scheme|
|
180
|
+
realms[auth_scheme] = []
|
181
|
+
end
|
182
|
+
end
|
88
183
|
@digest_auth = Net::HTTP::DigestAuth.new
|
89
|
-
@
|
90
|
-
@gzip_enabled = true
|
91
|
-
@history = Mechanize::History.new
|
92
|
-
@keep_alive_time = 300
|
93
|
-
@open_timeout = nil
|
184
|
+
@digest_challenges = {}
|
94
185
|
@password = nil # HTTP auth password
|
95
|
-
@post_connect_hooks = []
|
96
|
-
@pre_connect_hooks = []
|
97
|
-
@proxy_uri = nil
|
98
|
-
@read_timeout = nil
|
99
|
-
@redirect_ok = true
|
100
|
-
@redirection_limit = 20
|
101
|
-
@request_headers = {}
|
102
|
-
@robots = false
|
103
186
|
@user = nil # HTTP auth user
|
104
|
-
@user_agent = nil
|
105
|
-
@webrobots = nil
|
106
187
|
|
107
|
-
|
108
|
-
@
|
109
|
-
@
|
110
|
-
@
|
188
|
+
# SSL
|
189
|
+
@ca_file = nil
|
190
|
+
@cert = nil
|
191
|
+
@cert_store = nil
|
192
|
+
@key = nil
|
193
|
+
@pass = nil
|
111
194
|
@verify_callback = nil
|
195
|
+
@verify_mode = nil
|
112
196
|
|
113
197
|
@scheme_handlers = Hash.new { |h, scheme|
|
114
198
|
h[scheme] = lambda { |link, page|
|
@@ -122,41 +206,14 @@ class Mechanize::HTTP::Agent
|
|
122
206
|
@scheme_handlers['file'] = @scheme_handlers['http']
|
123
207
|
end
|
124
208
|
|
125
|
-
#
|
126
|
-
#
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
@http.certificate
|
133
|
-
end
|
134
|
-
|
135
|
-
def connection_for uri
|
136
|
-
case uri.scheme.downcase
|
137
|
-
when 'http', 'https' then
|
138
|
-
return @http
|
139
|
-
when 'file' then
|
140
|
-
return Mechanize::FileConnection.new
|
141
|
-
end
|
142
|
-
end
|
143
|
-
|
144
|
-
##
|
145
|
-
# Returns the latest page loaded by the agent
|
146
|
-
|
147
|
-
def current_page
|
148
|
-
@history.last
|
149
|
-
end
|
150
|
-
|
151
|
-
def enable_gzip request
|
152
|
-
request['accept-encoding'] = if @gzip_enabled
|
153
|
-
'gzip,deflate,identity'
|
154
|
-
else
|
155
|
-
'identity'
|
156
|
-
end
|
157
|
-
end
|
209
|
+
# Retrieves +uri+ and parses it into a page or other object according to
|
210
|
+
# PluggableParser. If the URI is an HTTP or HTTPS scheme URI the given HTTP
|
211
|
+
# +method+ is used to retrieve it, along with the HTTP +headers+, request
|
212
|
+
# +params+ and HTTP +referer+.
|
213
|
+
#
|
214
|
+
# +redirects+ tracks the number of redirects experienced when retrieving the
|
215
|
+
# page. If it is over the redirection_limit an error will be raised.
|
158
216
|
|
159
|
-
# uri is an absolute URI
|
160
217
|
def fetch uri, method = :get, headers = {}, params = [],
|
161
218
|
referer = current_page, redirects = 0
|
162
219
|
referer_uri = referer ? referer.uri : nil
|
@@ -169,18 +226,19 @@ class Mechanize::HTTP::Agent
|
|
169
226
|
|
170
227
|
connection = connection_for uri
|
171
228
|
|
172
|
-
request_auth
|
229
|
+
request_auth request, uri
|
173
230
|
|
174
|
-
|
231
|
+
disable_keep_alive request
|
232
|
+
enable_gzip request
|
175
233
|
|
176
234
|
request_language_charset request
|
177
|
-
request_cookies
|
178
|
-
request_host
|
179
|
-
request_referer
|
180
|
-
request_user_agent
|
181
|
-
request_add_headers
|
235
|
+
request_cookies request, uri
|
236
|
+
request_host request, uri
|
237
|
+
request_referer request, uri, referer_uri
|
238
|
+
request_user_agent request
|
239
|
+
request_add_headers request, headers
|
182
240
|
|
183
|
-
pre_connect
|
241
|
+
pre_connect request
|
184
242
|
|
185
243
|
# Consult robots.txt
|
186
244
|
if robots && uri.is_a?(URI::HTTP)
|
@@ -188,6 +246,8 @@ class Mechanize::HTTP::Agent
|
|
188
246
|
end
|
189
247
|
|
190
248
|
# Add If-Modified-Since if page is in history
|
249
|
+
page = visited_page(uri)
|
250
|
+
|
191
251
|
if (page = visited_page(uri)) and page.response['Last-Modified']
|
192
252
|
request['If-Modified-Since'] = page.response['Last-Modified']
|
193
253
|
end if(@conditional_requests)
|
@@ -209,11 +269,13 @@ class Mechanize::HTTP::Agent
|
|
209
269
|
res
|
210
270
|
}
|
211
271
|
|
212
|
-
|
272
|
+
hook_content_encoding response, uri, response_body_io
|
213
273
|
|
214
|
-
|
274
|
+
response_body_io = response_content_encoding response, response_body_io
|
215
275
|
|
216
|
-
|
276
|
+
post_connect uri, response, response_body_io
|
277
|
+
|
278
|
+
page = response_parse response, response_body_io, uri
|
217
279
|
|
218
280
|
response_cookies response, uri, page
|
219
281
|
|
@@ -233,7 +295,7 @@ class Mechanize::HTTP::Agent
|
|
233
295
|
log.debug("Got cached page") if log
|
234
296
|
visited_page(uri) || page
|
235
297
|
when Net::HTTPRedirection
|
236
|
-
response_redirect response, method, page, redirects
|
298
|
+
response_redirect response, method, page, redirects, referer
|
237
299
|
when Net::HTTPUnauthorized
|
238
300
|
response_authenticate(response, page, uri, request, headers, params,
|
239
301
|
referer)
|
@@ -242,6 +304,35 @@ class Mechanize::HTTP::Agent
|
|
242
304
|
end
|
243
305
|
end
|
244
306
|
|
307
|
+
# Retry non-idempotent requests
|
308
|
+
|
309
|
+
def retry_change_requests= retri
|
310
|
+
@retry_change_requests = retri
|
311
|
+
@http.retry_change_requests = retri if @http
|
312
|
+
end
|
313
|
+
|
314
|
+
# :section: Headers
|
315
|
+
|
316
|
+
def user_agent= user_agent
|
317
|
+
@webrobots = nil if user_agent != @user_agent
|
318
|
+
@user_agent = user_agent
|
319
|
+
end
|
320
|
+
|
321
|
+
# :section: History
|
322
|
+
|
323
|
+
# Equivalent to the browser back button. Returns the most recent page
|
324
|
+
# visited.
|
325
|
+
def back
|
326
|
+
@history.pop
|
327
|
+
end
|
328
|
+
|
329
|
+
##
|
330
|
+
# Returns the latest page loaded by the agent
|
331
|
+
|
332
|
+
def current_page
|
333
|
+
@history.last
|
334
|
+
end
|
335
|
+
|
245
336
|
def max_history
|
246
337
|
@history.max_size
|
247
338
|
end
|
@@ -250,24 +341,19 @@ class Mechanize::HTTP::Agent
|
|
250
341
|
@history.max_size = length
|
251
342
|
end
|
252
343
|
|
253
|
-
|
254
|
-
|
255
|
-
|
256
|
-
|
344
|
+
# Returns a visited page for the url passed in, otherwise nil
|
345
|
+
def visited_page url
|
346
|
+
@history.visited_page resolve url
|
347
|
+
end
|
257
348
|
|
258
|
-
|
259
|
-
request.body = params.first if params
|
349
|
+
# :section: Hooks
|
260
350
|
|
261
|
-
|
262
|
-
|
263
|
-
|
351
|
+
def hook_content_encoding response, uri, response_body_io
|
352
|
+
@content_encoding_hooks.each do |hook|
|
353
|
+
hook.call self, uri, response, response_body_io
|
264
354
|
end
|
265
355
|
end
|
266
356
|
|
267
|
-
def log
|
268
|
-
Mechanize.log
|
269
|
-
end
|
270
|
-
|
271
357
|
##
|
272
358
|
# Invokes hooks added to post_connect_hooks after a +response+ is returned
|
273
359
|
# and the response +body+ is handled.
|
@@ -275,9 +361,13 @@ class Mechanize::HTTP::Agent
|
|
275
361
|
# Yields the +context+, the +uri+ for the request, the +response+ and the
|
276
362
|
# response +body+.
|
277
363
|
|
278
|
-
def post_connect uri, response,
|
364
|
+
def post_connect uri, response, body_io # :yields: agent, uri, response, body
|
279
365
|
@post_connect_hooks.each do |hook|
|
280
|
-
|
366
|
+
begin
|
367
|
+
hook.call self, uri, response, body_io.read
|
368
|
+
ensure
|
369
|
+
body_io.rewind
|
370
|
+
end
|
281
371
|
end
|
282
372
|
end
|
283
373
|
|
@@ -291,26 +381,83 @@ class Mechanize::HTTP::Agent
|
|
291
381
|
end
|
292
382
|
end
|
293
383
|
|
294
|
-
|
295
|
-
|
384
|
+
# :section: Request
|
385
|
+
|
386
|
+
def connection_for uri
|
387
|
+
case uri.scheme.downcase
|
388
|
+
when 'http', 'https' then
|
389
|
+
return @http
|
390
|
+
when 'file' then
|
391
|
+
return Mechanize::FileConnection.new
|
392
|
+
end
|
393
|
+
end
|
296
394
|
|
297
|
-
|
395
|
+
def disable_keep_alive request
|
396
|
+
request['connection'] = 'close' unless @keep_alive
|
397
|
+
end
|
298
398
|
|
299
|
-
|
300
|
-
|
301
|
-
|
302
|
-
|
303
|
-
|
304
|
-
|
399
|
+
def enable_gzip request
|
400
|
+
request['accept-encoding'] = if @gzip_enabled
|
401
|
+
'gzip,deflate,identity'
|
402
|
+
else
|
403
|
+
'identity'
|
404
|
+
end
|
405
|
+
end
|
406
|
+
|
407
|
+
def http_request uri, method, params = nil
|
408
|
+
case uri.scheme.downcase
|
409
|
+
when 'http', 'https' then
|
410
|
+
klass = Net::HTTP.const_get(method.to_s.capitalize)
|
411
|
+
|
412
|
+
request ||= klass.new(uri.request_uri)
|
413
|
+
request.body = params.first if params
|
414
|
+
|
415
|
+
request
|
416
|
+
when 'file' then
|
417
|
+
Mechanize::FileRequest.new uri
|
418
|
+
end
|
419
|
+
end
|
305
420
|
|
306
|
-
|
421
|
+
def request_add_headers request, headers = {}
|
422
|
+
@request_headers.each do |k,v|
|
423
|
+
request[k] = v
|
424
|
+
end
|
307
425
|
|
308
|
-
|
426
|
+
headers.each do |field, value|
|
427
|
+
case field
|
428
|
+
when :etag then request["ETag"] = value
|
429
|
+
when :if_modified_since then request["If-Modified-Since"] = value
|
430
|
+
when Symbol then
|
431
|
+
raise ArgumentError, "unknown header symbol #{field}"
|
432
|
+
else
|
433
|
+
request[field] = value
|
434
|
+
end
|
435
|
+
end
|
436
|
+
end
|
309
437
|
|
310
|
-
|
438
|
+
def request_auth request, uri
|
439
|
+
base_uri = uri + '/'
|
440
|
+
schemes = @authenticate_methods[base_uri]
|
441
|
+
|
442
|
+
if realm = schemes[:digest].find { |r| r.uri == base_uri } then
|
443
|
+
request_auth_digest request, uri, realm, base_uri, false
|
444
|
+
elsif realm = schemes[:iis_digest].find { |r| r.uri == base_uri } then
|
445
|
+
request_auth_digest request, uri, realm, base_uri, true
|
446
|
+
elsif schemes[:basic].find { |r| r.uri == base_uri } then
|
447
|
+
request.basic_auth @user, @password
|
311
448
|
end
|
312
449
|
end
|
313
450
|
|
451
|
+
def request_auth_digest request, uri, realm, base_uri, iis
|
452
|
+
challenge = @digest_challenges[realm]
|
453
|
+
|
454
|
+
uri.user = @user
|
455
|
+
uri.password = @password
|
456
|
+
|
457
|
+
auth = @digest_auth.auth_header uri, challenge.to_s, request.method, iis
|
458
|
+
request['Authorization'] = auth
|
459
|
+
end
|
460
|
+
|
314
461
|
def request_cookies request, uri
|
315
462
|
return if @cookie_jar.empty? uri
|
316
463
|
|
@@ -344,23 +491,6 @@ class Mechanize::HTTP::Agent
|
|
344
491
|
end
|
345
492
|
end
|
346
493
|
|
347
|
-
def request_add_headers request, headers = {}
|
348
|
-
@request_headers.each do |k,v|
|
349
|
-
request[k] = v
|
350
|
-
end
|
351
|
-
|
352
|
-
headers.each do |field, value|
|
353
|
-
case field
|
354
|
-
when :etag then request["ETag"] = value
|
355
|
-
when :if_modified_since then request["If-Modified-Since"] = value
|
356
|
-
when Symbol then
|
357
|
-
raise ArgumentError, "unknown header symbol #{field}"
|
358
|
-
else
|
359
|
-
request[field] = value
|
360
|
-
end
|
361
|
-
end
|
362
|
-
end
|
363
|
-
|
364
494
|
def request_referer request, uri, referer
|
365
495
|
return unless referer
|
366
496
|
return if 'https' == referer.scheme.downcase and
|
@@ -451,26 +581,110 @@ class Mechanize::HTTP::Agent
|
|
451
581
|
return uri, parameters
|
452
582
|
end
|
453
583
|
|
584
|
+
# :section: Response
|
585
|
+
|
586
|
+
def get_meta_refresh response, uri, page
|
587
|
+
return nil unless @follow_meta_refresh
|
588
|
+
|
589
|
+
if page.respond_to?(:meta_refresh) and
|
590
|
+
(redirect = page.meta_refresh.first) then
|
591
|
+
[redirect.delay, redirect.href] unless
|
592
|
+
not @follow_meta_refresh_self and redirect.link_self
|
593
|
+
elsif refresh = response['refresh']
|
594
|
+
delay, href, link_self = Mechanize::Page::MetaRefresh.parse refresh, uri
|
595
|
+
raise Mechanize::Error, 'Invalid refresh http header' unless delay
|
596
|
+
[delay.to_f, href] unless
|
597
|
+
not @follow_meta_refresh_self and link_self
|
598
|
+
end
|
599
|
+
end
|
600
|
+
|
601
|
+
def response_authenticate(response, page, uri, request, headers, params,
|
602
|
+
referer)
|
603
|
+
raise Mechanize::UnauthorizedError, page unless @user || @password
|
604
|
+
|
605
|
+
challenges = @authenticate_parser.parse response['www-authenticate']
|
606
|
+
|
607
|
+
if challenge = challenges.find { |c| c.scheme =~ /^Digest$/i } then
|
608
|
+
realm = challenge.realm uri
|
609
|
+
|
610
|
+
auth_scheme = if response['server'] =~ /Microsoft-IIS/ then
|
611
|
+
:iis_digest
|
612
|
+
else
|
613
|
+
:digest
|
614
|
+
end
|
615
|
+
|
616
|
+
existing_realms = @authenticate_methods[realm.uri][auth_scheme]
|
617
|
+
|
618
|
+
raise Mechanize::UnauthorizedError, page if
|
619
|
+
existing_realms.include? realm
|
620
|
+
|
621
|
+
existing_realms << realm
|
622
|
+
@digest_challenges[realm] = challenge
|
623
|
+
elsif challenge = challenges.find { |c| c.scheme == 'NTLM' } then
|
624
|
+
existing_realms = @authenticate_methods[uri + '/'][:ntlm]
|
625
|
+
|
626
|
+
raise Mechanize::UnauthorizedError, page if
|
627
|
+
existing_realms.include?(realm) and not challenge.params
|
628
|
+
|
629
|
+
existing_realms << realm
|
630
|
+
|
631
|
+
if challenge.params then
|
632
|
+
type_2 = Net::NTLM::Message.decode64 challenge.params
|
633
|
+
|
634
|
+
type_3 = type_2.response({ :user => @user, :password => @password, },
|
635
|
+
{ :ntlmv2 => true }).encode64
|
636
|
+
|
637
|
+
headers['Authorization'] = "NTLM #{type_3}"
|
638
|
+
else
|
639
|
+
type_1 = Net::NTLM::Message::Type1.new.encode64
|
640
|
+
headers['Authorization'] = "NTLM #{type_1}"
|
641
|
+
end
|
642
|
+
elsif challenge = challenges.find { |c| c.scheme == 'Basic' } then
|
643
|
+
realm = challenge.realm uri
|
644
|
+
|
645
|
+
existing_realms = @authenticate_methods[realm.uri][:basic]
|
646
|
+
|
647
|
+
raise Mechanize::UnauthorizedError, page if
|
648
|
+
existing_realms.include? realm
|
649
|
+
|
650
|
+
existing_realms << realm
|
651
|
+
else
|
652
|
+
raise Mechanize::UnauthorizedError, page
|
653
|
+
end
|
654
|
+
|
655
|
+
fetch uri, request.method.downcase.to_sym, headers, params, referer
|
656
|
+
end
|
657
|
+
|
454
658
|
def response_content_encoding response, body_io
|
455
|
-
length = response.content_length
|
659
|
+
length = response.content_length
|
660
|
+
|
661
|
+
length = case body_io
|
662
|
+
when IO, Tempfile then
|
663
|
+
body_io.stat.size
|
664
|
+
else
|
665
|
+
body_io.length
|
666
|
+
end unless length
|
667
|
+
|
668
|
+
out_io = nil
|
456
669
|
|
457
670
|
case response['Content-Encoding']
|
458
671
|
when nil, 'none', '7bit' then
|
459
|
-
body_io
|
672
|
+
out_io = body_io
|
460
673
|
when 'deflate' then
|
461
674
|
log.debug('deflate body') if log
|
462
675
|
|
463
676
|
return if length.zero?
|
464
677
|
|
465
678
|
begin
|
466
|
-
|
679
|
+
out_io = inflate body_io
|
467
680
|
rescue Zlib::BufError, Zlib::DataError
|
468
681
|
log.error('Unable to inflate page, retrying with raw deflate') if log
|
682
|
+
body_io.rewind
|
469
683
|
begin
|
470
|
-
|
684
|
+
out_io = inflate body_io, -Zlib::MAX_WBITS
|
471
685
|
rescue Zlib::BufError, Zlib::DataError
|
472
686
|
log.error("unable to inflate page: #{$!}") if log
|
473
|
-
|
687
|
+
nil
|
474
688
|
end
|
475
689
|
end
|
476
690
|
when 'gzip', 'x-gzip' then
|
@@ -480,12 +694,17 @@ class Mechanize::HTTP::Agent
|
|
480
694
|
|
481
695
|
begin
|
482
696
|
zio = Zlib::GzipReader.new body_io
|
483
|
-
|
697
|
+
out_io = Tempfile.new 'mechanize-decode'
|
698
|
+
|
699
|
+
until zio.eof? do
|
700
|
+
out_io.write zio.read 16384
|
701
|
+
end
|
484
702
|
rescue Zlib::BufError, Zlib::GzipFile::Error
|
485
703
|
log.error('Unable to gunzip body, trying raw inflate') if log
|
486
704
|
body_io.rewind
|
487
705
|
body_io.read 10
|
488
|
-
|
706
|
+
|
707
|
+
out_io = inflate body_io, -Zlib::MAX_WBITS
|
489
708
|
rescue Zlib::DataError
|
490
709
|
log.error("unable to gunzip page: #{$!}") if log
|
491
710
|
''
|
@@ -496,14 +715,23 @@ class Mechanize::HTTP::Agent
|
|
496
715
|
raise Mechanize::Error,
|
497
716
|
"Unsupported Content-Encoding: #{response['Content-Encoding']}"
|
498
717
|
end
|
718
|
+
|
719
|
+
out_io.flush
|
720
|
+
out_io.rewind
|
721
|
+
|
722
|
+
out_io
|
499
723
|
end
|
500
724
|
|
501
725
|
def response_cookies response, uri, page
|
726
|
+
log = log() # reduce method calls
|
502
727
|
if Mechanize::Page === page and page.body =~ /Set-Cookie/n
|
503
728
|
page.search('//head/meta[@http-equiv="Set-Cookie"]').each do |meta|
|
504
|
-
Mechanize::Cookie.parse(uri, meta['content']) { |c|
|
505
|
-
|
506
|
-
|
729
|
+
Mechanize::Cookie.parse(uri, meta['content'], log) { |c|
|
730
|
+
if @cookie_jar.add(uri, c)
|
731
|
+
log.debug("saved cookie: #{c}") if log
|
732
|
+
else
|
733
|
+
log.debug("rejected cookie: #{c}") if log
|
734
|
+
end
|
507
735
|
}
|
508
736
|
end
|
509
737
|
end
|
@@ -513,35 +741,27 @@ class Mechanize::HTTP::Agent
|
|
513
741
|
return unless header_cookies
|
514
742
|
|
515
743
|
header_cookies.each do |cookie|
|
516
|
-
Mechanize::Cookie.parse(uri, cookie) { |c|
|
517
|
-
|
518
|
-
|
744
|
+
Mechanize::Cookie.parse(uri, cookie, log) { |c|
|
745
|
+
if @cookie_jar.add(uri, c)
|
746
|
+
log.debug("saved cookie: #{c}") if log
|
747
|
+
else
|
748
|
+
log.debug("rejected cookie: #{c}") if log
|
749
|
+
end
|
519
750
|
}
|
520
751
|
end
|
521
752
|
end
|
522
753
|
|
523
754
|
def response_follow_meta_refresh response, uri, page, redirects
|
524
|
-
|
525
|
-
|
526
|
-
redirect_uri = nil
|
527
|
-
referer = page
|
755
|
+
delay, new_url = get_meta_refresh(response, uri, page)
|
756
|
+
return nil unless new_url
|
528
757
|
|
529
|
-
|
530
|
-
|
531
|
-
sleep redirect.node['delay'].to_f
|
532
|
-
referer = Mechanize::Page.new(nil, {'content-type'=>'text/html'})
|
533
|
-
elsif refresh = response['refresh']
|
534
|
-
delay, redirect_uri = Mechanize::Page::MetaRefresh.parse refresh, uri
|
535
|
-
raise Mechanize::Error, 'Invalid refresh http header' unless delay
|
536
|
-
raise Mechanize::RedirectLimitReachedError.new(page, redirects) if
|
537
|
-
redirects + 1 > @redirection_limit
|
538
|
-
sleep delay.to_f
|
539
|
-
end
|
758
|
+
raise Mechanize::RedirectLimitReachedError.new(page, redirects) if
|
759
|
+
redirects + 1 > @redirection_limit
|
540
760
|
|
541
|
-
|
542
|
-
|
543
|
-
|
544
|
-
|
761
|
+
sleep delay
|
762
|
+
@history.push(page, page.uri)
|
763
|
+
fetch new_url, :get, {}, [],
|
764
|
+
Mechanize::Page.new(nil, {'content-type'=>'text/html'}), redirects
|
545
765
|
end
|
546
766
|
|
547
767
|
def response_log response
|
@@ -555,18 +775,36 @@ class Mechanize::HTTP::Agent
|
|
555
775
|
end
|
556
776
|
end
|
557
777
|
|
558
|
-
def response_parse response,
|
559
|
-
@context.parse uri, response,
|
778
|
+
def response_parse response, body_io, uri
|
779
|
+
@context.parse uri, response, body_io
|
560
780
|
end
|
561
781
|
|
562
782
|
def response_read response, request
|
563
|
-
|
783
|
+
content_length = response.content_length
|
784
|
+
|
785
|
+
if content_length and content_length > @max_file_buffer then
|
786
|
+
body_io = Tempfile.new 'mechanize-raw'
|
787
|
+
body_io.binmode if defined? body_io.binmode
|
788
|
+
else
|
789
|
+
body_io = StringIO.new
|
790
|
+
end
|
791
|
+
|
564
792
|
body_io.set_encoding Encoding::BINARY if body_io.respond_to? :set_encoding
|
565
793
|
total = 0
|
566
794
|
|
567
795
|
begin
|
568
796
|
response.read_body { |part|
|
569
797
|
total += part.length
|
798
|
+
|
799
|
+
if StringIO === body_io and total > @max_file_buffer then
|
800
|
+
new_io = Tempfile.new 'mechanize-raw'
|
801
|
+
new_io.binmode if defined? binmode
|
802
|
+
|
803
|
+
new_io.write body_io.string
|
804
|
+
|
805
|
+
body_io = new_io
|
806
|
+
end
|
807
|
+
|
570
808
|
body_io.write(part)
|
571
809
|
log.debug("Read #{part.length} bytes (#{total} total)") if log
|
572
810
|
}
|
@@ -575,6 +813,7 @@ class Mechanize::HTTP::Agent
|
|
575
813
|
raise Mechanize::ResponseReadError.new(e, response, body_io)
|
576
814
|
end
|
577
815
|
|
816
|
+
body_io.flush
|
578
817
|
body_io.rewind
|
579
818
|
|
580
819
|
raise Mechanize::ResponseCodeError, response if
|
@@ -591,49 +830,37 @@ class Mechanize::HTTP::Agent
|
|
591
830
|
body_io
|
592
831
|
end
|
593
832
|
|
594
|
-
def response_redirect response, method, page, redirects
|
833
|
+
def response_redirect response, method, page, redirects, referer = current_page
|
595
834
|
case @redirect_ok
|
596
835
|
when true, :all
|
597
836
|
# shortcut
|
598
837
|
when false, nil
|
599
838
|
return page
|
600
839
|
when :permanent
|
601
|
-
return page
|
840
|
+
return page unless Net::HTTPMovedPermanently === response
|
602
841
|
end
|
603
842
|
|
604
843
|
log.info("follow redirect to: #{response['Location']}") if log
|
605
844
|
|
606
|
-
from_uri = page.uri
|
607
|
-
|
608
845
|
raise Mechanize::RedirectLimitReachedError.new(page, redirects) if
|
609
846
|
redirects + 1 > @redirection_limit
|
610
847
|
|
611
848
|
redirect_method = method == :head ? :head : :get
|
612
849
|
|
613
|
-
|
614
|
-
redirects + 1)
|
615
|
-
|
850
|
+
from_uri = page.uri
|
616
851
|
@history.push(page, from_uri)
|
852
|
+
new_uri = from_uri + response['Location'].to_s
|
617
853
|
|
618
|
-
|
854
|
+
fetch new_uri, redirect_method, {}, [], referer, redirects + 1
|
619
855
|
end
|
620
856
|
|
621
|
-
|
622
|
-
referer)
|
623
|
-
raise Mechanize::ResponseCodeError, page unless @user || @password
|
624
|
-
raise Mechanize::ResponseCodeError, page if @auth_hash.has_key?(uri.host)
|
857
|
+
# :section: Robots
|
625
858
|
|
626
|
-
|
627
|
-
|
628
|
-
|
629
|
-
|
630
|
-
|
631
|
-
@digest = response['www-authenticate']
|
632
|
-
else
|
633
|
-
@auth_hash[uri.host] = :basic
|
634
|
-
end
|
635
|
-
|
636
|
-
fetch uri, request.method.downcase.to_sym, headers, params, referer
|
859
|
+
def get_robots(uri) # :nodoc:
|
860
|
+
fetch(uri).body
|
861
|
+
rescue Mechanize::ResponseCodeError => e
|
862
|
+
return '' if e.response_code == '404'
|
863
|
+
raise e
|
637
864
|
end
|
638
865
|
|
639
866
|
def robots= value
|
@@ -675,13 +902,58 @@ class Mechanize::HTTP::Agent
|
|
675
902
|
webrobots.reset(url)
|
676
903
|
end
|
677
904
|
|
905
|
+
def webrobots
|
906
|
+
@webrobots ||= WebRobots.new(@user_agent, :http_get => method(:get_robots))
|
907
|
+
end
|
908
|
+
|
909
|
+
# :section: SSL
|
910
|
+
|
911
|
+
def certificate
|
912
|
+
@http.certificate
|
913
|
+
end
|
914
|
+
|
915
|
+
# :section: Timeouts
|
916
|
+
|
917
|
+
# Sets the connection idle timeout for persistent connections
|
918
|
+
def idle_timeout= timeout
|
919
|
+
@idle_timeout = timeout
|
920
|
+
@http.idle_timeout = timeout if @http
|
921
|
+
end
|
922
|
+
|
923
|
+
# :section: Utility
|
924
|
+
|
925
|
+
def inflate compressed, window_bits = nil
|
926
|
+
inflate = Zlib::Inflate.new window_bits
|
927
|
+
out_io = Tempfile.new 'mechanize-decode'
|
928
|
+
|
929
|
+
until compressed.eof? do
|
930
|
+
out_io.write inflate.inflate compressed.read 1024
|
931
|
+
end
|
932
|
+
|
933
|
+
out_io.write inflate.finish
|
934
|
+
|
935
|
+
out_io
|
936
|
+
end
|
937
|
+
|
938
|
+
def log
|
939
|
+
@context.log
|
940
|
+
end
|
941
|
+
|
678
942
|
def set_http
|
679
943
|
@http = Net::HTTP::Persistent.new 'mechanize', @proxy_uri
|
680
944
|
|
681
945
|
@http.keep_alive = @keep_alive_time
|
946
|
+
@http.idle_timeout = @idle_timeout if @idle_timeout
|
947
|
+
@http.retry_change_requests = @retry_change_requests
|
682
948
|
|
683
949
|
@http.ca_file = @ca_file
|
950
|
+
@http.cert_store = @cert_store if @cert_store
|
684
951
|
@http.verify_callback = @verify_callback
|
952
|
+
@http.verify_mode = @verify_mode if @verify_mode
|
953
|
+
|
954
|
+
# update our cached value
|
955
|
+
@verify_mode = @http.verify_mode
|
956
|
+
@cert_store = @http.cert_store
|
685
957
|
|
686
958
|
if @cert and @key then
|
687
959
|
cert = if OpenSSL::X509::Certificate === @cert then
|
@@ -701,10 +973,26 @@ class Mechanize::HTTP::Agent
|
|
701
973
|
end
|
702
974
|
end
|
703
975
|
|
976
|
+
##
|
704
977
|
# Sets the proxy address, port, user, and password.  +addr+ should be a host,
|
705
|
-
# with no "http://"
|
978
|
+
# with no "http://".  +port+ may be a port number, service name or port
|
979
|
+
# number string.
|
980
|
+
|
706
981
|
def set_proxy(addr, port, user = nil, pass = nil)
|
707
982
|
return unless addr and port
|
983
|
+
|
984
|
+
unless Integer === port then
|
985
|
+
begin
|
986
|
+
port = Socket.getservbyname port
|
987
|
+
rescue SocketError
|
988
|
+
begin
|
989
|
+
port = Integer port
|
990
|
+
rescue ArgumentError
|
991
|
+
raise ArgumentError, "invalid value for port: #{port.inspect}"
|
992
|
+
end
|
993
|
+
end
|
994
|
+
end
|
995
|
+
|
708
996
|
@proxy_uri = URI "http://#{addr}"
|
709
997
|
@proxy_uri.port = port
|
710
998
|
@proxy_uri.user = user if user
|
@@ -713,26 +1001,5 @@ class Mechanize::HTTP::Agent
|
|
713
1001
|
@proxy_uri
|
714
1002
|
end
|
715
1003
|
|
716
|
-
def user_agent= user_agent
|
717
|
-
@webrobots = nil if user_agent != @user_agent
|
718
|
-
@user_agent = user_agent
|
719
|
-
end
|
720
|
-
|
721
|
-
# Returns a visited page for the url passed in, otherwise nil
|
722
|
-
def visited_page url
|
723
|
-
@history.visited_page resolve url
|
724
|
-
end
|
725
|
-
|
726
|
-
def get_robots(uri) # :nodoc:
|
727
|
-
fetch(uri).body
|
728
|
-
rescue Mechanize::ResponseCodeError => e
|
729
|
-
return '' if e.response_code == '404'
|
730
|
-
raise e
|
731
|
-
end
|
732
|
-
|
733
|
-
def webrobots
|
734
|
-
@webrobots ||= WebRobots.new(@user_agent, :http_get => method(:get_robots))
|
735
|
-
end
|
736
|
-
|
737
1004
|
end
|
738
1005
|
|