diamond-mechanize 2.1 → 2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- metadata +222 -167
- data/Rakefile +0 -49
- data/lib/mechanize/content_type_error.rb +0 -13
- data/lib/mechanize/cookie.rb +0 -232
- data/lib/mechanize/cookie_jar.rb +0 -194
- data/lib/mechanize/download.rb +0 -59
- data/lib/mechanize/element_matcher.rb +0 -36
- data/lib/mechanize/file.rb +0 -65
- data/lib/mechanize/file_connection.rb +0 -17
- data/lib/mechanize/file_request.rb +0 -26
- data/lib/mechanize/file_response.rb +0 -74
- data/lib/mechanize/file_saver.rb +0 -39
- data/lib/mechanize/form/button.rb +0 -6
- data/lib/mechanize/form/check_box.rb +0 -12
- data/lib/mechanize/form/field.rb +0 -54
- data/lib/mechanize/form/file_upload.rb +0 -21
- data/lib/mechanize/form/hidden.rb +0 -3
- data/lib/mechanize/form/image_button.rb +0 -19
- data/lib/mechanize/form/keygen.rb +0 -34
- data/lib/mechanize/form/multi_select_list.rb +0 -94
- data/lib/mechanize/form/option.rb +0 -50
- data/lib/mechanize/form/radio_button.rb +0 -55
- data/lib/mechanize/form/reset.rb +0 -3
- data/lib/mechanize/form/select_list.rb +0 -44
- data/lib/mechanize/form/submit.rb +0 -3
- data/lib/mechanize/form/text.rb +0 -3
- data/lib/mechanize/form/textarea.rb +0 -3
- data/lib/mechanize/form.rb +0 -543
- data/lib/mechanize/headers.rb +0 -23
- data/lib/mechanize/history.rb +0 -82
- data/lib/mechanize/http/agent.rb +0 -1004
- data/lib/mechanize/http/auth_challenge.rb +0 -59
- data/lib/mechanize/http/auth_realm.rb +0 -31
- data/lib/mechanize/http/content_disposition_parser.rb +0 -188
- data/lib/mechanize/http/www_authenticate_parser.rb +0 -155
- data/lib/mechanize/http.rb +0 -8
- data/lib/mechanize/monkey_patch.rb +0 -16
- data/lib/mechanize/page/base.rb +0 -7
- data/lib/mechanize/page/frame.rb +0 -27
- data/lib/mechanize/page/image.rb +0 -30
- data/lib/mechanize/page/label.rb +0 -20
- data/lib/mechanize/page/link.rb +0 -98
- data/lib/mechanize/page/meta_refresh.rb +0 -68
- data/lib/mechanize/page.rb +0 -440
- data/lib/mechanize/parser.rb +0 -173
- data/lib/mechanize/pluggable_parsers.rb +0 -144
- data/lib/mechanize/redirect_limit_reached_error.rb +0 -19
- data/lib/mechanize/redirect_not_get_or_head_error.rb +0 -21
- data/lib/mechanize/response_code_error.rb +0 -21
- data/lib/mechanize/response_read_error.rb +0 -27
- data/lib/mechanize/robots_disallowed_error.rb +0 -28
- data/lib/mechanize/test_case.rb +0 -663
- data/lib/mechanize/unauthorized_error.rb +0 -3
- data/lib/mechanize/unsupported_scheme_error.rb +0 -6
- data/lib/mechanize/util.rb +0 -101
- data/lib/mechanize.rb +0 -1079
- data/test/data/htpasswd +0 -1
- data/test/data/server.crt +0 -16
- data/test/data/server.csr +0 -12
- data/test/data/server.key +0 -15
- data/test/data/server.pem +0 -15
- data/test/htdocs/alt_text.html +0 -10
- data/test/htdocs/bad_form_test.html +0 -9
- data/test/htdocs/button.jpg +0 -0
- data/test/htdocs/canonical_uri.html +0 -9
- data/test/htdocs/dir with spaces/foo.html +0 -1
- data/test/htdocs/empty_form.html +0 -6
- data/test/htdocs/file_upload.html +0 -26
- data/test/htdocs/find_link.html +0 -41
- data/test/htdocs/form_multi_select.html +0 -16
- data/test/htdocs/form_multival.html +0 -37
- data/test/htdocs/form_no_action.html +0 -18
- data/test/htdocs/form_no_input_name.html +0 -16
- data/test/htdocs/form_order_test.html +0 -11
- data/test/htdocs/form_select.html +0 -16
- data/test/htdocs/form_set_fields.html +0 -14
- data/test/htdocs/form_test.html +0 -188
- data/test/htdocs/frame_referer_test.html +0 -10
- data/test/htdocs/frame_test.html +0 -30
- data/test/htdocs/google.html +0 -13
- data/test/htdocs/index.html +0 -6
- data/test/htdocs/link with space.html +0 -5
- data/test/htdocs/meta_cookie.html +0 -11
- data/test/htdocs/no_title_test.html +0 -6
- data/test/htdocs/noindex.html +0 -9
- data/test/htdocs/rails_3_encoding_hack_form_test.html +0 -27
- data/test/htdocs/relative/tc_relative_links.html +0 -21
- data/test/htdocs/robots.html +0 -8
- data/test/htdocs/robots.txt +0 -2
- data/test/htdocs/tc_bad_charset.html +0 -9
- data/test/htdocs/tc_bad_links.html +0 -5
- data/test/htdocs/tc_base_link.html +0 -8
- data/test/htdocs/tc_blank_form.html +0 -11
- data/test/htdocs/tc_charset.html +0 -6
- data/test/htdocs/tc_checkboxes.html +0 -19
- data/test/htdocs/tc_encoded_links.html +0 -5
- data/test/htdocs/tc_field_precedence.html +0 -11
- data/test/htdocs/tc_follow_meta.html +0 -8
- data/test/htdocs/tc_form_action.html +0 -48
- data/test/htdocs/tc_links.html +0 -19
- data/test/htdocs/tc_meta_in_body.html +0 -9
- data/test/htdocs/tc_pretty_print.html +0 -17
- data/test/htdocs/tc_referer.html +0 -16
- data/test/htdocs/tc_relative_links.html +0 -19
- data/test/htdocs/tc_textarea.html +0 -23
- data/test/htdocs/test_click.html +0 -11
- data/test/htdocs/unusual______.html +0 -5
- data/test/test_mechanize.rb +0 -1164
- data/test/test_mechanize_cookie.rb +0 -451
- data/test/test_mechanize_cookie_jar.rb +0 -483
- data/test/test_mechanize_download.rb +0 -43
- data/test/test_mechanize_file.rb +0 -61
- data/test/test_mechanize_file_connection.rb +0 -21
- data/test/test_mechanize_file_request.rb +0 -19
- data/test/test_mechanize_file_saver.rb +0 -21
- data/test/test_mechanize_form.rb +0 -875
- data/test/test_mechanize_form_check_box.rb +0 -38
- data/test/test_mechanize_form_encoding.rb +0 -114
- data/test/test_mechanize_form_field.rb +0 -63
- data/test/test_mechanize_form_file_upload.rb +0 -20
- data/test/test_mechanize_form_image_button.rb +0 -12
- data/test/test_mechanize_form_keygen.rb +0 -32
- data/test/test_mechanize_form_multi_select_list.rb +0 -84
- data/test/test_mechanize_form_option.rb +0 -55
- data/test/test_mechanize_form_radio_button.rb +0 -78
- data/test/test_mechanize_form_select_list.rb +0 -76
- data/test/test_mechanize_form_textarea.rb +0 -52
- data/test/test_mechanize_headers.rb +0 -35
- data/test/test_mechanize_history.rb +0 -103
- data/test/test_mechanize_http_agent.rb +0 -1225
- data/test/test_mechanize_http_auth_challenge.rb +0 -39
- data/test/test_mechanize_http_auth_realm.rb +0 -49
- data/test/test_mechanize_http_content_disposition_parser.rb +0 -118
- data/test/test_mechanize_http_www_authenticate_parser.rb +0 -146
- data/test/test_mechanize_link.rb +0 -80
- data/test/test_mechanize_page.rb +0 -118
- data/test/test_mechanize_page_encoding.rb +0 -182
- data/test/test_mechanize_page_frame.rb +0 -16
- data/test/test_mechanize_page_link.rb +0 -390
- data/test/test_mechanize_page_meta_refresh.rb +0 -127
- data/test/test_mechanize_parser.rb +0 -289
- data/test/test_mechanize_pluggable_parser.rb +0 -52
- data/test/test_mechanize_redirect_limit_reached_error.rb +0 -24
- data/test/test_mechanize_redirect_not_get_or_head_error.rb +0 -14
- data/test/test_mechanize_subclass.rb +0 -22
- data/test/test_mechanize_util.rb +0 -103
- data/test/test_multi_select.rb +0 -119
data/lib/mechanize/util.rb
DELETED
@@ -1,101 +0,0 @@
|
|
1
|
-
require 'cgi'
|
2
|
-
|
3
|
-
class Mechanize::Util
|
4
|
-
CODE_DIC = {
|
5
|
-
:JIS => "ISO-2022-JP",
|
6
|
-
:EUC => "EUC-JP",
|
7
|
-
:SJIS => "SHIFT_JIS",
|
8
|
-
:UTF8 => "UTF-8", :UTF16 => "UTF-16", :UTF32 => "UTF-32"}
|
9
|
-
|
10
|
-
# true if RUBY_VERSION is 1.9.0 or later
|
11
|
-
NEW_RUBY_ENCODING = RUBY_VERSION >= '1.9.0'
|
12
|
-
|
13
|
-
# contains encoding error classes to raise
|
14
|
-
ENCODING_ERRORS = if NEW_RUBY_ENCODING
|
15
|
-
[EncodingError]
|
16
|
-
else
|
17
|
-
[Iconv::InvalidEncoding, Iconv::IllegalSequence]
|
18
|
-
end
|
19
|
-
|
20
|
-
def self.build_query_string(parameters, enc=nil)
|
21
|
-
parameters.map { |k,v|
|
22
|
-
# WEBrick::HTTP.escape* has some problems about m17n on ruby-1.9.*.
|
23
|
-
[CGI.escape(k.to_s), CGI.escape(v.to_s)].join("=") if k
|
24
|
-
}.compact.join('&')
|
25
|
-
end
|
26
|
-
|
27
|
-
# Converts string +s+ from +code+ to UTF-8.
|
28
|
-
def self.from_native_charset(s, code, ignore_encoding_error=false, log=nil)
|
29
|
-
return s unless s && code
|
30
|
-
return s unless Mechanize.html_parser == Nokogiri::HTML
|
31
|
-
|
32
|
-
begin
|
33
|
-
encode_to(code, s)
|
34
|
-
rescue *ENCODING_ERRORS => ex
|
35
|
-
log.debug("from_native_charset: #{ex.class}: form encoding: #{code.inspect} string: #{s}") if log
|
36
|
-
if ignore_encoding_error
|
37
|
-
s
|
38
|
-
else
|
39
|
-
raise
|
40
|
-
end
|
41
|
-
end
|
42
|
-
end
|
43
|
-
|
44
|
-
# inner convert method of Util.from_native_charset
|
45
|
-
def self.encode_to(encoding, str)
|
46
|
-
if NEW_RUBY_ENCODING
|
47
|
-
str.encode(encoding)
|
48
|
-
else
|
49
|
-
Iconv.conv(encoding.to_s, "UTF-8", str)
|
50
|
-
end
|
51
|
-
end
|
52
|
-
private_class_method :encode_to
|
53
|
-
|
54
|
-
def self.html_unescape(s)
|
55
|
-
return s unless s
|
56
|
-
s.gsub(/&(\w+|#[0-9]+);/) { |match|
|
57
|
-
number = case match
|
58
|
-
when /&(\w+);/
|
59
|
-
Mechanize.html_parser::NamedCharacters[$1]
|
60
|
-
when /&#([0-9]+);/
|
61
|
-
$1.to_i
|
62
|
-
end
|
63
|
-
|
64
|
-
number ? ([number].pack('U') rescue match) : match
|
65
|
-
}
|
66
|
-
end
|
67
|
-
|
68
|
-
def self.detect_charset(src)
|
69
|
-
tmp = NKF.guess(src || "<html></html>")
|
70
|
-
if RUBY_VERSION >= "1.9.0"
|
71
|
-
enc = tmp.to_s.upcase
|
72
|
-
else
|
73
|
-
enc = NKF.constants.find{|c|
|
74
|
-
NKF.const_get(c) == tmp
|
75
|
-
}
|
76
|
-
enc = CODE_DIC[enc.intern]
|
77
|
-
end
|
78
|
-
enc || "ISO-8859-1"
|
79
|
-
end
|
80
|
-
|
81
|
-
def self.uri_escape str
|
82
|
-
@parser ||= begin
|
83
|
-
URI::Parser.new
|
84
|
-
rescue NameError
|
85
|
-
URI
|
86
|
-
end
|
87
|
-
|
88
|
-
@parser.escape str
|
89
|
-
end
|
90
|
-
|
91
|
-
def self.uri_unescape str
|
92
|
-
@parser ||= begin
|
93
|
-
URI::Parser.new
|
94
|
-
rescue NameError
|
95
|
-
URI
|
96
|
-
end
|
97
|
-
|
98
|
-
@parser.unescape str
|
99
|
-
end
|
100
|
-
|
101
|
-
end
|
data/lib/mechanize.rb
DELETED
@@ -1,1079 +0,0 @@
|
|
1
|
-
require 'fileutils'
|
2
|
-
require 'forwardable'
|
3
|
-
require 'iconv' if RUBY_VERSION < '1.9.2'
|
4
|
-
require 'mutex_m'
|
5
|
-
require 'net/http/digest_auth'
|
6
|
-
require 'net/http/persistent'
|
7
|
-
require 'nkf'
|
8
|
-
require 'nokogiri'
|
9
|
-
require 'openssl'
|
10
|
-
require 'pp'
|
11
|
-
require 'stringio'
|
12
|
-
require 'uri'
|
13
|
-
require 'webrick/httputils'
|
14
|
-
require 'zlib'
|
15
|
-
|
16
|
-
##
|
17
|
-
# The Mechanize library is used for automating interactions with a website. It
|
18
|
-
# can follow links and submit forms. Form fields can be populated and
|
19
|
-
# submitted. A history of URL's is maintained and can be queried.
|
20
|
-
#
|
21
|
-
# == Example
|
22
|
-
#
|
23
|
-
# require 'mechanize'
|
24
|
-
# require 'logger'
|
25
|
-
#
|
26
|
-
# agent = Mechanize.new
|
27
|
-
# agent.log = Logger.new "mech.log"
|
28
|
-
# agent.user_agent_alias = 'Mac Safari'
|
29
|
-
#
|
30
|
-
# page = agent.get "http://www.google.com/"
|
31
|
-
# search_form = page.form_with :name => "f"
|
32
|
-
# search_form.field_with(:name => "q").value = "Hello"
|
33
|
-
#
|
34
|
-
# search_results = agent.submit search_form
|
35
|
-
# puts search_results.body
|
36
|
-
|
37
|
-
class Mechanize
|
38
|
-
|
39
|
-
##
|
40
|
-
# The version of Mechanize you are using.
|
41
|
-
|
42
|
-
VERSION = '2.1'
|
43
|
-
|
44
|
-
##
|
45
|
-
# Base mechanize error class
|
46
|
-
|
47
|
-
class Error < RuntimeError
|
48
|
-
end
|
49
|
-
|
50
|
-
ruby_version = if RUBY_PATCHLEVEL >= 0 then
|
51
|
-
"#{RUBY_VERSION}p#{RUBY_PATCHLEVEL}"
|
52
|
-
else
|
53
|
-
"#{RUBY_VERSION}dev#{RUBY_REVISION}"
|
54
|
-
end
|
55
|
-
|
56
|
-
##
|
57
|
-
# Supported User-Agent aliases for use with user_agent_alias=. The
|
58
|
-
# description in parenthesis is for informative purposes and is not part of
|
59
|
-
# the alias name.
|
60
|
-
#
|
61
|
-
# * Linux Firefox (3.6.1)
|
62
|
-
# * Linux Konqueror (3)
|
63
|
-
# * Linux Mozilla
|
64
|
-
# * Mac Firefox (3.6)
|
65
|
-
# * Mac Mozilla
|
66
|
-
# * Mac Safari (5)
|
67
|
-
# * Mac Safari 4
|
68
|
-
# * Mechanize (default)
|
69
|
-
# * Windows IE 6
|
70
|
-
# * Windows IE 7
|
71
|
-
# * Windows IE 8
|
72
|
-
# * Windows IE 9
|
73
|
-
# * Windows Mozilla
|
74
|
-
# * iPhone (3.0)
|
75
|
-
#
|
76
|
-
# Example:
|
77
|
-
#
|
78
|
-
# agent = Mechanize.new
|
79
|
-
# agent.user_agent_alias = 'Mac Safari'
|
80
|
-
|
81
|
-
AGENT_ALIASES = {
|
82
|
-
'Mechanize' => "Mechanize/#{VERSION} Ruby/#{ruby_version} (http://github.com/tenderlove/mechanize/)",
|
83
|
-
'Linux Firefox' => 'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.2.1) Gecko/20100122 firefox/3.6.1',
|
84
|
-
'Linux Konqueror' => 'Mozilla/5.0 (compatible; Konqueror/3; Linux)',
|
85
|
-
'Linux Mozilla' => 'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.4) Gecko/20030624',
|
86
|
-
'Mac FireFox' => 'Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10.6; en-US; rv:1.9.2) Gecko/20100115 Firefox/3.6',
|
87
|
-
'Mac Mozilla' => 'Mozilla/5.0 (Macintosh; U; PPC Mac OS X Mach-O; en-US; rv:1.4a) Gecko/20030401',
|
88
|
-
'Mac Safari 4' => 'Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_2; de-at) AppleWebKit/531.21.8 (KHTML, like Gecko) Version/4.0.4 Safari/531.21.10',
|
89
|
-
'Mac Safari' => 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_2) AppleWebKit/534.51.22 (KHTML, like Gecko) Version/5.1.1 Safari/534.51.22',
|
90
|
-
'Windows IE 6' => 'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)',
|
91
|
-
'Windows IE 7' => 'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; .NET CLR 1.1.4322; .NET CLR 2.0.50727)',
|
92
|
-
'Windows IE 8' => 'Mozilla/5.0 (compatible; MSIE 8.0; Windows NT 5.1; Trident/4.0; .NET CLR 1.1.4322; .NET CLR 2.0.50727)',
|
93
|
-
'Windows IE 9' => 'Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0)',
|
94
|
-
'Windows Mozilla' => 'Mozilla/5.0 (Windows; U; Windows NT 5.0; en-US; rv:1.4b) Gecko/20030516 Mozilla Firebird/0.6',
|
95
|
-
'iPhone' => 'Mozilla/5.0 (iPhone; U; CPU like Mac OS X; en) AppleWebKit/420+ (KHTML, like Gecko) Version/3.0 Mobile/1C28 Safari/419.3',
|
96
|
-
}
|
97
|
-
|
98
|
-
def self.inherited(child) # :nodoc:
|
99
|
-
child.html_parser ||= html_parser
|
100
|
-
child.log ||= log
|
101
|
-
super
|
102
|
-
end
|
103
|
-
|
104
|
-
##
|
105
|
-
# Creates a new mechanize instance. If a block is given, the created
|
106
|
-
# instance is yielded to the block for setting up pre-connection state such
|
107
|
-
# as SSL parameters or proxies:
|
108
|
-
#
|
109
|
-
# agent = Mechanize.new do |a|
|
110
|
-
# a.proxy_host = 'proxy.example'
|
111
|
-
# a.proxy_port = 8080
|
112
|
-
# end
|
113
|
-
|
114
|
-
def initialize
|
115
|
-
@agent = Mechanize::HTTP::Agent.new
|
116
|
-
@agent.context = self
|
117
|
-
@log = nil
|
118
|
-
|
119
|
-
# attr_accessors
|
120
|
-
@agent.user_agent = AGENT_ALIASES['Mechanize']
|
121
|
-
@watch_for_set = nil
|
122
|
-
@history_added = nil
|
123
|
-
|
124
|
-
# attr_readers
|
125
|
-
@pluggable_parser = PluggableParser.new
|
126
|
-
|
127
|
-
@keep_alive_time = 0
|
128
|
-
|
129
|
-
# Proxy
|
130
|
-
@proxy_addr = nil
|
131
|
-
@proxy_port = nil
|
132
|
-
@proxy_user = nil
|
133
|
-
@proxy_pass = nil
|
134
|
-
|
135
|
-
@html_parser = self.class.html_parser
|
136
|
-
|
137
|
-
@default_encoding = nil
|
138
|
-
@force_default_encoding = false
|
139
|
-
|
140
|
-
yield self if block_given?
|
141
|
-
|
142
|
-
@agent.set_proxy @proxy_addr, @proxy_port, @proxy_user, @proxy_pass
|
143
|
-
@agent.set_http
|
144
|
-
end
|
145
|
-
|
146
|
-
# :section: History
|
147
|
-
#
|
148
|
-
# Methods for navigating and controlling history
|
149
|
-
|
150
|
-
##
|
151
|
-
# Equivalent to the browser back button. Returns the previous page visited.
|
152
|
-
|
153
|
-
def back
|
154
|
-
@agent.history.pop
|
155
|
-
end
|
156
|
-
|
157
|
-
##
|
158
|
-
# Returns the latest page loaded by Mechanize
|
159
|
-
|
160
|
-
def current_page
|
161
|
-
@agent.current_page
|
162
|
-
end
|
163
|
-
|
164
|
-
alias page current_page
|
165
|
-
|
166
|
-
##
|
167
|
-
# The history of this mechanize run
|
168
|
-
|
169
|
-
def history
|
170
|
-
@agent.history
|
171
|
-
end
|
172
|
-
|
173
|
-
##
|
174
|
-
# Maximum number of items allowed in the history.
|
175
|
-
|
176
|
-
def max_history
|
177
|
-
@agent.history.max_size
|
178
|
-
end
|
179
|
-
|
180
|
-
##
|
181
|
-
# Sets the maximum number of items allowed in the history to +length+.
|
182
|
-
|
183
|
-
def max_history= length
|
184
|
-
@agent.history.max_size = length
|
185
|
-
end
|
186
|
-
|
187
|
-
##
|
188
|
-
# Returns a visited page for the +url+ passed in, otherwise nil
|
189
|
-
|
190
|
-
def visited? url
|
191
|
-
url = url.href if url.respond_to? :href
|
192
|
-
|
193
|
-
@agent.visited_page url
|
194
|
-
end
|
195
|
-
|
196
|
-
##
|
197
|
-
# Returns whether or not a url has been visited
|
198
|
-
|
199
|
-
alias visited_page visited?
|
200
|
-
|
201
|
-
# :section: Hooks
|
202
|
-
#
|
203
|
-
# Hooks into the operation of mechanize
|
204
|
-
|
205
|
-
##
|
206
|
-
# A list of hooks to call before reading response header 'content-encoding'.
|
207
|
-
#
|
208
|
-
# The hook is called with the agent making the request, the URI of the
|
209
|
-
# request, the response an IO containing the response body.
|
210
|
-
|
211
|
-
def content_encoding_hooks
|
212
|
-
@agent.content_encoding_hooks
|
213
|
-
end
|
214
|
-
|
215
|
-
##
|
216
|
-
# Callback which is invoked with the page that was added to history.
|
217
|
-
|
218
|
-
attr_accessor :history_added
|
219
|
-
|
220
|
-
##
|
221
|
-
# A list of hooks to call after retrieving a response. Hooks are called with
|
222
|
-
# the agent and the response returned.
|
223
|
-
|
224
|
-
def post_connect_hooks
|
225
|
-
@agent.post_connect_hooks
|
226
|
-
end
|
227
|
-
|
228
|
-
##
|
229
|
-
# A list of hooks to call before making a request. Hooks are called with
|
230
|
-
# the agent and the request to be performed.
|
231
|
-
|
232
|
-
def pre_connect_hooks
|
233
|
-
@agent.pre_connect_hooks
|
234
|
-
end
|
235
|
-
|
236
|
-
# :section: Requests
|
237
|
-
#
|
238
|
-
# Methods for making HTTP requests
|
239
|
-
|
240
|
-
##
|
241
|
-
# If the parameter is a string, finds the button or link with the
|
242
|
-
# value of the string on the current page and clicks it. Otherwise, clicks
|
243
|
-
# the Mechanize::Page::Link object passed in. Returns the page fetched.
|
244
|
-
|
245
|
-
def click link
|
246
|
-
case link
|
247
|
-
when Page::Link then
|
248
|
-
referer = link.page || current_page()
|
249
|
-
if @agent.robots
|
250
|
-
if (referer.is_a?(Page) and referer.parser.nofollow?) or
|
251
|
-
link.rel?('nofollow') then
|
252
|
-
raise RobotsDisallowedError.new(link.href)
|
253
|
-
end
|
254
|
-
end
|
255
|
-
if link.rel?('noreferrer')
|
256
|
-
href = @agent.resolve(link.href, link.page || current_page)
|
257
|
-
referer = Page.new(nil, {'content-type'=>'text/html'})
|
258
|
-
else
|
259
|
-
href = link.href
|
260
|
-
end
|
261
|
-
get href, [], referer
|
262
|
-
when String, Regexp then
|
263
|
-
if real_link = page.link_with(:text => link)
|
264
|
-
click real_link
|
265
|
-
else
|
266
|
-
button = nil
|
267
|
-
form = page.forms.find do |f|
|
268
|
-
button = f.button_with(:value => link)
|
269
|
-
button.is_a? Form::Submit
|
270
|
-
end
|
271
|
-
submit form, button if form
|
272
|
-
end
|
273
|
-
else
|
274
|
-
referer = current_page()
|
275
|
-
href = link.respond_to?(:href) ? link.href :
|
276
|
-
(link['href'] || link['src'])
|
277
|
-
get href, [], referer
|
278
|
-
end
|
279
|
-
end
|
280
|
-
|
281
|
-
##
|
282
|
-
# DELETE +uri+ with +query_params+, and setting +headers+:
|
283
|
-
#
|
284
|
-
# delete('http://example/', {'q' => 'foo'}, {})
|
285
|
-
|
286
|
-
def delete(uri, query_params = {}, headers = {})
|
287
|
-
page = @agent.fetch(uri, :delete, headers, query_params)
|
288
|
-
add_to_history(page)
|
289
|
-
page
|
290
|
-
end
|
291
|
-
|
292
|
-
##
|
293
|
-
# GET the +uri+ with the given request +parameters+, +referer+ and
|
294
|
-
# +headers+.
|
295
|
-
#
|
296
|
-
# The +referer+ may be a URI or a page.
|
297
|
-
|
298
|
-
def get(uri, parameters = [], referer = nil, headers = {})
|
299
|
-
method = :get
|
300
|
-
|
301
|
-
referer ||=
|
302
|
-
if uri.to_s =~ %r{\Ahttps?://}
|
303
|
-
Page.new(nil, {'content-type'=>'text/html'})
|
304
|
-
else
|
305
|
-
current_page || Page.new(nil, {'content-type'=>'text/html'})
|
306
|
-
end
|
307
|
-
|
308
|
-
# FIXME: Huge hack so that using a URI as a referer works. I need to
|
309
|
-
# refactor everything to pass around URIs but still support
|
310
|
-
# Mechanize::Page#base
|
311
|
-
unless Mechanize::Parser === referer then
|
312
|
-
referer = referer.is_a?(String) ?
|
313
|
-
Page.new(URI.parse(referer), {'content-type' => 'text/html'}) :
|
314
|
-
Page.new(referer, {'content-type' => 'text/html'})
|
315
|
-
end
|
316
|
-
|
317
|
-
# fetch the page
|
318
|
-
headers ||= {}
|
319
|
-
page = @agent.fetch uri, method, headers, parameters, referer
|
320
|
-
add_to_history(page)
|
321
|
-
yield page if block_given?
|
322
|
-
page
|
323
|
-
end
|
324
|
-
|
325
|
-
##
|
326
|
-
# GET +url+ and return only its contents
|
327
|
-
|
328
|
-
def get_file(url)
|
329
|
-
get(url).body
|
330
|
-
end
|
331
|
-
|
332
|
-
##
|
333
|
-
# HEAD +uri+ with +query_params+, and setting +headers+:
|
334
|
-
#
|
335
|
-
# head('http://example/', {'q' => 'foo'}, {})
|
336
|
-
|
337
|
-
def head(uri, query_params = {}, headers = {})
|
338
|
-
# fetch the page
|
339
|
-
page = @agent.fetch(uri, :head, headers, query_params)
|
340
|
-
yield page if block_given?
|
341
|
-
page
|
342
|
-
end
|
343
|
-
|
344
|
-
##
|
345
|
-
# POST to the given +uri+ with the given +query+. The query is specified by
|
346
|
-
# either a string, or a list of key-value pairs represented by a hash or an
|
347
|
-
# array of arrays.
|
348
|
-
#
|
349
|
-
# Examples:
|
350
|
-
# agent.post 'http://example.com/', "foo" => "bar"
|
351
|
-
#
|
352
|
-
# agent.post 'http://example.com/', [%w[foo bar]]
|
353
|
-
#
|
354
|
-
# agent.post('http://example.com/', "<message>hello</message>",
|
355
|
-
# 'Content-Type' => 'application/xml')
|
356
|
-
|
357
|
-
def post(uri, query={}, headers={})
|
358
|
-
return request_with_entity(:post, uri, query, headers) if String === query
|
359
|
-
|
360
|
-
node = {}
|
361
|
-
# Create a fake form
|
362
|
-
class << node
|
363
|
-
def search(*args); []; end
|
364
|
-
end
|
365
|
-
node['method'] = 'POST'
|
366
|
-
node['enctype'] = 'application/x-www-form-urlencoded'
|
367
|
-
|
368
|
-
form = Form.new(node)
|
369
|
-
|
370
|
-
query.each { |k, v|
|
371
|
-
if v.is_a?(IO)
|
372
|
-
form.enctype = 'multipart/form-data'
|
373
|
-
ul = Form::FileUpload.new({'name' => k.to_s},::File.basename(v.path))
|
374
|
-
ul.file_data = v.read
|
375
|
-
form.file_uploads << ul
|
376
|
-
else
|
377
|
-
form.fields << Form::Field.new({'name' => k.to_s},v)
|
378
|
-
end
|
379
|
-
}
|
380
|
-
post_form(uri, form, headers)
|
381
|
-
end
|
382
|
-
|
383
|
-
##
|
384
|
-
# PUT to +uri+ with +entity+, and setting +headers+:
|
385
|
-
#
|
386
|
-
# put('http://example/', 'new content', {'Content-Type' => 'text/plain'})
|
387
|
-
|
388
|
-
def put(uri, entity, headers = {})
|
389
|
-
request_with_entity(:put, uri, entity, headers)
|
390
|
-
end
|
391
|
-
|
392
|
-
##
|
393
|
-
# Makes an HTTP request to +url+ using HTTP method +verb+. +entity+ is used
|
394
|
-
# as the request body, if allowed.
|
395
|
-
|
396
|
-
def request_with_entity(verb, uri, entity, headers = {})
|
397
|
-
cur_page = current_page || Page.new(nil, {'content-type'=>'text/html'})
|
398
|
-
|
399
|
-
headers = {
|
400
|
-
'Content-Type' => 'application/octet-stream',
|
401
|
-
'Content-Length' => entity.size.to_s,
|
402
|
-
}.update headers
|
403
|
-
|
404
|
-
page = @agent.fetch uri, verb, headers, [entity], cur_page
|
405
|
-
add_to_history(page)
|
406
|
-
page
|
407
|
-
end
|
408
|
-
|
409
|
-
##
|
410
|
-
# Submits +form+ with an optional +button+.
|
411
|
-
#
|
412
|
-
# Without a button:
|
413
|
-
#
|
414
|
-
# page = agent.get('http://example.com')
|
415
|
-
# agent.submit(page.forms.first)
|
416
|
-
#
|
417
|
-
# With a button:
|
418
|
-
#
|
419
|
-
# agent.submit(page.forms.first, page.forms.first.buttons.first)
|
420
|
-
|
421
|
-
def submit(form, button=nil, headers={})
|
422
|
-
form.add_button_to_query(button) if button
|
423
|
-
|
424
|
-
case form.method.upcase
|
425
|
-
when 'POST'
|
426
|
-
post_form(form.action, form, headers)
|
427
|
-
when 'GET'
|
428
|
-
get(form.action.gsub(/\?[^\?]*$/, ''),
|
429
|
-
form.build_query,
|
430
|
-
form.page,
|
431
|
-
headers)
|
432
|
-
else
|
433
|
-
raise ArgumentError, "unsupported method: #{form.method.upcase}"
|
434
|
-
end
|
435
|
-
end
|
436
|
-
|
437
|
-
##
|
438
|
-
# Runs given block, then resets the page history as it was before. self is
|
439
|
-
# given as a parameter to the block. Returns the value of the block.
|
440
|
-
|
441
|
-
def transact
|
442
|
-
history_backup = @agent.history.dup
|
443
|
-
begin
|
444
|
-
yield self
|
445
|
-
ensure
|
446
|
-
@agent.history = history_backup
|
447
|
-
end
|
448
|
-
end
|
449
|
-
|
450
|
-
# :section: Settings
|
451
|
-
#
|
452
|
-
# Settings that adjust how mechanize makes HTTP requests including timeouts,
|
453
|
-
# keep-alives, compression, redirects and headers.
|
454
|
-
|
455
|
-
@html_parser = Nokogiri::HTML
|
456
|
-
|
457
|
-
class << self
|
458
|
-
|
459
|
-
##
|
460
|
-
# Default HTML parser for all mechanize instances
|
461
|
-
#
|
462
|
-
# Mechanize.html_parser = Nokogiri::XML
|
463
|
-
|
464
|
-
attr_accessor :html_parser
|
465
|
-
|
466
|
-
##
|
467
|
-
# Default logger for all mechanize instances
|
468
|
-
#
|
469
|
-
# Mechanize.log = Logger.new $stderr
|
470
|
-
|
471
|
-
attr_accessor :log
|
472
|
-
|
473
|
-
end
|
474
|
-
|
475
|
-
##
|
476
|
-
# A default encoding name used when parsing HTML parsing. When set it is
|
477
|
-
# used after any other encoding. The default is nil.
|
478
|
-
|
479
|
-
attr_accessor :default_encoding
|
480
|
-
|
481
|
-
##
|
482
|
-
# Overrides the encodings given by the HTTP server and the HTML page with
|
483
|
-
# the default_encoding when set to true.
|
484
|
-
|
485
|
-
attr_accessor :force_default_encoding
|
486
|
-
|
487
|
-
##
|
488
|
-
# The HTML parser to be used when parsing documents
|
489
|
-
|
490
|
-
attr_accessor :html_parser
|
491
|
-
|
492
|
-
##
|
493
|
-
# HTTP/1.0 keep-alive time. This is no longer supported by mechanize as it
|
494
|
-
# now uses net-http-persistent which only supports HTTP/1.1 persistent
|
495
|
-
# connections
|
496
|
-
|
497
|
-
attr_accessor :keep_alive_time
|
498
|
-
|
499
|
-
##
|
500
|
-
# The HTTP proxy address
|
501
|
-
|
502
|
-
attr_reader :proxy_addr
|
503
|
-
|
504
|
-
##
|
505
|
-
# The HTTP proxy password
|
506
|
-
|
507
|
-
attr_reader :proxy_pass
|
508
|
-
|
509
|
-
##
|
510
|
-
# The HTTP proxy port
|
511
|
-
|
512
|
-
attr_reader :proxy_port
|
513
|
-
|
514
|
-
##
|
515
|
-
# The HTTP proxy username
|
516
|
-
|
517
|
-
attr_reader :proxy_user
|
518
|
-
|
519
|
-
##
|
520
|
-
# Sets the user and password to be used for HTTP authentication.
|
521
|
-
|
522
|
-
def auth(user, password)
|
523
|
-
@agent.user = user
|
524
|
-
@agent.password = password
|
525
|
-
end
|
526
|
-
|
527
|
-
alias basic_auth auth
|
528
|
-
|
529
|
-
##
|
530
|
-
# Are If-Modified-Since conditional requests enabled?
|
531
|
-
|
532
|
-
def conditional_requests
|
533
|
-
@agent.conditional_requests
|
534
|
-
end
|
535
|
-
|
536
|
-
##
|
537
|
-
# Disables If-Modified-Since conditional requests (enabled by default)
|
538
|
-
|
539
|
-
def conditional_requests= enabled
|
540
|
-
@agent.conditional_requests = enabled
|
541
|
-
end
|
542
|
-
|
543
|
-
##
|
544
|
-
# A Mechanize::CookieJar which stores cookies
|
545
|
-
|
546
|
-
def cookie_jar
|
547
|
-
@agent.cookie_jar
|
548
|
-
end
|
549
|
-
|
550
|
-
##
|
551
|
-
# Replaces the cookie jar with +cookie_jar+
|
552
|
-
|
553
|
-
def cookie_jar= cookie_jar
|
554
|
-
@agent.cookie_jar = cookie_jar
|
555
|
-
end
|
556
|
-
|
557
|
-
##
|
558
|
-
# Returns a list of cookies stored in the cookie jar.
|
559
|
-
|
560
|
-
def cookies
|
561
|
-
@agent.cookie_jar.to_a
|
562
|
-
end
|
563
|
-
|
564
|
-
##
|
565
|
-
# Follow HTML meta refresh and HTTP Refresh headers. If set to +:anywhere+
|
566
|
-
# meta refresh tags outside of the head element will be followed.
|
567
|
-
|
568
|
-
def follow_meta_refresh
|
569
|
-
@agent.follow_meta_refresh
|
570
|
-
end
|
571
|
-
|
572
|
-
##
|
573
|
-
# Controls following of HTML meta refresh and HTTP Refresh headers in
|
574
|
-
# responses.
|
575
|
-
|
576
|
-
def follow_meta_refresh= follow
|
577
|
-
@agent.follow_meta_refresh = follow
|
578
|
-
end
|
579
|
-
|
580
|
-
##
|
581
|
-
# Follow an HTML meta refresh and HTTP Refresh headers that have no "url="
|
582
|
-
# in the content attribute.
|
583
|
-
#
|
584
|
-
# Defaults to false to prevent infinite refresh loops.
|
585
|
-
|
586
|
-
def follow_meta_refresh_self
|
587
|
-
@agent.follow_meta_refresh_self
|
588
|
-
end
|
589
|
-
|
590
|
-
##
|
591
|
-
# Alters the following of HTML meta refresh and HTTP Refresh headers that
|
592
|
-
# point to the same page.
|
593
|
-
|
594
|
-
def follow_meta_refresh_self= follow
|
595
|
-
@agent.follow_meta_refresh_self = follow
|
596
|
-
end
|
597
|
-
|
598
|
-
##
|
599
|
-
# Is gzip compression of responses enabled?
|
600
|
-
|
601
|
-
def gzip_enabled
|
602
|
-
@agent.gzip_enabled
|
603
|
-
end
|
604
|
-
|
605
|
-
##
|
606
|
-
# Disables HTTP/1.1 gzip compression (enabled by default)
|
607
|
-
|
608
|
-
def gzip_enabled=enabled
|
609
|
-
@agent.gzip_enabled = enabled
|
610
|
-
end
|
611
|
-
|
612
|
-
##
|
613
|
-
# Connections that have not been used in this many seconds will be reset.
|
614
|
-
|
615
|
-
def idle_timeout
|
616
|
-
@agent.idle_timeout
|
617
|
-
end
|
618
|
-
|
619
|
-
# Sets the idle timeout to +idle_timeout+. The default timeout is 5
|
620
|
-
# seconds. If you experience "too many connection resets", reducing this
|
621
|
-
# value may help.
|
622
|
-
|
623
|
-
def idle_timeout= idle_timeout
|
624
|
-
@agent.idle_timeout = idle_timeout
|
625
|
-
end
|
626
|
-
|
627
|
-
##
|
628
|
-
# Are HTTP/1.1 keep-alive connections enabled?
|
629
|
-
|
630
|
-
def keep_alive
|
631
|
-
@agent.keep_alive
|
632
|
-
end
|
633
|
-
|
634
|
-
##
|
635
|
-
# Disable HTTP/1.1 keep-alive connections if +enable+ is set to false. If
|
636
|
-
# you are experiencing "too many connection resets" errors setting this to
|
637
|
-
# false will eliminate them.
|
638
|
-
#
|
639
|
-
# You should first investigate reducing idle_timeout.
|
640
|
-
|
641
|
-
def keep_alive= enable
|
642
|
-
@agent.keep_alive = enable
|
643
|
-
end
|
644
|
-
|
645
|
-
##
|
646
|
-
# The current logger. If no logger has been set Mechanize.log is used.
|
647
|
-
|
648
|
-
def log
|
649
|
-
@log || Mechanize.log
|
650
|
-
end
|
651
|
-
|
652
|
-
##
|
653
|
-
# Sets the +logger+ used by this instance of mechanize
|
654
|
-
|
655
|
-
def log= logger
|
656
|
-
@log = logger
|
657
|
-
end
|
658
|
-
|
659
|
-
##
|
660
|
-
# Responses larger than this will be written to a Tempfile instead of stored
|
661
|
-
# in memory. The default is 10240 bytes
|
662
|
-
|
663
|
-
def max_file_buffer
|
664
|
-
@agent.max_file_buffer
|
665
|
-
end
|
666
|
-
|
667
|
-
##
|
668
|
-
# Sets the maximum size of a response body that will be stored in memory to
|
669
|
-
# +bytes+
|
670
|
-
|
671
|
-
def max_file_buffer= bytes
|
672
|
-
@agent.max_file_buffer = bytes
|
673
|
-
end
|
674
|
-
|
675
|
-
##
|
676
|
-
# Length of time to wait until a connection is opened in seconds
|
677
|
-
|
678
|
-
def open_timeout
|
679
|
-
@agent.open_timeout
|
680
|
-
end
|
681
|
-
|
682
|
-
##
|
683
|
-
# Sets the connection open timeout to +open_timeout+
|
684
|
-
|
685
|
-
def open_timeout= open_timeout
|
686
|
-
@agent.open_timeout = open_timeout
|
687
|
-
end
|
688
|
-
|
689
|
-
##
|
690
|
-
# Length of time to wait for data from the server
|
691
|
-
|
692
|
-
def read_timeout
|
693
|
-
@agent.read_timeout
|
694
|
-
end
|
695
|
-
|
696
|
-
##
|
697
|
-
# Sets the timeout for each chunk of data read from the server to
|
698
|
-
# +read_timeout+. A single request may read many chunks of data.
|
699
|
-
|
700
|
-
def read_timeout= read_timeout
|
701
|
-
@agent.read_timeout = read_timeout
|
702
|
-
end
|
703
|
-
|
704
|
-
##
|
705
|
-
# Controls how mechanize deals with redirects. The following values are
|
706
|
-
# allowed:
|
707
|
-
#
|
708
|
-
# :all, true:: All 3xx redirects are followed (default)
|
709
|
-
# :permanent:: Only 301 Moved Permanantly redirects are followed
|
710
|
-
# false:: No redirects are followed
|
711
|
-
|
712
|
-
def redirect_ok
|
713
|
-
@agent.redirect_ok
|
714
|
-
end
|
715
|
-
|
716
|
-
alias follow_redirect? redirect_ok
|
717
|
-
|
718
|
-
##
|
719
|
-
# Sets the mechanize redirect handling policy. See redirect_ok for allowed
|
720
|
-
# values
|
721
|
-
|
722
|
-
def redirect_ok= follow
|
723
|
-
@agent.redirect_ok = follow
|
724
|
-
end
|
725
|
-
|
726
|
-
##
|
727
|
-
# Maximum number of redirections to follow
|
728
|
-
|
729
|
-
def redirection_limit
|
730
|
-
@agent.redirection_limit
|
731
|
-
end
|
732
|
-
|
733
|
-
##
|
734
|
-
# Sets the maximum number of redirections to follow to +limit+
|
735
|
-
|
736
|
-
def redirection_limit= limit
|
737
|
-
@agent.redirection_limit = limit
|
738
|
-
end
|
739
|
-
|
740
|
-
##
|
741
|
-
# A hash of custom request headers that will be sent on every request
|
742
|
-
|
743
|
-
def request_headers
|
744
|
-
@agent.request_headers
|
745
|
-
end
|
746
|
-
|
747
|
-
##
|
748
|
-
# Replaces the custom request headers that will be sent on every request
|
749
|
-
# with +request_headers+
|
750
|
-
|
751
|
-
def request_headers= request_headers
|
752
|
-
@agent.request_headers = request_headers
|
753
|
-
end
|
754
|
-
|
755
|
-
##
|
756
|
-
# Retry POST and other non-idempotent requests. See RFC 2616 9.1.2.
|
757
|
-
|
758
|
-
def retry_change_requests
|
759
|
-
@agent.retry_change_requests
|
760
|
-
end
|
761
|
-
|
762
|
-
##
|
763
|
-
# When setting +retry_change_requests+ to true you are stating that, for all
|
764
|
-
# the URLs you access with mechanize, making POST and other non-idempotent
|
765
|
-
# requests is safe and will not cause data duplication or other harmful
|
766
|
-
# results.
|
767
|
-
#
|
768
|
-
# If you are experiencing "too many connection resets" errors you should
|
769
|
-
# instead investigate reducing the idle_timeout or disabling keep_alive
|
770
|
-
# connections.
|
771
|
-
|
772
|
-
def retry_change_requests= retry_change_requests
|
773
|
-
@agent.retry_change_requests = retry_change_requests
|
774
|
-
end
|
775
|
-
|
776
|
-
##
|
777
|
-
# Will <code>/robots.txt</code> files be obeyed?
|
778
|
-
|
779
|
-
def robots
|
780
|
-
@agent.robots
|
781
|
-
end
|
782
|
-
|
783
|
-
##
|
784
|
-
# When +enabled+ mechanize will retrieve and obey <code>robots.txt</code>
|
785
|
-
# files
|
786
|
-
|
787
|
-
def robots= enabled
|
788
|
-
@agent.robots = enabled
|
789
|
-
end
|
790
|
-
|
791
|
-
##
|
792
|
-
# The handlers for HTTP and other URI protocols.
|
793
|
-
|
794
|
-
def scheme_handlers
|
795
|
-
@agent.scheme_handlers
|
796
|
-
end
|
797
|
-
|
798
|
-
##
|
799
|
-
# Replaces the URI scheme handler table with +scheme_handlers+
|
800
|
-
|
801
|
-
def scheme_handlers= scheme_handlers
|
802
|
-
@agent.scheme_handlers = scheme_handlers
|
803
|
-
end
|
804
|
-
|
805
|
-
##
|
806
|
-
# The identification string for the client initiating a web request
|
807
|
-
|
808
|
-
def user_agent
|
809
|
-
@agent.user_agent
|
810
|
-
end
|
811
|
-
|
812
|
-
##
|
813
|
-
# Sets the User-Agent used by mechanize to +user_agent+. See also
|
814
|
-
# user_agent_alias
|
815
|
-
|
816
|
-
def user_agent= user_agent
|
817
|
-
@agent.user_agent = user_agent
|
818
|
-
end
|
819
|
-
|
820
|
-
##
|
821
|
-
# Set the user agent for the Mechanize object based on the given +name+.
|
822
|
-
#
|
823
|
-
# See also AGENT_ALIASES
|
824
|
-
|
825
|
-
def user_agent_alias= name
|
826
|
-
self.user_agent = AGENT_ALIASES[name] ||
|
827
|
-
raise(ArgumentError, "unknown agent alias #{name.inspect}")
|
828
|
-
end
|
829
|
-
|
830
|
-
##
|
831
|
-
# The value of watch_for_set is passed to pluggable parsers for retrieved
|
832
|
-
# content
|
833
|
-
|
834
|
-
attr_accessor :watch_for_set
|
835
|
-
|
836
|
-
# :section: SSL
|
837
|
-
#
|
838
|
-
# SSL settings for mechanize. These must be set in the block given to
|
839
|
-
# Mechanize.new
|
840
|
-
|
841
|
-
##
|
842
|
-
# Path to an OpenSSL server certificate file
|
843
|
-
|
844
|
-
def ca_file
|
845
|
-
@agent.ca_file
|
846
|
-
end
|
847
|
-
|
848
|
-
##
|
849
|
-
# Sets the certificate file used for SSL connections
|
850
|
-
|
851
|
-
def ca_file= ca_file
|
852
|
-
@agent.ca_file = ca_file
|
853
|
-
end
|
854
|
-
|
855
|
-
##
|
856
|
-
# An OpenSSL client certificate or the path to a certificate file.
|
857
|
-
|
858
|
-
def cert
|
859
|
-
@agent.cert
|
860
|
-
end
|
861
|
-
|
862
|
-
##
|
863
|
-
# Sets the OpenSSL client certificate +cert+ to the given path or
|
864
|
-
# certificate instance
|
865
|
-
|
866
|
-
def cert= cert
|
867
|
-
@agent.cert = cert
|
868
|
-
end
|
869
|
-
|
870
|
-
##
|
871
|
-
# An OpenSSL certificate store for verifying server certificates. This
|
872
|
-
# defaults to the default certificate store.
|
873
|
-
|
874
|
-
def cert_store
|
875
|
-
@agent.cert_store
|
876
|
-
end
|
877
|
-
|
878
|
-
##
|
879
|
-
# Sets the OpenSSL certificate store to +store+.
|
880
|
-
|
881
|
-
def cert_store= cert_store
|
882
|
-
@agent.cert_store = cert_store
|
883
|
-
end
|
884
|
-
|
885
|
-
##
|
886
|
-
# What is this?
|
887
|
-
#
|
888
|
-
# Why is it different from #cert?
|
889
|
-
|
890
|
-
def certificate # :nodoc:
|
891
|
-
@agent.certificate
|
892
|
-
end
|
893
|
-
|
894
|
-
##
|
895
|
-
# An OpenSSL private key or the path to a private key
|
896
|
-
|
897
|
-
def key
|
898
|
-
@agent.key
|
899
|
-
end
|
900
|
-
|
901
|
-
##
|
902
|
-
# Sets the OpenSSL client +key+ to the given path or key instance
|
903
|
-
|
904
|
-
def key= key
|
905
|
-
@agent.key = key
|
906
|
-
end
|
907
|
-
|
908
|
-
##
|
909
|
-
# OpenSSL client key password
|
910
|
-
|
911
|
-
def pass
|
912
|
-
@agent.pass
|
913
|
-
end
|
914
|
-
|
915
|
-
##
|
916
|
-
# Sets the client key password to +pass+
|
917
|
-
|
918
|
-
def pass= pass
|
919
|
-
@agent.pass = pass
|
920
|
-
end
|
921
|
-
|
922
|
-
##
|
923
|
-
# A callback for additional certificate verification. See
|
924
|
-
# OpenSSL::SSL::SSLContext#verify_callback
|
925
|
-
#
|
926
|
-
# The callback can be used for debugging or to ignore errors by always
|
927
|
-
# returning +true+. Specifying nil uses the default method that was valid
|
928
|
-
# when the SSLContext was created
|
929
|
-
|
930
|
-
def verify_callback
|
931
|
-
@agent.verify_callback
|
932
|
-
end
|
933
|
-
|
934
|
-
##
|
935
|
-
# Sets the OpenSSL certificate verification callback
|
936
|
-
|
937
|
-
def verify_callback= verify_callback
|
938
|
-
@agent.verify_callback = verify_callback
|
939
|
-
end
|
940
|
-
|
941
|
-
##
|
942
|
-
# the OpenSSL server certificate verification method. The default is
|
943
|
-
# OpenSSL::SSL::VERIFY_PEER and certificate verification uses the default
|
944
|
-
# system certificates. See also cert_store
|
945
|
-
|
946
|
-
def verify_mode
|
947
|
-
@agent.verify_mode
|
948
|
-
end
|
949
|
-
|
950
|
-
##
|
951
|
-
# Sets the OpenSSL server certificate verification method.
|
952
|
-
|
953
|
-
def verify_mode= verify_mode
|
954
|
-
@agent.verify_mode = verify_mode
|
955
|
-
end
|
956
|
-
|
957
|
-
# :section: Utilities
|
958
|
-
|
959
|
-
attr_reader :agent # :nodoc:
|
960
|
-
|
961
|
-
attr_reader :pluggable_parser # :nodoc:
|
962
|
-
|
963
|
-
##
|
964
|
-
# Parses the +body+ of the +response+ from +uri+ using the pluggable parser
|
965
|
-
# that matches its content type
|
966
|
-
|
967
|
-
def parse uri, response, body
|
968
|
-
content_type = nil
|
969
|
-
|
970
|
-
unless response['Content-Type'].nil?
|
971
|
-
data, = response['Content-Type'].split ';', 2
|
972
|
-
content_type, = data.downcase.split ',', 2 unless data.nil?
|
973
|
-
end
|
974
|
-
|
975
|
-
# Find our pluggable parser
|
976
|
-
parser_klass = @pluggable_parser.parser content_type
|
977
|
-
|
978
|
-
unless parser_klass <= Mechanize::Download then
|
979
|
-
body = case body
|
980
|
-
when IO, Tempfile, StringIO then
|
981
|
-
body.read
|
982
|
-
else
|
983
|
-
body
|
984
|
-
end
|
985
|
-
end
|
986
|
-
|
987
|
-
parser_klass.new uri, response, body, response.code do |parser|
|
988
|
-
parser.mech = self if parser.respond_to? :mech=
|
989
|
-
|
990
|
-
parser.watch_for_set = @watch_for_set if
|
991
|
-
@watch_for_set and parser.respond_to?(:watch_for_set=)
|
992
|
-
end
|
993
|
-
end
|
994
|
-
|
995
|
-
def pretty_print(q) # :nodoc:
|
996
|
-
q.object_group(self) {
|
997
|
-
q.breakable
|
998
|
-
q.pp cookie_jar
|
999
|
-
q.breakable
|
1000
|
-
q.pp current_page
|
1001
|
-
}
|
1002
|
-
end
|
1003
|
-
|
1004
|
-
##
|
1005
|
-
# Sets the proxy +address+ at +port+ with an optional +user+ and +password+
|
1006
|
-
|
1007
|
-
def set_proxy address, port, user = nil, password = nil
|
1008
|
-
@proxy_addr = address
|
1009
|
-
@proxy_port = port
|
1010
|
-
@proxy_user = user
|
1011
|
-
@proxy_pass = password
|
1012
|
-
|
1013
|
-
@agent.set_proxy address, port, user, password
|
1014
|
-
@agent.set_http
|
1015
|
-
end
|
1016
|
-
|
1017
|
-
private
|
1018
|
-
|
1019
|
-
##
|
1020
|
-
# Posts +form+ to +uri+
|
1021
|
-
|
1022
|
-
def post_form(uri, form, headers = {})
|
1023
|
-
cur_page = form.page || current_page ||
|
1024
|
-
Page.new(nil, {'content-type'=>'text/html'})
|
1025
|
-
|
1026
|
-
request_data = form.request_data
|
1027
|
-
|
1028
|
-
log.debug("query: #{ request_data.inspect }") if log
|
1029
|
-
|
1030
|
-
headers = {
|
1031
|
-
'Content-Type' => form.enctype,
|
1032
|
-
'Content-Length' => request_data.size.to_s,
|
1033
|
-
}.merge headers
|
1034
|
-
|
1035
|
-
# fetch the page
|
1036
|
-
page = @agent.fetch uri, :post, headers, [request_data], cur_page
|
1037
|
-
add_to_history(page)
|
1038
|
-
page
|
1039
|
-
end
|
1040
|
-
|
1041
|
-
##
|
1042
|
-
# Adds +page+ to the history
|
1043
|
-
|
1044
|
-
def add_to_history(page)
|
1045
|
-
@agent.history.push(page, @agent.resolve(page.uri))
|
1046
|
-
@history_added.call(page) if @history_added
|
1047
|
-
end
|
1048
|
-
|
1049
|
-
end
|
1050
|
-
|
1051
|
-
require 'mechanize/content_type_error'
|
1052
|
-
require 'mechanize/cookie'
|
1053
|
-
require 'mechanize/cookie_jar'
|
1054
|
-
require 'mechanize/parser'
|
1055
|
-
require 'mechanize/download'
|
1056
|
-
require 'mechanize/file'
|
1057
|
-
require 'mechanize/file_connection'
|
1058
|
-
require 'mechanize/file_request'
|
1059
|
-
require 'mechanize/file_response'
|
1060
|
-
require 'mechanize/form'
|
1061
|
-
require 'mechanize/history'
|
1062
|
-
require 'mechanize/http'
|
1063
|
-
require 'mechanize/http/agent'
|
1064
|
-
require 'mechanize/http/auth_challenge'
|
1065
|
-
require 'mechanize/http/auth_realm'
|
1066
|
-
require 'mechanize/http/content_disposition_parser'
|
1067
|
-
require 'mechanize/http/www_authenticate_parser'
|
1068
|
-
require 'mechanize/page'
|
1069
|
-
require 'mechanize/monkey_patch'
|
1070
|
-
require 'mechanize/pluggable_parsers'
|
1071
|
-
require 'mechanize/redirect_limit_reached_error'
|
1072
|
-
require 'mechanize/redirect_not_get_or_head_error'
|
1073
|
-
require 'mechanize/response_code_error'
|
1074
|
-
require 'mechanize/unauthorized_error'
|
1075
|
-
require 'mechanize/response_read_error'
|
1076
|
-
require 'mechanize/robots_disallowed_error'
|
1077
|
-
require 'mechanize/unsupported_scheme_error'
|
1078
|
-
require 'mechanize/util'
|
1079
|
-
|