mechanize 2.0.1 → 2.1.pre.1
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of mechanize might be problematic. Click here for more details.
- data.tar.gz.sig +0 -0
- data/CHANGELOG.rdoc +82 -0
- data/EXAMPLES.rdoc +1 -1
- data/FAQ.rdoc +9 -9
- data/Manifest.txt +35 -48
- data/README.rdoc +2 -1
- data/Rakefile +16 -3
- data/lib/mechanize.rb +809 -392
- data/lib/mechanize/content_type_error.rb +10 -11
- data/lib/mechanize/cookie.rb +193 -60
- data/lib/mechanize/cookie_jar.rb +39 -86
- data/lib/mechanize/download.rb +59 -0
- data/lib/mechanize/element_matcher.rb +1 -0
- data/lib/mechanize/file.rb +61 -76
- data/lib/mechanize/file_saver.rb +37 -35
- data/lib/mechanize/form.rb +475 -410
- data/lib/mechanize/form/button.rb +4 -7
- data/lib/mechanize/form/check_box.rb +10 -9
- data/lib/mechanize/form/field.rb +52 -42
- data/lib/mechanize/form/file_upload.rb +17 -19
- data/lib/mechanize/form/hidden.rb +3 -0
- data/lib/mechanize/form/image_button.rb +15 -16
- data/lib/mechanize/form/keygen.rb +34 -0
- data/lib/mechanize/form/multi_select_list.rb +20 -9
- data/lib/mechanize/form/option.rb +48 -47
- data/lib/mechanize/form/radio_button.rb +52 -45
- data/lib/mechanize/form/reset.rb +3 -0
- data/lib/mechanize/form/select_list.rb +10 -6
- data/lib/mechanize/form/submit.rb +3 -0
- data/lib/mechanize/form/text.rb +3 -0
- data/lib/mechanize/form/textarea.rb +3 -0
- data/lib/mechanize/headers.rb +17 -19
- data/lib/mechanize/history.rb +60 -61
- data/lib/mechanize/http.rb +5 -0
- data/lib/mechanize/http/agent.rb +485 -218
- data/lib/mechanize/http/auth_challenge.rb +59 -0
- data/lib/mechanize/http/auth_realm.rb +31 -0
- data/lib/mechanize/http/content_disposition_parser.rb +188 -0
- data/lib/mechanize/http/www_authenticate_parser.rb +155 -0
- data/lib/mechanize/monkey_patch.rb +14 -35
- data/lib/mechanize/page.rb +34 -2
- data/lib/mechanize/page/base.rb +6 -7
- data/lib/mechanize/page/frame.rb +5 -5
- data/lib/mechanize/page/image.rb +23 -23
- data/lib/mechanize/page/label.rb +16 -16
- data/lib/mechanize/page/link.rb +16 -0
- data/lib/mechanize/page/meta_refresh.rb +19 -7
- data/lib/mechanize/parser.rb +173 -0
- data/lib/mechanize/pluggable_parsers.rb +126 -83
- data/lib/mechanize/redirect_limit_reached_error.rb +16 -13
- data/lib/mechanize/redirect_not_get_or_head_error.rb +18 -16
- data/lib/mechanize/response_code_error.rb +16 -17
- data/lib/mechanize/robots_disallowed_error.rb +22 -23
- data/lib/mechanize/test_case.rb +659 -0
- data/lib/mechanize/unauthorized_error.rb +3 -0
- data/lib/mechanize/unsupported_scheme_error.rb +4 -6
- data/lib/mechanize/util.rb +0 -12
- data/test/htdocs/form_order_test.html +11 -0
- data/test/htdocs/form_test.html +2 -2
- data/test/htdocs/tc_links.html +1 -0
- data/test/test_mechanize.rb +367 -59
- data/test/test_mechanize_cookie.rb +69 -4
- data/test/test_mechanize_cookie_jar.rb +200 -124
- data/test/test_mechanize_download.rb +43 -0
- data/test/test_mechanize_file.rb +53 -45
- data/test/{test_mechanize_file_response.rb → test_mechanize_file_connection.rb} +2 -2
- data/test/test_mechanize_file_request.rb +2 -2
- data/test/test_mechanize_file_saver.rb +21 -0
- data/test/test_mechanize_form.rb +345 -46
- data/test/test_mechanize_form_check_box.rb +5 -4
- data/test/test_mechanize_form_encoding.rb +10 -16
- data/test/test_mechanize_form_field.rb +45 -3
- data/test/test_mechanize_form_file_upload.rb +20 -0
- data/test/test_mechanize_form_image_button.rb +2 -2
- data/test/test_mechanize_form_keygen.rb +32 -0
- data/test/test_mechanize_form_multi_select_list.rb +84 -0
- data/test/test_mechanize_form_option.rb +55 -0
- data/test/test_mechanize_form_radio_button.rb +78 -0
- data/test/test_mechanize_form_select_list.rb +76 -0
- data/test/test_mechanize_form_textarea.rb +8 -7
- data/test/{test_headers.rb → test_mechanize_headers.rb} +4 -2
- data/test/test_mechanize_history.rb +103 -0
- data/test/test_mechanize_http_agent.rb +525 -17
- data/test/test_mechanize_http_auth_challenge.rb +39 -0
- data/test/test_mechanize_http_auth_realm.rb +49 -0
- data/test/test_mechanize_http_content_disposition_parser.rb +118 -0
- data/test/test_mechanize_http_www_authenticate_parser.rb +146 -0
- data/test/test_mechanize_link.rb +10 -14
- data/test/test_mechanize_page.rb +118 -0
- data/test/test_mechanize_page_encoding.rb +48 -13
- data/test/test_mechanize_page_frame.rb +16 -0
- data/test/test_mechanize_page_link.rb +27 -19
- data/test/test_mechanize_page_meta_refresh.rb +26 -14
- data/test/test_mechanize_parser.rb +289 -0
- data/test/test_mechanize_pluggable_parser.rb +52 -0
- data/test/test_mechanize_redirect_limit_reached_error.rb +24 -0
- data/test/test_mechanize_redirect_not_get_or_head_error.rb +3 -7
- data/test/test_mechanize_subclass.rb +2 -2
- data/test/test_mechanize_util.rb +24 -13
- data/test/test_multi_select.rb +23 -22
- metadata +145 -114
- metadata.gz.sig +0 -0
- data/lib/mechanize/inspect.rb +0 -88
- data/test/helper.rb +0 -175
- data/test/htdocs/form_select_all.html +0 -16
- data/test/htdocs/form_select_none.html +0 -17
- data/test/htdocs/form_select_noopts.html +0 -10
- data/test/htdocs/iframe_test.html +0 -16
- data/test/htdocs/nofollow.html +0 -9
- data/test/htdocs/norobots.html +0 -8
- data/test/htdocs/rel_nofollow.html +0 -8
- data/test/htdocs/tc_base_images.html +0 -10
- data/test/htdocs/tc_images.html +0 -8
- data/test/htdocs/tc_no_attributes.html +0 -16
- data/test/htdocs/tc_radiobuttons.html +0 -17
- data/test/htdocs/test_bad_encoding.html +0 -52
- data/test/servlets.rb +0 -402
- data/test/ssl_server.rb +0 -48
- data/test/test_cookies.rb +0 -129
- data/test/test_form_action.rb +0 -52
- data/test/test_form_as_hash.rb +0 -59
- data/test/test_form_button.rb +0 -46
- data/test/test_frames.rb +0 -34
- data/test/test_history.rb +0 -118
- data/test/test_history_added.rb +0 -16
- data/test/test_html_unscape_forms.rb +0 -46
- data/test/test_if_modified_since.rb +0 -20
- data/test/test_images.rb +0 -19
- data/test/test_no_attributes.rb +0 -13
- data/test/test_option.rb +0 -18
- data/test/test_pluggable_parser.rb +0 -136
- data/test/test_post_form.rb +0 -37
- data/test/test_pretty_print.rb +0 -22
- data/test/test_radiobutton.rb +0 -75
- data/test/test_redirect_limit_reached.rb +0 -39
- data/test/test_referer.rb +0 -81
- data/test/test_relative_links.rb +0 -40
- data/test/test_request.rb +0 -13
- data/test/test_response_code.rb +0 -53
- data/test/test_robots.rb +0 -72
- data/test/test_save_file.rb +0 -48
- data/test/test_scheme.rb +0 -48
- data/test/test_select.rb +0 -119
- data/test/test_select_all.rb +0 -15
- data/test/test_select_none.rb +0 -15
- data/test/test_select_noopts.rb +0 -18
- data/test/test_set_fields.rb +0 -44
- data/test/test_ssl_server.rb +0 -20
data.tar.gz.sig
CHANGED
Binary file
|
data/CHANGELOG.rdoc
CHANGED
@@ -1,5 +1,87 @@
|
|
1
1
|
= Mechanize CHANGELOG
|
2
2
|
|
3
|
+
=== 2.1 / ??
|
4
|
+
|
5
|
+
* Deprecations
|
6
|
+
* Mechanize#get no longer accepts an options hash.
|
7
|
+
* Mechanize::Util::to_native_charset has been removed.
|
8
|
+
|
9
|
+
* Minor enhancements
|
10
|
+
* Mechanize now depends on net-http-persistent 2.3+. This new version
|
11
|
+
brings idle timeouts to help with the dreaded "too many connection resets"
|
12
|
+
issue when POSTing to a closed connection. Also, SSL connections will be
|
13
|
+
verified against the system certificate store by default. Issue #123
|
14
|
+
* Added Mechanize#retry_change_requests to allow mechanize to retry POST and
|
15
|
+
other non-idempotent requests when you know it is safe to do so. Issue
|
16
|
+
#123
|
17
|
+
* Mechanize can now stream files directly to disk without loading them into
|
18
|
+
memory first through Mechanize::Download, a pluggable parser for
|
19
|
+
downloading files.
|
20
|
+
|
21
|
+
All responses larger than Mechanize#max_file_buffer are downloaded to a
|
22
|
+
Tempfile. For backwards compatibility Mechanize::File subclasses still
|
23
|
+
load the response body into memory.
|
24
|
+
|
25
|
+
To force all unknown content types to download to disk instead of memory
|
26
|
+
set:
|
27
|
+
|
28
|
+
agent.pluggable_parser.default = Mechanize::Download
|
29
|
+
* Added Mechanize#content_encoding_hooks which allow handling of
|
30
|
+
non-standard content encodings like "agzip". Patch #125 by kitamomonga
|
31
|
+
* Added dom_class to elements and the element matcher like dom_id. Patch
|
32
|
+
#156 by Dan Hansen.
|
33
|
+
* Added support for the HTML5 keygen form element. See
|
34
|
+
http://dev.w3.org/html5/spec/Overview.html#the-keygen-element Patch #157
|
35
|
+
by Victor Costan.
|
36
|
+
* Mechanize no longer follows meta refreshes that have no "url=" in the
|
37
|
+
content attribute to avoid infinite loops. To follow a meta refresh to
|
38
|
+
the same page set Mechanize#follow_meta_refresh_self to true. Issue #134
|
39
|
+
by Jo Hund.
|
40
|
+
* Updated 'Mac Safari' User-Agent alias to Safari 5.1.1. 'Mac Safari 4' can
|
41
|
+
be used for the old 'Mac Safari' alias.
|
42
|
+
* When given multiple HTTP authentication options mechanize can better pick
|
43
|
+
the strongest method.
|
44
|
+
* Improvements to HTTP authorization:
|
45
|
+
* mechanize raises Mechanize::UnauthorizedError for 401 responses which is
|
46
|
+
a subclass of Mechanize::ResponseCodeError.
|
47
|
+
* Added support for NTLM authentication, but this has not been tested.
|
48
|
+
* Mechanize::Cookie.new accepts attributes in a hash.
|
49
|
+
* Mechanize::CookieJar#<<(cookie) (alias: add!) is added. Issue #139
|
50
|
+
* Different mechanize instances may now have different loggers. Issue #122
|
51
|
+
* Mechanize now accepts a proxy port as a service name or number string.
|
52
|
+
Issue #167
|
53
|
+
|
54
|
+
* Bug fixes
|
55
|
+
* Mechanize takes more care to avoid saving files with certain unsafe names.
|
56
|
+
You should still take care not to use mechanize to save files into your
|
57
|
+
home directory. Issue #163.
|
58
|
+
* Mechanize#cookie_jar= works again. Issue #126
|
59
|
+
* Mechanize now handles cookies just as most modern browsers do,
|
60
|
+
roughly based on RFC 6265.
|
61
|
+
* domain=.example.com (which is invalid) is considered identical to
|
62
|
+
domain=example.com.
|
63
|
+
* A cookie with domain=example.com is sent to host.sub.example.com
|
64
|
+
as well as host.example.com and example.com.
|
65
|
+
* A cookie with domain=TLD (no dots) is accepted and sent if the
|
66
|
+
host name is TLD, and rejected otherwise. To retain compatibility
|
67
|
+
and convention, host/domain names starting with "local" are exempt
|
68
|
+
from this rule.
|
69
|
+
* A cookie with no domain attribute is only sent to the original
|
70
|
+
host.
|
71
|
+
* A cookie with an Effective TLD is rejected based on the public
|
72
|
+
suffix list. (cf. http://publicsuffix.org/)
|
73
|
+
* "Secure" cookies are not sent via non-https connection.
|
74
|
+
* Subdomain match is not performed against an IP address.
|
75
|
+
* It is recommended that you clear out existing cookie jars for
|
76
|
+
regeneration because previously saved cookies may not have been
|
77
|
+
parsed correctly.
|
78
|
+
* The original Referer value persists on redirection. Issue #150
|
79
|
+
* Do not send a referer on a Refresh header based redirection.
|
80
|
+
* Fixed encoding error in tests when LANG=C. Patch #142 by jinschoi.
|
81
|
+
* The order of items in a form submission now matches the DOM order. Patch
|
82
|
+
#129 by kitamomonga
|
83
|
+
* Fixed proxy example in EXAMPLES. Issue #146 by NielsKSchjoedt
|
84
|
+
|
3
85
|
=== 2.0.1 / 2011-06-28
|
4
86
|
|
5
87
|
Mechanize now uses minitest to avoid 1.9 vs 1.8 assertion availability in
|
data/EXAMPLES.rdoc
CHANGED
data/FAQ.rdoc
CHANGED
@@ -1,11 +1,11 @@
|
|
1
|
-
Q
|
2
|
-
protocol.rb:133:in `sysread': end of file reached (EOFError)
|
3
|
-
|
4
|
-
A: Some people have experienced an EOFError during normal mechanize usage.
|
5
|
-
Most of the time this occurs because the remote website claims to support
|
6
|
-
keep alives, but does not implement them correctly. Try turning off
|
7
|
-
keep alives on your mechanize object:
|
8
|
-
|
9
|
-
mech.keep_alive = false
|
1
|
+
Q:: Why do I keep getting an EOFError?
|
10
2
|
|
3
|
+
A:: For older versions of mechanize turning off keep_alive could help with the
|
4
|
+
problem, but mechanize now has more robust handling of persistent
|
5
|
+
connections.
|
11
6
|
|
7
|
+
Older versions of mechanize would raise an EOFError when a chunked body was
|
8
|
+
not terminated properly, a common bug of IIS servers. Since 2.0
|
9
|
+
Mechanize::ResponseReadError is raised containing the original response and
|
10
|
+
body read so far so if the server is broken you can still retrieve the
|
11
|
+
entire content.
|
data/Manifest.txt
CHANGED
@@ -16,6 +16,7 @@ lib/mechanize.rb
|
|
16
16
|
lib/mechanize/content_type_error.rb
|
17
17
|
lib/mechanize/cookie.rb
|
18
18
|
lib/mechanize/cookie_jar.rb
|
19
|
+
lib/mechanize/download.rb
|
19
20
|
lib/mechanize/element_matcher.rb
|
20
21
|
lib/mechanize/file.rb
|
21
22
|
lib/mechanize/file_connection.rb
|
@@ -27,16 +28,25 @@ lib/mechanize/form/button.rb
|
|
27
28
|
lib/mechanize/form/check_box.rb
|
28
29
|
lib/mechanize/form/field.rb
|
29
30
|
lib/mechanize/form/file_upload.rb
|
31
|
+
lib/mechanize/form/hidden.rb
|
30
32
|
lib/mechanize/form/image_button.rb
|
33
|
+
lib/mechanize/form/keygen.rb
|
31
34
|
lib/mechanize/form/multi_select_list.rb
|
32
35
|
lib/mechanize/form/option.rb
|
33
36
|
lib/mechanize/form/radio_button.rb
|
37
|
+
lib/mechanize/form/reset.rb
|
34
38
|
lib/mechanize/form/select_list.rb
|
39
|
+
lib/mechanize/form/submit.rb
|
40
|
+
lib/mechanize/form/text.rb
|
41
|
+
lib/mechanize/form/textarea.rb
|
35
42
|
lib/mechanize/headers.rb
|
36
43
|
lib/mechanize/history.rb
|
37
44
|
lib/mechanize/http.rb
|
38
45
|
lib/mechanize/http/agent.rb
|
39
|
-
lib/mechanize/
|
46
|
+
lib/mechanize/http/auth_challenge.rb
|
47
|
+
lib/mechanize/http/auth_realm.rb
|
48
|
+
lib/mechanize/http/content_disposition_parser.rb
|
49
|
+
lib/mechanize/http/www_authenticate_parser.rb
|
40
50
|
lib/mechanize/monkey_patch.rb
|
41
51
|
lib/mechanize/page.rb
|
42
52
|
lib/mechanize/page/base.rb
|
@@ -45,12 +55,15 @@ lib/mechanize/page/image.rb
|
|
45
55
|
lib/mechanize/page/label.rb
|
46
56
|
lib/mechanize/page/link.rb
|
47
57
|
lib/mechanize/page/meta_refresh.rb
|
58
|
+
lib/mechanize/parser.rb
|
48
59
|
lib/mechanize/pluggable_parsers.rb
|
49
60
|
lib/mechanize/redirect_limit_reached_error.rb
|
50
61
|
lib/mechanize/redirect_not_get_or_head_error.rb
|
51
62
|
lib/mechanize/response_code_error.rb
|
52
63
|
lib/mechanize/response_read_error.rb
|
53
64
|
lib/mechanize/robots_disallowed_error.rb
|
65
|
+
lib/mechanize/test_case.rb
|
66
|
+
lib/mechanize/unauthorized_error.rb
|
54
67
|
lib/mechanize/unsupported_scheme_error.rb
|
55
68
|
lib/mechanize/util.rb
|
56
69
|
test/data/htpasswd
|
@@ -58,7 +71,6 @@ test/data/server.crt
|
|
58
71
|
test/data/server.csr
|
59
72
|
test/data/server.key
|
60
73
|
test/data/server.pem
|
61
|
-
test/helper.rb
|
62
74
|
test/htdocs/alt_text.html
|
63
75
|
test/htdocs/bad_form_test.html
|
64
76
|
test/htdocs/button.jpg
|
@@ -71,31 +83,24 @@ test/htdocs/form_multi_select.html
|
|
71
83
|
test/htdocs/form_multival.html
|
72
84
|
test/htdocs/form_no_action.html
|
73
85
|
test/htdocs/form_no_input_name.html
|
86
|
+
test/htdocs/form_order_test.html
|
74
87
|
test/htdocs/form_select.html
|
75
|
-
test/htdocs/form_select_all.html
|
76
|
-
test/htdocs/form_select_none.html
|
77
|
-
test/htdocs/form_select_noopts.html
|
78
88
|
test/htdocs/form_set_fields.html
|
79
89
|
test/htdocs/form_test.html
|
80
90
|
test/htdocs/frame_referer_test.html
|
81
91
|
test/htdocs/frame_test.html
|
82
92
|
test/htdocs/google.html
|
83
|
-
test/htdocs/iframe_test.html
|
84
93
|
test/htdocs/index.html
|
85
94
|
test/htdocs/link with space.html
|
86
95
|
test/htdocs/meta_cookie.html
|
87
96
|
test/htdocs/no_title_test.html
|
88
|
-
test/htdocs/nofollow.html
|
89
97
|
test/htdocs/noindex.html
|
90
|
-
test/htdocs/norobots.html
|
91
98
|
test/htdocs/rails_3_encoding_hack_form_test.html
|
92
|
-
test/htdocs/rel_nofollow.html
|
93
99
|
test/htdocs/relative/tc_relative_links.html
|
94
100
|
test/htdocs/robots.html
|
95
101
|
test/htdocs/robots.txt
|
96
102
|
test/htdocs/tc_bad_charset.html
|
97
103
|
test/htdocs/tc_bad_links.html
|
98
|
-
test/htdocs/tc_base_images.html
|
99
104
|
test/htdocs/tc_base_link.html
|
100
105
|
test/htdocs/tc_blank_form.html
|
101
106
|
test/htdocs/tc_charset.html
|
@@ -104,69 +109,51 @@ test/htdocs/tc_encoded_links.html
|
|
104
109
|
test/htdocs/tc_field_precedence.html
|
105
110
|
test/htdocs/tc_follow_meta.html
|
106
111
|
test/htdocs/tc_form_action.html
|
107
|
-
test/htdocs/tc_images.html
|
108
112
|
test/htdocs/tc_links.html
|
109
113
|
test/htdocs/tc_meta_in_body.html
|
110
|
-
test/htdocs/tc_no_attributes.html
|
111
114
|
test/htdocs/tc_pretty_print.html
|
112
|
-
test/htdocs/tc_radiobuttons.html
|
113
115
|
test/htdocs/tc_referer.html
|
114
116
|
test/htdocs/tc_relative_links.html
|
115
117
|
test/htdocs/tc_textarea.html
|
116
|
-
test/htdocs/test_bad_encoding.html
|
117
118
|
test/htdocs/test_click.html
|
118
119
|
test/htdocs/unusual______.html
|
119
|
-
test/servlets.rb
|
120
|
-
test/ssl_server.rb
|
121
|
-
test/test_cookies.rb
|
122
|
-
test/test_form_action.rb
|
123
|
-
test/test_form_as_hash.rb
|
124
|
-
test/test_form_button.rb
|
125
|
-
test/test_frames.rb
|
126
|
-
test/test_headers.rb
|
127
|
-
test/test_history.rb
|
128
|
-
test/test_history_added.rb
|
129
|
-
test/test_html_unscape_forms.rb
|
130
|
-
test/test_if_modified_since.rb
|
131
|
-
test/test_images.rb
|
132
120
|
test/test_mechanize.rb
|
133
121
|
test/test_mechanize_cookie.rb
|
134
122
|
test/test_mechanize_cookie_jar.rb
|
123
|
+
test/test_mechanize_download.rb
|
135
124
|
test/test_mechanize_file.rb
|
125
|
+
test/test_mechanize_file_connection.rb
|
136
126
|
test/test_mechanize_file_request.rb
|
137
|
-
test/
|
127
|
+
test/test_mechanize_file_saver.rb
|
138
128
|
test/test_mechanize_form.rb
|
139
129
|
test/test_mechanize_form_check_box.rb
|
140
130
|
test/test_mechanize_form_encoding.rb
|
141
131
|
test/test_mechanize_form_field.rb
|
132
|
+
test/test_mechanize_form_file_upload.rb
|
142
133
|
test/test_mechanize_form_image_button.rb
|
134
|
+
test/test_mechanize_form_keygen.rb
|
135
|
+
test/test_mechanize_form_multi_select_list.rb
|
136
|
+
test/test_mechanize_form_option.rb
|
137
|
+
test/test_mechanize_form_radio_button.rb
|
138
|
+
test/test_mechanize_form_select_list.rb
|
143
139
|
test/test_mechanize_form_textarea.rb
|
140
|
+
test/test_mechanize_headers.rb
|
141
|
+
test/test_mechanize_history.rb
|
144
142
|
test/test_mechanize_http_agent.rb
|
143
|
+
test/test_mechanize_http_auth_challenge.rb
|
144
|
+
test/test_mechanize_http_auth_realm.rb
|
145
|
+
test/test_mechanize_http_content_disposition_parser.rb
|
146
|
+
test/test_mechanize_http_www_authenticate_parser.rb
|
145
147
|
test/test_mechanize_link.rb
|
148
|
+
test/test_mechanize_page.rb
|
146
149
|
test/test_mechanize_page_encoding.rb
|
150
|
+
test/test_mechanize_page_frame.rb
|
147
151
|
test/test_mechanize_page_link.rb
|
148
152
|
test/test_mechanize_page_meta_refresh.rb
|
153
|
+
test/test_mechanize_parser.rb
|
154
|
+
test/test_mechanize_pluggable_parser.rb
|
155
|
+
test/test_mechanize_redirect_limit_reached_error.rb
|
149
156
|
test/test_mechanize_redirect_not_get_or_head_error.rb
|
150
157
|
test/test_mechanize_subclass.rb
|
151
158
|
test/test_mechanize_util.rb
|
152
159
|
test/test_multi_select.rb
|
153
|
-
test/test_no_attributes.rb
|
154
|
-
test/test_option.rb
|
155
|
-
test/test_pluggable_parser.rb
|
156
|
-
test/test_post_form.rb
|
157
|
-
test/test_pretty_print.rb
|
158
|
-
test/test_radiobutton.rb
|
159
|
-
test/test_redirect_limit_reached.rb
|
160
|
-
test/test_referer.rb
|
161
|
-
test/test_relative_links.rb
|
162
|
-
test/test_request.rb
|
163
|
-
test/test_response_code.rb
|
164
|
-
test/test_robots.rb
|
165
|
-
test/test_save_file.rb
|
166
|
-
test/test_scheme.rb
|
167
|
-
test/test_select.rb
|
168
|
-
test/test_select_all.rb
|
169
|
-
test/test_select_none.rb
|
170
|
-
test/test_select_noopts.rb
|
171
|
-
test/test_set_fields.rb
|
172
|
-
test/test_ssl_server.rb
|
data/README.rdoc
CHANGED
@@ -43,6 +43,7 @@ Copyright (c) 2006-2011:
|
|
43
43
|
Copyright (c) 2011:
|
44
44
|
|
45
45
|
* {Eric Hodel}[http://blog.segment7.net] (drbrain@segment7.net)
|
46
|
+
* {Akinori MUSHA}[http://blog.akinori.org] (knu@idaemons.org)
|
46
47
|
|
47
48
|
This library comes with a shameless plug for employing me
|
48
49
|
(Aaron[http://tenderlovemaking.com/]) programming Ruby, my favorite language!
|
@@ -51,7 +52,7 @@ This library comes with a shameless plug for employing me
|
|
51
52
|
|
52
53
|
This library was heavily influenced by its namesake in the perl world. A big
|
53
54
|
thanks goes to Andy Lester (andy@petdance.com), the author of the original
|
54
|
-
perl Mechanize which is available here[http://search.cpan.org/~petdance/WWW-Mechanize
|
55
|
+
perl Mechanize which is available here[http://search.cpan.org/~petdance/WWW-Mechanize/]. Ruby Mechanize would not be around without you!
|
55
56
|
|
56
57
|
Thank you to Michael Neumann for starting the Ruby version. Thanks to everyone
|
57
58
|
who's helped out in various ways. Finally, thank you to the people using this
|
data/Rakefile
CHANGED
@@ -4,10 +4,11 @@ require 'hoe'
|
|
4
4
|
Hoe.plugin :git
|
5
5
|
Hoe.plugin :minitest
|
6
6
|
|
7
|
-
Hoe.spec 'mechanize' do
|
7
|
+
hoe = Hoe.spec 'mechanize' do
|
8
8
|
developer 'Eric Hodel', 'drbrain@segment7.net'
|
9
9
|
developer 'Aaron Patterson', 'aaronp@rubyforge.org'
|
10
10
|
developer 'Mike Dalessio', 'mike.dalessio@gmail.com'
|
11
|
+
developer 'Akinori MUSHA', 'knu@idaemons.org'
|
11
12
|
|
12
13
|
self.readme_file = 'README.rdoc'
|
13
14
|
self.history_file = 'CHANGELOG.rdoc'
|
@@ -15,10 +16,12 @@ Hoe.spec 'mechanize' do
|
|
15
16
|
|
16
17
|
rdoc_locations << 'drbrain@rubyforge.org:/var/www/gforge-projects/mechanize/'
|
17
18
|
|
18
|
-
self.extra_deps << ['nokogiri', '~> 1.4']
|
19
|
-
self.extra_deps << ['net-http-persistent', '~> 1.8']
|
20
19
|
self.extra_deps << ['net-http-digest_auth', '~> 1.1', '>= 1.1.1']
|
20
|
+
self.extra_deps << ['net-http-persistent', '~> 2.3', '>= 2.3.2']
|
21
|
+
self.extra_deps << ['nokogiri', '~> 1.4']
|
22
|
+
self.extra_deps << ['ntlm-http', '~> 0.1', '>= 0.1.1']
|
21
23
|
self.extra_deps << ['webrobots', '~> 0.0', '>= 0.0.9']
|
24
|
+
self.extra_deps << ['domain_name', '~> 0.5', '>= 0.5.1']
|
22
25
|
|
23
26
|
self.spec_extras[:required_ruby_version] = '>= 1.8.7'
|
24
27
|
end
|
@@ -34,3 +37,13 @@ task('ssl_cert') do |p|
|
|
34
37
|
sh "mv server.key server.csr server.crt server.pem test/data/"
|
35
38
|
sh "rm server.key.org"
|
36
39
|
end
|
40
|
+
|
41
|
+
desc 'Install deps for travis to work around Hoe/RubyGems bug'
|
42
|
+
task 'travis_deps' do
|
43
|
+
hoe.spec.dependencies.each do |dep|
|
44
|
+
first_requirement = dep.requirement.requirements.first.join ' '
|
45
|
+
system('gem', 'install', dep.name, '-v', first_requirement,
|
46
|
+
'--no-rdoc', '--no-ri')
|
47
|
+
end
|
48
|
+
end
|
49
|
+
|
data/lib/mechanize.rb
CHANGED
@@ -7,33 +7,42 @@ require 'net/http/persistent'
|
|
7
7
|
require 'nkf'
|
8
8
|
require 'nokogiri'
|
9
9
|
require 'openssl'
|
10
|
+
require 'pp'
|
10
11
|
require 'stringio'
|
11
12
|
require 'uri'
|
12
13
|
require 'webrick/httputils'
|
13
14
|
require 'zlib'
|
14
15
|
|
15
|
-
|
16
|
-
# The Mechanize library is used for automating
|
17
|
-
# can follow links
|
16
|
+
##
|
17
|
+
# The Mechanize library is used for automating interactions with a website. It
|
18
|
+
# can follow links and submit forms. Form fields can be populated and
|
18
19
|
# submitted. A history of URLs is maintained and can be queried.
|
19
20
|
#
|
20
21
|
# == Example
|
21
|
-
# require 'rubygems'
|
22
|
-
# require 'mechanize'
|
23
|
-
# require 'logger'
|
24
22
|
#
|
25
|
-
#
|
26
|
-
#
|
27
|
-
#
|
28
|
-
#
|
29
|
-
#
|
30
|
-
#
|
31
|
-
#
|
23
|
+
# require 'mechanize'
|
24
|
+
# require 'logger'
|
25
|
+
#
|
26
|
+
# agent = Mechanize.new
|
27
|
+
# agent.log = Logger.new "mech.log"
|
28
|
+
# agent.user_agent_alias = 'Mac Safari'
|
29
|
+
#
|
30
|
+
# page = agent.get "http://www.google.com/"
|
31
|
+
# search_form = page.form_with :name => "f"
|
32
|
+
# search_form.field_with(:name => "q").value = "Hello"
|
33
|
+
#
|
34
|
+
# search_results = agent.submit search_form
|
35
|
+
# puts search_results.body
|
36
|
+
|
32
37
|
class Mechanize
|
33
38
|
|
34
39
|
##
|
35
40
|
# The version of Mechanize you are using.
|
36
|
-
|
41
|
+
|
42
|
+
VERSION = '2.1'
|
43
|
+
|
44
|
+
##
|
45
|
+
# Base mechanize error class
|
37
46
|
|
38
47
|
class Error < RuntimeError
|
39
48
|
end
|
@@ -44,517 +53,916 @@ class Mechanize
|
|
44
53
|
"#{RUBY_VERSION}dev#{RUBY_REVISION}"
|
45
54
|
end
|
46
55
|
|
47
|
-
|
48
|
-
|
56
|
+
##
|
57
|
+
# Supported User-Agent aliases for use with user_agent_alias=. The
|
58
|
+
# description in parentheses is for informative purposes and is not part of
|
59
|
+
# the alias name.
|
60
|
+
#
|
61
|
+
# * Linux Firefox (3.6.1)
|
62
|
+
# * Linux Konqueror (3)
|
63
|
+
# * Linux Mozilla
|
64
|
+
# * Mac FireFox (3.6)
|
65
|
+
# * Mac Mozilla
|
66
|
+
# * Mac Safari (5)
|
67
|
+
# * Mac Safari 4
|
68
|
+
# * Mechanize (default)
|
69
|
+
# * Windows IE 6
|
70
|
+
# * Windows IE 7
|
71
|
+
# * Windows IE 8
|
72
|
+
# * Windows IE 9
|
73
|
+
# * Windows Mozilla
|
74
|
+
# * iPhone (3.0)
|
75
|
+
#
|
76
|
+
# Example:
|
77
|
+
#
|
78
|
+
# agent = Mechanize.new
|
79
|
+
# agent.user_agent_alias = 'Mac Safari'
|
80
|
+
|
81
|
+
AGENT_ALIASES = {
|
82
|
+
'Mechanize' => "Mechanize/#{VERSION} Ruby/#{ruby_version} (http://github.com/tenderlove/mechanize/)",
|
83
|
+
'Linux Firefox' => 'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.2.1) Gecko/20100122 firefox/3.6.1',
|
84
|
+
'Linux Konqueror' => 'Mozilla/5.0 (compatible; Konqueror/3; Linux)',
|
85
|
+
'Linux Mozilla' => 'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.4) Gecko/20030624',
|
86
|
+
'Mac FireFox' => 'Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10.6; en-US; rv:1.9.2) Gecko/20100115 Firefox/3.6',
|
87
|
+
'Mac Mozilla' => 'Mozilla/5.0 (Macintosh; U; PPC Mac OS X Mach-O; en-US; rv:1.4a) Gecko/20030401',
|
88
|
+
'Mac Safari 4' => 'Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_2; de-at) AppleWebKit/531.21.8 (KHTML, like Gecko) Version/4.0.4 Safari/531.21.10',
|
89
|
+
'Mac Safari' => 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_2) AppleWebKit/534.51.22 (KHTML, like Gecko) Version/5.1.1 Safari/534.51.22',
|
90
|
+
'Windows IE 6' => 'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)',
|
91
|
+
'Windows IE 7' => 'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; .NET CLR 1.1.4322; .NET CLR 2.0.50727)',
|
92
|
+
'Windows IE 8' => 'Mozilla/5.0 (compatible; MSIE 8.0; Windows NT 5.1; Trident/4.0; .NET CLR 1.1.4322; .NET CLR 2.0.50727)',
|
93
|
+
'Windows IE 9' => 'Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0)',
|
94
|
+
'Windows Mozilla' => 'Mozilla/5.0 (Windows; U; Windows NT 5.0; en-US; rv:1.4b) Gecko/20030516 Mozilla Firebird/0.6',
|
95
|
+
'iPhone' => 'Mozilla/5.0 (iPhone; U; CPU like Mac OS X; en) AppleWebKit/420+ (KHTML, like Gecko) Version/3.0 Mobile/1C28 Safari/419.3',
|
96
|
+
}
|
97
|
+
|
98
|
+
def self.inherited(child) # :nodoc:
|
99
|
+
child.html_parser ||= html_parser
|
100
|
+
child.log ||= log
|
101
|
+
super
|
102
|
+
end
|
103
|
+
|
104
|
+
##
|
105
|
+
# Creates a new mechanize instance. If a block is given, the created
|
106
|
+
# instance is yielded to the block for setting up pre-connection state such
|
107
|
+
# as SSL parameters or proxies:
|
108
|
+
#
|
109
|
+
# agent = Mechanize.new do |a|
|
110
|
+
# a.proxy_host = 'proxy.example'
|
111
|
+
# a.proxy_port = 8080
|
112
|
+
# end
|
113
|
+
|
114
|
+
def initialize
|
115
|
+
@agent = Mechanize::HTTP::Agent.new
|
116
|
+
@agent.context = self
|
117
|
+
@log = nil
|
118
|
+
|
119
|
+
# attr_accessors
|
120
|
+
@agent.user_agent = AGENT_ALIASES['Mechanize']
|
121
|
+
@watch_for_set = nil
|
122
|
+
@history_added = nil
|
123
|
+
|
124
|
+
# attr_readers
|
125
|
+
@pluggable_parser = PluggableParser.new
|
126
|
+
|
127
|
+
@keep_alive_time = 0
|
128
|
+
|
129
|
+
# Proxy
|
130
|
+
@proxy_addr = nil
|
131
|
+
@proxy_port = nil
|
132
|
+
@proxy_user = nil
|
133
|
+
@proxy_pass = nil
|
134
|
+
|
135
|
+
@html_parser = self.class.html_parser
|
136
|
+
|
137
|
+
@default_encoding = nil
|
138
|
+
@force_default_encoding = false
|
139
|
+
|
140
|
+
yield self if block_given?
|
141
|
+
|
142
|
+
@agent.set_proxy @proxy_addr, @proxy_port, @proxy_user, @proxy_pass
|
143
|
+
@agent.set_http
|
144
|
+
end
|
145
|
+
|
146
|
+
# :section: History
|
147
|
+
#
|
148
|
+
# Methods for navigating and controlling history
|
149
|
+
|
150
|
+
##
|
151
|
+
# Equivalent to the browser back button. Returns the previous page visited.
|
152
|
+
|
153
|
+
def back
|
154
|
+
@agent.history.pop
|
155
|
+
end
|
156
|
+
|
157
|
+
##
|
158
|
+
# Returns the latest page loaded by Mechanize
|
159
|
+
|
160
|
+
def current_page
|
161
|
+
@agent.current_page
|
162
|
+
end
|
163
|
+
|
164
|
+
alias page current_page
|
165
|
+
|
166
|
+
##
|
167
|
+
# The history of this mechanize run
|
168
|
+
|
169
|
+
def history
|
170
|
+
@agent.history
|
171
|
+
end
|
172
|
+
|
173
|
+
##
|
174
|
+
# Maximum number of items allowed in the history.
|
175
|
+
|
176
|
+
def max_history
|
177
|
+
@agent.history.max_size
|
178
|
+
end
|
179
|
+
|
180
|
+
##
|
181
|
+
# Sets the maximum number of items allowed in the history to +length+.
|
182
|
+
|
183
|
+
def max_history= length
|
184
|
+
@agent.history.max_size = length
|
185
|
+
end
|
186
|
+
|
187
|
+
##
|
188
|
+
# Returns a visited page for the +url+ passed in, otherwise nil
|
189
|
+
|
190
|
+
def visited? url
|
191
|
+
url = url.href if url.respond_to? :href
|
192
|
+
|
193
|
+
@agent.visited_page url
|
194
|
+
end
|
195
|
+
|
196
|
+
##
|
197
|
+
# Returns whether or not a url has been visited
|
198
|
+
|
199
|
+
alias visited_page visited?
|
200
|
+
|
201
|
+
# :section: Hooks
|
202
|
+
#
|
203
|
+
# Hooks into the operation of mechanize
|
204
|
+
|
205
|
+
##
|
206
|
+
# A list of hooks to call before reading response header 'content-encoding'.
|
207
|
+
#
|
208
|
+
# The hook is called with the agent making the request, the URI of the
|
209
|
+
# request, the response and an IO containing the response body.
|
210
|
+
|
211
|
+
def content_encoding_hooks
|
212
|
+
@agent.content_encoding_hooks
|
213
|
+
end
|
214
|
+
|
215
|
+
##
|
216
|
+
# Callback which is invoked with the page that was added to history.
|
217
|
+
|
218
|
+
attr_accessor :history_added
|
219
|
+
|
220
|
+
##
|
221
|
+
# A list of hooks to call after retrieving a response. Hooks are called with
|
222
|
+
# the agent and the response returned.
|
223
|
+
|
224
|
+
def post_connect_hooks
|
225
|
+
@agent.post_connect_hooks
|
226
|
+
end
|
227
|
+
|
228
|
+
##
|
229
|
+
# A list of hooks to call before making a request. Hooks are called with
|
230
|
+
# the agent and the request to be performed.
|
231
|
+
|
232
|
+
def pre_connect_hooks
|
233
|
+
@agent.pre_connect_hooks
|
234
|
+
end
|
235
|
+
|
236
|
+
# :section: Requests
|
237
|
+
#
|
238
|
+
# Methods for making HTTP requests
|
239
|
+
|
240
|
+
##
|
241
|
+
# If the parameter is a string, finds the button or link with the
|
242
|
+
# value of the string on the current page and clicks it. Otherwise, clicks
|
243
|
+
# the Mechanize::Page::Link object passed in. Returns the page fetched.
|
244
|
+
|
245
|
+
def click link
|
246
|
+
case link
|
247
|
+
when Page::Link then
|
248
|
+
referer = link.page || current_page()
|
249
|
+
if @agent.robots
|
250
|
+
if (referer.is_a?(Page) and referer.parser.nofollow?) or
|
251
|
+
link.rel?('nofollow') then
|
252
|
+
raise RobotsDisallowedError.new(link.href)
|
253
|
+
end
|
254
|
+
end
|
255
|
+
if link.rel?('noreferrer')
|
256
|
+
href = @agent.resolve(link.href, link.page || current_page)
|
257
|
+
referer = Page.new(nil, {'content-type'=>'text/html'})
|
258
|
+
else
|
259
|
+
href = link.href
|
260
|
+
end
|
261
|
+
get href, [], referer
|
262
|
+
when String, Regexp then
|
263
|
+
if real_link = page.link_with(:text => link)
|
264
|
+
click real_link
|
265
|
+
else
|
266
|
+
button = nil
|
267
|
+
form = page.forms.find do |f|
|
268
|
+
button = f.button_with(:value => link)
|
269
|
+
button.is_a? Form::Submit
|
270
|
+
end
|
271
|
+
submit form, button if form
|
272
|
+
end
|
273
|
+
else
|
274
|
+
referer = current_page()
|
275
|
+
href = link.respond_to?(:href) ? link.href :
|
276
|
+
(link['href'] || link['src'])
|
277
|
+
get href, [], referer
|
278
|
+
end
|
279
|
+
end
|
280
|
+
|
281
|
+
##
|
282
|
+
# DELETE +uri+ with +query_params+, and setting +headers+:
|
283
|
+
#
|
284
|
+
# delete('http://example/', {'q' => 'foo'}, {})
|
285
|
+
|
286
|
+
def delete(uri, query_params = {}, headers = {})
|
287
|
+
page = @agent.fetch(uri, :delete, headers, query_params)
|
288
|
+
add_to_history(page)
|
289
|
+
page
|
290
|
+
end
|
291
|
+
|
292
|
+
##
|
293
|
+
# GET the +uri+ with the given request +parameters+, +referer+ and
|
294
|
+
# +headers+.
|
295
|
+
#
|
296
|
+
# The +referer+ may be a URI or a page.
|
297
|
+
|
298
|
+
def get(uri, parameters = [], referer = nil, headers = {})
|
299
|
+
method = :get
|
300
|
+
|
301
|
+
referer ||=
|
302
|
+
if uri.to_s =~ %r{\Ahttps?://}
|
303
|
+
Page.new(nil, {'content-type'=>'text/html'})
|
304
|
+
else
|
305
|
+
current_page || Page.new(nil, {'content-type'=>'text/html'})
|
306
|
+
end
|
307
|
+
|
308
|
+
# FIXME: Huge hack so that using a URI as a referer works. I need to
|
309
|
+
# refactor everything to pass around URIs but still support
|
310
|
+
# Mechanize::Page#base
|
311
|
+
unless referer.is_a?(Mechanize::File)
|
312
|
+
referer = referer.is_a?(String) ?
|
313
|
+
Page.new(URI.parse(referer), {'content-type' => 'text/html'}) :
|
314
|
+
Page.new(referer, {'content-type' => 'text/html'})
|
315
|
+
end
|
316
|
+
|
317
|
+
# fetch the page
|
318
|
+
headers ||= {}
|
319
|
+
page = @agent.fetch uri, method, headers, parameters, referer
|
320
|
+
add_to_history(page)
|
321
|
+
yield page if block_given?
|
322
|
+
page
|
323
|
+
end
|
324
|
+
|
325
|
+
##
|
326
|
+
# GET +url+ and return only its contents
|
327
|
+
|
328
|
+
def get_file(url)
|
329
|
+
get(url).body
|
330
|
+
end
|
331
|
+
|
332
|
+
##
|
333
|
+
# HEAD +uri+ with +query_params+, and setting +headers+:
|
334
|
+
#
|
335
|
+
# head('http://example/', {'q' => 'foo'}, {})
|
336
|
+
|
337
|
+
def head(uri, query_params = {}, headers = {})
|
338
|
+
# fetch the page
|
339
|
+
page = @agent.fetch(uri, :head, headers, query_params)
|
340
|
+
yield page if block_given?
|
341
|
+
page
|
342
|
+
end
|
343
|
+
|
344
|
+
##
|
345
|
+
# POST to the given +uri+ with the given +query+. The query is specified by
|
346
|
+
# either a string, or a list of key-value pairs represented by a hash or an
|
347
|
+
# array of arrays.
|
348
|
+
#
|
349
|
+
# Examples:
|
350
|
+
# agent.post 'http://example.com/', "foo" => "bar"
|
351
|
+
#
|
352
|
+
# agent.post 'http://example.com/', [%w[foo bar]]
|
353
|
+
#
|
354
|
+
# agent.post('http://example.com/', "<message>hello</message>",
|
355
|
+
# 'Content-Type' => 'application/xml')
|
356
|
+
|
357
|
+
def post(uri, query={}, headers={})
|
358
|
+
return request_with_entity(:post, uri, query, headers) if String === query
|
359
|
+
|
360
|
+
node = {}
|
361
|
+
# Create a fake form
|
362
|
+
class << node
|
363
|
+
def search(*args); []; end
|
364
|
+
end
|
365
|
+
node['method'] = 'POST'
|
366
|
+
node['enctype'] = 'application/x-www-form-urlencoded'
|
367
|
+
|
368
|
+
form = Form.new(node)
|
369
|
+
|
370
|
+
query.each { |k, v|
|
371
|
+
if v.is_a?(IO)
|
372
|
+
form.enctype = 'multipart/form-data'
|
373
|
+
ul = Form::FileUpload.new({'name' => k.to_s},::File.basename(v.path))
|
374
|
+
ul.file_data = v.read
|
375
|
+
form.file_uploads << ul
|
376
|
+
else
|
377
|
+
form.fields << Form::Field.new({'name' => k.to_s},v)
|
378
|
+
end
|
379
|
+
}
|
380
|
+
post_form(uri, form, headers)
|
381
|
+
end
|
382
|
+
|
383
|
+
##
|
384
|
+
# PUT to +uri+ with +entity+, and setting +headers+:
|
385
|
+
#
|
386
|
+
# put('http://example/', 'new content', {'Content-Type' => 'text/plain'})
|
387
|
+
|
388
|
+
def put(uri, entity, headers = {})
|
389
|
+
request_with_entity(:put, uri, entity, headers)
|
390
|
+
end
|
391
|
+
|
392
|
+
##
|
393
|
+
# Makes an HTTP request to +uri+ using HTTP method +verb+. +entity+ is used
|
394
|
+
# as the request body, if allowed.
|
395
|
+
|
396
|
+
def request_with_entity(verb, uri, entity, headers = {})
|
397
|
+
cur_page = current_page || Page.new(nil, {'content-type'=>'text/html'})
|
398
|
+
|
399
|
+
headers = {
|
400
|
+
'Content-Type' => 'application/octet-stream',
|
401
|
+
'Content-Length' => entity.size.to_s,
|
402
|
+
}.update headers
|
403
|
+
|
404
|
+
page = @agent.fetch uri, verb, headers, [entity], cur_page
|
405
|
+
add_to_history(page)
|
406
|
+
page
|
407
|
+
end
|
408
|
+
|
409
|
+
##
|
410
|
+
# Submits +form+ with an optional +button+.
|
411
|
+
#
|
412
|
+
# Without a button:
|
413
|
+
#
|
414
|
+
# page = agent.get('http://example.com')
|
415
|
+
# agent.submit(page.forms.first)
|
416
|
+
#
|
417
|
+
# With a button:
|
418
|
+
#
|
419
|
+
# agent.submit(page.forms.first, page.forms.first.buttons.first)
|
420
|
+
|
421
|
+
def submit(form, button=nil, headers={})
|
422
|
+
form.add_button_to_query(button) if button
|
423
|
+
|
424
|
+
case form.method.upcase
|
425
|
+
when 'POST'
|
426
|
+
post_form(form.action, form, headers)
|
427
|
+
when 'GET'
|
428
|
+
get(form.action.gsub(/\?[^\?]*$/, ''),
|
429
|
+
form.build_query,
|
430
|
+
form.page,
|
431
|
+
headers)
|
432
|
+
else
|
433
|
+
raise ArgumentError, "unsupported method: #{form.method.upcase}"
|
434
|
+
end
|
435
|
+
end
|
436
|
+
|
437
|
+
##
|
438
|
+
# Runs given block, then resets the page history as it was before. self is
|
439
|
+
# given as a parameter to the block. Returns the value of the block.
|
440
|
+
|
441
|
+
def transact
|
442
|
+
history_backup = @agent.history.dup
|
443
|
+
begin
|
444
|
+
yield self
|
445
|
+
ensure
|
446
|
+
@agent.history = history_backup
|
447
|
+
end
|
448
|
+
end
|
449
|
+
|
450
|
+
# :section: Settings
|
451
|
+
#
|
452
|
+
# Settings that adjust how mechanize makes HTTP requests including timeouts,
|
453
|
+
# keep-alives, compression, redirects and headers.
|
454
|
+
|
455
|
+
@html_parser = Nokogiri::HTML
|
456
|
+
|
457
|
+
class << self
|
458
|
+
|
459
|
+
##
|
460
|
+
# Default HTML parser for all mechanize instances
|
461
|
+
#
|
462
|
+
# Mechanize.html_parser = Nokogiri::XML
|
463
|
+
|
464
|
+
attr_accessor :html_parser
|
465
|
+
|
466
|
+
##
|
467
|
+
# Default logger for all mechanize instances
|
468
|
+
#
|
469
|
+
# Mechanize.log = Logger.new $stderr
|
470
|
+
|
471
|
+
attr_accessor :log
|
472
|
+
|
473
|
+
end
|
474
|
+
|
475
|
+
##
|
476
|
+
# A default encoding name used when parsing HTML. When set it is
|
477
|
+
# used after any other encoding. The default is nil.
|
478
|
+
|
479
|
+
attr_accessor :default_encoding
|
480
|
+
|
481
|
+
##
|
482
|
+
# Overrides the encodings given by the HTTP server and the HTML page with
|
483
|
+
# the default_encoding when set to true.
|
484
|
+
|
485
|
+
attr_accessor :force_default_encoding
|
486
|
+
|
487
|
+
##
|
488
|
+
# The HTML parser to be used when parsing documents
|
489
|
+
|
490
|
+
attr_accessor :html_parser
|
49
491
|
|
492
|
+
##
|
50
493
|
# HTTP/1.0 keep-alive time. This is no longer supported by mechanize as it
|
51
494
|
# now uses net-http-persistent which only supports HTTP/1.1 persistent
|
52
495
|
# connections
|
496
|
+
|
53
497
|
attr_accessor :keep_alive_time
|
54
498
|
|
55
499
|
##
|
56
|
-
#
|
500
|
+
# The HTTP proxy address
|
57
501
|
|
58
|
-
|
59
|
-
'Windows IE 6' => 'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)',
|
60
|
-
'Windows IE 7' => 'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; .NET CLR 1.1.4322; .NET CLR 2.0.50727)',
|
61
|
-
'Windows Mozilla' => 'Mozilla/5.0 (Windows; U; Windows NT 5.0; en-US; rv:1.4b) Gecko/20030516 Mozilla Firebird/0.6',
|
62
|
-
'Mac Safari' => 'Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_2; de-at) AppleWebKit/531.21.8 (KHTML, like Gecko) Version/4.0.4 Safari/531.21.10',
|
63
|
-
'Mac FireFox' => 'Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10.6; en-US; rv:1.9.2) Gecko/20100115 Firefox/3.6',
|
64
|
-
'Mac Mozilla' => 'Mozilla/5.0 (Macintosh; U; PPC Mac OS X Mach-O; en-US; rv:1.4a) Gecko/20030401',
|
65
|
-
'Linux Mozilla' => 'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.4) Gecko/20030624',
|
66
|
-
'Linux Firefox' => 'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.2.1) Gecko/20100122 firefox/3.6.1',
|
67
|
-
'Linux Konqueror' => 'Mozilla/5.0 (compatible; Konqueror/3; Linux)',
|
68
|
-
'iPhone' => 'Mozilla/5.0 (iPhone; U; CPU like Mac OS X; en) AppleWebKit/420+ (KHTML, like Gecko) Version/3.0 Mobile/1C28 Safari/419.3',
|
69
|
-
'Mechanize' => "Mechanize/#{VERSION} Ruby/#{ruby_version} (http://github.com/tenderlove/mechanize/)"
|
70
|
-
}
|
502
|
+
attr_reader :proxy_addr
|
71
503
|
|
72
|
-
|
504
|
+
##
|
505
|
+
# The HTTP proxy password
|
73
506
|
|
74
|
-
|
75
|
-
@agent.cookie_jar
|
76
|
-
end
|
507
|
+
attr_reader :proxy_pass
|
77
508
|
|
78
|
-
|
79
|
-
|
80
|
-
end
|
509
|
+
##
|
510
|
+
# The HTTP proxy port
|
81
511
|
|
82
|
-
|
83
|
-
def open_timeout
|
84
|
-
@agent.open_timeout
|
85
|
-
end
|
512
|
+
attr_reader :proxy_port
|
86
513
|
|
87
|
-
|
88
|
-
|
89
|
-
end
|
514
|
+
##
|
515
|
+
# The HTTP proxy username
|
90
516
|
|
91
|
-
|
92
|
-
def read_timeout
|
93
|
-
@agent.read_timeout
|
94
|
-
end
|
517
|
+
attr_reader :proxy_user
|
95
518
|
|
96
|
-
|
97
|
-
|
98
|
-
end
|
519
|
+
##
|
520
|
+
# Sets the user and password to be used for HTTP authentication.
|
99
521
|
|
100
|
-
|
101
|
-
|
102
|
-
@agent.
|
522
|
+
def auth(user, password)
|
523
|
+
@agent.user = user
|
524
|
+
@agent.password = password
|
103
525
|
end
|
104
526
|
|
105
|
-
|
106
|
-
# content
|
107
|
-
attr_accessor :watch_for_set
|
527
|
+
alias basic_auth auth
|
108
528
|
|
109
|
-
|
110
|
-
|
111
|
-
@agent.ca_file
|
112
|
-
end
|
529
|
+
##
|
530
|
+
# Are If-Modified-Since conditional requests enabled?
|
113
531
|
|
114
|
-
def
|
115
|
-
@agent.
|
532
|
+
def conditional_requests
|
533
|
+
@agent.conditional_requests
|
116
534
|
end
|
117
535
|
|
118
|
-
|
119
|
-
|
120
|
-
end
|
536
|
+
##
|
537
|
+
# Enables or disables If-Modified-Since conditional requests (enabled by default)
|
121
538
|
|
122
|
-
|
123
|
-
|
124
|
-
@agent.key
|
539
|
+
def conditional_requests= enabled
|
540
|
+
@agent.conditional_requests = enabled
|
125
541
|
end
|
126
542
|
|
127
|
-
|
128
|
-
|
543
|
+
##
|
544
|
+
# A Mechanize::CookieJar which stores cookies
|
545
|
+
|
546
|
+
def cookie_jar
|
547
|
+
@agent.cookie_jar
|
129
548
|
end
|
130
549
|
|
131
|
-
|
132
|
-
|
133
|
-
|
550
|
+
##
|
551
|
+
# Replaces the cookie jar with +cookie_jar+
|
552
|
+
|
553
|
+
def cookie_jar= cookie_jar
|
554
|
+
@agent.cookie_jar = cookie_jar
|
134
555
|
end
|
135
556
|
|
136
|
-
|
137
|
-
|
557
|
+
##
|
558
|
+
# Returns a list of cookies stored in the cookie jar.
|
559
|
+
|
560
|
+
def cookies
|
561
|
+
@agent.cookie_jar.to_a
|
138
562
|
end
|
139
563
|
|
140
|
-
|
141
|
-
|
142
|
-
|
564
|
+
##
|
565
|
+
# Follow HTML meta refresh and HTTP Refresh headers. If set to +:anywhere+
|
566
|
+
# meta refresh tags outside of the head element will be followed.
|
567
|
+
|
568
|
+
def follow_meta_refresh
|
569
|
+
@agent.follow_meta_refresh
|
143
570
|
end
|
144
571
|
|
145
|
-
|
146
|
-
|
572
|
+
##
|
573
|
+
# Controls following of HTML meta refresh and HTTP Refresh headers in
|
574
|
+
# responses.
|
575
|
+
|
576
|
+
def follow_meta_refresh= follow
|
577
|
+
@agent.follow_meta_refresh = follow
|
147
578
|
end
|
148
579
|
|
149
|
-
|
150
|
-
#
|
580
|
+
##
|
581
|
+
# Follow an HTML meta refresh and HTTP Refresh headers that have no "url="
|
582
|
+
# in the content attribute.
|
151
583
|
#
|
152
|
-
#
|
153
|
-
# :permanent:: Only 301 Moved Permanantly redirects are followed
|
154
|
-
# false:: No redirects are followed
|
584
|
+
# Defaults to false to prevent infinite refresh loops.
|
155
585
|
|
156
|
-
def
|
157
|
-
@agent.
|
586
|
+
def follow_meta_refresh_self
|
587
|
+
@agent.follow_meta_refresh_self
|
158
588
|
end
|
159
589
|
|
160
|
-
|
161
|
-
|
590
|
+
##
|
591
|
+
# Alters the following of HTML meta refresh and HTTP Refresh headers that
|
592
|
+
# point to the same page.
|
593
|
+
|
594
|
+
def follow_meta_refresh_self= follow
|
595
|
+
@agent.follow_meta_refresh_self = follow
|
162
596
|
end
|
163
597
|
|
598
|
+
##
|
599
|
+
# Is gzip compression of responses enabled?
|
600
|
+
|
164
601
|
def gzip_enabled
|
165
602
|
@agent.gzip_enabled
|
166
603
|
end
|
167
604
|
|
605
|
+
##
|
168
606
|
# Enables or disables HTTP/1.1 gzip compression (enabled by default)
|
607
|
+
|
169
608
|
def gzip_enabled=enabled
|
170
609
|
@agent.gzip_enabled = enabled
|
171
610
|
end
|
172
611
|
|
173
|
-
|
174
|
-
|
175
|
-
end
|
612
|
+
##
|
613
|
+
# Connections that have not been used in this many seconds will be reset.
|
176
614
|
|
177
|
-
|
178
|
-
|
179
|
-
@agent.conditional_requests = enabled
|
615
|
+
def idle_timeout
|
616
|
+
@agent.idle_timeout
|
180
617
|
end
|
181
618
|
|
182
|
-
#
|
183
|
-
#
|
184
|
-
|
185
|
-
|
619
|
+
# Sets the idle timeout to +idle_timeout+. The default timeout is 5
|
620
|
+
# seconds. If you experience "too many connection resets", reducing this
|
621
|
+
# value may help.
|
622
|
+
|
623
|
+
def idle_timeout= idle_timeout
|
624
|
+
@agent.idle_timeout = idle_timeout
|
186
625
|
end
|
187
626
|
|
188
|
-
|
189
|
-
|
627
|
+
##
|
628
|
+
# Are HTTP/1.1 keep-alive connections enabled?
|
629
|
+
|
630
|
+
def keep_alive
|
631
|
+
@agent.keep_alive
|
190
632
|
end
|
191
633
|
|
192
|
-
|
193
|
-
#
|
634
|
+
##
|
635
|
+
# Disable HTTP/1.1 keep-alive connections if +enable+ is set to false. If
|
636
|
+
# you are experiencing "too many connection resets" errors setting this to
|
637
|
+
# false will eliminate them.
|
194
638
|
#
|
195
|
-
#
|
196
|
-
|
197
|
-
|
198
|
-
|
199
|
-
@agent.verify_callback
|
639
|
+
# You should first investigate reducing idle_timeout.
|
640
|
+
|
641
|
+
def keep_alive= enable
|
642
|
+
@agent.keep_alive = enable
|
200
643
|
end
|
201
644
|
|
202
|
-
|
203
|
-
|
645
|
+
##
|
646
|
+
# The current logger. If no logger has been set Mechanize.log is used.
|
647
|
+
|
648
|
+
def log
|
649
|
+
@log || Mechanize.log
|
204
650
|
end
|
205
651
|
|
206
|
-
|
652
|
+
##
|
653
|
+
# Sets the +logger+ used by this instance of mechanize
|
207
654
|
|
208
|
-
def
|
209
|
-
@
|
655
|
+
def log= logger
|
656
|
+
@log = logger
|
210
657
|
end
|
211
658
|
|
212
|
-
|
213
|
-
|
659
|
+
##
|
660
|
+
# Responses larger than this will be written to a Tempfile instead of stored
|
661
|
+
# in memory. The default is 10240 bytes
|
662
|
+
|
663
|
+
def max_file_buffer
|
664
|
+
@agent.max_file_buffer
|
214
665
|
end
|
215
666
|
|
216
|
-
|
217
|
-
|
667
|
+
##
|
668
|
+
# Sets the maximum size of a response body that will be stored in memory to
|
669
|
+
# +bytes+
|
670
|
+
|
671
|
+
def max_file_buffer= bytes
|
672
|
+
@agent.max_file_buffer = bytes
|
218
673
|
end
|
219
674
|
|
220
|
-
|
221
|
-
|
675
|
+
##
|
676
|
+
# Length of time to wait until a connection is opened in seconds
|
677
|
+
|
678
|
+
def open_timeout
|
679
|
+
@agent.open_timeout
|
222
680
|
end
|
223
681
|
|
224
|
-
|
225
|
-
|
226
|
-
|
682
|
+
##
|
683
|
+
# Sets the connection open timeout to +open_timeout+
|
684
|
+
|
685
|
+
def open_timeout= open_timeout
|
686
|
+
@agent.open_timeout = open_timeout
|
227
687
|
end
|
228
688
|
|
229
|
-
|
230
|
-
|
689
|
+
##
|
690
|
+
# Length of time to wait for data from the server
|
691
|
+
|
692
|
+
def read_timeout
|
693
|
+
@agent.read_timeout
|
231
694
|
end
|
232
695
|
|
233
|
-
|
234
|
-
|
235
|
-
|
236
|
-
attr_reader :proxy_port
|
237
|
-
attr_reader :proxy_user
|
696
|
+
##
|
697
|
+
# Sets the timeout for each chunk of data read from the server to
|
698
|
+
# +read_timeout+. A single request may read many chunks of data.
|
238
699
|
|
239
|
-
|
240
|
-
|
700
|
+
def read_timeout= read_timeout
|
701
|
+
@agent.read_timeout = read_timeout
|
702
|
+
end
|
241
703
|
|
242
|
-
|
704
|
+
##
|
705
|
+
# Controls how mechanize deals with redirects. The following values are
|
706
|
+
# allowed:
|
707
|
+
#
|
708
|
+
# :all, true:: All 3xx redirects are followed (default)
|
709
|
+
# :permanent:: Only 301 Moved Permanently redirects are followed
|
710
|
+
# false:: No redirects are followed
|
243
711
|
|
244
|
-
def
|
245
|
-
@agent.
|
712
|
+
def redirect_ok
|
713
|
+
@agent.redirect_ok
|
246
714
|
end
|
247
715
|
|
248
|
-
|
716
|
+
alias follow_redirect? redirect_ok
|
249
717
|
|
250
|
-
|
251
|
-
# the
|
718
|
+
##
|
719
|
+
# Sets the mechanize redirect handling policy. See redirect_ok for allowed
|
720
|
+
# values
|
252
721
|
|
253
|
-
def
|
254
|
-
@agent.
|
722
|
+
def redirect_ok= follow
|
723
|
+
@agent.redirect_ok = follow
|
255
724
|
end
|
256
725
|
|
257
|
-
|
258
|
-
#
|
726
|
+
##
|
727
|
+
# Maximum number of redirections to follow
|
259
728
|
|
260
|
-
def
|
261
|
-
@agent.
|
729
|
+
def redirection_limit
|
730
|
+
@agent.redirection_limit
|
262
731
|
end
|
263
732
|
|
264
|
-
|
733
|
+
##
|
734
|
+
# Sets the maximum number of redirections to follow to +limit+
|
265
735
|
|
266
|
-
|
267
|
-
|
268
|
-
|
736
|
+
def redirection_limit= limit
|
737
|
+
@agent.redirection_limit = limit
|
738
|
+
end
|
269
739
|
|
270
|
-
|
271
|
-
|
272
|
-
|
273
|
-
|
274
|
-
|
740
|
+
##
|
741
|
+
# A hash of custom request headers that will be sent on every request
|
742
|
+
|
743
|
+
def request_headers
|
744
|
+
@agent.request_headers
|
275
745
|
end
|
276
746
|
|
277
|
-
|
278
|
-
#
|
747
|
+
##
|
748
|
+
# Replaces the custom request headers that will be sent on every request
|
749
|
+
# with +request_headers+
|
279
750
|
|
280
|
-
|
751
|
+
def request_headers= request_headers
|
752
|
+
@agent.request_headers = request_headers
|
753
|
+
end
|
281
754
|
|
282
|
-
|
283
|
-
#
|
284
|
-
attr_accessor :force_default_encoding
|
755
|
+
##
|
756
|
+
# Retry POST and other non-idempotent requests. See RFC 2616 9.1.2.
|
285
757
|
|
286
|
-
def
|
287
|
-
@agent
|
288
|
-
|
758
|
+
def retry_change_requests
|
759
|
+
@agent.retry_change_requests
|
760
|
+
end
|
289
761
|
|
290
|
-
|
291
|
-
|
292
|
-
|
293
|
-
|
762
|
+
##
|
763
|
+
# When setting +retry_change_requests+ to true you are stating that, for all
|
764
|
+
# the URLs you access with mechanize, making POST and other non-idempotent
|
765
|
+
# requests is safe and will not cause data duplication or other harmful
|
766
|
+
# results.
|
767
|
+
#
|
768
|
+
# If you are experiencing "too many connection resets" errors you should
|
769
|
+
# instead investigate reducing the idle_timeout or disabling keep_alive
|
770
|
+
# connections.
|
294
771
|
|
295
|
-
|
296
|
-
@
|
772
|
+
def retry_change_requests= retry_change_requests
|
773
|
+
@agent.retry_change_requests = retry_change_requests
|
774
|
+
end
|
297
775
|
|
298
|
-
|
299
|
-
|
776
|
+
##
|
777
|
+
# Will <code>/robots.txt</code> files be obeyed?
|
300
778
|
|
301
|
-
|
302
|
-
@
|
303
|
-
|
304
|
-
@proxy_user = nil
|
305
|
-
@proxy_pass = nil
|
779
|
+
def robots
|
780
|
+
@agent.robots
|
781
|
+
end
|
306
782
|
|
307
|
-
|
783
|
+
##
|
784
|
+
# When +enabled+ mechanize will retrieve and obey <code>robots.txt</code>
|
785
|
+
# files
|
308
786
|
|
309
|
-
|
310
|
-
@
|
787
|
+
def robots= enabled
|
788
|
+
@agent.robots = enabled
|
789
|
+
end
|
311
790
|
|
312
|
-
|
791
|
+
##
|
792
|
+
# The handlers for HTTP and other URI protocols.
|
313
793
|
|
314
|
-
|
315
|
-
@agent.
|
794
|
+
def scheme_handlers
|
795
|
+
@agent.scheme_handlers
|
316
796
|
end
|
317
797
|
|
318
|
-
|
319
|
-
|
798
|
+
##
|
799
|
+
# Replaces the URI scheme handler table with +scheme_handlers+
|
800
|
+
|
801
|
+
def scheme_handlers= scheme_handlers
|
802
|
+
@agent.scheme_handlers = scheme_handlers
|
320
803
|
end
|
321
804
|
|
322
|
-
|
323
|
-
|
805
|
+
##
|
806
|
+
# The identification string for the client initiating a web request
|
807
|
+
|
808
|
+
def user_agent
|
809
|
+
@agent.user_agent
|
324
810
|
end
|
325
811
|
|
326
|
-
|
327
|
-
|
812
|
+
##
|
813
|
+
# Sets the User-Agent used by mechanize to +user_agent+. See also
|
814
|
+
# user_agent_alias
|
328
815
|
|
329
816
|
def user_agent= user_agent
|
330
817
|
@agent.user_agent = user_agent
|
331
818
|
end
|
332
819
|
|
333
|
-
|
334
|
-
|
335
|
-
|
336
|
-
|
337
|
-
end
|
820
|
+
##
|
821
|
+
# Set the user agent for the Mechanize object based on the given +name+.
|
822
|
+
#
|
823
|
+
# See also AGENT_ALIASES
|
338
824
|
|
339
|
-
|
340
|
-
|
341
|
-
|
825
|
+
def user_agent_alias= name
|
826
|
+
self.user_agent = AGENT_ALIASES[name] ||
|
827
|
+
raise(ArgumentError, "unknown agent alias #{name.inspect}")
|
342
828
|
end
|
343
829
|
|
344
|
-
|
345
|
-
|
346
|
-
|
347
|
-
|
830
|
+
##
|
831
|
+
# The value of watch_for_set is passed to pluggable parsers for retrieved
|
832
|
+
# content
|
833
|
+
|
834
|
+
attr_accessor :watch_for_set
|
835
|
+
|
836
|
+
# :section: SSL
|
837
|
+
#
|
838
|
+
# SSL settings for mechanize. These must be set in the block given to
|
839
|
+
# Mechanize.new
|
840
|
+
|
841
|
+
##
|
842
|
+
# Path to an OpenSSL server certificate file
|
843
|
+
|
844
|
+
def ca_file
|
845
|
+
@agent.ca_file
|
348
846
|
end
|
349
847
|
|
350
|
-
|
848
|
+
##
|
849
|
+
# Sets the certificate file used for SSL connections
|
351
850
|
|
352
|
-
|
353
|
-
|
354
|
-
|
851
|
+
def ca_file= ca_file
|
852
|
+
@agent.ca_file = ca_file
|
853
|
+
end
|
355
854
|
|
356
|
-
|
357
|
-
|
358
|
-
location = Gem.location_of_caller.join ':'
|
359
|
-
warn "#{location}: Mechanize#get with options hash is deprecated and will be removed October 2011"
|
855
|
+
##
|
856
|
+
# An OpenSSL client certificate or the path to a certificate file.
|
360
857
|
|
361
|
-
|
362
|
-
|
363
|
-
|
364
|
-
headers = options[:headers]
|
365
|
-
method = options[:verb] || method
|
366
|
-
end
|
858
|
+
def cert
|
859
|
+
@agent.cert
|
860
|
+
end
|
367
861
|
|
368
|
-
|
369
|
-
|
370
|
-
|
371
|
-
else
|
372
|
-
current_page || Page.new(nil, {'content-type'=>'text/html'})
|
373
|
-
end
|
862
|
+
##
|
863
|
+
# Sets the OpenSSL client certificate +cert+ to the given path or
|
864
|
+
# certificate instance
|
374
865
|
|
375
|
-
|
376
|
-
|
377
|
-
|
378
|
-
unless referer.is_a?(Mechanize::File)
|
379
|
-
referer = referer.is_a?(String) ?
|
380
|
-
Page.new(URI.parse(referer), {'content-type' => 'text/html'}) :
|
381
|
-
Page.new(referer, {'content-type' => 'text/html'})
|
382
|
-
end
|
866
|
+
def cert= cert
|
867
|
+
@agent.cert = cert
|
868
|
+
end
|
383
869
|
|
384
|
-
|
385
|
-
|
386
|
-
|
387
|
-
|
388
|
-
|
389
|
-
|
870
|
+
##
|
871
|
+
# An OpenSSL certificate store for verifying server certificates. This
|
872
|
+
# defaults to the default certificate store.
|
873
|
+
|
874
|
+
def cert_store
|
875
|
+
@agent.cert_store
|
390
876
|
end
|
391
877
|
|
392
878
|
##
|
393
|
-
#
|
394
|
-
|
395
|
-
|
396
|
-
|
397
|
-
def put(url, entity, headers = {})
|
398
|
-
request_with_entity(:put, url, entity, headers)
|
879
|
+
# Sets the OpenSSL certificate store to +cert_store+.
|
880
|
+
|
881
|
+
def cert_store= cert_store
|
882
|
+
@agent.cert_store = cert_store
|
399
883
|
end
|
400
884
|
|
401
885
|
##
|
402
|
-
#
|
403
|
-
#
|
404
|
-
# delete('http://example/', {'q' => 'foo'}, {})
|
886
|
+
# What is this?
|
405
887
|
#
|
406
|
-
|
407
|
-
|
408
|
-
|
409
|
-
|
888
|
+
# Why is it different from #cert?
|
889
|
+
|
890
|
+
def certificate # :nodoc:
|
891
|
+
@agent.certificate
|
410
892
|
end
|
411
893
|
|
412
894
|
##
|
413
|
-
#
|
414
|
-
|
415
|
-
|
416
|
-
|
417
|
-
def head(uri, query_params = {}, headers = {})
|
418
|
-
# fetch the page
|
419
|
-
page = @agent.fetch(uri, :head, headers, query_params)
|
420
|
-
yield page if block_given?
|
421
|
-
page
|
895
|
+
# An OpenSSL private key or the path to a private key
|
896
|
+
|
897
|
+
def key
|
898
|
+
@agent.key
|
422
899
|
end
|
423
900
|
|
424
|
-
|
425
|
-
|
426
|
-
|
901
|
+
##
|
902
|
+
# Sets the OpenSSL client +key+ to the given path or key instance
|
903
|
+
|
904
|
+
def key= key
|
905
|
+
@agent.key = key
|
427
906
|
end
|
428
907
|
|
429
|
-
|
430
|
-
#
|
431
|
-
|
432
|
-
def
|
433
|
-
|
434
|
-
when Page::Link
|
435
|
-
referer = link.page || current_page()
|
436
|
-
if @agent.robots
|
437
|
-
if (referer.is_a?(Page) && referer.parser.nofollow?) || link.rel?('nofollow')
|
438
|
-
raise RobotsDisallowedError.new(link.href)
|
439
|
-
end
|
440
|
-
end
|
441
|
-
if link.rel?('noreferrer')
|
442
|
-
href = @agent.resolve(link.href, link.page || current_page)
|
443
|
-
referer = Page.new(nil, {'content-type'=>'text/html'})
|
444
|
-
else
|
445
|
-
href = link.href
|
446
|
-
end
|
447
|
-
get href, [], referer
|
448
|
-
when String, Regexp
|
449
|
-
if real_link = page.link_with(:text => link)
|
450
|
-
click real_link
|
451
|
-
else
|
452
|
-
button = nil
|
453
|
-
form = page.forms.find do |f|
|
454
|
-
button = f.button_with(:value => link)
|
455
|
-
button.is_a? Form::Submit
|
456
|
-
end
|
457
|
-
submit form, button if form
|
458
|
-
end
|
459
|
-
else
|
460
|
-
referer = current_page()
|
461
|
-
href = link.respond_to?(:href) ? link.href :
|
462
|
-
(link['href'] || link['src'])
|
463
|
-
get href, [], referer
|
464
|
-
end
|
908
|
+
##
|
909
|
+
# OpenSSL client key password
|
910
|
+
|
911
|
+
def pass
|
912
|
+
@agent.pass
|
465
913
|
end
|
466
914
|
|
467
|
-
|
468
|
-
#
|
469
|
-
|
470
|
-
|
915
|
+
##
|
916
|
+
# Sets the client key password to +pass+
|
917
|
+
|
918
|
+
def pass= pass
|
919
|
+
@agent.pass = pass
|
471
920
|
end
|
472
921
|
|
473
|
-
|
474
|
-
#
|
475
|
-
#
|
476
|
-
#
|
477
|
-
# Examples:
|
478
|
-
# agent.post('http://example.com/', "foo" => "bar")
|
479
|
-
#
|
480
|
-
# agent.post('http://example.com/', [ ["foo", "bar"] ])
|
922
|
+
##
|
923
|
+
# A callback for additional certificate verification. See
|
924
|
+
# OpenSSL::SSL::SSLContext#verify_callback
|
481
925
|
#
|
482
|
-
#
|
483
|
-
|
484
|
-
|
485
|
-
return request_with_entity(:post, url, query, headers)
|
486
|
-
end
|
487
|
-
node = {}
|
488
|
-
# Create a fake form
|
489
|
-
class << node
|
490
|
-
def search(*args); []; end
|
491
|
-
end
|
492
|
-
node['method'] = 'POST'
|
493
|
-
node['enctype'] = 'application/x-www-form-urlencoded'
|
494
|
-
|
495
|
-
form = Form.new(node)
|
926
|
+
# The callback can be used for debugging or to ignore errors by always
|
927
|
+
# returning +true+. Specifying nil uses the default method that was valid
|
928
|
+
# when the SSLContext was created
|
496
929
|
|
497
|
-
|
498
|
-
|
499
|
-
form.enctype = 'multipart/form-data'
|
500
|
-
ul = Form::FileUpload.new({'name' => k.to_s},::File.basename(v.path))
|
501
|
-
ul.file_data = v.read
|
502
|
-
form.file_uploads << ul
|
503
|
-
else
|
504
|
-
form.fields << Form::Field.new({'name' => k.to_s},v)
|
505
|
-
end
|
506
|
-
}
|
507
|
-
post_form(url, form, headers)
|
930
|
+
def verify_callback
|
931
|
+
@agent.verify_callback
|
508
932
|
end
|
509
933
|
|
510
|
-
|
511
|
-
#
|
512
|
-
# page = agent.get('http://example.com')
|
513
|
-
# agent.submit(page.forms.first)
|
514
|
-
# With a button
|
515
|
-
# agent.submit(page.forms.first, page.forms.first.buttons.first)
|
516
|
-
def submit(form, button=nil, headers={})
|
517
|
-
form.add_button_to_query(button) if button
|
518
|
-
case form.method.upcase
|
519
|
-
when 'POST'
|
520
|
-
post_form(form.action, form, headers)
|
521
|
-
when 'GET'
|
522
|
-
get(form.action.gsub(/\?[^\?]*$/, ''),
|
523
|
-
form.build_query,
|
524
|
-
form.page,
|
525
|
-
headers)
|
526
|
-
else
|
527
|
-
raise ArgumentError, "unsupported method: #{form.method.upcase}"
|
528
|
-
end
|
529
|
-
end
|
934
|
+
##
|
935
|
+
# Sets the OpenSSL certificate verification callback
|
530
936
|
|
531
|
-
def
|
532
|
-
|
937
|
+
def verify_callback= verify_callback
|
938
|
+
@agent.verify_callback = verify_callback
|
939
|
+
end
|
533
940
|
|
534
|
-
|
535
|
-
|
536
|
-
|
537
|
-
|
941
|
+
##
|
942
|
+
# The OpenSSL server certificate verification method. The default is
|
943
|
+
# OpenSSL::SSL::VERIFY_PEER and certificate verification uses the default
|
944
|
+
# system certificates. See also cert_store
|
538
945
|
|
539
|
-
|
540
|
-
|
541
|
-
page
|
946
|
+
def verify_mode
|
947
|
+
@agent.verify_mode
|
542
948
|
end
|
543
949
|
|
544
|
-
|
545
|
-
|
546
|
-
|
950
|
+
##
|
951
|
+
# Sets the OpenSSL server certificate verification method.
|
952
|
+
|
953
|
+
def verify_mode= verify_mode
|
954
|
+
@agent.verify_mode = verify_mode
|
547
955
|
end
|
548
956
|
|
549
|
-
#
|
550
|
-
def visited_page(url)
|
551
|
-
url = url.href if url.respond_to? :href
|
957
|
+
# :section: Utilities
|
552
958
|
|
553
|
-
|
554
|
-
end
|
959
|
+
attr_reader :agent # :nodoc:
|
555
960
|
|
556
|
-
|
557
|
-
|
961
|
+
attr_reader :pluggable_parser # :nodoc:
|
962
|
+
|
963
|
+
##
|
964
|
+
# Parses the +body+ of the +response+ from +uri+ using the pluggable parser
|
965
|
+
# that matches its content type
|
558
966
|
|
559
967
|
def parse uri, response, body
|
560
968
|
content_type = nil
|
@@ -567,6 +975,15 @@ class Mechanize
|
|
567
975
|
# Find our pluggable parser
|
568
976
|
parser_klass = @pluggable_parser.parser content_type
|
569
977
|
|
978
|
+
unless Mechanize::Download === parser_klass then
|
979
|
+
body = case body
|
980
|
+
when IO, Tempfile, StringIO then
|
981
|
+
body.read
|
982
|
+
else
|
983
|
+
body
|
984
|
+
end
|
985
|
+
end
|
986
|
+
|
570
987
|
parser_klass.new uri, response, body, response.code do |parser|
|
571
988
|
parser.mech = self if parser.respond_to? :mech=
|
572
989
|
|
@@ -575,6 +992,15 @@ class Mechanize
|
|
575
992
|
end
|
576
993
|
end
|
577
994
|
|
995
|
+
def pretty_print(q) # :nodoc:
|
996
|
+
q.object_group(self) {
|
997
|
+
q.breakable
|
998
|
+
q.pp cookie_jar
|
999
|
+
q.breakable
|
1000
|
+
q.pp current_page
|
1001
|
+
}
|
1002
|
+
end
|
1003
|
+
|
578
1004
|
##
|
579
1005
|
# Sets the proxy +address+ at +port+ with an optional +user+ and +password+
|
580
1006
|
|
@@ -588,29 +1014,11 @@ class Mechanize
|
|
588
1014
|
@agent.set_http
|
589
1015
|
end
|
590
1016
|
|
591
|
-
# Runs given block, then resets the page history as it was before. self is
|
592
|
-
# given as a parameter to the block. Returns the value of the block.
|
593
|
-
def transact
|
594
|
-
history_backup = @agent.history.dup
|
595
|
-
begin
|
596
|
-
yield self
|
597
|
-
ensure
|
598
|
-
@agent.history = history_backup
|
599
|
-
end
|
600
|
-
end
|
601
|
-
|
602
|
-
def robots
|
603
|
-
@agent.robots
|
604
|
-
end
|
605
|
-
|
606
|
-
def robots= enabled
|
607
|
-
@agent.robots = enabled
|
608
|
-
end
|
609
|
-
|
610
|
-
alias :page :current_page
|
611
|
-
|
612
1017
|
private
|
613
1018
|
|
1019
|
+
##
|
1020
|
+
# Posts +form+ to +uri+
|
1021
|
+
|
614
1022
|
def post_form(uri, form, headers = {})
|
615
1023
|
cur_page = form.page || current_page ||
|
616
1024
|
Page.new(nil, {'content-type'=>'text/html'})
|
@@ -630,6 +1038,9 @@ class Mechanize
|
|
630
1038
|
page
|
631
1039
|
end
|
632
1040
|
|
1041
|
+
##
|
1042
|
+
# Adds +page+ to the history
|
1043
|
+
|
633
1044
|
def add_to_history(page)
|
634
1045
|
@agent.history.push(page, @agent.resolve(page.uri))
|
635
1046
|
@history_added.call(page) if @history_added
|
@@ -640,6 +1051,8 @@ end
|
|
640
1051
|
require 'mechanize/content_type_error'
|
641
1052
|
require 'mechanize/cookie'
|
642
1053
|
require 'mechanize/cookie_jar'
|
1054
|
+
require 'mechanize/parser'
|
1055
|
+
require 'mechanize/download'
|
643
1056
|
require 'mechanize/file'
|
644
1057
|
require 'mechanize/file_connection'
|
645
1058
|
require 'mechanize/file_request'
|
@@ -648,13 +1061,17 @@ require 'mechanize/form'
|
|
648
1061
|
require 'mechanize/history'
|
649
1062
|
require 'mechanize/http'
|
650
1063
|
require 'mechanize/http/agent'
|
1064
|
+
require 'mechanize/http/auth_challenge'
|
1065
|
+
require 'mechanize/http/auth_realm'
|
1066
|
+
require 'mechanize/http/content_disposition_parser'
|
1067
|
+
require 'mechanize/http/www_authenticate_parser'
|
651
1068
|
require 'mechanize/page'
|
652
|
-
require 'mechanize/inspect'
|
653
1069
|
require 'mechanize/monkey_patch'
|
654
1070
|
require 'mechanize/pluggable_parsers'
|
655
1071
|
require 'mechanize/redirect_limit_reached_error'
|
656
1072
|
require 'mechanize/redirect_not_get_or_head_error'
|
657
1073
|
require 'mechanize/response_code_error'
|
1074
|
+
require 'mechanize/unauthorized_error'
|
658
1075
|
require 'mechanize/response_read_error'
|
659
1076
|
require 'mechanize/robots_disallowed_error'
|
660
1077
|
require 'mechanize/unsupported_scheme_error'
|