mechanize 2.0.1 → 2.1.pre.1

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of mechanize might be problematic. Click here for more details.

Files changed (148) hide show
  1. data.tar.gz.sig +0 -0
  2. data/CHANGELOG.rdoc +82 -0
  3. data/EXAMPLES.rdoc +1 -1
  4. data/FAQ.rdoc +9 -9
  5. data/Manifest.txt +35 -48
  6. data/README.rdoc +2 -1
  7. data/Rakefile +16 -3
  8. data/lib/mechanize.rb +809 -392
  9. data/lib/mechanize/content_type_error.rb +10 -11
  10. data/lib/mechanize/cookie.rb +193 -60
  11. data/lib/mechanize/cookie_jar.rb +39 -86
  12. data/lib/mechanize/download.rb +59 -0
  13. data/lib/mechanize/element_matcher.rb +1 -0
  14. data/lib/mechanize/file.rb +61 -76
  15. data/lib/mechanize/file_saver.rb +37 -35
  16. data/lib/mechanize/form.rb +475 -410
  17. data/lib/mechanize/form/button.rb +4 -7
  18. data/lib/mechanize/form/check_box.rb +10 -9
  19. data/lib/mechanize/form/field.rb +52 -42
  20. data/lib/mechanize/form/file_upload.rb +17 -19
  21. data/lib/mechanize/form/hidden.rb +3 -0
  22. data/lib/mechanize/form/image_button.rb +15 -16
  23. data/lib/mechanize/form/keygen.rb +34 -0
  24. data/lib/mechanize/form/multi_select_list.rb +20 -9
  25. data/lib/mechanize/form/option.rb +48 -47
  26. data/lib/mechanize/form/radio_button.rb +52 -45
  27. data/lib/mechanize/form/reset.rb +3 -0
  28. data/lib/mechanize/form/select_list.rb +10 -6
  29. data/lib/mechanize/form/submit.rb +3 -0
  30. data/lib/mechanize/form/text.rb +3 -0
  31. data/lib/mechanize/form/textarea.rb +3 -0
  32. data/lib/mechanize/headers.rb +17 -19
  33. data/lib/mechanize/history.rb +60 -61
  34. data/lib/mechanize/http.rb +5 -0
  35. data/lib/mechanize/http/agent.rb +485 -218
  36. data/lib/mechanize/http/auth_challenge.rb +59 -0
  37. data/lib/mechanize/http/auth_realm.rb +31 -0
  38. data/lib/mechanize/http/content_disposition_parser.rb +188 -0
  39. data/lib/mechanize/http/www_authenticate_parser.rb +155 -0
  40. data/lib/mechanize/monkey_patch.rb +14 -35
  41. data/lib/mechanize/page.rb +34 -2
  42. data/lib/mechanize/page/base.rb +6 -7
  43. data/lib/mechanize/page/frame.rb +5 -5
  44. data/lib/mechanize/page/image.rb +23 -23
  45. data/lib/mechanize/page/label.rb +16 -16
  46. data/lib/mechanize/page/link.rb +16 -0
  47. data/lib/mechanize/page/meta_refresh.rb +19 -7
  48. data/lib/mechanize/parser.rb +173 -0
  49. data/lib/mechanize/pluggable_parsers.rb +126 -83
  50. data/lib/mechanize/redirect_limit_reached_error.rb +16 -13
  51. data/lib/mechanize/redirect_not_get_or_head_error.rb +18 -16
  52. data/lib/mechanize/response_code_error.rb +16 -17
  53. data/lib/mechanize/robots_disallowed_error.rb +22 -23
  54. data/lib/mechanize/test_case.rb +659 -0
  55. data/lib/mechanize/unauthorized_error.rb +3 -0
  56. data/lib/mechanize/unsupported_scheme_error.rb +4 -6
  57. data/lib/mechanize/util.rb +0 -12
  58. data/test/htdocs/form_order_test.html +11 -0
  59. data/test/htdocs/form_test.html +2 -2
  60. data/test/htdocs/tc_links.html +1 -0
  61. data/test/test_mechanize.rb +367 -59
  62. data/test/test_mechanize_cookie.rb +69 -4
  63. data/test/test_mechanize_cookie_jar.rb +200 -124
  64. data/test/test_mechanize_download.rb +43 -0
  65. data/test/test_mechanize_file.rb +53 -45
  66. data/test/{test_mechanize_file_response.rb → test_mechanize_file_connection.rb} +2 -2
  67. data/test/test_mechanize_file_request.rb +2 -2
  68. data/test/test_mechanize_file_saver.rb +21 -0
  69. data/test/test_mechanize_form.rb +345 -46
  70. data/test/test_mechanize_form_check_box.rb +5 -4
  71. data/test/test_mechanize_form_encoding.rb +10 -16
  72. data/test/test_mechanize_form_field.rb +45 -3
  73. data/test/test_mechanize_form_file_upload.rb +20 -0
  74. data/test/test_mechanize_form_image_button.rb +2 -2
  75. data/test/test_mechanize_form_keygen.rb +32 -0
  76. data/test/test_mechanize_form_multi_select_list.rb +84 -0
  77. data/test/test_mechanize_form_option.rb +55 -0
  78. data/test/test_mechanize_form_radio_button.rb +78 -0
  79. data/test/test_mechanize_form_select_list.rb +76 -0
  80. data/test/test_mechanize_form_textarea.rb +8 -7
  81. data/test/{test_headers.rb → test_mechanize_headers.rb} +4 -2
  82. data/test/test_mechanize_history.rb +103 -0
  83. data/test/test_mechanize_http_agent.rb +525 -17
  84. data/test/test_mechanize_http_auth_challenge.rb +39 -0
  85. data/test/test_mechanize_http_auth_realm.rb +49 -0
  86. data/test/test_mechanize_http_content_disposition_parser.rb +118 -0
  87. data/test/test_mechanize_http_www_authenticate_parser.rb +146 -0
  88. data/test/test_mechanize_link.rb +10 -14
  89. data/test/test_mechanize_page.rb +118 -0
  90. data/test/test_mechanize_page_encoding.rb +48 -13
  91. data/test/test_mechanize_page_frame.rb +16 -0
  92. data/test/test_mechanize_page_link.rb +27 -19
  93. data/test/test_mechanize_page_meta_refresh.rb +26 -14
  94. data/test/test_mechanize_parser.rb +289 -0
  95. data/test/test_mechanize_pluggable_parser.rb +52 -0
  96. data/test/test_mechanize_redirect_limit_reached_error.rb +24 -0
  97. data/test/test_mechanize_redirect_not_get_or_head_error.rb +3 -7
  98. data/test/test_mechanize_subclass.rb +2 -2
  99. data/test/test_mechanize_util.rb +24 -13
  100. data/test/test_multi_select.rb +23 -22
  101. metadata +145 -114
  102. metadata.gz.sig +0 -0
  103. data/lib/mechanize/inspect.rb +0 -88
  104. data/test/helper.rb +0 -175
  105. data/test/htdocs/form_select_all.html +0 -16
  106. data/test/htdocs/form_select_none.html +0 -17
  107. data/test/htdocs/form_select_noopts.html +0 -10
  108. data/test/htdocs/iframe_test.html +0 -16
  109. data/test/htdocs/nofollow.html +0 -9
  110. data/test/htdocs/norobots.html +0 -8
  111. data/test/htdocs/rel_nofollow.html +0 -8
  112. data/test/htdocs/tc_base_images.html +0 -10
  113. data/test/htdocs/tc_images.html +0 -8
  114. data/test/htdocs/tc_no_attributes.html +0 -16
  115. data/test/htdocs/tc_radiobuttons.html +0 -17
  116. data/test/htdocs/test_bad_encoding.html +0 -52
  117. data/test/servlets.rb +0 -402
  118. data/test/ssl_server.rb +0 -48
  119. data/test/test_cookies.rb +0 -129
  120. data/test/test_form_action.rb +0 -52
  121. data/test/test_form_as_hash.rb +0 -59
  122. data/test/test_form_button.rb +0 -46
  123. data/test/test_frames.rb +0 -34
  124. data/test/test_history.rb +0 -118
  125. data/test/test_history_added.rb +0 -16
  126. data/test/test_html_unscape_forms.rb +0 -46
  127. data/test/test_if_modified_since.rb +0 -20
  128. data/test/test_images.rb +0 -19
  129. data/test/test_no_attributes.rb +0 -13
  130. data/test/test_option.rb +0 -18
  131. data/test/test_pluggable_parser.rb +0 -136
  132. data/test/test_post_form.rb +0 -37
  133. data/test/test_pretty_print.rb +0 -22
  134. data/test/test_radiobutton.rb +0 -75
  135. data/test/test_redirect_limit_reached.rb +0 -39
  136. data/test/test_referer.rb +0 -81
  137. data/test/test_relative_links.rb +0 -40
  138. data/test/test_request.rb +0 -13
  139. data/test/test_response_code.rb +0 -53
  140. data/test/test_robots.rb +0 -72
  141. data/test/test_save_file.rb +0 -48
  142. data/test/test_scheme.rb +0 -48
  143. data/test/test_select.rb +0 -119
  144. data/test/test_select_all.rb +0 -15
  145. data/test/test_select_none.rb +0 -15
  146. data/test/test_select_noopts.rb +0 -18
  147. data/test/test_set_fields.rb +0 -44
  148. data/test/test_ssl_server.rb +0 -20
data.tar.gz.sig CHANGED
Binary file
data/CHANGELOG.rdoc CHANGED
@@ -1,5 +1,87 @@
1
1
  = Mechanize CHANGELOG
2
2
 
3
+ === 2.1 / ??
4
+
5
+ * Deprecations
6
+ * Mechanize#get no longer accepts an options hash.
7
+ * Mechanize::Util::to_native_charset has been removed.
8
+
9
+ * Minor enhancements
10
+ * Mechanize now depends on net-http-persistent 2.3+. This new version
11
+ brings idle timeouts to help with the dreaded "too many connection resets"
12
+ issue when POSTing to a closed connection. Also, SSL connections will be
13
+ verified against the system certificate store by default. Issue #123
14
+ * Added Mechanize#retry_change_requests to allow mechanize to retry POST and
15
+ other non-idempotent requests when you know it is safe to do so. Issue
16
+ #123
17
+ * Mechanize can now stream files directly to disk without loading them into
18
+ memory first through Mechanize::Download, a pluggable parser for
19
+ downloading files.
20
+
21
+ All responses larger than Mechanize#max_file_buffer are downloaded to a
22
+ Tempfile. For backwards compatibility Mechanize::File subclasses still
23
+ load the response body into memory.
24
+
25
+ To force all unknown content types to download to disk instead of memory
26
+ set:
27
+
28
+ agent.pluggable_parser.default = Mechanize::Download
29
+ * Added Mechanize#content_encoding_hooks which allow handling of
30
+ non-standard content encodings like "agzip". Patch #125 by kitamomonga
31
+ * Added dom_class to elements and the element matcher like dom_id. Patch
32
+ #156 by Dan Hansen.
33
+ * Added support for the HTML5 keygen form element. See
34
+ http://dev.w3.org/html5/spec/Overview.html#the-keygen-element Patch #157
35
+ by Victor Costan.
36
+ * Mechanize no longer follows meta refreshes that have no "url=" in the
37
+ content attribute to avoid infinite loops. To follow a meta refresh to
38
+ the same page set Mechanize#follow_meta_refresh_self to true. Issue #134
39
+ by Jo Hund.
40
+ * Updated 'Mac Safari' User-Agent alias to Safari 5.1.1. 'Mac Safari 4' can
41
+ be used for the old 'Mac Safari' alias.
42
+ * When given multiple HTTP authentication options mechanize can better pick
43
+ the strongest method.
44
+ * Improvements to HTTP authorization:
45
+ * mechanize raises Mechanize::UnauthorizedError for 401 responses which is
46
+ a subclass of Mechanize::ResponseCodeError.
47
+ * Added support for NTLM authentication, but this has not been tested.
48
+ * Mechanize::Cookie.new accepts attributes in a hash.
49
+ * Mechanize::CookieJar#<<(cookie) (alias: add!) is added. Issue #139
50
+ * Different mechanize instances may now have different loggers. Issue #122
51
+ * Mechanize now accepts a proxy port as a service name or number string.
52
+ Issue #167
53
+
54
+ * Bug fixes
55
+ * Mechanize takes more care to avoid saving files with certain unsafe names.
56
+ You should still take care not to use mechanize to save files into your
57
+ home directory. Issue #163.
58
+ * Mechanize#cookie_jar= works again. Issue #126
59
+ * Mechanize now handles cookies just as most modern browsers do,
60
+ roughly based on RFC 6265.
61
+ * domain=.example.com (which is invalid) is considered identical to
62
+ domain=example.com.
63
+ * A cookie with domain=example.com is sent to host.sub.example.com
64
+ as well as host.example.com and example.com.
65
+ * A cookie with domain=TLD (no dots) is accepted and sent if the
66
+ host name is TLD, and rejected otherwise. To retain compatibility
67
+ and convention, host/domain names starting with "local" are exempt
68
+ from this rule.
69
+ * A cookie with no domain attribute is only sent to the original
70
+ host.
71
+ * A cookie with an Effective TLD is rejected based on the public
72
+ suffix list. (cf. http://publicsuffix.org/)
73
+ * "Secure" cookies are not sent via non-https connection.
74
+ * Subdomain match is not performed against an IP address.
75
+ * It is recommended that you clear out existing cookie jars for
76
+ regeneration because previously saved cookies may not have been
77
+ parsed correctly.
78
+ * The original Referer value persists on redirection. Issue #150
79
+ * Do not send a referer on a Refresh header based redirection.
80
+ * Fixed encoding error in tests when LANG=C. Patch #142 by jinschoi.
81
+ * The order of items in a form submission now matches the DOM order. Patch
82
+ #129 by kitamomonga
83
+ * Fixed proxy example in EXAMPLES. Issue #146 by NielsKSchjoedt
84
+
3
85
  === 2.0.1 / 2011-06-28
4
86
 
5
87
  Mechanize now uses minitest to avoid 1.9 vs 1.8 assertion availability in
data/EXAMPLES.rdoc CHANGED
@@ -109,7 +109,7 @@ Beautiful Soup for that page.
109
109
  require 'mechanize'
110
110
 
111
111
  agent = Mechanize.new
112
- agent.set_proxy('localhost', '8000')
112
+ agent.set_proxy 'localhost', 8000
113
113
  page = agent.get(ARGV[0])
114
114
  puts page.body
115
115
 
data/FAQ.rdoc CHANGED
@@ -1,11 +1,11 @@
1
- Q: I keep getting an EOFError:
2
- protocol.rb:133:in `sysread': end of file reached (EOFError)
3
-
4
- A: Some people have experienced an EOFError during normal mechanize usage.
5
- Most of the time this occurs because the remote website claims to support
6
- keep alives, but does not implement them correctly. Try turning off
7
- keep alives on your mechanize object:
8
-
9
- mech.keep_alive = false
1
+ Q:: Why do I keep getting an EOFError?
10
2
 
3
+ A:: For older versions of mechanize turning off keep_alive could help with the
4
+ problem, but mechanize now has more robust handling of persistent
5
+ connections.
11
6
 
7
+ Older versions of mechanize would raise an EOFError when a chunked body was
8
+ not terminated properly, a common bug of IIS servers. Since 2.0
9
+ Mechanize::ResponseReadError is raised containing the original response and
10
+ body read so far so if the server is broken you can still retrieve the
11
+ entire content.
data/Manifest.txt CHANGED
@@ -16,6 +16,7 @@ lib/mechanize.rb
16
16
  lib/mechanize/content_type_error.rb
17
17
  lib/mechanize/cookie.rb
18
18
  lib/mechanize/cookie_jar.rb
19
+ lib/mechanize/download.rb
19
20
  lib/mechanize/element_matcher.rb
20
21
  lib/mechanize/file.rb
21
22
  lib/mechanize/file_connection.rb
@@ -27,16 +28,25 @@ lib/mechanize/form/button.rb
27
28
  lib/mechanize/form/check_box.rb
28
29
  lib/mechanize/form/field.rb
29
30
  lib/mechanize/form/file_upload.rb
31
+ lib/mechanize/form/hidden.rb
30
32
  lib/mechanize/form/image_button.rb
33
+ lib/mechanize/form/keygen.rb
31
34
  lib/mechanize/form/multi_select_list.rb
32
35
  lib/mechanize/form/option.rb
33
36
  lib/mechanize/form/radio_button.rb
37
+ lib/mechanize/form/reset.rb
34
38
  lib/mechanize/form/select_list.rb
39
+ lib/mechanize/form/submit.rb
40
+ lib/mechanize/form/text.rb
41
+ lib/mechanize/form/textarea.rb
35
42
  lib/mechanize/headers.rb
36
43
  lib/mechanize/history.rb
37
44
  lib/mechanize/http.rb
38
45
  lib/mechanize/http/agent.rb
39
- lib/mechanize/inspect.rb
46
+ lib/mechanize/http/auth_challenge.rb
47
+ lib/mechanize/http/auth_realm.rb
48
+ lib/mechanize/http/content_disposition_parser.rb
49
+ lib/mechanize/http/www_authenticate_parser.rb
40
50
  lib/mechanize/monkey_patch.rb
41
51
  lib/mechanize/page.rb
42
52
  lib/mechanize/page/base.rb
@@ -45,12 +55,15 @@ lib/mechanize/page/image.rb
45
55
  lib/mechanize/page/label.rb
46
56
  lib/mechanize/page/link.rb
47
57
  lib/mechanize/page/meta_refresh.rb
58
+ lib/mechanize/parser.rb
48
59
  lib/mechanize/pluggable_parsers.rb
49
60
  lib/mechanize/redirect_limit_reached_error.rb
50
61
  lib/mechanize/redirect_not_get_or_head_error.rb
51
62
  lib/mechanize/response_code_error.rb
52
63
  lib/mechanize/response_read_error.rb
53
64
  lib/mechanize/robots_disallowed_error.rb
65
+ lib/mechanize/test_case.rb
66
+ lib/mechanize/unauthorized_error.rb
54
67
  lib/mechanize/unsupported_scheme_error.rb
55
68
  lib/mechanize/util.rb
56
69
  test/data/htpasswd
@@ -58,7 +71,6 @@ test/data/server.crt
58
71
  test/data/server.csr
59
72
  test/data/server.key
60
73
  test/data/server.pem
61
- test/helper.rb
62
74
  test/htdocs/alt_text.html
63
75
  test/htdocs/bad_form_test.html
64
76
  test/htdocs/button.jpg
@@ -71,31 +83,24 @@ test/htdocs/form_multi_select.html
71
83
  test/htdocs/form_multival.html
72
84
  test/htdocs/form_no_action.html
73
85
  test/htdocs/form_no_input_name.html
86
+ test/htdocs/form_order_test.html
74
87
  test/htdocs/form_select.html
75
- test/htdocs/form_select_all.html
76
- test/htdocs/form_select_none.html
77
- test/htdocs/form_select_noopts.html
78
88
  test/htdocs/form_set_fields.html
79
89
  test/htdocs/form_test.html
80
90
  test/htdocs/frame_referer_test.html
81
91
  test/htdocs/frame_test.html
82
92
  test/htdocs/google.html
83
- test/htdocs/iframe_test.html
84
93
  test/htdocs/index.html
85
94
  test/htdocs/link with space.html
86
95
  test/htdocs/meta_cookie.html
87
96
  test/htdocs/no_title_test.html
88
- test/htdocs/nofollow.html
89
97
  test/htdocs/noindex.html
90
- test/htdocs/norobots.html
91
98
  test/htdocs/rails_3_encoding_hack_form_test.html
92
- test/htdocs/rel_nofollow.html
93
99
  test/htdocs/relative/tc_relative_links.html
94
100
  test/htdocs/robots.html
95
101
  test/htdocs/robots.txt
96
102
  test/htdocs/tc_bad_charset.html
97
103
  test/htdocs/tc_bad_links.html
98
- test/htdocs/tc_base_images.html
99
104
  test/htdocs/tc_base_link.html
100
105
  test/htdocs/tc_blank_form.html
101
106
  test/htdocs/tc_charset.html
@@ -104,69 +109,51 @@ test/htdocs/tc_encoded_links.html
104
109
  test/htdocs/tc_field_precedence.html
105
110
  test/htdocs/tc_follow_meta.html
106
111
  test/htdocs/tc_form_action.html
107
- test/htdocs/tc_images.html
108
112
  test/htdocs/tc_links.html
109
113
  test/htdocs/tc_meta_in_body.html
110
- test/htdocs/tc_no_attributes.html
111
114
  test/htdocs/tc_pretty_print.html
112
- test/htdocs/tc_radiobuttons.html
113
115
  test/htdocs/tc_referer.html
114
116
  test/htdocs/tc_relative_links.html
115
117
  test/htdocs/tc_textarea.html
116
- test/htdocs/test_bad_encoding.html
117
118
  test/htdocs/test_click.html
118
119
  test/htdocs/unusual______.html
119
- test/servlets.rb
120
- test/ssl_server.rb
121
- test/test_cookies.rb
122
- test/test_form_action.rb
123
- test/test_form_as_hash.rb
124
- test/test_form_button.rb
125
- test/test_frames.rb
126
- test/test_headers.rb
127
- test/test_history.rb
128
- test/test_history_added.rb
129
- test/test_html_unscape_forms.rb
130
- test/test_if_modified_since.rb
131
- test/test_images.rb
132
120
  test/test_mechanize.rb
133
121
  test/test_mechanize_cookie.rb
134
122
  test/test_mechanize_cookie_jar.rb
123
+ test/test_mechanize_download.rb
135
124
  test/test_mechanize_file.rb
125
+ test/test_mechanize_file_connection.rb
136
126
  test/test_mechanize_file_request.rb
137
- test/test_mechanize_file_response.rb
127
+ test/test_mechanize_file_saver.rb
138
128
  test/test_mechanize_form.rb
139
129
  test/test_mechanize_form_check_box.rb
140
130
  test/test_mechanize_form_encoding.rb
141
131
  test/test_mechanize_form_field.rb
132
+ test/test_mechanize_form_file_upload.rb
142
133
  test/test_mechanize_form_image_button.rb
134
+ test/test_mechanize_form_keygen.rb
135
+ test/test_mechanize_form_multi_select_list.rb
136
+ test/test_mechanize_form_option.rb
137
+ test/test_mechanize_form_radio_button.rb
138
+ test/test_mechanize_form_select_list.rb
143
139
  test/test_mechanize_form_textarea.rb
140
+ test/test_mechanize_headers.rb
141
+ test/test_mechanize_history.rb
144
142
  test/test_mechanize_http_agent.rb
143
+ test/test_mechanize_http_auth_challenge.rb
144
+ test/test_mechanize_http_auth_realm.rb
145
+ test/test_mechanize_http_content_disposition_parser.rb
146
+ test/test_mechanize_http_www_authenticate_parser.rb
145
147
  test/test_mechanize_link.rb
148
+ test/test_mechanize_page.rb
146
149
  test/test_mechanize_page_encoding.rb
150
+ test/test_mechanize_page_frame.rb
147
151
  test/test_mechanize_page_link.rb
148
152
  test/test_mechanize_page_meta_refresh.rb
153
+ test/test_mechanize_parser.rb
154
+ test/test_mechanize_pluggable_parser.rb
155
+ test/test_mechanize_redirect_limit_reached_error.rb
149
156
  test/test_mechanize_redirect_not_get_or_head_error.rb
150
157
  test/test_mechanize_subclass.rb
151
158
  test/test_mechanize_util.rb
152
159
  test/test_multi_select.rb
153
- test/test_no_attributes.rb
154
- test/test_option.rb
155
- test/test_pluggable_parser.rb
156
- test/test_post_form.rb
157
- test/test_pretty_print.rb
158
- test/test_radiobutton.rb
159
- test/test_redirect_limit_reached.rb
160
- test/test_referer.rb
161
- test/test_relative_links.rb
162
- test/test_request.rb
163
- test/test_response_code.rb
164
- test/test_robots.rb
165
- test/test_save_file.rb
166
- test/test_scheme.rb
167
- test/test_select.rb
168
- test/test_select_all.rb
169
- test/test_select_none.rb
170
- test/test_select_noopts.rb
171
- test/test_set_fields.rb
172
- test/test_ssl_server.rb
data/README.rdoc CHANGED
@@ -43,6 +43,7 @@ Copyright (c) 2006-2011:
43
43
  Copyright (c) 2011:
44
44
 
45
45
  * {Eric Hodel}[http://blog.segment7.net] (drbrain@segment7.net)
46
+ * {Akinori MUSHA}[http://blog.akinori.org] (knu@idaemons.org)
46
47
 
47
48
  This library comes with a shameless plug for employing me
48
49
  (Aaron[http://tenderlovemaking.com/]) programming Ruby, my favorite language!
@@ -51,7 +52,7 @@ This library comes with a shameless plug for employing me
51
52
 
52
53
  This library was heavily influenced by its namesake in the perl world. A big
53
54
  thanks goes to Andy Lester (andy@petdance.com), the author of the original
54
- perl Mechanize which is available here[http://search.cpan.org/~petdance/WWW-Mechanize-1.20/]. Ruby Mechanize would not be around without you!
55
+ perl Mechanize which is available here[http://search.cpan.org/~petdance/WWW-Mechanize/]. Ruby Mechanize would not be around without you!
55
56
 
56
57
  Thank you to Michael Neumann for starting the Ruby version. Thanks to everyone
57
58
  who's helped out in various ways. Finally, thank you to the people using this
data/Rakefile CHANGED
@@ -4,10 +4,11 @@ require 'hoe'
4
4
  Hoe.plugin :git
5
5
  Hoe.plugin :minitest
6
6
 
7
- Hoe.spec 'mechanize' do
7
+ hoe = Hoe.spec 'mechanize' do
8
8
  developer 'Eric Hodel', 'drbrain@segment7.net'
9
9
  developer 'Aaron Patterson', 'aaronp@rubyforge.org'
10
10
  developer 'Mike Dalessio', 'mike.dalessio@gmail.com'
11
+ developer 'Akinori MUSHA', 'knu@idaemons.org'
11
12
 
12
13
  self.readme_file = 'README.rdoc'
13
14
  self.history_file = 'CHANGELOG.rdoc'
@@ -15,10 +16,12 @@ Hoe.spec 'mechanize' do
15
16
 
16
17
  rdoc_locations << 'drbrain@rubyforge.org:/var/www/gforge-projects/mechanize/'
17
18
 
18
- self.extra_deps << ['nokogiri', '~> 1.4']
19
- self.extra_deps << ['net-http-persistent', '~> 1.8']
20
19
  self.extra_deps << ['net-http-digest_auth', '~> 1.1', '>= 1.1.1']
20
+ self.extra_deps << ['net-http-persistent', '~> 2.3', '>= 2.3.2']
21
+ self.extra_deps << ['nokogiri', '~> 1.4']
22
+ self.extra_deps << ['ntlm-http', '~> 0.1', '>= 0.1.1']
21
23
  self.extra_deps << ['webrobots', '~> 0.0', '>= 0.0.9']
24
+ self.extra_deps << ['domain_name', '~> 0.5', '>= 0.5.1']
22
25
 
23
26
  self.spec_extras[:required_ruby_version] = '>= 1.8.7'
24
27
  end
@@ -34,3 +37,13 @@ task('ssl_cert') do |p|
34
37
  sh "mv server.key server.csr server.crt server.pem test/data/"
35
38
  sh "rm server.key.org"
36
39
  end
40
+
41
+ desc 'Install deps for travis to work around Hoe/RubyGems bug'
42
+ task 'travis_deps' do
43
+ hoe.spec.dependencies.each do |dep|
44
+ first_requirement = dep.requirement.requirements.first.join ' '
45
+ system('gem', 'install', dep.name, '-v', first_requirement,
46
+ '--no-rdoc', '--no-ri')
47
+ end
48
+ end
49
+
data/lib/mechanize.rb CHANGED
@@ -7,33 +7,42 @@ require 'net/http/persistent'
7
7
  require 'nkf'
8
8
  require 'nokogiri'
9
9
  require 'openssl'
10
+ require 'pp'
10
11
  require 'stringio'
11
12
  require 'uri'
12
13
  require 'webrick/httputils'
13
14
  require 'zlib'
14
15
 
15
- # = Synopsis
16
- # The Mechanize library is used for automating interaction with a website. It
17
- # can follow links, and submit forms. Form fields can be populated and
16
+ ##
17
+ # The Mechanize library is used for automating interactions with a website. It
18
+ # can follow links and submit forms. Form fields can be populated and
18
19
  # submitted. A history of URL's is maintained and can be queried.
19
20
  #
20
21
  # == Example
21
- # require 'rubygems'
22
- # require 'mechanize'
23
- # require 'logger'
24
22
  #
25
- # agent = Mechanize.new { |a| a.log = Logger.new("mech.log") }
26
- # agent.user_agent_alias = 'Mac Safari'
27
- # page = agent.get("http://www.google.com/")
28
- # search_form = page.form_with(:name => "f")
29
- # search_form.field_with(:name => "q").value = "Hello"
30
- # search_results = agent.submit(search_form)
31
- # puts search_results.body
23
+ # require 'mechanize'
24
+ # require 'logger'
25
+ #
26
+ # agent = Mechanize.new
27
+ # agent.log = Logger.new "mech.log"
28
+ # agent.user_agent_alias = 'Mac Safari'
29
+ #
30
+ # page = agent.get "http://www.google.com/"
31
+ # search_form = page.form_with :name => "f"
32
+ # search_form.field_with(:name => "q").value = "Hello"
33
+ #
34
+ # search_results = agent.submit search_form
35
+ # puts search_results.body
36
+
32
37
  class Mechanize
33
38
 
34
39
  ##
35
40
  # The version of Mechanize you are using.
36
- VERSION = '2.0.1'
41
+
42
+ VERSION = '2.1'
43
+
44
+ ##
45
+ # Base mechanize error class
37
46
 
38
47
  class Error < RuntimeError
39
48
  end
@@ -44,517 +53,916 @@ class Mechanize
44
53
  "#{RUBY_VERSION}dev#{RUBY_REVISION}"
45
54
  end
46
55
 
47
- # HTTP/1.1 keep-alives are always active. This does nothing.
48
- attr_accessor :keep_alive
56
+ ##
57
+ # Supported User-Agent aliases for use with user_agent_alias=. The
58
+ # description in parentheses is for informative purposes and is not part of
59
+ # the alias name.
60
+ #
61
+ # * Linux Firefox (3.6.1)
62
+ # * Linux Konqueror (3)
63
+ # * Linux Mozilla
64
+ # * Mac FireFox (3.6)
65
+ # * Mac Mozilla
66
+ # * Mac Safari (5)
67
+ # * Mac Safari 4
68
+ # * Mechanize (default)
69
+ # * Windows IE 6
70
+ # * Windows IE 7
71
+ # * Windows IE 8
72
+ # * Windows IE 9
73
+ # * Windows Mozilla
74
+ # * iPhone (3.0)
75
+ #
76
+ # Example:
77
+ #
78
+ # agent = Mechanize.new
79
+ # agent.user_agent_alias = 'Mac Safari'
80
+
81
+ AGENT_ALIASES = {
82
+ 'Mechanize' => "Mechanize/#{VERSION} Ruby/#{ruby_version} (http://github.com/tenderlove/mechanize/)",
83
+ 'Linux Firefox' => 'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.2.1) Gecko/20100122 firefox/3.6.1',
84
+ 'Linux Konqueror' => 'Mozilla/5.0 (compatible; Konqueror/3; Linux)',
85
+ 'Linux Mozilla' => 'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.4) Gecko/20030624',
86
+ 'Mac FireFox' => 'Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10.6; en-US; rv:1.9.2) Gecko/20100115 Firefox/3.6',
87
+ 'Mac Mozilla' => 'Mozilla/5.0 (Macintosh; U; PPC Mac OS X Mach-O; en-US; rv:1.4a) Gecko/20030401',
88
+ 'Mac Safari 4' => 'Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_2; de-at) AppleWebKit/531.21.8 (KHTML, like Gecko) Version/4.0.4 Safari/531.21.10',
89
+ 'Mac Safari' => 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_2) AppleWebKit/534.51.22 (KHTML, like Gecko) Version/5.1.1 Safari/534.51.22',
90
+ 'Windows IE 6' => 'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)',
91
+ 'Windows IE 7' => 'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; .NET CLR 1.1.4322; .NET CLR 2.0.50727)',
92
+ 'Windows IE 8' => 'Mozilla/5.0 (compatible; MSIE 8.0; Windows NT 5.1; Trident/4.0; .NET CLR 1.1.4322; .NET CLR 2.0.50727)',
93
+ 'Windows IE 9' => 'Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0)',
94
+ 'Windows Mozilla' => 'Mozilla/5.0 (Windows; U; Windows NT 5.0; en-US; rv:1.4b) Gecko/20030516 Mozilla Firebird/0.6',
95
+ 'iPhone' => 'Mozilla/5.0 (iPhone; U; CPU like Mac OS X; en) AppleWebKit/420+ (KHTML, like Gecko) Version/3.0 Mobile/1C28 Safari/419.3',
96
+ }
97
+
98
+ def self.inherited(child) # :nodoc:
99
+ child.html_parser ||= html_parser
100
+ child.log ||= log
101
+ super
102
+ end
103
+
104
+ ##
105
+ # Creates a new mechanize instance. If a block is given, the created
106
+ # instance is yielded to the block for setting up pre-connection state such
107
+ # as SSL parameters or proxies:
108
+ #
109
+ # agent = Mechanize.new do |a|
110
+ # a.proxy_host = 'proxy.example'
111
+ # a.proxy_port = 8080
112
+ # end
113
+
114
+ def initialize
115
+ @agent = Mechanize::HTTP::Agent.new
116
+ @agent.context = self
117
+ @log = nil
118
+
119
+ # attr_accessors
120
+ @agent.user_agent = AGENT_ALIASES['Mechanize']
121
+ @watch_for_set = nil
122
+ @history_added = nil
123
+
124
+ # attr_readers
125
+ @pluggable_parser = PluggableParser.new
126
+
127
+ @keep_alive_time = 0
128
+
129
+ # Proxy
130
+ @proxy_addr = nil
131
+ @proxy_port = nil
132
+ @proxy_user = nil
133
+ @proxy_pass = nil
134
+
135
+ @html_parser = self.class.html_parser
136
+
137
+ @default_encoding = nil
138
+ @force_default_encoding = false
139
+
140
+ yield self if block_given?
141
+
142
+ @agent.set_proxy @proxy_addr, @proxy_port, @proxy_user, @proxy_pass
143
+ @agent.set_http
144
+ end
145
+
146
+ # :section: History
147
+ #
148
+ # Methods for navigating and controlling history
149
+
150
+ ##
151
+ # Equivalent to the browser back button. Returns the previous page visited.
152
+
153
+ def back
154
+ @agent.history.pop
155
+ end
156
+
157
+ ##
158
+ # Returns the latest page loaded by Mechanize
159
+
160
+ def current_page
161
+ @agent.current_page
162
+ end
163
+
164
+ alias page current_page
165
+
166
+ ##
167
+ # The history of this mechanize run
168
+
169
+ def history
170
+ @agent.history
171
+ end
172
+
173
+ ##
174
+ # Maximum number of items allowed in the history.
175
+
176
+ def max_history
177
+ @agent.history.max_size
178
+ end
179
+
180
+ ##
181
+ # Sets the maximum number of items allowed in the history to +length+.
182
+
183
+ def max_history= length
184
+ @agent.history.max_size = length
185
+ end
186
+
187
+ ##
188
+ # Returns a visited page for the +url+ passed in, otherwise nil
189
+
190
+ def visited? url
191
+ url = url.href if url.respond_to? :href
192
+
193
+ @agent.visited_page url
194
+ end
195
+
196
+ ##
197
+ # Returns whether or not a url has been visited
198
+
199
+ alias visited_page visited?
200
+
201
+ # :section: Hooks
202
+ #
203
+ # Hooks into the operation of mechanize
204
+
205
+ ##
206
+ # A list of hooks to call before reading response header 'content-encoding'.
207
+ #
208
+ # The hook is called with the agent making the request, the URI of the
209
+ # request, the response and an IO containing the response body.
210
+
211
+ def content_encoding_hooks
212
+ @agent.content_encoding_hooks
213
+ end
214
+
215
+ ##
216
+ # Callback which is invoked with the page that was added to history.
217
+
218
+ attr_accessor :history_added
219
+
220
+ ##
221
+ # A list of hooks to call after retrieving a response. Hooks are called with
222
+ # the agent and the response returned.
223
+
224
+ def post_connect_hooks
225
+ @agent.post_connect_hooks
226
+ end
227
+
228
+ ##
229
+ # A list of hooks to call before making a request. Hooks are called with
230
+ # the agent and the request to be performed.
231
+
232
+ def pre_connect_hooks
233
+ @agent.pre_connect_hooks
234
+ end
235
+
236
+ # :section: Requests
237
+ #
238
+ # Methods for making HTTP requests
239
+
240
+ ##
241
+ # If the parameter is a string, finds the button or link with the
242
+ # value of the string on the current page and clicks it. Otherwise, clicks
243
+ # the Mechanize::Page::Link object passed in. Returns the page fetched.
244
+
245
+ def click link
246
+ case link
247
+ when Page::Link then
248
+ referer = link.page || current_page()
249
+ if @agent.robots
250
+ if (referer.is_a?(Page) and referer.parser.nofollow?) or
251
+ link.rel?('nofollow') then
252
+ raise RobotsDisallowedError.new(link.href)
253
+ end
254
+ end
255
+ if link.rel?('noreferrer')
256
+ href = @agent.resolve(link.href, link.page || current_page)
257
+ referer = Page.new(nil, {'content-type'=>'text/html'})
258
+ else
259
+ href = link.href
260
+ end
261
+ get href, [], referer
262
+ when String, Regexp then
263
+ if real_link = page.link_with(:text => link)
264
+ click real_link
265
+ else
266
+ button = nil
267
+ form = page.forms.find do |f|
268
+ button = f.button_with(:value => link)
269
+ button.is_a? Form::Submit
270
+ end
271
+ submit form, button if form
272
+ end
273
+ else
274
+ referer = current_page()
275
+ href = link.respond_to?(:href) ? link.href :
276
+ (link['href'] || link['src'])
277
+ get href, [], referer
278
+ end
279
+ end
280
+
281
+ ##
282
+ # DELETE +uri+ with +query_params+, and setting +headers+:
283
+ #
284
+ # delete('http://example/', {'q' => 'foo'}, {})
285
+
286
+ def delete(uri, query_params = {}, headers = {})
287
+ page = @agent.fetch(uri, :delete, headers, query_params)
288
+ add_to_history(page)
289
+ page
290
+ end
291
+
292
+ ##
293
+ # GET the +uri+ with the given request +parameters+, +referer+ and
294
+ # +headers+.
295
+ #
296
+ # The +referer+ may be a URI or a page.
297
+
298
+ def get(uri, parameters = [], referer = nil, headers = {})
299
+ method = :get
300
+
301
+ referer ||=
302
+ if uri.to_s =~ %r{\Ahttps?://}
303
+ Page.new(nil, {'content-type'=>'text/html'})
304
+ else
305
+ current_page || Page.new(nil, {'content-type'=>'text/html'})
306
+ end
307
+
308
+ # FIXME: Huge hack so that using a URI as a referer works. I need to
309
+ # refactor everything to pass around URIs but still support
310
+ # Mechanize::Page#base
311
+ unless referer.is_a?(Mechanize::File)
312
+ referer = referer.is_a?(String) ?
313
+ Page.new(URI.parse(referer), {'content-type' => 'text/html'}) :
314
+ Page.new(referer, {'content-type' => 'text/html'})
315
+ end
316
+
317
+ # fetch the page
318
+ headers ||= {}
319
+ page = @agent.fetch uri, method, headers, parameters, referer
320
+ add_to_history(page)
321
+ yield page if block_given?
322
+ page
323
+ end
324
+
325
+ ##
326
+ # GET +url+ and return only its contents
327
+
328
+ def get_file(url)
329
+ get(url).body
330
+ end
331
+
332
+ ##
333
+ # HEAD +uri+ with +query_params+, and setting +headers+:
334
+ #
335
+ # head('http://example/', {'q' => 'foo'}, {})
336
+
337
+ def head(uri, query_params = {}, headers = {})
338
+ # fetch the page
339
+ page = @agent.fetch(uri, :head, headers, query_params)
340
+ yield page if block_given?
341
+ page
342
+ end
343
+
344
+ ##
345
+ # POST to the given +uri+ with the given +query+. The query is specified by
346
+ # either a string, or a list of key-value pairs represented by a hash or an
347
+ # array of arrays.
348
+ #
349
+ # Examples:
350
+ # agent.post 'http://example.com/', "foo" => "bar"
351
+ #
352
+ # agent.post 'http://example.com/', [%w[foo bar]]
353
+ #
354
+ # agent.post('http://example.com/', "<message>hello</message>",
355
+ # 'Content-Type' => 'application/xml')
356
+
357
+ def post(uri, query={}, headers={})
358
+ return request_with_entity(:post, uri, query, headers) if String === query
359
+
360
+ node = {}
361
+ # Create a fake form
362
+ class << node
363
+ def search(*args); []; end
364
+ end
365
+ node['method'] = 'POST'
366
+ node['enctype'] = 'application/x-www-form-urlencoded'
367
+
368
+ form = Form.new(node)
369
+
370
+ query.each { |k, v|
371
+ if v.is_a?(IO)
372
+ form.enctype = 'multipart/form-data'
373
+ ul = Form::FileUpload.new({'name' => k.to_s},::File.basename(v.path))
374
+ ul.file_data = v.read
375
+ form.file_uploads << ul
376
+ else
377
+ form.fields << Form::Field.new({'name' => k.to_s},v)
378
+ end
379
+ }
380
+ post_form(uri, form, headers)
381
+ end
382
+
383
+ ##
384
+ # PUT to +uri+ with +entity+, and setting +headers+:
385
+ #
386
+ # put('http://example/', 'new content', {'Content-Type' => 'text/plain'})
387
+
388
+ def put(uri, entity, headers = {})
389
+ request_with_entity(:put, uri, entity, headers)
390
+ end
391
+
392
+ ##
393
+ # Makes an HTTP request to +uri+ using HTTP method +verb+. +entity+ is used
394
+ # as the request body, if allowed.
395
+
396
+ def request_with_entity(verb, uri, entity, headers = {})
397
+ cur_page = current_page || Page.new(nil, {'content-type'=>'text/html'})
398
+
399
+ headers = {
400
+ 'Content-Type' => 'application/octet-stream',
401
+ 'Content-Length' => entity.size.to_s,
402
+ }.update headers
403
+
404
+ page = @agent.fetch uri, verb, headers, [entity], cur_page
405
+ add_to_history(page)
406
+ page
407
+ end
408
+
409
+ ##
410
+ # Submits +form+ with an optional +button+.
411
+ #
412
+ # Without a button:
413
+ #
414
+ # page = agent.get('http://example.com')
415
+ # agent.submit(page.forms.first)
416
+ #
417
+ # With a button:
418
+ #
419
+ # agent.submit(page.forms.first, page.forms.first.buttons.first)
420
+
421
+ def submit(form, button=nil, headers={})
422
+ form.add_button_to_query(button) if button
423
+
424
+ case form.method.upcase
425
+ when 'POST'
426
+ post_form(form.action, form, headers)
427
+ when 'GET'
428
+ get(form.action.gsub(/\?[^\?]*$/, ''),
429
+ form.build_query,
430
+ form.page,
431
+ headers)
432
+ else
433
+ raise ArgumentError, "unsupported method: #{form.method.upcase}"
434
+ end
435
+ end
436
+
437
+ ##
438
+ # Runs given block, then resets the page history as it was before. self is
439
+ # given as a parameter to the block. Returns the value of the block.
440
+
441
+ def transact
442
+ history_backup = @agent.history.dup
443
+ begin
444
+ yield self
445
+ ensure
446
+ @agent.history = history_backup
447
+ end
448
+ end
449
+
450
+ # :section: Settings
451
+ #
452
+ # Settings that adjust how mechanize makes HTTP requests including timeouts,
453
+ # keep-alives, compression, redirects and headers.
454
+
455
+ @html_parser = Nokogiri::HTML
456
+
457
+ class << self
458
+
459
+ ##
460
+ # Default HTML parser for all mechanize instances
461
+ #
462
+ # Mechanize.html_parser = Nokogiri::XML
463
+
464
+ attr_accessor :html_parser
465
+
466
+ ##
467
+ # Default logger for all mechanize instances
468
+ #
469
+ # Mechanize.log = Logger.new $stderr
470
+
471
+ attr_accessor :log
472
+
473
+ end
474
+
475
+ ##
476
+ # A default encoding name used when parsing HTML. When set it is
477
+ # used after any other encoding. The default is nil.
478
+
479
+ attr_accessor :default_encoding
480
+
481
+ ##
482
+ # Overrides the encodings given by the HTTP server and the HTML page with
483
+ # the default_encoding when set to true.
484
+
485
+ attr_accessor :force_default_encoding
486
+
487
+ ##
488
+ # The HTML parser to be used when parsing documents
489
+
490
+ attr_accessor :html_parser
49
491
 
492
+ ##
50
493
  # HTTP/1.0 keep-alive time. This is no longer supported by mechanize as it
51
494
  # now uses net-http-persistent which only supports HTTP/1.1 persistent
52
495
  # connections
496
+
53
497
  attr_accessor :keep_alive_time
54
498
 
55
499
  ##
56
- # User Agent aliases
500
+ # The HTTP proxy address
57
501
 
58
- AGENT_ALIASES = {
59
- 'Windows IE 6' => 'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)',
60
- 'Windows IE 7' => 'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; .NET CLR 1.1.4322; .NET CLR 2.0.50727)',
61
- 'Windows Mozilla' => 'Mozilla/5.0 (Windows; U; Windows NT 5.0; en-US; rv:1.4b) Gecko/20030516 Mozilla Firebird/0.6',
62
- 'Mac Safari' => 'Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_2; de-at) AppleWebKit/531.21.8 (KHTML, like Gecko) Version/4.0.4 Safari/531.21.10',
63
- 'Mac FireFox' => 'Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10.6; en-US; rv:1.9.2) Gecko/20100115 Firefox/3.6',
64
- 'Mac Mozilla' => 'Mozilla/5.0 (Macintosh; U; PPC Mac OS X Mach-O; en-US; rv:1.4a) Gecko/20030401',
65
- 'Linux Mozilla' => 'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.4) Gecko/20030624',
66
- 'Linux Firefox' => 'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.2.1) Gecko/20100122 firefox/3.6.1',
67
- 'Linux Konqueror' => 'Mozilla/5.0 (compatible; Konqueror/3; Linux)',
68
- 'iPhone' => 'Mozilla/5.0 (iPhone; U; CPU like Mac OS X; en) AppleWebKit/420+ (KHTML, like Gecko) Version/3.0 Mobile/1C28 Safari/419.3',
69
- 'Mechanize' => "Mechanize/#{VERSION} Ruby/#{ruby_version} (http://github.com/tenderlove/mechanize/)"
70
- }
502
+ attr_reader :proxy_addr
71
503
 
72
- # A Mechanize::CookieJar which stores cookies
504
+ ##
505
+ # The HTTP proxy password
73
506
 
74
- def cookie_jar
75
- @agent.cookie_jar
76
- end
507
+ attr_reader :proxy_pass
77
508
 
78
- def cookie_jar= cookie_jar
79
- @agent.cookie_jar = cookie_jar
80
- end
509
+ ##
510
+ # The HTTP proxy port
81
511
 
82
- # Length of time to wait until a connection is opened in seconds
83
- def open_timeout
84
- @agent.open_timeout
85
- end
512
+ attr_reader :proxy_port
86
513
 
87
- def open_timeout= open_timeout
88
- @agent.open_timeout = open_timeout
89
- end
514
+ ##
515
+ # The HTTP proxy username
90
516
 
91
- # Length of time to attempt to read data from the server
92
- def read_timeout
93
- @agent.read_timeout
94
- end
517
+ attr_reader :proxy_user
95
518
 
96
- def read_timeout= read_timeout
97
- @agent.read_timeout = read_timeout
98
- end
519
+ ##
520
+ # Sets the user and password to be used for HTTP authentication.
99
521
 
100
- # The identification string for the client initiating a web request
101
- def user_agent
102
- @agent.user_agent
522
+ def auth(user, password)
523
+ @agent.user = user
524
+ @agent.password = password
103
525
  end
104
526
 
105
- # The value of watch_for_set is passed to pluggable parsers for retrieved
106
- # content
107
- attr_accessor :watch_for_set
527
+ alias basic_auth auth
108
528
 
109
- # Path to an OpenSSL server certificate file
110
- def ca_file
111
- @agent.ca_file
112
- end
529
+ ##
530
+ # Are If-Modified-Since conditional requests enabled?
113
531
 
114
- def ca_file= ca_file
115
- @agent.ca_file = ca_file
532
+ def conditional_requests
533
+ @agent.conditional_requests
116
534
  end
117
535
 
118
- def certificate
119
- @agent.certificate
120
- end
536
+ ##
537
+ # Disables If-Modified-Since conditional requests (enabled by default)
121
538
 
122
- # An OpenSSL private key or the path to a private key
123
- def key
124
- @agent.key
539
+ def conditional_requests= enabled
540
+ @agent.conditional_requests = enabled
125
541
  end
126
542
 
127
- def key= key
128
- @agent.key = key
543
+ ##
544
+ # A Mechanize::CookieJar which stores cookies
545
+
546
+ def cookie_jar
547
+ @agent.cookie_jar
129
548
  end
130
549
 
131
- # An OpenSSL client certificate or the path to a certificate file.
132
- def cert
133
- @agent.cert
550
+ ##
551
+ # Replaces the cookie jar with +cookie_jar+
552
+
553
+ def cookie_jar= cookie_jar
554
+ @agent.cookie_jar = cookie_jar
134
555
  end
135
556
 
136
- def cert= cert
137
- @agent.cert = cert
557
+ ##
558
+ # Returns a list of cookies stored in the cookie jar.
559
+
560
+ def cookies
561
+ @agent.cookie_jar.to_a
138
562
  end
139
563
 
140
- # OpenSSL key password
141
- def pass
142
- @agent.pass
564
+ ##
565
+ # Follow HTML meta refresh and HTTP Refresh headers. If set to +:anywhere+
566
+ # meta refresh tags outside of the head element will be followed.
567
+
568
+ def follow_meta_refresh
569
+ @agent.follow_meta_refresh
143
570
  end
144
571
 
145
- def pass= pass
146
- @agent.pass = pass
572
+ ##
573
+ # Controls following of HTML meta refresh and HTTP Refresh headers in
574
+ # responses.
575
+
576
+ def follow_meta_refresh= follow
577
+ @agent.follow_meta_refresh = follow
147
578
  end
148
579
 
149
- # Controls how this agent deals with redirects. The following values are
150
- # allowed:
580
+ ##
581
+ # Follow an HTML meta refresh and HTTP Refresh headers that have no "url="
582
+ # in the content attribute.
151
583
  #
152
- # :all, true:: All 3xx redirects are followed (default)
153
- # :permanent:: Only 301 Moved Permanantly redirects are followed
154
- # false:: No redirects are followed
584
+ # Defaults to false to prevent infinite refresh loops.
155
585
 
156
- def redirect_ok
157
- @agent.redirect_ok
586
+ def follow_meta_refresh_self
587
+ @agent.follow_meta_refresh_self
158
588
  end
159
589
 
160
- def redirect_ok= follow
161
- @agent.redirect_ok = follow
590
+ ##
591
+ # Alters the following of HTML meta refresh and HTTP Refresh headers that
592
+ # point to the same page.
593
+
594
+ def follow_meta_refresh_self= follow
595
+ @agent.follow_meta_refresh_self = follow
162
596
  end
163
597
 
598
+ ##
599
+ # Is gzip compression of responses enabled?
600
+
164
601
  def gzip_enabled
165
602
  @agent.gzip_enabled
166
603
  end
167
604
 
605
+ ##
168
606
  # Disables HTTP/1.1 gzip compression (enabled by default)
607
+
169
608
  def gzip_enabled=enabled
170
609
  @agent.gzip_enabled = enabled
171
610
  end
172
611
 
173
- def conditional_requests
174
- @agent.conditional_requests
175
- end
612
+ ##
613
+ # Connections that have not been used in this many seconds will be reset.
176
614
 
177
- # Disables If-Modified-Since conditional requests (enabled by default)
178
- def conditional_requests= enabled
179
- @agent.conditional_requests = enabled
615
+ def idle_timeout
616
+ @agent.idle_timeout
180
617
  end
181
618
 
182
- # Follow HTML meta refresh. If set to +:anywhere+ meta refresh tags outside
183
- # of the head element will be followed.
184
- def follow_meta_refresh
185
- @agent.follow_meta_refresh
619
+ # Sets the idle timeout to +idle_timeout+. The default timeout is 5
620
+ # seconds. If you experience "too many connection resets", reducing this
621
+ # value may help.
622
+
623
+ def idle_timeout= idle_timeout
624
+ @agent.idle_timeout = idle_timeout
186
625
  end
187
626
 
188
- def follow_meta_refresh= follow
189
- @agent.follow_meta_refresh = follow
627
+ ##
628
+ # Are HTTP/1.1 keep-alive connections enabled?
629
+
630
+ def keep_alive
631
+ @agent.keep_alive
190
632
  end
191
633
 
192
- # A callback for additional certificate verification. See
193
- # OpenSSL::SSL::SSLContext#verify_callback
634
+ ##
635
+ # Disable HTTP/1.1 keep-alive connections if +enable+ is set to false. If
636
+ # you are experiencing "too many connection resets" errors setting this to
637
+ # false will eliminate them.
194
638
  #
195
- # The callback can be used for debugging or to ignore errors by always
196
- # returning +true+. Specifying nil uses the default method that was valid
197
- # when the SSLContext was created
198
- def verify_callback
199
- @agent.verify_callback
639
+ # You should first investigate reducing idle_timeout.
640
+
641
+ def keep_alive= enable
642
+ @agent.keep_alive = enable
200
643
  end
201
644
 
202
- def verify_callback= verify_callback
203
- @agent.verify_callback = verify_callback
645
+ ##
646
+ # The current logger. If no logger has been set Mechanize.log is used.
647
+
648
+ def log
649
+ @log || Mechanize.log
204
650
  end
205
651
 
206
- attr_accessor :history_added
652
+ ##
653
+ # Sets the +logger+ used by this instance of mechanize
207
654
 
208
- def redirection_limit
209
- @agent.redirection_limit
655
+ def log= logger
656
+ @log = logger
210
657
  end
211
658
 
212
- def redirection_limit= limit
213
- @agent.redirection_limit = limit
659
+ ##
660
+ # Responses larger than this will be written to a Tempfile instead of stored
661
+ # in memory. The default is 10240 bytes
662
+
663
+ def max_file_buffer
664
+ @agent.max_file_buffer
214
665
  end
215
666
 
216
- def scheme_handlers
217
- @agent.scheme_handlers
667
+ ##
668
+ # Sets the maximum size of a response body that will be stored in memory to
669
+ # +bytes+
670
+
671
+ def max_file_buffer= bytes
672
+ @agent.max_file_buffer = bytes
218
673
  end
219
674
 
220
- def scheme_handlers= scheme_handlers
221
- @agent.scheme_handlers = scheme_handlers
675
+ ##
676
+ # Length of time to wait until a connection is opened in seconds
677
+
678
+ def open_timeout
679
+ @agent.open_timeout
222
680
  end
223
681
 
224
- # A hash of custom request headers
225
- def request_headers
226
- @agent.request_headers
682
+ ##
683
+ # Sets the connection open timeout to +open_timeout+
684
+
685
+ def open_timeout= open_timeout
686
+ @agent.open_timeout = open_timeout
227
687
  end
228
688
 
229
- def request_headers= request_headers
230
- @agent.request_headers = request_headers
689
+ ##
690
+ # Length of time to wait for data from the server
691
+
692
+ def read_timeout
693
+ @agent.read_timeout
231
694
  end
232
695
 
233
- # Proxy settings
234
- attr_reader :proxy_addr
235
- attr_reader :proxy_pass
236
- attr_reader :proxy_port
237
- attr_reader :proxy_user
696
+ ##
697
+ # Sets the timeout for each chunk of data read from the server to
698
+ # +read_timeout+. A single request may read many chunks of data.
238
699
 
239
- # The HTML parser to be used when parsing documents
240
- attr_accessor :html_parser
700
+ def read_timeout= read_timeout
701
+ @agent.read_timeout = read_timeout
702
+ end
241
703
 
242
- attr_reader :agent # :nodoc:
704
+ ##
705
+ # Controls how mechanize deals with redirects. The following values are
706
+ # allowed:
707
+ #
708
+ # :all, true:: All 3xx redirects are followed (default)
709
+ # :permanent:: Only 301 Moved Permanently redirects are followed
710
+ # false:: No redirects are followed
243
711
 
244
- def history
245
- @agent.history
712
+ def redirect_ok
713
+ @agent.redirect_ok
246
714
  end
247
715
 
248
- attr_reader :pluggable_parser
716
+ alias follow_redirect? redirect_ok
249
717
 
250
- # A list of hooks to call after retrieving a response. Hooks are called with
251
- # the agent and the response returned.
718
+ ##
719
+ # Sets the mechanize redirect handling policy. See redirect_ok for allowed
720
+ # values
252
721
 
253
- def post_connect_hooks
254
- @agent.post_connect_hooks
722
+ def redirect_ok= follow
723
+ @agent.redirect_ok = follow
255
724
  end
256
725
 
257
- # A list of hooks to call before making a request. Hooks are called with
258
- # the agent and the request to be performed.
726
+ ##
727
+ # Maximum number of redirections to follow
259
728
 
260
- def pre_connect_hooks
261
- @agent.pre_connect_hooks
729
+ def redirection_limit
730
+ @agent.redirection_limit
262
731
  end
263
732
 
264
- alias follow_redirect? redirect_ok
733
+ ##
734
+ # Sets the maximum number of redirections to follow to +limit+
265
735
 
266
- @html_parser = Nokogiri::HTML
267
- class << self
268
- attr_accessor :html_parser, :log
736
+ def redirection_limit= limit
737
+ @agent.redirection_limit = limit
738
+ end
269
739
 
270
- def inherited(child)
271
- child.html_parser ||= html_parser
272
- child.log ||= log
273
- super
274
- end
740
+ ##
741
+ # A hash of custom request headers that will be sent on every request
742
+
743
+ def request_headers
744
+ @agent.request_headers
275
745
  end
276
746
 
277
- # A default encoding name used when parsing HTML parsing. When set it is
278
- # used after any other encoding. The default is nil.
747
+ ##
748
+ # Replaces the custom request headers that will be sent on every request
749
+ # with +request_headers+
279
750
 
280
- attr_accessor :default_encoding
751
+ def request_headers= request_headers
752
+ @agent.request_headers = request_headers
753
+ end
281
754
 
282
- # Overrides the encodings given by the HTTP server and the HTML page with
283
- # the default_encoding when set to true.
284
- attr_accessor :force_default_encoding
755
+ ##
756
+ # Retry POST and other non-idempotent requests. See RFC 2616 9.1.2.
285
757
 
286
- def initialize
287
- @agent = Mechanize::HTTP::Agent.new
288
- @agent.context = self
758
+ def retry_change_requests
759
+ @agent.retry_change_requests
760
+ end
289
761
 
290
- # attr_accessors
291
- @agent.user_agent = AGENT_ALIASES['Mechanize']
292
- @watch_for_set = nil
293
- @history_added = nil
762
+ ##
763
+ # When setting +retry_change_requests+ to true you are stating that, for all
764
+ # the URLs you access with mechanize, making POST and other non-idempotent
765
+ # requests is safe and will not cause data duplication or other harmful
766
+ # results.
767
+ #
768
+ # If you are experiencing "too many connection resets" errors you should
769
+ # instead investigate reducing the idle_timeout or disabling keep_alive
770
+ # connections.
294
771
 
295
- # attr_readers
296
- @pluggable_parser = PluggableParser.new
772
+ def retry_change_requests= retry_change_requests
773
+ @agent.retry_change_requests = retry_change_requests
774
+ end
297
775
 
298
- @keep_alive = true
299
- @keep_alive_time = 0
776
+ ##
777
+ # Will <code>/robots.txt</code> files be obeyed?
300
778
 
301
- # Proxy
302
- @proxy_addr = nil
303
- @proxy_port = nil
304
- @proxy_user = nil
305
- @proxy_pass = nil
779
+ def robots
780
+ @agent.robots
781
+ end
306
782
 
307
- @html_parser = self.class.html_parser
783
+ ##
784
+ # When +enabled+ mechanize will retrieve and obey <code>robots.txt</code>
785
+ # files
308
786
 
309
- @default_encoding = nil
310
- @force_default_encoding = false
787
+ def robots= enabled
788
+ @agent.robots = enabled
789
+ end
311
790
 
312
- yield self if block_given?
791
+ ##
792
+ # The handlers for HTTP and other URI protocols.
313
793
 
314
- @agent.set_proxy @proxy_addr, @proxy_port, @proxy_user, @proxy_pass
315
- @agent.set_http
794
+ def scheme_handlers
795
+ @agent.scheme_handlers
316
796
  end
317
797
 
318
- def max_history
319
- @agent.history.max_size
798
+ ##
799
+ # Replaces the URI scheme handler table with +scheme_handlers+
800
+
801
+ def scheme_handlers= scheme_handlers
802
+ @agent.scheme_handlers = scheme_handlers
320
803
  end
321
804
 
322
- def max_history= length
323
- @agent.history.max_size = length
805
+ ##
806
+ # The identification string for the client initiating a web request
807
+
808
+ def user_agent
809
+ @agent.user_agent
324
810
  end
325
811
 
326
- def log=(l); self.class.log = l end
327
- def log; self.class.log end
812
+ ##
813
+ # Sets the User-Agent used by mechanize to +user_agent+. See also
814
+ # user_agent_alias
328
815
 
329
816
  def user_agent= user_agent
330
817
  @agent.user_agent = user_agent
331
818
  end
332
819
 
333
- # Set the user agent for the Mechanize object. See AGENT_ALIASES
334
- def user_agent_alias=(al)
335
- self.user_agent = AGENT_ALIASES[al] ||
336
- raise(ArgumentError, "unknown agent alias #{al.inspect}")
337
- end
820
+ ##
821
+ # Set the user agent for the Mechanize object based on the given +name+.
822
+ #
823
+ # See also AGENT_ALIASES
338
824
 
339
- # Returns a list of cookies stored in the cookie jar.
340
- def cookies
341
- @agent.cookie_jar.to_a
825
+ def user_agent_alias= name
826
+ self.user_agent = AGENT_ALIASES[name] ||
827
+ raise(ArgumentError, "unknown agent alias #{name.inspect}")
342
828
  end
343
829
 
344
- # Sets the user and password to be used for authentication.
345
- def auth(user, password)
346
- @agent.user = user
347
- @agent.password = password
830
+ ##
831
+ # The value of watch_for_set is passed to pluggable parsers for retrieved
832
+ # content
833
+
834
+ attr_accessor :watch_for_set
835
+
836
+ # :section: SSL
837
+ #
838
+ # SSL settings for mechanize. These must be set in the block given to
839
+ # Mechanize.new
840
+
841
+ ##
842
+ # Path to an OpenSSL server certificate file
843
+
844
+ def ca_file
845
+ @agent.ca_file
348
846
  end
349
847
 
350
- alias :basic_auth :auth
848
+ ##
849
+ # Sets the certificate file used for SSL connections
351
850
 
352
- # Fetches the URL passed in and returns a page.
353
- def get(uri, parameters = [], referer = nil, headers = {})
354
- method = :get
851
+ def ca_file= ca_file
852
+ @agent.ca_file = ca_file
853
+ end
355
854
 
356
- if Hash === uri then
357
- options = uri
358
- location = Gem.location_of_caller.join ':'
359
- warn "#{location}: Mechanize#get with options hash is deprecated and will be removed October 2011"
855
+ ##
856
+ # An OpenSSL client certificate or the path to a certificate file.
360
857
 
361
- raise ArgumentError, "url must be specified" unless uri = options[:url]
362
- parameters = options[:params] || []
363
- referer = options[:referer]
364
- headers = options[:headers]
365
- method = options[:verb] || method
366
- end
858
+ def cert
859
+ @agent.cert
860
+ end
367
861
 
368
- referer ||=
369
- if uri.to_s =~ %r{\Ahttps?://}
370
- Page.new(nil, {'content-type'=>'text/html'})
371
- else
372
- current_page || Page.new(nil, {'content-type'=>'text/html'})
373
- end
862
+ ##
863
+ # Sets the OpenSSL client certificate +cert+ to the given path or
864
+ # certificate instance
374
865
 
375
- # FIXME: Huge hack so that using a URI as a referer works. I need to
376
- # refactor everything to pass around URIs but still support
377
- # Mechanize::Page#base
378
- unless referer.is_a?(Mechanize::File)
379
- referer = referer.is_a?(String) ?
380
- Page.new(URI.parse(referer), {'content-type' => 'text/html'}) :
381
- Page.new(referer, {'content-type' => 'text/html'})
382
- end
866
+ def cert= cert
867
+ @agent.cert = cert
868
+ end
383
869
 
384
- # fetch the page
385
- headers ||= {}
386
- page = @agent.fetch uri, method, headers, parameters, referer
387
- add_to_history(page)
388
- yield page if block_given?
389
- page
870
+ ##
871
+ # An OpenSSL certificate store for verifying server certificates. This
872
+ # defaults to the default certificate store.
873
+
874
+ def cert_store
875
+ @agent.cert_store
390
876
  end
391
877
 
392
878
  ##
393
- # PUT to +url+ with +entity+, and setting +headers+:
394
- #
395
- # put('http://example/', 'new content', {'Content-Type' => 'text/plain'})
396
- #
397
- def put(url, entity, headers = {})
398
- request_with_entity(:put, url, entity, headers)
879
+ # Sets the OpenSSL certificate store to +cert_store+.
880
+
881
+ def cert_store= cert_store
882
+ @agent.cert_store = cert_store
399
883
  end
400
884
 
401
885
  ##
402
- # DELETE to +url+ with +query_params+, and setting +headers+:
403
- #
404
- # delete('http://example/', {'q' => 'foo'}, {})
886
+ # What is this?
405
887
  #
406
- def delete(uri, query_params = {}, headers = {})
407
- page = @agent.fetch(uri, :delete, headers, query_params)
408
- add_to_history(page)
409
- page
888
+ # Why is it different from #cert?
889
+
890
+ def certificate # :nodoc:
891
+ @agent.certificate
410
892
  end
411
893
 
412
894
  ##
413
- # HEAD to +url+ with +query_params+, and setting +headers+:
414
- #
415
- # head('http://example/', {'q' => 'foo'}, {})
416
- #
417
- def head(uri, query_params = {}, headers = {})
418
- # fetch the page
419
- page = @agent.fetch(uri, :head, headers, query_params)
420
- yield page if block_given?
421
- page
895
+ # An OpenSSL private key or the path to a private key
896
+
897
+ def key
898
+ @agent.key
422
899
  end
423
900
 
424
- # Fetch a file and return the contents of the file.
425
- def get_file(url)
426
- get(url).body
901
+ ##
902
+ # Sets the OpenSSL client +key+ to the given path or key instance
903
+
904
+ def key= key
905
+ @agent.key = key
427
906
  end
428
907
 
429
- # If the parameter is a string, finds the button or link with the
430
- # value of the string and clicks it. Otherwise, clicks the
431
- # Mechanize::Page::Link object passed in. Returns the page fetched.
432
- def click(link)
433
- case link
434
- when Page::Link
435
- referer = link.page || current_page()
436
- if @agent.robots
437
- if (referer.is_a?(Page) && referer.parser.nofollow?) || link.rel?('nofollow')
438
- raise RobotsDisallowedError.new(link.href)
439
- end
440
- end
441
- if link.rel?('noreferrer')
442
- href = @agent.resolve(link.href, link.page || current_page)
443
- referer = Page.new(nil, {'content-type'=>'text/html'})
444
- else
445
- href = link.href
446
- end
447
- get href, [], referer
448
- when String, Regexp
449
- if real_link = page.link_with(:text => link)
450
- click real_link
451
- else
452
- button = nil
453
- form = page.forms.find do |f|
454
- button = f.button_with(:value => link)
455
- button.is_a? Form::Submit
456
- end
457
- submit form, button if form
458
- end
459
- else
460
- referer = current_page()
461
- href = link.respond_to?(:href) ? link.href :
462
- (link['href'] || link['src'])
463
- get href, [], referer
464
- end
908
+ ##
909
+ # OpenSSL client key password
910
+
911
+ def pass
912
+ @agent.pass
465
913
  end
466
914
 
467
- # Equivalent to the browser back button. Returns the most recent page
468
- # visited.
469
- def back
470
- @agent.history.pop
915
+ ##
916
+ # Sets the client key password to +pass+
917
+
918
+ def pass= pass
919
+ @agent.pass = pass
471
920
  end
472
921
 
473
- # Posts to the given URL with the request entity. The request
474
- # entity is specified by either a string, or a list of key-value
475
- # pairs represented by a hash or an array of arrays.
476
- #
477
- # Examples:
478
- # agent.post('http://example.com/', "foo" => "bar")
479
- #
480
- # agent.post('http://example.com/', [ ["foo", "bar"] ])
922
+ ##
923
+ # A callback for additional certificate verification. See
924
+ # OpenSSL::SSL::SSLContext#verify_callback
481
925
  #
482
- # agent.post('http://example.com/', "<message>hello</message>", 'Content-Type' => 'application/xml')
483
- def post(url, query={}, headers={})
484
- if query.is_a?(String)
485
- return request_with_entity(:post, url, query, headers)
486
- end
487
- node = {}
488
- # Create a fake form
489
- class << node
490
- def search(*args); []; end
491
- end
492
- node['method'] = 'POST'
493
- node['enctype'] = 'application/x-www-form-urlencoded'
494
-
495
- form = Form.new(node)
926
+ # The callback can be used for debugging or to ignore errors by always
927
+ # returning +true+. Specifying nil uses the default method that was valid
928
+ # when the SSLContext was created
496
929
 
497
- query.each { |k, v|
498
- if v.is_a?(IO)
499
- form.enctype = 'multipart/form-data'
500
- ul = Form::FileUpload.new({'name' => k.to_s},::File.basename(v.path))
501
- ul.file_data = v.read
502
- form.file_uploads << ul
503
- else
504
- form.fields << Form::Field.new({'name' => k.to_s},v)
505
- end
506
- }
507
- post_form(url, form, headers)
930
+ def verify_callback
931
+ @agent.verify_callback
508
932
  end
509
933
 
510
- # Submit a form with an optional button.
511
- # Without a button:
512
- # page = agent.get('http://example.com')
513
- # agent.submit(page.forms.first)
514
- # With a button
515
- # agent.submit(page.forms.first, page.forms.first.buttons.first)
516
- def submit(form, button=nil, headers={})
517
- form.add_button_to_query(button) if button
518
- case form.method.upcase
519
- when 'POST'
520
- post_form(form.action, form, headers)
521
- when 'GET'
522
- get(form.action.gsub(/\?[^\?]*$/, ''),
523
- form.build_query,
524
- form.page,
525
- headers)
526
- else
527
- raise ArgumentError, "unsupported method: #{form.method.upcase}"
528
- end
529
- end
934
+ ##
935
+ # Sets the OpenSSL certificate verification callback
530
936
 
531
- def request_with_entity(verb, uri, entity, headers = {})
532
- cur_page = current_page || Page.new(nil, {'content-type'=>'text/html'})
937
+ def verify_callback= verify_callback
938
+ @agent.verify_callback = verify_callback
939
+ end
533
940
 
534
- headers = {
535
- 'Content-Type' => 'application/octet-stream',
536
- 'Content-Length' => entity.size.to_s,
537
- }.update headers
941
+ ##
942
+ # The OpenSSL server certificate verification method. The default is
943
+ # OpenSSL::SSL::VERIFY_PEER and certificate verification uses the default
944
+ # system certificates. See also cert_store
538
945
 
539
- page = @agent.fetch uri, verb, headers, [entity], cur_page
540
- add_to_history(page)
541
- page
946
+ def verify_mode
947
+ @agent.verify_mode
542
948
  end
543
949
 
544
- # Returns the current page loaded by Mechanize
545
- def current_page
546
- @agent.current_page
950
+ ##
951
+ # Sets the OpenSSL server certificate verification method.
952
+
953
+ def verify_mode= verify_mode
954
+ @agent.verify_mode = verify_mode
547
955
  end
548
956
 
549
- # Returns a visited page for the url passed in, otherwise nil
550
- def visited_page(url)
551
- url = url.href if url.respond_to? :href
957
+ # :section: Utilities
552
958
 
553
- @agent.visited_page url
554
- end
959
+ attr_reader :agent # :nodoc:
555
960
 
556
- # Returns whether or not a url has been visited
557
- alias visited? visited_page
961
+ attr_reader :pluggable_parser # :nodoc:
962
+
963
+ ##
964
+ # Parses the +body+ of the +response+ from +uri+ using the pluggable parser
965
+ # that matches its content type
558
966
 
559
967
  def parse uri, response, body
560
968
  content_type = nil
@@ -567,6 +975,15 @@ class Mechanize
567
975
  # Find our pluggable parser
568
976
  parser_klass = @pluggable_parser.parser content_type
569
977
 
978
+ unless Mechanize::Download === parser_klass then
979
+ body = case body
980
+ when IO, Tempfile, StringIO then
981
+ body.read
982
+ else
983
+ body
984
+ end
985
+ end
986
+
570
987
  parser_klass.new uri, response, body, response.code do |parser|
571
988
  parser.mech = self if parser.respond_to? :mech=
572
989
 
@@ -575,6 +992,15 @@ class Mechanize
575
992
  end
576
993
  end
577
994
 
995
+ def pretty_print(q) # :nodoc:
996
+ q.object_group(self) {
997
+ q.breakable
998
+ q.pp cookie_jar
999
+ q.breakable
1000
+ q.pp current_page
1001
+ }
1002
+ end
1003
+
578
1004
  ##
579
1005
  # Sets the proxy +address+ at +port+ with an optional +user+ and +password+
580
1006
 
@@ -588,29 +1014,11 @@ class Mechanize
588
1014
  @agent.set_http
589
1015
  end
590
1016
 
591
- # Runs given block, then resets the page history as it was before. self is
592
- # given as a parameter to the block. Returns the value of the block.
593
- def transact
594
- history_backup = @agent.history.dup
595
- begin
596
- yield self
597
- ensure
598
- @agent.history = history_backup
599
- end
600
- end
601
-
602
- def robots
603
- @agent.robots
604
- end
605
-
606
- def robots= enabled
607
- @agent.robots = enabled
608
- end
609
-
610
- alias :page :current_page
611
-
612
1017
  private
613
1018
 
1019
+ ##
1020
+ # Posts +form+ to +uri+
1021
+
614
1022
  def post_form(uri, form, headers = {})
615
1023
  cur_page = form.page || current_page ||
616
1024
  Page.new(nil, {'content-type'=>'text/html'})
@@ -630,6 +1038,9 @@ class Mechanize
630
1038
  page
631
1039
  end
632
1040
 
1041
+ ##
1042
+ # Adds +page+ to the history
1043
+
633
1044
  def add_to_history(page)
634
1045
  @agent.history.push(page, @agent.resolve(page.uri))
635
1046
  @history_added.call(page) if @history_added
@@ -640,6 +1051,8 @@ end
640
1051
  require 'mechanize/content_type_error'
641
1052
  require 'mechanize/cookie'
642
1053
  require 'mechanize/cookie_jar'
1054
+ require 'mechanize/parser'
1055
+ require 'mechanize/download'
643
1056
  require 'mechanize/file'
644
1057
  require 'mechanize/file_connection'
645
1058
  require 'mechanize/file_request'
@@ -648,13 +1061,17 @@ require 'mechanize/form'
648
1061
  require 'mechanize/history'
649
1062
  require 'mechanize/http'
650
1063
  require 'mechanize/http/agent'
1064
+ require 'mechanize/http/auth_challenge'
1065
+ require 'mechanize/http/auth_realm'
1066
+ require 'mechanize/http/content_disposition_parser'
1067
+ require 'mechanize/http/www_authenticate_parser'
651
1068
  require 'mechanize/page'
652
- require 'mechanize/inspect'
653
1069
  require 'mechanize/monkey_patch'
654
1070
  require 'mechanize/pluggable_parsers'
655
1071
  require 'mechanize/redirect_limit_reached_error'
656
1072
  require 'mechanize/redirect_not_get_or_head_error'
657
1073
  require 'mechanize/response_code_error'
1074
+ require 'mechanize/unauthorized_error'
658
1075
  require 'mechanize/response_read_error'
659
1076
  require 'mechanize/robots_disallowed_error'
660
1077
  require 'mechanize/unsupported_scheme_error'