tenderlove-mechanize 0.9.3.20090617085936

Sign up to get free protection for your applications and to get access to all the features.
Files changed (173) hide show
  1. data/CHANGELOG.rdoc +496 -0
  2. data/EXAMPLES.rdoc +171 -0
  3. data/FAQ.rdoc +11 -0
  4. data/GUIDE.rdoc +122 -0
  5. data/LICENSE.rdoc +340 -0
  6. data/Manifest.txt +169 -0
  7. data/README.rdoc +60 -0
  8. data/Rakefile +43 -0
  9. data/examples/flickr_upload.rb +23 -0
  10. data/examples/mech-dump.rb +7 -0
  11. data/examples/proxy_req.rb +9 -0
  12. data/examples/rubyforge.rb +21 -0
  13. data/examples/spider.rb +11 -0
  14. data/lib/mechanize.rb +7 -0
  15. data/lib/www/mechanize/chain/auth_headers.rb +80 -0
  16. data/lib/www/mechanize/chain/body_decoding_handler.rb +48 -0
  17. data/lib/www/mechanize/chain/connection_resolver.rb +78 -0
  18. data/lib/www/mechanize/chain/custom_headers.rb +23 -0
  19. data/lib/www/mechanize/chain/handler.rb +9 -0
  20. data/lib/www/mechanize/chain/header_resolver.rb +53 -0
  21. data/lib/www/mechanize/chain/parameter_resolver.rb +24 -0
  22. data/lib/www/mechanize/chain/post_connect_hook.rb +0 -0
  23. data/lib/www/mechanize/chain/pre_connect_hook.rb +22 -0
  24. data/lib/www/mechanize/chain/request_resolver.rb +32 -0
  25. data/lib/www/mechanize/chain/response_body_parser.rb +40 -0
  26. data/lib/www/mechanize/chain/response_header_handler.rb +50 -0
  27. data/lib/www/mechanize/chain/response_reader.rb +41 -0
  28. data/lib/www/mechanize/chain/ssl_resolver.rb +42 -0
  29. data/lib/www/mechanize/chain/uri_resolver.rb +77 -0
  30. data/lib/www/mechanize/chain.rb +34 -0
  31. data/lib/www/mechanize/content_type_error.rb +16 -0
  32. data/lib/www/mechanize/cookie.rb +72 -0
  33. data/lib/www/mechanize/cookie_jar.rb +191 -0
  34. data/lib/www/mechanize/file.rb +73 -0
  35. data/lib/www/mechanize/file_response.rb +62 -0
  36. data/lib/www/mechanize/file_saver.rb +39 -0
  37. data/lib/www/mechanize/form/button.rb +8 -0
  38. data/lib/www/mechanize/form/check_box.rb +13 -0
  39. data/lib/www/mechanize/form/field.rb +28 -0
  40. data/lib/www/mechanize/form/file_upload.rb +24 -0
  41. data/lib/www/mechanize/form/image_button.rb +23 -0
  42. data/lib/www/mechanize/form/multi_select_list.rb +69 -0
  43. data/lib/www/mechanize/form/option.rb +51 -0
  44. data/lib/www/mechanize/form/radio_button.rb +38 -0
  45. data/lib/www/mechanize/form/select_list.rb +45 -0
  46. data/lib/www/mechanize/form.rb +360 -0
  47. data/lib/www/mechanize/headers.rb +12 -0
  48. data/lib/www/mechanize/history.rb +67 -0
  49. data/lib/www/mechanize/inspect.rb +90 -0
  50. data/lib/www/mechanize/monkey_patch.rb +37 -0
  51. data/lib/www/mechanize/page/base.rb +10 -0
  52. data/lib/www/mechanize/page/frame.rb +22 -0
  53. data/lib/www/mechanize/page/link.rb +50 -0
  54. data/lib/www/mechanize/page/meta.rb +51 -0
  55. data/lib/www/mechanize/page.rb +176 -0
  56. data/lib/www/mechanize/pluggable_parsers.rb +103 -0
  57. data/lib/www/mechanize/redirect_limit_reached_error.rb +18 -0
  58. data/lib/www/mechanize/redirect_not_get_or_head_error.rb +20 -0
  59. data/lib/www/mechanize/response_code_error.rb +25 -0
  60. data/lib/www/mechanize/unsupported_scheme_error.rb +10 -0
  61. data/lib/www/mechanize/util.rb +76 -0
  62. data/lib/www/mechanize.rb +619 -0
  63. data/mechanize.gemspec +41 -0
  64. data/test/chain/test_argument_validator.rb +14 -0
  65. data/test/chain/test_auth_headers.rb +25 -0
  66. data/test/chain/test_custom_headers.rb +18 -0
  67. data/test/chain/test_header_resolver.rb +28 -0
  68. data/test/chain/test_parameter_resolver.rb +35 -0
  69. data/test/chain/test_request_resolver.rb +29 -0
  70. data/test/chain/test_response_reader.rb +24 -0
  71. data/test/data/htpasswd +1 -0
  72. data/test/data/server.crt +16 -0
  73. data/test/data/server.csr +12 -0
  74. data/test/data/server.key +15 -0
  75. data/test/data/server.pem +15 -0
  76. data/test/helper.rb +129 -0
  77. data/test/htdocs/alt_text.html +10 -0
  78. data/test/htdocs/bad_form_test.html +9 -0
  79. data/test/htdocs/button.jpg +0 -0
  80. data/test/htdocs/empty_form.html +6 -0
  81. data/test/htdocs/file_upload.html +26 -0
  82. data/test/htdocs/find_link.html +41 -0
  83. data/test/htdocs/form_multi_select.html +16 -0
  84. data/test/htdocs/form_multival.html +37 -0
  85. data/test/htdocs/form_no_action.html +18 -0
  86. data/test/htdocs/form_no_input_name.html +16 -0
  87. data/test/htdocs/form_select.html +16 -0
  88. data/test/htdocs/form_select_all.html +16 -0
  89. data/test/htdocs/form_select_none.html +17 -0
  90. data/test/htdocs/form_select_noopts.html +10 -0
  91. data/test/htdocs/form_set_fields.html +14 -0
  92. data/test/htdocs/form_test.html +188 -0
  93. data/test/htdocs/frame_test.html +30 -0
  94. data/test/htdocs/google.html +13 -0
  95. data/test/htdocs/iframe_test.html +16 -0
  96. data/test/htdocs/index.html +6 -0
  97. data/test/htdocs/link with space.html +5 -0
  98. data/test/htdocs/meta_cookie.html +11 -0
  99. data/test/htdocs/no_title_test.html +6 -0
  100. data/test/htdocs/relative/tc_relative_links.html +21 -0
  101. data/test/htdocs/tc_bad_links.html +5 -0
  102. data/test/htdocs/tc_base_link.html +8 -0
  103. data/test/htdocs/tc_blank_form.html +11 -0
  104. data/test/htdocs/tc_checkboxes.html +19 -0
  105. data/test/htdocs/tc_encoded_links.html +5 -0
  106. data/test/htdocs/tc_follow_meta.html +8 -0
  107. data/test/htdocs/tc_form_action.html +48 -0
  108. data/test/htdocs/tc_links.html +18 -0
  109. data/test/htdocs/tc_no_attributes.html +16 -0
  110. data/test/htdocs/tc_pretty_print.html +17 -0
  111. data/test/htdocs/tc_radiobuttons.html +17 -0
  112. data/test/htdocs/tc_referer.html +10 -0
  113. data/test/htdocs/tc_relative_links.html +19 -0
  114. data/test/htdocs/tc_textarea.html +23 -0
  115. data/test/htdocs/unusual______.html +5 -0
  116. data/test/servlets.rb +365 -0
  117. data/test/ssl_server.rb +48 -0
  118. data/test/test_authenticate.rb +71 -0
  119. data/test/test_bad_links.rb +25 -0
  120. data/test/test_blank_form.rb +16 -0
  121. data/test/test_checkboxes.rb +61 -0
  122. data/test/test_content_type.rb +13 -0
  123. data/test/test_cookie_class.rb +338 -0
  124. data/test/test_cookie_jar.rb +362 -0
  125. data/test/test_cookies.rb +123 -0
  126. data/test/test_encoded_links.rb +20 -0
  127. data/test/test_errors.rb +49 -0
  128. data/test/test_follow_meta.rb +108 -0
  129. data/test/test_form_action.rb +44 -0
  130. data/test/test_form_as_hash.rb +61 -0
  131. data/test/test_form_button.rb +38 -0
  132. data/test/test_form_no_inputname.rb +15 -0
  133. data/test/test_forms.rb +564 -0
  134. data/test/test_frames.rb +25 -0
  135. data/test/test_get_headers.rb +52 -0
  136. data/test/test_gzipping.rb +22 -0
  137. data/test/test_hash_api.rb +45 -0
  138. data/test/test_history.rb +142 -0
  139. data/test/test_history_added.rb +16 -0
  140. data/test/test_html_unscape_forms.rb +39 -0
  141. data/test/test_if_modified_since.rb +20 -0
  142. data/test/test_keep_alive.rb +31 -0
  143. data/test/test_links.rb +120 -0
  144. data/test/test_mech.rb +268 -0
  145. data/test/test_mechanize_file.rb +47 -0
  146. data/test/test_meta.rb +65 -0
  147. data/test/test_multi_select.rb +106 -0
  148. data/test/test_no_attributes.rb +13 -0
  149. data/test/test_option.rb +18 -0
  150. data/test/test_page.rb +119 -0
  151. data/test/test_pluggable_parser.rb +145 -0
  152. data/test/test_post_form.rb +34 -0
  153. data/test/test_pretty_print.rb +22 -0
  154. data/test/test_radiobutton.rb +75 -0
  155. data/test/test_redirect_limit_reached.rb +41 -0
  156. data/test/test_redirect_verb_handling.rb +45 -0
  157. data/test/test_referer.rb +39 -0
  158. data/test/test_relative_links.rb +40 -0
  159. data/test/test_request.rb +13 -0
  160. data/test/test_response_code.rb +52 -0
  161. data/test/test_save_file.rb +48 -0
  162. data/test/test_scheme.rb +48 -0
  163. data/test/test_select.rb +106 -0
  164. data/test/test_select_all.rb +15 -0
  165. data/test/test_select_none.rb +15 -0
  166. data/test/test_select_noopts.rb +16 -0
  167. data/test/test_set_fields.rb +44 -0
  168. data/test/test_ssl_server.rb +20 -0
  169. data/test/test_subclass.rb +14 -0
  170. data/test/test_textarea.rb +45 -0
  171. data/test/test_upload.rb +109 -0
  172. data/test/test_verbs.rb +25 -0
  173. metadata +314 -0
data/CHANGELOG.rdoc ADDED
@@ -0,0 +1,496 @@
1
+ = Mechanize CHANGELOG
2
+
3
+ === HEAD
4
+
5
+ * Bug Fixes:
6
+
7
+ * Do not apply encoding if encoding equals 'none' Thanks Akinori MUSHA!
8
+ * Custom request headers may be supplied WWW::Mechanize#request_headers
9
+ RF #24516
10
+ * HTML Parser may be set on a per instance level WWW::Mechanize#html_parser
11
+ RF #24693
12
+ * Fixed string encoding in ruby 1.9. RF #2433
13
+ * Rescuing Zlib::DataErrors (Thanks Kelley Reynolds)
14
+ * Fixing a problem with frozen SSL objects. RF #24950
15
+ * Do not send a referer on meta refresh. RF #24945
16
+ * Fixed a bug with double semi-colons in Content-Disposition headers
17
+ * Properly handling cookies that specify a path. RF #25259
18
+
19
+ === 0.9.2 / 2009/03/05
20
+
21
+ * New Features:
22
+ * Mechanize#submit and Form#submit take arbitrary headers(thanks penguincoder)
23
+
24
+ * Bug Fixes:
25
+ * Fixed a bug with bad cookie parsing
26
+ * Form::RadioButton#click unchecks other buttons (RF #24159)
27
+ * Fixed problems with Iconv (RF #24190, RF #24192, RF #24043)
28
+ * POST parameters should be CGI escaped
29
+ * Made Content-Type match case insensitive (Thanks Kelley Reynolds)
30
+ * Non-string form parameters work
31
+
32
+ === 0.9.1 2009/02/23
33
+
34
+ * New Features:
35
+ * Encoding may be specified for a page: Page#encoding=
36
+
37
+ * Bug Fixes:
38
+ * m17n fixes. ありがとう konn!
39
+ * Fixed a problem with base tags. ありがとう Keisuke
40
+ * HEAD requests do not record in the history
41
+ * Default encoding to ISO-8859-1 instead of ASCII
42
+ * Requests with URI instances should not be polluted RF #23472
43
+ * Nonce count fixed for digest auth requests. Thanks Adrian Slapa!
44
+ * Fixed a referer issue with requests using a uri. RF #23472
45
+ * WAP content types will now be parsed
46
+ * Rescued poorly formatted cookies. Thanks Kelley Reynolds!
47
+
48
+ === 0.9.0
49
+
50
+ * Deprecations
51
+ * WWW::Mechanize::List is gone!
52
+ * Mechanize uses Nokogiri as its HTML parser but you may switch to
53
+ Hpricot by using WWW::Mechanize.html_parser = Hpricot
54
+
55
+ * Bug Fixes:
56
+ * Nil check on page when base tag is used #23021
57
+
58
+ === 0.8.5
59
+
60
+ * Deprecations
61
+ * WWW::Mechanize::List will be deprecated in 0.9.0, and warnings have
62
+ been added to help you upgrade.
63
+
64
+ * Bug Fixes:
65
+ * Stopped raising EOF exceptions on HEAD requests. ありがとう:HIRAKU Kuroda
66
+ * Fixed exceptions when a logger is set and file:// requests are made.
67
+ * Made Mechanize 1.9 compatible
68
+ * Not setting the port in the host header for SSL sites.
69
+ * Following refresh headers. Thanks Tim Connor!
70
+ * Cookie Jar handles cookie domains containing ports, like
71
+ 'mydomain.com:443' (Thanks Michal Ochman!)
72
+ * Fixing strange uri escaping problems [#22604]
73
+ * Making content-type determination more robust. (thanks Han Holl!)
74
+ * Dealing with links that are query string only. [#22402]
75
+ * Nokogiri may be dropped in as a replacement.
76
+ WWW::Mechanize.html_parser = Nokogiri::HTML
77
+ * Making sure the correct page is added to the history on meta refresh.
78
+ [#22708]
79
+ * Mechanize#get requests no longer send a referer unless they are relative
80
+ requests.
81
+
82
+ === 0.8.4
83
+
84
+ * Bug Fixes:
85
+ * Setting the port number on the host header.
86
+ * Fixing Authorization headers for picky servers
87
+
88
+ === 0.8.3
89
+
90
+ * Bug Fixes:
91
+ * Making sure logger is set during SSL connections.
92
+
93
+ === 0.8.2
94
+
95
+ * Bug Fixes:
96
+ * Doh! I was accidentally setting headers twice.
97
+
98
+ === 0.8.1
99
+
100
+ * Bug Fixes:
101
+ * Fixed problem with nil pointer when logger is set
102
+
103
+ === 0.8.0
104
+
105
+ * New Features:
106
+ * Lifecycle hooks. Mechanize#pre_connect_hooks, Mechanize#post_connect_hooks
107
+ * file:/// urls are now supported
108
+ * Added Mechanize::Page#link_with, frame_with for searching for links using
109
+ +criteria+.
110
+ * Implementing PUT, DELETE, and HEAD requests
111
+
112
+ * Bug Fixes:
113
+ * Fixed an infinite loop when content-length and body length don't match.
114
+ * Only setting headers once
115
+ * Adding IIS authentication support
116
+
117
+ === 0.7.8
118
+
119
+ * Bug Fixes:
120
+ * Fixed bug when receiving a 304 response (HTTPNotModified) on a page not
121
+ cached in history.
122
+ * #21428 Default to HTML parser for 'application/xhtml+xml' content-type.
123
+ * Fixed an issue where redirects were resending posted data
124
+
125
+ === 0.7.7
126
+
127
+ * New Features:
128
+ * Page#form_with takes a +criteria+ hash.
129
+ * Page#form is changed to Page#form_with
130
+ * Mechanize#get takes custom http headers. Thanks Mike Dalessio!
131
+ * Form#click_button submits a form defaulting to the current button.
132
+ * Form#set_fields now takes a hash. Thanks Tobi!
133
+ * Mechanize#redirection_limit= for setting the max number of redirects.
134
+
135
+ * Bug Fixes:
136
+ * Added more examples. Thanks Robert Jackson.
137
+ * #20480 Making sure the Host header is set.
138
+ * #20672 Making sure cookies with weird semicolons work.
139
+ * Fixed bug with percent signs in urls.
140
+ http://d.hatena.ne.jp/kitamomonga/20080410/ruby_mechanize_percent_url_bug
141
+ * #21132 Not checking for EOF errors on redirect
142
+ * Fixed a weird gzipping error.
143
+ * #21233 Smarter multipart boundary. Thanks Todd Willey!
144
+ * #20097 Supporting meta tag cookies.
145
+
146
+ === 0.7.6
147
+
148
+ * New Features:
149
+ * Added support for reading Mozilla cookie jars. Thanks Chris Riddoch!
150
+ * Moving text, password, hidden, int to default. Thanks Tim Harper!
151
+ * Mechanize#history_added callback for page loads. Thanks Tobi Reif!
152
+ * Mechanize#scheme_handlers callbacks for handling unsupported schemes on
153
+ links.
154
+
155
+ * Bug Fixes:
156
+ * Ignoring scheme case
157
+ http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=470642
158
+ * Not encoding tildes in uris. Thanks Bruno. [#19380]
159
+ * Resetting request bodies when retrying form posts. Thanks Bruno. [#19379]
160
+ * Throwing away keep alive connections on EPIPE and ECONNRESET.
161
+ * Duplicating request headers when retrying a 401. Thanks Hiroshi Ichikawa.
162
+ * Simulating an EOF error when a response length is bad. Thanks Tobias Gruetzmacher.
163
+ http://rubyforge.org/tracker/index.php?func=detail&aid=19178&group_id=1453&atid=5711
164
+ * Defaulting option tags to the inner text.
165
+ http://rubyforge.org/tracker/index.php?func=detail&aid=19976&group_id=1453&atid=5709
166
+ * Supporting blank strings for option values.
167
+ http://rubyforge.org/tracker/index.php?func=detail&aid=19975&group_id=1453&atid=5709
168
+
169
+ === 0.7.5
170
+
171
+ * Fixed a bug when fetching files and not pages. Thanks Mat Schaffer!
172
+
173
+ === 0.7.4
174
+
175
+ * doh!
176
+
177
+ === 0.7.3
178
+
179
+ * Pages are now yielded to blocks given to WWW::Mechanize#get
180
+ * WWW::Mechanize#get now takes hash arguments for uri parameters.
181
+ * WWW::Mechanize#post takes an IO object as a parameter and posts correctly.
182
+ * Fixing a strange zlib inflate problem on windows
183
+
184
+ === 0.7.2
185
+
186
+ * Handling gzipped responses with no Content-Length header
187
+
188
+ === 0.7.1
189
+
190
+ * Added iPhone to the user agent aliases. [#17572]
191
+ * Fixed a bug with EOF errors in net/http. [#17570]
192
+ * Handling 0 length gzipped responses. [#17471]
193
+
194
+ === 0.7.0
195
+
196
+ * Removed Ruby 1.8.2 support
197
+ * Changed parser to lazily parse links
198
+ * Lazily parsing document
199
+ * Adding verify_callback for SSL requests. Thanks Mike Dalessio!
200
+ * Fixed a bug with Accept-Language header. Thanks Bill Siggelkow.
201
+
202
+ === 0.6.11
203
+
204
+ * Detecting single quotes in meta redirects.
205
+ * Adding pretty inspect for ruby versions > 1.8.4 (Thanks Joel Kociolek)
206
+ http://rubyforge.org/tracker/index.php?func=detail&aid=13150&group_id=1453&atid=5709
207
+ * Fixed bug with file name in multipart posts
208
+ http://rubyforge.org/tracker/?func=detail&aid=15594&group_id=1453&atid=5709
209
+ * Posting forms relative to the originating page. Thanks Mortee.
210
+ * Added a FAQ
211
+ http://rubyforge.org/tracker/?func=detail&aid=15772&group_id=1453&atid=5709
212
+
213
+ === 0.6.10
214
+
215
+ * Made digest authentication work with POSTs.
216
+ * Made sure page was HTML before following meta refreshes.
217
+ http://rubyforge.org/tracker/index.php?func=detail&aid=12260&group_id=1453&atid=5709
218
+ * Made sure that URLS with a host and no path would default to '/' for history
219
+ purposes.
220
+ http://rubyforge.org/tracker/index.php?func=detail&aid=12368&group_id=1453&atid=5709
221
+ * Avoiding memory leaks with transact. Thanks Tobias Gruetzmacher!
222
+ http://rubyforge.org/tracker/index.php?func=detail&aid=12057&group_id=1453&atid=5711
223
+ * Fixing a problem with # signs in the file name. Thanks Tobias Gruetzmacher!
224
+ http://rubyforge.org/tracker/index.php?func=detail&aid=12510&group_id=1453&atid=5711
225
+ * Made sure that blank form values are submitted.
226
+ http://rubyforge.org/tracker/index.php?func=detail&aid=12505&group_id=1453&atid=5709
227
+ * Mechanize now respects the base tag. Thanks Stephan Dale.
228
+ http://rubyforge.org/tracker/index.php?func=detail&aid=12468&group_id=1453&atid=5709
229
+ * Aliasing inspect to pretty_inspect. Thanks Eric Promislow.
230
+ http://rubyforge.org/pipermail/mechanize-users/2007-July/000157.html
231
+
232
+ === 0.6.9
233
+
234
+ * Updating UTF-8 support for urls
235
+ * Adding AREA tags to the links list.
236
+ http://rubyforge.org/pipermail/mechanize-users/2007-May/000140.html
237
+ * WWW::Mechanize#follow_meta_refresh will allow you to automatically follow
238
+ meta refresh tags. [#10032]
239
+ * Adding x-gzip to accepted content-encoding. Thanks Simon Strandgaard
240
+ http://rubyforge.org/tracker/index.php?func=detail&aid=11167&group_id=1453&atid=5711
241
+ * Added Digest Authentication support. Thanks to Ryan Davis and Eric Hodel,
242
+ you get a gold star!
243
+
244
+ === 0.6.8
245
+
246
+ * Keep alive can be shut off now with WWW::Mechanize#keep_alive
247
+ * Conditional requests can be shut off with WWW::Mechanize#conditional_requests
248
+ * Monkey patched Net::HTTP#keep_alive?
249
+ * [#9877] Moved last request time. Thanks Max Stepanov
250
+ * Added WWW::Mechanize::File#save
251
+ * Defaulting file name to URI or Content-Disposition
252
+ * Updating compatibility with hpricot
253
+ * Added more unit tests
254
+
255
+ === 0.6.7
256
+
257
+ * Fixed a bug with keep-alive requests
258
+ * [#9549] fixed problem with cookie paths
259
+
260
+ === 0.6.6
261
+
262
+ * Removing hpricot overrides
263
+ * Fixed a bug where alt text can be nil. Thanks Yannick!
264
+ * Unparseable expiration dates in cookies are now treated as session cookies
265
+ * Caching connections
266
+ * Requests now default to keep alive
267
+ * [#9434] Fixed bug where html entities weren't decoded
268
+ * [#9150] Updated mechanize history to deal with redirects
269
+
270
+ === 0.6.5
271
+
272
+ * Copying headers to a hash to prevent memory leaks
273
+ * Speeding up page parsing
274
+ * Aliased fields to elements
275
+ * Adding If-Modified-Since header
276
+ * Added delete_field! to form. Thanks to Sava Chankov
277
+ * Updated uri escaping to support high order characters. Thanks to Henrik Nyh.
278
+ * Better handling relative URIs. Thanks to Henrik Nyh
279
+ * Now handles pipes in URLs
280
+ http://rubyforge.org/tracker/?func=detail&aid=7140&group_id=1453&atid=5709
281
+ * Now escaping html entities in form fields.
282
+ http://rubyforge.org/tracker/?func=detail&aid=7563&group_id=1453&atid=5709
283
+ * Added MSIE 7.0 user agent string
284
+
285
+ === 0.6.4
286
+
287
+ * Adding the "redirect_ok" method to Mechanize to stop mechanize from
288
+ following redirects.
289
+ http://rubyforge.org/tracker/index.php?func=detail&aid=6571&group_id=1453&atid=5712
290
+ * Added protected method Mechanize#set_headers so that subclasses can set
291
+ custom headers.
292
+ http://rubyforge.org/tracker/?func=detail&aid=7208&group_id=1453&atid=5712
293
+ * Aliased Page#referer to Page#page
294
+ * Fixed a bug when clicking relative urls
295
+ http://rubyforge.org/pipermail/mechanize-users/2006-November/000035.html
296
+ * Fixing a bug when bad version or max age is passed to Cookie::parse
297
+ http://rubyforge.org/pipermail/mechanize-users/2006-November/000033.html
298
+ * Fixing a bug with response codes. [#6526]
299
+ * Fixed bug [#6548]. Input type of 'button' was not being added as a button.
300
+ * Fixed bug [#7139]. REXML parser calls hpricot parser by accident
301
+
302
+ === 0.6.3
303
+
304
+ * Added keys and values methods to Form
305
+ * Added has_value? to Form
306
+ * Added a has_field? method to Form
307
+ * The add_field! method on Form now creates a field for you on the form.
308
+ Thanks to Mat Schaffer for the patch.
309
+ http://rubyforge.org/pipermail/mechanize-users/2006-November/000025.html
310
+ * Fixed a bug when form actions have html encoded entities in them.
311
+ http://rubyforge.org/pipermail/mechanize-users/2006-October/000019.html
312
+ * Fixed a bug when links or frame sources have html encoded entities in the
313
+ href or src.
314
+ * Fixed a bug where '#' symbols are encoded
315
+ http://rubyforge.org/forum/message.php?msg_id=14747
316
+
317
+ === 0.6.2
318
+
319
+ * Added a yield to Page#form so that dealing with forms can be more DSL like.
320
+ * Added the parsed page to the ResponseCodeError so that the parsed results
321
+ can be accessed even in the event of an error.
322
+ http://rubyforge.org/pipermail/mechanize-users/2006-September/000007.html
323
+ * Updated documentation (Thanks to Paul Smith)
324
+
325
+ === 0.6.1
326
+
327
+ * Added a method to Form called "submit". Now forms can be submitted by
328
+ calling a method on the form.
329
+ * Added a click method to links
330
+ * Added an REXML pluggable parser for backwards compatibility. To use it,
331
+ just do this:
332
+ agent.pluggable_parser.html = WWW::Mechanize::REXMLPage
333
+ * Fixed a bug with referrers by adding a page attribute to forms and links.
334
+ * Fixed a bug where domain names were case sensitive.
335
+ http://tenderlovemaking.com/2006/09/04/road-to-ruby-mechanize-060/#comment-53
336
+ * Fixed a bug with URI escaped links.
337
+ http://rubyforge.org/pipermail/mechanize-users/2006-September/000002.html
338
+ * Fixed a bug when options in select lists don't have a value. Thanks Dan Higham
339
+ [#5837] Code in lib/mechanize/form_elements.rb is incorrect.
340
+ * Fixed a bug with loading text in to links.
341
+ http://rubyforge.org/pipermail/mechanize-users/2006-September/000000.html
342
+
343
+ === 0.6.0
344
+
345
+ * Changed main parser to use hpricot
346
+ * Made WWW::Mechanize::Page class searchable like hpricot
347
+ * Updated WWW::Mechanize#click to support hpricot links like this:
348
+ @agent.click (page/"a").first
349
+ * Clicking a Frame is now possible:
350
+ @agent.click (page/"frame").first
351
+ * Removed deprecated attr_finder
352
+ * Removed REXML helper methods since the main parser is now hpricot
353
+ * Overhauled cookie parser to use WEBrick::Cookie
354
+
355
+ === 0.5.4
356
+
357
+ * Added WWW::Mechanize#transact for saving history state in a
358
+ transaction. See the EXAMPLES file. Thanks Johan Kiviniemi.
359
+ * Added support for gzip compressed pages
360
+ * Forms can now be accessed like a hash. For example, to set the value
361
+ of an input field named 'name' to "Aaron", you can do this:
362
+ form['name'] = "Aaron"
363
+ Or to get the value of a field named 'name', do this:
364
+ puts form['name']
365
+ * File uploads will now read the file specified in FileUpload#file_name
366
+ * FileUpload can use an IO object in FileUpload#file_data
367
+ * Fixed a bug with saving files on windows
368
+ * Fixed a bug with the filename being set in forms
369
+
370
+ === 0.5.3
371
+
372
+ * Mechanize#click will now act on the first element of an array. So if an
373
+ array of links is passed to WWW::Mechanize#click, the first link is clicked.
374
+ That means the syntax for clicking links is shortened and still supports
375
+ selecting a link. The following are equivalent:
376
+ agent.click page.links.first
377
+ agent.click page.links
378
+ * Fixed a bug with spaces in href's and get's
379
+ * Added a tick, untick, and click method to radio buttons so that
380
+ radiobuttons can be "clicked"
381
+ * Added a tick, untick, and click method to check boxes so that
382
+ checkboxes can be "clicked"
383
+ * Options on Select lists can now be "tick"ed, and "untick"ed.
384
+ * Fixed a potential bug conflicting with rails. Thanks Eric Kolve
385
+ * Updated log4r support for a speed increase. Thanks Yinon Bentor
386
+ * Added inspect methods and pretty printing
387
+
388
+ === 0.5.2
389
+
390
+ * Fixed a bug with input names that are nil
391
+ * Added a warning when using attr_finder because attr_finder will be deprecated
392
+ in 0.6.0 in favor of method calls. So this syntax:
393
+ @agent.links(:text => 'foo')
394
+ should be changed to this:
395
+ @agent.links.text('foo')
396
+ * Added support for selecting multiple options in select tags that support
397
+ multiple options. See WWW::Mechanize::MultiSelectList.
398
+ * New select list methods have been added, select_all, select_none.
399
+ * Options for select lists can now be "clicked" which toggles their selection,
400
+ they can be "selected" and "unselected". See WWW::Mechanize::Option
401
+ * Added a method to set multiple fields at the same time,
402
+ WWW::Mechanize::Form#set_fields. Which can be used like so:
403
+ form.set_fields( :foo => 'bar', :name => 'Aaron' )
404
+
405
+ === 0.5.1
406
+
407
+ * Fixed bug with file uploads
408
+ * Added performance tweaks to the cookie class
409
+
410
+ === 0.5.0
411
+
412
+ * Added pluggable parsers. (Thanks to Eric Kolve for the idea)
413
+ * Changed namespace so all classes are under WWW::Mechanize.
414
+ * Updating Forms so that fields can be used as accessors (Thanks Gregory Brown)
415
+ * Added WWW::Mechanize::File as default object used for unknown content types.
416
+ * Added 'save_as' method to Mechanize::File, so any page can be saved.
417
+ * Adding 'save_as' and 'load' to CookieJar so that cookies can be saved
418
+ between sessions.
419
+ * Added WWW::Mechanize::FileSaver pluggable parser to automatically save files.
420
+ * Added WWW::Mechanize::Page#title for page titles
421
+ * Added OpenSSL certificate support (Thanks Mike Dalessio)
422
+ * Removed support for body filters in favor of pluggable parsers.
423
+ * Fixed cookie bug adding a '/' when the url is missing one (Thanks Nick Dainty)
424
+
425
+ === 0.4.7
426
+
427
+ * Fixed bug with no action in forms. Thanks to Adam Wiggins
428
+ * Setting a default user-agent string
429
+ * Added house cleaning to the cookie jar so expired cookies don't stick around.
430
+ * Added new method WWW::Form#field to find the first field with a given name.
431
+ (thanks to Gregory Brown)
432
+ * Added WWW::Mechanize#get_file for fetching non text/html files
433
+
434
+ === 0.4.6
435
+
436
+ * Added support for proxies
437
+ * Added a uri field to WWW::Link
438
+ * Added an error class WWW::Mechanize::ContentTypeError
439
+ * Added image alt text to link text
440
+ * Added a visited? method to WWW::Mechanize
441
+ * Added Array#value= which will set the first value to the argument. That
442
+ allows syntax as such: form.fields.name('q').value = 'xyz'
443
+ Before it was like this: form.fields.name('q').first.value = 'xyz'
444
+
445
+ === 0.4.5
446
+
447
+ * Added support for multiple values of the same name
448
+ * Updated build_query_string to take an array of arrays (Thanks Michal Janeczek)
449
+ * Added WWW::Mechanize#body_filter= so that response bodies can be preprocessed
450
+ * Added WWW::Page#body_filter= so that response bodies can be preprocessed
451
+ * Added support for more date formats in the cookie parser
452
+ * Fixed a bug with empty select lists
453
+ * Fixing a problem with cookies not handling no spaces after semicolons
454
+
455
+ === 0.4.4
456
+
457
+ * Fixed error in method signature, basic_authetication is now basic_auth
458
+ * Fixed bug with encoding names in file uploads (Big thanks to Alex Young)
459
+ * Added options to the select list
460
+
461
+ === 0.4.3
462
+
463
+ * Added syntactic sugar for finding things
464
+ * Fixed bug with HttpOnly option in cookies
465
+ * Fixed a bug with cookie date parsing
466
+ * Defaulted dropdown lists to the first element
467
+ * Added unit tests
468
+
469
+ === 0.4.2
470
+
471
+ * Added support for iframes
472
+ * Made mechanize dependent on ruby-web rather than narf
473
+ * Added unit tests
474
+ * Fixed a bunch of warnings
475
+
476
+ === 0.4.1
477
+
478
+ * Added support for file uploading
479
+ * Added support for frames (Thanks Gabriel[mailto:leerbag@googlemail.com])
480
+ * Added more unit tests
481
+ * Fixed some bugs
482
+
483
+ === 0.4.0
484
+
485
+ * Added more unit tests
486
+ * Added a cookie jar with better cookie support, including expiration of cookies
487
+ and general cookie security.
488
+ * Updated mechanize to use built in net/http if ruby version is new enough.
489
+ * Added support for meta refresh tags
490
+ * Defaulted form actions to 'GET'
491
+ * Fixed various bugs
492
+ * Added more unit tests
493
+ * Added a response code exception
494
+ * Thanks to Brian Ellin (brianellin@gmail.com) for:
495
+ Added support for CA files, and support for 301 response codes
496
+
data/EXAMPLES.rdoc ADDED
@@ -0,0 +1,171 @@
1
+ = WWW::Mechanize examples
2
+
3
+ == Google
4
+ require 'rubygems'
5
+ require 'mechanize'
6
+
7
+ a = WWW::Mechanize.new { |agent|
8
+ agent.user_agent_alias = 'Mac Safari'
9
+ }
10
+
11
+ a.get('http://google.com/') do |page|
12
+ search_result = page.form_with(:name => 'f') do |search|
13
+ search.q = 'Hello world'
14
+ end.submit
15
+
16
+ search_result.links.each do |link|
17
+ puts link.text
18
+ end
19
+ end
20
+
21
+ == Rubyforge
22
+
23
+ a = WWW::Mechanize.new
24
+ a.get('http://rubyforge.org/') do |page|
25
+ # Click the login link
26
+ login_page = a.click(page.links.text(/Log In/))
27
+
28
+ # Submit the login form
29
+ my_page = login_page.form_with(:action => '/account/login.php') do |f|
30
+ f.form_loginname = ARGV[0]
31
+ f.form_pw = ARGV[1]
32
+ end.click_button
33
+
34
+ my_page.links.each do |link|
35
+ text = link.text.strip
36
+ next unless text.length > 0
37
+ puts text
38
+ end
39
+ end
40
+
41
+ == File Upload
42
+ Upload a file to flickr.
43
+
44
+ a = WWW::Mechanize.new { |agent|
45
+ # Flickr refreshes after login
46
+ agent.follow_meta_refresh = true
47
+ }
48
+
49
+ a.get('http://flickr.com/') do |home_page|
50
+ signin_page = a.click(home_page.links.text(/Sign In/))
51
+
52
+ my_page = signin_page.form_with(:name => 'login_form') do |form|
53
+ form.login = ARGV[0]
54
+ form.passwd = ARGV[1]
55
+ end.submit
56
+
57
+ # Click the upload link
58
+ upload_page = a.click(my_page.links.text(/Upload/))
59
+
60
+ # We want the basic upload page.
61
+ upload_page = a.click(upload_page.links.text(/basic Uploader/))
62
+
63
+ # Upload the file
64
+ upload_page.form_with(:method => 'POST') do |upload_form|
65
+ upload_form.file_uploads.first.file_name = ARGV[2]
66
+ end.submit
67
+ end
68
+
69
+ == Pluggable Parsers
70
+ Let's say you want html pages to automatically be parsed with Rubyful Soup.
71
+ This example shows you how:
72
+
73
+ require 'rubygems'
74
+ require 'mechanize'
75
+ require 'rubyful_soup'
76
+
77
+ class SoupParser < WWW::Mechanize::Page
78
+ attr_reader :soup
79
+ def initialize(uri = nil, response = nil, body = nil, code = nil)
80
+ @soup = BeautifulSoup.new(body)
81
+ super(uri, response, body, code)
82
+ end
83
+ end
84
+
85
+ agent = WWW::Mechanize.new
86
+ agent.pluggable_parser.html = SoupParser
87
+
88
+ Now all HTML pages will be parsed with the SoupParser class, and automatically
89
+ give you access to a method called 'soup' where you can get access to the
90
+ Beautiful Soup for that page.
91
+
92
+ == Using a proxy
93
+
94
+ require 'rubygems'
95
+ require 'mechanize'
96
+
97
+ agent = WWW::Mechanize.new
98
+ agent.set_proxy('localhost', '8000')
99
+ page = agent.get(ARGV[0])
100
+ puts page.body
101
+
102
+ == The transact method
103
+
104
+ transact runs the given block and then resets the page history. I.e. after the
105
+ block has been executed, you're back at the original page; no need to count how
106
+ many times to call the back method at the end of a loop (while accounting for
107
+ possible exceptions).
108
+
109
+ This example also demonstrates subclassing Mechanize.
110
+
111
+ require 'mechanize'
112
+
113
+ class TestMech < WWW::Mechanize
114
+ def process
115
+ get 'http://rubyforge.org/'
116
+ search_form = page.forms.first
117
+ search_form.words = 'WWW'
118
+ submit search_form
119
+
120
+ page.links_with(:href => %r{/projects/} ).each do |link|
121
+ next if link.href =~ %r{/projects/support/}
122
+
123
+ puts 'Loading %-30s %s' % [link.href, link.text]
124
+ begin
125
+ transact do
126
+ click link
127
+ # Do stuff, maybe click more links.
128
+ end
129
+ # Now we're back at the original page.
130
+
131
+ rescue => e
132
+ $stderr.puts "#{e.class}: #{e.message}"
133
+ end
134
+ end
135
+ end
136
+ end
137
+
138
+ TestMech.new.process
139
+
140
+ == Client Certificate Authentication (Mutual Auth)
141
+
142
+ In most cases a client certificate is created as an additional layer of security
143
+ for certain websites. The specific case that this was initially tested on was
144
+ for automating the download of archived images from a bank's (Wachovia) lockbox
145
+ system. Once the certificate is installed into your browser you will have to
146
+ export it and split the certificate and private key into separate files. Exported
147
+ files are usually in .p12 format (IE 7 & Firefox 2.0) which stands for PKCS #12.
148
+ You can convert them from p12 to pem format by using the following commands:
149
+
150
+ openssl.exe pkcs12 -in input_file.p12 -clcerts -out example.key -nocerts -nodes
151
+ openssl.exe pkcs12 -in input_file.p12 -clcerts -out example.cer -nokeys
152
+
153
+ require 'rubygems'
154
+ require 'mechanize'
155
+
156
+ # create Mechanize instance
157
+ agent = WWW::Mechanize.new
158
+
159
+ # set the path of the certificate file
160
+ agent.cert = 'example.cer'
161
+
162
+ # set the path of the private key file
163
+ agent.key = 'example.key'
164
+
165
+ # get the login form & fill it out with the username/password
166
+ login_form = agent.get("http://example.com/login_page").form('Login')
167
+ login_form.Userid = 'TestUser'
168
+ login_form.Password = 'TestPassword'
169
+
170
+ # submit login form
171
+ agent.submit(login_form, login_form.buttons.first)
data/FAQ.rdoc ADDED
@@ -0,0 +1,11 @@
1
+ Q: I keep getting an EOFError:
2
+ protocol.rb:133:in `sysread': end of file reached (EOFError)
3
+
4
+ A: Some people have experienced an EOFError during normal mechanize usage.
5
+ Most of the time this occurs because the remote website claims to support
6
+ keep alives, but does not implement them correctly. Try turning off
7
+ keep alives on your mechanize object:
8
+
9
+ mech.keep_alive = false
10
+
11
+