tenderlove-mechanize 0.9.3.20090617085936

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (173) hide show
  1. data/CHANGELOG.rdoc +496 -0
  2. data/EXAMPLES.rdoc +171 -0
  3. data/FAQ.rdoc +11 -0
  4. data/GUIDE.rdoc +122 -0
  5. data/LICENSE.rdoc +340 -0
  6. data/Manifest.txt +169 -0
  7. data/README.rdoc +60 -0
  8. data/Rakefile +43 -0
  9. data/examples/flickr_upload.rb +23 -0
  10. data/examples/mech-dump.rb +7 -0
  11. data/examples/proxy_req.rb +9 -0
  12. data/examples/rubyforge.rb +21 -0
  13. data/examples/spider.rb +11 -0
  14. data/lib/mechanize.rb +7 -0
  15. data/lib/www/mechanize/chain/auth_headers.rb +80 -0
  16. data/lib/www/mechanize/chain/body_decoding_handler.rb +48 -0
  17. data/lib/www/mechanize/chain/connection_resolver.rb +78 -0
  18. data/lib/www/mechanize/chain/custom_headers.rb +23 -0
  19. data/lib/www/mechanize/chain/handler.rb +9 -0
  20. data/lib/www/mechanize/chain/header_resolver.rb +53 -0
  21. data/lib/www/mechanize/chain/parameter_resolver.rb +24 -0
  22. data/lib/www/mechanize/chain/post_connect_hook.rb +0 -0
  23. data/lib/www/mechanize/chain/pre_connect_hook.rb +22 -0
  24. data/lib/www/mechanize/chain/request_resolver.rb +32 -0
  25. data/lib/www/mechanize/chain/response_body_parser.rb +40 -0
  26. data/lib/www/mechanize/chain/response_header_handler.rb +50 -0
  27. data/lib/www/mechanize/chain/response_reader.rb +41 -0
  28. data/lib/www/mechanize/chain/ssl_resolver.rb +42 -0
  29. data/lib/www/mechanize/chain/uri_resolver.rb +77 -0
  30. data/lib/www/mechanize/chain.rb +34 -0
  31. data/lib/www/mechanize/content_type_error.rb +16 -0
  32. data/lib/www/mechanize/cookie.rb +72 -0
  33. data/lib/www/mechanize/cookie_jar.rb +191 -0
  34. data/lib/www/mechanize/file.rb +73 -0
  35. data/lib/www/mechanize/file_response.rb +62 -0
  36. data/lib/www/mechanize/file_saver.rb +39 -0
  37. data/lib/www/mechanize/form/button.rb +8 -0
  38. data/lib/www/mechanize/form/check_box.rb +13 -0
  39. data/lib/www/mechanize/form/field.rb +28 -0
  40. data/lib/www/mechanize/form/file_upload.rb +24 -0
  41. data/lib/www/mechanize/form/image_button.rb +23 -0
  42. data/lib/www/mechanize/form/multi_select_list.rb +69 -0
  43. data/lib/www/mechanize/form/option.rb +51 -0
  44. data/lib/www/mechanize/form/radio_button.rb +38 -0
  45. data/lib/www/mechanize/form/select_list.rb +45 -0
  46. data/lib/www/mechanize/form.rb +360 -0
  47. data/lib/www/mechanize/headers.rb +12 -0
  48. data/lib/www/mechanize/history.rb +67 -0
  49. data/lib/www/mechanize/inspect.rb +90 -0
  50. data/lib/www/mechanize/monkey_patch.rb +37 -0
  51. data/lib/www/mechanize/page/base.rb +10 -0
  52. data/lib/www/mechanize/page/frame.rb +22 -0
  53. data/lib/www/mechanize/page/link.rb +50 -0
  54. data/lib/www/mechanize/page/meta.rb +51 -0
  55. data/lib/www/mechanize/page.rb +176 -0
  56. data/lib/www/mechanize/pluggable_parsers.rb +103 -0
  57. data/lib/www/mechanize/redirect_limit_reached_error.rb +18 -0
  58. data/lib/www/mechanize/redirect_not_get_or_head_error.rb +20 -0
  59. data/lib/www/mechanize/response_code_error.rb +25 -0
  60. data/lib/www/mechanize/unsupported_scheme_error.rb +10 -0
  61. data/lib/www/mechanize/util.rb +76 -0
  62. data/lib/www/mechanize.rb +619 -0
  63. data/mechanize.gemspec +41 -0
  64. data/test/chain/test_argument_validator.rb +14 -0
  65. data/test/chain/test_auth_headers.rb +25 -0
  66. data/test/chain/test_custom_headers.rb +18 -0
  67. data/test/chain/test_header_resolver.rb +28 -0
  68. data/test/chain/test_parameter_resolver.rb +35 -0
  69. data/test/chain/test_request_resolver.rb +29 -0
  70. data/test/chain/test_response_reader.rb +24 -0
  71. data/test/data/htpasswd +1 -0
  72. data/test/data/server.crt +16 -0
  73. data/test/data/server.csr +12 -0
  74. data/test/data/server.key +15 -0
  75. data/test/data/server.pem +15 -0
  76. data/test/helper.rb +129 -0
  77. data/test/htdocs/alt_text.html +10 -0
  78. data/test/htdocs/bad_form_test.html +9 -0
  79. data/test/htdocs/button.jpg +0 -0
  80. data/test/htdocs/empty_form.html +6 -0
  81. data/test/htdocs/file_upload.html +26 -0
  82. data/test/htdocs/find_link.html +41 -0
  83. data/test/htdocs/form_multi_select.html +16 -0
  84. data/test/htdocs/form_multival.html +37 -0
  85. data/test/htdocs/form_no_action.html +18 -0
  86. data/test/htdocs/form_no_input_name.html +16 -0
  87. data/test/htdocs/form_select.html +16 -0
  88. data/test/htdocs/form_select_all.html +16 -0
  89. data/test/htdocs/form_select_none.html +17 -0
  90. data/test/htdocs/form_select_noopts.html +10 -0
  91. data/test/htdocs/form_set_fields.html +14 -0
  92. data/test/htdocs/form_test.html +188 -0
  93. data/test/htdocs/frame_test.html +30 -0
  94. data/test/htdocs/google.html +13 -0
  95. data/test/htdocs/iframe_test.html +16 -0
  96. data/test/htdocs/index.html +6 -0
  97. data/test/htdocs/link with space.html +5 -0
  98. data/test/htdocs/meta_cookie.html +11 -0
  99. data/test/htdocs/no_title_test.html +6 -0
  100. data/test/htdocs/relative/tc_relative_links.html +21 -0
  101. data/test/htdocs/tc_bad_links.html +5 -0
  102. data/test/htdocs/tc_base_link.html +8 -0
  103. data/test/htdocs/tc_blank_form.html +11 -0
  104. data/test/htdocs/tc_checkboxes.html +19 -0
  105. data/test/htdocs/tc_encoded_links.html +5 -0
  106. data/test/htdocs/tc_follow_meta.html +8 -0
  107. data/test/htdocs/tc_form_action.html +48 -0
  108. data/test/htdocs/tc_links.html +18 -0
  109. data/test/htdocs/tc_no_attributes.html +16 -0
  110. data/test/htdocs/tc_pretty_print.html +17 -0
  111. data/test/htdocs/tc_radiobuttons.html +17 -0
  112. data/test/htdocs/tc_referer.html +10 -0
  113. data/test/htdocs/tc_relative_links.html +19 -0
  114. data/test/htdocs/tc_textarea.html +23 -0
  115. data/test/htdocs/unusual______.html +5 -0
  116. data/test/servlets.rb +365 -0
  117. data/test/ssl_server.rb +48 -0
  118. data/test/test_authenticate.rb +71 -0
  119. data/test/test_bad_links.rb +25 -0
  120. data/test/test_blank_form.rb +16 -0
  121. data/test/test_checkboxes.rb +61 -0
  122. data/test/test_content_type.rb +13 -0
  123. data/test/test_cookie_class.rb +338 -0
  124. data/test/test_cookie_jar.rb +362 -0
  125. data/test/test_cookies.rb +123 -0
  126. data/test/test_encoded_links.rb +20 -0
  127. data/test/test_errors.rb +49 -0
  128. data/test/test_follow_meta.rb +108 -0
  129. data/test/test_form_action.rb +44 -0
  130. data/test/test_form_as_hash.rb +61 -0
  131. data/test/test_form_button.rb +38 -0
  132. data/test/test_form_no_inputname.rb +15 -0
  133. data/test/test_forms.rb +564 -0
  134. data/test/test_frames.rb +25 -0
  135. data/test/test_get_headers.rb +52 -0
  136. data/test/test_gzipping.rb +22 -0
  137. data/test/test_hash_api.rb +45 -0
  138. data/test/test_history.rb +142 -0
  139. data/test/test_history_added.rb +16 -0
  140. data/test/test_html_unscape_forms.rb +39 -0
  141. data/test/test_if_modified_since.rb +20 -0
  142. data/test/test_keep_alive.rb +31 -0
  143. data/test/test_links.rb +120 -0
  144. data/test/test_mech.rb +268 -0
  145. data/test/test_mechanize_file.rb +47 -0
  146. data/test/test_meta.rb +65 -0
  147. data/test/test_multi_select.rb +106 -0
  148. data/test/test_no_attributes.rb +13 -0
  149. data/test/test_option.rb +18 -0
  150. data/test/test_page.rb +119 -0
  151. data/test/test_pluggable_parser.rb +145 -0
  152. data/test/test_post_form.rb +34 -0
  153. data/test/test_pretty_print.rb +22 -0
  154. data/test/test_radiobutton.rb +75 -0
  155. data/test/test_redirect_limit_reached.rb +41 -0
  156. data/test/test_redirect_verb_handling.rb +45 -0
  157. data/test/test_referer.rb +39 -0
  158. data/test/test_relative_links.rb +40 -0
  159. data/test/test_request.rb +13 -0
  160. data/test/test_response_code.rb +52 -0
  161. data/test/test_save_file.rb +48 -0
  162. data/test/test_scheme.rb +48 -0
  163. data/test/test_select.rb +106 -0
  164. data/test/test_select_all.rb +15 -0
  165. data/test/test_select_none.rb +15 -0
  166. data/test/test_select_noopts.rb +16 -0
  167. data/test/test_set_fields.rb +44 -0
  168. data/test/test_ssl_server.rb +20 -0
  169. data/test/test_subclass.rb +14 -0
  170. data/test/test_textarea.rb +45 -0
  171. data/test/test_upload.rb +109 -0
  172. data/test/test_verbs.rb +25 -0
  173. metadata +314 -0
data/CHANGELOG.rdoc ADDED
@@ -0,0 +1,496 @@
1
+ = Mechanize CHANGELOG
2
+
3
+ === HEAD
4
+
5
+ * Bug Fixes:
6
+
7
+ * Do not apply encoding if encoding equals 'none' Thanks Akinori MUSHA!
8
+ * Custom request headers may be supplied WWW::Mechanize#request_headers
9
+ RF #24516
10
+ * HTML Parser may be set on a per instance level WWW::Mechanize#html_parser
11
+ RF #24693
12
+ * Fixed string encoding in ruby 1.9. RF #2433
13
+ * Rescuing Zlib::DataErrors (Thanks Kelley Reynolds)
14
+ * Fixing a problem with frozen SSL objects. RF #24950
15
+ * Do not send a referer on meta refresh. RF #24945
16
+ * Fixed a bug with double semi-colons in Content-Disposition headers
17
+ * Properly handling cookies that specify a path. RF #25259
18
+
19
+ === 0.9.2 / 2009/03/05
20
+
21
+ * New Features:
22
+ * Mechanize#submit and Form#submit take arbitrary headers(thanks penguincoder)
23
+
24
+ * Bug Fixes:
25
+ * Fixed a bug with bad cookie parsing
26
+ * Form::RadioButton#click unchecks other buttons (RF #24159)
27
+ * Fixed problems with Iconv (RF #24190, RF #24192, RF #24043)
28
+ * POST parameters should be CGI escaped
29
+ * Made Content-Type match case insensitive (Thanks Kelley Reynolds)
30
+ * Non-string form parameters work
31
+
32
+ === 0.9.1 2009/02/23
33
+
34
+ * New Features:
35
+ * Encoding may be specified for a page: Page#encoding=
36
+
37
+ * Bug Fixes:
38
+ * m17n fixes. ありがとう konn!
39
+ * Fixed a problem with base tags. ありがとう Keisuke
40
+ * HEAD requests do not record in the history
41
+ * Default encoding to ISO-8859-1 instead of ASCII
42
+ * Requests with URI instances should not be polluted RF #23472
43
+ * Nonce count fixed for digest auth requests. Thanks Adrian Slapa!
44
+ * Fixed a referer issue with requests using a uri. RF #23472
45
+ * WAP content types will now be parsed
46
+ * Rescued poorly formatted cookies. Thanks Kelley Reynolds!
47
+
48
+ === 0.9.0
49
+
50
+ * Deprecations
51
+ * WWW::Mechanize::List is gone!
52
+ * Mechanize uses Nokogiri as its HTML parser but you may switch to
53
+ Hpricot by using WWW::Mechanize.html_parser = Hpricot
54
+
55
+ * Bug Fixes:
56
+ * Nil check on page when base tag is used #23021
57
+
58
+ === 0.8.5
59
+
60
+ * Deprecations
61
+ * WWW::Mechanize::List will be deprecated in 0.9.0, and warnings have
62
+ been added to help you upgrade.
63
+
64
+ * Bug Fixes:
65
+ * Stopped raising EOF exceptions on HEAD requests. ありがとう:HIRAKU Kuroda
66
+ * Fixed exceptions when a logger is set and file:// requests are made.
67
+ * Made Mechanize 1.9 compatible
68
+ * Not setting the port in the host header for SSL sites.
69
+ * Following refresh headers. Thanks Tim Connor!
70
+ * Cookie Jar handles cookie domains containing ports, like
71
+ 'mydomain.com:443' (Thanks Michal Ochman!)
72
+ * Fixing strange uri escaping problems [#22604]
73
+ * Making content-type determination more robust. (thanks Han Holl!)
74
+ * Dealing with links that are query string only. [#22402]
75
+ * Nokogiri may be dropped in as a replacement.
76
+ WWW::Mechanize.html_parser = Nokogiri::HTML
77
+ * Making sure the correct page is added to the history on meta refresh.
78
+ [#22708]
79
+ * Mechanize#get requests no longer send a referer unless they are relative
80
+ requests.
81
+
82
+ === 0.8.4
83
+
84
+ * Bug Fixes:
85
+ * Setting the port number on the host header.
86
+ * Fixing Authorization headers for picky servers
87
+
88
+ === 0.8.3
89
+
90
+ * Bug Fixes:
91
+ * Making sure logger is set during SSL connections.
92
+
93
+ === 0.8.2
94
+
95
+ * Bug Fixes:
96
+ * Doh! I was accidentally setting headers twice.
97
+
98
+ === 0.8.1
99
+
100
+ * Bug Fixes:
101
+ * Fixed problem with nil pointer when logger is set
102
+
103
+ === 0.8.0
104
+
105
+ * New Features:
106
+ * Lifecycle hooks. Mechanize#pre_connect_hooks, Mechanize#post_connect_hooks
107
+ * file:/// urls are now supported
108
+ * Added Mechanize::Page#link_with, frame_with for searching for links using
109
+ +criteria+.
110
+ * Implementing PUT, DELETE, and HEAD requests
111
+
112
+ * Bug Fixes:
113
+ * Fixed an infinite loop when content-length and body length don't match.
114
+ * Only setting headers once
115
+ * Adding IIS authentication support
116
+
117
+ === 0.7.8
118
+
119
+ * Bug Fixes:
120
+ * Fixed bug when receiving a 304 response (HTTPNotModified) on a page not
121
+ cached in history.
122
+ * #21428 Default to HTML parser for 'application/xhtml+xml' content-type.
123
+ * Fixed an issue where redirects were resending posted data
124
+
125
+ === 0.7.7
126
+
127
+ * New Features:
128
+ * Page#form_with takes a +criteria+ hash.
129
+ * Page#form is changed to Page#form_with
130
+ * Mechanize#get takes custom http headers. Thanks Mike Dalessio!
131
+ * Form#click_button submits a form defaulting to the current button.
132
+ * Form#set_fields now takes a hash. Thanks Tobi!
133
+ * Mechanize#redirection_limit= for setting the max number of redirects.
134
+
135
+ * Bug Fixes:
136
+ * Added more examples. Thanks Robert Jackson.
137
+ * #20480 Making sure the Host header is set.
138
+ * #20672 Making sure cookies with weird semicolons work.
139
+ * Fixed bug with percent signs in urls.
140
+ http://d.hatena.ne.jp/kitamomonga/20080410/ruby_mechanize_percent_url_bug
141
+ * #21132 Not checking for EOF errors on redirect
142
+ * Fixed a weird gzipping error.
143
+ * #21233 Smarter multipart boundary. Thanks Todd Willey!
144
+ * #20097 Supporting meta tag cookies.
145
+
146
+ === 0.7.6
147
+
148
+ * New Features:
149
+ * Added support for reading Mozilla cookie jars. Thanks Chris Riddoch!
150
+ * Moving text, password, hidden, int to default. Thanks Tim Harper!
151
+ * Mechanize#history_added callback for page loads. Thanks Tobi Reif!
152
+ * Mechanize#scheme_handlers callbacks for handling unsupported schemes on
153
+ links.
154
+
155
+ * Bug Fixes:
156
+ * Ignoring scheme case
157
+ http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=470642
158
+ * Not encoding tildes in uris. Thanks Bruno. [#19380]
159
+ * Resetting request bodies when retrying form posts. Thanks Bruno. [#19379]
160
+ * Throwing away keep alive connections on EPIPE and ECONNRESET.
161
+ * Duplicating request headers when retrying a 401. Thanks Hiroshi Ichikawa.
162
+ * Simulating an EOF error when a response length is bad. Thanks Tobias Gruetzmacher.
163
+ http://rubyforge.org/tracker/index.php?func=detail&aid=19178&group_id=1453&atid=5711
164
+ * Defaulting option tags to the inner text.
165
+ http://rubyforge.org/tracker/index.php?func=detail&aid=19976&group_id=1453&atid=5709
166
+ * Supporting blank strings for option values.
167
+ http://rubyforge.org/tracker/index.php?func=detail&aid=19975&group_id=1453&atid=5709
168
+
169
+ === 0.7.5
170
+
171
+ * Fixed a bug when fetching files and not pages. Thanks Mat Schaffer!
172
+
173
+ === 0.7.4
174
+
175
+ * doh!
176
+
177
+ === 0.7.3
178
+
179
+ * Pages are now yielded to blocks given to WWW::Mechanize#get
180
+ * WWW::Mechanize#get now takes hash arguments for uri parameters.
181
+ * WWW::Mechanize#post takes an IO object as a parameter and posts correctly.
182
+ * Fixing a strange zlib inflate problem on windows
183
+
184
+ === 0.7.2
185
+
186
+ * Handling gzipped responses with no Content-Length header
187
+
188
+ === 0.7.1
189
+
190
+ * Added iPhone to the user agent aliases. [#17572]
191
+ * Fixed a bug with EOF errors in net/http. [#17570]
192
+ * Handling 0 length gzipped responses. [#17471]
193
+
194
+ === 0.7.0
195
+
196
+ * Removed Ruby 1.8.2 support
197
+ * Changed parser to lazily parse links
198
+ * Lazily parsing document
199
+ * Adding verify_callback for SSL requests. Thanks Mike Dalessio!
200
+ * Fixed a bug with Accept-Language header. Thanks Bill Siggelkow.
201
+
202
+ === 0.6.11
203
+
204
+ * Detecting single quotes in meta redirects.
205
+ * Adding pretty inspect for ruby versions > 1.8.4 (Thanks Joel Kociolek)
206
+ http://rubyforge.org/tracker/index.php?func=detail&aid=13150&group_id=1453&atid=5709
207
+ * Fixed bug with file name in multipart posts
208
+ http://rubyforge.org/tracker/?func=detail&aid=15594&group_id=1453&atid=5709
209
+ * Posting forms relative to the originating page. Thanks Mortee.
210
+ * Added a FAQ
211
+ http://rubyforge.org/tracker/?func=detail&aid=15772&group_id=1453&atid=5709
212
+
213
+ === 0.6.10
214
+
215
+ * Made digest authentication work with POSTs.
216
+ * Made sure page was HTML before following meta refreshes.
217
+ http://rubyforge.org/tracker/index.php?func=detail&aid=12260&group_id=1453&atid=5709
218
+ * Made sure that URLS with a host and no path would default to '/' for history
219
+ purposes.
220
+ http://rubyforge.org/tracker/index.php?func=detail&aid=12368&group_id=1453&atid=5709
221
+ * Avoiding memory leaks with transact. Thanks Tobias Gruetzmacher!
222
+ http://rubyforge.org/tracker/index.php?func=detail&aid=12057&group_id=1453&atid=5711
223
+ * Fixing a problem with # signs in the file name. Thanks Tobias Gruetzmacher!
224
+ http://rubyforge.org/tracker/index.php?func=detail&aid=12510&group_id=1453&atid=5711
225
+ * Made sure that blank form values are submitted.
226
+ http://rubyforge.org/tracker/index.php?func=detail&aid=12505&group_id=1453&atid=5709
227
+ * Mechanize now respects the base tag. Thanks Stephan Dale.
228
+ http://rubyforge.org/tracker/index.php?func=detail&aid=12468&group_id=1453&atid=5709
229
+ * Aliasing inspect to pretty_inspect. Thanks Eric Promislow.
230
+ http://rubyforge.org/pipermail/mechanize-users/2007-July/000157.html
231
+
232
+ === 0.6.9
233
+
234
+ * Updating UTF-8 support for urls
235
+ * Adding AREA tags to the links list.
236
+ http://rubyforge.org/pipermail/mechanize-users/2007-May/000140.html
237
+ * WWW::Mechanize#follow_meta_refresh will allow you to automatically follow
238
+ meta refresh tags. [#10032]
239
+ * Adding x-gzip to accepted content-encoding. Thanks Simon Strandgaard
240
+ http://rubyforge.org/tracker/index.php?func=detail&aid=11167&group_id=1453&atid=5711
241
+ * Added Digest Authentication support. Thanks to Ryan Davis and Eric Hodel,
242
+ you get a gold star!
243
+
244
+ === 0.6.8
245
+
246
+ * Keep alive can be shut off now with WWW::Mechanize#keep_alive
247
+ * Conditional requests can be shut off with WWW::Mechanize#conditional_requests
248
+ * Monkey patched Net::HTTP#keep_alive?
249
+ * [#9877] Moved last request time. Thanks Max Stepanov
250
+ * Added WWW::Mechanize::File#save
251
+ * Defaulting file name to URI or Content-Disposition
252
+ * Updating compatibility with hpricot
253
+ * Added more unit tests
254
+
255
+ === 0.6.7
256
+
257
+ * Fixed a bug with keep-alive requests
258
+ * [#9549] fixed problem with cookie paths
259
+
260
+ === 0.6.6
261
+
262
+ * Removing hpricot overrides
263
+ * Fixed a bug where alt text can be nil. Thanks Yannick!
264
+ * Unparseable expiration dates in cookies are now treated as session cookies
265
+ * Caching connections
266
+ * Requests now default to keep alive
267
+ * [#9434] Fixed bug where html entities weren't decoded
268
+ * [#9150] Updated mechanize history to deal with redirects
269
+
270
+ === 0.6.5
271
+
272
+ * Copying headers to a hash to prevent memory leaks
273
+ * Speeding up page parsing
274
+ * Aliased fields to elements
275
+ * Adding If-Modified-Since header
276
+ * Added delete_field! to form. Thanks to Sava Chankov
277
+ * Updated uri escaping to support high order characters. Thanks to Henrik Nyh.
278
+ * Better handling relative URIs. Thanks to Henrik Nyh
279
+ * Now handles pipes in URLs
280
+ http://rubyforge.org/tracker/?func=detail&aid=7140&group_id=1453&atid=5709
281
+ * Now escaping html entities in form fields.
282
+ http://rubyforge.org/tracker/?func=detail&aid=7563&group_id=1453&atid=5709
283
+ * Added MSIE 7.0 user agent string
284
+
285
+ === 0.6.4
286
+
287
+ * Adding the "redirect_ok" method to Mechanize to stop mechanize from
288
+ following redirects.
289
+ http://rubyforge.org/tracker/index.php?func=detail&aid=6571&group_id=1453&atid=5712
290
+ * Added protected method Mechanize#set_headers so that subclasses can set
291
+ custom headers.
292
+ http://rubyforge.org/tracker/?func=detail&aid=7208&group_id=1453&atid=5712
293
+ * Aliased Page#referer to Page#page
294
+ * Fixed a bug when clicking relative urls
295
+ http://rubyforge.org/pipermail/mechanize-users/2006-November/000035.html
296
+ * Fixing a bug when bad version or max age is passed to Cookie::parse
297
+ http://rubyforge.org/pipermail/mechanize-users/2006-November/000033.html
298
+ * Fixing a bug with response codes. [#6526]
299
+ * Fixed bug [#6548]. Input type of 'button' was not being added as a button.
300
+ * Fixed bug [#7139]. REXML parser calls hpricot parser by accident
301
+
302
+ === 0.6.3
303
+
304
+ * Added keys and values methods to Form
305
+ * Added has_value? to Form
306
+ * Added a has_field? method to Form
307
+ * The add_field! method on Form now creates a field for you on the form.
308
+ Thanks to Mat Schaffer for the patch.
309
+ http://rubyforge.org/pipermail/mechanize-users/2006-November/000025.html
310
+ * Fixed a bug when form actions have html encoded entities in them.
311
+ http://rubyforge.org/pipermail/mechanize-users/2006-October/000019.html
312
+ * Fixed a bug when links or frame sources have html encoded entities in the
313
+ href or src.
314
+ * Fixed a bug where '#' symbols are encoded
315
+ http://rubyforge.org/forum/message.php?msg_id=14747
316
+
317
+ === 0.6.2
318
+
319
+ * Added a yield to Page#form so that dealing with forms can be more DSL like.
320
+ * Added the parsed page to the ResponseCodeError so that the parsed results
321
+ can be accessed even in the event of an error.
322
+ http://rubyforge.org/pipermail/mechanize-users/2006-September/000007.html
323
+ * Updated documentation (Thanks to Paul Smith)
324
+
325
+ === 0.6.1
326
+
327
+ * Added a method to Form called "submit". Now forms can be submitted by
328
+ calling a method on the form.
329
+ * Added a click method to links
330
+ * Added an REXML pluggable parser for backwards compatibility. To use it,
331
+ just do this:
332
+ agent.pluggable_parser.html = WWW::Mechanize::REXMLPage
333
+ * Fixed a bug with referrers by adding a page attribute to forms and links.
334
+ * Fixed a bug where domain names were case sensitive.
335
+ http://tenderlovemaking.com/2006/09/04/road-to-ruby-mechanize-060/#comment-53
336
+ * Fixed a bug with URI escaped links.
337
+ http://rubyforge.org/pipermail/mechanize-users/2006-September/000002.html
338
+ * Fixed a bug when options in select lists don't have a value. Thanks Dan Higham
339
+ [#5837] Code in lib/mechanize/form_elements.rb is incorrect.
340
+ * Fixed a bug with loading text in to links.
341
+ http://rubyforge.org/pipermail/mechanize-users/2006-September/000000.html
342
+
343
+ === 0.6.0
344
+
345
+ * Changed main parser to use hpricot
346
+ * Made WWW::Mechanize::Page class searchable like hpricot
347
+ * Updated WWW::Mechanize#click to support hpricot links like this:
348
+ @agent.click (page/"a").first
349
+ * Clicking a Frame is now possible:
350
+ @agent.click (page/"frame").first
351
+ * Removed deprecated attr_finder
352
+ * Removed REXML helper methods since the main parser is now hpricot
353
+ * Overhauled cookie parser to use WEBrick::Cookie
354
+
355
+ === 0.5.4
356
+
357
+ * Added WWW::Mechanize#transact for saving history state in a
358
+ transaction. See the EXAMPLES file. Thanks Johan Kiviniemi.
359
+ * Added support for gzip compressed pages
360
+ * Forms can now be accessed like a hash. For example, to set the value
361
+ of an input field named 'name' to "Aaron", you can do this:
362
+ form['name'] = "Aaron"
363
+ Or to get the value of a field named 'name', do this:
364
+ puts form['name']
365
+ * File uploads will now read the file specified in FileUpload#file_name
366
+ * FileUpload can use an IO object in FileUpload#file_data
367
+ * Fixed a bug with saving files on windows
368
+ * Fixed a bug with the filename being set in forms
369
+
370
+ === 0.5.3
371
+
372
+ * Mechanize#click will now act on the first element of an array. So if an
373
+ array of links is passed to WWW::Mechanize#click, the first link is clicked.
374
+ That means the syntax for clicking links is shortened and still supports
375
+ selecting a link. The following are equivalent:
376
+ agent.click page.links.first
377
+ agent.click page.links
378
+ * Fixed a bug with spaces in href's and get's
379
+ * Added a tick, untick, and click method to radio buttons so that
380
+ radiobuttons can be "clicked"
381
+ * Added a tick, untick, and click method to check boxes so that
382
+ checkboxes can be "clicked"
383
+ * Options on Select lists can now be "tick"ed, and "untick"ed.
384
+ * Fixed a potential bug conflicting with rails. Thanks Eric Kolve
385
+ * Updated log4r support for a speed increase. Thanks Yinon Bentor
386
+ * Added inspect methods and pretty printing
387
+
388
+ === 0.5.2
389
+
390
+ * Fixed a bug with input names that are nil
391
+ * Added a warning when using attr_finder because attr_finder will be deprecated
392
+ in 0.6.0 in favor of method calls. So this syntax:
393
+ @agent.links(:text => 'foo')
394
+ should be changed to this:
395
+ @agent.links.text('foo')
396
+ * Added support for selecting multiple options in select tags that support
397
+ multiple options. See WWW::Mechanize::MultiSelectList.
398
+ * New select list methods have been added, select_all, select_none.
399
+ * Options for select lists can now be "clicked" which toggles their selection,
400
+ they can be "selected" and "unselected". See WWW::Mechanize::Option
401
+ * Added a method to set multiple fields at the same time,
402
+ WWW::Mechanize::Form#set_fields. Which can be used like so:
403
+ form.set_fields( :foo => 'bar', :name => 'Aaron' )
404
+
405
+ === 0.5.1
406
+
407
+ * Fixed bug with file uploads
408
+ * Added performance tweaks to the cookie class
409
+
410
+ === 0.5.0
411
+
412
+ * Added pluggable parsers. (Thanks to Eric Kolve for the idea)
413
+ * Changed namespace so all classes are under WWW::Mechanize.
414
+ * Updating Forms so that fields can be used as accessors (Thanks Gregory Brown)
415
+ * Added WWW::Mechanize::File as default object used for unknown content types.
416
+ * Added 'save_as' method to Mechanize::File, so any page can be saved.
417
+ * Adding 'save_as' and 'load' to CookieJar so that cookies can be saved
418
+ between sessions.
419
+ * Added WWW::Mechanize::FileSaver pluggable parser to automatically save files.
420
+ * Added WWW::Mechanize::Page#title for page titles
421
+ * Added OpenSSL certificate support (Thanks Mike Dalessio)
422
+ * Removed support for body filters in favor of pluggable parsers.
423
+ * Fixed cookie bug adding a '/' when the url is missing one (Thanks Nick Dainty)
424
+
425
+ === 0.4.7
426
+
427
+ * Fixed bug with no action in forms. Thanks to Adam Wiggins
428
+ * Setting a default user-agent string
429
+ * Added house cleaning to the cookie jar so expired cookies don't stick around.
430
+ * Added new method WWW::Form#field to find the first field with a given name.
431
+ (thanks to Gregory Brown)
432
+ * Added WWW::Mechanize#get_file for fetching non text/html files
433
+
434
+ === 0.4.6
435
+
436
+ * Added support for proxies
437
+ * Added a uri field to WWW::Link
438
+ * Added an error class WWW::Mechanize::ContentTypeError
439
+ * Added image alt text to link text
440
+ * Added a visited? method to WWW::Mechanize
441
+ * Added Array#value= which will set the first value to the argument. That
442
+ allows syntax as such: form.fields.name('q').value = 'xyz'
443
+ Before it was like this: form.fields.name('q').first.value = 'xyz'
444
+
445
+ === 0.4.5
446
+
447
+ * Added support for multiple values of the same name
448
+ * Updated build_query_string to take an array of arrays (Thanks Michal Janeczek)
449
+ * Added WWW::Mechanize#body_filter= so that response bodies can be preprocessed
450
+ * Added WWW::Page#body_filter= so that response bodies can be preprocessed
451
+ * Added support for more date formats in the cookie parser
452
+ * Fixed a bug with empty select lists
453
+ * Fixing a problem with cookies not handling no spaces after semicolons
454
+
455
+ === 0.4.4
456
+
457
+ * Fixed error in method signature, basic_authetication is now basic_auth
458
+ * Fixed bug with encoding names in file uploads (Big thanks to Alex Young)
459
+ * Added options to the select list
460
+
461
+ === 0.4.3
462
+
463
+ * Added syntactic sugar for finding things
464
+ * Fixed bug with HttpOnly option in cookies
465
+ * Fixed a bug with cookie date parsing
466
+ * Defaulted dropdown lists to the first element
467
+ * Added unit tests
468
+
469
+ === 0.4.2
470
+
471
+ * Added support for iframes
472
+ * Made mechanize dependent on ruby-web rather than narf
473
+ * Added unit tests
474
+ * Fixed a bunch of warnings
475
+
476
+ === 0.4.1
477
+
478
+ * Added support for file uploading
479
+ * Added support for frames (Thanks Gabriel[mailto:leerbag@googlemail.com])
480
+ * Added more unit tests
481
+ * Fixed some bugs
482
+
483
+ === 0.4.0
484
+
485
+ * Added more unit tests
486
+ * Added a cookie jar with better cookie support, included expiration of cookies
487
+ and general cookie security.
488
+ * Updated mechanize to use built in net/http if ruby version is new enough.
489
+ * Added support for meta refresh tags
490
+ * Defaulted form actions to 'GET'
491
+ * Fixed various bugs
492
+ * Added more unit tests
493
+ * Added a response code exception
494
+ * Thanks to Brian Ellin (brianellin@gmail.com) for:
495
+ Added support for CA files, and support for 301 response codes
496
+
data/EXAMPLES.rdoc ADDED
@@ -0,0 +1,171 @@
1
+ = WWW::Mechanize examples
2
+
3
+ == Google
4
+ require 'rubygems'
5
+ require 'mechanize'
6
+
7
+ a = WWW::Mechanize.new { |agent|
8
+ agent.user_agent_alias = 'Mac Safari'
9
+ }
10
+
11
+ a.get('http://google.com/') do |page|
12
+ search_result = page.form_with(:name => 'f') do |search|
13
+ search.q = 'Hello world'
14
+ end.submit
15
+
16
+ search_result.links.each do |link|
17
+ puts link.text
18
+ end
19
+ end
20
+
21
+ == Rubyforge
22
+
23
+ a = WWW::Mechanize.new
24
+ a.get('http://rubyforge.org/') do |page|
25
+ # Click the login link
26
+ login_page = a.click(page.links.text(/Log In/))
27
+
28
+ # Submit the login form
29
+ my_page = login_page.form_with(:action => '/account/login.php') do |f|
30
+ f.form_loginname = ARGV[0]
31
+ f.form_pw = ARGV[1]
32
+ end.click_button
33
+
34
+ my_page.links.each do |link|
35
+ text = link.text.strip
36
+ next unless text.length > 0
37
+ puts text
38
+ end
39
+ end
40
+
41
+ == File Upload
42
+ Upload a file to flickr.
43
+
44
+ a = WWW::Mechanize.new { |agent|
45
+ # Flickr refreshes after login
46
+ agent.follow_meta_refresh = true
47
+ }
48
+
49
+ a.get('http://flickr.com/') do |home_page|
50
+ signin_page = a.click(home_page.links.text(/Sign In/))
51
+
52
+ my_page = signin_page.form_with(:name => 'login_form') do |form|
53
+ form.login = ARGV[0]
54
+ form.passwd = ARGV[1]
55
+ end.submit
56
+
57
+ # Click the upload link
58
+ upload_page = a.click(my_page.links.text(/Upload/))
59
+
60
+ # We want the basic upload page.
61
+ upload_page = a.click(upload_page.links.text(/basic Uploader/))
62
+
63
+ # Upload the file
64
+ upload_page.form_with(:method => 'POST') do |upload_form|
65
+ upload_form.file_uploads.first.file_name = ARGV[2]
66
+ end.submit
67
+ end
68
+
69
+ == Pluggable Parsers
70
+ Let's say you want html pages to automatically be parsed with Rubyful Soup.
71
+ This example shows you how:
72
+
73
+ require 'rubygems'
74
+ require 'mechanize'
75
+ require 'rubyful_soup'
76
+
77
+ class SoupParser < WWW::Mechanize::Page
78
+ attr_reader :soup
79
+ def initialize(uri = nil, response = nil, body = nil, code = nil)
80
+ @soup = BeautifulSoup.new(body)
81
+ super(uri, response, body, code)
82
+ end
83
+ end
84
+
85
+ agent = WWW::Mechanize.new
86
+ agent.pluggable_parser.html = SoupParser
87
+
88
+ Now all HTML pages will be parsed with the SoupParser class, and automatically
89
+ give you access to a method called 'soup' where you can get access to the
90
+ Beautiful Soup for that page.
91
+
92
+ == Using a proxy
93
+
94
+ require 'rubygems'
95
+ require 'mechanize'
96
+
97
+ agent = WWW::Mechanize.new
98
+ agent.set_proxy('localhost', '8000')
99
+ page = agent.get(ARGV[0])
100
+ puts page.body
101
+
102
+ == The transact method
103
+
104
+ transact runs the given block and then resets the page history. I.e. after the
105
+ block has been executed, you're back at the original page; no need to count how
106
+ many times to call the back method at the end of a loop (while accounting for
107
+ possible exceptions).
108
+
109
+ This example also demonstrates subclassing Mechanize.
110
+
111
+ require 'mechanize'
112
+
113
+ class TestMech < WWW::Mechanize
114
+ def process
115
+ get 'http://rubyforge.org/'
116
+ search_form = page.forms.first
117
+ search_form.words = 'WWW'
118
+ submit search_form
119
+
120
+ page.links_with(:href => %r{/projects/} ).each do |link|
121
+ next if link.href =~ %r{/projects/support/}
122
+
123
+ puts 'Loading %-30s %s' % [link.href, link.text]
124
+ begin
125
+ transact do
126
+ click link
127
+ # Do stuff, maybe click more links.
128
+ end
129
+ # Now we're back at the original page.
130
+
131
+ rescue => e
132
+ $stderr.puts "#{e.class}: #{e.message}"
133
+ end
134
+ end
135
+ end
136
+ end
137
+
138
+ TestMech.new.process
139
+
140
+ == Client Certificate Authentication (Mutual Auth)
141
+
142
+ In most cases a client certificate is created as an additional layer of security
143
+ for certain websites. The specific case that this was initially tested on was
144
+ for automating the download of archived images from a bank's (Wachovia) lockbox
145
+ system. Once the certificate is installed into your browser you will have to
146
+ export it and split the certificate and private key into separate files. Exported
147
+ files are usually in .p12 format (IE 7 & Firefox 2.0) which stands for PKCS #12.
148
+ You can convert them from p12 to pem format by using the following commands:
149
+
150
+ openssl.exe pkcs12 -in input_file.p12 -clcerts -out example.key -nocerts -nodes
151
+ openssl.exe pkcs12 -in input_file.p12 -clcerts -out example.cer -nokeys
152
+
153
+ require 'rubygems'
154
+ require 'mechanize'
155
+
156
+ # create Mechanize instance
157
+ agent = WWW::Mechanize.new
158
+
159
+ # set the path of the certificate file
160
+ agent.cert = 'example.cer'
161
+
162
+ # set the path of the private key file
163
+ agent.key = 'example.key'
164
+
165
+ # get the login form & fill it out with the username/password
166
+ login_form = agent.get("http://example.com/login_page").form('Login')
167
+ login_form.Userid = 'TestUser'
168
+ login_form.Password = 'TestPassword'
169
+
170
+ # submit login form
171
+ agent.submit(login_form, login_form.buttons.first)
data/FAQ.rdoc ADDED
@@ -0,0 +1,11 @@
1
+ Q: I keep getting an EOFError:
2
+ protocol.rb:133:in `sysread': end of file reached (EOFError)
3
+
4
+ A: Some people have experienced an EOFError during normal mechanize usage.
5
+ Most of the time this occurs because the remote website claims to support
6
+ keep alives, but does not implement them correctly. Try turning off
7
+ keep alives on your mechanize object:
8
+
9
+ mech.keep_alive = false
10
+
11
+