tenderlove-mechanize 0.9.3.20090617085936
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGELOG.rdoc +496 -0
- data/EXAMPLES.rdoc +171 -0
- data/FAQ.rdoc +11 -0
- data/GUIDE.rdoc +122 -0
- data/LICENSE.rdoc +340 -0
- data/Manifest.txt +169 -0
- data/README.rdoc +60 -0
- data/Rakefile +43 -0
- data/examples/flickr_upload.rb +23 -0
- data/examples/mech-dump.rb +7 -0
- data/examples/proxy_req.rb +9 -0
- data/examples/rubyforge.rb +21 -0
- data/examples/spider.rb +11 -0
- data/lib/mechanize.rb +7 -0
- data/lib/www/mechanize/chain/auth_headers.rb +80 -0
- data/lib/www/mechanize/chain/body_decoding_handler.rb +48 -0
- data/lib/www/mechanize/chain/connection_resolver.rb +78 -0
- data/lib/www/mechanize/chain/custom_headers.rb +23 -0
- data/lib/www/mechanize/chain/handler.rb +9 -0
- data/lib/www/mechanize/chain/header_resolver.rb +53 -0
- data/lib/www/mechanize/chain/parameter_resolver.rb +24 -0
- data/lib/www/mechanize/chain/post_connect_hook.rb +0 -0
- data/lib/www/mechanize/chain/pre_connect_hook.rb +22 -0
- data/lib/www/mechanize/chain/request_resolver.rb +32 -0
- data/lib/www/mechanize/chain/response_body_parser.rb +40 -0
- data/lib/www/mechanize/chain/response_header_handler.rb +50 -0
- data/lib/www/mechanize/chain/response_reader.rb +41 -0
- data/lib/www/mechanize/chain/ssl_resolver.rb +42 -0
- data/lib/www/mechanize/chain/uri_resolver.rb +77 -0
- data/lib/www/mechanize/chain.rb +34 -0
- data/lib/www/mechanize/content_type_error.rb +16 -0
- data/lib/www/mechanize/cookie.rb +72 -0
- data/lib/www/mechanize/cookie_jar.rb +191 -0
- data/lib/www/mechanize/file.rb +73 -0
- data/lib/www/mechanize/file_response.rb +62 -0
- data/lib/www/mechanize/file_saver.rb +39 -0
- data/lib/www/mechanize/form/button.rb +8 -0
- data/lib/www/mechanize/form/check_box.rb +13 -0
- data/lib/www/mechanize/form/field.rb +28 -0
- data/lib/www/mechanize/form/file_upload.rb +24 -0
- data/lib/www/mechanize/form/image_button.rb +23 -0
- data/lib/www/mechanize/form/multi_select_list.rb +69 -0
- data/lib/www/mechanize/form/option.rb +51 -0
- data/lib/www/mechanize/form/radio_button.rb +38 -0
- data/lib/www/mechanize/form/select_list.rb +45 -0
- data/lib/www/mechanize/form.rb +360 -0
- data/lib/www/mechanize/headers.rb +12 -0
- data/lib/www/mechanize/history.rb +67 -0
- data/lib/www/mechanize/inspect.rb +90 -0
- data/lib/www/mechanize/monkey_patch.rb +37 -0
- data/lib/www/mechanize/page/base.rb +10 -0
- data/lib/www/mechanize/page/frame.rb +22 -0
- data/lib/www/mechanize/page/link.rb +50 -0
- data/lib/www/mechanize/page/meta.rb +51 -0
- data/lib/www/mechanize/page.rb +176 -0
- data/lib/www/mechanize/pluggable_parsers.rb +103 -0
- data/lib/www/mechanize/redirect_limit_reached_error.rb +18 -0
- data/lib/www/mechanize/redirect_not_get_or_head_error.rb +20 -0
- data/lib/www/mechanize/response_code_error.rb +25 -0
- data/lib/www/mechanize/unsupported_scheme_error.rb +10 -0
- data/lib/www/mechanize/util.rb +76 -0
- data/lib/www/mechanize.rb +619 -0
- data/mechanize.gemspec +41 -0
- data/test/chain/test_argument_validator.rb +14 -0
- data/test/chain/test_auth_headers.rb +25 -0
- data/test/chain/test_custom_headers.rb +18 -0
- data/test/chain/test_header_resolver.rb +28 -0
- data/test/chain/test_parameter_resolver.rb +35 -0
- data/test/chain/test_request_resolver.rb +29 -0
- data/test/chain/test_response_reader.rb +24 -0
- data/test/data/htpasswd +1 -0
- data/test/data/server.crt +16 -0
- data/test/data/server.csr +12 -0
- data/test/data/server.key +15 -0
- data/test/data/server.pem +15 -0
- data/test/helper.rb +129 -0
- data/test/htdocs/alt_text.html +10 -0
- data/test/htdocs/bad_form_test.html +9 -0
- data/test/htdocs/button.jpg +0 -0
- data/test/htdocs/empty_form.html +6 -0
- data/test/htdocs/file_upload.html +26 -0
- data/test/htdocs/find_link.html +41 -0
- data/test/htdocs/form_multi_select.html +16 -0
- data/test/htdocs/form_multival.html +37 -0
- data/test/htdocs/form_no_action.html +18 -0
- data/test/htdocs/form_no_input_name.html +16 -0
- data/test/htdocs/form_select.html +16 -0
- data/test/htdocs/form_select_all.html +16 -0
- data/test/htdocs/form_select_none.html +17 -0
- data/test/htdocs/form_select_noopts.html +10 -0
- data/test/htdocs/form_set_fields.html +14 -0
- data/test/htdocs/form_test.html +188 -0
- data/test/htdocs/frame_test.html +30 -0
- data/test/htdocs/google.html +13 -0
- data/test/htdocs/iframe_test.html +16 -0
- data/test/htdocs/index.html +6 -0
- data/test/htdocs/link with space.html +5 -0
- data/test/htdocs/meta_cookie.html +11 -0
- data/test/htdocs/no_title_test.html +6 -0
- data/test/htdocs/relative/tc_relative_links.html +21 -0
- data/test/htdocs/tc_bad_links.html +5 -0
- data/test/htdocs/tc_base_link.html +8 -0
- data/test/htdocs/tc_blank_form.html +11 -0
- data/test/htdocs/tc_checkboxes.html +19 -0
- data/test/htdocs/tc_encoded_links.html +5 -0
- data/test/htdocs/tc_follow_meta.html +8 -0
- data/test/htdocs/tc_form_action.html +48 -0
- data/test/htdocs/tc_links.html +18 -0
- data/test/htdocs/tc_no_attributes.html +16 -0
- data/test/htdocs/tc_pretty_print.html +17 -0
- data/test/htdocs/tc_radiobuttons.html +17 -0
- data/test/htdocs/tc_referer.html +10 -0
- data/test/htdocs/tc_relative_links.html +19 -0
- data/test/htdocs/tc_textarea.html +23 -0
- data/test/htdocs/unusual______.html +5 -0
- data/test/servlets.rb +365 -0
- data/test/ssl_server.rb +48 -0
- data/test/test_authenticate.rb +71 -0
- data/test/test_bad_links.rb +25 -0
- data/test/test_blank_form.rb +16 -0
- data/test/test_checkboxes.rb +61 -0
- data/test/test_content_type.rb +13 -0
- data/test/test_cookie_class.rb +338 -0
- data/test/test_cookie_jar.rb +362 -0
- data/test/test_cookies.rb +123 -0
- data/test/test_encoded_links.rb +20 -0
- data/test/test_errors.rb +49 -0
- data/test/test_follow_meta.rb +108 -0
- data/test/test_form_action.rb +44 -0
- data/test/test_form_as_hash.rb +61 -0
- data/test/test_form_button.rb +38 -0
- data/test/test_form_no_inputname.rb +15 -0
- data/test/test_forms.rb +564 -0
- data/test/test_frames.rb +25 -0
- data/test/test_get_headers.rb +52 -0
- data/test/test_gzipping.rb +22 -0
- data/test/test_hash_api.rb +45 -0
- data/test/test_history.rb +142 -0
- data/test/test_history_added.rb +16 -0
- data/test/test_html_unscape_forms.rb +39 -0
- data/test/test_if_modified_since.rb +20 -0
- data/test/test_keep_alive.rb +31 -0
- data/test/test_links.rb +120 -0
- data/test/test_mech.rb +268 -0
- data/test/test_mechanize_file.rb +47 -0
- data/test/test_meta.rb +65 -0
- data/test/test_multi_select.rb +106 -0
- data/test/test_no_attributes.rb +13 -0
- data/test/test_option.rb +18 -0
- data/test/test_page.rb +119 -0
- data/test/test_pluggable_parser.rb +145 -0
- data/test/test_post_form.rb +34 -0
- data/test/test_pretty_print.rb +22 -0
- data/test/test_radiobutton.rb +75 -0
- data/test/test_redirect_limit_reached.rb +41 -0
- data/test/test_redirect_verb_handling.rb +45 -0
- data/test/test_referer.rb +39 -0
- data/test/test_relative_links.rb +40 -0
- data/test/test_request.rb +13 -0
- data/test/test_response_code.rb +52 -0
- data/test/test_save_file.rb +48 -0
- data/test/test_scheme.rb +48 -0
- data/test/test_select.rb +106 -0
- data/test/test_select_all.rb +15 -0
- data/test/test_select_none.rb +15 -0
- data/test/test_select_noopts.rb +16 -0
- data/test/test_set_fields.rb +44 -0
- data/test/test_ssl_server.rb +20 -0
- data/test/test_subclass.rb +14 -0
- data/test/test_textarea.rb +45 -0
- data/test/test_upload.rb +109 -0
- data/test/test_verbs.rb +25 -0
- metadata +314 -0
data/CHANGELOG.rdoc
ADDED
@@ -0,0 +1,496 @@
|
|
1
|
+
= Mechanize CHANGELOG
|
2
|
+
|
3
|
+
=== HEAD
|
4
|
+
|
5
|
+
* Bug Fixes:
|
6
|
+
|
7
|
+
* Do not apply encoding if encoding equals 'none' Thanks Akinori MUSHA!
|
8
|
+
* Custom request headers may be supplied WWW::Mechanize#request_headers
|
9
|
+
RF #24516
|
10
|
+
* HTML Parser may be set on a per instance level WWW::Mechanize#html_parser
|
11
|
+
RF #24693
|
12
|
+
* Fixed string encoding in ruby 1.9. RF #2433
|
13
|
+
* Rescuing Zlib::DataErrors (Thanks Kelley Reynolds)
|
14
|
+
* Fixing a problem with frozen SSL objects. RF #24950
|
15
|
+
* Do not send a referer on meta refresh. RF #24945
|
16
|
+
* Fixed a bug with double semi-colons in Content-Disposition headers
|
17
|
+
* Properly handling cookies that specify a path. RF #25259
|
18
|
+
|
19
|
+
=== 0.9.2 / 2009/03/05
|
20
|
+
|
21
|
+
* New Features:
|
22
|
+
* Mechanize#submit and Form#submit take arbitrary headers (thanks penguincoder)
|
23
|
+
|
24
|
+
* Bug Fixes:
|
25
|
+
* Fixed a bug with bad cookie parsing
|
26
|
+
* Form::RadioButton#click unchecks other buttons (RF #24159)
|
27
|
+
* Fixed problems with Iconv (RF #24190, RF #24192, RF #24043)
|
28
|
+
* POST parameters should be CGI escaped
|
29
|
+
* Made Content-Type match case insensitive (Thanks Kelley Reynolds)
|
30
|
+
* Non-string form parameters work
|
31
|
+
|
32
|
+
=== 0.9.1 2009/02/23
|
33
|
+
|
34
|
+
* New Features:
|
35
|
+
* Encoding may be specified for a page: Page#encoding=
|
36
|
+
|
37
|
+
* Bug Fixes:
|
38
|
+
* m17n fixes. ありがとう konn!
|
39
|
+
* Fixed a problem with base tags. ありがとう Keisuke
|
40
|
+
* HEAD requests do not record in the history
|
41
|
+
* Default encoding to ISO-8859-1 instead of ASCII
|
42
|
+
* Requests with URI instances should not be polluted RF #23472
|
43
|
+
* Nonce count fixed for digest auth requests. Thanks Adrian Slapa!
|
44
|
+
* Fixed a referer issue with requests using a uri. RF #23472
|
45
|
+
* WAP content types will now be parsed
|
46
|
+
* Rescued poorly formatted cookies. Thanks Kelley Reynolds!
|
47
|
+
|
48
|
+
=== 0.9.0
|
49
|
+
|
50
|
+
* Deprecations
|
51
|
+
* WWW::Mechanize::List is gone!
|
52
|
+
* Mechanize uses Nokogiri as its HTML parser but you may switch to
|
53
|
+
Hpricot by using WWW::Mechanize.html_parser = Hpricot
|
54
|
+
|
55
|
+
* Bug Fixes:
|
56
|
+
* Nil check on page when base tag is used #23021
|
57
|
+
|
58
|
+
=== 0.8.5
|
59
|
+
|
60
|
+
* Deprecations
|
61
|
+
* WWW::Mechanize::List will be deprecated in 0.9.0, and warnings have
|
62
|
+
been added to help you upgrade.
|
63
|
+
|
64
|
+
* Bug Fixes:
|
65
|
+
* Stopped raising EOF exceptions on HEAD requests. ありがとう:HIRAKU Kuroda
|
66
|
+
* Fixed exceptions when a logger is set and file:// requests are made.
|
67
|
+
* Made Mechanize 1.9 compatible
|
68
|
+
* Not setting the port in the host header for SSL sites.
|
69
|
+
* Following refresh headers. Thanks Tim Connor!
|
70
|
+
* Cookie Jar handles cookie domains containing ports, like
|
71
|
+
'mydomain.com:443' (Thanks Michal Ochman!)
|
72
|
+
* Fixing strange uri escaping problems [#22604]
|
73
|
+
* Making content-type determination more robust. (thanks Han Holl!)
|
74
|
+
* Dealing with links that are query string only. [#22402]
|
75
|
+
* Nokogiri may be dropped in as a replacement.
|
76
|
+
WWW::Mechanize.html_parser = Nokogiri::HTML
|
77
|
+
* Making sure the correct page is added to the history on meta refresh.
|
78
|
+
[#22708]
|
79
|
+
* Mechanize#get requests no longer send a referer unless they are relative
|
80
|
+
requests.
|
81
|
+
|
82
|
+
=== 0.8.4
|
83
|
+
|
84
|
+
* Bug Fixes:
|
85
|
+
* Setting the port number on the host header.
|
86
|
+
* Fixing Authorization headers for picky servers
|
87
|
+
|
88
|
+
=== 0.8.3
|
89
|
+
|
90
|
+
* Bug Fixes:
|
91
|
+
* Making sure logger is set during SSL connections.
|
92
|
+
|
93
|
+
=== 0.8.2
|
94
|
+
|
95
|
+
* Bug Fixes:
|
96
|
+
* Doh! I was accidentally setting headers twice.
|
97
|
+
|
98
|
+
=== 0.8.1
|
99
|
+
|
100
|
+
* Bug Fixes:
|
101
|
+
* Fixed problem with nil pointer when logger is set
|
102
|
+
|
103
|
+
=== 0.8.0
|
104
|
+
|
105
|
+
* New Features:
|
106
|
+
* Lifecycle hooks. Mechanize#pre_connect_hooks, Mechanize#post_connect_hooks
|
107
|
+
* file:/// urls are now supported
|
108
|
+
* Added Mechanize::Page#link_with, frame_with for searching for links using
|
109
|
+
+criteria+.
|
110
|
+
* Implementing PUT, DELETE, and HEAD requests
|
111
|
+
|
112
|
+
* Bug Fixes:
|
113
|
+
* Fixed an infinite loop when content-length and body length don't match.
|
114
|
+
* Only setting headers once
|
115
|
+
* Adding IIS authentication support
|
116
|
+
|
117
|
+
=== 0.7.8
|
118
|
+
|
119
|
+
* Bug Fixes:
|
120
|
+
* Fixed bug when receiving a 304 response (HTTPNotModified) on a page not
|
121
|
+
cached in history.
|
122
|
+
* #21428 Default to HTML parser for 'application/xhtml+xml' content-type.
|
123
|
+
* Fixed an issue where redirects were resending posted data
|
124
|
+
|
125
|
+
=== 0.7.7
|
126
|
+
|
127
|
+
* New Features:
|
128
|
+
* Page#form_with takes a +criteria+ hash.
|
129
|
+
* Page#form is changed to Page#form_with
|
130
|
+
* Mechanize#get takes custom http headers. Thanks Mike Dalessio!
|
131
|
+
* Form#click_button submits a form defaulting to the current button.
|
132
|
+
* Form#set_fields now takes a hash. Thanks Tobi!
|
133
|
+
* Mechanize#redirection_limit= for setting the max number of redirects.
|
134
|
+
|
135
|
+
* Bug Fixes:
|
136
|
+
* Added more examples. Thanks Robert Jackson.
|
137
|
+
* #20480 Making sure the Host header is set.
|
138
|
+
* #20672 Making sure cookies with weird semicolons work.
|
139
|
+
* Fixed bug with percent signs in urls.
|
140
|
+
http://d.hatena.ne.jp/kitamomonga/20080410/ruby_mechanize_percent_url_bug
|
141
|
+
* #21132 Not checking for EOF errors on redirect
|
142
|
+
* Fixed a weird gzipping error.
|
143
|
+
* #21233 Smarter multipart boundary. Thanks Todd Willey!
|
144
|
+
* #20097 Supporting meta tag cookies.
|
145
|
+
|
146
|
+
=== 0.7.6
|
147
|
+
|
148
|
+
* New Features:
|
149
|
+
* Added support for reading Mozilla cookie jars. Thanks Chris Riddoch!
|
150
|
+
* Moving text, password, hidden, int to default. Thanks Tim Harper!
|
151
|
+
* Mechanize#history_added callback for page loads. Thanks Tobi Reif!
|
152
|
+
* Mechanize#scheme_handlers callbacks for handling unsupported schemes on
|
153
|
+
links.
|
154
|
+
|
155
|
+
* Bug Fixes:
|
156
|
+
* Ignoring scheme case
|
157
|
+
http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=470642
|
158
|
+
* Not encoding tildes in uris. Thanks Bruno. [#19380]
|
159
|
+
* Resetting request bodies when retrying form posts. Thanks Bruno. [#19379]
|
160
|
+
* Throwing away keep alive connections on EPIPE and ECONNRESET.
|
161
|
+
* Duplicating request headers when retrying a 401. Thanks Hiroshi Ichikawa.
|
162
|
+
* Simulating an EOF error when a response length is bad. Thanks Tobias Gruetzmacher.
|
163
|
+
http://rubyforge.org/tracker/index.php?func=detail&aid=19178&group_id=1453&atid=5711
|
164
|
+
* Defaulting option tags to the inner text.
|
165
|
+
http://rubyforge.org/tracker/index.php?func=detail&aid=19976&group_id=1453&atid=5709
|
166
|
+
* Supporting blank strings for option values.
|
167
|
+
http://rubyforge.org/tracker/index.php?func=detail&aid=19975&group_id=1453&atid=5709
|
168
|
+
|
169
|
+
=== 0.7.5
|
170
|
+
|
171
|
+
* Fixed a bug when fetching files and not pages. Thanks Mat Schaffer!
|
172
|
+
|
173
|
+
=== 0.7.4
|
174
|
+
|
175
|
+
* doh!
|
176
|
+
|
177
|
+
=== 0.7.3
|
178
|
+
|
179
|
+
* Pages are now yielded to a block given to WWW::Mechanize#get
|
180
|
+
* WWW::Mechanize#get now takes hash arguments for uri parameters.
|
181
|
+
* WWW::Mechanize#post takes an IO object as a parameter and posts correctly.
|
182
|
+
* Fixing a strange zlib inflate problem on windows
|
183
|
+
|
184
|
+
=== 0.7.2
|
185
|
+
|
186
|
+
* Handling gzipped responses with no Content-Length header
|
187
|
+
|
188
|
+
=== 0.7.1
|
189
|
+
|
190
|
+
* Added iPhone to the user agent aliases. [#17572]
|
191
|
+
* Fixed a bug with EOF errors in net/http. [#17570]
|
192
|
+
* Handling 0 length gzipped responses. [#17471]
|
193
|
+
|
194
|
+
=== 0.7.0
|
195
|
+
|
196
|
+
* Removed Ruby 1.8.2 support
|
197
|
+
* Changed parser to lazily parse links
|
198
|
+
* Lazily parsing document
|
199
|
+
* Adding verify_callback for SSL requests. Thanks Mike Dalessio!
|
200
|
+
* Fixed a bug with Accept-Language header. Thanks Bill Siggelkow.
|
201
|
+
|
202
|
+
=== 0.6.11
|
203
|
+
|
204
|
+
* Detecting single quotes in meta redirects.
|
205
|
+
* Adding pretty inspect for ruby versions > 1.8.4 (Thanks Joel Kociolek)
|
206
|
+
http://rubyforge.org/tracker/index.php?func=detail&aid=13150&group_id=1453&atid=5709
|
207
|
+
* Fixed bug with file name in multipart posts
|
208
|
+
http://rubyforge.org/tracker/?func=detail&aid=15594&group_id=1453&atid=5709
|
209
|
+
* Posting forms relative to the originating page. Thanks Mortee.
|
210
|
+
* Added a FAQ
|
211
|
+
http://rubyforge.org/tracker/?func=detail&aid=15772&group_id=1453&atid=5709
|
212
|
+
|
213
|
+
=== 0.6.10
|
214
|
+
|
215
|
+
* Made digest authentication work with POSTs.
|
216
|
+
* Made sure page was HTML before following meta refreshes.
|
217
|
+
http://rubyforge.org/tracker/index.php?func=detail&aid=12260&group_id=1453&atid=5709
|
218
|
+
* Made sure that URLS with a host and no path would default to '/' for history
|
219
|
+
purposes.
|
220
|
+
http://rubyforge.org/tracker/index.php?func=detail&aid=12368&group_id=1453&atid=5709
|
221
|
+
* Avoiding memory leaks with transact. Thanks Tobias Gruetzmacher!
|
222
|
+
http://rubyforge.org/tracker/index.php?func=detail&aid=12057&group_id=1453&atid=5711
|
223
|
+
* Fixing a problem with # signs in the file name. Thanks Tobias Gruetzmacher!
|
224
|
+
http://rubyforge.org/tracker/index.php?func=detail&aid=12510&group_id=1453&atid=5711
|
225
|
+
* Made sure that blank form values are submitted.
|
226
|
+
http://rubyforge.org/tracker/index.php?func=detail&aid=12505&group_id=1453&atid=5709
|
227
|
+
* Mechanize now respects the base tag. Thanks Stephan Dale.
|
228
|
+
http://rubyforge.org/tracker/index.php?func=detail&aid=12468&group_id=1453&atid=5709
|
229
|
+
* Aliasing inspect to pretty_inspect. Thanks Eric Promislow.
|
230
|
+
http://rubyforge.org/pipermail/mechanize-users/2007-July/000157.html
|
231
|
+
|
232
|
+
=== 0.6.9
|
233
|
+
|
234
|
+
* Updating UTF-8 support for urls
|
235
|
+
* Adding AREA tags to the links list.
|
236
|
+
http://rubyforge.org/pipermail/mechanize-users/2007-May/000140.html
|
237
|
+
* WWW::Mechanize#follow_meta_refresh will allow you to automatically follow
|
238
|
+
meta refresh tags. [#10032]
|
239
|
+
* Adding x-gzip to accepted content-encoding. Thanks Simon Strandgaard
|
240
|
+
http://rubyforge.org/tracker/index.php?func=detail&aid=11167&group_id=1453&atid=5711
|
241
|
+
* Added Digest Authentication support. Thanks to Ryan Davis and Eric Hodel,
|
242
|
+
you get a gold star!
|
243
|
+
|
244
|
+
=== 0.6.8
|
245
|
+
|
246
|
+
* Keep alive can be shut off now with WWW::Mechanize#keep_alive
|
247
|
+
* Conditional requests can be shut off with WWW::Mechanize#conditional_requests
|
248
|
+
* Monkey patched Net::HTTP#keep_alive?
|
249
|
+
* [#9877] Moved last request time. Thanks Max Stepanov
|
250
|
+
* Added WWW::Mechanize::File#save
|
251
|
+
* Defaulting file name to URI or Content-Disposition
|
252
|
+
* Updating compatibility with hpricot
|
253
|
+
* Added more unit tests
|
254
|
+
|
255
|
+
=== 0.6.7
|
256
|
+
|
257
|
+
* Fixed a bug with keep-alive requests
|
258
|
+
* [#9549] fixed problem with cookie paths
|
259
|
+
|
260
|
+
=== 0.6.6
|
261
|
+
|
262
|
+
* Removing hpricot overrides
|
263
|
+
* Fixed a bug where alt text can be nil. Thanks Yannick!
|
264
|
+
* Unparseable expiration dates in cookies are now treated as session cookies
|
265
|
+
* Caching connections
|
266
|
+
* Requests now default to keep alive
|
267
|
+
* [#9434] Fixed bug where html entities weren't decoded
|
268
|
+
* [#9150] Updated mechanize history to deal with redirects
|
269
|
+
|
270
|
+
=== 0.6.5
|
271
|
+
|
272
|
+
* Copying headers to a hash to prevent memory leaks
|
273
|
+
* Speeding up page parsing
|
274
|
+
* Aliased fields to elements
|
275
|
+
* Adding If-Modified-Since header
|
276
|
+
* Added delete_field! to form. Thanks to Sava Chankov
|
277
|
+
* Updated uri escaping to support high order characters. Thanks to Henrik Nyh.
|
278
|
+
* Better handling relative URIs. Thanks to Henrik Nyh
|
279
|
+
* Now handles pipes in URLs
|
280
|
+
http://rubyforge.org/tracker/?func=detail&aid=7140&group_id=1453&atid=5709
|
281
|
+
* Now escaping html entities in form fields.
|
282
|
+
http://rubyforge.org/tracker/?func=detail&aid=7563&group_id=1453&atid=5709
|
283
|
+
* Added MSIE 7.0 user agent string
|
284
|
+
|
285
|
+
=== 0.6.4
|
286
|
+
|
287
|
+
* Adding the "redirect_ok" method to Mechanize to stop mechanize from
|
288
|
+
following redirects.
|
289
|
+
http://rubyforge.org/tracker/index.php?func=detail&aid=6571&group_id=1453&atid=5712
|
290
|
+
* Added protected method Mechanize#set_headers so that subclasses can set
|
291
|
+
custom headers.
|
292
|
+
http://rubyforge.org/tracker/?func=detail&aid=7208&group_id=1453&atid=5712
|
293
|
+
* Aliased Page#referer to Page#page
|
294
|
+
* Fixed a bug when clicking relative urls
|
295
|
+
http://rubyforge.org/pipermail/mechanize-users/2006-November/000035.html
|
296
|
+
* Fixing a bug when bad version or max age is passed to Cookie::parse
|
297
|
+
http://rubyforge.org/pipermail/mechanize-users/2006-November/000033.html
|
298
|
+
* Fixing a bug with response codes. [#6526]
|
299
|
+
* Fixed bug [#6548]. Input type of 'button' was not being added as a button.
|
300
|
+
* Fixed bug [#7139]. REXML parser calls hpricot parser by accident
|
301
|
+
|
302
|
+
=== 0.6.3
|
303
|
+
|
304
|
+
* Added keys and values methods to Form
|
305
|
+
* Added has_value? to Form
|
306
|
+
* Added a has_field? method to Form
|
307
|
+
* The add_field! method on Form now creates a field for you on the form.
|
308
|
+
Thanks to Mat Schaffer for the patch.
|
309
|
+
http://rubyforge.org/pipermail/mechanize-users/2006-November/000025.html
|
310
|
+
* Fixed a bug when form actions have html encoded entities in them.
|
311
|
+
http://rubyforge.org/pipermail/mechanize-users/2006-October/000019.html
|
312
|
+
* Fixed a bug when links or frame sources have html encoded entities in the
|
313
|
+
href or src.
|
314
|
+
* Fixed a bug where '#' symbols are encoded
|
315
|
+
http://rubyforge.org/forum/message.php?msg_id=14747
|
316
|
+
|
317
|
+
=== 0.6.2
|
318
|
+
|
319
|
+
* Added a yield to Page#form so that dealing with forms can be more DSL like.
|
320
|
+
* Added the parsed page to the ResponseCodeError so that the parsed results
|
321
|
+
can be accessed even in the event of an error.
|
322
|
+
http://rubyforge.org/pipermail/mechanize-users/2006-September/000007.html
|
323
|
+
* Updated documentation (Thanks to Paul Smith)
|
324
|
+
|
325
|
+
=== 0.6.1
|
326
|
+
|
327
|
+
* Added a method to Form called "submit". Now forms can be submitted by
|
328
|
+
calling a method on the form.
|
329
|
+
* Added a click method to links
|
330
|
+
* Added an REXML pluggable parser for backwards compatibility. To use it,
|
331
|
+
just do this:
|
332
|
+
agent.pluggable_parser.html = WWW::Mechanize::REXMLPage
|
333
|
+
* Fixed a bug with referrers by adding a page attribute to forms and links.
|
334
|
+
* Fixed a bug where domain names were case sensitive.
|
335
|
+
http://tenderlovemaking.com/2006/09/04/road-to-ruby-mechanize-060/#comment-53
|
336
|
+
* Fixed a bug with URI escaped links.
|
337
|
+
http://rubyforge.org/pipermail/mechanize-users/2006-September/000002.html
|
338
|
+
* Fixed a bug when options in select lists don't have a value. Thanks Dan Higham
|
339
|
+
[#5837] Code in lib/mechanize/form_elements.rb is incorrect.
|
340
|
+
* Fixed a bug with loading text in to links.
|
341
|
+
http://rubyforge.org/pipermail/mechanize-users/2006-September/000000.html
|
342
|
+
|
343
|
+
=== 0.6.0
|
344
|
+
|
345
|
+
* Changed main parser to use hpricot
|
346
|
+
* Made WWW::Mechanize::Page class searchable like hpricot
|
347
|
+
* Updated WWW::Mechanize#click to support hpricot links like this:
|
348
|
+
@agent.click (page/"a").first
|
349
|
+
* Clicking a Frame is now possible:
|
350
|
+
@agent.click (page/"frame").first
|
351
|
+
* Removed deprecated attr_finder
|
352
|
+
* Removed REXML helper methods since the main parser is now hpricot
|
353
|
+
* Overhauled cookie parser to use WEBrick::Cookie
|
354
|
+
|
355
|
+
=== 0.5.4
|
356
|
+
|
357
|
+
* Added WWW::Mechanize#transact for saving history state within a
|
358
|
+
transaction. See the EXAMPLES file. Thanks Johan Kiviniemi.
|
359
|
+
* Added support for gzip compressed pages
|
360
|
+
* Forms can now be accessed like a hash. For example, to set the value
|
361
|
+
of an input field named 'name' to "Aaron", you can do this:
|
362
|
+
form['name'] = "Aaron"
|
363
|
+
Or to get the value of a field named 'name', do this:
|
364
|
+
puts form['name']
|
365
|
+
* File uploads will now read the file specified in FileUpload#file_name
|
366
|
+
* FileUpload can use an IO object in FileUpload#file_data
|
367
|
+
* Fixed a bug with saving files on windows
|
368
|
+
* Fixed a bug with the filename being set in forms
|
369
|
+
|
370
|
+
=== 0.5.3
|
371
|
+
|
372
|
+
* Mechanize#click will now act on the first element of an array. So if an
|
373
|
+
array of links is passed to WWW::Mechanize#click, the first link is clicked.
|
374
|
+
That means the syntax for clicking links is shortened and still supports
|
375
|
+
selecting a link. The following are equivalent:
|
376
|
+
agent.click page.links.first
|
377
|
+
agent.click page.links
|
378
|
+
* Fixed a bug with spaces in href's and get's
|
379
|
+
* Added a tick, untick, and click method to radio buttons so that
|
380
|
+
radiobuttons can be "clicked"
|
381
|
+
* Added a tick, untick, and click method to check boxes so that
|
382
|
+
checkboxes can be "clicked"
|
383
|
+
* Options on Select lists can now be "tick"ed, and "untick"ed.
|
384
|
+
* Fixed a potential bug conflicting with rails. Thanks Eric Kolve
|
385
|
+
* Updated log4r support for a speed increase. Thanks Yinon Bentor
|
386
|
+
* Added inspect methods and pretty printing
|
387
|
+
|
388
|
+
=== 0.5.2
|
389
|
+
|
390
|
+
* Fixed a bug with input names that are nil
|
391
|
+
* Added a warning when using attr_finder because attr_finder will be deprecated
|
392
|
+
in 0.6.0 in favor of method calls. So this syntax:
|
393
|
+
@agent.links(:text => 'foo')
|
394
|
+
should be changed to this:
|
395
|
+
@agent.links.text('foo')
|
396
|
+
* Added support for selecting multiple options in select tags that support
|
397
|
+
multiple options. See WWW::Mechanize::MultiSelectList.
|
398
|
+
* New select list methods have been added, select_all, select_none.
|
399
|
+
* Options for select lists can now be "clicked" which toggles their selection,
|
400
|
+
they can be "selected" and "unselected". See WWW::Mechanize::Option
|
401
|
+
* Added a method to set multiple fields at the same time,
|
402
|
+
WWW::Mechanize::Form#set_fields. Which can be used like so:
|
403
|
+
form.set_fields( :foo => 'bar', :name => 'Aaron' )
|
404
|
+
|
405
|
+
=== 0.5.1
|
406
|
+
|
407
|
+
* Fixed bug with file uploads
|
408
|
+
* Added performance tweaks to the cookie class
|
409
|
+
|
410
|
+
=== 0.5.0
|
411
|
+
|
412
|
+
* Added pluggable parsers. (Thanks to Eric Kolve for the idea)
|
413
|
+
* Changed namespace so all classes are under WWW::Mechanize.
|
414
|
+
* Updating Forms so that fields can be used as accessors (Thanks Gregory Brown)
|
415
|
+
* Added WWW::Mechanize::File as default object used for unknown content types.
|
416
|
+
* Added 'save_as' method to Mechanize::File, so any page can be saved.
|
417
|
+
* Adding 'save_as' and 'load' to CookieJar so that cookies can be saved
|
418
|
+
between sessions.
|
419
|
+
* Added WWW::Mechanize::FileSaver pluggable parser to automatically save files.
|
420
|
+
* Added WWW::Mechanize::Page#title for page titles
|
421
|
+
* Added OpenSSL certificate support (Thanks Mike Dalessio)
|
422
|
+
* Removed support for body filters in favor of pluggable parsers.
|
423
|
+
* Fixed cookie bug adding a '/' when the url is missing one (Thanks Nick Dainty)
|
424
|
+
|
425
|
+
=== 0.4.7
|
426
|
+
|
427
|
+
* Fixed bug with no action in forms. Thanks to Adam Wiggins
|
428
|
+
* Setting a default user-agent string
|
429
|
+
* Added house cleaning to the cookie jar so expired cookies don't stick around.
|
430
|
+
* Added new method WWW::Form#field to find the first field with a given name.
|
431
|
+
(thanks to Gregory Brown)
|
432
|
+
* Added WWW::Mechanize#get_file for fetching non text/html files
|
433
|
+
|
434
|
+
=== 0.4.6
|
435
|
+
|
436
|
+
* Added support for proxies
|
437
|
+
* Added a uri field to WWW::Link
|
438
|
+
* Added an error class WWW::Mechanize::ContentTypeError
|
439
|
+
* Added image alt text to link text
|
440
|
+
* Added a visited? method to WWW::Mechanize
|
441
|
+
* Added Array#value= which will set the first value to the argument. That
|
442
|
+
allows syntax as such: form.fields.name('q').value = 'xyz'
|
443
|
+
Before it was like this: form.fields.name('q').first.value = 'xyz'
|
444
|
+
|
445
|
+
=== 0.4.5
|
446
|
+
|
447
|
+
* Added support for multiple values of the same name
|
448
|
+
* Updated build_query_string to take an array of arrays (Thanks Michal Janeczek)
|
449
|
+
* Added WWW::Mechanize#body_filter= so that response bodies can be preprocessed
|
450
|
+
* Added WWW::Page#body_filter= so that response bodies can be preprocessed
|
451
|
+
* Added support for more date formats in the cookie parser
|
452
|
+
* Fixed a bug with empty select lists
|
453
|
+
* Fixing a problem with cookies not handling no spaces after semicolons
|
454
|
+
|
455
|
+
=== 0.4.4
|
456
|
+
|
457
|
+
* Fixed error in method signature, basic_authetication is now basic_auth
|
458
|
+
* Fixed bug with encoding names in file uploads (Big thanks to Alex Young)
|
459
|
+
* Added options to the select list
|
460
|
+
|
461
|
+
=== 0.4.3
|
462
|
+
|
463
|
+
* Added syntactic sugar for finding things
|
464
|
+
* Fixed bug with HttpOnly option in cookies
|
465
|
+
* Fixed a bug with cookie date parsing
|
466
|
+
* Defaulted dropdown lists to the first element
|
467
|
+
* Added unit tests
|
468
|
+
|
469
|
+
=== 0.4.2
|
470
|
+
|
471
|
+
* Added support for iframes
|
472
|
+
* Made mechanize dependent on ruby-web rather than narf
|
473
|
+
* Added unit tests
|
474
|
+
* Fixed a bunch of warnings
|
475
|
+
|
476
|
+
=== 0.4.1
|
477
|
+
|
478
|
+
* Added support for file uploading
|
479
|
+
* Added support for frames (Thanks Gabriel[mailto:leerbag@googlemail.com])
|
480
|
+
* Added more unit tests
|
481
|
+
* Fixed some bugs
|
482
|
+
|
483
|
+
=== 0.4.0
|
484
|
+
|
485
|
+
* Added more unit tests
|
486
|
+
* Added a cookie jar with better cookie support, including expiration of cookies
|
487
|
+
and general cookie security.
|
488
|
+
* Updated mechanize to use built in net/http if ruby version is new enough.
|
489
|
+
* Added support for meta refresh tags
|
490
|
+
* Defaulted form actions to 'GET'
|
491
|
+
* Fixed various bugs
|
492
|
+
* Added more unit tests
|
493
|
+
* Added a response code exception
|
494
|
+
* Thanks to Brian Ellin (brianellin@gmail.com) for:
|
495
|
+
Added support for CA files, and support for 301 response codes
|
496
|
+
|
data/EXAMPLES.rdoc
ADDED
@@ -0,0 +1,171 @@
|
|
1
|
+
= WWW::Mechanize examples
|
2
|
+
|
3
|
+
== Google
|
4
|
+
require 'rubygems'
|
5
|
+
require 'mechanize'
|
6
|
+
|
7
|
+
a = WWW::Mechanize.new { |agent|
|
8
|
+
agent.user_agent_alias = 'Mac Safari'
|
9
|
+
}
|
10
|
+
|
11
|
+
a.get('http://google.com/') do |page|
|
12
|
+
search_result = page.form_with(:name => 'f') do |search|
|
13
|
+
search.q = 'Hello world'
|
14
|
+
end.submit
|
15
|
+
|
16
|
+
search_result.links.each do |link|
|
17
|
+
puts link.text
|
18
|
+
end
|
19
|
+
end
|
20
|
+
|
21
|
+
== Rubyforge
|
22
|
+
|
23
|
+
a = WWW::Mechanize.new
|
24
|
+
a.get('http://rubyforge.org/') do |page|
|
25
|
+
# Click the login link
|
26
|
+
login_page = a.click(page.links.text(/Log In/))
|
27
|
+
|
28
|
+
# Submit the login form
|
29
|
+
my_page = login_page.form_with(:action => '/account/login.php') do |f|
|
30
|
+
f.form_loginname = ARGV[0]
|
31
|
+
f.form_pw = ARGV[1]
|
32
|
+
end.click_button
|
33
|
+
|
34
|
+
my_page.links.each do |link|
|
35
|
+
text = link.text.strip
|
36
|
+
next unless text.length > 0
|
37
|
+
puts text
|
38
|
+
end
|
39
|
+
end
|
40
|
+
|
41
|
+
== File Upload
|
42
|
+
Upload a file to flickr.
|
43
|
+
|
44
|
+
a = WWW::Mechanize.new { |agent|
|
45
|
+
# Flickr refreshes after login
|
46
|
+
agent.follow_meta_refresh = true
|
47
|
+
}
|
48
|
+
|
49
|
+
a.get('http://flickr.com/') do |home_page|
|
50
|
+
signin_page = a.click(home_page.links.text(/Sign In/))
|
51
|
+
|
52
|
+
my_page = signin_page.form_with(:name => 'login_form') do |form|
|
53
|
+
form.login = ARGV[0]
|
54
|
+
form.passwd = ARGV[1]
|
55
|
+
end.submit
|
56
|
+
|
57
|
+
# Click the upload link
|
58
|
+
upload_page = a.click(my_page.links.text(/Upload/))
|
59
|
+
|
60
|
+
# We want the basic upload page.
|
61
|
+
upload_page = a.click(upload_page.links.text(/basic Uploader/))
|
62
|
+
|
63
|
+
# Upload the file
|
64
|
+
upload_page.form_with(:method => 'POST') do |upload_form|
|
65
|
+
upload_form.file_uploads.first.file_name = ARGV[2]
|
66
|
+
end.submit
|
67
|
+
end
|
68
|
+
|
69
|
+
== Pluggable Parsers
|
70
|
+
Let's say you want HTML pages to automatically be parsed with Rubyful Soup.
|
71
|
+
This example shows you how:
|
72
|
+
|
73
|
+
require 'rubygems'
|
74
|
+
require 'mechanize'
|
75
|
+
require 'rubyful_soup'
|
76
|
+
|
77
|
+
class SoupParser < WWW::Mechanize::Page
|
78
|
+
attr_reader :soup
|
79
|
+
def initialize(uri = nil, response = nil, body = nil, code = nil)
|
80
|
+
@soup = BeautifulSoup.new(body)
|
81
|
+
super(uri, response, body, code)
|
82
|
+
end
|
83
|
+
end
|
84
|
+
|
85
|
+
agent = WWW::Mechanize.new
|
86
|
+
agent.pluggable_parser.html = SoupParser
|
87
|
+
|
88
|
+
Now all HTML pages will be parsed with the SoupParser class, and automatically
|
89
|
+
give you access to a method called 'soup' where you can get access to the
|
90
|
+
Beautiful Soup for that page.
|
91
|
+
|
92
|
+
== Using a proxy
|
93
|
+
|
94
|
+
require 'rubygems'
|
95
|
+
require 'mechanize'
|
96
|
+
|
97
|
+
agent = WWW::Mechanize.new
|
98
|
+
agent.set_proxy('localhost', '8000')
|
99
|
+
page = agent.get(ARGV[0])
|
100
|
+
puts page.body
|
101
|
+
|
102
|
+
== The transact method
|
103
|
+
|
104
|
+
transact runs the given block and then resets the page history. I.e. after the
|
105
|
+
block has been executed, you're back at the original page; no need to count how
|
106
|
+
many times to call the back method at the end of a loop (while accounting for
|
107
|
+
possible exceptions).
|
108
|
+
|
109
|
+
This example also demonstrates subclassing Mechanize.
|
110
|
+
|
111
|
+
require 'mechanize'
|
112
|
+
|
113
|
+
class TestMech < WWW::Mechanize
|
114
|
+
def process
|
115
|
+
get 'http://rubyforge.org/'
|
116
|
+
search_form = page.forms.first
|
117
|
+
search_form.words = 'WWW'
|
118
|
+
submit search_form
|
119
|
+
|
120
|
+
page.links_with(:href => %r{/projects/} ).each do |link|
|
121
|
+
next if link.href =~ %r{/projects/support/}
|
122
|
+
|
123
|
+
puts 'Loading %-30s %s' % [link.href, link.text]
|
124
|
+
begin
|
125
|
+
transact do
|
126
|
+
click link
|
127
|
+
# Do stuff, maybe click more links.
|
128
|
+
end
|
129
|
+
# Now we're back at the original page.
|
130
|
+
|
131
|
+
rescue => e
|
132
|
+
$stderr.puts "#{e.class}: #{e.message}"
|
133
|
+
end
|
134
|
+
end
|
135
|
+
end
|
136
|
+
end
|
137
|
+
|
138
|
+
TestMech.new.process
|
139
|
+
|
140
|
+
== Client Certificate Authentication (Mutual Auth)
|
141
|
+
|
142
|
+
In most cases a client certificate is created as an additional layer of security
|
143
|
+
for certain websites. The specific case that this was initially tested on was
|
144
|
+
for automating the download of archived images from a bank's (Wachovia) lockbox
|
145
|
+
system. Once the certificate is installed into your browser you will have to
|
146
|
+
export it and split the certificate and private key into separate files. Exported
|
147
|
+
files are usually in .p12 format (IE 7 & Firefox 2.0) which stands for PKCS #12.
|
148
|
+
You can convert them from p12 to pem format by using the following commands:
|
149
|
+
|
150
|
+
openssl.exe pkcs12 -in input_file.p12 -clcerts -out example.key -nocerts -nodes
|
151
|
+
openssl.exe pkcs12 -in input_file.p12 -clcerts -out example.cer -nokeys
|
152
|
+
|
153
|
+
require 'rubygems'
|
154
|
+
require 'mechanize'
|
155
|
+
|
156
|
+
# create Mechanize instance
|
157
|
+
agent = WWW::Mechanize.new
|
158
|
+
|
159
|
+
# set the path of the certificate file
|
160
|
+
agent.cert = 'example.cer'
|
161
|
+
|
162
|
+
# set the path of the private key file
|
163
|
+
agent.key = 'example.key'
|
164
|
+
|
165
|
+
# get the login form & fill it out with the username/password
|
166
|
+
login_form = agent.get("http://example.com/login_page").form('Login')
|
167
|
+
login_form.Userid = 'TestUser'
|
168
|
+
login_form.Password = 'TestPassword'
|
169
|
+
|
170
|
+
# submit login form
|
171
|
+
agent.submit(login_form, login_form.buttons.first)
|
data/FAQ.rdoc
ADDED
@@ -0,0 +1,11 @@
|
|
1
|
+
Q: I keep getting an EOFError:
|
2
|
+
protocol.rb:133:in `sysread': end of file reached (EOFError)
|
3
|
+
|
4
|
+
A: Some people have experienced an EOFError during normal mechanize usage.
|
5
|
+
Most of the time this occurs because the remote website claims to support
|
6
|
+
keep alives, but does not implement them correctly. Try turning off
|
7
|
+
keep alives on your mechanize object:
|
8
|
+
|
9
|
+
mech.keep_alive = false
|
10
|
+
|
11
|
+
|