knu-mechanize 0.9.3.20090623142847
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGELOG.rdoc +504 -0
- data/EXAMPLES.rdoc +171 -0
- data/FAQ.rdoc +11 -0
- data/GUIDE.rdoc +122 -0
- data/LICENSE.rdoc +340 -0
- data/Manifest.txt +169 -0
- data/README.rdoc +60 -0
- data/Rakefile +43 -0
- data/examples/flickr_upload.rb +23 -0
- data/examples/mech-dump.rb +7 -0
- data/examples/proxy_req.rb +9 -0
- data/examples/rubyforge.rb +21 -0
- data/examples/spider.rb +11 -0
- data/lib/mechanize.rb +7 -0
- data/lib/www/mechanize.rb +619 -0
- data/lib/www/mechanize/chain.rb +34 -0
- data/lib/www/mechanize/chain/auth_headers.rb +80 -0
- data/lib/www/mechanize/chain/body_decoding_handler.rb +48 -0
- data/lib/www/mechanize/chain/connection_resolver.rb +78 -0
- data/lib/www/mechanize/chain/custom_headers.rb +23 -0
- data/lib/www/mechanize/chain/handler.rb +9 -0
- data/lib/www/mechanize/chain/header_resolver.rb +53 -0
- data/lib/www/mechanize/chain/parameter_resolver.rb +24 -0
- data/lib/www/mechanize/chain/post_connect_hook.rb +0 -0
- data/lib/www/mechanize/chain/pre_connect_hook.rb +22 -0
- data/lib/www/mechanize/chain/request_resolver.rb +32 -0
- data/lib/www/mechanize/chain/response_body_parser.rb +40 -0
- data/lib/www/mechanize/chain/response_header_handler.rb +50 -0
- data/lib/www/mechanize/chain/response_reader.rb +41 -0
- data/lib/www/mechanize/chain/ssl_resolver.rb +42 -0
- data/lib/www/mechanize/chain/uri_resolver.rb +77 -0
- data/lib/www/mechanize/content_type_error.rb +16 -0
- data/lib/www/mechanize/cookie.rb +72 -0
- data/lib/www/mechanize/cookie_jar.rb +191 -0
- data/lib/www/mechanize/file.rb +73 -0
- data/lib/www/mechanize/file_response.rb +62 -0
- data/lib/www/mechanize/file_saver.rb +39 -0
- data/lib/www/mechanize/form.rb +360 -0
- data/lib/www/mechanize/form/button.rb +8 -0
- data/lib/www/mechanize/form/check_box.rb +13 -0
- data/lib/www/mechanize/form/field.rb +28 -0
- data/lib/www/mechanize/form/file_upload.rb +24 -0
- data/lib/www/mechanize/form/image_button.rb +23 -0
- data/lib/www/mechanize/form/multi_select_list.rb +69 -0
- data/lib/www/mechanize/form/option.rb +51 -0
- data/lib/www/mechanize/form/radio_button.rb +38 -0
- data/lib/www/mechanize/form/select_list.rb +45 -0
- data/lib/www/mechanize/headers.rb +12 -0
- data/lib/www/mechanize/history.rb +67 -0
- data/lib/www/mechanize/inspect.rb +90 -0
- data/lib/www/mechanize/monkey_patch.rb +37 -0
- data/lib/www/mechanize/page.rb +181 -0
- data/lib/www/mechanize/page/base.rb +10 -0
- data/lib/www/mechanize/page/frame.rb +22 -0
- data/lib/www/mechanize/page/link.rb +50 -0
- data/lib/www/mechanize/page/meta.rb +51 -0
- data/lib/www/mechanize/pluggable_parsers.rb +103 -0
- data/lib/www/mechanize/redirect_limit_reached_error.rb +18 -0
- data/lib/www/mechanize/redirect_not_get_or_head_error.rb +20 -0
- data/lib/www/mechanize/response_code_error.rb +25 -0
- data/lib/www/mechanize/unsupported_scheme_error.rb +10 -0
- data/lib/www/mechanize/util.rb +76 -0
- data/mechanize.gemspec +41 -0
- data/test/chain/test_argument_validator.rb +14 -0
- data/test/chain/test_auth_headers.rb +25 -0
- data/test/chain/test_custom_headers.rb +18 -0
- data/test/chain/test_header_resolver.rb +28 -0
- data/test/chain/test_parameter_resolver.rb +35 -0
- data/test/chain/test_request_resolver.rb +29 -0
- data/test/chain/test_response_reader.rb +24 -0
- data/test/data/htpasswd +1 -0
- data/test/data/server.crt +16 -0
- data/test/data/server.csr +12 -0
- data/test/data/server.key +15 -0
- data/test/data/server.pem +15 -0
- data/test/helper.rb +129 -0
- data/test/htdocs/alt_text.html +10 -0
- data/test/htdocs/bad_form_test.html +9 -0
- data/test/htdocs/button.jpg +0 -0
- data/test/htdocs/empty_form.html +6 -0
- data/test/htdocs/file_upload.html +26 -0
- data/test/htdocs/find_link.html +41 -0
- data/test/htdocs/form_multi_select.html +16 -0
- data/test/htdocs/form_multival.html +37 -0
- data/test/htdocs/form_no_action.html +18 -0
- data/test/htdocs/form_no_input_name.html +16 -0
- data/test/htdocs/form_select.html +16 -0
- data/test/htdocs/form_select_all.html +16 -0
- data/test/htdocs/form_select_none.html +17 -0
- data/test/htdocs/form_select_noopts.html +10 -0
- data/test/htdocs/form_set_fields.html +14 -0
- data/test/htdocs/form_test.html +188 -0
- data/test/htdocs/frame_test.html +30 -0
- data/test/htdocs/google.html +13 -0
- data/test/htdocs/iframe_test.html +16 -0
- data/test/htdocs/index.html +6 -0
- data/test/htdocs/link with space.html +5 -0
- data/test/htdocs/meta_cookie.html +11 -0
- data/test/htdocs/no_title_test.html +6 -0
- data/test/htdocs/relative/tc_relative_links.html +21 -0
- data/test/htdocs/tc_bad_links.html +5 -0
- data/test/htdocs/tc_base_link.html +8 -0
- data/test/htdocs/tc_blank_form.html +11 -0
- data/test/htdocs/tc_checkboxes.html +19 -0
- data/test/htdocs/tc_encoded_links.html +5 -0
- data/test/htdocs/tc_follow_meta.html +8 -0
- data/test/htdocs/tc_form_action.html +48 -0
- data/test/htdocs/tc_links.html +18 -0
- data/test/htdocs/tc_no_attributes.html +16 -0
- data/test/htdocs/tc_pretty_print.html +17 -0
- data/test/htdocs/tc_radiobuttons.html +17 -0
- data/test/htdocs/tc_referer.html +10 -0
- data/test/htdocs/tc_relative_links.html +19 -0
- data/test/htdocs/tc_textarea.html +23 -0
- data/test/htdocs/unusual______.html +5 -0
- data/test/servlets.rb +365 -0
- data/test/ssl_server.rb +48 -0
- data/test/test_authenticate.rb +71 -0
- data/test/test_bad_links.rb +25 -0
- data/test/test_blank_form.rb +16 -0
- data/test/test_checkboxes.rb +61 -0
- data/test/test_content_type.rb +13 -0
- data/test/test_cookie_class.rb +338 -0
- data/test/test_cookie_jar.rb +362 -0
- data/test/test_cookies.rb +123 -0
- data/test/test_encoded_links.rb +20 -0
- data/test/test_errors.rb +49 -0
- data/test/test_follow_meta.rb +108 -0
- data/test/test_form_action.rb +52 -0
- data/test/test_form_as_hash.rb +61 -0
- data/test/test_form_button.rb +38 -0
- data/test/test_form_no_inputname.rb +15 -0
- data/test/test_forms.rb +564 -0
- data/test/test_frames.rb +25 -0
- data/test/test_get_headers.rb +52 -0
- data/test/test_gzipping.rb +22 -0
- data/test/test_hash_api.rb +45 -0
- data/test/test_history.rb +142 -0
- data/test/test_history_added.rb +16 -0
- data/test/test_html_unscape_forms.rb +39 -0
- data/test/test_if_modified_since.rb +20 -0
- data/test/test_keep_alive.rb +31 -0
- data/test/test_links.rb +120 -0
- data/test/test_mech.rb +268 -0
- data/test/test_mechanize_file.rb +47 -0
- data/test/test_meta.rb +65 -0
- data/test/test_multi_select.rb +106 -0
- data/test/test_no_attributes.rb +13 -0
- data/test/test_option.rb +18 -0
- data/test/test_page.rb +119 -0
- data/test/test_pluggable_parser.rb +145 -0
- data/test/test_post_form.rb +34 -0
- data/test/test_pretty_print.rb +22 -0
- data/test/test_radiobutton.rb +75 -0
- data/test/test_redirect_limit_reached.rb +41 -0
- data/test/test_redirect_verb_handling.rb +45 -0
- data/test/test_referer.rb +39 -0
- data/test/test_relative_links.rb +40 -0
- data/test/test_request.rb +13 -0
- data/test/test_response_code.rb +52 -0
- data/test/test_save_file.rb +48 -0
- data/test/test_scheme.rb +48 -0
- data/test/test_select.rb +106 -0
- data/test/test_select_all.rb +15 -0
- data/test/test_select_none.rb +15 -0
- data/test/test_select_noopts.rb +16 -0
- data/test/test_set_fields.rb +44 -0
- data/test/test_ssl_server.rb +20 -0
- data/test/test_subclass.rb +14 -0
- data/test/test_textarea.rb +45 -0
- data/test/test_upload.rb +109 -0
- data/test/test_verbs.rb +25 -0
- metadata +314 -0
data/Manifest.txt
ADDED
@@ -0,0 +1,169 @@
|
|
1
|
+
CHANGELOG.rdoc
|
2
|
+
EXAMPLES.rdoc
|
3
|
+
FAQ.rdoc
|
4
|
+
GUIDE.rdoc
|
5
|
+
LICENSE.rdoc
|
6
|
+
Manifest.txt
|
7
|
+
README.rdoc
|
8
|
+
Rakefile
|
9
|
+
examples/flickr_upload.rb
|
10
|
+
examples/mech-dump.rb
|
11
|
+
examples/proxy_req.rb
|
12
|
+
examples/rubyforge.rb
|
13
|
+
examples/spider.rb
|
14
|
+
lib/mechanize.rb
|
15
|
+
lib/www/mechanize.rb
|
16
|
+
lib/www/mechanize/chain.rb
|
17
|
+
lib/www/mechanize/chain/auth_headers.rb
|
18
|
+
lib/www/mechanize/chain/body_decoding_handler.rb
|
19
|
+
lib/www/mechanize/chain/connection_resolver.rb
|
20
|
+
lib/www/mechanize/chain/custom_headers.rb
|
21
|
+
lib/www/mechanize/chain/handler.rb
|
22
|
+
lib/www/mechanize/chain/header_resolver.rb
|
23
|
+
lib/www/mechanize/chain/parameter_resolver.rb
|
24
|
+
lib/www/mechanize/chain/post_connect_hook.rb
|
25
|
+
lib/www/mechanize/chain/pre_connect_hook.rb
|
26
|
+
lib/www/mechanize/chain/request_resolver.rb
|
27
|
+
lib/www/mechanize/chain/response_body_parser.rb
|
28
|
+
lib/www/mechanize/chain/response_header_handler.rb
|
29
|
+
lib/www/mechanize/chain/response_reader.rb
|
30
|
+
lib/www/mechanize/chain/ssl_resolver.rb
|
31
|
+
lib/www/mechanize/chain/uri_resolver.rb
|
32
|
+
lib/www/mechanize/content_type_error.rb
|
33
|
+
lib/www/mechanize/cookie.rb
|
34
|
+
lib/www/mechanize/cookie_jar.rb
|
35
|
+
lib/www/mechanize/file.rb
|
36
|
+
lib/www/mechanize/file_response.rb
|
37
|
+
lib/www/mechanize/file_saver.rb
|
38
|
+
lib/www/mechanize/form.rb
|
39
|
+
lib/www/mechanize/form/button.rb
|
40
|
+
lib/www/mechanize/form/check_box.rb
|
41
|
+
lib/www/mechanize/form/field.rb
|
42
|
+
lib/www/mechanize/form/file_upload.rb
|
43
|
+
lib/www/mechanize/form/image_button.rb
|
44
|
+
lib/www/mechanize/form/multi_select_list.rb
|
45
|
+
lib/www/mechanize/form/option.rb
|
46
|
+
lib/www/mechanize/form/radio_button.rb
|
47
|
+
lib/www/mechanize/form/select_list.rb
|
48
|
+
lib/www/mechanize/headers.rb
|
49
|
+
lib/www/mechanize/history.rb
|
50
|
+
lib/www/mechanize/inspect.rb
|
51
|
+
lib/www/mechanize/monkey_patch.rb
|
52
|
+
lib/www/mechanize/page.rb
|
53
|
+
lib/www/mechanize/page/base.rb
|
54
|
+
lib/www/mechanize/page/frame.rb
|
55
|
+
lib/www/mechanize/page/link.rb
|
56
|
+
lib/www/mechanize/page/meta.rb
|
57
|
+
lib/www/mechanize/pluggable_parsers.rb
|
58
|
+
lib/www/mechanize/redirect_limit_reached_error.rb
|
59
|
+
lib/www/mechanize/redirect_not_get_or_head_error.rb
|
60
|
+
lib/www/mechanize/response_code_error.rb
|
61
|
+
lib/www/mechanize/unsupported_scheme_error.rb
|
62
|
+
lib/www/mechanize/util.rb
|
63
|
+
mechanize.gemspec
|
64
|
+
test/chain/test_argument_validator.rb
|
65
|
+
test/chain/test_custom_headers.rb
|
66
|
+
test/chain/test_parameter_resolver.rb
|
67
|
+
test/chain/test_request_resolver.rb
|
68
|
+
test/chain/test_response_reader.rb
|
69
|
+
test/data/htpasswd
|
70
|
+
test/data/server.crt
|
71
|
+
test/data/server.csr
|
72
|
+
test/data/server.key
|
73
|
+
test/data/server.pem
|
74
|
+
test/helper.rb
|
75
|
+
test/htdocs/alt_text.html
|
76
|
+
test/htdocs/bad_form_test.html
|
77
|
+
test/htdocs/button.jpg
|
78
|
+
test/htdocs/empty_form.html
|
79
|
+
test/htdocs/file_upload.html
|
80
|
+
test/htdocs/find_link.html
|
81
|
+
test/htdocs/form_multi_select.html
|
82
|
+
test/htdocs/form_multival.html
|
83
|
+
test/htdocs/form_no_action.html
|
84
|
+
test/htdocs/form_no_input_name.html
|
85
|
+
test/htdocs/form_select.html
|
86
|
+
test/htdocs/form_select_all.html
|
87
|
+
test/htdocs/form_select_none.html
|
88
|
+
test/htdocs/form_select_noopts.html
|
89
|
+
test/htdocs/form_set_fields.html
|
90
|
+
test/htdocs/form_test.html
|
91
|
+
test/htdocs/frame_test.html
|
92
|
+
test/htdocs/google.html
|
93
|
+
test/htdocs/iframe_test.html
|
94
|
+
test/htdocs/index.html
|
95
|
+
test/htdocs/link with space.html
|
96
|
+
test/htdocs/meta_cookie.html
|
97
|
+
test/htdocs/no_title_test.html
|
98
|
+
test/htdocs/relative/tc_relative_links.html
|
99
|
+
test/htdocs/tc_bad_links.html
|
100
|
+
test/htdocs/tc_base_link.html
|
101
|
+
test/htdocs/tc_blank_form.html
|
102
|
+
test/htdocs/tc_checkboxes.html
|
103
|
+
test/htdocs/tc_encoded_links.html
|
104
|
+
test/htdocs/tc_follow_meta.html
|
105
|
+
test/htdocs/tc_form_action.html
|
106
|
+
test/htdocs/tc_links.html
|
107
|
+
test/htdocs/tc_no_attributes.html
|
108
|
+
test/htdocs/tc_pretty_print.html
|
109
|
+
test/htdocs/tc_radiobuttons.html
|
110
|
+
test/htdocs/tc_referer.html
|
111
|
+
test/htdocs/tc_relative_links.html
|
112
|
+
test/htdocs/tc_textarea.html
|
113
|
+
test/htdocs/unusual______.html
|
114
|
+
test/servlets.rb
|
115
|
+
test/ssl_server.rb
|
116
|
+
test/test_authenticate.rb
|
117
|
+
test/test_bad_links.rb
|
118
|
+
test/test_blank_form.rb
|
119
|
+
test/test_checkboxes.rb
|
120
|
+
test/test_content_type.rb
|
121
|
+
test/test_cookie_class.rb
|
122
|
+
test/test_cookie_jar.rb
|
123
|
+
test/test_cookies.rb
|
124
|
+
test/test_encoded_links.rb
|
125
|
+
test/test_errors.rb
|
126
|
+
test/test_follow_meta.rb
|
127
|
+
test/test_form_action.rb
|
128
|
+
test/test_form_as_hash.rb
|
129
|
+
test/test_form_button.rb
|
130
|
+
test/test_form_no_inputname.rb
|
131
|
+
test/test_forms.rb
|
132
|
+
test/test_frames.rb
|
133
|
+
test/test_get_headers.rb
|
134
|
+
test/test_gzipping.rb
|
135
|
+
test/test_hash_api.rb
|
136
|
+
test/test_history.rb
|
137
|
+
test/test_history_added.rb
|
138
|
+
test/test_html_unscape_forms.rb
|
139
|
+
test/test_if_modified_since.rb
|
140
|
+
test/test_keep_alive.rb
|
141
|
+
test/test_links.rb
|
142
|
+
test/test_mech.rb
|
143
|
+
test/test_mechanize_file.rb
|
144
|
+
test/test_multi_select.rb
|
145
|
+
test/test_no_attributes.rb
|
146
|
+
test/test_option.rb
|
147
|
+
test/test_page.rb
|
148
|
+
test/test_pluggable_parser.rb
|
149
|
+
test/test_post_form.rb
|
150
|
+
test/test_pretty_print.rb
|
151
|
+
test/test_radiobutton.rb
|
152
|
+
test/test_redirect_limit_reached.rb
|
153
|
+
test/test_redirect_verb_handling.rb
|
154
|
+
test/test_referer.rb
|
155
|
+
test/test_relative_links.rb
|
156
|
+
test/test_request.rb
|
157
|
+
test/test_response_code.rb
|
158
|
+
test/test_save_file.rb
|
159
|
+
test/test_scheme.rb
|
160
|
+
test/test_select.rb
|
161
|
+
test/test_select_all.rb
|
162
|
+
test/test_select_none.rb
|
163
|
+
test/test_select_noopts.rb
|
164
|
+
test/test_set_fields.rb
|
165
|
+
test/test_ssl_server.rb
|
166
|
+
test/test_subclass.rb
|
167
|
+
test/test_textarea.rb
|
168
|
+
test/test_upload.rb
|
169
|
+
test/test_verbs.rb
|
data/README.rdoc
ADDED
@@ -0,0 +1,60 @@
|
|
1
|
+
= WWW::Mechanize
|
2
|
+
|
3
|
+
* http://mechanize.rubyforge.org/
|
4
|
+
* http://github.com/tenderlove/mechanize/tree/master
|
5
|
+
|
6
|
+
== DESCRIPTION
|
7
|
+
|
8
|
+
The Mechanize library is used for automating interaction with websites.
|
9
|
+
Mechanize automatically stores and sends cookies, follows redirects,
|
10
|
+
can follow links, and can submit forms. Form fields can be populated and
|
11
|
+
submitted. Mechanize also keeps track of the sites that you have visited as
|
12
|
+
a history.
|
13
|
+
|
14
|
+
== Dependencies
|
15
|
+
|
16
|
+
* ruby 1.8.6
|
17
|
+
* nokogiri[http://nokogiri.rubyforge.org]
|
18
|
+
|
19
|
+
== SUPPORT:
|
20
|
+
|
21
|
+
The mechanize mailing list is available here:
|
22
|
+
|
23
|
+
* http://rubyforge.org/mailman/listinfo/mechanize-users
|
24
|
+
|
25
|
+
The bug tracker is available here:
|
26
|
+
|
27
|
+
* http://rubyforge.org/tracker/?atid=5709&group_id=1453
|
28
|
+
|
29
|
+
== Examples
|
30
|
+
|
31
|
+
If you are just starting, check out the GUIDE.
|
32
|
+
Also, check out the EXAMPLES file.
|
33
|
+
|
34
|
+
== Authors
|
35
|
+
|
36
|
+
Copyright (c) 2005 by Michael Neumann (mneumann@ntecs.de)
|
37
|
+
|
38
|
+
Copyright (c) 2006-2009:
|
39
|
+
|
40
|
+
* {Aaron Patterson}[http://tenderlovemaking.com] (aaronp@rubyforge.org)
|
41
|
+
* {Mike Dalessio}[http://mike.daless.io] (mike@csa.net)
|
42
|
+
|
43
|
+
This library comes with a shameless plug for employing me
|
44
|
+
(Aaron[http://tenderlovemaking.com/]) programming
|
45
|
+
Ruby, my favorite language!
|
46
|
+
|
47
|
+
== Acknowledgments
|
48
|
+
|
49
|
+
This library was heavily influenced by its namesake in the Perl world. A big
|
50
|
+
thanks goes to Andy Lester (andy@petdance.com), the author of the original
|
51
|
+
Perl Mechanize, which is available here[http://search.cpan.org/~petdance/WWW-Mechanize-1.20/]. Ruby Mechanize would not be around without you!
|
52
|
+
|
53
|
+
Thank you to Michael Neumann for starting the Ruby version. Thanks to everyone
|
54
|
+
who's helped out in various ways. Finally, thank you to the people using this
|
55
|
+
library!
|
56
|
+
|
57
|
+
== License
|
58
|
+
|
59
|
+
This library is distributed under the GPL. Please see the LICENSE file.
|
60
|
+
|
data/Rakefile
ADDED
@@ -0,0 +1,43 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'hoe'
|
3
|
+
|
4
|
+
$LOAD_PATH.unshift File.join(File.dirname(__FILE__), "lib")
|
5
|
+
require 'mechanize'
|
6
|
+
|
7
|
+
# Hoe project definition; the resulting object is reused by the tasks below.
HOE = Hoe.new('mechanize', WWW::Mechanize::VERSION) do |p|
  p.developer('Aaron Patterson','aaronp@rubyforge.org')
  p.developer('Mike Dalessio','mike.dalessio@gmail.com')
  p.readme_file      = 'README.rdoc'
  p.history_file     = 'CHANGELOG.rdoc'
  p.extra_rdoc_files = FileList['*.rdoc']
  p.summary          = "Mechanize provides automated web-browsing"
  p.extra_deps       = [['nokogiri', '>= 1.2.1']]
end

desc "Update SSL Certificate"
task('ssl_cert') do
  # Generate a passphrase-protected key and a signing request, strip the
  # passphrase, self-sign for one year, then move everything to test/data/.
  sh "openssl genrsa -des3 -out server.key 1024"
  sh "openssl req -new -key server.key -out server.csr"
  sh "cp server.key server.key.org"
  sh "openssl rsa -in server.key.org -out server.key"
  sh "openssl x509 -req -days 365 -in server.csr -signkey server.key -out server.crt"
  sh "cp server.key server.pem"
  sh "mv server.key server.csr server.crt server.pem test/data/"
  sh "rm server.key.org"
end

namespace :gem do
  desc 'Generate a gem spec'
  task :spec do
    File.open("#{HOE.name}.gemspec", 'w') do |f|
      # Append a timestamp so every generated spec gets a distinct version.
      HOE.spec.version = "#{HOE.version}.#{Time.now.strftime("%Y%m%d%H%M%S")}"
      f.write(HOE.spec.to_ruby)
    end
  end
end

desc "Run code-coverage analysis"
task :coverage do
  rm_rf "coverage"
  sh "rcov -x Library -I lib:test #{Dir[*HOE.test_globs].join(' ')}"
end
|
@@ -0,0 +1,23 @@
|
|
1
|
+
$:.unshift File.join(File.dirname(__FILE__), "..", "lib")
|
2
|
+
|
3
|
+
require 'rubygems'
|
4
|
+
require 'mechanize'
|
5
|
+
|
6
|
+
# Usage: flickr_upload.rb EMAIL PASSWORD FILE
agent = WWW::Mechanize.new

# Get the flickr sign in page
page = agent.get('http://flickr.com/signin/flickr/')

# Fill out the login form
form = page.form_with(:name => 'flickrloginform')
form.email    = ARGV[0]
form.password = ARGV[1]
page = agent.submit(form)

# Go to the upload page
page = agent.click(page.link_with(:text => 'Upload'))

# Fill out the upload form; ARGV[2] is the name of the file to upload
form = page.form_with(:action => '/photos_upload_process.gne')
form.file_upload_with(:name => 'file1').file_name = ARGV[2]
agent.submit(form)
|
@@ -0,0 +1,21 @@
|
|
1
|
+
$:.unshift File.join(File.dirname(__FILE__), "..", "lib")
|
2
|
+
|
3
|
+
# This example logs a user in to rubyforge and prints out the body of the
|
4
|
+
# page after logging the user in.
|
5
|
+
require 'rubygems'
|
6
|
+
require 'mechanize'
|
7
|
+
|
8
|
+
# Logger is used below; load it explicitly rather than relying on another
# library having required it.
require 'logger'

# Create a new mechanize object that logs to standard error
agent = WWW::Mechanize.new { |a| a.log = Logger.new(STDERR) }

# Load the rubyforge website
page = agent.get('http://rubyforge.org/')
page = agent.click(page.link_with(:text => /Log In/)) # Click the login link
form = page.forms[1] # Select the second form on the page (index 1)
form.form_loginname = ARGV[0]
form.form_pw = ARGV[1]

# Submit the form
page = agent.submit(form, form.buttons.first)

puts page.body # Print out the body
|
data/examples/spider.rb
ADDED
@@ -0,0 +1,11 @@
|
|
1
|
+
$:.unshift File.join(File.dirname(__FILE__), "..", "lib")
|
2
|
+
|
3
|
+
require 'rubygems'
|
4
|
+
require 'mechanize'
|
5
|
+
|
6
|
+
# Usage: spider.rb START_URL
agent = WWW::Mechanize.new

# Links still to be followed, seeded from the start page.
stack = agent.get(ARGV[0]).links
while link = stack.pop
  # Only follow links whose host matches the first page in the history.
  next unless link.uri.host == agent.history.first.uri.host
  stack.push(*agent.click(link).links) unless agent.visited?(link.href)
end
|
data/lib/mechanize.rb
ADDED
@@ -0,0 +1,619 @@
|
|
1
|
+
require 'net/http'
|
2
|
+
require 'net/https'
|
3
|
+
require 'uri'
|
4
|
+
require 'webrick/httputils'
|
5
|
+
require 'zlib'
|
6
|
+
require 'stringio'
|
7
|
+
require 'digest/md5'
|
8
|
+
require 'fileutils'
|
9
|
+
require 'nokogiri'
|
10
|
+
require 'forwardable'
|
11
|
+
require 'iconv'
|
12
|
+
require 'nkf'
|
13
|
+
|
14
|
+
require 'www/mechanize/util'
|
15
|
+
require 'www/mechanize/content_type_error'
|
16
|
+
require 'www/mechanize/response_code_error'
|
17
|
+
require 'www/mechanize/unsupported_scheme_error'
|
18
|
+
require 'www/mechanize/redirect_limit_reached_error'
|
19
|
+
require 'www/mechanize/redirect_not_get_or_head_error'
|
20
|
+
require 'www/mechanize/cookie'
|
21
|
+
require 'www/mechanize/cookie_jar'
|
22
|
+
require 'www/mechanize/history'
|
23
|
+
require 'www/mechanize/form'
|
24
|
+
require 'www/mechanize/pluggable_parsers'
|
25
|
+
require 'www/mechanize/file_response'
|
26
|
+
require 'www/mechanize/inspect'
|
27
|
+
require 'www/mechanize/chain'
|
28
|
+
require 'www/mechanize/monkey_patch'
|
29
|
+
|
30
|
+
module WWW
|
31
|
+
# = Synopsis
|
32
|
+
# The Mechanize library is used for automating interaction with a website. It
|
33
|
+
# can follow links, and submit forms. Form fields can be populated and
|
34
|
+
# submitted. A history of URLs is maintained and can be queried.
|
35
|
+
#
|
36
|
+
# == Example
|
37
|
+
# require 'rubygems'
|
38
|
+
# require 'mechanize'
|
39
|
+
# require 'logger'
|
40
|
+
#
|
41
|
+
# agent = WWW::Mechanize.new { |a| a.log = Logger.new("mech.log") }
|
42
|
+
# agent.user_agent_alias = 'Mac Safari'
|
43
|
+
# page = agent.get("http://www.google.com/")
|
44
|
+
# search_form = page.form_with(:name => "f")
|
45
|
+
# search_form.field_with(:name => "q").value = "Hello"
|
46
|
+
# search_results = agent.submit(search_form)
|
47
|
+
# puts search_results.body
|
48
|
+
class Mechanize
|
49
|
+
##
|
50
|
+
# The version of Mechanize you are using.
|
51
|
+
VERSION = '0.9.3'
|
52
|
+
|
53
|
+
##
|
54
|
+
# User Agent aliases
|
55
|
+
AGENT_ALIASES = {
|
56
|
+
'Windows IE 6' => 'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)',
|
57
|
+
'Windows IE 7' => 'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; .NET CLR 1.1.4322; .NET CLR 2.0.50727)',
|
58
|
+
'Windows Mozilla' => 'Mozilla/5.0 (Windows; U; Windows NT 5.0; en-US; rv:1.4b) Gecko/20030516 Mozilla Firebird/0.6',
|
59
|
+
'Mac Safari' => 'Mozilla/5.0 (Macintosh; U; PPC Mac OS X; en) AppleWebKit/418 (KHTML, like Gecko) Safari/417.9.3',
|
60
|
+
'Mac FireFox' => 'Mozilla/5.0 (Macintosh; U; PPC Mac OS X Mach-O; en-US; rv:1.8.0.3) Gecko/20060426 Firefox/1.5.0.3',
|
61
|
+
'Mac Mozilla' => 'Mozilla/5.0 (Macintosh; U; PPC Mac OS X Mach-O; en-US; rv:1.4a) Gecko/20030401',
|
62
|
+
'Linux Mozilla' => 'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.4) Gecko/20030624',
|
63
|
+
'Linux Konqueror' => 'Mozilla/5.0 (compatible; Konqueror/3; Linux)',
|
64
|
+
'iPhone' => 'Mozilla/5.0 (iPhone; U; CPU like Mac OS X; en) AppleWebKit/420+ (KHTML, like Gecko) Version/3.0 Mobile/1C28 Safari/419.3',
|
65
|
+
'Mechanize' => "WWW-Mechanize/#{VERSION} (http://rubyforge.org/projects/mechanize/)"
|
66
|
+
}
|
67
|
+
|
68
|
+
attr_accessor :cookie_jar
|
69
|
+
attr_accessor :open_timeout, :read_timeout
|
70
|
+
attr_accessor :user_agent
|
71
|
+
attr_accessor :watch_for_set
|
72
|
+
attr_accessor :ca_file
|
73
|
+
attr_accessor :key
|
74
|
+
attr_accessor :cert
|
75
|
+
attr_accessor :pass
|
76
|
+
attr_accessor :redirect_ok
|
77
|
+
attr_accessor :keep_alive_time
|
78
|
+
attr_accessor :keep_alive
|
79
|
+
attr_accessor :conditional_requests
|
80
|
+
attr_accessor :follow_meta_refresh
|
81
|
+
attr_accessor :verify_callback
|
82
|
+
attr_accessor :history_added
|
83
|
+
attr_accessor :scheme_handlers
|
84
|
+
attr_accessor :redirection_limit
|
85
|
+
|
86
|
+
# A hash of custom request headers
|
87
|
+
attr_accessor :request_headers
|
88
|
+
|
89
|
+
# The HTML parser to be used when parsing documents
|
90
|
+
attr_accessor :html_parser
|
91
|
+
|
92
|
+
attr_reader :history
|
93
|
+
attr_reader :pluggable_parser
|
94
|
+
|
95
|
+
alias :follow_redirect? :redirect_ok
|
96
|
+
|
97
|
+
@html_parser = Nokogiri::HTML
|
98
|
+
class << self; attr_accessor :html_parser, :log end
|
99
|
+
|
100
|
+
# Sets every setting to its default, then yields the new agent to the
# optional block so the caller can adjust it.
def initialize
  # attr_accessors
  @cookie_jar    = CookieJar.new
  @log           = nil
  @open_timeout  = nil
  @read_timeout  = nil
  @user_agent    = AGENT_ALIASES['Mechanize']
  @watch_for_set = nil
  @history_added = nil
  @ca_file       = nil # OpenSSL server certificate file

  # callback for OpenSSL errors while verifying the server certificate
  # chain, can be used for debugging or to ignore errors by always
  # returning _true_
  @verify_callback = nil
  @cert        = nil  # OpenSSL Certificate
  @key         = nil  # OpenSSL Private Key
  @pass        = nil  # OpenSSL Password
  @redirect_ok = true # Should we follow redirects?

  # attr_readers
  @history          = WWW::Mechanize::History.new
  @pluggable_parser = PluggableParser.new

  # Auth variables
  @user            = nil # Auth User
  @password        = nil # Auth Password
  @digest          = nil # DigestAuth Digest
  @auth_hash       = {}  # Keep track of urls for sending auth
  @request_headers = {}  # A hash of request headers to be used

  # Proxy settings
  @proxy_addr = nil
  @proxy_pass = nil
  @proxy_port = nil
  @proxy_user = nil

  @conditional_requests = true

  @follow_meta_refresh = false
  @redirection_limit   = 20

  # Connection Cache & Keep alive
  @connection_cache = {}
  @keep_alive_time  = 300
  @keep_alive       = true

  # Looking up an unregistered scheme stores and returns a handler that
  # raises UnsupportedSchemeError when called.
  @scheme_handlers = Hash.new { |h,k|
    h[k] = lambda { |link, page|
      raise UnsupportedSchemeError.new(k)
    }
  }
  # http, https, relative and file links share one pass-through handler.
  pass_through = lambda { |link, page| link }
  @scheme_handlers['http']     = pass_through
  @scheme_handlers['https']    = pass_through
  @scheme_handlers['relative'] = pass_through
  @scheme_handlers['file']     = pass_through

  @pre_connect_hook  = Chain::PreConnectHook.new
  @post_connect_hook = Chain::PostConnectHook.new

  # Start from the class-level parser setting.
  @html_parser = self.class.html_parser

  yield self if block_given?
end
|
164
|
+
|
165
|
+
# Sets the maximum size of the history to +length+.
def max_history=(length)
  @history.max_size = length
end

# Returns the maximum size of the history.
def max_history
  @history.max_size
end

# Sets the logger. It is stored on the class, not on this instance.
def log=(l)
  self.class.log = l
end

# Returns the logger stored on the class.
def log
  self.class.log
end
|
169
|
+
|
170
|
+
# Returns the hooks held by the pre-connect hook handler.
def pre_connect_hooks
  @pre_connect_hook.hooks
end
|
173
|
+
|
174
|
+
# Returns the hooks held by the post-connect hook handler.
def post_connect_hooks
  @post_connect_hook.hooks
end
|
177
|
+
|
178
|
+
# Sets the proxy address, port, user, and password.
# +addr+ should be a host, with no "http://".
# +user+ and +pass+ are optional and default to nil.
def set_proxy(addr, port, user = nil, pass = nil)
  @proxy_addr, @proxy_port, @proxy_user, @proxy_pass = addr, port, user, pass
end
|
183
|
+
|
184
|
+
# Set the user agent for the Mechanize object from one of the names in
# AGENT_ALIASES.
#
# Raises RuntimeError, naming the offending alias, when +al+ is not a key
# of AGENT_ALIASES.
def user_agent_alias=(al)
  self.user_agent = AGENT_ALIASES[al] ||
    raise("unknown agent alias: #{al.inspect}")
end
|
189
|
+
|
190
|
+
# Returns a list of cookies stored in the cookie jar.
def cookies
  @cookie_jar.to_a
end
|
194
|
+
|
195
|
+
# Sets the user and password to be used for authentication.
# Also available as +basic_auth+.
def auth(user, password)
  @user     = user
  @password = password
end
alias :basic_auth :auth
|
201
|
+
|
202
|
+
# Fetches the URL passed in and returns a page.
#
# Can be called positionally:
#
#   get(url, parameters = [], referer = nil)
#
# or with a single options hash:
#
#   get(:url => url, :params => [...], :referer => referer, :headers => {...})
#
# +referer+ may be a page, a URI or a String. When it is omitted, an
# absolute http(s) URL is fetched with a blank referer page and any other
# URL uses the current page, falling back to a blank page.
#
# The fetched page is added to the history, yielded to the block if one is
# given, and returned. Raises ArgumentError when the options hash has no
# :url entry.
def get(options, parameters = [], referer = nil)
  headers = nil

  if options.is_a? Hash
    url = options[:url]
    raise ArgumentError, "url must be specified" unless url
    parameters = options[:params] || []
    referer    = options[:referer]
    headers    = options[:headers]
  else
    url = options
    # Legacy calling convention get(url, referer): a second argument that
    # is not a parameter collection is taken to be the referer.
    unless parameters.respond_to?(:each)
      referer    = parameters
      parameters = []
    end
  end

  unless referer
    # Anchor on the scheme itself (case-insensitively) so that a relative
    # path that merely starts with "http" still gets the current page.
    if url.to_s =~ /\Ahttps?:/i
      referer = Page.new(nil, {'content-type'=>'text/html'})
    else
      referer = current_page || Page.new(nil, {'content-type'=>'text/html'})
    end
  end

  # FIXME: Huge hack so that using a URI as a referer works.  I need to
  # refactor everything to pass around URIs but still support
  # WWW::Mechanize::Page#base
  unless referer.is_a?(WWW::Mechanize::File)
    referer_uri = referer.is_a?(String) ? URI.parse(referer) : referer
    referer = Page.new(referer_uri, {'content-type' => 'text/html'})
  end

  # fetch the page
  page = fetch_page(  :uri      => url,
                      :referer  => referer,
                      :headers  => headers || {},
                      :params   => parameters
                   )
  add_to_history(page)
  yield page if block_given?
  page
end
|
244
|
+
|
245
|
+
####
# PUT to +url+ with +entity+, and setting +options+:
#
#   put('http://tenderlovemaking.com/', 'new content', :headers => {'Content-Type' => 'text/plain'})
#
# Delegates to request_with_entity with the :put verb and returns its result.
def put(url, entity, options = {})
  request_with_entity(:put, url, entity, options)
end
|
253
|
+
|
254
|
+
####
# DELETE to +url+ with +query_params+, and setting +options+:
#
#   delete('http://tenderlovemaking.com/', {'q' => 'foo'}, :headers => {})
#
# The request goes through #head with the verb overridden to :delete. The
# resulting page is added to the history, yielded to the block if one is
# given (as #get does), and returned.
def delete(url, query_params = {}, options = {})
  page = head(url, query_params, options.merge({:verb => :delete}))
  add_to_history(page)
  yield page if block_given?
  page
end
|
264
|
+
|
265
|
+
####
# HEAD to +url+ with +query_params+, and setting +options+:
#
#   head('http://tenderlovemaking.com/', {'q' => 'foo'}, :headers => {})
#
# Entries in +options+ override the defaults built here, including :verb.
# The page is yielded to the block if one is given, and returned.
def head(url, query_params = {}, options = {})
  options = {
    :uri      => url,
    :headers  => {},
    :params   => query_params,
    :verb     => :head
  }.merge(options)
  # fetch the page
  page = fetch_page(options)
  yield page if block_given?
  page
end
|
282
|
+
|
283
|
+
# Fetch a file and return the contents of the file, i.e. the body of the
# page returned by #get.
def get_file(url)
  get(url).body
end
|
287
|
+
|
288
|
+
# Clicks the WWW::Mechanize::Link object passed in and returns the
# page fetched.
#
# +link+ may also be any object indexable by 'href' / 'src'. The link's own
# page is used as the referer when it has one; otherwise the current page
# is used.
def click(link)
  # Ask the object whether it knows its page instead of rescuing every
  # error raised by the call.
  referer = link.respond_to?(:page) ? link.page : nil
  href = link.respond_to?(:href) ? link.href :
    (link['href'] || link['src'])
  get(:url => href, :referer => (referer || current_page()))
end
|
296
|
+
|
297
|
+
# Equivalent to the browser back button.  Removes the most recent page
# from the history and returns it.
def back
  @history.pop
end
|
302
|
+
|
303
|
+
# Posts to the given URL with the request entity. The request
|
304
|
+
# entity is specified by either a string, or a list of key-value
|
305
|
+
# pairs represented by a hash or an array of arrays.
|
306
|
+
#
|
307
|
+
# Examples:
|
308
|
+
# agent.post('http://example.com/', "foo" => "bar")
|
309
|
+
#
|
310
|
+
# agent.post('http://example.com/', [ ["foo", "bar"] ])
|
311
|
+
#
|
312
|
+
# agent.post('http://example.com/', "<message>hello</message>", 'Content-Type' => 'application/xml')
|
313
|
+
# A String +query+ is sent verbatim through #request_with_entity. Any other
# +query+ is iterated as key/value pairs and turned into a throw-away Form:
# IO values become file uploads (switching the form to multipart), every
# other value becomes a plain field. The form is then sent with #post_form.
def post(url, query={}, headers={})
  return request_with_entity(:post, url, query, :headers => headers) if query.is_a?(String)

  # Form.new is handed a Hash standing in for a form node; it only needs
  # attribute lookup and a #search that finds nothing.
  fake_node = {}
  def fake_node.search(*args); []; end
  fake_node['method'] = 'POST'
  fake_node['enctype'] = 'application/x-www-form-urlencoded'

  form = Form.new(fake_node)
  query.each do |name, value|
    unless value.is_a?(IO)
      form.fields << Form::Field.new(name.to_s, value)
      next
    end

    form.enctype = 'multipart/form-data'
    upload = Form::FileUpload.new(name.to_s, ::File.basename(value.path))
    upload.file_data = value.read
    form.file_uploads << upload
  end

  post_form(url, form, headers)
end
|
338
|
+
|
339
|
+
# Submit a form with an optional button.
|
340
|
+
# Without a button:
|
341
|
+
# page = agent.get('http://example.com')
|
342
|
+
# agent.submit(page.forms.first)
|
343
|
+
# With a button
|
344
|
+
# agent.submit(page.forms.first, page.forms.first.buttons.first)
|
345
|
+
# Adds +button+ to the form's query when one is given, then dispatches on
# the upper-cased form method: POST goes through #post_form, GET goes
# through #get with the form's built query. Any other method raises a
# RuntimeError.
def submit(form, button=nil, headers={})
  form.add_button_to_query(button) if button

  verb = form.method.upcase
  if verb == 'POST'
    post_form(form.action, form, headers)
  elsif verb == 'GET'
    # Drop a trailing "?..." segment from the action; the form's own query
    # is passed separately as :params.
    target = form.action.gsub(/\?[^\?]*$/, '')
    get(:url     => target,
        :params  => form.build_query,
        :headers => headers,
        :referer => form.page)
  else
    raise "unsupported method: #{verb}"
  end
end
|
360
|
+
|
361
|
+
# Sends +entity+ as the request body to +url+ using HTTP method +verb+.
#
# +options+ may carry :headers (merged over the default Content-Type and
# Content-Length), :referer and :uri; :verb, :params and :headers are always
# set by this method. The referer defaults to the current page, or to a
# blank text/html Page when there is none. The fetched page is added to the
# history and returned.
def request_with_entity(verb, url, entity, options={})
  cur_page = current_page || Page.new( nil, {'content-type'=>'text/html'})

  options = {
    :uri => url,
    :referer => cur_page,
    :headers => {},
  }.update(options)

  # Content-Length is a count of octets. String#size counts characters on
  # Ruby 1.9+, which is too small for multibyte entities, so prefer
  # #bytesize and only fall back to #size where it is unavailable.
  entity_length = entity.respond_to?(:bytesize) ? entity.bytesize : entity.size

  headers = {
    'Content-Type' => 'application/octet-stream',
    'Content-Length' => entity_length.to_s,
  }.update(options[:headers])

  options.update({
    :verb => verb,
    :params => [entity],
    :headers => headers,
  })

  page = fetch_page(options)
  add_to_history(page)
  page
end
|
385
|
+
|
386
|
+
# Returns the current page loaded by Mechanize
|
387
|
+
# Returns the last entry of the history (nil when the history has none).
def current_page
  @history.last
end
|
390
|
+
|
391
|
+
# Returns whether or not a url has been visited
|
392
|
+
# True when #visited_page finds an entry for +url+, false otherwise.
def visited?(url)
  found = visited_page(url)
  !found.nil?
end
|
395
|
+
|
396
|
+
# Returns a visited page for the url passed in, otherwise nil
|
397
|
+
# +url+ may also be an object answering #href, in which case its href is
# used. The value is resolved with #resolve before the history is asked
# for a matching page.
def visited_page(url)
  url = url.href if url.respond_to?(:href)
  resolved = resolve(url)
  @history.visited_page(resolved)
end
|
403
|
+
|
404
|
+
# Runs given block, then resets the page history as it was before. self is
|
405
|
+
# given as a parameter to the block. Returns the value of the block.
|
406
|
+
# Takes a copy of the history before yielding; the copy is put back in
# place afterwards, whether the block returns normally or raises.
def transact
  saved_history = @history.dup
  begin
    yield(self)
  ensure
    @history = saved_history
  end
end
|
414
|
+
|
415
|
+
alias :page :current_page
|
416
|
+
|
417
|
+
private
|
418
|
+
|
419
|
+
# Runs +url+ (with +referer+, defaulting to the current page) through a
# chain containing only the URI resolver and returns the resulting :uri
# entry converted to a String.
def resolve(url, referer = current_page())
  hash = { :uri => url, :referer => referer }
  # The chain communicates its result by updating +hash+ in place; the
  # return value of #handle is not needed.
  Chain.new([
    Chain::URIResolver.new(@scheme_handlers)
  ]).handle(hash)
  hash[:uri].to_s
end
|
426
|
+
|
427
|
+
# POSTs +form+ to +url+.
#
# The referer is the form's page, else the current page, else a blank
# text/html Page. The body is form.request_data; Content-Type comes from
# form.enctype, and entries in +headers+ override the generated headers.
# The fetched page is added to the history and returned.
def post_form(url, form, headers = {})
  cur_page = form.page || current_page ||
             Page.new( nil, {'content-type'=>'text/html'})

  request_data = form.request_data

  log.debug("query: #{ request_data.inspect }") if log

  # Content-Length is a count of octets. String#size counts characters on
  # Ruby 1.9+, which is too small for multibyte bodies, so prefer
  # #bytesize and only fall back to #size where it is unavailable.
  content_length =
    request_data.respond_to?(:bytesize) ? request_data.bytesize : request_data.size

  # fetch the page
  page = fetch_page( :uri => url,
                     :referer => cur_page,
                     :verb => :post,
                     :params => [request_data],
                     :headers => {
                       'Content-Type' => form.enctype,
                       'Content-Length' => content_length.to_s,
                     }.merge(headers))
  add_to_history(page)
  page
end
|
447
|
+
|
448
|
+
# uri is an absolute URI
|
449
|
+
# Performs one HTTP exchange described by +params+ and returns the page.
#
# +params+ is merged over the defaults below (:uri, :verb, :params,
# :headers, :referer, :redirects, ...). The merged hash is passed through
# the pre-connect chain, the request is sent (retried up to two times on
# EOFError / ECONNRESET / EPIPE), and the hash is then passed through the
# post-connect chain, after which :res_klass and :page are read from it.
#
# Depending on the result the method returns the page, follows a refresh or
# a redirect by calling itself with :redirects incremented, retries once
# with credentials on 401, or raises:
# * RedirectLimitReachedError when a refresh or redirect would exceed
#   redirection_limit,
# * StandardError when a Refresh header cannot be parsed,
# * ResponseCodeError for a 401 that cannot be retried and for any other
#   unhandled response class.
def fetch_page(params)
  options = {
    :request    => nil,
    :response   => nil,
    :connection => nil,
    :referer    => current_page(),
    :uri        => nil,
    :verb       => :get,
    :agent      => self,
    :redirects  => 0,
    :params     => [],
    :headers    => {},
  }.merge(params)

  before_connect = Chain.new([
    Chain::URIResolver.new(@scheme_handlers),
    Chain::ParameterResolver.new,
    Chain::RequestResolver.new,
    Chain::ConnectionResolver.new(
      @connection_cache,
      @keep_alive,
      @proxy_addr,
      @proxy_port,
      @proxy_user,
      @proxy_pass
    ),
    Chain::SSLResolver.new(@ca_file, @verify_callback, @cert, @key, @pass),
    Chain::AuthHeaders.new(@auth_hash, @user, @password, @digest),
    Chain::HeaderResolver.new(
      @keep_alive,
      @keep_alive_time,
      @cookie_jar,
      @user_agent,
      @request_headers
    ),
    Chain::CustomHeaders.new,
    @pre_connect_hook,
  ])
  before_connect.handle(options)

  uri           = options[:uri]
  request       = options[:request]
  cur_page      = options[:referer]
  request_data  = options[:params]
  redirects     = options[:redirects]
  http_obj      = options[:connection]

  # Add If-Modified-Since if page is in history
  if @conditional_requests
    cached = visited_page(uri)
    if cached && cached.response['Last-Modified']
      request['If-Modified-Since'] = cached.response['Last-Modified']
    end
  end

  # Specify timeouts if given
  http_obj.open_timeout = @open_timeout if @open_timeout
  http_obj.read_timeout = @read_timeout if @read_timeout
  http_obj.start unless http_obj.started?

  # Log specified headers for the request
  log.info("#{ request.class }: #{ request.path }") if log
  if log
    request.each_header do |k, v|
      log.debug("request-header: #{ k } => #{ v }")
    end
  end

  # Send the request
  attempts = 0
  begin
    response = http_obj.request(request, *request_data) { |r|
      connection_chain = Chain.new([
        Chain::ResponseReader.new(r),
        Chain::BodyDecodingHandler.new,
      ])
      connection_chain.handle(options)
    }
  rescue EOFError, Errno::ECONNRESET, Errno::EPIPE => x
    log.error("Rescuing EOF error") if log
    http_obj.finish
    raise x if attempts >= 2
    # Clear the body before resending the same request object with
    # request_data passed again.
    request.body = nil
    http_obj.start
    attempts += 1
    retry
  end

  after_connect = Chain.new([
    @post_connect_hook,
    Chain::ResponseBodyParser.new(@pluggable_parser, @watch_for_set),
    Chain::ResponseHeaderHandler.new(@cookie_jar, @connection_cache),
  ])
  after_connect.handle(options)

  res_klass = options[:res_klass]
  page      = options[:page]

  log.info("status: #{ page.code }") if log

  if follow_meta_refresh
    redirect_uri  = nil
    referer       = page
    if (page.respond_to?(:meta) && (redirect = page.meta.first))
      redirect_uri = redirect.uri.to_s
      # Apply the same limit as the Refresh-header branch below; without
      # it a page that refreshes to itself would recurse without bound.
      if redirects + 1 > redirection_limit
        raise RedirectLimitReachedError.new(page, redirects)
      end
      sleep redirect.node['delay'].to_f
      referer = Page.new(nil, {'content-type'=>'text/html'})
    elsif refresh = response['refresh']
      delay, redirect_uri = Page::Meta.parse(refresh, uri)
      raise StandardError, "Invalid refresh http header" unless delay
      if redirects + 1 > redirection_limit
        raise RedirectLimitReachedError.new(page, redirects)
      end
      sleep delay.to_f
    end
    if redirect_uri
      @history.push(page, page.uri)
      return fetch_page(
        :uri        => redirect_uri,
        :referer    => referer,
        :params     => [],
        :verb       => :get,
        :redirects  => redirects + 1
      )
    end
  end

  return page if res_klass <= Net::HTTPSuccess

  if res_klass == Net::HTTPNotModified
    log.debug("Got cached page") if log
    return visited_page(uri) || page
  elsif res_klass <= Net::HTTPRedirection
    return page unless follow_redirect?
    log.info("follow redirect to: #{ response['Location'] }") if log
    from_uri  = page.uri
    raise RedirectLimitReachedError.new(page, redirects) if redirects + 1 > redirection_limit
    # A redirected HEAD stays a HEAD; every other verb is followed with GET.
    redirect_verb = options[:verb] == :head ? :head : :get
    page = fetch_page(  :uri => response['Location'].to_s,
                        :referer => page,
                        :params  => [],
                        :verb => redirect_verb,
                        :redirects => redirects + 1
                     )
    @history.push(page, from_uri)
    return page
  elsif res_klass <= Net::HTTPUnauthorized
    raise ResponseCodeError.new(page) unless @user || @password
    # A host already present in @auth_hash has had a scheme chosen before;
    # a 401 now is treated as final.
    raise ResponseCodeError.new(page) if @auth_hash.has_key?(uri.host)
    if response['www-authenticate'] =~ /Digest/i
      @auth_hash[uri.host] = :digest
      if response['server'] =~ /Microsoft-IIS/
        @auth_hash[uri.host] = :iis_digest
      end
      @digest = response['www-authenticate']
    else
      @auth_hash[uri.host] = :basic
    end
    return fetch_page(  :uri      => uri,
                        :referer  => cur_page,
                        :verb     => request.method.downcase.to_sym,
                        :params   => request_data,
                        :headers  => options[:headers]
                     )
  end

  raise ResponseCodeError.new(page), "Unhandled response", caller
end
|
613
|
+
|
614
|
+
# Pushes +page+ onto the history together with its resolved URI, then calls
# the history_added callback with the page when one is set.
def add_to_history(page)
  resolved_uri = resolve(page.uri)
  @history.push(page, resolved_uri)
  callback = history_added
  callback.call(page) if callback
end
|
618
|
+
end
|
619
|
+
end
|