eric-mechanize 0.9.3.20090623142847

Sign up to get free protection for your applications and to get access to all the features.
Files changed (173) hide show
  1. data/CHANGELOG.rdoc +504 -0
  2. data/EXAMPLES.rdoc +171 -0
  3. data/FAQ.rdoc +11 -0
  4. data/GUIDE.rdoc +122 -0
  5. data/LICENSE.rdoc +340 -0
  6. data/Manifest.txt +169 -0
  7. data/README.rdoc +60 -0
  8. data/Rakefile +43 -0
  9. data/examples/flickr_upload.rb +23 -0
  10. data/examples/mech-dump.rb +7 -0
  11. data/examples/proxy_req.rb +9 -0
  12. data/examples/rubyforge.rb +21 -0
  13. data/examples/spider.rb +11 -0
  14. data/lib/mechanize.rb +7 -0
  15. data/lib/www/mechanize.rb +619 -0
  16. data/lib/www/mechanize/chain.rb +34 -0
  17. data/lib/www/mechanize/chain/auth_headers.rb +80 -0
  18. data/lib/www/mechanize/chain/body_decoding_handler.rb +48 -0
  19. data/lib/www/mechanize/chain/connection_resolver.rb +78 -0
  20. data/lib/www/mechanize/chain/custom_headers.rb +23 -0
  21. data/lib/www/mechanize/chain/handler.rb +9 -0
  22. data/lib/www/mechanize/chain/header_resolver.rb +53 -0
  23. data/lib/www/mechanize/chain/parameter_resolver.rb +24 -0
  24. data/lib/www/mechanize/chain/post_connect_hook.rb +0 -0
  25. data/lib/www/mechanize/chain/pre_connect_hook.rb +22 -0
  26. data/lib/www/mechanize/chain/request_resolver.rb +32 -0
  27. data/lib/www/mechanize/chain/response_body_parser.rb +40 -0
  28. data/lib/www/mechanize/chain/response_header_handler.rb +50 -0
  29. data/lib/www/mechanize/chain/response_reader.rb +41 -0
  30. data/lib/www/mechanize/chain/ssl_resolver.rb +42 -0
  31. data/lib/www/mechanize/chain/uri_resolver.rb +77 -0
  32. data/lib/www/mechanize/content_type_error.rb +16 -0
  33. data/lib/www/mechanize/cookie.rb +72 -0
  34. data/lib/www/mechanize/cookie_jar.rb +191 -0
  35. data/lib/www/mechanize/file.rb +73 -0
  36. data/lib/www/mechanize/file_response.rb +62 -0
  37. data/lib/www/mechanize/file_saver.rb +39 -0
  38. data/lib/www/mechanize/form.rb +360 -0
  39. data/lib/www/mechanize/form/button.rb +8 -0
  40. data/lib/www/mechanize/form/check_box.rb +13 -0
  41. data/lib/www/mechanize/form/field.rb +28 -0
  42. data/lib/www/mechanize/form/file_upload.rb +24 -0
  43. data/lib/www/mechanize/form/image_button.rb +23 -0
  44. data/lib/www/mechanize/form/multi_select_list.rb +69 -0
  45. data/lib/www/mechanize/form/option.rb +51 -0
  46. data/lib/www/mechanize/form/radio_button.rb +38 -0
  47. data/lib/www/mechanize/form/select_list.rb +45 -0
  48. data/lib/www/mechanize/headers.rb +12 -0
  49. data/lib/www/mechanize/history.rb +67 -0
  50. data/lib/www/mechanize/inspect.rb +90 -0
  51. data/lib/www/mechanize/monkey_patch.rb +37 -0
  52. data/lib/www/mechanize/page.rb +181 -0
  53. data/lib/www/mechanize/page/base.rb +10 -0
  54. data/lib/www/mechanize/page/frame.rb +22 -0
  55. data/lib/www/mechanize/page/link.rb +50 -0
  56. data/lib/www/mechanize/page/meta.rb +51 -0
  57. data/lib/www/mechanize/pluggable_parsers.rb +103 -0
  58. data/lib/www/mechanize/redirect_limit_reached_error.rb +18 -0
  59. data/lib/www/mechanize/redirect_not_get_or_head_error.rb +20 -0
  60. data/lib/www/mechanize/response_code_error.rb +25 -0
  61. data/lib/www/mechanize/unsupported_scheme_error.rb +10 -0
  62. data/lib/www/mechanize/util.rb +76 -0
  63. data/mechanize.gemspec +41 -0
  64. data/test/chain/test_argument_validator.rb +14 -0
  65. data/test/chain/test_auth_headers.rb +25 -0
  66. data/test/chain/test_custom_headers.rb +18 -0
  67. data/test/chain/test_header_resolver.rb +28 -0
  68. data/test/chain/test_parameter_resolver.rb +35 -0
  69. data/test/chain/test_request_resolver.rb +29 -0
  70. data/test/chain/test_response_reader.rb +24 -0
  71. data/test/data/htpasswd +1 -0
  72. data/test/data/server.crt +16 -0
  73. data/test/data/server.csr +12 -0
  74. data/test/data/server.key +15 -0
  75. data/test/data/server.pem +15 -0
  76. data/test/helper.rb +129 -0
  77. data/test/htdocs/alt_text.html +10 -0
  78. data/test/htdocs/bad_form_test.html +9 -0
  79. data/test/htdocs/button.jpg +0 -0
  80. data/test/htdocs/empty_form.html +6 -0
  81. data/test/htdocs/file_upload.html +26 -0
  82. data/test/htdocs/find_link.html +41 -0
  83. data/test/htdocs/form_multi_select.html +16 -0
  84. data/test/htdocs/form_multival.html +37 -0
  85. data/test/htdocs/form_no_action.html +18 -0
  86. data/test/htdocs/form_no_input_name.html +16 -0
  87. data/test/htdocs/form_select.html +16 -0
  88. data/test/htdocs/form_select_all.html +16 -0
  89. data/test/htdocs/form_select_none.html +17 -0
  90. data/test/htdocs/form_select_noopts.html +10 -0
  91. data/test/htdocs/form_set_fields.html +14 -0
  92. data/test/htdocs/form_test.html +188 -0
  93. data/test/htdocs/frame_test.html +30 -0
  94. data/test/htdocs/google.html +13 -0
  95. data/test/htdocs/iframe_test.html +16 -0
  96. data/test/htdocs/index.html +6 -0
  97. data/test/htdocs/link with space.html +5 -0
  98. data/test/htdocs/meta_cookie.html +11 -0
  99. data/test/htdocs/no_title_test.html +6 -0
  100. data/test/htdocs/relative/tc_relative_links.html +21 -0
  101. data/test/htdocs/tc_bad_links.html +5 -0
  102. data/test/htdocs/tc_base_link.html +8 -0
  103. data/test/htdocs/tc_blank_form.html +11 -0
  104. data/test/htdocs/tc_checkboxes.html +19 -0
  105. data/test/htdocs/tc_encoded_links.html +5 -0
  106. data/test/htdocs/tc_follow_meta.html +8 -0
  107. data/test/htdocs/tc_form_action.html +48 -0
  108. data/test/htdocs/tc_links.html +18 -0
  109. data/test/htdocs/tc_no_attributes.html +16 -0
  110. data/test/htdocs/tc_pretty_print.html +17 -0
  111. data/test/htdocs/tc_radiobuttons.html +17 -0
  112. data/test/htdocs/tc_referer.html +10 -0
  113. data/test/htdocs/tc_relative_links.html +19 -0
  114. data/test/htdocs/tc_textarea.html +23 -0
  115. data/test/htdocs/unusual______.html +5 -0
  116. data/test/servlets.rb +365 -0
  117. data/test/ssl_server.rb +48 -0
  118. data/test/test_authenticate.rb +71 -0
  119. data/test/test_bad_links.rb +25 -0
  120. data/test/test_blank_form.rb +16 -0
  121. data/test/test_checkboxes.rb +61 -0
  122. data/test/test_content_type.rb +13 -0
  123. data/test/test_cookie_class.rb +338 -0
  124. data/test/test_cookie_jar.rb +362 -0
  125. data/test/test_cookies.rb +123 -0
  126. data/test/test_encoded_links.rb +20 -0
  127. data/test/test_errors.rb +49 -0
  128. data/test/test_follow_meta.rb +108 -0
  129. data/test/test_form_action.rb +52 -0
  130. data/test/test_form_as_hash.rb +61 -0
  131. data/test/test_form_button.rb +38 -0
  132. data/test/test_form_no_inputname.rb +15 -0
  133. data/test/test_forms.rb +564 -0
  134. data/test/test_frames.rb +25 -0
  135. data/test/test_get_headers.rb +52 -0
  136. data/test/test_gzipping.rb +22 -0
  137. data/test/test_hash_api.rb +45 -0
  138. data/test/test_history.rb +142 -0
  139. data/test/test_history_added.rb +16 -0
  140. data/test/test_html_unscape_forms.rb +39 -0
  141. data/test/test_if_modified_since.rb +20 -0
  142. data/test/test_keep_alive.rb +31 -0
  143. data/test/test_links.rb +120 -0
  144. data/test/test_mech.rb +268 -0
  145. data/test/test_mechanize_file.rb +47 -0
  146. data/test/test_meta.rb +65 -0
  147. data/test/test_multi_select.rb +106 -0
  148. data/test/test_no_attributes.rb +13 -0
  149. data/test/test_option.rb +18 -0
  150. data/test/test_page.rb +124 -0
  151. data/test/test_pluggable_parser.rb +145 -0
  152. data/test/test_post_form.rb +34 -0
  153. data/test/test_pretty_print.rb +22 -0
  154. data/test/test_radiobutton.rb +75 -0
  155. data/test/test_redirect_limit_reached.rb +41 -0
  156. data/test/test_redirect_verb_handling.rb +45 -0
  157. data/test/test_referer.rb +39 -0
  158. data/test/test_relative_links.rb +40 -0
  159. data/test/test_request.rb +13 -0
  160. data/test/test_response_code.rb +52 -0
  161. data/test/test_save_file.rb +48 -0
  162. data/test/test_scheme.rb +48 -0
  163. data/test/test_select.rb +106 -0
  164. data/test/test_select_all.rb +15 -0
  165. data/test/test_select_none.rb +15 -0
  166. data/test/test_select_noopts.rb +16 -0
  167. data/test/test_set_fields.rb +44 -0
  168. data/test/test_ssl_server.rb +20 -0
  169. data/test/test_subclass.rb +14 -0
  170. data/test/test_textarea.rb +45 -0
  171. data/test/test_upload.rb +109 -0
  172. data/test/test_verbs.rb +25 -0
  173. metadata +314 -0
data/Manifest.txt ADDED
@@ -0,0 +1,169 @@
1
+ CHANGELOG.rdoc
2
+ EXAMPLES.rdoc
3
+ FAQ.rdoc
4
+ GUIDE.rdoc
5
+ LICENSE.rdoc
6
+ Manifest.txt
7
+ README.rdoc
8
+ Rakefile
9
+ examples/flickr_upload.rb
10
+ examples/mech-dump.rb
11
+ examples/proxy_req.rb
12
+ examples/rubyforge.rb
13
+ examples/spider.rb
14
+ lib/mechanize.rb
15
+ lib/www/mechanize.rb
16
+ lib/www/mechanize/chain.rb
17
+ lib/www/mechanize/chain/auth_headers.rb
18
+ lib/www/mechanize/chain/body_decoding_handler.rb
19
+ lib/www/mechanize/chain/connection_resolver.rb
20
+ lib/www/mechanize/chain/custom_headers.rb
21
+ lib/www/mechanize/chain/handler.rb
22
+ lib/www/mechanize/chain/header_resolver.rb
23
+ lib/www/mechanize/chain/parameter_resolver.rb
24
+ lib/www/mechanize/chain/post_connect_hook.rb
25
+ lib/www/mechanize/chain/pre_connect_hook.rb
26
+ lib/www/mechanize/chain/request_resolver.rb
27
+ lib/www/mechanize/chain/response_body_parser.rb
28
+ lib/www/mechanize/chain/response_header_handler.rb
29
+ lib/www/mechanize/chain/response_reader.rb
30
+ lib/www/mechanize/chain/ssl_resolver.rb
31
+ lib/www/mechanize/chain/uri_resolver.rb
32
+ lib/www/mechanize/content_type_error.rb
33
+ lib/www/mechanize/cookie.rb
34
+ lib/www/mechanize/cookie_jar.rb
35
+ lib/www/mechanize/file.rb
36
+ lib/www/mechanize/file_response.rb
37
+ lib/www/mechanize/file_saver.rb
38
+ lib/www/mechanize/form.rb
39
+ lib/www/mechanize/form/button.rb
40
+ lib/www/mechanize/form/check_box.rb
41
+ lib/www/mechanize/form/field.rb
42
+ lib/www/mechanize/form/file_upload.rb
43
+ lib/www/mechanize/form/image_button.rb
44
+ lib/www/mechanize/form/multi_select_list.rb
45
+ lib/www/mechanize/form/option.rb
46
+ lib/www/mechanize/form/radio_button.rb
47
+ lib/www/mechanize/form/select_list.rb
48
+ lib/www/mechanize/headers.rb
49
+ lib/www/mechanize/history.rb
50
+ lib/www/mechanize/inspect.rb
51
+ lib/www/mechanize/monkey_patch.rb
52
+ lib/www/mechanize/page.rb
53
+ lib/www/mechanize/page/base.rb
54
+ lib/www/mechanize/page/frame.rb
55
+ lib/www/mechanize/page/link.rb
56
+ lib/www/mechanize/page/meta.rb
57
+ lib/www/mechanize/pluggable_parsers.rb
58
+ lib/www/mechanize/redirect_limit_reached_error.rb
59
+ lib/www/mechanize/redirect_not_get_or_head_error.rb
60
+ lib/www/mechanize/response_code_error.rb
61
+ lib/www/mechanize/unsupported_scheme_error.rb
62
+ lib/www/mechanize/util.rb
63
+ mechanize.gemspec
64
+ test/chain/test_argument_validator.rb
65
+ test/chain/test_custom_headers.rb
66
+ test/chain/test_parameter_resolver.rb
67
+ test/chain/test_request_resolver.rb
68
+ test/chain/test_response_reader.rb
69
+ test/data/htpasswd
70
+ test/data/server.crt
71
+ test/data/server.csr
72
+ test/data/server.key
73
+ test/data/server.pem
74
+ test/helper.rb
75
+ test/htdocs/alt_text.html
76
+ test/htdocs/bad_form_test.html
77
+ test/htdocs/button.jpg
78
+ test/htdocs/empty_form.html
79
+ test/htdocs/file_upload.html
80
+ test/htdocs/find_link.html
81
+ test/htdocs/form_multi_select.html
82
+ test/htdocs/form_multival.html
83
+ test/htdocs/form_no_action.html
84
+ test/htdocs/form_no_input_name.html
85
+ test/htdocs/form_select.html
86
+ test/htdocs/form_select_all.html
87
+ test/htdocs/form_select_none.html
88
+ test/htdocs/form_select_noopts.html
89
+ test/htdocs/form_set_fields.html
90
+ test/htdocs/form_test.html
91
+ test/htdocs/frame_test.html
92
+ test/htdocs/google.html
93
+ test/htdocs/iframe_test.html
94
+ test/htdocs/index.html
95
+ test/htdocs/link with space.html
96
+ test/htdocs/meta_cookie.html
97
+ test/htdocs/no_title_test.html
98
+ test/htdocs/relative/tc_relative_links.html
99
+ test/htdocs/tc_bad_links.html
100
+ test/htdocs/tc_base_link.html
101
+ test/htdocs/tc_blank_form.html
102
+ test/htdocs/tc_checkboxes.html
103
+ test/htdocs/tc_encoded_links.html
104
+ test/htdocs/tc_follow_meta.html
105
+ test/htdocs/tc_form_action.html
106
+ test/htdocs/tc_links.html
107
+ test/htdocs/tc_no_attributes.html
108
+ test/htdocs/tc_pretty_print.html
109
+ test/htdocs/tc_radiobuttons.html
110
+ test/htdocs/tc_referer.html
111
+ test/htdocs/tc_relative_links.html
112
+ test/htdocs/tc_textarea.html
113
+ test/htdocs/unusual______.html
114
+ test/servlets.rb
115
+ test/ssl_server.rb
116
+ test/test_authenticate.rb
117
+ test/test_bad_links.rb
118
+ test/test_blank_form.rb
119
+ test/test_checkboxes.rb
120
+ test/test_content_type.rb
121
+ test/test_cookie_class.rb
122
+ test/test_cookie_jar.rb
123
+ test/test_cookies.rb
124
+ test/test_encoded_links.rb
125
+ test/test_errors.rb
126
+ test/test_follow_meta.rb
127
+ test/test_form_action.rb
128
+ test/test_form_as_hash.rb
129
+ test/test_form_button.rb
130
+ test/test_form_no_inputname.rb
131
+ test/test_forms.rb
132
+ test/test_frames.rb
133
+ test/test_get_headers.rb
134
+ test/test_gzipping.rb
135
+ test/test_hash_api.rb
136
+ test/test_history.rb
137
+ test/test_history_added.rb
138
+ test/test_html_unscape_forms.rb
139
+ test/test_if_modified_since.rb
140
+ test/test_keep_alive.rb
141
+ test/test_links.rb
142
+ test/test_mech.rb
143
+ test/test_mechanize_file.rb
144
+ test/test_multi_select.rb
145
+ test/test_no_attributes.rb
146
+ test/test_option.rb
147
+ test/test_page.rb
148
+ test/test_pluggable_parser.rb
149
+ test/test_post_form.rb
150
+ test/test_pretty_print.rb
151
+ test/test_radiobutton.rb
152
+ test/test_redirect_limit_reached.rb
153
+ test/test_redirect_verb_handling.rb
154
+ test/test_referer.rb
155
+ test/test_relative_links.rb
156
+ test/test_request.rb
157
+ test/test_response_code.rb
158
+ test/test_save_file.rb
159
+ test/test_scheme.rb
160
+ test/test_select.rb
161
+ test/test_select_all.rb
162
+ test/test_select_none.rb
163
+ test/test_select_noopts.rb
164
+ test/test_set_fields.rb
165
+ test/test_ssl_server.rb
166
+ test/test_subclass.rb
167
+ test/test_textarea.rb
168
+ test/test_upload.rb
169
+ test/test_verbs.rb
data/README.rdoc ADDED
@@ -0,0 +1,60 @@
1
+ = WWW::Mechanize
2
+
3
+ * http://mechanize.rubyforge.org/
4
+ * http://github.com/tenderlove/mechanize/tree/master
5
+
6
+ == DESCRIPTION
7
+
8
+ The Mechanize library is used for automating interaction with websites.
9
+ Mechanize automatically stores and sends cookies, follows redirects,
10
+ and can follow links and submit forms. Form fields can be populated and
11
+ submitted. Mechanize also keeps track of the sites that you have visited as
12
+ a history.
13
+
14
+ == Dependencies
15
+
16
+ * ruby 1.8.6
17
+ * nokogiri[http://nokogiri.rubyforge.org]
18
+
19
+ == SUPPORT:
20
+
21
+ The mechanize mailing list is available here:
22
+
23
+ * http://rubyforge.org/mailman/listinfo/mechanize-users
24
+
25
+ The bug tracker is available here:
26
+
27
+ * http://rubyforge.org/tracker/?atid=5709&group_id=1453
28
+
29
+ == Examples
30
+
31
+ If you are just starting, check out the GUIDE.
32
+ Also, check out the EXAMPLES file.
33
+
34
+ == Authors
35
+
36
+ Copyright (c) 2005 by Michael Neumann (mneumann@ntecs.de)
37
+
38
+ Copyright (c) 2006-2009:
39
+
40
+ * {Aaron Patterson}[http://tenderlovemaking.com] (aaronp@rubyforge.org)
41
+ * {Mike Dalessio}[http://mike.daless.io] (mike@csa.net)
42
+
43
+ This library comes with a shameless plug for employing me
44
+ (Aaron[http://tenderlovemaking.com/]) programming
45
+ Ruby, my favorite language!
46
+
47
+ == Acknowledgments
48
+
49
+ This library was heavily influenced by its namesake in the Perl world. A big
50
+ thanks goes to Andy Lester (andy@petdance.com), the author of the original
51
+ Perl Mechanize which is available here[http://search.cpan.org/~petdance/WWW-Mechanize-1.20/]. Ruby Mechanize would not be around without you!
52
+
53
+ Thank you to Michael Neumann for starting the Ruby version. Thanks to everyone
54
+ who's helped out in various ways. Finally, thank you to the people using this
55
+ library!
56
+
57
+ == License
58
+
59
+ This library is distributed under the GPL. Please see the LICENSE file.
60
+
data/Rakefile ADDED
@@ -0,0 +1,43 @@
1
+ require 'rubygems'
2
+ require 'hoe'
3
+
4
+ $LOAD_PATH.unshift File.join(File.dirname(__FILE__), "lib")
5
+ require 'mechanize'
6
+
7
+ HOE = Hoe.new('mechanize', WWW::Mechanize::VERSION) do |p|
8
+ p.developer('Aaron Patterson','aaronp@rubyforge.org')
9
+ p.developer('Mike Dalessio','mike.dalessio@gmail.com')
10
+ p.readme_file = 'README.rdoc'
11
+ p.history_file = 'CHANGELOG.rdoc'
12
+ p.extra_rdoc_files = FileList['*.rdoc']
13
+ p.summary = "Mechanize provides automated web-browsing"
14
+ p.extra_deps = [['nokogiri', '>= 1.2.1']]
15
+ end
16
+
17
+ desc "Update SSL Certificate"
18
+ task('ssl_cert') do |p|
19
+ sh "openssl genrsa -des3 -out server.key 1024"
20
+ sh "openssl req -new -key server.key -out server.csr"
21
+ sh "cp server.key server.key.org"
22
+ sh "openssl rsa -in server.key.org -out server.key"
23
+ sh "openssl x509 -req -days 365 -in server.csr -signkey server.key -out server.crt"
24
+ sh "cp server.key server.pem"
25
+ sh "mv server.key server.csr server.crt server.pem test/data/"
26
+ sh "rm server.key.org"
27
+ end
28
+
29
+ namespace :gem do
30
+ desc 'Generate a gem spec'
31
+ task :spec do
32
+ File.open("#{HOE.name}.gemspec", 'w') do |f|
33
+ HOE.spec.version = "#{HOE.version}.#{Time.now.strftime("%Y%m%d%H%M%S")}"
34
+ f.write(HOE.spec.to_ruby)
35
+ end
36
+ end
37
+ end
38
+
39
+ desc "Run code-coverage analysis"
40
+ task :coverage do
41
+ rm_rf "coverage"
42
+ sh "rcov -x Library -I lib:test #{Dir[*HOE.test_globs].join(' ')}"
43
+ end
@@ -0,0 +1,23 @@
1
+ $:.unshift File.join(File.dirname(__FILE__), "..", "lib")
2
+
3
+ require 'rubygems'
4
+ require 'mechanize'
5
+
6
+ agent = WWW::Mechanize.new
7
+
8
+ # Get the flickr sign in page
9
+ page = agent.get('http://flickr.com/signin/flickr/')
10
+
11
+ # Fill out the login form
12
+ form = page.forms.name('flickrloginform').first
13
+ form.email = ARGV[0]
14
+ form.password = ARGV[1]
15
+ page = agent.submit(form)
16
+
17
+ # Go to the upload page
18
+ page = agent.click page.links.text('Upload')
19
+
20
+ # Fill out the form
21
+ form = page.forms.action('/photos_upload_process.gne').first
22
+ form.file_uploads.name('file1').first.file_name = ARGV[2]
23
+ agent.submit(form)
@@ -0,0 +1,7 @@
1
+ $:.unshift File.join(File.dirname(__FILE__), "..", "lib")
2
+
3
+ require 'rubygems'
4
+ require 'mechanize'
5
+
6
+ agent = WWW::Mechanize.new
7
+ puts agent.get(ARGV[0]).inspect
@@ -0,0 +1,9 @@
1
+ $:.unshift File.join(File.dirname(__FILE__), "..", "lib")
2
+
3
+ require 'rubygems'
4
+ require 'mechanize'
5
+
6
+ agent = WWW::Mechanize.new
7
+ agent.set_proxy('localhost', '8000')
8
+ page = agent.get(ARGV[0])
9
+ puts page.body
@@ -0,0 +1,21 @@
1
+ $:.unshift File.join(File.dirname(__FILE__), "..", "lib")
2
+
3
+ # This example logs a user in to rubyforge and prints out the body of the
4
+ # page after logging the user in.
5
+ require 'rubygems'
6
+ require 'mechanize'
7
+
8
+ # Create a new mechanize object
9
+ agent = WWW::Mechanize.new { |a| a.log = Logger.new(STDERR) }
10
+
11
+ # Load the rubyforge website
12
+ page = agent.get('http://rubyforge.org/')
13
+ page = agent.click page.links.text(/Log In/) # Click the login link
14
+ form = page.forms[1] # Select the second form on the page (index 1)
15
+ form.form_loginname = ARGV[0]
16
+ form.form_pw = ARGV[1]
17
+
18
+ # Submit the form
19
+ page = agent.submit(form, form.buttons.first)
20
+
21
+ puts page.body # Print out the body
@@ -0,0 +1,11 @@
1
+ $:.unshift File.join(File.dirname(__FILE__), "..", "lib")
2
+
3
+ require 'rubygems'
4
+ require 'mechanize'
5
+
6
+ agent = WWW::Mechanize.new
7
+ stack = agent.get(ARGV[0]).links
8
+ while l = stack.pop
9
+ next unless l.uri.host == agent.history.first.uri.host
10
+ stack.push(*(agent.click(l).links)) unless agent.visited? l.href
11
+ end
data/lib/mechanize.rb ADDED
@@ -0,0 +1,7 @@
1
+ # Copyright (c) 2005 by Michael Neumann (mneumann@ntecs.de)
2
+ # Copyright (c) 2007 by Aaron Patterson (aaronp@rubyforge.org)
3
+ #
4
+ # Please see the LICENSE file for licensing.
5
+
6
+
7
+ require 'www/mechanize'
@@ -0,0 +1,619 @@
1
+ require 'net/http'
2
+ require 'net/https'
3
+ require 'uri'
4
+ require 'webrick/httputils'
5
+ require 'zlib'
6
+ require 'stringio'
7
+ require 'digest/md5'
8
+ require 'fileutils'
9
+ require 'nokogiri'
10
+ require 'forwardable'
11
+ require 'iconv'
12
+ require 'nkf'
13
+
14
+ require 'www/mechanize/util'
15
+ require 'www/mechanize/content_type_error'
16
+ require 'www/mechanize/response_code_error'
17
+ require 'www/mechanize/unsupported_scheme_error'
18
+ require 'www/mechanize/redirect_limit_reached_error'
19
+ require 'www/mechanize/redirect_not_get_or_head_error'
20
+ require 'www/mechanize/cookie'
21
+ require 'www/mechanize/cookie_jar'
22
+ require 'www/mechanize/history'
23
+ require 'www/mechanize/form'
24
+ require 'www/mechanize/pluggable_parsers'
25
+ require 'www/mechanize/file_response'
26
+ require 'www/mechanize/inspect'
27
+ require 'www/mechanize/chain'
28
+ require 'www/mechanize/monkey_patch'
29
+
30
+ module WWW
31
+ # = Synopsis
32
+ # The Mechanize library is used for automating interaction with a website. It
33
+ # can follow links and submit forms. Form fields can be populated and
34
+ # submitted. A history of URLs is maintained and can be queried.
35
+ #
36
+ # == Example
37
+ # require 'rubygems'
38
+ # require 'mechanize'
39
+ # require 'logger'
40
+ #
41
+ # agent = WWW::Mechanize.new { |a| a.log = Logger.new("mech.log") }
42
+ # agent.user_agent_alias = 'Mac Safari'
43
+ # page = agent.get("http://www.google.com/")
44
+ # search_form = page.form_with(:name => "f")
45
+ # search_form.field_with(:name => "q").value = "Hello"
46
+ # search_results = agent.submit(search_form)
47
+ # puts search_results.body
48
+ class Mechanize
49
+ ##
50
+ # The version of Mechanize you are using.
51
+ VERSION = '0.9.3'
52
+
53
+ ##
54
+ # User Agent aliases
55
+ AGENT_ALIASES = {
56
+ 'Windows IE 6' => 'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)',
57
+ 'Windows IE 7' => 'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; .NET CLR 1.1.4322; .NET CLR 2.0.50727)',
58
+ 'Windows Mozilla' => 'Mozilla/5.0 (Windows; U; Windows NT 5.0; en-US; rv:1.4b) Gecko/20030516 Mozilla Firebird/0.6',
59
+ 'Mac Safari' => 'Mozilla/5.0 (Macintosh; U; PPC Mac OS X; en) AppleWebKit/418 (KHTML, like Gecko) Safari/417.9.3',
60
+ 'Mac FireFox' => 'Mozilla/5.0 (Macintosh; U; PPC Mac OS X Mach-O; en-US; rv:1.8.0.3) Gecko/20060426 Firefox/1.5.0.3',
61
+ 'Mac Mozilla' => 'Mozilla/5.0 (Macintosh; U; PPC Mac OS X Mach-O; en-US; rv:1.4a) Gecko/20030401',
62
+ 'Linux Mozilla' => 'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.4) Gecko/20030624',
63
+ 'Linux Konqueror' => 'Mozilla/5.0 (compatible; Konqueror/3; Linux)',
64
+ 'iPhone' => 'Mozilla/5.0 (iPhone; U; CPU like Mac OS X; en) AppleWebKit/420+ (KHTML, like Gecko) Version/3.0 Mobile/1C28 Safari/419.3',
65
+ 'Mechanize' => "WWW-Mechanize/#{VERSION} (http://rubyforge.org/projects/mechanize/)"
66
+ }
67
+
68
+ attr_accessor :cookie_jar
69
+ attr_accessor :open_timeout, :read_timeout
70
+ attr_accessor :user_agent
71
+ attr_accessor :watch_for_set
72
+ attr_accessor :ca_file
73
+ attr_accessor :key
74
+ attr_accessor :cert
75
+ attr_accessor :pass
76
+ attr_accessor :redirect_ok
77
+ attr_accessor :keep_alive_time
78
+ attr_accessor :keep_alive
79
+ attr_accessor :conditional_requests
80
+ attr_accessor :follow_meta_refresh
81
+ attr_accessor :verify_callback
82
+ attr_accessor :history_added
83
+ attr_accessor :scheme_handlers
84
+ attr_accessor :redirection_limit
85
+
86
+ # A hash of custom request headers
87
+ attr_accessor :request_headers
88
+
89
+ # The HTML parser to be used when parsing documents
90
+ attr_accessor :html_parser
91
+
92
+ attr_reader :history
93
+ attr_reader :pluggable_parser
94
+
95
+ alias :follow_redirect? :redirect_ok
96
+
97
+ @html_parser = Nokogiri::HTML
98
+ class << self; attr_accessor :html_parser, :log end
99
+
100
+ def initialize
101
+ # attr_accessors
102
+ @cookie_jar = CookieJar.new
103
+ @log = nil
104
+ @open_timeout = nil
105
+ @read_timeout = nil
106
+ @user_agent = AGENT_ALIASES['Mechanize']
107
+ @watch_for_set = nil
108
+ @history_added = nil
109
+ @ca_file = nil # OpenSSL server certificate file
110
+
111
+ # callback for OpenSSL errors while verifying the server certificate
112
+ # chain, can be used for debugging or to ignore errors by always
113
+ # returning _true_
114
+ @verify_callback = nil
115
+ @cert = nil # OpenSSL Certificate
116
+ @key = nil # OpenSSL Private Key
117
+ @pass = nil # OpenSSL Password
118
+ @redirect_ok = true # Should we follow redirects?
119
+
120
+ # attr_readers
121
+ @history = WWW::Mechanize::History.new
122
+ @pluggable_parser = PluggableParser.new
123
+
124
+ # Auth variables
125
+ @user = nil # Auth User
126
+ @password = nil # Auth Password
127
+ @digest = nil # DigestAuth Digest
128
+ @auth_hash = {} # Keep track of urls for sending auth
129
+ @request_headers= {} # A hash of request headers to be used
130
+
131
+ # Proxy settings
132
+ @proxy_addr = nil
133
+ @proxy_pass = nil
134
+ @proxy_port = nil
135
+ @proxy_user = nil
136
+
137
+ @conditional_requests = true
138
+
139
+ @follow_meta_refresh = false
140
+ @redirection_limit = 20
141
+
142
+ # Connection Cache & Keep alive
143
+ @connection_cache = {}
144
+ @keep_alive_time = 300
145
+ @keep_alive = true
146
+
147
+ @scheme_handlers = Hash.new { |h,k|
148
+ h[k] = lambda { |link, page|
149
+ raise UnsupportedSchemeError.new(k)
150
+ }
151
+ }
152
+ @scheme_handlers['http'] = lambda { |link, page| link }
153
+ @scheme_handlers['https'] = @scheme_handlers['http']
154
+ @scheme_handlers['relative'] = @scheme_handlers['http']
155
+ @scheme_handlers['file'] = @scheme_handlers['http']
156
+
157
+ @pre_connect_hook = Chain::PreConnectHook.new
158
+ @post_connect_hook = Chain::PostConnectHook.new
159
+
160
+ @html_parser = self.class.html_parser
161
+
162
+ yield self if block_given?
163
+ end
164
+
165
+ def max_history=(length); @history.max_size = length end
166
+ def max_history; @history.max_size end
167
+ def log=(l); self.class.log = l end
168
+ def log; self.class.log end
169
+
170
+ def pre_connect_hooks
171
+ @pre_connect_hook.hooks
172
+ end
173
+
174
+ def post_connect_hooks
175
+ @post_connect_hook.hooks
176
+ end
177
+
178
+ # Sets the proxy address, port, user, and password
179
+ # +addr+ should be a host, with no "http://"
180
+ def set_proxy(addr, port, user = nil, pass = nil)
181
+ @proxy_addr, @proxy_port, @proxy_user, @proxy_pass = addr, port, user, pass
182
+ end
183
+
184
+ # Set the user agent for the Mechanize object.
185
+ # See AGENT_ALIASES
186
+ def user_agent_alias=(al)
187
+ self.user_agent = AGENT_ALIASES[al] || raise("unknown agent alias")
188
+ end
189
+
190
+ # Returns a list of cookies stored in the cookie jar.
191
+ def cookies
192
+ @cookie_jar.to_a
193
+ end
194
+
195
+ # Sets the user and password to be used for authentication.
196
+ def auth(user, password)
197
+ @user = user
198
+ @password = password
199
+ end
200
+ alias :basic_auth :auth
201
+
202
+ # Fetches the URL passed in and returns a page.
203
+ def get(options, parameters = [], referer = nil)
204
+ unless options.is_a? Hash
205
+ url = options
206
+ unless parameters.respond_to?(:each) # FIXME: Remove this in 0.8.0
207
+ referer = parameters
208
+ parameters = []
209
+ end
210
+ else
211
+ raise ArgumentError.new("url must be specified") unless url = options[:url]
212
+ parameters = options[:params] || []
213
+ referer = options[:referer]
214
+ headers = options[:headers]
215
+ end
216
+
217
+ unless referer
218
+ if url.to_s =~ /^http/
219
+ referer = Page.new(nil, {'content-type'=>'text/html'})
220
+ else
221
+ referer = current_page || Page.new(nil, {'content-type'=>'text/html'})
222
+ end
223
+ end
224
+
225
+ # FIXME: Huge hack so that using a URI as a referer works. I need to
226
+ # refactor everything to pass around URIs but still support
227
+ # WWW::Mechanize::Page#base
228
+ unless referer.is_a?(WWW::Mechanize::File)
229
+ referer = referer.is_a?(String) ?
230
+ Page.new(URI.parse(referer), {'content-type' => 'text/html'}) :
231
+ Page.new(referer, {'content-type' => 'text/html'})
232
+ end
233
+
234
+ # fetch the page
235
+ page = fetch_page( :uri => url,
236
+ :referer => referer,
237
+ :headers => headers || {},
238
+ :params => parameters
239
+ )
240
+ add_to_history(page)
241
+ yield page if block_given?
242
+ page
243
+ end
244
+
245
+ ####
246
+ # PUT to +url+ with +entity+, and setting +options+:
247
+ #
248
+ # put('http://tenderlovemaking.com/', 'new content', :headers => {'Content-Type' => 'text/plain'})
249
+ #
250
+ def put(url, entity, options = {})
251
+ request_with_entity(:put, url, entity, options)
252
+ end
253
+
254
+ ####
255
+ # DELETE to +url+ with +query_params+, and setting +options+:
256
+ #
257
+ # delete('http://tenderlovemaking.com/', {'q' => 'foo'}, :headers => {})
258
+ #
259
+ def delete(url, query_params = {}, options = {})
260
+ page = head(url, query_params, options.merge({:verb => :delete}))
261
+ add_to_history(page)
262
+ page
263
+ end
264
+
265
+ ####
266
+ # HEAD to +url+ with +query_params+, and setting +options+:
267
+ #
268
+ # head('http://tenderlovemaking.com/', {'q' => 'foo'}, :headers => {})
269
+ #
270
+ def head(url, query_params = {}, options = {})
271
+ options = {
272
+ :uri => url,
273
+ :headers => {},
274
+ :params => query_params,
275
+ :verb => :head
276
+ }.merge(options)
277
+ # fetch the page
278
+ page = fetch_page(options)
279
+ yield page if block_given?
280
+ page
281
+ end
282
+
283
+ # Fetch a file and return the contents of the file.
284
+ def get_file(url)
285
+ get(url).body
286
+ end
287
+
288
+ # Clicks the WWW::Mechanize::Link object passed in and returns the
289
+ # page fetched.
290
+ def click(link)
291
+ referer = link.page rescue referer = nil
292
+ href = link.respond_to?(:href) ? link.href :
293
+ (link['href'] || link['src'])
294
+ get(:url => href, :referer => (referer || current_page()))
295
+ end
296
+
297
+ # Equivalent to the browser back button. Returns the most recent page
298
+ # visited.
299
+ def back
300
+ @history.pop
301
+ end
302
+
303
+ # Posts to the given URL with the request entity. The request
304
+ # entity is specified by either a string, or a list of key-value
305
+ # pairs represented by a hash or an array of arrays.
306
+ #
307
+ # Examples:
308
+ # agent.post('http://example.com/', "foo" => "bar")
309
+ #
310
+ # agent.post('http://example.com/', [ ["foo", "bar"] ])
311
+ #
312
+ # agent.post('http://example.com/', "<message>hello</message>", 'Content-Type' => 'application/xml')
313
+ def post(url, query={}, headers={})
314
+ if query.is_a?(String)
315
+ return request_with_entity(:post, url, query, :headers => headers)
316
+ end
317
+ node = {}
318
+ # Create a fake form
319
+ class << node
320
+ def search(*args); []; end
321
+ end
322
+ node['method'] = 'POST'
323
+ node['enctype'] = 'application/x-www-form-urlencoded'
324
+
325
+ form = Form.new(node)
326
+ query.each { |k,v|
327
+ if v.is_a?(IO)
328
+ form.enctype = 'multipart/form-data'
329
+ ul = Form::FileUpload.new(k.to_s,::File.basename(v.path))
330
+ ul.file_data = v.read
331
+ form.file_uploads << ul
332
+ else
333
+ form.fields << Form::Field.new(k.to_s,v)
334
+ end
335
+ }
336
+ post_form(url, form, headers)
337
+ end
338
+
339
+ # Submit a form with an optional button.
340
+ # Without a button:
341
+ # page = agent.get('http://example.com')
342
+ # agent.submit(page.forms.first)
343
+ # With a button
344
+ # agent.submit(page.forms.first, page.forms.first.buttons.first)
345
def submit(form, button=nil, headers={})
  form.add_button_to_query(button) if button

  http_method = form.method.upcase
  if http_method == 'POST'
    post_form(form.action, form, headers)
  elsif http_method == 'GET'
    # Drop any query string already on the action; the form's own query is
    # passed separately as :params.
    bare_action = form.action.gsub(/\?[^\?]*$/, '')
    get(:url     => bare_action,
        :params  => form.build_query,
        :headers => headers,
        :referer => form.page)
  else
    raise "unsupported method: #{http_method}"
  end
end
360
+
361
# Sends +entity+ as the body of a +verb+ request to +url+, records the
# resulting page in the history and returns it.
#
# +options+ may override :uri, :referer and :headers. Headers given there
# are merged over the default Content-Type and Content-Length, so a caller
# can replace either one.
def request_with_entity(verb, url, entity, options={})
  cur_page = current_page || Page.new( nil, {'content-type'=>'text/html'})

  options = {
    :uri => url,
    :referer => cur_page,
    :headers => {},
  }.update(options)

  # Content-Length is a count of octets. String#size counts characters on
  # Ruby 1.9+, which under-reports multibyte bodies, so prefer #bytesize
  # and fall back to #size only where #bytesize does not exist.
  entity_length = entity.respond_to?(:bytesize) ? entity.bytesize : entity.size

  headers = {
    'Content-Type' => 'application/octet-stream',
    'Content-Length' => entity_length.to_s,
  }.update(options[:headers])

  options.update({
    :verb => verb,
    :params => [entity],
    :headers => headers,
  })

  page = fetch_page(options)
  add_to_history(page)
  page
end
385
+
386
+ # Returns the current page loaded by Mechanize
387
def current_page
  # The newest history entry is the current page; nil when nothing has
  # been recorded yet.
  newest_entry = @history.last
  newest_entry
end
390
+
391
+ # Returns whether or not a url has been visited
392
def visited?(url)
  # Visited exactly when the lookup yields something other than nil.
  found = visited_page(url)
  !found.nil?
end
395
+
396
+ # Returns a visited page for the url passed in, otherwise nil
397
def visited_page(url)
  # Anything exposing #href is unwrapped to that value; everything else is
  # used directly as the URL.
  target = url.respond_to?(:href) ? url.href : url
  @history.visited_page(resolve(target))
end
403
+
404
+ # Runs given block, then resets the page history as it was before. self is
405
+ # given as a parameter to the block. Returns the value of the block.
406
def transact
  # Snapshot is taken before entering the protected region, so a failing
  # #dup leaves @history untouched.
  saved_history = @history.dup
  begin
    yield(self)
  ensure
    # Restore the snapshot whether the block returned or raised.
    @history = saved_history
  end
end
414
+
415
+ alias :page :current_page
416
+
417
+ private
418
+
419
# Runs +url+ through a one-step chain containing only the URI resolver,
# using +referer+ as context, and returns the resolved URI as a String.
def resolve(url, referer = current_page())
  context = { :uri => url, :referer => referer }
  resolver = Chain::URIResolver.new(@scheme_handlers)
  # The chain rewrites context[:uri] in place; its return value is unused.
  Chain.new([resolver]).handle(context)
  context[:uri].to_s
end
426
+
427
# POSTs +form+ to +url+, records the resulting page in the history and
# returns it.
#
# The referer is the form's own page, else the current page, else a blank
# HTML page. +headers+ are merged over the computed Content-Type and
# Content-Length, so a caller can replace either one.
def post_form(url, form, headers = {})
  cur_page = form.page || current_page ||
    Page.new( nil, {'content-type'=>'text/html'})

  request_data = form.request_data

  log.debug("query: #{ request_data.inspect }") if log

  # Content-Length is a count of octets. String#size counts characters on
  # Ruby 1.9+, which under-reports multibyte bodies, so prefer #bytesize
  # and fall back to #size only where #bytesize does not exist.
  data_length =
    request_data.respond_to?(:bytesize) ? request_data.bytesize : request_data.size

  # fetch the page
  page = fetch_page( :uri => url,
                     :referer => cur_page,
                     :verb => :post,
                     :params => [request_data],
                     :headers => {
                       'Content-Type' => form.enctype,
                       'Content-Length' => data_length.to_s,
                     }.merge(headers))
  add_to_history(page)
  page
end
447
+
448
+ # uri is an absolute URI
449
# Performs the HTTP exchange described by +params+ and returns the
# resulting page.
#
# +params+ is merged over the defaults below. Keys read back here are
# :uri, :referer, :verb, :params (arguments forwarded to the connection's
# #request), :headers and :redirects (redirects already followed).
#
# After the response is parsed the method may call itself again:
# * for a <meta> refresh or a Refresh header, when follow_meta_refresh is set;
# * for an HTTP redirect, when follow_redirect? is true;
# * once per host after a 401, when credentials are configured.
#
# Raises RedirectLimitReachedError when following another refresh or
# redirect would exceed redirection_limit, and ResponseCodeError for an
# unauthorized response that cannot be retried or for any unhandled
# response class.
def fetch_page(params)
  options = {
    :request    => nil,
    :response   => nil,
    :connection => nil,
    :referer    => current_page(),
    :uri        => nil,
    :verb       => :get,
    :agent      => self,
    :redirects  => 0,
    :params     => [],
    :headers    => {},
  }.merge(params)

  # Each handler in the chain reads and fills in entries of +options+; the
  # request and connection objects are picked out of it afterwards.
  before_connect = Chain.new([
    Chain::URIResolver.new(@scheme_handlers),
    Chain::ParameterResolver.new,
    Chain::RequestResolver.new,
    Chain::ConnectionResolver.new(
      @connection_cache,
      @keep_alive,
      @proxy_addr,
      @proxy_port,
      @proxy_user,
      @proxy_pass
    ),
    Chain::SSLResolver.new(@ca_file, @verify_callback, @cert, @key, @pass),
    Chain::AuthHeaders.new(@auth_hash, @user, @password, @digest),
    Chain::HeaderResolver.new(
      @keep_alive,
      @keep_alive_time,
      @cookie_jar,
      @user_agent,
      @request_headers
    ),
    Chain::CustomHeaders.new,
    @pre_connect_hook,
  ])
  before_connect.handle(options)

  uri          = options[:uri]
  request      = options[:request]
  cur_page     = options[:referer]
  request_data = options[:params]
  redirects    = options[:redirects]
  http_obj     = options[:connection]

  # Add If-Modified-Since if page is in history
  if @conditional_requests
    cached = visited_page(uri)
    if cached && cached.response['Last-Modified']
      request['If-Modified-Since'] = cached.response['Last-Modified']
    end
  end

  # Specify timeouts if given
  http_obj.open_timeout = @open_timeout if @open_timeout
  http_obj.read_timeout = @read_timeout if @read_timeout
  http_obj.start unless http_obj.started?

  # Log specified headers for the request
  log.info("#{ request.class }: #{ request.path }") if log
  if log
    request.each_header do |k, v|
      log.debug("request-header: #{ k } => #{ v }")
    end
  end

  # Send the request. A dropped connection is restarted and the request
  # retried; the third consecutive failure is re-raised.
  attempts = 0
  begin
    response = http_obj.request(request, *request_data) { |r|
      connection_chain = Chain.new([
        Chain::ResponseReader.new(r),
        Chain::BodyDecodingHandler.new,
      ])
      connection_chain.handle(options)
    }
  rescue EOFError, Errno::ECONNRESET, Errno::EPIPE => x
    log.error("Rescuing EOF error") if log
    http_obj.finish
    raise x if attempts >= 2
    request.body = nil
    http_obj.start
    attempts += 1
    retry
  end

  after_connect = Chain.new([
    @post_connect_hook,
    Chain::ResponseBodyParser.new(@pluggable_parser, @watch_for_set),
    Chain::ResponseHeaderHandler.new(@cookie_jar, @connection_cache),
  ])
  after_connect.handle(options)

  res_klass = options[:res_klass]
  response_body = options[:response_body]
  page = options[:page]

  log.info("status: #{ page.code }") if log

  if follow_meta_refresh
    redirect_uri = nil
    referer = page
    if (page.respond_to?(:meta) && (redirect = page.meta.first))
      # A <meta> refresh counts against the redirect budget exactly like a
      # Refresh header does; without this check a page that refreshes to
      # itself would recurse without bound.
      if redirects + 1 > redirection_limit
        raise RedirectLimitReachedError.new(page, redirects)
      end
      redirect_uri = redirect.uri.to_s
      sleep redirect.node['delay'].to_f
      referer = Page.new(nil, {'content-type'=>'text/html'})
    elsif refresh = response['refresh']
      delay, redirect_uri = Page::Meta.parse(refresh, uri)
      raise StandardError, "Invalid refresh http header" unless delay
      if redirects + 1 > redirection_limit
        raise RedirectLimitReachedError.new(page, redirects)
      end
      sleep delay.to_f
    end
    if redirect_uri
      @history.push(page, page.uri)
      return fetch_page(
        :uri        => redirect_uri,
        :referer    => referer,
        :params     => [],
        :verb       => :get,
        :redirects  => redirects + 1
      )
    end
  end

  return page if res_klass <= Net::HTTPSuccess

  # NotModified is tested before the general redirection case so that it
  # takes the cached-page path.
  if res_klass == Net::HTTPNotModified
    log.debug("Got cached page") if log
    return visited_page(uri) || page
  elsif res_klass <= Net::HTTPRedirection
    return page unless follow_redirect?
    log.info("follow redirect to: #{ response['Location'] }") if log
    from_uri = page.uri
    raise RedirectLimitReachedError.new(page, redirects) if redirects + 1 > redirection_limit
    # A redirected HEAD stays a HEAD; every other verb is followed with GET.
    redirect_verb = options[:verb] == :head ? :head : :get
    page = fetch_page(  :uri => response['Location'].to_s,
                        :referer => page,
                        :params  => [],
                        :verb => redirect_verb,
                        :redirects => redirects + 1
                     )
    @history.push(page, from_uri)
    return page
  elsif res_klass <= Net::HTTPUnauthorized
    raise ResponseCodeError.new(page) unless @user || @password
    # A host already present in @auth_hash has had its scheme chosen
    # before, so a further 401 is final.
    raise ResponseCodeError.new(page) if @auth_hash.has_key?(uri.host)
    if response['www-authenticate'] =~ /Digest/i
      @auth_hash[uri.host] = :digest
      if response['server'] =~ /Microsoft-IIS/
        @auth_hash[uri.host] = :iis_digest
      end
      @digest = response['www-authenticate']
    else
      @auth_hash[uri.host] = :basic
    end
    return fetch_page(  :uri      => uri,
                        :referer  => cur_page,
                        :verb     => request.method.downcase.to_sym,
                        :params   => request_data,
                        :headers  => options[:headers]
                     )
  end

  raise ResponseCodeError.new(page), "Unhandled response", caller
end
613
+
614
# Records +page+ in the history under its resolved URI, then invokes the
# history_added callback with the page when one is set.
def add_to_history(page)
  resolved_uri = resolve(page.uri)
  @history.push(page, resolved_uri)
  return unless history_added
  history_added.call(page)
end
618
+ end
619
+ end