mechanize-ntlm 0.9.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (171) hide show
  1. data/CHANGELOG.rdoc +480 -0
  2. data/EXAMPLES.rdoc +171 -0
  3. data/FAQ.rdoc +11 -0
  4. data/GUIDE.rdoc +122 -0
  5. data/LICENSE.rdoc +340 -0
  6. data/Manifest.txt +169 -0
  7. data/README.rdoc +60 -0
  8. data/Rakefile +44 -0
  9. data/examples/flickr_upload.rb +23 -0
  10. data/examples/mech-dump.rb +7 -0
  11. data/examples/proxy_req.rb +9 -0
  12. data/examples/rubyforge.rb +21 -0
  13. data/examples/spider.rb +11 -0
  14. data/lib/mechanize-ntlm.rb +7 -0
  15. data/lib/www/mechanize.rb +582 -0
  16. data/lib/www/mechanize/chain.rb +34 -0
  17. data/lib/www/mechanize/chain/auth_headers.rb +82 -0
  18. data/lib/www/mechanize/chain/body_decoding_handler.rb +43 -0
  19. data/lib/www/mechanize/chain/connection_resolver.rb +78 -0
  20. data/lib/www/mechanize/chain/custom_headers.rb +23 -0
  21. data/lib/www/mechanize/chain/handler.rb +9 -0
  22. data/lib/www/mechanize/chain/header_resolver.rb +48 -0
  23. data/lib/www/mechanize/chain/parameter_resolver.rb +23 -0
  24. data/lib/www/mechanize/chain/post_connect_hook.rb +0 -0
  25. data/lib/www/mechanize/chain/pre_connect_hook.rb +22 -0
  26. data/lib/www/mechanize/chain/request_resolver.rb +32 -0
  27. data/lib/www/mechanize/chain/response_body_parser.rb +40 -0
  28. data/lib/www/mechanize/chain/response_header_handler.rb +51 -0
  29. data/lib/www/mechanize/chain/response_reader.rb +41 -0
  30. data/lib/www/mechanize/chain/ssl_resolver.rb +36 -0
  31. data/lib/www/mechanize/chain/uri_resolver.rb +73 -0
  32. data/lib/www/mechanize/content_type_error.rb +16 -0
  33. data/lib/www/mechanize/cookie.rb +72 -0
  34. data/lib/www/mechanize/cookie_jar.rb +191 -0
  35. data/lib/www/mechanize/file.rb +73 -0
  36. data/lib/www/mechanize/file_response.rb +62 -0
  37. data/lib/www/mechanize/file_saver.rb +39 -0
  38. data/lib/www/mechanize/form.rb +359 -0
  39. data/lib/www/mechanize/form/button.rb +8 -0
  40. data/lib/www/mechanize/form/check_box.rb +13 -0
  41. data/lib/www/mechanize/form/field.rb +28 -0
  42. data/lib/www/mechanize/form/file_upload.rb +24 -0
  43. data/lib/www/mechanize/form/image_button.rb +23 -0
  44. data/lib/www/mechanize/form/multi_select_list.rb +69 -0
  45. data/lib/www/mechanize/form/option.rb +51 -0
  46. data/lib/www/mechanize/form/radio_button.rb +38 -0
  47. data/lib/www/mechanize/form/select_list.rb +45 -0
  48. data/lib/www/mechanize/headers.rb +12 -0
  49. data/lib/www/mechanize/history.rb +67 -0
  50. data/lib/www/mechanize/inspect.rb +90 -0
  51. data/lib/www/mechanize/monkey_patch.rb +37 -0
  52. data/lib/www/mechanize/page.rb +145 -0
  53. data/lib/www/mechanize/page/base.rb +10 -0
  54. data/lib/www/mechanize/page/frame.rb +22 -0
  55. data/lib/www/mechanize/page/link.rb +50 -0
  56. data/lib/www/mechanize/page/meta.rb +10 -0
  57. data/lib/www/mechanize/pluggable_parsers.rb +103 -0
  58. data/lib/www/mechanize/redirect_limit_reached_error.rb +18 -0
  59. data/lib/www/mechanize/redirect_not_get_or_head_error.rb +20 -0
  60. data/lib/www/mechanize/response_code_error.rb +25 -0
  61. data/lib/www/mechanize/unsupported_scheme_error.rb +10 -0
  62. data/lib/www/mechanize/util.rb +76 -0
  63. data/lib/www/ntlm-http/lib/net/ntlm_http.rb +854 -0
  64. data/mechanize.gemspec +24 -0
  65. data/test/chain/test_argument_validator.rb +14 -0
  66. data/test/chain/test_custom_headers.rb +18 -0
  67. data/test/chain/test_parameter_resolver.rb +35 -0
  68. data/test/chain/test_request_resolver.rb +29 -0
  69. data/test/chain/test_response_reader.rb +24 -0
  70. data/test/data/htpasswd +1 -0
  71. data/test/data/server.crt +16 -0
  72. data/test/data/server.csr +12 -0
  73. data/test/data/server.key +15 -0
  74. data/test/data/server.pem +15 -0
  75. data/test/helper.rb +127 -0
  76. data/test/htdocs/alt_text.html +10 -0
  77. data/test/htdocs/bad_form_test.html +9 -0
  78. data/test/htdocs/button.jpg +0 -0
  79. data/test/htdocs/empty_form.html +6 -0
  80. data/test/htdocs/file_upload.html +26 -0
  81. data/test/htdocs/find_link.html +41 -0
  82. data/test/htdocs/form_multi_select.html +16 -0
  83. data/test/htdocs/form_multival.html +37 -0
  84. data/test/htdocs/form_no_action.html +18 -0
  85. data/test/htdocs/form_no_input_name.html +16 -0
  86. data/test/htdocs/form_select.html +16 -0
  87. data/test/htdocs/form_select_all.html +16 -0
  88. data/test/htdocs/form_select_none.html +17 -0
  89. data/test/htdocs/form_select_noopts.html +10 -0
  90. data/test/htdocs/form_set_fields.html +14 -0
  91. data/test/htdocs/form_test.html +188 -0
  92. data/test/htdocs/frame_test.html +30 -0
  93. data/test/htdocs/google.html +13 -0
  94. data/test/htdocs/iframe_test.html +16 -0
  95. data/test/htdocs/index.html +6 -0
  96. data/test/htdocs/link with space.html +5 -0
  97. data/test/htdocs/meta_cookie.html +11 -0
  98. data/test/htdocs/no_title_test.html +6 -0
  99. data/test/htdocs/relative/tc_relative_links.html +21 -0
  100. data/test/htdocs/tc_bad_links.html +5 -0
  101. data/test/htdocs/tc_base_link.html +8 -0
  102. data/test/htdocs/tc_blank_form.html +11 -0
  103. data/test/htdocs/tc_checkboxes.html +19 -0
  104. data/test/htdocs/tc_encoded_links.html +5 -0
  105. data/test/htdocs/tc_follow_meta.html +8 -0
  106. data/test/htdocs/tc_form_action.html +48 -0
  107. data/test/htdocs/tc_links.html +18 -0
  108. data/test/htdocs/tc_no_attributes.html +16 -0
  109. data/test/htdocs/tc_pretty_print.html +17 -0
  110. data/test/htdocs/tc_radiobuttons.html +17 -0
  111. data/test/htdocs/tc_referer.html +10 -0
  112. data/test/htdocs/tc_relative_links.html +19 -0
  113. data/test/htdocs/tc_textarea.html +23 -0
  114. data/test/htdocs/unusual______.html +5 -0
  115. data/test/servlets.rb +339 -0
  116. data/test/ssl_server.rb +48 -0
  117. data/test/test_authenticate.rb +71 -0
  118. data/test/test_bad_links.rb +25 -0
  119. data/test/test_blank_form.rb +16 -0
  120. data/test/test_checkboxes.rb +61 -0
  121. data/test/test_content_type.rb +13 -0
  122. data/test/test_cookie_class.rb +338 -0
  123. data/test/test_cookie_jar.rb +343 -0
  124. data/test/test_cookies.rb +123 -0
  125. data/test/test_encoded_links.rb +20 -0
  126. data/test/test_errors.rb +49 -0
  127. data/test/test_follow_meta.rb +69 -0
  128. data/test/test_form_action.rb +44 -0
  129. data/test/test_form_as_hash.rb +61 -0
  130. data/test/test_form_button.rb +38 -0
  131. data/test/test_form_no_inputname.rb +15 -0
  132. data/test/test_forms.rb +575 -0
  133. data/test/test_frames.rb +25 -0
  134. data/test/test_get_headers.rb +52 -0
  135. data/test/test_gzipping.rb +22 -0
  136. data/test/test_hash_api.rb +45 -0
  137. data/test/test_history.rb +142 -0
  138. data/test/test_history_added.rb +16 -0
  139. data/test/test_html_unscape_forms.rb +39 -0
  140. data/test/test_if_modified_since.rb +20 -0
  141. data/test/test_keep_alive.rb +31 -0
  142. data/test/test_links.rb +120 -0
  143. data/test/test_mech.rb +259 -0
  144. data/test/test_mechanize_file.rb +47 -0
  145. data/test/test_multi_select.rb +106 -0
  146. data/test/test_no_attributes.rb +13 -0
  147. data/test/test_option.rb +18 -0
  148. data/test/test_page.rb +67 -0
  149. data/test/test_pluggable_parser.rb +145 -0
  150. data/test/test_post_form.rb +34 -0
  151. data/test/test_pretty_print.rb +22 -0
  152. data/test/test_radiobutton.rb +75 -0
  153. data/test/test_redirect_limit_reached.rb +41 -0
  154. data/test/test_redirect_verb_handling.rb +45 -0
  155. data/test/test_referer.rb +39 -0
  156. data/test/test_relative_links.rb +40 -0
  157. data/test/test_request.rb +13 -0
  158. data/test/test_response_code.rb +52 -0
  159. data/test/test_save_file.rb +48 -0
  160. data/test/test_scheme.rb +48 -0
  161. data/test/test_select.rb +106 -0
  162. data/test/test_select_all.rb +15 -0
  163. data/test/test_select_none.rb +15 -0
  164. data/test/test_select_noopts.rb +16 -0
  165. data/test/test_set_fields.rb +44 -0
  166. data/test/test_ssl_server.rb +20 -0
  167. data/test/test_subclass.rb +14 -0
  168. data/test/test_textarea.rb +45 -0
  169. data/test/test_upload.rb +109 -0
  170. data/test/test_verbs.rb +25 -0
  171. metadata +284 -0
@@ -0,0 +1,169 @@
1
+ CHANGELOG.rdoc
2
+ EXAMPLES.rdoc
3
+ FAQ.rdoc
4
+ GUIDE.rdoc
5
+ LICENSE.rdoc
6
+ Manifest.txt
7
+ README.rdoc
8
+ Rakefile
9
+ examples/flickr_upload.rb
10
+ examples/mech-dump.rb
11
+ examples/proxy_req.rb
12
+ examples/rubyforge.rb
13
+ examples/spider.rb
14
+ lib/mechanize.rb
15
+ lib/www/mechanize.rb
16
+ lib/www/mechanize/chain.rb
17
+ lib/www/mechanize/chain/auth_headers.rb
18
+ lib/www/mechanize/chain/body_decoding_handler.rb
19
+ lib/www/mechanize/chain/connection_resolver.rb
20
+ lib/www/mechanize/chain/custom_headers.rb
21
+ lib/www/mechanize/chain/handler.rb
22
+ lib/www/mechanize/chain/header_resolver.rb
23
+ lib/www/mechanize/chain/parameter_resolver.rb
24
+ lib/www/mechanize/chain/post_connect_hook.rb
25
+ lib/www/mechanize/chain/pre_connect_hook.rb
26
+ lib/www/mechanize/chain/request_resolver.rb
27
+ lib/www/mechanize/chain/response_body_parser.rb
28
+ lib/www/mechanize/chain/response_header_handler.rb
29
+ lib/www/mechanize/chain/response_reader.rb
30
+ lib/www/mechanize/chain/ssl_resolver.rb
31
+ lib/www/mechanize/chain/uri_resolver.rb
32
+ lib/www/mechanize/content_type_error.rb
33
+ lib/www/mechanize/cookie.rb
34
+ lib/www/mechanize/cookie_jar.rb
35
+ lib/www/mechanize/file.rb
36
+ lib/www/mechanize/file_response.rb
37
+ lib/www/mechanize/file_saver.rb
38
+ lib/www/mechanize/form.rb
39
+ lib/www/mechanize/form/button.rb
40
+ lib/www/mechanize/form/check_box.rb
41
+ lib/www/mechanize/form/field.rb
42
+ lib/www/mechanize/form/file_upload.rb
43
+ lib/www/mechanize/form/image_button.rb
44
+ lib/www/mechanize/form/multi_select_list.rb
45
+ lib/www/mechanize/form/option.rb
46
+ lib/www/mechanize/form/radio_button.rb
47
+ lib/www/mechanize/form/select_list.rb
48
+ lib/www/mechanize/headers.rb
49
+ lib/www/mechanize/history.rb
50
+ lib/www/mechanize/inspect.rb
51
+ lib/www/mechanize/monkey_patch.rb
52
+ lib/www/mechanize/page.rb
53
+ lib/www/mechanize/page/base.rb
54
+ lib/www/mechanize/page/frame.rb
55
+ lib/www/mechanize/page/link.rb
56
+ lib/www/mechanize/page/meta.rb
57
+ lib/www/mechanize/pluggable_parsers.rb
58
+ lib/www/mechanize/redirect_limit_reached_error.rb
59
+ lib/www/mechanize/redirect_not_get_or_head_error.rb
60
+ lib/www/mechanize/response_code_error.rb
61
+ lib/www/mechanize/unsupported_scheme_error.rb
62
+ lib/www/mechanize/util.rb
63
+ mechanize.gemspec
64
+ test/chain/test_argument_validator.rb
65
+ test/chain/test_custom_headers.rb
66
+ test/chain/test_parameter_resolver.rb
67
+ test/chain/test_request_resolver.rb
68
+ test/chain/test_response_reader.rb
69
+ test/data/htpasswd
70
+ test/data/server.crt
71
+ test/data/server.csr
72
+ test/data/server.key
73
+ test/data/server.pem
74
+ test/helper.rb
75
+ test/htdocs/alt_text.html
76
+ test/htdocs/bad_form_test.html
77
+ test/htdocs/button.jpg
78
+ test/htdocs/empty_form.html
79
+ test/htdocs/file_upload.html
80
+ test/htdocs/find_link.html
81
+ test/htdocs/form_multi_select.html
82
+ test/htdocs/form_multival.html
83
+ test/htdocs/form_no_action.html
84
+ test/htdocs/form_no_input_name.html
85
+ test/htdocs/form_select.html
86
+ test/htdocs/form_select_all.html
87
+ test/htdocs/form_select_none.html
88
+ test/htdocs/form_select_noopts.html
89
+ test/htdocs/form_set_fields.html
90
+ test/htdocs/form_test.html
91
+ test/htdocs/frame_test.html
92
+ test/htdocs/google.html
93
+ test/htdocs/iframe_test.html
94
+ test/htdocs/index.html
95
+ test/htdocs/link with space.html
96
+ test/htdocs/meta_cookie.html
97
+ test/htdocs/no_title_test.html
98
+ test/htdocs/relative/tc_relative_links.html
99
+ test/htdocs/tc_bad_links.html
100
+ test/htdocs/tc_base_link.html
101
+ test/htdocs/tc_blank_form.html
102
+ test/htdocs/tc_checkboxes.html
103
+ test/htdocs/tc_encoded_links.html
104
+ test/htdocs/tc_follow_meta.html
105
+ test/htdocs/tc_form_action.html
106
+ test/htdocs/tc_links.html
107
+ test/htdocs/tc_no_attributes.html
108
+ test/htdocs/tc_pretty_print.html
109
+ test/htdocs/tc_radiobuttons.html
110
+ test/htdocs/tc_referer.html
111
+ test/htdocs/tc_relative_links.html
112
+ test/htdocs/tc_textarea.html
113
+ test/htdocs/unusual______.html
114
+ test/servlets.rb
115
+ test/ssl_server.rb
116
+ test/test_authenticate.rb
117
+ test/test_bad_links.rb
118
+ test/test_blank_form.rb
119
+ test/test_checkboxes.rb
120
+ test/test_content_type.rb
121
+ test/test_cookie_class.rb
122
+ test/test_cookie_jar.rb
123
+ test/test_cookies.rb
124
+ test/test_encoded_links.rb
125
+ test/test_errors.rb
126
+ test/test_follow_meta.rb
127
+ test/test_form_action.rb
128
+ test/test_form_as_hash.rb
129
+ test/test_form_button.rb
130
+ test/test_form_no_inputname.rb
131
+ test/test_forms.rb
132
+ test/test_frames.rb
133
+ test/test_get_headers.rb
134
+ test/test_gzipping.rb
135
+ test/test_hash_api.rb
136
+ test/test_history.rb
137
+ test/test_history_added.rb
138
+ test/test_html_unscape_forms.rb
139
+ test/test_if_modified_since.rb
140
+ test/test_keep_alive.rb
141
+ test/test_links.rb
142
+ test/test_mech.rb
143
+ test/test_mechanize_file.rb
144
+ test/test_multi_select.rb
145
+ test/test_no_attributes.rb
146
+ test/test_option.rb
147
+ test/test_page.rb
148
+ test/test_pluggable_parser.rb
149
+ test/test_post_form.rb
150
+ test/test_pretty_print.rb
151
+ test/test_radiobutton.rb
152
+ test/test_redirect_limit_reached.rb
153
+ test/test_redirect_verb_handling.rb
154
+ test/test_referer.rb
155
+ test/test_relative_links.rb
156
+ test/test_request.rb
157
+ test/test_response_code.rb
158
+ test/test_save_file.rb
159
+ test/test_scheme.rb
160
+ test/test_select.rb
161
+ test/test_select_all.rb
162
+ test/test_select_none.rb
163
+ test/test_select_noopts.rb
164
+ test/test_set_fields.rb
165
+ test/test_ssl_server.rb
166
+ test/test_subclass.rb
167
+ test/test_textarea.rb
168
+ test/test_upload.rb
169
+ test/test_verbs.rb
@@ -0,0 +1,60 @@
1
+ = WWW::Mechanize
2
+
3
+ http://mechanize.rubyforge.org/
4
+ http://github.com/tenderlove/mechanize/tree/master
5
+
6
+ == DESCRIPTION
7
+
8
+ The Mechanize library is used for automating interaction with websites.
9
+ Mechanize automatically stores and sends cookies, follows redirects,
10
+ and can follow links and submit forms. Form fields can be populated and
11
+ submitted. Mechanize also keeps track of the sites that you have visited as
12
+ a history.
13
+
14
+ == Dependencies
15
+
16
+ * ruby 1.8.6
17
+ * nokogiri[http://nokogiri.rubyforge.org]
18
+
19
+ == SUPPORT:
20
+
21
+ The mechanize mailing list is available here:
22
+
23
+ * http://rubyforge.org/mailman/listinfo/mechanize-users
24
+
25
+ The bug tracker is available here:
26
+
27
+ * http://rubyforge.org/tracker/?atid=5709&group_id=1453
28
+
29
+ == Examples
30
+
31
+ If you are just starting, check out the GUIDE[link://files/GUIDE_rdoc.html].
32
+ Also, check out the EXAMPLES[link://files/EXAMPLES_rdoc.html] file.
33
+
34
+ == Authors
35
+
36
+ Copyright (c) 2005 by Michael Neumann (mneumann@ntecs.de)
37
+
38
+ Copyright (c) 2006-2009:
39
+
40
+ * {Aaron Patterson}[http://tenderlovemaking.com] (aaronp@rubyforge.org)
41
+ * {Mike Dalessio}[http://mike.daless.io] (mike@csa.net)
42
+
43
+ This library comes with a shameless plug for employing me
44
+ (Aaron[http://tenderlovemaking.com/]) programming
45
+ Ruby, my favorite language!
46
+
47
+ == Acknowledgments
48
+
49
+ This library was heavily influenced by its namesake in the Perl world. A big
50
+ thanks goes to Andy Lester (andy@petdance.com), the author of the original
51
+ Perl Mechanize which is available here[http://search.cpan.org/~petdance/WWW-Mechanize-1.20/]. Ruby Mechanize would not be around without you!
52
+
53
+ Thank you to Michael Neumann for starting the Ruby version. Thanks to everyone
54
+ who's helped out in various ways. Finally, thank you to the people using this
55
+ library!
56
+
57
+ == License
58
+
59
+ This library is distributed under the GPL. Please see the LICENSE[link://files/LICENSE_rdoc.html] file.
60
+
@@ -0,0 +1,44 @@
1
+ require 'rubygems'
2
+ require 'hoe'
3
+
4
+ $LOAD_PATH.unshift File.join(File.dirname(__FILE__), "lib")
5
+ require 'mechanize'
6
+
7
+ HOE = Hoe.new('mechanize', WWW::Mechanize::VERSION) do |p|
8
+ p.rubyforge_name = 'mechanize'
9
+ p.developer('Aaron Patterson','aaronp@rubyforge.org')
10
+ p.developer('Mike Dalessio','mike.dalessio@gmail.com')
11
+ p.readme_file = 'README.rdoc'
12
+ p.history_file = 'CHANGELOG.rdoc'
13
+ p.extra_rdoc_files = FileList['*.rdoc']
14
+ p.summary = "Mechanize provides automated web-browsing"
15
+ p.extra_deps = [['nokogiri', '>= 1.2.1']]
16
+ end
17
+
18
+ desc "Update SSL Certificate"
19
+ task('ssl_cert') do |p|
20
+ sh "openssl genrsa -des3 -out server.key 1024"
21
+ sh "openssl req -new -key server.key -out server.csr"
22
+ sh "cp server.key server.key.org"
23
+ sh "openssl rsa -in server.key.org -out server.key"
24
+ sh "openssl x509 -req -days 365 -in server.csr -signkey server.key -out server.crt"
25
+ sh "cp server.key server.pem"
26
+ sh "mv server.key server.csr server.crt server.pem test/data/"
27
+ sh "rm server.key.org"
28
+ end
29
+
30
+ namespace :gem do
31
+ desc 'Generate a gem spec'
32
+ task :spec do
33
+ File.open("#{HOE.name}.gemspec", 'w') do |f|
34
+ HOE.spec.version = "#{HOE.version}.#{Time.now.strftime("%Y%m%d%H%M%S")}"
35
+ f.write(HOE.spec.to_ruby)
36
+ end
37
+ end
38
+ end
39
+
40
+ desc "Run code-coverage analysis"
41
+ task :coverage do
42
+ rm_rf "coverage"
43
+ sh "rcov -x Library -I lib:test #{Dir[*HOE.test_globs].join(' ')}"
44
+ end
@@ -0,0 +1,23 @@
1
+ $:.unshift File.join(File.dirname(__FILE__), "..", "lib")
2
+
3
+ require 'rubygems'
4
+ require 'mechanize'
5
+
6
+ agent = WWW::Mechanize.new
7
+
8
+ # Get the flickr sign in page
9
+ page = agent.get('http://flickr.com/signin/flickr/')
10
+
11
+ # Fill out the login form
12
+ form = page.forms.name('flickrloginform').first
13
+ form.email = ARGV[0]
14
+ form.password = ARGV[1]
15
+ page = agent.submit(form)
16
+
17
+ # Go to the upload page
18
+ page = agent.click page.links.text('Upload')
19
+
20
+ # Fill out the form
21
+ form = page.forms.action('/photos_upload_process.gne').first
22
+ form.file_uploads.name('file1').first.file_name = ARGV[2]
23
+ agent.submit(form)
@@ -0,0 +1,7 @@
1
+ $:.unshift File.join(File.dirname(__FILE__), "..", "lib")
2
+
3
+ require 'rubygems'
4
+ require 'mechanize'
5
+
6
+ agent = WWW::Mechanize.new
7
+ puts agent.get(ARGV[0]).inspect
@@ -0,0 +1,9 @@
1
+ $:.unshift File.join(File.dirname(__FILE__), "..", "lib")
2
+
3
+ require 'rubygems'
4
+ require 'mechanize'
5
+
6
+ agent = WWW::Mechanize.new
7
+ agent.set_proxy('localhost', '8000')
8
+ page = agent.get(ARGV[0])
9
+ puts page.body
@@ -0,0 +1,21 @@
1
+ $:.unshift File.join(File.dirname(__FILE__), "..", "lib")
2
+
3
+ # This example logs a user in to rubyforge and prints out the body of the
4
+ # page after logging the user in.
5
+ require 'rubygems'
6
+ require 'mechanize'
7
+
8
+ # Create a new mechanize object
9
+ agent = WWW::Mechanize.new { |a| a.log = Logger.new(STDERR) }
10
+
11
+ # Load the rubyforge website
12
+ page = agent.get('http://rubyforge.org/')
13
+ page = agent.click page.links.text(/Log In/) # Click the login link
14
+ form = page.forms[1] # Select the first form
15
+ form.form_loginname = ARGV[0]
16
+ form.form_pw = ARGV[1]
17
+
18
+ # Submit the form
19
+ page = agent.submit(form, form.buttons.first)
20
+
21
+ puts page.body # Print out the body
@@ -0,0 +1,11 @@
1
+ $:.unshift File.join(File.dirname(__FILE__), "..", "lib")
2
+
3
+ require 'rubygems'
4
+ require 'mechanize'
5
+
6
+ agent = WWW::Mechanize.new
7
+ stack = agent.get(ARGV[0]).links
8
+ while l = stack.pop
9
+ next unless l.uri.host == agent.history.first.uri.host
10
+ stack.push(*(agent.click(l).links)) unless agent.visited? l.href
11
+ end
@@ -0,0 +1,7 @@
1
+ # Copyright (c) 2005 by Michael Neumann (mneumann@ntecs.de)
2
+ # Copyright (c) 2007 by Aaron Patterson (aaronp@rubyforge.org)
3
+ #
4
+ # Please see the LICENSE file for licensing.
5
+
6
+
7
+ require 'www/mechanize'
@@ -0,0 +1,582 @@
1
+ require 'net/http'
2
+ require 'net/https'
3
+ require 'uri'
4
+ require 'webrick/httputils'
5
+ require 'zlib'
6
+ require 'stringio'
7
+ require 'digest/md5'
8
+ require 'fileutils'
9
+ require 'nokogiri'
10
+ require 'forwardable'
11
+ require 'iconv'
12
+ require 'nkf'
13
+
14
+ require 'www/mechanize/util'
15
+ require 'www/mechanize/content_type_error'
16
+ require 'www/mechanize/response_code_error'
17
+ require 'www/mechanize/unsupported_scheme_error'
18
+ require 'www/mechanize/redirect_limit_reached_error'
19
+ require 'www/mechanize/redirect_not_get_or_head_error'
20
+ require 'www/mechanize/cookie'
21
+ require 'www/mechanize/cookie_jar'
22
+ require 'www/mechanize/history'
23
+ require 'www/mechanize/form'
24
+ require 'www/mechanize/pluggable_parsers'
25
+ require 'www/mechanize/file_response'
26
+ require 'www/mechanize/inspect'
27
+ require 'www/mechanize/chain'
28
+ require 'www/mechanize/monkey_patch'
29
+
30
+ require 'www/ntlm-http/lib/net/ntlm_http'
31
+
32
+ module WWW
33
+ # = Synopsis
34
+ # The Mechanize library is used for automating interaction with a website. It
35
+ # can follow links and submit forms. Form fields can be populated and
36
+ # submitted. A history of URLs is maintained and can be queried.
37
+ #
38
+ # == Example
39
+ # require 'rubygems'
40
+ # require 'mechanize'
41
+ # require 'logger'
42
+ #
43
+ # agent = WWW::Mechanize.new { |a| a.log = Logger.new("mech.log") }
44
+ # agent.user_agent_alias = 'Mac Safari'
45
+ # page = agent.get("http://www.google.com/")
46
+ # search_form = page.form_with(:name => "f")
47
+ # search_form.field_with(:name => "q").value = "Hello"
48
+ # search_results = agent.submit(search_form)
49
+ # puts search_results.body
50
+ class Mechanize
51
+ ##
52
+ # The version of Mechanize you are using.
53
+ VERSION = '0.9.2'
54
+
55
+ ##
56
+ # User Agent aliases
57
+ AGENT_ALIASES = {
58
+ 'Windows IE 6' => 'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)',
59
+ 'Windows IE 7' => 'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; .NET CLR 1.1.4322; .NET CLR 2.0.50727)',
60
+ 'Windows Mozilla' => 'Mozilla/5.0 (Windows; U; Windows NT 5.0; en-US; rv:1.4b) Gecko/20030516 Mozilla Firebird/0.6',
61
+ 'Mac Safari' => 'Mozilla/5.0 (Macintosh; U; PPC Mac OS X; en) AppleWebKit/418 (KHTML, like Gecko) Safari/417.9.3',
62
+ 'Mac FireFox' => 'Mozilla/5.0 (Macintosh; U; PPC Mac OS X Mach-O; en-US; rv:1.8.0.3) Gecko/20060426 Firefox/1.5.0.3',
63
+ 'Mac Mozilla' => 'Mozilla/5.0 (Macintosh; U; PPC Mac OS X Mach-O; en-US; rv:1.4a) Gecko/20030401',
64
+ 'Linux Mozilla' => 'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.4) Gecko/20030624',
65
+ 'Linux Konqueror' => 'Mozilla/5.0 (compatible; Konqueror/3; Linux)',
66
+ 'iPhone' => 'Mozilla/5.0 (iPhone; U; CPU like Mac OS X; en) AppleWebKit/420+ (KHTML, like Gecko) Version/3.0 Mobile/1C28 Safari/419.3',
67
+ 'Mechanize' => "WWW-Mechanize/#{VERSION} (http://rubyforge.org/projects/mechanize/)"
68
+ }
69
+
70
+ attr_accessor :cookie_jar
71
+ attr_accessor :open_timeout, :read_timeout
72
+ attr_accessor :user_agent
73
+ attr_accessor :watch_for_set
74
+ attr_accessor :ca_file
75
+ attr_accessor :key
76
+ attr_accessor :cert
77
+ attr_accessor :pass
78
+ attr_accessor :redirect_ok
79
+ attr_accessor :keep_alive_time
80
+ attr_accessor :keep_alive
81
+ attr_accessor :conditional_requests
82
+ attr_accessor :follow_meta_refresh
83
+ attr_accessor :verify_callback
84
+ attr_accessor :history_added
85
+ attr_accessor :scheme_handlers
86
+ attr_accessor :redirection_limit
87
+
88
+ attr_reader :history
89
+ attr_reader :pluggable_parser
90
+
91
+ alias :follow_redirect? :redirect_ok
92
+
93
+ @html_parser = Nokogiri::HTML
94
+ class << self; attr_accessor :html_parser, :log end
95
+
96
+ def initialize
97
+ # attr_accessors
98
+ @cookie_jar = CookieJar.new
99
+ @log = nil
100
+ @open_timeout = nil
101
+ @read_timeout = nil
102
+ @user_agent = AGENT_ALIASES['Mechanize']
103
+ @watch_for_set = nil
104
+ @history_added = nil
105
+ @ca_file = nil # OpenSSL server certificate file
106
+
107
+ # callback for OpenSSL errors while verifying the server certificate
108
+ # chain, can be used for debugging or to ignore errors by always
109
+ # returning _true_
110
+ @verify_callback = nil
111
+ @cert = nil # OpenSSL Certificate
112
+ @key = nil # OpenSSL Private Key
113
+ @pass = nil # OpenSSL Password
114
+ @redirect_ok = true # Should we follow redirects?
115
+
116
+ # attr_readers
117
+ @history = WWW::Mechanize::History.new
118
+ @pluggable_parser = PluggableParser.new
119
+
120
+ # Auth variables
121
+ @user = nil # Auth User
122
+ @password = nil # Auth Password
123
+ @digest = nil # DigestAuth Digest
124
+ @auth_hash = {} # Keep track of urls for sending auth
125
+
126
+ # Proxy settings
127
+ @proxy_addr = nil
128
+ @proxy_pass = nil
129
+ @proxy_port = nil
130
+ @proxy_user = nil
131
+
132
+ @conditional_requests = true
133
+
134
+ @follow_meta_refresh = false
135
+ @redirection_limit = 20
136
+
137
+ # Connection Cache & Keep alive
138
+ @connection_cache = {}
139
+ @keep_alive_time = 300
140
+ @keep_alive = true
141
+
142
+ @scheme_handlers = Hash.new { |h,k|
143
+ h[k] = lambda { |link, page|
144
+ raise UnsupportedSchemeError.new(k)
145
+ }
146
+ }
147
+ @scheme_handlers['http'] = lambda { |link, page| link }
148
+ @scheme_handlers['https'] = @scheme_handlers['http']
149
+ @scheme_handlers['relative'] = @scheme_handlers['http']
150
+ @scheme_handlers['file'] = @scheme_handlers['http']
151
+
152
+ @pre_connect_hook = Chain::PreConnectHook.new
153
+ @post_connect_hook = Chain::PostConnectHook.new
154
+
155
+ yield self if block_given?
156
+ end
157
+
158
+ def max_history=(length); @history.max_size = length end
159
+ def max_history; @history.max_size end
160
+ def log=(l); self.class.log = l end
161
+ def log; self.class.log end
162
+
163
+ def pre_connect_hooks
164
+ @pre_connect_hook.hooks
165
+ end
166
+
167
+ def post_connect_hooks
168
+ @post_connect_hook.hooks
169
+ end
170
+
171
+ # Sets the proxy address, port, user, and password
172
+ # +addr+ should be a host, with no "http://"
173
+ def set_proxy(addr, port, user = nil, pass = nil)
174
+ @proxy_addr, @proxy_port, @proxy_user, @proxy_pass = addr, port, user, pass
175
+ end
176
+
177
+ # Set the user agent for the Mechanize object.
178
+ # See AGENT_ALIASES
179
+ def user_agent_alias=(al)
180
+ self.user_agent = AGENT_ALIASES[al] || raise("unknown agent alias")
181
+ end
182
+
183
+ # Returns a list of cookies stored in the cookie jar.
184
+ def cookies
185
+ @cookie_jar.to_a
186
+ end
187
+
188
+ # Sets the user and password to be used for authentication.
189
+ def auth(user, password)
190
+ @user = user
191
+ @password = password
192
+ end
193
+ alias :basic_auth :auth
194
+
195
+ # Fetches the URL passed in and returns a page.
196
+ def get(options, parameters = [], referer = nil)
197
+ unless options.is_a? Hash
198
+ url = options
199
+ unless parameters.respond_to?(:each) # FIXME: Remove this in 0.8.0
200
+ referer = parameters
201
+ parameters = []
202
+ end
203
+ else
204
+ raise ArgumentError.new("url must be specified") unless url = options[:url]
205
+ parameters = options[:params] || []
206
+ referer = options[:referer]
207
+ headers = options[:headers]
208
+ end
209
+
210
+ unless referer
211
+ if url.to_s =~ /^http/
212
+ referer = Page.new(nil, {'content-type'=>'text/html'})
213
+ else
214
+ referer = current_page || Page.new(nil, {'content-type'=>'text/html'})
215
+ end
216
+ end
217
+
218
+ # FIXME: Huge hack so that using a URI as a referer works. I need to
219
+ # refactor everything to pass around URIs but still support
220
+ # WWW::Mechanize::Page#base
221
+ unless referer.is_a?(WWW::Mechanize::File)
222
+ referer = referer.is_a?(String) ?
223
+ Page.new(URI.parse(referer), {'content-type' => 'text/html'}) :
224
+ Page.new(referer, {'content-type' => 'text/html'})
225
+ end
226
+
227
+ # fetch the page
228
+ page = fetch_page( :uri => url,
229
+ :referer => referer,
230
+ :headers => headers || {},
231
+ :params => parameters
232
+ )
233
+ add_to_history(page)
234
+ yield page if block_given?
235
+ page
236
+ end
237
+
238
+ ####
239
+ # PUT to +url+ with +query_params+, and setting +options+:
240
+ #
241
+ # put('http://tenderlovemaking.com/', {'q' => 'foo'}, :headers => {})
242
+ #
243
+ def put(url, query_params = {}, options = {})
244
+ page = head(url, query_params, options.merge({:verb => :put}))
245
+ add_to_history(page)
246
+ page
247
+ end
248
+
249
+ ####
250
+ # DELETE to +url+ with +query_params+, and setting +options+:
251
+ #
252
+ # delete('http://tenderlovemaking.com/', {'q' => 'foo'}, :headers => {})
253
+ #
254
+ def delete(url, query_params = {}, options = {})
255
+ page = head(url, query_params, options.merge({:verb => :delete}))
256
+ add_to_history(page)
257
+ page
258
+ end
259
+
260
+ ####
261
+ # HEAD to +url+ with +query_params+, and setting +options+:
262
+ #
263
+ # head('http://tenderlovemaking.com/', {'q' => 'foo'}, :headers => {})
264
+ #
265
+ def head(url, query_params = {}, options = {})
266
+ options = {
267
+ :uri => url,
268
+ :headers => {},
269
+ :params => query_params,
270
+ :verb => :head
271
+ }.merge(options)
272
+ # fetch the page
273
+ page = fetch_page(options)
274
+ yield page if block_given?
275
+ page
276
+ end
277
+
278
+ # Fetch a file and return the contents of the file.
279
+ def get_file(url)
280
+ get(url).body
281
+ end
282
+
283
+ # Clicks the WWW::Mechanize::Link object passed in and returns the
284
+ # page fetched.
285
+ def click(link)
286
+ referer = link.page rescue referer = nil
287
+ href = link.respond_to?(:href) ? link.href :
288
+ (link['href'] || link['src'])
289
+ get(:url => href, :referer => (referer || current_page()))
290
+ end
291
+
292
+ # Equivalent to the browser back button. Returns the most recent page
293
+ # visited.
294
+ def back
295
+ @history.pop
296
+ end
297
+
298
+ # Posts to the given URL with the query parameters passed in. Query
299
+ # parameters can be passed as a hash, or as an array of arrays.
300
+ # Example:
301
+ # agent.post('http://example.com/', "foo" => "bar")
302
+ # or
303
+ # agent.post('http://example.com/', [ ["foo", "bar"] ])
304
+ def post(url, query={})
305
+ node = {}
306
+ # Create a fake form
307
+ class << node
308
+ def search(*args); []; end
309
+ end
310
+ node['method'] = 'POST'
311
+ node['enctype'] = 'application/x-www-form-urlencoded'
312
+
313
+ form = Form.new(node)
314
+ query.each { |k,v|
315
+ if v.is_a?(IO)
316
+ form.enctype = 'multipart/form-data'
317
+ ul = Form::FileUpload.new(k.to_s,::File.basename(v.path))
318
+ ul.file_data = v.read
319
+ form.file_uploads << ul
320
+ else
321
+ form.fields << Form::Field.new(k.to_s,v)
322
+ end
323
+ }
324
+ post_form(url, form)
325
+ end
326
+
327
+ # Submit a form with an optional button.
328
+ # Without a button:
329
+ # page = agent.get('http://example.com')
330
+ # agent.submit(page.forms.first)
331
+ # With a button
332
+ # agent.submit(page.forms.first, page.forms.first.buttons.first)
333
+ def submit(form, button=nil, headers={})
334
+ form.add_button_to_query(button) if button
335
+ case form.method.upcase
336
+ when 'POST'
337
+ post_form(form.action, form, headers)
338
+ when 'GET'
339
+ get( :url => form.action.gsub(/\?[^\?]*$/, ''),
340
+ :params => form.build_query,
341
+ :headers => headers,
342
+ :referer => form.page
343
+ )
344
+ else
345
+ raise "unsupported method: #{form.method.upcase}"
346
+ end
347
+ end
348
+
349
+ # Returns the current page loaded by Mechanize
350
+ def current_page
351
+ @history.last
352
+ end
353
+
354
+ # Returns whether or not a url has been visited
355
+ def visited?(url)
356
+ ! visited_page(url).nil?
357
+ end
358
+
359
+ # Returns a visited page for the url passed in, otherwise nil
360
def visited_page(url)
  # Objects that answer #href (link-like arguments) are unwrapped to that
  # value first.
  url = url.href if url.respond_to?(:href)

  # The history lookup is keyed by the resolved form of the URL.
  absolute = resolve(url)
  @history.visited_page(absolute)
end
366
+
367
+ # Runs given block, then resets the page history as it was before. self is
368
+ # given as a parameter to the block. Returns the value of the block.
369
def transact
  # Copy taken before the block runs; the ensure clause puts it back even
  # when the block raises.
  saved_history = @history.dup
  begin
    yield(self)
  ensure
    @history = saved_history
  end
end
377
+
378
# +page+ is a second name for +current_page+.
alias page current_page

# Methods defined after this bare +private+ are internal helpers.
private
381
+
382
# Runs +url+ (with +referer+ as context, defaulting to the current page)
# through a chain containing only Chain::URIResolver and returns the
# resulting :uri entry as a String.
def resolve(url, referer = current_page())
  request = { :uri => url, :referer => referer }
  resolver = Chain::URIResolver.new(@scheme_handlers)

  # The result is read back out of the hash below, so the handler is
  # presumably expected to rewrite :uri in place; handle's own return value
  # is not used.
  Chain.new([resolver]).handle(request)

  request[:uri].to_s
end
389
+
390
# Sends the encoded body of +form+ to +url+ as a POST request, records the
# resulting page via add_to_history and returns it.
#
# +url+     - target of the request.
# +form+    - object providing #page, #request_data and #enctype.
# +headers+ - extra request headers; on a key clash they override the
#             Content-Type / Content-Length computed here.
def post_form(url, form, headers = {})
  # Referer preference: the form's own page, then the current page, then an
  # empty placeholder HTML page.
  cur_page = form.page || current_page ||
    Page.new( nil, {'content-type'=>'text/html'})

  request_data = form.request_data

  log.debug("query: #{ request_data.inspect }") if log

  # Content-Length is a byte count. String#size counts characters on
  # Ruby >= 1.9, which under-reports bodies containing multibyte text, so
  # prefer bytesize where the running Ruby provides it.
  body_length = if request_data.respond_to?(:bytesize)
                  request_data.bytesize
                else
                  request_data.size
                end

  # fetch the page
  page = fetch_page(  :uri      => url,
                      :referer  => cur_page,
                      :verb     => :post,
                      :params   => [request_data],
                      :headers  => {
                        'Content-Type'    => form.enctype,
                        'Content-Length'  => body_length.to_s,
                      }.merge(headers))
  add_to_history(page)
  page
end
410
+
411
+ # uri is an absolute URI
412
def fetch_page(params)
  # Performs one request/response cycle described by +params+ (a Hash whose
  # keys override the defaults below) and returns the resulting page.
  # Recurses for meta/header refreshes, HTTP redirects and a single
  # authentication retry per host. Raises RedirectLimitReachedError when
  # redirection_limit would be exceeded and ResponseCodeError for responses
  # it does not handle.
  options = {
    :request    => nil,
    :response   => nil,
    :connection => nil,
    :referer    => current_page(),
    :uri        => nil,
    :verb       => :get,
    :agent      => self,
    :redirects  => 0,
    :params     => [],
    :headers    => {},
  }.merge(params)

  # The handlers communicate through +options+: :uri, :request, :params and
  # :connection are read back from it right after this chain has run.
  before_connect = Chain.new([
    Chain::URIResolver.new(@scheme_handlers),
    Chain::ParameterResolver.new,
    Chain::RequestResolver.new,
    Chain::ConnectionResolver.new(
      @connection_cache,
      @keep_alive,
      @proxy_addr,
      @proxy_port,
      @proxy_user,
      @proxy_pass
    ),
    Chain::SSLResolver.new(@ca_file, @verify_callback, @cert, @key, @pass),
    Chain::AuthHeaders.new(@auth_hash, @user, @password, @digest),
    Chain::HeaderResolver.new(  @keep_alive,
                                @keep_alive_time,
                                @cookie_jar,
                                @user_agent),
    Chain::CustomHeaders.new,
    @pre_connect_hook,
  ])
  before_connect.handle(options)

  uri           = options[:uri]
  request       = options[:request]
  cur_page      = options[:referer]
  request_data  = options[:params]
  redirects     = options[:redirects]
  http_obj      = options[:connection]

  # Add If-Modified-Since if page is in history
  if @conditional_requests
    cached = visited_page(uri)
    if cached && cached.response['Last-Modified']
      request['If-Modified-Since'] = cached.response['Last-Modified']
    end
  end

  # Specify timeouts if given
  http_obj.open_timeout = @open_timeout if @open_timeout
  http_obj.read_timeout = @read_timeout if @read_timeout
  http_obj.start unless http_obj.started?

  # Log specified headers for the request
  log.info("#{ request.class }: #{ request.path }") if log
  request.each_header do |k, v|
    log.debug("request-header: #{ k } => #{ v }")
  end if log

  # Send the request. A dropped connection is retried on a fresh session;
  # the third failure is re-raised.
  attempts = 0
  begin
    response = http_obj.request(request, *request_data) { |r|
      connection_chain = Chain.new([
        Chain::ResponseReader.new(r),
        Chain::BodyDecodingHandler.new,
      ])
      connection_chain.handle(options)
    }
  rescue EOFError, Errno::ECONNRESET, Errno::EPIPE => x
    log.error("Rescuing EOF error") if log
    http_obj.finish
    raise x if attempts >= 2
    request.body = nil
    http_obj.start
    attempts += 1
    retry
  end

  after_connect = Chain.new([
    @post_connect_hook,
    Chain::ResponseBodyParser.new(@pluggable_parser, @watch_for_set),
    Chain::ResponseHeaderHandler.new(@cookie_jar, @connection_cache),
  ])
  after_connect.handle(options)

  res_klass = options[:res_klass]
  page      = options[:page]

  log.info("status: #{ page.code }") if log

  if follow_meta_refresh
    redirect_uri = nil
    delay = nil
    if (page.respond_to?(:meta) && (redirect = page.meta.first))
      redirect_uri = redirect.uri.to_s
    elsif refresh = response['refresh']
      parsed_refresh = refresh.match(/^\s*(\d+\.?\d*);\s*(url|URL)=(\S*)\s*$/)
      raise StandardError, "Invalid refresh http header" unless parsed_refresh
      delay = parsed_refresh[1]
      location = parsed_refresh[3]
      # Only a target that starts with "scheme://" is absolute. Anything
      # else is prefixed with the origin of the request just made, keeping
      # its scheme and any non-default port rather than assuming plain http.
      unless location =~ %r{\A[a-z][a-z0-9+.\-]*://}i
        origin = "#{uri.scheme}://#{uri.host}"
        origin = "#{origin}:#{uri.port}" if uri.port && uri.port != uri.default_port
        location = "#{origin}#{location}"
      end
      redirect_uri = location
    end
    if redirect_uri
      # The limit applies to both kinds of refresh; otherwise a page whose
      # meta refresh points back at itself would recurse without bound.
      if redirects + 1 > redirection_limit
        raise RedirectLimitReachedError.new(page, redirects)
      end
      # Only the Refresh header carries a delay here.
      sleep delay.to_i if delay
      @history.push(page, page.uri)
      return fetch_page(
        :uri        => redirect_uri,
        :referer    => page,
        :params     => [],
        :verb       => :get,
        :redirects  => redirects + 1
      )
    end
  end

  return page if res_klass <= Net::HTTPSuccess

  if res_klass == Net::HTTPNotModified
    log.debug("Got cached page") if log
    return visited_page(uri) || page
  elsif res_klass <= Net::HTTPRedirection
    return page unless follow_redirect?
    log.info("follow redirect to: #{ response['Location'] }") if log
    from_uri  = page.uri
    raise RedirectLimitReachedError.new(page, redirects) if redirects + 1 > redirection_limit
    # A redirected HEAD stays a HEAD; every other verb is followed with GET.
    redirect_verb = options[:verb] == :head ? :head : :get
    page = fetch_page(  :uri => response['Location'].to_s,
                        :referer => page,
                        :params  => [],
                        :verb => redirect_verb,
                        :redirects => redirects + 1
                     )
    @history.push(page, from_uri)
    return page
  elsif res_klass <= Net::HTTPUnauthorized
    # Retry only when credentials exist and this host has not already been
    # assigned an auth scheme (i.e. a previous attempt has been made).
    raise ResponseCodeError.new(page) unless @user || @password
    raise ResponseCodeError.new(page) if @auth_hash.has_key?(uri.host)
    if response['www-authenticate'] =~ /Digest/i
      @auth_hash[uri.host] = :digest
      if response['server'] =~ /Microsoft-IIS/
        @auth_hash[uri.host] = :iis_digest
      end
      @digest = response['www-authenticate']
    elsif response['www-authenticate'] =~ /NTLM/
      @auth_hash[uri.host] = :ntlm
    else
      @auth_hash[uri.host] = :basic
    end
    # Repeat the same request; the redirect count is carried over so the
    # retry cannot reset the redirection limit.
    return fetch_page(  :uri       => uri,
                        :referer   => cur_page,
                        :verb      => request.method.downcase.to_sym,
                        :params    => request_data,
                        :headers   => options[:headers],
                        :redirects => redirects
                     )
  end

  raise ResponseCodeError.new(page), "Unhandled response", caller
end
576
+
577
# Pushes +page+ together with the resolved form of its URI onto @history,
# then invokes the history_added callback with the page when one is set.
def add_to_history(page)
  resolved_uri = resolve(page.uri)
  @history.push(page, resolved_uri)

  listener = history_added
  listener.call(page) if listener
end
581
+ end
582
+ end