tenderlove-mechanize 0.9.3.20090623142847 → 0.9.3.20090911221705

Sign up to get free protection for your applications and to get access to all the features.
Files changed (165) hide show
  1. data/Manifest.txt +55 -48
  2. data/Rakefile +12 -22
  3. data/lib/mechanize.rb +618 -4
  4. data/lib/mechanize/chain.rb +33 -0
  5. data/lib/mechanize/chain/auth_headers.rb +78 -0
  6. data/lib/mechanize/chain/body_decoding_handler.rb +46 -0
  7. data/lib/mechanize/chain/connection_resolver.rb +76 -0
  8. data/lib/mechanize/chain/custom_headers.rb +21 -0
  9. data/lib/{www/mechanize → mechanize}/chain/handler.rb +1 -1
  10. data/lib/mechanize/chain/header_resolver.rb +51 -0
  11. data/lib/mechanize/chain/parameter_resolver.rb +22 -0
  12. data/lib/{www/mechanize → mechanize}/chain/post_connect_hook.rb +0 -0
  13. data/lib/mechanize/chain/pre_connect_hook.rb +20 -0
  14. data/lib/mechanize/chain/request_resolver.rb +30 -0
  15. data/lib/mechanize/chain/response_body_parser.rb +38 -0
  16. data/lib/mechanize/chain/response_header_handler.rb +48 -0
  17. data/lib/mechanize/chain/response_reader.rb +39 -0
  18. data/lib/mechanize/chain/ssl_resolver.rb +40 -0
  19. data/lib/mechanize/chain/uri_resolver.rb +75 -0
  20. data/lib/mechanize/content_type_error.rb +14 -0
  21. data/lib/mechanize/cookie.rb +70 -0
  22. data/lib/mechanize/cookie_jar.rb +188 -0
  23. data/lib/mechanize/file.rb +71 -0
  24. data/lib/mechanize/file_response.rb +60 -0
  25. data/lib/mechanize/file_saver.rb +37 -0
  26. data/lib/mechanize/form.rb +378 -0
  27. data/lib/mechanize/form/button.rb +9 -0
  28. data/lib/mechanize/form/check_box.rb +11 -0
  29. data/lib/mechanize/form/field.rb +30 -0
  30. data/lib/mechanize/form/file_upload.rb +22 -0
  31. data/lib/mechanize/form/image_button.rb +21 -0
  32. data/lib/mechanize/form/multi_select_list.rb +67 -0
  33. data/lib/mechanize/form/option.rb +49 -0
  34. data/lib/mechanize/form/radio_button.rb +49 -0
  35. data/lib/mechanize/form/select_list.rb +43 -0
  36. data/lib/mechanize/headers.rb +11 -0
  37. data/lib/mechanize/history.rb +65 -0
  38. data/lib/mechanize/inspect.rb +88 -0
  39. data/lib/{www/mechanize → mechanize}/monkey_patch.rb +4 -6
  40. data/lib/mechanize/page.rb +206 -0
  41. data/lib/mechanize/page/base.rb +8 -0
  42. data/lib/mechanize/page/frame.rb +20 -0
  43. data/lib/mechanize/page/image.rb +26 -0
  44. data/lib/mechanize/page/label.rb +20 -0
  45. data/lib/mechanize/page/link.rb +48 -0
  46. data/lib/mechanize/page/meta.rb +50 -0
  47. data/lib/mechanize/pluggable_parsers.rb +101 -0
  48. data/lib/mechanize/redirect_limit_reached_error.rb +16 -0
  49. data/lib/mechanize/redirect_not_get_or_head_error.rb +18 -0
  50. data/lib/mechanize/response_code_error.rb +22 -0
  51. data/lib/mechanize/unsupported_scheme_error.rb +8 -0
  52. data/lib/mechanize/util.rb +67 -0
  53. data/mechanize.gemspec +8 -8
  54. data/test/chain/test_argument_validator.rb +2 -2
  55. data/test/chain/test_auth_headers.rb +2 -2
  56. data/test/chain/test_custom_headers.rb +2 -2
  57. data/test/chain/test_header_resolver.rb +3 -3
  58. data/test/chain/test_parameter_resolver.rb +4 -4
  59. data/test/chain/test_request_resolver.rb +4 -4
  60. data/test/chain/test_response_reader.rb +3 -3
  61. data/test/helper.rb +1 -1
  62. data/test/htdocs/tc_bad_charset.html +9 -0
  63. data/test/htdocs/tc_charset.html +6 -0
  64. data/test/htdocs/test_bad_encoding.html +52 -0
  65. data/test/test_authenticate.rb +3 -3
  66. data/test/test_bad_links.rb +1 -1
  67. data/test/test_blank_form.rb +1 -1
  68. data/test/test_checkboxes.rb +1 -1
  69. data/test/test_content_type.rb +2 -2
  70. data/test/test_cookie_class.rb +12 -12
  71. data/test/test_cookie_jar.rb +13 -13
  72. data/test/test_cookies.rb +1 -1
  73. data/test/test_encoded_links.rb +1 -1
  74. data/test/test_errors.rb +2 -2
  75. data/test/test_follow_meta.rb +3 -3
  76. data/test/test_form_action.rb +1 -1
  77. data/test/test_form_as_hash.rb +1 -1
  78. data/test/test_form_button.rb +2 -2
  79. data/test/test_form_no_inputname.rb +1 -1
  80. data/test/test_forms.rb +1 -1
  81. data/test/test_frames.rb +1 -1
  82. data/test/test_get_headers.rb +1 -1
  83. data/test/test_gzipping.rb +2 -2
  84. data/test/test_hash_api.rb +1 -1
  85. data/test/test_history.rb +7 -7
  86. data/test/test_history_added.rb +1 -1
  87. data/test/test_html_unscape_forms.rb +7 -7
  88. data/test/test_if_modified_since.rb +1 -1
  89. data/test/test_keep_alive.rb +1 -1
  90. data/test/test_links.rb +2 -2
  91. data/test/test_mech.rb +2 -2
  92. data/test/test_mechanize_file.rb +7 -7
  93. data/test/test_meta.rb +2 -2
  94. data/test/test_multi_select.rb +1 -1
  95. data/test/test_no_attributes.rb +1 -1
  96. data/test/test_option.rb +1 -1
  97. data/test/test_page.rb +3 -3
  98. data/test/test_pluggable_parser.rb +14 -14
  99. data/test/test_post_form.rb +1 -1
  100. data/test/test_pretty_print.rb +2 -2
  101. data/test/test_radiobutton.rb +1 -1
  102. data/test/test_redirect_limit_reached.rb +1 -3
  103. data/test/test_redirect_verb_handling.rb +1 -3
  104. data/test/test_referer.rb +1 -1
  105. data/test/test_relative_links.rb +1 -1
  106. data/test/test_request.rb +1 -1
  107. data/test/test_response_code.rb +3 -3
  108. data/test/test_save_file.rb +3 -3
  109. data/test/test_scheme.rb +3 -3
  110. data/test/test_select.rb +2 -2
  111. data/test/test_select_all.rb +1 -1
  112. data/test/test_select_none.rb +1 -1
  113. data/test/test_select_noopts.rb +1 -1
  114. data/test/test_set_fields.rb +1 -1
  115. data/test/test_ssl_server.rb +1 -1
  116. data/test/test_subclass.rb +1 -1
  117. data/test/test_textarea.rb +1 -1
  118. data/test/test_upload.rb +1 -1
  119. data/test/test_verbs.rb +1 -1
  120. metadata +61 -56
  121. data/lib/www/mechanize.rb +0 -619
  122. data/lib/www/mechanize/chain.rb +0 -34
  123. data/lib/www/mechanize/chain/auth_headers.rb +0 -80
  124. data/lib/www/mechanize/chain/body_decoding_handler.rb +0 -48
  125. data/lib/www/mechanize/chain/connection_resolver.rb +0 -78
  126. data/lib/www/mechanize/chain/custom_headers.rb +0 -23
  127. data/lib/www/mechanize/chain/header_resolver.rb +0 -53
  128. data/lib/www/mechanize/chain/parameter_resolver.rb +0 -24
  129. data/lib/www/mechanize/chain/pre_connect_hook.rb +0 -22
  130. data/lib/www/mechanize/chain/request_resolver.rb +0 -32
  131. data/lib/www/mechanize/chain/response_body_parser.rb +0 -40
  132. data/lib/www/mechanize/chain/response_header_handler.rb +0 -50
  133. data/lib/www/mechanize/chain/response_reader.rb +0 -41
  134. data/lib/www/mechanize/chain/ssl_resolver.rb +0 -42
  135. data/lib/www/mechanize/chain/uri_resolver.rb +0 -77
  136. data/lib/www/mechanize/content_type_error.rb +0 -16
  137. data/lib/www/mechanize/cookie.rb +0 -72
  138. data/lib/www/mechanize/cookie_jar.rb +0 -191
  139. data/lib/www/mechanize/file.rb +0 -73
  140. data/lib/www/mechanize/file_response.rb +0 -62
  141. data/lib/www/mechanize/file_saver.rb +0 -39
  142. data/lib/www/mechanize/form.rb +0 -360
  143. data/lib/www/mechanize/form/button.rb +0 -8
  144. data/lib/www/mechanize/form/check_box.rb +0 -13
  145. data/lib/www/mechanize/form/field.rb +0 -28
  146. data/lib/www/mechanize/form/file_upload.rb +0 -24
  147. data/lib/www/mechanize/form/image_button.rb +0 -23
  148. data/lib/www/mechanize/form/multi_select_list.rb +0 -69
  149. data/lib/www/mechanize/form/option.rb +0 -51
  150. data/lib/www/mechanize/form/radio_button.rb +0 -38
  151. data/lib/www/mechanize/form/select_list.rb +0 -45
  152. data/lib/www/mechanize/headers.rb +0 -12
  153. data/lib/www/mechanize/history.rb +0 -67
  154. data/lib/www/mechanize/inspect.rb +0 -90
  155. data/lib/www/mechanize/page.rb +0 -181
  156. data/lib/www/mechanize/page/base.rb +0 -10
  157. data/lib/www/mechanize/page/frame.rb +0 -22
  158. data/lib/www/mechanize/page/link.rb +0 -50
  159. data/lib/www/mechanize/page/meta.rb +0 -51
  160. data/lib/www/mechanize/pluggable_parsers.rb +0 -103
  161. data/lib/www/mechanize/redirect_limit_reached_error.rb +0 -18
  162. data/lib/www/mechanize/redirect_not_get_or_head_error.rb +0 -20
  163. data/lib/www/mechanize/response_code_error.rb +0 -25
  164. data/lib/www/mechanize/unsupported_scheme_error.rb +0 -10
  165. data/lib/www/mechanize/util.rb +0 -76
data/test/test_select.rb CHANGED
@@ -2,7 +2,7 @@ require File.expand_path(File.join(File.dirname(__FILE__), "helper"))
2
2
 
3
3
  class SelectTest < Test::Unit::TestCase
4
4
  def setup
5
- @agent = WWW::Mechanize.new
5
+ @agent = Mechanize.new
6
6
  @page = @agent.get("http://localhost/form_select.html")
7
7
  @form = @page.forms.first
8
8
  end
@@ -18,7 +18,7 @@ class SelectTest < Test::Unit::TestCase
18
18
  end
19
19
 
20
20
  def test_correct_class
21
- assert_instance_of(WWW::Mechanize::Form::SelectList,
21
+ assert_instance_of(Mechanize::Form::SelectList,
22
22
  @form.field_with(:name => 'list'))
23
23
  end
24
24
 
@@ -1,7 +1,7 @@
1
1
  require File.expand_path(File.join(File.dirname(__FILE__), "helper"))
2
2
  class SelectAllTest < Test::Unit::TestCase
3
3
  def setup
4
- @agent = WWW::Mechanize.new
4
+ @agent = Mechanize.new
5
5
  @page = @agent.get("http://localhost/form_select_all.html")
6
6
  @form = @page.forms.first
7
7
  end
@@ -1,7 +1,7 @@
1
1
  require File.expand_path(File.join(File.dirname(__FILE__), "helper"))
2
2
  class SelectNoneTest < Test::Unit::TestCase
3
3
  def setup
4
- @agent = WWW::Mechanize.new
4
+ @agent = Mechanize.new
5
5
  @page = @agent.get("http://localhost/form_select_none.html")
6
6
  @form = @page.forms.first
7
7
  end
@@ -2,7 +2,7 @@ require File.expand_path(File.join(File.dirname(__FILE__), "helper"))
2
2
 
3
3
  class SelectNoOptionsTest < Test::Unit::TestCase
4
4
  def setup
5
- @agent = WWW::Mechanize.new
5
+ @agent = Mechanize.new
6
6
  @page = @agent.get("http://localhost/form_select_noopts.html")
7
7
  @form = @page.forms.first
8
8
  end
@@ -2,7 +2,7 @@ require File.expand_path(File.join(File.dirname(__FILE__), "helper"))
2
2
 
3
3
  class TestSetFields < Test::Unit::TestCase
4
4
  def setup
5
- @agent = WWW::Mechanize.new
5
+ @agent = Mechanize.new
6
6
  @page = @agent.get("http://localhost/form_set_fields.html")
7
7
  @form = @page.forms.first
8
8
  end
@@ -2,7 +2,7 @@ require File.expand_path(File.join(File.dirname(__FILE__), "helper"))
2
2
 
3
3
  class SSLServerTest < Test::Unit::TestCase
4
4
  def setup
5
- @agent = WWW::Mechanize.new
5
+ @agent = Mechanize.new
6
6
  end
7
7
 
8
8
  def test_ssl_request
@@ -2,7 +2,7 @@ require File.expand_path(File.join(File.dirname(__FILE__), "helper"))
2
2
 
3
3
  class TestSubclass < Test::Unit::TestCase
4
4
  def setup
5
- @agent = WWW::Mechanize.new
5
+ @agent = Mechanize.new
6
6
  end
7
7
 
8
8
  def test_send_cookie
@@ -2,7 +2,7 @@ require File.expand_path(File.join(File.dirname(__FILE__), "helper"))
2
2
 
3
3
  class TestTextArea < Test::Unit::TestCase
4
4
  def setup
5
- @agent = WWW::Mechanize.new
5
+ @agent = Mechanize.new
6
6
  @page = @agent.get("http://localhost/tc_textarea.html")
7
7
  end
8
8
 
data/test/test_upload.rb CHANGED
@@ -2,7 +2,7 @@ require File.expand_path(File.join(File.dirname(__FILE__), "helper"))
2
2
 
3
3
  class UploadMechTest < Test::Unit::TestCase
4
4
  def setup
5
- @agent = WWW::Mechanize.new
5
+ @agent = Mechanize.new
6
6
  @page = @agent.get("http://localhost/file_upload.html")
7
7
  end
8
8
 
data/test/test_verbs.rb CHANGED
@@ -2,7 +2,7 @@ require File.expand_path(File.join(File.dirname(__FILE__), "helper"))
2
2
 
3
3
  class VerbsTest < Test::Unit::TestCase
4
4
  def setup
5
- @agent = WWW::Mechanize.new
5
+ @agent = Mechanize.new
6
6
  end
7
7
 
8
8
  def test_put
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: tenderlove-mechanize
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.9.3.20090623142847
4
+ version: 0.9.3.20090911221705
5
5
  platform: ruby
6
6
  authors:
7
7
  - Aaron Patterson
@@ -10,7 +10,7 @@ autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
12
 
13
- date: 2009-06-23 00:00:00 -07:00
13
+ date: 2009-09-11 00:00:00 -07:00
14
14
  default_executable:
15
15
  dependencies:
16
16
  - !ruby/object:Gem::Dependency
@@ -31,7 +31,7 @@ dependencies:
31
31
  requirements:
32
32
  - - ">="
33
33
  - !ruby/object:Gem::Version
34
- version: 2.2.0
34
+ version: 2.3.3
35
35
  version:
36
36
  description: The Mechanize library is used for automating interaction with websites. Mechanize automatically stores and sends cookies, follows redirects, can follow links, and submit forms. Form fields can be populated and submitted. Mechanize also keeps track of the sites that you have visited as a history.
37
37
  email:
@@ -64,57 +64,60 @@ files:
64
64
  - examples/rubyforge.rb
65
65
  - examples/spider.rb
66
66
  - lib/mechanize.rb
67
- - lib/www/mechanize.rb
68
- - lib/www/mechanize/chain.rb
69
- - lib/www/mechanize/chain/auth_headers.rb
70
- - lib/www/mechanize/chain/body_decoding_handler.rb
71
- - lib/www/mechanize/chain/connection_resolver.rb
72
- - lib/www/mechanize/chain/custom_headers.rb
73
- - lib/www/mechanize/chain/handler.rb
74
- - lib/www/mechanize/chain/header_resolver.rb
75
- - lib/www/mechanize/chain/parameter_resolver.rb
76
- - lib/www/mechanize/chain/post_connect_hook.rb
77
- - lib/www/mechanize/chain/pre_connect_hook.rb
78
- - lib/www/mechanize/chain/request_resolver.rb
79
- - lib/www/mechanize/chain/response_body_parser.rb
80
- - lib/www/mechanize/chain/response_header_handler.rb
81
- - lib/www/mechanize/chain/response_reader.rb
82
- - lib/www/mechanize/chain/ssl_resolver.rb
83
- - lib/www/mechanize/chain/uri_resolver.rb
84
- - lib/www/mechanize/content_type_error.rb
85
- - lib/www/mechanize/cookie.rb
86
- - lib/www/mechanize/cookie_jar.rb
87
- - lib/www/mechanize/file.rb
88
- - lib/www/mechanize/file_response.rb
89
- - lib/www/mechanize/file_saver.rb
90
- - lib/www/mechanize/form.rb
91
- - lib/www/mechanize/form/button.rb
92
- - lib/www/mechanize/form/check_box.rb
93
- - lib/www/mechanize/form/field.rb
94
- - lib/www/mechanize/form/file_upload.rb
95
- - lib/www/mechanize/form/image_button.rb
96
- - lib/www/mechanize/form/multi_select_list.rb
97
- - lib/www/mechanize/form/option.rb
98
- - lib/www/mechanize/form/radio_button.rb
99
- - lib/www/mechanize/form/select_list.rb
100
- - lib/www/mechanize/headers.rb
101
- - lib/www/mechanize/history.rb
102
- - lib/www/mechanize/inspect.rb
103
- - lib/www/mechanize/monkey_patch.rb
104
- - lib/www/mechanize/page.rb
105
- - lib/www/mechanize/page/base.rb
106
- - lib/www/mechanize/page/frame.rb
107
- - lib/www/mechanize/page/link.rb
108
- - lib/www/mechanize/page/meta.rb
109
- - lib/www/mechanize/pluggable_parsers.rb
110
- - lib/www/mechanize/redirect_limit_reached_error.rb
111
- - lib/www/mechanize/redirect_not_get_or_head_error.rb
112
- - lib/www/mechanize/response_code_error.rb
113
- - lib/www/mechanize/unsupported_scheme_error.rb
114
- - lib/www/mechanize/util.rb
67
+ - lib/mechanize/chain.rb
68
+ - lib/mechanize/chain/auth_headers.rb
69
+ - lib/mechanize/chain/body_decoding_handler.rb
70
+ - lib/mechanize/chain/connection_resolver.rb
71
+ - lib/mechanize/chain/custom_headers.rb
72
+ - lib/mechanize/chain/handler.rb
73
+ - lib/mechanize/chain/header_resolver.rb
74
+ - lib/mechanize/chain/parameter_resolver.rb
75
+ - lib/mechanize/chain/post_connect_hook.rb
76
+ - lib/mechanize/chain/pre_connect_hook.rb
77
+ - lib/mechanize/chain/request_resolver.rb
78
+ - lib/mechanize/chain/response_body_parser.rb
79
+ - lib/mechanize/chain/response_header_handler.rb
80
+ - lib/mechanize/chain/response_reader.rb
81
+ - lib/mechanize/chain/ssl_resolver.rb
82
+ - lib/mechanize/chain/uri_resolver.rb
83
+ - lib/mechanize/content_type_error.rb
84
+ - lib/mechanize/cookie.rb
85
+ - lib/mechanize/cookie_jar.rb
86
+ - lib/mechanize/file.rb
87
+ - lib/mechanize/file_response.rb
88
+ - lib/mechanize/file_saver.rb
89
+ - lib/mechanize/form.rb
90
+ - lib/mechanize/form/button.rb
91
+ - lib/mechanize/form/check_box.rb
92
+ - lib/mechanize/form/field.rb
93
+ - lib/mechanize/form/file_upload.rb
94
+ - lib/mechanize/form/image_button.rb
95
+ - lib/mechanize/form/multi_select_list.rb
96
+ - lib/mechanize/form/option.rb
97
+ - lib/mechanize/form/radio_button.rb
98
+ - lib/mechanize/form/select_list.rb
99
+ - lib/mechanize/headers.rb
100
+ - lib/mechanize/history.rb
101
+ - lib/mechanize/inspect.rb
102
+ - lib/mechanize/monkey_patch.rb
103
+ - lib/mechanize/page.rb
104
+ - lib/mechanize/page/base.rb
105
+ - lib/mechanize/page/frame.rb
106
+ - lib/mechanize/page/image.rb
107
+ - lib/mechanize/page/label.rb
108
+ - lib/mechanize/page/link.rb
109
+ - lib/mechanize/page/meta.rb
110
+ - lib/mechanize/pluggable_parsers.rb
111
+ - lib/mechanize/redirect_limit_reached_error.rb
112
+ - lib/mechanize/redirect_not_get_or_head_error.rb
113
+ - lib/mechanize/response_code_error.rb
114
+ - lib/mechanize/unsupported_scheme_error.rb
115
+ - lib/mechanize/util.rb
115
116
  - mechanize.gemspec
116
117
  - test/chain/test_argument_validator.rb
118
+ - test/chain/test_auth_headers.rb
117
119
  - test/chain/test_custom_headers.rb
120
+ - test/chain/test_header_resolver.rb
118
121
  - test/chain/test_parameter_resolver.rb
119
122
  - test/chain/test_request_resolver.rb
120
123
  - test/chain/test_response_reader.rb
@@ -148,9 +151,11 @@ files:
148
151
  - test/htdocs/meta_cookie.html
149
152
  - test/htdocs/no_title_test.html
150
153
  - test/htdocs/relative/tc_relative_links.html
154
+ - test/htdocs/tc_bad_charset.html
151
155
  - test/htdocs/tc_bad_links.html
152
156
  - test/htdocs/tc_base_link.html
153
157
  - test/htdocs/tc_blank_form.html
158
+ - test/htdocs/tc_charset.html
154
159
  - test/htdocs/tc_checkboxes.html
155
160
  - test/htdocs/tc_encoded_links.html
156
161
  - test/htdocs/tc_follow_meta.html
@@ -162,6 +167,7 @@ files:
162
167
  - test/htdocs/tc_referer.html
163
168
  - test/htdocs/tc_relative_links.html
164
169
  - test/htdocs/tc_textarea.html
170
+ - test/htdocs/test_bad_encoding.html
165
171
  - test/htdocs/unusual______.html
166
172
  - test/servlets.rb
167
173
  - test/ssl_server.rb
@@ -193,6 +199,7 @@ files:
193
199
  - test/test_links.rb
194
200
  - test/test_mech.rb
195
201
  - test/test_mechanize_file.rb
202
+ - test/test_meta.rb
196
203
  - test/test_multi_select.rb
197
204
  - test/test_no_attributes.rb
198
205
  - test/test_option.rb
@@ -219,11 +226,9 @@ files:
219
226
  - test/test_textarea.rb
220
227
  - test/test_upload.rb
221
228
  - test/test_verbs.rb
222
- - test/chain/test_auth_headers.rb
223
- - test/chain/test_header_resolver.rb
224
- - test/test_meta.rb
225
229
  has_rdoc: false
226
230
  homepage: http://mechanize.rubyforge.org/
231
+ licenses:
227
232
  post_install_message:
228
233
  rdoc_options:
229
234
  - --main
@@ -245,10 +250,10 @@ required_rubygems_version: !ruby/object:Gem::Requirement
245
250
  requirements: []
246
251
 
247
252
  rubyforge_project: mechanize
248
- rubygems_version: 1.2.0
253
+ rubygems_version: 1.3.5
249
254
  signing_key:
250
255
  specification_version: 3
251
- summary: Mechanize provides automated web-browsing
256
+ summary: The Mechanize library is used for automating interaction with websites
252
257
  test_files:
253
258
  - test/chain/test_argument_validator.rb
254
259
  - test/chain/test_auth_headers.rb
data/lib/www/mechanize.rb DELETED
@@ -1,619 +0,0 @@
1
- require 'net/http'
2
- require 'net/https'
3
- require 'uri'
4
- require 'webrick/httputils'
5
- require 'zlib'
6
- require 'stringio'
7
- require 'digest/md5'
8
- require 'fileutils'
9
- require 'nokogiri'
10
- require 'forwardable'
11
- require 'iconv'
12
- require 'nkf'
13
-
14
- require 'www/mechanize/util'
15
- require 'www/mechanize/content_type_error'
16
- require 'www/mechanize/response_code_error'
17
- require 'www/mechanize/unsupported_scheme_error'
18
- require 'www/mechanize/redirect_limit_reached_error'
19
- require 'www/mechanize/redirect_not_get_or_head_error'
20
- require 'www/mechanize/cookie'
21
- require 'www/mechanize/cookie_jar'
22
- require 'www/mechanize/history'
23
- require 'www/mechanize/form'
24
- require 'www/mechanize/pluggable_parsers'
25
- require 'www/mechanize/file_response'
26
- require 'www/mechanize/inspect'
27
- require 'www/mechanize/chain'
28
- require 'www/mechanize/monkey_patch'
29
-
30
- module WWW
31
- # = Synopsis
32
- # The Mechanize library is used for automating interaction with a website. It
33
- # can follow links, and submit forms. Form fields can be populated and
34
- # submitted. A history of URL's is maintained and can be queried.
35
- #
36
- # == Example
37
- # require 'rubygems'
38
- # require 'mechanize'
39
- # require 'logger'
40
- #
41
- # agent = WWW::Mechanize.new { |a| a.log = Logger.new("mech.log") }
42
- # agent.user_agent_alias = 'Mac Safari'
43
- # page = agent.get("http://www.google.com/")
44
- # search_form = page.form_with(:name => "f")
45
- # search_form.field_with(:name => "q").value = "Hello"
46
- # search_results = agent.submit(search_form)
47
- # puts search_results.body
48
- class Mechanize
49
- ##
50
- # The version of Mechanize you are using.
51
- VERSION = '0.9.3'
52
-
53
- ##
54
- # User Agent aliases
55
- AGENT_ALIASES = {
56
- 'Windows IE 6' => 'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)',
57
- 'Windows IE 7' => 'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; .NET CLR 1.1.4322; .NET CLR 2.0.50727)',
58
- 'Windows Mozilla' => 'Mozilla/5.0 (Windows; U; Windows NT 5.0; en-US; rv:1.4b) Gecko/20030516 Mozilla Firebird/0.6',
59
- 'Mac Safari' => 'Mozilla/5.0 (Macintosh; U; PPC Mac OS X; en) AppleWebKit/418 (KHTML, like Gecko) Safari/417.9.3',
60
- 'Mac FireFox' => 'Mozilla/5.0 (Macintosh; U; PPC Mac OS X Mach-O; en-US; rv:1.8.0.3) Gecko/20060426 Firefox/1.5.0.3',
61
- 'Mac Mozilla' => 'Mozilla/5.0 (Macintosh; U; PPC Mac OS X Mach-O; en-US; rv:1.4a) Gecko/20030401',
62
- 'Linux Mozilla' => 'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.4) Gecko/20030624',
63
- 'Linux Konqueror' => 'Mozilla/5.0 (compatible; Konqueror/3; Linux)',
64
- 'iPhone' => 'Mozilla/5.0 (iPhone; U; CPU like Mac OS X; en) AppleWebKit/420+ (KHTML, like Gecko) Version/3.0 Mobile/1C28 Safari/419.3',
65
- 'Mechanize' => "WWW-Mechanize/#{VERSION} (http://rubyforge.org/projects/mechanize/)"
66
- }
67
-
68
- attr_accessor :cookie_jar
69
- attr_accessor :open_timeout, :read_timeout
70
- attr_accessor :user_agent
71
- attr_accessor :watch_for_set
72
- attr_accessor :ca_file
73
- attr_accessor :key
74
- attr_accessor :cert
75
- attr_accessor :pass
76
- attr_accessor :redirect_ok
77
- attr_accessor :keep_alive_time
78
- attr_accessor :keep_alive
79
- attr_accessor :conditional_requests
80
- attr_accessor :follow_meta_refresh
81
- attr_accessor :verify_callback
82
- attr_accessor :history_added
83
- attr_accessor :scheme_handlers
84
- attr_accessor :redirection_limit
85
-
86
- # A hash of custom request headers
87
- attr_accessor :request_headers
88
-
89
- # The HTML parser to be used when parsing documents
90
- attr_accessor :html_parser
91
-
92
- attr_reader :history
93
- attr_reader :pluggable_parser
94
-
95
- alias :follow_redirect? :redirect_ok
96
-
97
- @html_parser = Nokogiri::HTML
98
- class << self; attr_accessor :html_parser, :log end
99
-
100
- def initialize
101
- # attr_accessors
102
- @cookie_jar = CookieJar.new
103
- @log = nil
104
- @open_timeout = nil
105
- @read_timeout = nil
106
- @user_agent = AGENT_ALIASES['Mechanize']
107
- @watch_for_set = nil
108
- @history_added = nil
109
- @ca_file = nil # OpenSSL server certificate file
110
-
111
- # callback for OpenSSL errors while verifying the server certificate
112
- # chain, can be used for debugging or to ignore errors by always
113
- # returning _true_
114
- @verify_callback = nil
115
- @cert = nil # OpenSSL Certificate
116
- @key = nil # OpenSSL Private Key
117
- @pass = nil # OpenSSL Password
118
- @redirect_ok = true # Should we follow redirects?
119
-
120
- # attr_readers
121
- @history = WWW::Mechanize::History.new
122
- @pluggable_parser = PluggableParser.new
123
-
124
- # Auth variables
125
- @user = nil # Auth User
126
- @password = nil # Auth Password
127
- @digest = nil # DigestAuth Digest
128
- @auth_hash = {} # Keep track of urls for sending auth
129
- @request_headers= {} # A hash of request headers to be used
130
-
131
- # Proxy settings
132
- @proxy_addr = nil
133
- @proxy_pass = nil
134
- @proxy_port = nil
135
- @proxy_user = nil
136
-
137
- @conditional_requests = true
138
-
139
- @follow_meta_refresh = false
140
- @redirection_limit = 20
141
-
142
- # Connection Cache & Keep alive
143
- @connection_cache = {}
144
- @keep_alive_time = 300
145
- @keep_alive = true
146
-
147
- @scheme_handlers = Hash.new { |h,k|
148
- h[k] = lambda { |link, page|
149
- raise UnsupportedSchemeError.new(k)
150
- }
151
- }
152
- @scheme_handlers['http'] = lambda { |link, page| link }
153
- @scheme_handlers['https'] = @scheme_handlers['http']
154
- @scheme_handlers['relative'] = @scheme_handlers['http']
155
- @scheme_handlers['file'] = @scheme_handlers['http']
156
-
157
- @pre_connect_hook = Chain::PreConnectHook.new
158
- @post_connect_hook = Chain::PostConnectHook.new
159
-
160
- @html_parser = self.class.html_parser
161
-
162
- yield self if block_given?
163
- end
164
-
165
- def max_history=(length); @history.max_size = length end
166
- def max_history; @history.max_size end
167
- def log=(l); self.class.log = l end
168
- def log; self.class.log end
169
-
170
- def pre_connect_hooks
171
- @pre_connect_hook.hooks
172
- end
173
-
174
- def post_connect_hooks
175
- @post_connect_hook.hooks
176
- end
177
-
178
- # Sets the proxy address, port, user, and password
179
- # +addr+ should be a host, with no "http://"
180
- def set_proxy(addr, port, user = nil, pass = nil)
181
- @proxy_addr, @proxy_port, @proxy_user, @proxy_pass = addr, port, user, pass
182
- end
183
-
184
- # Set the user agent for the Mechanize object.
185
- # See AGENT_ALIASES
186
- def user_agent_alias=(al)
187
- self.user_agent = AGENT_ALIASES[al] || raise("unknown agent alias")
188
- end
189
-
190
- # Returns a list of cookies stored in the cookie jar.
191
- def cookies
192
- @cookie_jar.to_a
193
- end
194
-
195
- # Sets the user and password to be used for authentication.
196
- def auth(user, password)
197
- @user = user
198
- @password = password
199
- end
200
- alias :basic_auth :auth
201
-
202
- # Fetches the URL passed in and returns a page.
203
- def get(options, parameters = [], referer = nil)
204
- unless options.is_a? Hash
205
- url = options
206
- unless parameters.respond_to?(:each) # FIXME: Remove this in 0.8.0
207
- referer = parameters
208
- parameters = []
209
- end
210
- else
211
- raise ArgumentError.new("url must be specified") unless url = options[:url]
212
- parameters = options[:params] || []
213
- referer = options[:referer]
214
- headers = options[:headers]
215
- end
216
-
217
- unless referer
218
- if url.to_s =~ /^http/
219
- referer = Page.new(nil, {'content-type'=>'text/html'})
220
- else
221
- referer = current_page || Page.new(nil, {'content-type'=>'text/html'})
222
- end
223
- end
224
-
225
- # FIXME: Huge hack so that using a URI as a referer works. I need to
226
- # refactor everything to pass around URIs but still support
227
- # WWW::Mechanize::Page#base
228
- unless referer.is_a?(WWW::Mechanize::File)
229
- referer = referer.is_a?(String) ?
230
- Page.new(URI.parse(referer), {'content-type' => 'text/html'}) :
231
- Page.new(referer, {'content-type' => 'text/html'})
232
- end
233
-
234
- # fetch the page
235
- page = fetch_page( :uri => url,
236
- :referer => referer,
237
- :headers => headers || {},
238
- :params => parameters
239
- )
240
- add_to_history(page)
241
- yield page if block_given?
242
- page
243
- end
244
-
245
- ####
246
- # PUT to +url+ with +entity+, and setting +options+:
247
- #
248
- # put('http://tenderlovemaking.com/', 'new content', :headers => {'Content-Type' => 'text/plain'})
249
- #
250
- def put(url, entity, options = {})
251
- request_with_entity(:put, url, entity, options)
252
- end
253
-
254
- ####
255
- # DELETE to +url+ with +query_params+, and setting +options+:
256
- #
257
- # delete('http://tenderlovemaking.com/', {'q' => 'foo'}, :headers => {})
258
- #
259
- def delete(url, query_params = {}, options = {})
260
- page = head(url, query_params, options.merge({:verb => :delete}))
261
- add_to_history(page)
262
- page
263
- end
264
-
265
- ####
266
- # HEAD to +url+ with +query_params+, and setting +options+:
267
- #
268
- # head('http://tenderlovemaking.com/', {'q' => 'foo'}, :headers => {})
269
- #
270
- def head(url, query_params = {}, options = {})
271
- options = {
272
- :uri => url,
273
- :headers => {},
274
- :params => query_params,
275
- :verb => :head
276
- }.merge(options)
277
- # fetch the page
278
- page = fetch_page(options)
279
- yield page if block_given?
280
- page
281
- end
282
-
283
- # Fetch a file and return the contents of the file.
284
- def get_file(url)
285
- get(url).body
286
- end
287
-
288
- # Clicks the WWW::Mechanize::Link object passed in and returns the
289
- # page fetched.
290
- def click(link)
291
- referer = link.page rescue referer = nil
292
- href = link.respond_to?(:href) ? link.href :
293
- (link['href'] || link['src'])
294
- get(:url => href, :referer => (referer || current_page()))
295
- end
296
-
297
- # Equivalent to the browser back button. Returns the most recent page
298
- # visited.
299
- def back
300
- @history.pop
301
- end
302
-
303
- # Posts to the given URL with the request entity. The request
304
- # entity is specified by either a string, or a list of key-value
305
- # pairs represented by a hash or an array of arrays.
306
- #
307
- # Examples:
308
- # agent.post('http://example.com/', "foo" => "bar")
309
- #
310
- # agent.post('http://example.com/', [ ["foo", "bar"] ])
311
- #
312
- # agent.post('http://example.com/', "<message>hello</message>", 'Content-Type' => 'application/xml')
313
- def post(url, query={}, headers={})
314
- if query.is_a?(String)
315
- return request_with_entity(:post, url, query, :headers => headers)
316
- end
317
- node = {}
318
- # Create a fake form
319
- class << node
320
- def search(*args); []; end
321
- end
322
- node['method'] = 'POST'
323
- node['enctype'] = 'application/x-www-form-urlencoded'
324
-
325
- form = Form.new(node)
326
- query.each { |k,v|
327
- if v.is_a?(IO)
328
- form.enctype = 'multipart/form-data'
329
- ul = Form::FileUpload.new(k.to_s,::File.basename(v.path))
330
- ul.file_data = v.read
331
- form.file_uploads << ul
332
- else
333
- form.fields << Form::Field.new(k.to_s,v)
334
- end
335
- }
336
- post_form(url, form, headers)
337
- end
338
-
339
- # Submit a form with an optional button.
340
- # Without a button:
341
- # page = agent.get('http://example.com')
342
- # agent.submit(page.forms.first)
343
- # With a button
344
- # agent.submit(page.forms.first, page.forms.first.buttons.first)
345
- def submit(form, button=nil, headers={})
346
- form.add_button_to_query(button) if button
347
- case form.method.upcase
348
- when 'POST'
349
- post_form(form.action, form, headers)
350
- when 'GET'
351
- get( :url => form.action.gsub(/\?[^\?]*$/, ''),
352
- :params => form.build_query,
353
- :headers => headers,
354
- :referer => form.page
355
- )
356
- else
357
- raise "unsupported method: #{form.method.upcase}"
358
- end
359
- end
360
-
361
- def request_with_entity(verb, url, entity, options={})
362
- cur_page = current_page || Page.new( nil, {'content-type'=>'text/html'})
363
-
364
- options = {
365
- :uri => url,
366
- :referer => cur_page,
367
- :headers => {},
368
- }.update(options)
369
-
370
- headers = {
371
- 'Content-Type' => 'application/octet-stream',
372
- 'Content-Length' => entity.size.to_s,
373
- }.update(options[:headers])
374
-
375
- options.update({
376
- :verb => verb,
377
- :params => [entity],
378
- :headers => headers,
379
- })
380
-
381
- page = fetch_page(options)
382
- add_to_history(page)
383
- page
384
- end
385
-
386
- # Returns the current page loaded by Mechanize
387
- def current_page
388
- @history.last
389
- end
390
-
391
- # Returns whether or not a url has been visited
392
- def visited?(url)
393
- ! visited_page(url).nil?
394
- end
395
-
396
- # Returns a visited page for the url passed in, otherwise nil
397
- def visited_page(url)
398
- if url.respond_to? :href
399
- url = url.href
400
- end
401
- @history.visited_page(resolve(url))
402
- end
403
-
404
- # Runs given block, then resets the page history as it was before. self is
405
- # given as a parameter to the block. Returns the value of the block.
406
- def transact
407
- history_backup = @history.dup
408
- begin
409
- yield self
410
- ensure
411
- @history = history_backup
412
- end
413
- end
414
-
415
- alias :page :current_page
416
-
417
- private
418
-
419
- def resolve(url, referer = current_page())
420
- hash = { :uri => url, :referer => referer }
421
- chain = Chain.new([
422
- Chain::URIResolver.new(@scheme_handlers)
423
- ]).handle(hash)
424
- hash[:uri].to_s
425
- end
426
-
427
- def post_form(url, form, headers = {})
428
- cur_page = form.page || current_page ||
429
- Page.new( nil, {'content-type'=>'text/html'})
430
-
431
- request_data = form.request_data
432
-
433
- log.debug("query: #{ request_data.inspect }") if log
434
-
435
- # fetch the page
436
- page = fetch_page( :uri => url,
437
- :referer => cur_page,
438
- :verb => :post,
439
- :params => [request_data],
440
- :headers => {
441
- 'Content-Type' => form.enctype,
442
- 'Content-Length' => request_data.size.to_s,
443
- }.merge(headers))
444
- add_to_history(page)
445
- page
446
- end
447
-
448
- # uri is an absolute URI
449
- def fetch_page(params)
450
- options = {
451
- :request => nil,
452
- :response => nil,
453
- :connection => nil,
454
- :referer => current_page(),
455
- :uri => nil,
456
- :verb => :get,
457
- :agent => self,
458
- :redirects => 0,
459
- :params => [],
460
- :headers => {},
461
- }.merge(params)
462
-
463
- before_connect = Chain.new([
464
- Chain::URIResolver.new(@scheme_handlers),
465
- Chain::ParameterResolver.new,
466
- Chain::RequestResolver.new,
467
- Chain::ConnectionResolver.new(
468
- @connection_cache,
469
- @keep_alive,
470
- @proxy_addr,
471
- @proxy_port,
472
- @proxy_user,
473
- @proxy_pass
474
- ),
475
- Chain::SSLResolver.new(@ca_file, @verify_callback, @cert, @key, @pass),
476
- Chain::AuthHeaders.new(@auth_hash, @user, @password, @digest),
477
- Chain::HeaderResolver.new(
478
- @keep_alive,
479
- @keep_alive_time,
480
- @cookie_jar,
481
- @user_agent,
482
- @request_headers
483
- ),
484
- Chain::CustomHeaders.new,
485
- @pre_connect_hook,
486
- ])
487
- before_connect.handle(options)
488
-
489
- uri = options[:uri]
490
- request = options[:request]
491
- cur_page = options[:referer]
492
- request_data = options[:params]
493
- redirects = options[:redirects]
494
- http_obj = options[:connection]
495
-
496
- # Add If-Modified-Since if page is in history
497
- if( (page = visited_page(uri)) && page.response['Last-Modified'] )
498
- request['If-Modified-Since'] = page.response['Last-Modified']
499
- end if(@conditional_requests)
500
-
501
- # Specify timeouts if given
502
- http_obj.open_timeout = @open_timeout if @open_timeout
503
- http_obj.read_timeout = @read_timeout if @read_timeout
504
- http_obj.start unless http_obj.started?
505
-
506
- # Log specified headers for the request
507
- log.info("#{ request.class }: #{ request.path }") if log
508
- request.each_header do |k, v|
509
- log.debug("request-header: #{ k } => #{ v }")
510
- end if log
511
-
512
- # Send the request
513
- attempts = 0
514
- begin
515
- response = http_obj.request(request, *request_data) { |r|
516
- connection_chain = Chain.new([
517
- Chain::ResponseReader.new(r),
518
- Chain::BodyDecodingHandler.new,
519
- ])
520
- connection_chain.handle(options)
521
- }
522
- rescue EOFError, Errno::ECONNRESET, Errno::EPIPE => x
523
- log.error("Rescuing EOF error") if log
524
- http_obj.finish
525
- raise x if attempts >= 2
526
- request.body = nil
527
- http_obj.start
528
- attempts += 1
529
- retry
530
- end
531
-
532
- after_connect = Chain.new([
533
- @post_connect_hook,
534
- Chain::ResponseBodyParser.new(@pluggable_parser, @watch_for_set),
535
- Chain::ResponseHeaderHandler.new(@cookie_jar, @connection_cache),
536
- ])
537
- after_connect.handle(options)
538
-
539
- res_klass = options[:res_klass]
540
- response_body = options[:response_body]
541
- page = options[:page]
542
-
543
- log.info("status: #{ page.code }") if log
544
-
545
- if follow_meta_refresh
546
- redirect_uri = nil
547
- referer = page
548
- if (page.respond_to?(:meta) && (redirect = page.meta.first))
549
- redirect_uri = redirect.uri.to_s
550
- sleep redirect.node['delay'].to_f
551
- referer = Page.new(nil, {'content-type'=>'text/html'})
552
- elsif refresh = response['refresh']
553
- delay, redirect_uri = Page::Meta.parse(refresh, uri)
554
- raise StandardError, "Invalid refresh http header" unless delay
555
- if redirects + 1 > redirection_limit
556
- raise RedirectLimitReachedError.new(page, redirects)
557
- end
558
- sleep delay.to_f
559
- end
560
- if redirect_uri
561
- @history.push(page, page.uri)
562
- return fetch_page(
563
- :uri => redirect_uri,
564
- :referer => referer,
565
- :params => [],
566
- :verb => :get,
567
- :redirects => redirects + 1
568
- )
569
- end
570
- end
571
-
572
- return page if res_klass <= Net::HTTPSuccess
573
-
574
- if res_klass == Net::HTTPNotModified
575
- log.debug("Got cached page") if log
576
- return visited_page(uri) || page
577
- elsif res_klass <= Net::HTTPRedirection
578
- return page unless follow_redirect?
579
- log.info("follow redirect to: #{ response['Location'] }") if log
580
- from_uri = page.uri
581
- raise RedirectLimitReachedError.new(page, redirects) if redirects + 1 > redirection_limit
582
- redirect_verb = options[:verb] == :head ? :head : :get
583
- page = fetch_page( :uri => response['Location'].to_s,
584
- :referer => page,
585
- :params => [],
586
- :verb => redirect_verb,
587
- :redirects => redirects + 1
588
- )
589
- @history.push(page, from_uri)
590
- return page
591
- elsif res_klass <= Net::HTTPUnauthorized
592
- raise ResponseCodeError.new(page) unless @user || @password
593
- raise ResponseCodeError.new(page) if @auth_hash.has_key?(uri.host)
594
- if response['www-authenticate'] =~ /Digest/i
595
- @auth_hash[uri.host] = :digest
596
- if response['server'] =~ /Microsoft-IIS/
597
- @auth_hash[uri.host] = :iis_digest
598
- end
599
- @digest = response['www-authenticate']
600
- else
601
- @auth_hash[uri.host] = :basic
602
- end
603
- return fetch_page( :uri => uri,
604
- :referer => cur_page,
605
- :verb => request.method.downcase.to_sym,
606
- :params => request_data,
607
- :headers => options[:headers]
608
- )
609
- end
610
-
611
- raise ResponseCodeError.new(page), "Unhandled response", caller
612
- end
613
-
614
- def add_to_history(page)
615
- @history.push(page, resolve(page.uri))
616
- history_added.call(page) if history_added
617
- end
618
- end
619
- end