tenderlove-mechanize 0.9.3.20090623142847 → 0.9.3.20090911221705

Sign up to get free protection for your applications and to get access to all the features.
Files changed (165)
  1. data/Manifest.txt +55 -48
  2. data/Rakefile +12 -22
  3. data/lib/mechanize.rb +618 -4
  4. data/lib/mechanize/chain.rb +33 -0
  5. data/lib/mechanize/chain/auth_headers.rb +78 -0
  6. data/lib/mechanize/chain/body_decoding_handler.rb +46 -0
  7. data/lib/mechanize/chain/connection_resolver.rb +76 -0
  8. data/lib/mechanize/chain/custom_headers.rb +21 -0
  9. data/lib/{www/mechanize → mechanize}/chain/handler.rb +1 -1
  10. data/lib/mechanize/chain/header_resolver.rb +51 -0
  11. data/lib/mechanize/chain/parameter_resolver.rb +22 -0
  12. data/lib/{www/mechanize → mechanize}/chain/post_connect_hook.rb +0 -0
  13. data/lib/mechanize/chain/pre_connect_hook.rb +20 -0
  14. data/lib/mechanize/chain/request_resolver.rb +30 -0
  15. data/lib/mechanize/chain/response_body_parser.rb +38 -0
  16. data/lib/mechanize/chain/response_header_handler.rb +48 -0
  17. data/lib/mechanize/chain/response_reader.rb +39 -0
  18. data/lib/mechanize/chain/ssl_resolver.rb +40 -0
  19. data/lib/mechanize/chain/uri_resolver.rb +75 -0
  20. data/lib/mechanize/content_type_error.rb +14 -0
  21. data/lib/mechanize/cookie.rb +70 -0
  22. data/lib/mechanize/cookie_jar.rb +188 -0
  23. data/lib/mechanize/file.rb +71 -0
  24. data/lib/mechanize/file_response.rb +60 -0
  25. data/lib/mechanize/file_saver.rb +37 -0
  26. data/lib/mechanize/form.rb +378 -0
  27. data/lib/mechanize/form/button.rb +9 -0
  28. data/lib/mechanize/form/check_box.rb +11 -0
  29. data/lib/mechanize/form/field.rb +30 -0
  30. data/lib/mechanize/form/file_upload.rb +22 -0
  31. data/lib/mechanize/form/image_button.rb +21 -0
  32. data/lib/mechanize/form/multi_select_list.rb +67 -0
  33. data/lib/mechanize/form/option.rb +49 -0
  34. data/lib/mechanize/form/radio_button.rb +49 -0
  35. data/lib/mechanize/form/select_list.rb +43 -0
  36. data/lib/mechanize/headers.rb +11 -0
  37. data/lib/mechanize/history.rb +65 -0
  38. data/lib/mechanize/inspect.rb +88 -0
  39. data/lib/{www/mechanize → mechanize}/monkey_patch.rb +4 -6
  40. data/lib/mechanize/page.rb +206 -0
  41. data/lib/mechanize/page/base.rb +8 -0
  42. data/lib/mechanize/page/frame.rb +20 -0
  43. data/lib/mechanize/page/image.rb +26 -0
  44. data/lib/mechanize/page/label.rb +20 -0
  45. data/lib/mechanize/page/link.rb +48 -0
  46. data/lib/mechanize/page/meta.rb +50 -0
  47. data/lib/mechanize/pluggable_parsers.rb +101 -0
  48. data/lib/mechanize/redirect_limit_reached_error.rb +16 -0
  49. data/lib/mechanize/redirect_not_get_or_head_error.rb +18 -0
  50. data/lib/mechanize/response_code_error.rb +22 -0
  51. data/lib/mechanize/unsupported_scheme_error.rb +8 -0
  52. data/lib/mechanize/util.rb +67 -0
  53. data/mechanize.gemspec +8 -8
  54. data/test/chain/test_argument_validator.rb +2 -2
  55. data/test/chain/test_auth_headers.rb +2 -2
  56. data/test/chain/test_custom_headers.rb +2 -2
  57. data/test/chain/test_header_resolver.rb +3 -3
  58. data/test/chain/test_parameter_resolver.rb +4 -4
  59. data/test/chain/test_request_resolver.rb +4 -4
  60. data/test/chain/test_response_reader.rb +3 -3
  61. data/test/helper.rb +1 -1
  62. data/test/htdocs/tc_bad_charset.html +9 -0
  63. data/test/htdocs/tc_charset.html +6 -0
  64. data/test/htdocs/test_bad_encoding.html +52 -0
  65. data/test/test_authenticate.rb +3 -3
  66. data/test/test_bad_links.rb +1 -1
  67. data/test/test_blank_form.rb +1 -1
  68. data/test/test_checkboxes.rb +1 -1
  69. data/test/test_content_type.rb +2 -2
  70. data/test/test_cookie_class.rb +12 -12
  71. data/test/test_cookie_jar.rb +13 -13
  72. data/test/test_cookies.rb +1 -1
  73. data/test/test_encoded_links.rb +1 -1
  74. data/test/test_errors.rb +2 -2
  75. data/test/test_follow_meta.rb +3 -3
  76. data/test/test_form_action.rb +1 -1
  77. data/test/test_form_as_hash.rb +1 -1
  78. data/test/test_form_button.rb +2 -2
  79. data/test/test_form_no_inputname.rb +1 -1
  80. data/test/test_forms.rb +1 -1
  81. data/test/test_frames.rb +1 -1
  82. data/test/test_get_headers.rb +1 -1
  83. data/test/test_gzipping.rb +2 -2
  84. data/test/test_hash_api.rb +1 -1
  85. data/test/test_history.rb +7 -7
  86. data/test/test_history_added.rb +1 -1
  87. data/test/test_html_unscape_forms.rb +7 -7
  88. data/test/test_if_modified_since.rb +1 -1
  89. data/test/test_keep_alive.rb +1 -1
  90. data/test/test_links.rb +2 -2
  91. data/test/test_mech.rb +2 -2
  92. data/test/test_mechanize_file.rb +7 -7
  93. data/test/test_meta.rb +2 -2
  94. data/test/test_multi_select.rb +1 -1
  95. data/test/test_no_attributes.rb +1 -1
  96. data/test/test_option.rb +1 -1
  97. data/test/test_page.rb +3 -3
  98. data/test/test_pluggable_parser.rb +14 -14
  99. data/test/test_post_form.rb +1 -1
  100. data/test/test_pretty_print.rb +2 -2
  101. data/test/test_radiobutton.rb +1 -1
  102. data/test/test_redirect_limit_reached.rb +1 -3
  103. data/test/test_redirect_verb_handling.rb +1 -3
  104. data/test/test_referer.rb +1 -1
  105. data/test/test_relative_links.rb +1 -1
  106. data/test/test_request.rb +1 -1
  107. data/test/test_response_code.rb +3 -3
  108. data/test/test_save_file.rb +3 -3
  109. data/test/test_scheme.rb +3 -3
  110. data/test/test_select.rb +2 -2
  111. data/test/test_select_all.rb +1 -1
  112. data/test/test_select_none.rb +1 -1
  113. data/test/test_select_noopts.rb +1 -1
  114. data/test/test_set_fields.rb +1 -1
  115. data/test/test_ssl_server.rb +1 -1
  116. data/test/test_subclass.rb +1 -1
  117. data/test/test_textarea.rb +1 -1
  118. data/test/test_upload.rb +1 -1
  119. data/test/test_verbs.rb +1 -1
  120. metadata +61 -56
  121. data/lib/www/mechanize.rb +0 -619
  122. data/lib/www/mechanize/chain.rb +0 -34
  123. data/lib/www/mechanize/chain/auth_headers.rb +0 -80
  124. data/lib/www/mechanize/chain/body_decoding_handler.rb +0 -48
  125. data/lib/www/mechanize/chain/connection_resolver.rb +0 -78
  126. data/lib/www/mechanize/chain/custom_headers.rb +0 -23
  127. data/lib/www/mechanize/chain/header_resolver.rb +0 -53
  128. data/lib/www/mechanize/chain/parameter_resolver.rb +0 -24
  129. data/lib/www/mechanize/chain/pre_connect_hook.rb +0 -22
  130. data/lib/www/mechanize/chain/request_resolver.rb +0 -32
  131. data/lib/www/mechanize/chain/response_body_parser.rb +0 -40
  132. data/lib/www/mechanize/chain/response_header_handler.rb +0 -50
  133. data/lib/www/mechanize/chain/response_reader.rb +0 -41
  134. data/lib/www/mechanize/chain/ssl_resolver.rb +0 -42
  135. data/lib/www/mechanize/chain/uri_resolver.rb +0 -77
  136. data/lib/www/mechanize/content_type_error.rb +0 -16
  137. data/lib/www/mechanize/cookie.rb +0 -72
  138. data/lib/www/mechanize/cookie_jar.rb +0 -191
  139. data/lib/www/mechanize/file.rb +0 -73
  140. data/lib/www/mechanize/file_response.rb +0 -62
  141. data/lib/www/mechanize/file_saver.rb +0 -39
  142. data/lib/www/mechanize/form.rb +0 -360
  143. data/lib/www/mechanize/form/button.rb +0 -8
  144. data/lib/www/mechanize/form/check_box.rb +0 -13
  145. data/lib/www/mechanize/form/field.rb +0 -28
  146. data/lib/www/mechanize/form/file_upload.rb +0 -24
  147. data/lib/www/mechanize/form/image_button.rb +0 -23
  148. data/lib/www/mechanize/form/multi_select_list.rb +0 -69
  149. data/lib/www/mechanize/form/option.rb +0 -51
  150. data/lib/www/mechanize/form/radio_button.rb +0 -38
  151. data/lib/www/mechanize/form/select_list.rb +0 -45
  152. data/lib/www/mechanize/headers.rb +0 -12
  153. data/lib/www/mechanize/history.rb +0 -67
  154. data/lib/www/mechanize/inspect.rb +0 -90
  155. data/lib/www/mechanize/page.rb +0 -181
  156. data/lib/www/mechanize/page/base.rb +0 -10
  157. data/lib/www/mechanize/page/frame.rb +0 -22
  158. data/lib/www/mechanize/page/link.rb +0 -50
  159. data/lib/www/mechanize/page/meta.rb +0 -51
  160. data/lib/www/mechanize/pluggable_parsers.rb +0 -103
  161. data/lib/www/mechanize/redirect_limit_reached_error.rb +0 -18
  162. data/lib/www/mechanize/redirect_not_get_or_head_error.rb +0 -20
  163. data/lib/www/mechanize/response_code_error.rb +0 -25
  164. data/lib/www/mechanize/unsupported_scheme_error.rb +0 -10
  165. data/lib/www/mechanize/util.rb +0 -76
data/Manifest.txt CHANGED
@@ -12,57 +12,60 @@ examples/proxy_req.rb
12
12
  examples/rubyforge.rb
13
13
  examples/spider.rb
14
14
  lib/mechanize.rb
15
- lib/www/mechanize.rb
16
- lib/www/mechanize/chain.rb
17
- lib/www/mechanize/chain/auth_headers.rb
18
- lib/www/mechanize/chain/body_decoding_handler.rb
19
- lib/www/mechanize/chain/connection_resolver.rb
20
- lib/www/mechanize/chain/custom_headers.rb
21
- lib/www/mechanize/chain/handler.rb
22
- lib/www/mechanize/chain/header_resolver.rb
23
- lib/www/mechanize/chain/parameter_resolver.rb
24
- lib/www/mechanize/chain/post_connect_hook.rb
25
- lib/www/mechanize/chain/pre_connect_hook.rb
26
- lib/www/mechanize/chain/request_resolver.rb
27
- lib/www/mechanize/chain/response_body_parser.rb
28
- lib/www/mechanize/chain/response_header_handler.rb
29
- lib/www/mechanize/chain/response_reader.rb
30
- lib/www/mechanize/chain/ssl_resolver.rb
31
- lib/www/mechanize/chain/uri_resolver.rb
32
- lib/www/mechanize/content_type_error.rb
33
- lib/www/mechanize/cookie.rb
34
- lib/www/mechanize/cookie_jar.rb
35
- lib/www/mechanize/file.rb
36
- lib/www/mechanize/file_response.rb
37
- lib/www/mechanize/file_saver.rb
38
- lib/www/mechanize/form.rb
39
- lib/www/mechanize/form/button.rb
40
- lib/www/mechanize/form/check_box.rb
41
- lib/www/mechanize/form/field.rb
42
- lib/www/mechanize/form/file_upload.rb
43
- lib/www/mechanize/form/image_button.rb
44
- lib/www/mechanize/form/multi_select_list.rb
45
- lib/www/mechanize/form/option.rb
46
- lib/www/mechanize/form/radio_button.rb
47
- lib/www/mechanize/form/select_list.rb
48
- lib/www/mechanize/headers.rb
49
- lib/www/mechanize/history.rb
50
- lib/www/mechanize/inspect.rb
51
- lib/www/mechanize/monkey_patch.rb
52
- lib/www/mechanize/page.rb
53
- lib/www/mechanize/page/base.rb
54
- lib/www/mechanize/page/frame.rb
55
- lib/www/mechanize/page/link.rb
56
- lib/www/mechanize/page/meta.rb
57
- lib/www/mechanize/pluggable_parsers.rb
58
- lib/www/mechanize/redirect_limit_reached_error.rb
59
- lib/www/mechanize/redirect_not_get_or_head_error.rb
60
- lib/www/mechanize/response_code_error.rb
61
- lib/www/mechanize/unsupported_scheme_error.rb
62
- lib/www/mechanize/util.rb
15
+ lib/mechanize/chain.rb
16
+ lib/mechanize/chain/auth_headers.rb
17
+ lib/mechanize/chain/body_decoding_handler.rb
18
+ lib/mechanize/chain/connection_resolver.rb
19
+ lib/mechanize/chain/custom_headers.rb
20
+ lib/mechanize/chain/handler.rb
21
+ lib/mechanize/chain/header_resolver.rb
22
+ lib/mechanize/chain/parameter_resolver.rb
23
+ lib/mechanize/chain/post_connect_hook.rb
24
+ lib/mechanize/chain/pre_connect_hook.rb
25
+ lib/mechanize/chain/request_resolver.rb
26
+ lib/mechanize/chain/response_body_parser.rb
27
+ lib/mechanize/chain/response_header_handler.rb
28
+ lib/mechanize/chain/response_reader.rb
29
+ lib/mechanize/chain/ssl_resolver.rb
30
+ lib/mechanize/chain/uri_resolver.rb
31
+ lib/mechanize/content_type_error.rb
32
+ lib/mechanize/cookie.rb
33
+ lib/mechanize/cookie_jar.rb
34
+ lib/mechanize/file.rb
35
+ lib/mechanize/file_response.rb
36
+ lib/mechanize/file_saver.rb
37
+ lib/mechanize/form.rb
38
+ lib/mechanize/form/button.rb
39
+ lib/mechanize/form/check_box.rb
40
+ lib/mechanize/form/field.rb
41
+ lib/mechanize/form/file_upload.rb
42
+ lib/mechanize/form/image_button.rb
43
+ lib/mechanize/form/multi_select_list.rb
44
+ lib/mechanize/form/option.rb
45
+ lib/mechanize/form/radio_button.rb
46
+ lib/mechanize/form/select_list.rb
47
+ lib/mechanize/headers.rb
48
+ lib/mechanize/history.rb
49
+ lib/mechanize/inspect.rb
50
+ lib/mechanize/monkey_patch.rb
51
+ lib/mechanize/page.rb
52
+ lib/mechanize/page/base.rb
53
+ lib/mechanize/page/frame.rb
54
+ lib/mechanize/page/image.rb
55
+ lib/mechanize/page/label.rb
56
+ lib/mechanize/page/link.rb
57
+ lib/mechanize/page/meta.rb
58
+ lib/mechanize/pluggable_parsers.rb
59
+ lib/mechanize/redirect_limit_reached_error.rb
60
+ lib/mechanize/redirect_not_get_or_head_error.rb
61
+ lib/mechanize/response_code_error.rb
62
+ lib/mechanize/unsupported_scheme_error.rb
63
+ lib/mechanize/util.rb
63
64
  mechanize.gemspec
64
65
  test/chain/test_argument_validator.rb
66
+ test/chain/test_auth_headers.rb
65
67
  test/chain/test_custom_headers.rb
68
+ test/chain/test_header_resolver.rb
66
69
  test/chain/test_parameter_resolver.rb
67
70
  test/chain/test_request_resolver.rb
68
71
  test/chain/test_response_reader.rb
@@ -96,9 +99,11 @@ test/htdocs/link with space.html
96
99
  test/htdocs/meta_cookie.html
97
100
  test/htdocs/no_title_test.html
98
101
  test/htdocs/relative/tc_relative_links.html
102
+ test/htdocs/tc_bad_charset.html
99
103
  test/htdocs/tc_bad_links.html
100
104
  test/htdocs/tc_base_link.html
101
105
  test/htdocs/tc_blank_form.html
106
+ test/htdocs/tc_charset.html
102
107
  test/htdocs/tc_checkboxes.html
103
108
  test/htdocs/tc_encoded_links.html
104
109
  test/htdocs/tc_follow_meta.html
@@ -110,6 +115,7 @@ test/htdocs/tc_radiobuttons.html
110
115
  test/htdocs/tc_referer.html
111
116
  test/htdocs/tc_relative_links.html
112
117
  test/htdocs/tc_textarea.html
118
+ test/htdocs/test_bad_encoding.html
113
119
  test/htdocs/unusual______.html
114
120
  test/servlets.rb
115
121
  test/ssl_server.rb
@@ -141,6 +147,7 @@ test/test_keep_alive.rb
141
147
  test/test_links.rb
142
148
  test/test_mech.rb
143
149
  test/test_mechanize_file.rb
150
+ test/test_meta.rb
144
151
  test/test_multi_select.rb
145
152
  test/test_no_attributes.rb
146
153
  test/test_option.rb
data/Rakefile CHANGED
@@ -1,17 +1,14 @@
1
1
  require 'rubygems'
2
2
  require 'hoe'
3
3
 
4
- $LOAD_PATH.unshift File.join(File.dirname(__FILE__), "lib")
5
- require 'mechanize'
4
+ Hoe.spec 'mechanize' do
5
+ developer 'Aaron Patterson', 'aaronp@rubyforge.org'
6
+ developer 'Mike Dalessio', 'mike.dalessio@gmail.com'
6
7
 
7
- HOE = Hoe.new('mechanize', WWW::Mechanize::VERSION) do |p|
8
- p.developer('Aaron Patterson','aaronp@rubyforge.org')
9
- p.developer('Mike Dalessio','mike.dalessio@gmail.com')
10
- p.readme_file = 'README.rdoc'
11
- p.history_file = 'CHANGELOG.rdoc'
12
- p.extra_rdoc_files = FileList['*.rdoc']
13
- p.summary = "Mechanize provides automated web-browsing"
14
- p.extra_deps = [['nokogiri', '>= 1.2.1']]
8
+ self.readme_file = 'README.rdoc'
9
+ self.history_file = 'CHANGELOG.rdoc'
10
+ self.extra_rdoc_files += Dir['*.rdoc']
11
+ self.extra_deps << ['nokogiri', '>= 1.2.1']
15
12
  end
16
13
 
17
14
  desc "Update SSL Certificate"
@@ -26,18 +23,11 @@ task('ssl_cert') do |p|
26
23
  sh "rm server.key.org"
27
24
  end
28
25
 
29
- namespace :gem do
30
- desc 'Generate a gem spec'
31
- task :spec do
32
- File.open("#{HOE.name}.gemspec", 'w') do |f|
33
- HOE.spec.version = "#{HOE.version}.#{Time.now.strftime("%Y%m%d%H%M%S")}"
34
- f.write(HOE.spec.to_ruby)
35
- end
26
+ desc 'Generate a gem spec'
27
+ task "gem:spec" do
28
+ File.open("mechanize.gemspec", 'w') do |f|
29
+ now = Time.now.strftime("%Y%m%d%H%M%S")
30
+ f.write `rake debug_gem`.sub(/(s.version = ".*)(")/) { "#{$1}.#{now}#{$2}" }
36
31
  end
37
32
  end
38
33
 
39
- desc "Run code-coverage analysis"
40
- task :coverage do
41
- rm_rf "coverage"
42
- sh "rcov -x Library -I lib:test #{Dir[*HOE.test_globs].join(' ')}"
43
- end
data/lib/mechanize.rb CHANGED
@@ -1,7 +1,621 @@
1
- # Copyright (c) 2005 by Michael Neumann (mneumann@ntecs.de)
2
- # Copyright (c) 2007 by Aaron Patterson (aaronp@rubyforge.org)
1
+ require 'net/http'
2
+ require 'net/https'
3
+ require 'uri'
4
+ require 'webrick/httputils'
5
+ require 'zlib'
6
+ require 'stringio'
7
+ require 'digest/md5'
8
+ require 'fileutils'
9
+ require 'nokogiri'
10
+ require 'forwardable'
11
+ require 'iconv'
12
+ require 'nkf'
13
+
14
+ require 'mechanize/util'
15
+ require 'mechanize/content_type_error'
16
+ require 'mechanize/response_code_error'
17
+ require 'mechanize/unsupported_scheme_error'
18
+ require 'mechanize/redirect_limit_reached_error'
19
+ require 'mechanize/redirect_not_get_or_head_error'
20
+ require 'mechanize/cookie'
21
+ require 'mechanize/cookie_jar'
22
+ require 'mechanize/history'
23
+ require 'mechanize/form'
24
+ require 'mechanize/pluggable_parsers'
25
+ require 'mechanize/file_response'
26
+ require 'mechanize/inspect'
27
+ require 'mechanize/chain'
28
+ require 'mechanize/monkey_patch'
29
+
30
+ # = Synopsis
31
+ # The Mechanize library is used for automating interaction with a website. It
32
+ # can follow links, and submit forms. Form fields can be populated and
33
+ # submitted. A history of URLs is maintained and can be queried.
34
+ #
35
+ # == Example
36
+ # require 'rubygems'
37
+ # require 'mechanize'
38
+ # require 'logger'
3
39
  #
4
- # Please see the LICENSE file for licensing.
40
+ # agent = WWW::Mechanize.new { |a| a.log = Logger.new("mech.log") }
41
+ # agent.user_agent_alias = 'Mac Safari'
42
+ # page = agent.get("http://www.google.com/")
43
+ # search_form = page.form_with(:name => "f")
44
+ # search_form.field_with(:name => "q").value = "Hello"
45
+ # search_results = agent.submit(search_form)
46
+ # puts search_results.body
47
+ class Mechanize
48
+ ##
49
+ # The version of Mechanize you are using.
50
+ VERSION = '0.9.3'
51
+
52
+ ##
53
+ # User Agent aliases
54
+ AGENT_ALIASES = {
55
+ 'Windows IE 6' => 'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)',
56
+ 'Windows IE 7' => 'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; .NET CLR 1.1.4322; .NET CLR 2.0.50727)',
57
+ 'Windows Mozilla' => 'Mozilla/5.0 (Windows; U; Windows NT 5.0; en-US; rv:1.4b) Gecko/20030516 Mozilla Firebird/0.6',
58
+ 'Mac Safari' => 'Mozilla/5.0 (Macintosh; U; PPC Mac OS X; en) AppleWebKit/418 (KHTML, like Gecko) Safari/417.9.3',
59
+ 'Mac FireFox' => 'Mozilla/5.0 (Macintosh; U; PPC Mac OS X Mach-O; en-US; rv:1.8.0.3) Gecko/20060426 Firefox/1.5.0.3',
60
+ 'Mac Mozilla' => 'Mozilla/5.0 (Macintosh; U; PPC Mac OS X Mach-O; en-US; rv:1.4a) Gecko/20030401',
61
+ 'Linux Mozilla' => 'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.4) Gecko/20030624',
62
+ 'Linux Konqueror' => 'Mozilla/5.0 (compatible; Konqueror/3; Linux)',
63
+ 'iPhone' => 'Mozilla/5.0 (iPhone; U; CPU like Mac OS X; en) AppleWebKit/420+ (KHTML, like Gecko) Version/3.0 Mobile/1C28 Safari/419.3',
64
+ 'Mechanize' => "WWW-Mechanize/#{VERSION} (http://rubyforge.org/projects/mechanize/)"
65
+ }
66
+
67
+ attr_accessor :cookie_jar
68
+ attr_accessor :open_timeout, :read_timeout
69
+ attr_accessor :user_agent
70
+ attr_accessor :watch_for_set
71
+ attr_accessor :ca_file
72
+ attr_accessor :key
73
+ attr_accessor :cert
74
+ attr_accessor :pass
75
+ attr_accessor :redirect_ok
76
+ attr_accessor :keep_alive_time
77
+ attr_accessor :keep_alive
78
+ attr_accessor :conditional_requests
79
+ attr_accessor :follow_meta_refresh
80
+ attr_accessor :verify_callback
81
+ attr_accessor :history_added
82
+ attr_accessor :scheme_handlers
83
+ attr_accessor :redirection_limit
84
+
85
+ # A hash of custom request headers
86
+ attr_accessor :request_headers
87
+
88
+ # The HTML parser to be used when parsing documents
89
+ attr_accessor :html_parser
90
+
91
+ attr_reader :history
92
+ attr_reader :pluggable_parser
93
+
94
+ alias :follow_redirect? :redirect_ok
95
+
96
+ @html_parser = Nokogiri::HTML
97
+ class << self; attr_accessor :html_parser, :log end
98
+
99
+ def initialize
100
+ # attr_accessors
101
+ @cookie_jar = CookieJar.new
102
+ @log = nil
103
+ @open_timeout = nil
104
+ @read_timeout = nil
105
+ @user_agent = AGENT_ALIASES['Mechanize']
106
+ @watch_for_set = nil
107
+ @history_added = nil
108
+ @ca_file = nil # OpenSSL server certificate file
109
+
110
+ # callback for OpenSSL errors while verifying the server certificate
111
+ # chain, can be used for debugging or to ignore errors by always
112
+ # returning _true_
113
+ @verify_callback = nil
114
+ @cert = nil # OpenSSL Certificate
115
+ @key = nil # OpenSSL Private Key
116
+ @pass = nil # OpenSSL Password
117
+ @redirect_ok = true # Should we follow redirects?
118
+
119
+ # attr_readers
120
+ @history = Mechanize::History.new
121
+ @pluggable_parser = PluggableParser.new
122
+
123
+ # Auth variables
124
+ @user = nil # Auth User
125
+ @password = nil # Auth Password
126
+ @digest = nil # DigestAuth Digest
127
+ @auth_hash = {} # Keep track of urls for sending auth
128
+ @request_headers= {} # A hash of request headers to be used
129
+
130
+ # Proxy settings
131
+ @proxy_addr = nil
132
+ @proxy_pass = nil
133
+ @proxy_port = nil
134
+ @proxy_user = nil
135
+
136
+ @conditional_requests = true
137
+
138
+ @follow_meta_refresh = false
139
+ @redirection_limit = 20
140
+
141
+ # Connection Cache & Keep alive
142
+ @connection_cache = {}
143
+ @keep_alive_time = 300
144
+ @keep_alive = true
145
+
146
+ @scheme_handlers = Hash.new { |h,k|
147
+ h[k] = lambda { |link, page|
148
+ raise UnsupportedSchemeError.new(k)
149
+ }
150
+ }
151
+ @scheme_handlers['http'] = lambda { |link, page| link }
152
+ @scheme_handlers['https'] = @scheme_handlers['http']
153
+ @scheme_handlers['relative'] = @scheme_handlers['http']
154
+ @scheme_handlers['file'] = @scheme_handlers['http']
155
+
156
+ @pre_connect_hook = Chain::PreConnectHook.new
157
+ @post_connect_hook = Chain::PostConnectHook.new
158
+
159
+ @html_parser = self.class.html_parser
160
+
161
+ yield self if block_given?
162
+ end
163
+
164
+ def max_history=(length); @history.max_size = length end
165
+ def max_history; @history.max_size end
166
+ def log=(l); self.class.log = l end
167
+ def log; self.class.log end
168
+
169
+ def pre_connect_hooks
170
+ @pre_connect_hook.hooks
171
+ end
172
+
173
+ def post_connect_hooks
174
+ @post_connect_hook.hooks
175
+ end
176
+
177
+ # Sets the proxy address, port, user, and password
178
+ # +addr+ should be a host, with no "http://"
179
+ def set_proxy(addr, port, user = nil, pass = nil)
180
+ @proxy_addr, @proxy_port, @proxy_user, @proxy_pass = addr, port, user, pass
181
+ end
182
+
183
+ # Set the user agent for the Mechanize object.
184
+ # See AGENT_ALIASES
185
+ def user_agent_alias=(al)
186
+ self.user_agent = AGENT_ALIASES[al] || raise("unknown agent alias")
187
+ end
188
+
189
+ # Returns a list of cookies stored in the cookie jar.
190
+ def cookies
191
+ @cookie_jar.to_a
192
+ end
193
+
194
+ # Sets the user and password to be used for authentication.
195
+ def auth(user, password)
196
+ @user = user
197
+ @password = password
198
+ end
199
+ alias :basic_auth :auth
200
+
201
+ # Fetches the URL passed in and returns a page.
202
+ def get(options, parameters = [], referer = nil)
203
+ unless options.is_a? Hash
204
+ url = options
205
+ unless parameters.respond_to?(:each) # FIXME: Remove this in 0.8.0
206
+ referer = parameters
207
+ parameters = []
208
+ end
209
+ else
210
+ raise ArgumentError.new("url must be specified") unless url = options[:url]
211
+ parameters = options[:params] || []
212
+ referer = options[:referer]
213
+ headers = options[:headers]
214
+ end
215
+
216
+ unless referer
217
+ if url.to_s =~ /^http/
218
+ referer = Page.new(nil, {'content-type'=>'text/html'})
219
+ else
220
+ referer = current_page || Page.new(nil, {'content-type'=>'text/html'})
221
+ end
222
+ end
223
+
224
+ # FIXME: Huge hack so that using a URI as a referer works. I need to
225
+ # refactor everything to pass around URIs but still support
226
+ # Mechanize::Page#base
227
+ unless referer.is_a?(Mechanize::File)
228
+ referer = referer.is_a?(String) ?
229
+ Page.new(URI.parse(referer), {'content-type' => 'text/html'}) :
230
+ Page.new(referer, {'content-type' => 'text/html'})
231
+ end
232
+
233
+ # fetch the page
234
+ page = fetch_page( :uri => url,
235
+ :referer => referer,
236
+ :headers => headers || {},
237
+ :params => parameters
238
+ )
239
+ add_to_history(page)
240
+ yield page if block_given?
241
+ page
242
+ end
243
+
244
+ ####
245
+ # PUT to +url+ with +entity+, and setting +options+:
246
+ #
247
+ # put('http://tenderlovemaking.com/', 'new content', :headers => {'Content-Type' => 'text/plain'})
248
+ #
249
+ def put(url, entity, options = {})
250
+ request_with_entity(:put, url, entity, options)
251
+ end
252
+
253
+ ####
254
+ # DELETE to +url+ with +query_params+, and setting +options+:
255
+ #
256
+ # delete('http://tenderlovemaking.com/', {'q' => 'foo'}, :headers => {})
257
+ #
258
+ def delete(url, query_params = {}, options = {})
259
+ page = head(url, query_params, options.merge({:verb => :delete}))
260
+ add_to_history(page)
261
+ page
262
+ end
263
+
264
+ ####
265
+ # HEAD to +url+ with +query_params+, and setting +options+:
266
+ #
267
+ # head('http://tenderlovemaking.com/', {'q' => 'foo'}, :headers => {})
268
+ #
269
+ def head(url, query_params = {}, options = {})
270
+ options = {
271
+ :uri => url,
272
+ :headers => {},
273
+ :params => query_params,
274
+ :verb => :head
275
+ }.merge(options)
276
+ # fetch the page
277
+ page = fetch_page(options)
278
+ yield page if block_given?
279
+ page
280
+ end
281
+
282
+ # Fetch a file and return the contents of the file.
283
+ def get_file(url)
284
+ get(url).body
285
+ end
286
+
287
+ # Clicks the Mechanize::Link object passed in and returns the
288
+ # page fetched.
289
+ def click(link)
290
+ referer = link.page rescue referer = nil
291
+ href = link.respond_to?(:href) ? link.href :
292
+ (link['href'] || link['src'])
293
+ get(:url => href, :referer => (referer || current_page()))
294
+ end
295
+
296
+ # Equivalent to the browser back button. Returns the most recent page
297
+ # visited.
298
+ def back
299
+ @history.pop
300
+ end
301
+
302
+ # Posts to the given URL with the request entity. The request
303
+ # entity is specified by either a string, or a list of key-value
304
+ # pairs represented by a hash or an array of arrays.
305
+ #
306
+ # Examples:
307
+ # agent.post('http://example.com/', "foo" => "bar")
308
+ #
309
+ # agent.post('http://example.com/', [ ["foo", "bar"] ])
310
+ #
311
+ # agent.post('http://example.com/', "<message>hello</message>", 'Content-Type' => 'application/xml')
312
+ def post(url, query={}, headers={})
313
+ if query.is_a?(String)
314
+ return request_with_entity(:post, url, query, :headers => headers)
315
+ end
316
+ node = {}
317
+ # Create a fake form
318
+ class << node
319
+ def search(*args); []; end
320
+ end
321
+ node['method'] = 'POST'
322
+ node['enctype'] = 'application/x-www-form-urlencoded'
323
+
324
+ form = Form.new(node)
325
+ query.each { |k,v|
326
+ if v.is_a?(IO)
327
+ form.enctype = 'multipart/form-data'
328
+ ul = Form::FileUpload.new(k.to_s,::File.basename(v.path))
329
+ ul.file_data = v.read
330
+ form.file_uploads << ul
331
+ else
332
+ form.fields << Form::Field.new(k.to_s,v)
333
+ end
334
+ }
335
+ post_form(url, form, headers)
336
+ end
337
+
338
+ # Submit a form with an optional button.
339
+ # Without a button:
340
+ # page = agent.get('http://example.com')
341
+ # agent.submit(page.forms.first)
342
+ # With a button
343
+ # agent.submit(page.forms.first, page.forms.first.buttons.first)
344
+ def submit(form, button=nil, headers={})
345
+ form.add_button_to_query(button) if button
346
+ case form.method.upcase
347
+ when 'POST'
348
+ post_form(form.action, form, headers)
349
+ when 'GET'
350
+ get( :url => form.action.gsub(/\?[^\?]*$/, ''),
351
+ :params => form.build_query,
352
+ :headers => headers,
353
+ :referer => form.page
354
+ )
355
+ else
356
+ raise "unsupported method: #{form.method.upcase}"
357
+ end
358
+ end
359
+
360
+ def request_with_entity(verb, url, entity, options={})
361
+ cur_page = current_page || Page.new( nil, {'content-type'=>'text/html'})
362
+
363
+ options = {
364
+ :uri => url,
365
+ :referer => cur_page,
366
+ :headers => {},
367
+ }.update(options)
368
+
369
+ headers = {
370
+ 'Content-Type' => 'application/octet-stream',
371
+ 'Content-Length' => entity.size.to_s,
372
+ }.update(options[:headers])
373
+
374
+ options.update({
375
+ :verb => verb,
376
+ :params => [entity],
377
+ :headers => headers,
378
+ })
379
+
380
+ page = fetch_page(options)
381
+ add_to_history(page)
382
+ page
383
+ end
384
+
385
+ # Returns the current page loaded by Mechanize
386
+ def current_page
387
+ @history.last
388
+ end
389
+
390
+ # Returns whether or not a url has been visited
391
+ def visited?(url)
392
+ ! visited_page(url).nil?
393
+ end
394
+
395
+ # Returns a visited page for the url passed in, otherwise nil
396
+ def visited_page(url)
397
+ if url.respond_to? :href
398
+ url = url.href
399
+ end
400
+ @history.visited_page(resolve(url))
401
+ end
402
+
403
+ # Runs given block, then resets the page history as it was before. self is
404
+ # given as a parameter to the block. Returns the value of the block.
405
+ def transact
406
+ history_backup = @history.dup
407
+ begin
408
+ yield self
409
+ ensure
410
+ @history = history_backup
411
+ end
412
+ end
413
+
414
+ alias :page :current_page
415
+
416
+ private
417
+
418
+ def resolve(url, referer = current_page())
419
+ hash = { :uri => url, :referer => referer }
420
+ chain = Chain.new([
421
+ Chain::URIResolver.new(@scheme_handlers)
422
+ ]).handle(hash)
423
+ hash[:uri].to_s
424
+ end
425
+
426
+ def post_form(url, form, headers = {})
427
+ cur_page = form.page || current_page ||
428
+ Page.new( nil, {'content-type'=>'text/html'})
429
+
430
+ request_data = form.request_data
431
+
432
+ log.debug("query: #{ request_data.inspect }") if log
433
+
434
+ # fetch the page
435
+ page = fetch_page( :uri => url,
436
+ :referer => cur_page,
437
+ :verb => :post,
438
+ :params => [request_data],
439
+ :headers => {
440
+ 'Content-Type' => form.enctype,
441
+ 'Content-Length' => request_data.size.to_s,
442
+ }.merge(headers))
443
+ add_to_history(page)
444
+ page
445
+ end
446
+
447
+ # uri is an absolute URI
448
+ def fetch_page(params)
449
+ options = {
450
+ :request => nil,
451
+ :response => nil,
452
+ :connection => nil,
453
+ :referer => current_page(),
454
+ :uri => nil,
455
+ :verb => :get,
456
+ :agent => self,
457
+ :redirects => 0,
458
+ :params => [],
459
+ :headers => {},
460
+ }.merge(params)
461
+
462
+ before_connect = Chain.new([
463
+ Chain::URIResolver.new(@scheme_handlers),
464
+ Chain::ParameterResolver.new,
465
+ Chain::RequestResolver.new,
466
+ Chain::ConnectionResolver.new(
467
+ @connection_cache,
468
+ @keep_alive,
469
+ @proxy_addr,
470
+ @proxy_port,
471
+ @proxy_user,
472
+ @proxy_pass
473
+ ),
474
+ Chain::SSLResolver.new(@ca_file, @verify_callback, @cert, @key, @pass),
475
+ Chain::AuthHeaders.new(@auth_hash, @user, @password, @digest),
476
+ Chain::HeaderResolver.new(
477
+ @keep_alive,
478
+ @keep_alive_time,
479
+ @cookie_jar,
480
+ @user_agent,
481
+ @request_headers
482
+ ),
483
+ Chain::CustomHeaders.new,
484
+ @pre_connect_hook,
485
+ ])
486
+ before_connect.handle(options)
487
+
488
+ uri = options[:uri]
489
+ request = options[:request]
490
+ cur_page = options[:referer]
491
+ request_data = options[:params]
492
+ redirects = options[:redirects]
493
+ http_obj = options[:connection]
494
+
495
+ # Add If-Modified-Since if page is in history
496
+ if( (page = visited_page(uri)) && page.response['Last-Modified'] )
497
+ request['If-Modified-Since'] = page.response['Last-Modified']
498
+ end if(@conditional_requests)
499
+
500
+ # Specify timeouts if given
501
+ http_obj.open_timeout = @open_timeout if @open_timeout
502
+ http_obj.read_timeout = @read_timeout if @read_timeout
503
+ http_obj.start unless http_obj.started?
504
+
505
+ # Log specified headers for the request
506
+ log.info("#{ request.class }: #{ request.path }") if log
507
+ request.each_header do |k, v|
508
+ log.debug("request-header: #{ k } => #{ v }")
509
+ end if log
510
+
511
+ # Send the request
512
+ attempts = 0
513
+ begin
514
+ response = http_obj.request(request, *request_data) { |r|
515
+ connection_chain = Chain.new([
516
+ Chain::ResponseReader.new(r),
517
+ Chain::BodyDecodingHandler.new,
518
+ ])
519
+ connection_chain.handle(options)
520
+ }
521
+ rescue EOFError, Errno::ECONNRESET, Errno::EPIPE => x
522
+ log.error("Rescuing EOF error") if log
523
+ http_obj.finish
524
+ raise x if attempts >= 2
525
+ request.body = nil
526
+ http_obj.start
527
+ attempts += 1
528
+ retry
529
+ end
530
+
531
+ after_connect = Chain.new([
532
+ @post_connect_hook,
533
+ Chain::ResponseBodyParser.new(@pluggable_parser, @watch_for_set),
534
+ Chain::ResponseHeaderHandler.new(@cookie_jar, @connection_cache),
535
+ ])
536
+ after_connect.handle(options)
537
+
538
+ res_klass = options[:res_klass]
539
+ response_body = options[:response_body]
540
+ page = options[:page]
541
+
542
+ log.info("status: #{ page.code }") if log
543
+
544
+ if follow_meta_refresh
545
+ redirect_uri = nil
546
+ referer = page
547
+ if (page.respond_to?(:meta) && (redirect = page.meta.first))
548
+ redirect_uri = redirect.uri.to_s
549
+ sleep redirect.node['delay'].to_f
550
+ referer = Page.new(nil, {'content-type'=>'text/html'})
551
+ elsif refresh = response['refresh']
552
+ delay, redirect_uri = Page::Meta.parse(refresh, uri)
553
+ raise StandardError, "Invalid refresh http header" unless delay
554
+ if redirects + 1 > redirection_limit
555
+ raise RedirectLimitReachedError.new(page, redirects)
556
+ end
557
+ sleep delay.to_f
558
+ end
559
+ if redirect_uri
560
+ @history.push(page, page.uri)
561
+ return fetch_page(
562
+ :uri => redirect_uri,
563
+ :referer => referer,
564
+ :params => [],
565
+ :verb => :get,
566
+ :redirects => redirects + 1
567
+ )
568
+ end
569
+ end
570
+
571
+ return page if res_klass <= Net::HTTPSuccess
572
+
573
+ if res_klass == Net::HTTPNotModified
574
+ log.debug("Got cached page") if log
575
+ return visited_page(uri) || page
576
+ elsif res_klass <= Net::HTTPRedirection
577
+ return page unless follow_redirect?
578
+ log.info("follow redirect to: #{ response['Location'] }") if log
579
+ from_uri = page.uri
580
+ raise RedirectLimitReachedError.new(page, redirects) if redirects + 1 > redirection_limit
581
+ redirect_verb = options[:verb] == :head ? :head : :get
582
+ page = fetch_page( :uri => response['Location'].to_s,
583
+ :referer => page,
584
+ :params => [],
585
+ :verb => redirect_verb,
586
+ :redirects => redirects + 1
587
+ )
588
+ @history.push(page, from_uri)
589
+ return page
590
+ elsif res_klass <= Net::HTTPUnauthorized
591
+ raise ResponseCodeError.new(page) unless @user || @password
592
+ raise ResponseCodeError.new(page) if @auth_hash.has_key?(uri.host)
593
+ if response['www-authenticate'] =~ /Digest/i
594
+ @auth_hash[uri.host] = :digest
595
+ if response['server'] =~ /Microsoft-IIS/
596
+ @auth_hash[uri.host] = :iis_digest
597
+ end
598
+ @digest = response['www-authenticate']
599
+ else
600
+ @auth_hash[uri.host] = :basic
601
+ end
602
+ return fetch_page( :uri => uri,
603
+ :referer => cur_page,
604
+ :verb => request.method.downcase.to_sym,
605
+ :params => request_data,
606
+ :headers => options[:headers]
607
+ )
608
+ end
609
+
610
+ raise ResponseCodeError.new(page), "Unhandled response", caller
611
+ end
612
+
613
+ def add_to_history(page)
614
+ @history.push(page, resolve(page.uri))
615
+ history_added.call(page) if history_added
616
+ end
617
+ end
5
618
 
619
+ module WWW; end
620
+ WWW::Mechanize = ::Mechanize
6
621
 
7
- require 'www/mechanize'