tenderlove-mechanize 0.9.3.20090623142847 → 0.9.3.20090911221705

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (165):
  1. data/Manifest.txt +55 -48
  2. data/Rakefile +12 -22
  3. data/lib/mechanize.rb +618 -4
  4. data/lib/mechanize/chain.rb +33 -0
  5. data/lib/mechanize/chain/auth_headers.rb +78 -0
  6. data/lib/mechanize/chain/body_decoding_handler.rb +46 -0
  7. data/lib/mechanize/chain/connection_resolver.rb +76 -0
  8. data/lib/mechanize/chain/custom_headers.rb +21 -0
  9. data/lib/{www/mechanize → mechanize}/chain/handler.rb +1 -1
  10. data/lib/mechanize/chain/header_resolver.rb +51 -0
  11. data/lib/mechanize/chain/parameter_resolver.rb +22 -0
  12. data/lib/{www/mechanize → mechanize}/chain/post_connect_hook.rb +0 -0
  13. data/lib/mechanize/chain/pre_connect_hook.rb +20 -0
  14. data/lib/mechanize/chain/request_resolver.rb +30 -0
  15. data/lib/mechanize/chain/response_body_parser.rb +38 -0
  16. data/lib/mechanize/chain/response_header_handler.rb +48 -0
  17. data/lib/mechanize/chain/response_reader.rb +39 -0
  18. data/lib/mechanize/chain/ssl_resolver.rb +40 -0
  19. data/lib/mechanize/chain/uri_resolver.rb +75 -0
  20. data/lib/mechanize/content_type_error.rb +14 -0
  21. data/lib/mechanize/cookie.rb +70 -0
  22. data/lib/mechanize/cookie_jar.rb +188 -0
  23. data/lib/mechanize/file.rb +71 -0
  24. data/lib/mechanize/file_response.rb +60 -0
  25. data/lib/mechanize/file_saver.rb +37 -0
  26. data/lib/mechanize/form.rb +378 -0
  27. data/lib/mechanize/form/button.rb +9 -0
  28. data/lib/mechanize/form/check_box.rb +11 -0
  29. data/lib/mechanize/form/field.rb +30 -0
  30. data/lib/mechanize/form/file_upload.rb +22 -0
  31. data/lib/mechanize/form/image_button.rb +21 -0
  32. data/lib/mechanize/form/multi_select_list.rb +67 -0
  33. data/lib/mechanize/form/option.rb +49 -0
  34. data/lib/mechanize/form/radio_button.rb +49 -0
  35. data/lib/mechanize/form/select_list.rb +43 -0
  36. data/lib/mechanize/headers.rb +11 -0
  37. data/lib/mechanize/history.rb +65 -0
  38. data/lib/mechanize/inspect.rb +88 -0
  39. data/lib/{www/mechanize → mechanize}/monkey_patch.rb +4 -6
  40. data/lib/mechanize/page.rb +206 -0
  41. data/lib/mechanize/page/base.rb +8 -0
  42. data/lib/mechanize/page/frame.rb +20 -0
  43. data/lib/mechanize/page/image.rb +26 -0
  44. data/lib/mechanize/page/label.rb +20 -0
  45. data/lib/mechanize/page/link.rb +48 -0
  46. data/lib/mechanize/page/meta.rb +50 -0
  47. data/lib/mechanize/pluggable_parsers.rb +101 -0
  48. data/lib/mechanize/redirect_limit_reached_error.rb +16 -0
  49. data/lib/mechanize/redirect_not_get_or_head_error.rb +18 -0
  50. data/lib/mechanize/response_code_error.rb +22 -0
  51. data/lib/mechanize/unsupported_scheme_error.rb +8 -0
  52. data/lib/mechanize/util.rb +67 -0
  53. data/mechanize.gemspec +8 -8
  54. data/test/chain/test_argument_validator.rb +2 -2
  55. data/test/chain/test_auth_headers.rb +2 -2
  56. data/test/chain/test_custom_headers.rb +2 -2
  57. data/test/chain/test_header_resolver.rb +3 -3
  58. data/test/chain/test_parameter_resolver.rb +4 -4
  59. data/test/chain/test_request_resolver.rb +4 -4
  60. data/test/chain/test_response_reader.rb +3 -3
  61. data/test/helper.rb +1 -1
  62. data/test/htdocs/tc_bad_charset.html +9 -0
  63. data/test/htdocs/tc_charset.html +6 -0
  64. data/test/htdocs/test_bad_encoding.html +52 -0
  65. data/test/test_authenticate.rb +3 -3
  66. data/test/test_bad_links.rb +1 -1
  67. data/test/test_blank_form.rb +1 -1
  68. data/test/test_checkboxes.rb +1 -1
  69. data/test/test_content_type.rb +2 -2
  70. data/test/test_cookie_class.rb +12 -12
  71. data/test/test_cookie_jar.rb +13 -13
  72. data/test/test_cookies.rb +1 -1
  73. data/test/test_encoded_links.rb +1 -1
  74. data/test/test_errors.rb +2 -2
  75. data/test/test_follow_meta.rb +3 -3
  76. data/test/test_form_action.rb +1 -1
  77. data/test/test_form_as_hash.rb +1 -1
  78. data/test/test_form_button.rb +2 -2
  79. data/test/test_form_no_inputname.rb +1 -1
  80. data/test/test_forms.rb +1 -1
  81. data/test/test_frames.rb +1 -1
  82. data/test/test_get_headers.rb +1 -1
  83. data/test/test_gzipping.rb +2 -2
  84. data/test/test_hash_api.rb +1 -1
  85. data/test/test_history.rb +7 -7
  86. data/test/test_history_added.rb +1 -1
  87. data/test/test_html_unscape_forms.rb +7 -7
  88. data/test/test_if_modified_since.rb +1 -1
  89. data/test/test_keep_alive.rb +1 -1
  90. data/test/test_links.rb +2 -2
  91. data/test/test_mech.rb +2 -2
  92. data/test/test_mechanize_file.rb +7 -7
  93. data/test/test_meta.rb +2 -2
  94. data/test/test_multi_select.rb +1 -1
  95. data/test/test_no_attributes.rb +1 -1
  96. data/test/test_option.rb +1 -1
  97. data/test/test_page.rb +3 -3
  98. data/test/test_pluggable_parser.rb +14 -14
  99. data/test/test_post_form.rb +1 -1
  100. data/test/test_pretty_print.rb +2 -2
  101. data/test/test_radiobutton.rb +1 -1
  102. data/test/test_redirect_limit_reached.rb +1 -3
  103. data/test/test_redirect_verb_handling.rb +1 -3
  104. data/test/test_referer.rb +1 -1
  105. data/test/test_relative_links.rb +1 -1
  106. data/test/test_request.rb +1 -1
  107. data/test/test_response_code.rb +3 -3
  108. data/test/test_save_file.rb +3 -3
  109. data/test/test_scheme.rb +3 -3
  110. data/test/test_select.rb +2 -2
  111. data/test/test_select_all.rb +1 -1
  112. data/test/test_select_none.rb +1 -1
  113. data/test/test_select_noopts.rb +1 -1
  114. data/test/test_set_fields.rb +1 -1
  115. data/test/test_ssl_server.rb +1 -1
  116. data/test/test_subclass.rb +1 -1
  117. data/test/test_textarea.rb +1 -1
  118. data/test/test_upload.rb +1 -1
  119. data/test/test_verbs.rb +1 -1
  120. metadata +61 -56
  121. data/lib/www/mechanize.rb +0 -619
  122. data/lib/www/mechanize/chain.rb +0 -34
  123. data/lib/www/mechanize/chain/auth_headers.rb +0 -80
  124. data/lib/www/mechanize/chain/body_decoding_handler.rb +0 -48
  125. data/lib/www/mechanize/chain/connection_resolver.rb +0 -78
  126. data/lib/www/mechanize/chain/custom_headers.rb +0 -23
  127. data/lib/www/mechanize/chain/header_resolver.rb +0 -53
  128. data/lib/www/mechanize/chain/parameter_resolver.rb +0 -24
  129. data/lib/www/mechanize/chain/pre_connect_hook.rb +0 -22
  130. data/lib/www/mechanize/chain/request_resolver.rb +0 -32
  131. data/lib/www/mechanize/chain/response_body_parser.rb +0 -40
  132. data/lib/www/mechanize/chain/response_header_handler.rb +0 -50
  133. data/lib/www/mechanize/chain/response_reader.rb +0 -41
  134. data/lib/www/mechanize/chain/ssl_resolver.rb +0 -42
  135. data/lib/www/mechanize/chain/uri_resolver.rb +0 -77
  136. data/lib/www/mechanize/content_type_error.rb +0 -16
  137. data/lib/www/mechanize/cookie.rb +0 -72
  138. data/lib/www/mechanize/cookie_jar.rb +0 -191
  139. data/lib/www/mechanize/file.rb +0 -73
  140. data/lib/www/mechanize/file_response.rb +0 -62
  141. data/lib/www/mechanize/file_saver.rb +0 -39
  142. data/lib/www/mechanize/form.rb +0 -360
  143. data/lib/www/mechanize/form/button.rb +0 -8
  144. data/lib/www/mechanize/form/check_box.rb +0 -13
  145. data/lib/www/mechanize/form/field.rb +0 -28
  146. data/lib/www/mechanize/form/file_upload.rb +0 -24
  147. data/lib/www/mechanize/form/image_button.rb +0 -23
  148. data/lib/www/mechanize/form/multi_select_list.rb +0 -69
  149. data/lib/www/mechanize/form/option.rb +0 -51
  150. data/lib/www/mechanize/form/radio_button.rb +0 -38
  151. data/lib/www/mechanize/form/select_list.rb +0 -45
  152. data/lib/www/mechanize/headers.rb +0 -12
  153. data/lib/www/mechanize/history.rb +0 -67
  154. data/lib/www/mechanize/inspect.rb +0 -90
  155. data/lib/www/mechanize/page.rb +0 -181
  156. data/lib/www/mechanize/page/base.rb +0 -10
  157. data/lib/www/mechanize/page/frame.rb +0 -22
  158. data/lib/www/mechanize/page/link.rb +0 -50
  159. data/lib/www/mechanize/page/meta.rb +0 -51
  160. data/lib/www/mechanize/pluggable_parsers.rb +0 -103
  161. data/lib/www/mechanize/redirect_limit_reached_error.rb +0 -18
  162. data/lib/www/mechanize/redirect_not_get_or_head_error.rb +0 -20
  163. data/lib/www/mechanize/response_code_error.rb +0 -25
  164. data/lib/www/mechanize/unsupported_scheme_error.rb +0 -10
  165. data/lib/www/mechanize/util.rb +0 -76
data/Manifest.txt CHANGED
@@ -12,57 +12,60 @@ examples/proxy_req.rb
12
12
  examples/rubyforge.rb
13
13
  examples/spider.rb
14
14
  lib/mechanize.rb
15
- lib/www/mechanize.rb
16
- lib/www/mechanize/chain.rb
17
- lib/www/mechanize/chain/auth_headers.rb
18
- lib/www/mechanize/chain/body_decoding_handler.rb
19
- lib/www/mechanize/chain/connection_resolver.rb
20
- lib/www/mechanize/chain/custom_headers.rb
21
- lib/www/mechanize/chain/handler.rb
22
- lib/www/mechanize/chain/header_resolver.rb
23
- lib/www/mechanize/chain/parameter_resolver.rb
24
- lib/www/mechanize/chain/post_connect_hook.rb
25
- lib/www/mechanize/chain/pre_connect_hook.rb
26
- lib/www/mechanize/chain/request_resolver.rb
27
- lib/www/mechanize/chain/response_body_parser.rb
28
- lib/www/mechanize/chain/response_header_handler.rb
29
- lib/www/mechanize/chain/response_reader.rb
30
- lib/www/mechanize/chain/ssl_resolver.rb
31
- lib/www/mechanize/chain/uri_resolver.rb
32
- lib/www/mechanize/content_type_error.rb
33
- lib/www/mechanize/cookie.rb
34
- lib/www/mechanize/cookie_jar.rb
35
- lib/www/mechanize/file.rb
36
- lib/www/mechanize/file_response.rb
37
- lib/www/mechanize/file_saver.rb
38
- lib/www/mechanize/form.rb
39
- lib/www/mechanize/form/button.rb
40
- lib/www/mechanize/form/check_box.rb
41
- lib/www/mechanize/form/field.rb
42
- lib/www/mechanize/form/file_upload.rb
43
- lib/www/mechanize/form/image_button.rb
44
- lib/www/mechanize/form/multi_select_list.rb
45
- lib/www/mechanize/form/option.rb
46
- lib/www/mechanize/form/radio_button.rb
47
- lib/www/mechanize/form/select_list.rb
48
- lib/www/mechanize/headers.rb
49
- lib/www/mechanize/history.rb
50
- lib/www/mechanize/inspect.rb
51
- lib/www/mechanize/monkey_patch.rb
52
- lib/www/mechanize/page.rb
53
- lib/www/mechanize/page/base.rb
54
- lib/www/mechanize/page/frame.rb
55
- lib/www/mechanize/page/link.rb
56
- lib/www/mechanize/page/meta.rb
57
- lib/www/mechanize/pluggable_parsers.rb
58
- lib/www/mechanize/redirect_limit_reached_error.rb
59
- lib/www/mechanize/redirect_not_get_or_head_error.rb
60
- lib/www/mechanize/response_code_error.rb
61
- lib/www/mechanize/unsupported_scheme_error.rb
62
- lib/www/mechanize/util.rb
15
+ lib/mechanize/chain.rb
16
+ lib/mechanize/chain/auth_headers.rb
17
+ lib/mechanize/chain/body_decoding_handler.rb
18
+ lib/mechanize/chain/connection_resolver.rb
19
+ lib/mechanize/chain/custom_headers.rb
20
+ lib/mechanize/chain/handler.rb
21
+ lib/mechanize/chain/header_resolver.rb
22
+ lib/mechanize/chain/parameter_resolver.rb
23
+ lib/mechanize/chain/post_connect_hook.rb
24
+ lib/mechanize/chain/pre_connect_hook.rb
25
+ lib/mechanize/chain/request_resolver.rb
26
+ lib/mechanize/chain/response_body_parser.rb
27
+ lib/mechanize/chain/response_header_handler.rb
28
+ lib/mechanize/chain/response_reader.rb
29
+ lib/mechanize/chain/ssl_resolver.rb
30
+ lib/mechanize/chain/uri_resolver.rb
31
+ lib/mechanize/content_type_error.rb
32
+ lib/mechanize/cookie.rb
33
+ lib/mechanize/cookie_jar.rb
34
+ lib/mechanize/file.rb
35
+ lib/mechanize/file_response.rb
36
+ lib/mechanize/file_saver.rb
37
+ lib/mechanize/form.rb
38
+ lib/mechanize/form/button.rb
39
+ lib/mechanize/form/check_box.rb
40
+ lib/mechanize/form/field.rb
41
+ lib/mechanize/form/file_upload.rb
42
+ lib/mechanize/form/image_button.rb
43
+ lib/mechanize/form/multi_select_list.rb
44
+ lib/mechanize/form/option.rb
45
+ lib/mechanize/form/radio_button.rb
46
+ lib/mechanize/form/select_list.rb
47
+ lib/mechanize/headers.rb
48
+ lib/mechanize/history.rb
49
+ lib/mechanize/inspect.rb
50
+ lib/mechanize/monkey_patch.rb
51
+ lib/mechanize/page.rb
52
+ lib/mechanize/page/base.rb
53
+ lib/mechanize/page/frame.rb
54
+ lib/mechanize/page/image.rb
55
+ lib/mechanize/page/label.rb
56
+ lib/mechanize/page/link.rb
57
+ lib/mechanize/page/meta.rb
58
+ lib/mechanize/pluggable_parsers.rb
59
+ lib/mechanize/redirect_limit_reached_error.rb
60
+ lib/mechanize/redirect_not_get_or_head_error.rb
61
+ lib/mechanize/response_code_error.rb
62
+ lib/mechanize/unsupported_scheme_error.rb
63
+ lib/mechanize/util.rb
63
64
  mechanize.gemspec
64
65
  test/chain/test_argument_validator.rb
66
+ test/chain/test_auth_headers.rb
65
67
  test/chain/test_custom_headers.rb
68
+ test/chain/test_header_resolver.rb
66
69
  test/chain/test_parameter_resolver.rb
67
70
  test/chain/test_request_resolver.rb
68
71
  test/chain/test_response_reader.rb
@@ -96,9 +99,11 @@ test/htdocs/link with space.html
96
99
  test/htdocs/meta_cookie.html
97
100
  test/htdocs/no_title_test.html
98
101
  test/htdocs/relative/tc_relative_links.html
102
+ test/htdocs/tc_bad_charset.html
99
103
  test/htdocs/tc_bad_links.html
100
104
  test/htdocs/tc_base_link.html
101
105
  test/htdocs/tc_blank_form.html
106
+ test/htdocs/tc_charset.html
102
107
  test/htdocs/tc_checkboxes.html
103
108
  test/htdocs/tc_encoded_links.html
104
109
  test/htdocs/tc_follow_meta.html
@@ -110,6 +115,7 @@ test/htdocs/tc_radiobuttons.html
110
115
  test/htdocs/tc_referer.html
111
116
  test/htdocs/tc_relative_links.html
112
117
  test/htdocs/tc_textarea.html
118
+ test/htdocs/test_bad_encoding.html
113
119
  test/htdocs/unusual______.html
114
120
  test/servlets.rb
115
121
  test/ssl_server.rb
@@ -141,6 +147,7 @@ test/test_keep_alive.rb
141
147
  test/test_links.rb
142
148
  test/test_mech.rb
143
149
  test/test_mechanize_file.rb
150
+ test/test_meta.rb
144
151
  test/test_multi_select.rb
145
152
  test/test_no_attributes.rb
146
153
  test/test_option.rb
data/Rakefile CHANGED
@@ -1,17 +1,14 @@
1
1
  require 'rubygems'
2
2
  require 'hoe'
3
3
 
4
- $LOAD_PATH.unshift File.join(File.dirname(__FILE__), "lib")
5
- require 'mechanize'
4
+ Hoe.spec 'mechanize' do
5
+ developer 'Aaron Patterson', 'aaronp@rubyforge.org'
6
+ developer 'Mike Dalessio', 'mike.dalessio@gmail.com'
6
7
 
7
- HOE = Hoe.new('mechanize', WWW::Mechanize::VERSION) do |p|
8
- p.developer('Aaron Patterson','aaronp@rubyforge.org')
9
- p.developer('Mike Dalessio','mike.dalessio@gmail.com')
10
- p.readme_file = 'README.rdoc'
11
- p.history_file = 'CHANGELOG.rdoc'
12
- p.extra_rdoc_files = FileList['*.rdoc']
13
- p.summary = "Mechanize provides automated web-browsing"
14
- p.extra_deps = [['nokogiri', '>= 1.2.1']]
8
+ self.readme_file = 'README.rdoc'
9
+ self.history_file = 'CHANGELOG.rdoc'
10
+ self.extra_rdoc_files += Dir['*.rdoc']
11
+ self.extra_deps << ['nokogiri', '>= 1.2.1']
15
12
  end
16
13
 
17
14
  desc "Update SSL Certificate"
@@ -26,18 +23,11 @@ task('ssl_cert') do |p|
26
23
  sh "rm server.key.org"
27
24
  end
28
25
 
29
- namespace :gem do
30
- desc 'Generate a gem spec'
31
- task :spec do
32
- File.open("#{HOE.name}.gemspec", 'w') do |f|
33
- HOE.spec.version = "#{HOE.version}.#{Time.now.strftime("%Y%m%d%H%M%S")}"
34
- f.write(HOE.spec.to_ruby)
35
- end
26
+ desc 'Generate a gem spec'
27
+ task "gem:spec" do
28
+ File.open("mechanize.gemspec", 'w') do |f|
29
+ now = Time.now.strftime("%Y%m%d%H%M%S")
30
+ f.write `rake debug_gem`.sub(/(s.version = ".*)(")/) { "#{$1}.#{now}#{$2}" }
36
31
  end
37
32
  end
38
33
 
39
- desc "Run code-coverage analysis"
40
- task :coverage do
41
- rm_rf "coverage"
42
- sh "rcov -x Library -I lib:test #{Dir[*HOE.test_globs].join(' ')}"
43
- end
data/lib/mechanize.rb CHANGED
@@ -1,7 +1,621 @@
1
- # Copyright (c) 2005 by Michael Neumann (mneumann@ntecs.de)
2
- # Copyright (c) 2007 by Aaron Patterson (aaronp@rubyforge.org)
1
+ require 'net/http'
2
+ require 'net/https'
3
+ require 'uri'
4
+ require 'webrick/httputils'
5
+ require 'zlib'
6
+ require 'stringio'
7
+ require 'digest/md5'
8
+ require 'fileutils'
9
+ require 'nokogiri'
10
+ require 'forwardable'
11
+ require 'iconv'
12
+ require 'nkf'
13
+
14
+ require 'mechanize/util'
15
+ require 'mechanize/content_type_error'
16
+ require 'mechanize/response_code_error'
17
+ require 'mechanize/unsupported_scheme_error'
18
+ require 'mechanize/redirect_limit_reached_error'
19
+ require 'mechanize/redirect_not_get_or_head_error'
20
+ require 'mechanize/cookie'
21
+ require 'mechanize/cookie_jar'
22
+ require 'mechanize/history'
23
+ require 'mechanize/form'
24
+ require 'mechanize/pluggable_parsers'
25
+ require 'mechanize/file_response'
26
+ require 'mechanize/inspect'
27
+ require 'mechanize/chain'
28
+ require 'mechanize/monkey_patch'
29
+
30
+ # = Synopsis
31
+ # The Mechanize library is used for automating interaction with a website. It
32
+ # can follow links, and submit forms. Form fields can be populated and
33
+ # submitted. A history of URL's is maintained and can be queried.
34
+ #
35
+ # == Example
36
+ # require 'rubygems'
37
+ # require 'mechanize'
38
+ # require 'logger'
3
39
  #
4
- # Please see the LICENSE file for licensing.
40
+ # agent = WWW::Mechanize.new { |a| a.log = Logger.new("mech.log") }
41
+ # agent.user_agent_alias = 'Mac Safari'
42
+ # page = agent.get("http://www.google.com/")
43
+ # search_form = page.form_with(:name => "f")
44
+ # search_form.field_with(:name => "q").value = "Hello"
45
+ # search_results = agent.submit(search_form)
46
+ # puts search_results.body
47
+ class Mechanize
48
+ ##
49
+ # The version of Mechanize you are using.
50
+ VERSION = '0.9.3'
51
+
52
+ ##
53
+ # User Agent aliases
54
+ AGENT_ALIASES = {
55
+ 'Windows IE 6' => 'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)',
56
+ 'Windows IE 7' => 'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; .NET CLR 1.1.4322; .NET CLR 2.0.50727)',
57
+ 'Windows Mozilla' => 'Mozilla/5.0 (Windows; U; Windows NT 5.0; en-US; rv:1.4b) Gecko/20030516 Mozilla Firebird/0.6',
58
+ 'Mac Safari' => 'Mozilla/5.0 (Macintosh; U; PPC Mac OS X; en) AppleWebKit/418 (KHTML, like Gecko) Safari/417.9.3',
59
+ 'Mac FireFox' => 'Mozilla/5.0 (Macintosh; U; PPC Mac OS X Mach-O; en-US; rv:1.8.0.3) Gecko/20060426 Firefox/1.5.0.3',
60
+ 'Mac Mozilla' => 'Mozilla/5.0 (Macintosh; U; PPC Mac OS X Mach-O; en-US; rv:1.4a) Gecko/20030401',
61
+ 'Linux Mozilla' => 'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.4) Gecko/20030624',
62
+ 'Linux Konqueror' => 'Mozilla/5.0 (compatible; Konqueror/3; Linux)',
63
+ 'iPhone' => 'Mozilla/5.0 (iPhone; U; CPU like Mac OS X; en) AppleWebKit/420+ (KHTML, like Gecko) Version/3.0 Mobile/1C28 Safari/419.3',
64
+ 'Mechanize' => "WWW-Mechanize/#{VERSION} (http://rubyforge.org/projects/mechanize/)"
65
+ }
66
+
67
+ attr_accessor :cookie_jar
68
+ attr_accessor :open_timeout, :read_timeout
69
+ attr_accessor :user_agent
70
+ attr_accessor :watch_for_set
71
+ attr_accessor :ca_file
72
+ attr_accessor :key
73
+ attr_accessor :cert
74
+ attr_accessor :pass
75
+ attr_accessor :redirect_ok
76
+ attr_accessor :keep_alive_time
77
+ attr_accessor :keep_alive
78
+ attr_accessor :conditional_requests
79
+ attr_accessor :follow_meta_refresh
80
+ attr_accessor :verify_callback
81
+ attr_accessor :history_added
82
+ attr_accessor :scheme_handlers
83
+ attr_accessor :redirection_limit
84
+
85
+ # A hash of custom request headers
86
+ attr_accessor :request_headers
87
+
88
+ # The HTML parser to be used when parsing documents
89
+ attr_accessor :html_parser
90
+
91
+ attr_reader :history
92
+ attr_reader :pluggable_parser
93
+
94
+ alias :follow_redirect? :redirect_ok
95
+
96
+ @html_parser = Nokogiri::HTML
97
+ class << self; attr_accessor :html_parser, :log end
98
+
99
+ def initialize
100
+ # attr_accessors
101
+ @cookie_jar = CookieJar.new
102
+ @log = nil
103
+ @open_timeout = nil
104
+ @read_timeout = nil
105
+ @user_agent = AGENT_ALIASES['Mechanize']
106
+ @watch_for_set = nil
107
+ @history_added = nil
108
+ @ca_file = nil # OpenSSL server certificate file
109
+
110
+ # callback for OpenSSL errors while verifying the server certificate
111
+ # chain, can be used for debugging or to ignore errors by always
112
+ # returning _true_
113
+ @verify_callback = nil
114
+ @cert = nil # OpenSSL Certificate
115
+ @key = nil # OpenSSL Private Key
116
+ @pass = nil # OpenSSL Password
117
+ @redirect_ok = true # Should we follow redirects?
118
+
119
+ # attr_readers
120
+ @history = Mechanize::History.new
121
+ @pluggable_parser = PluggableParser.new
122
+
123
+ # Auth variables
124
+ @user = nil # Auth User
125
+ @password = nil # Auth Password
126
+ @digest = nil # DigestAuth Digest
127
+ @auth_hash = {} # Keep track of urls for sending auth
128
+ @request_headers= {} # A hash of request headers to be used
129
+
130
+ # Proxy settings
131
+ @proxy_addr = nil
132
+ @proxy_pass = nil
133
+ @proxy_port = nil
134
+ @proxy_user = nil
135
+
136
+ @conditional_requests = true
137
+
138
+ @follow_meta_refresh = false
139
+ @redirection_limit = 20
140
+
141
+ # Connection Cache & Keep alive
142
+ @connection_cache = {}
143
+ @keep_alive_time = 300
144
+ @keep_alive = true
145
+
146
+ @scheme_handlers = Hash.new { |h,k|
147
+ h[k] = lambda { |link, page|
148
+ raise UnsupportedSchemeError.new(k)
149
+ }
150
+ }
151
+ @scheme_handlers['http'] = lambda { |link, page| link }
152
+ @scheme_handlers['https'] = @scheme_handlers['http']
153
+ @scheme_handlers['relative'] = @scheme_handlers['http']
154
+ @scheme_handlers['file'] = @scheme_handlers['http']
155
+
156
+ @pre_connect_hook = Chain::PreConnectHook.new
157
+ @post_connect_hook = Chain::PostConnectHook.new
158
+
159
+ @html_parser = self.class.html_parser
160
+
161
+ yield self if block_given?
162
+ end
163
+
164
+ def max_history=(length); @history.max_size = length end
165
+ def max_history; @history.max_size end
166
+ def log=(l); self.class.log = l end
167
+ def log; self.class.log end
168
+
169
+ def pre_connect_hooks
170
+ @pre_connect_hook.hooks
171
+ end
172
+
173
+ def post_connect_hooks
174
+ @post_connect_hook.hooks
175
+ end
176
+
177
+ # Sets the proxy address, port, user, and password
178
+ # +addr+ should be a host, with no "http://"
179
+ def set_proxy(addr, port, user = nil, pass = nil)
180
+ @proxy_addr, @proxy_port, @proxy_user, @proxy_pass = addr, port, user, pass
181
+ end
182
+
183
+ # Set the user agent for the Mechanize object.
184
+ # See AGENT_ALIASES
185
+ def user_agent_alias=(al)
186
+ self.user_agent = AGENT_ALIASES[al] || raise("unknown agent alias")
187
+ end
188
+
189
+ # Returns a list of cookies stored in the cookie jar.
190
+ def cookies
191
+ @cookie_jar.to_a
192
+ end
193
+
194
+ # Sets the user and password to be used for authentication.
195
+ def auth(user, password)
196
+ @user = user
197
+ @password = password
198
+ end
199
+ alias :basic_auth :auth
200
+
201
+ # Fetches the URL passed in and returns a page.
202
+ def get(options, parameters = [], referer = nil)
203
+ unless options.is_a? Hash
204
+ url = options
205
+ unless parameters.respond_to?(:each) # FIXME: Remove this in 0.8.0
206
+ referer = parameters
207
+ parameters = []
208
+ end
209
+ else
210
+ raise ArgumentError.new("url must be specified") unless url = options[:url]
211
+ parameters = options[:params] || []
212
+ referer = options[:referer]
213
+ headers = options[:headers]
214
+ end
215
+
216
+ unless referer
217
+ if url.to_s =~ /^http/
218
+ referer = Page.new(nil, {'content-type'=>'text/html'})
219
+ else
220
+ referer = current_page || Page.new(nil, {'content-type'=>'text/html'})
221
+ end
222
+ end
223
+
224
+ # FIXME: Huge hack so that using a URI as a referer works. I need to
225
+ # refactor everything to pass around URIs but still support
226
+ # Mechanize::Page#base
227
+ unless referer.is_a?(Mechanize::File)
228
+ referer = referer.is_a?(String) ?
229
+ Page.new(URI.parse(referer), {'content-type' => 'text/html'}) :
230
+ Page.new(referer, {'content-type' => 'text/html'})
231
+ end
232
+
233
+ # fetch the page
234
+ page = fetch_page( :uri => url,
235
+ :referer => referer,
236
+ :headers => headers || {},
237
+ :params => parameters
238
+ )
239
+ add_to_history(page)
240
+ yield page if block_given?
241
+ page
242
+ end
243
+
244
+ ####
245
+ # PUT to +url+ with +entity+, and setting +options+:
246
+ #
247
+ # put('http://tenderlovemaking.com/', 'new content', :headers => {'Content-Type' => 'text/plain'})
248
+ #
249
+ def put(url, entity, options = {})
250
+ request_with_entity(:put, url, entity, options)
251
+ end
252
+
253
+ ####
254
+ # DELETE to +url+ with +query_params+, and setting +options+:
255
+ #
256
+ # delete('http://tenderlovemaking.com/', {'q' => 'foo'}, :headers => {})
257
+ #
258
+ def delete(url, query_params = {}, options = {})
259
+ page = head(url, query_params, options.merge({:verb => :delete}))
260
+ add_to_history(page)
261
+ page
262
+ end
263
+
264
+ ####
265
+ # HEAD to +url+ with +query_params+, and setting +options+:
266
+ #
267
+ # head('http://tenderlovemaking.com/', {'q' => 'foo'}, :headers => {})
268
+ #
269
+ def head(url, query_params = {}, options = {})
270
+ options = {
271
+ :uri => url,
272
+ :headers => {},
273
+ :params => query_params,
274
+ :verb => :head
275
+ }.merge(options)
276
+ # fetch the page
277
+ page = fetch_page(options)
278
+ yield page if block_given?
279
+ page
280
+ end
281
+
282
+ # Fetch a file and return the contents of the file.
283
+ def get_file(url)
284
+ get(url).body
285
+ end
286
+
287
+ # Clicks the Mechanize::Link object passed in and returns the
288
+ # page fetched.
289
+ def click(link)
290
+ referer = link.page rescue referer = nil
291
+ href = link.respond_to?(:href) ? link.href :
292
+ (link['href'] || link['src'])
293
+ get(:url => href, :referer => (referer || current_page()))
294
+ end
295
+
296
+ # Equivalent to the browser back button. Returns the most recent page
297
+ # visited.
298
+ def back
299
+ @history.pop
300
+ end
301
+
302
+ # Posts to the given URL with the request entity. The request
303
+ # entity is specified by either a string, or a list of key-value
304
+ # pairs represented by a hash or an array of arrays.
305
+ #
306
+ # Examples:
307
+ # agent.post('http://example.com/', "foo" => "bar")
308
+ #
309
+ # agent.post('http://example.com/', [ ["foo", "bar"] ])
310
+ #
311
+ # agent.post('http://example.com/', "<message>hello</message>", 'Content-Type' => 'application/xml')
312
+ def post(url, query={}, headers={})
313
+ if query.is_a?(String)
314
+ return request_with_entity(:post, url, query, :headers => headers)
315
+ end
316
+ node = {}
317
+ # Create a fake form
318
+ class << node
319
+ def search(*args); []; end
320
+ end
321
+ node['method'] = 'POST'
322
+ node['enctype'] = 'application/x-www-form-urlencoded'
323
+
324
+ form = Form.new(node)
325
+ query.each { |k,v|
326
+ if v.is_a?(IO)
327
+ form.enctype = 'multipart/form-data'
328
+ ul = Form::FileUpload.new(k.to_s,::File.basename(v.path))
329
+ ul.file_data = v.read
330
+ form.file_uploads << ul
331
+ else
332
+ form.fields << Form::Field.new(k.to_s,v)
333
+ end
334
+ }
335
+ post_form(url, form, headers)
336
+ end
337
+
338
+ # Submit a form with an optional button.
339
+ # Without a button:
340
+ # page = agent.get('http://example.com')
341
+ # agent.submit(page.forms.first)
342
+ # With a button
343
+ # agent.submit(page.forms.first, page.forms.first.buttons.first)
344
+ def submit(form, button=nil, headers={})
345
+ form.add_button_to_query(button) if button
346
+ case form.method.upcase
347
+ when 'POST'
348
+ post_form(form.action, form, headers)
349
+ when 'GET'
350
+ get( :url => form.action.gsub(/\?[^\?]*$/, ''),
351
+ :params => form.build_query,
352
+ :headers => headers,
353
+ :referer => form.page
354
+ )
355
+ else
356
+ raise "unsupported method: #{form.method.upcase}"
357
+ end
358
+ end
359
+
360
+ def request_with_entity(verb, url, entity, options={})
361
+ cur_page = current_page || Page.new( nil, {'content-type'=>'text/html'})
362
+
363
+ options = {
364
+ :uri => url,
365
+ :referer => cur_page,
366
+ :headers => {},
367
+ }.update(options)
368
+
369
+ headers = {
370
+ 'Content-Type' => 'application/octet-stream',
371
+ 'Content-Length' => entity.size.to_s,
372
+ }.update(options[:headers])
373
+
374
+ options.update({
375
+ :verb => verb,
376
+ :params => [entity],
377
+ :headers => headers,
378
+ })
379
+
380
+ page = fetch_page(options)
381
+ add_to_history(page)
382
+ page
383
+ end
384
+
385
+ # Returns the current page loaded by Mechanize
386
+ def current_page
387
+ @history.last
388
+ end
389
+
390
+ # Returns whether or not a url has been visited
391
+ def visited?(url)
392
+ ! visited_page(url).nil?
393
+ end
394
+
395
+ # Returns a visited page for the url passed in, otherwise nil
396
+ def visited_page(url)
397
+ if url.respond_to? :href
398
+ url = url.href
399
+ end
400
+ @history.visited_page(resolve(url))
401
+ end
402
+
403
+ # Runs given block, then resets the page history as it was before. self is
404
+ # given as a parameter to the block. Returns the value of the block.
405
+ def transact
406
+ history_backup = @history.dup
407
+ begin
408
+ yield self
409
+ ensure
410
+ @history = history_backup
411
+ end
412
+ end
413
+
414
+ alias :page :current_page
415
+
416
+ private
417
+
418
+ def resolve(url, referer = current_page())
419
+ hash = { :uri => url, :referer => referer }
420
+ chain = Chain.new([
421
+ Chain::URIResolver.new(@scheme_handlers)
422
+ ]).handle(hash)
423
+ hash[:uri].to_s
424
+ end
425
+
426
+ def post_form(url, form, headers = {})
427
+ cur_page = form.page || current_page ||
428
+ Page.new( nil, {'content-type'=>'text/html'})
429
+
430
+ request_data = form.request_data
431
+
432
+ log.debug("query: #{ request_data.inspect }") if log
433
+
434
+ # fetch the page
435
+ page = fetch_page( :uri => url,
436
+ :referer => cur_page,
437
+ :verb => :post,
438
+ :params => [request_data],
439
+ :headers => {
440
+ 'Content-Type' => form.enctype,
441
+ 'Content-Length' => request_data.size.to_s,
442
+ }.merge(headers))
443
+ add_to_history(page)
444
+ page
445
+ end
446
+
447
+ # uri is an absolute URI
448
+ def fetch_page(params)
449
+ options = {
450
+ :request => nil,
451
+ :response => nil,
452
+ :connection => nil,
453
+ :referer => current_page(),
454
+ :uri => nil,
455
+ :verb => :get,
456
+ :agent => self,
457
+ :redirects => 0,
458
+ :params => [],
459
+ :headers => {},
460
+ }.merge(params)
461
+
462
+ before_connect = Chain.new([
463
+ Chain::URIResolver.new(@scheme_handlers),
464
+ Chain::ParameterResolver.new,
465
+ Chain::RequestResolver.new,
466
+ Chain::ConnectionResolver.new(
467
+ @connection_cache,
468
+ @keep_alive,
469
+ @proxy_addr,
470
+ @proxy_port,
471
+ @proxy_user,
472
+ @proxy_pass
473
+ ),
474
+ Chain::SSLResolver.new(@ca_file, @verify_callback, @cert, @key, @pass),
475
+ Chain::AuthHeaders.new(@auth_hash, @user, @password, @digest),
476
+ Chain::HeaderResolver.new(
477
+ @keep_alive,
478
+ @keep_alive_time,
479
+ @cookie_jar,
480
+ @user_agent,
481
+ @request_headers
482
+ ),
483
+ Chain::CustomHeaders.new,
484
+ @pre_connect_hook,
485
+ ])
486
+ before_connect.handle(options)
487
+
488
+ uri = options[:uri]
489
+ request = options[:request]
490
+ cur_page = options[:referer]
491
+ request_data = options[:params]
492
+ redirects = options[:redirects]
493
+ http_obj = options[:connection]
494
+
495
+ # Add If-Modified-Since if page is in history
496
+ if( (page = visited_page(uri)) && page.response['Last-Modified'] )
497
+ request['If-Modified-Since'] = page.response['Last-Modified']
498
+ end if(@conditional_requests)
499
+
500
+ # Specify timeouts if given
501
+ http_obj.open_timeout = @open_timeout if @open_timeout
502
+ http_obj.read_timeout = @read_timeout if @read_timeout
503
+ http_obj.start unless http_obj.started?
504
+
505
+ # Log specified headers for the request
506
+ log.info("#{ request.class }: #{ request.path }") if log
507
+ request.each_header do |k, v|
508
+ log.debug("request-header: #{ k } => #{ v }")
509
+ end if log
510
+
511
+ # Send the request
512
+ attempts = 0
513
+ begin
514
+ response = http_obj.request(request, *request_data) { |r|
515
+ connection_chain = Chain.new([
516
+ Chain::ResponseReader.new(r),
517
+ Chain::BodyDecodingHandler.new,
518
+ ])
519
+ connection_chain.handle(options)
520
+ }
521
+ rescue EOFError, Errno::ECONNRESET, Errno::EPIPE => x
522
+ log.error("Rescuing EOF error") if log
523
+ http_obj.finish
524
+ raise x if attempts >= 2
525
+ request.body = nil
526
+ http_obj.start
527
+ attempts += 1
528
+ retry
529
+ end
530
+
531
+ after_connect = Chain.new([
532
+ @post_connect_hook,
533
+ Chain::ResponseBodyParser.new(@pluggable_parser, @watch_for_set),
534
+ Chain::ResponseHeaderHandler.new(@cookie_jar, @connection_cache),
535
+ ])
536
+ after_connect.handle(options)
537
+
538
+ res_klass = options[:res_klass]
539
+ response_body = options[:response_body]
540
+ page = options[:page]
541
+
542
+ log.info("status: #{ page.code }") if log
543
+
544
+ if follow_meta_refresh
545
+ redirect_uri = nil
546
+ referer = page
547
+ if (page.respond_to?(:meta) && (redirect = page.meta.first))
548
+ redirect_uri = redirect.uri.to_s
549
+ sleep redirect.node['delay'].to_f
550
+ referer = Page.new(nil, {'content-type'=>'text/html'})
551
+ elsif refresh = response['refresh']
552
+ delay, redirect_uri = Page::Meta.parse(refresh, uri)
553
+ raise StandardError, "Invalid refresh http header" unless delay
554
+ if redirects + 1 > redirection_limit
555
+ raise RedirectLimitReachedError.new(page, redirects)
556
+ end
557
+ sleep delay.to_f
558
+ end
559
+ if redirect_uri
560
+ @history.push(page, page.uri)
561
+ return fetch_page(
562
+ :uri => redirect_uri,
563
+ :referer => referer,
564
+ :params => [],
565
+ :verb => :get,
566
+ :redirects => redirects + 1
567
+ )
568
+ end
569
+ end
570
+
571
+ return page if res_klass <= Net::HTTPSuccess
572
+
573
+ if res_klass == Net::HTTPNotModified
574
+ log.debug("Got cached page") if log
575
+ return visited_page(uri) || page
576
+ elsif res_klass <= Net::HTTPRedirection
577
+ return page unless follow_redirect?
578
+ log.info("follow redirect to: #{ response['Location'] }") if log
579
+ from_uri = page.uri
580
+ raise RedirectLimitReachedError.new(page, redirects) if redirects + 1 > redirection_limit
581
+ redirect_verb = options[:verb] == :head ? :head : :get
582
+ page = fetch_page( :uri => response['Location'].to_s,
583
+ :referer => page,
584
+ :params => [],
585
+ :verb => redirect_verb,
586
+ :redirects => redirects + 1
587
+ )
588
+ @history.push(page, from_uri)
589
+ return page
590
+ elsif res_klass <= Net::HTTPUnauthorized
591
+ raise ResponseCodeError.new(page) unless @user || @password
592
+ raise ResponseCodeError.new(page) if @auth_hash.has_key?(uri.host)
593
+ if response['www-authenticate'] =~ /Digest/i
594
+ @auth_hash[uri.host] = :digest
595
+ if response['server'] =~ /Microsoft-IIS/
596
+ @auth_hash[uri.host] = :iis_digest
597
+ end
598
+ @digest = response['www-authenticate']
599
+ else
600
+ @auth_hash[uri.host] = :basic
601
+ end
602
+ return fetch_page( :uri => uri,
603
+ :referer => cur_page,
604
+ :verb => request.method.downcase.to_sym,
605
+ :params => request_data,
606
+ :headers => options[:headers]
607
+ )
608
+ end
609
+
610
+ raise ResponseCodeError.new(page), "Unhandled response", caller
611
+ end
612
+
613
+ def add_to_history(page)
614
+ @history.push(page, resolve(page.uri))
615
+ history_added.call(page) if history_added
616
+ end
617
+ end
5
618
 
619
+ module WWW; end
620
+ WWW::Mechanize = ::Mechanize
6
621
 
7
- require 'www/mechanize'