neocoin-mechanize 2.0.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (174) hide show
  1. data/.autotest +6 -0
  2. data/.gemtest +0 -0
  3. data/CHANGELOG.rdoc +638 -0
  4. data/EXAMPLES.rdoc +187 -0
  5. data/FAQ.rdoc +11 -0
  6. data/GUIDE.rdoc +163 -0
  7. data/LICENSE.rdoc +20 -0
  8. data/Manifest.txt +172 -0
  9. data/README.rdoc +63 -0
  10. data/Rakefile +36 -0
  11. data/examples/flickr_upload.rb +22 -0
  12. data/examples/mech-dump.rb +5 -0
  13. data/examples/proxy_req.rb +7 -0
  14. data/examples/rubyforge.rb +20 -0
  15. data/examples/spider.rb +21 -0
  16. data/lib/mechanize.rb +662 -0
  17. data/lib/mechanize/content_type_error.rb +14 -0
  18. data/lib/mechanize/cookie.rb +85 -0
  19. data/lib/mechanize/cookie_jar.rb +241 -0
  20. data/lib/mechanize/element_matcher.rb +35 -0
  21. data/lib/mechanize/file.rb +80 -0
  22. data/lib/mechanize/file_connection.rb +17 -0
  23. data/lib/mechanize/file_request.rb +26 -0
  24. data/lib/mechanize/file_response.rb +74 -0
  25. data/lib/mechanize/file_saver.rb +37 -0
  26. data/lib/mechanize/form.rb +478 -0
  27. data/lib/mechanize/form/button.rb +9 -0
  28. data/lib/mechanize/form/check_box.rb +11 -0
  29. data/lib/mechanize/form/field.rb +44 -0
  30. data/lib/mechanize/form/file_upload.rb +23 -0
  31. data/lib/mechanize/form/image_button.rb +20 -0
  32. data/lib/mechanize/form/multi_select_list.rb +83 -0
  33. data/lib/mechanize/form/option.rb +49 -0
  34. data/lib/mechanize/form/radio_button.rb +48 -0
  35. data/lib/mechanize/form/select_list.rb +40 -0
  36. data/lib/mechanize/headers.rb +25 -0
  37. data/lib/mechanize/history.rb +83 -0
  38. data/lib/mechanize/http.rb +3 -0
  39. data/lib/mechanize/http/agent.rb +738 -0
  40. data/lib/mechanize/inspect.rb +88 -0
  41. data/lib/mechanize/monkey_patch.rb +37 -0
  42. data/lib/mechanize/page.rb +408 -0
  43. data/lib/mechanize/page/base.rb +8 -0
  44. data/lib/mechanize/page/frame.rb +27 -0
  45. data/lib/mechanize/page/image.rb +30 -0
  46. data/lib/mechanize/page/label.rb +20 -0
  47. data/lib/mechanize/page/link.rb +82 -0
  48. data/lib/mechanize/page/meta_refresh.rb +56 -0
  49. data/lib/mechanize/pluggable_parsers.rb +101 -0
  50. data/lib/mechanize/redirect_limit_reached_error.rb +16 -0
  51. data/lib/mechanize/redirect_not_get_or_head_error.rb +19 -0
  52. data/lib/mechanize/response_code_error.rb +22 -0
  53. data/lib/mechanize/response_read_error.rb +27 -0
  54. data/lib/mechanize/robots_disallowed_error.rb +29 -0
  55. data/lib/mechanize/unsupported_scheme_error.rb +8 -0
  56. data/lib/mechanize/util.rb +113 -0
  57. data/test/data/htpasswd +1 -0
  58. data/test/data/server.crt +16 -0
  59. data/test/data/server.csr +12 -0
  60. data/test/data/server.key +15 -0
  61. data/test/data/server.pem +15 -0
  62. data/test/helper.rb +175 -0
  63. data/test/htdocs/alt_text.html +10 -0
  64. data/test/htdocs/bad_form_test.html +9 -0
  65. data/test/htdocs/button.jpg +0 -0
  66. data/test/htdocs/canonical_uri.html +9 -0
  67. data/test/htdocs/dir with spaces/foo.html +1 -0
  68. data/test/htdocs/empty_form.html +6 -0
  69. data/test/htdocs/file_upload.html +26 -0
  70. data/test/htdocs/find_link.html +41 -0
  71. data/test/htdocs/form_multi_select.html +16 -0
  72. data/test/htdocs/form_multival.html +37 -0
  73. data/test/htdocs/form_no_action.html +18 -0
  74. data/test/htdocs/form_no_input_name.html +16 -0
  75. data/test/htdocs/form_select.html +16 -0
  76. data/test/htdocs/form_select_all.html +16 -0
  77. data/test/htdocs/form_select_none.html +17 -0
  78. data/test/htdocs/form_select_noopts.html +10 -0
  79. data/test/htdocs/form_set_fields.html +14 -0
  80. data/test/htdocs/form_test.html +188 -0
  81. data/test/htdocs/frame_referer_test.html +10 -0
  82. data/test/htdocs/frame_test.html +30 -0
  83. data/test/htdocs/google.html +13 -0
  84. data/test/htdocs/iframe_test.html +16 -0
  85. data/test/htdocs/index.html +6 -0
  86. data/test/htdocs/link with space.html +5 -0
  87. data/test/htdocs/meta_cookie.html +11 -0
  88. data/test/htdocs/no_title_test.html +6 -0
  89. data/test/htdocs/nofollow.html +9 -0
  90. data/test/htdocs/noindex.html +9 -0
  91. data/test/htdocs/norobots.html +8 -0
  92. data/test/htdocs/rails_3_encoding_hack_form_test.html +27 -0
  93. data/test/htdocs/rel_nofollow.html +8 -0
  94. data/test/htdocs/relative/tc_relative_links.html +21 -0
  95. data/test/htdocs/robots.html +8 -0
  96. data/test/htdocs/robots.txt +2 -0
  97. data/test/htdocs/tc_bad_charset.html +9 -0
  98. data/test/htdocs/tc_bad_links.html +5 -0
  99. data/test/htdocs/tc_base_images.html +10 -0
  100. data/test/htdocs/tc_base_link.html +8 -0
  101. data/test/htdocs/tc_blank_form.html +11 -0
  102. data/test/htdocs/tc_charset.html +6 -0
  103. data/test/htdocs/tc_checkboxes.html +19 -0
  104. data/test/htdocs/tc_encoded_links.html +5 -0
  105. data/test/htdocs/tc_field_precedence.html +11 -0
  106. data/test/htdocs/tc_follow_meta.html +8 -0
  107. data/test/htdocs/tc_form_action.html +48 -0
  108. data/test/htdocs/tc_images.html +8 -0
  109. data/test/htdocs/tc_links.html +18 -0
  110. data/test/htdocs/tc_meta_in_body.html +9 -0
  111. data/test/htdocs/tc_no_attributes.html +16 -0
  112. data/test/htdocs/tc_pretty_print.html +17 -0
  113. data/test/htdocs/tc_radiobuttons.html +17 -0
  114. data/test/htdocs/tc_referer.html +16 -0
  115. data/test/htdocs/tc_relative_links.html +19 -0
  116. data/test/htdocs/tc_textarea.html +23 -0
  117. data/test/htdocs/test_bad_encoding.html +52 -0
  118. data/test/htdocs/test_click.html +11 -0
  119. data/test/htdocs/unusual______.html +5 -0
  120. data/test/servlets.rb +402 -0
  121. data/test/ssl_server.rb +48 -0
  122. data/test/test_cookies.rb +129 -0
  123. data/test/test_form_action.rb +52 -0
  124. data/test/test_form_as_hash.rb +59 -0
  125. data/test/test_form_button.rb +46 -0
  126. data/test/test_frames.rb +34 -0
  127. data/test/test_headers.rb +33 -0
  128. data/test/test_history.rb +118 -0
  129. data/test/test_history_added.rb +16 -0
  130. data/test/test_html_unscape_forms.rb +46 -0
  131. data/test/test_if_modified_since.rb +20 -0
  132. data/test/test_images.rb +19 -0
  133. data/test/test_mechanize.rb +842 -0
  134. data/test/test_mechanize_cookie.rb +345 -0
  135. data/test/test_mechanize_cookie_jar.rb +401 -0
  136. data/test/test_mechanize_file.rb +53 -0
  137. data/test/test_mechanize_file_request.rb +19 -0
  138. data/test/test_mechanize_file_response.rb +21 -0
  139. data/test/test_mechanize_form.rb +576 -0
  140. data/test/test_mechanize_form_check_box.rb +37 -0
  141. data/test/test_mechanize_form_encoding.rb +120 -0
  142. data/test/test_mechanize_form_field.rb +21 -0
  143. data/test/test_mechanize_form_image_button.rb +12 -0
  144. data/test/test_mechanize_form_textarea.rb +51 -0
  145. data/test/test_mechanize_http_agent.rb +697 -0
  146. data/test/test_mechanize_link.rb +84 -0
  147. data/test/test_mechanize_page_encoding.rb +147 -0
  148. data/test/test_mechanize_page_link.rb +382 -0
  149. data/test/test_mechanize_page_meta_refresh.rb +115 -0
  150. data/test/test_mechanize_redirect_not_get_or_head_error.rb +18 -0
  151. data/test/test_mechanize_subclass.rb +22 -0
  152. data/test/test_mechanize_util.rb +92 -0
  153. data/test/test_multi_select.rb +118 -0
  154. data/test/test_no_attributes.rb +13 -0
  155. data/test/test_option.rb +18 -0
  156. data/test/test_pluggable_parser.rb +136 -0
  157. data/test/test_post_form.rb +37 -0
  158. data/test/test_pretty_print.rb +22 -0
  159. data/test/test_radiobutton.rb +75 -0
  160. data/test/test_redirect_limit_reached.rb +39 -0
  161. data/test/test_referer.rb +81 -0
  162. data/test/test_relative_links.rb +40 -0
  163. data/test/test_request.rb +13 -0
  164. data/test/test_response_code.rb +53 -0
  165. data/test/test_robots.rb +72 -0
  166. data/test/test_save_file.rb +48 -0
  167. data/test/test_scheme.rb +48 -0
  168. data/test/test_select.rb +119 -0
  169. data/test/test_select_all.rb +15 -0
  170. data/test/test_select_none.rb +15 -0
  171. data/test/test_select_noopts.rb +18 -0
  172. data/test/test_set_fields.rb +44 -0
  173. data/test/test_ssl_server.rb +20 -0
  174. metadata +354 -0
data/README.rdoc ADDED
@@ -0,0 +1,63 @@
1
+ = Mechanize
2
+
3
+ * http://mechanize.rubyforge.org
4
+ * http://github.com/tenderlove/mechanize/tree/master
5
+
6
+ == DESCRIPTION
7
+
8
+ The Mechanize library is used for automating interaction with websites.
9
+ Mechanize automatically stores and sends cookies, follows redirects,
10
+ can follow links, and submit forms. Form fields can be populated and
11
+ submitted. Mechanize also keeps track of the sites that you have visited as
12
+ a history.
13
+
14
+ == Dependencies
15
+
16
+ * ruby 1.8.7
17
+ * nokogiri[http://nokogiri.rubyforge.org]
18
+
19
+ == SUPPORT:
20
+
21
+ The mechanize mailing list is available here:
22
+
23
+ * http://rubyforge.org/mailman/listinfo/mechanize-users
24
+
25
+ The bug tracker is available here:
26
+
27
+ * http://github.com/tenderlove/mechanize/issues
28
+
29
+ == Examples
30
+
31
+ If you are just starting, check out the GUIDE.
32
+ Also, check out the EXAMPLES file.
33
+
34
+ == Authors
35
+
36
+ Copyright (c) 2005 by Michael Neumann (mneumann@ntecs.de)
37
+
38
+ Copyright (c) 2006-2011:
39
+
40
+ * {Aaron Patterson}[http://tenderlovemaking.com] (aaronp@rubyforge.org)
41
+ * {Mike Dalessio}[http://mike.daless.io] (mike@csa.net)
42
+
43
+ Copyright (c) 2011:
44
+
45
+ * {Eric Hodel}[http://blog.segment7.net] (drbrain@segment7.net)
46
+
47
+ This library comes with a shameless plug for employing me
48
+ (Aaron[http://tenderlovemaking.com/]) programming Ruby, my favorite language!
49
+
50
+ == Acknowledgments
51
+
52
+ This library was heavily influenced by its namesake in the Perl world. A big
53
+ thanks goes to Andy Lester (andy@petdance.com), the author of the original
54
+ Perl Mechanize which is available here[http://search.cpan.org/~petdance/WWW-Mechanize-1.20/]. Ruby Mechanize would not be around without you!
55
+
56
+ Thank you to Michael Neumann for starting the Ruby version. Thanks to everyone
57
+ who's helped out in various ways. Finally, thank you to the people using this
58
+ library!
59
+
60
+ == License
61
+
62
+ This library is distributed under the MIT license. Please see the LICENSE file.
63
+
data/Rakefile ADDED
@@ -0,0 +1,36 @@
1
+ require 'rubygems'
2
+ require 'hoe'
3
+
4
+ Hoe.plugin :git
5
+ Hoe.plugin :minitest
6
+
7
+ Hoe.spec 'neocoin-mechanize' do
8
+ developer 'Eric Hodel', 'drbrain@segment7.net'
9
+ developer 'Aaron Patterson', 'aaronp@rubyforge.org'
10
+ developer 'Mike Dalessio', 'mike.dalessio@gmail.com'
11
+
12
+ self.readme_file = 'README.rdoc'
13
+ self.history_file = 'CHANGELOG.rdoc'
14
+ self.extra_rdoc_files += Dir['*.rdoc']
15
+
16
+ rdoc_locations << 'drbrain@rubyforge.org:/var/www/gforge-projects/mechanize/'
17
+
18
+ self.extra_deps << ['nokogiri', '~> 1.4']
19
+ self.extra_deps << ['net-http-persistent', '~> 1.8']
20
+ self.extra_deps << ['net-http-digest_auth', '~> 1.1', '>= 1.1.1']
21
+ self.extra_deps << ['webrobots', '~> 0.0', '>= 0.0.9']
22
+
23
+ self.spec_extras[:required_ruby_version] = '>= 1.8.7'
24
+ end
25
+
26
+ desc "Update SSL Certificate"
27
+ task('ssl_cert') do |p|
28
+ sh "openssl genrsa -des3 -out server.key 1024"
29
+ sh "openssl req -new -key server.key -out server.csr"
30
+ sh "cp server.key server.key.org"
31
+ sh "openssl rsa -in server.key.org -out server.key"
32
+ sh "openssl x509 -req -days 365 -in server.csr -signkey server.key -out server.crt"
33
+ sh "cp server.key server.pem"
34
+ sh "mv server.key server.csr server.crt server.pem test/data/"
35
+ sh "rm server.key.org"
36
+ end
@@ -0,0 +1,22 @@
1
+ require 'rubygems'
2
+ require 'mechanize'
3
+
4
+ agent = Mechanize.new
5
+
6
+ # Get the flickr sign in page
7
+ page = agent.get 'http://flickr.com/signin/flickr/'
8
+
9
+ # Fill out the login form
10
+ form = page.form_with :name => 'flickrloginform'
11
+ form.email = ARGV[0]
12
+ form.password = ARGV[1]
13
+ form.submit
14
+
15
+ # Go to the upload page
16
+ page = page.link_with(:text => 'Upload').click
17
+
18
+ # Fill out the form
19
+ form = page.forms.action('/photos_upload_process.gne').first
20
+ form.file_uploads.name('file1').first.file_name = ARGV[2]
21
+ form.submit
22
+
@@ -0,0 +1,5 @@
1
+ require 'rubygems'
2
+ require 'mechanize'
3
+
4
+ agent = Mechanize.new
5
+ puts agent.get(ARGV[0]).inspect
@@ -0,0 +1,7 @@
1
+ require 'rubygems'
2
+ require 'mechanize'
3
+
4
+ agent = Mechanize.new
5
+ agent.set_proxy('localhost', '8000')
6
+ page = agent.get(ARGV[0])
7
+ puts page.body
@@ -0,0 +1,20 @@
1
+ # This example logs a user in to rubyforge and prints out the body of the
2
+ # page after logging the user in.
3
+ require 'rubygems'
4
+ require 'mechanize'
5
+ require 'logger'
6
+
7
+ # Create a new mechanize object
8
+ agent = Mechanize.new { |a| a.log = Logger.new(STDERR) }
9
+
10
+ # Load the rubyforge website
11
+ page = agent.get('http://rubyforge.org/')
12
+ page = agent.click page.link_with(:text => /Log In/) # Click the login link
13
+ form = page.forms[1] # Select the second form (index 1), which holds the login fields
14
+ form.form_loginname = ARGV[0]
15
+ form.form_pw = ARGV[1]
16
+
17
+ # Submit the form
18
+ page = form.submit form.buttons.first
19
+
20
+ puts page.body # Print out the body
@@ -0,0 +1,21 @@
1
+ require 'rubygems'
2
+ require 'mechanize'
3
+
4
+ agent = Mechanize.new
5
+ stack = agent.get(ARGV[0]).links
6
+
7
+ while l = stack.pop
8
+ next unless l.uri
9
+ host = l.uri.host
10
+ next unless host.nil? or host == agent.history.first.uri.host
11
+ next if agent.visited? l.href
12
+
13
+ puts "crawling #{l.uri}"
14
+ begin
15
+ page = l.click
16
+ next unless Mechanize::Page === page
17
+ stack.push(*page.links)
18
+ rescue Mechanize::ResponseCodeError
19
+ end
20
+ end
21
+
data/lib/mechanize.rb ADDED
@@ -0,0 +1,662 @@
1
+ require 'fileutils'
2
+ require 'forwardable'
3
+ require 'iconv' if RUBY_VERSION < '1.9.2'
4
+ require 'mutex_m'
5
+ require 'net/http/digest_auth'
6
+ require 'net/http/persistent'
7
+ require 'nkf'
8
+ require 'nokogiri'
9
+ require 'openssl'
10
+ require 'stringio'
11
+ require 'uri'
12
+ require 'webrick/httputils'
13
+ require 'zlib'
14
+
15
+ # = Synopsis
16
+ # The Mechanize library is used for automating interaction with a website. It
17
+ # can follow links, and submit forms. Form fields can be populated and
18
+ # submitted. A history of URLs is maintained and can be queried.
19
+ #
20
+ # == Example
21
+ # require 'rubygems'
22
+ # require 'mechanize'
23
+ # require 'logger'
24
+ #
25
+ # agent = Mechanize.new { |a| a.log = Logger.new("mech.log") }
26
+ # agent.user_agent_alias = 'Mac Safari'
27
+ # page = agent.get("http://www.google.com/")
28
+ # search_form = page.form_with(:name => "f")
29
+ # search_form.field_with(:name => "q").value = "Hello"
30
+ # search_results = agent.submit(search_form)
31
+ # puts search_results.body
32
+ class Mechanize
33
+
34
+ ##
35
+ # The version of Mechanize you are using.
36
+ VERSION = '2.0.2'
37
+
38
+ class Error < RuntimeError
39
+ end
40
+
41
+ ruby_version = if RUBY_PATCHLEVEL >= 0 then
42
+ "#{RUBY_VERSION}p#{RUBY_PATCHLEVEL}"
43
+ else
44
+ "#{RUBY_VERSION}dev#{RUBY_REVISION}"
45
+ end
46
+
47
+ # HTTP/1.1 keep-alives are always active. This does nothing.
48
+ attr_accessor :keep_alive
49
+
50
+ # HTTP/1.0 keep-alive time. This is no longer supported by mechanize as it
51
+ # now uses net-http-persistent which only supports HTTP/1.1 persistent
52
+ # connections
53
+ attr_accessor :keep_alive_time
54
+
55
+ ##
56
+ # User Agent aliases
57
+
58
+ AGENT_ALIASES = {
59
+ 'Windows IE 6' => 'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)',
60
+ 'Windows IE 7' => 'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; .NET CLR 1.1.4322; .NET CLR 2.0.50727)',
61
+ 'Windows Mozilla' => 'Mozilla/5.0 (Windows; U; Windows NT 5.0; en-US; rv:1.4b) Gecko/20030516 Mozilla Firebird/0.6',
62
+ 'Mac Safari' => 'Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_2; de-at) AppleWebKit/531.21.8 (KHTML, like Gecko) Version/4.0.4 Safari/531.21.10',
63
+ 'Mac FireFox' => 'Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10.6; en-US; rv:1.9.2) Gecko/20100115 Firefox/3.6',
64
+ 'Mac Mozilla' => 'Mozilla/5.0 (Macintosh; U; PPC Mac OS X Mach-O; en-US; rv:1.4a) Gecko/20030401',
65
+ 'Linux Mozilla' => 'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.4) Gecko/20030624',
66
+ 'Linux Firefox' => 'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.2.1) Gecko/20100122 firefox/3.6.1',
67
+ 'Linux Konqueror' => 'Mozilla/5.0 (compatible; Konqueror/3; Linux)',
68
+ 'iPhone' => 'Mozilla/5.0 (iPhone; U; CPU like Mac OS X; en) AppleWebKit/420+ (KHTML, like Gecko) Version/3.0 Mobile/1C28 Safari/419.3',
69
+ 'Mechanize' => "Mechanize/#{VERSION} Ruby/#{ruby_version} (http://github.com/tenderlove/mechanize/)"
70
+ }
71
+
72
+ # A Mechanize::CookieJar which stores cookies
73
+
74
+ def cookie_jar
75
+ @agent.cookie_jar
76
+ end
77
+
78
+ def cookie_jar= cookie_jar
79
+ @agent.cookie_jar = cookie_jar
80
+ end
81
+
82
+ # Length of time to wait until a connection is opened in seconds
83
+ def open_timeout
84
+ @agent.open_timeout
85
+ end
86
+
87
+ def open_timeout= open_timeout
88
+ @agent.open_timeout = open_timeout
89
+ end
90
+
91
+ # Length of time to attempt to read data from the server
92
+ def read_timeout
93
+ @agent.read_timeout
94
+ end
95
+
96
+ def read_timeout= read_timeout
97
+ @agent.read_timeout = read_timeout
98
+ end
99
+
100
+ # The identification string for the client initiating a web request
101
+ def user_agent
102
+ @agent.user_agent
103
+ end
104
+
105
+ # The value of watch_for_set is passed to pluggable parsers for retrieved
106
+ # content
107
+ attr_accessor :watch_for_set
108
+
109
+ # Path to an OpenSSL server certificate file
110
+ def ca_file
111
+ @agent.ca_file
112
+ end
113
+
114
+ def ca_file= ca_file
115
+ @agent.ca_file = ca_file
116
+ end
117
+
118
+ def certificate
119
+ @agent.certificate
120
+ end
121
+
122
+ # An OpenSSL private key or the path to a private key
123
+ def key
124
+ @agent.key
125
+ end
126
+
127
+ def key= key
128
+ @agent.key = key
129
+ end
130
+
131
+ # An OpenSSL client certificate or the path to a certificate file.
132
+ def cert
133
+ @agent.cert
134
+ end
135
+
136
+ def cert= cert
137
+ @agent.cert = cert
138
+ end
139
+
140
+ # OpenSSL key password
141
+ def pass
142
+ @agent.pass
143
+ end
144
+
145
+ def pass= pass
146
+ @agent.pass = pass
147
+ end
148
+
149
+ # Controls how this agent deals with redirects. The following values are
150
+ # allowed:
151
+ #
152
+ # :all, true:: All 3xx redirects are followed (default)
153
+ # :permanent:: Only 301 Moved Permanently redirects are followed
154
+ # false:: No redirects are followed
155
+
156
+ def redirect_ok
157
+ @agent.redirect_ok
158
+ end
159
+
160
+ def redirect_ok= follow
161
+ @agent.redirect_ok = follow
162
+ end
163
+
164
+ def gzip_enabled
165
+ @agent.gzip_enabled
166
+ end
167
+
168
+ # Enables or disables HTTP/1.1 gzip compression (enabled by default)
169
+ def gzip_enabled=enabled
170
+ @agent.gzip_enabled = enabled
171
+ end
172
+
173
+ def conditional_requests
174
+ @agent.conditional_requests
175
+ end
176
+
177
+ # Enables or disables If-Modified-Since conditional requests (enabled by default)
178
+ def conditional_requests= enabled
179
+ @agent.conditional_requests = enabled
180
+ end
181
+
182
+ # Follow HTML meta refresh. If set to +:anywhere+ meta refresh tags outside
183
+ # of the head element will be followed.
184
+ def follow_meta_refresh
185
+ @agent.follow_meta_refresh
186
+ end
187
+
188
+ def follow_meta_refresh= follow
189
+ @agent.follow_meta_refresh = follow
190
+ end
191
+
192
+ # A callback for additional certificate verification. See
193
+ # OpenSSL::SSL::SSLContext#verify_callback
194
+ #
195
+ # The callback can be used for debugging or to ignore errors by always
196
+ # returning +true+. Specifying nil uses the default method that was valid
197
+ # when the SSLContext was created
198
+ def verify_callback
199
+ @agent.verify_callback
200
+ end
201
+
202
+ def verify_callback= verify_callback
203
+ @agent.verify_callback = verify_callback
204
+ end
205
+
206
+ attr_accessor :history_added
207
+
208
+ def redirection_limit
209
+ @agent.redirection_limit
210
+ end
211
+
212
+ def redirection_limit= limit
213
+ @agent.redirection_limit = limit
214
+ end
215
+
216
+ def scheme_handlers
217
+ @agent.scheme_handlers
218
+ end
219
+
220
+ def scheme_handlers= scheme_handlers
221
+ @agent.scheme_handlers = scheme_handlers
222
+ end
223
+
224
+ # A hash of custom request headers
225
+ def request_headers
226
+ @agent.request_headers
227
+ end
228
+
229
+ def request_headers= request_headers
230
+ @agent.request_headers = request_headers
231
+ end
232
+
233
+ # Proxy settings
234
+ attr_reader :proxy_addr
235
+ attr_reader :proxy_pass
236
+ attr_reader :proxy_port
237
+ attr_reader :proxy_user
238
+
239
+ # The HTML parser to be used when parsing documents
240
+ attr_accessor :html_parser
241
+
242
+ attr_reader :agent # :nodoc:
243
+
244
+ def history
245
+ @agent.history
246
+ end
247
+
248
+ attr_reader :pluggable_parser
249
+
250
+ # A list of hooks to call after retrieving a response. Hooks are called with
251
+ # the agent and the response returned.
252
+
253
+ def post_connect_hooks
254
+ @agent.post_connect_hooks
255
+ end
256
+
257
+ # A list of hooks to call before making a request. Hooks are called with
258
+ # the agent and the request to be performed.
259
+
260
+ def pre_connect_hooks
261
+ @agent.pre_connect_hooks
262
+ end
263
+
264
+ alias follow_redirect? redirect_ok
265
+
266
+ @html_parser = Nokogiri::HTML
267
+ class << self
268
+ attr_accessor :html_parser, :log
269
+
270
+ def inherited(child)
271
+ child.html_parser ||= html_parser
272
+ child.log ||= log
273
+ super
274
+ end
275
+ end
276
+
277
+ # A default encoding name used when parsing HTML. When set it is
278
+ # used after any other encoding. The default is nil.
279
+
280
+ attr_accessor :default_encoding
281
+
282
+ # Overrides the encodings given by the HTTP server and the HTML page with
283
+ # the default_encoding when set to true.
284
+ attr_accessor :force_default_encoding
285
+
286
+ def initialize
287
+ @agent = Mechanize::HTTP::Agent.new
288
+ @agent.context = self
289
+
290
+ # attr_accessors
291
+ @agent.user_agent = AGENT_ALIASES['Mechanize']
292
+ @watch_for_set = nil
293
+ @history_added = nil
294
+
295
+ # attr_readers
296
+ @pluggable_parser = PluggableParser.new
297
+
298
+ @keep_alive = true
299
+ @keep_alive_time = 0
300
+
301
+ # Proxy
302
+ @proxy_addr = nil
303
+ @proxy_port = nil
304
+ @proxy_user = nil
305
+ @proxy_pass = nil
306
+
307
+ @html_parser = self.class.html_parser
308
+
309
+ @default_encoding = nil
310
+ @force_default_encoding = false
311
+
312
+ yield self if block_given?
313
+
314
+ @agent.set_proxy @proxy_addr, @proxy_port, @proxy_user, @proxy_pass
315
+ @agent.set_http
316
+ end
317
+
318
+ def max_history
319
+ @agent.history.max_size
320
+ end
321
+
322
+ def max_history= length
323
+ @agent.history.max_size = length
324
+ end
325
+
326
+ def log=(l); self.class.log = l end
327
+ def log; self.class.log end
328
+
329
+ def user_agent= user_agent
330
+ @agent.user_agent = user_agent
331
+ end
332
+
333
+ # Set the user agent for the Mechanize object. See AGENT_ALIASES
334
+ def user_agent_alias=(al)
335
+ self.user_agent = AGENT_ALIASES[al] ||
336
+ raise(ArgumentError, "unknown agent alias #{al.inspect}")
337
+ end
338
+
339
+ # Returns a list of cookies stored in the cookie jar.
340
+ def cookies
341
+ @agent.cookie_jar.to_a
342
+ end
343
+
344
+ # Sets the user and password to be used for authentication.
345
+ def auth(user, password)
346
+ @agent.user = user
347
+ @agent.password = password
348
+ end
349
+
350
+ alias :basic_auth :auth
351
+
352
+ # Fetches the URL passed in and returns a page.
353
+ def get(uri, parameters = [], referer = nil, headers = {})
354
+ method = :get
355
+
356
+ if Hash === uri then
357
+ options = uri
358
+ location = Gem.location_of_caller.join ':'
359
+ warn "#{location}: Mechanize#get with options hash is deprecated and will be removed October 2011"
360
+
361
+ raise ArgumentError, "url must be specified" unless uri = options[:url]
362
+ parameters = options[:params] || []
363
+ referer = options[:referer]
364
+ headers = options[:headers]
365
+ method = options[:verb] || method
366
+ end
367
+
368
+ referer ||=
369
+ if uri.to_s =~ %r{\Ahttps?://}
370
+ Page.new(nil, {'content-type'=>'text/html'})
371
+ else
372
+ current_page || Page.new(nil, {'content-type'=>'text/html'})
373
+ end
374
+
375
+ # FIXME: Huge hack so that using a URI as a referer works. I need to
376
+ # refactor everything to pass around URIs but still support
377
+ # Mechanize::Page#base
378
+ unless referer.is_a?(Mechanize::File)
379
+ referer = referer.is_a?(String) ?
380
+ Page.new(URI.parse(referer), {'content-type' => 'text/html'}) :
381
+ Page.new(referer, {'content-type' => 'text/html'})
382
+ end
383
+
384
+ # fetch the page
385
+ headers ||= {}
386
+ page = @agent.fetch uri, method, headers, parameters, referer
387
+ add_to_history(page)
388
+ yield page if block_given?
389
+ page
390
+ end
391
+
392
+ ##
393
+ # PUT to +url+ with +entity+, and setting +headers+:
394
+ #
395
+ # put('http://example/', 'new content', {'Content-Type' => 'text/plain'})
396
+ #
397
+ def put(url, entity, headers = {})
398
+ request_with_entity(:put, url, entity, headers)
399
+ end
400
+
401
+ ##
402
+ # DELETE to +uri+ with +query_params+, and setting +headers+:
403
+ #
404
+ # delete('http://example/', {'q' => 'foo'}, {})
405
+ #
406
+ def delete(uri, query_params = {}, headers = {})
407
+ page = @agent.fetch(uri, :delete, headers, query_params)
408
+ add_to_history(page)
409
+ page
410
+ end
411
+
412
+ ##
413
+ # HEAD to +uri+ with +query_params+, and setting +headers+:
414
+ #
415
+ # head('http://example/', {'q' => 'foo'}, {})
416
+ #
417
+ def head(uri, query_params = {}, headers = {})
418
+ # fetch the page
419
+ page = @agent.fetch(uri, :head, headers, query_params)
420
+ yield page if block_given?
421
+ page
422
+ end
423
+
424
+ # Fetch a file and return the contents of the file.
425
+ def get_file(url)
426
+ get(url).body
427
+ end
428
+
429
+ # If the parameter is a string, finds the button or link with the
430
+ # value of the string and clicks it. Otherwise, clicks the
431
+ # Mechanize::Page::Link object passed in. Returns the page fetched.
432
+ def click(link)
433
+ case link
434
+ when Page::Link
435
+ referer = link.page || current_page()
436
+ if @agent.robots
437
+ if (referer.is_a?(Page) && referer.parser.nofollow?) || link.rel?('nofollow')
438
+ raise RobotsDisallowedError.new(link.href)
439
+ end
440
+ end
441
+ if link.rel?('noreferrer')
442
+ href = @agent.resolve(link.href, link.page || current_page)
443
+ referer = Page.new(nil, {'content-type'=>'text/html'})
444
+ else
445
+ href = link.href
446
+ end
447
+ get href, [], referer
448
+ when String, Regexp
449
+ if real_link = page.link_with(:text => link)
450
+ click real_link
451
+ else
452
+ button = nil
453
+ form = page.forms.find do |f|
454
+ button = f.button_with(:value => link)
455
+ button.is_a? Form::Submit
456
+ end
457
+ submit form, button if form
458
+ end
459
+ else
460
+ referer = current_page()
461
+ href = link.respond_to?(:href) ? link.href :
462
+ (link['href'] || link['src'])
463
+ get href, [], referer
464
+ end
465
+ end
466
+
467
+ # Equivalent to the browser back button. Returns the most recent page
468
+ # visited.
469
+ def back
470
+ @agent.history.pop
471
+ end
472
+
473
+ # Posts to the given URL with the request entity. The request
474
+ # entity is specified by either a string, or a list of key-value
475
+ # pairs represented by a hash or an array of arrays.
476
+ #
477
+ # Examples:
478
+ # agent.post('http://example.com/', "foo" => "bar")
479
+ #
480
+ # agent.post('http://example.com/', [ ["foo", "bar"] ])
481
+ #
482
+ # agent.post('http://example.com/', "<message>hello</message>", 'Content-Type' => 'application/xml')
483
+ def post(url, query={}, headers={})
484
+ if query.is_a?(String)
485
+ return request_with_entity(:post, url, query, headers)
486
+ end
487
+ node = {}
488
+ # Create a fake form
489
+ class << node
490
+ def search(*args); []; end
491
+ end
492
+ node['method'] = 'POST'
493
+ node['enctype'] = 'application/x-www-form-urlencoded'
494
+
495
+ form = Form.new(node)
496
+
497
+ query.each { |k, v|
498
+ if v.is_a?(IO)
499
+ form.enctype = 'multipart/form-data'
500
+ ul = Form::FileUpload.new({'name' => k.to_s},::File.basename(v.path))
501
+ ul.file_data = v.read
502
+ form.file_uploads << ul
503
+ else
504
+ form.fields << Form::Field.new({'name' => k.to_s},v)
505
+ end
506
+ }
507
+ post_form(url, form, headers)
508
+ end
509
+
510
+ # Submit a form with an optional button.
511
+ # Without a button:
512
+ # page = agent.get('http://example.com')
513
+ # agent.submit(page.forms.first)
514
+ # With a button:
515
+ # agent.submit(page.forms.first, page.forms.first.buttons.first)
516
+ def submit(form, button=nil, headers={})
517
+ form.add_button_to_query(button) if button
518
+ case form.method.upcase
519
+ when 'POST'
520
+ post_form(form.action, form, headers)
521
+ when 'GET'
522
+ get(form.action.gsub(/\?[^\?]*$/, ''),
523
+ form.build_query,
524
+ form.page,
525
+ headers)
526
+ else
527
+ raise ArgumentError, "unsupported method: #{form.method.upcase}"
528
+ end
529
+ end
530
+
531
+ def request_with_entity(verb, uri, entity, headers = {})
532
+ cur_page = current_page || Page.new(nil, {'content-type'=>'text/html'})
533
+
534
+ headers = {
535
+ 'Content-Type' => 'application/octet-stream',
536
+ 'Content-Length' => entity.size.to_s,
537
+ }.update headers
538
+
539
+ page = @agent.fetch uri, verb, headers, [entity], cur_page
540
+ add_to_history(page)
541
+ page
542
+ end
543
+
544
+ # Returns the current page loaded by Mechanize
545
+ def current_page
546
+ @agent.current_page
547
+ end
548
+
549
+ # Returns a visited page for the url passed in, otherwise nil
550
+ def visited_page(url)
551
+ url = url.href if url.respond_to? :href
552
+
553
+ @agent.visited_page url
554
+ end
555
+
556
+ # Returns whether or not a url has been visited
557
+ alias visited? visited_page
558
+
559
+ def parse uri, response, body
560
+ content_type = nil
561
+
562
+ unless response['Content-Type'].nil?
563
+ data, = response['Content-Type'].split ';', 2
564
+ content_type, = data.downcase.split ',', 2 unless data.nil?
565
+ end
566
+
567
+ # Find our pluggable parser
568
+ parser_klass = @pluggable_parser.parser content_type
569
+
570
+ parser_klass.new uri, response, body, response.code do |parser|
571
+ parser.mech = self if parser.respond_to? :mech=
572
+
573
+ parser.watch_for_set = @watch_for_set if
574
+ @watch_for_set and parser.respond_to?(:watch_for_set=)
575
+ end
576
+ end
577
+
578
+ ##
579
+ # Sets the proxy +address+ at +port+ with an optional +user+ and +password+
580
+
581
+ def set_proxy address, port, user = nil, password = nil
582
+ @proxy_addr = address
583
+ @proxy_port = port
584
+ @proxy_user = user
585
+ @proxy_pass = password
586
+
587
+ @agent.set_proxy address, port, user, password
588
+ @agent.set_http
589
+ end
590
+
591
+ # Runs given block, then resets the page history as it was before. self is
592
+ # given as a parameter to the block. Returns the value of the block.
593
+ def transact
594
+ history_backup = @agent.history.dup
595
+ begin
596
+ yield self
597
+ ensure
598
+ @agent.history = history_backup
599
+ end
600
+ end
601
+
602
+ def robots
603
+ @agent.robots
604
+ end
605
+
606
+ def robots= enabled
607
+ @agent.robots = enabled
608
+ end
609
+
610
+ alias :page :current_page
611
+
612
+ private
613
+
614
+ def post_form(uri, form, headers = {})
615
+ cur_page = form.page || current_page ||
616
+ Page.new(nil, {'content-type'=>'text/html'})
617
+
618
+ request_data = form.request_data
619
+
620
+ log.debug("query: #{ request_data.inspect }") if log
621
+
622
+ headers = {
623
+ 'Content-Type' => form.enctype,
624
+ 'Content-Length' => request_data.size.to_s,
625
+ }.merge headers
626
+
627
+ # fetch the page
628
+ page = @agent.fetch uri, :post, headers, [request_data], cur_page
629
+ add_to_history(page)
630
+ page
631
+ end
632
+
633
+ def add_to_history(page)
634
+ @agent.history.push(page, @agent.resolve(page.uri))
635
+ @history_added.call(page) if @history_added
636
+ end
637
+
638
+ end
639
+
640
+ require 'mechanize/content_type_error'
641
+ require 'mechanize/cookie'
642
+ require 'mechanize/cookie_jar'
643
+ require 'mechanize/file'
644
+ require 'mechanize/file_connection'
645
+ require 'mechanize/file_request'
646
+ require 'mechanize/file_response'
647
+ require 'mechanize/form'
648
+ require 'mechanize/history'
649
+ require 'mechanize/http'
650
+ require 'mechanize/http/agent'
651
+ require 'mechanize/page'
652
+ require 'mechanize/inspect'
653
+ require 'mechanize/monkey_patch'
654
+ require 'mechanize/pluggable_parsers'
655
+ require 'mechanize/redirect_limit_reached_error'
656
+ require 'mechanize/redirect_not_get_or_head_error'
657
+ require 'mechanize/response_code_error'
658
+ require 'mechanize/response_read_error'
659
+ require 'mechanize/robots_disallowed_error'
660
+ require 'mechanize/unsupported_scheme_error'
661
+ require 'mechanize/util'
662
+