neocoin-mechanize 2.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (174) hide show
  1. data/.autotest +6 -0
  2. data/.gemtest +0 -0
  3. data/CHANGELOG.rdoc +638 -0
  4. data/EXAMPLES.rdoc +187 -0
  5. data/FAQ.rdoc +11 -0
  6. data/GUIDE.rdoc +163 -0
  7. data/LICENSE.rdoc +20 -0
  8. data/Manifest.txt +172 -0
  9. data/README.rdoc +63 -0
  10. data/Rakefile +36 -0
  11. data/examples/flickr_upload.rb +22 -0
  12. data/examples/mech-dump.rb +5 -0
  13. data/examples/proxy_req.rb +7 -0
  14. data/examples/rubyforge.rb +20 -0
  15. data/examples/spider.rb +21 -0
  16. data/lib/mechanize.rb +662 -0
  17. data/lib/mechanize/content_type_error.rb +14 -0
  18. data/lib/mechanize/cookie.rb +85 -0
  19. data/lib/mechanize/cookie_jar.rb +241 -0
  20. data/lib/mechanize/element_matcher.rb +35 -0
  21. data/lib/mechanize/file.rb +80 -0
  22. data/lib/mechanize/file_connection.rb +17 -0
  23. data/lib/mechanize/file_request.rb +26 -0
  24. data/lib/mechanize/file_response.rb +74 -0
  25. data/lib/mechanize/file_saver.rb +37 -0
  26. data/lib/mechanize/form.rb +478 -0
  27. data/lib/mechanize/form/button.rb +9 -0
  28. data/lib/mechanize/form/check_box.rb +11 -0
  29. data/lib/mechanize/form/field.rb +44 -0
  30. data/lib/mechanize/form/file_upload.rb +23 -0
  31. data/lib/mechanize/form/image_button.rb +20 -0
  32. data/lib/mechanize/form/multi_select_list.rb +83 -0
  33. data/lib/mechanize/form/option.rb +49 -0
  34. data/lib/mechanize/form/radio_button.rb +48 -0
  35. data/lib/mechanize/form/select_list.rb +40 -0
  36. data/lib/mechanize/headers.rb +25 -0
  37. data/lib/mechanize/history.rb +83 -0
  38. data/lib/mechanize/http.rb +3 -0
  39. data/lib/mechanize/http/agent.rb +738 -0
  40. data/lib/mechanize/inspect.rb +88 -0
  41. data/lib/mechanize/monkey_patch.rb +37 -0
  42. data/lib/mechanize/page.rb +408 -0
  43. data/lib/mechanize/page/base.rb +8 -0
  44. data/lib/mechanize/page/frame.rb +27 -0
  45. data/lib/mechanize/page/image.rb +30 -0
  46. data/lib/mechanize/page/label.rb +20 -0
  47. data/lib/mechanize/page/link.rb +82 -0
  48. data/lib/mechanize/page/meta_refresh.rb +56 -0
  49. data/lib/mechanize/pluggable_parsers.rb +101 -0
  50. data/lib/mechanize/redirect_limit_reached_error.rb +16 -0
  51. data/lib/mechanize/redirect_not_get_or_head_error.rb +19 -0
  52. data/lib/mechanize/response_code_error.rb +22 -0
  53. data/lib/mechanize/response_read_error.rb +27 -0
  54. data/lib/mechanize/robots_disallowed_error.rb +29 -0
  55. data/lib/mechanize/unsupported_scheme_error.rb +8 -0
  56. data/lib/mechanize/util.rb +113 -0
  57. data/test/data/htpasswd +1 -0
  58. data/test/data/server.crt +16 -0
  59. data/test/data/server.csr +12 -0
  60. data/test/data/server.key +15 -0
  61. data/test/data/server.pem +15 -0
  62. data/test/helper.rb +175 -0
  63. data/test/htdocs/alt_text.html +10 -0
  64. data/test/htdocs/bad_form_test.html +9 -0
  65. data/test/htdocs/button.jpg +0 -0
  66. data/test/htdocs/canonical_uri.html +9 -0
  67. data/test/htdocs/dir with spaces/foo.html +1 -0
  68. data/test/htdocs/empty_form.html +6 -0
  69. data/test/htdocs/file_upload.html +26 -0
  70. data/test/htdocs/find_link.html +41 -0
  71. data/test/htdocs/form_multi_select.html +16 -0
  72. data/test/htdocs/form_multival.html +37 -0
  73. data/test/htdocs/form_no_action.html +18 -0
  74. data/test/htdocs/form_no_input_name.html +16 -0
  75. data/test/htdocs/form_select.html +16 -0
  76. data/test/htdocs/form_select_all.html +16 -0
  77. data/test/htdocs/form_select_none.html +17 -0
  78. data/test/htdocs/form_select_noopts.html +10 -0
  79. data/test/htdocs/form_set_fields.html +14 -0
  80. data/test/htdocs/form_test.html +188 -0
  81. data/test/htdocs/frame_referer_test.html +10 -0
  82. data/test/htdocs/frame_test.html +30 -0
  83. data/test/htdocs/google.html +13 -0
  84. data/test/htdocs/iframe_test.html +16 -0
  85. data/test/htdocs/index.html +6 -0
  86. data/test/htdocs/link with space.html +5 -0
  87. data/test/htdocs/meta_cookie.html +11 -0
  88. data/test/htdocs/no_title_test.html +6 -0
  89. data/test/htdocs/nofollow.html +9 -0
  90. data/test/htdocs/noindex.html +9 -0
  91. data/test/htdocs/norobots.html +8 -0
  92. data/test/htdocs/rails_3_encoding_hack_form_test.html +27 -0
  93. data/test/htdocs/rel_nofollow.html +8 -0
  94. data/test/htdocs/relative/tc_relative_links.html +21 -0
  95. data/test/htdocs/robots.html +8 -0
  96. data/test/htdocs/robots.txt +2 -0
  97. data/test/htdocs/tc_bad_charset.html +9 -0
  98. data/test/htdocs/tc_bad_links.html +5 -0
  99. data/test/htdocs/tc_base_images.html +10 -0
  100. data/test/htdocs/tc_base_link.html +8 -0
  101. data/test/htdocs/tc_blank_form.html +11 -0
  102. data/test/htdocs/tc_charset.html +6 -0
  103. data/test/htdocs/tc_checkboxes.html +19 -0
  104. data/test/htdocs/tc_encoded_links.html +5 -0
  105. data/test/htdocs/tc_field_precedence.html +11 -0
  106. data/test/htdocs/tc_follow_meta.html +8 -0
  107. data/test/htdocs/tc_form_action.html +48 -0
  108. data/test/htdocs/tc_images.html +8 -0
  109. data/test/htdocs/tc_links.html +18 -0
  110. data/test/htdocs/tc_meta_in_body.html +9 -0
  111. data/test/htdocs/tc_no_attributes.html +16 -0
  112. data/test/htdocs/tc_pretty_print.html +17 -0
  113. data/test/htdocs/tc_radiobuttons.html +17 -0
  114. data/test/htdocs/tc_referer.html +16 -0
  115. data/test/htdocs/tc_relative_links.html +19 -0
  116. data/test/htdocs/tc_textarea.html +23 -0
  117. data/test/htdocs/test_bad_encoding.html +52 -0
  118. data/test/htdocs/test_click.html +11 -0
  119. data/test/htdocs/unusual______.html +5 -0
  120. data/test/servlets.rb +402 -0
  121. data/test/ssl_server.rb +48 -0
  122. data/test/test_cookies.rb +129 -0
  123. data/test/test_form_action.rb +52 -0
  124. data/test/test_form_as_hash.rb +59 -0
  125. data/test/test_form_button.rb +46 -0
  126. data/test/test_frames.rb +34 -0
  127. data/test/test_headers.rb +33 -0
  128. data/test/test_history.rb +118 -0
  129. data/test/test_history_added.rb +16 -0
  130. data/test/test_html_unscape_forms.rb +46 -0
  131. data/test/test_if_modified_since.rb +20 -0
  132. data/test/test_images.rb +19 -0
  133. data/test/test_mechanize.rb +842 -0
  134. data/test/test_mechanize_cookie.rb +345 -0
  135. data/test/test_mechanize_cookie_jar.rb +401 -0
  136. data/test/test_mechanize_file.rb +53 -0
  137. data/test/test_mechanize_file_request.rb +19 -0
  138. data/test/test_mechanize_file_response.rb +21 -0
  139. data/test/test_mechanize_form.rb +576 -0
  140. data/test/test_mechanize_form_check_box.rb +37 -0
  141. data/test/test_mechanize_form_encoding.rb +120 -0
  142. data/test/test_mechanize_form_field.rb +21 -0
  143. data/test/test_mechanize_form_image_button.rb +12 -0
  144. data/test/test_mechanize_form_textarea.rb +51 -0
  145. data/test/test_mechanize_http_agent.rb +697 -0
  146. data/test/test_mechanize_link.rb +84 -0
  147. data/test/test_mechanize_page_encoding.rb +147 -0
  148. data/test/test_mechanize_page_link.rb +382 -0
  149. data/test/test_mechanize_page_meta_refresh.rb +115 -0
  150. data/test/test_mechanize_redirect_not_get_or_head_error.rb +18 -0
  151. data/test/test_mechanize_subclass.rb +22 -0
  152. data/test/test_mechanize_util.rb +92 -0
  153. data/test/test_multi_select.rb +118 -0
  154. data/test/test_no_attributes.rb +13 -0
  155. data/test/test_option.rb +18 -0
  156. data/test/test_pluggable_parser.rb +136 -0
  157. data/test/test_post_form.rb +37 -0
  158. data/test/test_pretty_print.rb +22 -0
  159. data/test/test_radiobutton.rb +75 -0
  160. data/test/test_redirect_limit_reached.rb +39 -0
  161. data/test/test_referer.rb +81 -0
  162. data/test/test_relative_links.rb +40 -0
  163. data/test/test_request.rb +13 -0
  164. data/test/test_response_code.rb +53 -0
  165. data/test/test_robots.rb +72 -0
  166. data/test/test_save_file.rb +48 -0
  167. data/test/test_scheme.rb +48 -0
  168. data/test/test_select.rb +119 -0
  169. data/test/test_select_all.rb +15 -0
  170. data/test/test_select_none.rb +15 -0
  171. data/test/test_select_noopts.rb +18 -0
  172. data/test/test_set_fields.rb +44 -0
  173. data/test/test_ssl_server.rb +20 -0
  174. metadata +354 -0
data/README.rdoc ADDED
@@ -0,0 +1,63 @@
1
+ = Mechanize
2
+
3
+ * http://mechanize.rubyforge.org
4
+ * http://github.com/tenderlove/mechanize/tree/master
5
+
6
+ == DESCRIPTION
7
+
8
+ The Mechanize library is used for automating interaction with websites.
9
+ Mechanize automatically stores and sends cookies, follows redirects,
10
+ can follow links, and submit forms. Form fields can be populated and
11
+ submitted. Mechanize also keeps track of the sites that you have visited as
12
+ a history.
13
+
14
+ == Dependencies
15
+
16
+ * ruby 1.8.7
17
+ * nokogiri[http://nokogiri.rubyforge.org]
18
+
19
+ == SUPPORT:
20
+
21
+ The mechanize mailing list is available here:
22
+
23
+ * http://rubyforge.org/mailman/listinfo/mechanize-users
24
+
25
+ The bug tracker is available here:
26
+
27
+ * http://github.com/tenderlove/mechanize/issues
28
+
29
+ == Examples
30
+
31
+ If you are just starting, check out the GUIDE.
32
+ Also, check out the EXAMPLES file.
33
+
34
+ == Authors
35
+
36
+ Copyright (c) 2005 by Michael Neumann (mneumann@ntecs.de)
37
+
38
+ Copyright (c) 2006-2011:
39
+
40
+ * {Aaron Patterson}[http://tenderlovemaking.com] (aaronp@rubyforge.org)
41
+ * {Mike Dalessio}[http://mike.daless.io] (mike@csa.net)
42
+
43
+ Copyright (c) 2011:
44
+
45
+ * {Eric Hodel}[http://blog.segment7.net] (drbrain@segment7.net)
46
+
47
+ This library comes with a shameless plug for employing me
48
+ (Aaron[http://tenderlovemaking.com/]) programming Ruby, my favorite language!
49
+
50
+ == Acknowledgments
51
+
52
+ This library was heavily influenced by its namesake in the perl world. A big
53
+ thanks goes to Andy Lester (andy@petdance.com), the author of the original
54
+ perl Mechanize which is available here[http://search.cpan.org/~petdance/WWW-Mechanize-1.20/]. Ruby Mechanize would not be around without you!
55
+
56
+ Thank you to Michael Neumann for starting the Ruby version. Thanks to everyone
57
+ who's helped out in various ways. Finally, thank you to the people using this
58
+ library!
59
+
60
+ == License
61
+
62
+ This library is distributed under the MIT license. Please see the LICENSE file.
63
+
data/Rakefile ADDED
@@ -0,0 +1,36 @@
1
+ require 'rubygems'
2
+ require 'hoe'
3
+
4
+ Hoe.plugin :git
5
+ Hoe.plugin :minitest
6
+
7
# Gem specification for neocoin-mechanize, declared through Hoe.
Hoe.spec 'neocoin-mechanize' do
  # Maintainers, registered in the same order as before.
  [
    ['Eric Hodel',      'drbrain@segment7.net'],
    ['Aaron Patterson', 'aaronp@rubyforge.org'],
    ['Mike Dalessio',   'mike.dalessio@gmail.com']
  ].each do |name, email|
    developer name, email
  end

  self.readme_file      = 'README.rdoc'
  self.history_file     = 'CHANGELOG.rdoc'
  self.extra_rdoc_files += Dir['*.rdoc']

  rdoc_locations << 'drbrain@rubyforge.org:/var/www/gforge-projects/mechanize/'

  # Runtime dependencies, appended in the original order.
  self.extra_deps.push(
    ['nokogiri', '~> 1.4'],
    ['net-http-persistent', '~> 1.8'],
    ['net-http-digest_auth', '~> 1.1', '>= 1.1.1'],
    ['webrobots', '~> 0.0', '>= 0.0.9']
  )

  self.spec_extras[:required_ruby_version] = '>= 1.8.7'
end
25
+
26
desc "Update SSL Certificate"
task('ssl_cert') do |p|
  # Each command runs through rake's +sh+, in order; a failing command
  # aborts the task before the later ones run.
  [
    "openssl genrsa -des3 -out server.key 1024",
    "openssl req -new -key server.key -out server.csr",
    "cp server.key server.key.org",
    "openssl rsa -in server.key.org -out server.key",
    "openssl x509 -req -days 365 -in server.csr -signkey server.key -out server.crt",
    "cp server.key server.pem",
    "mv server.key server.csr server.crt server.pem test/data/",
    "rm server.key.org"
  ].each do |command|
    sh command
  end
end
@@ -0,0 +1,22 @@
1
require 'rubygems'
require 'mechanize'

# Logs in to flickr and uploads a file.
#
# Usage: ruby flickr_upload.rb EMAIL PASSWORD FILE

agent = Mechanize.new

# Get the flickr sign in page
page = agent.get 'http://flickr.com/signin/flickr/'

# Fill out the login form
form = page.form_with :name => 'flickrloginform'
form.email = ARGV[0]
form.password = ARGV[1]
form.submit

# Go to the upload page
page = page.link_with(:text => 'Upload').click

# Fill out the upload form. Page#forms and Form#file_uploads are plain
# arrays, so the elements are looked up with the *_with finders (the same
# API used for the login form above) instead of list methods such as
# forms.action(...).first.
form = page.form_with :action => '/photos_upload_process.gne'
form.file_upload_with(:name => 'file1').file_name = ARGV[2]
form.submit
22
+
@@ -0,0 +1,5 @@
1
require 'rubygems'
require 'mechanize'

# Fetches the URL given as the first command-line argument and prints the
# inspected page.
url  = ARGV[0]
page = Mechanize.new.get(url)
puts page.inspect
@@ -0,0 +1,7 @@
1
require 'rubygems'
require 'mechanize'

# Fetches the URL given as the first command-line argument through the proxy
# at localhost:8000 and prints the response body.
proxied_agent = Mechanize.new
proxied_agent.set_proxy('localhost', '8000')

puts proxied_agent.get(ARGV[0]).body
@@ -0,0 +1,20 @@
1
# Logs a user in to rubyforge and prints the body of the page returned by
# the login form.
#
# Usage: ruby rubyforge.rb LOGIN PASSWORD
require 'rubygems'
require 'mechanize'
require 'logger'

# Mechanize agent that logs to standard error
agent = Mechanize.new { |a| a.log = Logger.new(STDERR) }

# Open the rubyforge front page and follow its "Log In" link
front_page = agent.get('http://rubyforge.org/')
login_link = front_page.link_with(:text => /Log In/)
login_page = agent.click login_link

# The login form is the form at index 1 (the second form on the page)
form = login_page.forms[1]
form.form_loginname = ARGV[0]
form.form_pw = ARGV[1]

# Submit through the form's first button
page = form.submit form.buttons.first

# Print the body of the resulting page
puts page.body
@@ -0,0 +1,21 @@
1
require 'rubygems'
require 'mechanize'

# Crawls the links reachable from the URL given as the first command-line
# argument, staying on the host of the first page in the agent's history.
agent   = Mechanize.new
pending = agent.get(ARGV[0]).links

while link = pending.pop
  next unless link.uri

  # Follow relative links and links on the starting host only
  host = link.uri.host
  next unless host.nil? || host == agent.history.first.uri.host
  next if agent.visited? link.href

  puts "crawling #{link.uri}"
  begin
    page = link.click
    # Only HTML pages carry further links to queue
    pending.push(*page.links) if Mechanize::Page === page
  rescue Mechanize::ResponseCodeError
    # Pages answering with an error status are skipped
  end
end
21
+
data/lib/mechanize.rb ADDED
@@ -0,0 +1,662 @@
1
+ require 'fileutils'
2
+ require 'forwardable'
3
+ require 'iconv' if RUBY_VERSION < '1.9.2'
4
+ require 'mutex_m'
5
+ require 'net/http/digest_auth'
6
+ require 'net/http/persistent'
7
+ require 'nkf'
8
+ require 'nokogiri'
9
+ require 'openssl'
10
+ require 'stringio'
11
+ require 'uri'
12
+ require 'webrick/httputils'
13
+ require 'zlib'
14
+
15
+ # = Synopsis
16
+ # The Mechanize library is used for automating interaction with a website. It
17
+ # can follow links, and submit forms. Form fields can be populated and
18
+ # submitted. A history of URLs is maintained and can be queried.
19
+ #
20
+ # == Example
21
+ # require 'rubygems'
22
+ # require 'mechanize'
23
+ # require 'logger'
24
+ #
25
+ # agent = Mechanize.new { |a| a.log = Logger.new("mech.log") }
26
+ # agent.user_agent_alias = 'Mac Safari'
27
+ # page = agent.get("http://www.google.com/")
28
+ # search_form = page.form_with(:name => "f")
29
+ # search_form.field_with(:name => "q").value = "Hello"
30
+ # search_results = agent.submit(search_form)
31
+ # puts search_results.body
32
+ class Mechanize
33
+
34
+ ##
35
+ # The version of Mechanize you are using.
36
+ VERSION = '2.0.2'
37
+
38
+ class Error < RuntimeError
39
+ end
40
+
41
# Interpreter version string used in the default user agent: release
# interpreters report their patch level, development builds (negative patch
# level) report their revision instead.
ruby_version =
  if RUBY_PATCHLEVEL < 0
    "#{RUBY_VERSION}dev#{RUBY_REVISION}"
  else
    "#{RUBY_VERSION}p#{RUBY_PATCHLEVEL}"
  end
46
+
47
+ # HTTP/1.1 keep-alives are always active. This does nothing.
48
+ attr_accessor :keep_alive
49
+
50
+ # HTTP/1.0 keep-alive time. This is no longer supported by mechanize as it
51
+ # now uses net-http-persistent which only supports HTTP/1.1 persistent
52
+ # connections
53
+ attr_accessor :keep_alive_time
54
+
55
+ ##
56
+ # User Agent aliases
57
+
58
+ AGENT_ALIASES = {
59
+ 'Windows IE 6' => 'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)',
60
+ 'Windows IE 7' => 'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; .NET CLR 1.1.4322; .NET CLR 2.0.50727)',
61
+ 'Windows Mozilla' => 'Mozilla/5.0 (Windows; U; Windows NT 5.0; en-US; rv:1.4b) Gecko/20030516 Mozilla Firebird/0.6',
62
+ 'Mac Safari' => 'Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_2; de-at) AppleWebKit/531.21.8 (KHTML, like Gecko) Version/4.0.4 Safari/531.21.10',
63
+ 'Mac FireFox' => 'Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10.6; en-US; rv:1.9.2) Gecko/20100115 Firefox/3.6',
64
+ 'Mac Mozilla' => 'Mozilla/5.0 (Macintosh; U; PPC Mac OS X Mach-O; en-US; rv:1.4a) Gecko/20030401',
65
+ 'Linux Mozilla' => 'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.4) Gecko/20030624',
66
+ 'Linux Firefox' => 'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.2.1) Gecko/20100122 firefox/3.6.1',
67
+ 'Linux Konqueror' => 'Mozilla/5.0 (compatible; Konqueror/3; Linux)',
68
+ 'iPhone' => 'Mozilla/5.0 (iPhone; U; CPU like Mac OS X; en) AppleWebKit/420+ (KHTML, like Gecko) Version/3.0 Mobile/1C28 Safari/419.3',
69
+ 'Mechanize' => "Mechanize/#{VERSION} Ruby/#{ruby_version} (http://github.com/tenderlove/mechanize/)"
70
+ }
71
+
72
+ # A Mechanize::CookieJar which stores cookies
73
+
74
##
# Returns the cookie jar held by the underlying HTTP agent.

def cookie_jar
  @agent.cookie_jar
end

##
# Replaces the underlying HTTP agent's cookie jar with +cookie_jar+.

def cookie_jar=(cookie_jar)
  @agent.cookie_jar = cookie_jar
end
81
+
82
+ # Length of time to wait until a connection is opened in seconds
83
##
# Returns the agent's open timeout (seconds to wait for a connection to be
# opened).

def open_timeout
  @agent.open_timeout
end

##
# Sets the agent's open timeout to +open_timeout+.

def open_timeout=(open_timeout)
  @agent.open_timeout = open_timeout
end

##
# Returns the agent's read timeout (length of time to attempt to read data
# from the server).

def read_timeout
  @agent.read_timeout
end

##
# Sets the agent's read timeout to +read_timeout+.

def read_timeout=(read_timeout)
  @agent.read_timeout = read_timeout
end
99
+
100
+ # The identification string for the client initiating a web request
101
##
# Returns the User-Agent string currently configured on the HTTP agent.

def user_agent
  @agent.user_agent
end
104
+
105
+ # The value of watch_for_set is passed to pluggable parsers for retrieved
106
+ # content
107
+ attr_accessor :watch_for_set
108
+
109
+ # Path to an OpenSSL server certificate file
110
##
# Returns the path to the OpenSSL server certificate file configured on the
# agent.

def ca_file
  @agent.ca_file
end

##
# Sets the agent's OpenSSL server certificate file to +ca_file+.

def ca_file=(ca_file)
  @agent.ca_file = ca_file
end

##
# Returns the agent's +certificate+ value.

def certificate
  @agent.certificate
end

##
# Returns the OpenSSL private key, or the path to a private key, configured
# on the agent.

def key
  @agent.key
end

##
# Sets the agent's private key (or private key path) to +key+.

def key=(key)
  @agent.key = key
end

##
# Returns the OpenSSL client certificate, or the path to a certificate file,
# configured on the agent.

def cert
  @agent.cert
end

##
# Sets the agent's client certificate (or certificate path) to +cert+.

def cert=(cert)
  @agent.cert = cert
end

##
# Returns the OpenSSL key password configured on the agent.

def pass
  @agent.pass
end

##
# Sets the agent's OpenSSL key password to +pass+.

def pass=(pass)
  @agent.pass = pass
end
148
+
149
+ # Controls how this agent deals with redirects. The following values are
150
+ # allowed:
151
+ #
152
+ # :all, true:: All 3xx redirects are followed (default)
153
+ # :permanent:: Only 301 Moved Permanently redirects are followed
154
+ # false:: No redirects are followed
155
+
156
##
# Returns the agent's current redirect-following setting.

def redirect_ok
  @agent.redirect_ok
end

##
# Sets the agent's redirect-following setting to +follow+.

def redirect_ok=(follow)
  @agent.redirect_ok = follow
end
163
+
164
##
# Returns the agent's gzip setting.

def gzip_enabled
  @agent.gzip_enabled
end

##
# Sets the agent's gzip setting to +enabled+. HTTP/1.1 gzip compression is
# enabled by default; pass a false value to disable it.

def gzip_enabled=(enabled)
  @agent.gzip_enabled = enabled
end

##
# Returns the agent's conditional-request setting.

def conditional_requests
  @agent.conditional_requests
end

##
# Sets the agent's conditional-request setting to +enabled+.
# If-Modified-Since conditional requests are enabled by default; pass a
# false value to disable them.

def conditional_requests=(enabled)
  @agent.conditional_requests = enabled
end
181
+
182
+ # Follow HTML meta refresh. If set to +:anywhere+ meta refresh tags outside
183
+ # of the head element will be followed.
184
##
# Returns the agent's meta-refresh setting.

def follow_meta_refresh
  @agent.follow_meta_refresh
end

##
# Sets the agent's meta-refresh setting to +follow+.

def follow_meta_refresh=(follow)
  @agent.follow_meta_refresh = follow
end
191
+
192
+ # A callback for additional certificate verification. See
193
+ # OpenSSL::SSL::SSLContext#verify_callback
194
+ #
195
+ # The callback can be used for debugging or to ignore errors by always
196
+ # returning +true+. Specifying nil uses the default method that was valid
197
+ # when the SSLContext was created
198
##
# Returns the certificate verification callback configured on the agent.

def verify_callback
  @agent.verify_callback
end

##
# Sets the agent's certificate verification callback to +verify_callback+.

def verify_callback=(verify_callback)
  @agent.verify_callback = verify_callback
end
205
+
206
+ attr_accessor :history_added
207
+
208
##
# Returns the agent's redirection limit.

def redirection_limit
  @agent.redirection_limit
end

##
# Sets the agent's redirection limit to +limit+.

def redirection_limit=(limit)
  @agent.redirection_limit = limit
end

##
# Returns the agent's scheme handlers.

def scheme_handlers
  @agent.scheme_handlers
end

##
# Replaces the agent's scheme handlers with +scheme_handlers+.

def scheme_handlers=(scheme_handlers)
  @agent.scheme_handlers = scheme_handlers
end
223
+
224
+ # A hash of custom request headers
225
##
# Returns the hash of custom request headers held by the agent.

def request_headers
  @agent.request_headers
end

##
# Replaces the agent's custom request headers with +request_headers+.

def request_headers=(request_headers)
  @agent.request_headers = request_headers
end
232
+
233
+ # Proxy settings
234
+ attr_reader :proxy_addr
235
+ attr_reader :proxy_pass
236
+ attr_reader :proxy_port
237
+ attr_reader :proxy_user
238
+
239
+ # The HTML parser to be used when parsing documents
240
+ attr_accessor :html_parser
241
+
242
+ attr_reader :agent # :nodoc:
243
+
244
##
# Returns the history object kept by the agent.

def history
  @agent.history
end
247
+
248
+ attr_reader :pluggable_parser
249
+
250
+ # A list of hooks to call after retrieving a response. Hooks are called with
251
+ # the agent and the response returned.
252
+
253
##
# Returns the agent's list of hooks to call after retrieving a response.

def post_connect_hooks
  @agent.post_connect_hooks
end

##
# Returns the agent's list of hooks to call before making a request. Hooks
# are called with the agent and the request to be performed.

def pre_connect_hooks
  @agent.pre_connect_hooks
end
263
+
264
+ alias follow_redirect? redirect_ok
265
+
266
+ @html_parser = Nokogiri::HTML
267
class << self
  # Class-level HTML parser and logger.
  attr_accessor :html_parser, :log

  # Hands this class's parser and logger down to +child+ for whichever of
  # the two the child does not already have, then continues the normal
  # inheritance hook chain.
  def inherited(child)
    child.html_parser = html_parser unless child.html_parser
    child.log = log unless child.log
    super
  end
end
276
+
277
+ # A default encoding name used when parsing HTML. When set it is
278
+ # used after any other encoding. The default is nil.
279
+
280
+ attr_accessor :default_encoding
281
+
282
+ # Overrides the encodings given by the HTTP server and the HTML page with
283
+ # the default_encoding when set to true.
284
+ attr_accessor :force_default_encoding
285
+
286
##
# Creates a new Mechanize instance backed by a Mechanize::HTTP::Agent.
#
# The instance is yielded to the optional block before the proxy settings
# and the HTTP connection are applied to the agent.

def initialize
  @agent = Mechanize::HTTP::Agent.new
  @agent.context = self
  @agent.user_agent = AGENT_ALIASES['Mechanize']

  # State exposed through attr_accessor
  @watch_for_set = nil
  @history_added = nil

  # State exposed through attr_reader
  @pluggable_parser = PluggableParser.new

  @keep_alive      = true
  @keep_alive_time = 0

  # No proxy until one is configured
  @proxy_addr = @proxy_port = @proxy_user = @proxy_pass = nil

  @html_parser = self.class.html_parser

  @default_encoding       = nil
  @force_default_encoding = false

  yield self if block_given?

  # Applied after the block has run
  @agent.set_proxy @proxy_addr, @proxy_port, @proxy_user, @proxy_pass
  @agent.set_http
end
317
+
318
##
# Returns the maximum size of the agent's history.

def max_history
  @agent.history.max_size
end

##
# Sets the maximum size of the agent's history to +length+.

def max_history=(length)
  @agent.history.max_size = length
end
325
+
326
##
# Sets the logger on this object's class; the logger is stored per class,
# not per instance.

def log=(l)
  self.class.log = l
end

##
# Returns the logger stored on this object's class.

def log
  self.class.log
end
328
+
329
##
# Sets the User-Agent string on the HTTP agent to +user_agent+.

def user_agent=(user_agent)
  @agent.user_agent = user_agent
end
332
+
333
+ # Set the user agent for the Mechanize object. See AGENT_ALIASES
334
##
# Sets the user agent to the AGENT_ALIASES entry named +al+.
#
# Raises ArgumentError when +al+ is not a known alias.

def user_agent_alias=(al)
  agent_string = AGENT_ALIASES[al]
  raise ArgumentError, "unknown agent alias #{al.inspect}" unless agent_string

  self.user_agent = agent_string
end
338
+
339
+ # Returns a list of cookies stored in the cookie jar.
340
##
# Returns the contents of the agent's cookie jar as an array.

def cookies
  jar = @agent.cookie_jar
  jar.to_a
end
343
+
344
+ # Sets the user and password to be used for authentication.
345
##
# Stores +user+ and +password+ on the agent as the credentials to use for
# authentication. Returns +password+.

def auth(user, password)
  @agent.user     = user
  @agent.password = password
end
349
+
350
+ alias :basic_auth :auth
351
+
352
+ # Fetches the URL passed in and returns a page.
353
##
# Fetches +uri+ with a GET request and returns the resulting page.
#
# +parameters+ are passed to the agent's fetch as the request parameters,
# +referer+ may be a page, a URI or a String, and +headers+ are extra
# request headers. The fetched page is added to the history and yielded to
# the optional block.
#
# Passing an options hash (:url, :params, :referer, :headers, :verb) as the
# first argument is deprecated and prints a warning.

def get(uri, parameters = [], referer = nil, headers = {})
  method = :get

  if Hash === uri then
    options  = uri
    location = Gem.location_of_caller.join ':'
    warn "#{location}: Mechanize#get with options hash is deprecated and will be removed October 2011"

    uri = options[:url]
    raise ArgumentError, "url must be specified" unless uri

    parameters = options[:params] || []
    referer    = options[:referer]
    headers    = options[:headers]
    method     = options[:verb] || method
  end

  unless referer
    # Absolute http(s) URLs start from a blank page; anything else is
    # resolved against the current page when there is one.
    referer = current_page unless uri.to_s =~ %r{\Ahttps?://}
    referer ||= Page.new(nil, {'content-type'=>'text/html'})
  end

  # FIXME: Huge hack so that using a URI as a referer works. I need to
  # refactor everything to pass around URIs but still support
  # Mechanize::Page#base
  unless referer.is_a?(Mechanize::File)
    referer_uri = referer.is_a?(String) ? URI.parse(referer) : referer
    referer = Page.new(referer_uri, {'content-type' => 'text/html'})
  end

  # The deprecated options form may leave the headers unset
  headers ||= {}

  page = @agent.fetch uri, method, headers, parameters, referer
  add_to_history(page)
  yield page if block_given?
  page
end
391
+
392
+ ##
393
+ # PUT to +url+ with +entity+, and setting +headers+:
394
+ #
395
+ # put('http://example/', 'new content', {'Content-Type' => 'text/plain'})
396
+ #
397
##
# Sends a PUT request to +url+ carrying +entity+ as the request body, with
# +headers+ as additional request headers. Delegates to
# request_with_entity and returns its result.

def put(url, entity, headers = {})
  request_with_entity :put, url, entity, headers
end
400
+
401
+ ##
402
+ # DELETE to +url+ with +query_params+, and setting +headers+:
403
+ #
404
+ # delete('http://example/', {'q' => 'foo'}, {})
405
+ #
406
##
# Sends a DELETE request to +uri+ with +query_params+ and +headers+.
#
# The resulting page is added to the history, yielded to the optional block
# (as #get and #head do), and returned.

def delete(uri, query_params = {}, headers = {})
  page = @agent.fetch(uri, :delete, headers, query_params)
  add_to_history(page)
  yield page if block_given?
  page
end
411
+
412
+ ##
413
+ # HEAD to +url+ with +query_params+, and setting +headers+:
414
+ #
415
+ # head('http://example/', {'q' => 'foo'}, {})
416
+ #
417
##
# Sends a HEAD request to +uri+ with +query_params+ and +headers+.
#
# The resulting page is yielded to the optional block and returned. This
# method does not add the page to the history.

def head(uri, query_params = {}, headers = {})
  response_page = @agent.fetch uri, :head, headers, query_params
  yield response_page if block_given?
  response_page
end
423
+
424
+ # Fetch a file and return the contents of the file.
425
##
# Fetches +url+ with #get and returns the body of the result.

def get_file(url)
  fetched = get url
  fetched.body
end
428
+
429
+ # If the parameter is a string, finds the button or link with the
430
+ # value of the string and clicks it. Otherwise, clicks the
431
+ # Mechanize::Page::Link object passed in. Returns the page fetched.
432
##
# Clicks +link+ and returns the page fetched.
#
# * A Page::Link is followed with its own page (or the current page) as the
#   referer. When robots handling is enabled, a nofollow page or link raises
#   RobotsDisallowedError. A link with rel "noreferrer" is resolved first
#   and fetched with a blank referer.
# * A String or Regexp is looked up on the current page, first as link text
#   and then as the value of a submit button; nil is returned when neither
#   is found.
# * Anything else is followed through its +href+ method, or through its
#   'href' / 'src' entries.

def click(link)
  case link
  when Page::Link
    referer = link.page || current_page()

    if @agent.robots &&
       ((referer.is_a?(Page) && referer.parser.nofollow?) || link.rel?('nofollow'))
      raise RobotsDisallowedError.new(link.href)
    end

    return get(link.href, [], referer) unless link.rel?('noreferrer')

    # noreferrer: resolve against the link's page, then fetch from a blank page
    href = @agent.resolve(link.href, link.page || current_page)
    get href, [], Page.new(nil, {'content-type'=>'text/html'})
  when String, Regexp
    real_link = page.link_with(:text => link)
    return click(real_link) if real_link

    # No such link: look for a form with a matching submit button
    button = nil
    form = page.forms.find do |candidate|
      button = candidate.button_with(:value => link)
      button.is_a? Form::Submit
    end
    submit form, button if form
  else
    referer = current_page()
    href = if link.respond_to?(:href)
             link.href
           else
             link['href'] || link['src']
           end
    get href, [], referer
  end
end
466
+
467
+ # Equivalent to the browser back button. Returns the most recent page
468
+ # visited.
469
##
# Removes the most recent entry from the agent's history and returns it.

def back
  visited = @agent.history
  visited.pop
end
472
+
473
+ # Posts to the given URL with the request entity. The request
474
+ # entity is specified by either a string, or a list of key-value
475
+ # pairs represented by a hash or an array of arrays.
476
+ #
477
+ # Examples:
478
+ # agent.post('http://example.com/', "foo" => "bar")
479
+ #
480
+ # agent.post('http://example.com/', [ ["foo", "bar"] ])
481
+ #
482
+ # agent.post('http://example.com/', "<message>hello</message>", 'Content-Type' => 'application/xml')
483
##
# Posts to +url+.
#
# A String +query+ is sent as the raw request entity through
# request_with_entity. Otherwise +query+ is iterated as key/value pairs and
# sent as a form: IO values become file uploads (switching the form to
# multipart/form-data), every other value becomes a regular field.

def post(url, query={}, headers={})
  return request_with_entity(:post, url, query, headers) if query.is_a?(String)

  # Stand-in for a parsed form node: a hash of attributes that answers
  # +search+ with no child elements.
  node = {}
  def node.search(*args); []; end
  node['method']  = 'POST'
  node['enctype'] = 'application/x-www-form-urlencoded'

  form = Form.new(node)

  query.each do |name, value|
    if value.is_a?(IO)
      form.enctype = 'multipart/form-data'
      upload = Form::FileUpload.new({'name' => name.to_s}, ::File.basename(value.path))
      upload.file_data = value.read
      form.file_uploads << upload
    else
      form.fields << Form::Field.new({'name' => name.to_s}, value)
    end
  end

  post_form(url, form, headers)
end
509
+
510
+ # Submit a form with an optional button.
511
+ # Without a button:
512
+ # page = agent.get('http://example.com')
513
+ # agent.submit(page.forms.first)
514
+ # With a button
515
+ # agent.submit(page.forms.first, page.forms.first.buttons.first)
516
##
# Submits +form+, optionally through +button+, with extra +headers+.
#
# POST forms go through post_form. GET forms are fetched with #get from the
# form action (with a trailing query string removed), using the form's
# built query as parameters and the form's page as referer.
#
# Raises ArgumentError for any other form method.

def submit(form, button=nil, headers={})
  form.add_button_to_query(button) if button

  verb = form.method.upcase

  if verb == 'POST'
    post_form(form.action, form, headers)
  elsif verb == 'GET'
    target = form.action.gsub(/\?[^\?]*$/, '')
    get(target, form.build_query, form.page, headers)
  else
    raise ArgumentError, "unsupported method: #{verb}"
  end
end
530
+
531
##
# Sends +entity+ as the body of a +verb+ request to +uri+ and returns the
# resulting page, which is also added to the history.
#
# The request defaults to a Content-Type of application/octet-stream and a
# Content-Length computed from +entity+; entries in +headers+ override both.
# The current page (or a blank page when there is none) is used as referer.

def request_with_entity(verb, uri, entity, headers = {})
  cur_page = current_page || Page.new(nil, {'content-type'=>'text/html'})

  # Content-Length counts bytes. String#size counts characters, which is
  # too small for multibyte entities, so prefer bytesize when the entity
  # provides it.
  entity_length = entity.respond_to?(:bytesize) ? entity.bytesize : entity.size

  headers = {
    'Content-Type'   => 'application/octet-stream',
    'Content-Length' => entity_length.to_s,
  }.update headers

  page = @agent.fetch uri, verb, headers, [entity], cur_page
  add_to_history(page)
  page
end
543
+
544
+ # Returns the current page loaded by Mechanize
545
##
# Returns the agent's current page.

def current_page
  @agent.current_page
end
548
+
549
+ # Returns a visited page for the url passed in, otherwise nil
550
##
# Asks the agent for the visited page matching +url+ and returns its answer.
# When +url+ responds to +href+ (for example a link), its href is looked up
# instead.

def visited_page(url)
  target = url.respond_to?(:href) ? url.href : url
  @agent.visited_page target
end
555
+
556
+ # Returns whether or not a url has been visited
557
+ alias visited? visited_page
558
+
559
##
# Builds a parser object for +body+ fetched from +uri+.
#
# The content type is the lower-cased Content-Type header of +response+ up
# to the first ';' or ','; it selects the parser class from the pluggable
# parser registry. The new parser is given this Mechanize instance and the
# watch_for_set value when it accepts them.

def parse uri, response, body
  content_type = nil
  raw_header   = response['Content-Type']

  unless raw_header.nil?
    media_type   = raw_header.split(';', 2).first
    content_type = media_type.downcase.split(',', 2).first unless media_type.nil?
  end

  # Find our pluggable parser
  parser_class = @pluggable_parser.parser content_type

  parser_class.new(uri, response, body, response.code) do |parser|
    parser.mech = self if parser.respond_to? :mech=

    if @watch_for_set && parser.respond_to?(:watch_for_set=)
      parser.watch_for_set = @watch_for_set
    end
  end
end
577
+
578
+ ##
579
+ # Sets the proxy +address+ at +port+ with an optional +user+ and +password+
580
+
581
##
# Records the proxy +address+, +port+, +user+ and +password+ on this
# instance, passes them to the agent, and then has the agent set up its
# HTTP connection again.

def set_proxy address, port, user = nil, password = nil
  @proxy_addr, @proxy_port = address, port
  @proxy_user, @proxy_pass = user, password

  @agent.set_proxy address, port, user, password
  @agent.set_http
end
590
+
591
+ # Runs given block, then resets the page history as it was before. self is
592
+ # given as a parameter to the block. Returns the value of the block.
593
##
# Yields this instance to the block and returns the block's value. The
# agent's history is restored afterwards to a copy taken before the block
# ran, even when the block raises.

def transact
  snapshot = @agent.history.dup

  begin
    yield self
  ensure
    @agent.history = snapshot
  end
end
601
+
602
##
# Returns the agent's robots setting.

def robots
  @agent.robots
end

##
# Sets the agent's robots setting to +enabled+.

def robots=(enabled)
  @agent.robots = enabled
end
609
+
610
+ alias :page :current_page
611
+
612
+ private
613
+
614
##
# Posts the request data of +form+ to +uri+ and returns the resulting page,
# which is also added to the history.
#
# The request defaults to the form's enctype as Content-Type and a
# Content-Length computed from the request data; entries in +headers+
# override both. The form's page, the current page, or a blank page is used
# as referer, in that order of preference.

def post_form(uri, form, headers = {})
  cur_page = form.page || current_page ||
             Page.new(nil, {'content-type'=>'text/html'})

  request_data = form.request_data

  log.debug("query: #{ request_data.inspect }") if log

  # Content-Length counts bytes. String#size counts characters, which is
  # too small for multibyte request data, so prefer bytesize when available.
  data_length = if request_data.respond_to?(:bytesize)
                  request_data.bytesize
                else
                  request_data.size
                end

  headers = {
    'Content-Type'   => form.enctype,
    'Content-Length' => data_length.to_s,
  }.merge headers

  # fetch the page
  page = @agent.fetch uri, :post, headers, [request_data], cur_page
  add_to_history(page)
  page
end
632
+
633
##
# Pushes +page+ onto the agent's history together with its URI as resolved
# by the agent, then calls the +history_added+ callback with the page when
# one is set.

def add_to_history(page)
  visited = @agent.history
  visited.push(page, @agent.resolve(page.uri))

  @history_added.call(page) if @history_added
end
637
+
638
+ end
639
+
640
+ require 'mechanize/content_type_error'
641
+ require 'mechanize/cookie'
642
+ require 'mechanize/cookie_jar'
643
+ require 'mechanize/file'
644
+ require 'mechanize/file_connection'
645
+ require 'mechanize/file_request'
646
+ require 'mechanize/file_response'
647
+ require 'mechanize/form'
648
+ require 'mechanize/history'
649
+ require 'mechanize/http'
650
+ require 'mechanize/http/agent'
651
+ require 'mechanize/page'
652
+ require 'mechanize/inspect'
653
+ require 'mechanize/monkey_patch'
654
+ require 'mechanize/pluggable_parsers'
655
+ require 'mechanize/redirect_limit_reached_error'
656
+ require 'mechanize/redirect_not_get_or_head_error'
657
+ require 'mechanize/response_code_error'
658
+ require 'mechanize/response_read_error'
659
+ require 'mechanize/robots_disallowed_error'
660
+ require 'mechanize/unsupported_scheme_error'
661
+ require 'mechanize/util'
662
+