aai10-mechanize 2.0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (176) hide show
  1. data/.autotest +6 -0
  2. data/.gitignore +9 -0
  3. data/CHANGELOG.rdoc +652 -0
  4. data/EXAMPLES.rdoc +187 -0
  5. data/FAQ.rdoc +11 -0
  6. data/GUIDE.rdoc +163 -0
  7. data/LICENSE.rdoc +20 -0
  8. data/Manifest.txt +172 -0
  9. data/README.rdoc +63 -0
  10. data/Rakefile +36 -0
  11. data/aai10-mechanize.gemspec +20 -0
  12. data/examples/flickr_upload.rb +22 -0
  13. data/examples/mech-dump.rb +5 -0
  14. data/examples/proxy_req.rb +7 -0
  15. data/examples/rubyforge.rb +20 -0
  16. data/examples/spider.rb +21 -0
  17. data/lib/mechanize.rb +664 -0
  18. data/lib/mechanize/content_type_error.rb +14 -0
  19. data/lib/mechanize/cookie.rb +116 -0
  20. data/lib/mechanize/cookie_jar.rb +202 -0
  21. data/lib/mechanize/element_matcher.rb +35 -0
  22. data/lib/mechanize/file.rb +80 -0
  23. data/lib/mechanize/file_connection.rb +17 -0
  24. data/lib/mechanize/file_request.rb +26 -0
  25. data/lib/mechanize/file_response.rb +74 -0
  26. data/lib/mechanize/file_saver.rb +37 -0
  27. data/lib/mechanize/form.rb +478 -0
  28. data/lib/mechanize/form/button.rb +9 -0
  29. data/lib/mechanize/form/check_box.rb +11 -0
  30. data/lib/mechanize/form/field.rb +44 -0
  31. data/lib/mechanize/form/file_upload.rb +23 -0
  32. data/lib/mechanize/form/image_button.rb +20 -0
  33. data/lib/mechanize/form/multi_select_list.rb +83 -0
  34. data/lib/mechanize/form/option.rb +49 -0
  35. data/lib/mechanize/form/radio_button.rb +48 -0
  36. data/lib/mechanize/form/select_list.rb +40 -0
  37. data/lib/mechanize/headers.rb +25 -0
  38. data/lib/mechanize/history.rb +83 -0
  39. data/lib/mechanize/http.rb +3 -0
  40. data/lib/mechanize/http/agent.rb +738 -0
  41. data/lib/mechanize/inspect.rb +88 -0
  42. data/lib/mechanize/monkey_patch.rb +37 -0
  43. data/lib/mechanize/page.rb +408 -0
  44. data/lib/mechanize/page/base.rb +8 -0
  45. data/lib/mechanize/page/frame.rb +27 -0
  46. data/lib/mechanize/page/image.rb +30 -0
  47. data/lib/mechanize/page/label.rb +20 -0
  48. data/lib/mechanize/page/link.rb +82 -0
  49. data/lib/mechanize/page/meta_refresh.rb +56 -0
  50. data/lib/mechanize/pluggable_parsers.rb +101 -0
  51. data/lib/mechanize/redirect_limit_reached_error.rb +16 -0
  52. data/lib/mechanize/redirect_not_get_or_head_error.rb +19 -0
  53. data/lib/mechanize/response_code_error.rb +22 -0
  54. data/lib/mechanize/response_read_error.rb +27 -0
  55. data/lib/mechanize/robots_disallowed_error.rb +29 -0
  56. data/lib/mechanize/unsupported_scheme_error.rb +8 -0
  57. data/lib/mechanize/util.rb +113 -0
  58. data/test/data/htpasswd +1 -0
  59. data/test/data/server.crt +16 -0
  60. data/test/data/server.csr +12 -0
  61. data/test/data/server.key +15 -0
  62. data/test/data/server.pem +15 -0
  63. data/test/helper.rb +175 -0
  64. data/test/htdocs/alt_text.html +10 -0
  65. data/test/htdocs/bad_form_test.html +9 -0
  66. data/test/htdocs/button.jpg +0 -0
  67. data/test/htdocs/canonical_uri.html +9 -0
  68. data/test/htdocs/dir with spaces/foo.html +1 -0
  69. data/test/htdocs/empty_form.html +6 -0
  70. data/test/htdocs/file_upload.html +26 -0
  71. data/test/htdocs/find_link.html +41 -0
  72. data/test/htdocs/form_multi_select.html +16 -0
  73. data/test/htdocs/form_multival.html +37 -0
  74. data/test/htdocs/form_no_action.html +18 -0
  75. data/test/htdocs/form_no_input_name.html +16 -0
  76. data/test/htdocs/form_select.html +16 -0
  77. data/test/htdocs/form_select_all.html +16 -0
  78. data/test/htdocs/form_select_none.html +17 -0
  79. data/test/htdocs/form_select_noopts.html +10 -0
  80. data/test/htdocs/form_set_fields.html +14 -0
  81. data/test/htdocs/form_test.html +188 -0
  82. data/test/htdocs/frame_referer_test.html +10 -0
  83. data/test/htdocs/frame_test.html +30 -0
  84. data/test/htdocs/google.html +13 -0
  85. data/test/htdocs/iframe_test.html +16 -0
  86. data/test/htdocs/index.html +6 -0
  87. data/test/htdocs/link with space.html +5 -0
  88. data/test/htdocs/meta_cookie.html +11 -0
  89. data/test/htdocs/no_title_test.html +6 -0
  90. data/test/htdocs/nofollow.html +9 -0
  91. data/test/htdocs/noindex.html +9 -0
  92. data/test/htdocs/norobots.html +8 -0
  93. data/test/htdocs/rails_3_encoding_hack_form_test.html +27 -0
  94. data/test/htdocs/rel_nofollow.html +8 -0
  95. data/test/htdocs/relative/tc_relative_links.html +21 -0
  96. data/test/htdocs/robots.html +8 -0
  97. data/test/htdocs/robots.txt +2 -0
  98. data/test/htdocs/tc_bad_charset.html +9 -0
  99. data/test/htdocs/tc_bad_links.html +5 -0
  100. data/test/htdocs/tc_base_images.html +10 -0
  101. data/test/htdocs/tc_base_link.html +8 -0
  102. data/test/htdocs/tc_blank_form.html +11 -0
  103. data/test/htdocs/tc_charset.html +6 -0
  104. data/test/htdocs/tc_checkboxes.html +19 -0
  105. data/test/htdocs/tc_encoded_links.html +5 -0
  106. data/test/htdocs/tc_field_precedence.html +11 -0
  107. data/test/htdocs/tc_follow_meta.html +8 -0
  108. data/test/htdocs/tc_form_action.html +48 -0
  109. data/test/htdocs/tc_images.html +8 -0
  110. data/test/htdocs/tc_links.html +18 -0
  111. data/test/htdocs/tc_meta_in_body.html +9 -0
  112. data/test/htdocs/tc_no_attributes.html +16 -0
  113. data/test/htdocs/tc_pretty_print.html +17 -0
  114. data/test/htdocs/tc_radiobuttons.html +17 -0
  115. data/test/htdocs/tc_referer.html +16 -0
  116. data/test/htdocs/tc_relative_links.html +19 -0
  117. data/test/htdocs/tc_textarea.html +23 -0
  118. data/test/htdocs/test_bad_encoding.html +52 -0
  119. data/test/htdocs/test_click.html +11 -0
  120. data/test/htdocs/unusual______.html +5 -0
  121. data/test/servlets.rb +402 -0
  122. data/test/ssl_server.rb +48 -0
  123. data/test/test_cookies.rb +129 -0
  124. data/test/test_form_action.rb +52 -0
  125. data/test/test_form_as_hash.rb +59 -0
  126. data/test/test_form_button.rb +46 -0
  127. data/test/test_frames.rb +34 -0
  128. data/test/test_headers.rb +33 -0
  129. data/test/test_history.rb +118 -0
  130. data/test/test_history_added.rb +16 -0
  131. data/test/test_html_unscape_forms.rb +46 -0
  132. data/test/test_if_modified_since.rb +20 -0
  133. data/test/test_images.rb +19 -0
  134. data/test/test_mechanize.rb +852 -0
  135. data/test/test_mechanize_cookie.rb +345 -0
  136. data/test/test_mechanize_cookie_jar.rb +433 -0
  137. data/test/test_mechanize_file.rb +53 -0
  138. data/test/test_mechanize_file_request.rb +19 -0
  139. data/test/test_mechanize_file_response.rb +21 -0
  140. data/test/test_mechanize_form.rb +576 -0
  141. data/test/test_mechanize_form_check_box.rb +37 -0
  142. data/test/test_mechanize_form_encoding.rb +120 -0
  143. data/test/test_mechanize_form_field.rb +21 -0
  144. data/test/test_mechanize_form_image_button.rb +12 -0
  145. data/test/test_mechanize_form_textarea.rb +51 -0
  146. data/test/test_mechanize_http_agent.rb +697 -0
  147. data/test/test_mechanize_link.rb +84 -0
  148. data/test/test_mechanize_page_encoding.rb +147 -0
  149. data/test/test_mechanize_page_link.rb +382 -0
  150. data/test/test_mechanize_page_meta_refresh.rb +115 -0
  151. data/test/test_mechanize_redirect_not_get_or_head_error.rb +18 -0
  152. data/test/test_mechanize_subclass.rb +22 -0
  153. data/test/test_mechanize_util.rb +92 -0
  154. data/test/test_multi_select.rb +118 -0
  155. data/test/test_no_attributes.rb +13 -0
  156. data/test/test_option.rb +18 -0
  157. data/test/test_pluggable_parser.rb +136 -0
  158. data/test/test_post_form.rb +37 -0
  159. data/test/test_pretty_print.rb +22 -0
  160. data/test/test_radiobutton.rb +75 -0
  161. data/test/test_redirect_limit_reached.rb +39 -0
  162. data/test/test_redirect_ok.rb +25 -0
  163. data/test/test_referer.rb +81 -0
  164. data/test/test_relative_links.rb +40 -0
  165. data/test/test_request.rb +13 -0
  166. data/test/test_response_code.rb +53 -0
  167. data/test/test_robots.rb +72 -0
  168. data/test/test_save_file.rb +48 -0
  169. data/test/test_scheme.rb +48 -0
  170. data/test/test_select.rb +119 -0
  171. data/test/test_select_all.rb +15 -0
  172. data/test/test_select_none.rb +15 -0
  173. data/test/test_select_noopts.rb +18 -0
  174. data/test/test_set_fields.rb +44 -0
  175. data/test/test_ssl_server.rb +20 -0
  176. metadata +360 -0
@@ -0,0 +1,63 @@
1
+ = Mechanize
2
+
3
+ * http://mechanize.rubyforge.org
4
+ * http://github.com/tenderlove/mechanize/tree/master
5
+
6
+ == DESCRIPTION
7
+
8
+ The Mechanize library is used for automating interaction with websites.
9
+ Mechanize automatically stores and sends cookies, follows redirects,
10
+ can follow links, and submit forms. Form fields can be populated and
11
+ submitted. Mechanize also keeps track of the sites that you have visited as
12
+ a history.
13
+
14
+ == Dependencies
15
+
16
+ * ruby 1.8.7
17
+ * nokogiri[http://nokogiri.rubyforge.org]
18
+
19
+ == SUPPORT:
20
+
21
+ The mechanize mailing list is available here:
22
+
23
+ * http://rubyforge.org/mailman/listinfo/mechanize-users
24
+
25
+ The bug tracker is available here:
26
+
27
+ * http://github.com/tenderlove/mechanize/issues
28
+
29
+ == Examples
30
+
31
+ If you are just starting, check out the GUIDE.
32
+ Also, check out the EXAMPLES file.
33
+
34
+ == Authors
35
+
36
+ Copyright (c) 2005 by Michael Neumann (mneumann@ntecs.de)
37
+
38
+ Copyright (c) 2006-2011:
39
+
40
+ * {Aaron Patterson}[http://tenderlovemaking.com] (aaronp@rubyforge.org)
41
+ * {Mike Dalessio}[http://mike.daless.io] (mike@csa.net)
42
+
43
+ Copyright (c) 2011:
44
+
45
+ * {Eric Hodel}[http://blog.segment7.net] (drbrain@segment7.net)
46
+
47
+ This library comes with a shameless plug for employing me
48
+ (Aaron[http://tenderlovemaking.com/]) programming Ruby, my favorite language!
49
+
50
+ == Acknowledgments
51
+
52
+ This library was heavily influenced by its namesake in the Perl world. A big
53
+ thanks goes to Andy Lester (andy@petdance.com), the author of the original
54
+ Perl Mechanize, which is available here[http://search.cpan.org/~petdance/WWW-Mechanize/]. Ruby Mechanize would not be around without you!
55
+
56
+ Thank you to Michael Neumann for starting the Ruby version. Thanks to everyone
57
+ who's helped out in various ways. Finally, thank you to the people using this
58
+ library!
59
+
60
+ == License
61
+
62
+ This library is distributed under the MIT license. Please see the LICENSE file.
63
+
@@ -0,0 +1,36 @@
1
+ require 'rubygems'
2
+ require 'hoe'
3
+
4
+ Hoe.plugin :git
5
+ Hoe.plugin :minitest
6
+
7
+ Hoe.spec 'mechanize' do
8
+ developer 'Eric Hodel', 'drbrain@segment7.net'
9
+ developer 'Aaron Patterson', 'aaronp@rubyforge.org'
10
+ developer 'Mike Dalessio', 'mike.dalessio@gmail.com'
11
+
12
+ self.readme_file = 'README.rdoc'
13
+ self.history_file = 'CHANGELOG.rdoc'
14
+ self.extra_rdoc_files += Dir['*.rdoc']
15
+
16
+ rdoc_locations << 'drbrain@rubyforge.org:/var/www/gforge-projects/mechanize/'
17
+
18
+ self.extra_deps << ['nokogiri', '~> 1.4']
19
+ self.extra_deps << ['net-http-persistent', '~> 1.8']
20
+ self.extra_deps << ['net-http-digest_auth', '~> 1.1', '>= 1.1.1']
21
+ self.extra_deps << ['webrobots', '~> 0.0', '>= 0.0.9']
22
+
23
+ self.spec_extras[:required_ruby_version] = '>= 1.8.7'
24
+ end
25
+
26
+ desc "Update SSL Certificate"
27
+ task('ssl_cert') do |p|
28
+ sh "openssl genrsa -des3 -out server.key 1024"
29
+ sh "openssl req -new -key server.key -out server.csr"
30
+ sh "cp server.key server.key.org"
31
+ sh "openssl rsa -in server.key.org -out server.key"
32
+ sh "openssl x509 -req -days 365 -in server.csr -signkey server.key -out server.crt"
33
+ sh "cp server.key server.pem"
34
+ sh "mv server.key server.csr server.crt server.pem test/data/"
35
+ sh "rm server.key.org"
36
+ end
@@ -0,0 +1,20 @@
1
+ # -*- encoding: utf-8 -*-
2
+ $:.push File.expand_path("../lib", __FILE__)
3
+
4
+ Gem::Specification.new do |s|
5
+ s.name = "aai10-mechanize"
6
+ s.version = "2.0.1.0"
7
+ s.platform = Gem::Platform::RUBY
8
+ s.authors = ["Alexey Aleksandrov"]
9
+ s.email = ["aai10@mail.msiu.ru "]
10
+ s.homepage = ""
11
+ s.summary = %q{Mechanize Bug Fix}
12
+ s.description = %q{Fix error in mechanize}
13
+
14
+ s.rubyforge_project = "aai10-mechanize"
15
+
16
+ s.files = `git ls-files`.split("\n")
17
+ s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
18
+ s.executables = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
19
+ s.require_paths = ["lib"]
20
+ end
@@ -0,0 +1,22 @@
1
+ require 'rubygems'
2
+ require 'mechanize'
3
+
4
+ agent = Mechanize.new
5
+
6
+ # Get the flickr sign in page
7
+ page = agent.get 'http://flickr.com/signin/flickr/'
8
+
9
+ # Fill out the login form
10
+ form = page.form_with :name => 'flickrloginform'
11
+ form.email = ARGV[0]
12
+ form.password = ARGV[1]
13
+ form.submit
14
+
15
+ # Go to the upload page
16
+ page = page.link_with(:text => 'Upload').click
17
+
18
+ # Fill out the form
19
+ form = page.forms.action('/photos_upload_process.gne').first
20
+ form.file_uploads.name('file1').first.file_name = ARGV[2]
21
+ form.submit
22
+
@@ -0,0 +1,5 @@
1
+ require 'rubygems'
2
+ require 'mechanize'
3
+
4
+ agent = Mechanize.new
5
+ puts agent.get(ARGV[0]).inspect
@@ -0,0 +1,7 @@
1
+ require 'rubygems'
2
+ require 'mechanize'
3
+
4
+ agent = Mechanize.new
5
+ agent.set_proxy('localhost', '8000')
6
+ page = agent.get(ARGV[0])
7
+ puts page.body
@@ -0,0 +1,20 @@
1
+ # This example logs a user in to rubyforge and prints out the body of the
2
+ # page after logging the user in.
3
+ require 'rubygems'
4
+ require 'mechanize'
5
+ require 'logger'
6
+
7
+ # Create a new mechanize object
8
+ agent = Mechanize.new { |a| a.log = Logger.new(STDERR) }
9
+
10
+ # Load the rubyforge website
11
+ page = agent.get('http://rubyforge.org/')
12
+ page = agent.click page.link_with(:text => /Log In/) # Click the login link
13
+ form = page.forms[1] # Select the second form on the page (index 1)
14
+ form.form_loginname = ARGV[0]
15
+ form.form_pw = ARGV[1]
16
+
17
+ # Submit the form
18
+ page = form.submit form.buttons.first
19
+
20
+ puts page.body # Print out the body
@@ -0,0 +1,21 @@
1
+ require 'rubygems'
2
+ require 'mechanize'
3
+
4
+ agent = Mechanize.new
5
+ stack = agent.get(ARGV[0]).links
6
+
7
+ while l = stack.pop
8
+ next unless l.uri
9
+ host = l.uri.host
10
+ next unless host.nil? or host == agent.history.first.uri.host
11
+ next if agent.visited? l.href
12
+
13
+ puts "crawling #{l.uri}"
14
+ begin
15
+ page = l.click
16
+ next unless Mechanize::Page === page
17
+ stack.push(*page.links)
18
+ rescue Mechanize::ResponseCodeError
19
+ end
20
+ end
21
+
@@ -0,0 +1,664 @@
1
+ require 'fileutils'
2
+ require 'forwardable'
3
+ require 'iconv' if RUBY_VERSION < '1.9.2'
4
+ require 'mutex_m'
5
+ require 'net/http/digest_auth'
6
+ require 'net/http/persistent'
7
+ require 'nkf'
8
+ require 'nokogiri'
9
+ require 'openssl'
10
+ require 'stringio'
11
+ require 'uri'
12
+ require 'webrick/httputils'
13
+ require 'zlib'
14
+
15
+ # = Synopsis
16
+ # The Mechanize library is used for automating interaction with a website. It
17
+ # can follow links, and submit forms. Form fields can be populated and
18
+ # submitted. A history of URLs is maintained and can be queried.
19
+ #
20
+ # == Example
21
+ # require 'rubygems'
22
+ # require 'mechanize'
23
+ # require 'logger'
24
+ #
25
+ # agent = Mechanize.new { |a| a.log = Logger.new("mech.log") }
26
+ # agent.user_agent_alias = 'Mac Safari'
27
+ # page = agent.get("http://www.google.com/")
28
+ # search_form = page.form_with(:name => "f")
29
+ # search_form.field_with(:name => "q").value = "Hello"
30
+ # search_results = agent.submit(search_form)
31
+ # puts search_results.body
32
+ class Mechanize
33
+
34
+ ##
35
+ # The version of Mechanize you are using.
36
+ VERSION = '2.0.2'
37
+
38
+ class Error < RuntimeError
39
+ end
40
+
41
+ ruby_version = if RUBY_PATCHLEVEL >= 0 then
42
+ "#{RUBY_VERSION}p#{RUBY_PATCHLEVEL}"
43
+ else
44
+ "#{RUBY_VERSION}dev#{RUBY_REVISION}"
45
+ end
46
+
47
+ # HTTP/1.1 keep-alives are always active. This does nothing.
48
+ attr_accessor :keep_alive
49
+
50
+ # HTTP/1.0 keep-alive time. This is no longer supported by mechanize as it
51
+ # now uses net-http-persistent which only supports HTTP/1.1 persistent
52
+ # connections
53
+ attr_accessor :keep_alive_time
54
+
55
+ ##
56
+ # User Agent aliases
57
+
58
+ AGENT_ALIASES = {
59
+ 'Windows IE 6' => 'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)',
60
+ 'Windows IE 7' => 'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; .NET CLR 1.1.4322; .NET CLR 2.0.50727)',
61
+ 'Windows IE 8' => 'Mozilla/5.0 (compatible; MSIE 8.0; Windows NT 5.1; Trident/4.0; .NET CLR 1.1.4322; .NET CLR 2.0.50727)',
62
+ 'Windows IE 9' => 'Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0)',
63
+ 'Windows Mozilla' => 'Mozilla/5.0 (Windows; U; Windows NT 5.0; en-US; rv:1.4b) Gecko/20030516 Mozilla Firebird/0.6',
64
+ 'Mac Safari' => 'Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_2; de-at) AppleWebKit/531.21.8 (KHTML, like Gecko) Version/4.0.4 Safari/531.21.10',
65
+ 'Mac FireFox' => 'Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10.6; en-US; rv:1.9.2) Gecko/20100115 Firefox/3.6',
66
+ 'Mac Mozilla' => 'Mozilla/5.0 (Macintosh; U; PPC Mac OS X Mach-O; en-US; rv:1.4a) Gecko/20030401',
67
+ 'Linux Mozilla' => 'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.4) Gecko/20030624',
68
+ 'Linux Firefox' => 'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.2.1) Gecko/20100122 firefox/3.6.1',
69
+ 'Linux Konqueror' => 'Mozilla/5.0 (compatible; Konqueror/3; Linux)',
70
+ 'iPhone' => 'Mozilla/5.0 (iPhone; U; CPU like Mac OS X; en) AppleWebKit/420+ (KHTML, like Gecko) Version/3.0 Mobile/1C28 Safari/419.3',
71
+ 'Mechanize' => "Mechanize/#{VERSION} Ruby/#{ruby_version} (http://github.com/tenderlove/mechanize/)"
72
+ }
73
+
74
+ # A Mechanize::CookieJar which stores cookies
75
+
76
+ def cookie_jar
77
+ @agent.cookie_jar
78
+ end
79
+
80
+ def cookie_jar= cookie_jar
81
+ @agent.cookie_jar = cookie_jar
82
+ end
83
+
84
+ # Length of time to wait until a connection is opened in seconds
85
+ def open_timeout
86
+ @agent.open_timeout
87
+ end
88
+
89
+ def open_timeout= open_timeout
90
+ @agent.open_timeout = open_timeout
91
+ end
92
+
93
+ # Length of time to attempt to read data from the server
94
+ def read_timeout
95
+ @agent.read_timeout
96
+ end
97
+
98
+ def read_timeout= read_timeout
99
+ @agent.read_timeout = read_timeout
100
+ end
101
+
102
+ # The identification string for the client initiating a web request
103
+ def user_agent
104
+ @agent.user_agent
105
+ end
106
+
107
+ # The value of watch_for_set is passed to pluggable parsers for retrieved
108
+ # content
109
+ attr_accessor :watch_for_set
110
+
111
+ # Path to an OpenSSL CA (certificate authority) certificate file
112
+ def ca_file
113
+ @agent.ca_file
114
+ end
115
+
116
+ def ca_file= ca_file
117
+ @agent.ca_file = ca_file
118
+ end
119
+
120
+ def certificate
121
+ @agent.certificate
122
+ end
123
+
124
+ # An OpenSSL private key or the path to a private key
125
+ def key
126
+ @agent.key
127
+ end
128
+
129
+ def key= key
130
+ @agent.key = key
131
+ end
132
+
133
+ # An OpenSSL client certificate or the path to a certificate file.
134
+ def cert
135
+ @agent.cert
136
+ end
137
+
138
+ def cert= cert
139
+ @agent.cert = cert
140
+ end
141
+
142
+ # OpenSSL key password
143
+ def pass
144
+ @agent.pass
145
+ end
146
+
147
+ def pass= pass
148
+ @agent.pass = pass
149
+ end
150
+
151
+ # Controls how this agent deals with redirects. The following values are
152
+ # allowed:
153
+ #
154
+ # :all, true:: All 3xx redirects are followed (default)
155
+ # :permanent:: Only 301 Moved Permanently redirects are followed
156
+ # false:: No redirects are followed
157
+
158
+ def redirect_ok
159
+ @agent.redirect_ok
160
+ end
161
+
162
+ def redirect_ok= follow
163
+ @agent.redirect_ok = follow
164
+ end
165
+
166
+ def gzip_enabled
167
+ @agent.gzip_enabled
168
+ end
169
+
170
+ # Enables or disables HTTP/1.1 gzip compression (enabled by default)
171
+ def gzip_enabled=enabled
172
+ @agent.gzip_enabled = enabled
173
+ end
174
+
175
+ def conditional_requests
176
+ @agent.conditional_requests
177
+ end
178
+
179
+ # Enables or disables If-Modified-Since conditional requests (enabled by default)
180
+ def conditional_requests= enabled
181
+ @agent.conditional_requests = enabled
182
+ end
183
+
184
+ # Follow HTML meta refresh. If set to +:anywhere+ meta refresh tags outside
185
+ # of the head element will be followed.
186
+ def follow_meta_refresh
187
+ @agent.follow_meta_refresh
188
+ end
189
+
190
+ def follow_meta_refresh= follow
191
+ @agent.follow_meta_refresh = follow
192
+ end
193
+
194
+ # A callback for additional certificate verification. See
195
+ # OpenSSL::SSL::SSLContext#verify_callback
196
+ #
197
+ # The callback can be used for debugging or to ignore errors by always
198
+ # returning +true+. Specifying nil uses the default method that was valid
199
+ # when the SSLContext was created
200
+ def verify_callback
201
+ @agent.verify_callback
202
+ end
203
+
204
+ def verify_callback= verify_callback
205
+ @agent.verify_callback = verify_callback
206
+ end
207
+
208
+ attr_accessor :history_added
209
+
210
+ def redirection_limit
211
+ @agent.redirection_limit
212
+ end
213
+
214
+ def redirection_limit= limit
215
+ @agent.redirection_limit = limit
216
+ end
217
+
218
+ def scheme_handlers
219
+ @agent.scheme_handlers
220
+ end
221
+
222
+ def scheme_handlers= scheme_handlers
223
+ @agent.scheme_handlers = scheme_handlers
224
+ end
225
+
226
+ # A hash of custom request headers
227
+ def request_headers
228
+ @agent.request_headers
229
+ end
230
+
231
+ def request_headers= request_headers
232
+ @agent.request_headers = request_headers
233
+ end
234
+
235
+ # Proxy settings
236
+ attr_reader :proxy_addr
237
+ attr_reader :proxy_pass
238
+ attr_reader :proxy_port
239
+ attr_reader :proxy_user
240
+
241
+ # The HTML parser to be used when parsing documents
242
+ attr_accessor :html_parser
243
+
244
+ attr_reader :agent # :nodoc:
245
+
246
+ def history
247
+ @agent.history
248
+ end
249
+
250
+ attr_reader :pluggable_parser
251
+
252
+ # A list of hooks to call after retrieving a response. Hooks are called with
253
+ # the agent and the response returned.
254
+
255
+ def post_connect_hooks
256
+ @agent.post_connect_hooks
257
+ end
258
+
259
+ # A list of hooks to call before making a request. Hooks are called with
260
+ # the agent and the request to be performed.
261
+
262
+ def pre_connect_hooks
263
+ @agent.pre_connect_hooks
264
+ end
265
+
266
+ alias follow_redirect? redirect_ok
267
+
268
+ @html_parser = Nokogiri::HTML
269
+ class << self
270
+ attr_accessor :html_parser, :log
271
+
272
+ def inherited(child)
273
+ child.html_parser ||= html_parser
274
+ child.log ||= log
275
+ super
276
+ end
277
+ end
278
+
279
+ # A default encoding name used when parsing HTML. When set it is
280
+ # used after any other encoding. The default is nil.
281
+
282
+ attr_accessor :default_encoding
283
+
284
+ # Overrides the encodings given by the HTTP server and the HTML page with
285
+ # the default_encoding when set to true.
286
+ attr_accessor :force_default_encoding
287
+
288
+ def initialize
289
+ @agent = Mechanize::HTTP::Agent.new
290
+ @agent.context = self
291
+
292
+ # attr_accessors
293
+ @agent.user_agent = AGENT_ALIASES['Mechanize']
294
+ @watch_for_set = nil
295
+ @history_added = nil
296
+
297
+ # attr_readers
298
+ @pluggable_parser = PluggableParser.new
299
+
300
+ @keep_alive = true
301
+ @keep_alive_time = 0
302
+
303
+ # Proxy
304
+ @proxy_addr = nil
305
+ @proxy_port = nil
306
+ @proxy_user = nil
307
+ @proxy_pass = nil
308
+
309
+ @html_parser = self.class.html_parser
310
+
311
+ @default_encoding = nil
312
+ @force_default_encoding = false
313
+
314
+ yield self if block_given?
315
+
316
+ @agent.set_proxy @proxy_addr, @proxy_port, @proxy_user, @proxy_pass
317
+ @agent.set_http
318
+ end
319
+
320
+ def max_history
321
+ @agent.history.max_size
322
+ end
323
+
324
+ def max_history= length
325
+ @agent.history.max_size = length
326
+ end
327
+
328
+ def log=(l); Mechanize.log = l end
329
+ def log; Mechanize.log end
330
+
331
+ def user_agent= user_agent
332
+ @agent.user_agent = user_agent
333
+ end
334
+
335
+ # Set the user agent for the Mechanize object. See AGENT_ALIASES
336
+ def user_agent_alias=(al)
337
+ self.user_agent = AGENT_ALIASES[al] ||
338
+ raise(ArgumentError, "unknown agent alias #{al.inspect}")
339
+ end
340
+
341
+ # Returns a list of cookies stored in the cookie jar.
342
+ def cookies
343
+ @agent.cookie_jar.to_a
344
+ end
345
+
346
+ # Sets the user and password to be used for authentication.
347
+ def auth(user, password)
348
+ @agent.user = user
349
+ @agent.password = password
350
+ end
351
+
352
+ alias :basic_auth :auth
353
+
354
+ # Fetches the URL passed in and returns a page.
355
+ def get(uri, parameters = [], referer = nil, headers = {})
356
+ method = :get
357
+
358
+ if Hash === uri then
359
+ options = uri
360
+ location = Gem.location_of_caller.join ':'
361
+ warn "#{location}: Mechanize#get with options hash is deprecated and will be removed October 2011"
362
+
363
+ raise ArgumentError, "url must be specified" unless uri = options[:url]
364
+ parameters = options[:params] || []
365
+ referer = options[:referer]
366
+ headers = options[:headers]
367
+ method = options[:verb] || method
368
+ end
369
+
370
+ referer ||=
371
+ if uri.to_s =~ %r{\Ahttps?://}
372
+ Page.new(nil, {'content-type'=>'text/html'})
373
+ else
374
+ current_page || Page.new(nil, {'content-type'=>'text/html'})
375
+ end
376
+
377
+ # FIXME: Huge hack so that using a URI as a referer works. I need to
378
+ # refactor everything to pass around URIs but still support
379
+ # Mechanize::Page#base
380
+ unless referer.is_a?(Mechanize::File)
381
+ referer = referer.is_a?(String) ?
382
+ Page.new(URI.parse(referer), {'content-type' => 'text/html'}) :
383
+ Page.new(referer, {'content-type' => 'text/html'})
384
+ end
385
+
386
+ # fetch the page
387
+ headers ||= {}
388
+ page = @agent.fetch uri, method, headers, parameters, referer
389
+ add_to_history(page)
390
+ yield page if block_given?
391
+ page
392
+ end
393
+
394
+ ##
395
+ # PUT to +url+ with +entity+, and setting +headers+:
396
+ #
397
+ # put('http://example/', 'new content', {'Content-Type' => 'text/plain'})
398
+ #
399
+ def put(url, entity, headers = {})
400
+ request_with_entity(:put, url, entity, headers)
401
+ end
402
+
403
+ ##
404
+ # DELETE to +uri+ with +query_params+, and setting +headers+:
405
+ #
406
+ # delete('http://example/', {'q' => 'foo'}, {})
407
+ #
408
+ def delete(uri, query_params = {}, headers = {})
409
+ page = @agent.fetch(uri, :delete, headers, query_params)
410
+ add_to_history(page)
411
+ page
412
+ end
413
+
414
+ ##
415
+ # HEAD to +uri+ with +query_params+, and setting +headers+:
416
+ #
417
+ # head('http://example/', {'q' => 'foo'}, {})
418
+ #
419
+ def head(uri, query_params = {}, headers = {})
420
+ # fetch the page
421
+ page = @agent.fetch(uri, :head, headers, query_params)
422
+ yield page if block_given?
423
+ page
424
+ end
425
+
426
+ # Fetch a file and return the contents of the file.
427
+ def get_file(url)
428
+ get(url).body
429
+ end
430
+
431
+ # If the parameter is a string, finds the button or link with the
432
+ # value of the string and clicks it. Otherwise, clicks the
433
+ # Mechanize::Page::Link object passed in. Returns the page fetched.
434
+ def click(link)
435
+ case link
436
+ when Page::Link
437
+ referer = link.page || current_page()
438
+ if @agent.robots
439
+ if (referer.is_a?(Page) && referer.parser.nofollow?) || link.rel?('nofollow')
440
+ raise RobotsDisallowedError.new(link.href)
441
+ end
442
+ end
443
+ if link.rel?('noreferrer')
444
+ href = @agent.resolve(link.href, link.page || current_page)
445
+ referer = Page.new(nil, {'content-type'=>'text/html'})
446
+ else
447
+ href = link.href
448
+ end
449
+ get href, [], referer
450
+ when String, Regexp
451
+ if real_link = page.link_with(:text => link)
452
+ click real_link
453
+ else
454
+ button = nil
455
+ form = page.forms.find do |f|
456
+ button = f.button_with(:value => link)
457
+ button.is_a? Form::Submit
458
+ end
459
+ submit form, button if form
460
+ end
461
+ else
462
+ referer = current_page()
463
+ href = link.respond_to?(:href) ? link.href :
464
+ (link['href'] || link['src'])
465
+ get href, [], referer
466
+ end
467
+ end
468
+
469
+ # Equivalent to the browser back button. Returns the most recent page
470
+ # visited.
471
+ def back
472
+ @agent.history.pop
473
+ end
474
+
475
+ # Posts to the given URL with the request entity. The request
476
+ # entity is specified by either a string, or a list of key-value
477
+ # pairs represented by a hash or an array of arrays.
478
+ #
479
+ # Examples:
480
+ # agent.post('http://example.com/', "foo" => "bar")
481
+ #
482
+ # agent.post('http://example.com/', [ ["foo", "bar"] ])
483
+ #
484
+ # agent.post('http://example.com/', "<message>hello</message>", 'Content-Type' => 'application/xml')
485
+ def post(url, query={}, headers={})
486
+ if query.is_a?(String)
487
+ return request_with_entity(:post, url, query, headers)
488
+ end
489
+ node = {}
490
+ # Create a fake form
491
+ class << node
492
+ def search(*args); []; end
493
+ end
494
+ node['method'] = 'POST'
495
+ node['enctype'] = 'application/x-www-form-urlencoded'
496
+
497
+ form = Form.new(node)
498
+
499
+ query.each { |k, v|
500
+ if v.is_a?(IO)
501
+ form.enctype = 'multipart/form-data'
502
+ ul = Form::FileUpload.new({'name' => k.to_s},::File.basename(v.path))
503
+ ul.file_data = v.read
504
+ form.file_uploads << ul
505
+ else
506
+ form.fields << Form::Field.new({'name' => k.to_s},v)
507
+ end
508
+ }
509
+ post_form(url, form, headers)
510
+ end
511
+
512
+ # Submit a form with an optional button.
513
+ # Without a button:
514
+ # page = agent.get('http://example.com')
515
+ # agent.submit(page.forms.first)
516
+ # With a button
517
+ # agent.submit(page.forms.first, page.forms.first.buttons.first)
518
+ def submit(form, button=nil, headers={})
519
+ form.add_button_to_query(button) if button
520
+ case form.method.upcase
521
+ when 'POST'
522
+ post_form(form.action, form, headers)
523
+ when 'GET'
524
+ get(form.action.gsub(/\?[^\?]*$/, ''),
525
+ form.build_query,
526
+ form.page,
527
+ headers)
528
+ else
529
+ raise ArgumentError, "unsupported method: #{form.method.upcase}"
530
+ end
531
+ end
532
+
533
+ def request_with_entity(verb, uri, entity, headers = {})
534
+ cur_page = current_page || Page.new(nil, {'content-type'=>'text/html'})
535
+
536
+ headers = {
537
+ 'Content-Type' => 'application/octet-stream',
538
+ 'Content-Length' => entity.size.to_s,
539
+ }.update headers
540
+
541
+ page = @agent.fetch uri, verb, headers, [entity], cur_page
542
+ add_to_history(page)
543
+ page
544
+ end
545
+
546
+ # Returns the current page loaded by Mechanize
547
+ def current_page
548
+ @agent.current_page
549
+ end
550
+
551
+ # Returns a visited page for the url passed in, otherwise nil
552
+ def visited_page(url)
553
+ url = url.href if url.respond_to? :href
554
+
555
+ @agent.visited_page url
556
+ end
557
+
558
+ # Returns whether or not a url has been visited
559
+ alias visited? visited_page
560
+
561
+ def parse uri, response, body
562
+ content_type = nil
563
+
564
+ unless response['Content-Type'].nil?
565
+ data, = response['Content-Type'].split ';', 2
566
+ content_type, = data.downcase.split ',', 2 unless data.nil?
567
+ end
568
+
569
+ # Find our pluggable parser
570
+ parser_klass = @pluggable_parser.parser content_type
571
+
572
+ parser_klass.new uri, response, body, response.code do |parser|
573
+ parser.mech = self if parser.respond_to? :mech=
574
+
575
+ parser.watch_for_set = @watch_for_set if
576
+ @watch_for_set and parser.respond_to?(:watch_for_set=)
577
+ end
578
+ end
579
+
580
+ ##
581
+ # Sets the proxy +address+ at +port+ with an optional +user+ and +password+
582
+
583
+ def set_proxy address, port, user = nil, password = nil
584
+ @proxy_addr = address
585
+ @proxy_port = port
586
+ @proxy_user = user
587
+ @proxy_pass = password
588
+
589
+ @agent.set_proxy address, port, user, password
590
+ @agent.set_http
591
+ end
592
+
593
+ # Runs given block, then resets the page history as it was before. self is
594
+ # given as a parameter to the block. Returns the value of the block.
595
+ def transact
596
+ history_backup = @agent.history.dup
597
+ begin
598
+ yield self
599
+ ensure
600
+ @agent.history = history_backup
601
+ end
602
+ end
603
+
604
+ def robots
605
+ @agent.robots
606
+ end
607
+
608
+ def robots= enabled
609
+ @agent.robots = enabled
610
+ end
611
+
612
+ alias :page :current_page
613
+
614
+ private
615
+
616
+ def post_form(uri, form, headers = {})
617
+ cur_page = form.page || current_page ||
618
+ Page.new(nil, {'content-type'=>'text/html'})
619
+
620
+ request_data = form.request_data
621
+
622
+ log.debug("query: #{ request_data.inspect }") if log
623
+
624
+ headers = {
625
+ 'Content-Type' => form.enctype,
626
+ 'Content-Length' => request_data.size.to_s,
627
+ }.merge headers
628
+
629
+ # fetch the page
630
+ page = @agent.fetch uri, :post, headers, [request_data], cur_page
631
+ add_to_history(page)
632
+ page
633
+ end
634
+
635
+ def add_to_history(page)
636
+ @agent.history.push(page, @agent.resolve(page.uri))
637
+ @history_added.call(page) if @history_added
638
+ end
639
+
640
+ end
641
+
642
+ require 'mechanize/content_type_error'
643
+ require 'mechanize/cookie'
644
+ require 'mechanize/cookie_jar'
645
+ require 'mechanize/file'
646
+ require 'mechanize/file_connection'
647
+ require 'mechanize/file_request'
648
+ require 'mechanize/file_response'
649
+ require 'mechanize/form'
650
+ require 'mechanize/history'
651
+ require 'mechanize/http'
652
+ require 'mechanize/http/agent'
653
+ require 'mechanize/page'
654
+ require 'mechanize/inspect'
655
+ require 'mechanize/monkey_patch'
656
+ require 'mechanize/pluggable_parsers'
657
+ require 'mechanize/redirect_limit_reached_error'
658
+ require 'mechanize/redirect_not_get_or_head_error'
659
+ require 'mechanize/response_code_error'
660
+ require 'mechanize/response_read_error'
661
+ require 'mechanize/robots_disallowed_error'
662
+ require 'mechanize/unsupported_scheme_error'
663
+ require 'mechanize/util'
664
+