aai10-mechanize 2.0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (176) hide show
  1. data/.autotest +6 -0
  2. data/.gitignore +9 -0
  3. data/CHANGELOG.rdoc +652 -0
  4. data/EXAMPLES.rdoc +187 -0
  5. data/FAQ.rdoc +11 -0
  6. data/GUIDE.rdoc +163 -0
  7. data/LICENSE.rdoc +20 -0
  8. data/Manifest.txt +172 -0
  9. data/README.rdoc +63 -0
  10. data/Rakefile +36 -0
  11. data/aai10-mechanize.gemspec +20 -0
  12. data/examples/flickr_upload.rb +22 -0
  13. data/examples/mech-dump.rb +5 -0
  14. data/examples/proxy_req.rb +7 -0
  15. data/examples/rubyforge.rb +20 -0
  16. data/examples/spider.rb +21 -0
  17. data/lib/mechanize.rb +664 -0
  18. data/lib/mechanize/content_type_error.rb +14 -0
  19. data/lib/mechanize/cookie.rb +116 -0
  20. data/lib/mechanize/cookie_jar.rb +202 -0
  21. data/lib/mechanize/element_matcher.rb +35 -0
  22. data/lib/mechanize/file.rb +80 -0
  23. data/lib/mechanize/file_connection.rb +17 -0
  24. data/lib/mechanize/file_request.rb +26 -0
  25. data/lib/mechanize/file_response.rb +74 -0
  26. data/lib/mechanize/file_saver.rb +37 -0
  27. data/lib/mechanize/form.rb +478 -0
  28. data/lib/mechanize/form/button.rb +9 -0
  29. data/lib/mechanize/form/check_box.rb +11 -0
  30. data/lib/mechanize/form/field.rb +44 -0
  31. data/lib/mechanize/form/file_upload.rb +23 -0
  32. data/lib/mechanize/form/image_button.rb +20 -0
  33. data/lib/mechanize/form/multi_select_list.rb +83 -0
  34. data/lib/mechanize/form/option.rb +49 -0
  35. data/lib/mechanize/form/radio_button.rb +48 -0
  36. data/lib/mechanize/form/select_list.rb +40 -0
  37. data/lib/mechanize/headers.rb +25 -0
  38. data/lib/mechanize/history.rb +83 -0
  39. data/lib/mechanize/http.rb +3 -0
  40. data/lib/mechanize/http/agent.rb +738 -0
  41. data/lib/mechanize/inspect.rb +88 -0
  42. data/lib/mechanize/monkey_patch.rb +37 -0
  43. data/lib/mechanize/page.rb +408 -0
  44. data/lib/mechanize/page/base.rb +8 -0
  45. data/lib/mechanize/page/frame.rb +27 -0
  46. data/lib/mechanize/page/image.rb +30 -0
  47. data/lib/mechanize/page/label.rb +20 -0
  48. data/lib/mechanize/page/link.rb +82 -0
  49. data/lib/mechanize/page/meta_refresh.rb +56 -0
  50. data/lib/mechanize/pluggable_parsers.rb +101 -0
  51. data/lib/mechanize/redirect_limit_reached_error.rb +16 -0
  52. data/lib/mechanize/redirect_not_get_or_head_error.rb +19 -0
  53. data/lib/mechanize/response_code_error.rb +22 -0
  54. data/lib/mechanize/response_read_error.rb +27 -0
  55. data/lib/mechanize/robots_disallowed_error.rb +29 -0
  56. data/lib/mechanize/unsupported_scheme_error.rb +8 -0
  57. data/lib/mechanize/util.rb +113 -0
  58. data/test/data/htpasswd +1 -0
  59. data/test/data/server.crt +16 -0
  60. data/test/data/server.csr +12 -0
  61. data/test/data/server.key +15 -0
  62. data/test/data/server.pem +15 -0
  63. data/test/helper.rb +175 -0
  64. data/test/htdocs/alt_text.html +10 -0
  65. data/test/htdocs/bad_form_test.html +9 -0
  66. data/test/htdocs/button.jpg +0 -0
  67. data/test/htdocs/canonical_uri.html +9 -0
  68. data/test/htdocs/dir with spaces/foo.html +1 -0
  69. data/test/htdocs/empty_form.html +6 -0
  70. data/test/htdocs/file_upload.html +26 -0
  71. data/test/htdocs/find_link.html +41 -0
  72. data/test/htdocs/form_multi_select.html +16 -0
  73. data/test/htdocs/form_multival.html +37 -0
  74. data/test/htdocs/form_no_action.html +18 -0
  75. data/test/htdocs/form_no_input_name.html +16 -0
  76. data/test/htdocs/form_select.html +16 -0
  77. data/test/htdocs/form_select_all.html +16 -0
  78. data/test/htdocs/form_select_none.html +17 -0
  79. data/test/htdocs/form_select_noopts.html +10 -0
  80. data/test/htdocs/form_set_fields.html +14 -0
  81. data/test/htdocs/form_test.html +188 -0
  82. data/test/htdocs/frame_referer_test.html +10 -0
  83. data/test/htdocs/frame_test.html +30 -0
  84. data/test/htdocs/google.html +13 -0
  85. data/test/htdocs/iframe_test.html +16 -0
  86. data/test/htdocs/index.html +6 -0
  87. data/test/htdocs/link with space.html +5 -0
  88. data/test/htdocs/meta_cookie.html +11 -0
  89. data/test/htdocs/no_title_test.html +6 -0
  90. data/test/htdocs/nofollow.html +9 -0
  91. data/test/htdocs/noindex.html +9 -0
  92. data/test/htdocs/norobots.html +8 -0
  93. data/test/htdocs/rails_3_encoding_hack_form_test.html +27 -0
  94. data/test/htdocs/rel_nofollow.html +8 -0
  95. data/test/htdocs/relative/tc_relative_links.html +21 -0
  96. data/test/htdocs/robots.html +8 -0
  97. data/test/htdocs/robots.txt +2 -0
  98. data/test/htdocs/tc_bad_charset.html +9 -0
  99. data/test/htdocs/tc_bad_links.html +5 -0
  100. data/test/htdocs/tc_base_images.html +10 -0
  101. data/test/htdocs/tc_base_link.html +8 -0
  102. data/test/htdocs/tc_blank_form.html +11 -0
  103. data/test/htdocs/tc_charset.html +6 -0
  104. data/test/htdocs/tc_checkboxes.html +19 -0
  105. data/test/htdocs/tc_encoded_links.html +5 -0
  106. data/test/htdocs/tc_field_precedence.html +11 -0
  107. data/test/htdocs/tc_follow_meta.html +8 -0
  108. data/test/htdocs/tc_form_action.html +48 -0
  109. data/test/htdocs/tc_images.html +8 -0
  110. data/test/htdocs/tc_links.html +18 -0
  111. data/test/htdocs/tc_meta_in_body.html +9 -0
  112. data/test/htdocs/tc_no_attributes.html +16 -0
  113. data/test/htdocs/tc_pretty_print.html +17 -0
  114. data/test/htdocs/tc_radiobuttons.html +17 -0
  115. data/test/htdocs/tc_referer.html +16 -0
  116. data/test/htdocs/tc_relative_links.html +19 -0
  117. data/test/htdocs/tc_textarea.html +23 -0
  118. data/test/htdocs/test_bad_encoding.html +52 -0
  119. data/test/htdocs/test_click.html +11 -0
  120. data/test/htdocs/unusual______.html +5 -0
  121. data/test/servlets.rb +402 -0
  122. data/test/ssl_server.rb +48 -0
  123. data/test/test_cookies.rb +129 -0
  124. data/test/test_form_action.rb +52 -0
  125. data/test/test_form_as_hash.rb +59 -0
  126. data/test/test_form_button.rb +46 -0
  127. data/test/test_frames.rb +34 -0
  128. data/test/test_headers.rb +33 -0
  129. data/test/test_history.rb +118 -0
  130. data/test/test_history_added.rb +16 -0
  131. data/test/test_html_unscape_forms.rb +46 -0
  132. data/test/test_if_modified_since.rb +20 -0
  133. data/test/test_images.rb +19 -0
  134. data/test/test_mechanize.rb +852 -0
  135. data/test/test_mechanize_cookie.rb +345 -0
  136. data/test/test_mechanize_cookie_jar.rb +433 -0
  137. data/test/test_mechanize_file.rb +53 -0
  138. data/test/test_mechanize_file_request.rb +19 -0
  139. data/test/test_mechanize_file_response.rb +21 -0
  140. data/test/test_mechanize_form.rb +576 -0
  141. data/test/test_mechanize_form_check_box.rb +37 -0
  142. data/test/test_mechanize_form_encoding.rb +120 -0
  143. data/test/test_mechanize_form_field.rb +21 -0
  144. data/test/test_mechanize_form_image_button.rb +12 -0
  145. data/test/test_mechanize_form_textarea.rb +51 -0
  146. data/test/test_mechanize_http_agent.rb +697 -0
  147. data/test/test_mechanize_link.rb +84 -0
  148. data/test/test_mechanize_page_encoding.rb +147 -0
  149. data/test/test_mechanize_page_link.rb +382 -0
  150. data/test/test_mechanize_page_meta_refresh.rb +115 -0
  151. data/test/test_mechanize_redirect_not_get_or_head_error.rb +18 -0
  152. data/test/test_mechanize_subclass.rb +22 -0
  153. data/test/test_mechanize_util.rb +92 -0
  154. data/test/test_multi_select.rb +118 -0
  155. data/test/test_no_attributes.rb +13 -0
  156. data/test/test_option.rb +18 -0
  157. data/test/test_pluggable_parser.rb +136 -0
  158. data/test/test_post_form.rb +37 -0
  159. data/test/test_pretty_print.rb +22 -0
  160. data/test/test_radiobutton.rb +75 -0
  161. data/test/test_redirect_limit_reached.rb +39 -0
  162. data/test/test_redirect_ok.rb +25 -0
  163. data/test/test_referer.rb +81 -0
  164. data/test/test_relative_links.rb +40 -0
  165. data/test/test_request.rb +13 -0
  166. data/test/test_response_code.rb +53 -0
  167. data/test/test_robots.rb +72 -0
  168. data/test/test_save_file.rb +48 -0
  169. data/test/test_scheme.rb +48 -0
  170. data/test/test_select.rb +119 -0
  171. data/test/test_select_all.rb +15 -0
  172. data/test/test_select_none.rb +15 -0
  173. data/test/test_select_noopts.rb +18 -0
  174. data/test/test_set_fields.rb +44 -0
  175. data/test/test_ssl_server.rb +20 -0
  176. metadata +360 -0
@@ -0,0 +1,63 @@
1
+ = Mechanize
2
+
3
+ * http://mechanize.rubyforge.org
4
+ * http://github.com/tenderlove/mechanize/tree/master
5
+
6
+ == DESCRIPTION
7
+
8
+ The Mechanize library is used for automating interaction with websites.
9
+ Mechanize automatically stores and sends cookies, follows redirects,
10
+ can follow links, and submit forms. Form fields can be populated and
11
+ submitted. Mechanize also keeps track of the sites that you have visited as
12
+ a history.
13
+
14
+ == Dependencies
15
+
16
+ * ruby 1.8.7
17
+ * nokogiri[http://nokogiri.rubyforge.org]
18
+
19
+ == SUPPORT:
20
+
21
+ The mechanize mailing list is available here:
22
+
23
+ * http://rubyforge.org/mailman/listinfo/mechanize-users
24
+
25
+ The bug tracker is available here:
26
+
27
+ * http://github.com/tenderlove/mechanize/issues
28
+
29
+ == Examples
30
+
31
+ If you are just starting, check out the GUIDE.
32
+ Also, check out the EXAMPLES file.
33
+
34
+ == Authors
35
+
36
+ Copyright (c) 2005 by Michael Neumann (mneumann@ntecs.de)
37
+
38
+ Copyright (c) 2006-2011:
39
+
40
+ * {Aaron Patterson}[http://tenderlovemaking.com] (aaronp@rubyforge.org)
41
+ * {Mike Dalessio}[http://mike.daless.io] (mike@csa.net)
42
+
43
+ Copyright (c) 2011:
44
+
45
+ * {Eric Hodel}[http://blog.segment7.net] (drbrain@segment7.net)
46
+
47
+ This library comes with a shameless plug for employing me
48
+ (Aaron[http://tenderlovemaking.com/]) programming Ruby, my favorite language!
49
+
50
+ == Acknowledgments
51
+
52
+ This library was heavily influenced by its namesake in the Perl world. A big
53
+ thanks goes to Andy Lester (andy@petdance.com), the author of the original
54
+ Perl Mechanize which is available here[http://search.cpan.org/~petdance/WWW-Mechanize/]. Ruby Mechanize would not be around without you!
55
+
56
+ Thank you to Michael Neumann for starting the Ruby version. Thanks to everyone
57
+ who's helped out in various ways. Finally, thank you to the people using this
58
+ library!
59
+
60
+ == License
61
+
62
+ This library is distributed under the MIT license. Please see the LICENSE file.
63
+
@@ -0,0 +1,36 @@
1
+ require 'rubygems'
2
+ require 'hoe'
3
+
4
+ Hoe.plugin :git
5
+ Hoe.plugin :minitest
6
+
7
+ Hoe.spec 'mechanize' do
8
+ developer 'Eric Hodel', 'drbrain@segment7.net'
9
+ developer 'Aaron Patterson', 'aaronp@rubyforge.org'
10
+ developer 'Mike Dalessio', 'mike.dalessio@gmail.com'
11
+
12
+ self.readme_file = 'README.rdoc'
13
+ self.history_file = 'CHANGELOG.rdoc'
14
+ self.extra_rdoc_files += Dir['*.rdoc']
15
+
16
+ rdoc_locations << 'drbrain@rubyforge.org:/var/www/gforge-projects/mechanize/'
17
+
18
+ self.extra_deps << ['nokogiri', '~> 1.4']
19
+ self.extra_deps << ['net-http-persistent', '~> 1.8']
20
+ self.extra_deps << ['net-http-digest_auth', '~> 1.1', '>= 1.1.1']
21
+ self.extra_deps << ['webrobots', '~> 0.0', '>= 0.0.9']
22
+
23
+ self.spec_extras[:required_ruby_version] = '>= 1.8.7'
24
+ end
25
+
26
+ desc "Update SSL Certificate"
27
+ task('ssl_cert') do |p|
28
+ sh "openssl genrsa -des3 -out server.key 1024"
29
+ sh "openssl req -new -key server.key -out server.csr"
30
+ sh "cp server.key server.key.org"
31
+ sh "openssl rsa -in server.key.org -out server.key"
32
+ sh "openssl x509 -req -days 365 -in server.csr -signkey server.key -out server.crt"
33
+ sh "cp server.key server.pem"
34
+ sh "mv server.key server.csr server.crt server.pem test/data/"
35
+ sh "rm server.key.org"
36
+ end
@@ -0,0 +1,20 @@
1
+ # -*- encoding: utf-8 -*-
2
+ $:.push File.expand_path("../lib", __FILE__)
3
+
4
+ Gem::Specification.new do |s|
5
+ s.name = "aai10-mechanize"
6
+ s.version = "2.0.1.0"
7
+ s.platform = Gem::Platform::RUBY
8
+ s.authors = ["Alexey Aleksandrov"]
9
+ s.email = ["aai10@mail.msiu.ru "]
10
+ s.homepage = ""
11
+ s.summary = %q{Mechanize Bug Fix}
12
+ s.description = %q{Fix error in mechanize}
13
+
14
+ s.rubyforge_project = "aai10-mechanize"
15
+
16
+ s.files = `git ls-files`.split("\n")
17
+ s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
18
+ s.executables = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
19
+ s.require_paths = ["lib"]
20
+ end
@@ -0,0 +1,22 @@
1
+ require 'rubygems'
2
+ require 'mechanize'
3
+
4
+ agent = Mechanize.new
5
+
6
+ # Get the flickr sign in page
7
+ page = agent.get 'http://flickr.com/signin/flickr/'
8
+
9
+ # Fill out the login form
10
+ form = page.form_with :name => 'flickrloginform'
11
+ form.email = ARGV[0]
12
+ form.password = ARGV[1]
13
+ form.submit
14
+
15
+ # Go to the upload page
16
+ page = page.link_with(:text => 'Upload').click
17
+
18
+ # Fill out the form
19
+ form = page.forms.action('/photos_upload_process.gne').first
20
+ form.file_uploads.name('file1').first.file_name = ARGV[2]
21
+ form.submit
22
+
@@ -0,0 +1,5 @@
1
+ require 'rubygems'
2
+ require 'mechanize'
3
+
4
+ agent = Mechanize.new
5
+ puts agent.get(ARGV[0]).inspect
@@ -0,0 +1,7 @@
1
+ require 'rubygems'
2
+ require 'mechanize'
3
+
4
+ agent = Mechanize.new
5
+ agent.set_proxy('localhost', '8000')
6
+ page = agent.get(ARGV[0])
7
+ puts page.body
@@ -0,0 +1,20 @@
1
+ # This example logs a user in to rubyforge and prints out the body of the
2
+ # page after logging the user in.
3
+ require 'rubygems'
4
+ require 'mechanize'
5
+ require 'logger'
6
+
7
+ # Create a new mechanize object
8
+ agent = Mechanize.new { |a| a.log = Logger.new(STDERR) }
9
+
10
+ # Load the rubyforge website
11
+ page = agent.get('http://rubyforge.org/')
12
+ page = agent.click page.link_with(:text => /Log In/) # Click the login link
13
+ form = page.forms[1] # Select the second form (index 1)
14
+ form.form_loginname = ARGV[0]
15
+ form.form_pw = ARGV[1]
16
+
17
+ # Submit the form
18
+ page = form.submit form.buttons.first
19
+
20
+ puts page.body # Print out the body
@@ -0,0 +1,21 @@
1
+ require 'rubygems'
2
+ require 'mechanize'
3
+
4
+ agent = Mechanize.new
5
+ stack = agent.get(ARGV[0]).links
6
+
7
+ while l = stack.pop
8
+ next unless l.uri
9
+ host = l.uri.host
10
+ next unless host.nil? or host == agent.history.first.uri.host
11
+ next if agent.visited? l.href
12
+
13
+ puts "crawling #{l.uri}"
14
+ begin
15
+ page = l.click
16
+ next unless Mechanize::Page === page
17
+ stack.push(*page.links)
18
+ rescue Mechanize::ResponseCodeError
19
+ end
20
+ end
21
+
@@ -0,0 +1,664 @@
1
+ require 'fileutils'
2
+ require 'forwardable'
3
+ require 'iconv' if RUBY_VERSION < '1.9.2'
4
+ require 'mutex_m'
5
+ require 'net/http/digest_auth'
6
+ require 'net/http/persistent'
7
+ require 'nkf'
8
+ require 'nokogiri'
9
+ require 'openssl'
10
+ require 'stringio'
11
+ require 'uri'
12
+ require 'webrick/httputils'
13
+ require 'zlib'
14
+
15
+ # = Synopsis
16
+ # The Mechanize library is used for automating interaction with a website. It
17
+ # can follow links, and submit forms. Form fields can be populated and
18
+ # submitted. A history of URLs is maintained and can be queried.
19
+ #
20
+ # == Example
21
+ # require 'rubygems'
22
+ # require 'mechanize'
23
+ # require 'logger'
24
+ #
25
+ # agent = Mechanize.new { |a| a.log = Logger.new("mech.log") }
26
+ # agent.user_agent_alias = 'Mac Safari'
27
+ # page = agent.get("http://www.google.com/")
28
+ # search_form = page.form_with(:name => "f")
29
+ # search_form.field_with(:name => "q").value = "Hello"
30
+ # search_results = agent.submit(search_form)
31
+ # puts search_results.body
32
+ class Mechanize
33
+
34
+ ##
35
+ # The version of Mechanize you are using.
36
+ VERSION = '2.0.2'
37
+
38
+ class Error < RuntimeError
39
+ end
40
+
41
+ ruby_version = if RUBY_PATCHLEVEL >= 0 then
42
+ "#{RUBY_VERSION}p#{RUBY_PATCHLEVEL}"
43
+ else
44
+ "#{RUBY_VERSION}dev#{RUBY_REVISION}"
45
+ end
46
+
47
+ # HTTP/1.1 keep-alives are always active. This does nothing.
48
+ attr_accessor :keep_alive
49
+
50
+ # HTTP/1.0 keep-alive time. This is no longer supported by mechanize as it
51
+ # now uses net-http-persistent which only supports HTTP/1.1 persistent
52
+ # connections
53
+ attr_accessor :keep_alive_time
54
+
55
+ ##
56
+ # User Agent aliases
57
+
58
+ AGENT_ALIASES = {
59
+ 'Windows IE 6' => 'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)',
60
+ 'Windows IE 7' => 'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; .NET CLR 1.1.4322; .NET CLR 2.0.50727)',
61
+ 'Windows IE 8' => 'Mozilla/5.0 (compatible; MSIE 8.0; Windows NT 5.1; Trident/4.0; .NET CLR 1.1.4322; .NET CLR 2.0.50727)',
62
+ 'Windows IE 9' => 'Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0)',
63
+ 'Windows Mozilla' => 'Mozilla/5.0 (Windows; U; Windows NT 5.0; en-US; rv:1.4b) Gecko/20030516 Mozilla Firebird/0.6',
64
+ 'Mac Safari' => 'Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_2; de-at) AppleWebKit/531.21.8 (KHTML, like Gecko) Version/4.0.4 Safari/531.21.10',
65
+ 'Mac FireFox' => 'Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10.6; en-US; rv:1.9.2) Gecko/20100115 Firefox/3.6',
66
+ 'Mac Mozilla' => 'Mozilla/5.0 (Macintosh; U; PPC Mac OS X Mach-O; en-US; rv:1.4a) Gecko/20030401',
67
+ 'Linux Mozilla' => 'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.4) Gecko/20030624',
68
+ 'Linux Firefox' => 'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.2.1) Gecko/20100122 firefox/3.6.1',
69
+ 'Linux Konqueror' => 'Mozilla/5.0 (compatible; Konqueror/3; Linux)',
70
+ 'iPhone' => 'Mozilla/5.0 (iPhone; U; CPU like Mac OS X; en) AppleWebKit/420+ (KHTML, like Gecko) Version/3.0 Mobile/1C28 Safari/419.3',
71
+ 'Mechanize' => "Mechanize/#{VERSION} Ruby/#{ruby_version} (http://github.com/tenderlove/mechanize/)"
72
+ }
73
+
74
+ # A Mechanize::CookieJar which stores cookies
75
+
76
+ def cookie_jar
77
+ @agent.cookie_jar
78
+ end
79
+
80
+ def cookie_jar= cookie_jar
81
+ @agent.cookie_jar = cookie_jar
82
+ end
83
+
84
+ # Length of time to wait until a connection is opened in seconds
85
+ def open_timeout
86
+ @agent.open_timeout
87
+ end
88
+
89
+ def open_timeout= open_timeout
90
+ @agent.open_timeout = open_timeout
91
+ end
92
+
93
+ # Length of time to attempt to read data from the server
94
+ def read_timeout
95
+ @agent.read_timeout
96
+ end
97
+
98
+ def read_timeout= read_timeout
99
+ @agent.read_timeout = read_timeout
100
+ end
101
+
102
+ # The identification string for the client initiating a web request
103
+ def user_agent
104
+ @agent.user_agent
105
+ end
106
+
107
+ # The value of watch_for_set is passed to pluggable parsers for retrieved
108
+ # content
109
+ attr_accessor :watch_for_set
110
+
111
+ # Path to an OpenSSL server certificate file
112
+ def ca_file
113
+ @agent.ca_file
114
+ end
115
+
116
+ def ca_file= ca_file
117
+ @agent.ca_file = ca_file
118
+ end
119
+
120
+ def certificate
121
+ @agent.certificate
122
+ end
123
+
124
+ # An OpenSSL private key or the path to a private key
125
+ def key
126
+ @agent.key
127
+ end
128
+
129
+ def key= key
130
+ @agent.key = key
131
+ end
132
+
133
+ # An OpenSSL client certificate or the path to a certificate file.
134
+ def cert
135
+ @agent.cert
136
+ end
137
+
138
+ def cert= cert
139
+ @agent.cert = cert
140
+ end
141
+
142
+ # OpenSSL key password
143
+ def pass
144
+ @agent.pass
145
+ end
146
+
147
+ def pass= pass
148
+ @agent.pass = pass
149
+ end
150
+
151
+ # Controls how this agent deals with redirects. The following values are
152
+ # allowed:
153
+ #
154
+ # :all, true:: All 3xx redirects are followed (default)
155
+ # :permanent:: Only 301 Moved Permanently redirects are followed
156
+ # false:: No redirects are followed
157
+
158
+ def redirect_ok
159
+ @agent.redirect_ok
160
+ end
161
+
162
+ def redirect_ok= follow
163
+ @agent.redirect_ok = follow
164
+ end
165
+
166
+ def gzip_enabled
167
+ @agent.gzip_enabled
168
+ end
169
+
170
+ # Enables or disables HTTP/1.1 gzip compression (enabled by default)
171
+ def gzip_enabled=enabled
172
+ @agent.gzip_enabled = enabled
173
+ end
174
+
175
+ def conditional_requests
176
+ @agent.conditional_requests
177
+ end
178
+
179
+ # Enables or disables If-Modified-Since conditional requests (enabled by default)
180
+ def conditional_requests= enabled
181
+ @agent.conditional_requests = enabled
182
+ end
183
+
184
+ # Follow HTML meta refresh. If set to +:anywhere+ meta refresh tags outside
185
+ # of the head element will be followed.
186
+ def follow_meta_refresh
187
+ @agent.follow_meta_refresh
188
+ end
189
+
190
+ def follow_meta_refresh= follow
191
+ @agent.follow_meta_refresh = follow
192
+ end
193
+
194
+ # A callback for additional certificate verification. See
195
+ # OpenSSL::SSL::SSLContext#verify_callback
196
+ #
197
+ # The callback can be used for debugging or to ignore errors by always
198
+ # returning +true+. Specifying nil uses the default method that was valid
199
+ # when the SSLContext was created
200
+ def verify_callback
201
+ @agent.verify_callback
202
+ end
203
+
204
+ def verify_callback= verify_callback
205
+ @agent.verify_callback = verify_callback
206
+ end
207
+
208
+ attr_accessor :history_added
209
+
210
+ def redirection_limit
211
+ @agent.redirection_limit
212
+ end
213
+
214
+ def redirection_limit= limit
215
+ @agent.redirection_limit = limit
216
+ end
217
+
218
+ def scheme_handlers
219
+ @agent.scheme_handlers
220
+ end
221
+
222
+ def scheme_handlers= scheme_handlers
223
+ @agent.scheme_handlers = scheme_handlers
224
+ end
225
+
226
+ # A hash of custom request headers
227
+ def request_headers
228
+ @agent.request_headers
229
+ end
230
+
231
+ def request_headers= request_headers
232
+ @agent.request_headers = request_headers
233
+ end
234
+
235
+ # Proxy settings
236
+ attr_reader :proxy_addr
237
+ attr_reader :proxy_pass
238
+ attr_reader :proxy_port
239
+ attr_reader :proxy_user
240
+
241
+ # The HTML parser to be used when parsing documents
242
+ attr_accessor :html_parser
243
+
244
+ attr_reader :agent # :nodoc:
245
+
246
+ def history
247
+ @agent.history
248
+ end
249
+
250
+ attr_reader :pluggable_parser
251
+
252
+ # A list of hooks to call after retrieving a response. Hooks are called with
253
+ # the agent and the response returned.
254
+
255
+ def post_connect_hooks
256
+ @agent.post_connect_hooks
257
+ end
258
+
259
+ # A list of hooks to call before making a request. Hooks are called with
260
+ # the agent and the request to be performed.
261
+
262
+ def pre_connect_hooks
263
+ @agent.pre_connect_hooks
264
+ end
265
+
266
+ alias follow_redirect? redirect_ok
267
+
268
+ @html_parser = Nokogiri::HTML
269
+ class << self
270
+ attr_accessor :html_parser, :log
271
+
272
+ def inherited(child)
273
+ child.html_parser ||= html_parser
274
+ child.log ||= log
275
+ super
276
+ end
277
+ end
278
+
279
+ # A default encoding name used when parsing HTML. When set it is
280
+ # used after any other encoding. The default is nil.
281
+
282
+ attr_accessor :default_encoding
283
+
284
+ # Overrides the encodings given by the HTTP server and the HTML page with
285
+ # the default_encoding when set to true.
286
+ attr_accessor :force_default_encoding
287
+
288
+ def initialize
289
+ @agent = Mechanize::HTTP::Agent.new
290
+ @agent.context = self
291
+
292
+ # attr_accessors
293
+ @agent.user_agent = AGENT_ALIASES['Mechanize']
294
+ @watch_for_set = nil
295
+ @history_added = nil
296
+
297
+ # attr_readers
298
+ @pluggable_parser = PluggableParser.new
299
+
300
+ @keep_alive = true
301
+ @keep_alive_time = 0
302
+
303
+ # Proxy
304
+ @proxy_addr = nil
305
+ @proxy_port = nil
306
+ @proxy_user = nil
307
+ @proxy_pass = nil
308
+
309
+ @html_parser = self.class.html_parser
310
+
311
+ @default_encoding = nil
312
+ @force_default_encoding = false
313
+
314
+ yield self if block_given?
315
+
316
+ @agent.set_proxy @proxy_addr, @proxy_port, @proxy_user, @proxy_pass
317
+ @agent.set_http
318
+ end
319
+
320
+ def max_history
321
+ @agent.history.max_size
322
+ end
323
+
324
+ def max_history= length
325
+ @agent.history.max_size = length
326
+ end
327
+
328
+ def log=(l); Mechanize.log = l end
329
+ def log; Mechanize.log end
330
+
331
+ def user_agent= user_agent
332
+ @agent.user_agent = user_agent
333
+ end
334
+
335
+ # Set the user agent for the Mechanize object. See AGENT_ALIASES
336
+ def user_agent_alias=(al)
337
+ self.user_agent = AGENT_ALIASES[al] ||
338
+ raise(ArgumentError, "unknown agent alias #{al.inspect}")
339
+ end
340
+
341
+ # Returns a list of cookies stored in the cookie jar.
342
+ def cookies
343
+ @agent.cookie_jar.to_a
344
+ end
345
+
346
+ # Sets the user and password to be used for authentication.
347
+ def auth(user, password)
348
+ @agent.user = user
349
+ @agent.password = password
350
+ end
351
+
352
+ alias :basic_auth :auth
353
+
354
+ # Fetches the URL passed in and returns a page.
355
+ def get(uri, parameters = [], referer = nil, headers = {})
356
+ method = :get
357
+
358
+ if Hash === uri then
359
+ options = uri
360
+ location = Gem.location_of_caller.join ':'
361
+ warn "#{location}: Mechanize#get with options hash is deprecated and will be removed October 2011"
362
+
363
+ raise ArgumentError, "url must be specified" unless uri = options[:url]
364
+ parameters = options[:params] || []
365
+ referer = options[:referer]
366
+ headers = options[:headers]
367
+ method = options[:verb] || method
368
+ end
369
+
370
+ referer ||=
371
+ if uri.to_s =~ %r{\Ahttps?://}
372
+ Page.new(nil, {'content-type'=>'text/html'})
373
+ else
374
+ current_page || Page.new(nil, {'content-type'=>'text/html'})
375
+ end
376
+
377
+ # FIXME: Huge hack so that using a URI as a referer works. I need to
378
+ # refactor everything to pass around URIs but still support
379
+ # Mechanize::Page#base
380
+ unless referer.is_a?(Mechanize::File)
381
+ referer = referer.is_a?(String) ?
382
+ Page.new(URI.parse(referer), {'content-type' => 'text/html'}) :
383
+ Page.new(referer, {'content-type' => 'text/html'})
384
+ end
385
+
386
+ # fetch the page
387
+ headers ||= {}
388
+ page = @agent.fetch uri, method, headers, parameters, referer
389
+ add_to_history(page)
390
+ yield page if block_given?
391
+ page
392
+ end
393
+
394
+ ##
395
+ # PUT to +url+ with +entity+, and setting +headers+:
396
+ #
397
+ # put('http://example/', 'new content', {'Content-Type' => 'text/plain'})
398
+ #
399
+ def put(url, entity, headers = {})
400
+ request_with_entity(:put, url, entity, headers)
401
+ end
402
+
403
+ ##
404
+ # DELETE to +uri+ with +query_params+, and setting +headers+:
405
+ #
406
+ # delete('http://example/', {'q' => 'foo'}, {})
407
+ #
408
+ def delete(uri, query_params = {}, headers = {})
409
+ page = @agent.fetch(uri, :delete, headers, query_params)
410
+ add_to_history(page)
411
+ page
412
+ end
413
+
414
+ ##
415
+ # HEAD to +uri+ with +query_params+, and setting +headers+:
416
+ #
417
+ # head('http://example/', {'q' => 'foo'}, {})
418
+ #
419
+ def head(uri, query_params = {}, headers = {})
420
+ # fetch the page
421
+ page = @agent.fetch(uri, :head, headers, query_params)
422
+ yield page if block_given?
423
+ page
424
+ end
425
+
426
+ # Fetch a file and return the contents of the file.
427
+ def get_file(url)
428
+ get(url).body
429
+ end
430
+
431
+ # If the parameter is a string, finds the button or link with the
432
+ # value of the string and clicks it. Otherwise, clicks the
433
+ # Mechanize::Page::Link object passed in. Returns the page fetched.
434
+ def click(link)
435
+ case link
436
+ when Page::Link
437
+ referer = link.page || current_page()
438
+ if @agent.robots
439
+ if (referer.is_a?(Page) && referer.parser.nofollow?) || link.rel?('nofollow')
440
+ raise RobotsDisallowedError.new(link.href)
441
+ end
442
+ end
443
+ if link.rel?('noreferrer')
444
+ href = @agent.resolve(link.href, link.page || current_page)
445
+ referer = Page.new(nil, {'content-type'=>'text/html'})
446
+ else
447
+ href = link.href
448
+ end
449
+ get href, [], referer
450
+ when String, Regexp
451
+ if real_link = page.link_with(:text => link)
452
+ click real_link
453
+ else
454
+ button = nil
455
+ form = page.forms.find do |f|
456
+ button = f.button_with(:value => link)
457
+ button.is_a? Form::Submit
458
+ end
459
+ submit form, button if form
460
+ end
461
+ else
462
+ referer = current_page()
463
+ href = link.respond_to?(:href) ? link.href :
464
+ (link['href'] || link['src'])
465
+ get href, [], referer
466
+ end
467
+ end
468
+
469
+ # Equivalent to the browser back button. Returns the most recent page
470
+ # visited.
471
+ def back
472
+ @agent.history.pop
473
+ end
474
+
475
+ # Posts to the given URL with the request entity. The request
476
+ # entity is specified by either a string, or a list of key-value
477
+ # pairs represented by a hash or an array of arrays.
478
+ #
479
+ # Examples:
480
+ # agent.post('http://example.com/', "foo" => "bar")
481
+ #
482
+ # agent.post('http://example.com/', [ ["foo", "bar"] ])
483
+ #
484
+ # agent.post('http://example.com/', "<message>hello</message>", 'Content-Type' => 'application/xml')
485
+ def post(url, query={}, headers={})
486
+ if query.is_a?(String)
487
+ return request_with_entity(:post, url, query, headers)
488
+ end
489
+ node = {}
490
+ # Create a fake form
491
+ class << node
492
+ def search(*args); []; end
493
+ end
494
+ node['method'] = 'POST'
495
+ node['enctype'] = 'application/x-www-form-urlencoded'
496
+
497
+ form = Form.new(node)
498
+
499
+ query.each { |k, v|
500
+ if v.is_a?(IO)
501
+ form.enctype = 'multipart/form-data'
502
+ ul = Form::FileUpload.new({'name' => k.to_s},::File.basename(v.path))
503
+ ul.file_data = v.read
504
+ form.file_uploads << ul
505
+ else
506
+ form.fields << Form::Field.new({'name' => k.to_s},v)
507
+ end
508
+ }
509
+ post_form(url, form, headers)
510
+ end
511
+
512
+ # Submit a form with an optional button.
513
+ # Without a button:
514
+ # page = agent.get('http://example.com')
515
+ # agent.submit(page.forms.first)
516
+ # With a button
517
+ # agent.submit(page.forms.first, page.forms.first.buttons.first)
518
+ def submit(form, button=nil, headers={})
519
+ form.add_button_to_query(button) if button
520
+ case form.method.upcase
521
+ when 'POST'
522
+ post_form(form.action, form, headers)
523
+ when 'GET'
524
+ get(form.action.gsub(/\?[^\?]*$/, ''),
525
+ form.build_query,
526
+ form.page,
527
+ headers)
528
+ else
529
+ raise ArgumentError, "unsupported method: #{form.method.upcase}"
530
+ end
531
+ end
532
+
533
+ def request_with_entity(verb, uri, entity, headers = {})
534
+ cur_page = current_page || Page.new(nil, {'content-type'=>'text/html'})
535
+
536
+ headers = {
537
+ 'Content-Type' => 'application/octet-stream',
538
+ 'Content-Length' => entity.size.to_s,
539
+ }.update headers
540
+
541
+ page = @agent.fetch uri, verb, headers, [entity], cur_page
542
+ add_to_history(page)
543
+ page
544
+ end
545
+
546
+ # Returns the current page loaded by Mechanize
547
+ def current_page
548
+ @agent.current_page
549
+ end
550
+
551
+ # Returns a visited page for the url passed in, otherwise nil
552
+ def visited_page(url)
553
+ url = url.href if url.respond_to? :href
554
+
555
+ @agent.visited_page url
556
+ end
557
+
558
+ # Returns whether or not a url has been visited
559
+ alias visited? visited_page
560
+
561
+ def parse uri, response, body
562
+ content_type = nil
563
+
564
+ unless response['Content-Type'].nil?
565
+ data, = response['Content-Type'].split ';', 2
566
+ content_type, = data.downcase.split ',', 2 unless data.nil?
567
+ end
568
+
569
+ # Find our pluggable parser
570
+ parser_klass = @pluggable_parser.parser content_type
571
+
572
+ parser_klass.new uri, response, body, response.code do |parser|
573
+ parser.mech = self if parser.respond_to? :mech=
574
+
575
+ parser.watch_for_set = @watch_for_set if
576
+ @watch_for_set and parser.respond_to?(:watch_for_set=)
577
+ end
578
+ end
579
+
580
+ ##
581
+ # Sets the proxy +address+ at +port+ with an optional +user+ and +password+
582
+
583
+ def set_proxy address, port, user = nil, password = nil
584
+ @proxy_addr = address
585
+ @proxy_port = port
586
+ @proxy_user = user
587
+ @proxy_pass = password
588
+
589
+ @agent.set_proxy address, port, user, password
590
+ @agent.set_http
591
+ end
592
+
593
+ # Runs given block, then resets the page history as it was before. self is
594
+ # given as a parameter to the block. Returns the value of the block.
595
+ def transact
596
+ history_backup = @agent.history.dup
597
+ begin
598
+ yield self
599
+ ensure
600
+ @agent.history = history_backup
601
+ end
602
+ end
603
+
604
+ def robots
605
+ @agent.robots
606
+ end
607
+
608
+ def robots= enabled
609
+ @agent.robots = enabled
610
+ end
611
+
612
+ alias :page :current_page
613
+
614
+ private
615
+
616
+ def post_form(uri, form, headers = {})
617
+ cur_page = form.page || current_page ||
618
+ Page.new(nil, {'content-type'=>'text/html'})
619
+
620
+ request_data = form.request_data
621
+
622
+ log.debug("query: #{ request_data.inspect }") if log
623
+
624
+ headers = {
625
+ 'Content-Type' => form.enctype,
626
+ 'Content-Length' => request_data.size.to_s,
627
+ }.merge headers
628
+
629
+ # fetch the page
630
+ page = @agent.fetch uri, :post, headers, [request_data], cur_page
631
+ add_to_history(page)
632
+ page
633
+ end
634
+
635
+ def add_to_history(page)
636
+ @agent.history.push(page, @agent.resolve(page.uri))
637
+ @history_added.call(page) if @history_added
638
+ end
639
+
640
+ end
641
+
642
+ require 'mechanize/content_type_error'
643
+ require 'mechanize/cookie'
644
+ require 'mechanize/cookie_jar'
645
+ require 'mechanize/file'
646
+ require 'mechanize/file_connection'
647
+ require 'mechanize/file_request'
648
+ require 'mechanize/file_response'
649
+ require 'mechanize/form'
650
+ require 'mechanize/history'
651
+ require 'mechanize/http'
652
+ require 'mechanize/http/agent'
653
+ require 'mechanize/page'
654
+ require 'mechanize/inspect'
655
+ require 'mechanize/monkey_patch'
656
+ require 'mechanize/pluggable_parsers'
657
+ require 'mechanize/redirect_limit_reached_error'
658
+ require 'mechanize/redirect_not_get_or_head_error'
659
+ require 'mechanize/response_code_error'
660
+ require 'mechanize/response_read_error'
661
+ require 'mechanize/robots_disallowed_error'
662
+ require 'mechanize/unsupported_scheme_error'
663
+ require 'mechanize/util'
664
+