tenderlove-mechanize 0.9.3.20090617085936

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (173) hide show
  1. data/CHANGELOG.rdoc +496 -0
  2. data/EXAMPLES.rdoc +171 -0
  3. data/FAQ.rdoc +11 -0
  4. data/GUIDE.rdoc +122 -0
  5. data/LICENSE.rdoc +340 -0
  6. data/Manifest.txt +169 -0
  7. data/README.rdoc +60 -0
  8. data/Rakefile +43 -0
  9. data/examples/flickr_upload.rb +23 -0
  10. data/examples/mech-dump.rb +7 -0
  11. data/examples/proxy_req.rb +9 -0
  12. data/examples/rubyforge.rb +21 -0
  13. data/examples/spider.rb +11 -0
  14. data/lib/mechanize.rb +7 -0
  15. data/lib/www/mechanize/chain/auth_headers.rb +80 -0
  16. data/lib/www/mechanize/chain/body_decoding_handler.rb +48 -0
  17. data/lib/www/mechanize/chain/connection_resolver.rb +78 -0
  18. data/lib/www/mechanize/chain/custom_headers.rb +23 -0
  19. data/lib/www/mechanize/chain/handler.rb +9 -0
  20. data/lib/www/mechanize/chain/header_resolver.rb +53 -0
  21. data/lib/www/mechanize/chain/parameter_resolver.rb +24 -0
  22. data/lib/www/mechanize/chain/post_connect_hook.rb +0 -0
  23. data/lib/www/mechanize/chain/pre_connect_hook.rb +22 -0
  24. data/lib/www/mechanize/chain/request_resolver.rb +32 -0
  25. data/lib/www/mechanize/chain/response_body_parser.rb +40 -0
  26. data/lib/www/mechanize/chain/response_header_handler.rb +50 -0
  27. data/lib/www/mechanize/chain/response_reader.rb +41 -0
  28. data/lib/www/mechanize/chain/ssl_resolver.rb +42 -0
  29. data/lib/www/mechanize/chain/uri_resolver.rb +77 -0
  30. data/lib/www/mechanize/chain.rb +34 -0
  31. data/lib/www/mechanize/content_type_error.rb +16 -0
  32. data/lib/www/mechanize/cookie.rb +72 -0
  33. data/lib/www/mechanize/cookie_jar.rb +191 -0
  34. data/lib/www/mechanize/file.rb +73 -0
  35. data/lib/www/mechanize/file_response.rb +62 -0
  36. data/lib/www/mechanize/file_saver.rb +39 -0
  37. data/lib/www/mechanize/form/button.rb +8 -0
  38. data/lib/www/mechanize/form/check_box.rb +13 -0
  39. data/lib/www/mechanize/form/field.rb +28 -0
  40. data/lib/www/mechanize/form/file_upload.rb +24 -0
  41. data/lib/www/mechanize/form/image_button.rb +23 -0
  42. data/lib/www/mechanize/form/multi_select_list.rb +69 -0
  43. data/lib/www/mechanize/form/option.rb +51 -0
  44. data/lib/www/mechanize/form/radio_button.rb +38 -0
  45. data/lib/www/mechanize/form/select_list.rb +45 -0
  46. data/lib/www/mechanize/form.rb +360 -0
  47. data/lib/www/mechanize/headers.rb +12 -0
  48. data/lib/www/mechanize/history.rb +67 -0
  49. data/lib/www/mechanize/inspect.rb +90 -0
  50. data/lib/www/mechanize/monkey_patch.rb +37 -0
  51. data/lib/www/mechanize/page/base.rb +10 -0
  52. data/lib/www/mechanize/page/frame.rb +22 -0
  53. data/lib/www/mechanize/page/link.rb +50 -0
  54. data/lib/www/mechanize/page/meta.rb +51 -0
  55. data/lib/www/mechanize/page.rb +176 -0
  56. data/lib/www/mechanize/pluggable_parsers.rb +103 -0
  57. data/lib/www/mechanize/redirect_limit_reached_error.rb +18 -0
  58. data/lib/www/mechanize/redirect_not_get_or_head_error.rb +20 -0
  59. data/lib/www/mechanize/response_code_error.rb +25 -0
  60. data/lib/www/mechanize/unsupported_scheme_error.rb +10 -0
  61. data/lib/www/mechanize/util.rb +76 -0
  62. data/lib/www/mechanize.rb +619 -0
  63. data/mechanize.gemspec +41 -0
  64. data/test/chain/test_argument_validator.rb +14 -0
  65. data/test/chain/test_auth_headers.rb +25 -0
  66. data/test/chain/test_custom_headers.rb +18 -0
  67. data/test/chain/test_header_resolver.rb +28 -0
  68. data/test/chain/test_parameter_resolver.rb +35 -0
  69. data/test/chain/test_request_resolver.rb +29 -0
  70. data/test/chain/test_response_reader.rb +24 -0
  71. data/test/data/htpasswd +1 -0
  72. data/test/data/server.crt +16 -0
  73. data/test/data/server.csr +12 -0
  74. data/test/data/server.key +15 -0
  75. data/test/data/server.pem +15 -0
  76. data/test/helper.rb +129 -0
  77. data/test/htdocs/alt_text.html +10 -0
  78. data/test/htdocs/bad_form_test.html +9 -0
  79. data/test/htdocs/button.jpg +0 -0
  80. data/test/htdocs/empty_form.html +6 -0
  81. data/test/htdocs/file_upload.html +26 -0
  82. data/test/htdocs/find_link.html +41 -0
  83. data/test/htdocs/form_multi_select.html +16 -0
  84. data/test/htdocs/form_multival.html +37 -0
  85. data/test/htdocs/form_no_action.html +18 -0
  86. data/test/htdocs/form_no_input_name.html +16 -0
  87. data/test/htdocs/form_select.html +16 -0
  88. data/test/htdocs/form_select_all.html +16 -0
  89. data/test/htdocs/form_select_none.html +17 -0
  90. data/test/htdocs/form_select_noopts.html +10 -0
  91. data/test/htdocs/form_set_fields.html +14 -0
  92. data/test/htdocs/form_test.html +188 -0
  93. data/test/htdocs/frame_test.html +30 -0
  94. data/test/htdocs/google.html +13 -0
  95. data/test/htdocs/iframe_test.html +16 -0
  96. data/test/htdocs/index.html +6 -0
  97. data/test/htdocs/link with space.html +5 -0
  98. data/test/htdocs/meta_cookie.html +11 -0
  99. data/test/htdocs/no_title_test.html +6 -0
  100. data/test/htdocs/relative/tc_relative_links.html +21 -0
  101. data/test/htdocs/tc_bad_links.html +5 -0
  102. data/test/htdocs/tc_base_link.html +8 -0
  103. data/test/htdocs/tc_blank_form.html +11 -0
  104. data/test/htdocs/tc_checkboxes.html +19 -0
  105. data/test/htdocs/tc_encoded_links.html +5 -0
  106. data/test/htdocs/tc_follow_meta.html +8 -0
  107. data/test/htdocs/tc_form_action.html +48 -0
  108. data/test/htdocs/tc_links.html +18 -0
  109. data/test/htdocs/tc_no_attributes.html +16 -0
  110. data/test/htdocs/tc_pretty_print.html +17 -0
  111. data/test/htdocs/tc_radiobuttons.html +17 -0
  112. data/test/htdocs/tc_referer.html +10 -0
  113. data/test/htdocs/tc_relative_links.html +19 -0
  114. data/test/htdocs/tc_textarea.html +23 -0
  115. data/test/htdocs/unusual______.html +5 -0
  116. data/test/servlets.rb +365 -0
  117. data/test/ssl_server.rb +48 -0
  118. data/test/test_authenticate.rb +71 -0
  119. data/test/test_bad_links.rb +25 -0
  120. data/test/test_blank_form.rb +16 -0
  121. data/test/test_checkboxes.rb +61 -0
  122. data/test/test_content_type.rb +13 -0
  123. data/test/test_cookie_class.rb +338 -0
  124. data/test/test_cookie_jar.rb +362 -0
  125. data/test/test_cookies.rb +123 -0
  126. data/test/test_encoded_links.rb +20 -0
  127. data/test/test_errors.rb +49 -0
  128. data/test/test_follow_meta.rb +108 -0
  129. data/test/test_form_action.rb +44 -0
  130. data/test/test_form_as_hash.rb +61 -0
  131. data/test/test_form_button.rb +38 -0
  132. data/test/test_form_no_inputname.rb +15 -0
  133. data/test/test_forms.rb +564 -0
  134. data/test/test_frames.rb +25 -0
  135. data/test/test_get_headers.rb +52 -0
  136. data/test/test_gzipping.rb +22 -0
  137. data/test/test_hash_api.rb +45 -0
  138. data/test/test_history.rb +142 -0
  139. data/test/test_history_added.rb +16 -0
  140. data/test/test_html_unscape_forms.rb +39 -0
  141. data/test/test_if_modified_since.rb +20 -0
  142. data/test/test_keep_alive.rb +31 -0
  143. data/test/test_links.rb +120 -0
  144. data/test/test_mech.rb +268 -0
  145. data/test/test_mechanize_file.rb +47 -0
  146. data/test/test_meta.rb +65 -0
  147. data/test/test_multi_select.rb +106 -0
  148. data/test/test_no_attributes.rb +13 -0
  149. data/test/test_option.rb +18 -0
  150. data/test/test_page.rb +119 -0
  151. data/test/test_pluggable_parser.rb +145 -0
  152. data/test/test_post_form.rb +34 -0
  153. data/test/test_pretty_print.rb +22 -0
  154. data/test/test_radiobutton.rb +75 -0
  155. data/test/test_redirect_limit_reached.rb +41 -0
  156. data/test/test_redirect_verb_handling.rb +45 -0
  157. data/test/test_referer.rb +39 -0
  158. data/test/test_relative_links.rb +40 -0
  159. data/test/test_request.rb +13 -0
  160. data/test/test_response_code.rb +52 -0
  161. data/test/test_save_file.rb +48 -0
  162. data/test/test_scheme.rb +48 -0
  163. data/test/test_select.rb +106 -0
  164. data/test/test_select_all.rb +15 -0
  165. data/test/test_select_none.rb +15 -0
  166. data/test/test_select_noopts.rb +16 -0
  167. data/test/test_set_fields.rb +44 -0
  168. data/test/test_ssl_server.rb +20 -0
  169. data/test/test_subclass.rb +14 -0
  170. data/test/test_textarea.rb +45 -0
  171. data/test/test_upload.rb +109 -0
  172. data/test/test_verbs.rb +25 -0
  173. metadata +314 -0
data/README.rdoc ADDED
@@ -0,0 +1,60 @@
1
+ = WWW::Mechanize
2
+
3
+ * http://mechanize.rubyforge.org/
4
+ * http://github.com/tenderlove/mechanize/tree/master
5
+
6
+ == DESCRIPTION
7
+
8
+ The Mechanize library is used for automating interaction with websites.
9
+ Mechanize automatically stores and sends cookies, follows redirects,
10
+ follows links, and submits forms. Form fields can be populated and
11
+ submitted. Mechanize also keeps track of the sites that you have visited as
12
+ a history.
13
+
14
+ == Dependencies
15
+
16
+ * ruby 1.8.6
17
+ * nokogiri[http://nokogiri.rubyforge.org]
18
+
19
+ == SUPPORT:
20
+
21
+ The mechanize mailing list is available here:
22
+
23
+ * http://rubyforge.org/mailman/listinfo/mechanize-users
24
+
25
+ The bug tracker is available here:
26
+
27
+ * http://rubyforge.org/tracker/?atid=5709&group_id=1453
28
+
29
+ == Examples
30
+
31
+ If you are just starting, check out the GUIDE.
32
+ Also, check out the EXAMPLES file.
33
+
34
+ == Authors
35
+
36
+ Copyright (c) 2005 by Michael Neumann (mneumann@ntecs.de)
37
+
38
+ Copyright (c) 2006-2009:
39
+
40
+ * {Aaron Patterson}[http://tenderlovemaking.com] (aaronp@rubyforge.org)
41
+ * {Mike Dalessio}[http://mike.daless.io] (mike@csa.net)
42
+
43
+ This library comes with a shameless plug for employing me
44
+ (Aaron[http://tenderlovemaking.com/]) programming
45
+ Ruby, my favorite language!
46
+
47
+ == Acknowledgments
48
+
49
+ This library was heavily influenced by its namesake in the Perl world. A big
50
+ thanks goes to Andy Lester (andy@petdance.com), the author of the original
51
+ Perl Mechanize which is available here[http://search.cpan.org/~petdance/WWW-Mechanize-1.20/]. Ruby Mechanize would not be around without you!
52
+
53
+ Thank you to Michael Neumann for starting the Ruby version. Thanks to everyone
54
+ who's helped out in various ways. Finally, thank you to the people using this
55
+ library!
56
+
57
+ == License
58
+
59
+ This library is distributed under the GPL. Please see the LICENSE file.
60
+
data/Rakefile ADDED
@@ -0,0 +1,43 @@
require 'rubygems'
require 'hoe'

$LOAD_PATH.unshift File.join(File.dirname(__FILE__), "lib")
require 'mechanize'

# Hoe project definition. The resulting object is reused by the gem:spec and
# coverage tasks below.
HOE = Hoe.new('mechanize', WWW::Mechanize::VERSION) do |p|
  p.developer('Aaron Patterson','aaronp@rubyforge.org')
  p.developer('Mike Dalessio','mike.dalessio@gmail.com')
  p.readme_file      = 'README.rdoc'
  p.history_file     = 'CHANGELOG.rdoc'
  p.extra_rdoc_files = FileList['*.rdoc']
  p.summary          = "Mechanize provides automated web-browsing"
  p.extra_deps       = [['nokogiri', '>= 1.2.1']]
end

# Regenerates the self-signed key/certificate set and moves it to test/data/.
# openssl prompts interactively for the passphrase and certificate fields.
desc "Update SSL Certificate"
task('ssl_cert') do
  # 2048 bits: 1024-bit RSA keys are rejected by current OpenSSL defaults.
  sh "openssl genrsa -des3 -out server.key 2048"
  sh "openssl req -new -key server.key -out server.csr"
  # Keep the passphrase-protected key aside, then write an unprotected copy.
  cp "server.key", "server.key.org"
  sh "openssl rsa -in server.key.org -out server.key"
  sh "openssl x509 -req -days 365 -in server.csr -signkey server.key -out server.crt"
  cp "server.key", "server.pem"
  mv %w{ server.key server.csr server.crt server.pem }, "test/data/"
  rm "server.key.org"
end

namespace :gem do
  desc 'Generate a gem spec'
  task :spec do
    # The version gets a timestamp suffix so each generated spec is unique.
    HOE.spec.version = "#{HOE.version}.#{Time.now.strftime("%Y%m%d%H%M%S")}"
    # Render the spec before opening the file so that a failure here does not
    # leave a truncated gemspec behind.
    gemspec = HOE.spec.to_ruby
    File.open("#{HOE.name}.gemspec", 'w') { |f| f.write(gemspec) }
  end
end

desc "Run code-coverage analysis"
task :coverage do
  rm_rf "coverage"
  sh "rcov -x Library -I lib:test #{Dir[*HOE.test_globs].join(' ')}"
end
@@ -0,0 +1,23 @@
$:.unshift File.join(File.dirname(__FILE__), "..", "lib")

require 'rubygems'
require 'mechanize'

# Logs in to flickr and uploads one file.
# Arguments: EMAIL PASSWORD FILE
email, password, file_name = ARGV
# Fail early instead of submitting nil credentials to the site.
abort "Usage: #{$0} EMAIL PASSWORD FILE" unless email && password && file_name

agent = WWW::Mechanize.new

# Get the flickr sign in page
page = agent.get('http://flickr.com/signin/flickr/')

# Fill out the login form
form = page.forms.name('flickrloginform').first
form.email    = email
form.password = password
page = agent.submit(form)

# Go to the upload page
page = agent.click page.links.text('Upload')

# Fill out the upload form and send it
form = page.forms.action('/photos_upload_process.gne').first
form.file_uploads.name('file1').first.file_name = file_name
agent.submit(form)
@@ -0,0 +1,7 @@
$:.unshift File.join(File.dirname(__FILE__), "..", "lib")

require 'rubygems'
require 'mechanize'

# Fetches the URL given as the first argument and prints the inspected page.
url = ARGV[0]
abort "Usage: #{$0} URL" unless url

agent = WWW::Mechanize.new
puts agent.get(url).inspect
@@ -0,0 +1,9 @@
$:.unshift File.join(File.dirname(__FILE__), "..", "lib")

require 'rubygems'
require 'mechanize'

# Fetches the URL given as the first argument through a proxy listening on
# localhost:8000 and prints the response body.
url = ARGV[0]
abort "Usage: #{$0} URL" unless url

agent = WWW::Mechanize.new
agent.set_proxy('localhost', '8000')
page = agent.get(url)
puts page.body
@@ -0,0 +1,21 @@
$:.unshift File.join(File.dirname(__FILE__), "..", "lib")

# This example logs a user in to rubyforge and prints out the body of the
# page after logging the user in.
# Arguments: LOGIN PASSWORD
require 'rubygems'
require 'mechanize'

login, password = ARGV
# Fail early instead of submitting nil credentials to the site.
abort "Usage: #{$0} LOGIN PASSWORD" unless login && password

# Create a new mechanize object that logs to STDERR
agent = WWW::Mechanize.new { |a| a.log = Logger.new(STDERR) }

# Load the rubyforge website
page = agent.get('http://rubyforge.org/')
page = agent.click page.links.text(/Log In/) # Click the login link
form = page.forms[1] # Select the second form on the page (index 1)
form.form_loginname = login
form.form_pw        = password

# Submit the form using its first button
page = agent.submit(form, form.buttons.first)

puts page.body # Print out the body
@@ -0,0 +1,11 @@
$:.unshift File.join(File.dirname(__FILE__), "..", "lib")

require 'rubygems'
require 'mechanize'

# Follows links starting from the URL given as the first argument, skipping
# links whose host differs from the host of the first history entry and links
# that were already visited.
url = ARGV[0]
abort "Usage: #{$0} URL" unless url

agent = WWW::Mechanize.new
stack = agent.get(url).links
# The host of the first history entry is read once, after the initial fetch,
# rather than on every loop iteration.
root_host = agent.history.first.uri.host

while link = stack.pop
  next unless link.uri.host == root_host
  stack.push(*(agent.click(link).links)) unless agent.visited? link.href
end
data/lib/mechanize.rb ADDED
@@ -0,0 +1,7 @@
1
+ # Copyright (c) 2005 by Michael Neumann (mneumann@ntecs.de)
2
+ # Copyright (c) 2007 by Aaron Patterson (aaronp@rubyforge.org)
3
+ #
4
+ # Please see the LICENSE file for licensing.
5
+
6
+
7
+ require 'www/mechanize'
@@ -0,0 +1,80 @@
module WWW
  class Mechanize
    class Chain
      # Chain handler that sets authentication headers on the outgoing
      # request. The scheme is looked up per host in +auth_hash+, whose values
      # are :basic, :iis_digest or :digest.
      class AuthHeaders
        include WWW::Handler

        # Number of times each server nonce has been used; supplies the "nc"
        # field. Shared by every instance.
        @@nonce_count = Hash.new(0)
        # Client nonce, generated once when this class is loaded.
        CNONCE = Digest::MD5.hexdigest("%x" % (Time.now.to_i + rand(65535)))

        # auth_hash:: Hash mapping a host name to its authentication scheme.
        # user::      User name sent to the server.
        # password::  Password used for basic auth and for the digest.
        # digest::    Digest challenge string parsed by #gen_auth_header.
        def initialize(auth_hash, user, password, digest)
          @auth_hash = auth_hash
          @user      = user
          @password  = password
          @digest    = digest
        end

        # Adds credentials to params[:request] when the host of params[:uri]
        # has a registered scheme, then passes control down the chain.
        def handle(ctx, params)
          uri     = params[:uri]
          request = params[:request]

          case @auth_hash[uri.host]
          when :basic
            request.basic_auth(@user, @password)
          when :iis_digest
            request['Authorization'] = gen_auth_header(uri, request, @digest, true)
          when :digest
            # Without a stored challenge there is nothing to answer yet.
            request['Authorization'] = gen_auth_header(uri, request, @digest) if @digest
          end
          super
        end

        # Builds the value of a Digest Authorization header.
        #
        # uri::         URI being requested; its path is used as the digest uri.
        # request::     Request object; only its +method+ is read.
        # auth_header:: Challenge of the form "<scheme> key=value, key2=value2".
        # is_IIS::      When true the qop value is wrapped in double quotes.
        #
        # Returns the header String. Raises ArgumentError when the challenge
        # cannot be parsed or carries no nonce.
        def gen_auth_header(uri, request, auth_header, is_IIS = false)
          unless auth_header.to_s =~ /^(\w+) (.*)/
            raise ArgumentError, "malformed digest challenge: #{auth_header.inspect}"
          end

          # Collect the key=value pairs, dropping surrounding double quotes.
          params = {}
          $2.scan(/(\w+)=("[^"]*"|[^,]*)/) do |key, value|
            params[key] = value.gsub(/^"/, '').gsub(/"$/, '')
          end

          nonce = params['nonce']
          qop   = params['qop']
          raise ArgumentError, "digest challenge has no nonce" unless nonce

          @@nonce_count[nonce] += 1
          nonce_count = '%08x' % @@nonce_count[nonce]

          ha_1 = Digest::MD5.hexdigest("#{@user}:#{params['realm']}:#{@password}")
          ha_2 = Digest::MD5.hexdigest("#{request.method}:#{uri.path}")

          # A challenge without qop uses the shorter legacy digest, and the
          # nc/cnonce/qop fields are then left out of the header as well.
          digest_input =
            if qop
              [ha_1, nonce, nonce_count, CNONCE, qop, ha_2]
            else
              [ha_1, nonce, ha_2]
            end
          response = Digest::MD5.hexdigest(digest_input.join(':'))

          fields = ["username=\"#{@user}\""]
          if qop
            fields << (is_IIS ? "qop=\"#{qop}\"" : "qop=#{qop}")
          end
          fields << "uri=\"#{uri.path}\""
          %w{ algorithm opaque nonce realm }.each do |field|
            fields << "#{field}=\"#{params[field]}\"" if params[field]
          end
          if qop
            fields << "nc=#{nonce_count}"
            fields << "cnonce=\"#{CNONCE}\""
          end
          fields << "response=\"#{response}\""

          "Digest #{fields.join(', ')}"
        end
      end
    end
  end
end
@@ -0,0 +1,48 @@
module WWW
  class Mechanize
    class Chain
      # Chain handler that replaces options[:response_body] (an IO-like
      # object) with the decoded body String, based on the response's
      # Content-Encoding header.
      class BodyDecodingHandler
        include WWW::Handler

        # Decodes the body and passes control down the chain.
        #
        # No Content-Encoding, or "identity", returns the body unchanged.
        # "gzip" and its alias "x-gzip" are decompressed. Any other encoding
        # raises a RuntimeError ('Unsupported content encoding').
        def handle(ctx, options)
          body     = options[:response_body]
          response = options[:response]
          encoding = response['Content-Encoding']

          options[:response_body] =
            case encoding && encoding.downcase
            when nil, 'identity'
              body.read
            when 'gzip', 'x-gzip'
              Mechanize.log.debug('gunzip body') if Mechanize.log
              gunzip(response, body)
            else
              raise 'Unsupported content encoding'
            end
          super
        end

        private

        # Decompresses a gzip body. Returns '' for an empty body or when the
        # data cannot be decoded.
        def gunzip(response, body)
          return '' unless response['Content-Length'].to_i > 0 || body.length > 0

          begin
            Zlib::GzipReader.new(body).read
          rescue Zlib::BufError, Zlib::GzipFile::Error => e
            Mechanize.log.error("Caught a #{e.class}") if Mechanize.log
            # Fall back to a raw inflate: skip the first 10 bytes (the fixed
            # gzip header) and inflate the rest without zlib framing.
            body.rewind
            body.read(10)
            Zlib::Inflate.new(-Zlib::MAX_WBITS).inflate(body.read)
          rescue Zlib::DataError => e
            if Mechanize.log
              Mechanize.log.error("Caught a Zlib::DataError, unable to decode page: #{e.to_s}")
            end
            ''
          end
        end
      end
    end
  end
end
@@ -0,0 +1,78 @@
module WWW
  class Mechanize
    class Chain
      # Chain handler that picks the connection object for a request and
      # stores it in params[:connection]. http/https URIs get a cached
      # Net::HTTP instance; file URIs get a small stand-in object.
      class ConnectionResolver
        include WWW::Handler

        # connection_cache:: Hash keyed by "host:port"; each entry holds the
        #                    connection, its keep-alive options and the time
        #                    of the last request.
        # keep_alive::       Whether stale kept-alive connections are finished.
        # proxy_*::          Passed straight to Net::HTTP.new.
        def initialize( connection_cache,
                        keep_alive,
                        proxy_addr,
                        proxy_port,
                        proxy_user,
                        proxy_pass )

          @connection_cache = connection_cache
          @keep_alive       = keep_alive
          @proxy_addr       = proxy_addr
          @proxy_port       = proxy_port
          @proxy_user       = proxy_user
          @proxy_pass       = proxy_pass
        end

        # Resolves the connection for params[:uri]. Schemes other than http,
        # https and file leave params[:connection] set to nil.
        def handle(ctx, params)
          target = params[:uri]

          params[:connection] =
            case target.scheme.downcase
            when 'http', 'https' then http_connection(target)
            when 'file'          then file_connection
            end
          super
        end

        private

        # Returns the cached Net::HTTP object for the URI's host and port,
        # creating a new one when none exists or the cached one is not started.
        def http_connection(target)
          entry = (@connection_cache["#{target.host}:#{target.port}"] ||= {
            :connection         => nil,
            :keep_alive_options => {},
          })

          connection = entry[:connection]
          unless connection && connection.started?
            connection = entry[:connection] =
              Net::HTTP.new( target.host,
                             target.port,
                             @proxy_addr,
                             @proxy_port,
                             @proxy_user,
                             @proxy_pass
                           )
            entry[:keep_alive_options] = {}
          end

          if @keep_alive && connection.started? && stale?(entry)
            Mechanize.log.debug('Finishing stale connection') if Mechanize.log
            connection.finish
          end

          entry[:last_request_time] = Time.now.to_i
          connection
        end

        # True when the last request is older than the stored keep-alive
        # timeout, or when the stored max-requests value is 1.
        def stale?(entry)
          opts = entry[:keep_alive_options]
          (opts[:timeout] &&
           Time.now.to_i - entry[:last_request_time] > opts[:timeout].to_i) ||
            (opts[:max] && opts[:max].to_i == 1)
        end

        # Builds an object that answers started? and request, yielding a
        # FileResponse for the path of the request's URI.
        def file_connection
          connection = Object.new
          class << connection
            def started?; true; end
            def request(request, *args, &block)
              response = FileResponse.new(request.uri.path)
              yield response
            end
          end
          connection
        end
      end
    end
  end
end
@@ -0,0 +1,23 @@
module WWW
  class Mechanize
    class Chain
      # Chain handler that copies the caller-supplied params[:headers] onto
      # params[:request].
      class CustomHeaders
        include WWW::Handler

        # Sets each header on the request and passes control down the chain.
        # The symbols :etag and :if_modified_since are translated to their
        # header names; any other Symbol key raises ArgumentError; all other
        # keys are used as given.
        def handle(ctx, params)
          request = params[:request]
          params[:headers].each do |name, value|
            field =
              if name == :etag
                "ETag"
              elsif name == :if_modified_since
                "If-Modified-Since"
              elsif name.is_a? Symbol
                raise ArgumentError.new("unknown header symbol #{name}")
              else
                name
              end
            request[field] = value
          end
          super
        end
      end
    end
  end
end
@@ -0,0 +1,9 @@
module WWW
  # Mixin for objects that take part in a handler chain. Including classes
  # override #handle and call +super+ to hand the request on.
  module Handler
    # The chain this handler belongs to.
    attr_reader :chain
    attr_writer :chain

    # Default behaviour: ask the chain to pass +request+ on, identifying this
    # handler as the sender. Returns whatever chain.pass returns. +ctx+ is not
    # used here.
    def handle(ctx, request)
      chain.pass(self, request)
    end
  end
end
@@ -0,0 +1,53 @@
module WWW
  class Mechanize
    class Chain
      # Chain handler that sets the standard request headers: connection
      # handling, accepted encodings/languages/charsets, Host, Cookie,
      # Referer, User-Agent and any agent-wide extra headers.
      class HeaderResolver
        include WWW::Handler

        # keep_alive::      Whether to ask for a persistent connection.
        # keep_alive_time:: Value sent in the Keep-Alive header.
        # cookie_jar::      Source of cookies for the request URI.
        # user_agent::      User-Agent string, or nil to send none.
        # headers::         Extra header name/value pairs applied last.
        def initialize(keep_alive, keep_alive_time, cookie_jar, user_agent, headers)
          @keep_alive      = keep_alive
          @keep_alive_time = keep_alive_time
          @cookie_jar      = cookie_jar
          @user_agent      = user_agent
          @headers         = headers
        end

        # Populates params[:request] and passes control down the chain.
        def handle(ctx, params)
          uri     = params[:uri]
          referer = params[:referer]
          request = params[:request]

          if @keep_alive
            request['Connection'] = 'keep-alive'
            request['Keep-Alive'] = @keep_alive_time.to_s
          else
            request['Connection'] = 'close'
          end
          request['Accept-Encoding'] = 'gzip,identity'
          request['Accept-Language'] = 'en-us,en;q=0.5'

          # The port is left out only when it is the default for this URI's
          # scheme, so e.g. http on port 443 still sends "host:443".
          host = uri.host.to_s
          host += ":#{uri.port}" unless uri.port == uri.default_port
          request['Host'] = host
          request['Accept-Charset'] = 'ISO-8859-1,utf-8;q=0.7,*;q=0.7'

          unless @cookie_jar.empty?(uri)
            cookies = @cookie_jar.cookies(uri)
            # Never add a Cookie field with a nil value.
            request.add_field('Cookie', cookies.join("; ")) unless cookies.empty?
          end

          # Add Referer header to request
          if referer && referer.uri
            request['Referer'] = referer.uri.to_s
          end

          # Add User-Agent header to request
          request['User-Agent'] = @user_agent if @user_agent

          # Extra headers go last so they override the defaults above.
          @headers.each do |name, value|
            request[name] = value
          end
          super
        end
      end
    end
  end
end
@@ -0,0 +1,24 @@
module WWW
  class Mechanize
    class Chain
      # Chain handler that moves request parameters into the URI's query
      # string for the verbs head, get, delete and trace.
      class ParameterResolver
        include WWW::Handler

        # For the verbs above, appends params[:params] to the query of
        # params[:uri] (joined with '&' when a query already exists) and
        # replaces params[:params] with an empty Array. Other verbs are left
        # untouched. Control is then passed down the chain.
        def handle(ctx, params)
          if [:head, :get, :delete, :trace].include?(params[:verb])
            target     = params[:uri]
            parameters = params[:params]

            unless parameters.length == 0
              target.query ||= ''
              target.query << '&' if target.query.length > 0
              target.query << Util.build_query_string(parameters)
            end
            params[:params] = []
          end
          super
        end
      end
    end
  end
end
File without changes
@@ -0,0 +1,22 @@
module WWW
  class Mechanize
    class Chain
      # Chain handler that runs user-supplied callbacks. Each entry in
      # +hooks+ must respond to +call+ and receives the chain's params hash.
      class PreConnectHook
        include WWW::Handler

        # Ordered list of callbacks; starts out empty.
        attr_accessor :hooks

        def initialize
          @hooks = []
        end

        # Calls every hook in order, then passes control down the chain.
        def handle(ctx, params)
          @hooks.each do |callback|
            callback.call(params)
          end
          super
        end
      end

      # Same behaviour as PreConnectHook under a different name.
      class PostConnectHook < PreConnectHook
      end
    end
  end
end
@@ -0,0 +1,32 @@
module WWW
  class Mechanize
    class Chain
      # Chain handler that makes sure params[:request] holds a request object
      # suited to the scheme of params[:uri]. An existing params[:request] is
      # never replaced.
      class RequestResolver
        include WWW::Handler

        # http/https: instantiates the Net::HTTP request class named after
        # params[:verb] with the URI's request_uri.
        # file: uses a stand-in object that ignores header writes.
        # Control is then passed down the chain.
        def handle(ctx, params)
          target = params[:uri]

          case target.scheme.downcase
          when 'http', 'https'
            request_class = Net::HTTP.const_get(params[:verb].to_s.capitalize)
            params[:request] ||= request_class.new(target.request_uri)
          when 'file'
            params[:request] ||= file_request(target)
          end

          super
        end

        private

        # Builds a Struct instance holding the URI whose add_field, []= and
        # each_header do nothing and whose path is the URI's path.
        def file_request(target)
          stub = Struct.new(:uri).new(target)
          class << stub
            def add_field(*args); end
            alias :[]= :add_field
            def path
              uri.path
            end
            def each_header; end
          end
          stub
        end
      end
    end
  end
end
@@ -0,0 +1,40 @@
module WWW
  class Mechanize
    class Chain
      # Chain handler that turns the response into a page object, using the
      # pluggable parser registered for the response's content type, and
      # stores it in params[:page].
      class ResponseBodyParser
        include WWW::Handler

        # pluggable_parser:: Object whose parser(content_type) returns the
        #                    class to instantiate.
        # watch_for_set::    Forwarded to parsers that accept it, when set.
        def initialize(pluggable_parser, watch_for_set)
          @pluggable_parser = pluggable_parser
          @watch_for_set    = watch_for_set
        end

        # Builds params[:page] and passes control down the chain.
        def handle(ctx, params)
          response      = params[:response]
          response_body = params[:response_body]
          uri           = params[:uri]

          # Find our pluggable parser
          params[:page] = @pluggable_parser.parser(media_type(response)).new(
            uri,
            response,
            response_body,
            response.code
          ) { |parser|
            parser.mech = params[:agent] if parser.respond_to? :mech=
            if parser.respond_to?(:watch_for_set=) && @watch_for_set
              parser.watch_for_set = @watch_for_set
            end
          }
          super
        end

        private

        # Returns the lower-cased media type from the Content-Type header:
        # everything before the first ';', and before the first ',' if several
        # values were sent. Surrounding whitespace is removed so a header such
        # as "text/html ; charset=utf-8" still yields "text/html". Returns nil
        # when the header is absent or holds no media type.
        def media_type(response)
          header = response['Content-Type']
          return nil if header.nil?

          type = header[/^([^;]*)/, 1].to_s.downcase.split(',')[0]
          type = type.strip if type
          (type.nil? || type.empty?) ? nil : type
        end
      end
    end
  end
end
@@ -0,0 +1,50 @@
module WWW
  class Mechanize
    class Chain
      # Chain handler that records what the response says about connection
      # reuse and cookies: Keep-Alive options go into the connection cache,
      # cookies go into the cookie jar.
      class ResponseHeaderHandler
        include WWW::Handler

        # cookie_jar::       Receives every parsed cookie.
        # connection_cache:: Hash keyed by "host:port" holding per-connection
        #                    state, including :keep_alive_options.
        def initialize(cookie_jar, connection_cache)
          @cookie_jar       = cookie_jar
          @connection_cache = connection_cache
        end

        # Processes params[:response] and params[:page], then passes control
        # down the chain.
        def handle(ctx, params)
          response = params[:response]
          uri      = params[:uri]
          page     = params[:page]

          # The cache entry is created even when no Keep-Alive header arrives.
          entry = (@connection_cache["#{uri.host}:#{uri.port}"] ||= {
            :connection         => nil,
            :keep_alive_options => {},
          })

          # If the server sends back keep alive options, save them
          if keep_alive_info = response['keep-alive']
            keep_alive_info.split(/,\s*/).each do |option|
              name, value = option.split(/=/)
              entry[:keep_alive_options] ||= {}
              entry[:keep_alive_options][name.intern] = value
            end
          end

          # Cookies set through <meta http-equiv="Set-Cookie"> tags. The regexp
          # test skips the document search when the text never appears.
          if page.is_a?(Page) && page.body =~ /Set-Cookie/n
            page.search('//meta[@http-equiv="Set-Cookie"]').each do |meta|
              save_cookies(uri, meta['content'])
            end
          end

          # Cookies set through Set-Cookie response headers.
          header_cookies = response.get_fields('Set-Cookie') || []
          header_cookies.each do |cookie_string|
            save_cookies(uri, cookie_string)
          end
          super
        end

        private

        # Parses +cookie_string+ for +uri+ and adds each resulting cookie to
        # the jar, logging it at debug level when a logger is configured.
        def save_cookies(uri, cookie_string)
          Cookie::parse(uri, cookie_string) do |cookie|
            Mechanize.log.debug("saved cookie: #{cookie}") if Mechanize.log
            @cookie_jar.add(uri, cookie)
          end
        end
      end
    end
  end
end