tenderlove-mechanize 0.9.3.20090617085936

Sign up to get free protection for your applications and to get access to all the features.
Files changed (173) hide show
  1. data/CHANGELOG.rdoc +496 -0
  2. data/EXAMPLES.rdoc +171 -0
  3. data/FAQ.rdoc +11 -0
  4. data/GUIDE.rdoc +122 -0
  5. data/LICENSE.rdoc +340 -0
  6. data/Manifest.txt +169 -0
  7. data/README.rdoc +60 -0
  8. data/Rakefile +43 -0
  9. data/examples/flickr_upload.rb +23 -0
  10. data/examples/mech-dump.rb +7 -0
  11. data/examples/proxy_req.rb +9 -0
  12. data/examples/rubyforge.rb +21 -0
  13. data/examples/spider.rb +11 -0
  14. data/lib/mechanize.rb +7 -0
  15. data/lib/www/mechanize/chain/auth_headers.rb +80 -0
  16. data/lib/www/mechanize/chain/body_decoding_handler.rb +48 -0
  17. data/lib/www/mechanize/chain/connection_resolver.rb +78 -0
  18. data/lib/www/mechanize/chain/custom_headers.rb +23 -0
  19. data/lib/www/mechanize/chain/handler.rb +9 -0
  20. data/lib/www/mechanize/chain/header_resolver.rb +53 -0
  21. data/lib/www/mechanize/chain/parameter_resolver.rb +24 -0
  22. data/lib/www/mechanize/chain/post_connect_hook.rb +0 -0
  23. data/lib/www/mechanize/chain/pre_connect_hook.rb +22 -0
  24. data/lib/www/mechanize/chain/request_resolver.rb +32 -0
  25. data/lib/www/mechanize/chain/response_body_parser.rb +40 -0
  26. data/lib/www/mechanize/chain/response_header_handler.rb +50 -0
  27. data/lib/www/mechanize/chain/response_reader.rb +41 -0
  28. data/lib/www/mechanize/chain/ssl_resolver.rb +42 -0
  29. data/lib/www/mechanize/chain/uri_resolver.rb +77 -0
  30. data/lib/www/mechanize/chain.rb +34 -0
  31. data/lib/www/mechanize/content_type_error.rb +16 -0
  32. data/lib/www/mechanize/cookie.rb +72 -0
  33. data/lib/www/mechanize/cookie_jar.rb +191 -0
  34. data/lib/www/mechanize/file.rb +73 -0
  35. data/lib/www/mechanize/file_response.rb +62 -0
  36. data/lib/www/mechanize/file_saver.rb +39 -0
  37. data/lib/www/mechanize/form/button.rb +8 -0
  38. data/lib/www/mechanize/form/check_box.rb +13 -0
  39. data/lib/www/mechanize/form/field.rb +28 -0
  40. data/lib/www/mechanize/form/file_upload.rb +24 -0
  41. data/lib/www/mechanize/form/image_button.rb +23 -0
  42. data/lib/www/mechanize/form/multi_select_list.rb +69 -0
  43. data/lib/www/mechanize/form/option.rb +51 -0
  44. data/lib/www/mechanize/form/radio_button.rb +38 -0
  45. data/lib/www/mechanize/form/select_list.rb +45 -0
  46. data/lib/www/mechanize/form.rb +360 -0
  47. data/lib/www/mechanize/headers.rb +12 -0
  48. data/lib/www/mechanize/history.rb +67 -0
  49. data/lib/www/mechanize/inspect.rb +90 -0
  50. data/lib/www/mechanize/monkey_patch.rb +37 -0
  51. data/lib/www/mechanize/page/base.rb +10 -0
  52. data/lib/www/mechanize/page/frame.rb +22 -0
  53. data/lib/www/mechanize/page/link.rb +50 -0
  54. data/lib/www/mechanize/page/meta.rb +51 -0
  55. data/lib/www/mechanize/page.rb +176 -0
  56. data/lib/www/mechanize/pluggable_parsers.rb +103 -0
  57. data/lib/www/mechanize/redirect_limit_reached_error.rb +18 -0
  58. data/lib/www/mechanize/redirect_not_get_or_head_error.rb +20 -0
  59. data/lib/www/mechanize/response_code_error.rb +25 -0
  60. data/lib/www/mechanize/unsupported_scheme_error.rb +10 -0
  61. data/lib/www/mechanize/util.rb +76 -0
  62. data/lib/www/mechanize.rb +619 -0
  63. data/mechanize.gemspec +41 -0
  64. data/test/chain/test_argument_validator.rb +14 -0
  65. data/test/chain/test_auth_headers.rb +25 -0
  66. data/test/chain/test_custom_headers.rb +18 -0
  67. data/test/chain/test_header_resolver.rb +28 -0
  68. data/test/chain/test_parameter_resolver.rb +35 -0
  69. data/test/chain/test_request_resolver.rb +29 -0
  70. data/test/chain/test_response_reader.rb +24 -0
  71. data/test/data/htpasswd +1 -0
  72. data/test/data/server.crt +16 -0
  73. data/test/data/server.csr +12 -0
  74. data/test/data/server.key +15 -0
  75. data/test/data/server.pem +15 -0
  76. data/test/helper.rb +129 -0
  77. data/test/htdocs/alt_text.html +10 -0
  78. data/test/htdocs/bad_form_test.html +9 -0
  79. data/test/htdocs/button.jpg +0 -0
  80. data/test/htdocs/empty_form.html +6 -0
  81. data/test/htdocs/file_upload.html +26 -0
  82. data/test/htdocs/find_link.html +41 -0
  83. data/test/htdocs/form_multi_select.html +16 -0
  84. data/test/htdocs/form_multival.html +37 -0
  85. data/test/htdocs/form_no_action.html +18 -0
  86. data/test/htdocs/form_no_input_name.html +16 -0
  87. data/test/htdocs/form_select.html +16 -0
  88. data/test/htdocs/form_select_all.html +16 -0
  89. data/test/htdocs/form_select_none.html +17 -0
  90. data/test/htdocs/form_select_noopts.html +10 -0
  91. data/test/htdocs/form_set_fields.html +14 -0
  92. data/test/htdocs/form_test.html +188 -0
  93. data/test/htdocs/frame_test.html +30 -0
  94. data/test/htdocs/google.html +13 -0
  95. data/test/htdocs/iframe_test.html +16 -0
  96. data/test/htdocs/index.html +6 -0
  97. data/test/htdocs/link with space.html +5 -0
  98. data/test/htdocs/meta_cookie.html +11 -0
  99. data/test/htdocs/no_title_test.html +6 -0
  100. data/test/htdocs/relative/tc_relative_links.html +21 -0
  101. data/test/htdocs/tc_bad_links.html +5 -0
  102. data/test/htdocs/tc_base_link.html +8 -0
  103. data/test/htdocs/tc_blank_form.html +11 -0
  104. data/test/htdocs/tc_checkboxes.html +19 -0
  105. data/test/htdocs/tc_encoded_links.html +5 -0
  106. data/test/htdocs/tc_follow_meta.html +8 -0
  107. data/test/htdocs/tc_form_action.html +48 -0
  108. data/test/htdocs/tc_links.html +18 -0
  109. data/test/htdocs/tc_no_attributes.html +16 -0
  110. data/test/htdocs/tc_pretty_print.html +17 -0
  111. data/test/htdocs/tc_radiobuttons.html +17 -0
  112. data/test/htdocs/tc_referer.html +10 -0
  113. data/test/htdocs/tc_relative_links.html +19 -0
  114. data/test/htdocs/tc_textarea.html +23 -0
  115. data/test/htdocs/unusual______.html +5 -0
  116. data/test/servlets.rb +365 -0
  117. data/test/ssl_server.rb +48 -0
  118. data/test/test_authenticate.rb +71 -0
  119. data/test/test_bad_links.rb +25 -0
  120. data/test/test_blank_form.rb +16 -0
  121. data/test/test_checkboxes.rb +61 -0
  122. data/test/test_content_type.rb +13 -0
  123. data/test/test_cookie_class.rb +338 -0
  124. data/test/test_cookie_jar.rb +362 -0
  125. data/test/test_cookies.rb +123 -0
  126. data/test/test_encoded_links.rb +20 -0
  127. data/test/test_errors.rb +49 -0
  128. data/test/test_follow_meta.rb +108 -0
  129. data/test/test_form_action.rb +44 -0
  130. data/test/test_form_as_hash.rb +61 -0
  131. data/test/test_form_button.rb +38 -0
  132. data/test/test_form_no_inputname.rb +15 -0
  133. data/test/test_forms.rb +564 -0
  134. data/test/test_frames.rb +25 -0
  135. data/test/test_get_headers.rb +52 -0
  136. data/test/test_gzipping.rb +22 -0
  137. data/test/test_hash_api.rb +45 -0
  138. data/test/test_history.rb +142 -0
  139. data/test/test_history_added.rb +16 -0
  140. data/test/test_html_unscape_forms.rb +39 -0
  141. data/test/test_if_modified_since.rb +20 -0
  142. data/test/test_keep_alive.rb +31 -0
  143. data/test/test_links.rb +120 -0
  144. data/test/test_mech.rb +268 -0
  145. data/test/test_mechanize_file.rb +47 -0
  146. data/test/test_meta.rb +65 -0
  147. data/test/test_multi_select.rb +106 -0
  148. data/test/test_no_attributes.rb +13 -0
  149. data/test/test_option.rb +18 -0
  150. data/test/test_page.rb +119 -0
  151. data/test/test_pluggable_parser.rb +145 -0
  152. data/test/test_post_form.rb +34 -0
  153. data/test/test_pretty_print.rb +22 -0
  154. data/test/test_radiobutton.rb +75 -0
  155. data/test/test_redirect_limit_reached.rb +41 -0
  156. data/test/test_redirect_verb_handling.rb +45 -0
  157. data/test/test_referer.rb +39 -0
  158. data/test/test_relative_links.rb +40 -0
  159. data/test/test_request.rb +13 -0
  160. data/test/test_response_code.rb +52 -0
  161. data/test/test_save_file.rb +48 -0
  162. data/test/test_scheme.rb +48 -0
  163. data/test/test_select.rb +106 -0
  164. data/test/test_select_all.rb +15 -0
  165. data/test/test_select_none.rb +15 -0
  166. data/test/test_select_noopts.rb +16 -0
  167. data/test/test_set_fields.rb +44 -0
  168. data/test/test_ssl_server.rb +20 -0
  169. data/test/test_subclass.rb +14 -0
  170. data/test/test_textarea.rb +45 -0
  171. data/test/test_upload.rb +109 -0
  172. data/test/test_verbs.rb +25 -0
  173. metadata +314 -0
data/README.rdoc ADDED
@@ -0,0 +1,60 @@
1
+ = WWW::Mechanize
2
+
3
+ * http://mechanize.rubyforge.org/
4
+ * http://github.com/tenderlove/mechanize/tree/master
5
+
6
+ == DESCRIPTION
7
+
8
+ The Mechanize library is used for automating interaction with websites.
9
+ Mechanize automatically stores and sends cookies, follows redirects,
10
+ can follow links, and submit forms. Form fields can be populated and
11
+ submitted. Mechanize also keeps track of the sites that you have visited as
12
+ a history.
13
+
14
+ == Dependencies
15
+
16
+ * ruby 1.8.6
17
+ * nokogiri[http://nokogiri.rubyforge.org]
18
+
19
+ == SUPPORT:
20
+
21
+ The mechanize mailing list is available here:
22
+
23
+ * http://rubyforge.org/mailman/listinfo/mechanize-users
24
+
25
+ The bug tracker is available here:
26
+
27
+ * http://rubyforge.org/tracker/?atid=5709&group_id=1453
28
+
29
+ == Examples
30
+
31
+ If you are just starting, check out the GUIDE.
32
+ Also, check out the EXAMPLES file.
33
+
34
+ == Authors
35
+
36
+ Copyright (c) 2005 by Michael Neumann (mneumann@ntecs.de)
37
+
38
+ Copyright (c) 2006-2009:
39
+
40
+ * {Aaron Patterson}[http://tenderlovemaking.com] (aaronp@rubyforge.org)
41
+ * {Mike Dalessio}[http://mike.daless.io] (mike@csa.net)
42
+
43
+ This library comes with a shameless plug for employing me
44
+ (Aaron[http://tenderlovemaking.com/]) programming
45
+ Ruby, my favorite language!
46
+
47
+ == Acknowledgments
48
+
49
+ This library was heavily influenced by its namesake in the Perl world. A big
50
+ thanks goes to Andy Lester (andy@petdance.com), the author of the original
51
+ Perl Mechanize which is available here[http://search.cpan.org/~petdance/WWW-Mechanize-1.20/]. Ruby Mechanize would not be around without you!
52
+
53
+ Thank you to Michael Neumann for starting the Ruby version. Thanks to everyone
54
+ who's helped out in various ways. Finally, thank you to the people using this
55
+ library!
56
+
57
+ == License
58
+
59
+ This library is distributed under the GPL. Please see the LICENSE file.
60
+
data/Rakefile ADDED
@@ -0,0 +1,43 @@
1
require 'rubygems'
require 'hoe'

# Put the in-tree lib directory first on the load path so that the
# WWW::Mechanize::VERSION used below comes from this checkout.
$LOAD_PATH.unshift File.join(File.dirname(__FILE__), "lib")
require 'mechanize'

# Hoe project definition.  The resulting object is kept in a constant
# because the gem:spec and coverage tasks below read from it.
HOE = Hoe.new('mechanize', WWW::Mechanize::VERSION) do |project|
  project.developer('Aaron Patterson','aaronp@rubyforge.org')
  project.developer('Mike Dalessio','mike.dalessio@gmail.com')
  project.readme_file      = 'README.rdoc'
  project.history_file     = 'CHANGELOG.rdoc'
  project.extra_rdoc_files = FileList['*.rdoc']
  project.summary          = "Mechanize provides automated web-browsing"
  project.extra_deps       = [['nokogiri', '>= 1.2.1']]
end

desc "Update SSL Certificate"
task('ssl_cert') do |p|
  # The commands are run one after another, in this order: generate a
  # key and signing request, strip the passphrase from the key, self-sign
  # a certificate, then move the results into test/data/.
  [
    "openssl genrsa -des3 -out server.key 1024",
    "openssl req -new -key server.key -out server.csr",
    "cp server.key server.key.org",
    "openssl rsa -in server.key.org -out server.key",
    "openssl x509 -req -days 365 -in server.csr -signkey server.key -out server.crt",
    "cp server.key server.pem",
    "mv server.key server.csr server.crt server.pem test/data/",
    "rm server.key.org"
  ].each do |command|
    sh command
  end
end

namespace :gem do
  desc 'Generate a gem spec'
  task :spec do
    # Writes <name>.gemspec with the current time appended to the version.
    File.open("#{HOE.name}.gemspec", 'w') do |gemspec|
      timestamp = Time.now.strftime("%Y%m%d%H%M%S")
      HOE.spec.version = "#{HOE.version}.#{timestamp}"
      gemspec.write(HOE.spec.to_ruby)
    end
  end
end

desc "Run code-coverage analysis"
task :coverage do
  rm_rf "coverage"
  test_files = Dir[*HOE.test_globs].join(' ')
  sh "rcov -x Library -I lib:test #{test_files}"
end
@@ -0,0 +1,23 @@
1
$LOAD_PATH.unshift File.join(File.dirname(__FILE__), "..", "lib")

require 'rubygems'
require 'mechanize'

# Example: sign in to flickr and upload one file.
#
# Arguments, in order: the login e-mail (ARGV[0]), the password (ARGV[1])
# and the name of the file to upload (ARGV[2]).
agent = WWW::Mechanize.new

# Fetch the sign-in page and submit its login form.
signin_page = agent.get('http://flickr.com/signin/flickr/')
login_form  = signin_page.forms.name('flickrloginform').first
login_form.email    = ARGV[0]
login_form.password = ARGV[1]
home_page = agent.submit(login_form)

# Follow the "Upload" link from the page returned by the login.
upload_link = home_page.links.text('Upload')
upload_page = agent.click upload_link

# Attach the file to the upload form and send it.
upload_form = upload_page.forms.action('/photos_upload_process.gne').first
file_field  = upload_form.file_uploads.name('file1').first
file_field.file_name = ARGV[2]
agent.submit(upload_form)
@@ -0,0 +1,7 @@
1
$LOAD_PATH.unshift File.join(File.dirname(__FILE__), "..", "lib")

require 'rubygems'
require 'mechanize'

# Example: fetch the URL given as the first command-line argument and
# print the +inspect+ output of the object Mechanize returns for it.
fetched = WWW::Mechanize.new.get(ARGV[0])
puts fetched.inspect
@@ -0,0 +1,9 @@
1
$LOAD_PATH.unshift File.join(File.dirname(__FILE__), "..", "lib")

require 'rubygems'
require 'mechanize'

# Example: fetch the URL given as the first command-line argument through
# a proxy on localhost port 8000 and print the response body.
agent = WWW::Mechanize.new
agent.set_proxy('localhost', '8000')

response_page = agent.get(ARGV[0])
puts response_page.body
@@ -0,0 +1,21 @@
1
$LOAD_PATH.unshift File.join(File.dirname(__FILE__), "..", "lib")

# Example: log a user in to rubyforge and print the body of the page that
# comes back after the login form is submitted.
#
# ARGV[0] is used as the login name and ARGV[1] as the password.
require 'rubygems'
require 'mechanize'

# Build an agent whose log output goes to standard error.
agent = WWW::Mechanize.new do |a|
  a.log = Logger.new(STDERR)
end

# Load the rubyforge front page and follow its "Log In" link.
front_page = agent.get('http://rubyforge.org/')
login_link = front_page.links.text(/Log In/)
login_page = agent.click login_link

# forms[1] is the second form on the page (the index is zero-based).
login_form = login_page.forms[1]
login_form.form_loginname = ARGV[0]
login_form.form_pw        = ARGV[1]

# Submit using the form's first button, then print the resulting page.
result_page = agent.submit(login_form, login_form.buttons.first)

puts result_page.body
@@ -0,0 +1,11 @@
1
$LOAD_PATH.unshift File.join(File.dirname(__FILE__), "..", "lib")

require 'rubygems'
require 'mechanize'

# Example: a tiny spider.  Starting from the URL in ARGV[0], keep clicking
# links whose host matches the host of the first page in the agent's
# history, skipping any href the agent reports as already visited.
agent   = WWW::Mechanize.new
pending = agent.get(ARGV[0]).links

while (link = pending.pop)
  start_host = agent.history.first.uri.host
  next unless link.uri.host == start_host
  next if agent.visited? link.href

  # Queue every link found on the newly fetched page.
  pending.push(*(agent.click(link).links))
end
data/lib/mechanize.rb ADDED
@@ -0,0 +1,7 @@
1
+ # Copyright (c) 2005 by Michael Neumann (mneumann@ntecs.de)
2
+ # Copyright (c) 2007 by Aaron Patterson (aaronp@rubyforge.org)
3
+ #
4
+ # Please see the LICENSE file for licensing.
5
+
6
+
7
+ require 'www/mechanize'
@@ -0,0 +1,80 @@
1
module WWW
  class Mechanize
    class Chain
      # Chain handler that adds authentication credentials to the outgoing
      # request.  The scheme is looked up per host in +auth_hash+ and may be
      # :basic, :digest or :iis_digest; any other value leaves the request
      # untouched.
      class AuthHeaders
        include WWW::Handler

        # How many times each server nonce has been used; the count is sent
        # as the "nc" field of a digest response.
        @@nonce_count = Hash.new(0)

        # Client nonce, computed once when this class is loaded.
        CNONCE = Digest::MD5.hexdigest("%x" % (Time.now.to_i + rand(65535)))

        # auth_hash:: maps a host name to :basic, :digest or :iis_digest
        # user::      user name to authenticate with
        # password::  password to authenticate with
        # digest::    the digest challenge string ("Digest key=value, ..."),
        #             or nil when no challenge is available
        def initialize(auth_hash, user, password, digest)
          @auth_hash = auth_hash
          @user      = user
          @password  = password
          @digest    = digest
        end

        # Adds the credentials for params[:uri].host to params[:request]
        # and passes control to the next handler in the chain.
        #
        # Digest credentials are only added when a challenge is available;
        # this applies to :iis_digest as well as :digest.
        def handle(ctx, params)
          uri     = params[:uri]
          request = params[:request]

          case @auth_hash[uri.host]
          when :basic
            request.basic_auth(@user, @password)
          when :iis_digest
            if @digest
              request['Authorization'] =
                gen_auth_header(uri, request, @digest, true)
            end
          when :digest
            if @digest
              request['Authorization'] =
                gen_auth_header(uri, request, @digest)
            end
          end
          super
        end

        # Builds the value of a digest "Authorization" header.
        #
        # uri::         URI being requested; its path is used as the digest uri
        # request::     request object; must respond to +method+
        # auth_header:: challenge string of the form "Scheme key=value, ..."
        # is_IIS::      when true the qop value is sent quoted
        #
        # Returns the header value as a String.  Raises ArgumentError when
        # the challenge cannot be parsed or carries no nonce.
        #
        # When the challenge has no "qop" the response is computed without
        # nonce count and client nonce (the RFC 2069 compatible form
        # described in RFC 2617) and qop/nc/cnonce are left out of the
        # header.
        def gen_auth_header(uri, request, auth_header, is_IIS = false)
          challenge = auth_header.to_s[/^\w+ (.*)/, 1]
          if challenge.nil?
            raise ArgumentError,
                  "malformed digest challenge: #{auth_header.inspect}"
          end

          params = {}
          challenge.scan(/(\w+)=("[^"]*"|[^,]*)/) do |name, value|
            params[name] = value.gsub(/^"/, '').gsub(/"$/, '')
          end

          nonce = params['nonce']
          if nonce.nil?
            raise ArgumentError,
                  "digest challenge has no nonce: #{auth_header.inspect}"
          end

          @@nonce_count[nonce] += 1
          nonce_count = '%08x' % @@nonce_count[nonce]
          qop         = params['qop']

          a_1 = "#{@user}:#{params['realm']}:#{@password}"
          a_2 = "#{request.method}:#{uri.path}"
          ha_1 = Digest::MD5.hexdigest(a_1)
          ha_2 = Digest::MD5.hexdigest(a_2)

          request_digest =
            if qop
              [ha_1, nonce, nonce_count, CNONCE, qop, ha_2].join(':')
            else
              [ha_1, nonce, ha_2].join(':')
            end

          fields = ["username=\"#{@user}\""]
          if qop
            fields << (is_IIS ? "qop=\"#{qop}\"" : "qop=#{qop}")
          end
          fields << "uri=\"#{uri.path}\""
          %w{ algorithm opaque nonce realm }.each do |field|
            fields << "#{field}=\"#{params[field]}\"" if params[field]
          end
          if qop
            fields << "nc=#{nonce_count}"
            fields << "cnonce=\"#{CNONCE}\""
          end
          fields << "response=\"#{Digest::MD5.hexdigest(request_digest)}\""

          "Digest " + fields.join(', ')
        end
      end
    end
  end
end
@@ -0,0 +1,48 @@
1
module WWW
  class Mechanize
    class Chain
      # Chain handler that replaces options[:response_body] (an IO-like
      # object) with the decoded body String, according to the response's
      # Content-Encoding header.
      class BodyDecodingHandler
        include WWW::Handler

        # Decodes the body and passes control to the next handler.
        #
        # * no Content-Encoding: the body is read as-is
        # * "gzip": the body is gunzipped; an empty body yields ''
        # * "x-gzip": the body is read as-is
        #   NOTE(review): the body is not decompressed for x-gzip; confirm
        #   this is intended.
        # * anything else: raises RuntimeError
        def handle(ctx, options)
          body     = options[:response_body]
          response = options[:response]

          options[:response_body] =
            if encoding = response['Content-Encoding']
              case encoding.downcase
              when 'gzip'
                Mechanize.log.debug('gunzip body') if Mechanize.log
                if response['Content-Length'].to_i > 0 || body.length > 0
                  gunzip(body)
                else
                  ''
                end
              when 'x-gzip'
                body.read
              else
                raise 'Unsupported content encoding'
              end
            else
              body.read
            end
          super
        end

        private

        # Gunzips +body+.  When the gzip reader gives up on the stream, a
        # raw inflate is attempted instead; when the data cannot be decoded
        # at all the error is logged and '' is returned.
        def gunzip(body)
          Zlib::GzipReader.new(body).read
        rescue Zlib::BufError, Zlib::GzipFile::Error => e
          Mechanize.log.error("Caught a #{e.class}") if Mechanize.log
          inflate_without_header(body)
        rescue Zlib::DataError
          if Mechanize.log
            Mechanize.log.error("Caught a Zlib::DataError, unable to decode page: #{$!.to_s}")
          end
          ''
        end

        # Fallback decoder: rewinds +body+, skips its first 10 bytes and
        # inflates the rest as a raw deflate stream.
        #
        # This has its own rescue because an exception raised while a
        # rescue clause of #gunzip is running is not seen by that method's
        # other rescue clauses.
        def inflate_without_header(body)
          body.rewind
          body.read(10)
          Zlib::Inflate.new(-Zlib::MAX_WBITS).inflate(body.read)
        rescue Zlib::DataError, Zlib::BufError => e
          if Mechanize.log
            Mechanize.log.error("Caught a #{e.class}, unable to decode page: #{e}")
          end
          ''
        end
      end
    end
  end
end
@@ -0,0 +1,78 @@
1
module WWW
  class Mechanize
    class Chain
      # Chain handler that chooses the connection object for a request and
      # stores it in params[:connection].  http and https URIs get a
      # Net::HTTP object that is cached per "host:port"; file URIs get a
      # small stand-in object; any other scheme leaves the connection nil.
      class ConnectionResolver
        include WWW::Handler

        def initialize( connection_cache,
                        keep_alive,
                        proxy_addr,
                        proxy_port,
                        proxy_user,
                        proxy_pass )

          @connection_cache = connection_cache
          @keep_alive       = keep_alive
          @proxy_addr       = proxy_addr
          @proxy_port       = proxy_port
          @proxy_user       = proxy_user
          @proxy_pass       = proxy_pass
        end

        # Resolves the connection for params[:uri], records it under
        # params[:connection] and hands over to the next handler.
        def handle(ctx, params)
          uri = params[:uri]

          params[:connection] =
            case uri.scheme.downcase
            when 'http', 'https' then http_connection(uri)
            when 'file'          then file_connection
            end
          super
        end

        private

        # Returns the cached Net::HTTP object for +uri+, creating a fresh
        # one when nothing is cached or the cached one is not started.
        def http_connection(uri)
          entry = (@connection_cache["#{uri.host}:#{uri.port}"] ||= {
            :connection         => nil,
            :keep_alive_options => {},
          })

          connection = entry[:connection]
          unless connection && connection.started?
            connection = entry[:connection] =
              Net::HTTP.new(uri.host, uri.port,
                            @proxy_addr, @proxy_port,
                            @proxy_user, @proxy_pass)
            entry[:keep_alive_options] = {}
          end

          # With keep-alive on, finish a started connection when the
          # recorded timeout has elapsed since the last request, or when
          # the recorded "max" value is down to 1.
          if @keep_alive && connection.started?
            opts = entry[:keep_alive_options]
            timed_out = opts[:timeout] &&
              Time.now.to_i - entry[:last_request_time] > opts[:timeout].to_i

            if timed_out || (opts[:max] && opts[:max].to_i == 1)
              Mechanize.log.debug('Finishing stale connection') if Mechanize.log
              connection.finish
            end
          end

          entry[:last_request_time] = Time.now.to_i
          connection
        end

        # Builds the stand-in connection used for file URIs.  It always
        # reports itself as started, and its +request+ yields a
        # FileResponse built from the request URI's path.
        def file_connection
          connection = Object.new
          class << connection
            def started?; true; end
            def request(request, *args, &block)
              response = FileResponse.new(request.uri.path)
              yield response
            end
          end
          connection
        end
      end
    end
  end
end
@@ -0,0 +1,23 @@
1
module WWW
  class Mechanize
    class Chain
      # Chain handler that copies the per-request headers found in
      # params[:headers] onto params[:request].
      class CustomHeaders
        include WWW::Handler

        # Two symbol keys are translated to header names (:etag and
        # :if_modified_since); any other symbol raises ArgumentError.
        # Non-symbol keys are used as the header name unchanged.
        def handle(ctx, params)
          request = params[:request]
          params[:headers].each do |name, value|
            if name == :etag
              request["ETag"] = value
            elsif name == :if_modified_since
              request["If-Modified-Since"] = value
            elsif name.is_a? Symbol
              raise ArgumentError.new("unknown header symbol #{name}")
            else
              request[name] = value
            end
          end
          super
        end
      end
    end
  end
end
@@ -0,0 +1,9 @@
1
module WWW
  # Mixin for the links of a handler chain.  An including class overrides
  # #handle, does its work, and calls +super+ to reach the default
  # implementation below.
  module Handler
    # The chain this handler belongs to; must respond to +pass+.
    attr_reader :chain
    attr_writer :chain

    # Default behaviour: ask the chain to pass +request+ on, identifying
    # this handler as the sender.  Returns whatever +chain.pass+ returns.
    # +ctx+ is accepted but not used here.
    def handle(ctx, request)
      chain.pass(self, request)
    end
  end
end
@@ -0,0 +1,53 @@
1
module WWW
  class Mechanize
    class Chain
      # Chain handler that fills in the standard request headers:
      # connection handling, accepted encodings/languages/charsets, Host,
      # Cookie, Referer, User-Agent and the agent-wide custom headers.
      class HeaderResolver
        include WWW::Handler

        # keep_alive::      whether to ask for a persistent connection
        # keep_alive_time:: value sent in the Keep-Alive header
        # cookie_jar::      jar queried for the cookies of the request URI
        # user_agent::      User-Agent string, or nil to send none
        # headers::         extra header name/value pairs; applied last so
        #                   they override the defaults set here
        def initialize(keep_alive, keep_alive_time, cookie_jar, user_agent, headers)
          @keep_alive      = keep_alive
          @keep_alive_time = keep_alive_time
          @cookie_jar      = cookie_jar
          @user_agent      = user_agent
          @headers         = headers
        end

        # Sets the headers on params[:request] and passes control on.
        def handle(ctx, params)
          uri     = params[:uri]
          referer = params[:referer]
          request = params[:request]

          if @keep_alive
            request['Connection'] = 'keep-alive'
            request['Keep-Alive'] = @keep_alive_time.to_s
          else
            request['Connection'] = 'close'
          end
          request['Accept-Encoding'] = 'gzip,identity'
          request['Accept-Language'] = 'en-us,en;q=0.5'

          # The port is left out only when it is the default port of the
          # URI's own scheme, so e.g. http on port 443 keeps its port.
          host = "#{uri.host}"
          host << ":#{uri.port}" unless uri.port == uri.default_port
          request['Host'] = host
          request['Accept-Charset'] = 'ISO-8859-1,utf-8;q=0.7,*;q=0.7'

          unless @cookie_jar.empty?(uri)
            cookies = @cookie_jar.cookies(uri)
            # Never add a Cookie field without a value.
            request.add_field('Cookie', cookies.join("; ")) if cookies.length > 0
          end

          # Add Referer header to request
          if referer && referer.uri
            request['Referer'] = referer.uri.to_s
          end

          # Add User-Agent header to request
          request['User-Agent'] = @user_agent if @user_agent

          @headers.each do |k,v|
            request[k] = v
          end
          super
        end
      end
    end
  end
end
@@ -0,0 +1,24 @@
1
module WWW
  class Mechanize
    class Chain
      # Chain handler that, for verbs without a request body, moves the
      # request parameters into the URI's query string.
      class ParameterResolver
        include WWW::Handler

        # For :head, :get, :delete and :trace the parameters are appended
        # to the query of params[:uri] (modifying it in place) and
        # params[:params] is reset to an empty array.  Other verbs are
        # left untouched.
        def handle(ctx, params)
          parameters = params[:params]
          uri        = params[:uri]

          if [:head, :get, :delete, :trace].include?(params[:verb])
            if parameters.length > 0
              uri.query ||= ''
              query = uri.query
              query << '&' if query.length > 0
              query << Util.build_query_string(parameters)
            end
            params[:params] = []
          end
          super
        end
      end
    end
  end
end
File without changes
@@ -0,0 +1,22 @@
1
module WWW
  class Mechanize
    class Chain
      # Chain handler that runs a list of user-supplied callbacks.
      class PreConnectHook
        include WWW::Handler

        # The callbacks; each must respond to +call+ and is given the
        # chain's params hash.
        attr_accessor :hooks

        def initialize
          @hooks = []
        end

        # Calls every hook in order with +params+, then passes control to
        # the next handler.
        def handle(ctx, params)
          @hooks.each do |callback|
            callback.call(params)
          end
          super
        end
      end

      # Same behaviour as PreConnectHook under a different name.
      class PostConnectHook < PreConnectHook; end
    end
  end
end
@@ -0,0 +1,32 @@
1
module WWW
  class Mechanize
    class Chain
      # Chain handler that makes sure params[:request] holds a request
      # object suitable for the scheme of params[:uri].
      class RequestResolver
        include WWW::Handler

        # http/https: a Net::HTTP request of the class named after
        # params[:verb] (e.g. :get gives Net::HTTP::Get).
        # file: a minimal stand-in object (see #file_request).
        # An already present params[:request] is never replaced.
        def handle(ctx, params)
          uri = params[:uri]

          case uri.scheme.downcase
          when 'http', 'https'
            request_class = Net::HTTP.const_get(params[:verb].to_s.capitalize)
            params[:request] ||= request_class.new(uri.request_uri)
          when 'file'
            stub = file_request(uri)
            params[:request] ||= stub
          end

          super
        end

        private

        # Builds the stand-in request used for file URIs: it exposes +uri+
        # and +path+, and accepts header writes and header iteration
        # without doing anything.
        def file_request(uri)
          stub = Struct.new(:uri).new(uri)
          class << stub
            def add_field(*args); end
            alias :[]= :add_field
            def path
              uri.path
            end
            def each_header; end
          end
          stub
        end
      end
    end
  end
end
@@ -0,0 +1,40 @@
1
module WWW
  class Mechanize
    class Chain
      # Chain handler that turns the response into a page object, using
      # the pluggable parser registered for the response's content type,
      # and stores it in params[:page].
      class ResponseBodyParser
        include WWW::Handler

        # pluggable_parser:: object whose +parser(content_type)+ returns
        #                    the class to instantiate
        # watch_for_set::    handed to the parser through +watch_for_set=+
        #                    when it is set and the parser supports it
        def initialize(pluggable_parser, watch_for_set)
          @pluggable_parser = pluggable_parser
          @watch_for_set    = watch_for_set
        end

        # Builds the page and passes control to the next handler.
        def handle(ctx, params)
          response      = params[:response]
          response_body = params[:response_body]
          uri           = params[:uri]

          # Use the part of Content-Type before the first ';', lower-cased,
          # and only the first entry of a comma separated list.  Whitespace
          # around it is removed so that a header such as
          # "text/html ; charset=utf-8" still yields "text/html".
          content_type = nil
          unless response['Content-Type'].nil?
            data = response['Content-Type'].match(/^([^;]*)/)
            content_type = data[1].downcase.split(',')[0] unless data.nil?
            content_type = content_type.strip unless content_type.nil?
          end

          # Find our pluggable parser
          params[:page] = @pluggable_parser.parser(content_type).new(
            uri,
            response,
            response_body,
            response.code
          ) { |parser|
            parser.mech = params[:agent] if parser.respond_to? :mech=
            if parser.respond_to?(:watch_for_set=) && @watch_for_set
              parser.watch_for_set = @watch_for_set
            end
          }
          super
        end
      end
    end
  end
end
@@ -0,0 +1,50 @@
1
module WWW
  class Mechanize
    class Chain
      # Chain handler that processes response metadata: it records the
      # server's Keep-Alive options in the connection cache and stores
      # cookies from both Set-Cookie headers and
      # <meta http-equiv="Set-Cookie"> tags in the cookie jar.
      class ResponseHeaderHandler
        include WWW::Handler

        # cookie_jar::       jar that receives the parsed cookies
        # connection_cache:: hash keyed by "host:port" whose entries hold
        #                    the :keep_alive_options for that connection
        def initialize(cookie_jar, connection_cache)
          @cookie_jar       = cookie_jar
          @connection_cache = connection_cache
        end

        # Records keep-alive options and cookies, then passes control to
        # the next handler.
        def handle(ctx, params)
          response = params[:response]
          uri      = params[:uri]
          page     = params[:page]
          cache_obj = (@connection_cache["#{uri.host}:#{uri.port}"] ||= {
            :connection => nil,
            :keep_alive_options => {},
          })

          # If the server sends back keep alive options, save them
          if keep_alive_info = response['keep-alive']
            keep_alive_info.split(/,\s*/).each do |option|
              k, v = option.split(/=/)
              k = k.to_s.strip
              # Skip malformed entries ("", "=5"): an empty name carries
              # no option and cannot be turned into a symbol key safely.
              next if k.empty?
              cache_obj[:keep_alive_options] ||= {}
              cache_obj[:keep_alive_options][k.intern] = v
            end
          end

          # The body is only searched for meta tags when it mentions
          # Set-Cookie at all.
          if page.is_a?(Page) && page.body =~ /Set-Cookie/n
            page.search('//meta[@http-equiv="Set-Cookie"]').each do |meta|
              save_cookies(uri, meta['content'])
            end
          end

          (response.get_fields('Set-Cookie')||[]).each do |cookie|
            save_cookies(uri, cookie)
          end
          super
        end

        private

        # Parses +cookie_string+ for +uri+ and adds every cookie yielded
        # by the parser to the jar.  A nil string (for example a meta tag
        # without a content attribute) is ignored.
        def save_cookies(uri, cookie_string)
          return if cookie_string.nil?
          Cookie::parse(uri, cookie_string) { |c|
            Mechanize.log.debug("saved cookie: #{c}") if Mechanize.log
            @cookie_jar.add(uri, c)
          }
        end
      end
    end
  end
end