tenderlove-mechanize 0.9.3.20090617085936
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGELOG.rdoc +496 -0
- data/EXAMPLES.rdoc +171 -0
- data/FAQ.rdoc +11 -0
- data/GUIDE.rdoc +122 -0
- data/LICENSE.rdoc +340 -0
- data/Manifest.txt +169 -0
- data/README.rdoc +60 -0
- data/Rakefile +43 -0
- data/examples/flickr_upload.rb +23 -0
- data/examples/mech-dump.rb +7 -0
- data/examples/proxy_req.rb +9 -0
- data/examples/rubyforge.rb +21 -0
- data/examples/spider.rb +11 -0
- data/lib/mechanize.rb +7 -0
- data/lib/www/mechanize/chain/auth_headers.rb +80 -0
- data/lib/www/mechanize/chain/body_decoding_handler.rb +48 -0
- data/lib/www/mechanize/chain/connection_resolver.rb +78 -0
- data/lib/www/mechanize/chain/custom_headers.rb +23 -0
- data/lib/www/mechanize/chain/handler.rb +9 -0
- data/lib/www/mechanize/chain/header_resolver.rb +53 -0
- data/lib/www/mechanize/chain/parameter_resolver.rb +24 -0
- data/lib/www/mechanize/chain/post_connect_hook.rb +0 -0
- data/lib/www/mechanize/chain/pre_connect_hook.rb +22 -0
- data/lib/www/mechanize/chain/request_resolver.rb +32 -0
- data/lib/www/mechanize/chain/response_body_parser.rb +40 -0
- data/lib/www/mechanize/chain/response_header_handler.rb +50 -0
- data/lib/www/mechanize/chain/response_reader.rb +41 -0
- data/lib/www/mechanize/chain/ssl_resolver.rb +42 -0
- data/lib/www/mechanize/chain/uri_resolver.rb +77 -0
- data/lib/www/mechanize/chain.rb +34 -0
- data/lib/www/mechanize/content_type_error.rb +16 -0
- data/lib/www/mechanize/cookie.rb +72 -0
- data/lib/www/mechanize/cookie_jar.rb +191 -0
- data/lib/www/mechanize/file.rb +73 -0
- data/lib/www/mechanize/file_response.rb +62 -0
- data/lib/www/mechanize/file_saver.rb +39 -0
- data/lib/www/mechanize/form/button.rb +8 -0
- data/lib/www/mechanize/form/check_box.rb +13 -0
- data/lib/www/mechanize/form/field.rb +28 -0
- data/lib/www/mechanize/form/file_upload.rb +24 -0
- data/lib/www/mechanize/form/image_button.rb +23 -0
- data/lib/www/mechanize/form/multi_select_list.rb +69 -0
- data/lib/www/mechanize/form/option.rb +51 -0
- data/lib/www/mechanize/form/radio_button.rb +38 -0
- data/lib/www/mechanize/form/select_list.rb +45 -0
- data/lib/www/mechanize/form.rb +360 -0
- data/lib/www/mechanize/headers.rb +12 -0
- data/lib/www/mechanize/history.rb +67 -0
- data/lib/www/mechanize/inspect.rb +90 -0
- data/lib/www/mechanize/monkey_patch.rb +37 -0
- data/lib/www/mechanize/page/base.rb +10 -0
- data/lib/www/mechanize/page/frame.rb +22 -0
- data/lib/www/mechanize/page/link.rb +50 -0
- data/lib/www/mechanize/page/meta.rb +51 -0
- data/lib/www/mechanize/page.rb +176 -0
- data/lib/www/mechanize/pluggable_parsers.rb +103 -0
- data/lib/www/mechanize/redirect_limit_reached_error.rb +18 -0
- data/lib/www/mechanize/redirect_not_get_or_head_error.rb +20 -0
- data/lib/www/mechanize/response_code_error.rb +25 -0
- data/lib/www/mechanize/unsupported_scheme_error.rb +10 -0
- data/lib/www/mechanize/util.rb +76 -0
- data/lib/www/mechanize.rb +619 -0
- data/mechanize.gemspec +41 -0
- data/test/chain/test_argument_validator.rb +14 -0
- data/test/chain/test_auth_headers.rb +25 -0
- data/test/chain/test_custom_headers.rb +18 -0
- data/test/chain/test_header_resolver.rb +28 -0
- data/test/chain/test_parameter_resolver.rb +35 -0
- data/test/chain/test_request_resolver.rb +29 -0
- data/test/chain/test_response_reader.rb +24 -0
- data/test/data/htpasswd +1 -0
- data/test/data/server.crt +16 -0
- data/test/data/server.csr +12 -0
- data/test/data/server.key +15 -0
- data/test/data/server.pem +15 -0
- data/test/helper.rb +129 -0
- data/test/htdocs/alt_text.html +10 -0
- data/test/htdocs/bad_form_test.html +9 -0
- data/test/htdocs/button.jpg +0 -0
- data/test/htdocs/empty_form.html +6 -0
- data/test/htdocs/file_upload.html +26 -0
- data/test/htdocs/find_link.html +41 -0
- data/test/htdocs/form_multi_select.html +16 -0
- data/test/htdocs/form_multival.html +37 -0
- data/test/htdocs/form_no_action.html +18 -0
- data/test/htdocs/form_no_input_name.html +16 -0
- data/test/htdocs/form_select.html +16 -0
- data/test/htdocs/form_select_all.html +16 -0
- data/test/htdocs/form_select_none.html +17 -0
- data/test/htdocs/form_select_noopts.html +10 -0
- data/test/htdocs/form_set_fields.html +14 -0
- data/test/htdocs/form_test.html +188 -0
- data/test/htdocs/frame_test.html +30 -0
- data/test/htdocs/google.html +13 -0
- data/test/htdocs/iframe_test.html +16 -0
- data/test/htdocs/index.html +6 -0
- data/test/htdocs/link with space.html +5 -0
- data/test/htdocs/meta_cookie.html +11 -0
- data/test/htdocs/no_title_test.html +6 -0
- data/test/htdocs/relative/tc_relative_links.html +21 -0
- data/test/htdocs/tc_bad_links.html +5 -0
- data/test/htdocs/tc_base_link.html +8 -0
- data/test/htdocs/tc_blank_form.html +11 -0
- data/test/htdocs/tc_checkboxes.html +19 -0
- data/test/htdocs/tc_encoded_links.html +5 -0
- data/test/htdocs/tc_follow_meta.html +8 -0
- data/test/htdocs/tc_form_action.html +48 -0
- data/test/htdocs/tc_links.html +18 -0
- data/test/htdocs/tc_no_attributes.html +16 -0
- data/test/htdocs/tc_pretty_print.html +17 -0
- data/test/htdocs/tc_radiobuttons.html +17 -0
- data/test/htdocs/tc_referer.html +10 -0
- data/test/htdocs/tc_relative_links.html +19 -0
- data/test/htdocs/tc_textarea.html +23 -0
- data/test/htdocs/unusual______.html +5 -0
- data/test/servlets.rb +365 -0
- data/test/ssl_server.rb +48 -0
- data/test/test_authenticate.rb +71 -0
- data/test/test_bad_links.rb +25 -0
- data/test/test_blank_form.rb +16 -0
- data/test/test_checkboxes.rb +61 -0
- data/test/test_content_type.rb +13 -0
- data/test/test_cookie_class.rb +338 -0
- data/test/test_cookie_jar.rb +362 -0
- data/test/test_cookies.rb +123 -0
- data/test/test_encoded_links.rb +20 -0
- data/test/test_errors.rb +49 -0
- data/test/test_follow_meta.rb +108 -0
- data/test/test_form_action.rb +44 -0
- data/test/test_form_as_hash.rb +61 -0
- data/test/test_form_button.rb +38 -0
- data/test/test_form_no_inputname.rb +15 -0
- data/test/test_forms.rb +564 -0
- data/test/test_frames.rb +25 -0
- data/test/test_get_headers.rb +52 -0
- data/test/test_gzipping.rb +22 -0
- data/test/test_hash_api.rb +45 -0
- data/test/test_history.rb +142 -0
- data/test/test_history_added.rb +16 -0
- data/test/test_html_unscape_forms.rb +39 -0
- data/test/test_if_modified_since.rb +20 -0
- data/test/test_keep_alive.rb +31 -0
- data/test/test_links.rb +120 -0
- data/test/test_mech.rb +268 -0
- data/test/test_mechanize_file.rb +47 -0
- data/test/test_meta.rb +65 -0
- data/test/test_multi_select.rb +106 -0
- data/test/test_no_attributes.rb +13 -0
- data/test/test_option.rb +18 -0
- data/test/test_page.rb +119 -0
- data/test/test_pluggable_parser.rb +145 -0
- data/test/test_post_form.rb +34 -0
- data/test/test_pretty_print.rb +22 -0
- data/test/test_radiobutton.rb +75 -0
- data/test/test_redirect_limit_reached.rb +41 -0
- data/test/test_redirect_verb_handling.rb +45 -0
- data/test/test_referer.rb +39 -0
- data/test/test_relative_links.rb +40 -0
- data/test/test_request.rb +13 -0
- data/test/test_response_code.rb +52 -0
- data/test/test_save_file.rb +48 -0
- data/test/test_scheme.rb +48 -0
- data/test/test_select.rb +106 -0
- data/test/test_select_all.rb +15 -0
- data/test/test_select_none.rb +15 -0
- data/test/test_select_noopts.rb +16 -0
- data/test/test_set_fields.rb +44 -0
- data/test/test_ssl_server.rb +20 -0
- data/test/test_subclass.rb +14 -0
- data/test/test_textarea.rb +45 -0
- data/test/test_upload.rb +109 -0
- data/test/test_verbs.rb +25 -0
- metadata +314 -0
data/README.rdoc
ADDED
@@ -0,0 +1,60 @@
|
|
1
|
+
= WWW::Mechanize
|
2
|
+
|
3
|
+
* http://mechanize.rubyforge.org/
|
4
|
+
* http://github.com/tenderlove/mechanize/tree/master
|
5
|
+
|
6
|
+
== DESCRIPTION
|
7
|
+
|
8
|
+
The Mechanize library is used for automating interaction with websites.
|
9
|
+
Mechanize automatically stores and sends cookies, follows redirects,
|
10
|
+
can follow links, and submit forms. Form fields can be populated and
|
11
|
+
submitted. Mechanize also keeps track of the sites that you have visited as
|
12
|
+
a history.
|
13
|
+
|
14
|
+
== Dependencies
|
15
|
+
|
16
|
+
* ruby 1.8.6
|
17
|
+
* nokogiri[http://nokogiri.rubyforge.org]
|
18
|
+
|
19
|
+
== SUPPORT:
|
20
|
+
|
21
|
+
The mechanize mailing list is available here:
|
22
|
+
|
23
|
+
* http://rubyforge.org/mailman/listinfo/mechanize-users
|
24
|
+
|
25
|
+
The bug tracker is available here:
|
26
|
+
|
27
|
+
* http://rubyforge.org/tracker/?atid=5709&group_id=1453
|
28
|
+
|
29
|
+
== Examples
|
30
|
+
|
31
|
+
If you are just starting, check out the GUIDE.
|
32
|
+
Also, check out the EXAMPLES file.
|
33
|
+
|
34
|
+
== Authors
|
35
|
+
|
36
|
+
Copyright (c) 2005 by Michael Neumann (mneumann@ntecs.de)
|
37
|
+
|
38
|
+
Copyright (c) 2006-2009:
|
39
|
+
|
40
|
+
* {Aaron Patterson}[http://tenderlovemaking.com] (aaronp@rubyforge.org)
|
41
|
+
* {Mike Dalessio}[http://mike.daless.io] (mike@csa.net)
|
42
|
+
|
43
|
+
This library comes with a shameless plug for employing me
|
44
|
+
(Aaron[http://tenderlovemaking.com/]) programming
|
45
|
+
Ruby, my favorite language!
|
46
|
+
|
47
|
+
== Acknowledgments
|
48
|
+
|
49
|
+
This library was heavily influenced by its namesake in the Perl world. A big
|
50
|
+
thanks goes to Andy Lester (andy@petdance.com), the author of the original
|
51
|
+
Perl Mechanize, which is available here[http://search.cpan.org/~petdance/WWW-Mechanize-1.20/]. Ruby Mechanize would not be around without you!
|
52
|
+
|
53
|
+
Thank you to Michael Neumann for starting the Ruby version. Thanks to everyone
|
54
|
+
who's helped out in various ways. Finally, thank you to the people using this
|
55
|
+
library!
|
56
|
+
|
57
|
+
== License
|
58
|
+
|
59
|
+
This library is distributed under the GPL. Please see the LICENSE file.
|
60
|
+
|
data/Rakefile
ADDED
@@ -0,0 +1,43 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'hoe'
|
3
|
+
|
4
|
+
$LOAD_PATH.unshift File.join(File.dirname(__FILE__), "lib")
|
5
|
+
require 'mechanize'
|
6
|
+
|
7
|
+
# Hoe project definition for the mechanize gem. The resulting object is kept
# in HOE because the gem:spec and coverage tasks below read from it.
HOE = Hoe.new('mechanize', WWW::Mechanize::VERSION) { |hoe|
  # Maintainers
  hoe.developer('Aaron Patterson','aaronp@rubyforge.org')
  hoe.developer('Mike Dalessio','mike.dalessio@gmail.com')

  # Documentation files
  hoe.readme_file = 'README.rdoc'
  hoe.history_file = 'CHANGELOG.rdoc'
  hoe.extra_rdoc_files = FileList['*.rdoc']

  # Gem metadata and runtime dependencies
  hoe.summary = "Mechanize provides automated web-browsing"
  hoe.extra_deps = [['nokogiri', '>= 1.2.1']]
}
|
16
|
+
|
17
|
+
desc "Update SSL Certificate"
task('ssl_cert') do |_task|
  # The commands run strictly in this order: generate a passphrase-protected
  # key, create a signing request, strip the passphrase from the key,
  # self-sign a certificate, then move the results into test/data/.
  [
    "openssl genrsa -des3 -out server.key 1024",
    "openssl req -new -key server.key -out server.csr",
    "cp server.key server.key.org",
    "openssl rsa -in server.key.org -out server.key",
    "openssl x509 -req -days 365 -in server.csr -signkey server.key -out server.crt",
    "cp server.key server.pem",
    "mv server.key server.csr server.crt server.pem test/data/",
    "rm server.key.org",
  ].each do |command|
    sh command
  end
end
|
28
|
+
|
29
|
+
namespace :gem do
  desc 'Generate a gem spec'
  task :spec do
    gemspec_path = "#{HOE.name}.gemspec"

    File.open(gemspec_path, 'w') { |io|
      # Append a timestamp to the version so each generated spec is unique.
      timestamp = Time.now.strftime("%Y%m%d%H%M%S")
      HOE.spec.version = "#{HOE.version}.#{timestamp}"
      io.write(HOE.spec.to_ruby)
    }
  end
end
|
38
|
+
|
39
|
+
desc "Run code-coverage analysis"
task :coverage do
  # Start from a clean slate so stale reports do not linger.
  rm_rf "coverage"

  # Expand Hoe's test globs into a space-separated file list for rcov.
  test_files = Dir[*HOE.test_globs].join(' ')
  sh "rcov -x Library -I lib:test #{test_files}"
end
|
@@ -0,0 +1,23 @@
|
|
1
|
+
$:.unshift File.join(File.dirname(__FILE__), "..", "lib")

require 'rubygems'
require 'mechanize'

# Command-line arguments, in order: login e-mail, password, path of the file
# to upload. Missing arguments are left as nil.
email, password, photo_path = ARGV

agent = WWW::Mechanize.new

# Fetch the flickr sign-in page
signin_page = agent.get('http://flickr.com/signin/flickr/')

# Populate and submit the login form
login_form = signin_page.forms.name('flickrloginform').first
login_form.email = email
login_form.password = password
home_page = agent.submit(login_form)

# Follow the "Upload" link
upload_page = agent.click(home_page.links.text('Upload'))

# Attach the file to the upload form and send it
upload_form = upload_page.forms.action('/photos_upload_process.gne').first
upload_form.file_uploads.name('file1').first.file_name = photo_path
agent.submit(upload_form)
|
@@ -0,0 +1,21 @@
|
|
1
|
+
$:.unshift File.join(File.dirname(__FILE__), "..", "lib")

# This example logs a user in to rubyforge and prints out the body of the
# page after logging the user in.
#
# Usage: ruby rubyforge.rb LOGIN_NAME PASSWORD
require 'rubygems'
require 'logger' # Logger is used below; require it explicitly rather than relying on another library to load it
require 'mechanize'

# Create a new mechanize object that logs its activity to STDERR
agent = WWW::Mechanize.new { |a| a.log = Logger.new(STDERR) }

# Load the rubyforge website
page = agent.get('http://rubyforge.org/')
page = agent.click page.links.text(/Log In/) # Click the login link
form = page.forms[1] # Select the second form on the page (index 1) -- presumably the login form
form.form_loginname = ARGV[0]
form.form_pw = ARGV[1]

# Submit the form using its first button
page = agent.submit(form, form.buttons.first)

puts page.body # Print out the body
|
data/examples/spider.rb
ADDED
@@ -0,0 +1,11 @@
|
|
1
|
+
$:.unshift File.join(File.dirname(__FILE__), "..", "lib")

require 'rubygems'
require 'mechanize'

# Crawl outward from the URL given as the first command-line argument,
# following only links whose host matches the first page in the history.
agent = WWW::Mechanize.new
pending = agent.get(ARGV[0]).links

while (link = pending.pop)
  # Stay on the starting host.
  next if link.uri.host != agent.history.first.uri.host
  # Do not re-fetch pages the agent has already seen.
  next if agent.visited?(link.href)

  pending.push(*agent.click(link).links)
end
|
data/lib/mechanize.rb
ADDED
@@ -0,0 +1,80 @@
|
|
1
|
+
module WWW
  class Mechanize
    class Chain
      # Chain handler that adds credentials to the outgoing request.
      #
      # The scheme is looked up per host in +auth_hash+ and may be :basic,
      # :digest or :iis_digest. Hosts with no entry are left untouched.
      class AuthHeaders
        include WWW::Handler

        # Number of times each server nonce has been used; reported to the
        # server as the "nc" directive.
        @@nonce_count = Hash.new(0)
        # Client nonce, generated once when this class is loaded.
        CNONCE = Digest::MD5.hexdigest("%x" % (Time.now.to_i + rand(65535)))

        # auth_hash:: maps host names to :basic, :digest or :iis_digest
        # user::      user name to authenticate with
        # password::  password to authenticate with
        # digest::    the digest challenge string, or nil when none is known
        def initialize(auth_hash, user, password, digest)
          @auth_hash = auth_hash
          @user = user
          @password = password
          @digest = digest
        end

        # Sets the Authorization data on params[:request] according to the
        # scheme registered for params[:uri].host, then passes control on.
        def handle(ctx, params)
          uri = params[:uri]
          request = params[:request]

          scheme = @auth_hash[uri.host]
          case scheme
          when :basic
            request.basic_auth(@user, @password)
          when :iis_digest, :digest
            # Without a challenge there is nothing to answer. The original
            # only guarded the :digest branch, so :iis_digest crashed on nil.
            if @digest
              request['Authorization'] =
                gen_auth_header(uri, request, @digest, scheme == :iis_digest)
            end
          end
          super
        end

        # Builds the value of a Digest Authorization header.
        #
        # uri::         URI being requested (its path is used as digest-uri)
        # request::     request object; only its +method+ is read
        # auth_header:: challenge of the form "Digest key=value, key=value"
        # is_IIS::      when true the qop value is emitted quoted
        #
        # Returns the header value as a String.
        def gen_auth_header(uri, request, auth_header, is_IIS = false)
          challenge = auth_header[/^\w+ (.*)/, 1]

          # Parse the key=value pairs, dropping surrounding double quotes.
          params = {}
          challenge.scan(/(\w+)=("[^"]*"|[^,]*)/) do |key, value|
            params[key] = value.gsub(/^"/, '').gsub(/"$/, '')
          end

          nonce = params['nonce']
          qop = params['qop']

          @@nonce_count[nonce] += 1
          nonce_count = '%08x' % @@nonce_count[nonce]

          # NOTE(review): the digest-uri below is uri.path only, so a query
          # string is not covered -- confirm against the servers in use.
          a_1 = "#{@user}:#{params['realm']}:#{@password}"
          a_2 = "#{request.method}:#{uri.path}"

          request_digest = ''
          request_digest << Digest::MD5.hexdigest(a_1)
          request_digest << ':' << nonce
          if qop
            request_digest << ':' << nonce_count
            request_digest << ':' << CNONCE
            request_digest << ':' << qop
          end
          # When the challenge carries no qop the digest is simply
          # MD5(HA1:nonce:HA2); appending a nil qop used to raise TypeError.
          request_digest << ':' << Digest::MD5.hexdigest(a_2)

          header = ''
          header << "Digest username=\"#{@user}\", "
          if qop
            header << (is_IIS ? "qop=\"#{qop}\", " : "qop=#{qop}, ")
          end
          header << "uri=\"#{uri.path}\", "
          header << %w{ algorithm opaque nonce realm }.map { |field|
            next unless params[field]
            "#{field}=\"#{params[field]}\""
          }.compact.join(', ')

          # nc and cnonce are only meaningful together with qop.
          if qop
            header << ", nc=#{nonce_count}, "
            header << "cnonce=\"#{CNONCE}\", "
          else
            header << ", "
          end
          header << "response=\"#{Digest::MD5.hexdigest(request_digest)}\""

          header
        end
      end
    end
  end
end
|
@@ -0,0 +1,48 @@
|
|
1
|
+
module WWW
  class Mechanize
    class Chain
      # Chain handler that replaces options[:response_body] (an IO-like
      # object) with the decoded body string, based on the response's
      # Content-Encoding header.
      class BodyDecodingHandler
        include WWW::Handler

        # Decodes the body and passes control to the next handler.
        #
        # Raises RuntimeError ('Unsupported content encoding') for any
        # encoding other than gzip, x-gzip or identity.
        def handle(ctx, options)
          body = options[:response_body]
          response = options[:response]

          encoding = response['Content-Encoding']
          options[:response_body] =
            case encoding && encoding.downcase
            when nil, 'identity'
              # "identity" means no transformation; the request headers built
              # elsewhere in this chain advertise it, so accept it here too.
              body.read
            when 'gzip'
              decode_gzip(body, response)
            when 'x-gzip'
              # NOTE(review): x-gzip bodies are passed through undecoded, as
              # in the original code -- confirm this is intentional.
              body.read
            else
              raise 'Unsupported content encoding'
            end
          super
        end

        private

        # Gunzips +body+. Returns '' when the response is empty or the data
        # cannot be decoded.
        def decode_gzip(body, response)
          Mechanize.log.debug('gunzip body') if Mechanize.log
          return '' unless response['Content-Length'].to_i > 0 || body.length > 0

          begin
            Zlib::GzipReader.new(body).read
          rescue Zlib::BufError, Zlib::GzipFile::Error => e
            # Report the exception that was actually caught.
            Mechanize.log.error("Caught a #{e.class}") if Mechanize.log
            # Fall back to a raw inflate: skip the first 10 bytes (presumably
            # the gzip header) and inflate the remainder without a wrapper.
            # An error raised here is not caught by the rescue clause below.
            body.rewind
            body.read(10)
            Zlib::Inflate.new(-Zlib::MAX_WBITS).inflate(body.read)
          rescue Zlib::DataError
            if Mechanize.log
              Mechanize.log.error("Caught a Zlib::DataError, unable to decode page: #{$!.to_s}")
            end
            ''
          end
        end
      end
    end
  end
end
|
@@ -0,0 +1,78 @@
|
|
1
|
+
module WWW
  class Mechanize
    class Chain
      # Chain handler that stores a connection object for params[:uri] in
      # params[:connection]. http/https URIs get a Net::HTTP object cached
      # per "host:port"; file URIs get a small stand-in object. Any other
      # scheme leaves params[:connection] nil.
      class ConnectionResolver
        include WWW::Handler

        def initialize(connection_cache, keep_alive, proxy_addr, proxy_port,
                       proxy_user, proxy_pass)
          @connection_cache = connection_cache
          @keep_alive = keep_alive
          @proxy_addr = proxy_addr
          @proxy_port = proxy_port
          @proxy_user = proxy_user
          @proxy_pass = proxy_pass
        end

        def handle(ctx, params)
          uri = params[:uri]

          params[:connection] =
            case uri.scheme.downcase
            when 'http', 'https' then cached_http_connection(uri)
            when 'file' then file_connection
            end
          super
        end

        private

        # Returns the cached Net::HTTP object for +uri+, creating a fresh one
        # when none exists or the cached one is not started.
        def cached_http_connection(uri)
          entry = (@connection_cache["#{uri.host}:#{uri.port}"] ||= {
            :connection => nil,
            :keep_alive_options => {},
          })

          connection = entry[:connection]
          unless connection && connection.started?
            connection = entry[:connection] = Net::HTTP.new(
              uri.host, uri.port,
              @proxy_addr, @proxy_port, @proxy_user, @proxy_pass
            )
            entry[:keep_alive_options] = {}
          end

          # With keep-alive on, finish a started connection when the server's
          # timeout has elapsed since the last request, or when the server
          # reported that only one more request is allowed.
          if @keep_alive && connection.started?
            opts = entry[:keep_alive_options]
            timed_out = opts[:timeout] &&
              Time.now.to_i - entry[:last_request_time] > opts[:timeout].to_i
            exhausted = opts[:max] && opts[:max].to_i == 1

            if timed_out || exhausted
              Mechanize.log.debug('Finishing stale connection') if Mechanize.log
              connection.finish
            end
          end

          entry[:last_request_time] = Time.now.to_i
          connection
        end

        # Builds a stand-in connection for file:// URIs that always reports
        # itself as started and yields a FileResponse for the request's path.
        def file_connection
          stub = Object.new

          def stub.started?
            true
          end

          def stub.request(request, *args, &block)
            yield FileResponse.new(request.uri.path)
          end

          stub
        end
      end
    end
  end
end
|
@@ -0,0 +1,23 @@
|
|
1
|
+
module WWW
  class Mechanize
    class Chain
      # Chain handler that copies the per-request headers in params[:headers]
      # onto params[:request].
      class CustomHeaders
        include WWW::Handler

        # Keys may be the symbols :etag and :if_modified_since, which map to
        # the "ETag" and "If-Modified-Since" fields, or any non-symbol key,
        # which is used as the field name as given.
        #
        # Raises ArgumentError for any other symbol.
        def handle(ctx, params)
          request = params[:request]
          symbol_fields = {
            :etag => "ETag",
            :if_modified_since => "If-Modified-Since"
          }

          params[:headers].each do |name, value|
            field = symbol_fields[name]
            if field.nil?
              raise ArgumentError.new("unknown header symbol #{name}") if name.is_a? Symbol
              field = name
            end
            request[field] = value
          end
          super
        end
      end
    end
  end
end
|
@@ -0,0 +1,53 @@
|
|
1
|
+
module WWW
  class Mechanize
    class Chain
      # Chain handler that fills in the standard request headers: connection
      # handling, accepted encodings/languages/charsets, Host, Cookie,
      # Referer, User-Agent and any agent-wide extra headers.
      class HeaderResolver
        include WWW::Handler

        # keep_alive::      whether to ask for a persistent connection
        # keep_alive_time:: value sent in the Keep-Alive header
        # cookie_jar::      jar queried for cookies matching the URI
        # user_agent::      User-Agent string, or nil to send none
        # headers::         extra headers (name => value) applied last
        def initialize(keep_alive, keep_alive_time, cookie_jar, user_agent, headers)
          @keep_alive = keep_alive
          @keep_alive_time = keep_alive_time
          @cookie_jar = cookie_jar
          @user_agent = user_agent
          @headers = headers
        end

        # Sets the headers on params[:request] and passes control on.
        def handle(ctx, params)
          uri = params[:uri]
          referer = params[:referer]
          request = params[:request]

          if @keep_alive
            request['Connection'] = 'keep-alive'
            request['Keep-Alive'] = @keep_alive_time.to_s
          else
            request['Connection'] = 'close'
          end
          request['Accept-Encoding'] = 'gzip,identity'
          request['Accept-Language'] = 'en-us,en;q=0.5'

          # The port may only be left out of Host when it is the default for
          # this URI's scheme. Testing for "80 or 443" regardless of scheme
          # produced a wrong Host for https on port 80 or http on port 443.
          host = uri.host.to_s
          host += ":#{uri.port}" unless uri.port == uri.default_port
          request['Host'] = host
          request['Accept-Charset'] = 'ISO-8859-1,utf-8;q=0.7,*;q=0.7'

          unless @cookie_jar.empty?(uri)
            cookies = @cookie_jar.cookies(uri)
            # Never add a nil Cookie field.
            request.add_field('Cookie', cookies.join("; ")) if cookies.length > 0
          end

          # Add Referer header to request
          if referer && referer.uri
            request['Referer'] = referer.uri.to_s
          end

          # Add User-Agent header to request
          request['User-Agent'] = @user_agent if @user_agent

          # Agent-wide headers go last so they can override the defaults.
          @headers.each do |k, v|
            request[k] = v
          end
          super
        end
      end
    end
  end
end
|
@@ -0,0 +1,24 @@
|
|
1
|
+
module WWW
  class Mechanize
    class Chain
      # Chain handler that, for HEAD/GET/DELETE/TRACE requests, appends
      # params[:params] to the query string of params[:uri] and then empties
      # params[:params]. Other verbs are left alone.
      class ParameterResolver
        include WWW::Handler

        def handle(ctx, params)
          if [:head, :get, :delete, :trace].include?(params[:verb])
            pairs = params[:params]

            if pairs.length > 0
              target = params[:uri]
              target.query ||= ''
              # The query string is extended in place.
              query = target.query
              query << '&' unless query.length == 0
              query << Util.build_query_string(pairs)
            end

            params[:params] = []
          end
          super
        end
      end
    end
  end
end
|
File without changes
|
@@ -0,0 +1,22 @@
|
|
1
|
+
module WWW
  class Mechanize
    class Chain
      # Chain handler that calls every registered hook with the request
      # parameters before passing control on.
      class PreConnectHook
        include WWW::Handler

        # Callables invoked, in order, with the params hash.
        attr_accessor :hooks

        def initialize
          @hooks = []
        end

        def handle(ctx, params)
          @hooks.each do |callback|
            callback.call(params)
          end
          super
        end
      end

      # Same behaviour as PreConnectHook under a separate class name.
      class PostConnectHook < PreConnectHook; end
    end
  end
end
|
@@ -0,0 +1,32 @@
|
|
1
|
+
module WWW
  class Mechanize
    class Chain
      # Chain handler that makes sure params[:request] holds a request
      # object: a Net::HTTP request class instance for http/https URIs, or a
      # minimal stand-in for file URIs. An existing params[:request] is kept.
      class RequestResolver
        include WWW::Handler

        def handle(ctx, params)
          uri = params[:uri]

          case uri.scheme.downcase
          when 'http', 'https'
            # :get => Net::HTTP::Get, :post => Net::HTTP::Post, and so on.
            request_class = Net::HTTP.const_get(params[:verb].to_s.capitalize)
            params[:request] ||= request_class.new(uri.request_uri)
          when 'file'
            # Stand-in request: header writes are ignored, and it exposes
            # only the URI and its path.
            stub = Struct.new(:uri).new(uri)

            def stub.add_field(*args); end

            def stub.[]=(*args); end

            def stub.path
              uri.path
            end

            def stub.each_header; end

            params[:request] ||= stub
          end

          super
        end
      end
    end
  end
end
|
@@ -0,0 +1,40 @@
|
|
1
|
+
module WWW
  class Mechanize
    class Chain
      # Chain handler that picks a parser class from the response's content
      # type and stores the parsed result in params[:page].
      class ResponseBodyParser
        include WWW::Handler

        # pluggable_parser:: object whose +parser+ method maps a content type
        #                    to a parser class
        # watch_for_set::    passed on to parsers that accept it, when set
        def initialize(pluggable_parser, watch_for_set)
          @pluggable_parser = pluggable_parser
          @watch_for_set = watch_for_set
        end

        def handle(ctx, params)
          response = params[:response]
          response_body = params[:response_body]
          uri = params[:uri]

          # Reduce e.g. "Text/HTML ; charset=utf-8" to "text/html": drop the
          # parameters, lower-case, and keep the first entry of a comma list.
          content_type = nil
          unless response['Content-Type'].nil?
            data = response['Content-Type'].match(/^([^;]*)/)
            media_type = data && data[1].downcase.split(',')[0]
            # Whitespace around the media type is legal; without the strip,
            # "text/html " would not match the type registered for a parser.
            content_type = media_type.strip if media_type
          end

          # Find our pluggable parser
          params[:page] = @pluggable_parser.parser(content_type).new(
            uri,
            response,
            response_body,
            response.code
          ) { |parser|
            parser.mech = params[:agent] if parser.respond_to? :mech=
            if parser.respond_to?(:watch_for_set=) && @watch_for_set
              parser.watch_for_set = @watch_for_set
            end
          }
          super
        end
      end
    end
  end
end
|
@@ -0,0 +1,50 @@
|
|
1
|
+
module WWW
  class Mechanize
    class Chain
      # Chain handler that processes response metadata: it records the
      # server's Keep-Alive options in the connection cache and saves cookies
      # found in Set-Cookie headers or <meta http-equiv="Set-Cookie"> tags.
      class ResponseHeaderHandler
        include WWW::Handler

        # cookie_jar::       jar that receives parsed cookies
        # connection_cache:: hash keyed by "host:port"
        def initialize(cookie_jar, connection_cache)
          @cookie_jar = cookie_jar
          @connection_cache = connection_cache
        end

        def handle(ctx, params)
          response = params[:response]
          uri = params[:uri]
          page = params[:page]
          cache_obj = (@connection_cache["#{uri.host}:#{uri.port}"] ||= {
            :connection => nil,
            :keep_alive_options => {},
          })

          # If the server sends back keep alive options, save them
          if keep_alive_info = response['keep-alive']
            keep_alive_info.split(/,\s*/).each do |option|
              k, v = option.split(/=/)
              # Normalise the name so "Timeout" or " max" land under the
              # lowercase symbols used to look them up, and skip empty
              # entries (e.g. a leading comma), which used to raise on
              # nil.intern.
              name = k.to_s.strip.downcase
              next if name.empty?
              cache_obj[:keep_alive_options] ||= {}
              cache_obj[:keep_alive_options][name.intern] = v
            end
          end

          # Cookies set through meta tags in an HTML page
          if page.is_a?(Page) && page.body =~ /Set-Cookie/n
            page.search('//meta[@http-equiv="Set-Cookie"]').each do |meta|
              Cookie::parse(uri, meta['content']) { |c|
                Mechanize.log.debug("saved cookie: #{c}") if Mechanize.log
                @cookie_jar.add(uri, c)
              }
            end
          end

          # Cookies set through response headers
          (response.get_fields('Set-Cookie')||[]).each do |cookie|
            Cookie::parse(uri, cookie) { |c|
              Mechanize.log.debug("saved cookie: #{c}") if Mechanize.log
              @cookie_jar.add(uri, c)
            }
          end
          super
        end
      end
    end
  end
end
|