tenderlove-mechanize 0.9.3.20090617085936
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGELOG.rdoc +496 -0
- data/EXAMPLES.rdoc +171 -0
- data/FAQ.rdoc +11 -0
- data/GUIDE.rdoc +122 -0
- data/LICENSE.rdoc +340 -0
- data/Manifest.txt +169 -0
- data/README.rdoc +60 -0
- data/Rakefile +43 -0
- data/examples/flickr_upload.rb +23 -0
- data/examples/mech-dump.rb +7 -0
- data/examples/proxy_req.rb +9 -0
- data/examples/rubyforge.rb +21 -0
- data/examples/spider.rb +11 -0
- data/lib/mechanize.rb +7 -0
- data/lib/www/mechanize/chain/auth_headers.rb +80 -0
- data/lib/www/mechanize/chain/body_decoding_handler.rb +48 -0
- data/lib/www/mechanize/chain/connection_resolver.rb +78 -0
- data/lib/www/mechanize/chain/custom_headers.rb +23 -0
- data/lib/www/mechanize/chain/handler.rb +9 -0
- data/lib/www/mechanize/chain/header_resolver.rb +53 -0
- data/lib/www/mechanize/chain/parameter_resolver.rb +24 -0
- data/lib/www/mechanize/chain/post_connect_hook.rb +0 -0
- data/lib/www/mechanize/chain/pre_connect_hook.rb +22 -0
- data/lib/www/mechanize/chain/request_resolver.rb +32 -0
- data/lib/www/mechanize/chain/response_body_parser.rb +40 -0
- data/lib/www/mechanize/chain/response_header_handler.rb +50 -0
- data/lib/www/mechanize/chain/response_reader.rb +41 -0
- data/lib/www/mechanize/chain/ssl_resolver.rb +42 -0
- data/lib/www/mechanize/chain/uri_resolver.rb +77 -0
- data/lib/www/mechanize/chain.rb +34 -0
- data/lib/www/mechanize/content_type_error.rb +16 -0
- data/lib/www/mechanize/cookie.rb +72 -0
- data/lib/www/mechanize/cookie_jar.rb +191 -0
- data/lib/www/mechanize/file.rb +73 -0
- data/lib/www/mechanize/file_response.rb +62 -0
- data/lib/www/mechanize/file_saver.rb +39 -0
- data/lib/www/mechanize/form/button.rb +8 -0
- data/lib/www/mechanize/form/check_box.rb +13 -0
- data/lib/www/mechanize/form/field.rb +28 -0
- data/lib/www/mechanize/form/file_upload.rb +24 -0
- data/lib/www/mechanize/form/image_button.rb +23 -0
- data/lib/www/mechanize/form/multi_select_list.rb +69 -0
- data/lib/www/mechanize/form/option.rb +51 -0
- data/lib/www/mechanize/form/radio_button.rb +38 -0
- data/lib/www/mechanize/form/select_list.rb +45 -0
- data/lib/www/mechanize/form.rb +360 -0
- data/lib/www/mechanize/headers.rb +12 -0
- data/lib/www/mechanize/history.rb +67 -0
- data/lib/www/mechanize/inspect.rb +90 -0
- data/lib/www/mechanize/monkey_patch.rb +37 -0
- data/lib/www/mechanize/page/base.rb +10 -0
- data/lib/www/mechanize/page/frame.rb +22 -0
- data/lib/www/mechanize/page/link.rb +50 -0
- data/lib/www/mechanize/page/meta.rb +51 -0
- data/lib/www/mechanize/page.rb +176 -0
- data/lib/www/mechanize/pluggable_parsers.rb +103 -0
- data/lib/www/mechanize/redirect_limit_reached_error.rb +18 -0
- data/lib/www/mechanize/redirect_not_get_or_head_error.rb +20 -0
- data/lib/www/mechanize/response_code_error.rb +25 -0
- data/lib/www/mechanize/unsupported_scheme_error.rb +10 -0
- data/lib/www/mechanize/util.rb +76 -0
- data/lib/www/mechanize.rb +619 -0
- data/mechanize.gemspec +41 -0
- data/test/chain/test_argument_validator.rb +14 -0
- data/test/chain/test_auth_headers.rb +25 -0
- data/test/chain/test_custom_headers.rb +18 -0
- data/test/chain/test_header_resolver.rb +28 -0
- data/test/chain/test_parameter_resolver.rb +35 -0
- data/test/chain/test_request_resolver.rb +29 -0
- data/test/chain/test_response_reader.rb +24 -0
- data/test/data/htpasswd +1 -0
- data/test/data/server.crt +16 -0
- data/test/data/server.csr +12 -0
- data/test/data/server.key +15 -0
- data/test/data/server.pem +15 -0
- data/test/helper.rb +129 -0
- data/test/htdocs/alt_text.html +10 -0
- data/test/htdocs/bad_form_test.html +9 -0
- data/test/htdocs/button.jpg +0 -0
- data/test/htdocs/empty_form.html +6 -0
- data/test/htdocs/file_upload.html +26 -0
- data/test/htdocs/find_link.html +41 -0
- data/test/htdocs/form_multi_select.html +16 -0
- data/test/htdocs/form_multival.html +37 -0
- data/test/htdocs/form_no_action.html +18 -0
- data/test/htdocs/form_no_input_name.html +16 -0
- data/test/htdocs/form_select.html +16 -0
- data/test/htdocs/form_select_all.html +16 -0
- data/test/htdocs/form_select_none.html +17 -0
- data/test/htdocs/form_select_noopts.html +10 -0
- data/test/htdocs/form_set_fields.html +14 -0
- data/test/htdocs/form_test.html +188 -0
- data/test/htdocs/frame_test.html +30 -0
- data/test/htdocs/google.html +13 -0
- data/test/htdocs/iframe_test.html +16 -0
- data/test/htdocs/index.html +6 -0
- data/test/htdocs/link with space.html +5 -0
- data/test/htdocs/meta_cookie.html +11 -0
- data/test/htdocs/no_title_test.html +6 -0
- data/test/htdocs/relative/tc_relative_links.html +21 -0
- data/test/htdocs/tc_bad_links.html +5 -0
- data/test/htdocs/tc_base_link.html +8 -0
- data/test/htdocs/tc_blank_form.html +11 -0
- data/test/htdocs/tc_checkboxes.html +19 -0
- data/test/htdocs/tc_encoded_links.html +5 -0
- data/test/htdocs/tc_follow_meta.html +8 -0
- data/test/htdocs/tc_form_action.html +48 -0
- data/test/htdocs/tc_links.html +18 -0
- data/test/htdocs/tc_no_attributes.html +16 -0
- data/test/htdocs/tc_pretty_print.html +17 -0
- data/test/htdocs/tc_radiobuttons.html +17 -0
- data/test/htdocs/tc_referer.html +10 -0
- data/test/htdocs/tc_relative_links.html +19 -0
- data/test/htdocs/tc_textarea.html +23 -0
- data/test/htdocs/unusual______.html +5 -0
- data/test/servlets.rb +365 -0
- data/test/ssl_server.rb +48 -0
- data/test/test_authenticate.rb +71 -0
- data/test/test_bad_links.rb +25 -0
- data/test/test_blank_form.rb +16 -0
- data/test/test_checkboxes.rb +61 -0
- data/test/test_content_type.rb +13 -0
- data/test/test_cookie_class.rb +338 -0
- data/test/test_cookie_jar.rb +362 -0
- data/test/test_cookies.rb +123 -0
- data/test/test_encoded_links.rb +20 -0
- data/test/test_errors.rb +49 -0
- data/test/test_follow_meta.rb +108 -0
- data/test/test_form_action.rb +44 -0
- data/test/test_form_as_hash.rb +61 -0
- data/test/test_form_button.rb +38 -0
- data/test/test_form_no_inputname.rb +15 -0
- data/test/test_forms.rb +564 -0
- data/test/test_frames.rb +25 -0
- data/test/test_get_headers.rb +52 -0
- data/test/test_gzipping.rb +22 -0
- data/test/test_hash_api.rb +45 -0
- data/test/test_history.rb +142 -0
- data/test/test_history_added.rb +16 -0
- data/test/test_html_unscape_forms.rb +39 -0
- data/test/test_if_modified_since.rb +20 -0
- data/test/test_keep_alive.rb +31 -0
- data/test/test_links.rb +120 -0
- data/test/test_mech.rb +268 -0
- data/test/test_mechanize_file.rb +47 -0
- data/test/test_meta.rb +65 -0
- data/test/test_multi_select.rb +106 -0
- data/test/test_no_attributes.rb +13 -0
- data/test/test_option.rb +18 -0
- data/test/test_page.rb +119 -0
- data/test/test_pluggable_parser.rb +145 -0
- data/test/test_post_form.rb +34 -0
- data/test/test_pretty_print.rb +22 -0
- data/test/test_radiobutton.rb +75 -0
- data/test/test_redirect_limit_reached.rb +41 -0
- data/test/test_redirect_verb_handling.rb +45 -0
- data/test/test_referer.rb +39 -0
- data/test/test_relative_links.rb +40 -0
- data/test/test_request.rb +13 -0
- data/test/test_response_code.rb +52 -0
- data/test/test_save_file.rb +48 -0
- data/test/test_scheme.rb +48 -0
- data/test/test_select.rb +106 -0
- data/test/test_select_all.rb +15 -0
- data/test/test_select_none.rb +15 -0
- data/test/test_select_noopts.rb +16 -0
- data/test/test_set_fields.rb +44 -0
- data/test/test_ssl_server.rb +20 -0
- data/test/test_subclass.rb +14 -0
- data/test/test_textarea.rb +45 -0
- data/test/test_upload.rb +109 -0
- data/test/test_verbs.rb +25 -0
- metadata +314 -0
data/README.rdoc
ADDED
@@ -0,0 +1,60 @@
|
|
1
|
+
= WWW::Mechanize
|
2
|
+
|
3
|
+
* http://mechanize.rubyforge.org/
|
4
|
+
* http://github.com/tenderlove/mechanize/tree/master
|
5
|
+
|
6
|
+
== DESCRIPTION
|
7
|
+
|
8
|
+
The Mechanize library is used for automating interaction with websites.
|
9
|
+
Mechanize automatically stores and sends cookies, follows redirects,
|
10
|
+
can follow links, and can submit forms. Form fields can be populated and
|
11
|
+
submitted. Mechanize also keeps track of the sites that you have visited as
|
12
|
+
a history.
|
13
|
+
|
14
|
+
== Dependencies
|
15
|
+
|
16
|
+
* ruby 1.8.6
|
17
|
+
* nokogiri[http://nokogiri.rubyforge.org]
|
18
|
+
|
19
|
+
== SUPPORT:
|
20
|
+
|
21
|
+
The mechanize mailing list is available here:
|
22
|
+
|
23
|
+
* http://rubyforge.org/mailman/listinfo/mechanize-users
|
24
|
+
|
25
|
+
The bug tracker is available here:
|
26
|
+
|
27
|
+
* http://rubyforge.org/tracker/?atid=5709&group_id=1453
|
28
|
+
|
29
|
+
== Examples
|
30
|
+
|
31
|
+
If you are just starting, check out the GUIDE.
|
32
|
+
Also, check out the EXAMPLES file.
|
33
|
+
|
34
|
+
== Authors
|
35
|
+
|
36
|
+
Copyright (c) 2005 by Michael Neumann (mneumann@ntecs.de)
|
37
|
+
|
38
|
+
Copyright (c) 2006-2009:
|
39
|
+
|
40
|
+
* {Aaron Patterson}[http://tenderlovemaking.com] (aaronp@rubyforge.org)
|
41
|
+
* {Mike Dalessio}[http://mike.daless.io] (mike@csa.net)
|
42
|
+
|
43
|
+
This library comes with a shameless plug for employing me
|
44
|
+
(Aaron[http://tenderlovemaking.com/]) programming
|
45
|
+
Ruby, my favorite language!
|
46
|
+
|
47
|
+
== Acknowledgments
|
48
|
+
|
49
|
+
This library was heavily influenced by its namesake in the Perl world. A big
|
50
|
+
thanks goes to Andy Lester (andy@petdance.com), the author of the original
|
51
|
+
Perl Mechanize which is available here[http://search.cpan.org/~petdance/WWW-Mechanize-1.20/]. Ruby Mechanize would not be around without you!
|
52
|
+
|
53
|
+
Thank you to Michael Neumann for starting the Ruby version. Thanks to everyone
|
54
|
+
who's helped out in various ways. Finally, thank you to the people using this
|
55
|
+
library!
|
56
|
+
|
57
|
+
== License
|
58
|
+
|
59
|
+
This library is distributed under the GPL. Please see the LICENSE file.
|
60
|
+
|
data/Rakefile
ADDED
@@ -0,0 +1,43 @@
|
|
1
|
+
require 'rubygems'
require 'hoe'

# Make the in-tree library loadable so WWW::Mechanize::VERSION is available.
$LOAD_PATH.unshift File.join(File.dirname(__FILE__), "lib")
require 'mechanize'

# Hoe project definition. The resulting object is kept in HOE because the
# gem:spec and coverage tasks below read its name, version, spec and test
# globs.
HOE = Hoe.new('mechanize', WWW::Mechanize::VERSION) { |project|
  project.developer('Aaron Patterson', 'aaronp@rubyforge.org')
  project.developer('Mike Dalessio', 'mike.dalessio@gmail.com')
  project.readme_file      = 'README.rdoc'
  project.history_file     = 'CHANGELOG.rdoc'
  project.extra_rdoc_files = FileList['*.rdoc']
  project.summary          = "Mechanize provides automated web-browsing"
  project.extra_deps       = [['nokogiri', '>= 1.2.1']]
}

desc "Update SSL Certificate"
task 'ssl_cert' do
  # Commands run strictly in this order; +sh+ aborts the task as soon as one
  # of them fails, so later steps never see a half-built certificate.
  [
    "openssl genrsa -des3 -out server.key 1024",
    "openssl req -new -key server.key -out server.csr",
    "cp server.key server.key.org",
    "openssl rsa -in server.key.org -out server.key",
    "openssl x509 -req -days 365 -in server.csr -signkey server.key -out server.crt",
    "cp server.key server.pem",
    "mv server.key server.csr server.crt server.pem test/data/",
    "rm server.key.org",
  ].each { |command| sh command }
end

namespace :gem do
  desc 'Generate a gem spec'
  task :spec do
    File.open("#{HOE.name}.gemspec", 'w') do |gemspec|
      # Append a timestamp so every generated spec gets a unique version.
      timestamp = Time.now.strftime("%Y%m%d%H%M%S")
      HOE.spec.version = "#{HOE.version}.#{timestamp}"
      gemspec.write(HOE.spec.to_ruby)
    end
  end
end

desc "Run code-coverage analysis"
task :coverage do
  rm_rf "coverage"
  test_files = Dir[*HOE.test_globs].join(' ')
  sh "rcov -x Library -I lib:test #{test_files}"
end
|
@@ -0,0 +1,23 @@
|
|
1
|
+
$:.unshift File.join(File.dirname(__FILE__), "..", "lib")

# Logs in to flickr and uploads one file.
#
# Usage: ruby flickr_upload.rb EMAIL PASSWORD FILE
#
# The page and form collections are plain Arrays in this release (there is
# no WWW::Mechanize::List any more), so elements are looked up with the
# *_with finders instead of chaining .name / .text / .action on the
# collection.
require 'rubygems'
require 'mechanize'

agent = WWW::Mechanize.new

# Get the flickr sign in page
page = agent.get('http://flickr.com/signin/flickr/')

# Fill out the login form
form          = page.form_with(:name => 'flickrloginform')
form.email    = ARGV[0]
form.password = ARGV[1]
page          = agent.submit(form)

# Go to the upload page
page = agent.click page.link_with(:text => 'Upload')

# Fill out the form
form = page.form_with(:action => '/photos_upload_process.gne')
form.file_upload_with(:name => 'file1').file_name = ARGV[2]
agent.submit(form)
|
@@ -0,0 +1,21 @@
|
|
1
|
+
$:.unshift File.join(File.dirname(__FILE__), "..", "lib")

# This example logs a user in to rubyforge and prints out the body of the
# page after logging the user in.
#
# Usage: ruby rubyforge.rb LOGIN PASSWORD
require 'rubygems'
require 'logger'     # Logger is used below; do not rely on mechanize loading it
require 'mechanize'

# Create a new mechanize object that logs its activity to STDERR
agent = WWW::Mechanize.new { |a| a.log = Logger.new(STDERR) }

# Load the rubyforge website
page = agent.get('http://rubyforge.org/')

# Click the login link. page.links is a plain Array, so the link is looked
# up with link_with rather than the removed links.text(...) list API.
page = agent.click page.link_with(:text => /Log In/)

form = page.forms[1] # Select the second form on the page (index 1)
form.form_loginname = ARGV[0]
form.form_pw        = ARGV[1]

# Submit the form
page = agent.submit(form, form.buttons.first)

puts page.body # Print out the body
|
data/examples/spider.rb
ADDED
@@ -0,0 +1,11 @@
|
|
1
|
+
$:.unshift File.join(File.dirname(__FILE__), "..", "lib")

require 'rubygems'
require 'mechanize'

# Minimal same-host spider: starts at the URL given as the first command
# line argument and keeps clicking links until none are left to process.
agent   = WWW::Mechanize.new
pending = agent.get(ARGV[0]).links

until pending.empty?
  link = pending.pop

  # Only follow links whose host matches the first page in the history.
  next if link.uri.host != agent.history.first.uri.host
  # Skip anything the agent has already fetched.
  next if agent.visited?(link.href)

  pending.push(*agent.click(link).links)
end
|
data/lib/mechanize.rb
ADDED
@@ -0,0 +1,80 @@
|
|
1
|
+
module WWW
  class Mechanize
    class Chain
      # Chain handler that adds credentials to the outgoing request.
      #
      # The scheme is looked up per host in +auth_hash+ (values seen here:
      # :basic, :digest, :iis_digest). Basic auth is delegated to the
      # request object; digest auth builds the Authorization header from the
      # server challenge passed in as +digest+.
      class AuthHeaders
        include WWW::Handler

        # Number of times each server nonce has been used (the "nc" value).
        @@nonce_count = Hash.new(0)
        # Client nonce, generated once when this file is loaded.
        CNONCE = Digest::MD5.hexdigest("%x" % (Time.now.to_i + rand(65535)))

        # auth_hash:: Hash mapping host name to authentication scheme symbol
        # user::      user name sent to the server
        # password::  password for +user+
        # digest::    raw digest challenge string, or nil when none was seen
        def initialize(auth_hash, user, password, digest)
          @auth_hash = auth_hash
          @user      = user
          @password  = password
          @digest    = digest
        end

        # Adds authentication to params[:request] according to the scheme
        # registered for params[:uri].host, then passes control down the
        # chain. Hosts without a registered scheme are left untouched.
        def handle(ctx, params)
          uri     = params[:uri]
          request = params[:request]
          scheme  = @auth_hash[uri.host]

          case scheme
          when :basic
            request.basic_auth(@user, @password)
          when :iis_digest, :digest
            # Both digest flavours need a challenge to answer; without one
            # there is nothing to compute, so the request goes out as is.
            if @digest
              request['Authorization'] =
                gen_auth_header(uri, request, @digest, scheme == :iis_digest)
            end
          end
          super
        end

        # Builds the value of a Digest Authorization header.
        #
        # uri::         URI being requested (its path is used as digest-uri)
        # request::     request object; only +method+ is read
        # auth_header:: challenge string of the form "Digest k=v, k=\"v\", ..."
        # is_IIS::      when true the qop value is sent quoted
        #
        # Returns the header value as a String. Raises ArgumentError when
        # the challenge cannot be parsed or carries no nonce.
        def gen_auth_header(uri, request, auth_header, is_IIS = false)
          unless auth_header =~ /^(\w+) (.*)/
            raise ArgumentError,
                  "malformed digest challenge: #{auth_header.inspect}"
          end
          challenge = $2

          params = {}
          challenge.scan(/(\w+)=("[^"]*"|[^,]*)/) { |key, value|
            params[key] = value.gsub(/^"/, '').gsub(/"$/, '')
          }

          nonce = params['nonce']
          qop   = params['qop']
          raise ArgumentError, "digest challenge is missing a nonce" unless nonce

          @@nonce_count[nonce] += 1
          nc = '%08x' % @@nonce_count[nonce]

          # NOTE(review): RFC 2617 defines digest-uri as the full request-URI;
          # the path alone is kept here to preserve existing behaviour.
          ha1 = Digest::MD5.hexdigest("#{@user}:#{params['realm']}:#{@password}")
          ha2 = Digest::MD5.hexdigest("#{request.method}:#{uri.path}")

          # Without qop the server expects the RFC 2069 style response, which
          # leaves out nc, cnonce and qop.
          request_digest =
            if qop
              [ha1, nonce, nc, CNONCE, qop, ha2].join(':')
            else
              [ha1, nonce, ha2].join(':')
            end

          fields = ["username=\"#{@user}\""]
          fields << (is_IIS ? "qop=\"#{qop}\"" : "qop=#{qop}") if qop
          fields << "uri=\"#{uri.path}\""
          %w{ algorithm opaque nonce realm }.each do |field|
            fields << "#{field}=\"#{params[field]}\"" if params[field]
          end
          if qop
            fields << "nc=#{nc}"
            fields << "cnonce=\"#{CNONCE}\""
          end
          fields << "response=\"#{Digest::MD5.hexdigest(request_digest)}\""

          "Digest " + fields.join(', ')
        end
      end
    end
  end
end
|
@@ -0,0 +1,48 @@
|
|
1
|
+
module WWW
  class Mechanize
    class Chain
      # Chain handler that replaces options[:response_body] (an IO-like
      # object) with the decoded body String, based on the response's
      # Content-Encoding header.
      class BodyDecodingHandler
        include WWW::Handler

        # Decodes the body and passes control down the chain.
        # Raises RuntimeError for any encoding other than gzip / x-gzip.
        def handle(ctx, options)
          body     = options[:response_body]
          response = options[:response]
          encoding = response['Content-Encoding']

          decoded =
            if !encoding
              body.read
            else
              case encoding.downcase
              when 'gzip'   then gunzip_body(response, body)
              when 'x-gzip' then body.read
              else raise 'Unsupported content encoding'
              end
            end

          options[:response_body] = decoded
          super
        end

        private

        # Returns the gunzipped content of +body+, or '' when the response
        # is empty or the data cannot be decoded.
        def gunzip_body(response, body)
          Mechanize.log.debug('gunzip body') if Mechanize.log

          has_content = response['Content-Length'].to_i > 0 || body.length > 0
          return '' unless has_content

          begin
            Zlib::GzipReader.new(body).read
          rescue Zlib::BufError, Zlib::GzipFile::Error
            Mechanize.log.error('Caught a Zlib::BufError') if Mechanize.log
            # Retry as a raw deflate stream: rewind, skip the first 10 bytes
            # and inflate whatever follows.
            body.rewind
            body.read(10)
            Zlib::Inflate.new(-Zlib::MAX_WBITS).inflate(body.read)
          rescue Zlib::DataError
            if Mechanize.log
              Mechanize.log.error("Caught a Zlib::DataError, unable to decode page: #{$!.to_s}")
            end
            ''
          end
        end
      end
    end
  end
end
|
@@ -0,0 +1,78 @@
|
|
1
|
+
module WWW
  class Mechanize
    class Chain
      # Chain handler that stores a connection object for params[:uri] in
      # params[:connection]. http/https URIs get a Net::HTTP object cached
      # per "host:port"; file URIs get a small stand-in object.
      class ConnectionResolver
        include WWW::Handler

        # connection_cache:: Hash keyed by "host:port" holding connection state
        # keep_alive::       whether connections are reused between requests
        # proxy_addr, proxy_port, proxy_user, proxy_pass::
        #                    proxy settings handed to Net::HTTP.new
        def initialize( connection_cache,
                        keep_alive,
                        proxy_addr,
                        proxy_port,
                        proxy_user,
                        proxy_pass )

          @connection_cache = connection_cache
          @keep_alive       = keep_alive
          @proxy_addr       = proxy_addr
          @proxy_port       = proxy_port
          @proxy_user       = proxy_user
          @proxy_pass       = proxy_pass
        end

        # Picks the connection for the URI's scheme (nil for any scheme
        # other than http, https or file) and continues down the chain.
        def handle(ctx, params)
          uri = params[:uri]

          connection =
            case uri.scheme.downcase
            when 'http', 'https' then http_connection(uri)
            when 'file'          then file_connection
            end

          params[:connection] = connection
          super
        end

        private

        # Returns the cached Net::HTTP object for +uri+, creating a new one
        # when none exists or the cached one is not started, and records the
        # time of this request in the cache entry.
        def http_connection(uri)
          entry = (@connection_cache["#{uri.host}:#{uri.port}"] ||= {
            :connection         => nil,
            :keep_alive_options => {},
          })

          connection = entry[:connection]
          unless connection && connection.started?
            connection = entry[:connection] =
              Net::HTTP.new( uri.host,
                             uri.port,
                             @proxy_addr,
                             @proxy_port,
                             @proxy_user,
                             @proxy_pass
                           )
            entry[:keep_alive_options] = {}
          end

          # If we're keeping connections alive and the last request time is
          # too long ago, stop the connection. Or, if the max requests left
          # is 1, reset the connection.
          if @keep_alive && connection.started? && stale?(entry)
            Mechanize.log.debug('Finishing stale connection') if Mechanize.log
            connection.finish
          end

          entry[:last_request_time] = Time.now.to_i
          connection
        end

        # True when the server's keep-alive timeout has elapsed since the
        # last request, or when the server reported max=1.
        def stale?(entry)
          opts = entry[:keep_alive_options]
          if opts[:timeout] &&
             Time.now.to_i - entry[:last_request_time] > opts[:timeout].to_i
            return true
          end
          opts[:max] && opts[:max].to_i == 1
        end

        # Builds the stand-in connection for file:// URIs: it always reports
        # itself as started and yields a FileResponse for the request's path.
        def file_connection
          connection = Object.new
          def connection.started?; true; end
          def connection.request(request, *args, &block)
            response = FileResponse.new(request.uri.path)
            yield response
          end
          connection
        end
      end
    end
  end
end
|
@@ -0,0 +1,23 @@
|
|
1
|
+
module WWW
  class Mechanize
    class Chain
      # Chain handler that copies the per-request headers in
      # params[:headers] onto params[:request].
      class CustomHeaders
        include WWW::Handler

        # Keys may be header-name Strings, or one of the symbols :etag and
        # :if_modified_since. Any other Symbol raises ArgumentError.
        def handle(ctx, params)
          request = params[:request]

          params[:headers].each do |key, value|
            header_name =
              case key
              when :etag              then "ETag"
              when :if_modified_since then "If-Modified-Since"
              when Symbol
                raise ArgumentError, "unknown header symbol #{key}"
              else
                key
              end
            request[header_name] = value
          end

          super
        end
      end
    end
  end
end
|
@@ -0,0 +1,53 @@
|
|
1
|
+
module WWW
  class Mechanize
    class Chain
      # Chain handler that sets the standard request headers: connection
      # handling, content negotiation, Host, Cookie, Referer, User-Agent and
      # any agent-wide extra headers.
      class HeaderResolver
        include WWW::Handler

        # keep_alive::      whether to ask the server to keep the connection open
        # keep_alive_time:: value sent in the Keep-Alive header
        # cookie_jar::      jar queried for cookies matching the request URI
        # user_agent::      User-Agent string, or nil to send none
        # headers::         Hash of extra headers applied last
        def initialize(keep_alive, keep_alive_time, cookie_jar, user_agent, headers)
          @keep_alive      = keep_alive
          @keep_alive_time = keep_alive_time
          @cookie_jar      = cookie_jar
          @user_agent      = user_agent
          @headers         = headers
        end

        # Populates params[:request] with headers derived from params[:uri]
        # and params[:referer], then continues down the chain.
        def handle(ctx, params)
          uri     = params[:uri]
          referer = params[:referer]
          request = params[:request]

          if @keep_alive
            request['Connection'] = 'keep-alive'
            request['Keep-Alive'] = @keep_alive_time.to_s
          else
            request['Connection'] = 'close'
          end
          request['Accept-Encoding'] = 'gzip,identity'
          request['Accept-Language'] = 'en-us,en;q=0.5'

          # The port may only be left out of Host when it is the default for
          # this URI's own scheme; e.g. http://example.com:443 must still
          # send "example.com:443".
          host = uri.host.to_s
          unless uri.port.nil? || uri.port == uri.default_port
            host = "#{host}:#{uri.port}"
          end
          request['Host'] = host
          request['Accept-Charset'] = 'ISO-8859-1,utf-8;q=0.7,*;q=0.7'

          unless @cookie_jar.empty?(uri)
            cookies = @cookie_jar.cookies(uri)
            # Never add a nil Cookie field when the jar returns nothing.
            request.add_field('Cookie', cookies.join("; ")) unless cookies.empty?
          end

          # Add Referer header to request
          if referer && referer.uri
            request['Referer'] = referer.uri.to_s
          end

          # Add User-Agent header to request
          request['User-Agent'] = @user_agent if @user_agent

          # Agent-wide headers go last so they override the defaults above.
          @headers.each do |k, v|
            request[k] = v
          end
          super
        end
      end
    end
  end
end
|
@@ -0,0 +1,24 @@
|
|
1
|
+
module WWW
  class Mechanize
    class Chain
      # Chain handler that moves request parameters into the URI's query
      # string for verbs that do not carry a body.
      class ParameterResolver
        include WWW::Handler

        # For :head, :get, :delete and :trace the parameters in
        # params[:params] are appended to params[:uri].query and
        # params[:params] is reset to an empty Array. Other verbs are left
        # untouched.
        def handle(ctx, params)
          parameters = params[:params]
          uri        = params[:uri]

          case params[:verb]
          when :head, :get, :delete, :trace
            # params[:params] may be missing when the caller has no
            # parameters to send; treat nil like an empty list.
            if parameters && parameters.length > 0
              uri.query ||= ''
              uri.query << '&' if uri.query.length > 0
              uri.query << Util.build_query_string(parameters)
            end
            params[:params] = []
          end
          super
        end
      end
    end
  end
end
|
File without changes
|
@@ -0,0 +1,22 @@
|
|
1
|
+
module WWW
  class Mechanize
    class Chain
      # Chain handler that runs user-supplied callbacks. Each entry in
      # +hooks+ must respond to +call+ and receives the chain's params Hash.
      class PreConnectHook
        include WWW::Handler

        # Array of callables invoked, in order, by #handle.
        attr_accessor :hooks

        def initialize
          @hooks = []
        end

        # Calls every registered hook with +params+, then continues down the
        # chain.
        def handle(ctx, params)
          @hooks.each do |callback|
            callback.call(params)
          end
          super
        end
      end

      # Same behaviour as PreConnectHook; a separate class so it can be
      # placed at a different point in the chain.
      class PostConnectHook < PreConnectHook
      end
    end
  end
end
|
@@ -0,0 +1,32 @@
|
|
1
|
+
module WWW
  class Mechanize
    class Chain
      # Chain handler that makes sure params[:request] holds a request
      # object suited to the scheme of params[:uri]. An existing request is
      # never replaced.
      class RequestResolver
        include WWW::Handler

        # http/https: a Net::HTTP request class is picked from params[:verb]
        # (e.g. :get -> Net::HTTP::Get). file: a stand-in object is used.
        def handle(ctx, params)
          uri = params[:uri]

          case uri.scheme.downcase
          when 'http', 'https'
            request_class = Net::HTTP.const_get(params[:verb].to_s.capitalize)
            params[:request] ||= request_class.new(uri.request_uri)
          when 'file'
            params[:request] ||= file_request(uri)
          end

          super
        end

        private

        # Builds the stand-in request for file:// URIs. It exposes +uri+ and
        # +path+, and accepts and ignores header writes and header iteration.
        def file_request(uri)
          stub = Struct.new(:uri).new(uri)
          def stub.add_field(*args); end
          def stub.[]=(*args); end
          def stub.path
            uri.path
          end
          def stub.each_header; end
          stub
        end
      end
    end
  end
end
|
@@ -0,0 +1,40 @@
|
|
1
|
+
module WWW
  class Mechanize
    class Chain
      # Chain handler that turns the response into a page object using the
      # pluggable parser registered for the response's content type. The
      # result is stored in params[:page].
      class ResponseBodyParser
        include WWW::Handler

        # pluggable_parser:: object whose +parser(content_type)+ returns the
        #                    class to instantiate
        # watch_for_set::    passed on to parsers that accept it; may be nil
        def initialize(pluggable_parser, watch_for_set)
          @pluggable_parser = pluggable_parser
          @watch_for_set    = watch_for_set
        end

        # Builds params[:page] and continues down the chain.
        def handle(ctx, params)
          response      = params[:response]
          response_body = params[:response_body]
          uri           = params[:uri]

          content_type = nil
          unless response['Content-Type'].nil?
            # Keep only the media type: drop parameters after ';' and any
            # additional comma-separated values.
            data = response['Content-Type'].match(/^([^;]*)/)
            media_type = data[1].downcase.split(',')[0] unless data.nil?
            # Whitespace is allowed around the ';' separator, so
            # "text/html ; charset=utf-8" must still resolve to "text/html".
            content_type = media_type.strip unless media_type.nil?
          end

          # Find our pluggable parser
          params[:page] = @pluggable_parser.parser(content_type).new(
            uri,
            response,
            response_body,
            response.code
          ) { |parser|
            parser.mech = params[:agent] if parser.respond_to? :mech=
            if parser.respond_to?(:watch_for_set=) && @watch_for_set
              parser.watch_for_set = @watch_for_set
            end
          }
          super
        end
      end
    end
  end
end
|
@@ -0,0 +1,50 @@
|
|
1
|
+
module WWW
  class Mechanize
    class Chain
      # Chain handler that records what the server told us in the response:
      # keep-alive options go into the connection cache, cookies (from
      # headers and from <meta http-equiv="Set-Cookie"> tags) go into the
      # cookie jar.
      class ResponseHeaderHandler
        include WWW::Handler

        # cookie_jar::       jar that receives parsed cookies
        # connection_cache:: Hash keyed by "host:port" holding connection state
        def initialize(cookie_jar, connection_cache)
          @cookie_jar       = cookie_jar
          @connection_cache = connection_cache
        end

        # Reads params[:response] / params[:page], updates the cache and the
        # cookie jar, then continues down the chain.
        def handle(ctx, params)
          response  = params[:response]
          uri       = params[:uri]
          page      = params[:page]
          cache_obj = (@connection_cache["#{uri.host}:#{uri.port}"] ||= {
            :connection         => nil,
            :keep_alive_options => {},
          })

          # If the server sends back keep alive options, save them
          if keep_alive_info = response['keep-alive']
            keep_alive_info.split(/,\s*/).each do |option|
              k, v = option.split(/=/)
              # Normalise the option name so "Timeout=5" or " max=10" are
              # stored under the :timeout / :max keys, and skip options
              # that have no name at all.
              key = k.to_s.strip.downcase
              next if key.empty?
              cache_obj[:keep_alive_options] ||= {}
              cache_obj[:keep_alive_options][key.intern] = v
            end
          end

          # The regexp match on the body runs first so the document is only
          # searched when the text "Set-Cookie" appears in it.
          if page.is_a?(Page) && page.body =~ /Set-Cookie/n
            page.search('//meta[@http-equiv="Set-Cookie"]').each do |meta|
              save_cookies(uri, meta['content'])
            end
          end

          (response.get_fields('Set-Cookie') || []).each do |cookie|
            save_cookies(uri, cookie)
          end
          super
        end

        private

        # Parses +cookie_string+ for +uri+ and adds every resulting cookie
        # to the jar, logging each one when a logger is configured.
        def save_cookies(uri, cookie_string)
          Cookie::parse(uri, cookie_string) { |c|
            Mechanize.log.debug("saved cookie: #{c}") if Mechanize.log
            @cookie_jar.add(uri, c)
          }
        end
      end
    end
  end
end
|