mechanize 0.7.8 → 0.8.0
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of mechanize might be problematic. Click here for more details.
- data/History.txt +14 -0
- data/Manifest.txt +30 -5
- data/README.txt +5 -5
- data/Rakefile +6 -0
- data/{eg → examples}/flickr_upload.rb +0 -0
- data/{eg → examples}/mech-dump.rb +0 -0
- data/{eg → examples}/proxy_req.rb +0 -0
- data/{eg → examples}/rubyforge.rb +0 -0
- data/{eg → examples}/spider.rb +0 -0
- data/lib/www/mechanize.rb +183 -404
- data/lib/www/mechanize/chain.rb +34 -0
- data/lib/www/mechanize/chain/auth_headers.rb +79 -0
- data/lib/www/mechanize/chain/body_decoding_handler.rb +43 -0
- data/lib/www/mechanize/chain/connection_resolver.rb +78 -0
- data/lib/www/mechanize/chain/custom_headers.rb +23 -0
- data/lib/www/mechanize/chain/handler.rb +9 -0
- data/lib/www/mechanize/chain/header_resolver.rb +47 -0
- data/lib/www/mechanize/chain/parameter_resolver.rb +23 -0
- data/lib/www/mechanize/chain/post_connect_hook.rb +0 -0
- data/lib/www/mechanize/chain/pre_connect_hook.rb +22 -0
- data/lib/www/mechanize/chain/request_resolver.rb +28 -0
- data/lib/www/mechanize/chain/response_body_parser.rb +40 -0
- data/lib/www/mechanize/chain/response_header_handler.rb +50 -0
- data/lib/www/mechanize/chain/response_reader.rb +41 -0
- data/lib/www/mechanize/chain/ssl_resolver.rb +36 -0
- data/lib/www/mechanize/chain/uri_resolver.rb +56 -0
- data/lib/www/mechanize/cookie.rb +1 -1
- data/lib/www/mechanize/file_response.rb +60 -0
- data/lib/www/mechanize/form.rb +12 -4
- data/lib/www/mechanize/form/field.rb +2 -2
- data/lib/www/mechanize/form/file_upload.rb +1 -1
- data/lib/www/mechanize/form/option.rb +1 -1
- data/lib/www/mechanize/list.rb +4 -0
- data/lib/www/mechanize/page.rb +20 -10
- data/lib/www/mechanize/util.rb +29 -0
- data/mechanize.gemspec +4 -4
- data/test/chain/test_argument_validator.rb +14 -0
- data/test/chain/test_custom_headers.rb +18 -0
- data/test/chain/test_parameter_resolver.rb +35 -0
- data/test/chain/test_request_resolver.rb +29 -0
- data/test/chain/test_response_reader.rb +24 -0
- data/test/helper.rb +3 -1
- data/test/servlets.rb +43 -0
- data/test/test_authenticate.rb +13 -12
- data/test/test_bad_links.rb +1 -1
- data/test/test_blank_form.rb +1 -1
- data/test/test_checkboxes.rb +1 -1
- data/test/test_content_type.rb +1 -1
- data/test/test_cookie_class.rb +1 -1
- data/test/test_cookie_jar.rb +1 -1
- data/test/test_cookies.rb +1 -1
- data/test/test_encoded_links.rb +1 -1
- data/test/test_errors.rb +1 -1
- data/test/test_follow_meta.rb +1 -1
- data/test/test_form_action.rb +1 -1
- data/test/test_form_as_hash.rb +1 -1
- data/test/test_form_button.rb +22 -17
- data/test/test_form_no_inputname.rb +1 -1
- data/test/test_forms.rb +2 -1
- data/test/test_frames.rb +1 -1
- data/test/test_get_headers.rb +1 -1
- data/test/test_gzipping.rb +1 -1
- data/test/test_hash_api.rb +17 -14
- data/test/test_history.rb +1 -1
- data/test/test_history_added.rb +1 -1
- data/test/test_html_unscape_forms.rb +1 -1
- data/test/test_if_modified_since.rb +1 -1
- data/test/test_keep_alive.rb +1 -1
- data/test/test_links.rb +1 -1
- data/test/test_mech.rb +18 -11
- data/test/test_mechanize_file.rb +1 -1
- data/test/test_multi_select.rb +1 -1
- data/test/test_no_attributes.rb +1 -1
- data/test/test_option.rb +2 -1
- data/test/test_page.rb +1 -1
- data/test/test_pluggable_parser.rb +1 -1
- data/test/test_post_form.rb +1 -1
- data/test/test_pretty_print.rb +1 -1
- data/test/test_radiobutton.rb +1 -1
- data/test/test_redirect_limit_reached.rb +1 -1
- data/test/test_referer.rb +1 -1
- data/test/test_relative_links.rb +1 -1
- data/test/test_response_code.rb +7 -1
- data/test/test_save_file.rb +1 -1
- data/test/test_scheme.rb +44 -0
- data/test/test_select.rb +1 -1
- data/test/test_select_all.rb +1 -1
- data/test/test_select_none.rb +1 -1
- data/test/test_select_noopts.rb +1 -1
- data/test/test_set_fields.rb +1 -1
- data/test/test_ssl_server.rb +1 -1
- data/test/test_subclass.rb +4 -11
- data/test/test_textarea.rb +1 -1
- data/test/test_upload.rb +1 -1
- data/test/test_verbs.rb +22 -0
- metadata +39 -7
@@ -0,0 +1,34 @@
|
|
1
|
+
require 'www/mechanize/chain/handler'
|
2
|
+
require 'www/mechanize/chain/uri_resolver'
|
3
|
+
require 'www/mechanize/chain/parameter_resolver'
|
4
|
+
require 'www/mechanize/chain/request_resolver'
|
5
|
+
require 'www/mechanize/chain/custom_headers'
|
6
|
+
require 'www/mechanize/chain/connection_resolver'
|
7
|
+
require 'www/mechanize/chain/ssl_resolver'
|
8
|
+
require 'www/mechanize/chain/pre_connect_hook'
|
9
|
+
require 'www/mechanize/chain/auth_headers'
|
10
|
+
require 'www/mechanize/chain/header_resolver'
|
11
|
+
require 'www/mechanize/chain/response_body_parser'
|
12
|
+
require 'www/mechanize/chain/response_header_handler'
|
13
|
+
require 'www/mechanize/chain/response_reader'
|
14
|
+
require 'www/mechanize/chain/body_decoding_handler'
|
15
|
+
|
16
|
+
module WWW
  class Mechanize
    # An ordered pipeline of handler objects.  A request enters at the first
    # handler; each handler forwards it to its successor by calling #pass on
    # the chain it was registered with.
    class Chain
      # Builds a chain from +list+, an ordered collection of handlers.
      # Every handler is given a back-reference to this chain through its
      # +chain=+ writer.
      def initialize(list)
        @list = list
        @list.each { |l| l.chain = self }
      end

      # Starts processing by handing +request+ to the first handler.
      #
      # Returns whatever the first handler returns, or nil when the chain
      # holds no handlers at all.
      def handle(request)
        head = @list.first
        head.handle(self, request) if head
      end

      # Forwards +request+ to the handler that follows +obj+ in the chain.
      #
      # Returns the successor's result, or nil when +obj+ is the last link.
      # Raises ArgumentError when +obj+ is not a member of this chain, so the
      # mistake is reported directly instead of surfacing as a NoMethodError
      # on nil.
      def pass(obj, request)
        position = @list.index(obj)
        unless position
          raise ArgumentError, "handler #{obj.class} is not part of this chain"
        end

        next_link = @list[position + 1]
        next_link.handle(self, request) if next_link
      end
    end
  end
end
|
@@ -0,0 +1,79 @@
|
|
1
|
+
module WWW
  class Mechanize
    class Chain
      # Chain handler that adds credentials to the outgoing request.  The
      # scheme is looked up per host in the auth hash: :basic, :digest or
      # :iis_digest.  Hosts without an entry are passed through untouched.
      class AuthHeaders
        include WWW::Handler

        # Counter rendered as the "nc" value of digest headers.  It is a class
        # variable, so it is shared by every instance and bumped once per
        # generated header.
        @@nonce_count = -1

        # Client nonce sent as "cnonce"; computed once when this class body
        # is evaluated.
        CNONCE = Digest::MD5.hexdigest("%x" % (Time.now.to_i + rand(65535)))

        # auth_hash:: maps a host name to :basic, :digest or :iis_digest
        # user::      user name used for both basic and digest credentials
        # password::  password used for both basic and digest credentials
        # digest::    the digest challenge string that #gen_auth_header parses
        #             (nil when no challenge is available)
        def initialize(auth_hash, user, password, digest)
          @auth_hash = auth_hash
          @user      = user
          @password  = password
          @digest    = digest
        end

        # Reads params[:uri] and params[:request], attaches the credentials
        # matching the host's scheme, then passes control down the chain.
        def handle(ctx, params)
          uri     = params[:uri]
          request = params[:request]

          scheme = @auth_hash[uri.host]
          case scheme
          when :basic
            request.basic_auth(@user, @password)
          when :digest, :iis_digest
            # A digest answer can only be computed from a challenge.  Both
            # digest flavours now share this guard; previously :iis_digest
            # skipped it and failed inside gen_auth_header on a nil challenge.
            if @digest
              digest_response =
                gen_auth_header(uri, request, @digest, scheme == :iis_digest)
              request.add_field('Authorization', digest_response)
            end
          end
          super
        end

        # Builds the value of a Digest "Authorization" header.
        #
        # uri::         request URI; only its path takes part in the digest
        # request::     request object; its +method+ takes part in the digest
        # auth_header:: challenge text of the form "<scheme> key=value, ..."
        # is_IIS::      when true the qop value is emitted in double quotes
        #
        # Returns the header value as a String.
        #
        # NOTE(review): a challenge without a "qop" entry makes the string
        # append below fail, because nil cannot be appended to a String; a
        # challenge that does not match "<word> <rest>" fails on $2 being nil.
        def gen_auth_header(uri, request, auth_header, is_IIS = false)
          @@nonce_count += 1

          auth_header =~ /^(\w+) (.*)/

          # Collect the key=value pairs of the challenge, stripping one
          # leading and one trailing double quote from each value.
          params = {}
          $2.scan(/(\w+)=("[^"]*"|[^,]*)/) do |key, value|
            params[key] = value.gsub(/^"/, '').gsub(/"$/, '')
          end

          a_1 = "#{@user}:#{params['realm']}:#{@password}"
          a_2 = "#{request.method}:#{uri.path}"

          # MD5(a_1):nonce:nc:cnonce:qop:MD5(a_2), hashed once more below.
          request_digest = ''
          request_digest << Digest::MD5.hexdigest(a_1)
          request_digest << ':' << params['nonce']
          request_digest << ':' << ('%08x' % @@nonce_count)
          request_digest << ':' << CNONCE
          request_digest << ':' << params['qop']
          request_digest << ':' << Digest::MD5.hexdigest(a_2)

          header = ''
          header << "Digest username=\"#{@user}\", "
          header << "realm=\"#{params['realm']}\", "
          if is_IIS then
            header << "qop=\"#{params['qop']}\", "
          else
            header << "qop=#{params['qop']}, "
          end
          header << "uri=\"#{uri.path}\", "
          header << "algorithm=\"#{params['algorithm']}\", "
          header << "opaque=\"#{params['opaque']}\", " if params['opaque']
          header << "nonce=\"#{params['nonce']}\", "
          header << "nc=#{'%08x' % @@nonce_count}, "
          header << "cnonce=\"#{CNONCE}\", "
          header << "response=\"#{Digest::MD5.hexdigest(request_digest)}\""

          return header
        end
      end
    end
  end
end
|
@@ -0,0 +1,43 @@
|
|
1
|
+
module WWW
  class Mechanize
    class Chain
      # Chain handler that replaces options[:response_body] (an IO-like
      # object) with the decoded body string, based on the response's
      # Content-Encoding header.
      class BodyDecodingHandler
        include WWW::Handler

        # Decodes the body and continues down the chain.
        #
        # Raises RuntimeError for any Content-Encoding other than gzip or
        # x-gzip.
        def handle(ctx, options)
          body     = options[:response_body]
          response = options[:response]
          encoding = response['Content-Encoding']

          options[:response_body] =
            case encoding && encoding.downcase
            when nil
              # No Content-Encoding header: the body is used as it is.
              body.read
            when 'gzip'
              gunzip(response, body)
            when 'x-gzip'
              # NOTE(review): x-gzip bodies are returned without being
              # decompressed -- confirm that this is intentional.
              body.read
            else
              raise 'Unsupported content encoding'
            end
          super
        end

        private

        # Inflates a gzip body.  Returns '' when both the Content-Length
        # header and the body length are zero.  When the gzip reader rejects
        # the data, the first 10 bytes are skipped and the remainder is
        # inflated as a raw deflate stream.
        def gunzip(response, body)
          Mechanize.log.debug('gunzip body') if Mechanize.log
          return '' unless response['Content-Length'].to_i > 0 || body.length > 0

          begin
            Zlib::GzipReader.new(body).read
          rescue Zlib::BufError, Zlib::GzipFile::Error
            Mechanize.log.error('Caught a Zlib::BufError') if Mechanize.log
            body.rewind
            body.read(10)
            Zlib::Inflate.new(-Zlib::MAX_WBITS).inflate(body.read)
          end
        end
      end
    end
  end
end
|
@@ -0,0 +1,78 @@
|
|
1
|
+
module WWW
  class Mechanize
    class Chain
      # Chain handler that picks the connection object for a request and
      # stores it in params[:connection].  http/https URIs get a Net::HTTP
      # object cached per "host:port"; file URIs get a small stand-in object
      # that answers requests with a FileResponse.
      class ConnectionResolver
        include WWW::Handler

        # connection_cache:: Hash keyed by "host:port"; each entry holds
        #                    :connection, :keep_alive_options and
        #                    :last_request_time
        # keep_alive::       whether started connections should be checked
        #                    for staleness before reuse
        # proxy_addr, proxy_port, proxy_user, proxy_pass::
        #                    forwarded unchanged to Net::HTTP.new
        def initialize( connection_cache,
                        keep_alive,
                        proxy_addr,
                        proxy_port,
                        proxy_user,
                        proxy_pass )

          @connection_cache = connection_cache
          @keep_alive = keep_alive
          @proxy_addr = proxy_addr
          @proxy_port = proxy_port
          @proxy_user = proxy_user
          @proxy_pass = proxy_pass
        end

        # Resolves the connection for params[:uri], records it under
        # params[:connection] and continues down the chain.  For a scheme
        # other than http, https or file the recorded connection is nil.
        def handle(ctx, params)
          uri = params[:uri]
          http_obj = nil

          case uri.scheme.downcase
          when 'http', 'https'
            cache_obj = (@connection_cache["#{uri.host}:#{uri.port}"] ||= {
              :connection => nil,
              :keep_alive_options => {},
            })
            http_obj = cache_obj[:connection]

            # No usable cached connection: create a fresh one and forget the
            # keep-alive options that belonged to the previous one.
            if http_obj.nil? || ! http_obj.started?
              http_obj = cache_obj[:connection] =
                Net::HTTP.new( uri.host,
                               uri.port,
                               @proxy_addr,
                               @proxy_port,
                               @proxy_user,
                               @proxy_pass
                             )
              cache_obj[:keep_alive_options] = {}
            end

            # If we're keeping connections alive and the last request time is
            # too long ago, stop the connection.  Or, if the max requests left
            # is 1, reset the connection.
            if @keep_alive && http_obj.started?
              opts = cache_obj[:keep_alive_options]
              if((opts[:timeout] &&
                  Time.now.to_i - cache_obj[:last_request_time] > opts[:timeout].to_i) ||
                  opts[:max] && opts[:max].to_i == 1)

                # The logger is reached through Mechanize.log, the same way
                # the sibling BodyDecodingHandler does; this class defines no
                # +log+ method of its own.
                Mechanize.log.debug('Finishing stale connection') if Mechanize.log
                http_obj.finish

              end
            end

            cache_obj[:last_request_time] = Time.now.to_i
          when 'file'
            # Stand-in connection: always reports itself as started and
            # yields a FileResponse for the path of the requested URI.
            http_obj = Object.new
            class << http_obj
              def started?; true; end
              def request(request, *args, &block)
                response = FileResponse.new(request.uri.path)
                yield response
              end
            end
          end

          params[:connection] = http_obj
          super
        end
      end
    end
  end
end
|
@@ -0,0 +1,23 @@
|
|
1
|
+
module WWW
  class Mechanize
    class Chain
      # Chain handler that copies the caller-supplied headers in
      # params[:headers] onto params[:request].
      class CustomHeaders
        include WWW::Handler

        # Adds every header pair to the request, then continues down the
        # chain.  The symbols :etag and :if_modified_since are translated to
        # their header names; any other Symbol key raises ArgumentError;
        # every other key is used as the field name unchanged.
        def handle(ctx, params)
          request = params[:request]
          params[:headers].each do |name, value|
            field =
              if :etag == name
                "ETag"
              elsif :if_modified_since == name
                "If-Modified-Since"
              elsif name.is_a? Symbol
                raise ArgumentError.new("unknown header symbol #{name}")
              else
                name
              end
            request.add_field(field, value)
          end
          super
        end
      end
    end
  end
end
|
@@ -0,0 +1,47 @@
|
|
1
|
+
module WWW
  class Mechanize
    class Chain
      # Chain handler that fills in the standard request headers: connection
      # handling, content negotiation, Host, Cookie, Referer and User-Agent.
      class HeaderResolver
        include WWW::Handler

        # keep_alive::      when true, ask the server to keep the connection
        #                   open; otherwise ask it to close
        # keep_alive_time:: value sent in the Keep-Alive header
        # cookie_jar::      object answering empty?(uri) and cookies(uri)
        # user_agent::      User-Agent string; nothing is sent when nil
        def initialize(keep_alive, keep_alive_time, cookie_jar, user_agent)
          @keep_alive      = keep_alive
          @keep_alive_time = keep_alive_time
          @cookie_jar      = cookie_jar
          @user_agent      = user_agent
        end

        # Sets the headers on params[:request] using params[:uri] and
        # params[:referer], then continues down the chain.
        def handle(ctx, params)
          uri     = params[:uri]
          referer = params[:referer]
          request = params[:request]

          if @keep_alive
            request['Connection'] = 'keep-alive'
            request['Keep-Alive'] = @keep_alive_time.to_s
          else
            request['Connection'] = 'close'
          end
          request['Accept-Encoding'] = 'gzip,identity'
          request['Accept-Language'] = 'en-us,en;q=0.5'
          # NOTE(review): only the host name is sent, without a port --
          # confirm this is what servers on non-default ports expect.
          request['Host'] = uri.host
          request['Accept-Charset'] = 'ISO-8859-1,utf-8;q=0.7,*;q=0.7'

          # Only send a Cookie header when there is at least one cookie for
          # this URI.  The jar can be non-empty while cookies(uri) is empty;
          # previously that added a Cookie field with a nil value.
          unless @cookie_jar.empty?(uri)
            cookies = @cookie_jar.cookies(uri)
            request.add_field('Cookie', cookies.join("; ")) if cookies.length > 0
          end

          # Add Referer header to request
          if referer && referer.uri
            request['Referer'] = referer.uri.to_s
          end

          # Add User-Agent header to request
          request['User-Agent'] = @user_agent if @user_agent
          super
        end
      end
    end
  end
end
|
@@ -0,0 +1,23 @@
|
|
1
|
+
module WWW
  class Mechanize
    class Chain
      # Chain handler that, for GET requests, moves the request parameters
      # into the query string of the URI.
      class ParameterResolver
        include WWW::Handler

        # For params[:verb] == :get, appends params[:params] to the query of
        # params[:uri] (joined to any existing query with '&') and then
        # replaces params[:params] with an empty array.  Other verbs are left
        # untouched.  Control always continues down the chain.
        def handle(ctx, params)
          query_args = params[:params]
          target     = params[:uri]

          if params[:verb] == :get
            unless query_args.length.zero?
              target.query ||= ''
              # The query string is extended in place.
              target.query << '&' unless target.query.length.zero?
              target.query << Util.build_query_string(query_args)
            end
            params[:params] = []
          end
          super
        end
      end
    end
  end
end
|
File without changes
|
@@ -0,0 +1,22 @@
|
|
1
|
+
module WWW
  class Mechanize
    class Chain
      # Chain handler that runs a list of user-supplied callbacks before
      # handing the request on.
      class PreConnectHook
        include WWW::Handler

        # The callbacks, in the order they are invoked.  Each one must
        # respond to +call+ and receives the chain's params object.
        attr_accessor :hooks

        # Starts with no callbacks registered.
        def initialize
          @hooks = []
        end

        # Invokes every registered callback with +params+, then continues
        # down the chain.
        def handle(ctx, params)
          @hooks.each do |callback|
            callback.call(params)
          end
          super
        end
      end

      # Same behaviour as PreConnectHook under a separate class name.
      class PostConnectHook < PreConnectHook
      end
    end
  end
end
|
@@ -0,0 +1,28 @@
|
|
1
|
+
module WWW
  class Mechanize
    class Chain
      # Chain handler that makes sure params[:request] holds a request
      # object suited to the scheme of params[:uri].
      class RequestResolver
        include WWW::Handler

        # For http/https a Net::HTTP request class is looked up from
        # params[:verb] and instantiated with the URI's request_uri.  For
        # file URIs a stand-in object is used that carries the URI and
        # ignores header assignments.  A request already present in
        # params[:request] is never replaced.  Control then continues down
        # the chain.
        def handle(ctx, params)
          uri = params[:uri]

          case uri.scheme.downcase
          when 'http', 'https'
            request_class = Net::HTTP.const_get(params[:verb].to_s.capitalize)
            params[:request] ||= request_class.new(uri.request_uri)
          when 'file'
            stub = Struct.new(:uri).new(uri)
            class << stub
              # Headers have no meaning for local files; accept and drop them.
              def add_field(*args); end
              alias :[]= :add_field
            end
            params[:request] ||= stub
          end

          super
        end
      end
    end
  end
end
|
@@ -0,0 +1,40 @@
|
|
1
|
+
module WWW
  class Mechanize
    class Chain
      # Chain handler that turns the response into a page object using the
      # parser class registered for the response's content type.
      class ResponseBodyParser
        include WWW::Handler

        # pluggable_parser:: object whose parser(content_type) returns the
        #                    class to instantiate
        # watch_for_set::    value handed to parsers that accept
        #                    +watch_for_set=+; skipped when nil or false
        def initialize(pluggable_parser, watch_for_set)
          @pluggable_parser = pluggable_parser
          @watch_for_set    = watch_for_set
        end

        # Builds the page from params[:uri], params[:response] and
        # params[:response_body], stores it in params[:page] and continues
        # down the chain.
        def handle(ctx, params)
          response = params[:response]
          body     = params[:response_body]
          uri      = params[:uri]

          # The content type is the part of the header before the first ';',
          # lower-cased; it stays nil when the header is missing.
          content_type = nil
          header = response['Content-Type']
          unless header.nil?
            leading = header.match(/^([^;]*)/)
            content_type = leading[1].downcase unless leading.nil?
          end

          # Find our pluggable parser
          parser_class = @pluggable_parser.parser(content_type)
          params[:page] = parser_class.new(uri, response, body, response.code) { |parser|
            # Optional wiring: only performed for parsers that expose the
            # corresponding writer.
            parser.mech = params[:agent] if parser.respond_to? :mech=
            if parser.respond_to?(:watch_for_set=) && @watch_for_set
              parser.watch_for_set = @watch_for_set
            end
          }
          super
        end
      end
    end
  end
end
|