eric-mechanize 0.9.3.20090623142847
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGELOG.rdoc +504 -0
- data/EXAMPLES.rdoc +171 -0
- data/FAQ.rdoc +11 -0
- data/GUIDE.rdoc +122 -0
- data/LICENSE.rdoc +340 -0
- data/Manifest.txt +169 -0
- data/README.rdoc +60 -0
- data/Rakefile +43 -0
- data/examples/flickr_upload.rb +23 -0
- data/examples/mech-dump.rb +7 -0
- data/examples/proxy_req.rb +9 -0
- data/examples/rubyforge.rb +21 -0
- data/examples/spider.rb +11 -0
- data/lib/mechanize.rb +7 -0
- data/lib/www/mechanize.rb +619 -0
- data/lib/www/mechanize/chain.rb +34 -0
- data/lib/www/mechanize/chain/auth_headers.rb +80 -0
- data/lib/www/mechanize/chain/body_decoding_handler.rb +48 -0
- data/lib/www/mechanize/chain/connection_resolver.rb +78 -0
- data/lib/www/mechanize/chain/custom_headers.rb +23 -0
- data/lib/www/mechanize/chain/handler.rb +9 -0
- data/lib/www/mechanize/chain/header_resolver.rb +53 -0
- data/lib/www/mechanize/chain/parameter_resolver.rb +24 -0
- data/lib/www/mechanize/chain/post_connect_hook.rb +0 -0
- data/lib/www/mechanize/chain/pre_connect_hook.rb +22 -0
- data/lib/www/mechanize/chain/request_resolver.rb +32 -0
- data/lib/www/mechanize/chain/response_body_parser.rb +40 -0
- data/lib/www/mechanize/chain/response_header_handler.rb +50 -0
- data/lib/www/mechanize/chain/response_reader.rb +41 -0
- data/lib/www/mechanize/chain/ssl_resolver.rb +42 -0
- data/lib/www/mechanize/chain/uri_resolver.rb +77 -0
- data/lib/www/mechanize/content_type_error.rb +16 -0
- data/lib/www/mechanize/cookie.rb +72 -0
- data/lib/www/mechanize/cookie_jar.rb +191 -0
- data/lib/www/mechanize/file.rb +73 -0
- data/lib/www/mechanize/file_response.rb +62 -0
- data/lib/www/mechanize/file_saver.rb +39 -0
- data/lib/www/mechanize/form.rb +360 -0
- data/lib/www/mechanize/form/button.rb +8 -0
- data/lib/www/mechanize/form/check_box.rb +13 -0
- data/lib/www/mechanize/form/field.rb +28 -0
- data/lib/www/mechanize/form/file_upload.rb +24 -0
- data/lib/www/mechanize/form/image_button.rb +23 -0
- data/lib/www/mechanize/form/multi_select_list.rb +69 -0
- data/lib/www/mechanize/form/option.rb +51 -0
- data/lib/www/mechanize/form/radio_button.rb +38 -0
- data/lib/www/mechanize/form/select_list.rb +45 -0
- data/lib/www/mechanize/headers.rb +12 -0
- data/lib/www/mechanize/history.rb +67 -0
- data/lib/www/mechanize/inspect.rb +90 -0
- data/lib/www/mechanize/monkey_patch.rb +37 -0
- data/lib/www/mechanize/page.rb +181 -0
- data/lib/www/mechanize/page/base.rb +10 -0
- data/lib/www/mechanize/page/frame.rb +22 -0
- data/lib/www/mechanize/page/link.rb +50 -0
- data/lib/www/mechanize/page/meta.rb +51 -0
- data/lib/www/mechanize/pluggable_parsers.rb +103 -0
- data/lib/www/mechanize/redirect_limit_reached_error.rb +18 -0
- data/lib/www/mechanize/redirect_not_get_or_head_error.rb +20 -0
- data/lib/www/mechanize/response_code_error.rb +25 -0
- data/lib/www/mechanize/unsupported_scheme_error.rb +10 -0
- data/lib/www/mechanize/util.rb +76 -0
- data/mechanize.gemspec +41 -0
- data/test/chain/test_argument_validator.rb +14 -0
- data/test/chain/test_auth_headers.rb +25 -0
- data/test/chain/test_custom_headers.rb +18 -0
- data/test/chain/test_header_resolver.rb +28 -0
- data/test/chain/test_parameter_resolver.rb +35 -0
- data/test/chain/test_request_resolver.rb +29 -0
- data/test/chain/test_response_reader.rb +24 -0
- data/test/data/htpasswd +1 -0
- data/test/data/server.crt +16 -0
- data/test/data/server.csr +12 -0
- data/test/data/server.key +15 -0
- data/test/data/server.pem +15 -0
- data/test/helper.rb +129 -0
- data/test/htdocs/alt_text.html +10 -0
- data/test/htdocs/bad_form_test.html +9 -0
- data/test/htdocs/button.jpg +0 -0
- data/test/htdocs/empty_form.html +6 -0
- data/test/htdocs/file_upload.html +26 -0
- data/test/htdocs/find_link.html +41 -0
- data/test/htdocs/form_multi_select.html +16 -0
- data/test/htdocs/form_multival.html +37 -0
- data/test/htdocs/form_no_action.html +18 -0
- data/test/htdocs/form_no_input_name.html +16 -0
- data/test/htdocs/form_select.html +16 -0
- data/test/htdocs/form_select_all.html +16 -0
- data/test/htdocs/form_select_none.html +17 -0
- data/test/htdocs/form_select_noopts.html +10 -0
- data/test/htdocs/form_set_fields.html +14 -0
- data/test/htdocs/form_test.html +188 -0
- data/test/htdocs/frame_test.html +30 -0
- data/test/htdocs/google.html +13 -0
- data/test/htdocs/iframe_test.html +16 -0
- data/test/htdocs/index.html +6 -0
- data/test/htdocs/link with space.html +5 -0
- data/test/htdocs/meta_cookie.html +11 -0
- data/test/htdocs/no_title_test.html +6 -0
- data/test/htdocs/relative/tc_relative_links.html +21 -0
- data/test/htdocs/tc_bad_links.html +5 -0
- data/test/htdocs/tc_base_link.html +8 -0
- data/test/htdocs/tc_blank_form.html +11 -0
- data/test/htdocs/tc_checkboxes.html +19 -0
- data/test/htdocs/tc_encoded_links.html +5 -0
- data/test/htdocs/tc_follow_meta.html +8 -0
- data/test/htdocs/tc_form_action.html +48 -0
- data/test/htdocs/tc_links.html +18 -0
- data/test/htdocs/tc_no_attributes.html +16 -0
- data/test/htdocs/tc_pretty_print.html +17 -0
- data/test/htdocs/tc_radiobuttons.html +17 -0
- data/test/htdocs/tc_referer.html +10 -0
- data/test/htdocs/tc_relative_links.html +19 -0
- data/test/htdocs/tc_textarea.html +23 -0
- data/test/htdocs/unusual______.html +5 -0
- data/test/servlets.rb +365 -0
- data/test/ssl_server.rb +48 -0
- data/test/test_authenticate.rb +71 -0
- data/test/test_bad_links.rb +25 -0
- data/test/test_blank_form.rb +16 -0
- data/test/test_checkboxes.rb +61 -0
- data/test/test_content_type.rb +13 -0
- data/test/test_cookie_class.rb +338 -0
- data/test/test_cookie_jar.rb +362 -0
- data/test/test_cookies.rb +123 -0
- data/test/test_encoded_links.rb +20 -0
- data/test/test_errors.rb +49 -0
- data/test/test_follow_meta.rb +108 -0
- data/test/test_form_action.rb +52 -0
- data/test/test_form_as_hash.rb +61 -0
- data/test/test_form_button.rb +38 -0
- data/test/test_form_no_inputname.rb +15 -0
- data/test/test_forms.rb +564 -0
- data/test/test_frames.rb +25 -0
- data/test/test_get_headers.rb +52 -0
- data/test/test_gzipping.rb +22 -0
- data/test/test_hash_api.rb +45 -0
- data/test/test_history.rb +142 -0
- data/test/test_history_added.rb +16 -0
- data/test/test_html_unscape_forms.rb +39 -0
- data/test/test_if_modified_since.rb +20 -0
- data/test/test_keep_alive.rb +31 -0
- data/test/test_links.rb +120 -0
- data/test/test_mech.rb +268 -0
- data/test/test_mechanize_file.rb +47 -0
- data/test/test_meta.rb +65 -0
- data/test/test_multi_select.rb +106 -0
- data/test/test_no_attributes.rb +13 -0
- data/test/test_option.rb +18 -0
- data/test/test_page.rb +124 -0
- data/test/test_pluggable_parser.rb +145 -0
- data/test/test_post_form.rb +34 -0
- data/test/test_pretty_print.rb +22 -0
- data/test/test_radiobutton.rb +75 -0
- data/test/test_redirect_limit_reached.rb +41 -0
- data/test/test_redirect_verb_handling.rb +45 -0
- data/test/test_referer.rb +39 -0
- data/test/test_relative_links.rb +40 -0
- data/test/test_request.rb +13 -0
- data/test/test_response_code.rb +52 -0
- data/test/test_save_file.rb +48 -0
- data/test/test_scheme.rb +48 -0
- data/test/test_select.rb +106 -0
- data/test/test_select_all.rb +15 -0
- data/test/test_select_none.rb +15 -0
- data/test/test_select_noopts.rb +16 -0
- data/test/test_set_fields.rb +44 -0
- data/test/test_ssl_server.rb +20 -0
- data/test/test_subclass.rb +14 -0
- data/test/test_textarea.rb +45 -0
- data/test/test_upload.rb +109 -0
- data/test/test_verbs.rb +25 -0
- metadata +314 -0
@@ -0,0 +1,34 @@
|
|
1
|
+
require 'www/mechanize/chain/handler'
|
2
|
+
require 'www/mechanize/chain/uri_resolver'
|
3
|
+
require 'www/mechanize/chain/parameter_resolver'
|
4
|
+
require 'www/mechanize/chain/request_resolver'
|
5
|
+
require 'www/mechanize/chain/custom_headers'
|
6
|
+
require 'www/mechanize/chain/connection_resolver'
|
7
|
+
require 'www/mechanize/chain/ssl_resolver'
|
8
|
+
require 'www/mechanize/chain/pre_connect_hook'
|
9
|
+
require 'www/mechanize/chain/auth_headers'
|
10
|
+
require 'www/mechanize/chain/header_resolver'
|
11
|
+
require 'www/mechanize/chain/response_body_parser'
|
12
|
+
require 'www/mechanize/chain/response_header_handler'
|
13
|
+
require 'www/mechanize/chain/response_reader'
|
14
|
+
require 'www/mechanize/chain/body_decoding_handler'
|
15
|
+
|
16
|
+
module WWW
  class Mechanize
    # An ordered list of handler objects ("links").  A request is handed to
    # the first link; each link forwards it to its successor by calling
    # #pass on the chain it was registered with.
    class Chain
      # list:: the links, in the order they should run.  Every link must
      #        respond to +chain=+ and +handle(ctx, request)+; each one is
      #        given a back-reference to this chain.
      def initialize(list)
        @list = list
        @list.each { |l| l.chain = self }
      end

      # Starts processing +request+ at the first link and returns whatever
      # that link's +handle+ returns.  An empty chain has nothing to run and
      # returns nil instead of failing on a nil link.
      def handle(request)
        first_link = @list.first
        first_link.handle(self, request) if first_link
      end

      # Forwards +request+ to the link that follows +obj+.  Returns the next
      # link's result, or nil when +obj+ is the last link.
      #
      # Raises ArgumentError when +obj+ is not part of this chain (the
      # original code failed with NoMethodError on nil in that case).
      def pass(obj, request)
        position = @list.index(obj)
        if position.nil?
          raise ArgumentError, "#{obj.inspect} is not a link in this chain"
        end

        next_link = @list[position + 1]
        next_link.handle(self, request) if next_link
      end
    end
  end
end
|
@@ -0,0 +1,80 @@
|
|
1
|
+
module WWW
  class Mechanize
    class Chain
      # Chain link that adds authentication to params[:request].  The scheme
      # is looked up in +auth_hash+ by the request host and may be :basic,
      # :digest or :iis_digest; any other value leaves the request untouched.
      class AuthHeaders
        include WWW::Handler

        # Number of times each server nonce has been used; shared by every
        # instance and reported in the "nc" field of the digest header.
        @@nonce_count = Hash.new(0)

        # Client nonce, computed once when this file is loaded.
        CNONCE = Digest::MD5.hexdigest("%x" % (Time.now.to_i + rand(65535)))

        # auth_hash:: maps a host name to :basic, :digest or :iis_digest
        # user::      user name to authenticate with
        # password::  password to authenticate with
        # digest::    digest challenge string passed to #gen_auth_header
        #             (presumably the server's WWW-Authenticate value);
        #             may be nil
        def initialize(auth_hash, user, password, digest)
          @auth_hash = auth_hash
          @user      = user
          @password  = password
          @digest    = digest
        end

        # Sets the credentials on params[:request] according to the scheme
        # registered for params[:uri].host, then passes control down the
        # chain.  Both digest variants are skipped while no challenge is
        # known; previously only :digest was guarded and :iis_digest crashed
        # on a nil challenge.
        def handle(ctx, params)
          uri     = params[:uri]
          request = params[:request]

          case @auth_hash[uri.host]
          when :basic
            request.basic_auth(@user, @password)
          when :iis_digest
            if @digest
              request['Authorization'] = gen_auth_header(uri, request, @digest, true)
            end
          when :digest
            if @digest
              request['Authorization'] = gen_auth_header(uri, request, @digest)
            end
          end
          super
        end

        # Builds the value of a digest Authorization header.
        #
        # uri::         request URI; only its path is used
        # request::     request object; only its +method+ is used
        # auth_header:: challenge of the form "<scheme> key=value, ..."
        # is_IIS::      when true the qop value is emitted in quotes
        #
        # Returns the header value as a String.  Raises ArgumentError when
        # +auth_header+ does not have the "<scheme> <params>" shape.
        def gen_auth_header(uri, request, auth_header, is_IIS = false)
          challenge = auth_header.to_s[/^\w+ (.*)/, 1]
          if challenge.nil?
            raise ArgumentError,
                  "malformed digest challenge: #{auth_header.inspect}"
          end

          # Split the challenge into key/value pairs, stripping one leading
          # and one trailing double quote from each value.
          params = {}
          challenge.scan(/(\w+)=("[^"]*"|[^,]*)/) do |key, value|
            params[key] = value.gsub(/^"/, '').gsub(/"$/, '')
          end

          @@nonce_count[params['nonce']] += 1
          nonce_count = '%08x' % @@nonce_count[params['nonce']]

          a_1 = "#{@user}:#{params['realm']}:#{@password}"
          a_2 = "#{request.method}:#{uri.path}"

          # NOTE(review): a challenge without "nonce" or "qop" makes the
          # appends below raise TypeError (String << nil); that behaviour is
          # unchanged from the original.
          request_digest = ''
          request_digest << Digest::MD5.hexdigest(a_1)
          request_digest << ':' << params['nonce']
          request_digest << ':' << nonce_count
          request_digest << ':' << CNONCE
          request_digest << ':' << params['qop']
          request_digest << ':' << Digest::MD5.hexdigest(a_2)

          header = ''
          header << "Digest username=\"#{@user}\", "
          if is_IIS
            header << "qop=\"#{params['qop']}\", "
          else
            header << "qop=#{params['qop']}, "
          end
          header << "uri=\"#{uri.path}\", "

          # Echo back only those optional fields the server actually sent.
          echoed = %w{ algorithm opaque nonce realm }.map { |field|
            next unless params[field]
            "#{field}=\"#{params[field]}\""
          }.compact
          header << echoed.join(', ')

          header << ", nc=#{nonce_count}, "
          header << "cnonce=\"#{CNONCE}\", "
          header << "response=\"#{Digest::MD5.hexdigest(request_digest)}\""

          header
        end
      end
    end
  end
end
|
@@ -0,0 +1,48 @@
|
|
1
|
+
module WWW
  class Mechanize
    class Chain
      # Chain link that replaces the IO-like object in
      # options[:response_body] with the body as a String, decoded according
      # to the response's Content-Encoding header.
      class BodyDecodingHandler
        include WWW::Handler

        # Reads options[:response_body] (must respond to +read+, +rewind+ and
        # +length+) and stores the decoded string back under the same key,
        # then passes control down the chain.
        #
        # Raises RuntimeError for any Content-Encoding other than "gzip" or
        # "x-gzip".
        def handle(ctx, options)
          body     = options[:response_body]
          response = options[:response]
          encoding = response['Content-Encoding']

          options[:response_body] =
            if encoding
              case encoding.downcase
              when 'gzip'
                Mechanize.log.debug('gunzip body') if Mechanize.log
                if response['Content-Length'].to_i > 0 || body.length > 0
                  gunzip(body)
                else
                  ''
                end
              when 'x-gzip'
                # NOTE(review): the body is returned without decompression
                # here, exactly as in the original; confirm whether x-gzip
                # should be treated like gzip.
                body.read
              else
                raise 'Unsupported content encoding'
              end
            else
              body.read
            end
          super
        end

        private

        # Decompresses +body+ as a gzip stream.  When the gzip reader rejects
        # the stream, falls back to raw inflate; when the data itself is
        # corrupt, logs the problem and returns an empty string.
        def gunzip(body)
          Zlib::GzipReader.new(body).read
        rescue Zlib::BufError, Zlib::GzipFile::Error
          Mechanize.log.error('Caught a Zlib::BufError') if Mechanize.log
          inflate_raw(body)
        rescue Zlib::DataError => e
          log_undecodable(e)
          ''
        end

        # Fallback decoder: skips the first 10 bytes of +body+ (presumably
        # the fixed gzip header) and inflates the rest as a raw deflate
        # stream.  In the original this code ran inside a rescue clause, so
        # a Zlib::DataError raised here was not caught by the sibling rescue
        # and escaped to the caller; it is now handled the same way as in
        # #gunzip.
        def inflate_raw(body)
          body.rewind
          body.read(10)
          Zlib::Inflate.new(-Zlib::MAX_WBITS).inflate(body.read)
        rescue Zlib::DataError => e
          log_undecodable(e)
          ''
        end

        # Logs a corrupt-body error when a logger is configured.
        def log_undecodable(error)
          return unless Mechanize.log
          Mechanize.log.error("Caught a Zlib::DataError, unable to decode page: #{error}")
        end
      end
    end
  end
end
|
@@ -0,0 +1,78 @@
|
|
1
|
+
module WWW
  class Mechanize
    class Chain
      # Chain link that picks the connection object for a request and stores
      # it in params[:connection]: a cached Net::HTTP instance for http and
      # https URIs, or a small stand-in object for file URIs.
      class ConnectionResolver
        include WWW::Handler

        # connection_cache:: Hash keyed by "host:port"; each entry holds the
        #                    connection, its keep-alive options and the time
        #                    of the last request
        # keep_alive::       whether stale connections should be finished
        # proxy_addr, proxy_port, proxy_user, proxy_pass::
        #                    passed straight through to Net::HTTP.new
        def initialize(connection_cache, keep_alive, proxy_addr, proxy_port,
                       proxy_user, proxy_pass)
          @connection_cache = connection_cache
          @keep_alive       = keep_alive
          @proxy_addr       = proxy_addr
          @proxy_port       = proxy_port
          @proxy_user       = proxy_user
          @proxy_pass       = proxy_pass
        end

        # Resolves the connection for params[:uri] and passes control down
        # the chain.  Schemes other than http, https and file leave
        # params[:connection] set to nil.
        def handle(ctx, params)
          uri = params[:uri]

          params[:connection] =
            case uri.scheme.downcase
            when 'http', 'https' then http_connection(uri)
            when 'file'          then file_connection
            end
          super
        end

        private

        # Returns the Net::HTTP object cached for the URI's host and port,
        # creating a fresh one (and resetting its keep-alive options) when
        # none is cached or the cached one is not started.
        def http_connection(uri)
          entry = (@connection_cache["#{uri.host}:#{uri.port}"] ||= {
            :connection         => nil,
            :keep_alive_options => {},
          })

          conn = entry[:connection]
          if conn.nil? || !conn.started?
            conn = entry[:connection] = Net::HTTP.new(uri.host, uri.port,
                                                      @proxy_addr, @proxy_port,
                                                      @proxy_user, @proxy_pass)
            entry[:keep_alive_options] = {}
          end

          # With keep-alive on, finish a started connection whose idle time
          # exceeds the recorded timeout, or whose recorded max is down to 1.
          if @keep_alive && conn.started?
            opts    = entry[:keep_alive_options]
            expired = opts[:timeout] &&
                      Time.now.to_i - entry[:last_request_time] > opts[:timeout].to_i

            if expired || (opts[:max] && opts[:max].to_i == 1)
              Mechanize.log.debug('Finishing stale connection') if Mechanize.log
              conn.finish
            end
          end

          entry[:last_request_time] = Time.now.to_i
          conn
        end

        # Returns a stand-in connection for file URIs: it always reports
        # itself as started and answers +request+ by yielding a FileResponse
        # built from the request URI's path.
        def file_connection
          stub = Object.new
          class << stub
            def started?
              true
            end

            def request(request, *args, &block)
              yield FileResponse.new(request.uri.path)
            end
          end
          stub
        end
      end
    end
  end
end
|
@@ -0,0 +1,23 @@
|
|
1
|
+
module WWW
  class Mechanize
    class Chain
      # Chain link that copies the per-request headers in params[:headers]
      # onto params[:request].
      class CustomHeaders
        include WWW::Handler

        # Header keys may be the symbols :etag and :if_modified_since, which
        # map to "ETag" and "If-Modified-Since", or any non-symbol key, which
        # is used as the field name unchanged.
        #
        # Raises ArgumentError for any other symbol.
        def handle(ctx, params)
          request = params[:request]

          params[:headers].each do |key, value|
            field =
              case key
              when :etag              then "ETag"
              when :if_modified_since then "If-Modified-Since"
              when Symbol
                raise ArgumentError, "unknown header symbol #{key}"
              else
                key
              end
            request[field] = value
          end
          super
        end
      end
    end
  end
end
|
@@ -0,0 +1,53 @@
|
|
1
|
+
module WWW
  class Mechanize
    class Chain
      # Chain link that fills in the standard request headers: connection
      # handling, content negotiation, Host, cookies, Referer, User-Agent
      # and any agent-wide extra headers.
      class HeaderResolver
        include WWW::Handler

        # keep_alive::      whether to ask for a persistent connection
        # keep_alive_time:: value sent in the Keep-Alive header
        # cookie_jar::      must respond to +empty?(uri)+ and +cookies(uri)+
        # user_agent::      User-Agent string, or nil to send none
        # headers::         extra name/value pairs applied last, so they
        #                   override the defaults set here
        def initialize(keep_alive, keep_alive_time, cookie_jar, user_agent, headers)
          @keep_alive      = keep_alive
          @keep_alive_time = keep_alive_time
          @cookie_jar      = cookie_jar
          @user_agent      = user_agent
          @headers         = headers
        end

        # Sets the headers on params[:request] and passes control down the
        # chain.
        def handle(ctx, params)
          uri     = params[:uri]
          referer = params[:referer]
          request = params[:request]

          if @keep_alive
            request['Connection'] = 'keep-alive'
            request['Keep-Alive'] = @keep_alive_time.to_s
          else
            request['Connection'] = 'close'
          end
          request['Accept-Encoding'] = 'gzip,identity'
          request['Accept-Language'] = 'en-us,en;q=0.5'
          request['Host']            = host_header(uri)
          request['Accept-Charset']  = 'ISO-8859-1,utf-8;q=0.7,*;q=0.7'

          # Only send a Cookie header when there is something to send; the
          # original added a nil-valued field when the list was empty.
          unless @cookie_jar.empty?(uri)
            cookies = @cookie_jar.cookies(uri)
            request.add_field('Cookie', cookies.join("; ")) if cookies.length > 0
          end

          # Add Referer header to request
          if referer && referer.uri
            request['Referer'] = referer.uri.to_s
          end

          # Add User-Agent header to request
          request['User-Agent'] = @user_agent if @user_agent

          # The original guarded this loop with "if request", but request
          # has already been dereferenced above, so the guard never applied.
          @headers.each do |k, v|
            request[k] = v
          end
          super
        end

        private

        # Returns the Host header value for +uri+.  The port is left out only
        # when it is the default for the URI's own scheme; the original left
        # it out for both 80 and 443 whatever the scheme, which produced a
        # wrong Host for e.g. http on port 443.
        def host_header(uri)
          if uri.port == uri.default_port
            "#{uri.host}"
          else
            "#{uri.host}:#{uri.port}"
          end
        end
      end
    end
  end
end
|
@@ -0,0 +1,24 @@
|
|
1
|
+
module WWW
  class Mechanize
    class Chain
      # Chain link that, for verbs which carry their parameters in the URL
      # (HEAD, GET, DELETE, TRACE), moves params[:params] into the query
      # string of params[:uri].
      class ParameterResolver
        include WWW::Handler

        # Verbs whose parameters are appended to the URI query.
        QUERY_VERBS = [:head, :get, :delete, :trace]

        # For a query verb, appends the encoded parameters to the URI's query
        # (joined with "&" when a query is already present) and resets
        # params[:params] to an empty array.  Other verbs are left untouched.
        # Control is then passed down the chain.
        def handle(ctx, params)
          query_args = params[:params]
          target     = params[:uri]

          if QUERY_VERBS.include?(params[:verb])
            append_to_query(target, query_args) if query_args.length > 0
            params[:params] = []
          end
          super
        end

        private

        # Appends +query_args+, encoded by Util.build_query_string, to the
        # query string of +target+ in place.
        def append_to_query(target, query_args)
          target.query ||= ''
          target.query << '&' if target.query.length > 0
          target.query << Util.build_query_string(query_args)
        end
      end
    end
  end
end
|
File without changes
|
@@ -0,0 +1,22 @@
|
|
1
|
+
module WWW
  class Mechanize
    class Chain
      # Chain link that runs user-supplied callbacks.  Each entry in +hooks+
      # must respond to +call+ and receives the chain's params hash.
      class PreConnectHook
        include WWW::Handler

        # The list of callbacks, run in order.
        attr_accessor :hooks

        def initialize
          @hooks = []
        end

        # Calls every hook with +params+, then passes control down the chain.
        def handle(ctx, params)
          @hooks.each do |callback|
            callback.call(params)
          end
          super
        end
      end

      # Same behaviour as PreConnectHook under a different class name.
      class PostConnectHook < PreConnectHook
      end
    end
  end
end
|
@@ -0,0 +1,32 @@
|
|
1
|
+
module WWW
  class Mechanize
    class Chain
      # Chain link that makes sure params[:request] holds a request object
      # suited to the scheme of params[:uri].  An existing request is never
      # replaced.
      class RequestResolver
        include WWW::Handler

        # For http/https the request is an instance of the Net::HTTP class
        # named after params[:verb] (e.g. :get -> Net::HTTP::Get); for file
        # URIs it is a stand-in object.  Control is then passed down the
        # chain.
        def handle(ctx, params)
          uri = params[:uri]

          case uri.scheme.downcase
          when 'http', 'https'
            request_class = Net::HTTP.const_get(params[:verb].to_s.capitalize)
            params[:request] ||= request_class.new(uri.request_uri)
          when 'file'
            params[:request] ||= file_request(uri)
          end

          super
        end

        private

        # Builds the stand-in request for file URIs: it exposes +uri+ and
        # +path+, and accepts header writes and +each_header+ as no-ops.
        def file_request(uri)
          stub = Struct.new(:uri).new(uri)
          class << stub
            def add_field(*args); end
            alias :[]= :add_field

            def each_header; end

            def path
              uri.path
            end
          end
          stub
        end
      end
    end
  end
end
|
@@ -0,0 +1,40 @@
|
|
1
|
+
module WWW
  class Mechanize
    class Chain
      # Chain link that turns the response into a page object, using the
      # parser class registered for the response's content type, and stores
      # it in params[:page].
      class ResponseBodyParser
        include WWW::Handler

        # pluggable_parser:: must respond to +parser(content_type)+ and
        #                    return a class
        # watch_for_set::    forwarded to the page when it accepts it and
        #                    the value is set
        def initialize(pluggable_parser, watch_for_set)
          @pluggable_parser = pluggable_parser
          @watch_for_set    = watch_for_set
        end

        # Instantiates the parser class with the URI, response, body and
        # response code, configures the new page inside the constructor
        # block, and passes control down the chain.
        def handle(ctx, params)
          response     = params[:response]
          parser_class = @pluggable_parser.parser(media_type(response['Content-Type']))

          params[:page] = parser_class.new(params[:uri],
                                           response,
                                           params[:response_body],
                                           response.code) { |page|
            page.mech = params[:agent] if page.respond_to? :mech=
            if page.respond_to?(:watch_for_set=) && @watch_for_set
              page.watch_for_set = @watch_for_set
            end
          }
          super
        end

        private

        # Extracts the lower-cased media type from a Content-Type header
        # value: everything before the first ";", and of that only the part
        # before the first ",".  Returns nil when the header is nil.
        def media_type(header)
          return nil if header.nil?

          leading = header.match(/^([^;]*)/)
          leading.nil? ? nil : leading[1].downcase.split(',')[0]
        end
      end
    end
  end
end
|