tenderlove-mechanize 0.9.3.20090623142847 → 0.9.3.20090911221705
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Manifest.txt +55 -48
- data/Rakefile +12 -22
- data/lib/mechanize.rb +618 -4
- data/lib/mechanize/chain.rb +33 -0
- data/lib/mechanize/chain/auth_headers.rb +78 -0
- data/lib/mechanize/chain/body_decoding_handler.rb +46 -0
- data/lib/mechanize/chain/connection_resolver.rb +76 -0
- data/lib/mechanize/chain/custom_headers.rb +21 -0
- data/lib/{www/mechanize → mechanize}/chain/handler.rb +1 -1
- data/lib/mechanize/chain/header_resolver.rb +51 -0
- data/lib/mechanize/chain/parameter_resolver.rb +22 -0
- data/lib/{www/mechanize → mechanize}/chain/post_connect_hook.rb +0 -0
- data/lib/mechanize/chain/pre_connect_hook.rb +20 -0
- data/lib/mechanize/chain/request_resolver.rb +30 -0
- data/lib/mechanize/chain/response_body_parser.rb +38 -0
- data/lib/mechanize/chain/response_header_handler.rb +48 -0
- data/lib/mechanize/chain/response_reader.rb +39 -0
- data/lib/mechanize/chain/ssl_resolver.rb +40 -0
- data/lib/mechanize/chain/uri_resolver.rb +75 -0
- data/lib/mechanize/content_type_error.rb +14 -0
- data/lib/mechanize/cookie.rb +70 -0
- data/lib/mechanize/cookie_jar.rb +188 -0
- data/lib/mechanize/file.rb +71 -0
- data/lib/mechanize/file_response.rb +60 -0
- data/lib/mechanize/file_saver.rb +37 -0
- data/lib/mechanize/form.rb +378 -0
- data/lib/mechanize/form/button.rb +9 -0
- data/lib/mechanize/form/check_box.rb +11 -0
- data/lib/mechanize/form/field.rb +30 -0
- data/lib/mechanize/form/file_upload.rb +22 -0
- data/lib/mechanize/form/image_button.rb +21 -0
- data/lib/mechanize/form/multi_select_list.rb +67 -0
- data/lib/mechanize/form/option.rb +49 -0
- data/lib/mechanize/form/radio_button.rb +49 -0
- data/lib/mechanize/form/select_list.rb +43 -0
- data/lib/mechanize/headers.rb +11 -0
- data/lib/mechanize/history.rb +65 -0
- data/lib/mechanize/inspect.rb +88 -0
- data/lib/{www/mechanize → mechanize}/monkey_patch.rb +4 -6
- data/lib/mechanize/page.rb +206 -0
- data/lib/mechanize/page/base.rb +8 -0
- data/lib/mechanize/page/frame.rb +20 -0
- data/lib/mechanize/page/image.rb +26 -0
- data/lib/mechanize/page/label.rb +20 -0
- data/lib/mechanize/page/link.rb +48 -0
- data/lib/mechanize/page/meta.rb +50 -0
- data/lib/mechanize/pluggable_parsers.rb +101 -0
- data/lib/mechanize/redirect_limit_reached_error.rb +16 -0
- data/lib/mechanize/redirect_not_get_or_head_error.rb +18 -0
- data/lib/mechanize/response_code_error.rb +22 -0
- data/lib/mechanize/unsupported_scheme_error.rb +8 -0
- data/lib/mechanize/util.rb +67 -0
- data/mechanize.gemspec +8 -8
- data/test/chain/test_argument_validator.rb +2 -2
- data/test/chain/test_auth_headers.rb +2 -2
- data/test/chain/test_custom_headers.rb +2 -2
- data/test/chain/test_header_resolver.rb +3 -3
- data/test/chain/test_parameter_resolver.rb +4 -4
- data/test/chain/test_request_resolver.rb +4 -4
- data/test/chain/test_response_reader.rb +3 -3
- data/test/helper.rb +1 -1
- data/test/htdocs/tc_bad_charset.html +9 -0
- data/test/htdocs/tc_charset.html +6 -0
- data/test/htdocs/test_bad_encoding.html +52 -0
- data/test/test_authenticate.rb +3 -3
- data/test/test_bad_links.rb +1 -1
- data/test/test_blank_form.rb +1 -1
- data/test/test_checkboxes.rb +1 -1
- data/test/test_content_type.rb +2 -2
- data/test/test_cookie_class.rb +12 -12
- data/test/test_cookie_jar.rb +13 -13
- data/test/test_cookies.rb +1 -1
- data/test/test_encoded_links.rb +1 -1
- data/test/test_errors.rb +2 -2
- data/test/test_follow_meta.rb +3 -3
- data/test/test_form_action.rb +1 -1
- data/test/test_form_as_hash.rb +1 -1
- data/test/test_form_button.rb +2 -2
- data/test/test_form_no_inputname.rb +1 -1
- data/test/test_forms.rb +1 -1
- data/test/test_frames.rb +1 -1
- data/test/test_get_headers.rb +1 -1
- data/test/test_gzipping.rb +2 -2
- data/test/test_hash_api.rb +1 -1
- data/test/test_history.rb +7 -7
- data/test/test_history_added.rb +1 -1
- data/test/test_html_unscape_forms.rb +7 -7
- data/test/test_if_modified_since.rb +1 -1
- data/test/test_keep_alive.rb +1 -1
- data/test/test_links.rb +2 -2
- data/test/test_mech.rb +2 -2
- data/test/test_mechanize_file.rb +7 -7
- data/test/test_meta.rb +2 -2
- data/test/test_multi_select.rb +1 -1
- data/test/test_no_attributes.rb +1 -1
- data/test/test_option.rb +1 -1
- data/test/test_page.rb +3 -3
- data/test/test_pluggable_parser.rb +14 -14
- data/test/test_post_form.rb +1 -1
- data/test/test_pretty_print.rb +2 -2
- data/test/test_radiobutton.rb +1 -1
- data/test/test_redirect_limit_reached.rb +1 -3
- data/test/test_redirect_verb_handling.rb +1 -3
- data/test/test_referer.rb +1 -1
- data/test/test_relative_links.rb +1 -1
- data/test/test_request.rb +1 -1
- data/test/test_response_code.rb +3 -3
- data/test/test_save_file.rb +3 -3
- data/test/test_scheme.rb +3 -3
- data/test/test_select.rb +2 -2
- data/test/test_select_all.rb +1 -1
- data/test/test_select_none.rb +1 -1
- data/test/test_select_noopts.rb +1 -1
- data/test/test_set_fields.rb +1 -1
- data/test/test_ssl_server.rb +1 -1
- data/test/test_subclass.rb +1 -1
- data/test/test_textarea.rb +1 -1
- data/test/test_upload.rb +1 -1
- data/test/test_verbs.rb +1 -1
- metadata +61 -56
- data/lib/www/mechanize.rb +0 -619
- data/lib/www/mechanize/chain.rb +0 -34
- data/lib/www/mechanize/chain/auth_headers.rb +0 -80
- data/lib/www/mechanize/chain/body_decoding_handler.rb +0 -48
- data/lib/www/mechanize/chain/connection_resolver.rb +0 -78
- data/lib/www/mechanize/chain/custom_headers.rb +0 -23
- data/lib/www/mechanize/chain/header_resolver.rb +0 -53
- data/lib/www/mechanize/chain/parameter_resolver.rb +0 -24
- data/lib/www/mechanize/chain/pre_connect_hook.rb +0 -22
- data/lib/www/mechanize/chain/request_resolver.rb +0 -32
- data/lib/www/mechanize/chain/response_body_parser.rb +0 -40
- data/lib/www/mechanize/chain/response_header_handler.rb +0 -50
- data/lib/www/mechanize/chain/response_reader.rb +0 -41
- data/lib/www/mechanize/chain/ssl_resolver.rb +0 -42
- data/lib/www/mechanize/chain/uri_resolver.rb +0 -77
- data/lib/www/mechanize/content_type_error.rb +0 -16
- data/lib/www/mechanize/cookie.rb +0 -72
- data/lib/www/mechanize/cookie_jar.rb +0 -191
- data/lib/www/mechanize/file.rb +0 -73
- data/lib/www/mechanize/file_response.rb +0 -62
- data/lib/www/mechanize/file_saver.rb +0 -39
- data/lib/www/mechanize/form.rb +0 -360
- data/lib/www/mechanize/form/button.rb +0 -8
- data/lib/www/mechanize/form/check_box.rb +0 -13
- data/lib/www/mechanize/form/field.rb +0 -28
- data/lib/www/mechanize/form/file_upload.rb +0 -24
- data/lib/www/mechanize/form/image_button.rb +0 -23
- data/lib/www/mechanize/form/multi_select_list.rb +0 -69
- data/lib/www/mechanize/form/option.rb +0 -51
- data/lib/www/mechanize/form/radio_button.rb +0 -38
- data/lib/www/mechanize/form/select_list.rb +0 -45
- data/lib/www/mechanize/headers.rb +0 -12
- data/lib/www/mechanize/history.rb +0 -67
- data/lib/www/mechanize/inspect.rb +0 -90
- data/lib/www/mechanize/page.rb +0 -181
- data/lib/www/mechanize/page/base.rb +0 -10
- data/lib/www/mechanize/page/frame.rb +0 -22
- data/lib/www/mechanize/page/link.rb +0 -50
- data/lib/www/mechanize/page/meta.rb +0 -51
- data/lib/www/mechanize/pluggable_parsers.rb +0 -103
- data/lib/www/mechanize/redirect_limit_reached_error.rb +0 -18
- data/lib/www/mechanize/redirect_not_get_or_head_error.rb +0 -20
- data/lib/www/mechanize/response_code_error.rb +0 -25
- data/lib/www/mechanize/unsupported_scheme_error.rb +0 -10
- data/lib/www/mechanize/util.rb +0 -76
@@ -0,0 +1,39 @@
|
|
1
|
+
class Mechanize
  class Chain
    # Chain handler that drains an HTTP response body into an in-memory
    # buffer and publishes the response, its body and its response class to
    # the following handlers through +params+.
    class ResponseReader
      include Mechanize::Handler

      # response:: the response whose body will be read. It must respond to
      #            +read_body+, +code+, +content_length+ and +each_header+.
      def initialize(response)
        @response = response
      end

      # Reads the complete body and stores the results in +params+:
      #
      # <tt>params[:response]</tt>::      the response given to #initialize
      # <tt>params[:response_body]</tt>:: a rewound StringIO holding the body
      # <tt>params[:res_klass]</tt>::     the Net::HTTPResponse subclass for
      #                                   the status code
      #
      # Control is then passed to the next handler via +super+.
      #
      # Raises ResponseCodeError when the status code has no entry in
      # Net::HTTPResponse::CODE_TO_OBJ, and EOFError when a non-redirect
      # response to a non-HEAD request does not match its Content-Length.
      def handle(ctx, params)
        params[:response] = @response
        body = StringIO.new
        total = 0
        @response.read_body { |part|
          # Content-Length is expressed in bytes, so count bytes rather than
          # characters where the String API distinguishes the two.
          total += part.respond_to?(:bytesize) ? part.bytesize : part.length
          body.write(part)
          Mechanize.log.debug("Read #{total} bytes") if Mechanize.log
        }
        body.rewind

        res_klass = Net::HTTPResponse::CODE_TO_OBJ[@response.code.to_s]
        raise ResponseCodeError.new(@response) unless res_klass

        # Net::HTTP ignores EOFError if Content-length is given, so we emulate it here.
        unless res_klass <= Net::HTTPRedirection
          unless params[:request].is_a?(Net::HTTP::Head)
            expected = @response.content_length
            if expected && expected != total
              raise EOFError, "Content-Length (#{expected}) does not match " \
                              "response body length (#{total})"
            end
          end
        end

        @response.each_header { |k,v|
          Mechanize.log.debug("response-header: #{ k } => #{ v }")
        } if Mechanize.log

        params[:response_body] = body
        params[:res_klass] = res_klass
        super
      end
    end
  end
end
|
@@ -0,0 +1,40 @@
|
|
1
|
+
class Mechanize
  class Chain
    # Chain handler that switches the connection in <tt>params[:connection]</tt>
    # to SSL when the request URI uses the https scheme.
    class SSLResolver
      include Mechanize::Handler

      # ca_file::         path to a CA bundle; when given, peer verification
      #                   is turned on
      # verify_callback:: optional callback installed together with +ca_file+
      # cert::            path to a client certificate file
      # key::             path to the client's RSA private key file
      # pass::            pass phrase used to read +key+
      def initialize(ca_file, verify_callback, cert, key, pass)
        @ca_file = ca_file
        @verify_callback = verify_callback
        @cert = cert
        @key = key
        @pass = pass
      end

      # Configures SSL on the connection if the URI is https, the connection
      # has not been started and its SSL context (if any) is still mutable,
      # then passes control to the next handler via +super+.
      def handle(ctx, params)
        uri = params[:uri]
        http_obj = params[:connection]

        ssl = nil
        if http_obj.instance_variable_defined?(:@ssl_context)
          ssl = http_obj.instance_variable_get(:@ssl_context)
        end

        # A missing context must count as "not frozen". Asking nil directly
        # is unreliable because nil itself reports frozen? == true on
        # Ruby 2.2 and later, which would skip the SSL setup entirely.
        ssl_frozen = ssl ? ssl.frozen? : false

        if uri.scheme == 'https' && !http_obj.started? && !ssl_frozen
          http_obj.use_ssl = true
          # NOTE(review): peer verification stays disabled unless a CA file
          # was supplied. This is the existing default, kept for compatibility.
          http_obj.verify_mode = OpenSSL::SSL::VERIFY_NONE
          if @ca_file
            http_obj.ca_file = @ca_file
            http_obj.verify_mode = OpenSSL::SSL::VERIFY_PEER
            http_obj.verify_callback = @verify_callback if @verify_callback
          end
          if @cert && @key
            http_obj.cert = OpenSSL::X509::Certificate.new(::File.read(@cert))
            http_obj.key = OpenSSL::PKey::RSA.new(::File.read(@key), @pass)
          end
        end
        super
      end
    end
  end
end
|
@@ -0,0 +1,75 @@
|
|
1
|
+
class Mechanize
  class Chain
    # Chain handler that turns <tt>params[:uri]</tt> (a URI or anything that
    # responds to +to_s+) into an absolute http, https or file URI, resolving
    # relative references against <tt>params[:referer]</tt>.
    class URIResolver
      include Mechanize::Handler

      # scheme_handlers:: a mapping from a lower-case scheme name (or
      #                   'relative') to an object responding to
      #                   <tt>call(uri, referer)</tt> that returns the URI to
      #                   continue with.
      def initialize(scheme_handlers)
        @scheme_handlers = scheme_handlers
      end

      # Resolves <tt>params[:uri]</tt> and stores the result back into
      # <tt>params[:uri]</tt> before passing control on via +super+.
      #
      # Raises ArgumentError when no uri was given, and RuntimeError when a
      # relative uri cannot be resolved ('need absolute URL') or the final
      # scheme is not http, https or file.
      def handle(ctx, params)
        raise ArgumentError.new('uri must be specified') unless params[:uri]
        # Work on a copy so the caller's URI object is never modified
        params[:uri] = params[:uri].dup if params[:uri].is_a?(URI)
        uri = params[:uri]
        referer = params[:referer]

        uri = parse_string_uri(uri) unless uri.is_a?(URI)

        uri = @scheme_handlers[
          uri.relative? ? 'relative' : uri.scheme.downcase
        ].call(uri, referer)

        # A reference with no path (e.g. "?a=b") inherits the referer's path
        if referer && referer.uri
          if uri.path.length == 0 && uri.relative?
            uri.path = referer.uri.path
          end
        end

        uri.path = '/' if uri.path.length == 0

        if uri.relative?
          raise 'need absolute URL' unless referer && referer.uri
          base = nil
          if referer.respond_to?(:bases) && referer.parser
            base = referer.bases.last
          end

          # Prefer the referer's last absolute <base>; otherwise resolve
          # against the referer's own URI. One join is sufficient: the result
          # is absolute, and joining an absolute URI again returns it as is.
          base_uri = (base && base.uri && base.uri.absolute?) ?
            base.uri :
            referer.uri
          uri = base_uri + uri

          # Strip initial "/.." bits from the path
          uri.path.sub!(/^(\/\.\.)+(?=\/)/, '')
        end

        unless ['http', 'https', 'file'].include?(uri.scheme.downcase)
          raise "unsupported scheme: #{uri.scheme}"
        end
        params[:uri] = uri

        super
      end

      private

      # Converts a non-URI value into a URI object: percent-encodes
      # characters outside the 0-126 range, escapes everything except existing
      # %XX runs and '#', HTML-unescapes the result and parses it. A second,
      # fully escaped parse is attempted when the first one fails.
      def parse_string_uri(raw)
        text = raw.to_s.strip.gsub(/[^#{0.chr}-#{126.chr}]/) { |match|
          if RUBY_VERSION >= "1.9.0"
            CGI.escape(match)
          else
            sprintf('%%%X', match.unpack($KCODE == 'UTF8' ? 'U' : 'c')[0])
          end
        }

        # Split on the parts that must stay untouched, escape what lies
        # between them, then stitch the pieces back together in order.
        escaped_uri = Util.html_unescape(
          text.split(/(?:%[0-9A-Fa-f]{2})+|#/).zip(
            text.scan(/(?:%[0-9A-Fa-f]{2})+|#/)
          ).map { |x,y|
            "#{URI.escape(x)}#{y}"
          }.join('')
        )

        begin
          URI.parse(escaped_uri)
        rescue
          URI.parse(URI.escape(escaped_uri))
        end
      end
    end
  end
end
|
@@ -0,0 +1,14 @@
|
|
1
|
+
class Mechanize
  # =Synopsis
  # This class contains an error for when a pluggable parser tries to
  # parse a content type that it does not know how to handle.  For example
  # if Mechanize::Page were to try to parse a PDF, a ContentTypeError
  # would be thrown.
  class ContentTypeError < RuntimeError
    # The content type that could not be handled, as given to #initialize.
    attr_reader :content_type

    # content_type:: the offending content type; it is kept for inspection
    #                and included in the exception message.
    def initialize(content_type)
      @content_type = content_type
      # Give the exception a meaningful message instead of the bare class
      # name that RuntimeError falls back to.
      super("unsupported content type: #{content_type}")
    end
  end
end
|
@@ -0,0 +1,70 @@
|
|
1
|
+
require 'time'
|
2
|
+
require 'webrick/cookie'
|
3
|
+
|
4
|
+
class Mechanize
  # This class is used to represent an HTTP Cookie.
  class Cookie < WEBrick::Cookie
    # Parses the cookie header value +str+ received from +uri+.
    #
    # uri:: supplies the default path and domain for cookies that carry none
    # str:: the header value; several cookies may be separated by commas
    # log:: optional logger that receives warnings about unparsable parts
    #
    # Each successfully parsed cookie is yielded when a block is given.
    # Returns an array with one entry per cookie segment: the block's result
    # when a block is given, otherwise the cookie itself. Segments whose
    # name/value part could not be parsed produce +nil+.
    def self.parse(uri, str, log = Mechanize.log)
      return str.split(/,(?=[^;,]*=)|,$/).collect { |c|
        cookie_elem = c.split(/;+/)
        first_elem = cookie_elem.shift
        # An empty segment (for example after a leading comma) has no
        # name=value part at all
        next unless first_elem
        first_elem.strip!
        key, value = first_elem.split(/\=/, 2)

        cookie = nil
        begin
          cookie = new(key, WEBrick::HTTPUtils.dequote(value))
        rescue
          log.warn("Couldn't parse key/value: #{first_elem}") if log
        end
        next unless cookie

        cookie_elem.each do |pair|
          pair.strip!
          key, value = pair.split(/\=/, 2)
          # Skip blank attributes such as the one left by a trailing "; "
          next unless key
          if value
            value = WEBrick::HTTPUtils.dequote(value.strip)
          end
          case key.downcase
          when "domain"
            # A bare "domain" attribute carries nothing to assign
            cookie.domain = value.sub(/^\./, '') if value
          when "path" then cookie.path = value
          when 'expires'
            begin
              cookie.expires = Time::parse(value)
            rescue
              if log
                log.warn("Couldn't parse expires: #{value}")
              end
            end
          when "max-age" then
            begin
              cookie.max_age = Integer(value)
            rescue
              log.warn("Couldn't parse max age '#{value}'") if log
              cookie.max_age = nil
            end
          when "comment" then cookie.comment = value
          when "version" then
            begin
              cookie.version = Integer(value)
            rescue
              log.warn("Couldn't parse version '#{value}'") if log
              cookie.version = nil
            end
          when "secure" then cookie.secure = true
          end
        end

        # Fill in whatever the header left unspecified from the request URI
        cookie.path    ||= uri.path.to_s.sub(%r%[^/]*$%, '')
        cookie.secure  ||= false
        cookie.domain  ||= uri.host
        # Move this in to the cookie jar
        block_given? ? yield(cookie) : cookie
      }
    end

    # Returns the cookie as a "name=value" string.
    def to_s
      "#{@name}=#{@value}"
    end
  end
end
|
@@ -0,0 +1,188 @@
|
|
1
|
+
require 'yaml'
|
2
|
+
|
3
|
+
class Mechanize
  # This class is used to manage the Cookies that have been returned from
  # any particular website.
  #
  # Cookies are kept in a nested hash: lower-cased domain => path =>
  # cookie name => cookie.
  class CookieJar
    attr_reader :jar

    def initialize
      @jar = {}
    end

    # Add a cookie to the Jar.
    #
    # The cookie is only stored when the host of +uri+ equals the cookie's
    # domain or is a sub-domain of it. Returns the cookie when it was
    # accepted and +nil+ when it was rejected.
    def add(uri, cookie)
      return unless CookieJar.domain_match?(uri.host, cookie.domain)

      normal_domain = cookie.domain.downcase

      unless @jar.has_key?(normal_domain)
        @jar[normal_domain] = Hash.new { |h,k| h[k] = {} }
      end

      # Do not rely on the default proc alone, in case a loaded jar's inner
      # hashes come back without one
      paths = @jar[normal_domain]
      (paths[cookie.path] ||= {})[cookie.name] = cookie
      cleanup
      cookie
    end

    # Fetch the cookies that should be used for the URI object passed in.
    #
    # An empty path on +url+ is treated as '/'; +url+ itself is not modified.
    def cookies(url)
      cleanup
      request_path = url.path.empty? ? '/' : url.path

      found = []
      @jar.each do |domain, paths|
        next unless CookieJar.domain_match?(url.host, domain)
        paths.each do |path, names|
          # Anchor at the start of the string, not merely at a line start
          found.concat(names.values) if request_path =~ /\A#{Regexp.escape(path)}/
        end
      end

      found.find_all { |cookie|
        !cookie.expires || Time.now < cookie.expires
      }
    end

    # Returns true when no cookie would be sent to +url+.
    def empty?(url)
      cookies(url).empty?
    end

    # Returns every cookie in the jar as a flat array.
    def to_a
      @jar.values.map { |paths|
        paths.values.map { |names| names.values }
      }.flatten
    end

    # Save the cookie jar to a file in the format specified.
    #
    # Available formats:
    # :yaml  <- YAML structure
    # :cookiestxt  <- Mozilla's cookies.txt format
    #
    # Raises RuntimeError for any other format, before the file is opened,
    # so that an existing file is not truncated by a bad call.
    def save_as(file, format = :yaml)
      unless [:yaml, :cookiestxt].include?(format)
        raise "Unknown cookie jar file format"
      end

      ::File.open(file, "w") { |f|
        case format
        when :yaml then
          YAML::dump(@jar, f)
        when :cookiestxt then
          dump_cookiestxt(f)
        end
      }
    end

    # Load cookie jar from a file in the format specified.
    #
    # Available formats:
    # :yaml  <- YAML structure.
    # :cookiestxt  <- Mozilla's cookies.txt format
    def load(file, format = :yaml)
      @jar = ::File.open(file) { |f|
        case format
        when :yaml then
          # NOTE(review): YAML::load can instantiate arbitrary objects; only
          # load cookie files that come from a trusted source.
          YAML::load(f)
        when :cookiestxt then
          load_cookiestxt(f)
        else
          raise "Unknown cookie jar file format"
        end
      }
    end

    # Clear the cookie jar
    def clear!
      @jar = {}
    end


    # Read cookies from Mozilla cookies.txt-style IO stream
    #
    # Lines that do not have exactly seven tab-separated fields and cookies
    # that have already expired are skipped. Returns the jar hash.
    def load_cookiestxt(io)
      now = Time.now
      fakeuri = Struct.new(:host)    # add_cookie wants something resembling a URI.

      io.each_line do |line|
        line.chomp!
        line.gsub!(/#.+/, '')
        fields = line.split("\t")

        next if fields.length != 7

        expires_seconds = fields[4].to_i
        begin
          expires = Time.at(expires_seconds)
        rescue
          next
          # Just in case we ever decide to support DateTime...
          # expires = DateTime.new(1970,1,1) + ((expires_seconds + 1) / (60*60*24.0))
        end
        next if expires < now

        c = Mechanize::Cookie.new(fields[5], fields[6])
        c.domain = fields[0]
        # Field 1 indicates whether the cookie can be read by other machines at the same domain.
        # This is computed by the cookie implementation, based on the domain value.
        c.path = fields[2]               # Path for which the cookie is relevant
        c.secure = (fields[3] == "TRUE") # Requires a secure connection
        c.expires = expires             # Time the cookie expires.
        c.version = 0                   # Conforms to Netscape cookie spec.

        add(fakeuri.new(c.domain), c)
      end
      @jar
    end

    # Write cookies to Mozilla cookies.txt-style IO stream
    def dump_cookiestxt(io)
      to_a.each do |cookie|
        fields = [
          cookie.domain,
          # "TRUE" when the domain starts with a dot
          (cookie.domain =~ /^\./) ? "TRUE" : "FALSE",
          cookie.path,
          (cookie.secure == true) ? "TRUE" : "FALSE",
          cookie.expires.to_i.to_s,
          cookie.name,
          cookie.value
        ]
        io.puts(fields.join("\t"))
      end
    end

    private
    # Remove expired cookies
    def cleanup
      now = Time.now
      @jar.each_value do |paths|
        paths.each_value do |names|
          # delete_if removes entries safely while walking the hash
          names.delete_if { |_, cookie| cookie.expires && now > cookie.expires }
        end
      end
    end

    # Removes a trailing ":port" from +host+.
    def self.strip_port(host)
      host.gsub(/:[0-9]+$/,'')
    end

    # Returns true when +host+ is +domain+ itself or a sub-domain of it.
    # The comparison ignores case, a port on +domain+ and a leading dot on
    # +domain+. The domain is matched literally and only on a label
    # boundary, so "badexample.com" does not match "example.com".
    def self.domain_match?(host, domain)
      return false unless host && domain
      bare = strip_port(domain).sub(/\A\./, '')
      !!(host =~ /(?:\A|\.)#{Regexp.escape(bare)}\z/i)
    end
  end
end
|
@@ -0,0 +1,71 @@
|
|
1
|
+
class Mechanize
  # = Synopsis
  # This is the default (and base) class for the Pluggable Parsers.  If
  # Mechanize cannot find an appropriate class to use for the content type,
  # this class will be used.  For example, if you download a JPG, Mechanize
  # will not know how to parse it, so this class will be instantiated.
  #
  # This is a good class to use as the base class for building your own
  # pluggable parsers.
  #
  # == Example
  #  require 'rubygems'
  #  require 'mechanize'
  #
  #  agent = Mechanize.new
  #  agent.get('http://example.com/foo.jpg').class  #=> Mechanize::File
  #
  class File
    attr_accessor :uri, :response, :body, :code, :filename
    alias :header :response

    alias :content :body

    # uri::      the URI the content was fetched from
    # response:: any object whose +each+ yields header name/value pairs
    # body::     the raw content
    # code::     the response code
    #
    # The file name is taken from the Content-Disposition header when one is
    # present, otherwise from the last path segment of +uri+ (with ".html"
    # appended when it contains no dot), and defaults to 'index.html'.
    # Yields the new object when a block is given.
    def initialize(uri=nil, response=nil, body=nil, code=nil)
      @uri, @body, @code = uri, body, code
      @response = Headers.new

      # Copy the headers in to a hash to prevent memory leaks
      if response
        response.each { |k,v|
          @response[k] = v
        }
      end

      @filename = 'index.html'

      # Set the filename
      if disposition = @response['content-disposition']
        disposition.split(/;\s*/).each do |pair|
          k,v = pair.split(/=/, 2)
          next unless k && k.downcase == 'filename'
          name = sanitize_filename(v)
          @filename = name if name
        end
      else
        if @uri
          @filename = @uri.path.split(/\//).last || 'index.html'
          @filename += ".html" unless @filename =~ /\./
        end
      end

      yield self if block_given?
    end

    # Use this method to save the content of this object to filename
    #
    # When no name is given the object's own filename is used, and a numeric
    # suffix (".1", ".2", ...) is appended until the name is not taken.
    def save_as(filename = nil)
      if filename.nil?
        filename = @filename
        number = 1
        # File.exist? is used because File.exists? is gone from current Ruby
        while ::File.exist?(filename)
          filename = "#{@filename}.#{number}"
          number += 1
        end
      end

      ::File::open(filename, "wb") { |f|
        f.write body
      }
    end

    alias :save :save_as

    private

    # Turns a server-supplied file name into a bare file name: surrounding
    # double quotes and any directory components are removed so the value
    # cannot point outside the directory a file is saved to. Returns nil
    # when nothing usable remains.
    def sanitize_filename(raw)
      return nil unless raw
      name = raw.strip.sub(/\A"(.*)"\z/, '\1')
      name = ::File.basename(name.tr('\\', '/'))
      return nil if name.empty? || %w[. .. /].include?(name)
      name
    end
  end
end
|