knu-mechanize 0.9.3.20090623142847
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGELOG.rdoc +504 -0
- data/EXAMPLES.rdoc +171 -0
- data/FAQ.rdoc +11 -0
- data/GUIDE.rdoc +122 -0
- data/LICENSE.rdoc +340 -0
- data/Manifest.txt +169 -0
- data/README.rdoc +60 -0
- data/Rakefile +43 -0
- data/examples/flickr_upload.rb +23 -0
- data/examples/mech-dump.rb +7 -0
- data/examples/proxy_req.rb +9 -0
- data/examples/rubyforge.rb +21 -0
- data/examples/spider.rb +11 -0
- data/lib/mechanize.rb +7 -0
- data/lib/www/mechanize.rb +619 -0
- data/lib/www/mechanize/chain.rb +34 -0
- data/lib/www/mechanize/chain/auth_headers.rb +80 -0
- data/lib/www/mechanize/chain/body_decoding_handler.rb +48 -0
- data/lib/www/mechanize/chain/connection_resolver.rb +78 -0
- data/lib/www/mechanize/chain/custom_headers.rb +23 -0
- data/lib/www/mechanize/chain/handler.rb +9 -0
- data/lib/www/mechanize/chain/header_resolver.rb +53 -0
- data/lib/www/mechanize/chain/parameter_resolver.rb +24 -0
- data/lib/www/mechanize/chain/post_connect_hook.rb +0 -0
- data/lib/www/mechanize/chain/pre_connect_hook.rb +22 -0
- data/lib/www/mechanize/chain/request_resolver.rb +32 -0
- data/lib/www/mechanize/chain/response_body_parser.rb +40 -0
- data/lib/www/mechanize/chain/response_header_handler.rb +50 -0
- data/lib/www/mechanize/chain/response_reader.rb +41 -0
- data/lib/www/mechanize/chain/ssl_resolver.rb +42 -0
- data/lib/www/mechanize/chain/uri_resolver.rb +77 -0
- data/lib/www/mechanize/content_type_error.rb +16 -0
- data/lib/www/mechanize/cookie.rb +72 -0
- data/lib/www/mechanize/cookie_jar.rb +191 -0
- data/lib/www/mechanize/file.rb +73 -0
- data/lib/www/mechanize/file_response.rb +62 -0
- data/lib/www/mechanize/file_saver.rb +39 -0
- data/lib/www/mechanize/form.rb +360 -0
- data/lib/www/mechanize/form/button.rb +8 -0
- data/lib/www/mechanize/form/check_box.rb +13 -0
- data/lib/www/mechanize/form/field.rb +28 -0
- data/lib/www/mechanize/form/file_upload.rb +24 -0
- data/lib/www/mechanize/form/image_button.rb +23 -0
- data/lib/www/mechanize/form/multi_select_list.rb +69 -0
- data/lib/www/mechanize/form/option.rb +51 -0
- data/lib/www/mechanize/form/radio_button.rb +38 -0
- data/lib/www/mechanize/form/select_list.rb +45 -0
- data/lib/www/mechanize/headers.rb +12 -0
- data/lib/www/mechanize/history.rb +67 -0
- data/lib/www/mechanize/inspect.rb +90 -0
- data/lib/www/mechanize/monkey_patch.rb +37 -0
- data/lib/www/mechanize/page.rb +181 -0
- data/lib/www/mechanize/page/base.rb +10 -0
- data/lib/www/mechanize/page/frame.rb +22 -0
- data/lib/www/mechanize/page/link.rb +50 -0
- data/lib/www/mechanize/page/meta.rb +51 -0
- data/lib/www/mechanize/pluggable_parsers.rb +103 -0
- data/lib/www/mechanize/redirect_limit_reached_error.rb +18 -0
- data/lib/www/mechanize/redirect_not_get_or_head_error.rb +20 -0
- data/lib/www/mechanize/response_code_error.rb +25 -0
- data/lib/www/mechanize/unsupported_scheme_error.rb +10 -0
- data/lib/www/mechanize/util.rb +76 -0
- data/mechanize.gemspec +41 -0
- data/test/chain/test_argument_validator.rb +14 -0
- data/test/chain/test_auth_headers.rb +25 -0
- data/test/chain/test_custom_headers.rb +18 -0
- data/test/chain/test_header_resolver.rb +28 -0
- data/test/chain/test_parameter_resolver.rb +35 -0
- data/test/chain/test_request_resolver.rb +29 -0
- data/test/chain/test_response_reader.rb +24 -0
- data/test/data/htpasswd +1 -0
- data/test/data/server.crt +16 -0
- data/test/data/server.csr +12 -0
- data/test/data/server.key +15 -0
- data/test/data/server.pem +15 -0
- data/test/helper.rb +129 -0
- data/test/htdocs/alt_text.html +10 -0
- data/test/htdocs/bad_form_test.html +9 -0
- data/test/htdocs/button.jpg +0 -0
- data/test/htdocs/empty_form.html +6 -0
- data/test/htdocs/file_upload.html +26 -0
- data/test/htdocs/find_link.html +41 -0
- data/test/htdocs/form_multi_select.html +16 -0
- data/test/htdocs/form_multival.html +37 -0
- data/test/htdocs/form_no_action.html +18 -0
- data/test/htdocs/form_no_input_name.html +16 -0
- data/test/htdocs/form_select.html +16 -0
- data/test/htdocs/form_select_all.html +16 -0
- data/test/htdocs/form_select_none.html +17 -0
- data/test/htdocs/form_select_noopts.html +10 -0
- data/test/htdocs/form_set_fields.html +14 -0
- data/test/htdocs/form_test.html +188 -0
- data/test/htdocs/frame_test.html +30 -0
- data/test/htdocs/google.html +13 -0
- data/test/htdocs/iframe_test.html +16 -0
- data/test/htdocs/index.html +6 -0
- data/test/htdocs/link with space.html +5 -0
- data/test/htdocs/meta_cookie.html +11 -0
- data/test/htdocs/no_title_test.html +6 -0
- data/test/htdocs/relative/tc_relative_links.html +21 -0
- data/test/htdocs/tc_bad_links.html +5 -0
- data/test/htdocs/tc_base_link.html +8 -0
- data/test/htdocs/tc_blank_form.html +11 -0
- data/test/htdocs/tc_checkboxes.html +19 -0
- data/test/htdocs/tc_encoded_links.html +5 -0
- data/test/htdocs/tc_follow_meta.html +8 -0
- data/test/htdocs/tc_form_action.html +48 -0
- data/test/htdocs/tc_links.html +18 -0
- data/test/htdocs/tc_no_attributes.html +16 -0
- data/test/htdocs/tc_pretty_print.html +17 -0
- data/test/htdocs/tc_radiobuttons.html +17 -0
- data/test/htdocs/tc_referer.html +10 -0
- data/test/htdocs/tc_relative_links.html +19 -0
- data/test/htdocs/tc_textarea.html +23 -0
- data/test/htdocs/unusual______.html +5 -0
- data/test/servlets.rb +365 -0
- data/test/ssl_server.rb +48 -0
- data/test/test_authenticate.rb +71 -0
- data/test/test_bad_links.rb +25 -0
- data/test/test_blank_form.rb +16 -0
- data/test/test_checkboxes.rb +61 -0
- data/test/test_content_type.rb +13 -0
- data/test/test_cookie_class.rb +338 -0
- data/test/test_cookie_jar.rb +362 -0
- data/test/test_cookies.rb +123 -0
- data/test/test_encoded_links.rb +20 -0
- data/test/test_errors.rb +49 -0
- data/test/test_follow_meta.rb +108 -0
- data/test/test_form_action.rb +52 -0
- data/test/test_form_as_hash.rb +61 -0
- data/test/test_form_button.rb +38 -0
- data/test/test_form_no_inputname.rb +15 -0
- data/test/test_forms.rb +564 -0
- data/test/test_frames.rb +25 -0
- data/test/test_get_headers.rb +52 -0
- data/test/test_gzipping.rb +22 -0
- data/test/test_hash_api.rb +45 -0
- data/test/test_history.rb +142 -0
- data/test/test_history_added.rb +16 -0
- data/test/test_html_unscape_forms.rb +39 -0
- data/test/test_if_modified_since.rb +20 -0
- data/test/test_keep_alive.rb +31 -0
- data/test/test_links.rb +120 -0
- data/test/test_mech.rb +268 -0
- data/test/test_mechanize_file.rb +47 -0
- data/test/test_meta.rb +65 -0
- data/test/test_multi_select.rb +106 -0
- data/test/test_no_attributes.rb +13 -0
- data/test/test_option.rb +18 -0
- data/test/test_page.rb +119 -0
- data/test/test_pluggable_parser.rb +145 -0
- data/test/test_post_form.rb +34 -0
- data/test/test_pretty_print.rb +22 -0
- data/test/test_radiobutton.rb +75 -0
- data/test/test_redirect_limit_reached.rb +41 -0
- data/test/test_redirect_verb_handling.rb +45 -0
- data/test/test_referer.rb +39 -0
- data/test/test_relative_links.rb +40 -0
- data/test/test_request.rb +13 -0
- data/test/test_response_code.rb +52 -0
- data/test/test_save_file.rb +48 -0
- data/test/test_scheme.rb +48 -0
- data/test/test_select.rb +106 -0
- data/test/test_select_all.rb +15 -0
- data/test/test_select_none.rb +15 -0
- data/test/test_select_noopts.rb +16 -0
- data/test/test_set_fields.rb +44 -0
- data/test/test_ssl_server.rb +20 -0
- data/test/test_subclass.rb +14 -0
- data/test/test_textarea.rb +45 -0
- data/test/test_upload.rb +109 -0
- data/test/test_verbs.rb +25 -0
- metadata +314 -0
@@ -0,0 +1,50 @@
|
|
1
|
+
module WWW
  class Mechanize
    class Chain
      # Chain handler that post-processes a response: it records any
      # Keep-Alive options the server sent in the per-host connection cache
      # and stores cookies found in <meta http-equiv="Set-Cookie"> tags and
      # in Set-Cookie response headers in the cookie jar.
      class ResponseHeaderHandler
        include WWW::Handler

        # cookie_jar::       object responding to add(uri, cookie)
        # connection_cache:: hash keyed by "host:port"; each value is a hash
        #                    with :connection and :keep_alive_options entries
        def initialize(cookie_jar, connection_cache)
          @cookie_jar = cookie_jar
          @connection_cache = connection_cache
        end

        # Reads params[:response], params[:uri] and params[:page], updates
        # the connection cache and the cookie jar, then passes control on
        # via super.
        def handle(ctx, params)
          response = params[:response]
          uri = params[:uri]
          page = params[:page]

          # The cache entry is created even when no Keep-Alive header is
          # present, exactly as before.
          cache_obj = (@connection_cache["#{uri.host}:#{uri.port}"] ||= {
            :connection => nil,
            :keep_alive_options => {},
          })

          # If the server sends back keep alive options, save them
          remember_keep_alive_options(cache_obj, response['keep-alive'])

          # Cheap textual pre-check before running the XPath search.
          if page.is_a?(Page) && page.body =~ /Set-Cookie/n
            page.search('//meta[@http-equiv="Set-Cookie"]').each do |meta|
              save_cookies(uri, meta['content'])
            end
          end

          (response.get_fields('Set-Cookie') || []).each do |cookie|
            save_cookies(uri, cookie)
          end
          super
        end

        private

        # Parses a "k=v, k2=v2" Keep-Alive header value into
        # cache_obj[:keep_alive_options]. Options without a key (for example
        # an empty element between two commas) are skipped instead of
        # raising on nil.intern.
        def remember_keep_alive_options(cache_obj, keep_alive_info)
          return unless keep_alive_info

          options = (cache_obj[:keep_alive_options] ||= {})
          keep_alive_info.split(/,\s*/).each do |option|
            k, v = option.split(/=/)
            next if k.nil? || k.empty?
            options[k.intern] = v
          end
        end

        # Parses one Set-Cookie value and adds every resulting cookie to the
        # jar. A nil value (a meta tag without a content attribute) is
        # ignored rather than handed to Cookie.parse.
        def save_cookies(uri, header_value)
          return unless header_value

          Cookie::parse(uri, header_value) { |c|
            Mechanize.log.debug("saved cookie: #{c}") if Mechanize.log
            @cookie_jar.add(uri, c)
          }
        end
      end
    end
  end
end
|
@@ -0,0 +1,41 @@
|
|
1
|
+
module WWW
  class Mechanize
    class Chain
      # Chain handler that drains the body of an HTTP response into an
      # in-memory buffer and publishes the buffer and the response class
      # to the following handlers.
      class ResponseReader
        include WWW::Handler

        # response:: the response object whose body will be read
        def initialize(response)
          @response = response
        end

        # Sets params[:response], params[:response_body] (a rewound
        # StringIO) and params[:res_klass], then continues via super.
        #
        # Raises ResponseCodeError when the status code has no entry in
        # Net::HTTPResponse::CODE_TO_OBJ, and EOFError when a declared
        # Content-Length differs from the number of bytes actually read.
        def handle(ctx, params)
          params[:response] = @response

          buffer = StringIO.new
          bytes_read = 0
          @response.read_body do |chunk|
            bytes_read += chunk.length
            buffer.write(chunk)
            Mechanize.log.debug("Read #{bytes_read} bytes") if Mechanize.log
          end
          buffer.rewind

          res_klass = Net::HTTPResponse::CODE_TO_OBJ[@response.code.to_s]
          raise ResponseCodeError.new(@response) unless res_klass

          # Net::HTTP ignores EOFError if Content-length is given, so we emulate it here.
          # Redirects and HEAD requests are exempt from the length check.
          length_checked = !(res_klass <= Net::HTTPRedirection) &&
                           !params[:request].is_a?(Net::HTTP::Head)
          if length_checked
            declared = @response.content_length
            raise EOFError if declared && declared != bytes_read
          end

          if Mechanize.log
            @response.each_header do |name, value|
              Mechanize.log.debug("response-header: #{name} => #{value}")
            end
          end

          params[:response_body] = buffer
          params[:res_klass] = res_klass
          super
        end
      end
    end
  end
end
|
@@ -0,0 +1,42 @@
|
|
1
|
+
module WWW
  class Mechanize
    class Chain
      # Chain handler that switches the connection in params[:connection]
      # to SSL and applies the configured CA file, verify callback and
      # client certificate when the target URI uses the https scheme.
      class SSLResolver
        include WWW::Handler

        # ca_file::         path to a CA bundle; when set, peer verification
        #                   is enabled
        # verify_callback:: optional callback used together with ca_file
        # cert, key::       paths to the client certificate and its RSA key
        # pass::            passphrase for the key
        def initialize(ca_file, verify_callback, cert, key, pass)
          @ca_file = ca_file
          @verify_callback = verify_callback
          @cert = cert
          @key = key
          @pass = pass
        end

        # Configures SSL on the connection (only if it has not been started
        # and its SSL context is not frozen), then continues via super.
        def handle(ctx, params)
          uri = params[:uri]
          http_obj = params[:connection]

          # Fetch the connection's SSL context, if it has one. The previous
          # code discarded the fetched value, so the frozen check below
          # always ran against nil -- which reports frozen on Rubies where
          # nil is a frozen object, silently skipping all SSL setup.
          ssl = nil
          if http_obj.instance_variable_defined?(:@ssl_context)
            ssl = http_obj.instance_variable_get(:@ssl_context)
          end
          # A missing context is never treated as frozen.
          context_frozen = ssl ? ssl.frozen? : false

          if uri.scheme == 'https' && ! http_obj.started? && ! context_frozen
            http_obj.use_ssl = true
            # NOTE(review): without a ca_file the peer certificate is not
            # verified. Kept as-is because callers depend on this default.
            http_obj.verify_mode = OpenSSL::SSL::VERIFY_NONE
            if @ca_file
              http_obj.ca_file = @ca_file
              http_obj.verify_mode = OpenSSL::SSL::VERIFY_PEER
              http_obj.verify_callback = @verify_callback if @verify_callback
            end
            if @cert && @key
              http_obj.cert = OpenSSL::X509::Certificate.new(::File.read(@cert))
              http_obj.key = OpenSSL::PKey::RSA.new(::File.read(@key), @pass)
            end
          end
          super
        end
      end
    end
  end
end
|
@@ -0,0 +1,77 @@
|
|
1
|
+
module WWW
  class Mechanize
    class Chain
      # Chain handler that turns params[:uri] (a URI or anything with a
      # to_s) into an absolute http, https or file URI, resolving relative
      # references against the referer page, and stores the result back in
      # params[:uri].
      class URIResolver
        include WWW::Handler

        # scheme_handlers:: hash-like object mapping a lower-case scheme
        #                   name (or 'relative') to a callable taking
        #                   (uri, referer) and returning a URI
        def initialize(scheme_handlers)
          @scheme_handlers = scheme_handlers
        end

        # Raises ArgumentError when params[:uri] is missing, and
        # RuntimeError when a relative URI cannot be resolved or the
        # resulting scheme is not http, https or file.
        def handle(ctx, params)
          raise ArgumentError.new('uri must be specified') unless params[:uri]
          # Work on a copy so the caller's URI object is not modified below.
          params[:uri] = params[:uri].dup if params[:uri].is_a?(URI)
          uri = params[:uri]
          referer = params[:referer]

          uri = parse_uri_string(uri) unless uri.is_a?(URI)

          uri = @scheme_handlers[
            uri.relative? ? 'relative' : uri.scheme.downcase
          ].call(uri, referer)

          # A reference with no path at all (for example "?a=b") refers to
          # the referer's own path.
          if referer && referer.uri
            if uri.path.length == 0 && uri.relative?
              uri.path = referer.uri.path
            end
          end

          uri.path = '/' if uri.path.length == 0

          if uri.relative?
            raise 'need absolute URL' unless referer && referer.uri
            base = nil
            if referer.respond_to?(:bases) && referer.parser
              base = referer.bases.last
            end

            # Prefer the page's last <base> URI when it is absolute. The
            # join yields an absolute URI, so the former second join
            # against referer.uri was a no-op and has been removed.
            uri = ((base && base.uri && base.uri.absolute?) ?
                    base.uri :
                    referer.uri) + uri
            # Strip initial "/.." bits from the path
            uri.path.sub!(/^(\/\.\.)+(?=\/)/, '')
          end

          unless ['http', 'https', 'file'].include?(uri.scheme.downcase)
            raise "unsupported scheme: #{uri.scheme}"
          end
          params[:uri] = uri

          super
        end

        private

        # Converts an arbitrary object into a URI via its string form.
        # NOTE(review): URI.escape no longer exists in Ruby 3.0 and later;
        # this code path needs a replacement before running there.
        def parse_uri_string(raw)
          # Percent-encode every character outside the 0..126 code range.
          uri = raw.to_s.strip.gsub(/[^#{0.chr}-#{126.chr}]/) { |match|
            if RUBY_VERSION >= "1.9.0"
              CGI.escape(match)
            else
              sprintf('%%%X', match.unpack($KCODE == 'UTF8' ? 'U' : 'c')[0])
            end
          }

          # Escape only the text between existing percent-escapes and '#'
          # separators, so already-escaped sequences are not escaped twice.
          escaped_uri = Util.html_unescape(
            uri.split(/(?:%[0-9A-Fa-f]{2})+|#/).zip(
              uri.scan(/(?:%[0-9A-Fa-f]{2})+|#/)
            ).map { |x,y|
              "#{URI.escape(x)}#{y}"
            }.join('')
          )

          begin
            URI.parse(escaped_uri)
          rescue
            # Second chance: escape once more and parse again.
            URI.parse(URI.escape(escaped_uri))
          end
        end
      end
    end
  end
end
|
@@ -0,0 +1,16 @@
|
|
1
|
+
module WWW
  class Mechanize
    # =Synopsis
    # This class contains an error for when a pluggable parser tries to
    # parse a content type that it does not know how to handle.  For example
    # if WWW::Mechanize::Page were to try to parse a PDF, a ContentTypeError
    # would be thrown.
    class ContentTypeError < RuntimeError
      # The content type that could not be handled.
      attr_reader :content_type

      # content_type:: the offending content type; it is also included in
      #                the exception message so that logs and backtraces
      #                show what was rejected.
      def initialize(content_type)
        @content_type = content_type
        super("unexpected content type: #{content_type.inspect}")
      end
    end
  end
end
|
@@ -0,0 +1,72 @@
|
|
1
|
+
require 'time'
|
2
|
+
require 'webrick/cookie'
|
3
|
+
|
4
|
+
module WWW
  class Mechanize
    # This class is used to represent an HTTP Cookie.
    class Cookie < WEBrick::Cookie
      # Parses the value of a Set-Cookie header (possibly holding several
      # comma-separated cookies) into Cookie objects.
      #
      # uri:: URI of the response; supplies the default path and domain
      # str:: raw header value
      # log:: optional logger used for parse warnings
      #
      # Each successfully parsed cookie is yielded to the block, if one is
      # given. Returns an array with one entry per cookie in +str+: the
      # block's result, or nil when the cookie was skipped or no block was
      # given.
      def self.parse(uri, str, log = Mechanize.log)
        # Split only on commas that start a new "name=..." pair (or trail
        # the string), so commas inside attribute values such as expiry
        # dates do not break a cookie apart.
        return str.split(/,(?=[^;,]*=)|,$/).collect { |c|
          cookie_elem = c.split(/;+/)
          first_elem = cookie_elem.shift
          # An element made only of semicolons has no name/value part.
          next unless first_elem
          first_elem.strip!
          key, value = first_elem.split(/=/, 2)

          cookie = nil
          begin
            cookie = new(key, WEBrick::HTTPUtils.dequote(value))
          rescue
            log.warn("Couldn't parse key/value: #{first_elem}") if log
          end
          next unless cookie

          cookie_elem.each{|pair|
            pair.strip!
            key, value = pair.split(/=/, 2)
            # Whitespace-only attributes (e.g. after a trailing ";") split
            # into nothing; skip them instead of calling nil.downcase.
            next unless key
            if value
              value = WEBrick::HTTPUtils.dequote(value.strip)
            end
            case key.downcase
            when "domain"
              # A bare "domain" attribute without a value is ignored.
              cookie.domain = value.sub(/^\./, '') if value
            when "path" then cookie.path = value
            when 'expires'
              begin
                cookie.expires = Time::parse(value)
              rescue
                if log
                  log.warn("Couldn't parse expires: #{value}")
                end
              end
            when "max-age" then
              begin
                cookie.max_age = Integer(value)
              rescue
                log.warn("Couldn't parse max age '#{value}'") if log
                cookie.max_age = nil
              end
            when "comment" then cookie.comment = value
            when "version" then
              begin
                cookie.version = Integer(value)
              rescue
                log.warn("Couldn't parse version '#{value}'") if log
                cookie.version = nil
              end
            when "secure" then cookie.secure = true
            end
          }

          # Defaults: the directory part of the request path, not secure,
          # and the request host.
          cookie.path    ||= uri.path.to_s.sub(/[^\/]*$/, '')
          cookie.secure  ||= false
          cookie.domain  ||= uri.host
          # Move this in to the cookie jar
          yield cookie if block_given?
        }
      end

      # Renders the cookie as "name=value".
      def to_s
        "#{@name}=#{@value}"
      end
    end
  end
end
|
@@ -0,0 +1,191 @@
|
|
1
|
+
require 'yaml'
|
2
|
+
|
3
|
+
module WWW
  class Mechanize
    # This class is used to manage the Cookies that have been returned from
    # any particular website.
    class CookieJar
      # Nested hash: lower-cased domain => path => cookie name => cookie.
      attr_reader :jar

      def initialize
        @jar = {}
      end

      # Add a cookie to the Jar.
      #
      # The cookie is stored only when the host of +uri+ belongs to the
      # cookie's domain (see CookieJar.domain_match?). Returns the cookie
      # when it was accepted, nil otherwise.
      def add(uri, cookie)
        return unless CookieJar.domain_match?(uri.host, cookie.domain)

        normal_domain = cookie.domain.downcase

        # Create missing levels explicitly: hashes restored from a file do
        # not carry the auto-creating default block.
        paths = (@jar[normal_domain] ||= Hash.new { |h,k| h[k] = {} })
        (paths[cookie.path] ||= {})[cookie.name] = cookie
        cleanup
        cookie
      end

      # Fetch the cookies that should be used for the URI object passed in.
      #
      # Returns the unexpired cookies whose domain matches url.host and
      # whose path is a prefix of url.path (an empty path counts as "/").
      # The passed URI is not modified.
      def cookies(url)
        cleanup
        request_path = url.path.empty? ? '/' : url.path

        domains = @jar.find_all { |domain, _|
          CookieJar.domain_match?(url.host, domain)
        }

        return [] unless domains.length > 0

        candidates = domains.map { |_, paths|
          paths.find_all { |path, _|
            request_path =~ /\A#{Regexp.escape(path)}/
          }.map { |_, named| named.values }
        }.flatten

        now = Time.now
        candidates.find_all { |cookie|
          !cookie.expires || now < cookie.expires
        }
      end

      # True when no cookie would be sent to +url+.
      def empty?(url)
        cookies(url).empty?
      end

      # All stored cookies as a flat array.
      def to_a
        @jar.values.map { |paths|
          paths.values.map { |names| names.values }
        }.flatten
      end

      # Save the cookie jar to a file in the format specified.
      #
      # Available formats:
      # :yaml  <- YAML structure
      # :cookiestxt  <- Mozilla's cookies.txt format
      #
      # Raises RuntimeError for any other format. The format is checked
      # before the file is opened so that a bad format cannot truncate an
      # existing file.
      def save_as(file, format = :yaml)
        unless [:yaml, :cookiestxt].include?(format)
          raise "Unknown cookie jar file format"
        end

        ::File.open(file, "w") { |f|
          case format
          when :yaml then
            YAML::dump(@jar, f)
          when :cookiestxt then
            dump_cookiestxt(f)
          end
        }
      end

      # Load cookie jar from a file in the format specified.
      #
      # Available formats:
      # :yaml  <- YAML structure.
      # :cookiestxt  <- Mozilla's cookies.txt format
      #
      # NOTE(review): YAML::load deserializes arbitrary objects; only load
      # cookie files from trusted sources.
      def load(file, format = :yaml)
        @jar = ::File.open(file) { |f|
          case format
          when :yaml then
            YAML::load(f)
          when :cookiestxt then
            load_cookiestxt(f)
          else
            raise "Unknown cookie jar file format"
          end
        }
      end

      # Clear the cookie jar
      def clear!
        @jar = {}
      end

      # Read cookies from Mozilla cookies.txt-style IO stream
      #
      # Lines that do not have exactly seven tab-separated fields after
      # comment stripping, and cookies that have already expired, are
      # skipped. Returns the jar hash.
      def load_cookiestxt(io)
        now = Time.now
        fakeuri = Struct.new(:host)    # add_cookie wants something resembling a URI.

        io.each_line do |line|
          line.chomp!
          line.gsub!(/#.+/, '')
          fields = line.split("\t")

          next if fields.length != 7

          expires_seconds = fields[4].to_i
          begin
            expires = Time.at(expires_seconds)
          rescue
            next
            # Just in case we ever decide to support DateTime...
            # expires = DateTime.new(1970,1,1) + ((expires_seconds + 1) / (60*60*24.0))
          end
          next if expires < now

          c = WWW::Mechanize::Cookie.new(fields[5], fields[6])
          c.domain = fields[0]
          # Field 1 indicates whether the cookie can be read by other machines at the same domain.
          # This is computed by the cookie implementation, based on the domain value.
          c.path = fields[2]               # Path for which the cookie is relevant
          c.secure = (fields[3] == "TRUE") # Requires a secure connection
          c.expires = expires             # Time the cookie expires.
          c.version = 0                   # Conforms to Netscape cookie spec.

          add(fakeuri.new(c.domain), c)
        end
        @jar
      end

      # Write cookies to Mozilla cookies.txt-style IO stream
      #
      # One line per cookie with the tab-separated fields: domain,
      # domain-wide flag, path, secure flag, expiry (epoch seconds), name,
      # value.
      def dump_cookiestxt(io)
        to_a.each do |cookie|
          io.puts([
            cookie.domain,
            (cookie.domain =~ /^\./) ? "TRUE" : "FALSE",
            cookie.path,
            (cookie.secure == true) ? "TRUE" : "FALSE",
            cookie.expires.to_i.to_s,
            cookie.name,
            cookie.value
          ].join("\t"))
        end
      end

      # Returns +host+ with a trailing ":port" removed.
      def self.strip_port(host)
        host.gsub(/:[0-9]+$/,'')
      end

      # True when +host+ belongs to the cookie +domain+ (case-insensitive,
      # any port on +domain+ ignored).
      #
      # The domain is matched literally and only on a label boundary:
      # "example.com" matches "example.com" and "www.example.com" but
      # neither "badexample.com" nor "exampleXcom". A domain written with
      # a leading dot must appear, dot included, at the end of the host.
      def self.domain_match?(host, domain)
        return false unless host

        literal = Regexp.escape(strip_port(domain))
        pattern = if literal =~ /\A\\\./
          /#{literal}\z/i
        else
          /(?:\A|\.)#{literal}\z/i
        end
        (host.to_s =~ pattern) ? true : false
      end

      private
      # Remove expired cookies
      def cleanup
        now = Time.now
        @jar.each_value do |paths|
          paths.each_value do |names|
            names.delete_if { |_, cookie|
              cookie.expires && now > cookie.expires
            }
          end
        end
      end

    end
  end
end
|