mechanize 2.1 → 2.1.1
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of mechanize might be problematic. Click here for more details.
- data.tar.gz.sig +0 -0
- data/CHANGELOG.rdoc +28 -0
- data/Manifest.txt +1 -1
- data/README.rdoc +1 -1
- data/Rakefile +1 -1
- data/examples/wikipedia_links_to_philosophy.rb +159 -0
- data/lib/mechanize.rb +68 -5
- data/lib/mechanize/download.rb +9 -8
- data/lib/mechanize/form.rb +8 -0
- data/lib/mechanize/form/field.rb +8 -0
- data/lib/mechanize/http/agent.rb +107 -65
- data/lib/mechanize/http/www_authenticate_parser.rb +14 -0
- data/lib/mechanize/page.rb +8 -10
- data/lib/mechanize/page/meta_refresh.rb +8 -1
- data/lib/mechanize/parser.rb +1 -1
- data/lib/mechanize/response_read_error.rb +15 -4
- data/lib/mechanize/test_case.rb +10 -0
- data/lib/mechanize/util.rb +23 -15
- data/test/htdocs/tc_referer.html +1 -1
- data/test/test_mechanize.rb +48 -2
- data/test/test_mechanize_download.rb +11 -1
- data/test/test_mechanize_file.rb +7 -0
- data/test/test_mechanize_form.rb +16 -1
- data/test/test_mechanize_http_agent.rb +155 -26
- data/test/test_mechanize_page_encoding.rb +6 -0
- data/test/test_mechanize_page_meta_refresh.rb +10 -0
- data/test/test_mechanize_parser.rb +10 -0
- data/test/test_mechanize_response_read_error.rb +28 -0
- data/test/test_mechanize_util.rb +5 -0
- metadata +47 -30
- metadata.gz.sig +0 -0
- data/FAQ.rdoc +0 -11
@@ -28,6 +28,11 @@ class Mechanize::HTTP::WWWAuthenticateParser
|
|
28
28
|
challenge = Mechanize::HTTP::AuthChallenge.new
|
29
29
|
|
30
30
|
scheme = auth_scheme
|
31
|
+
|
32
|
+
if scheme == 'Negotiate'
|
33
|
+
scan_comma_spaces
|
34
|
+
end
|
35
|
+
|
31
36
|
next unless scheme
|
32
37
|
challenge.scheme = scheme
|
33
38
|
|
@@ -82,6 +87,15 @@ class Mechanize::HTTP::WWWAuthenticateParser
|
|
82
87
|
@scanner.scan(/ +/)
|
83
88
|
end
|
84
89
|
|
90
|
+
##
|
91
|
+
# scans a comma followed by spaces
|
92
|
+
# needed for Negotiation, NTLM
|
93
|
+
#
|
94
|
+
|
95
|
+
def scan_comma_spaces
|
96
|
+
@scanner.scan(/, +/)
|
97
|
+
end
|
98
|
+
|
85
99
|
##
|
86
100
|
# token = 1*<any CHAR except CTLs or separators>
|
87
101
|
#
|
data/lib/mechanize/page.rb
CHANGED
@@ -364,10 +364,14 @@ class Mechanize::Page < Mechanize::File
|
|
364
364
|
return @labels_hash
|
365
365
|
end
|
366
366
|
|
367
|
-
|
368
|
-
charset
|
369
|
-
|
370
|
-
|
367
|
+
class << self
|
368
|
+
def charset content_type
|
369
|
+
charset = content_type[/;(?:\s*,)?\s*charset\s*=\s*([^()<>@,;:\\\"\/\[\]?={}\s]+)/i, 1]
|
370
|
+
return nil if charset == 'none'
|
371
|
+
charset
|
372
|
+
end
|
373
|
+
|
374
|
+
alias charset_from_content_type charset
|
371
375
|
end
|
372
376
|
|
373
377
|
def self.response_header_charset response
|
@@ -422,12 +426,6 @@ class Mechanize::Page < Mechanize::File
|
|
422
426
|
''
|
423
427
|
end
|
424
428
|
end
|
425
|
-
|
426
|
-
def self.charset_from_content_type content_type
|
427
|
-
charset = content_type[/charset=([^; ]+)/i, 1]
|
428
|
-
return nil if charset == 'none'
|
429
|
-
charset
|
430
|
-
end
|
431
429
|
end
|
432
430
|
|
433
431
|
require 'mechanize/headers'
|
@@ -24,6 +24,11 @@ class Mechanize::Page::MetaRefresh < Mechanize::Page::Link
|
|
24
24
|
|
25
25
|
CONTENT_REGEXP = /^\s*(\d+\.?\d*)(;|;\s*url=\s*['"]?(\S*?)['"]?)?\s*$/i
|
26
26
|
|
27
|
+
##
|
28
|
+
# Regexp of unsafe URI characters that excludes % for Issue #177
|
29
|
+
|
30
|
+
UNSAFE = /[^\-_.!~*'()a-zA-Z\d;\/?:@&%=+$,\[\]]/
|
31
|
+
|
27
32
|
##
|
28
33
|
# Parses the delay and url from the content attribute of a meta refresh
|
29
34
|
# element. Parse requires the uri of the current page to infer a url when
|
@@ -37,7 +42,9 @@ class Mechanize::Page::MetaRefresh < Mechanize::Page::Link
|
|
37
42
|
return unless content =~ CONTENT_REGEXP
|
38
43
|
|
39
44
|
link_self = $3.nil? || $3.empty?
|
40
|
-
delay
|
45
|
+
delay = $1
|
46
|
+
refresh_uri = $3
|
47
|
+
refresh_uri = Mechanize::Util.uri_escape refresh_uri, UNSAFE if refresh_uri
|
41
48
|
|
42
49
|
dest = base_uri
|
43
50
|
dest += refresh_uri if refresh_uri
|
data/lib/mechanize/parser.rb
CHANGED
@@ -113,7 +113,7 @@ module Mechanize::Parser
|
|
113
113
|
content_disposition =
|
114
114
|
Mechanize::HTTP::ContentDispositionParser.parse disposition
|
115
115
|
|
116
|
-
if content_disposition then
|
116
|
+
if content_disposition && content_disposition.filename then
|
117
117
|
filename = content_disposition.filename
|
118
118
|
filename = filename.split(/[\\\/]/).last
|
119
119
|
handled = true
|
@@ -7,16 +7,27 @@ class Mechanize::ResponseReadError < Mechanize::Error
|
|
7
7
|
|
8
8
|
attr_reader :body_io
|
9
9
|
attr_reader :error
|
10
|
+
attr_reader :mechanize
|
10
11
|
attr_reader :response
|
12
|
+
attr_reader :uri
|
11
13
|
|
12
14
|
##
|
13
15
|
# Creates a new ResponseReadError with the +error+ raised, the +response+
|
14
16
|
# and the +body_io+ for content read so far.
|
15
17
|
|
16
|
-
def initialize error, response, body_io
|
17
|
-
@
|
18
|
-
@
|
19
|
-
@
|
18
|
+
def initialize error, response, body_io, uri, mechanize
|
19
|
+
@body_io = body_io
|
20
|
+
@error = error
|
21
|
+
@mechanize = mechanize
|
22
|
+
@response = response
|
23
|
+
@uri = uri
|
24
|
+
end
|
25
|
+
|
26
|
+
##
|
27
|
+
# Converts this error into a Page, File, etc. based on the content-type
|
28
|
+
|
29
|
+
def force_parse
|
30
|
+
@mechanize.parse @uri, @response, @body_io
|
20
31
|
end
|
21
32
|
|
22
33
|
def message # :nodoc:
|
data/lib/mechanize/test_case.rb
CHANGED
@@ -108,6 +108,16 @@ UQIBATANBgkqhkiG9w0BAQUFAANBAAAB////////////////////////////////
|
|
108
108
|
CERT
|
109
109
|
end
|
110
110
|
|
111
|
+
def tempfile content
|
112
|
+
body_io = Tempfile.new @__name__
|
113
|
+
body_io.unlink
|
114
|
+
body_io.write content
|
115
|
+
body_io.flush
|
116
|
+
body_io.rewind
|
117
|
+
|
118
|
+
body_io
|
119
|
+
end
|
120
|
+
|
111
121
|
end
|
112
122
|
|
113
123
|
class BasicAuthServlet < WEBrick::HTTPServlet::AbstractServlet
|
data/lib/mechanize/util.rb
CHANGED
@@ -1,11 +1,15 @@
|
|
1
1
|
require 'cgi'
|
2
|
+
require 'nkf'
|
2
3
|
|
3
4
|
class Mechanize::Util
|
4
5
|
CODE_DIC = {
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
6
|
+
NKF::JIS => "ISO-2022-JP",
|
7
|
+
NKF::EUC => "EUC-JP",
|
8
|
+
NKF::SJIS => "SHIFT_JIS",
|
9
|
+
NKF::UTF8 => "UTF-8",
|
10
|
+
NKF::UTF16 => "UTF-16",
|
11
|
+
NKF::UTF32 => "UTF-32",
|
12
|
+
}
|
9
13
|
|
10
14
|
# true if RUBY_VERSION is 1.9.0 or later
|
11
15
|
NEW_RUBY_ENCODING = RUBY_VERSION >= '1.9.0'
|
@@ -66,26 +70,30 @@ class Mechanize::Util
|
|
66
70
|
end
|
67
71
|
|
68
72
|
def self.detect_charset(src)
|
69
|
-
|
70
|
-
|
71
|
-
|
73
|
+
case enc = src && NKF.guess(src)
|
74
|
+
when Integer
|
75
|
+
# Ruby <= 1.8
|
76
|
+
CODE_DIC[enc]
|
72
77
|
else
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
enc = CODE_DIC[enc.intern]
|
77
|
-
end
|
78
|
-
enc || "ISO-8859-1"
|
78
|
+
# Ruby >= 1.9
|
79
|
+
enc && enc.to_s.upcase
|
80
|
+
end || "ISO-8859-1"
|
79
81
|
end
|
80
82
|
|
81
|
-
def self.uri_escape str
|
83
|
+
def self.uri_escape str, unsafe = nil
|
82
84
|
@parser ||= begin
|
83
85
|
URI::Parser.new
|
84
86
|
rescue NameError
|
85
87
|
URI
|
86
88
|
end
|
87
89
|
|
88
|
-
@parser
|
90
|
+
if URI == @parser then
|
91
|
+
unsafe ||= URI::UNSAFE
|
92
|
+
else
|
93
|
+
unsafe ||= @parser.regexp[:UNSAFE]
|
94
|
+
end
|
95
|
+
|
96
|
+
@parser.escape str, unsafe
|
89
97
|
end
|
90
98
|
|
91
99
|
def self.uri_unescape str
|
data/test/htdocs/tc_referer.html
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
<html>
|
2
2
|
<body>
|
3
3
|
<a href="/referer">Referer Servlet</a>
|
4
|
-
<a href="http://localhost/referer">Referer Servlet forced to http</a>
|
4
|
+
<a href="http://localhost/referer" name="foo">Referer Servlet forced to http</a>
|
5
5
|
<a href="https://localhost/referer">Referer Servlet forced to https</a>
|
6
6
|
<br />
|
7
7
|
<a href="/referer" rel="noreferrer">Referer Servlet (noreferrer)</a>
|
data/test/test_mechanize.rb
CHANGED
@@ -728,6 +728,12 @@ but not <a href="/" rel="me nofollow">this</a>!
|
|
728
728
|
@mech.history[1].uri.to_s)
|
729
729
|
end
|
730
730
|
|
731
|
+
def test_initialize
|
732
|
+
mech = Mechanize.new
|
733
|
+
|
734
|
+
assert_equal 50, mech.max_history
|
735
|
+
end
|
736
|
+
|
731
737
|
def test_html_parser_equals
|
732
738
|
@mech.html_parser = {}
|
733
739
|
assert_raises(NoMethodError) {
|
@@ -735,10 +741,14 @@ but not <a href="/" rel="me nofollow">this</a>!
|
|
735
741
|
}
|
736
742
|
end
|
737
743
|
|
744
|
+
def test_idle_timeout_default
|
745
|
+
assert_equal 5, Mechanize.new.idle_timeout
|
746
|
+
end
|
747
|
+
|
738
748
|
def test_idle_timeout_equals
|
739
|
-
@mech.idle_timeout =
|
749
|
+
@mech.idle_timeout = 15
|
740
750
|
|
741
|
-
assert_equal
|
751
|
+
assert_equal 15, @mech.idle_timeout
|
742
752
|
end
|
743
753
|
|
744
754
|
def test_keep_alive_equals
|
@@ -893,6 +903,42 @@ but not <a href="/" rel="me nofollow">this</a>!
|
|
893
903
|
assert_equal 5, @mech.read_timeout
|
894
904
|
end
|
895
905
|
|
906
|
+
def test_referer
|
907
|
+
host_path = "localhost/tc_referer.html?t=1"
|
908
|
+
['http', 'https'].each { |proto|
|
909
|
+
referer = "#{proto}://#{host_path}"
|
910
|
+
[
|
911
|
+
"",
|
912
|
+
"@",
|
913
|
+
"user1@",
|
914
|
+
":@",
|
915
|
+
"user1:@",
|
916
|
+
":password1@",
|
917
|
+
"user1:password1@",
|
918
|
+
].each { |userinfo|
|
919
|
+
url = "#{proto}://#{userinfo}#{host_path}"
|
920
|
+
[url, url + "#foo"].each { |furl|
|
921
|
+
[
|
922
|
+
['relative', true],
|
923
|
+
['insecure', proto == 'http'],
|
924
|
+
['secure', true],
|
925
|
+
['relative noreferrer', false],
|
926
|
+
['insecure noreferrer', false],
|
927
|
+
['secure noreferrer', false],
|
928
|
+
].each_with_index { |(type, bool), i|
|
929
|
+
rpage = @mech.get(furl)
|
930
|
+
page = rpage.links[i].click
|
931
|
+
assert_equal bool ? referer : '', page.body, "%s link from %s" % [type, furl]
|
932
|
+
}
|
933
|
+
|
934
|
+
rpage = @mech.get(furl)
|
935
|
+
page = rpage.forms.first.submit
|
936
|
+
assert_equal referer, page.body, "post from %s" % furl
|
937
|
+
}
|
938
|
+
}
|
939
|
+
}
|
940
|
+
end
|
941
|
+
|
896
942
|
def test_retry_change_requests_equals
|
897
943
|
refute @mech.retry_change_requests
|
898
944
|
|
@@ -23,7 +23,8 @@ class TestMechanizeDownload < Mechanize::TestCase
|
|
23
23
|
|
24
24
|
def test_save_tempfile
|
25
25
|
uri = URI.parse 'http://example/foo.html'
|
26
|
-
Tempfile.
|
26
|
+
Tempfile.open __name__ do |body_io|
|
27
|
+
body_io.unlink
|
27
28
|
body_io.write '0123456789'
|
28
29
|
|
29
30
|
body_io.flush
|
@@ -39,5 +40,14 @@ class TestMechanizeDownload < Mechanize::TestCase
|
|
39
40
|
end
|
40
41
|
end
|
41
42
|
|
43
|
+
def test_filename
|
44
|
+
uri = URI.parse 'http://example/foo.html'
|
45
|
+
body_io = StringIO.new '0123456789'
|
46
|
+
|
47
|
+
download = @parser.new uri, nil, body_io
|
48
|
+
|
49
|
+
assert_equal "foo.html", download.filename
|
50
|
+
end
|
51
|
+
|
42
52
|
end
|
43
53
|
|
data/test/test_mechanize_file.rb
CHANGED
data/test/test_mechanize_form.rb
CHANGED
@@ -5,7 +5,10 @@ class TestMechanizeForm < Mechanize::TestCase
|
|
5
5
|
def setup
|
6
6
|
super
|
7
7
|
|
8
|
-
@
|
8
|
+
@uri = URI 'http://example'
|
9
|
+
@page = page @uri
|
10
|
+
|
11
|
+
@form = Mechanize::Form.new node('form', 'name' => __name__), @mech, @page
|
9
12
|
end
|
10
13
|
|
11
14
|
def test_action
|
@@ -14,6 +17,18 @@ class TestMechanizeForm < Mechanize::TestCase
|
|
14
17
|
assert_equal '?a=b&b=c', form.action
|
15
18
|
end
|
16
19
|
|
20
|
+
def test_add_button_to_query
|
21
|
+
button = Mechanize::Form::Button.new node('input', 'type' => 'submit')
|
22
|
+
|
23
|
+
e = assert_raises ArgumentError do
|
24
|
+
@form.add_button_to_query button
|
25
|
+
end
|
26
|
+
|
27
|
+
assert_equal "#{button.inspect} does not belong to the same page " \
|
28
|
+
"as the form \"#{@__name__}\" in #{@uri}",
|
29
|
+
e.message
|
30
|
+
end
|
31
|
+
|
17
32
|
def test_aset
|
18
33
|
assert_empty @form.keys
|
19
34
|
|
@@ -115,13 +115,17 @@ class TestMechanizeHttpAgent < Mechanize::TestCase
|
|
115
115
|
end
|
116
116
|
|
117
117
|
def test_fetch_file_nonexistent
|
118
|
-
|
118
|
+
in_tmpdir do
|
119
|
+
nonexistent = File.join Dir.pwd, 'nonexistent'
|
119
120
|
|
120
|
-
|
121
|
-
|
122
|
-
|
121
|
+
uri = URI.parse "file://#{nonexistent}"
|
122
|
+
|
123
|
+
e = assert_raises Mechanize::ResponseCodeError do
|
124
|
+
@agent.fetch uri
|
125
|
+
end
|
123
126
|
|
124
|
-
|
127
|
+
assert_equal '404 => Net::HTTPNotFound', e.message
|
128
|
+
end
|
125
129
|
end
|
126
130
|
|
127
131
|
def test_fetch_post_connect_hook
|
@@ -402,6 +406,31 @@ class TestMechanizeHttpAgent < Mechanize::TestCase
|
|
402
406
|
assert_nil @req['referer']
|
403
407
|
end
|
404
408
|
|
409
|
+
def test_request_referer_strip
|
410
|
+
uri = URI.parse 'http://example.com/index.html'
|
411
|
+
|
412
|
+
host_path = "old.example/page.html?q=x"
|
413
|
+
referer = "http://#{host_path}"
|
414
|
+
|
415
|
+
[
|
416
|
+
"",
|
417
|
+
"@",
|
418
|
+
"user1@",
|
419
|
+
":@",
|
420
|
+
"user1:@",
|
421
|
+
":password1@",
|
422
|
+
"user1:password1@",
|
423
|
+
].each { |userinfo|
|
424
|
+
['', '#frag'].each { |frag|
|
425
|
+
url = URI.parse "http://#{userinfo}#{host_path}#{frag}"
|
426
|
+
|
427
|
+
@agent.request_referer @req, uri, url
|
428
|
+
|
429
|
+
assert_equal referer, @req['referer'], url
|
430
|
+
}
|
431
|
+
}
|
432
|
+
end
|
433
|
+
|
405
434
|
def test_request_user_agent
|
406
435
|
@agent.request_user_agent @req
|
407
436
|
|
@@ -516,12 +545,25 @@ class TestMechanizeHttpAgent < Mechanize::TestCase
|
|
516
545
|
assert_empty @agent.authenticate_methods[base_uri][:basic]
|
517
546
|
end
|
518
547
|
|
548
|
+
def test_response_authenticate_no_www_authenticate
|
549
|
+
denied = page URI('http://example/denied'), 'text/html', '', 403
|
550
|
+
@agent.user = 'user'
|
551
|
+
@agent.password = 'password'
|
552
|
+
|
553
|
+
e = assert_raises Mechanize::UnauthorizedError do
|
554
|
+
@agent.response_authenticate @res, denied, @uri, @req, {}, nil, nil
|
555
|
+
end
|
556
|
+
|
557
|
+
assert_equal '403 => Net::HTTPForbidden', e.message
|
558
|
+
end
|
559
|
+
|
519
560
|
def test_response_authenticate_ntlm
|
520
561
|
@uri += '/ntlm'
|
521
562
|
@res.instance_variable_set(:@header,
|
522
|
-
'www-authenticate' => ['NTLM'])
|
563
|
+
'www-authenticate' => ['Negotiate, NTLM'])
|
523
564
|
@agent.user = 'user'
|
524
565
|
@agent.password = 'password'
|
566
|
+
@agent.domain = 'domain'
|
525
567
|
|
526
568
|
page = @agent.response_authenticate @res, nil, @uri, @req, {}, nil, nil
|
527
569
|
|
@@ -541,7 +583,6 @@ class TestMechanizeHttpAgent < Mechanize::TestCase
|
|
541
583
|
end
|
542
584
|
|
543
585
|
def test_response_content_encoding_7_bit
|
544
|
-
def @res.content_length() 4 end
|
545
586
|
@res.instance_variable_set :@header, 'content-encoding' => %w[7bit]
|
546
587
|
|
547
588
|
body = @agent.response_content_encoding @res, StringIO.new('part')
|
@@ -550,7 +591,6 @@ class TestMechanizeHttpAgent < Mechanize::TestCase
|
|
550
591
|
end
|
551
592
|
|
552
593
|
def test_response_content_encoding_deflate
|
553
|
-
def @res.content_length() 12 end
|
554
594
|
@res.instance_variable_set :@header, 'content-encoding' => %w[deflate]
|
555
595
|
body_io = StringIO.new "x\x9C+H,*\x01\x00\x04?\x01\xB8"
|
556
596
|
|
@@ -560,7 +600,6 @@ class TestMechanizeHttpAgent < Mechanize::TestCase
|
|
560
600
|
end
|
561
601
|
|
562
602
|
def test_response_content_encoding_deflate_chunked
|
563
|
-
def @res.content_length() nil end
|
564
603
|
@res.instance_variable_set :@header, 'content-encoding' => %w[deflate]
|
565
604
|
body_io = StringIO.new "x\x9C+H,*\x01\x00\x04?\x01\xB8"
|
566
605
|
|
@@ -569,9 +608,28 @@ class TestMechanizeHttpAgent < Mechanize::TestCase
|
|
569
608
|
assert_equal 'part', body.read
|
570
609
|
end
|
571
610
|
|
611
|
+
def test_response_content_encoding_deflate_corrupt
|
612
|
+
@res.instance_variable_set :@header, 'content-encoding' => %w[deflate]
|
613
|
+
body_io = StringIO.new "x\x9C+H,*\x01\x00\x04?\x01" # missing 1 byte
|
614
|
+
|
615
|
+
e = assert_raises Mechanize::Error do
|
616
|
+
@agent.response_content_encoding @res, body_io
|
617
|
+
end
|
618
|
+
|
619
|
+
assert_match %r%error handling content-encoding deflate:%, e.message
|
620
|
+
assert_match %r%Zlib%, e.message
|
621
|
+
end
|
622
|
+
|
623
|
+
def test_response_content_encoding_deflate_empty
|
624
|
+
@res.instance_variable_set :@header, 'content-encoding' => %w[deflate]
|
625
|
+
|
626
|
+
body = @agent.response_content_encoding @res, StringIO.new
|
627
|
+
|
628
|
+
assert_equal '', body.read
|
629
|
+
end
|
630
|
+
|
572
631
|
# IIS/6.0 ASP.NET/2.0.50727 does not wrap deflate with zlib, WTF?
|
573
632
|
def test_response_content_encoding_deflate_no_zlib
|
574
|
-
def @res.content_length() 6 end
|
575
633
|
@res.instance_variable_set :@header, 'content-encoding' => %w[deflate]
|
576
634
|
|
577
635
|
body = @agent.response_content_encoding @res, StringIO.new("+H,*\001\000")
|
@@ -580,7 +638,6 @@ class TestMechanizeHttpAgent < Mechanize::TestCase
|
|
580
638
|
end
|
581
639
|
|
582
640
|
def test_response_content_encoding_gzip
|
583
|
-
def @res.content_length() 24 end
|
584
641
|
@res.instance_variable_set :@header, 'content-encoding' => %w[gzip]
|
585
642
|
body_io = StringIO.new \
|
586
643
|
"\037\213\b\0002\002\225M\000\003+H,*\001\000\306p\017I\004\000\000\000"
|
@@ -601,8 +658,54 @@ class TestMechanizeHttpAgent < Mechanize::TestCase
|
|
601
658
|
assert_equal 'part', body.read
|
602
659
|
end
|
603
660
|
|
661
|
+
def test_response_content_encoding_gzip_corrupt
|
662
|
+
log = StringIO.new
|
663
|
+
logger = Logger.new log
|
664
|
+
@agent.context.log = logger
|
665
|
+
|
666
|
+
@res.instance_variable_set :@header, 'content-encoding' => %w[gzip]
|
667
|
+
body_io = StringIO.new \
|
668
|
+
"\037\213\b\0002\002\225M\000\003+H,*\001"
|
669
|
+
|
670
|
+
e = assert_raises Mechanize::Error do
|
671
|
+
@agent.response_content_encoding @res, body_io
|
672
|
+
end
|
673
|
+
|
674
|
+
assert_match %r%error handling content-encoding gzip:%, e.message
|
675
|
+
assert_match %r%Zlib%, e.message
|
676
|
+
|
677
|
+
assert_match %r%unable to gunzip response, trying raw inflate%, log.string
|
678
|
+
assert_match %r%unable to gunzip response:%, log.string
|
679
|
+
end
|
680
|
+
|
681
|
+
def test_response_content_encoding_gzip_corrupt_checksum
|
682
|
+
log = StringIO.new
|
683
|
+
logger = Logger.new log
|
684
|
+
@agent.context.log = logger
|
685
|
+
|
686
|
+
@res.instance_variable_set :@header, 'content-encoding' => %w[gzip]
|
687
|
+
body_io = StringIO.new \
|
688
|
+
"\037\213\b\0002\002\225M\000\003+H,*\001\000\306p\017I\004\000\000"
|
689
|
+
|
690
|
+
e = assert_raises Mechanize::Error do
|
691
|
+
@agent.response_content_encoding @res, body_io
|
692
|
+
end
|
693
|
+
|
694
|
+
assert_match %r%error handling content-encoding gzip:%, e.message
|
695
|
+
assert_match %r%Zlib%, e.message
|
696
|
+
|
697
|
+
assert_match %r%unable to gunzip response, trying raw inflate%, log.string
|
698
|
+
end
|
699
|
+
|
700
|
+
def test_response_content_encoding_gzip_empty
|
701
|
+
@res.instance_variable_set :@header, 'content-encoding' => %w[gzip]
|
702
|
+
|
703
|
+
body = @agent.response_content_encoding @res, StringIO.new
|
704
|
+
|
705
|
+
assert_equal '', body.read
|
706
|
+
end
|
707
|
+
|
604
708
|
def test_response_content_encoding_gzip_encoding_bad
|
605
|
-
def @res.content_length() 24 end
|
606
709
|
@res.instance_variable_set(:@header,
|
607
710
|
'content-encoding' => %w[gzip],
|
608
711
|
'content-type' => 'text/html; charset=UTF-8')
|
@@ -622,7 +725,6 @@ class TestMechanizeHttpAgent < Mechanize::TestCase
|
|
622
725
|
end
|
623
726
|
|
624
727
|
def test_response_content_encoding_none
|
625
|
-
def @res.content_length() 4 end
|
626
728
|
@res.instance_variable_set :@header, 'content-encoding' => %w[none]
|
627
729
|
|
628
730
|
body = @agent.response_content_encoding @res, StringIO.new('part')
|
@@ -630,8 +732,36 @@ class TestMechanizeHttpAgent < Mechanize::TestCase
|
|
630
732
|
assert_equal 'part', body.read
|
631
733
|
end
|
632
734
|
|
735
|
+
def test_response_content_encoding_tempfile_7_bit
|
736
|
+
body_io = tempfile 'part'
|
737
|
+
|
738
|
+
@res.instance_variable_set :@header, 'content-encoding' => %w[7bit]
|
739
|
+
|
740
|
+
body = @agent.response_content_encoding @res, body_io
|
741
|
+
|
742
|
+
assert_equal 'part', body.read
|
743
|
+
refute body_io.closed?
|
744
|
+
ensure
|
745
|
+
begin
|
746
|
+
body_io.close! unless body_io.closed?
|
747
|
+
rescue IOError
|
748
|
+
# HACK for ruby 1.8
|
749
|
+
end
|
750
|
+
end
|
751
|
+
|
752
|
+
def test_response_content_encoding_tempfile_gzip
|
753
|
+
body_io = tempfile "x\x9C+H,*\x01\x00\x04?\x01\xB8"
|
754
|
+
@res.instance_variable_set :@header, 'content-encoding' => %w[deflate]
|
755
|
+
|
756
|
+
body = @agent.response_content_encoding @res, body_io
|
757
|
+
|
758
|
+
assert_equal 'part', body.read
|
759
|
+
assert body_io.closed?
|
760
|
+
ensure
|
761
|
+
body_io.close! unless body_io.closed?
|
762
|
+
end
|
763
|
+
|
633
764
|
def test_response_content_encoding_x_gzip
|
634
|
-
def @res.content_length() 24 end
|
635
765
|
@res.instance_variable_set :@header, 'content-encoding' => %w[x-gzip]
|
636
766
|
body_io = StringIO.new \
|
637
767
|
"\037\213\b\0002\002\225M\000\003+H,*\001\000\306p\017I\004\000\000\000"
|
@@ -642,7 +772,6 @@ class TestMechanizeHttpAgent < Mechanize::TestCase
|
|
642
772
|
end
|
643
773
|
|
644
774
|
def test_response_content_encoding_unknown
|
645
|
-
def @res.content_length() 4 end
|
646
775
|
@res.instance_variable_set :@header, 'content-encoding' => %w[unknown]
|
647
776
|
body = StringIO.new 'part'
|
648
777
|
|
@@ -650,7 +779,7 @@ class TestMechanizeHttpAgent < Mechanize::TestCase
|
|
650
779
|
@agent.response_content_encoding @res, body
|
651
780
|
end
|
652
781
|
|
653
|
-
assert_equal '
|
782
|
+
assert_equal 'unsupported content-encoding: unknown', e.message
|
654
783
|
end
|
655
784
|
|
656
785
|
def test_get_meta_refresh_header_follow_self
|
@@ -838,7 +967,7 @@ class TestMechanizeHttpAgent < Mechanize::TestCase
|
|
838
967
|
def @res.read_body() yield 'part' end
|
839
968
|
def @res.content_length() 4 end
|
840
969
|
|
841
|
-
io = @agent.response_read @res, @req
|
970
|
+
io = @agent.response_read @res, @req, @uri
|
842
971
|
|
843
972
|
body = io.read
|
844
973
|
|
@@ -850,7 +979,7 @@ class TestMechanizeHttpAgent < Mechanize::TestCase
|
|
850
979
|
def @res.read_body() yield 'a' * 10241 end
|
851
980
|
def @res.content_length() 10241 end
|
852
981
|
|
853
|
-
io = @agent.response_read @res, @req
|
982
|
+
io = @agent.response_read @res, @req, @uri
|
854
983
|
|
855
984
|
assert_kind_of Tempfile, io
|
856
985
|
assert_equal 10241, io.stat.size
|
@@ -862,7 +991,7 @@ class TestMechanizeHttpAgent < Mechanize::TestCase
|
|
862
991
|
end
|
863
992
|
def @res.content_length() end
|
864
993
|
|
865
|
-
io = @agent.response_read @res, @req
|
994
|
+
io = @agent.response_read @res, @req, @uri
|
866
995
|
|
867
996
|
assert_kind_of Tempfile, io
|
868
997
|
assert_equal 11264, io.stat.size
|
@@ -874,7 +1003,7 @@ class TestMechanizeHttpAgent < Mechanize::TestCase
|
|
874
1003
|
def @res.content_length() end
|
875
1004
|
def @res.read_body() end
|
876
1005
|
|
877
|
-
io = @agent.response_read @res, req
|
1006
|
+
io = @agent.response_read @res, req, @uri
|
878
1007
|
|
879
1008
|
assert_equal '', io.read
|
880
1009
|
end
|
@@ -884,7 +1013,7 @@ class TestMechanizeHttpAgent < Mechanize::TestCase
|
|
884
1013
|
def @res.read_body() yield 'part' end
|
885
1014
|
|
886
1015
|
e = assert_raises EOFError do
|
887
|
-
@agent.response_read @res, @req
|
1016
|
+
@agent.response_read @res, @req, @uri
|
888
1017
|
end
|
889
1018
|
|
890
1019
|
assert_equal 'Content-Length (5) does not match response body length (4)',
|
@@ -898,7 +1027,7 @@ class TestMechanizeHttpAgent < Mechanize::TestCase
|
|
898
1027
|
def res.read_body() yield 'part' end
|
899
1028
|
res.instance_variable_set :@header, {}
|
900
1029
|
|
901
|
-
io = @agent.response_read res, @req
|
1030
|
+
io = @agent.response_read res, @req, @uri
|
902
1031
|
|
903
1032
|
assert_equal 'part', io.read
|
904
1033
|
end
|
@@ -910,7 +1039,7 @@ class TestMechanizeHttpAgent < Mechanize::TestCase
|
|
910
1039
|
end
|
911
1040
|
|
912
1041
|
e = assert_raises Mechanize::ResponseReadError do
|
913
|
-
@agent.response_read @res, @req
|
1042
|
+
@agent.response_read @res, @req, @uri
|
914
1043
|
end
|
915
1044
|
|
916
1045
|
assert_equal @res, e.response
|
@@ -928,7 +1057,7 @@ class TestMechanizeHttpAgent < Mechanize::TestCase
|
|
928
1057
|
req = Mechanize::FileRequest.new uri
|
929
1058
|
res = Mechanize::FileResponse.new tempfile.path
|
930
1059
|
|
931
|
-
io = @agent.response_read res, req
|
1060
|
+
io = @agent.response_read res, req, uri
|
932
1061
|
|
933
1062
|
expected = "π\n"
|
934
1063
|
expected.force_encoding Encoding::BINARY if expected.respond_to? :encoding
|
@@ -945,7 +1074,7 @@ class TestMechanizeHttpAgent < Mechanize::TestCase
|
|
945
1074
|
def @res.content_length() end
|
946
1075
|
def @res.read_body() end
|
947
1076
|
|
948
|
-
io = @agent.response_read @res, req
|
1077
|
+
io = @agent.response_read @res, req, @uri
|
949
1078
|
|
950
1079
|
assert_equal '', io.read
|
951
1080
|
end
|
@@ -957,7 +1086,7 @@ class TestMechanizeHttpAgent < Mechanize::TestCase
|
|
957
1086
|
def res.read_body() yield 'part' end
|
958
1087
|
|
959
1088
|
e = assert_raises Mechanize::ResponseCodeError do
|
960
|
-
@agent.response_read res, @req
|
1089
|
+
@agent.response_read res, @req, @uri
|
961
1090
|
end
|
962
1091
|
|
963
1092
|
assert_equal res, e.page
|