kitamomonga-mechanize 0.9.3.20090724215219
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGELOG.rdoc +504 -0
- data/EXAMPLES.rdoc +171 -0
- data/FAQ.rdoc +11 -0
- data/GUIDE.rdoc +122 -0
- data/LICENSE.rdoc +340 -0
- data/Manifest.txt +176 -0
- data/README.rdoc +60 -0
- data/Rakefile +33 -0
- data/examples/flickr_upload.rb +23 -0
- data/examples/mech-dump.rb +7 -0
- data/examples/proxy_req.rb +9 -0
- data/examples/rubyforge.rb +21 -0
- data/examples/spider.rb +11 -0
- data/lib/mechanize.rb +666 -0
- data/lib/mechanize/chain.rb +34 -0
- data/lib/mechanize/chain/auth_headers.rb +78 -0
- data/lib/mechanize/chain/body_decoding_handler.rb +46 -0
- data/lib/mechanize/chain/connection_resolver.rb +76 -0
- data/lib/mechanize/chain/custom_headers.rb +21 -0
- data/lib/mechanize/chain/handler.rb +9 -0
- data/lib/mechanize/chain/header_resolver.rb +51 -0
- data/lib/mechanize/chain/parameter_resolver.rb +22 -0
- data/lib/mechanize/chain/post_connect_hook.rb +0 -0
- data/lib/mechanize/chain/post_page_hook.rb +18 -0
- data/lib/mechanize/chain/pre_connect_hook.rb +20 -0
- data/lib/mechanize/chain/request_resolver.rb +30 -0
- data/lib/mechanize/chain/response_body_parser.rb +38 -0
- data/lib/mechanize/chain/response_header_handler.rb +48 -0
- data/lib/mechanize/chain/response_reader.rb +39 -0
- data/lib/mechanize/chain/ssl_resolver.rb +40 -0
- data/lib/mechanize/chain/uri_resolver.rb +75 -0
- data/lib/mechanize/content_type_error.rb +14 -0
- data/lib/mechanize/cookie.rb +70 -0
- data/lib/mechanize/cookie_jar.rb +188 -0
- data/lib/mechanize/file.rb +71 -0
- data/lib/mechanize/file_response.rb +60 -0
- data/lib/mechanize/file_saver.rb +37 -0
- data/lib/mechanize/form.rb +364 -0
- data/lib/mechanize/form/button.rb +7 -0
- data/lib/mechanize/form/check_box.rb +11 -0
- data/lib/mechanize/form/field.rb +26 -0
- data/lib/mechanize/form/file_upload.rb +22 -0
- data/lib/mechanize/form/image_button.rb +21 -0
- data/lib/mechanize/form/multi_select_list.rb +67 -0
- data/lib/mechanize/form/option.rb +49 -0
- data/lib/mechanize/form/radio_button.rb +36 -0
- data/lib/mechanize/form/select_list.rb +43 -0
- data/lib/mechanize/headers.rb +11 -0
- data/lib/mechanize/history.rb +65 -0
- data/lib/mechanize/inspect.rb +88 -0
- data/lib/mechanize/monkey_patch.rb +35 -0
- data/lib/mechanize/page.rb +279 -0
- data/lib/mechanize/page/base.rb +8 -0
- data/lib/mechanize/page/encoding.rb +61 -0
- data/lib/mechanize/page/frame.rb +20 -0
- data/lib/mechanize/page/link.rb +53 -0
- data/lib/mechanize/page/meta.rb +50 -0
- data/lib/mechanize/pluggable_parsers.rb +101 -0
- data/lib/mechanize/redirect_limit_reached_error.rb +16 -0
- data/lib/mechanize/redirect_not_get_or_head_error.rb +18 -0
- data/lib/mechanize/response_code_error.rb +22 -0
- data/lib/mechanize/unsupported_scheme_error.rb +8 -0
- data/lib/mechanize/util.rb +73 -0
- data/test/chain/test_argument_validator.rb +14 -0
- data/test/chain/test_auth_headers.rb +25 -0
- data/test/chain/test_custom_headers.rb +18 -0
- data/test/chain/test_header_resolver.rb +28 -0
- data/test/chain/test_parameter_resolver.rb +35 -0
- data/test/chain/test_request_resolver.rb +29 -0
- data/test/chain/test_response_reader.rb +24 -0
- data/test/data/htpasswd +1 -0
- data/test/data/server.crt +16 -0
- data/test/data/server.csr +12 -0
- data/test/data/server.key +15 -0
- data/test/data/server.pem +15 -0
- data/test/helper.rb +129 -0
- data/test/htdocs/alt_text.html +10 -0
- data/test/htdocs/bad_form_test.html +9 -0
- data/test/htdocs/button.jpg +0 -0
- data/test/htdocs/empty_form.html +6 -0
- data/test/htdocs/file_upload.html +26 -0
- data/test/htdocs/find_link.html +41 -0
- data/test/htdocs/form_multi_select.html +16 -0
- data/test/htdocs/form_multival.html +37 -0
- data/test/htdocs/form_no_action.html +18 -0
- data/test/htdocs/form_no_input_name.html +16 -0
- data/test/htdocs/form_select.html +16 -0
- data/test/htdocs/form_select_all.html +16 -0
- data/test/htdocs/form_select_none.html +17 -0
- data/test/htdocs/form_select_noopts.html +10 -0
- data/test/htdocs/form_set_fields.html +14 -0
- data/test/htdocs/form_test.html +188 -0
- data/test/htdocs/frame_test.html +30 -0
- data/test/htdocs/google.html +13 -0
- data/test/htdocs/iframe_test.html +16 -0
- data/test/htdocs/index.html +6 -0
- data/test/htdocs/link with space.html +5 -0
- data/test/htdocs/meta_cookie.html +11 -0
- data/test/htdocs/no_title_test.html +6 -0
- data/test/htdocs/relative/tc_relative_links.html +21 -0
- data/test/htdocs/tc_bad_charset.html +9 -0
- data/test/htdocs/tc_bad_links.html +5 -0
- data/test/htdocs/tc_base_link.html +8 -0
- data/test/htdocs/tc_blank_form.html +11 -0
- data/test/htdocs/tc_charset.html +6 -0
- data/test/htdocs/tc_checkboxes.html +19 -0
- data/test/htdocs/tc_encoded_links.html +5 -0
- data/test/htdocs/tc_follow_meta.html +8 -0
- data/test/htdocs/tc_form_action.html +48 -0
- data/test/htdocs/tc_links.html +19 -0
- data/test/htdocs/tc_no_attributes.html +16 -0
- data/test/htdocs/tc_pretty_print.html +17 -0
- data/test/htdocs/tc_radiobuttons.html +17 -0
- data/test/htdocs/tc_referer.html +10 -0
- data/test/htdocs/tc_relative_links.html +19 -0
- data/test/htdocs/tc_textarea.html +23 -0
- data/test/htdocs/test_bad_encoding.html +52 -0
- data/test/htdocs/unusual______.html +5 -0
- data/test/servlets.rb +365 -0
- data/test/ssl_server.rb +48 -0
- data/test/test_authenticate.rb +71 -0
- data/test/test_bad_links.rb +25 -0
- data/test/test_blank_form.rb +16 -0
- data/test/test_checkboxes.rb +61 -0
- data/test/test_content_type.rb +13 -0
- data/test/test_cookie_class.rb +338 -0
- data/test/test_cookie_jar.rb +362 -0
- data/test/test_cookies.rb +123 -0
- data/test/test_encoded_links.rb +20 -0
- data/test/test_errors.rb +49 -0
- data/test/test_follow_meta.rb +108 -0
- data/test/test_form_action.rb +52 -0
- data/test/test_form_as_hash.rb +61 -0
- data/test/test_form_button.rb +38 -0
- data/test/test_form_no_inputname.rb +15 -0
- data/test/test_forms.rb +577 -0
- data/test/test_frames.rb +25 -0
- data/test/test_get_headers.rb +73 -0
- data/test/test_gzipping.rb +22 -0
- data/test/test_hash_api.rb +45 -0
- data/test/test_history.rb +142 -0
- data/test/test_history_added.rb +16 -0
- data/test/test_html_unscape_forms.rb +39 -0
- data/test/test_if_modified_since.rb +20 -0
- data/test/test_keep_alive.rb +31 -0
- data/test/test_links.rb +127 -0
- data/test/test_mech.rb +289 -0
- data/test/test_mechanize_file.rb +72 -0
- data/test/test_meta.rb +65 -0
- data/test/test_multi_select.rb +106 -0
- data/test/test_no_attributes.rb +13 -0
- data/test/test_option.rb +18 -0
- data/test/test_page.rb +127 -0
- data/test/test_page_encoding.rb +298 -0
- data/test/test_pluggable_parser.rb +145 -0
- data/test/test_post_form.rb +34 -0
- data/test/test_pretty_print.rb +22 -0
- data/test/test_radiobutton.rb +75 -0
- data/test/test_redirect_limit_reached.rb +39 -0
- data/test/test_redirect_verb_handling.rb +43 -0
- data/test/test_referer.rb +39 -0
- data/test/test_relative_links.rb +40 -0
- data/test/test_request.rb +13 -0
- data/test/test_response_code.rb +52 -0
- data/test/test_save_file.rb +103 -0
- data/test/test_scheme.rb +63 -0
- data/test/test_select.rb +106 -0
- data/test/test_select_all.rb +15 -0
- data/test/test_select_none.rb +15 -0
- data/test/test_select_noopts.rb +16 -0
- data/test/test_set_fields.rb +44 -0
- data/test/test_ssl_server.rb +20 -0
- data/test/test_subclass.rb +14 -0
- data/test/test_textarea.rb +45 -0
- data/test/test_upload.rb +109 -0
- data/test/test_verbs.rb +25 -0
- metadata +320 -0
@@ -0,0 +1,13 @@
|
|
1
|
+
require File.expand_path(File.join(File.dirname(__FILE__), "helper"))
|
2
|
+
|
3
|
+
class TestNoAttributes < Test::Unit::TestCase
|
4
|
+
def setup
|
5
|
+
@agent = Mechanize.new
|
6
|
+
end
|
7
|
+
|
8
|
+
# Fetching tc_no_attributes.html through the agent must complete without
# raising. Only the absence of an exception is checked, so the returned page
# is deliberately not kept in a local.
def test_parse_no_attributes
  assert_nothing_raised do
    @agent.get('http://localhost/tc_no_attributes.html')
  end
end
|
13
|
+
end
|
data/test/test_option.rb
ADDED
@@ -0,0 +1,18 @@
|
|
1
|
+
require File.expand_path(File.join(File.dirname(__FILE__), "helper"))
|
2
|
+
|
3
|
+
class OptionTest < Test::Unit::TestCase
|
4
|
+
class FakeAttribute < Hash
|
5
|
+
attr_reader :inner_text
|
6
|
+
def initialize(inner_text)
|
7
|
+
@inner_text = inner_text
|
8
|
+
end
|
9
|
+
alias :has_attribute? :has_key?
|
10
|
+
alias :attributes :keys
|
11
|
+
end
|
12
|
+
|
13
|
+
def test_option_missing_value
|
14
|
+
attribute = FakeAttribute.new('blah')
|
15
|
+
option = Mechanize::Form::Option.new(attribute, nil)
|
16
|
+
assert_equal('blah', option.value)
|
17
|
+
end
|
18
|
+
end
|
data/test/test_page.rb
ADDED
@@ -0,0 +1,127 @@
|
|
1
|
+
require File.expand_path(File.join(File.dirname(__FILE__), "helper"))
|
2
|
+
|
3
|
+
require 'cgi'
|
4
|
+
|
5
|
+
class TestPage < Test::Unit::TestCase
|
6
|
+
def setup
|
7
|
+
@agent = Mechanize.new
|
8
|
+
end
|
9
|
+
|
10
|
+
def test_page_gets_charset_from_page
|
11
|
+
page = @agent.get("http://localhost/tc_charset.html")
|
12
|
+
assert_equal 'windows-1255', page.encoding
|
13
|
+
end
|
14
|
+
|
15
|
+
def test_double_semicolon
|
16
|
+
page = @agent.get("http://localhost/http_headers?content-disposition=#{CGI.escape('attachment;; filename=fooooo')}")
|
17
|
+
assert page.parser
|
18
|
+
end
|
19
|
+
|
20
|
+
def test_broken_charset
|
21
|
+
page = @agent.get("http://localhost/http_headers?content-type=#{CGI.escape('text/html; charset=akldsjfhaldjfksh')}")
|
22
|
+
assert page.parser
|
23
|
+
end
|
24
|
+
|
25
|
+
def test_mostly_broken_charset
|
26
|
+
page = @agent.get("http://localhost/http_headers?content-type=#{CGI.escape('text/html; charset=ISO_8859-1')}")
|
27
|
+
assert_equal 'ISO_8859-1', page.encoding
|
28
|
+
end
|
29
|
+
|
30
|
+
def test_another_mostly_broken_charset
|
31
|
+
page = @agent.get("http://localhost/http_headers?content-type=#{CGI.escape('text/html; charset=UTF8')}")
|
32
|
+
assert_equal 'UTF8', page.parser.encoding
|
33
|
+
assert_equal 'UTF8', page.encoding
|
34
|
+
end
|
35
|
+
|
36
|
+
def test_upper_case_content_type
|
37
|
+
page = @agent.get("http://localhost/http_headers?content-type=#{CGI.escape('text/HTML')}")
|
38
|
+
assert_instance_of Mechanize::Page, page
|
39
|
+
assert_equal 'text/HTML', page.content_type
|
40
|
+
end
|
41
|
+
|
42
|
+
def test_encoding_override_before_parser_initialized
|
43
|
+
# document has bad encoding information - windows-1255
|
44
|
+
page = @agent.get("http://localhost/tc_bad_charset.html")
|
45
|
+
# encoding is wrong, so user wants to force ISO-8859-2
|
46
|
+
page.encoding = 'ISO-8859-2'
|
47
|
+
assert_equal 'ISO-8859-2', page.encoding
|
48
|
+
end
|
49
|
+
|
50
|
+
def test_encoding_override_after_parser_was_initialized
|
51
|
+
# document has bad encoding information - windows-1255
|
52
|
+
page = @agent.get("http://localhost/tc_bad_charset.html")
|
53
|
+
page.parser
|
54
|
+
# autodetection sets encoding to windows-1255
|
55
|
+
assert_equal 'windows-1255', page.encoding
|
56
|
+
# encoding is wrong, so user wants to force ISO-8859-2
|
57
|
+
page.encoding = 'ISO-8859-2'
|
58
|
+
assert_equal 'ISO-8859-2', page.encoding
|
59
|
+
end
|
60
|
+
|
61
|
+
def test_page_gets_charset_sent_by_server
|
62
|
+
page = @agent.get("http://localhost/http_headers?content-type=#{CGI.escape('text/html; charset=UTF-8')}")
|
63
|
+
assert_equal 'UTF-8', page.encoding
|
64
|
+
end
|
65
|
+
|
66
|
+
def test_set_encoding
|
67
|
+
page = @agent.get("http://localhost/file_upload.html")
|
68
|
+
page.encoding = 'UTF-8'
|
69
|
+
assert_equal 'UTF-8', page.parser.encoding
|
70
|
+
end
|
71
|
+
|
72
|
+
def test_page_gets_yielded
|
73
|
+
pages = nil
|
74
|
+
@agent.get("http://localhost/file_upload.html") { |page|
|
75
|
+
pages = page
|
76
|
+
}
|
77
|
+
assert pages
|
78
|
+
assert_equal('File Upload Form', pages.title)
|
79
|
+
end
|
80
|
+
|
81
|
+
def test_title
|
82
|
+
page = @agent.get("http://localhost/file_upload.html")
|
83
|
+
assert_equal('File Upload Form', page.title)
|
84
|
+
end
|
85
|
+
|
86
|
+
# The page fetched from no_title_test.html is expected to report a nil title.
# assert_nil is used instead of assert_equal(nil, ...) so the intent (and the
# failure message) is explicitly about nil-ness.
def test_no_title
  page = @agent.get("http://localhost/no_title_test.html")
  assert_nil(page.title)
end
|
90
|
+
|
91
|
+
def test_page_decoded_with_charset
|
92
|
+
page = Mechanize::Page.new(
|
93
|
+
URI.parse('http://tenderlovemaking.com/'),
|
94
|
+
{ 'content-type' => 'text/html; charset=EUC-JP' },
|
95
|
+
'<html><body>hello</body></html>',
|
96
|
+
400,
|
97
|
+
@agent
|
98
|
+
)
|
99
|
+
assert_equal 'EUC-JP', page.parser.encoding
|
100
|
+
end
|
101
|
+
|
102
|
+
def test_find_form_with_hash
|
103
|
+
page = @agent.get("http://localhost/tc_form_action.html")
|
104
|
+
form = page.form(:name => 'post_form1')
|
105
|
+
assert form
|
106
|
+
yielded = false
|
107
|
+
form = page.form(:name => 'post_form1') { |f|
|
108
|
+
yielded = true
|
109
|
+
assert f
|
110
|
+
assert_equal(form, f)
|
111
|
+
}
|
112
|
+
assert yielded
|
113
|
+
|
114
|
+
form_by_action = page.form(:action => '/form_post?a=b&b=c')
|
115
|
+
assert form_by_action
|
116
|
+
assert_equal(form, form_by_action)
|
117
|
+
end
|
118
|
+
|
119
|
+
def test_find_form_with_attribute_class_or_id
|
120
|
+
page = @agent.get('http://localhost:2000/form_no_action.html')
|
121
|
+
form = page.form_with(:class => 'formclass')
|
122
|
+
assert_equal(form, page.forms[0])
|
123
|
+
form = page.form_with(:id => 'formid')
|
124
|
+
assert_equal(form, page.forms[0])
|
125
|
+
end
|
126
|
+
end
|
127
|
+
|
@@ -0,0 +1,298 @@
|
|
1
|
+
# -*- coding: utf-8 -*-
|
2
|
+
require File.expand_path(File.join(File.dirname(__FILE__), "helper"))
|
3
|
+
|
4
|
+
# require 'hpricot'
|
5
|
+
# Mechanize.html_parser = Hpricot
|
6
|
+
|
7
|
+
require 'test/unit'
|
8
|
+
require 'kconv'
|
9
|
+
require 'iconv'
|
10
|
+
|
11
|
+
module EncodingTestPage
|
12
|
+
if RUBY_VERSION >= "1.9.0"
|
13
|
+
BODY_ENC_PAIR = {
|
14
|
+
:utf8 => ::Encoding::UTF_8,
|
15
|
+
:ascii => ::Encoding::US_ASCII,
|
16
|
+
:latin => ::Encoding::ISO_8859_2,
|
17
|
+
:cp1252 => ::Encoding::CP1252,
|
18
|
+
:sjis => ::Encoding::SHIFT_JIS,
|
19
|
+
:euc => ::Encoding::EUC_JP,
|
20
|
+
:cp932 => ::Encoding::CP932,
|
21
|
+
:bin => ::Encoding::ASCII_8BIT}
|
22
|
+
end
|
23
|
+
|
24
|
+
ENC_NAME = {
|
25
|
+
:utf8 => 'utf-8',
|
26
|
+
:ascii => 'us-ascii',
|
27
|
+
:latin => 'iso-8859-2',
|
28
|
+
:cp1252 => 'cp1252',
|
29
|
+
:sjis => 'shift_jis',
|
30
|
+
:euc => 'euc-jp',
|
31
|
+
:cp932 => 'cp932',
|
32
|
+
:utf8_upcase => 'UTF-8',
|
33
|
+
:unknown => '*unknown*'}
|
34
|
+
|
35
|
+
JP_TITLE = # other JP strings may cause FAILURE, but that is due to misdetection by NKF.guess itself.
|
36
|
+
# This is Mechanize test, so don't modify me.
|
37
|
+
"\346\235\245\343\202\213\346\227\245\343\202\202\346\235\245\343\202\213\346\227\245" +
|
38
|
+
"\343\202\202\346\211\213\345\211\215\343\201\251\343\202\202\343\201\257\351\211\204" +
|
39
|
+
"\346\235\277\343\201\256\344\270\212\343\201\253\343\201\246\347\204\274\343\201\213" +
|
40
|
+
"\343\202\214\343\201\246\345\253\214\343\201\253\343\201\252\343\201\243\343\201\241" +
|
41
|
+
"\343\202\203\343\201\206\343\201\247\343\201\224\343\201\226\343\202\213"
|
42
|
+
|
43
|
+
# non-ASCII strings that are too short don't work well in the NKF.guess misdetection test
|
44
|
+
TITLE = {
|
45
|
+
:ascii => "test page",
|
46
|
+
# "Bialystok" in UTF-8, 'puts' me on UTF-8 and latin-2 font console
|
47
|
+
:latin => "Bia\305\202ystok"*100,
|
48
|
+
# dagger mark, pure iso-8859-1 doesn't contain it.
|
49
|
+
# irb1.9_on_utf8> "\342\200\240".encode('iso-8859-1') #=> UndefinedConversionError
|
50
|
+
# irb1.9_on_utf8> "\342\200\240".encode('cp1252') #=> "\x86"
|
51
|
+
:cp1252 => "dagger mark dagger mark dagger mark dagger mark \342\200\240"*5,
|
52
|
+
:utf8 => JP_TITLE,
|
53
|
+
:sjis => JP_TITLE,
|
54
|
+
# circled integer, "marutuki-suuji" in Japanese. pure SHIFT_JIS doesn't know them.
|
55
|
+
# irb1.9_on_utf8> s.encode('shift_jis') #=> Encoding::UndefinedConversionError
|
56
|
+
:cp932 => "\342\221\240\342\221\241\342\221\242\342\221\243\342\221\244"*3,
|
57
|
+
:euc => JP_TITLE,
|
58
|
+
}
|
59
|
+
def page(h)
|
60
|
+
content_type = if h[:http]
|
61
|
+
"text/html; charset=#{ENC_NAME[h[:http]]}"
|
62
|
+
else
|
63
|
+
'text/html'
|
64
|
+
end
|
65
|
+
meta = if h[:meta]
|
66
|
+
"<meta http-equiv=\"content-type\" content=\"text/html; charset=#{ENC_NAME[h[:meta]]}\">"
|
67
|
+
else
|
68
|
+
''
|
69
|
+
end
|
70
|
+
html = convert("<html>#{meta}<title>#{TITLE[h[:body]]}</title></html>", h[:body])
|
71
|
+
|
72
|
+
return Mechanize::Page.new(
|
73
|
+
URI.parse('http://www.example.com/'),
|
74
|
+
{ 'content-type' => content_type },
|
75
|
+
html,
|
76
|
+
200,
|
77
|
+
h[:agent]|| Mechanize.new)
|
78
|
+
end
|
79
|
+
|
80
|
+
def convert(str, enc)
|
81
|
+
case enc
|
82
|
+
when :ascii then
|
83
|
+
Iconv::conv('ASCII', 'UTF-8', str)
|
84
|
+
when :latin then
|
85
|
+
Iconv::conv('ISO-8859-2', 'UTF-8', str)
|
86
|
+
when :utf8 then
|
87
|
+
NKF.nkf('-Wm0w', str)
|
88
|
+
when :sjis then
|
89
|
+
NKF.nkf('-Wm0s', str)
|
90
|
+
when :euc then
|
91
|
+
NKF.nkf('-Wm0e', str)
|
92
|
+
when :cp932 then
|
93
|
+
Iconv::conv('CP932', 'UTF-8', str)
|
94
|
+
when :cp1252 then
|
95
|
+
Iconv::conv('CP1252', 'UTF-8', str)
|
96
|
+
else
|
97
|
+
str
|
98
|
+
end
|
99
|
+
end
|
100
|
+
|
101
|
+
def err_msg1(page, mes_name)
|
102
|
+
return <<EOM
|
103
|
+
At #{mes_name},
|
104
|
+
expected: #{Marshal.dump(TITLE[@enc]).inspect}
|
105
|
+
but got : #{Marshal.dump(page.at('title').inner_text).inspect}
|
106
|
+
EOM
|
107
|
+
end
|
108
|
+
|
109
|
+
def err_msg2(page, mes_name)
|
110
|
+
return <<EOM
|
111
|
+
At #{mes_name},
|
112
|
+
expected: #{Marshal.dump(convert(TITLE[@enc], @enc)).inspect}
|
113
|
+
but got : #{Marshal.dump(page.at('title').inner_text).inspect}
|
114
|
+
EOM
|
115
|
+
end
|
116
|
+
|
117
|
+
# Asserts that the <title> text of +page+ equals the title expected for @enc.
#
# page - object under test; must respond to #at and #parser.
#
# When Mechanize.html_parser is Nokogiri::HTML the title is compared with
# TITLE[@enc] as-is; for any other parser it is compared with
# convert(TITLE[@enc], @enc) and read through page.parser.
def assert_SUCCESS(page)
  if Mechanize.html_parser == Nokogiri::HTML
    # Nokogiri returns the inner text as a UTF-8 string
    assert(TITLE[@enc] == page.at('title').inner_text, err_msg1(page, 'SUCCESS'))
  else
    # err_msg2 reports the converted expected title, which is what this branch
    # compares against (the previously referenced err_msg3 does not exist).
    assert(convert(TITLE[@enc], @enc) == page.parser.at('title').inner_text, err_msg2(page, 'SUCCESS'))
  end
end
|
125
|
+
|
126
|
+
def assert_FAILURE(page)
|
127
|
+
if Mechanize.html_parser == Nokogiri::HTML
|
128
|
+
assert_equal(false, TITLE[@enc] == page.at('title').inner_text, err_msg1(page, 'FAILURE 1'))
|
129
|
+
if RUBY_VERSION >= "1.9.0"
|
130
|
+
assert_equal(false, TITLE[@enc] == page.at('title').inner_text.force_encoding(::Encoding::UTF_8), err_msg2(page, 'FAILURE 2'))
|
131
|
+
end
|
132
|
+
else
|
133
|
+
# Hpricot just returns "same" byte string, so never "FAILURE".
|
134
|
+
assert(convert(TITLE[@enc], @enc) == page.parser.at('title').inner_text, err_msg2(page, 'FAILURE'))
|
135
|
+
end
|
136
|
+
end
|
137
|
+
end
|
138
|
+
|
139
|
+
# M H D
|
140
|
+
# Meta - t t meta works every time
|
141
|
+
# HTTP f - t HTTP works only when meta doesn't exist
|
142
|
+
# Dete f f - Detect works only when neither meta nor HTTP exists
|
143
|
+
module EncodingTest
|
144
|
+
include EncodingTestPage
|
145
|
+
attr_reader :bad
|
146
|
+
|
147
|
+
def test_with_no_meta_no_http
|
148
|
+
page = page(:body => @enc)
|
149
|
+
assert_SUCCESS(page)
|
150
|
+
end
|
151
|
+
|
152
|
+
def test_with_right_meta_any_http
|
153
|
+
page = page(:body => @enc, :meta => @enc)
|
154
|
+
assert_SUCCESS(page)
|
155
|
+
page = page(:body => @enc, :meta => @enc, :http => bad)
|
156
|
+
assert_SUCCESS(page)
|
157
|
+
page = page(:body => @enc, :meta => @enc, :http => @enc)
|
158
|
+
assert_SUCCESS(page)
|
159
|
+
end
|
160
|
+
|
161
|
+
def test_with_no_meta_right_http
|
162
|
+
page = page(:body => @enc, :http => @enc)
|
163
|
+
assert_SUCCESS(page)
|
164
|
+
end
|
165
|
+
|
166
|
+
def test_failure_with_bad_meta_any_http
|
167
|
+
page = page(:body => @enc, :meta => bad)
|
168
|
+
assert_FAILURE(page)
|
169
|
+
page = page(:body => @enc, :meta => bad, :http => bad)
|
170
|
+
assert_FAILURE(page)
|
171
|
+
page = page(:body => @enc, :meta => bad, :http => @enc)
|
172
|
+
assert_FAILURE(page)
|
173
|
+
end
|
174
|
+
|
175
|
+
def test_failure_with_no_meta_bad_http
|
176
|
+
page = page(:body => @enc, :http => bad)
|
177
|
+
assert_FAILURE(page)
|
178
|
+
end
|
179
|
+
|
180
|
+
def test_overwrite_encoding
|
181
|
+
page = page(:body => @enc, :meta => bad)
|
182
|
+
page.encoding = ENC_NAME[@enc]
|
183
|
+
assert_SUCCESS(page)
|
184
|
+
end
|
185
|
+
end
|
186
|
+
|
187
|
+
class ASCIITest < Test::Unit::TestCase
|
188
|
+
include EncodingTest
|
189
|
+
def setup ; @enc, @bad = :ascii, :utf8 ; end
|
190
|
+
|
191
|
+
# ASCII succeeds in every case
|
192
|
+
alias :assert_FAILURE :assert_SUCCESS
|
193
|
+
end
|
194
|
+
|
195
|
+
class LatinTest < Test::Unit::TestCase
|
196
|
+
include EncodingTest
|
197
|
+
def setup ; @enc, @bad = :latin, :utf8 ; end
|
198
|
+
|
199
|
+
# Latin chars are misdetected as Japanese 'Shift_JIS' by NKF.guess
|
200
|
+
undef :test_with_no_meta_no_http
|
201
|
+
def test_failure_with_no_meta_no_http_cause_of_detect_charset_mistake
|
202
|
+
page = page(:body => @enc)
|
203
|
+
assert_FAILURE(page)
|
204
|
+
end
|
205
|
+
end
|
206
|
+
|
207
|
+
class CP1252Test < Test::Unit::TestCase
|
208
|
+
include EncodingTest
|
209
|
+
def setup ; @enc, @bad = :cp1252, :utf8 ; end
|
210
|
+
end
|
211
|
+
|
212
|
+
class UTF8Test < Test::Unit::TestCase
|
213
|
+
include EncodingTest
|
214
|
+
def setup ; @enc, @bad = :utf8, :sjis ; end
|
215
|
+
end
|
216
|
+
|
217
|
+
class ShiftJISTest < Test::Unit::TestCase
|
218
|
+
include EncodingTest
|
219
|
+
def setup ; @enc, @bad = :sjis, :utf8 ; end
|
220
|
+
end
|
221
|
+
|
222
|
+
class CP932Test < Test::Unit::TestCase
|
223
|
+
include EncodingTest
|
224
|
+
def setup ; @enc, @bad = :cp932, :utf8 ; end
|
225
|
+
end
|
226
|
+
|
227
|
+
class EUCJPTest < Test::Unit::TestCase
|
228
|
+
include EncodingTest
|
229
|
+
def setup ; @enc, @bad = :euc, :utf8 ; end
|
230
|
+
end
|
231
|
+
|
232
|
+
# =====================================
|
233
|
+
|
234
|
+
class Etc_Test < Test::Unit::TestCase
|
235
|
+
|
236
|
+
include EncodingTestPage
|
237
|
+
|
238
|
+
def setup
|
239
|
+
@agent = Mechanize.new
|
240
|
+
end
|
241
|
+
|
242
|
+
def test_page_meta_encoding
|
243
|
+
page = page(:body => :latin, :meta => :utf8, :http => :latin)
|
244
|
+
assert_equal('utf-8', page.meta_encoding)
|
245
|
+
end
|
246
|
+
|
247
|
+
def test_page_meta_encoding_as_is
|
248
|
+
page = page(:body => :latin, :meta => :utf8_upcase, :http => :latin)
|
249
|
+
assert_equal('UTF-8', page.meta_encoding)
|
250
|
+
page = page(:body => :latin, :meta => :unknown, :http => :latin)
|
251
|
+
assert_equal('*unknown*', page.meta_encoding)
|
252
|
+
end
|
253
|
+
|
254
|
+
def test_page_http_encoding
|
255
|
+
page = page(:body => :latin, :http => :utf8)
|
256
|
+
assert_equal('utf-8', page.http_encoding)
|
257
|
+
end
|
258
|
+
|
259
|
+
def test_page_http_encoding_as_is
|
260
|
+
page = page(:body => :latin, :http => :utf8_upcase)
|
261
|
+
assert_equal('UTF-8', page.http_encoding)
|
262
|
+
page = page(:body => :latin, :http => :unknown)
|
263
|
+
assert_equal('*unknown*', page.http_encoding)
|
264
|
+
end
|
265
|
+
|
266
|
+
def test_page_body_encoding
|
267
|
+
page = page(:body => :utf8)
|
268
|
+
assert_equal('UTF-8', page.body_encoding)
|
269
|
+
end
|
270
|
+
|
271
|
+
def test_post_page_hook
|
272
|
+
@enc = :latin
|
273
|
+
page = page(:body => :latin)
|
274
|
+
assert_FAILURE(page)
|
275
|
+
@agent.post_page_hooks << lambda{|p| p.encoding = p.http_charset}
|
276
|
+
page = page(:body => :latin, :http => :latin, :agent => @agent)
|
277
|
+
assert_SUCCESS(page)
|
278
|
+
end
|
279
|
+
|
280
|
+
def test_reset_parser
|
281
|
+
data = {
|
282
|
+
:title => "\343\202\277\343\202\244\343\203\210\343\203\253",
|
283
|
+
:link => "\343\203\252\343\203\263\343\202\257"} # "title", "link" in japanese utf-8
|
284
|
+
page = Mechanize::Page.new(
|
285
|
+
URI.parse('http://www.example.com/'),
|
286
|
+
{ 'content-type' => 'text/html; charset=SHIFT_JIS' },
|
287
|
+
"<html><title>#{data[:title]}</title><a href=\"/\">#{data[:link]}</a></html>",
|
288
|
+
200,
|
289
|
+
@agent)
|
290
|
+
|
291
|
+
bad = [page.title, page.links[0].text]
|
292
|
+
page.encoding = 'utf-8' # correct encoding
|
293
|
+
good = [page.title, page.links[0].text]
|
294
|
+
|
295
|
+
assert_not_equal(good, bad) # Page#encoding resets title and links
|
296
|
+
assert_equal([data[:title], data[:link]], good) # correct encoding
|
297
|
+
end
|
298
|
+
end
|