kitamomonga-mechanize 0.9.3.20090724215219
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGELOG.rdoc +504 -0
- data/EXAMPLES.rdoc +171 -0
- data/FAQ.rdoc +11 -0
- data/GUIDE.rdoc +122 -0
- data/LICENSE.rdoc +340 -0
- data/Manifest.txt +176 -0
- data/README.rdoc +60 -0
- data/Rakefile +33 -0
- data/examples/flickr_upload.rb +23 -0
- data/examples/mech-dump.rb +7 -0
- data/examples/proxy_req.rb +9 -0
- data/examples/rubyforge.rb +21 -0
- data/examples/spider.rb +11 -0
- data/lib/mechanize.rb +666 -0
- data/lib/mechanize/chain.rb +34 -0
- data/lib/mechanize/chain/auth_headers.rb +78 -0
- data/lib/mechanize/chain/body_decoding_handler.rb +46 -0
- data/lib/mechanize/chain/connection_resolver.rb +76 -0
- data/lib/mechanize/chain/custom_headers.rb +21 -0
- data/lib/mechanize/chain/handler.rb +9 -0
- data/lib/mechanize/chain/header_resolver.rb +51 -0
- data/lib/mechanize/chain/parameter_resolver.rb +22 -0
- data/lib/mechanize/chain/post_connect_hook.rb +0 -0
- data/lib/mechanize/chain/post_page_hook.rb +18 -0
- data/lib/mechanize/chain/pre_connect_hook.rb +20 -0
- data/lib/mechanize/chain/request_resolver.rb +30 -0
- data/lib/mechanize/chain/response_body_parser.rb +38 -0
- data/lib/mechanize/chain/response_header_handler.rb +48 -0
- data/lib/mechanize/chain/response_reader.rb +39 -0
- data/lib/mechanize/chain/ssl_resolver.rb +40 -0
- data/lib/mechanize/chain/uri_resolver.rb +75 -0
- data/lib/mechanize/content_type_error.rb +14 -0
- data/lib/mechanize/cookie.rb +70 -0
- data/lib/mechanize/cookie_jar.rb +188 -0
- data/lib/mechanize/file.rb +71 -0
- data/lib/mechanize/file_response.rb +60 -0
- data/lib/mechanize/file_saver.rb +37 -0
- data/lib/mechanize/form.rb +364 -0
- data/lib/mechanize/form/button.rb +7 -0
- data/lib/mechanize/form/check_box.rb +11 -0
- data/lib/mechanize/form/field.rb +26 -0
- data/lib/mechanize/form/file_upload.rb +22 -0
- data/lib/mechanize/form/image_button.rb +21 -0
- data/lib/mechanize/form/multi_select_list.rb +67 -0
- data/lib/mechanize/form/option.rb +49 -0
- data/lib/mechanize/form/radio_button.rb +36 -0
- data/lib/mechanize/form/select_list.rb +43 -0
- data/lib/mechanize/headers.rb +11 -0
- data/lib/mechanize/history.rb +65 -0
- data/lib/mechanize/inspect.rb +88 -0
- data/lib/mechanize/monkey_patch.rb +35 -0
- data/lib/mechanize/page.rb +279 -0
- data/lib/mechanize/page/base.rb +8 -0
- data/lib/mechanize/page/encoding.rb +61 -0
- data/lib/mechanize/page/frame.rb +20 -0
- data/lib/mechanize/page/link.rb +53 -0
- data/lib/mechanize/page/meta.rb +50 -0
- data/lib/mechanize/pluggable_parsers.rb +101 -0
- data/lib/mechanize/redirect_limit_reached_error.rb +16 -0
- data/lib/mechanize/redirect_not_get_or_head_error.rb +18 -0
- data/lib/mechanize/response_code_error.rb +22 -0
- data/lib/mechanize/unsupported_scheme_error.rb +8 -0
- data/lib/mechanize/util.rb +73 -0
- data/test/chain/test_argument_validator.rb +14 -0
- data/test/chain/test_auth_headers.rb +25 -0
- data/test/chain/test_custom_headers.rb +18 -0
- data/test/chain/test_header_resolver.rb +28 -0
- data/test/chain/test_parameter_resolver.rb +35 -0
- data/test/chain/test_request_resolver.rb +29 -0
- data/test/chain/test_response_reader.rb +24 -0
- data/test/data/htpasswd +1 -0
- data/test/data/server.crt +16 -0
- data/test/data/server.csr +12 -0
- data/test/data/server.key +15 -0
- data/test/data/server.pem +15 -0
- data/test/helper.rb +129 -0
- data/test/htdocs/alt_text.html +10 -0
- data/test/htdocs/bad_form_test.html +9 -0
- data/test/htdocs/button.jpg +0 -0
- data/test/htdocs/empty_form.html +6 -0
- data/test/htdocs/file_upload.html +26 -0
- data/test/htdocs/find_link.html +41 -0
- data/test/htdocs/form_multi_select.html +16 -0
- data/test/htdocs/form_multival.html +37 -0
- data/test/htdocs/form_no_action.html +18 -0
- data/test/htdocs/form_no_input_name.html +16 -0
- data/test/htdocs/form_select.html +16 -0
- data/test/htdocs/form_select_all.html +16 -0
- data/test/htdocs/form_select_none.html +17 -0
- data/test/htdocs/form_select_noopts.html +10 -0
- data/test/htdocs/form_set_fields.html +14 -0
- data/test/htdocs/form_test.html +188 -0
- data/test/htdocs/frame_test.html +30 -0
- data/test/htdocs/google.html +13 -0
- data/test/htdocs/iframe_test.html +16 -0
- data/test/htdocs/index.html +6 -0
- data/test/htdocs/link with space.html +5 -0
- data/test/htdocs/meta_cookie.html +11 -0
- data/test/htdocs/no_title_test.html +6 -0
- data/test/htdocs/relative/tc_relative_links.html +21 -0
- data/test/htdocs/tc_bad_charset.html +9 -0
- data/test/htdocs/tc_bad_links.html +5 -0
- data/test/htdocs/tc_base_link.html +8 -0
- data/test/htdocs/tc_blank_form.html +11 -0
- data/test/htdocs/tc_charset.html +6 -0
- data/test/htdocs/tc_checkboxes.html +19 -0
- data/test/htdocs/tc_encoded_links.html +5 -0
- data/test/htdocs/tc_follow_meta.html +8 -0
- data/test/htdocs/tc_form_action.html +48 -0
- data/test/htdocs/tc_links.html +19 -0
- data/test/htdocs/tc_no_attributes.html +16 -0
- data/test/htdocs/tc_pretty_print.html +17 -0
- data/test/htdocs/tc_radiobuttons.html +17 -0
- data/test/htdocs/tc_referer.html +10 -0
- data/test/htdocs/tc_relative_links.html +19 -0
- data/test/htdocs/tc_textarea.html +23 -0
- data/test/htdocs/test_bad_encoding.html +52 -0
- data/test/htdocs/unusual______.html +5 -0
- data/test/servlets.rb +365 -0
- data/test/ssl_server.rb +48 -0
- data/test/test_authenticate.rb +71 -0
- data/test/test_bad_links.rb +25 -0
- data/test/test_blank_form.rb +16 -0
- data/test/test_checkboxes.rb +61 -0
- data/test/test_content_type.rb +13 -0
- data/test/test_cookie_class.rb +338 -0
- data/test/test_cookie_jar.rb +362 -0
- data/test/test_cookies.rb +123 -0
- data/test/test_encoded_links.rb +20 -0
- data/test/test_errors.rb +49 -0
- data/test/test_follow_meta.rb +108 -0
- data/test/test_form_action.rb +52 -0
- data/test/test_form_as_hash.rb +61 -0
- data/test/test_form_button.rb +38 -0
- data/test/test_form_no_inputname.rb +15 -0
- data/test/test_forms.rb +577 -0
- data/test/test_frames.rb +25 -0
- data/test/test_get_headers.rb +73 -0
- data/test/test_gzipping.rb +22 -0
- data/test/test_hash_api.rb +45 -0
- data/test/test_history.rb +142 -0
- data/test/test_history_added.rb +16 -0
- data/test/test_html_unscape_forms.rb +39 -0
- data/test/test_if_modified_since.rb +20 -0
- data/test/test_keep_alive.rb +31 -0
- data/test/test_links.rb +127 -0
- data/test/test_mech.rb +289 -0
- data/test/test_mechanize_file.rb +72 -0
- data/test/test_meta.rb +65 -0
- data/test/test_multi_select.rb +106 -0
- data/test/test_no_attributes.rb +13 -0
- data/test/test_option.rb +18 -0
- data/test/test_page.rb +127 -0
- data/test/test_page_encoding.rb +298 -0
- data/test/test_pluggable_parser.rb +145 -0
- data/test/test_post_form.rb +34 -0
- data/test/test_pretty_print.rb +22 -0
- data/test/test_radiobutton.rb +75 -0
- data/test/test_redirect_limit_reached.rb +39 -0
- data/test/test_redirect_verb_handling.rb +43 -0
- data/test/test_referer.rb +39 -0
- data/test/test_relative_links.rb +40 -0
- data/test/test_request.rb +13 -0
- data/test/test_response_code.rb +52 -0
- data/test/test_save_file.rb +103 -0
- data/test/test_scheme.rb +63 -0
- data/test/test_select.rb +106 -0
- data/test/test_select_all.rb +15 -0
- data/test/test_select_none.rb +15 -0
- data/test/test_select_noopts.rb +16 -0
- data/test/test_set_fields.rb +44 -0
- data/test/test_ssl_server.rb +20 -0
- data/test/test_subclass.rb +14 -0
- data/test/test_textarea.rb +45 -0
- data/test/test_upload.rb +109 -0
- data/test/test_verbs.rb +25 -0
- metadata +320 -0
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
require File.expand_path(File.join(File.dirname(__FILE__), "helper"))
|
|
2
|
+
|
|
3
|
+
# Checks that fetching the tc_no_attributes.html fixture does not raise.
class TestNoAttributes < Test::Unit::TestCase
  # Each test gets its own agent.
  def setup
    @agent = Mechanize.new
  end

  # The request itself is the assertion: any exception fails the test.
  def test_parse_no_attributes
    fixture_url = 'http://localhost/tc_no_attributes.html'
    assert_nothing_raised { @agent.get(fixture_url) }
  end
end
|
data/test/test_option.rb
ADDED
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
require File.expand_path(File.join(File.dirname(__FILE__), "helper"))
|
|
2
|
+
|
|
3
|
+
# Exercises Mechanize::Form::Option with a minimal stand-in node.
class OptionTest < Test::Unit::TestCase
  # Hash-backed fake node. It exposes +inner_text+ and answers
  # +has_attribute?+ / +attributes+ through the matching Hash methods.
  class FakeAttribute < Hash
    attr_reader :inner_text

    alias_method :has_attribute?, :has_key?
    alias_method :attributes, :keys

    def initialize(inner_text)
      @inner_text = inner_text
    end
  end

  # The fake node holds no entries at all, so the option's value is
  # expected to equal the node's inner text.
  def test_option_missing_value
    node = FakeAttribute.new('blah')
    assert_equal('blah', Mechanize::Form::Option.new(node, nil).value)
  end
end
|
data/test/test_page.rb
ADDED
|
@@ -0,0 +1,127 @@
|
|
|
1
|
+
require File.expand_path(File.join(File.dirname(__FILE__), "helper"))
|
|
2
|
+
|
|
3
|
+
require 'cgi'
|
|
4
|
+
|
|
5
|
+
# Tests for Mechanize::Page: reported encoding, title extraction, block
# yielding from Mechanize#get, and form lookup by criteria.
class TestPage < Test::Unit::TestCase
  # Each test gets its own agent.
  def setup
    @agent = Mechanize.new
  end

  # The tc_charset.html fixture is expected to report windows-1255.
  def test_page_gets_charset_from_page
    page = @agent.get("http://localhost/tc_charset.html")
    assert_equal 'windows-1255', page.encoding
  end

  # A content-disposition header containing ";;" must still yield a parser.
  def test_double_semicolon
    page = @agent.get("http://localhost/http_headers?content-disposition=#{CGI.escape('attachment;; filename=fooooo')}")
    assert page.parser
  end

  # A nonsense charset in the content-type header must still yield a parser.
  def test_broken_charset
    page = @agent.get("http://localhost/http_headers?content-type=#{CGI.escape('text/html; charset=akldsjfhaldjfksh')}")
    assert page.parser
  end

  # The charset name is reported exactly as sent ("ISO_8859-1").
  def test_mostly_broken_charset
    page = @agent.get("http://localhost/http_headers?content-type=#{CGI.escape('text/html; charset=ISO_8859-1')}")
    assert_equal 'ISO_8859-1', page.encoding
  end

  # "UTF8" (no hyphen) is reported as sent by both the parser and the page.
  def test_another_mostly_broken_charset
    page = @agent.get("http://localhost/http_headers?content-type=#{CGI.escape('text/html; charset=UTF8')}")
    assert_equal 'UTF8', page.parser.encoding
    assert_equal 'UTF8', page.encoding
  end

  # An upper-cased media type still produces a Mechanize::Page and is
  # reported unchanged by content_type.
  def test_upper_case_content_type
    page = @agent.get("http://localhost/http_headers?content-type=#{CGI.escape('text/HTML')}")
    assert_instance_of Mechanize::Page, page
    assert_equal 'text/HTML', page.content_type
  end

  def test_encoding_override_before_parser_initialized
    # document has a bad encoding information - windows-1255
    page = @agent.get("http://localhost/tc_bad_charset.html")
    # encoding is wrong, so user wants to force ISO-8859-2
    page.encoding = 'ISO-8859-2'
    assert_equal 'ISO-8859-2', page.encoding
  end

  def test_encoding_override_after_parser_was_initialized
    # document has a bad encoding information - windows-1255
    page = @agent.get("http://localhost/tc_bad_charset.html")
    page.parser
    # autodetection sets encoding to windows-1255
    assert_equal 'windows-1255', page.encoding
    # encoding is wrong, so user wants to force ISO-8859-2
    page.encoding = 'ISO-8859-2'
    assert_equal 'ISO-8859-2', page.encoding
  end

  # The charset from the content-type header is reported as the encoding.
  def test_page_gets_charset_sent_by_server
    page = @agent.get("http://localhost/http_headers?content-type=#{CGI.escape('text/html; charset=UTF-8')}")
    assert_equal 'UTF-8', page.encoding
  end

  # Assigning Page#encoding is reflected by the parser's encoding.
  def test_set_encoding
    page = @agent.get("http://localhost/file_upload.html")
    page.encoding = 'UTF-8'
    assert_equal 'UTF-8', page.parser.encoding
  end

  # Mechanize#get yields the fetched page to its block.
  def test_page_gets_yielded
    yielded_page = nil
    @agent.get("http://localhost/file_upload.html") { |page|
      yielded_page = page
    }
    assert yielded_page
    assert_equal('File Upload Form', yielded_page.title)
  end

  def test_title
    page = @agent.get("http://localhost/file_upload.html")
    assert_equal('File Upload Form', page.title)
  end

  # For the no_title_test.html fixture, Page#title is expected to be nil.
  def test_no_title
    page = @agent.get("http://localhost/no_title_test.html")
    assert_nil(page.title)
  end

  # A page built directly with an EUC-JP content-type header hands that
  # encoding to its parser.
  def test_page_decoded_with_charset
    page = Mechanize::Page.new(
      URI.parse('http://tenderlovemaking.com/'),
      { 'content-type' => 'text/html; charset=EUC-JP' },
      '<html><body>hello</body></html>',
      400,
      @agent
    )
    assert_equal 'EUC-JP', page.parser.encoding
  end

  # Page#form accepts a criteria hash, yields the match to an optional
  # block, and finds the same form by name and by action.
  def test_find_form_with_hash
    page = @agent.get("http://localhost/tc_form_action.html")
    form = page.form(:name => 'post_form1')
    assert form
    yielded = false
    form = page.form(:name => 'post_form1') { |f|
      yielded = true
      assert f
      # +form+ here is still the result of the first lookup above.
      assert_equal(form, f)
    }
    assert yielded

    form_by_action = page.form(:action => '/form_post?a=b&b=c')
    assert form_by_action
    assert_equal(form, form_by_action)
  end

  # Page#form_with finds the first form by its class or id attribute.
  def test_find_form_with_attribute_class_or_id
    page = @agent.get('http://localhost:2000/form_no_action.html')
    form = page.form_with(:class => 'formclass')
    assert_equal(form, page.forms[0])
    form = page.form_with(:id => 'formid')
    assert_equal(form, page.forms[0])
  end
end
|
|
127
|
+
|
|
@@ -0,0 +1,298 @@
|
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
2
|
+
require File.expand_path(File.join(File.dirname(__FILE__), "helper"))
|
|
3
|
+
|
|
4
|
+
# require 'hpricot'
|
|
5
|
+
# Mechanize.html_parser = Hpricot
|
|
6
|
+
|
|
7
|
+
require 'test/unit'
|
|
8
|
+
require 'kconv'
|
|
9
|
+
require 'iconv'
|
|
10
|
+
|
|
11
|
+
# Shared fixtures and assertion helpers for the page-encoding tests.
#
# Including classes are expected to set @enc (a key of TITLE / ENC_NAME)
# and to provide the Test::Unit assertion methods.
module EncodingTestPage
  if RUBY_VERSION >= "1.9.0"
    # Encoding objects keyed by the short names used in these tests.
    # Defined only on Ruby 1.9 and later.
    BODY_ENC_PAIR = {
      :utf8 => ::Encoding::UTF_8,
      :ascii => ::Encoding::US_ASCII,
      :latin => ::Encoding::ISO_8859_2,
      :cp1252 => ::Encoding::CP1252,
      :sjis => ::Encoding::SHIFT_JIS,
      :euc => ::Encoding::EUC_JP,
      :cp932 => ::Encoding::CP932,
      :bin => ::Encoding::ASCII_8BIT}
  end

  # Charset names as written into the HTTP header or <meta> tag.
  ENC_NAME = {
    :utf8 => 'utf-8',
    :ascii => 'us-ascii',
    :latin => 'iso-8859-2',
    :cp1252 => 'cp1252',
    :sjis => 'shift_jis',
    :euc => 'euc-jp',
    :cp932 => 'cp932',
    :utf8_upcase => 'UTF-8',
    :unknown => '*unknown*'}

  # Other JP strings may cause FAILURE, but that is due to misdetection by
  # NKF.guess itself. This is a Mechanize test, so don't modify this string.
  JP_TITLE =
    "\346\235\245\343\202\213\346\227\245\343\202\202\346\235\245\343\202\213\346\227\245" +
    "\343\202\202\346\211\213\345\211\215\343\201\251\343\202\202\343\201\257\351\211\204" +
    "\346\235\277\343\201\256\344\270\212\343\201\253\343\201\246\347\204\274\343\201\213" +
    "\343\202\214\343\201\246\345\253\214\343\201\253\343\201\252\343\201\243\343\201\241" +
    "\343\202\203\343\201\206\343\201\247\343\201\224\343\201\226\343\202\213"

  # Page titles (written as UTF-8 bytes) keyed by body encoding.
  # Non-ASCII strings that are too short don't work well in the NKF.guess
  # misdetection tests, hence the repetition.
  TITLE = {
    :ascii => "test page",
    # "Bialystok" in UTF-8, 'puts' me on UTF-8 and latin-2 font console
    :latin => "Bia\305\202ystok"*100,
    # dagger mark, pure iso-8859-1 doesn't contain it.
    # irb1.9_on_utf8> "\342\200\240".encode('iso-8859-1') #=> UndefinedConversionError
    # irb1.9_on_utf8> "\342\200\240".encode('cp1252') #=> "\x86"
    :cp1252 => "dagger mark dagger mark dagger mark dagger mark \342\200\240"*5,
    :utf8 => JP_TITLE,
    :sjis => JP_TITLE,
    # circled integer, "marutuki-suuji" in Japanese. pure SHIFT_JIS doesn't know them.
    # irb1.9_on_utf8> s.encode('shift_jis') #=> Encoding::UndefinedConversionError
    :cp932 => "\342\221\240\342\221\241\342\221\242\342\221\243\342\221\244"*3,
    :euc => JP_TITLE,
  }

  # Builds a Mechanize::Page for http://www.example.com/ with status 200.
  #
  # h - options hash:
  #     :body  - encoding key the HTML is converted to (also selects TITLE)
  #     :http  - encoding key for the content-type charset (omitted if unset)
  #     :meta  - encoding key for a <meta http-equiv> tag (omitted if unset)
  #     :agent - agent passed to the page (defaults to Mechanize.new)
  #
  # Returns the new Mechanize::Page.
  def page(h)
    content_type = if h[:http]
      "text/html; charset=#{ENC_NAME[h[:http]]}"
    else
      'text/html'
    end
    meta = if h[:meta]
      "<meta http-equiv=\"content-type\" content=\"text/html; charset=#{ENC_NAME[h[:meta]]}\">"
    else
      ''
    end
    html = convert("<html>#{meta}<title>#{TITLE[h[:body]]}</title></html>", h[:body])

    return Mechanize::Page.new(
      URI.parse('http://www.example.com/'),
      { 'content-type' => content_type },
      html,
      200,
      h[:agent]|| Mechanize.new)
  end

  # Converts a UTF-8 string to the encoding named by +enc+, using Iconv or
  # NKF depending on the target. Unrecognised keys return +str+ unchanged.
  def convert(str, enc)
    case enc
    when :ascii then
      Iconv::conv('ASCII', 'UTF-8', str)
    when :latin then
      Iconv::conv('ISO-8859-2', 'UTF-8', str)
    when :utf8 then
      NKF.nkf('-Wm0w', str)
    when :sjis then
      NKF.nkf('-Wm0s', str)
    when :euc then
      NKF.nkf('-Wm0e', str)
    when :cp932 then
      Iconv::conv('CP932', 'UTF-8', str)
    when :cp1252 then
      Iconv::conv('CP1252', 'UTF-8', str)
    else
      str
    end
  end

  # Failure message comparing the UTF-8 title for @enc with the parsed
  # title. Both are Marshal-dumped and inspected so raw bytes are visible.
  def err_msg1(page, mes_name)
    return <<EOM
At #{mes_name},
expected: #{Marshal.dump(TITLE[@enc]).inspect}
but got : #{Marshal.dump(page.at('title').inner_text).inspect}
EOM
  end

  # Same as err_msg1, but the expected title is first converted to @enc.
  def err_msg2(page, mes_name)
    return <<EOM
At #{mes_name},
expected: #{Marshal.dump(convert(TITLE[@enc], @enc)).inspect}
but got : #{Marshal.dump(page.at('title').inner_text).inspect}
EOM
  end

  # Asserts that the page title was read correctly.
  def assert_SUCCESS(page)
    if Mechanize.html_parser == Nokogiri::HTML
      # Nokogiri string returns UTF-8 string
      assert(TITLE[@enc] == page.at('title').inner_text, err_msg1(page, 'SUCCESS'))
    else
      # The expected value here is the converted title, so the message must
      # come from err_msg2. The previous call to err_msg3 named a method that
      # does not exist; because the message argument is evaluated eagerly,
      # this branch raised NoMethodError before any assertion ran.
      assert(convert(TITLE[@enc], @enc) == page.parser.at('title').inner_text, err_msg2(page, 'SUCCESS'))
    end
  end

  # Asserts that the page title was NOT read correctly (Nokogiri), or that
  # the raw bytes came through unchanged (any other parser).
  def assert_FAILURE(page)
    if Mechanize.html_parser == Nokogiri::HTML
      assert_equal(false, TITLE[@enc] == page.at('title').inner_text, err_msg1(page, 'FAILURE 1'))
      if RUBY_VERSION >= "1.9.0"
        assert_equal(false, TITLE[@enc] == page.at('title').inner_text.force_encoding(::Encoding::UTF_8), err_msg2(page, 'FAILURE 2'))
      end
    else
      # Hpricot just returns "same" byte string, so never "FAILURE".
      assert(convert(TITLE[@enc], @enc) == page.parser.at('title').inner_text, err_msg2(page, 'FAILURE'))
    end
  end
end
|
|
138
|
+
|
|
139
|
+
# M H D
|
|
140
|
+
# Meta - t t meta works every time
|
|
141
|
+
# HTTP f - t HTTP works only when meta doesn't exist
|
|
142
|
+
# Dete f f - Detect works only when both of meta and HTTP don't exist
|
|
143
|
+
# Test cases shared by every per-encoding test class. The including class
# sets @enc (the body's real encoding) and @bad (a wrong encoding to
# declare) in its setup.
module EncodingTest
  include EncodingTestPage
  attr_reader :bad

  # Neither a meta tag nor an HTTP charset is present.
  def test_with_no_meta_no_http
    assert_SUCCESS(page(:body => @enc))
  end

  # A correct meta tag, with no, a wrong, or a right HTTP charset.
  def test_with_right_meta_any_http
    without_http = page(:body => @enc, :meta => @enc)
    assert_SUCCESS(without_http)

    wrong_http = page(:body => @enc, :meta => @enc, :http => bad)
    assert_SUCCESS(wrong_http)

    right_http = page(:body => @enc, :meta => @enc, :http => @enc)
    assert_SUCCESS(right_http)
  end

  # No meta tag; the HTTP charset is correct.
  def test_with_no_meta_right_http
    assert_SUCCESS(page(:body => @enc, :http => @enc))
  end

  # A wrong meta tag, with no, a wrong, or a right HTTP charset.
  def test_failure_with_bad_meta_any_http
    without_http = page(:body => @enc, :meta => bad)
    assert_FAILURE(without_http)

    wrong_http = page(:body => @enc, :meta => bad, :http => bad)
    assert_FAILURE(wrong_http)

    right_http = page(:body => @enc, :meta => bad, :http => @enc)
    assert_FAILURE(right_http)
  end

  # No meta tag; the HTTP charset is wrong.
  def test_failure_with_no_meta_bad_http
    assert_FAILURE(page(:body => @enc, :http => bad))
  end

  # Assigning the right encoding after construction overrides a wrong meta tag.
  def test_overwrite_encoding
    doc = page(:body => @enc, :meta => bad)
    doc.encoding = ENC_NAME[@enc]
    assert_SUCCESS(doc)
  end
end
|
|
186
|
+
|
|
187
|
+
# Runs the shared encoding tests with an :ascii body and :utf8 as the
# wrong encoding.
class ASCIITest < Test::Unit::TestCase
  include EncodingTest

  def setup
    @enc, @bad = :ascii, :utf8
  end

  # ASCII succeeds in every case, so the failure check is the success check.
  alias_method :assert_FAILURE, :assert_SUCCESS
end
|
|
194
|
+
|
|
195
|
+
# Runs the shared encoding tests with a :latin body and :utf8 as the
# wrong encoding.
class LatinTest < Test::Unit::TestCase
  include EncodingTest

  def setup
    @enc, @bad = :latin, :utf8
  end

  # Latin chars are misdetected as Japanese 'Shift_JIS' by NKF.guess, so the
  # shared "no meta, no http" success test is replaced by a failure test.
  undef_method :test_with_no_meta_no_http

  def test_failure_with_no_meta_no_http_cause_of_detect_charset_mistake
    assert_FAILURE(page(:body => @enc))
  end
end
|
|
206
|
+
|
|
207
|
+
# Runs the shared encoding tests with a :cp1252 body and :utf8 as the
# wrong encoding.
class CP1252Test < Test::Unit::TestCase
  include EncodingTest

  def setup
    @enc, @bad = :cp1252, :utf8
  end
end
|
|
211
|
+
|
|
212
|
+
# Runs the shared encoding tests with a :utf8 body and :sjis as the
# wrong encoding.
class UTF8Test < Test::Unit::TestCase
  include EncodingTest

  def setup
    @enc, @bad = :utf8, :sjis
  end
end
|
|
216
|
+
|
|
217
|
+
# Runs the shared encoding tests with a :sjis body and :utf8 as the
# wrong encoding.
class ShiftJISTest < Test::Unit::TestCase
  include EncodingTest

  def setup
    @enc, @bad = :sjis, :utf8
  end
end
|
|
221
|
+
|
|
222
|
+
# Runs the shared encoding tests with a :cp932 body and :utf8 as the
# wrong encoding.
class CP932Test < Test::Unit::TestCase
  include EncodingTest

  def setup
    @enc, @bad = :cp932, :utf8
  end
end
|
|
226
|
+
|
|
227
|
+
# Runs the shared encoding tests with an :euc body and :utf8 as the
# wrong encoding.
class EUCJPTest < Test::Unit::TestCase
  include EncodingTest

  def setup
    @enc, @bad = :euc, :utf8
  end
end
|
|
231
|
+
|
|
232
|
+
# =====================================
|
|
233
|
+
|
|
234
|
+
# Miscellaneous encoding tests: the meta/http/body encoding accessors on
# Mechanize::Page, a post-page hook that sets the encoding, and the effect
# of Page#encoding= on already-read title and links.
class Etc_Test < Test::Unit::TestCase

  include EncodingTestPage

  def setup
    @agent = Mechanize.new
  end

  # meta_encoding reports the charset written in the <meta> tag.
  def test_page_meta_encoding
    doc = page(:body => :latin, :meta => :utf8, :http => :latin)
    assert_equal('utf-8', doc.meta_encoding)
  end

  # meta_encoding keeps the declared name as written (case, unknown names).
  def test_page_meta_encoding_as_is
    upcased = page(:body => :latin, :meta => :utf8_upcase, :http => :latin)
    assert_equal('UTF-8', upcased.meta_encoding)

    unknown = page(:body => :latin, :meta => :unknown, :http => :latin)
    assert_equal('*unknown*', unknown.meta_encoding)
  end

  # http_encoding reports the charset from the content-type header.
  def test_page_http_encoding
    doc = page(:body => :latin, :http => :utf8)
    assert_equal('utf-8', doc.http_encoding)
  end

  # http_encoding keeps the header's name as written (case, unknown names).
  def test_page_http_encoding_as_is
    upcased = page(:body => :latin, :http => :utf8_upcase)
    assert_equal('UTF-8', upcased.http_encoding)

    unknown = page(:body => :latin, :http => :unknown)
    assert_equal('*unknown*', unknown.http_encoding)
  end

  # body_encoding for a UTF-8 body with no declarations is 'UTF-8'.
  def test_page_body_encoding
    doc = page(:body => :utf8)
    assert_equal('UTF-8', doc.body_encoding)
  end

  # Without a hook the latin body is misread; with the hook registered on
  # the agent and a correct HTTP charset, the title is read correctly.
  # NOTE(review): the hook reads http_charset while the other tests in this
  # class read http_encoding - confirm both accessors exist on the page.
  def test_post_page_hook
    @enc = :latin
    assert_FAILURE(page(:body => :latin))

    @agent.post_page_hooks << lambda { |hooked| hooked.encoding = hooked.http_charset }
    assert_SUCCESS(page(:body => :latin, :http => :latin, :agent => @agent))
  end

  # Changing Page#encoding must re-read the title and link text.
  def test_reset_parser
    # "title" and "link" in Japanese, as UTF-8 bytes
    title_text = "\343\202\277\343\202\244\343\203\210\343\203\253"
    link_text = "\343\203\252\343\203\263\343\202\257"

    doc = Mechanize::Page.new(
      URI.parse('http://www.example.com/'),
      { 'content-type' => 'text/html; charset=SHIFT_JIS' },
      "<html><title>#{title_text}</title><a href=\"/\">#{link_text}</a></html>",
      200,
      @agent)

    before_fix = [doc.title, doc.links[0].text]
    doc.encoding = 'utf-8' # correct encoding
    after_fix = [doc.title, doc.links[0].text]

    assert_not_equal(after_fix, before_fix) # Page#encoding resets title and links
    assert_equal([title_text, link_text], after_fix) # correct encoding
  end
end
|