kitamomonga-mechanize 0.9.3.20090724215219

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (177) hide show
  1. data/CHANGELOG.rdoc +504 -0
  2. data/EXAMPLES.rdoc +171 -0
  3. data/FAQ.rdoc +11 -0
  4. data/GUIDE.rdoc +122 -0
  5. data/LICENSE.rdoc +340 -0
  6. data/Manifest.txt +176 -0
  7. data/README.rdoc +60 -0
  8. data/Rakefile +33 -0
  9. data/examples/flickr_upload.rb +23 -0
  10. data/examples/mech-dump.rb +7 -0
  11. data/examples/proxy_req.rb +9 -0
  12. data/examples/rubyforge.rb +21 -0
  13. data/examples/spider.rb +11 -0
  14. data/lib/mechanize.rb +666 -0
  15. data/lib/mechanize/chain.rb +34 -0
  16. data/lib/mechanize/chain/auth_headers.rb +78 -0
  17. data/lib/mechanize/chain/body_decoding_handler.rb +46 -0
  18. data/lib/mechanize/chain/connection_resolver.rb +76 -0
  19. data/lib/mechanize/chain/custom_headers.rb +21 -0
  20. data/lib/mechanize/chain/handler.rb +9 -0
  21. data/lib/mechanize/chain/header_resolver.rb +51 -0
  22. data/lib/mechanize/chain/parameter_resolver.rb +22 -0
  23. data/lib/mechanize/chain/post_connect_hook.rb +0 -0
  24. data/lib/mechanize/chain/post_page_hook.rb +18 -0
  25. data/lib/mechanize/chain/pre_connect_hook.rb +20 -0
  26. data/lib/mechanize/chain/request_resolver.rb +30 -0
  27. data/lib/mechanize/chain/response_body_parser.rb +38 -0
  28. data/lib/mechanize/chain/response_header_handler.rb +48 -0
  29. data/lib/mechanize/chain/response_reader.rb +39 -0
  30. data/lib/mechanize/chain/ssl_resolver.rb +40 -0
  31. data/lib/mechanize/chain/uri_resolver.rb +75 -0
  32. data/lib/mechanize/content_type_error.rb +14 -0
  33. data/lib/mechanize/cookie.rb +70 -0
  34. data/lib/mechanize/cookie_jar.rb +188 -0
  35. data/lib/mechanize/file.rb +71 -0
  36. data/lib/mechanize/file_response.rb +60 -0
  37. data/lib/mechanize/file_saver.rb +37 -0
  38. data/lib/mechanize/form.rb +364 -0
  39. data/lib/mechanize/form/button.rb +7 -0
  40. data/lib/mechanize/form/check_box.rb +11 -0
  41. data/lib/mechanize/form/field.rb +26 -0
  42. data/lib/mechanize/form/file_upload.rb +22 -0
  43. data/lib/mechanize/form/image_button.rb +21 -0
  44. data/lib/mechanize/form/multi_select_list.rb +67 -0
  45. data/lib/mechanize/form/option.rb +49 -0
  46. data/lib/mechanize/form/radio_button.rb +36 -0
  47. data/lib/mechanize/form/select_list.rb +43 -0
  48. data/lib/mechanize/headers.rb +11 -0
  49. data/lib/mechanize/history.rb +65 -0
  50. data/lib/mechanize/inspect.rb +88 -0
  51. data/lib/mechanize/monkey_patch.rb +35 -0
  52. data/lib/mechanize/page.rb +279 -0
  53. data/lib/mechanize/page/base.rb +8 -0
  54. data/lib/mechanize/page/encoding.rb +61 -0
  55. data/lib/mechanize/page/frame.rb +20 -0
  56. data/lib/mechanize/page/link.rb +53 -0
  57. data/lib/mechanize/page/meta.rb +50 -0
  58. data/lib/mechanize/pluggable_parsers.rb +101 -0
  59. data/lib/mechanize/redirect_limit_reached_error.rb +16 -0
  60. data/lib/mechanize/redirect_not_get_or_head_error.rb +18 -0
  61. data/lib/mechanize/response_code_error.rb +22 -0
  62. data/lib/mechanize/unsupported_scheme_error.rb +8 -0
  63. data/lib/mechanize/util.rb +73 -0
  64. data/test/chain/test_argument_validator.rb +14 -0
  65. data/test/chain/test_auth_headers.rb +25 -0
  66. data/test/chain/test_custom_headers.rb +18 -0
  67. data/test/chain/test_header_resolver.rb +28 -0
  68. data/test/chain/test_parameter_resolver.rb +35 -0
  69. data/test/chain/test_request_resolver.rb +29 -0
  70. data/test/chain/test_response_reader.rb +24 -0
  71. data/test/data/htpasswd +1 -0
  72. data/test/data/server.crt +16 -0
  73. data/test/data/server.csr +12 -0
  74. data/test/data/server.key +15 -0
  75. data/test/data/server.pem +15 -0
  76. data/test/helper.rb +129 -0
  77. data/test/htdocs/alt_text.html +10 -0
  78. data/test/htdocs/bad_form_test.html +9 -0
  79. data/test/htdocs/button.jpg +0 -0
  80. data/test/htdocs/empty_form.html +6 -0
  81. data/test/htdocs/file_upload.html +26 -0
  82. data/test/htdocs/find_link.html +41 -0
  83. data/test/htdocs/form_multi_select.html +16 -0
  84. data/test/htdocs/form_multival.html +37 -0
  85. data/test/htdocs/form_no_action.html +18 -0
  86. data/test/htdocs/form_no_input_name.html +16 -0
  87. data/test/htdocs/form_select.html +16 -0
  88. data/test/htdocs/form_select_all.html +16 -0
  89. data/test/htdocs/form_select_none.html +17 -0
  90. data/test/htdocs/form_select_noopts.html +10 -0
  91. data/test/htdocs/form_set_fields.html +14 -0
  92. data/test/htdocs/form_test.html +188 -0
  93. data/test/htdocs/frame_test.html +30 -0
  94. data/test/htdocs/google.html +13 -0
  95. data/test/htdocs/iframe_test.html +16 -0
  96. data/test/htdocs/index.html +6 -0
  97. data/test/htdocs/link with space.html +5 -0
  98. data/test/htdocs/meta_cookie.html +11 -0
  99. data/test/htdocs/no_title_test.html +6 -0
  100. data/test/htdocs/relative/tc_relative_links.html +21 -0
  101. data/test/htdocs/tc_bad_charset.html +9 -0
  102. data/test/htdocs/tc_bad_links.html +5 -0
  103. data/test/htdocs/tc_base_link.html +8 -0
  104. data/test/htdocs/tc_blank_form.html +11 -0
  105. data/test/htdocs/tc_charset.html +6 -0
  106. data/test/htdocs/tc_checkboxes.html +19 -0
  107. data/test/htdocs/tc_encoded_links.html +5 -0
  108. data/test/htdocs/tc_follow_meta.html +8 -0
  109. data/test/htdocs/tc_form_action.html +48 -0
  110. data/test/htdocs/tc_links.html +19 -0
  111. data/test/htdocs/tc_no_attributes.html +16 -0
  112. data/test/htdocs/tc_pretty_print.html +17 -0
  113. data/test/htdocs/tc_radiobuttons.html +17 -0
  114. data/test/htdocs/tc_referer.html +10 -0
  115. data/test/htdocs/tc_relative_links.html +19 -0
  116. data/test/htdocs/tc_textarea.html +23 -0
  117. data/test/htdocs/test_bad_encoding.html +52 -0
  118. data/test/htdocs/unusual______.html +5 -0
  119. data/test/servlets.rb +365 -0
  120. data/test/ssl_server.rb +48 -0
  121. data/test/test_authenticate.rb +71 -0
  122. data/test/test_bad_links.rb +25 -0
  123. data/test/test_blank_form.rb +16 -0
  124. data/test/test_checkboxes.rb +61 -0
  125. data/test/test_content_type.rb +13 -0
  126. data/test/test_cookie_class.rb +338 -0
  127. data/test/test_cookie_jar.rb +362 -0
  128. data/test/test_cookies.rb +123 -0
  129. data/test/test_encoded_links.rb +20 -0
  130. data/test/test_errors.rb +49 -0
  131. data/test/test_follow_meta.rb +108 -0
  132. data/test/test_form_action.rb +52 -0
  133. data/test/test_form_as_hash.rb +61 -0
  134. data/test/test_form_button.rb +38 -0
  135. data/test/test_form_no_inputname.rb +15 -0
  136. data/test/test_forms.rb +577 -0
  137. data/test/test_frames.rb +25 -0
  138. data/test/test_get_headers.rb +73 -0
  139. data/test/test_gzipping.rb +22 -0
  140. data/test/test_hash_api.rb +45 -0
  141. data/test/test_history.rb +142 -0
  142. data/test/test_history_added.rb +16 -0
  143. data/test/test_html_unscape_forms.rb +39 -0
  144. data/test/test_if_modified_since.rb +20 -0
  145. data/test/test_keep_alive.rb +31 -0
  146. data/test/test_links.rb +127 -0
  147. data/test/test_mech.rb +289 -0
  148. data/test/test_mechanize_file.rb +72 -0
  149. data/test/test_meta.rb +65 -0
  150. data/test/test_multi_select.rb +106 -0
  151. data/test/test_no_attributes.rb +13 -0
  152. data/test/test_option.rb +18 -0
  153. data/test/test_page.rb +127 -0
  154. data/test/test_page_encoding.rb +298 -0
  155. data/test/test_pluggable_parser.rb +145 -0
  156. data/test/test_post_form.rb +34 -0
  157. data/test/test_pretty_print.rb +22 -0
  158. data/test/test_radiobutton.rb +75 -0
  159. data/test/test_redirect_limit_reached.rb +39 -0
  160. data/test/test_redirect_verb_handling.rb +43 -0
  161. data/test/test_referer.rb +39 -0
  162. data/test/test_relative_links.rb +40 -0
  163. data/test/test_request.rb +13 -0
  164. data/test/test_response_code.rb +52 -0
  165. data/test/test_save_file.rb +103 -0
  166. data/test/test_scheme.rb +63 -0
  167. data/test/test_select.rb +106 -0
  168. data/test/test_select_all.rb +15 -0
  169. data/test/test_select_none.rb +15 -0
  170. data/test/test_select_noopts.rb +16 -0
  171. data/test/test_set_fields.rb +44 -0
  172. data/test/test_ssl_server.rb +20 -0
  173. data/test/test_subclass.rb +14 -0
  174. data/test/test_textarea.rb +45 -0
  175. data/test/test_upload.rb +109 -0
  176. data/test/test_verbs.rb +25 -0
  177. metadata +320 -0
@@ -0,0 +1,13 @@
1
+ require File.expand_path(File.join(File.dirname(__FILE__), "helper"))
2
+
3
+ class TestNoAttributes < Test::Unit::TestCase
4
+ def setup
5
+ @agent = Mechanize.new
6
+ end
7
+
8
+ def test_parse_no_attributes
9
+ assert_nothing_raised do
10
+ page = @agent.get('http://localhost/tc_no_attributes.html')
11
+ end
12
+ end
13
+ end
@@ -0,0 +1,18 @@
1
+ require File.expand_path(File.join(File.dirname(__FILE__), "helper"))
2
+
3
+ class OptionTest < Test::Unit::TestCase
4
+ class FakeAttribute < Hash
5
+ attr_reader :inner_text
6
+ def initialize(inner_text)
7
+ @inner_text = inner_text
8
+ end
9
+ alias :has_attribute? :has_key?
10
+ alias :attributes :keys
11
+ end
12
+
13
+ def test_option_missing_value
14
+ attribute = FakeAttribute.new('blah')
15
+ option = Mechanize::Form::Option.new(attribute, nil)
16
+ assert_equal('blah', option.value)
17
+ end
18
+ end
data/test/test_page.rb ADDED
@@ -0,0 +1,127 @@
1
+ require File.expand_path(File.join(File.dirname(__FILE__), "helper"))
2
+
3
+ require 'cgi'
4
+
5
+ class TestPage < Test::Unit::TestCase
6
+ def setup
7
+ @agent = Mechanize.new
8
+ end
9
+
10
+ def test_page_gets_charset_from_page
11
+ page = @agent.get("http://localhost/tc_charset.html")
12
+ assert_equal 'windows-1255', page.encoding
13
+ end
14
+
15
+ def test_double_semicolon
16
+ page = @agent.get("http://localhost/http_headers?content-disposition=#{CGI.escape('attachment;; filename=fooooo')}")
17
+ assert page.parser
18
+ end
19
+
20
+ def test_broken_charset
21
+ page = @agent.get("http://localhost/http_headers?content-type=#{CGI.escape('text/html; charset=akldsjfhaldjfksh')}")
22
+ assert page.parser
23
+ end
24
+
25
+ def test_mostly_broken_charset
26
+ page = @agent.get("http://localhost/http_headers?content-type=#{CGI.escape('text/html; charset=ISO_8859-1')}")
27
+ assert_equal 'ISO_8859-1', page.encoding
28
+ end
29
+
30
+ def test_another_mostly_broken_charset
31
+ page = @agent.get("http://localhost/http_headers?content-type=#{CGI.escape('text/html; charset=UTF8')}")
32
+ assert_equal 'UTF8', page.parser.encoding
33
+ assert_equal 'UTF8', page.encoding
34
+ end
35
+
36
+ def test_upper_case_content_type
37
+ page = @agent.get("http://localhost/http_headers?content-type=#{CGI.escape('text/HTML')}")
38
+ assert_instance_of Mechanize::Page, page
39
+ assert_equal 'text/HTML', page.content_type
40
+ end
41
+
42
+ def test_encoding_override_before_parser_initialized
43
+ # document has a bad encoding information - windows-1255
44
+ page = @agent.get("http://localhost/tc_bad_charset.html")
45
+ # encoding is wrong, so user wants to force ISO-8859-2
46
+ page.encoding = 'ISO-8859-2'
47
+ assert_equal 'ISO-8859-2', page.encoding
48
+ end
49
+
50
+ def test_encoding_override_after_parser_was_initialized
51
+ # document has a bad encoding information - windows-1255
52
+ page = @agent.get("http://localhost/tc_bad_charset.html")
53
+ page.parser
54
+ # autodetection sets encoding to windows-1255
55
+ assert_equal 'windows-1255', page.encoding
56
+ # encoding is wrong, so user wants to force ISO-8859-2
57
+ page.encoding = 'ISO-8859-2'
58
+ assert_equal 'ISO-8859-2', page.encoding
59
+ end
60
+
61
+ def test_page_gets_charset_sent_by_server
62
+ page = @agent.get("http://localhost/http_headers?content-type=#{CGI.escape('text/html; charset=UTF-8')}")
63
+ assert_equal 'UTF-8', page.encoding
64
+ end
65
+
66
+ def test_set_encoding
67
+ page = @agent.get("http://localhost/file_upload.html")
68
+ page.encoding = 'UTF-8'
69
+ assert_equal 'UTF-8', page.parser.encoding
70
+ end
71
+
72
+ def test_page_gets_yielded
73
+ pages = nil
74
+ @agent.get("http://localhost/file_upload.html") { |page|
75
+ pages = page
76
+ }
77
+ assert pages
78
+ assert_equal('File Upload Form', pages.title)
79
+ end
80
+
81
+ def test_title
82
+ page = @agent.get("http://localhost/file_upload.html")
83
+ assert_equal('File Upload Form', page.title)
84
+ end
85
+
86
+ def test_no_title
87
+ page = @agent.get("http://localhost/no_title_test.html")
88
+ assert_equal(nil, page.title)
89
+ end
90
+
91
+ def test_page_decoded_with_charset
92
+ page = Mechanize::Page.new(
93
+ URI.parse('http://tenderlovemaking.com/'),
94
+ { 'content-type' => 'text/html; charset=EUC-JP' },
95
+ '<html><body>hello</body></html>',
96
+ 400,
97
+ @agent
98
+ )
99
+ assert_equal 'EUC-JP', page.parser.encoding
100
+ end
101
+
102
+ def test_find_form_with_hash
103
+ page = @agent.get("http://localhost/tc_form_action.html")
104
+ form = page.form(:name => 'post_form1')
105
+ assert form
106
+ yielded = false
107
+ form = page.form(:name => 'post_form1') { |f|
108
+ yielded = true
109
+ assert f
110
+ assert_equal(form, f)
111
+ }
112
+ assert yielded
113
+
114
+ form_by_action = page.form(:action => '/form_post?a=b&b=c')
115
+ assert form_by_action
116
+ assert_equal(form, form_by_action)
117
+ end
118
+
119
+ def test_find_form_with_attribute_class_or_id
120
+ page = @agent.get('http://localhost:2000/form_no_action.html')
121
+ form = page.form_with(:class => 'formclass')
122
+ assert_equal(form, page.forms[0])
123
+ form = page.form_with(:id => 'formid')
124
+ assert_equal(form, page.forms[0])
125
+ end
126
+ end
127
+
@@ -0,0 +1,298 @@
1
+ # -*- coding: utf-8 -*-
2
+ require File.expand_path(File.join(File.dirname(__FILE__), "helper"))
3
+
4
+ # require 'hpricot'
5
+ # Mechanize.html_parser = Hpricot
6
+
7
+ require 'test/unit'
8
+ require 'kconv'
9
+ require 'iconv'
10
+
11
+ module EncodingTestPage
12
+ if RUBY_VERSION >= "1.9.0"
13
+ BODY_ENC_PAIR = {
14
+ :utf8 => ::Encoding::UTF_8,
15
+ :ascii => ::Encoding::US_ASCII,
16
+ :latin => ::Encoding::ISO_8859_2,
17
+ :cp1252 => ::Encoding::CP1252,
18
+ :sjis => ::Encoding::SHIFT_JIS,
19
+ :euc => ::Encoding::EUC_JP,
20
+ :cp932 => ::Encoding::CP932,
21
+ :bin => ::Encoding::ASCII_8BIT}
22
+ end
23
+
24
+ ENC_NAME = {
25
+ :utf8 => 'utf-8',
26
+ :ascii => 'us-ascii',
27
+ :latin => 'iso-8859-2',
28
+ :cp1252 => 'cp1252',
29
+ :sjis => 'shift_jis',
30
+ :euc => 'euc-jp',
31
+ :cp932 => 'cp932',
32
+ :utf8_upcase => 'UTF-8',
33
+ :unknown => '*unknown*'}
34
+
35
+ JP_TITLE = # other JP strings may cause FAILURE, but that is due to misdetection by NKF.guess itself.
36
+ # This is Mechanize test, so don't modify me.
37
+ "\346\235\245\343\202\213\346\227\245\343\202\202\346\235\245\343\202\213\346\227\245" +
38
+ "\343\202\202\346\211\213\345\211\215\343\201\251\343\202\202\343\201\257\351\211\204" +
39
+ "\346\235\277\343\201\256\344\270\212\343\201\253\343\201\246\347\204\274\343\201\213" +
40
+ "\343\202\214\343\201\246\345\253\214\343\201\253\343\201\252\343\201\243\343\201\241" +
41
+ "\343\202\203\343\201\206\343\201\247\343\201\224\343\201\226\343\202\213"
42
+
43
+ # non-ASCII strings that are too short don't work well in the NKF.guess misdetection tests
44
+ TITLE = {
45
+ :ascii => "test page",
46
+ # "Bialystok" in UTF-8, 'puts' me on UTF-8 and latin-2 font console
47
+ :latin => "Bia\305\202ystok"*100,
48
+ # dagger mark, pure iso-8859-1 doesn't contain it.
49
+ # irb1.9_on_utf8> "\342\200\240".encode('iso-8859-1') #=> UndefinedConversionError
50
+ # irb1.9_on_utf8> "\342\200\240".encode('cp1252') #=> "\x86"
51
+ :cp1252 => "dagger mark dagger mark dagger mark dagger mark \342\200\240"*5,
52
+ :utf8 => JP_TITLE,
53
+ :sjis => JP_TITLE,
54
+ # circled integer, "marutuki-suuji" in Japanese. pure SHIFT_JIS doesn't know them.
55
+ # irb1.9_on_utf8> s.encode('shift_jis') #=> Encoding::UndefinedConversionError
56
+ :cp932 => "\342\221\240\342\221\241\342\221\242\342\221\243\342\221\244"*3,
57
+ :euc => JP_TITLE,
58
+ }
59
+ def page(h)
60
+ content_type = if h[:http]
61
+ "text/html; charset=#{ENC_NAME[h[:http]]}"
62
+ else
63
+ 'text/html'
64
+ end
65
+ meta = if h[:meta]
66
+ "<meta http-equiv=\"content-type\" content=\"text/html; charset=#{ENC_NAME[h[:meta]]}\">"
67
+ else
68
+ ''
69
+ end
70
+ html = convert("<html>#{meta}<title>#{TITLE[h[:body]]}</title></html>", h[:body])
71
+
72
+ return Mechanize::Page.new(
73
+ URI.parse('http://www.example.com/'),
74
+ { 'content-type' => content_type },
75
+ html,
76
+ 200,
77
+ h[:agent]|| Mechanize.new)
78
+ end
79
+
80
+ def convert(str, enc)
81
+ case enc
82
+ when :ascii then
83
+ Iconv::conv('ASCII', 'UTF-8', str)
84
+ when :latin then
85
+ Iconv::conv('ISO-8859-2', 'UTF-8', str)
86
+ when :utf8 then
87
+ NKF.nkf('-Wm0w', str)
88
+ when :sjis then
89
+ NKF.nkf('-Wm0s', str)
90
+ when :euc then
91
+ NKF.nkf('-Wm0e', str)
92
+ when :cp932 then
93
+ Iconv::conv('CP932', 'UTF-8', str)
94
+ when :cp1252 then
95
+ Iconv::conv('CP1252', 'UTF-8', str)
96
+ else
97
+ str
98
+ end
99
+ end
100
+
101
+ def err_msg1(page, mes_name)
102
+ return <<EOM
103
+ At #{mes_name},
104
+ expected: #{Marshal.dump(TITLE[@enc]).inspect}
105
+ but got : #{Marshal.dump(page.at('title').inner_text).inspect}
106
+ EOM
107
+ end
108
+
109
+ def err_msg2(page, mes_name)
110
+ return <<EOM
111
+ At #{mes_name},
112
+ expected: #{Marshal.dump(convert(TITLE[@enc], @enc)).inspect}
113
+ but got : #{Marshal.dump(page.at('title').inner_text).inspect}
114
+ EOM
115
+ end
116
+
117
+ def assert_SUCCESS(page)
118
+ if Mechanize.html_parser == Nokogiri::HTML
119
+ # Nokogiri string returns UTF-8 string
120
+ assert(TITLE[@enc] == page.at('title').inner_text, err_msg1(page, 'SUCCESS'))
121
+ else
122
+ assert(convert(TITLE[@enc], @enc) == page.parser.at('title').inner_text, err_msg3(page, 'SUCCESS'))
123
+ end
124
+ end
125
+
126
+ def assert_FAILURE(page)
127
+ if Mechanize.html_parser == Nokogiri::HTML
128
+ assert_equal(false, TITLE[@enc] == page.at('title').inner_text, err_msg1(page, 'FAILURE 1'))
129
+ if RUBY_VERSION >= "1.9.0"
130
+ assert_equal(false, TITLE[@enc] == page.at('title').inner_text.force_encoding(::Encoding::UTF_8), err_msg2(page, 'FAILURE 2'))
131
+ end
132
+ else
133
+ # Hpricot just returns "same" byte string, so never "FAILURE".
134
+ assert(convert(TITLE[@enc], @enc) == page.parser.at('title').inner_text, err_msg2(page, 'FAILURE'))
135
+ end
136
+ end
137
+ end
138
+
139
+ # M H D
140
+ # Meta - t t meta works every time
141
+ # HTTP f - t HTTP works only when meta doesn't exist
142
+ # Dete f f - Detect works only when neither meta nor HTTP exists
143
+ module EncodingTest
144
+ include EncodingTestPage
145
+ attr_reader :bad
146
+
147
+ def test_with_no_meta_no_http
148
+ page = page(:body => @enc)
149
+ assert_SUCCESS(page)
150
+ end
151
+
152
+ def test_with_right_meta_any_http
153
+ page = page(:body => @enc, :meta => @enc)
154
+ assert_SUCCESS(page)
155
+ page = page(:body => @enc, :meta => @enc, :http => bad)
156
+ assert_SUCCESS(page)
157
+ page = page(:body => @enc, :meta => @enc, :http => @enc)
158
+ assert_SUCCESS(page)
159
+ end
160
+
161
+ def test_with_no_meta_right_http
162
+ page = page(:body => @enc, :http => @enc)
163
+ assert_SUCCESS(page)
164
+ end
165
+
166
+ def test_failure_with_bad_meta_any_http
167
+ page = page(:body => @enc, :meta => bad)
168
+ assert_FAILURE(page)
169
+ page = page(:body => @enc, :meta => bad, :http => bad)
170
+ assert_FAILURE(page)
171
+ page = page(:body => @enc, :meta => bad, :http => @enc)
172
+ assert_FAILURE(page)
173
+ end
174
+
175
+ def test_failure_with_no_meta_bad_http
176
+ page = page(:body => @enc, :http => bad)
177
+ assert_FAILURE(page)
178
+ end
179
+
180
+ def test_overwrite_encoding
181
+ page = page(:body => @enc, :meta => bad)
182
+ page.encoding = ENC_NAME[@enc]
183
+ assert_SUCCESS(page)
184
+ end
185
+ end
186
+
187
+ class ASCIITest < Test::Unit::TestCase
188
+ include EncodingTest
189
+ def setup ; @enc, @bad = :ascii, :utf8 ; end
190
+
191
+ # ASCII succeeds in every case
192
+ alias :assert_FAILURE :assert_SUCCESS
193
+ end
194
+
195
+ class LatinTest < Test::Unit::TestCase
196
+ include EncodingTest
197
+ def setup ; @enc, @bad = :latin, :utf8 ; end
198
+
199
+ # Latin chars are misdetected as Japanese 'Shift_JIS' by NKF.guess
200
+ undef :test_with_no_meta_no_http
201
+ def test_failure_with_no_meta_no_http_cause_of_detect_charset_mistake
202
+ page = page(:body => @enc)
203
+ assert_FAILURE(page)
204
+ end
205
+ end
206
+
207
+ class CP1252Test < Test::Unit::TestCase
208
+ include EncodingTest
209
+ def setup ; @enc, @bad = :cp1252, :utf8 ; end
210
+ end
211
+
212
+ class UTF8Test < Test::Unit::TestCase
213
+ include EncodingTest
214
+ def setup ; @enc, @bad = :utf8, :sjis ; end
215
+ end
216
+
217
+ class ShiftJISTest < Test::Unit::TestCase
218
+ include EncodingTest
219
+ def setup ; @enc, @bad = :sjis, :utf8 ; end
220
+ end
221
+
222
+ class CP932Test < Test::Unit::TestCase
223
+ include EncodingTest
224
+ def setup ; @enc, @bad = :cp932, :utf8 ; end
225
+ end
226
+
227
+ class EUCJPTest < Test::Unit::TestCase
228
+ include EncodingTest
229
+ def setup ; @enc, @bad = :euc, :utf8 ; end
230
+ end
231
+
232
+ # =====================================
233
+
234
+ class Etc_Test < Test::Unit::TestCase
235
+
236
+ include EncodingTestPage
237
+
238
+ def setup
239
+ @agent = Mechanize.new
240
+ end
241
+
242
+ def test_page_meta_encoding
243
+ page = page(:body => :latin, :meta => :utf8, :http => :latin)
244
+ assert_equal('utf-8', page.meta_encoding)
245
+ end
246
+
247
+ def test_page_meta_encoding_as_is
248
+ page = page(:body => :latin, :meta => :utf8_upcase, :http => :latin)
249
+ assert_equal('UTF-8', page.meta_encoding)
250
+ page = page(:body => :latin, :meta => :unknown, :http => :latin)
251
+ assert_equal('*unknown*', page.meta_encoding)
252
+ end
253
+
254
+ def test_page_http_encoding
255
+ page = page(:body => :latin, :http => :utf8)
256
+ assert_equal('utf-8', page.http_encoding)
257
+ end
258
+
259
+ def test_page_http_encoding_as_is
260
+ page = page(:body => :latin, :http => :utf8_upcase)
261
+ assert_equal('UTF-8', page.http_encoding)
262
+ page = page(:body => :latin, :http => :unknown)
263
+ assert_equal('*unknown*', page.http_encoding)
264
+ end
265
+
266
+ def test_page_body_encoding
267
+ page = page(:body => :utf8)
268
+ assert_equal('UTF-8', page.body_encoding)
269
+ end
270
+
271
+ def test_post_page_hook
272
+ @enc = :latin
273
+ page = page(:body => :latin)
274
+ assert_FAILURE(page)
275
+ @agent.post_page_hooks << lambda{|p| p.encoding = p.http_charset}
276
+ page = page(:body => :latin, :http => :latin, :agent => @agent)
277
+ assert_SUCCESS(page)
278
+ end
279
+
280
+ def test_reset_parser
281
+ data = {
282
+ :title => "\343\202\277\343\202\244\343\203\210\343\203\253",
283
+ :link => "\343\203\252\343\203\263\343\202\257"} # "title", "link" in japanese utf-8
284
+ page = Mechanize::Page.new(
285
+ URI.parse('http://www.example.com/'),
286
+ { 'content-type' => 'text/html; charset=SHIFT_JIS' },
287
+ "<html><title>#{data[:title]}</title><a href=\"/\">#{data[:link]}</a></html>",
288
+ 200,
289
+ @agent)
290
+
291
+ bad = [page.title, page.links[0].text]
292
+ page.encoding = 'utf-8' # correct encoding
293
+ good = [page.title, page.links[0].text]
294
+
295
+ assert_not_equal(good, bad) # Page#encoding resets title and links
296
+ assert_equal([data[:title], data[:link]], good) # correct encoding
297
+ end
298
+ end