diamond-mechanize 2.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (154) hide show
  1. data/CHANGELOG.rdoc +718 -0
  2. data/EXAMPLES.rdoc +187 -0
  3. data/FAQ.rdoc +11 -0
  4. data/GUIDE.rdoc +163 -0
  5. data/LICENSE.rdoc +20 -0
  6. data/Manifest.txt +159 -0
  7. data/README.rdoc +64 -0
  8. data/Rakefile +49 -0
  9. data/lib/mechanize.rb +1079 -0
  10. data/lib/mechanize/content_type_error.rb +13 -0
  11. data/lib/mechanize/cookie.rb +232 -0
  12. data/lib/mechanize/cookie_jar.rb +194 -0
  13. data/lib/mechanize/download.rb +59 -0
  14. data/lib/mechanize/element_matcher.rb +36 -0
  15. data/lib/mechanize/file.rb +65 -0
  16. data/lib/mechanize/file_connection.rb +17 -0
  17. data/lib/mechanize/file_request.rb +26 -0
  18. data/lib/mechanize/file_response.rb +74 -0
  19. data/lib/mechanize/file_saver.rb +39 -0
  20. data/lib/mechanize/form.rb +543 -0
  21. data/lib/mechanize/form/button.rb +6 -0
  22. data/lib/mechanize/form/check_box.rb +12 -0
  23. data/lib/mechanize/form/field.rb +54 -0
  24. data/lib/mechanize/form/file_upload.rb +21 -0
  25. data/lib/mechanize/form/hidden.rb +3 -0
  26. data/lib/mechanize/form/image_button.rb +19 -0
  27. data/lib/mechanize/form/keygen.rb +34 -0
  28. data/lib/mechanize/form/multi_select_list.rb +94 -0
  29. data/lib/mechanize/form/option.rb +50 -0
  30. data/lib/mechanize/form/radio_button.rb +55 -0
  31. data/lib/mechanize/form/reset.rb +3 -0
  32. data/lib/mechanize/form/select_list.rb +44 -0
  33. data/lib/mechanize/form/submit.rb +3 -0
  34. data/lib/mechanize/form/text.rb +3 -0
  35. data/lib/mechanize/form/textarea.rb +3 -0
  36. data/lib/mechanize/headers.rb +23 -0
  37. data/lib/mechanize/history.rb +82 -0
  38. data/lib/mechanize/http.rb +8 -0
  39. data/lib/mechanize/http/agent.rb +1004 -0
  40. data/lib/mechanize/http/auth_challenge.rb +59 -0
  41. data/lib/mechanize/http/auth_realm.rb +31 -0
  42. data/lib/mechanize/http/content_disposition_parser.rb +188 -0
  43. data/lib/mechanize/http/www_authenticate_parser.rb +155 -0
  44. data/lib/mechanize/monkey_patch.rb +16 -0
  45. data/lib/mechanize/page.rb +440 -0
  46. data/lib/mechanize/page/base.rb +7 -0
  47. data/lib/mechanize/page/frame.rb +27 -0
  48. data/lib/mechanize/page/image.rb +30 -0
  49. data/lib/mechanize/page/label.rb +20 -0
  50. data/lib/mechanize/page/link.rb +98 -0
  51. data/lib/mechanize/page/meta_refresh.rb +68 -0
  52. data/lib/mechanize/parser.rb +173 -0
  53. data/lib/mechanize/pluggable_parsers.rb +144 -0
  54. data/lib/mechanize/redirect_limit_reached_error.rb +19 -0
  55. data/lib/mechanize/redirect_not_get_or_head_error.rb +21 -0
  56. data/lib/mechanize/response_code_error.rb +21 -0
  57. data/lib/mechanize/response_read_error.rb +27 -0
  58. data/lib/mechanize/robots_disallowed_error.rb +28 -0
  59. data/lib/mechanize/test_case.rb +663 -0
  60. data/lib/mechanize/unauthorized_error.rb +3 -0
  61. data/lib/mechanize/unsupported_scheme_error.rb +6 -0
  62. data/lib/mechanize/util.rb +101 -0
  63. data/test/data/htpasswd +1 -0
  64. data/test/data/server.crt +16 -0
  65. data/test/data/server.csr +12 -0
  66. data/test/data/server.key +15 -0
  67. data/test/data/server.pem +15 -0
  68. data/test/htdocs/alt_text.html +10 -0
  69. data/test/htdocs/bad_form_test.html +9 -0
  70. data/test/htdocs/button.jpg +0 -0
  71. data/test/htdocs/canonical_uri.html +9 -0
  72. data/test/htdocs/dir with spaces/foo.html +1 -0
  73. data/test/htdocs/empty_form.html +6 -0
  74. data/test/htdocs/file_upload.html +26 -0
  75. data/test/htdocs/find_link.html +41 -0
  76. data/test/htdocs/form_multi_select.html +16 -0
  77. data/test/htdocs/form_multival.html +37 -0
  78. data/test/htdocs/form_no_action.html +18 -0
  79. data/test/htdocs/form_no_input_name.html +16 -0
  80. data/test/htdocs/form_order_test.html +11 -0
  81. data/test/htdocs/form_select.html +16 -0
  82. data/test/htdocs/form_set_fields.html +14 -0
  83. data/test/htdocs/form_test.html +188 -0
  84. data/test/htdocs/frame_referer_test.html +10 -0
  85. data/test/htdocs/frame_test.html +30 -0
  86. data/test/htdocs/google.html +13 -0
  87. data/test/htdocs/index.html +6 -0
  88. data/test/htdocs/link with space.html +5 -0
  89. data/test/htdocs/meta_cookie.html +11 -0
  90. data/test/htdocs/no_title_test.html +6 -0
  91. data/test/htdocs/noindex.html +9 -0
  92. data/test/htdocs/rails_3_encoding_hack_form_test.html +27 -0
  93. data/test/htdocs/relative/tc_relative_links.html +21 -0
  94. data/test/htdocs/robots.html +8 -0
  95. data/test/htdocs/robots.txt +2 -0
  96. data/test/htdocs/tc_bad_charset.html +9 -0
  97. data/test/htdocs/tc_bad_links.html +5 -0
  98. data/test/htdocs/tc_base_link.html +8 -0
  99. data/test/htdocs/tc_blank_form.html +11 -0
  100. data/test/htdocs/tc_charset.html +6 -0
  101. data/test/htdocs/tc_checkboxes.html +19 -0
  102. data/test/htdocs/tc_encoded_links.html +5 -0
  103. data/test/htdocs/tc_field_precedence.html +11 -0
  104. data/test/htdocs/tc_follow_meta.html +8 -0
  105. data/test/htdocs/tc_form_action.html +48 -0
  106. data/test/htdocs/tc_links.html +19 -0
  107. data/test/htdocs/tc_meta_in_body.html +9 -0
  108. data/test/htdocs/tc_pretty_print.html +17 -0
  109. data/test/htdocs/tc_referer.html +16 -0
  110. data/test/htdocs/tc_relative_links.html +19 -0
  111. data/test/htdocs/tc_textarea.html +23 -0
  112. data/test/htdocs/test_click.html +11 -0
  113. data/test/htdocs/unusual______.html +5 -0
  114. data/test/test_mechanize.rb +1164 -0
  115. data/test/test_mechanize_cookie.rb +451 -0
  116. data/test/test_mechanize_cookie_jar.rb +483 -0
  117. data/test/test_mechanize_download.rb +43 -0
  118. data/test/test_mechanize_file.rb +61 -0
  119. data/test/test_mechanize_file_connection.rb +21 -0
  120. data/test/test_mechanize_file_request.rb +19 -0
  121. data/test/test_mechanize_file_saver.rb +21 -0
  122. data/test/test_mechanize_form.rb +875 -0
  123. data/test/test_mechanize_form_check_box.rb +38 -0
  124. data/test/test_mechanize_form_encoding.rb +114 -0
  125. data/test/test_mechanize_form_field.rb +63 -0
  126. data/test/test_mechanize_form_file_upload.rb +20 -0
  127. data/test/test_mechanize_form_image_button.rb +12 -0
  128. data/test/test_mechanize_form_keygen.rb +32 -0
  129. data/test/test_mechanize_form_multi_select_list.rb +84 -0
  130. data/test/test_mechanize_form_option.rb +55 -0
  131. data/test/test_mechanize_form_radio_button.rb +78 -0
  132. data/test/test_mechanize_form_select_list.rb +76 -0
  133. data/test/test_mechanize_form_textarea.rb +52 -0
  134. data/test/test_mechanize_headers.rb +35 -0
  135. data/test/test_mechanize_history.rb +103 -0
  136. data/test/test_mechanize_http_agent.rb +1225 -0
  137. data/test/test_mechanize_http_auth_challenge.rb +39 -0
  138. data/test/test_mechanize_http_auth_realm.rb +49 -0
  139. data/test/test_mechanize_http_content_disposition_parser.rb +118 -0
  140. data/test/test_mechanize_http_www_authenticate_parser.rb +146 -0
  141. data/test/test_mechanize_link.rb +80 -0
  142. data/test/test_mechanize_page.rb +118 -0
  143. data/test/test_mechanize_page_encoding.rb +182 -0
  144. data/test/test_mechanize_page_frame.rb +16 -0
  145. data/test/test_mechanize_page_link.rb +390 -0
  146. data/test/test_mechanize_page_meta_refresh.rb +127 -0
  147. data/test/test_mechanize_parser.rb +289 -0
  148. data/test/test_mechanize_pluggable_parser.rb +52 -0
  149. data/test/test_mechanize_redirect_limit_reached_error.rb +24 -0
  150. data/test/test_mechanize_redirect_not_get_or_head_error.rb +14 -0
  151. data/test/test_mechanize_subclass.rb +22 -0
  152. data/test/test_mechanize_util.rb +103 -0
  153. data/test/test_multi_select.rb +119 -0
  154. metadata +216 -0
@@ -0,0 +1,182 @@
1
+ # -*- coding: utf-8 -*-
2
+ require 'mechanize/test_case'
3
+
4
+ # Tests for Page encoding, charset detection, and parsing
5
+
6
+ class TestMechanizePageEncoding < Mechanize::TestCase
7
+
8
+ MECH_ASCII_ENCODING = Mechanize::Util::NEW_RUBY_ENCODING ? 'US-ASCII' : 'ISO-8859-1'
9
+
10
+ def setup
11
+ super
12
+
13
+ @uri = URI('http://localhost/')
14
+ @response_headers = { 'content-type' => 'text/html' }
15
+ @body = '<title>hi</title>'
16
+ end
17
+
18
+ def util_page body = @body, headers = @response_headers
19
+ body.force_encoding Encoding::BINARY if body.respond_to? :force_encoding
20
+ Mechanize::Page.new @uri, headers, body, 200, @mech
21
+ end
22
+
23
+ def test_page_charset
24
+ charset = Mechanize::Page.charset 'text/html;charset=vAlue'
25
+ assert_equal 'vAlue', charset
26
+ end
27
+
28
+ def test_page_charset_upcase
29
+ charset = Mechanize::Page.charset 'TEXT/HTML;CHARSET=UTF-8'
30
+ assert_equal 'UTF-8', charset
31
+ end
32
+
33
+ def test_page_charset_semicolon
34
+ charset = Mechanize::Page.charset 'text/html;charset=UTF-8;'
35
+ assert_equal 'UTF-8', charset
36
+ end
37
+
38
+ def test_page_charset_no_chaset_token
39
+ charset = Mechanize::Page.charset 'text/html'
40
+ assert_nil charset
41
+ end
42
+
43
+ def test_page_charset_returns_nil_when_charset_says_none
44
+ charset = Mechanize::Page.charset 'text/html;charset=none'
45
+
46
+ assert_nil charset
47
+ end
48
+
49
+ def test_page_charset_multiple
50
+ charset = Mechanize::Page.charset 'text/html;charset=111;charset=222'
51
+
52
+ assert_equal '111', charset
53
+ end
54
+
55
+ def test_page_response_header_charset
56
+ headers = { 'content-type' => 'text/html;charset=HEADER' }
57
+ charsets = Mechanize::Page.response_header_charset(headers)
58
+
59
+ assert_equal ['HEADER'], charsets
60
+ end
61
+
62
+ def test_page_response_header_charset_no_token
63
+ headers = {'content-type' => 'text/html'}
64
+ charsets = Mechanize::Page.response_header_charset(headers)
65
+
66
+ assert_equal [], charsets
67
+
68
+ headers = {'X-My-Header' => 'hello'}
69
+ charsets = Mechanize::Page.response_header_charset(headers)
70
+
71
+ assert_equal [], charsets
72
+ end
73
+
74
+ def test_page_response_header_charset_wrong_header
75
+ headers = { 'x-content-type' => 'text/html;charset=bogus' }
76
+ charsets = Mechanize::Page.response_header_charset(headers)
77
+
78
+ assert_equal [], charsets
79
+ end
80
+
81
+ def test_response_header_charset
82
+ page = util_page nil, {'content-type' => 'text/html;charset=HEADER'}
83
+
84
+ assert_equal ['HEADER'], page.response_header_charset
85
+ end
86
+
87
+ def test_page_meta_charset
88
+ body = '<meta http-equiv="content-type" content="text/html;charset=META">'
89
+ charsets = Mechanize::Page.meta_charset(body)
90
+
91
+ assert_equal ['META'], charsets
92
+ end
93
+
94
+ def test_page_meta_charset_is_empty_when_no_charset_meta
95
+ body = '<meta http-equiv="refresh" content="5; url=index.html">'
96
+ charsets = Mechanize::Page.meta_charset(body)
97
+ assert_equal [], charsets
98
+ end
99
+
100
+ def test_page_meta_charset_no_content
101
+ body = '<meta http-equiv="content-type">'
102
+
103
+ charsets = Mechanize::Page.meta_charset(body)
104
+
105
+ assert_empty charsets
106
+ end
107
+
108
+ # Regression test for issue: https://github.com/tenderlove/mechanize/issues/143
109
+ def test_page_meta_charset_handles_whitespace
110
+ body = '<meta http-equiv = "Content-Type" content = "text/html; charset=iso-8859-1">'
111
+ charsets = Mechanize::Page.meta_charset(body)
112
+ assert_equal ["iso-8859-1"], charsets
113
+ end
114
+
115
+ def test_meta_charset
116
+ body = '<meta http-equiv="content-type" content="text/html;charset=META">'
117
+ page = util_page body
118
+
119
+ assert_equal ['META'], page.meta_charset
120
+ end
121
+
122
+ def test_detected_encoding
123
+ page = util_page
124
+
125
+ assert_equal MECH_ASCII_ENCODING, page.detected_encoding
126
+ end
127
+
128
+ def test_encodings
129
+ response = {'content-type' => 'text/html;charset=HEADER'}
130
+ body = '<meta http-equiv="content-type" content="text/html;charset=META">'
131
+ @mech.default_encoding = 'DEFAULT'
132
+ page = util_page body, response
133
+
134
+ assert_equal true, page.encodings.include?('HEADER')
135
+ assert_equal true, page.encodings.include?('META')
136
+ assert_equal true, page.encodings.include?(MECH_ASCII_ENCODING)
137
+ assert_equal true, page.encodings.include?('DEFAULT')
138
+ end
139
+
140
+ def test_parser_with_default_encoding
141
+ # pre test
142
+ assert_equal false, util_page.encodings.include?('Windows-1252')
143
+
144
+ @mech.default_encoding = 'Windows-1252'
145
+ page = util_page
146
+
147
+ assert_equal true, page.encodings.include?('Windows-1252')
148
+ end
149
+
150
+ def test_parser_force_default_encoding
151
+ @mech.default_encoding = 'Windows-1252'
152
+ @mech.force_default_encoding = true
153
+ page = util_page
154
+
155
+ assert page.encodings.include? 'Windows-1252'
156
+ end
157
+
158
+ def test_parser_encoding_equals_overwrites_force_default_encoding
159
+ @mech.default_encoding = 'Windows-1252'
160
+ @mech.force_default_encoding = true
161
+ page = util_page
162
+
163
+ assert_equal 'Windows-1252', page.encoding
164
+
165
+ page.encoding = 'ISO-8859-2'
166
+
167
+ assert_equal 'ISO-8859-2', page.encoding
168
+ end
169
+
170
+ def test_parser_encoding_when_searching_elements
171
+ skip "Encoding not implemented" unless have_encoding?
172
+
173
+ body = '<span id="latin1">hi</span>'
174
+ page = util_page body, 'content-type' => 'text/html,charset=ISO-8859-1'
175
+
176
+ result = page.search('#latin1')
177
+
178
+ assert_equal Encoding::UTF_8, result.text.encoding
179
+ end
180
+
181
+ end
182
+
@@ -0,0 +1,16 @@
1
+ require 'mechanize/test_case'
2
+
3
+ class TestMechanizePageFrame < Mechanize::TestCase
4
+
5
+ def test_content
6
+ page = page 'http://example/referer'
7
+ frame = node 'frame', 'name' => 'frame1', 'src' => 'http://example/'
8
+ frame = Mechanize::Page::Frame.new frame, @mech, page
9
+
10
+ frame.content
11
+
12
+ assert_equal 'http://example/referer', requests.first['Referer']
13
+ end
14
+
15
+ end
16
+
@@ -0,0 +1,390 @@
1
+ # coding: utf-8
2
+
3
+ require 'mechanize/test_case'
4
+
5
+ class TestMechanizePageLink < Mechanize::TestCase
6
+
7
+ WINDOWS_1255 = <<-HTML
8
+ <meta http-equiv="content-type" content="text/html; charset=windows-1255">
9
+ <title>hi</title>
10
+ HTML
11
+
12
+ BAD = <<-HTML
13
+ <meta http-equiv="content-type" content="text/html; charset=windows-1255">
14
+ <title>Bia\xB3ystok</title>
15
+ HTML
16
+ BAD.force_encoding Encoding::BINARY if defined? Encoding
17
+
18
+ SJIS_TITLE = "\x83\x65\x83\x58\x83\x67"
19
+
20
+ SJIS_AFTER_TITLE = <<-HTML
21
+ <title>#{SJIS_TITLE}</title>
22
+ <meta http-equiv="Content-Type" content="text/html; charset=Shift_JIS">
23
+ HTML
24
+
25
+ SJIS_AFTER_TITLE.force_encoding Encoding::BINARY if defined? Encoding
26
+
27
+ SJIS_BAD_AFTER_TITLE = <<-HTML
28
+ <title>#{SJIS_TITLE}</title>
29
+ <meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
30
+ HTML
31
+
32
+ SJIS_BAD_AFTER_TITLE.force_encoding Encoding::BINARY if defined? Encoding
33
+
34
+ UTF8_TITLE = 'テスト'
35
+ UTF8 = <<-HTML
36
+ <title>#{UTF8_TITLE}</title>
37
+ <meta http-equiv="Content-Type" content="text/html; charset=Shift_JIS">
38
+ HTML
39
+
40
+ ENCODING_ERROR_CLASS = Nokogiri::XML::SyntaxError
41
+
42
+ def setup
43
+ super
44
+
45
+ @uri = URI('http://example')
46
+ @res = { 'content-type' => 'text/html' }
47
+ @body = '<title>hi</title>'
48
+ end
49
+
50
+ def util_page body = @body, res = @res
51
+ body.force_encoding Encoding::BINARY if body.respond_to? :force_encoding
52
+ Mechanize::Page.new @uri, res, body, 200, @mech
53
+ end
54
+
55
+ def test_initialize_supported_content_type
56
+ page = Mechanize::Page.new nil, 'content-type' => 'application/xhtml+xml'
57
+ assert page
58
+ assert_equal 'application/xhtml+xml', page.content_type
59
+
60
+ page = Mechanize::Page.new nil, 'content-type' => 'text/html'
61
+ assert page
62
+ assert_equal 'text/html', page.content_type
63
+
64
+ page = Mechanize::Page.new nil, 'content-type' => 'application/xhtml+xml;charset=utf-8'
65
+ assert page
66
+ assert_equal 'application/xhtml+xml;charset=utf-8', page.content_type
67
+
68
+ page = Mechanize::Page.new nil, 'content-type' => 'text/html;charset=utf-8'
69
+ assert page
70
+ assert_equal 'text/html;charset=utf-8', page.content_type
71
+ end
72
+
73
+ def test_initialize_unsupported_content_type
74
+ e = assert_raises Mechanize::ContentTypeError do
75
+ Mechanize::Page.new nil, 'content-type' => 'text/plain'
76
+ end
77
+
78
+ assert_equal 'text/plain', e.content_type
79
+
80
+ e = assert_raises Mechanize::ContentTypeError do
81
+ Mechanize::Page.new nil, 'content-type' => 'text/plain;charset=utf-8'
82
+ end
83
+
84
+ assert_equal 'text/plain;charset=utf-8', e.content_type
85
+ end
86
+
87
+ def test_override_content_type
88
+ page = Mechanize::Page.new nil, {'content-type' => 'text/html'}, WINDOWS_1255
89
+ assert page
90
+ assert_equal 'text/html; charset=windows-1255', page.content_type
91
+ end
92
+
93
+ def test_canonical_uri
94
+ page = @mech.get("http://localhost/canonical_uri.html")
95
+ assert_equal(URI("http://localhost/canonical_uri"), page.canonical_uri)
96
+
97
+ page = @mech.get("http://localhost/file_upload.html")
98
+ assert_equal(nil, page.canonical_uri)
99
+ end
100
+
101
+ def test_canonical_uri_unescaped
102
+ page = util_page <<-BODY
103
+ <head>
104
+ <link rel="canonical" href="http://example/white space"/>
105
+ </head>
106
+ BODY
107
+
108
+ assert_equal @uri + '/white%20space', page.canonical_uri
109
+ end
110
+
111
+ def test_charset_from_content_type
112
+ charset = Mechanize::Page.__send__ :charset_from_content_type, 'text/html;charset=UTF-8'
113
+
114
+ assert_equal 'UTF-8', charset
115
+ end
116
+
117
+ def test_charset_from_bad_content_type
118
+ charset = Mechanize::Page.__send__ :charset_from_content_type, 'text/html'
119
+
120
+ assert_nil charset
121
+ end
122
+
123
+ def test_encoding
124
+ page = util_page WINDOWS_1255
125
+
126
+ assert_equal 'windows-1255', page.encoding
127
+ end
128
+
129
+ def test_encoding_charset_after_title
130
+ page = util_page SJIS_AFTER_TITLE
131
+
132
+ assert_equal false, page.encoding_error?
133
+
134
+ assert_equal 'Shift_JIS', page.encoding
135
+ end
136
+
137
+ def test_encoding_charset_after_title_bad
138
+ page = util_page UTF8
139
+
140
+ assert_equal false, page.encoding_error?
141
+
142
+ assert_equal 'UTF-8', page.encoding
143
+ end
144
+
145
+ def test_encoding_charset_after_title_double_bad
146
+ page = util_page SJIS_BAD_AFTER_TITLE
147
+
148
+ assert_equal false, page.encoding_error?
149
+
150
+ assert_equal 'SHIFT_JIS', page.encoding
151
+ end
152
+
153
+ def test_encoding_charset_bad
154
+ page = util_page "<title>#{UTF8_TITLE}</title>"
155
+ page.encodings.replace %w[
156
+ UTF-8
157
+ Shift_JIS
158
+ ]
159
+
160
+ assert_equal false, page.encoding_error?
161
+
162
+ assert_equal 'UTF-8', page.encoding
163
+ end
164
+
165
+ def test_encoding_meta_charset
166
+ page = util_page "<meta charset='UTF-8'>"
167
+
168
+ assert_equal 'UTF-8', page.encoding
169
+ end
170
+
171
+ def test_encoding_equals
172
+ page = util_page
173
+ page.meta_refresh
174
+ assert page.instance_variable_get(:@meta_refresh)
175
+
176
+ page.encoding = 'UTF-8'
177
+
178
+ assert_nil page.instance_variable_get(:@meta_refresh)
179
+
180
+ assert_equal 'UTF-8', page.encoding
181
+ assert_equal 'UTF-8', page.parser.encoding
182
+ end
183
+
184
+ def test_page_encoding_error?
185
+ page = util_page
186
+ page.parser.errors.clear
187
+ assert_equal false, page.encoding_error?
188
+ end
189
+
190
+ def test_detect_libxml2error_indicate_encoding
191
+ page = util_page
192
+ page.parser.errors.clear
193
+
194
+ # error in libxml2-2.7.8/parser.c, HTMLparser.c or parserInternals.c
195
+ page.parser.errors = [ENCODING_ERROR_CLASS.new("Input is not proper UTF-8, indicate encoding !\n")]
196
+ assert_equal true, page.encoding_error?
197
+ end
198
+
199
+ def test_detect_libxml2error_invalid_char
200
+ page = util_page
201
+ page.parser.errors.clear
202
+
203
+ # error in libxml2-2.7.8/HTMLparser.c
204
+ page.parser.errors = [ENCODING_ERROR_CLASS.new("Invalid char in CDATA 0x%X\n")]
205
+ assert_equal true, page.encoding_error?
206
+ end
207
+
208
+ def test_detect_libxml2error_input_conversion_failed
209
+ page = util_page
210
+ page.parser.errors.clear
211
+
212
+ # error in libxml2-2.7.8/encoding.c
213
+ page.parser.errors = [ENCODING_ERROR_CLASS.new("input conversion failed due to input error\n")]
214
+ assert_equal true, page.encoding_error?
215
+ end
216
+
217
+ def test_detect_libxml2error_which_unsupported_by_mechanize
218
+ page = util_page
219
+ page.parser.errors.clear
220
+
221
+ # error in libxml2-2.7.8/HTMLparser.c
222
+ page.parser.errors = [ENCODING_ERROR_CLASS.new("encoder error\n")]
223
+ assert_equal false, page.encoding_error?
224
+ end
225
+
226
+ def test_encoding_equals_before_parser
227
+ # document declares incorrect encoding information - windows-1255
228
+ page = util_page BAD
229
+
230
+ # encoding is wrong, so user wants to force ISO-8859-2
231
+ page.encoding = 'ISO-8859-2'
232
+
233
+ assert_equal false, page.encoding_error?
234
+ assert_equal 'ISO-8859-2', page.encoding
235
+ assert_equal 'ISO-8859-2', page.parser.encoding
236
+ end
237
+
238
+ def test_encoding_equals_after_parser
239
+ # document declares incorrect encoding information - windows-1255
240
+ page = util_page BAD
241
+ page.parser
242
+
243
+ # autodetection sets encoding to windows-1255
244
+ assert_equal 'windows-1255', page.encoding
245
+ # believe in yourself, not the machine
246
+ assert_equal false, page.encoding_error?
247
+
248
+ # encoding is wrong, so user wants to force ISO-8859-2
249
+ page.encoding = 'ISO-8859-2'
250
+
251
+ assert_equal false, page.encoding_error?
252
+ assert_equal 'ISO-8859-2', page.encoding
253
+ assert_equal 'ISO-8859-2', page.parser.encoding
254
+ end
255
+
256
+ def test_frames_with
257
+ page = @mech.get("http://localhost/frame_test.html")
258
+ assert_equal(3, page.frames.size)
259
+
260
+ find_orig = page.frames.find_all { |f| f.name == 'frame1' }
261
+
262
+ find1 = page.frames_with(:name => 'frame1')
263
+
264
+ find_orig.zip(find1).each { |a,b|
265
+ assert_equal(a, b)
266
+ }
267
+ end
268
+
269
+ def test_links_with_dom_id
270
+ page = @mech.get("http://localhost/tc_links.html")
271
+ link = page.links_with(:dom_id => 'bold_aaron_link')
272
+ link_by_id = page.links_with(:id => 'bold_aaron_link')
273
+ assert_equal(1, link.length)
274
+ assert_equal('Aaron Patterson', link.first.text)
275
+ assert_equal(link, link_by_id)
276
+ end
277
+
278
+ def test_links_with_dom_class
279
+ page = @mech.get("http://localhost/tc_links.html")
280
+ link = page.links_with(:dom_class => 'thing_link')
281
+ link_by_class = page.links_with(:class => 'thing_link')
282
+ assert_equal(1, link.length)
283
+ assert_equal(link, link_by_class)
284
+ end
285
+
286
+ def test_link_with_encoded_space
287
+ page = @mech.get("http://localhost/tc_links.html")
288
+ link = page.link_with(:text => 'encoded space')
289
+ page = @mech.click link
290
+ end
291
+
292
+ def test_link_with_space
293
+ page = @mech.get("http://localhost/tc_links.html")
294
+ link = page.link_with(:text => 'not encoded space')
295
+ page = @mech.click link
296
+ end
297
+
298
+ def test_link_with_unusual_characters
299
+ page = @mech.get("http://localhost/tc_links.html")
300
+ link = page.link_with(:text => 'unusual characters')
301
+
302
+ @mech.click link
303
+
304
+ # HACK no assertion
305
+ end
306
+
307
+ def test_links
308
+ page = @mech.get("http://localhost/find_link.html")
309
+ assert_equal(18, page.links.length)
310
+ end
311
+
312
+ def test_links_with_bold
313
+ page = @mech.get("http://localhost/tc_links.html")
314
+ link = page.links_with(:text => /Bold Dude/)
315
+ assert_equal(1, link.length)
316
+ assert_equal('Bold Dude', link.first.text)
317
+ assert_equal [], link.first.rel
318
+ assert !link.first.rel?('me')
319
+ assert !link.first.rel?('nofollow')
320
+
321
+ link = page.links_with(:text => 'Aaron James Patterson')
322
+ assert_equal(1, link.length)
323
+ assert_equal('Aaron James Patterson', link.first.text)
324
+ assert_equal ['me'], link.first.rel
325
+ assert link.first.rel?('me')
326
+ assert !link.first.rel?('nofollow')
327
+
328
+ link = page.links_with(:text => 'Aaron Patterson')
329
+ assert_equal(1, link.length)
330
+ assert_equal('Aaron Patterson', link.first.text)
331
+ assert_equal ['me', 'nofollow'], link.first.rel
332
+ assert link.first.rel?('me')
333
+ assert link.first.rel?('nofollow')
334
+
335
+ link = page.links_with(:text => 'Ruby Rocks!')
336
+ assert_equal(1, link.length)
337
+ assert_equal('Ruby Rocks!', link.first.text)
338
+ end
339
+
340
+ def test_meta_refresh
341
+ page = @mech.get("http://localhost/find_link.html")
342
+ assert_equal(3, page.meta_refresh.length)
343
+ assert_equal(%w{
344
+ http://www.drphil.com/
345
+ http://www.upcase.com/
346
+ http://tenderlovemaking.com/ }.sort,
347
+ page.meta_refresh.map { |x| x.href.downcase }.sort)
348
+ end
349
+
350
+ def test_title
351
+ page = util_page
352
+
353
+ assert_equal('hi', page.title)
354
+ end
355
+
356
+ def test_title_none
357
+ page = util_page '' # invalid HTML
358
+
359
+ assert_equal(nil, page.title)
360
+ end
361
+
362
+ def test_page_decoded_with_charset
363
+ page = util_page @body, 'content-type' => 'text/html; charset=EUC-JP'
364
+
365
+ assert_equal 'EUC-JP', page.encoding
366
+ assert_equal 'EUC-JP', page.parser.encoding
367
+ end
368
+
369
+ def test_form
370
+ page = @mech.get("http://localhost/tc_form_action.html")
371
+
372
+ form = page.form(:name => 'post_form1')
373
+ assert form
374
+ yielded = false
375
+
376
+ form = page.form(:name => 'post_form1') { |f|
377
+ yielded = true
378
+ assert f
379
+ assert_equal(form, f)
380
+ }
381
+
382
+ assert yielded
383
+
384
+ form_by_action = page.form(:action => '/form_post?a=b&b=c')
385
+ assert form_by_action
386
+ assert_equal(form, form_by_action)
387
+ end
388
+
389
+ end
390
+