mechanize 2.7.6 → 2.12.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/.github/dependabot.yml +11 -0
- data/.github/workflows/ci.yml +43 -0
- data/.github/workflows/upstream.yml +51 -0
- data/.yardopts +8 -0
- data/{CHANGELOG.rdoc → CHANGELOG.md} +221 -96
- data/EXAMPLES.rdoc +1 -24
- data/Gemfile +10 -4
- data/{LICENSE.rdoc → LICENSE.txt} +4 -0
- data/README.md +77 -0
- data/Rakefile +18 -3
- data/examples/latest_user_agents.rb +100 -0
- data/examples/rubygems.rb +2 -2
- data/examples/wikipedia_links_to_philosophy.rb +5 -6
- data/lib/mechanize/chunked_termination_error.rb +1 -0
- data/lib/mechanize/content_type_error.rb +1 -0
- data/lib/mechanize/cookie.rb +3 -15
- data/lib/mechanize/cookie_jar.rb +13 -9
- data/lib/mechanize/directory_saver.rb +1 -0
- data/lib/mechanize/download.rb +2 -1
- data/lib/mechanize/element_matcher.rb +1 -0
- data/lib/mechanize/element_not_found_error.rb +1 -0
- data/lib/mechanize/file.rb +2 -1
- data/lib/mechanize/file_connection.rb +5 -3
- data/lib/mechanize/file_request.rb +1 -0
- data/lib/mechanize/file_response.rb +4 -1
- data/lib/mechanize/file_saver.rb +1 -0
- data/lib/mechanize/form/button.rb +1 -0
- data/lib/mechanize/form/check_box.rb +1 -0
- data/lib/mechanize/form/field.rb +1 -0
- data/lib/mechanize/form/file_upload.rb +1 -0
- data/lib/mechanize/form/hidden.rb +1 -0
- data/lib/mechanize/form/image_button.rb +1 -0
- data/lib/mechanize/form/keygen.rb +1 -0
- data/lib/mechanize/form/multi_select_list.rb +2 -1
- data/lib/mechanize/form/option.rb +1 -0
- data/lib/mechanize/form/radio_button.rb +1 -0
- data/lib/mechanize/form/reset.rb +1 -0
- data/lib/mechanize/form/select_list.rb +1 -0
- data/lib/mechanize/form/submit.rb +1 -0
- data/lib/mechanize/form/text.rb +1 -0
- data/lib/mechanize/form/textarea.rb +1 -0
- data/lib/mechanize/form.rb +5 -13
- data/lib/mechanize/headers.rb +1 -0
- data/lib/mechanize/history.rb +1 -0
- data/lib/mechanize/http/agent.rb +83 -10
- data/lib/mechanize/http/auth_challenge.rb +1 -0
- data/lib/mechanize/http/auth_realm.rb +1 -0
- data/lib/mechanize/http/auth_store.rb +1 -0
- data/lib/mechanize/http/content_disposition_parser.rb +15 -4
- data/lib/mechanize/http/www_authenticate_parser.rb +3 -3
- data/lib/mechanize/http.rb +1 -0
- data/lib/mechanize/image.rb +1 -0
- data/lib/mechanize/page/base.rb +1 -0
- data/lib/mechanize/page/frame.rb +1 -0
- data/lib/mechanize/page/image.rb +1 -0
- data/lib/mechanize/page/label.rb +1 -0
- data/lib/mechanize/page/link.rb +8 -1
- data/lib/mechanize/page/meta_refresh.rb +1 -0
- data/lib/mechanize/page.rb +6 -8
- data/lib/mechanize/parser.rb +1 -0
- data/lib/mechanize/pluggable_parsers.rb +2 -1
- data/lib/mechanize/prependable.rb +1 -0
- data/lib/mechanize/redirect_limit_reached_error.rb +1 -0
- data/lib/mechanize/redirect_not_get_or_head_error.rb +1 -0
- data/lib/mechanize/response_code_error.rb +2 -1
- data/lib/mechanize/response_read_error.rb +1 -0
- data/lib/mechanize/robots_disallowed_error.rb +1 -0
- data/lib/mechanize/test_case/bad_chunking_servlet.rb +1 -0
- data/lib/mechanize/test_case/basic_auth_servlet.rb +1 -0
- data/lib/mechanize/test_case/content_type_servlet.rb +1 -0
- data/lib/mechanize/test_case/digest_auth_servlet.rb +1 -0
- data/lib/mechanize/test_case/file_upload_servlet.rb +1 -0
- data/lib/mechanize/test_case/form_servlet.rb +1 -0
- data/lib/mechanize/test_case/gzip_servlet.rb +4 -3
- data/lib/mechanize/test_case/header_servlet.rb +1 -0
- data/lib/mechanize/test_case/http_refresh_servlet.rb +1 -0
- data/lib/mechanize/test_case/infinite_redirect_servlet.rb +1 -0
- data/lib/mechanize/test_case/infinite_refresh_servlet.rb +1 -0
- data/lib/mechanize/test_case/many_cookies_as_string_servlet.rb +1 -0
- data/lib/mechanize/test_case/many_cookies_servlet.rb +1 -0
- data/lib/mechanize/test_case/modified_since_servlet.rb +1 -0
- data/lib/mechanize/test_case/ntlm_servlet.rb +1 -0
- data/lib/mechanize/test_case/one_cookie_no_spaces_servlet.rb +1 -0
- data/lib/mechanize/test_case/one_cookie_servlet.rb +1 -0
- data/lib/mechanize/test_case/quoted_value_cookie_servlet.rb +1 -0
- data/lib/mechanize/test_case/redirect_servlet.rb +1 -0
- data/lib/mechanize/test_case/referer_servlet.rb +1 -0
- data/lib/mechanize/test_case/refresh_with_empty_url.rb +1 -0
- data/lib/mechanize/test_case/refresh_without_url.rb +1 -0
- data/lib/mechanize/test_case/response_code_servlet.rb +1 -0
- data/lib/mechanize/test_case/robots_txt_servlet.rb +1 -0
- data/lib/mechanize/test_case/send_cookies_servlet.rb +1 -0
- data/lib/mechanize/test_case/server.rb +1 -0
- data/lib/mechanize/test_case/servlets.rb +1 -0
- data/lib/mechanize/test_case/verb_servlet.rb +5 -6
- data/lib/mechanize/test_case.rb +34 -34
- data/lib/mechanize/unauthorized_error.rb +1 -0
- data/lib/mechanize/unsupported_scheme_error.rb +1 -0
- data/lib/mechanize/util.rb +2 -1
- data/lib/mechanize/version.rb +2 -1
- data/lib/mechanize/xml_file.rb +1 -0
- data/lib/mechanize.rb +56 -37
- data/mechanize.gemspec +43 -35
- data/test/htdocs/dir with spaces/foo.html +1 -0
- data/test/htdocs/tc_links.html +1 -1
- data/test/test_mechanize.rb +21 -8
- data/test/test_mechanize_cookie.rb +38 -26
- data/test/test_mechanize_cookie_jar.rb +87 -54
- data/test/test_mechanize_directory_saver.rb +1 -0
- data/test/test_mechanize_download.rb +14 -1
- data/test/test_mechanize_element_not_found_error.rb +1 -0
- data/test/test_mechanize_file.rb +11 -0
- data/test/test_mechanize_file_connection.rb +23 -4
- data/test/test_mechanize_file_request.rb +1 -0
- data/test/test_mechanize_file_response.rb +26 -1
- data/test/test_mechanize_file_saver.rb +1 -0
- data/test/test_mechanize_form.rb +14 -1
- data/test/test_mechanize_form_check_box.rb +1 -0
- data/test/test_mechanize_form_encoding.rb +2 -1
- data/test/test_mechanize_form_field.rb +1 -0
- data/test/test_mechanize_form_file_upload.rb +1 -0
- data/test/test_mechanize_form_image_button.rb +1 -0
- data/test/test_mechanize_form_keygen.rb +2 -0
- data/test/test_mechanize_form_multi_select_list.rb +1 -0
- data/test/test_mechanize_form_option.rb +1 -0
- data/test/test_mechanize_form_radio_button.rb +1 -0
- data/test/test_mechanize_form_select_list.rb +1 -0
- data/test/test_mechanize_form_textarea.rb +1 -0
- data/test/test_mechanize_headers.rb +1 -0
- data/test/test_mechanize_history.rb +1 -0
- data/test/test_mechanize_http_agent.rb +187 -26
- data/test/test_mechanize_http_auth_challenge.rb +1 -0
- data/test/test_mechanize_http_auth_realm.rb +1 -0
- data/test/test_mechanize_http_auth_store.rb +1 -0
- data/test/test_mechanize_http_content_disposition_parser.rb +28 -0
- data/test/test_mechanize_http_www_authenticate_parser.rb +1 -0
- data/test/test_mechanize_image.rb +1 -0
- data/test/test_mechanize_link.rb +25 -0
- data/test/test_mechanize_page.rb +15 -0
- data/test/test_mechanize_page_encoding.rb +33 -5
- data/test/test_mechanize_page_frame.rb +1 -0
- data/test/test_mechanize_page_image.rb +1 -0
- data/test/test_mechanize_page_link.rb +27 -23
- data/test/test_mechanize_page_meta_refresh.rb +1 -0
- data/test/test_mechanize_parser.rb +1 -0
- data/test/test_mechanize_pluggable_parser.rb +1 -0
- data/test/test_mechanize_redirect_limit_reached_error.rb +1 -0
- data/test/test_mechanize_redirect_not_get_or_head_error.rb +1 -0
- data/test/test_mechanize_response_read_error.rb +1 -0
- data/test/test_mechanize_subclass.rb +1 -0
- data/test/test_mechanize_util.rb +4 -3
- data/test/test_mechanize_xml_file.rb +1 -0
- data/test/test_multi_select.rb +1 -0
- metadata +106 -86
- data/.travis.yml +0 -36
- data/README.rdoc +0 -77
data/test/test_mechanize_page_encoding.rb
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
# -*- coding: utf-8 -*-
|
2
|
+
# frozen_string_literal: true
|
2
3
|
require 'mechanize/test_case'
|
3
4
|
|
4
5
|
# tests for Page encoding and charset and parsing
|
@@ -12,7 +13,7 @@ class TestMechanizePageEncoding < Mechanize::TestCase
|
|
12
13
|
|
13
14
|
@uri = URI('http://localhost/')
|
14
15
|
@response_headers = { 'content-type' => 'text/html' }
|
15
|
-
@body = '<title>hi</title>'
|
16
|
+
@body = +'<title>hi</title>'
|
16
17
|
end
|
17
18
|
|
18
19
|
def util_page body = @body, headers = @response_headers
|
@@ -118,7 +119,7 @@ class TestMechanizePageEncoding < Mechanize::TestCase
|
|
118
119
|
end
|
119
120
|
|
120
121
|
def test_meta_charset
|
121
|
-
body = '<meta http-equiv="content-type" content="text/html;charset=META">'
|
122
|
+
body = +'<meta http-equiv="content-type" content="text/html;charset=META">'
|
122
123
|
page = util_page body
|
123
124
|
|
124
125
|
assert_equal ['META'], page.meta_charset
|
@@ -132,7 +133,7 @@ class TestMechanizePageEncoding < Mechanize::TestCase
|
|
132
133
|
|
133
134
|
def test_encodings
|
134
135
|
response = {'content-type' => 'text/html;charset=HEADER'}
|
135
|
-
body = '<meta http-equiv="content-type" content="text/html;charset=META">'
|
136
|
+
body = +'<meta http-equiv="content-type" content="text/html;charset=META">'
|
136
137
|
@mech.default_encoding = 'DEFAULT'
|
137
138
|
page = util_page body, response
|
138
139
|
|
@@ -175,7 +176,7 @@ class TestMechanizePageEncoding < Mechanize::TestCase
|
|
175
176
|
def test_parser_encoding_when_searching_elements
|
176
177
|
skip "Encoding not implemented" unless have_encoding?
|
177
178
|
|
178
|
-
body = '<span id="latin1">hi</span>'
|
179
|
+
body = +'<span id="latin1">hi</span>'
|
179
180
|
page = util_page body, 'content-type' => 'text/html,charset=ISO-8859-1'
|
180
181
|
|
181
182
|
result = page.search('#latin1')
|
@@ -183,5 +184,32 @@ class TestMechanizePageEncoding < Mechanize::TestCase
|
|
183
184
|
assert_equal Encoding::UTF_8, result.text.encoding
|
184
185
|
end
|
185
186
|
|
186
|
-
|
187
|
+
def test_parser_error_message_containing_encoding_errors
|
188
|
+
skip if RUBY_ENGINE == 'jruby' # this is a libxml2-specific condition
|
189
|
+
|
190
|
+
# https://github.com/sparklemotion/mechanize/issues/553
|
191
|
+
body = +<<~EOF
|
192
|
+
<html>
|
193
|
+
<body>
|
194
|
+
<!--
|
195
|
+
## メモ
|
196
|
+
処理の一般化, 二重ループ, 多重ループ
|
197
|
+
wzxhzdk:25
|
198
|
+
-->
|
199
|
+
EOF
|
200
|
+
page = util_page body
|
201
|
+
|
202
|
+
# this should not raise an "invalid byte sequence in UTF-8" error while processing parsing errors
|
203
|
+
page.search("body")
|
187
204
|
|
205
|
+
# let's assert on the setup: a libxml2-returned parsing error itself contains an invalid character
|
206
|
+
# note that this problem only appears in libxml <= 2.9.10
|
207
|
+
error = page.parser.errors.find { |e| e.message.include?("Comment not terminated") }
|
208
|
+
if error
|
209
|
+
exception = assert_raises(ArgumentError) do
|
210
|
+
error.message =~ /any regex just to trigger encoding error/
|
211
|
+
end
|
212
|
+
assert_includes(exception.message, "invalid byte sequence in UTF-8")
|
213
|
+
end
|
214
|
+
end
|
215
|
+
end
|
data/test/test_mechanize_page_link.rb
CHANGED
@@ -1,7 +1,10 @@
|
|
1
1
|
# coding: utf-8
|
2
|
+
# frozen_string_literal: true
|
2
3
|
|
3
4
|
require 'mechanize/test_case'
|
4
5
|
|
6
|
+
puts "Nokogiri::VERSION_INFO: #{Nokogiri::VERSION_INFO}"
|
7
|
+
|
5
8
|
class TestMechanizePageLink < Mechanize::TestCase
|
6
9
|
|
7
10
|
WINDOWS_1255 = <<-HTML
|
@@ -9,7 +12,7 @@ class TestMechanizePageLink < Mechanize::TestCase
|
|
9
12
|
<title>hi</title>
|
10
13
|
HTML
|
11
14
|
|
12
|
-
BAD = <<-HTML
|
15
|
+
BAD = <<-HTML.dup
|
13
16
|
<meta http-equiv="content-type" content="text/html; charset=windows-1255">
|
14
17
|
<title>Bia\xB3ystok</title>
|
15
18
|
HTML
|
@@ -17,18 +20,16 @@ class TestMechanizePageLink < Mechanize::TestCase
|
|
17
20
|
|
18
21
|
SJIS_TITLE = "\x83\x65\x83\x58\x83\x67"
|
19
22
|
|
20
|
-
SJIS_AFTER_TITLE = <<-HTML
|
23
|
+
SJIS_AFTER_TITLE = <<-HTML.dup
|
21
24
|
<title>#{SJIS_TITLE}</title>
|
22
25
|
<meta http-equiv="Content-Type" content="text/html; charset=Shift_JIS">
|
23
26
|
HTML
|
24
|
-
|
25
27
|
SJIS_AFTER_TITLE.force_encoding Encoding::BINARY if defined? Encoding
|
26
28
|
|
27
|
-
SJIS_BAD_AFTER_TITLE = <<-HTML
|
29
|
+
SJIS_BAD_AFTER_TITLE = <<-HTML.dup
|
28
30
|
<title>#{SJIS_TITLE}</title>
|
29
31
|
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
|
30
32
|
HTML
|
31
|
-
|
32
33
|
SJIS_BAD_AFTER_TITLE.force_encoding Encoding::BINARY if defined? Encoding
|
33
34
|
|
34
35
|
UTF8_TITLE = 'テスト'
|
@@ -44,20 +45,17 @@ class TestMechanizePageLink < Mechanize::TestCase
|
|
44
45
|
|
45
46
|
@uri = URI('http://example')
|
46
47
|
@res = { 'content-type' => 'text/html' }
|
47
|
-
@body = '<title>hi</title>'
|
48
|
+
@body = +'<title>hi</title>'
|
48
49
|
end
|
49
50
|
|
50
51
|
def util_page body = @body, res = @res
|
51
52
|
Mechanize::Page.new @uri, res, body && body.force_encoding(Encoding::BINARY), 200, @mech
|
52
53
|
end
|
53
54
|
|
54
|
-
def
|
55
|
-
if RUBY_ENGINE == '
|
56
|
-
|
57
|
-
|
58
|
-
meth = caller[0][/`(\w+)/, 1]
|
59
|
-
warn "#{meth}: skipped because this feature currently depends on NKF"
|
60
|
-
true
|
55
|
+
def skip_if_nkf_dependency
|
56
|
+
if RUBY_ENGINE == 'jruby'
|
57
|
+
meth = caller_locations(1,1).first.base_label
|
58
|
+
skip "#{meth}: skipped because this feature currently depends on NKF"
|
61
59
|
end
|
62
60
|
end
|
63
61
|
|
@@ -76,7 +74,7 @@ class TestMechanizePageLink < Mechanize::TestCase
|
|
76
74
|
end
|
77
75
|
|
78
76
|
def test_canonical_uri_unescaped
|
79
|
-
page = util_page <<-BODY
|
77
|
+
page = util_page(+<<-BODY)
|
80
78
|
<head>
|
81
79
|
<link rel="canonical" href="http://example/white space"/>
|
82
80
|
</head>
|
@@ -98,7 +96,7 @@ class TestMechanizePageLink < Mechanize::TestCase
|
|
98
96
|
end
|
99
97
|
|
100
98
|
def test_encoding
|
101
|
-
page = util_page WINDOWS_1255
|
99
|
+
page = util_page WINDOWS_1255.dup
|
102
100
|
|
103
101
|
assert_equal 'windows-1255', page.encoding
|
104
102
|
end
|
@@ -112,17 +110,20 @@ class TestMechanizePageLink < Mechanize::TestCase
|
|
112
110
|
end
|
113
111
|
|
114
112
|
def test_encoding_charset_after_title_bad
|
115
|
-
|
113
|
+
skip_if_nkf_dependency
|
116
114
|
|
117
|
-
|
115
|
+
# https://gitlab.gnome.org/GNOME/libxml2/-/issues/543
|
116
|
+
skip if Nokogiri.uses_libxml?([">= 2.11.0", "< 2.12.0"])
|
117
|
+
|
118
|
+
page = util_page UTF8.dup
|
118
119
|
|
119
120
|
assert_equal false, page.encoding_error?
|
120
121
|
|
121
|
-
assert_equal
|
122
|
+
assert_equal "UTF-8", page.encoding
|
122
123
|
end
|
123
124
|
|
124
125
|
def test_encoding_charset_after_title_double_bad
|
125
|
-
|
126
|
+
skip_if_nkf_dependency
|
126
127
|
|
127
128
|
page = util_page SJIS_BAD_AFTER_TITLE
|
128
129
|
|
@@ -132,9 +133,12 @@ class TestMechanizePageLink < Mechanize::TestCase
|
|
132
133
|
end
|
133
134
|
|
134
135
|
def test_encoding_charset_bad
|
135
|
-
|
136
|
+
skip_if_nkf_dependency
|
137
|
+
|
138
|
+
# https://gitlab.gnome.org/GNOME/libxml2/-/issues/543
|
139
|
+
skip if Nokogiri.uses_libxml?([">= 2.11.0", "< 2.12.0"])
|
136
140
|
|
137
|
-
page = util_page "<title>#{UTF8_TITLE}</title>"
|
141
|
+
page = util_page(+"<title>#{UTF8_TITLE}</title>")
|
138
142
|
page.encodings.replace %w[
|
139
143
|
UTF-8
|
140
144
|
Shift_JIS
|
@@ -146,7 +150,7 @@ class TestMechanizePageLink < Mechanize::TestCase
|
|
146
150
|
end
|
147
151
|
|
148
152
|
def test_encoding_meta_charset
|
149
|
-
page = util_page "<meta charset='UTF-8'>"
|
153
|
+
page = util_page(+"<meta charset='UTF-8'>")
|
150
154
|
|
151
155
|
assert_equal 'UTF-8', page.encoding
|
152
156
|
end
|
@@ -337,7 +341,7 @@ class TestMechanizePageLink < Mechanize::TestCase
|
|
337
341
|
end
|
338
342
|
|
339
343
|
def test_title_none
|
340
|
-
page = util_page
|
344
|
+
page = util_page(+'') # invalid HTML
|
341
345
|
|
342
346
|
assert_nil(page.title)
|
343
347
|
end
|
data/test/test_mechanize_util.rb
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
# coding: utf-8
|
2
|
+
# frozen_string_literal: true
|
2
3
|
|
3
4
|
require 'mechanize/test_case'
|
4
5
|
|
@@ -6,7 +7,7 @@ class TestMechanizeUtil < Mechanize::TestCase
|
|
6
7
|
|
7
8
|
INPUTTED_VALUE = "テスト" # "test" in Japanese UTF-8 encoding
|
8
9
|
CONTENT_ENCODING = 'Shift_JIS' # one of Japanese encoding
|
9
|
-
ENCODED_VALUE = "\x83\x65\x83\x58\x83\x67".force_encoding(::Encoding::SHIFT_JIS) # "test" in Japanese Shift_JIS encoding
|
10
|
+
ENCODED_VALUE = "\x83\x65\x83\x58\x83\x67".dup.force_encoding(::Encoding::SHIFT_JIS) # "test" in Japanese Shift_JIS encoding
|
10
11
|
|
11
12
|
ENCODING_ERRORS = [EncodingError, Encoding::ConverterNotFoundError] # and so on
|
12
13
|
ERROR_LOG_MESSAGE = /from_native_charset: Encoding::ConverterNotFoundError: form encoding: "UTF-eight"/
|
@@ -67,7 +68,7 @@ class TestMechanizeUtil < Mechanize::TestCase
|
|
67
68
|
end
|
68
69
|
|
69
70
|
def test_from_native_charset_logs_form_when_encoding_error_raised
|
70
|
-
sio = StringIO.new
|
71
|
+
sio = StringIO.new
|
71
72
|
log = Logger.new(sio)
|
72
73
|
log.level = Logger::DEBUG
|
73
74
|
|
@@ -79,7 +80,7 @@ class TestMechanizeUtil < Mechanize::TestCase
|
|
79
80
|
end
|
80
81
|
|
81
82
|
def test_from_native_charset_logs_form_when_encoding_error_is_ignored
|
82
|
-
sio = StringIO.new
|
83
|
+
sio = StringIO.new
|
83
84
|
log = Logger.new(sio)
|
84
85
|
log.level = Logger::DEBUG
|
85
86
|
|
data/test/test_multi_select.rb
CHANGED