mechanize 2.7.6 → 2.12.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/.github/dependabot.yml +11 -0
- data/.github/workflows/ci.yml +43 -0
- data/.github/workflows/upstream.yml +51 -0
- data/.yardopts +8 -0
- data/{CHANGELOG.rdoc → CHANGELOG.md} +221 -96
- data/EXAMPLES.rdoc +1 -24
- data/Gemfile +10 -4
- data/{LICENSE.rdoc → LICENSE.txt} +4 -0
- data/README.md +77 -0
- data/Rakefile +18 -3
- data/examples/latest_user_agents.rb +100 -0
- data/examples/rubygems.rb +2 -2
- data/examples/wikipedia_links_to_philosophy.rb +5 -6
- data/lib/mechanize/chunked_termination_error.rb +1 -0
- data/lib/mechanize/content_type_error.rb +1 -0
- data/lib/mechanize/cookie.rb +3 -15
- data/lib/mechanize/cookie_jar.rb +13 -9
- data/lib/mechanize/directory_saver.rb +1 -0
- data/lib/mechanize/download.rb +2 -1
- data/lib/mechanize/element_matcher.rb +1 -0
- data/lib/mechanize/element_not_found_error.rb +1 -0
- data/lib/mechanize/file.rb +2 -1
- data/lib/mechanize/file_connection.rb +5 -3
- data/lib/mechanize/file_request.rb +1 -0
- data/lib/mechanize/file_response.rb +4 -1
- data/lib/mechanize/file_saver.rb +1 -0
- data/lib/mechanize/form/button.rb +1 -0
- data/lib/mechanize/form/check_box.rb +1 -0
- data/lib/mechanize/form/field.rb +1 -0
- data/lib/mechanize/form/file_upload.rb +1 -0
- data/lib/mechanize/form/hidden.rb +1 -0
- data/lib/mechanize/form/image_button.rb +1 -0
- data/lib/mechanize/form/keygen.rb +1 -0
- data/lib/mechanize/form/multi_select_list.rb +2 -1
- data/lib/mechanize/form/option.rb +1 -0
- data/lib/mechanize/form/radio_button.rb +1 -0
- data/lib/mechanize/form/reset.rb +1 -0
- data/lib/mechanize/form/select_list.rb +1 -0
- data/lib/mechanize/form/submit.rb +1 -0
- data/lib/mechanize/form/text.rb +1 -0
- data/lib/mechanize/form/textarea.rb +1 -0
- data/lib/mechanize/form.rb +5 -13
- data/lib/mechanize/headers.rb +1 -0
- data/lib/mechanize/history.rb +1 -0
- data/lib/mechanize/http/agent.rb +83 -10
- data/lib/mechanize/http/auth_challenge.rb +1 -0
- data/lib/mechanize/http/auth_realm.rb +1 -0
- data/lib/mechanize/http/auth_store.rb +1 -0
- data/lib/mechanize/http/content_disposition_parser.rb +15 -4
- data/lib/mechanize/http/www_authenticate_parser.rb +3 -3
- data/lib/mechanize/http.rb +1 -0
- data/lib/mechanize/image.rb +1 -0
- data/lib/mechanize/page/base.rb +1 -0
- data/lib/mechanize/page/frame.rb +1 -0
- data/lib/mechanize/page/image.rb +1 -0
- data/lib/mechanize/page/label.rb +1 -0
- data/lib/mechanize/page/link.rb +8 -1
- data/lib/mechanize/page/meta_refresh.rb +1 -0
- data/lib/mechanize/page.rb +6 -8
- data/lib/mechanize/parser.rb +1 -0
- data/lib/mechanize/pluggable_parsers.rb +2 -1
- data/lib/mechanize/prependable.rb +1 -0
- data/lib/mechanize/redirect_limit_reached_error.rb +1 -0
- data/lib/mechanize/redirect_not_get_or_head_error.rb +1 -0
- data/lib/mechanize/response_code_error.rb +2 -1
- data/lib/mechanize/response_read_error.rb +1 -0
- data/lib/mechanize/robots_disallowed_error.rb +1 -0
- data/lib/mechanize/test_case/bad_chunking_servlet.rb +1 -0
- data/lib/mechanize/test_case/basic_auth_servlet.rb +1 -0
- data/lib/mechanize/test_case/content_type_servlet.rb +1 -0
- data/lib/mechanize/test_case/digest_auth_servlet.rb +1 -0
- data/lib/mechanize/test_case/file_upload_servlet.rb +1 -0
- data/lib/mechanize/test_case/form_servlet.rb +1 -0
- data/lib/mechanize/test_case/gzip_servlet.rb +4 -3
- data/lib/mechanize/test_case/header_servlet.rb +1 -0
- data/lib/mechanize/test_case/http_refresh_servlet.rb +1 -0
- data/lib/mechanize/test_case/infinite_redirect_servlet.rb +1 -0
- data/lib/mechanize/test_case/infinite_refresh_servlet.rb +1 -0
- data/lib/mechanize/test_case/many_cookies_as_string_servlet.rb +1 -0
- data/lib/mechanize/test_case/many_cookies_servlet.rb +1 -0
- data/lib/mechanize/test_case/modified_since_servlet.rb +1 -0
- data/lib/mechanize/test_case/ntlm_servlet.rb +1 -0
- data/lib/mechanize/test_case/one_cookie_no_spaces_servlet.rb +1 -0
- data/lib/mechanize/test_case/one_cookie_servlet.rb +1 -0
- data/lib/mechanize/test_case/quoted_value_cookie_servlet.rb +1 -0
- data/lib/mechanize/test_case/redirect_servlet.rb +1 -0
- data/lib/mechanize/test_case/referer_servlet.rb +1 -0
- data/lib/mechanize/test_case/refresh_with_empty_url.rb +1 -0
- data/lib/mechanize/test_case/refresh_without_url.rb +1 -0
- data/lib/mechanize/test_case/response_code_servlet.rb +1 -0
- data/lib/mechanize/test_case/robots_txt_servlet.rb +1 -0
- data/lib/mechanize/test_case/send_cookies_servlet.rb +1 -0
- data/lib/mechanize/test_case/server.rb +1 -0
- data/lib/mechanize/test_case/servlets.rb +1 -0
- data/lib/mechanize/test_case/verb_servlet.rb +5 -6
- data/lib/mechanize/test_case.rb +34 -34
- data/lib/mechanize/unauthorized_error.rb +1 -0
- data/lib/mechanize/unsupported_scheme_error.rb +1 -0
- data/lib/mechanize/util.rb +2 -1
- data/lib/mechanize/version.rb +2 -1
- data/lib/mechanize/xml_file.rb +1 -0
- data/lib/mechanize.rb +56 -37
- data/mechanize.gemspec +43 -35
- data/test/htdocs/dir with spaces/foo.html +1 -0
- data/test/htdocs/tc_links.html +1 -1
- data/test/test_mechanize.rb +21 -8
- data/test/test_mechanize_cookie.rb +38 -26
- data/test/test_mechanize_cookie_jar.rb +87 -54
- data/test/test_mechanize_directory_saver.rb +1 -0
- data/test/test_mechanize_download.rb +14 -1
- data/test/test_mechanize_element_not_found_error.rb +1 -0
- data/test/test_mechanize_file.rb +11 -0
- data/test/test_mechanize_file_connection.rb +23 -4
- data/test/test_mechanize_file_request.rb +1 -0
- data/test/test_mechanize_file_response.rb +26 -1
- data/test/test_mechanize_file_saver.rb +1 -0
- data/test/test_mechanize_form.rb +14 -1
- data/test/test_mechanize_form_check_box.rb +1 -0
- data/test/test_mechanize_form_encoding.rb +2 -1
- data/test/test_mechanize_form_field.rb +1 -0
- data/test/test_mechanize_form_file_upload.rb +1 -0
- data/test/test_mechanize_form_image_button.rb +1 -0
- data/test/test_mechanize_form_keygen.rb +2 -0
- data/test/test_mechanize_form_multi_select_list.rb +1 -0
- data/test/test_mechanize_form_option.rb +1 -0
- data/test/test_mechanize_form_radio_button.rb +1 -0
- data/test/test_mechanize_form_select_list.rb +1 -0
- data/test/test_mechanize_form_textarea.rb +1 -0
- data/test/test_mechanize_headers.rb +1 -0
- data/test/test_mechanize_history.rb +1 -0
- data/test/test_mechanize_http_agent.rb +187 -26
- data/test/test_mechanize_http_auth_challenge.rb +1 -0
- data/test/test_mechanize_http_auth_realm.rb +1 -0
- data/test/test_mechanize_http_auth_store.rb +1 -0
- data/test/test_mechanize_http_content_disposition_parser.rb +28 -0
- data/test/test_mechanize_http_www_authenticate_parser.rb +1 -0
- data/test/test_mechanize_image.rb +1 -0
- data/test/test_mechanize_link.rb +25 -0
- data/test/test_mechanize_page.rb +15 -0
- data/test/test_mechanize_page_encoding.rb +33 -5
- data/test/test_mechanize_page_frame.rb +1 -0
- data/test/test_mechanize_page_image.rb +1 -0
- data/test/test_mechanize_page_link.rb +27 -23
- data/test/test_mechanize_page_meta_refresh.rb +1 -0
- data/test/test_mechanize_parser.rb +1 -0
- data/test/test_mechanize_pluggable_parser.rb +1 -0
- data/test/test_mechanize_redirect_limit_reached_error.rb +1 -0
- data/test/test_mechanize_redirect_not_get_or_head_error.rb +1 -0
- data/test/test_mechanize_response_read_error.rb +1 -0
- data/test/test_mechanize_subclass.rb +1 -0
- data/test/test_mechanize_util.rb +4 -3
- data/test/test_mechanize_xml_file.rb +1 -0
- data/test/test_multi_select.rb +1 -0
- metadata +106 -86
- data/.travis.yml +0 -36
- data/README.rdoc +0 -77
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
# -*- coding: utf-8 -*-
|
|
2
|
+
# frozen_string_literal: true
|
|
2
3
|
require 'mechanize/test_case'
|
|
3
4
|
|
|
4
5
|
# tests for Page encoding and charset and parsing
|
|
@@ -12,7 +13,7 @@ class TestMechanizePageEncoding < Mechanize::TestCase
|
|
|
12
13
|
|
|
13
14
|
@uri = URI('http://localhost/')
|
|
14
15
|
@response_headers = { 'content-type' => 'text/html' }
|
|
15
|
-
@body = '<title>hi</title>'
|
|
16
|
+
@body = +'<title>hi</title>'
|
|
16
17
|
end
|
|
17
18
|
|
|
18
19
|
def util_page body = @body, headers = @response_headers
|
|
@@ -118,7 +119,7 @@ class TestMechanizePageEncoding < Mechanize::TestCase
|
|
|
118
119
|
end
|
|
119
120
|
|
|
120
121
|
def test_meta_charset
|
|
121
|
-
body = '<meta http-equiv="content-type" content="text/html;charset=META">'
|
|
122
|
+
body = +'<meta http-equiv="content-type" content="text/html;charset=META">'
|
|
122
123
|
page = util_page body
|
|
123
124
|
|
|
124
125
|
assert_equal ['META'], page.meta_charset
|
|
@@ -132,7 +133,7 @@ class TestMechanizePageEncoding < Mechanize::TestCase
|
|
|
132
133
|
|
|
133
134
|
def test_encodings
|
|
134
135
|
response = {'content-type' => 'text/html;charset=HEADER'}
|
|
135
|
-
body = '<meta http-equiv="content-type" content="text/html;charset=META">'
|
|
136
|
+
body = +'<meta http-equiv="content-type" content="text/html;charset=META">'
|
|
136
137
|
@mech.default_encoding = 'DEFAULT'
|
|
137
138
|
page = util_page body, response
|
|
138
139
|
|
|
@@ -175,7 +176,7 @@ class TestMechanizePageEncoding < Mechanize::TestCase
|
|
|
175
176
|
def test_parser_encoding_when_searching_elements
|
|
176
177
|
skip "Encoding not implemented" unless have_encoding?
|
|
177
178
|
|
|
178
|
-
body = '<span id="latin1">hi</span>'
|
|
179
|
+
body = +'<span id="latin1">hi</span>'
|
|
179
180
|
page = util_page body, 'content-type' => 'text/html,charset=ISO-8859-1'
|
|
180
181
|
|
|
181
182
|
result = page.search('#latin1')
|
|
@@ -183,5 +184,32 @@ class TestMechanizePageEncoding < Mechanize::TestCase
|
|
|
183
184
|
assert_equal Encoding::UTF_8, result.text.encoding
|
|
184
185
|
end
|
|
185
186
|
|
|
186
|
-
|
|
187
|
+
def test_parser_error_message_containing_encoding_errors
|
|
188
|
+
skip if RUBY_ENGINE == 'jruby' # this is a libxml2-specific condition
|
|
189
|
+
|
|
190
|
+
# https://github.com/sparklemotion/mechanize/issues/553
|
|
191
|
+
body = +<<~EOF
|
|
192
|
+
<html>
|
|
193
|
+
<body>
|
|
194
|
+
<!--
|
|
195
|
+
## メモ
|
|
196
|
+
処理の一般化, 二重ループ, 多重ループ
|
|
197
|
+
wzxhzdk:25
|
|
198
|
+
-->
|
|
199
|
+
EOF
|
|
200
|
+
page = util_page body
|
|
201
|
+
|
|
202
|
+
# this should not raise an "invalid byte sequence in UTF-8" error while processing parsing errors
|
|
203
|
+
page.search("body")
|
|
187
204
|
|
|
205
|
+
# let's assert on the setup: a libxml2-returned parsing error itself contains an invalid character
|
|
206
|
+
# note that this problem only appears in libxml <= 2.9.10
|
|
207
|
+
error = page.parser.errors.find { |e| e.message.include?("Comment not terminated") }
|
|
208
|
+
if error
|
|
209
|
+
exception = assert_raises(ArgumentError) do
|
|
210
|
+
error.message =~ /any regex just to trigger encoding error/
|
|
211
|
+
end
|
|
212
|
+
assert_includes(exception.message, "invalid byte sequence in UTF-8")
|
|
213
|
+
end
|
|
214
|
+
end
|
|
215
|
+
end
|
|
@@ -1,7 +1,10 @@
|
|
|
1
1
|
# coding: utf-8
|
|
2
|
+
# frozen_string_literal: true
|
|
2
3
|
|
|
3
4
|
require 'mechanize/test_case'
|
|
4
5
|
|
|
6
|
+
puts "Nokogiri::VERSION_INFO: #{Nokogiri::VERSION_INFO}"
|
|
7
|
+
|
|
5
8
|
class TestMechanizePageLink < Mechanize::TestCase
|
|
6
9
|
|
|
7
10
|
WINDOWS_1255 = <<-HTML
|
|
@@ -9,7 +12,7 @@ class TestMechanizePageLink < Mechanize::TestCase
|
|
|
9
12
|
<title>hi</title>
|
|
10
13
|
HTML
|
|
11
14
|
|
|
12
|
-
BAD = <<-HTML
|
|
15
|
+
BAD = <<-HTML.dup
|
|
13
16
|
<meta http-equiv="content-type" content="text/html; charset=windows-1255">
|
|
14
17
|
<title>Bia\xB3ystok</title>
|
|
15
18
|
HTML
|
|
@@ -17,18 +20,16 @@ class TestMechanizePageLink < Mechanize::TestCase
|
|
|
17
20
|
|
|
18
21
|
SJIS_TITLE = "\x83\x65\x83\x58\x83\x67"
|
|
19
22
|
|
|
20
|
-
SJIS_AFTER_TITLE = <<-HTML
|
|
23
|
+
SJIS_AFTER_TITLE = <<-HTML.dup
|
|
21
24
|
<title>#{SJIS_TITLE}</title>
|
|
22
25
|
<meta http-equiv="Content-Type" content="text/html; charset=Shift_JIS">
|
|
23
26
|
HTML
|
|
24
|
-
|
|
25
27
|
SJIS_AFTER_TITLE.force_encoding Encoding::BINARY if defined? Encoding
|
|
26
28
|
|
|
27
|
-
SJIS_BAD_AFTER_TITLE = <<-HTML
|
|
29
|
+
SJIS_BAD_AFTER_TITLE = <<-HTML.dup
|
|
28
30
|
<title>#{SJIS_TITLE}</title>
|
|
29
31
|
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
|
|
30
32
|
HTML
|
|
31
|
-
|
|
32
33
|
SJIS_BAD_AFTER_TITLE.force_encoding Encoding::BINARY if defined? Encoding
|
|
33
34
|
|
|
34
35
|
UTF8_TITLE = 'テスト'
|
|
@@ -44,20 +45,17 @@ class TestMechanizePageLink < Mechanize::TestCase
|
|
|
44
45
|
|
|
45
46
|
@uri = URI('http://example')
|
|
46
47
|
@res = { 'content-type' => 'text/html' }
|
|
47
|
-
@body = '<title>hi</title>'
|
|
48
|
+
@body = +'<title>hi</title>'
|
|
48
49
|
end
|
|
49
50
|
|
|
50
51
|
def util_page body = @body, res = @res
|
|
51
52
|
Mechanize::Page.new @uri, res, body && body.force_encoding(Encoding::BINARY), 200, @mech
|
|
52
53
|
end
|
|
53
54
|
|
|
54
|
-
def
|
|
55
|
-
if RUBY_ENGINE == '
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
meth = caller[0][/`(\w+)/, 1]
|
|
59
|
-
warn "#{meth}: skipped because this feature currently depends on NKF"
|
|
60
|
-
true
|
|
55
|
+
def skip_if_nkf_dependency
|
|
56
|
+
if RUBY_ENGINE == 'jruby'
|
|
57
|
+
meth = caller_locations(1,1).first.base_label
|
|
58
|
+
skip "#{meth}: skipped because this feature currently depends on NKF"
|
|
61
59
|
end
|
|
62
60
|
end
|
|
63
61
|
|
|
@@ -76,7 +74,7 @@ class TestMechanizePageLink < Mechanize::TestCase
|
|
|
76
74
|
end
|
|
77
75
|
|
|
78
76
|
def test_canonical_uri_unescaped
|
|
79
|
-
page = util_page
|
|
77
|
+
page = util_page(+<<-BODY)
|
|
80
78
|
<head>
|
|
81
79
|
<link rel="canonical" href="http://example/white space"/>
|
|
82
80
|
</head>
|
|
@@ -98,7 +96,7 @@ class TestMechanizePageLink < Mechanize::TestCase
|
|
|
98
96
|
end
|
|
99
97
|
|
|
100
98
|
def test_encoding
|
|
101
|
-
page = util_page WINDOWS_1255
|
|
99
|
+
page = util_page WINDOWS_1255.dup
|
|
102
100
|
|
|
103
101
|
assert_equal 'windows-1255', page.encoding
|
|
104
102
|
end
|
|
@@ -112,17 +110,20 @@ class TestMechanizePageLink < Mechanize::TestCase
|
|
|
112
110
|
end
|
|
113
111
|
|
|
114
112
|
def test_encoding_charset_after_title_bad
|
|
115
|
-
|
|
113
|
+
skip_if_nkf_dependency
|
|
116
114
|
|
|
117
|
-
|
|
115
|
+
# https://gitlab.gnome.org/GNOME/libxml2/-/issues/543
|
|
116
|
+
skip if Nokogiri.uses_libxml?([">= 2.11.0", "< 2.12.0"])
|
|
117
|
+
|
|
118
|
+
page = util_page UTF8.dup
|
|
118
119
|
|
|
119
120
|
assert_equal false, page.encoding_error?
|
|
120
121
|
|
|
121
|
-
assert_equal
|
|
122
|
+
assert_equal "UTF-8", page.encoding
|
|
122
123
|
end
|
|
123
124
|
|
|
124
125
|
def test_encoding_charset_after_title_double_bad
|
|
125
|
-
|
|
126
|
+
skip_if_nkf_dependency
|
|
126
127
|
|
|
127
128
|
page = util_page SJIS_BAD_AFTER_TITLE
|
|
128
129
|
|
|
@@ -132,9 +133,12 @@ class TestMechanizePageLink < Mechanize::TestCase
|
|
|
132
133
|
end
|
|
133
134
|
|
|
134
135
|
def test_encoding_charset_bad
|
|
135
|
-
|
|
136
|
+
skip_if_nkf_dependency
|
|
137
|
+
|
|
138
|
+
# https://gitlab.gnome.org/GNOME/libxml2/-/issues/543
|
|
139
|
+
skip if Nokogiri.uses_libxml?([">= 2.11.0", "< 2.12.0"])
|
|
136
140
|
|
|
137
|
-
page = util_page
|
|
141
|
+
page = util_page(+"<title>#{UTF8_TITLE}</title>")
|
|
138
142
|
page.encodings.replace %w[
|
|
139
143
|
UTF-8
|
|
140
144
|
Shift_JIS
|
|
@@ -146,7 +150,7 @@ class TestMechanizePageLink < Mechanize::TestCase
|
|
|
146
150
|
end
|
|
147
151
|
|
|
148
152
|
def test_encoding_meta_charset
|
|
149
|
-
page = util_page
|
|
153
|
+
page = util_page(+"<meta charset='UTF-8'>")
|
|
150
154
|
|
|
151
155
|
assert_equal 'UTF-8', page.encoding
|
|
152
156
|
end
|
|
@@ -337,7 +341,7 @@ class TestMechanizePageLink < Mechanize::TestCase
|
|
|
337
341
|
end
|
|
338
342
|
|
|
339
343
|
def test_title_none
|
|
340
|
-
page = util_page
|
|
344
|
+
page = util_page(+'') # invalid HTML
|
|
341
345
|
|
|
342
346
|
assert_nil(page.title)
|
|
343
347
|
end
|
data/test/test_mechanize_util.rb
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
# coding: utf-8
|
|
2
|
+
# frozen_string_literal: true
|
|
2
3
|
|
|
3
4
|
require 'mechanize/test_case'
|
|
4
5
|
|
|
@@ -6,7 +7,7 @@ class TestMechanizeUtil < Mechanize::TestCase
|
|
|
6
7
|
|
|
7
8
|
INPUTTED_VALUE = "テスト" # "test" in Japanese UTF-8 encoding
|
|
8
9
|
CONTENT_ENCODING = 'Shift_JIS' # one of Japanese encoding
|
|
9
|
-
ENCODED_VALUE = "\x83\x65\x83\x58\x83\x67".force_encoding(::Encoding::SHIFT_JIS) # "test" in Japanese Shift_JIS encoding
|
|
10
|
+
ENCODED_VALUE = "\x83\x65\x83\x58\x83\x67".dup.force_encoding(::Encoding::SHIFT_JIS) # "test" in Japanese Shift_JIS encoding
|
|
10
11
|
|
|
11
12
|
ENCODING_ERRORS = [EncodingError, Encoding::ConverterNotFoundError] # and so on
|
|
12
13
|
ERROR_LOG_MESSAGE = /from_native_charset: Encoding::ConverterNotFoundError: form encoding: "UTF-eight"/
|
|
@@ -67,7 +68,7 @@ class TestMechanizeUtil < Mechanize::TestCase
|
|
|
67
68
|
end
|
|
68
69
|
|
|
69
70
|
def test_from_native_charset_logs_form_when_encoding_error_raised
|
|
70
|
-
sio = StringIO.new
|
|
71
|
+
sio = StringIO.new
|
|
71
72
|
log = Logger.new(sio)
|
|
72
73
|
log.level = Logger::DEBUG
|
|
73
74
|
|
|
@@ -79,7 +80,7 @@ class TestMechanizeUtil < Mechanize::TestCase
|
|
|
79
80
|
end
|
|
80
81
|
|
|
81
82
|
def test_from_native_charset_logs_form_when_encoding_error_is_ignored
|
|
82
|
-
sio = StringIO.new
|
|
83
|
+
sio = StringIO.new
|
|
83
84
|
log = Logger.new(sio)
|
|
84
85
|
log.level = Logger::DEBUG
|
|
85
86
|
|
data/test/test_multi_select.rb
CHANGED