mechanize 2.7.6 → 2.12.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (157)
  1. checksums.yaml +5 -5
  2. data/.github/dependabot.yml +11 -0
  3. data/.github/workflows/ci.yml +43 -0
  4. data/.github/workflows/upstream.yml +51 -0
  5. data/.yardopts +8 -0
  6. data/{CHANGELOG.rdoc → CHANGELOG.md} +221 -96
  7. data/EXAMPLES.rdoc +1 -24
  8. data/Gemfile +10 -4
  9. data/{LICENSE.rdoc → LICENSE.txt} +4 -0
  10. data/README.md +77 -0
  11. data/Rakefile +18 -3
  12. data/examples/latest_user_agents.rb +100 -0
  13. data/examples/rubygems.rb +2 -2
  14. data/examples/wikipedia_links_to_philosophy.rb +5 -6
  15. data/lib/mechanize/chunked_termination_error.rb +1 -0
  16. data/lib/mechanize/content_type_error.rb +1 -0
  17. data/lib/mechanize/cookie.rb +3 -15
  18. data/lib/mechanize/cookie_jar.rb +13 -9
  19. data/lib/mechanize/directory_saver.rb +1 -0
  20. data/lib/mechanize/download.rb +2 -1
  21. data/lib/mechanize/element_matcher.rb +1 -0
  22. data/lib/mechanize/element_not_found_error.rb +1 -0
  23. data/lib/mechanize/file.rb +2 -1
  24. data/lib/mechanize/file_connection.rb +5 -3
  25. data/lib/mechanize/file_request.rb +1 -0
  26. data/lib/mechanize/file_response.rb +4 -1
  27. data/lib/mechanize/file_saver.rb +1 -0
  28. data/lib/mechanize/form/button.rb +1 -0
  29. data/lib/mechanize/form/check_box.rb +1 -0
  30. data/lib/mechanize/form/field.rb +1 -0
  31. data/lib/mechanize/form/file_upload.rb +1 -0
  32. data/lib/mechanize/form/hidden.rb +1 -0
  33. data/lib/mechanize/form/image_button.rb +1 -0
  34. data/lib/mechanize/form/keygen.rb +1 -0
  35. data/lib/mechanize/form/multi_select_list.rb +2 -1
  36. data/lib/mechanize/form/option.rb +1 -0
  37. data/lib/mechanize/form/radio_button.rb +1 -0
  38. data/lib/mechanize/form/reset.rb +1 -0
  39. data/lib/mechanize/form/select_list.rb +1 -0
  40. data/lib/mechanize/form/submit.rb +1 -0
  41. data/lib/mechanize/form/text.rb +1 -0
  42. data/lib/mechanize/form/textarea.rb +1 -0
  43. data/lib/mechanize/form.rb +5 -13
  44. data/lib/mechanize/headers.rb +1 -0
  45. data/lib/mechanize/history.rb +1 -0
  46. data/lib/mechanize/http/agent.rb +83 -10
  47. data/lib/mechanize/http/auth_challenge.rb +1 -0
  48. data/lib/mechanize/http/auth_realm.rb +1 -0
  49. data/lib/mechanize/http/auth_store.rb +1 -0
  50. data/lib/mechanize/http/content_disposition_parser.rb +15 -4
  51. data/lib/mechanize/http/www_authenticate_parser.rb +3 -3
  52. data/lib/mechanize/http.rb +1 -0
  53. data/lib/mechanize/image.rb +1 -0
  54. data/lib/mechanize/page/base.rb +1 -0
  55. data/lib/mechanize/page/frame.rb +1 -0
  56. data/lib/mechanize/page/image.rb +1 -0
  57. data/lib/mechanize/page/label.rb +1 -0
  58. data/lib/mechanize/page/link.rb +8 -1
  59. data/lib/mechanize/page/meta_refresh.rb +1 -0
  60. data/lib/mechanize/page.rb +6 -8
  61. data/lib/mechanize/parser.rb +1 -0
  62. data/lib/mechanize/pluggable_parsers.rb +2 -1
  63. data/lib/mechanize/prependable.rb +1 -0
  64. data/lib/mechanize/redirect_limit_reached_error.rb +1 -0
  65. data/lib/mechanize/redirect_not_get_or_head_error.rb +1 -0
  66. data/lib/mechanize/response_code_error.rb +2 -1
  67. data/lib/mechanize/response_read_error.rb +1 -0
  68. data/lib/mechanize/robots_disallowed_error.rb +1 -0
  69. data/lib/mechanize/test_case/bad_chunking_servlet.rb +1 -0
  70. data/lib/mechanize/test_case/basic_auth_servlet.rb +1 -0
  71. data/lib/mechanize/test_case/content_type_servlet.rb +1 -0
  72. data/lib/mechanize/test_case/digest_auth_servlet.rb +1 -0
  73. data/lib/mechanize/test_case/file_upload_servlet.rb +1 -0
  74. data/lib/mechanize/test_case/form_servlet.rb +1 -0
  75. data/lib/mechanize/test_case/gzip_servlet.rb +4 -3
  76. data/lib/mechanize/test_case/header_servlet.rb +1 -0
  77. data/lib/mechanize/test_case/http_refresh_servlet.rb +1 -0
  78. data/lib/mechanize/test_case/infinite_redirect_servlet.rb +1 -0
  79. data/lib/mechanize/test_case/infinite_refresh_servlet.rb +1 -0
  80. data/lib/mechanize/test_case/many_cookies_as_string_servlet.rb +1 -0
  81. data/lib/mechanize/test_case/many_cookies_servlet.rb +1 -0
  82. data/lib/mechanize/test_case/modified_since_servlet.rb +1 -0
  83. data/lib/mechanize/test_case/ntlm_servlet.rb +1 -0
  84. data/lib/mechanize/test_case/one_cookie_no_spaces_servlet.rb +1 -0
  85. data/lib/mechanize/test_case/one_cookie_servlet.rb +1 -0
  86. data/lib/mechanize/test_case/quoted_value_cookie_servlet.rb +1 -0
  87. data/lib/mechanize/test_case/redirect_servlet.rb +1 -0
  88. data/lib/mechanize/test_case/referer_servlet.rb +1 -0
  89. data/lib/mechanize/test_case/refresh_with_empty_url.rb +1 -0
  90. data/lib/mechanize/test_case/refresh_without_url.rb +1 -0
  91. data/lib/mechanize/test_case/response_code_servlet.rb +1 -0
  92. data/lib/mechanize/test_case/robots_txt_servlet.rb +1 -0
  93. data/lib/mechanize/test_case/send_cookies_servlet.rb +1 -0
  94. data/lib/mechanize/test_case/server.rb +1 -0
  95. data/lib/mechanize/test_case/servlets.rb +1 -0
  96. data/lib/mechanize/test_case/verb_servlet.rb +5 -6
  97. data/lib/mechanize/test_case.rb +34 -34
  98. data/lib/mechanize/unauthorized_error.rb +1 -0
  99. data/lib/mechanize/unsupported_scheme_error.rb +1 -0
  100. data/lib/mechanize/util.rb +2 -1
  101. data/lib/mechanize/version.rb +2 -1
  102. data/lib/mechanize/xml_file.rb +1 -0
  103. data/lib/mechanize.rb +56 -37
  104. data/mechanize.gemspec +43 -35
  105. data/test/htdocs/dir with spaces/foo.html +1 -0
  106. data/test/htdocs/tc_links.html +1 -1
  107. data/test/test_mechanize.rb +21 -8
  108. data/test/test_mechanize_cookie.rb +38 -26
  109. data/test/test_mechanize_cookie_jar.rb +87 -54
  110. data/test/test_mechanize_directory_saver.rb +1 -0
  111. data/test/test_mechanize_download.rb +14 -1
  112. data/test/test_mechanize_element_not_found_error.rb +1 -0
  113. data/test/test_mechanize_file.rb +11 -0
  114. data/test/test_mechanize_file_connection.rb +23 -4
  115. data/test/test_mechanize_file_request.rb +1 -0
  116. data/test/test_mechanize_file_response.rb +26 -1
  117. data/test/test_mechanize_file_saver.rb +1 -0
  118. data/test/test_mechanize_form.rb +14 -1
  119. data/test/test_mechanize_form_check_box.rb +1 -0
  120. data/test/test_mechanize_form_encoding.rb +2 -1
  121. data/test/test_mechanize_form_field.rb +1 -0
  122. data/test/test_mechanize_form_file_upload.rb +1 -0
  123. data/test/test_mechanize_form_image_button.rb +1 -0
  124. data/test/test_mechanize_form_keygen.rb +2 -0
  125. data/test/test_mechanize_form_multi_select_list.rb +1 -0
  126. data/test/test_mechanize_form_option.rb +1 -0
  127. data/test/test_mechanize_form_radio_button.rb +1 -0
  128. data/test/test_mechanize_form_select_list.rb +1 -0
  129. data/test/test_mechanize_form_textarea.rb +1 -0
  130. data/test/test_mechanize_headers.rb +1 -0
  131. data/test/test_mechanize_history.rb +1 -0
  132. data/test/test_mechanize_http_agent.rb +187 -26
  133. data/test/test_mechanize_http_auth_challenge.rb +1 -0
  134. data/test/test_mechanize_http_auth_realm.rb +1 -0
  135. data/test/test_mechanize_http_auth_store.rb +1 -0
  136. data/test/test_mechanize_http_content_disposition_parser.rb +28 -0
  137. data/test/test_mechanize_http_www_authenticate_parser.rb +1 -0
  138. data/test/test_mechanize_image.rb +1 -0
  139. data/test/test_mechanize_link.rb +25 -0
  140. data/test/test_mechanize_page.rb +15 -0
  141. data/test/test_mechanize_page_encoding.rb +33 -5
  142. data/test/test_mechanize_page_frame.rb +1 -0
  143. data/test/test_mechanize_page_image.rb +1 -0
  144. data/test/test_mechanize_page_link.rb +27 -23
  145. data/test/test_mechanize_page_meta_refresh.rb +1 -0
  146. data/test/test_mechanize_parser.rb +1 -0
  147. data/test/test_mechanize_pluggable_parser.rb +1 -0
  148. data/test/test_mechanize_redirect_limit_reached_error.rb +1 -0
  149. data/test/test_mechanize_redirect_not_get_or_head_error.rb +1 -0
  150. data/test/test_mechanize_response_read_error.rb +1 -0
  151. data/test/test_mechanize_subclass.rb +1 -0
  152. data/test/test_mechanize_util.rb +4 -3
  153. data/test/test_mechanize_xml_file.rb +1 -0
  154. data/test/test_multi_select.rb +1 -0
  155. metadata +106 -86
  156. data/.travis.yml +0 -36
  157. data/README.rdoc +0 -77
@@ -1,4 +1,5 @@
1
1
  # -*- coding: utf-8 -*-
2
+ # frozen_string_literal: true
2
3
  require 'mechanize/test_case'
3
4
 
4
5
  # tests for Page encoding and charset and parsing
@@ -12,7 +13,7 @@ class TestMechanizePageEncoding < Mechanize::TestCase
12
13
 
13
14
  @uri = URI('http://localhost/')
14
15
  @response_headers = { 'content-type' => 'text/html' }
15
- @body = '<title>hi</title>'
16
+ @body = +'<title>hi</title>'
16
17
  end
17
18
 
18
19
  def util_page body = @body, headers = @response_headers
@@ -118,7 +119,7 @@ class TestMechanizePageEncoding < Mechanize::TestCase
118
119
  end
119
120
 
120
121
  def test_meta_charset
121
- body = '<meta http-equiv="content-type" content="text/html;charset=META">'
122
+ body = +'<meta http-equiv="content-type" content="text/html;charset=META">'
122
123
  page = util_page body
123
124
 
124
125
  assert_equal ['META'], page.meta_charset
@@ -132,7 +133,7 @@ class TestMechanizePageEncoding < Mechanize::TestCase
132
133
 
133
134
  def test_encodings
134
135
  response = {'content-type' => 'text/html;charset=HEADER'}
135
- body = '<meta http-equiv="content-type" content="text/html;charset=META">'
136
+ body = +'<meta http-equiv="content-type" content="text/html;charset=META">'
136
137
  @mech.default_encoding = 'DEFAULT'
137
138
  page = util_page body, response
138
139
 
@@ -175,7 +176,7 @@ class TestMechanizePageEncoding < Mechanize::TestCase
175
176
  def test_parser_encoding_when_searching_elements
176
177
  skip "Encoding not implemented" unless have_encoding?
177
178
 
178
- body = '<span id="latin1">hi</span>'
179
+ body = +'<span id="latin1">hi</span>'
179
180
  page = util_page body, 'content-type' => 'text/html,charset=ISO-8859-1'
180
181
 
181
182
  result = page.search('#latin1')
@@ -183,5 +184,32 @@ class TestMechanizePageEncoding < Mechanize::TestCase
183
184
  assert_equal Encoding::UTF_8, result.text.encoding
184
185
  end
185
186
 
186
- end
187
+ def test_parser_error_message_containing_encoding_errors
188
+ skip if RUBY_ENGINE == 'jruby' # this is a libxml2-specific condition
189
+
190
+ # https://github.com/sparklemotion/mechanize/issues/553
191
+ body = +<<~EOF
192
+ <html>
193
+ <body>
194
+ <!--
195
+ ## メモ
196
+ 処理の一般化, 二重ループ, 多重ループ
197
+ wzxhzdk:25
198
+ -->
199
+ EOF
200
+ page = util_page body
201
+
202
+ # this should not raise an "invalid byte sequence in UTF-8" error while processing parsing errors
203
+ page.search("body")
187
204
 
205
+ # let's assert on the setup: a libxml2-returned parsing error itself contains an invalid character
206
+ # note that this problem only appears in libxml <= 2.9.10
207
+ error = page.parser.errors.find { |e| e.message.include?("Comment not terminated") }
208
+ if error
209
+ exception = assert_raises(ArgumentError) do
210
+ error.message =~ /any regex just to trigger encoding error/
211
+ end
212
+ assert_includes(exception.message, "invalid byte sequence in UTF-8")
213
+ end
214
+ end
215
+ end
@@ -1,3 +1,4 @@
1
+ # frozen_string_literal: true
1
2
  require 'mechanize/test_case'
2
3
 
3
4
  class TestMechanizePageFrame < Mechanize::TestCase
@@ -1,3 +1,4 @@
1
+ # frozen_string_literal: true
1
2
  require 'mechanize/test_case'
2
3
 
3
4
  class TestMechanizePageImage < Mechanize::TestCase
@@ -1,7 +1,10 @@
1
1
  # coding: utf-8
2
+ # frozen_string_literal: true
2
3
 
3
4
  require 'mechanize/test_case'
4
5
 
6
+ puts "Nokogiri::VERSION_INFO: #{Nokogiri::VERSION_INFO}"
7
+
5
8
  class TestMechanizePageLink < Mechanize::TestCase
6
9
 
7
10
  WINDOWS_1255 = <<-HTML
@@ -9,7 +12,7 @@ class TestMechanizePageLink < Mechanize::TestCase
9
12
  <title>hi</title>
10
13
  HTML
11
14
 
12
- BAD = <<-HTML
15
+ BAD = <<-HTML.dup
13
16
  <meta http-equiv="content-type" content="text/html; charset=windows-1255">
14
17
  <title>Bia\xB3ystok</title>
15
18
  HTML
@@ -17,18 +20,16 @@ class TestMechanizePageLink < Mechanize::TestCase
17
20
 
18
21
  SJIS_TITLE = "\x83\x65\x83\x58\x83\x67"
19
22
 
20
- SJIS_AFTER_TITLE = <<-HTML
23
+ SJIS_AFTER_TITLE = <<-HTML.dup
21
24
  <title>#{SJIS_TITLE}</title>
22
25
  <meta http-equiv="Content-Type" content="text/html; charset=Shift_JIS">
23
26
  HTML
24
-
25
27
  SJIS_AFTER_TITLE.force_encoding Encoding::BINARY if defined? Encoding
26
28
 
27
- SJIS_BAD_AFTER_TITLE = <<-HTML
29
+ SJIS_BAD_AFTER_TITLE = <<-HTML.dup
28
30
  <title>#{SJIS_TITLE}</title>
29
31
  <meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
30
32
  HTML
31
-
32
33
  SJIS_BAD_AFTER_TITLE.force_encoding Encoding::BINARY if defined? Encoding
33
34
 
34
35
  UTF8_TITLE = 'テスト'
@@ -44,20 +45,17 @@ class TestMechanizePageLink < Mechanize::TestCase
44
45
 
45
46
  @uri = URI('http://example')
46
47
  @res = { 'content-type' => 'text/html' }
47
- @body = '<title>hi</title>'
48
+ @body = +'<title>hi</title>'
48
49
  end
49
50
 
50
51
  def util_page body = @body, res = @res
51
52
  Mechanize::Page.new @uri, res, body && body.force_encoding(Encoding::BINARY), 200, @mech
52
53
  end
53
54
 
54
- def nkf_dependency?
55
- if RUBY_ENGINE == 'ruby'
56
- false
57
- else
58
- meth = caller[0][/`(\w+)/, 1]
59
- warn "#{meth}: skipped because this feature currently depends on NKF"
60
- true
55
+ def skip_if_nkf_dependency
56
+ if RUBY_ENGINE == 'jruby'
57
+ meth = caller_locations(1,1).first.base_label
58
+ skip "#{meth}: skipped because this feature currently depends on NKF"
61
59
  end
62
60
  end
63
61
 
@@ -76,7 +74,7 @@ class TestMechanizePageLink < Mechanize::TestCase
76
74
  end
77
75
 
78
76
  def test_canonical_uri_unescaped
79
- page = util_page <<-BODY
77
+ page = util_page(+<<-BODY)
80
78
  <head>
81
79
  <link rel="canonical" href="http://example/white space"/>
82
80
  </head>
@@ -98,7 +96,7 @@ class TestMechanizePageLink < Mechanize::TestCase
98
96
  end
99
97
 
100
98
  def test_encoding
101
- page = util_page WINDOWS_1255
99
+ page = util_page WINDOWS_1255.dup
102
100
 
103
101
  assert_equal 'windows-1255', page.encoding
104
102
  end
@@ -112,17 +110,20 @@ class TestMechanizePageLink < Mechanize::TestCase
112
110
  end
113
111
 
114
112
  def test_encoding_charset_after_title_bad
115
- return if nkf_dependency?
113
+ skip_if_nkf_dependency
116
114
 
117
- page = util_page UTF8
115
+ # https://gitlab.gnome.org/GNOME/libxml2/-/issues/543
116
+ skip if Nokogiri.uses_libxml?([">= 2.11.0", "< 2.12.0"])
117
+
118
+ page = util_page UTF8.dup
118
119
 
119
120
  assert_equal false, page.encoding_error?
120
121
 
121
- assert_equal 'UTF-8', page.encoding
122
+ assert_equal "UTF-8", page.encoding
122
123
  end
123
124
 
124
125
  def test_encoding_charset_after_title_double_bad
125
- return if nkf_dependency?
126
+ skip_if_nkf_dependency
126
127
 
127
128
  page = util_page SJIS_BAD_AFTER_TITLE
128
129
 
@@ -132,9 +133,12 @@ class TestMechanizePageLink < Mechanize::TestCase
132
133
  end
133
134
 
134
135
  def test_encoding_charset_bad
135
- return if nkf_dependency?
136
+ skip_if_nkf_dependency
137
+
138
+ # https://gitlab.gnome.org/GNOME/libxml2/-/issues/543
139
+ skip if Nokogiri.uses_libxml?([">= 2.11.0", "< 2.12.0"])
136
140
 
137
- page = util_page "<title>#{UTF8_TITLE}</title>"
141
+ page = util_page(+"<title>#{UTF8_TITLE}</title>")
138
142
  page.encodings.replace %w[
139
143
  UTF-8
140
144
  Shift_JIS
@@ -146,7 +150,7 @@ class TestMechanizePageLink < Mechanize::TestCase
146
150
  end
147
151
 
148
152
  def test_encoding_meta_charset
149
- page = util_page "<meta charset='UTF-8'>"
153
+ page = util_page(+"<meta charset='UTF-8'>")
150
154
 
151
155
  assert_equal 'UTF-8', page.encoding
152
156
  end
@@ -337,7 +341,7 @@ class TestMechanizePageLink < Mechanize::TestCase
337
341
  end
338
342
 
339
343
  def test_title_none
340
- page = util_page '' # invalid HTML
344
+ page = util_page(+'') # invalid HTML
341
345
 
342
346
  assert_nil(page.title)
343
347
  end
@@ -1,3 +1,4 @@
1
+ # frozen_string_literal: true
1
2
  require 'mechanize/test_case'
2
3
 
3
4
  class TestMechanizePageMetaRefresh < Mechanize::TestCase
@@ -1,3 +1,4 @@
1
+ # frozen_string_literal: true
1
2
  require 'mechanize/test_case'
2
3
 
3
4
  class TestMechanizeParser < Mechanize::TestCase
@@ -1,3 +1,4 @@
1
+ # frozen_string_literal: true
1
2
  require 'mechanize/test_case'
2
3
 
3
4
  class TestMechanizePluggableParser < Mechanize::TestCase
@@ -1,3 +1,4 @@
1
+ # frozen_string_literal: true
1
2
  require 'mechanize/test_case'
2
3
 
3
4
  class TestMechanizeRedirectLimitReachedError < Mechanize::TestCase
@@ -1,3 +1,4 @@
1
+ # frozen_string_literal: true
1
2
  require 'mechanize/test_case'
2
3
 
3
4
  class TestMechanizeRedirectNotGetOrHead < Mechanize::TestCase
@@ -1,3 +1,4 @@
1
+ # frozen_string_literal: true
1
2
  require 'mechanize/test_case'
2
3
 
3
4
  class TestMechanizeResponseReadError < Mechanize::TestCase
@@ -1,3 +1,4 @@
1
+ # frozen_string_literal: true
1
2
  require 'mechanize/test_case'
2
3
 
3
4
  class TestMechanizeSubclass < Mechanize::TestCase
@@ -1,4 +1,5 @@
1
1
  # coding: utf-8
2
+ # frozen_string_literal: true
2
3
 
3
4
  require 'mechanize/test_case'
4
5
 
@@ -6,7 +7,7 @@ class TestMechanizeUtil < Mechanize::TestCase
6
7
 
7
8
  INPUTTED_VALUE = "テスト" # "test" in Japanese UTF-8 encoding
8
9
  CONTENT_ENCODING = 'Shift_JIS' # one of Japanese encoding
9
- ENCODED_VALUE = "\x83\x65\x83\x58\x83\x67".force_encoding(::Encoding::SHIFT_JIS) # "test" in Japanese Shift_JIS encoding
10
+ ENCODED_VALUE = "\x83\x65\x83\x58\x83\x67".dup.force_encoding(::Encoding::SHIFT_JIS) # "test" in Japanese Shift_JIS encoding
10
11
 
11
12
  ENCODING_ERRORS = [EncodingError, Encoding::ConverterNotFoundError] # and so on
12
13
  ERROR_LOG_MESSAGE = /from_native_charset: Encoding::ConverterNotFoundError: form encoding: "UTF-eight"/
@@ -67,7 +68,7 @@ class TestMechanizeUtil < Mechanize::TestCase
67
68
  end
68
69
 
69
70
  def test_from_native_charset_logs_form_when_encoding_error_raised
70
- sio = StringIO.new("")
71
+ sio = StringIO.new
71
72
  log = Logger.new(sio)
72
73
  log.level = Logger::DEBUG
73
74
 
@@ -79,7 +80,7 @@ class TestMechanizeUtil < Mechanize::TestCase
79
80
  end
80
81
 
81
82
  def test_from_native_charset_logs_form_when_encoding_error_is_ignored
82
- sio = StringIO.new("")
83
+ sio = StringIO.new
83
84
  log = Logger.new(sio)
84
85
  log.level = Logger::DEBUG
85
86
 
@@ -1,3 +1,4 @@
1
+ # frozen_string_literal: true
1
2
  require 'mechanize/test_case'
2
3
 
3
4
  class TestMechanizeXmlFile < Mechanize::TestCase
@@ -1,3 +1,4 @@
1
+ # frozen_string_literal: true
1
2
  require 'mechanize/test_case'
2
3
 
3
4
  class MultiSelectTest < Mechanize::TestCase