mechanize 2.7.6 → 2.12.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (157) hide show
  1. checksums.yaml +5 -5
  2. data/.github/dependabot.yml +11 -0
  3. data/.github/workflows/ci.yml +43 -0
  4. data/.github/workflows/upstream.yml +51 -0
  5. data/.yardopts +8 -0
  6. data/{CHANGELOG.rdoc → CHANGELOG.md} +221 -96
  7. data/EXAMPLES.rdoc +1 -24
  8. data/Gemfile +10 -4
  9. data/{LICENSE.rdoc → LICENSE.txt} +4 -0
  10. data/README.md +77 -0
  11. data/Rakefile +18 -3
  12. data/examples/latest_user_agents.rb +100 -0
  13. data/examples/rubygems.rb +2 -2
  14. data/examples/wikipedia_links_to_philosophy.rb +5 -6
  15. data/lib/mechanize/chunked_termination_error.rb +1 -0
  16. data/lib/mechanize/content_type_error.rb +1 -0
  17. data/lib/mechanize/cookie.rb +3 -15
  18. data/lib/mechanize/cookie_jar.rb +13 -9
  19. data/lib/mechanize/directory_saver.rb +1 -0
  20. data/lib/mechanize/download.rb +2 -1
  21. data/lib/mechanize/element_matcher.rb +1 -0
  22. data/lib/mechanize/element_not_found_error.rb +1 -0
  23. data/lib/mechanize/file.rb +2 -1
  24. data/lib/mechanize/file_connection.rb +5 -3
  25. data/lib/mechanize/file_request.rb +1 -0
  26. data/lib/mechanize/file_response.rb +4 -1
  27. data/lib/mechanize/file_saver.rb +1 -0
  28. data/lib/mechanize/form/button.rb +1 -0
  29. data/lib/mechanize/form/check_box.rb +1 -0
  30. data/lib/mechanize/form/field.rb +1 -0
  31. data/lib/mechanize/form/file_upload.rb +1 -0
  32. data/lib/mechanize/form/hidden.rb +1 -0
  33. data/lib/mechanize/form/image_button.rb +1 -0
  34. data/lib/mechanize/form/keygen.rb +1 -0
  35. data/lib/mechanize/form/multi_select_list.rb +2 -1
  36. data/lib/mechanize/form/option.rb +1 -0
  37. data/lib/mechanize/form/radio_button.rb +1 -0
  38. data/lib/mechanize/form/reset.rb +1 -0
  39. data/lib/mechanize/form/select_list.rb +1 -0
  40. data/lib/mechanize/form/submit.rb +1 -0
  41. data/lib/mechanize/form/text.rb +1 -0
  42. data/lib/mechanize/form/textarea.rb +1 -0
  43. data/lib/mechanize/form.rb +5 -13
  44. data/lib/mechanize/headers.rb +1 -0
  45. data/lib/mechanize/history.rb +1 -0
  46. data/lib/mechanize/http/agent.rb +83 -10
  47. data/lib/mechanize/http/auth_challenge.rb +1 -0
  48. data/lib/mechanize/http/auth_realm.rb +1 -0
  49. data/lib/mechanize/http/auth_store.rb +1 -0
  50. data/lib/mechanize/http/content_disposition_parser.rb +15 -4
  51. data/lib/mechanize/http/www_authenticate_parser.rb +3 -3
  52. data/lib/mechanize/http.rb +1 -0
  53. data/lib/mechanize/image.rb +1 -0
  54. data/lib/mechanize/page/base.rb +1 -0
  55. data/lib/mechanize/page/frame.rb +1 -0
  56. data/lib/mechanize/page/image.rb +1 -0
  57. data/lib/mechanize/page/label.rb +1 -0
  58. data/lib/mechanize/page/link.rb +8 -1
  59. data/lib/mechanize/page/meta_refresh.rb +1 -0
  60. data/lib/mechanize/page.rb +6 -8
  61. data/lib/mechanize/parser.rb +1 -0
  62. data/lib/mechanize/pluggable_parsers.rb +2 -1
  63. data/lib/mechanize/prependable.rb +1 -0
  64. data/lib/mechanize/redirect_limit_reached_error.rb +1 -0
  65. data/lib/mechanize/redirect_not_get_or_head_error.rb +1 -0
  66. data/lib/mechanize/response_code_error.rb +2 -1
  67. data/lib/mechanize/response_read_error.rb +1 -0
  68. data/lib/mechanize/robots_disallowed_error.rb +1 -0
  69. data/lib/mechanize/test_case/bad_chunking_servlet.rb +1 -0
  70. data/lib/mechanize/test_case/basic_auth_servlet.rb +1 -0
  71. data/lib/mechanize/test_case/content_type_servlet.rb +1 -0
  72. data/lib/mechanize/test_case/digest_auth_servlet.rb +1 -0
  73. data/lib/mechanize/test_case/file_upload_servlet.rb +1 -0
  74. data/lib/mechanize/test_case/form_servlet.rb +1 -0
  75. data/lib/mechanize/test_case/gzip_servlet.rb +4 -3
  76. data/lib/mechanize/test_case/header_servlet.rb +1 -0
  77. data/lib/mechanize/test_case/http_refresh_servlet.rb +1 -0
  78. data/lib/mechanize/test_case/infinite_redirect_servlet.rb +1 -0
  79. data/lib/mechanize/test_case/infinite_refresh_servlet.rb +1 -0
  80. data/lib/mechanize/test_case/many_cookies_as_string_servlet.rb +1 -0
  81. data/lib/mechanize/test_case/many_cookies_servlet.rb +1 -0
  82. data/lib/mechanize/test_case/modified_since_servlet.rb +1 -0
  83. data/lib/mechanize/test_case/ntlm_servlet.rb +1 -0
  84. data/lib/mechanize/test_case/one_cookie_no_spaces_servlet.rb +1 -0
  85. data/lib/mechanize/test_case/one_cookie_servlet.rb +1 -0
  86. data/lib/mechanize/test_case/quoted_value_cookie_servlet.rb +1 -0
  87. data/lib/mechanize/test_case/redirect_servlet.rb +1 -0
  88. data/lib/mechanize/test_case/referer_servlet.rb +1 -0
  89. data/lib/mechanize/test_case/refresh_with_empty_url.rb +1 -0
  90. data/lib/mechanize/test_case/refresh_without_url.rb +1 -0
  91. data/lib/mechanize/test_case/response_code_servlet.rb +1 -0
  92. data/lib/mechanize/test_case/robots_txt_servlet.rb +1 -0
  93. data/lib/mechanize/test_case/send_cookies_servlet.rb +1 -0
  94. data/lib/mechanize/test_case/server.rb +1 -0
  95. data/lib/mechanize/test_case/servlets.rb +1 -0
  96. data/lib/mechanize/test_case/verb_servlet.rb +5 -6
  97. data/lib/mechanize/test_case.rb +34 -34
  98. data/lib/mechanize/unauthorized_error.rb +1 -0
  99. data/lib/mechanize/unsupported_scheme_error.rb +1 -0
  100. data/lib/mechanize/util.rb +2 -1
  101. data/lib/mechanize/version.rb +2 -1
  102. data/lib/mechanize/xml_file.rb +1 -0
  103. data/lib/mechanize.rb +56 -37
  104. data/mechanize.gemspec +43 -35
  105. data/test/htdocs/dir with spaces/foo.html +1 -0
  106. data/test/htdocs/tc_links.html +1 -1
  107. data/test/test_mechanize.rb +21 -8
  108. data/test/test_mechanize_cookie.rb +38 -26
  109. data/test/test_mechanize_cookie_jar.rb +87 -54
  110. data/test/test_mechanize_directory_saver.rb +1 -0
  111. data/test/test_mechanize_download.rb +14 -1
  112. data/test/test_mechanize_element_not_found_error.rb +1 -0
  113. data/test/test_mechanize_file.rb +11 -0
  114. data/test/test_mechanize_file_connection.rb +23 -4
  115. data/test/test_mechanize_file_request.rb +1 -0
  116. data/test/test_mechanize_file_response.rb +26 -1
  117. data/test/test_mechanize_file_saver.rb +1 -0
  118. data/test/test_mechanize_form.rb +14 -1
  119. data/test/test_mechanize_form_check_box.rb +1 -0
  120. data/test/test_mechanize_form_encoding.rb +2 -1
  121. data/test/test_mechanize_form_field.rb +1 -0
  122. data/test/test_mechanize_form_file_upload.rb +1 -0
  123. data/test/test_mechanize_form_image_button.rb +1 -0
  124. data/test/test_mechanize_form_keygen.rb +2 -0
  125. data/test/test_mechanize_form_multi_select_list.rb +1 -0
  126. data/test/test_mechanize_form_option.rb +1 -0
  127. data/test/test_mechanize_form_radio_button.rb +1 -0
  128. data/test/test_mechanize_form_select_list.rb +1 -0
  129. data/test/test_mechanize_form_textarea.rb +1 -0
  130. data/test/test_mechanize_headers.rb +1 -0
  131. data/test/test_mechanize_history.rb +1 -0
  132. data/test/test_mechanize_http_agent.rb +187 -26
  133. data/test/test_mechanize_http_auth_challenge.rb +1 -0
  134. data/test/test_mechanize_http_auth_realm.rb +1 -0
  135. data/test/test_mechanize_http_auth_store.rb +1 -0
  136. data/test/test_mechanize_http_content_disposition_parser.rb +28 -0
  137. data/test/test_mechanize_http_www_authenticate_parser.rb +1 -0
  138. data/test/test_mechanize_image.rb +1 -0
  139. data/test/test_mechanize_link.rb +25 -0
  140. data/test/test_mechanize_page.rb +15 -0
  141. data/test/test_mechanize_page_encoding.rb +33 -5
  142. data/test/test_mechanize_page_frame.rb +1 -0
  143. data/test/test_mechanize_page_image.rb +1 -0
  144. data/test/test_mechanize_page_link.rb +27 -23
  145. data/test/test_mechanize_page_meta_refresh.rb +1 -0
  146. data/test/test_mechanize_parser.rb +1 -0
  147. data/test/test_mechanize_pluggable_parser.rb +1 -0
  148. data/test/test_mechanize_redirect_limit_reached_error.rb +1 -0
  149. data/test/test_mechanize_redirect_not_get_or_head_error.rb +1 -0
  150. data/test/test_mechanize_response_read_error.rb +1 -0
  151. data/test/test_mechanize_subclass.rb +1 -0
  152. data/test/test_mechanize_util.rb +4 -3
  153. data/test/test_mechanize_xml_file.rb +1 -0
  154. data/test/test_multi_select.rb +1 -0
  155. metadata +106 -86
  156. data/.travis.yml +0 -36
  157. data/README.rdoc +0 -77
@@ -1,4 +1,5 @@
1
1
  # -*- coding: utf-8 -*-
2
+ # frozen_string_literal: true
2
3
  require 'mechanize/test_case'
3
4
 
4
5
  # tests for Page encoding and charset and parsing
@@ -12,7 +13,7 @@ class TestMechanizePageEncoding < Mechanize::TestCase
12
13
 
13
14
  @uri = URI('http://localhost/')
14
15
  @response_headers = { 'content-type' => 'text/html' }
15
- @body = '<title>hi</title>'
16
+ @body = +'<title>hi</title>'
16
17
  end
17
18
 
18
19
  def util_page body = @body, headers = @response_headers
@@ -118,7 +119,7 @@ class TestMechanizePageEncoding < Mechanize::TestCase
118
119
  end
119
120
 
120
121
  def test_meta_charset
121
- body = '<meta http-equiv="content-type" content="text/html;charset=META">'
122
+ body = +'<meta http-equiv="content-type" content="text/html;charset=META">'
122
123
  page = util_page body
123
124
 
124
125
  assert_equal ['META'], page.meta_charset
@@ -132,7 +133,7 @@ class TestMechanizePageEncoding < Mechanize::TestCase
132
133
 
133
134
  def test_encodings
134
135
  response = {'content-type' => 'text/html;charset=HEADER'}
135
- body = '<meta http-equiv="content-type" content="text/html;charset=META">'
136
+ body = +'<meta http-equiv="content-type" content="text/html;charset=META">'
136
137
  @mech.default_encoding = 'DEFAULT'
137
138
  page = util_page body, response
138
139
 
@@ -175,7 +176,7 @@ class TestMechanizePageEncoding < Mechanize::TestCase
175
176
  def test_parser_encoding_when_searching_elements
176
177
  skip "Encoding not implemented" unless have_encoding?
177
178
 
178
- body = '<span id="latin1">hi</span>'
179
+ body = +'<span id="latin1">hi</span>'
179
180
  page = util_page body, 'content-type' => 'text/html,charset=ISO-8859-1'
180
181
 
181
182
  result = page.search('#latin1')
@@ -183,5 +184,32 @@ class TestMechanizePageEncoding < Mechanize::TestCase
183
184
  assert_equal Encoding::UTF_8, result.text.encoding
184
185
  end
185
186
 
186
- end
187
+ def test_parser_error_message_containing_encoding_errors
188
+ skip if RUBY_ENGINE == 'jruby' # this is a libxml2-specific condition
189
+
190
+ # https://github.com/sparklemotion/mechanize/issues/553
191
+ body = +<<~EOF
192
+ <html>
193
+ <body>
194
+ <!--
195
+ ## メモ
196
+ 処理の一般化, 二重ループ, 多重ループ
197
+ wzxhzdk:25
198
+ -->
199
+ EOF
200
+ page = util_page body
201
+
202
+ # this should not raise an "invalid byte sequence in UTF-8" error while processing parsing errors
203
+ page.search("body")
187
204
 
205
+ # let's assert on the setup: a libxml2-returned parsing error itself contains an invalid character
206
+ # note that this problem only appears in libxml <= 2.9.10
207
+ error = page.parser.errors.find { |e| e.message.include?("Comment not terminated") }
208
+ if error
209
+ exception = assert_raises(ArgumentError) do
210
+ error.message =~ /any regex just to trigger encoding error/
211
+ end
212
+ assert_includes(exception.message, "invalid byte sequence in UTF-8")
213
+ end
214
+ end
215
+ end
@@ -1,3 +1,4 @@
1
+ # frozen_string_literal: true
1
2
  require 'mechanize/test_case'
2
3
 
3
4
  class TestMechanizePageFrame < Mechanize::TestCase
@@ -1,3 +1,4 @@
1
+ # frozen_string_literal: true
1
2
  require 'mechanize/test_case'
2
3
 
3
4
  class TestMechanizePageImage < Mechanize::TestCase
@@ -1,7 +1,10 @@
1
1
  # coding: utf-8
2
+ # frozen_string_literal: true
2
3
 
3
4
  require 'mechanize/test_case'
4
5
 
6
+ puts "Nokogiri::VERSION_INFO: #{Nokogiri::VERSION_INFO}"
7
+
5
8
  class TestMechanizePageLink < Mechanize::TestCase
6
9
 
7
10
  WINDOWS_1255 = <<-HTML
@@ -9,7 +12,7 @@ class TestMechanizePageLink < Mechanize::TestCase
9
12
  <title>hi</title>
10
13
  HTML
11
14
 
12
- BAD = <<-HTML
15
+ BAD = <<-HTML.dup
13
16
  <meta http-equiv="content-type" content="text/html; charset=windows-1255">
14
17
  <title>Bia\xB3ystok</title>
15
18
  HTML
@@ -17,18 +20,16 @@ class TestMechanizePageLink < Mechanize::TestCase
17
20
 
18
21
  SJIS_TITLE = "\x83\x65\x83\x58\x83\x67"
19
22
 
20
- SJIS_AFTER_TITLE = <<-HTML
23
+ SJIS_AFTER_TITLE = <<-HTML.dup
21
24
  <title>#{SJIS_TITLE}</title>
22
25
  <meta http-equiv="Content-Type" content="text/html; charset=Shift_JIS">
23
26
  HTML
24
-
25
27
  SJIS_AFTER_TITLE.force_encoding Encoding::BINARY if defined? Encoding
26
28
 
27
- SJIS_BAD_AFTER_TITLE = <<-HTML
29
+ SJIS_BAD_AFTER_TITLE = <<-HTML.dup
28
30
  <title>#{SJIS_TITLE}</title>
29
31
  <meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
30
32
  HTML
31
-
32
33
  SJIS_BAD_AFTER_TITLE.force_encoding Encoding::BINARY if defined? Encoding
33
34
 
34
35
  UTF8_TITLE = 'テスト'
@@ -44,20 +45,17 @@ class TestMechanizePageLink < Mechanize::TestCase
44
45
 
45
46
  @uri = URI('http://example')
46
47
  @res = { 'content-type' => 'text/html' }
47
- @body = '<title>hi</title>'
48
+ @body = +'<title>hi</title>'
48
49
  end
49
50
 
50
51
  def util_page body = @body, res = @res
51
52
  Mechanize::Page.new @uri, res, body && body.force_encoding(Encoding::BINARY), 200, @mech
52
53
  end
53
54
 
54
- def nkf_dependency?
55
- if RUBY_ENGINE == 'ruby'
56
- false
57
- else
58
- meth = caller[0][/`(\w+)/, 1]
59
- warn "#{meth}: skipped because this feature currently depends on NKF"
60
- true
55
+ def skip_if_nkf_dependency
56
+ if RUBY_ENGINE == 'jruby'
57
+ meth = caller_locations(1,1).first.base_label
58
+ skip "#{meth}: skipped because this feature currently depends on NKF"
61
59
  end
62
60
  end
63
61
 
@@ -76,7 +74,7 @@ class TestMechanizePageLink < Mechanize::TestCase
76
74
  end
77
75
 
78
76
  def test_canonical_uri_unescaped
79
- page = util_page <<-BODY
77
+ page = util_page(+<<-BODY)
80
78
  <head>
81
79
  <link rel="canonical" href="http://example/white space"/>
82
80
  </head>
@@ -98,7 +96,7 @@ class TestMechanizePageLink < Mechanize::TestCase
98
96
  end
99
97
 
100
98
  def test_encoding
101
- page = util_page WINDOWS_1255
99
+ page = util_page WINDOWS_1255.dup
102
100
 
103
101
  assert_equal 'windows-1255', page.encoding
104
102
  end
@@ -112,17 +110,20 @@ class TestMechanizePageLink < Mechanize::TestCase
112
110
  end
113
111
 
114
112
  def test_encoding_charset_after_title_bad
115
- return if nkf_dependency?
113
+ skip_if_nkf_dependency
116
114
 
117
- page = util_page UTF8
115
+ # https://gitlab.gnome.org/GNOME/libxml2/-/issues/543
116
+ skip if Nokogiri.uses_libxml?([">= 2.11.0", "< 2.12.0"])
117
+
118
+ page = util_page UTF8.dup
118
119
 
119
120
  assert_equal false, page.encoding_error?
120
121
 
121
- assert_equal 'UTF-8', page.encoding
122
+ assert_equal "UTF-8", page.encoding
122
123
  end
123
124
 
124
125
  def test_encoding_charset_after_title_double_bad
125
- return if nkf_dependency?
126
+ skip_if_nkf_dependency
126
127
 
127
128
  page = util_page SJIS_BAD_AFTER_TITLE
128
129
 
@@ -132,9 +133,12 @@ class TestMechanizePageLink < Mechanize::TestCase
132
133
  end
133
134
 
134
135
  def test_encoding_charset_bad
135
- return if nkf_dependency?
136
+ skip_if_nkf_dependency
137
+
138
+ # https://gitlab.gnome.org/GNOME/libxml2/-/issues/543
139
+ skip if Nokogiri.uses_libxml?([">= 2.11.0", "< 2.12.0"])
136
140
 
137
- page = util_page "<title>#{UTF8_TITLE}</title>"
141
+ page = util_page(+"<title>#{UTF8_TITLE}</title>")
138
142
  page.encodings.replace %w[
139
143
  UTF-8
140
144
  Shift_JIS
@@ -146,7 +150,7 @@ class TestMechanizePageLink < Mechanize::TestCase
146
150
  end
147
151
 
148
152
  def test_encoding_meta_charset
149
- page = util_page "<meta charset='UTF-8'>"
153
+ page = util_page(+"<meta charset='UTF-8'>")
150
154
 
151
155
  assert_equal 'UTF-8', page.encoding
152
156
  end
@@ -337,7 +341,7 @@ class TestMechanizePageLink < Mechanize::TestCase
337
341
  end
338
342
 
339
343
  def test_title_none
340
- page = util_page '' # invalid HTML
344
+ page = util_page(+'') # invalid HTML
341
345
 
342
346
  assert_nil(page.title)
343
347
  end
@@ -1,3 +1,4 @@
1
+ # frozen_string_literal: true
1
2
  require 'mechanize/test_case'
2
3
 
3
4
  class TestMechanizePageMetaRefresh < Mechanize::TestCase
@@ -1,3 +1,4 @@
1
+ # frozen_string_literal: true
1
2
  require 'mechanize/test_case'
2
3
 
3
4
  class TestMechanizeParser < Mechanize::TestCase
@@ -1,3 +1,4 @@
1
+ # frozen_string_literal: true
1
2
  require 'mechanize/test_case'
2
3
 
3
4
  class TestMechanizePluggableParser < Mechanize::TestCase
@@ -1,3 +1,4 @@
1
+ # frozen_string_literal: true
1
2
  require 'mechanize/test_case'
2
3
 
3
4
  class TestMechanizeRedirectLimitReachedError < Mechanize::TestCase
@@ -1,3 +1,4 @@
1
+ # frozen_string_literal: true
1
2
  require 'mechanize/test_case'
2
3
 
3
4
  class TestMechanizeRedirectNotGetOrHead < Mechanize::TestCase
@@ -1,3 +1,4 @@
1
+ # frozen_string_literal: true
1
2
  require 'mechanize/test_case'
2
3
 
3
4
  class TestMechanizeResponseReadError < Mechanize::TestCase
@@ -1,3 +1,4 @@
1
+ # frozen_string_literal: true
1
2
  require 'mechanize/test_case'
2
3
 
3
4
  class TestMechanizeSubclass < Mechanize::TestCase
@@ -1,4 +1,5 @@
1
1
  # coding: utf-8
2
+ # frozen_string_literal: true
2
3
 
3
4
  require 'mechanize/test_case'
4
5
 
@@ -6,7 +7,7 @@ class TestMechanizeUtil < Mechanize::TestCase
6
7
 
7
8
  INPUTTED_VALUE = "テスト" # "test" in Japanese UTF-8 encoding
8
9
  CONTENT_ENCODING = 'Shift_JIS' # one of Japanese encoding
9
- ENCODED_VALUE = "\x83\x65\x83\x58\x83\x67".force_encoding(::Encoding::SHIFT_JIS) # "test" in Japanese Shift_JIS encoding
10
+ ENCODED_VALUE = "\x83\x65\x83\x58\x83\x67".dup.force_encoding(::Encoding::SHIFT_JIS) # "test" in Japanese Shift_JIS encoding
10
11
 
11
12
  ENCODING_ERRORS = [EncodingError, Encoding::ConverterNotFoundError] # and so on
12
13
  ERROR_LOG_MESSAGE = /from_native_charset: Encoding::ConverterNotFoundError: form encoding: "UTF-eight"/
@@ -67,7 +68,7 @@ class TestMechanizeUtil < Mechanize::TestCase
67
68
  end
68
69
 
69
70
  def test_from_native_charset_logs_form_when_encoding_error_raised
70
- sio = StringIO.new("")
71
+ sio = StringIO.new
71
72
  log = Logger.new(sio)
72
73
  log.level = Logger::DEBUG
73
74
 
@@ -79,7 +80,7 @@ class TestMechanizeUtil < Mechanize::TestCase
79
80
  end
80
81
 
81
82
  def test_from_native_charset_logs_form_when_encoding_error_is_ignored
82
- sio = StringIO.new("")
83
+ sio = StringIO.new
83
84
  log = Logger.new(sio)
84
85
  log.level = Logger::DEBUG
85
86
 
@@ -1,3 +1,4 @@
1
+ # frozen_string_literal: true
1
2
  require 'mechanize/test_case'
2
3
 
3
4
  class TestMechanizeXmlFile < Mechanize::TestCase
@@ -1,3 +1,4 @@
1
+ # frozen_string_literal: true
1
2
  require 'mechanize/test_case'
2
3
 
3
4
  class MultiSelectTest < Mechanize::TestCase