mechanize 2.7.3 → 2.8.0
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of mechanize might be problematic. Click here for more details.
- checksums.yaml +5 -5
- data/.github/workflows/ci-test.yml +45 -0
- data/.gitignore +15 -0
- data/.yardopts +8 -0
- data/{CHANGELOG.rdoc → CHANGELOG.md} +149 -62
- data/EXAMPLES.rdoc +2 -25
- data/Gemfile +3 -0
- data/{LICENSE.rdoc → LICENSE.txt} +4 -0
- data/README.md +79 -0
- data/Rakefile +36 -37
- data/examples/{rubyforge.rb → rubygems.rb} +7 -6
- data/lib/mechanize.rb +75 -33
- data/lib/mechanize/chunked_termination_error.rb +1 -0
- data/lib/mechanize/content_type_error.rb +1 -0
- data/lib/mechanize/cookie.rb +1 -13
- data/lib/mechanize/cookie_jar.rb +4 -12
- data/lib/mechanize/directory_saver.rb +15 -2
- data/lib/mechanize/download.rb +2 -1
- data/lib/mechanize/element_matcher.rb +29 -14
- data/lib/mechanize/element_not_found_error.rb +1 -0
- data/lib/mechanize/file.rb +2 -1
- data/lib/mechanize/file_connection.rb +5 -3
- data/lib/mechanize/file_request.rb +1 -0
- data/lib/mechanize/file_response.rb +5 -4
- data/lib/mechanize/file_saver.rb +1 -0
- data/lib/mechanize/form.rb +119 -46
- data/lib/mechanize/form/button.rb +1 -0
- data/lib/mechanize/form/check_box.rb +1 -0
- data/lib/mechanize/form/field.rb +47 -0
- data/lib/mechanize/form/file_upload.rb +1 -0
- data/lib/mechanize/form/hidden.rb +1 -0
- data/lib/mechanize/form/image_button.rb +1 -0
- data/lib/mechanize/form/keygen.rb +1 -0
- data/lib/mechanize/form/multi_select_list.rb +8 -14
- data/lib/mechanize/form/option.rb +3 -1
- data/lib/mechanize/form/radio_button.rb +1 -0
- data/lib/mechanize/form/reset.rb +1 -0
- data/lib/mechanize/form/select_list.rb +1 -0
- data/lib/mechanize/form/submit.rb +1 -0
- data/lib/mechanize/form/text.rb +1 -0
- data/lib/mechanize/form/textarea.rb +1 -0
- data/lib/mechanize/headers.rb +1 -0
- data/lib/mechanize/history.rb +2 -1
- data/lib/mechanize/http.rb +1 -0
- data/lib/mechanize/http/agent.rb +115 -64
- data/lib/mechanize/http/auth_challenge.rb +1 -0
- data/lib/mechanize/http/auth_realm.rb +2 -1
- data/lib/mechanize/http/auth_store.rb +3 -0
- data/lib/mechanize/http/content_disposition_parser.rb +18 -3
- data/lib/mechanize/http/www_authenticate_parser.rb +5 -5
- data/lib/mechanize/image.rb +1 -0
- data/lib/mechanize/page.rb +166 -55
- data/lib/mechanize/page/base.rb +1 -0
- data/lib/mechanize/page/frame.rb +4 -1
- data/lib/mechanize/page/image.rb +3 -0
- data/lib/mechanize/page/label.rb +1 -0
- data/lib/mechanize/page/link.rb +13 -1
- data/lib/mechanize/page/meta_refresh.rb +1 -0
- data/lib/mechanize/parser.rb +4 -3
- data/lib/mechanize/pluggable_parsers.rb +14 -1
- data/lib/mechanize/prependable.rb +1 -0
- data/lib/mechanize/redirect_limit_reached_error.rb +1 -0
- data/lib/mechanize/redirect_not_get_or_head_error.rb +1 -0
- data/lib/mechanize/response_code_error.rb +2 -1
- data/lib/mechanize/response_read_error.rb +1 -0
- data/lib/mechanize/robots_disallowed_error.rb +1 -0
- data/lib/mechanize/test_case.rb +39 -29
- data/lib/mechanize/test_case/bad_chunking_servlet.rb +1 -0
- data/lib/mechanize/test_case/basic_auth_servlet.rb +1 -0
- data/lib/mechanize/test_case/content_type_servlet.rb +1 -0
- data/lib/mechanize/test_case/digest_auth_servlet.rb +1 -0
- data/lib/mechanize/test_case/file_upload_servlet.rb +1 -0
- data/lib/mechanize/test_case/form_servlet.rb +1 -0
- data/lib/mechanize/test_case/gzip_servlet.rb +4 -3
- data/lib/mechanize/test_case/header_servlet.rb +1 -0
- data/lib/mechanize/test_case/http_refresh_servlet.rb +2 -2
- data/lib/mechanize/test_case/infinite_redirect_servlet.rb +1 -0
- data/lib/mechanize/test_case/infinite_refresh_servlet.rb +2 -2
- data/lib/mechanize/test_case/many_cookies_as_string_servlet.rb +1 -0
- data/lib/mechanize/test_case/many_cookies_servlet.rb +1 -0
- data/lib/mechanize/test_case/modified_since_servlet.rb +1 -0
- data/lib/mechanize/test_case/ntlm_servlet.rb +1 -0
- data/lib/mechanize/test_case/one_cookie_no_spaces_servlet.rb +1 -0
- data/lib/mechanize/test_case/one_cookie_servlet.rb +1 -0
- data/lib/mechanize/test_case/quoted_value_cookie_servlet.rb +1 -0
- data/lib/mechanize/test_case/redirect_servlet.rb +1 -0
- data/lib/mechanize/test_case/referer_servlet.rb +1 -0
- data/lib/mechanize/test_case/refresh_with_empty_url.rb +1 -0
- data/lib/mechanize/test_case/refresh_without_url.rb +1 -0
- data/lib/mechanize/test_case/response_code_servlet.rb +1 -0
- data/lib/mechanize/test_case/robots_txt_servlet.rb +15 -0
- data/lib/mechanize/test_case/send_cookies_servlet.rb +1 -0
- data/lib/mechanize/test_case/server.rb +1 -0
- data/lib/mechanize/test_case/servlets.rb +4 -0
- data/lib/mechanize/test_case/verb_servlet.rb +5 -6
- data/lib/mechanize/unauthorized_error.rb +2 -1
- data/lib/mechanize/unsupported_scheme_error.rb +5 -2
- data/lib/mechanize/util.rb +90 -43
- data/lib/mechanize/version.rb +4 -0
- data/lib/mechanize/xml_file.rb +1 -0
- data/mechanize.gemspec +69 -0
- data/test/htdocs/dir with spaces/foo.html +1 -0
- data/test/htdocs/find_link.html +1 -4
- data/test/htdocs/tc_links.html +1 -1
- data/test/test_mechanize.rb +111 -55
- data/test/test_mechanize_cookie.rb +75 -60
- data/test/test_mechanize_cookie_jar.rb +112 -59
- data/test/test_mechanize_download.rb +13 -1
- data/test/test_mechanize_file.rb +10 -0
- data/test/test_mechanize_file_connection.rb +21 -3
- data/test/test_mechanize_file_response.rb +26 -2
- data/test/test_mechanize_form.rb +46 -11
- data/test/test_mechanize_form_check_box.rb +10 -0
- data/test/test_mechanize_form_encoding.rb +3 -8
- data/test/test_mechanize_form_keygen.rb +1 -0
- data/test/test_mechanize_form_multi_select_list.rb +5 -1
- data/test/test_mechanize_http_agent.rb +175 -18
- data/test/test_mechanize_http_auth_challenge.rb +14 -0
- data/test/test_mechanize_http_auth_realm.rb +7 -1
- data/test/test_mechanize_http_auth_store.rb +37 -0
- data/test/test_mechanize_http_content_disposition_parser.rb +35 -1
- data/test/test_mechanize_http_www_authenticate_parser.rb +24 -0
- data/test/test_mechanize_link.rb +60 -4
- data/test/test_mechanize_page.rb +82 -7
- data/test/test_mechanize_page_encoding.rb +2 -3
- data/test/test_mechanize_page_image.rb +1 -1
- data/test/test_mechanize_page_link.rb +20 -5
- data/test/test_mechanize_page_meta_refresh.rb +1 -1
- data/test/test_mechanize_parser.rb +12 -2
- data/test/test_mechanize_util.rb +46 -11
- metadata +198 -99
- data/.gemtest +0 -0
- data/.travis.yml +0 -26
- data/Manifest.txt +0 -205
- data/README.rdoc +0 -83
- data/lib/mechanize/monkey_patch.rb +0 -17
- data/test/htdocs/robots.txt +0 -2
data/lib/mechanize/page/base.rb
CHANGED
data/lib/mechanize/page/frame.rb
CHANGED
@@ -1,4 +1,5 @@
|
|
1
|
-
#
|
1
|
+
# frozen_string_literal: true
|
2
|
+
# A Frame object wraps a frame HTML element. Frame objects can be treated
|
2
3
|
# just like Link objects. They contain #src, the #link they refer to and a
|
3
4
|
# #name, the name of the frame they refer to. #src and #name are aliased to
|
4
5
|
# #href and #text respectively so that a Frame object can be treated just like
|
@@ -11,6 +12,8 @@ class Mechanize::Page::Frame < Mechanize::Page::Link
|
|
11
12
|
attr_reader :text
|
12
13
|
alias :name :text
|
13
14
|
|
15
|
+
attr_reader :node
|
16
|
+
|
14
17
|
def initialize(node, mech, referer)
|
15
18
|
super(node, mech, referer)
|
16
19
|
@node = node
|
data/lib/mechanize/page/image.rb
CHANGED
data/lib/mechanize/page/label.rb
CHANGED
data/lib/mechanize/page/link.rb
CHANGED
@@ -1,3 +1,4 @@
|
|
1
|
+
# frozen_string_literal: true
|
1
2
|
##
|
2
3
|
# This class encapsulates links. It contains the text and the URI for
|
3
4
|
# 'a' tags parsed out of an HTML page. If the link contains an image,
|
@@ -8,6 +9,8 @@
|
|
8
9
|
# <a href="http://example">Hello World</a>
|
9
10
|
# <a href="http://example"><img src="test.jpg" alt="Hello World"></a>
|
10
11
|
|
12
|
+
require 'addressable/uri'
|
13
|
+
|
11
14
|
class Mechanize::Page::Link
|
12
15
|
attr_reader :node
|
13
16
|
attr_reader :href
|
@@ -94,10 +97,19 @@ class Mechanize::Page::Link
|
|
94
97
|
begin
|
95
98
|
URI.parse @href
|
96
99
|
rescue URI::InvalidURIError
|
97
|
-
|
100
|
+
begin
|
101
|
+
URI.parse(Addressable::URI.escape(@href))
|
102
|
+
rescue Addressable::URI::InvalidURIError
|
103
|
+
raise URI::InvalidURIError
|
104
|
+
end
|
98
105
|
end
|
99
106
|
end
|
100
107
|
end
|
101
108
|
|
109
|
+
# A fully resolved URI for the #href for this link.
|
110
|
+
def resolved_uri
|
111
|
+
@mech.resolve uri
|
112
|
+
end
|
113
|
+
|
102
114
|
end
|
103
115
|
|
data/lib/mechanize/parser.rb
CHANGED
@@ -1,3 +1,4 @@
|
|
1
|
+
# frozen_string_literal: true
|
1
2
|
##
|
2
3
|
# The parser module provides standard methods for accessing the headers and
|
3
4
|
# content of a response that are shared across pluggable parsers.
|
@@ -118,13 +119,13 @@ module Mechanize::Parser
|
|
118
119
|
end
|
119
120
|
|
120
121
|
# Set the filename
|
121
|
-
if disposition = @response['content-disposition']
|
122
|
+
if (disposition = @response['content-disposition'])
|
122
123
|
content_disposition =
|
123
124
|
Mechanize::HTTP::ContentDispositionParser.parse disposition
|
124
125
|
|
125
|
-
if content_disposition && content_disposition.filename && content_disposition.filename != ''
|
126
|
+
if content_disposition && content_disposition.filename && content_disposition.filename != ''
|
126
127
|
filename = content_disposition.filename
|
127
|
-
filename = filename.
|
128
|
+
filename = filename.rpartition(/[\\\/]/).last
|
128
129
|
handled = true
|
129
130
|
end
|
130
131
|
end
|
@@ -1,7 +1,9 @@
|
|
1
|
+
# frozen_string_literal: true
|
1
2
|
require 'mechanize/file'
|
2
3
|
require 'mechanize/file_saver'
|
3
4
|
require 'mechanize/page'
|
4
5
|
require 'mechanize/xml_file'
|
6
|
+
require 'mime/types'
|
5
7
|
|
6
8
|
##
|
7
9
|
# Mechanize allows different parsers for different content types. Mechanize
|
@@ -68,6 +70,15 @@ class Mechanize::PluggableParser
|
|
68
70
|
:xml => ['text/xml', 'application/xml'],
|
69
71
|
}
|
70
72
|
|
73
|
+
InvalidContentTypeError =
|
74
|
+
if defined?(MIME::Type::InvalidContentType)
|
75
|
+
# For mime-types >=2.1
|
76
|
+
MIME::Type::InvalidContentType
|
77
|
+
else
|
78
|
+
# For mime-types <2.1
|
79
|
+
MIME::InvalidContentType
|
80
|
+
end
|
81
|
+
|
71
82
|
attr_accessor :default
|
72
83
|
|
73
84
|
def initialize
|
@@ -97,9 +108,11 @@ class Mechanize::PluggableParser
|
|
97
108
|
|
98
109
|
parser = @parsers[mime_type.to_s] ||
|
99
110
|
@parsers[mime_type.simplified] ||
|
111
|
+
# Starting from mime-types 3.0 x-prefix is deprecated as per IANA
|
112
|
+
(@parsers[MIME::Type.simplified(mime_type.to_s, remove_x_prefix: true)] rescue nil) ||
|
100
113
|
@parsers[mime_type.media_type] ||
|
101
114
|
default
|
102
|
-
rescue
|
115
|
+
rescue InvalidContentTypeError
|
103
116
|
default
|
104
117
|
end
|
105
118
|
|
@@ -1,3 +1,4 @@
|
|
1
|
+
# frozen_string_literal: true
|
1
2
|
# This error is raised when Mechanize encounters a response code it does not
|
2
3
|
# know how to handle. Currently, this exception will be thrown if Mechanize
|
3
4
|
# encounters response codes other than 200, 301, or 302. Any other response
|
@@ -16,7 +17,7 @@ class Mechanize::ResponseCodeError < Mechanize::Error
|
|
16
17
|
|
17
18
|
def to_s
|
18
19
|
response_class = Net::HTTPResponse::CODE_TO_OBJ[@response_code]
|
19
|
-
out = "#{@response_code} => #{response_class} "
|
20
|
+
out = String.new("#{@response_code} => #{response_class} ")
|
20
21
|
out << "for #{@page.uri} " if @page.respond_to? :uri # may be HTTPResponse
|
21
22
|
out << "-- #{super}"
|
22
23
|
end
|
data/lib/mechanize/test_case.rb
CHANGED
@@ -1,3 +1,4 @@
|
|
1
|
+
# frozen_string_literal: true
|
1
2
|
require 'mechanize'
|
2
3
|
require 'logger'
|
3
4
|
require 'tempfile'
|
@@ -14,6 +15,11 @@ end
|
|
14
15
|
|
15
16
|
require 'minitest/autorun'
|
16
17
|
|
18
|
+
begin
|
19
|
+
require 'minitest/pride'
|
20
|
+
rescue LoadError
|
21
|
+
end
|
22
|
+
|
17
23
|
##
|
18
24
|
# A generic test case for testing mechanize. Using a subclass of
|
19
25
|
# Mechanize::TestCase for your tests will create an isolated mechanize
|
@@ -52,12 +58,12 @@ class Mechanize::TestCase < Minitest::Test
|
|
52
58
|
|
53
59
|
def fake_page agent = @mech
|
54
60
|
uri = URI 'http://fake.example/'
|
55
|
-
html =
|
56
|
-
<html>
|
57
|
-
<body>
|
58
|
-
<form><input type="submit" value="submit" /></form>
|
59
|
-
</body>
|
60
|
-
</html>
|
61
|
+
html = String.new(<<~END)
|
62
|
+
<html>
|
63
|
+
<body>
|
64
|
+
<form><input type="submit" value="submit" /></form>
|
65
|
+
</body>
|
66
|
+
</html>
|
61
67
|
END
|
62
68
|
|
63
69
|
Mechanize::Page.new uri, nil, html, 200, agent
|
@@ -82,11 +88,9 @@ class Mechanize::TestCase < Minitest::Test
|
|
82
88
|
# Creates a Mechanize::CookieJar by parsing the given +str+
|
83
89
|
|
84
90
|
def cookie_jar str, uri = URI('http://example')
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
jar
|
91
|
+
Mechanize::CookieJar.new.tap do |jar|
|
92
|
+
jar.parse str, uri
|
93
|
+
end
|
90
94
|
end
|
91
95
|
|
92
96
|
##
|
@@ -104,22 +108,18 @@ class Mechanize::TestCase < Minitest::Test
|
|
104
108
|
# Creates a Nokogiri Node +element+ with the given +attributes+
|
105
109
|
|
106
110
|
def node element, attributes = {}
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
attributes.each do |name, value|
|
112
|
-
node[name] = value
|
111
|
+
Nokogiri::XML::Node.new(element, Nokogiri::HTML::Document.new).tap do |node|
|
112
|
+
attributes.each do |name, value|
|
113
|
+
node[name] = value
|
114
|
+
end
|
113
115
|
end
|
114
|
-
|
115
|
-
node
|
116
116
|
end
|
117
117
|
|
118
118
|
##
|
119
119
|
# Creates a Mechanize::Page for the given +uri+ with the given
|
120
120
|
# +content_type+, response +body+ and HTTP status +code+
|
121
121
|
|
122
|
-
def page uri, content_type = 'text/html', body =
|
122
|
+
def page uri, content_type = 'text/html', body = String.new, code = 200
|
123
123
|
uri = URI uri unless URI::Generic === uri
|
124
124
|
|
125
125
|
Mechanize::Page.new(uri, { 'content-type' => content_type }, body, code,
|
@@ -168,15 +168,25 @@ UQIBATANBgkqhkiG9w0BAQUFAANBAAAB////////////////////////////////
|
|
168
168
|
# Creates a Tempfile with +content+ that is immediately unlinked
|
169
169
|
|
170
170
|
def tempfile content
|
171
|
-
|
172
|
-
|
173
|
-
|
174
|
-
|
175
|
-
|
171
|
+
Tempfile.new(@NAME).tap do |body_io|
|
172
|
+
body_io.unlink
|
173
|
+
body_io.write content
|
174
|
+
body_io.flush
|
175
|
+
body_io.rewind
|
176
|
+
end
|
177
|
+
end
|
176
178
|
|
177
|
-
|
179
|
+
##
|
180
|
+
# Returns true if the current platform is a Windows platform
|
181
|
+
def windows?
|
182
|
+
::RUBY_PLATFORM =~ /mingw|mswin/
|
178
183
|
end
|
179
184
|
|
185
|
+
##
|
186
|
+
# Return the contents of the file without Windows carriage returns
|
187
|
+
def file_contents_without_cr(path)
|
188
|
+
File.read(path).gsub(/\r\n/, "\n")
|
189
|
+
end
|
180
190
|
end
|
181
191
|
|
182
192
|
require 'mechanize/test_case/servlets'
|
@@ -225,9 +235,9 @@ class Net::HTTP # :nodoc:
|
|
225
235
|
else
|
226
236
|
filename = "htdocs#{path.gsub(/[^\/\\.\w\s]/, '_')}"
|
227
237
|
unless PAGE_CACHE[filename]
|
228
|
-
open("#{Mechanize::TestCase::TEST_DIR}/#{filename}", 'rb')
|
238
|
+
::File.open("#{Mechanize::TestCase::TEST_DIR}/#{filename}", 'rb') do |io|
|
229
239
|
PAGE_CACHE[filename] = io.read
|
230
|
-
|
240
|
+
end
|
231
241
|
end
|
232
242
|
|
233
243
|
res.body = PAGE_CACHE[filename]
|
@@ -307,7 +317,7 @@ class Response # :nodoc:
|
|
307
317
|
|
308
318
|
def initialize
|
309
319
|
@header = {}
|
310
|
-
@body =
|
320
|
+
@body = String.new
|
311
321
|
@code = nil
|
312
322
|
@query = nil
|
313
323
|
@cookies = []
|
@@ -1,3 +1,4 @@
|
|
1
|
+
# frozen_string_literal: true
|
1
2
|
require 'stringio'
|
2
3
|
require 'zlib'
|
3
4
|
|
@@ -13,8 +14,8 @@ class GzipServlet < WEBrick::HTTPServlet::AbstractServlet
|
|
13
14
|
end
|
14
15
|
|
15
16
|
if name = req.query['file'] then
|
16
|
-
open
|
17
|
-
string =
|
17
|
+
::File.open("#{TEST_DIR}/htdocs/#{name}") do |io|
|
18
|
+
string = String.new
|
18
19
|
zipped = StringIO.new string, 'w'
|
19
20
|
Zlib::GzipWriter.wrap zipped do |gz|
|
20
21
|
gz.write io.read
|
@@ -22,7 +23,7 @@ class GzipServlet < WEBrick::HTTPServlet::AbstractServlet
|
|
22
23
|
res.body = string
|
23
24
|
end
|
24
25
|
else
|
25
|
-
res.body =
|
26
|
+
res.body = String.new
|
26
27
|
end
|
27
28
|
|
28
29
|
res['Content-Encoding'] = req['X-ResponseContentEncoding'] || 'gzip'
|
@@ -1,9 +1,9 @@
|
|
1
|
+
# frozen_string_literal: true
|
1
2
|
class HttpRefreshServlet < WEBrick::HTTPServlet::AbstractServlet
|
2
3
|
def do_GET(req, res)
|
3
4
|
res['Content-Type'] = req.query['ct'] || "text/html"
|
4
5
|
refresh_time = req.query['refresh_time'] || 0
|
5
6
|
refresh_url = req.query['refresh_url'] || '/'
|
6
|
-
res['Refresh'] = " #{refresh_time};url=#{refresh_url}
|
7
|
+
res['Refresh'] = " #{refresh_time};url=#{refresh_url}";
|
7
8
|
end
|
8
9
|
end
|
9
|
-
|
@@ -1,10 +1,10 @@
|
|
1
|
+
# frozen_string_literal: true
|
1
2
|
class InfiniteRefreshServlet < WEBrick::HTTPServlet::AbstractServlet
|
2
3
|
def do_GET(req, res)
|
3
4
|
address = "#{req.host}:#{req.port}"
|
4
5
|
res['Content-Type'] = req.query['ct'] || "text/html"
|
5
6
|
res.status = req.query['code'] ? req.query['code'].to_i : '302'
|
6
7
|
number = req.query['q'] ? req.query['q'].to_i : 0
|
7
|
-
res['Refresh'] = "0;url=http://#{address}/infinite_refresh?q=#{number + 1}
|
8
|
+
res['Refresh'] = "0;url=http://#{address}/infinite_refresh?q=#{number + 1}";
|
8
9
|
end
|
9
10
|
end
|
10
|
-
|