mechanize 2.7.6 → 2.8.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/.github/dependabot.yml +11 -0
- data/.github/workflows/ci-test.yml +53 -0
- data/.yardopts +8 -0
- data/{CHANGELOG.rdoc → CHANGELOG.md} +136 -87
- data/EXAMPLES.rdoc +1 -24
- data/Gemfile +1 -4
- data/{LICENSE.rdoc → LICENSE.txt} +4 -0
- data/README.md +77 -0
- data/Rakefile +18 -3
- data/examples/rubygems.rb +2 -2
- data/lib/mechanize.rb +3 -2
- data/lib/mechanize/chunked_termination_error.rb +1 -0
- data/lib/mechanize/content_type_error.rb +1 -0
- data/lib/mechanize/cookie.rb +1 -13
- data/lib/mechanize/cookie_jar.rb +4 -12
- data/lib/mechanize/directory_saver.rb +1 -0
- data/lib/mechanize/download.rb +2 -1
- data/lib/mechanize/element_matcher.rb +1 -0
- data/lib/mechanize/element_not_found_error.rb +1 -0
- data/lib/mechanize/file.rb +2 -1
- data/lib/mechanize/file_connection.rb +5 -3
- data/lib/mechanize/file_request.rb +1 -0
- data/lib/mechanize/file_response.rb +4 -1
- data/lib/mechanize/file_saver.rb +1 -0
- data/lib/mechanize/form.rb +2 -10
- data/lib/mechanize/form/button.rb +1 -0
- data/lib/mechanize/form/check_box.rb +1 -0
- data/lib/mechanize/form/field.rb +1 -0
- data/lib/mechanize/form/file_upload.rb +1 -0
- data/lib/mechanize/form/hidden.rb +1 -0
- data/lib/mechanize/form/image_button.rb +1 -0
- data/lib/mechanize/form/keygen.rb +1 -0
- data/lib/mechanize/form/multi_select_list.rb +1 -0
- data/lib/mechanize/form/option.rb +1 -0
- data/lib/mechanize/form/radio_button.rb +1 -0
- data/lib/mechanize/form/reset.rb +1 -0
- data/lib/mechanize/form/select_list.rb +1 -0
- data/lib/mechanize/form/submit.rb +1 -0
- data/lib/mechanize/form/text.rb +1 -0
- data/lib/mechanize/form/textarea.rb +1 -0
- data/lib/mechanize/headers.rb +1 -0
- data/lib/mechanize/history.rb +1 -0
- data/lib/mechanize/http.rb +1 -0
- data/lib/mechanize/http/agent.rb +16 -8
- data/lib/mechanize/http/auth_challenge.rb +1 -0
- data/lib/mechanize/http/auth_realm.rb +1 -0
- data/lib/mechanize/http/auth_store.rb +1 -0
- data/lib/mechanize/http/content_disposition_parser.rb +14 -2
- data/lib/mechanize/http/www_authenticate_parser.rb +3 -3
- data/lib/mechanize/image.rb +1 -0
- data/lib/mechanize/page.rb +4 -3
- data/lib/mechanize/page/base.rb +1 -0
- data/lib/mechanize/page/frame.rb +1 -0
- data/lib/mechanize/page/image.rb +1 -0
- data/lib/mechanize/page/label.rb +1 -0
- data/lib/mechanize/page/link.rb +8 -1
- data/lib/mechanize/page/meta_refresh.rb +1 -0
- data/lib/mechanize/parser.rb +1 -0
- data/lib/mechanize/pluggable_parsers.rb +1 -0
- data/lib/mechanize/prependable.rb +1 -0
- data/lib/mechanize/redirect_limit_reached_error.rb +1 -0
- data/lib/mechanize/redirect_not_get_or_head_error.rb +1 -0
- data/lib/mechanize/response_code_error.rb +2 -1
- data/lib/mechanize/response_read_error.rb +1 -0
- data/lib/mechanize/robots_disallowed_error.rb +1 -0
- data/lib/mechanize/test_case.rb +34 -29
- data/lib/mechanize/test_case/bad_chunking_servlet.rb +1 -0
- data/lib/mechanize/test_case/basic_auth_servlet.rb +1 -0
- data/lib/mechanize/test_case/content_type_servlet.rb +1 -0
- data/lib/mechanize/test_case/digest_auth_servlet.rb +1 -0
- data/lib/mechanize/test_case/file_upload_servlet.rb +1 -0
- data/lib/mechanize/test_case/form_servlet.rb +1 -0
- data/lib/mechanize/test_case/gzip_servlet.rb +4 -3
- data/lib/mechanize/test_case/header_servlet.rb +1 -0
- data/lib/mechanize/test_case/http_refresh_servlet.rb +1 -0
- data/lib/mechanize/test_case/infinite_redirect_servlet.rb +1 -0
- data/lib/mechanize/test_case/infinite_refresh_servlet.rb +1 -0
- data/lib/mechanize/test_case/many_cookies_as_string_servlet.rb +1 -0
- data/lib/mechanize/test_case/many_cookies_servlet.rb +1 -0
- data/lib/mechanize/test_case/modified_since_servlet.rb +1 -0
- data/lib/mechanize/test_case/ntlm_servlet.rb +1 -0
- data/lib/mechanize/test_case/one_cookie_no_spaces_servlet.rb +1 -0
- data/lib/mechanize/test_case/one_cookie_servlet.rb +1 -0
- data/lib/mechanize/test_case/quoted_value_cookie_servlet.rb +1 -0
- data/lib/mechanize/test_case/redirect_servlet.rb +1 -0
- data/lib/mechanize/test_case/referer_servlet.rb +1 -0
- data/lib/mechanize/test_case/refresh_with_empty_url.rb +1 -0
- data/lib/mechanize/test_case/refresh_without_url.rb +1 -0
- data/lib/mechanize/test_case/response_code_servlet.rb +1 -0
- data/lib/mechanize/test_case/robots_txt_servlet.rb +1 -0
- data/lib/mechanize/test_case/send_cookies_servlet.rb +1 -0
- data/lib/mechanize/test_case/server.rb +1 -0
- data/lib/mechanize/test_case/servlets.rb +1 -0
- data/lib/mechanize/test_case/verb_servlet.rb +5 -6
- data/lib/mechanize/unauthorized_error.rb +1 -0
- data/lib/mechanize/unsupported_scheme_error.rb +1 -0
- data/lib/mechanize/util.rb +2 -1
- data/lib/mechanize/version.rb +2 -1
- data/lib/mechanize/xml_file.rb +1 -0
- data/mechanize.gemspec +45 -35
- data/test/htdocs/dir with spaces/foo.html +1 -0
- data/test/htdocs/tc_links.html +1 -1
- data/test/test_mechanize.rb +19 -7
- data/test/test_mechanize_cookie.rb +19 -19
- data/test/test_mechanize_cookie_jar.rb +85 -53
- data/test/test_mechanize_download.rb +13 -1
- data/test/test_mechanize_file.rb +10 -0
- data/test/test_mechanize_file_connection.rb +21 -3
- data/test/test_mechanize_file_response.rb +25 -1
- data/test/test_mechanize_form.rb +12 -0
- data/test/test_mechanize_form_keygen.rb +1 -0
- data/test/test_mechanize_http_agent.rb +53 -8
- data/test/test_mechanize_http_content_disposition_parser.rb +27 -0
- data/test/test_mechanize_link.rb +24 -0
- data/test/test_mechanize_page_encoding.rb +28 -1
- metadata +117 -71
- data/.travis.yml +0 -36
- data/README.rdoc +0 -77
data/lib/mechanize/form/reset.rb
CHANGED
data/lib/mechanize/form/text.rb
CHANGED
data/lib/mechanize/headers.rb
CHANGED
data/lib/mechanize/history.rb
CHANGED
data/lib/mechanize/http.rb
CHANGED
data/lib/mechanize/http/agent.rb
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
1
2
|
require 'tempfile'
|
|
2
3
|
require 'net/ntlm'
|
|
3
|
-
require 'kconv'
|
|
4
4
|
require 'webrobots'
|
|
5
5
|
|
|
6
6
|
##
|
|
@@ -9,6 +9,9 @@ require 'webrobots'
|
|
|
9
9
|
|
|
10
10
|
class Mechanize::HTTP::Agent
|
|
11
11
|
|
|
12
|
+
CREDENTIAL_HEADERS = ['Authorization', 'Cookie']
|
|
13
|
+
POST_HEADERS = ['Content-Length', 'Content-MD5', 'Content-Type']
|
|
14
|
+
|
|
12
15
|
# :section: Headers
|
|
13
16
|
|
|
14
17
|
# Disables If-Modified-Since conditional requests (enabled by default)
|
|
@@ -838,7 +841,7 @@ class Mechanize::HTTP::Agent
|
|
|
838
841
|
|
|
839
842
|
out_io
|
|
840
843
|
rescue Zlib::Error => e
|
|
841
|
-
message = "error handling content-encoding #{response['Content-Encoding']}:"
|
|
844
|
+
message = String.new("error handling content-encoding #{response['Content-Encoding']}:")
|
|
842
845
|
message << " #{e.message} (#{e.class})"
|
|
843
846
|
raise Mechanize::Error, message
|
|
844
847
|
ensure
|
|
@@ -986,14 +989,20 @@ class Mechanize::HTTP::Agent
|
|
|
986
989
|
|
|
987
990
|
redirect_method = method == :head ? :head : :get
|
|
988
991
|
|
|
992
|
+
new_uri = secure_resolve!(response['Location'].to_s, page)
|
|
993
|
+
@history.push(page, page.uri)
|
|
994
|
+
|
|
989
995
|
# Make sure we are not copying over the POST headers from the original request
|
|
990
|
-
|
|
991
|
-
headers.
|
|
996
|
+
POST_HEADERS.each do |key|
|
|
997
|
+
headers.delete_if { |h| h.casecmp?(key) }
|
|
992
998
|
end
|
|
993
999
|
|
|
994
|
-
|
|
995
|
-
|
|
996
|
-
|
|
1000
|
+
# Make sure we clear credential headers if being redirected to another site
|
|
1001
|
+
if new_uri.host != page.uri.host
|
|
1002
|
+
CREDENTIAL_HEADERS.each do |ch|
|
|
1003
|
+
headers.delete_if { |h| h.casecmp?(ch) }
|
|
1004
|
+
end
|
|
1005
|
+
end
|
|
997
1006
|
|
|
998
1007
|
fetch new_uri, redirect_method, headers, [], referer, redirects + 1
|
|
999
1008
|
end
|
|
@@ -1278,4 +1287,3 @@ class Mechanize::HTTP::Agent
|
|
|
1278
1287
|
end
|
|
1279
1288
|
|
|
1280
1289
|
require 'mechanize/http/auth_store'
|
|
1281
|
-
|
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
1
2
|
# coding: BINARY
|
|
2
3
|
|
|
3
4
|
require 'strscan'
|
|
@@ -16,6 +17,7 @@ end
|
|
|
16
17
|
# * Missing disposition-type
|
|
17
18
|
# * Multiple semicolons
|
|
18
19
|
# * Whitespace around semicolons
|
|
20
|
+
# * Dates in ISO 8601 format
|
|
19
21
|
|
|
20
22
|
class Mechanize::HTTP::ContentDispositionParser
|
|
21
23
|
|
|
@@ -93,7 +95,17 @@ class Mechanize::HTTP::ContentDispositionParser
|
|
|
93
95
|
when /^filename$/ then
|
|
94
96
|
rfc_2045_value
|
|
95
97
|
when /^(creation|modification|read)-date$/ then
|
|
96
|
-
|
|
98
|
+
date = rfc_2045_quoted_string
|
|
99
|
+
|
|
100
|
+
begin
|
|
101
|
+
Time.rfc822 date
|
|
102
|
+
rescue ArgumentError
|
|
103
|
+
begin
|
|
104
|
+
Time.iso8601 date
|
|
105
|
+
rescue ArgumentError
|
|
106
|
+
nil
|
|
107
|
+
end
|
|
108
|
+
end
|
|
97
109
|
when /^size$/ then
|
|
98
110
|
rfc_2045_value.to_i(10)
|
|
99
111
|
else
|
|
@@ -125,7 +137,7 @@ class Mechanize::HTTP::ContentDispositionParser
|
|
|
125
137
|
def rfc_2045_quoted_string
|
|
126
138
|
return nil unless @scanner.scan(/"/)
|
|
127
139
|
|
|
128
|
-
text = ''
|
|
140
|
+
text = String.new
|
|
129
141
|
|
|
130
142
|
while true do
|
|
131
143
|
chunk = @scanner.scan(/[\000-\014\016-\041\043-\133\135-\177]+/) # not \r "
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# coding: BINARY
|
|
1
|
+
# frozen_string_literal: true
|
|
2
2
|
|
|
3
3
|
require 'strscan'
|
|
4
4
|
|
|
@@ -151,10 +151,10 @@ class Mechanize::HTTP::WWWAuthenticateParser
|
|
|
151
151
|
def quoted_string
|
|
152
152
|
return nil unless @scanner.scan(/"/)
|
|
153
153
|
|
|
154
|
-
text = ''
|
|
154
|
+
text = String.new
|
|
155
155
|
|
|
156
156
|
while true do
|
|
157
|
-
chunk = @scanner.scan(/[\r\n \t\41\43-\176\200-\377]+/) # not "
|
|
157
|
+
chunk = @scanner.scan(/[\r\n \t\x21\x23-\x7e\u0080-\u00ff]+/) # not " which is \x22
|
|
158
158
|
|
|
159
159
|
if chunk then
|
|
160
160
|
text << chunk
|
data/lib/mechanize/image.rb
CHANGED
data/lib/mechanize/page.rb
CHANGED
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
1
2
|
##
|
|
2
3
|
# This class encapsulates an HTML page. If Mechanize finds a content
|
|
3
4
|
# type of 'text/html', this class will be instantiated and returned.
|
|
@@ -103,9 +104,9 @@ class Mechanize::Page < Mechanize::File
|
|
|
103
104
|
parser = self.parser unless parser
|
|
104
105
|
return false if parser.errors.empty?
|
|
105
106
|
parser.errors.any? do |error|
|
|
106
|
-
error.message =~ /(indicate\ encoding)|
|
|
107
|
-
|
|
108
|
-
|
|
107
|
+
error.message.scrub =~ /(indicate\ encoding)|
|
|
108
|
+
(Invalid\ char)|
|
|
109
|
+
(input\ conversion\ failed)/x
|
|
109
110
|
end
|
|
110
111
|
end
|
|
111
112
|
|
data/lib/mechanize/page/base.rb
CHANGED
data/lib/mechanize/page/frame.rb
CHANGED
data/lib/mechanize/page/image.rb
CHANGED
data/lib/mechanize/page/label.rb
CHANGED
data/lib/mechanize/page/link.rb
CHANGED
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
1
2
|
##
|
|
2
3
|
# This class encapsulates links. It contains the text and the URI for
|
|
3
4
|
# 'a' tags parsed out of an HTML page. If the link contains an image,
|
|
@@ -8,6 +9,8 @@
|
|
|
8
9
|
# <a href="http://example">Hello World</a>
|
|
9
10
|
# <a href="http://example"><img src="test.jpg" alt="Hello World"></a>
|
|
10
11
|
|
|
12
|
+
require 'addressable/uri'
|
|
13
|
+
|
|
11
14
|
class Mechanize::Page::Link
|
|
12
15
|
attr_reader :node
|
|
13
16
|
attr_reader :href
|
|
@@ -94,7 +97,11 @@ class Mechanize::Page::Link
|
|
|
94
97
|
begin
|
|
95
98
|
URI.parse @href
|
|
96
99
|
rescue URI::InvalidURIError
|
|
97
|
-
|
|
100
|
+
begin
|
|
101
|
+
URI.parse(Addressable::URI.escape(@href))
|
|
102
|
+
rescue Addressable::URI::InvalidURIError
|
|
103
|
+
raise URI::InvalidURIError
|
|
104
|
+
end
|
|
98
105
|
end
|
|
99
106
|
end
|
|
100
107
|
end
|
data/lib/mechanize/parser.rb
CHANGED
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
1
2
|
# This error is raised when Mechanize encounters a response code it does not
|
|
2
3
|
# know how to handle. Currently, this exception will be thrown if Mechanize
|
|
3
4
|
# encounters response codes other than 200, 301, or 302. Any other response
|
|
@@ -16,7 +17,7 @@ class Mechanize::ResponseCodeError < Mechanize::Error
|
|
|
16
17
|
|
|
17
18
|
def to_s
|
|
18
19
|
response_class = Net::HTTPResponse::CODE_TO_OBJ[@response_code]
|
|
19
|
-
out = "#{@response_code} => #{response_class} "
|
|
20
|
+
out = String.new("#{@response_code} => #{response_class} ")
|
|
20
21
|
out << "for #{@page.uri} " if @page.respond_to? :uri # may be HTTPResponse
|
|
21
22
|
out << "-- #{super}"
|
|
22
23
|
end
|
data/lib/mechanize/test_case.rb
CHANGED
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
1
2
|
require 'mechanize'
|
|
2
3
|
require 'logger'
|
|
3
4
|
require 'tempfile'
|
|
@@ -57,12 +58,12 @@ class Mechanize::TestCase < Minitest::Test
|
|
|
57
58
|
|
|
58
59
|
def fake_page agent = @mech
|
|
59
60
|
uri = URI 'http://fake.example/'
|
|
60
|
-
html = <<-END
|
|
61
|
-
<html>
|
|
62
|
-
<body>
|
|
63
|
-
<form><input type="submit" value="submit" /></form>
|
|
64
|
-
</body>
|
|
65
|
-
</html>
|
|
61
|
+
html = String.new(<<~END)
|
|
62
|
+
<html>
|
|
63
|
+
<body>
|
|
64
|
+
<form><input type="submit" value="submit" /></form>
|
|
65
|
+
</body>
|
|
66
|
+
</html>
|
|
66
67
|
END
|
|
67
68
|
|
|
68
69
|
Mechanize::Page.new uri, nil, html, 200, agent
|
|
@@ -87,11 +88,9 @@ class Mechanize::TestCase < Minitest::Test
|
|
|
87
88
|
# Creates a Mechanize::CookieJar by parsing the given +str+
|
|
88
89
|
|
|
89
90
|
def cookie_jar str, uri = URI('http://example')
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
jar
|
|
91
|
+
Mechanize::CookieJar.new.tap do |jar|
|
|
92
|
+
jar.parse str, uri
|
|
93
|
+
end
|
|
95
94
|
end
|
|
96
95
|
|
|
97
96
|
##
|
|
@@ -109,22 +108,18 @@ class Mechanize::TestCase < Minitest::Test
|
|
|
109
108
|
# Creates a Nokogiri Node +element+ with the given +attributes+
|
|
110
109
|
|
|
111
110
|
def node element, attributes = {}
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
attributes.each do |name, value|
|
|
117
|
-
node[name] = value
|
|
111
|
+
Nokogiri::XML::Node.new(element, Nokogiri::HTML::Document.new).tap do |node|
|
|
112
|
+
attributes.each do |name, value|
|
|
113
|
+
node[name] = value
|
|
114
|
+
end
|
|
118
115
|
end
|
|
119
|
-
|
|
120
|
-
node
|
|
121
116
|
end
|
|
122
117
|
|
|
123
118
|
##
|
|
124
119
|
# Creates a Mechanize::Page for the given +uri+ with the given
|
|
125
120
|
# +content_type+, response +body+ and HTTP status +code+
|
|
126
121
|
|
|
127
|
-
def page uri, content_type = 'text/html', body = '', code = 200
|
|
122
|
+
def page uri, content_type = 'text/html', body = String.new, code = 200
|
|
128
123
|
uri = URI uri unless URI::Generic === uri
|
|
129
124
|
|
|
130
125
|
Mechanize::Page.new(uri, { 'content-type' => content_type }, body, code,
|
|
@@ -173,15 +168,25 @@ UQIBATANBgkqhkiG9w0BAQUFAANBAAAB////////////////////////////////
|
|
|
173
168
|
# Creates a Tempfile with +content+ that is immediately unlinked
|
|
174
169
|
|
|
175
170
|
def tempfile content
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
171
|
+
Tempfile.new(@NAME).tap do |body_io|
|
|
172
|
+
body_io.unlink
|
|
173
|
+
body_io.write content
|
|
174
|
+
body_io.flush
|
|
175
|
+
body_io.rewind
|
|
176
|
+
end
|
|
177
|
+
end
|
|
181
178
|
|
|
182
|
-
|
|
179
|
+
##
|
|
180
|
+
# Returns true if the current platform is a Windows platform
|
|
181
|
+
def windows?
|
|
182
|
+
::RUBY_PLATFORM =~ /mingw|mswin/
|
|
183
183
|
end
|
|
184
184
|
|
|
185
|
+
##
|
|
186
|
+
# Return the contents of the file without Windows carriage returns
|
|
187
|
+
def file_contents_without_cr(path)
|
|
188
|
+
File.read(path).gsub(/\r\n/, "\n")
|
|
189
|
+
end
|
|
185
190
|
end
|
|
186
191
|
|
|
187
192
|
require 'mechanize/test_case/servlets'
|
|
@@ -230,9 +235,9 @@ class Net::HTTP # :nodoc:
|
|
|
230
235
|
else
|
|
231
236
|
filename = "htdocs#{path.gsub(/[^\/\\.\w\s]/, '_')}"
|
|
232
237
|
unless PAGE_CACHE[filename]
|
|
233
|
-
open("#{Mechanize::TestCase::TEST_DIR}/#{filename}", 'rb')
|
|
238
|
+
::File.open("#{Mechanize::TestCase::TEST_DIR}/#{filename}", 'rb') do |io|
|
|
234
239
|
PAGE_CACHE[filename] = io.read
|
|
235
|
-
|
|
240
|
+
end
|
|
236
241
|
end
|
|
237
242
|
|
|
238
243
|
res.body = PAGE_CACHE[filename]
|
|
@@ -312,7 +317,7 @@ class Response # :nodoc:
|
|
|
312
317
|
|
|
313
318
|
def initialize
|
|
314
319
|
@header = {}
|
|
315
|
-
@body = ''
|
|
320
|
+
@body = String.new
|
|
316
321
|
@code = nil
|
|
317
322
|
@query = nil
|
|
318
323
|
@cookies = []
|