mechanize 2.7.4 → 2.8.1
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of mechanize might be problematic. Click here for more details.
- checksums.yaml +5 -5
- data/.github/workflows/ci-test.yml +45 -0
- data/.yardopts +8 -0
- data/{CHANGELOG.rdoc → CHANGELOG.md} +151 -86
- data/EXAMPLES.rdoc +1 -24
- data/Gemfile +1 -1
- data/{LICENSE.rdoc → LICENSE.txt} +4 -0
- data/README.md +77 -0
- data/Rakefile +18 -3
- data/examples/rubygems.rb +2 -2
- data/lib/mechanize.rb +3 -2
- data/lib/mechanize/chunked_termination_error.rb +1 -0
- data/lib/mechanize/content_type_error.rb +1 -0
- data/lib/mechanize/cookie.rb +1 -13
- data/lib/mechanize/cookie_jar.rb +4 -12
- data/lib/mechanize/directory_saver.rb +1 -0
- data/lib/mechanize/download.rb +2 -1
- data/lib/mechanize/element_matcher.rb +5 -1
- data/lib/mechanize/element_not_found_error.rb +1 -0
- data/lib/mechanize/file.rb +2 -1
- data/lib/mechanize/file_connection.rb +5 -3
- data/lib/mechanize/file_request.rb +1 -0
- data/lib/mechanize/file_response.rb +4 -1
- data/lib/mechanize/file_saver.rb +1 -0
- data/lib/mechanize/form.rb +112 -45
- data/lib/mechanize/form/button.rb +1 -0
- data/lib/mechanize/form/check_box.rb +1 -0
- data/lib/mechanize/form/field.rb +47 -0
- data/lib/mechanize/form/file_upload.rb +1 -0
- data/lib/mechanize/form/hidden.rb +1 -0
- data/lib/mechanize/form/image_button.rb +1 -0
- data/lib/mechanize/form/keygen.rb +1 -0
- data/lib/mechanize/form/multi_select_list.rb +8 -14
- data/lib/mechanize/form/option.rb +3 -1
- data/lib/mechanize/form/radio_button.rb +1 -0
- data/lib/mechanize/form/reset.rb +1 -0
- data/lib/mechanize/form/select_list.rb +1 -0
- data/lib/mechanize/form/submit.rb +1 -0
- data/lib/mechanize/form/text.rb +1 -0
- data/lib/mechanize/form/textarea.rb +1 -0
- data/lib/mechanize/headers.rb +1 -0
- data/lib/mechanize/history.rb +2 -1
- data/lib/mechanize/http.rb +1 -0
- data/lib/mechanize/http/agent.rb +81 -38
- data/lib/mechanize/http/auth_challenge.rb +1 -0
- data/lib/mechanize/http/auth_realm.rb +2 -1
- data/lib/mechanize/http/auth_store.rb +1 -0
- data/lib/mechanize/http/content_disposition_parser.rb +18 -3
- data/lib/mechanize/http/www_authenticate_parser.rb +4 -4
- data/lib/mechanize/image.rb +1 -0
- data/lib/mechanize/page.rb +8 -5
- data/lib/mechanize/page/base.rb +1 -0
- data/lib/mechanize/page/frame.rb +4 -1
- data/lib/mechanize/page/image.rb +1 -0
- data/lib/mechanize/page/label.rb +1 -0
- data/lib/mechanize/page/link.rb +8 -1
- data/lib/mechanize/page/meta_refresh.rb +1 -0
- data/lib/mechanize/parser.rb +4 -3
- data/lib/mechanize/pluggable_parsers.rb +1 -0
- data/lib/mechanize/prependable.rb +1 -0
- data/lib/mechanize/redirect_limit_reached_error.rb +1 -0
- data/lib/mechanize/redirect_not_get_or_head_error.rb +1 -0
- data/lib/mechanize/response_code_error.rb +2 -1
- data/lib/mechanize/response_read_error.rb +1 -0
- data/lib/mechanize/robots_disallowed_error.rb +1 -0
- data/lib/mechanize/test_case.rb +34 -29
- data/lib/mechanize/test_case/bad_chunking_servlet.rb +1 -0
- data/lib/mechanize/test_case/basic_auth_servlet.rb +1 -0
- data/lib/mechanize/test_case/content_type_servlet.rb +1 -0
- data/lib/mechanize/test_case/digest_auth_servlet.rb +1 -0
- data/lib/mechanize/test_case/file_upload_servlet.rb +1 -0
- data/lib/mechanize/test_case/form_servlet.rb +1 -0
- data/lib/mechanize/test_case/gzip_servlet.rb +4 -3
- data/lib/mechanize/test_case/header_servlet.rb +1 -0
- data/lib/mechanize/test_case/http_refresh_servlet.rb +2 -2
- data/lib/mechanize/test_case/infinite_redirect_servlet.rb +1 -0
- data/lib/mechanize/test_case/infinite_refresh_servlet.rb +2 -2
- data/lib/mechanize/test_case/many_cookies_as_string_servlet.rb +1 -0
- data/lib/mechanize/test_case/many_cookies_servlet.rb +1 -0
- data/lib/mechanize/test_case/modified_since_servlet.rb +1 -0
- data/lib/mechanize/test_case/ntlm_servlet.rb +1 -0
- data/lib/mechanize/test_case/one_cookie_no_spaces_servlet.rb +1 -0
- data/lib/mechanize/test_case/one_cookie_servlet.rb +1 -0
- data/lib/mechanize/test_case/quoted_value_cookie_servlet.rb +1 -0
- data/lib/mechanize/test_case/redirect_servlet.rb +1 -0
- data/lib/mechanize/test_case/referer_servlet.rb +1 -0
- data/lib/mechanize/test_case/refresh_with_empty_url.rb +1 -0
- data/lib/mechanize/test_case/refresh_without_url.rb +1 -0
- data/lib/mechanize/test_case/response_code_servlet.rb +1 -0
- data/lib/mechanize/test_case/robots_txt_servlet.rb +15 -0
- data/lib/mechanize/test_case/send_cookies_servlet.rb +1 -0
- data/lib/mechanize/test_case/server.rb +1 -0
- data/lib/mechanize/test_case/servlets.rb +4 -0
- data/lib/mechanize/test_case/verb_servlet.rb +5 -6
- data/lib/mechanize/unauthorized_error.rb +2 -1
- data/lib/mechanize/unsupported_scheme_error.rb +1 -0
- data/lib/mechanize/util.rb +5 -3
- data/lib/mechanize/version.rb +2 -1
- data/lib/mechanize/xml_file.rb +1 -0
- data/mechanize.gemspec +39 -31
- data/test/htdocs/dir with spaces/foo.html +1 -0
- data/test/htdocs/find_link.html +1 -4
- data/test/htdocs/tc_links.html +1 -1
- data/test/test_mechanize.rb +57 -15
- data/test/test_mechanize_cookie.rb +75 -60
- data/test/test_mechanize_cookie_jar.rb +112 -59
- data/test/test_mechanize_download.rb +13 -1
- data/test/test_mechanize_file.rb +10 -0
- data/test/test_mechanize_file_connection.rb +21 -3
- data/test/test_mechanize_file_response.rb +26 -2
- data/test/test_mechanize_form.rb +27 -11
- data/test/test_mechanize_form_check_box.rb +10 -0
- data/test/test_mechanize_form_encoding.rb +1 -1
- data/test/test_mechanize_form_keygen.rb +1 -0
- data/test/test_mechanize_form_multi_select_list.rb +5 -1
- data/test/test_mechanize_http_agent.rb +116 -8
- data/test/test_mechanize_http_auth_challenge.rb +14 -0
- data/test/test_mechanize_http_auth_realm.rb +7 -1
- data/test/test_mechanize_http_auth_store.rb +37 -0
- data/test/test_mechanize_http_content_disposition_parser.rb +35 -1
- data/test/test_mechanize_http_www_authenticate_parser.rb +16 -0
- data/test/test_mechanize_link.rb +47 -4
- data/test/test_mechanize_page.rb +29 -1
- data/test/test_mechanize_page_encoding.rb +23 -1
- data/test/test_mechanize_page_image.rb +1 -1
- data/test/test_mechanize_page_link.rb +3 -3
- data/test/test_mechanize_page_meta_refresh.rb +1 -1
- data/test/test_mechanize_parser.rb +12 -2
- data/test/test_mechanize_util.rb +1 -1
- metadata +105 -81
- data/.travis.yml +0 -25
- data/Manifest.txt +0 -204
- data/README.rdoc +0 -77
- data/test/htdocs/robots.txt +0 -2
@@ -1,3 +1,4 @@
|
|
1
|
+
# frozen_string_literal: true
|
1
2
|
class Mechanize::HTTP::AuthRealm
|
2
3
|
|
3
4
|
attr_reader :scheme
|
@@ -7,7 +8,7 @@ class Mechanize::HTTP::AuthRealm
|
|
7
8
|
def initialize scheme, uri, realm
|
8
9
|
@scheme = scheme
|
9
10
|
@uri = uri
|
10
|
-
@realm = realm
|
11
|
+
@realm = realm if realm
|
11
12
|
end
|
12
13
|
|
13
14
|
def == other
|
@@ -1,3 +1,4 @@
|
|
1
|
+
# frozen_string_literal: true
|
1
2
|
# coding: BINARY
|
2
3
|
|
3
4
|
require 'strscan'
|
@@ -16,6 +17,7 @@ end
|
|
16
17
|
# * Missing disposition-type
|
17
18
|
# * Multiple semicolons
|
18
19
|
# * Whitespace around semicolons
|
20
|
+
# * Dates in ISO 8601 format
|
19
21
|
|
20
22
|
class Mechanize::HTTP::ContentDispositionParser
|
21
23
|
|
@@ -93,7 +95,17 @@ class Mechanize::HTTP::ContentDispositionParser
|
|
93
95
|
when /^filename$/ then
|
94
96
|
rfc_2045_value
|
95
97
|
when /^(creation|modification|read)-date$/ then
|
96
|
-
|
98
|
+
date = rfc_2045_quoted_string
|
99
|
+
|
100
|
+
begin
|
101
|
+
Time.rfc822 date
|
102
|
+
rescue ArgumentError
|
103
|
+
begin
|
104
|
+
Time.iso8601 date
|
105
|
+
rescue ArgumentError
|
106
|
+
nil
|
107
|
+
end
|
108
|
+
end
|
97
109
|
when /^size$/ then
|
98
110
|
rfc_2045_value.to_i(10)
|
99
111
|
else
|
@@ -125,7 +137,7 @@ class Mechanize::HTTP::ContentDispositionParser
|
|
125
137
|
def rfc_2045_quoted_string
|
126
138
|
return nil unless @scanner.scan(/"/)
|
127
139
|
|
128
|
-
text =
|
140
|
+
text = String.new
|
129
141
|
|
130
142
|
while true do
|
131
143
|
chunk = @scanner.scan(/[\000-\014\016-\041\043-\133\135-\177]+/) # not \r "
|
@@ -141,7 +153,10 @@ class Mechanize::HTTP::ContentDispositionParser
|
|
141
153
|
text << " "
|
142
154
|
end
|
143
155
|
else
|
144
|
-
if '"' == @scanner.peek(
|
156
|
+
if '\\"' == @scanner.peek(2) then
|
157
|
+
@scanner.skip(/\\/)
|
158
|
+
text << @scanner.get_byte
|
159
|
+
elsif '"' == @scanner.peek(1) then
|
145
160
|
@scanner.get_byte
|
146
161
|
break
|
147
162
|
else
|
@@ -1,4 +1,4 @@
|
|
1
|
-
#
|
1
|
+
# frozen_string_literal: true
|
2
2
|
|
3
3
|
require 'strscan'
|
4
4
|
|
@@ -34,7 +34,7 @@ class Mechanize::HTTP::WWWAuthenticateParser
|
|
34
34
|
scan_comma_spaces
|
35
35
|
end
|
36
36
|
|
37
|
-
|
37
|
+
break unless scheme
|
38
38
|
challenge.scheme = scheme
|
39
39
|
|
40
40
|
space = spaces
|
@@ -151,10 +151,10 @@ class Mechanize::HTTP::WWWAuthenticateParser
|
|
151
151
|
def quoted_string
|
152
152
|
return nil unless @scanner.scan(/"/)
|
153
153
|
|
154
|
-
text =
|
154
|
+
text = String.new
|
155
155
|
|
156
156
|
while true do
|
157
|
-
chunk = @scanner.scan(/[\r\n \t\
|
157
|
+
chunk = @scanner.scan(/[\r\n \t\x21\x23-\x7e\u0080-\u00ff]+/) # not " which is \x22
|
158
158
|
|
159
159
|
if chunk then
|
160
160
|
text << chunk
|
data/lib/mechanize/image.rb
CHANGED
data/lib/mechanize/page.rb
CHANGED
@@ -1,3 +1,4 @@
|
|
1
|
+
# frozen_string_literal: true
|
1
2
|
##
|
2
3
|
# This class encapsulates an HTML page. If Mechanize finds a content
|
3
4
|
# type of 'text/html', this class will be instantiated and returned.
|
@@ -58,7 +59,7 @@ class Mechanize::Page < Mechanize::File
|
|
58
59
|
def title
|
59
60
|
@title ||=
|
60
61
|
if doc = parser
|
61
|
-
title = doc.
|
62
|
+
title = doc.xpath('string(((/html/head | /html | /head | /)/title)[1])').to_s
|
62
63
|
title.empty? ? nil : title
|
63
64
|
end
|
64
65
|
end
|
@@ -103,9 +104,9 @@ class Mechanize::Page < Mechanize::File
|
|
103
104
|
parser = self.parser unless parser
|
104
105
|
return false if parser.errors.empty?
|
105
106
|
parser.errors.any? do |error|
|
106
|
-
error.message =~ /(indicate\ encoding)|
|
107
|
-
|
108
|
-
|
107
|
+
error.message.scrub =~ /(indicate\ encoding)|
|
108
|
+
(Invalid\ char)|
|
109
|
+
(input\ conversion\ failed)/x
|
109
110
|
end
|
110
111
|
end
|
111
112
|
|
@@ -282,7 +283,9 @@ class Mechanize::Page < Mechanize::File
|
|
282
283
|
# value.
|
283
284
|
#
|
284
285
|
# :class, :dom_class: selects forms with a #dom_class value that
|
285
|
-
# matches this value.
|
286
|
+
# matches this value. Note that class attribute values are compared
|
287
|
+
# literally as string, so forms_with(class: "a") does not match a
|
288
|
+
# form with class="a b". Use forms_with(css: "form.a") instead.
|
286
289
|
#
|
287
290
|
# :search: only selects forms matching this selector expression.
|
288
291
|
#
|
data/lib/mechanize/page/base.rb
CHANGED
data/lib/mechanize/page/frame.rb
CHANGED
@@ -1,4 +1,5 @@
|
|
1
|
-
#
|
1
|
+
# frozen_string_literal: true
|
2
|
+
# A Frame object wraps a frame HTML element. Frame objects can be treated
|
2
3
|
# just like Link objects. They contain #src, the #link they refer to and a
|
3
4
|
# #name, the name of the frame they refer to. #src and #name are aliased to
|
4
5
|
# #href and #text respectively so that a Frame object can be treated just like
|
@@ -11,6 +12,8 @@ class Mechanize::Page::Frame < Mechanize::Page::Link
|
|
11
12
|
attr_reader :text
|
12
13
|
alias :name :text
|
13
14
|
|
15
|
+
attr_reader :node
|
16
|
+
|
14
17
|
def initialize(node, mech, referer)
|
15
18
|
super(node, mech, referer)
|
16
19
|
@node = node
|
data/lib/mechanize/page/image.rb
CHANGED
data/lib/mechanize/page/label.rb
CHANGED
data/lib/mechanize/page/link.rb
CHANGED
@@ -1,3 +1,4 @@
|
|
1
|
+
# frozen_string_literal: true
|
1
2
|
##
|
2
3
|
# This class encapsulates links. It contains the text and the URI for
|
3
4
|
# 'a' tags parsed out of an HTML page. If the link contains an image,
|
@@ -8,6 +9,8 @@
|
|
8
9
|
# <a href="http://example">Hello World</a>
|
9
10
|
# <a href="http://example"><img src="test.jpg" alt="Hello World"></a>
|
10
11
|
|
12
|
+
require 'addressable/uri'
|
13
|
+
|
11
14
|
class Mechanize::Page::Link
|
12
15
|
attr_reader :node
|
13
16
|
attr_reader :href
|
@@ -94,7 +97,11 @@ class Mechanize::Page::Link
|
|
94
97
|
begin
|
95
98
|
URI.parse @href
|
96
99
|
rescue URI::InvalidURIError
|
97
|
-
|
100
|
+
begin
|
101
|
+
URI.parse(Addressable::URI.escape(@href))
|
102
|
+
rescue Addressable::URI::InvalidURIError
|
103
|
+
raise URI::InvalidURIError
|
104
|
+
end
|
98
105
|
end
|
99
106
|
end
|
100
107
|
end
|
data/lib/mechanize/parser.rb
CHANGED
@@ -1,3 +1,4 @@
|
|
1
|
+
# frozen_string_literal: true
|
1
2
|
##
|
2
3
|
# The parser module provides standard methods for accessing the headers and
|
3
4
|
# content of a response that are shared across pluggable parsers.
|
@@ -118,13 +119,13 @@ module Mechanize::Parser
|
|
118
119
|
end
|
119
120
|
|
120
121
|
# Set the filename
|
121
|
-
if disposition = @response['content-disposition']
|
122
|
+
if (disposition = @response['content-disposition'])
|
122
123
|
content_disposition =
|
123
124
|
Mechanize::HTTP::ContentDispositionParser.parse disposition
|
124
125
|
|
125
|
-
if content_disposition && content_disposition.filename && content_disposition.filename != ''
|
126
|
+
if content_disposition && content_disposition.filename && content_disposition.filename != ''
|
126
127
|
filename = content_disposition.filename
|
127
|
-
filename = filename.
|
128
|
+
filename = filename.rpartition(/[\\\/]/).last
|
128
129
|
handled = true
|
129
130
|
end
|
130
131
|
end
|
@@ -1,3 +1,4 @@
|
|
1
|
+
# frozen_string_literal: true
|
1
2
|
# This error is raised when Mechanize encounters a response code it does not
|
2
3
|
# know how to handle. Currently, this exception will be thrown if Mechanize
|
3
4
|
# encounters response codes other than 200, 301, or 302. Any other response
|
@@ -16,7 +17,7 @@ class Mechanize::ResponseCodeError < Mechanize::Error
|
|
16
17
|
|
17
18
|
def to_s
|
18
19
|
response_class = Net::HTTPResponse::CODE_TO_OBJ[@response_code]
|
19
|
-
out = "#{@response_code} => #{response_class} "
|
20
|
+
out = String.new("#{@response_code} => #{response_class} ")
|
20
21
|
out << "for #{@page.uri} " if @page.respond_to? :uri # may be HTTPResponse
|
21
22
|
out << "-- #{super}"
|
22
23
|
end
|
data/lib/mechanize/test_case.rb
CHANGED
@@ -1,3 +1,4 @@
|
|
1
|
+
# frozen_string_literal: true
|
1
2
|
require 'mechanize'
|
2
3
|
require 'logger'
|
3
4
|
require 'tempfile'
|
@@ -57,12 +58,12 @@ class Mechanize::TestCase < Minitest::Test
|
|
57
58
|
|
58
59
|
def fake_page agent = @mech
|
59
60
|
uri = URI 'http://fake.example/'
|
60
|
-
html =
|
61
|
-
<html>
|
62
|
-
<body>
|
63
|
-
<form><input type="submit" value="submit" /></form>
|
64
|
-
</body>
|
65
|
-
</html>
|
61
|
+
html = String.new(<<~END)
|
62
|
+
<html>
|
63
|
+
<body>
|
64
|
+
<form><input type="submit" value="submit" /></form>
|
65
|
+
</body>
|
66
|
+
</html>
|
66
67
|
END
|
67
68
|
|
68
69
|
Mechanize::Page.new uri, nil, html, 200, agent
|
@@ -87,11 +88,9 @@ class Mechanize::TestCase < Minitest::Test
|
|
87
88
|
# Creates a Mechanize::CookieJar by parsing the given +str+
|
88
89
|
|
89
90
|
def cookie_jar str, uri = URI('http://example')
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
jar
|
91
|
+
Mechanize::CookieJar.new.tap do |jar|
|
92
|
+
jar.parse str, uri
|
93
|
+
end
|
95
94
|
end
|
96
95
|
|
97
96
|
##
|
@@ -109,22 +108,18 @@ class Mechanize::TestCase < Minitest::Test
|
|
109
108
|
# Creates a Nokogiri Node +element+ with the given +attributes+
|
110
109
|
|
111
110
|
def node element, attributes = {}
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
attributes.each do |name, value|
|
117
|
-
node[name] = value
|
111
|
+
Nokogiri::XML::Node.new(element, Nokogiri::HTML::Document.new).tap do |node|
|
112
|
+
attributes.each do |name, value|
|
113
|
+
node[name] = value
|
114
|
+
end
|
118
115
|
end
|
119
|
-
|
120
|
-
node
|
121
116
|
end
|
122
117
|
|
123
118
|
##
|
124
119
|
# Creates a Mechanize::Page for the given +uri+ with the given
|
125
120
|
# +content_type+, response +body+ and HTTP status +code+
|
126
121
|
|
127
|
-
def page uri, content_type = 'text/html', body =
|
122
|
+
def page uri, content_type = 'text/html', body = String.new, code = 200
|
128
123
|
uri = URI uri unless URI::Generic === uri
|
129
124
|
|
130
125
|
Mechanize::Page.new(uri, { 'content-type' => content_type }, body, code,
|
@@ -173,15 +168,25 @@ UQIBATANBgkqhkiG9w0BAQUFAANBAAAB////////////////////////////////
|
|
173
168
|
# Creates a Tempfile with +content+ that is immediately unlinked
|
174
169
|
|
175
170
|
def tempfile content
|
176
|
-
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
|
171
|
+
Tempfile.new(@NAME).tap do |body_io|
|
172
|
+
body_io.unlink
|
173
|
+
body_io.write content
|
174
|
+
body_io.flush
|
175
|
+
body_io.rewind
|
176
|
+
end
|
177
|
+
end
|
181
178
|
|
182
|
-
|
179
|
+
##
|
180
|
+
# Returns true if the current platform is a Windows platform
|
181
|
+
def windows?
|
182
|
+
::RUBY_PLATFORM =~ /mingw|mswin/
|
183
183
|
end
|
184
184
|
|
185
|
+
##
|
186
|
+
# Return the contents of the file without Windows carriage returns
|
187
|
+
def file_contents_without_cr(path)
|
188
|
+
File.read(path).gsub(/\r\n/, "\n")
|
189
|
+
end
|
185
190
|
end
|
186
191
|
|
187
192
|
require 'mechanize/test_case/servlets'
|
@@ -230,9 +235,9 @@ class Net::HTTP # :nodoc:
|
|
230
235
|
else
|
231
236
|
filename = "htdocs#{path.gsub(/[^\/\\.\w\s]/, '_')}"
|
232
237
|
unless PAGE_CACHE[filename]
|
233
|
-
open("#{Mechanize::TestCase::TEST_DIR}/#{filename}", 'rb')
|
238
|
+
::File.open("#{Mechanize::TestCase::TEST_DIR}/#{filename}", 'rb') do |io|
|
234
239
|
PAGE_CACHE[filename] = io.read
|
235
|
-
|
240
|
+
end
|
236
241
|
end
|
237
242
|
|
238
243
|
res.body = PAGE_CACHE[filename]
|
@@ -312,7 +317,7 @@ class Response # :nodoc:
|
|
312
317
|
|
313
318
|
def initialize
|
314
319
|
@header = {}
|
315
|
-
@body =
|
320
|
+
@body = String.new
|
316
321
|
@code = nil
|
317
322
|
@query = nil
|
318
323
|
@cookies = []
|