mechanize 2.7.7 → 2.8.0
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of mechanize might be problematic. Click here for more details.
- checksums.yaml +4 -4
- data/.github/workflows/ci-test.yml +24 -4
- data/.yardopts +8 -0
- data/{CHANGELOG.rdoc → CHANGELOG.md} +81 -66
- data/Gemfile +1 -6
- data/{LICENSE.rdoc → LICENSE.txt} +4 -0
- data/README.md +79 -0
- data/Rakefile +18 -3
- data/lib/mechanize.rb +1 -0
- data/lib/mechanize/chunked_termination_error.rb +1 -0
- data/lib/mechanize/content_type_error.rb +1 -0
- data/lib/mechanize/cookie.rb +1 -13
- data/lib/mechanize/cookie_jar.rb +2 -10
- data/lib/mechanize/directory_saver.rb +1 -0
- data/lib/mechanize/download.rb +1 -0
- data/lib/mechanize/element_matcher.rb +1 -0
- data/lib/mechanize/element_not_found_error.rb +1 -0
- data/lib/mechanize/file.rb +1 -0
- data/lib/mechanize/file_connection.rb +5 -3
- data/lib/mechanize/file_request.rb +1 -0
- data/lib/mechanize/file_response.rb +3 -0
- data/lib/mechanize/file_saver.rb +1 -0
- data/lib/mechanize/form.rb +1 -9
- data/lib/mechanize/form/button.rb +1 -0
- data/lib/mechanize/form/check_box.rb +1 -0
- data/lib/mechanize/form/field.rb +1 -0
- data/lib/mechanize/form/file_upload.rb +1 -0
- data/lib/mechanize/form/hidden.rb +1 -0
- data/lib/mechanize/form/image_button.rb +1 -0
- data/lib/mechanize/form/keygen.rb +1 -0
- data/lib/mechanize/form/multi_select_list.rb +1 -0
- data/lib/mechanize/form/option.rb +1 -0
- data/lib/mechanize/form/radio_button.rb +1 -0
- data/lib/mechanize/form/reset.rb +1 -0
- data/lib/mechanize/form/select_list.rb +1 -0
- data/lib/mechanize/form/submit.rb +1 -0
- data/lib/mechanize/form/text.rb +1 -0
- data/lib/mechanize/form/textarea.rb +1 -0
- data/lib/mechanize/headers.rb +1 -0
- data/lib/mechanize/history.rb +1 -0
- data/lib/mechanize/http.rb +1 -0
- data/lib/mechanize/http/agent.rb +16 -8
- data/lib/mechanize/http/auth_challenge.rb +1 -0
- data/lib/mechanize/http/auth_realm.rb +1 -0
- data/lib/mechanize/http/auth_store.rb +1 -0
- data/lib/mechanize/http/content_disposition_parser.rb +14 -2
- data/lib/mechanize/http/www_authenticate_parser.rb +3 -3
- data/lib/mechanize/image.rb +1 -0
- data/lib/mechanize/page.rb +1 -0
- data/lib/mechanize/page/base.rb +1 -0
- data/lib/mechanize/page/frame.rb +1 -0
- data/lib/mechanize/page/image.rb +1 -0
- data/lib/mechanize/page/label.rb +1 -0
- data/lib/mechanize/page/link.rb +8 -1
- data/lib/mechanize/page/meta_refresh.rb +1 -0
- data/lib/mechanize/parser.rb +1 -0
- data/lib/mechanize/pluggable_parsers.rb +1 -0
- data/lib/mechanize/prependable.rb +1 -0
- data/lib/mechanize/redirect_limit_reached_error.rb +1 -0
- data/lib/mechanize/redirect_not_get_or_head_error.rb +1 -0
- data/lib/mechanize/response_code_error.rb +2 -1
- data/lib/mechanize/response_read_error.rb +1 -0
- data/lib/mechanize/robots_disallowed_error.rb +1 -0
- data/lib/mechanize/test_case.rb +32 -27
- data/lib/mechanize/test_case/bad_chunking_servlet.rb +1 -0
- data/lib/mechanize/test_case/basic_auth_servlet.rb +1 -0
- data/lib/mechanize/test_case/content_type_servlet.rb +1 -0
- data/lib/mechanize/test_case/digest_auth_servlet.rb +1 -0
- data/lib/mechanize/test_case/file_upload_servlet.rb +1 -0
- data/lib/mechanize/test_case/form_servlet.rb +1 -0
- data/lib/mechanize/test_case/gzip_servlet.rb +1 -0
- data/lib/mechanize/test_case/header_servlet.rb +1 -0
- data/lib/mechanize/test_case/http_refresh_servlet.rb +1 -0
- data/lib/mechanize/test_case/infinite_redirect_servlet.rb +1 -0
- data/lib/mechanize/test_case/infinite_refresh_servlet.rb +1 -0
- data/lib/mechanize/test_case/many_cookies_as_string_servlet.rb +1 -0
- data/lib/mechanize/test_case/many_cookies_servlet.rb +1 -0
- data/lib/mechanize/test_case/modified_since_servlet.rb +1 -0
- data/lib/mechanize/test_case/ntlm_servlet.rb +1 -0
- data/lib/mechanize/test_case/one_cookie_no_spaces_servlet.rb +1 -0
- data/lib/mechanize/test_case/one_cookie_servlet.rb +1 -0
- data/lib/mechanize/test_case/quoted_value_cookie_servlet.rb +1 -0
- data/lib/mechanize/test_case/redirect_servlet.rb +1 -0
- data/lib/mechanize/test_case/referer_servlet.rb +1 -0
- data/lib/mechanize/test_case/refresh_with_empty_url.rb +1 -0
- data/lib/mechanize/test_case/refresh_without_url.rb +1 -0
- data/lib/mechanize/test_case/response_code_servlet.rb +1 -0
- data/lib/mechanize/test_case/robots_txt_servlet.rb +1 -0
- data/lib/mechanize/test_case/send_cookies_servlet.rb +1 -0
- data/lib/mechanize/test_case/server.rb +1 -0
- data/lib/mechanize/test_case/servlets.rb +1 -0
- data/lib/mechanize/test_case/verb_servlet.rb +1 -0
- data/lib/mechanize/unauthorized_error.rb +1 -0
- data/lib/mechanize/unsupported_scheme_error.rb +1 -0
- data/lib/mechanize/util.rb +2 -1
- data/lib/mechanize/version.rb +2 -1
- data/lib/mechanize/xml_file.rb +1 -0
- data/mechanize.gemspec +38 -31
- data/test/htdocs/dir with spaces/foo.html +1 -0
- data/test/test_mechanize.rb +5 -4
- data/test/test_mechanize_cookie_jar.rb +2 -0
- data/test/test_mechanize_download.rb +1 -0
- data/test/test_mechanize_file.rb +1 -0
- data/test/test_mechanize_file_connection.rb +21 -3
- data/test/test_mechanize_file_response.rb +6 -0
- data/test/test_mechanize_http_agent.rb +47 -7
- data/test/test_mechanize_http_content_disposition_parser.rb +27 -0
- data/test/test_mechanize_link.rb +24 -0
- metadata +134 -52
- data/README.rdoc +0 -77
data/{LICENSE.rdoc → LICENSE.txt}
CHANGED
@@ -1,5 +1,9 @@
|
|
1
1
|
(The MIT License)
|
2
2
|
|
3
|
+
Copyright (c) 2005 by Michael Neumann (mneumann@ntecs.de)
|
4
|
+
|
5
|
+
Copyright (c) 2006-2021 by Eric Hodel, Akinori MUSHA, Aaron Patterson, Lee Jarvis, Mike Dalessio
|
6
|
+
|
3
7
|
Permission is hereby granted, free of charge, to any person obtaining
|
4
8
|
a copy of this software and associated documentation files (the
|
5
9
|
'Software'), to deal in the Software without restriction, including
|
data/README.md
ADDED
@@ -0,0 +1,79 @@
|
|
1
|
+
# Mechanize
|
2
|
+
|
3
|
+
* https://www.rubydoc.info/gems/mechanize/
|
4
|
+
* https://github.com/sparklemotion/mechanize
|
5
|
+
|
6
|
+
[![Test suite](https://github.com/sparklemotion/mechanize/actions/workflows/ci-test.yml/badge.svg)](https://github.com/sparklemotion/mechanize/actions/workflows/ci-test.yml)
|
7
|
+
|
8
|
+
|
9
|
+
## Description
|
10
|
+
|
11
|
+
The Mechanize library is used for automating interaction with websites. Mechanize automatically stores and sends cookies, follows redirects, and can follow links and submit forms. Form fields can be populated and submitted. Mechanize also keeps track of the sites that you have visited as a history.
|
12
|
+
|
13
|
+
|
14
|
+
## Dependencies
|
15
|
+
|
16
|
+
* Ruby >= 2.5
|
17
|
+
* Gems:
|
18
|
+
* `addressable`
|
19
|
+
* `domain_name`
|
20
|
+
* `http-cookie`
|
21
|
+
* `mime-types`
|
22
|
+
* `net-http-digest_auth`
|
23
|
+
* `net-http-persistent`
|
24
|
+
* `nokogiri`
|
25
|
+
* `rubyntlm`
|
26
|
+
* `webrick`
|
27
|
+
* `webrobots`
|
28
|
+
|
29
|
+
|
30
|
+
## Support:
|
31
|
+
|
32
|
+
The bug tracker is available here:
|
33
|
+
|
34
|
+
* https://github.com/sparklemotion/mechanize/issues
|
35
|
+
|
36
|
+
|
37
|
+
## Examples
|
38
|
+
|
39
|
+
If you are just starting, check out [GUIDE.rdoc](https://github.com/sparklemotion/mechanize/blob/main/GUIDE.rdoc) or [EXAMPLES.rdoc](https://github.com/sparklemotion/mechanize/blob/main/EXAMPLES.rdoc).
|
40
|
+
|
41
|
+
|
42
|
+
## Developers
|
43
|
+
|
44
|
+
Use bundler to install dependencies:
|
45
|
+
|
46
|
+
```
|
47
|
+
bundle install
|
48
|
+
```
|
49
|
+
|
50
|
+
Run all tests with:
|
51
|
+
|
52
|
+
```
|
53
|
+
bundle exec rake test
|
54
|
+
```
|
55
|
+
|
56
|
+
See also Mechanize::TestCase to read about the built-in testing infrastructure.
|
57
|
+
|
58
|
+
|
59
|
+
## Authors
|
60
|
+
|
61
|
+
* Eric Hodel
|
62
|
+
* Akinori MUSHA
|
63
|
+
* Aaron Patterson
|
64
|
+
* Lee Jarvis
|
65
|
+
* Mike Dalessio
|
66
|
+
|
67
|
+
|
68
|
+
## Acknowledgments
|
69
|
+
|
70
|
+
This library was heavily influenced by its namesake in the Perl world. A big
|
71
|
+
thanks goes to [Andy Lester](http://petdance.com), the author of the original Perl module WWW::Mechanize which is available [here](http://search.cpan.org/dist/WWW-Mechanize/). Ruby Mechanize would not be around without you!
|
72
|
+
|
73
|
+
Thank you to Michael Neumann for starting the Ruby version. Thanks to everyone who's helped out in various ways. Finally, thank you to the people using this library!
|
74
|
+
|
75
|
+
## License
|
76
|
+
|
77
|
+
This library is distributed under the MIT license. Please see the [LICENSE](http://docs.seattlerb.org/mechanize/LICENSE_rdoc.html) file.
|
78
|
+
|
79
|
+
|
data/Rakefile
CHANGED
@@ -23,9 +23,9 @@ task('ssl_cert') do |p|
|
|
23
23
|
end
|
24
24
|
|
25
25
|
RDoc::Task.new do |rdoc|
|
26
|
-
rdoc.main = "README.rdoc"
|
26
|
+
rdoc.main = "README.md"
|
27
27
|
rdoc.rdoc_dir = 'doc'
|
28
|
-
rdoc.rdoc_files.include( "CHANGELOG.rdoc", "EXAMPLES.rdoc", "GUIDE.rdoc", "LICENSE.rdoc", "README.rdoc", "lib/**/*.rb")
|
28
|
+
rdoc.rdoc_files.include( "CHANGELOG.md", "EXAMPLES.rdoc", "GUIDE.rdoc", "LICENSE.txt", "README.md", "lib/**/*.rb")
|
29
29
|
end
|
30
30
|
|
31
31
|
desc "Run tests"
|
@@ -38,4 +38,19 @@ task publish_docs: %w[rdoc] do
|
|
38
38
|
sh 'rsync', '-avzO', '--delete', 'doc/', 'docs-push.seattlerb.org:/data/www/docs.seattlerb.org/mechanize/'
|
39
39
|
end
|
40
40
|
|
41
|
-
|
41
|
+
desc "Run rubocop checks"
|
42
|
+
task :rubocop => ["rubocop:security", "rubocop:frozen_string_literals"]
|
43
|
+
|
44
|
+
namespace "rubocop" do
|
45
|
+
desc "Run rubocop security check"
|
46
|
+
task :security do
|
47
|
+
sh "rubocop lib --only Security"
|
48
|
+
end
|
49
|
+
|
50
|
+
desc "Run rubocop string literals check"
|
51
|
+
task :frozen_string_literals do
|
52
|
+
sh "rubocop lib --auto-correct-all --only Style/FrozenStringLiteralComment"
|
53
|
+
end
|
54
|
+
end
|
55
|
+
|
56
|
+
task default: [:rubocop, :test]
|
data/lib/mechanize.rb
CHANGED
data/lib/mechanize/cookie.rb
CHANGED
@@ -1,3 +1,4 @@
|
|
1
|
+
# frozen_string_literal: true
|
1
2
|
warn 'mechanize/cookie will be deprecated. Please migrate to the http-cookie APIs.' if $VERBOSE
|
2
3
|
|
3
4
|
require 'http/cookie'
|
@@ -50,19 +51,6 @@ class Mechanize
|
|
50
51
|
|
51
52
|
Cookie = ::HTTP::Cookie
|
52
53
|
|
53
|
-
# Compatibility for Ruby 1.8/1.9
|
54
|
-
unless Cookie.respond_to?(:prepend, true)
|
55
|
-
require 'mechanize/prependable'
|
56
|
-
|
57
|
-
class Cookie
|
58
|
-
extend Prependable
|
59
|
-
|
60
|
-
class << self
|
61
|
-
extend Prependable
|
62
|
-
end
|
63
|
-
end
|
64
|
-
end
|
65
|
-
|
66
54
|
class Cookie
|
67
55
|
prepend CookieIMethods
|
68
56
|
|
data/lib/mechanize/cookie_jar.rb
CHANGED
@@ -1,3 +1,4 @@
|
|
1
|
+
# frozen_string_literal: true
|
1
2
|
warn 'mechanize/cookie_jar will be deprecated. Please migrate to the http-cookie APIs.' if $VERBOSE
|
2
3
|
|
3
4
|
require 'http/cookie_jar'
|
@@ -148,7 +149,7 @@ class Mechanize
|
|
148
149
|
return super(input, opthash) if opthash[:format] != :yaml
|
149
150
|
|
150
151
|
begin
|
151
|
-
data = YAML.load(input)
|
152
|
+
data = YAML.load(input) # rubocop:disable Security/YAMLLoad
|
152
153
|
rescue ArgumentError
|
153
154
|
@logger.warn "unloadable YAML cookie data discarded" if @logger
|
154
155
|
return self
|
@@ -175,15 +176,6 @@ class Mechanize
|
|
175
176
|
end
|
176
177
|
end
|
177
178
|
|
178
|
-
# Compatibility for Ruby 1.8/1.9
|
179
|
-
unless ::HTTP::CookieJar.respond_to?(:prepend, true)
|
180
|
-
require 'mechanize/prependable'
|
181
|
-
|
182
|
-
class ::HTTP::CookieJar
|
183
|
-
extend Prependable
|
184
|
-
end
|
185
|
-
end
|
186
|
-
|
187
179
|
class ::HTTP::CookieJar
|
188
180
|
prepend CookieJarIMethods
|
189
181
|
end
|
data/lib/mechanize/download.rb
CHANGED
data/lib/mechanize/file_connection.rb
CHANGED
@@ -1,3 +1,4 @@
|
|
1
|
+
# frozen_string_literal: true
|
1
2
|
##
|
2
3
|
# Wrapper to make a file URI work like an http URI
|
3
4
|
|
@@ -10,8 +11,9 @@ class Mechanize::FileConnection
|
|
10
11
|
end
|
11
12
|
|
12
13
|
def request uri, request
|
13
|
-
|
14
|
+
file_path = uri.select(:host, :path)
|
15
|
+
.select { |part| part && (part.length > 0) }
|
16
|
+
.join(":")
|
17
|
+
yield Mechanize::FileResponse.new(Mechanize::Util.uri_unescape(file_path))
|
14
18
|
end
|
15
|
-
|
16
19
|
end
|
17
|
-
|
data/lib/mechanize/file_saver.rb
CHANGED
data/lib/mechanize/form.rb
CHANGED
@@ -1,3 +1,4 @@
|
|
1
|
+
# frozen_string_literal: true
|
1
2
|
require 'mechanize/element_matcher'
|
2
3
|
|
3
4
|
# This class encapsulates a form parsed out of an HTML page. Each type of
|
@@ -643,15 +644,6 @@ class Mechanize::Form
|
|
643
644
|
end
|
644
645
|
end
|
645
646
|
|
646
|
-
unless ::String.method_defined?(:b)
|
647
|
-
# Define String#b for Ruby < 2.0
|
648
|
-
class ::String
|
649
|
-
def b
|
650
|
-
dup.force_encoding(Encoding::ASCII_8BIT)
|
651
|
-
end
|
652
|
-
end
|
653
|
-
end
|
654
|
-
|
655
647
|
def rand_string(len = 10)
|
656
648
|
chars = ("a".."z").to_a + ("A".."Z").to_a
|
657
649
|
string = ::String.new
|
data/lib/mechanize/form/field.rb
CHANGED
data/lib/mechanize/form/reset.rb
CHANGED
data/lib/mechanize/form/text.rb
CHANGED
data/lib/mechanize/headers.rb
CHANGED
data/lib/mechanize/history.rb
CHANGED
data/lib/mechanize/http.rb
CHANGED
data/lib/mechanize/http/agent.rb
CHANGED
@@ -1,6 +1,6 @@
|
|
1
|
+
# frozen_string_literal: true
|
1
2
|
require 'tempfile'
|
2
3
|
require 'net/ntlm'
|
3
|
-
require 'kconv'
|
4
4
|
require 'webrobots'
|
5
5
|
|
6
6
|
##
|
@@ -9,6 +9,9 @@ require 'webrobots'
|
|
9
9
|
|
10
10
|
class Mechanize::HTTP::Agent
|
11
11
|
|
12
|
+
CREDENTIAL_HEADERS = ['Authorization', 'Cookie']
|
13
|
+
POST_HEADERS = ['Content-Length', 'Content-MD5', 'Content-Type']
|
14
|
+
|
12
15
|
# :section: Headers
|
13
16
|
|
14
17
|
# Disables If-Modified-Since conditional requests (enabled by default)
|
@@ -838,7 +841,7 @@ class Mechanize::HTTP::Agent
|
|
838
841
|
|
839
842
|
out_io
|
840
843
|
rescue Zlib::Error => e
|
841
|
-
message = "error handling content-encoding #{response['Content-Encoding']}:"
|
844
|
+
message = String.new("error handling content-encoding #{response['Content-Encoding']}:")
|
842
845
|
message << " #{e.message} (#{e.class})"
|
843
846
|
raise Mechanize::Error, message
|
844
847
|
ensure
|
@@ -986,14 +989,20 @@ class Mechanize::HTTP::Agent
|
|
986
989
|
|
987
990
|
redirect_method = method == :head ? :head : :get
|
988
991
|
|
992
|
+
new_uri = secure_resolve!(response['Location'].to_s, page)
|
993
|
+
@history.push(page, page.uri)
|
994
|
+
|
989
995
|
# Make sure we are not copying over the POST headers from the original request
|
990
|
-
|
991
|
-
headers.delete key
|
996
|
+
POST_HEADERS.each do |key|
|
997
|
+
headers.delete_if { |h| h.casecmp?(key) }
|
992
998
|
end
|
993
999
|
|
994
|
-
|
995
|
-
|
996
|
-
|
1000
|
+
# Make sure we clear credential headers if being redirected to another site
|
1001
|
+
if new_uri.host != page.uri.host
|
1002
|
+
CREDENTIAL_HEADERS.each do |ch|
|
1003
|
+
headers.delete_if { |h| h.casecmp?(ch) }
|
1004
|
+
end
|
1005
|
+
end
|
997
1006
|
|
998
1007
|
fetch new_uri, redirect_method, headers, [], referer, redirects + 1
|
999
1008
|
end
|
@@ -1278,4 +1287,3 @@ class Mechanize::HTTP::Agent
|
|
1278
1287
|
end
|
1279
1288
|
|
1280
1289
|
require 'mechanize/http/auth_store'
|
1281
|
-
|