mechanize 2.8.4 → 2.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/ci-test.yml +14 -6
- data/CHANGELOG.md +19 -0
- data/README.md +1 -1
- data/examples/wikipedia_links_to_philosophy.rb +5 -6
- data/lib/mechanize/http/agent.rb +9 -4
- data/lib/mechanize/page.rb +0 -4
- data/lib/mechanize/version.rb +1 -1
- data/mechanize.gemspec +1 -1
- data/test/test_mechanize_http_agent.rb +39 -13
- data/test/test_mechanize_page.rb +14 -0
- data/test/test_mechanize_page_link.rb +6 -9
- metadata +4 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 25fad3bc233e4efc5e99a66ccd480450e100b2bdb76e8a7e10d00ffb5386fcf0
|
4
|
+
data.tar.gz: 6c63c1e76044803b26f687d48b12c3f43e2009afa7e5bbd6ce183e316eaad049
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: f200b8ceaac133254e05d03cbe38344cf31b55bd7c4a2979fd53580eb5fb84ee4f04e4a76e111037cc6d1e6efdeabb14a494ff47194668dc8334d06dc26cd377
|
7
|
+
data.tar.gz: af78f50d64366a713f70297befb328cba35120b08850c99cf2dd09a2d670b1ffcdf5cf5e12889e12db2c7ddc7a595074707c58e7aa0c4792693cbf5cddacee3e
|
data/.github/workflows/ci-test.yml
CHANGED
@@ -1,5 +1,9 @@
|
|
1
1
|
name: "ci"
|
2
2
|
|
3
|
+
concurrency:
|
4
|
+
group: "${{github.workflow}}-${{github.ref}}"
|
5
|
+
cancel-in-progress: true
|
6
|
+
|
3
7
|
on:
|
4
8
|
push:
|
5
9
|
branches:
|
@@ -8,15 +12,17 @@ on:
|
|
8
12
|
types: [opened, synchronize]
|
9
13
|
branches:
|
10
14
|
- main
|
15
|
+
schedule:
|
16
|
+
- cron: "0 8 * * 5" # At 08:00 on Friday # https://crontab.guru/#0_8_*_*_5
|
11
17
|
|
12
18
|
jobs:
|
13
19
|
rubocop:
|
14
20
|
runs-on: ubuntu-latest
|
15
21
|
steps:
|
16
|
-
- uses: actions/checkout@
|
22
|
+
- uses: actions/checkout@v3
|
17
23
|
- uses: ruby/setup-ruby@v1
|
18
24
|
with:
|
19
|
-
ruby-version: "3.
|
25
|
+
ruby-version: "3.2"
|
20
26
|
bundler-cache: true
|
21
27
|
- run: bundle exec rake rubocop
|
22
28
|
|
@@ -25,15 +31,16 @@ jobs:
|
|
25
31
|
strategy:
|
26
32
|
fail-fast: false
|
27
33
|
matrix:
|
28
|
-
ruby-version: ["2.
|
34
|
+
ruby-version: ["2.6", "2.7", "3.0", "3.1", "3.2", "head", "jruby-9.4", "truffleruby-head"]
|
29
35
|
|
30
36
|
runs-on: ubuntu-latest
|
31
37
|
steps:
|
32
|
-
- uses: actions/checkout@
|
38
|
+
- uses: actions/checkout@v3
|
33
39
|
- uses: ruby/setup-ruby@v1
|
34
40
|
with:
|
35
41
|
ruby-version: ${{matrix.ruby-version}}
|
36
42
|
bundler-cache: true
|
43
|
+
bundler: 2.3.26 # https://github.com/rubygems/rubygems/issues/6435
|
37
44
|
- run: bundle exec rake test
|
38
45
|
|
39
46
|
test-platform:
|
@@ -45,9 +52,10 @@ jobs:
|
|
45
52
|
|
46
53
|
runs-on: ${{matrix.platform}}
|
47
54
|
steps:
|
48
|
-
- uses: actions/checkout@
|
55
|
+
- uses: actions/checkout@v3
|
49
56
|
- uses: ruby/setup-ruby@v1
|
50
57
|
with:
|
51
|
-
ruby-version: "3.
|
58
|
+
ruby-version: "3.2"
|
52
59
|
bundler-cache: true
|
60
|
+
bundler: latest
|
53
61
|
- run: bundle exec rake test
|
data/CHANGELOG.md
CHANGED
@@ -1,5 +1,24 @@
|
|
1
1
|
# Mechanize CHANGELOG
|
2
2
|
|
3
|
+
## 2.9.0 / 2023-04-07
|
4
|
+
|
5
|
+
### Requirements
|
6
|
+
|
7
|
+
* Mechanize now requires Ruby 2.6 or newer.
|
8
|
+
|
9
|
+
|
10
|
+
### Improvement
|
11
|
+
|
12
|
+
* Mechanize can now parse frozen strings. (#610)
|
13
|
+
|
14
|
+
|
15
|
+
## 2.8.5 / 2022-06-09
|
16
|
+
|
17
|
+
### Security
|
18
|
+
|
19
|
+
Fixes low-severity CVE-2022-31033, "Authorization header leak on port redirect." See [GHSA-64qm-hrgp-pgr9](https://github.com/sparklemotion/mechanize/security/advisories/GHSA-64qm-hrgp-pgr9) for more details.
|
20
|
+
|
21
|
+
|
3
22
|
## 2.8.4 / 2022-01-17
|
4
23
|
|
5
24
|
### Fix
|
data/README.md
CHANGED
data/examples/wikipedia_links_to_philosophy.rb
CHANGED
@@ -58,10 +58,10 @@ class WikipediaLinksToPhilosophy
|
|
58
58
|
# the article.
|
59
59
|
|
60
60
|
def follow_first_link
|
61
|
-
puts @title
|
61
|
+
puts "#{@title} (#{@page.uri})"
|
62
62
|
|
63
63
|
# > p > a rejects italics
|
64
|
-
links = @page.root.css('.mw-content-ltr
|
64
|
+
links = @page.root.css('.mw-content-ltr p > a[href^="/wiki/"]')
|
65
65
|
|
66
66
|
# reject disambiguation and special pages, images and files
|
67
67
|
links = links.reject do |link_node|
|
@@ -74,10 +74,9 @@ class WikipediaLinksToPhilosophy
|
|
74
74
|
|
75
75
|
link = links.first
|
76
76
|
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
@page.root.css('.mw-content-ltr > ul > li > a[href^="/wiki/"]').first
|
77
|
+
if link.nil?
|
78
|
+
puts "Could not parse #{@page.uri}"
|
79
|
+
exit 1
|
81
80
|
end
|
82
81
|
|
83
82
|
# convert a Nokogiri HTML element back to a mechanize link
|
data/lib/mechanize/http/agent.rb
CHANGED
@@ -9,7 +9,8 @@ require 'webrobots'
|
|
9
9
|
|
10
10
|
class Mechanize::HTTP::Agent
|
11
11
|
|
12
|
-
CREDENTIAL_HEADERS = ['Authorization'
|
12
|
+
CREDENTIAL_HEADERS = ['Authorization']
|
13
|
+
COOKIE_HEADERS = ['Cookie']
|
13
14
|
POST_HEADERS = ['Content-Length', 'Content-MD5', 'Content-Type']
|
14
15
|
|
15
16
|
# :section: Headers
|
@@ -998,10 +999,14 @@ class Mechanize::HTTP::Agent
|
|
998
999
|
end
|
999
1000
|
|
1000
1001
|
# Make sure we clear credential headers if being redirected to another site
|
1001
|
-
if new_uri.host
|
1002
|
-
|
1003
|
-
|
1002
|
+
if new_uri.host == page.uri.host
|
1003
|
+
if new_uri.port != page.uri.port
|
1004
|
+
# https://datatracker.ietf.org/doc/html/rfc6265#section-8.5
|
1005
|
+
# cookies are OK to be shared across ports on the same host
|
1006
|
+
CREDENTIAL_HEADERS.each { |ch| headers.delete_if { |h| h.casecmp?(ch) } }
|
1004
1007
|
end
|
1008
|
+
else
|
1009
|
+
(COOKIE_HEADERS + CREDENTIAL_HEADERS).each { |ch| headers.delete_if { |h| h.casecmp?(ch) } }
|
1005
1010
|
end
|
1006
1011
|
|
1007
1012
|
fetch new_uri, redirect_method, headers, [], referer, redirects + 1
|
data/lib/mechanize/page.rb
CHANGED
@@ -41,10 +41,6 @@ class Mechanize::Page < Mechanize::File
|
|
41
41
|
@encodings.concat self.class.response_header_charset(response)
|
42
42
|
|
43
43
|
if body
|
44
|
-
# Force the encoding to be 8BIT so we can perform regular expressions.
|
45
|
-
# We'll set it to the detected encoding later
|
46
|
-
body.force_encoding(Encoding::ASCII_8BIT)
|
47
|
-
|
48
44
|
@encodings.concat self.class.meta_charset body
|
49
45
|
|
50
46
|
meta_content_type = self.class.meta_content_type body
|
data/lib/mechanize/version.rb
CHANGED
data/mechanize.gemspec
CHANGED
@@ -51,7 +51,7 @@ Gem::Specification.new do |spec|
|
|
51
51
|
spec.extra_rdoc_files += Dir['*.rdoc', '*.md']
|
52
52
|
spec.rdoc_options = ["--main", "README.md"]
|
53
53
|
|
54
|
-
spec.required_ruby_version = ">= 2.
|
54
|
+
spec.required_ruby_version = ">= 2.6.0"
|
55
55
|
|
56
56
|
spec.add_runtime_dependency("addressable", "~> 2.8")
|
57
57
|
spec.add_runtime_dependency("domain_name", ">= 0.5.20190701", "~> 0.5")
|
data/test/test_mechanize_http_agent.rb
CHANGED
@@ -27,13 +27,10 @@ class TestMechanizeHttpAgent < Mechanize::TestCase
|
|
27
27
|
realm
|
28
28
|
end
|
29
29
|
|
30
|
-
def
|
30
|
+
def skip_if_jruby_zlib
|
31
31
|
if RUBY_ENGINE == 'jruby'
|
32
32
|
meth = caller[0][/`(\w+)/, 1]
|
33
|
-
|
34
|
-
true
|
35
|
-
else
|
36
|
-
false
|
33
|
+
skip "#{meth}: skipped because how Zlib handles error is different in JRuby"
|
37
34
|
end
|
38
35
|
end
|
39
36
|
|
@@ -823,7 +820,11 @@ class TestMechanizeHttpAgent < Mechanize::TestCase
|
|
823
820
|
@res.instance_variable_set(:@header,
|
824
821
|
'www-authenticate' => ['Negotiate, NTLM'])
|
825
822
|
|
826
|
-
|
823
|
+
begin
|
824
|
+
page = @agent.response_authenticate @res, nil, @uri, @req, {}, nil, nil
|
825
|
+
rescue OpenSSL::Digest::DigestError
|
826
|
+
skip "It looks like OpenSSL is not configured to support MD4"
|
827
|
+
end
|
827
828
|
|
828
829
|
assert_equal 'ok', page.body # lame test
|
829
830
|
end
|
@@ -931,7 +932,7 @@ class TestMechanizeHttpAgent < Mechanize::TestCase
|
|
931
932
|
body_io = StringIO.new \
|
932
933
|
"\037\213\b\0002\002\225M\000\003+H,*\001"
|
933
934
|
|
934
|
-
|
935
|
+
skip_if_jruby_zlib
|
935
936
|
|
936
937
|
e = assert_raises Mechanize::Error do
|
937
938
|
@agent.response_content_encoding @res, body_io
|
@@ -965,7 +966,8 @@ class TestMechanizeHttpAgent < Mechanize::TestCase
|
|
965
966
|
|
966
967
|
assert_match %r%invalid compressed data -- crc error%, log.string
|
967
968
|
rescue IOError
|
968
|
-
|
969
|
+
skip_if_jruby_zlib
|
970
|
+
raise
|
969
971
|
end
|
970
972
|
|
971
973
|
def test_response_content_encoding_gzip_checksum_corrupt_length
|
@@ -983,7 +985,8 @@ class TestMechanizeHttpAgent < Mechanize::TestCase
|
|
983
985
|
|
984
986
|
assert_match %r%invalid compressed data -- length error%, log.string
|
985
987
|
rescue IOError
|
986
|
-
|
988
|
+
skip_if_jruby_zlib
|
989
|
+
raise
|
987
990
|
end
|
988
991
|
|
989
992
|
def test_response_content_encoding_gzip_checksum_truncated
|
@@ -1001,7 +1004,8 @@ class TestMechanizeHttpAgent < Mechanize::TestCase
|
|
1001
1004
|
|
1002
1005
|
assert_match %r%unable to gunzip response: footer is not found%, log.string
|
1003
1006
|
rescue IOError
|
1004
|
-
|
1007
|
+
skip_if_jruby_zlib
|
1008
|
+
raise
|
1005
1009
|
end
|
1006
1010
|
|
1007
1011
|
def test_response_content_encoding_gzip_empty
|
@@ -1042,7 +1046,8 @@ class TestMechanizeHttpAgent < Mechanize::TestCase
|
|
1042
1046
|
|
1043
1047
|
assert body_io.closed?
|
1044
1048
|
rescue IOError
|
1045
|
-
|
1049
|
+
skip_if_jruby_zlib
|
1050
|
+
raise
|
1046
1051
|
end
|
1047
1052
|
|
1048
1053
|
def test_response_content_encoding_none
|
@@ -1569,7 +1574,7 @@ class TestMechanizeHttpAgent < Mechanize::TestCase
|
|
1569
1574
|
refute_includes(headers.keys, "AUTHORIZATION")
|
1570
1575
|
refute_includes(headers.keys, "cookie")
|
1571
1576
|
|
1572
|
-
assert_match
|
1577
|
+
assert_match("range|bytes=0-9999", page.body)
|
1573
1578
|
refute_match("authorization|Basic xxx", page.body)
|
1574
1579
|
refute_match("cookie|name=value", page.body)
|
1575
1580
|
end
|
@@ -1590,11 +1595,32 @@ class TestMechanizeHttpAgent < Mechanize::TestCase
|
|
1590
1595
|
assert_includes(headers.keys, "AUTHORIZATION")
|
1591
1596
|
assert_includes(headers.keys, "cookie")
|
1592
1597
|
|
1593
|
-
assert_match
|
1598
|
+
assert_match("range|bytes=0-9999", page.body)
|
1594
1599
|
assert_match("authorization|Basic xxx", page.body)
|
1595
1600
|
assert_match("cookie|name=value", page.body)
|
1596
1601
|
end
|
1597
1602
|
|
1603
|
+
def test_response_redirect_to_same_site_diff_port_with_credential
|
1604
|
+
@agent.redirect_ok = true
|
1605
|
+
|
1606
|
+
headers = {
|
1607
|
+
'Range' => 'bytes=0-9999',
|
1608
|
+
'AUTHORIZATION' => 'Basic xxx',
|
1609
|
+
'cookie' => 'name=value',
|
1610
|
+
}
|
1611
|
+
|
1612
|
+
page = html_page ''
|
1613
|
+
page = @agent.response_redirect({ 'Location' => 'http://example:81/http_headers' }, :get,
|
1614
|
+
page, 0, headers)
|
1615
|
+
|
1616
|
+
refute_includes(headers.keys, "AUTHORIZATION")
|
1617
|
+
assert_includes(headers.keys, "cookie")
|
1618
|
+
|
1619
|
+
assert_match("range|bytes=0-9999", page.body)
|
1620
|
+
refute_match("authorization|Basic xxx", page.body)
|
1621
|
+
assert_match("cookie|name=value", page.body)
|
1622
|
+
end
|
1623
|
+
|
1598
1624
|
def test_response_redirect_not_ok
|
1599
1625
|
@agent.redirect_ok = false
|
1600
1626
|
|
data/test/test_mechanize_page.rb
CHANGED
@@ -276,5 +276,19 @@ class TestMechanizePage < Mechanize::TestCase
|
|
276
276
|
assert_equal page.title, "HTML>TITLE"
|
277
277
|
end
|
278
278
|
|
279
|
+
def test_frozen_string_body
|
280
|
+
html = (<<~HTML).freeze
|
281
|
+
<html>
|
282
|
+
<head>
|
283
|
+
<title>Page Title</title>
|
284
|
+
</head>
|
285
|
+
<body>
|
286
|
+
<p>Hello World</p>
|
287
|
+
</body>
|
288
|
+
</html>
|
289
|
+
HTML
|
290
|
+
|
291
|
+
html_page(html) # refute_raises
|
292
|
+
end
|
279
293
|
end
|
280
294
|
|
data/test/test_mechanize_page_link.rb
CHANGED
@@ -51,13 +51,10 @@ class TestMechanizePageLink < Mechanize::TestCase
|
|
51
51
|
Mechanize::Page.new @uri, res, body && body.force_encoding(Encoding::BINARY), 200, @mech
|
52
52
|
end
|
53
53
|
|
54
|
-
def
|
55
|
-
if RUBY_ENGINE == '
|
56
|
-
false
|
57
|
-
else
|
54
|
+
def skip_if_nkf_dependency
|
55
|
+
if RUBY_ENGINE == 'jruby'
|
58
56
|
meth = caller[0][/`(\w+)/, 1]
|
59
|
-
|
60
|
-
true
|
57
|
+
skip "#{meth}: skipped because this feature currently depends on NKF"
|
61
58
|
end
|
62
59
|
end
|
63
60
|
|
@@ -112,7 +109,7 @@ class TestMechanizePageLink < Mechanize::TestCase
|
|
112
109
|
end
|
113
110
|
|
114
111
|
def test_encoding_charset_after_title_bad
|
115
|
-
|
112
|
+
skip_if_nkf_dependency
|
116
113
|
|
117
114
|
page = util_page UTF8
|
118
115
|
|
@@ -122,7 +119,7 @@ class TestMechanizePageLink < Mechanize::TestCase
|
|
122
119
|
end
|
123
120
|
|
124
121
|
def test_encoding_charset_after_title_double_bad
|
125
|
-
|
122
|
+
skip_if_nkf_dependency
|
126
123
|
|
127
124
|
page = util_page SJIS_BAD_AFTER_TITLE
|
128
125
|
|
@@ -132,7 +129,7 @@ class TestMechanizePageLink < Mechanize::TestCase
|
|
132
129
|
end
|
133
130
|
|
134
131
|
def test_encoding_charset_bad
|
135
|
-
|
132
|
+
skip_if_nkf_dependency
|
136
133
|
|
137
134
|
page = util_page "<title>#{UTF8_TITLE}</title>"
|
138
135
|
page.encodings.replace %w[
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: mechanize
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.
|
4
|
+
version: 2.9.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Eric Hodel
|
@@ -12,7 +12,7 @@ authors:
|
|
12
12
|
autorequire:
|
13
13
|
bindir: bin
|
14
14
|
cert_chain: []
|
15
|
-
date:
|
15
|
+
date: 2023-04-07 00:00:00.000000000 Z
|
16
16
|
dependencies:
|
17
17
|
- !ruby/object:Gem::Dependency
|
18
18
|
name: addressable
|
@@ -495,14 +495,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
495
495
|
requirements:
|
496
496
|
- - ">="
|
497
497
|
- !ruby/object:Gem::Version
|
498
|
-
version: 2.
|
498
|
+
version: 2.6.0
|
499
499
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
500
500
|
requirements:
|
501
501
|
- - ">="
|
502
502
|
- !ruby/object:Gem::Version
|
503
503
|
version: '0'
|
504
504
|
requirements: []
|
505
|
-
rubygems_version: 3.
|
505
|
+
rubygems_version: 3.4.10
|
506
506
|
signing_key:
|
507
507
|
specification_version: 4
|
508
508
|
summary: The Mechanize library is used for automating interaction with websites
|