mechanize 2.7.6 → 2.12.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/.github/dependabot.yml +11 -0
- data/.github/workflows/ci.yml +43 -0
- data/.github/workflows/upstream.yml +51 -0
- data/.yardopts +8 -0
- data/{CHANGELOG.rdoc → CHANGELOG.md} +221 -96
- data/EXAMPLES.rdoc +1 -24
- data/Gemfile +10 -4
- data/{LICENSE.rdoc → LICENSE.txt} +4 -0
- data/README.md +77 -0
- data/Rakefile +18 -3
- data/examples/latest_user_agents.rb +100 -0
- data/examples/rubygems.rb +2 -2
- data/examples/wikipedia_links_to_philosophy.rb +5 -6
- data/lib/mechanize/chunked_termination_error.rb +1 -0
- data/lib/mechanize/content_type_error.rb +1 -0
- data/lib/mechanize/cookie.rb +3 -15
- data/lib/mechanize/cookie_jar.rb +13 -9
- data/lib/mechanize/directory_saver.rb +1 -0
- data/lib/mechanize/download.rb +2 -1
- data/lib/mechanize/element_matcher.rb +1 -0
- data/lib/mechanize/element_not_found_error.rb +1 -0
- data/lib/mechanize/file.rb +2 -1
- data/lib/mechanize/file_connection.rb +5 -3
- data/lib/mechanize/file_request.rb +1 -0
- data/lib/mechanize/file_response.rb +4 -1
- data/lib/mechanize/file_saver.rb +1 -0
- data/lib/mechanize/form/button.rb +1 -0
- data/lib/mechanize/form/check_box.rb +1 -0
- data/lib/mechanize/form/field.rb +1 -0
- data/lib/mechanize/form/file_upload.rb +1 -0
- data/lib/mechanize/form/hidden.rb +1 -0
- data/lib/mechanize/form/image_button.rb +1 -0
- data/lib/mechanize/form/keygen.rb +1 -0
- data/lib/mechanize/form/multi_select_list.rb +2 -1
- data/lib/mechanize/form/option.rb +1 -0
- data/lib/mechanize/form/radio_button.rb +1 -0
- data/lib/mechanize/form/reset.rb +1 -0
- data/lib/mechanize/form/select_list.rb +1 -0
- data/lib/mechanize/form/submit.rb +1 -0
- data/lib/mechanize/form/text.rb +1 -0
- data/lib/mechanize/form/textarea.rb +1 -0
- data/lib/mechanize/form.rb +5 -13
- data/lib/mechanize/headers.rb +1 -0
- data/lib/mechanize/history.rb +1 -0
- data/lib/mechanize/http/agent.rb +83 -10
- data/lib/mechanize/http/auth_challenge.rb +1 -0
- data/lib/mechanize/http/auth_realm.rb +1 -0
- data/lib/mechanize/http/auth_store.rb +1 -0
- data/lib/mechanize/http/content_disposition_parser.rb +15 -4
- data/lib/mechanize/http/www_authenticate_parser.rb +3 -3
- data/lib/mechanize/http.rb +1 -0
- data/lib/mechanize/image.rb +1 -0
- data/lib/mechanize/page/base.rb +1 -0
- data/lib/mechanize/page/frame.rb +1 -0
- data/lib/mechanize/page/image.rb +1 -0
- data/lib/mechanize/page/label.rb +1 -0
- data/lib/mechanize/page/link.rb +8 -1
- data/lib/mechanize/page/meta_refresh.rb +1 -0
- data/lib/mechanize/page.rb +6 -8
- data/lib/mechanize/parser.rb +1 -0
- data/lib/mechanize/pluggable_parsers.rb +2 -1
- data/lib/mechanize/prependable.rb +1 -0
- data/lib/mechanize/redirect_limit_reached_error.rb +1 -0
- data/lib/mechanize/redirect_not_get_or_head_error.rb +1 -0
- data/lib/mechanize/response_code_error.rb +2 -1
- data/lib/mechanize/response_read_error.rb +1 -0
- data/lib/mechanize/robots_disallowed_error.rb +1 -0
- data/lib/mechanize/test_case/bad_chunking_servlet.rb +1 -0
- data/lib/mechanize/test_case/basic_auth_servlet.rb +1 -0
- data/lib/mechanize/test_case/content_type_servlet.rb +1 -0
- data/lib/mechanize/test_case/digest_auth_servlet.rb +1 -0
- data/lib/mechanize/test_case/file_upload_servlet.rb +1 -0
- data/lib/mechanize/test_case/form_servlet.rb +1 -0
- data/lib/mechanize/test_case/gzip_servlet.rb +4 -3
- data/lib/mechanize/test_case/header_servlet.rb +1 -0
- data/lib/mechanize/test_case/http_refresh_servlet.rb +1 -0
- data/lib/mechanize/test_case/infinite_redirect_servlet.rb +1 -0
- data/lib/mechanize/test_case/infinite_refresh_servlet.rb +1 -0
- data/lib/mechanize/test_case/many_cookies_as_string_servlet.rb +1 -0
- data/lib/mechanize/test_case/many_cookies_servlet.rb +1 -0
- data/lib/mechanize/test_case/modified_since_servlet.rb +1 -0
- data/lib/mechanize/test_case/ntlm_servlet.rb +1 -0
- data/lib/mechanize/test_case/one_cookie_no_spaces_servlet.rb +1 -0
- data/lib/mechanize/test_case/one_cookie_servlet.rb +1 -0
- data/lib/mechanize/test_case/quoted_value_cookie_servlet.rb +1 -0
- data/lib/mechanize/test_case/redirect_servlet.rb +1 -0
- data/lib/mechanize/test_case/referer_servlet.rb +1 -0
- data/lib/mechanize/test_case/refresh_with_empty_url.rb +1 -0
- data/lib/mechanize/test_case/refresh_without_url.rb +1 -0
- data/lib/mechanize/test_case/response_code_servlet.rb +1 -0
- data/lib/mechanize/test_case/robots_txt_servlet.rb +1 -0
- data/lib/mechanize/test_case/send_cookies_servlet.rb +1 -0
- data/lib/mechanize/test_case/server.rb +1 -0
- data/lib/mechanize/test_case/servlets.rb +1 -0
- data/lib/mechanize/test_case/verb_servlet.rb +5 -6
- data/lib/mechanize/test_case.rb +34 -34
- data/lib/mechanize/unauthorized_error.rb +1 -0
- data/lib/mechanize/unsupported_scheme_error.rb +1 -0
- data/lib/mechanize/util.rb +2 -1
- data/lib/mechanize/version.rb +2 -1
- data/lib/mechanize/xml_file.rb +1 -0
- data/lib/mechanize.rb +56 -37
- data/mechanize.gemspec +43 -35
- data/test/htdocs/dir with spaces/foo.html +1 -0
- data/test/htdocs/tc_links.html +1 -1
- data/test/test_mechanize.rb +21 -8
- data/test/test_mechanize_cookie.rb +38 -26
- data/test/test_mechanize_cookie_jar.rb +87 -54
- data/test/test_mechanize_directory_saver.rb +1 -0
- data/test/test_mechanize_download.rb +14 -1
- data/test/test_mechanize_element_not_found_error.rb +1 -0
- data/test/test_mechanize_file.rb +11 -0
- data/test/test_mechanize_file_connection.rb +23 -4
- data/test/test_mechanize_file_request.rb +1 -0
- data/test/test_mechanize_file_response.rb +26 -1
- data/test/test_mechanize_file_saver.rb +1 -0
- data/test/test_mechanize_form.rb +14 -1
- data/test/test_mechanize_form_check_box.rb +1 -0
- data/test/test_mechanize_form_encoding.rb +2 -1
- data/test/test_mechanize_form_field.rb +1 -0
- data/test/test_mechanize_form_file_upload.rb +1 -0
- data/test/test_mechanize_form_image_button.rb +1 -0
- data/test/test_mechanize_form_keygen.rb +2 -0
- data/test/test_mechanize_form_multi_select_list.rb +1 -0
- data/test/test_mechanize_form_option.rb +1 -0
- data/test/test_mechanize_form_radio_button.rb +1 -0
- data/test/test_mechanize_form_select_list.rb +1 -0
- data/test/test_mechanize_form_textarea.rb +1 -0
- data/test/test_mechanize_headers.rb +1 -0
- data/test/test_mechanize_history.rb +1 -0
- data/test/test_mechanize_http_agent.rb +187 -26
- data/test/test_mechanize_http_auth_challenge.rb +1 -0
- data/test/test_mechanize_http_auth_realm.rb +1 -0
- data/test/test_mechanize_http_auth_store.rb +1 -0
- data/test/test_mechanize_http_content_disposition_parser.rb +28 -0
- data/test/test_mechanize_http_www_authenticate_parser.rb +1 -0
- data/test/test_mechanize_image.rb +1 -0
- data/test/test_mechanize_link.rb +25 -0
- data/test/test_mechanize_page.rb +15 -0
- data/test/test_mechanize_page_encoding.rb +33 -5
- data/test/test_mechanize_page_frame.rb +1 -0
- data/test/test_mechanize_page_image.rb +1 -0
- data/test/test_mechanize_page_link.rb +27 -23
- data/test/test_mechanize_page_meta_refresh.rb +1 -0
- data/test/test_mechanize_parser.rb +1 -0
- data/test/test_mechanize_pluggable_parser.rb +1 -0
- data/test/test_mechanize_redirect_limit_reached_error.rb +1 -0
- data/test/test_mechanize_redirect_not_get_or_head_error.rb +1 -0
- data/test/test_mechanize_response_read_error.rb +1 -0
- data/test/test_mechanize_subclass.rb +1 -0
- data/test/test_mechanize_util.rb +4 -3
- data/test/test_mechanize_xml_file.rb +1 -0
- data/test/test_multi_select.rb +1 -0
- metadata +106 -86
- data/.travis.yml +0 -36
- data/README.rdoc +0 -77
data/lib/mechanize/form.rb
CHANGED
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
1
2
|
require 'mechanize/element_matcher'
|
|
2
3
|
|
|
3
4
|
# This class encapsulates a form parsed out of an HTML page. Each type of
|
|
@@ -305,7 +306,7 @@ class Mechanize::Form
|
|
|
305
306
|
successful_controls = []
|
|
306
307
|
|
|
307
308
|
(fields + checkboxes).reject do |f|
|
|
308
|
-
f.node["disabled"]
|
|
309
|
+
f.node["disabled"] || f.node["name"] == ""
|
|
309
310
|
end.sort.each do |f|
|
|
310
311
|
case f
|
|
311
312
|
when Mechanize::Form::CheckBox
|
|
@@ -481,7 +482,7 @@ class Mechanize::Form
|
|
|
481
482
|
# form.file_upload_with(:file_name => /picture/).value = 'foo'
|
|
482
483
|
|
|
483
484
|
##
|
|
484
|
-
# :
|
|
485
|
+
# :method: file_upload_with!(criteria)
|
|
485
486
|
#
|
|
486
487
|
# Same as +file_upload_with+ but raises an ElementNotFoundError if no button
|
|
487
488
|
# matches +criteria+
|
|
@@ -505,7 +506,7 @@ class Mechanize::Form
|
|
|
505
506
|
# form.radiobutton_with(:name => /woo/).check
|
|
506
507
|
|
|
507
508
|
##
|
|
508
|
-
# :
|
|
509
|
+
# :method: radiobutton_with!(criteria)
|
|
509
510
|
#
|
|
510
511
|
# Same as +radiobutton_with+ but raises an ElementNotFoundError if no button
|
|
511
512
|
# matches +criteria+
|
|
@@ -529,7 +530,7 @@ class Mechanize::Form
|
|
|
529
530
|
# form.checkbox_with(:name => /woo/).check
|
|
530
531
|
|
|
531
532
|
##
|
|
532
|
-
# :
|
|
533
|
+
# :method: checkbox_with!(criteria)
|
|
533
534
|
#
|
|
534
535
|
# Same as +checkbox_with+ but raises an ElementNotFoundError if no button
|
|
535
536
|
# matches +criteria+
|
|
@@ -643,15 +644,6 @@ class Mechanize::Form
|
|
|
643
644
|
end
|
|
644
645
|
end
|
|
645
646
|
|
|
646
|
-
unless ::String.method_defined?(:b)
|
|
647
|
-
# Define String#b for Ruby < 2.0
|
|
648
|
-
class ::String
|
|
649
|
-
def b
|
|
650
|
-
dup.force_encoding(Encoding::ASCII_8BIT)
|
|
651
|
-
end
|
|
652
|
-
end
|
|
653
|
-
end
|
|
654
|
-
|
|
655
647
|
def rand_string(len = 10)
|
|
656
648
|
chars = ("a".."z").to_a + ("A".."Z").to_a
|
|
657
649
|
string = ::String.new
|
data/lib/mechanize/headers.rb
CHANGED
data/lib/mechanize/history.rb
CHANGED
data/lib/mechanize/http/agent.rb
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
1
2
|
require 'tempfile'
|
|
2
3
|
require 'net/ntlm'
|
|
3
|
-
require 'kconv'
|
|
4
4
|
require 'webrobots'
|
|
5
5
|
|
|
6
6
|
##
|
|
@@ -9,6 +9,10 @@ require 'webrobots'
|
|
|
9
9
|
|
|
10
10
|
class Mechanize::HTTP::Agent
|
|
11
11
|
|
|
12
|
+
CREDENTIAL_HEADERS = ['Authorization']
|
|
13
|
+
COOKIE_HEADERS = ['Cookie']
|
|
14
|
+
POST_HEADERS = ['Content-Length', 'Content-MD5', 'Content-Type']
|
|
15
|
+
|
|
12
16
|
# :section: Headers
|
|
13
17
|
|
|
14
18
|
# Disables If-Modified-Since conditional requests (enabled by default)
|
|
@@ -62,7 +66,7 @@ class Mechanize::HTTP::Agent
|
|
|
62
66
|
# allowed:
|
|
63
67
|
#
|
|
64
68
|
# :all, true:: All 3xx redirects are followed (default)
|
|
65
|
-
# :permanent:: Only 301 Moved
|
|
69
|
+
# :permanent:: Only 301 Moved Permanently redirects are followed
|
|
66
70
|
# false:: No redirects are followed
|
|
67
71
|
attr_accessor :redirect_ok
|
|
68
72
|
|
|
@@ -491,6 +495,63 @@ class Mechanize::HTTP::Agent
|
|
|
491
495
|
body_io.close
|
|
492
496
|
end
|
|
493
497
|
|
|
498
|
+
##
|
|
499
|
+
# Decodes a Brotli-encoded +body_io+
|
|
500
|
+
#
|
|
501
|
+
# (Experimental, CRuby only) Although Mechanize will never request a Brotli-encoded response via
|
|
502
|
+
# `accept-encoding`, buggy servers may return brotli-encoded responses anyway. Let's try to handle
|
|
503
|
+
# that case if the Brotli gem is loaded.
|
|
504
|
+
#
|
|
505
|
+
# If you need to handle Brotli-encoded responses, install the 'brotli' gem and require it in your
|
|
506
|
+
# application. If the `Brotli` constant is defined, Mechanize will attempt to use it to inflate
|
|
507
|
+
# the response.
|
|
508
|
+
#
|
|
509
|
+
def content_encoding_brotli(body_io)
|
|
510
|
+
log.debug('deflate brotli body') if log
|
|
511
|
+
|
|
512
|
+
unless defined?(::Brotli)
|
|
513
|
+
raise Mechanize::Error, "cannot deflate brotli-encoded response. Please install and require the 'brotli' gem."
|
|
514
|
+
end
|
|
515
|
+
|
|
516
|
+
begin
|
|
517
|
+
return StringIO.new(Brotli.inflate(body_io.read))
|
|
518
|
+
rescue Brotli::Error
|
|
519
|
+
log.error("unable to brotli-inflate response") if log
|
|
520
|
+
raise Mechanize::Error, "error inflating brotli-encoded response."
|
|
521
|
+
end
|
|
522
|
+
ensure
|
|
523
|
+
body_io.close
|
|
524
|
+
end
|
|
525
|
+
|
|
526
|
+
##
|
|
527
|
+
# Decodes a Zstd-encoded +body_io+
|
|
528
|
+
#
|
|
529
|
+
# (Experimental, CRuby only) Although Mechanize will never request a zstd-encoded response via
|
|
530
|
+
# `accept-encoding`, buggy servers may return zstd-encoded responses, or you might need to
|
|
531
|
+
# inform the zstd keyword on your Accept-Encoding headers. Let's try to handle those cases if
|
|
532
|
+
# the Zstd gem is loaded.
|
|
533
|
+
#
|
|
534
|
+
# If you need to handle Zstd-encoded responses, install the 'zstd-ruby' gem and require it in your
|
|
535
|
+
# application. If the `Zstd` constant is defined, Mechanize will attempt to use it to inflate
|
|
536
|
+
# the response.
|
|
537
|
+
#
|
|
538
|
+
def content_encoding_zstd(body_io)
|
|
539
|
+
log.debug('deflate zstd body') if log
|
|
540
|
+
|
|
541
|
+
unless defined?(::Zstd)
|
|
542
|
+
raise Mechanize::Error, "cannot deflate zstd-encoded response. Please install and require the 'zstd-ruby' gem."
|
|
543
|
+
end
|
|
544
|
+
|
|
545
|
+
begin
|
|
546
|
+
return StringIO.new(Zstd.decompress(body_io.read))
|
|
547
|
+
rescue StandardError
|
|
548
|
+
log.error("unable to zstd#decompress response") if log
|
|
549
|
+
raise Mechanize::Error, "error decompressing zstd-encoded response."
|
|
550
|
+
end
|
|
551
|
+
ensure
|
|
552
|
+
body_io.close
|
|
553
|
+
end
|
|
554
|
+
|
|
494
555
|
def disable_keep_alive request
|
|
495
556
|
request['connection'] = 'close' unless @keep_alive
|
|
496
557
|
end
|
|
@@ -577,7 +638,6 @@ class Mechanize::HTTP::Agent
|
|
|
577
638
|
end
|
|
578
639
|
|
|
579
640
|
def request_language_charset request
|
|
580
|
-
request['accept-charset'] = 'ISO-8859-1,utf-8;q=0.7,*;q=0.7'
|
|
581
641
|
request['accept-language'] = 'en-us,en;q=0.5'
|
|
582
642
|
end
|
|
583
643
|
|
|
@@ -828,6 +888,10 @@ class Mechanize::HTTP::Agent
|
|
|
828
888
|
content_encoding_inflate body_io
|
|
829
889
|
when 'gzip', 'x-gzip' then
|
|
830
890
|
content_encoding_gunzip body_io
|
|
891
|
+
when 'br' then
|
|
892
|
+
content_encoding_brotli body_io
|
|
893
|
+
when 'zstd' then
|
|
894
|
+
content_encoding_zstd body_io
|
|
831
895
|
else
|
|
832
896
|
raise Mechanize::Error,
|
|
833
897
|
"unsupported content-encoding: #{response['Content-Encoding']}"
|
|
@@ -838,7 +902,7 @@ class Mechanize::HTTP::Agent
|
|
|
838
902
|
|
|
839
903
|
out_io
|
|
840
904
|
rescue Zlib::Error => e
|
|
841
|
-
message = "error handling content-encoding #{response['Content-Encoding']}:"
|
|
905
|
+
message = String.new("error handling content-encoding #{response['Content-Encoding']}:")
|
|
842
906
|
message << " #{e.message} (#{e.class})"
|
|
843
907
|
raise Mechanize::Error, message
|
|
844
908
|
ensure
|
|
@@ -986,14 +1050,24 @@ class Mechanize::HTTP::Agent
|
|
|
986
1050
|
|
|
987
1051
|
redirect_method = method == :head ? :head : :get
|
|
988
1052
|
|
|
1053
|
+
new_uri = secure_resolve!(response['Location'].to_s, page)
|
|
1054
|
+
@history.push(page, page.uri)
|
|
1055
|
+
|
|
989
1056
|
# Make sure we are not copying over the POST headers from the original request
|
|
990
|
-
|
|
991
|
-
headers.
|
|
1057
|
+
POST_HEADERS.each do |key|
|
|
1058
|
+
headers.delete_if { |h| h.casecmp?(key) }
|
|
992
1059
|
end
|
|
993
1060
|
|
|
994
|
-
|
|
995
|
-
|
|
996
|
-
|
|
1061
|
+
# Make sure we clear credential headers if being redirected to another site
|
|
1062
|
+
if new_uri.host == page.uri.host
|
|
1063
|
+
if new_uri.port != page.uri.port
|
|
1064
|
+
# https://datatracker.ietf.org/doc/html/rfc6265#section-8.5
|
|
1065
|
+
# cookies are OK to be shared across ports on the same host
|
|
1066
|
+
CREDENTIAL_HEADERS.each { |ch| headers.delete_if { |h| h.casecmp?(ch) } }
|
|
1067
|
+
end
|
|
1068
|
+
else
|
|
1069
|
+
(COOKIE_HEADERS + CREDENTIAL_HEADERS).each { |ch| headers.delete_if { |h| h.casecmp?(ch) } }
|
|
1070
|
+
end
|
|
997
1071
|
|
|
998
1072
|
fetch new_uri, redirect_method, headers, [], referer, redirects + 1
|
|
999
1073
|
end
|
|
@@ -1278,4 +1352,3 @@ class Mechanize::HTTP::Agent
|
|
|
1278
1352
|
end
|
|
1279
1353
|
|
|
1280
1354
|
require 'mechanize/http/auth_store'
|
|
1281
|
-
|
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
1
2
|
# coding: BINARY
|
|
2
3
|
|
|
3
4
|
require 'strscan'
|
|
@@ -16,6 +17,7 @@ end
|
|
|
16
17
|
# * Missing disposition-type
|
|
17
18
|
# * Multiple semicolons
|
|
18
19
|
# * Whitespace around semicolons
|
|
20
|
+
# * Dates in ISO 8601 format
|
|
19
21
|
|
|
20
22
|
class Mechanize::HTTP::ContentDispositionParser
|
|
21
23
|
|
|
@@ -79,7 +81,7 @@ class Mechanize::HTTP::ContentDispositionParser
|
|
|
79
81
|
end
|
|
80
82
|
|
|
81
83
|
##
|
|
82
|
-
# Extracts disposition-
|
|
84
|
+
# Extracts disposition-param and returns a Hash.
|
|
83
85
|
|
|
84
86
|
def parse_parameters
|
|
85
87
|
parameters = {}
|
|
@@ -93,7 +95,17 @@ class Mechanize::HTTP::ContentDispositionParser
|
|
|
93
95
|
when /^filename$/ then
|
|
94
96
|
rfc_2045_value
|
|
95
97
|
when /^(creation|modification|read)-date$/ then
|
|
96
|
-
|
|
98
|
+
date = rfc_2045_quoted_string
|
|
99
|
+
|
|
100
|
+
begin
|
|
101
|
+
Time.rfc822 date
|
|
102
|
+
rescue ArgumentError
|
|
103
|
+
begin
|
|
104
|
+
Time.iso8601 date
|
|
105
|
+
rescue ArgumentError
|
|
106
|
+
nil
|
|
107
|
+
end
|
|
108
|
+
end
|
|
97
109
|
when /^size$/ then
|
|
98
110
|
rfc_2045_value.to_i(10)
|
|
99
111
|
else
|
|
@@ -125,7 +137,7 @@ class Mechanize::HTTP::ContentDispositionParser
|
|
|
125
137
|
def rfc_2045_quoted_string
|
|
126
138
|
return nil unless @scanner.scan(/"/)
|
|
127
139
|
|
|
128
|
-
text =
|
|
140
|
+
text = String.new
|
|
129
141
|
|
|
130
142
|
while true do
|
|
131
143
|
chunk = @scanner.scan(/[\000-\014\016-\041\043-\133\135-\177]+/) # not \r "
|
|
@@ -188,4 +200,3 @@ class Mechanize::HTTP::ContentDispositionParser
|
|
|
188
200
|
end
|
|
189
201
|
|
|
190
202
|
end
|
|
191
|
-
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
#
|
|
1
|
+
# frozen_string_literal: true
|
|
2
2
|
|
|
3
3
|
require 'strscan'
|
|
4
4
|
|
|
@@ -151,10 +151,10 @@ class Mechanize::HTTP::WWWAuthenticateParser
|
|
|
151
151
|
def quoted_string
|
|
152
152
|
return nil unless @scanner.scan(/"/)
|
|
153
153
|
|
|
154
|
-
text =
|
|
154
|
+
text = String.new
|
|
155
155
|
|
|
156
156
|
while true do
|
|
157
|
-
chunk = @scanner.scan(/[\r\n \t\
|
|
157
|
+
chunk = @scanner.scan(/[\r\n \t\x21\x23-\x7e\u0080-\u00ff]+/) # not " which is \x22
|
|
158
158
|
|
|
159
159
|
if chunk then
|
|
160
160
|
text << chunk
|
data/lib/mechanize/http.rb
CHANGED
data/lib/mechanize/image.rb
CHANGED
data/lib/mechanize/page/base.rb
CHANGED
data/lib/mechanize/page/frame.rb
CHANGED
data/lib/mechanize/page/image.rb
CHANGED
data/lib/mechanize/page/label.rb
CHANGED
data/lib/mechanize/page/link.rb
CHANGED
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
1
2
|
##
|
|
2
3
|
# This class encapsulates links. It contains the text and the URI for
|
|
3
4
|
# 'a' tags parsed out of an HTML page. If the link contains an image,
|
|
@@ -8,6 +9,8 @@
|
|
|
8
9
|
# <a href="http://example">Hello World</a>
|
|
9
10
|
# <a href="http://example"><img src="test.jpg" alt="Hello World"></a>
|
|
10
11
|
|
|
12
|
+
require 'addressable/uri'
|
|
13
|
+
|
|
11
14
|
class Mechanize::Page::Link
|
|
12
15
|
attr_reader :node
|
|
13
16
|
attr_reader :href
|
|
@@ -94,7 +97,11 @@ class Mechanize::Page::Link
|
|
|
94
97
|
begin
|
|
95
98
|
URI.parse @href
|
|
96
99
|
rescue URI::InvalidURIError
|
|
97
|
-
|
|
100
|
+
begin
|
|
101
|
+
URI.parse(Addressable::URI.escape(@href))
|
|
102
|
+
rescue Addressable::URI::InvalidURIError
|
|
103
|
+
raise URI::InvalidURIError
|
|
104
|
+
end
|
|
98
105
|
end
|
|
99
106
|
end
|
|
100
107
|
end
|
data/lib/mechanize/page.rb
CHANGED
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
1
2
|
##
|
|
2
3
|
# This class encapsulates an HTML page. If Mechanize finds a content
|
|
3
4
|
# type of 'text/html', this class will be instantiated and returned.
|
|
@@ -40,10 +41,6 @@ class Mechanize::Page < Mechanize::File
|
|
|
40
41
|
@encodings.concat self.class.response_header_charset(response)
|
|
41
42
|
|
|
42
43
|
if body
|
|
43
|
-
# Force the encoding to be 8BIT so we can perform regular expressions.
|
|
44
|
-
# We'll set it to the detected encoding later
|
|
45
|
-
body.force_encoding(Encoding::ASCII_8BIT)
|
|
46
|
-
|
|
47
44
|
@encodings.concat self.class.meta_charset body
|
|
48
45
|
|
|
49
46
|
meta_content_type = self.class.meta_content_type body
|
|
@@ -98,14 +95,15 @@ class Mechanize::Page < Mechanize::File
|
|
|
98
95
|
end
|
|
99
96
|
|
|
100
97
|
# Return whether parser result has errors related to encoding or not.
|
|
101
|
-
# false indicates just parser has no encoding errors, not encoding is
|
|
98
|
+
# false indicates just parser has no encoding errors, not encoding is valid.
|
|
102
99
|
def encoding_error?(parser=nil)
|
|
103
100
|
parser = self.parser unless parser
|
|
104
101
|
return false if parser.errors.empty?
|
|
105
102
|
parser.errors.any? do |error|
|
|
106
|
-
error.message =~ /(indicate\ encoding)|
|
|
107
|
-
|
|
108
|
-
|
|
103
|
+
error.message.scrub =~ /(indicate\ encoding)|
|
|
104
|
+
(Invalid\ bytes)|
|
|
105
|
+
(Invalid\ char)|
|
|
106
|
+
(input\ conversion\ failed)/x
|
|
109
107
|
end
|
|
110
108
|
end
|
|
111
109
|
|
data/lib/mechanize/parser.rb
CHANGED
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
1
2
|
require 'mechanize/file'
|
|
2
3
|
require 'mechanize/file_saver'
|
|
3
4
|
require 'mechanize/page'
|
|
@@ -103,7 +104,7 @@ class Mechanize::PluggableParser
|
|
|
103
104
|
|
|
104
105
|
return parser if parser
|
|
105
106
|
|
|
106
|
-
mime_type = MIME::Type.new content_type
|
|
107
|
+
mime_type = MIME::Type.new "content-type" => content_type
|
|
107
108
|
|
|
108
109
|
parser = @parsers[mime_type.to_s] ||
|
|
109
110
|
@parsers[mime_type.simplified] ||
|
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
1
2
|
# This error is raised when Mechanize encounters a response code it does not
|
|
2
3
|
# know how to handle. Currently, this exception will be thrown if Mechanize
|
|
3
4
|
# encounters response codes other than 200, 301, or 302. Any other response
|
|
@@ -16,7 +17,7 @@ class Mechanize::ResponseCodeError < Mechanize::Error
|
|
|
16
17
|
|
|
17
18
|
def to_s
|
|
18
19
|
response_class = Net::HTTPResponse::CODE_TO_OBJ[@response_code]
|
|
19
|
-
out = "#{@response_code} => #{response_class} "
|
|
20
|
+
out = String.new("#{@response_code} => #{response_class} ")
|
|
20
21
|
out << "for #{@page.uri} " if @page.respond_to? :uri # may be HTTPResponse
|
|
21
22
|
out << "-- #{super}"
|
|
22
23
|
end
|
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
1
2
|
require 'stringio'
|
|
2
3
|
require 'zlib'
|
|
3
4
|
|
|
@@ -13,8 +14,8 @@ class GzipServlet < WEBrick::HTTPServlet::AbstractServlet
|
|
|
13
14
|
end
|
|
14
15
|
|
|
15
16
|
if name = req.query['file'] then
|
|
16
|
-
open
|
|
17
|
-
string =
|
|
17
|
+
::File.open("#{TEST_DIR}/htdocs/#{name}") do |io|
|
|
18
|
+
string = String.new
|
|
18
19
|
zipped = StringIO.new string, 'w'
|
|
19
20
|
Zlib::GzipWriter.wrap zipped do |gz|
|
|
20
21
|
gz.write io.read
|
|
@@ -22,7 +23,7 @@ class GzipServlet < WEBrick::HTTPServlet::AbstractServlet
|
|
|
22
23
|
res.body = string
|
|
23
24
|
end
|
|
24
25
|
else
|
|
25
|
-
res.body =
|
|
26
|
+
res.body = String.new
|
|
26
27
|
end
|
|
27
28
|
|
|
28
29
|
res['Content-Encoding'] = req['X-ResponseContentEncoding'] || 'gzip'
|