mechanize 2.7.4 → 2.8.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/.github/workflows/ci-test.yml +45 -0
- data/.yardopts +8 -0
- data/{CHANGELOG.rdoc → CHANGELOG.md} +151 -86
- data/EXAMPLES.rdoc +1 -24
- data/Gemfile +1 -1
- data/{LICENSE.rdoc → LICENSE.txt} +4 -0
- data/README.md +77 -0
- data/Rakefile +18 -3
- data/examples/rubygems.rb +2 -2
- data/lib/mechanize.rb +3 -2
- data/lib/mechanize/chunked_termination_error.rb +1 -0
- data/lib/mechanize/content_type_error.rb +1 -0
- data/lib/mechanize/cookie.rb +1 -13
- data/lib/mechanize/cookie_jar.rb +4 -12
- data/lib/mechanize/directory_saver.rb +1 -0
- data/lib/mechanize/download.rb +2 -1
- data/lib/mechanize/element_matcher.rb +5 -1
- data/lib/mechanize/element_not_found_error.rb +1 -0
- data/lib/mechanize/file.rb +2 -1
- data/lib/mechanize/file_connection.rb +5 -3
- data/lib/mechanize/file_request.rb +1 -0
- data/lib/mechanize/file_response.rb +4 -1
- data/lib/mechanize/file_saver.rb +1 -0
- data/lib/mechanize/form.rb +112 -45
- data/lib/mechanize/form/button.rb +1 -0
- data/lib/mechanize/form/check_box.rb +1 -0
- data/lib/mechanize/form/field.rb +47 -0
- data/lib/mechanize/form/file_upload.rb +1 -0
- data/lib/mechanize/form/hidden.rb +1 -0
- data/lib/mechanize/form/image_button.rb +1 -0
- data/lib/mechanize/form/keygen.rb +1 -0
- data/lib/mechanize/form/multi_select_list.rb +8 -14
- data/lib/mechanize/form/option.rb +3 -1
- data/lib/mechanize/form/radio_button.rb +1 -0
- data/lib/mechanize/form/reset.rb +1 -0
- data/lib/mechanize/form/select_list.rb +1 -0
- data/lib/mechanize/form/submit.rb +1 -0
- data/lib/mechanize/form/text.rb +1 -0
- data/lib/mechanize/form/textarea.rb +1 -0
- data/lib/mechanize/headers.rb +1 -0
- data/lib/mechanize/history.rb +2 -1
- data/lib/mechanize/http.rb +1 -0
- data/lib/mechanize/http/agent.rb +81 -38
- data/lib/mechanize/http/auth_challenge.rb +1 -0
- data/lib/mechanize/http/auth_realm.rb +2 -1
- data/lib/mechanize/http/auth_store.rb +1 -0
- data/lib/mechanize/http/content_disposition_parser.rb +18 -3
- data/lib/mechanize/http/www_authenticate_parser.rb +4 -4
- data/lib/mechanize/image.rb +1 -0
- data/lib/mechanize/page.rb +8 -5
- data/lib/mechanize/page/base.rb +1 -0
- data/lib/mechanize/page/frame.rb +4 -1
- data/lib/mechanize/page/image.rb +1 -0
- data/lib/mechanize/page/label.rb +1 -0
- data/lib/mechanize/page/link.rb +8 -1
- data/lib/mechanize/page/meta_refresh.rb +1 -0
- data/lib/mechanize/parser.rb +4 -3
- data/lib/mechanize/pluggable_parsers.rb +1 -0
- data/lib/mechanize/prependable.rb +1 -0
- data/lib/mechanize/redirect_limit_reached_error.rb +1 -0
- data/lib/mechanize/redirect_not_get_or_head_error.rb +1 -0
- data/lib/mechanize/response_code_error.rb +2 -1
- data/lib/mechanize/response_read_error.rb +1 -0
- data/lib/mechanize/robots_disallowed_error.rb +1 -0
- data/lib/mechanize/test_case.rb +34 -29
- data/lib/mechanize/test_case/bad_chunking_servlet.rb +1 -0
- data/lib/mechanize/test_case/basic_auth_servlet.rb +1 -0
- data/lib/mechanize/test_case/content_type_servlet.rb +1 -0
- data/lib/mechanize/test_case/digest_auth_servlet.rb +1 -0
- data/lib/mechanize/test_case/file_upload_servlet.rb +1 -0
- data/lib/mechanize/test_case/form_servlet.rb +1 -0
- data/lib/mechanize/test_case/gzip_servlet.rb +4 -3
- data/lib/mechanize/test_case/header_servlet.rb +1 -0
- data/lib/mechanize/test_case/http_refresh_servlet.rb +2 -2
- data/lib/mechanize/test_case/infinite_redirect_servlet.rb +1 -0
- data/lib/mechanize/test_case/infinite_refresh_servlet.rb +2 -2
- data/lib/mechanize/test_case/many_cookies_as_string_servlet.rb +1 -0
- data/lib/mechanize/test_case/many_cookies_servlet.rb +1 -0
- data/lib/mechanize/test_case/modified_since_servlet.rb +1 -0
- data/lib/mechanize/test_case/ntlm_servlet.rb +1 -0
- data/lib/mechanize/test_case/one_cookie_no_spaces_servlet.rb +1 -0
- data/lib/mechanize/test_case/one_cookie_servlet.rb +1 -0
- data/lib/mechanize/test_case/quoted_value_cookie_servlet.rb +1 -0
- data/lib/mechanize/test_case/redirect_servlet.rb +1 -0
- data/lib/mechanize/test_case/referer_servlet.rb +1 -0
- data/lib/mechanize/test_case/refresh_with_empty_url.rb +1 -0
- data/lib/mechanize/test_case/refresh_without_url.rb +1 -0
- data/lib/mechanize/test_case/response_code_servlet.rb +1 -0
- data/lib/mechanize/test_case/robots_txt_servlet.rb +15 -0
- data/lib/mechanize/test_case/send_cookies_servlet.rb +1 -0
- data/lib/mechanize/test_case/server.rb +1 -0
- data/lib/mechanize/test_case/servlets.rb +4 -0
- data/lib/mechanize/test_case/verb_servlet.rb +5 -6
- data/lib/mechanize/unauthorized_error.rb +2 -1
- data/lib/mechanize/unsupported_scheme_error.rb +1 -0
- data/lib/mechanize/util.rb +5 -3
- data/lib/mechanize/version.rb +2 -1
- data/lib/mechanize/xml_file.rb +1 -0
- data/mechanize.gemspec +39 -31
- data/test/htdocs/dir with spaces/foo.html +1 -0
- data/test/htdocs/find_link.html +1 -4
- data/test/htdocs/tc_links.html +1 -1
- data/test/test_mechanize.rb +57 -15
- data/test/test_mechanize_cookie.rb +75 -60
- data/test/test_mechanize_cookie_jar.rb +112 -59
- data/test/test_mechanize_download.rb +13 -1
- data/test/test_mechanize_file.rb +10 -0
- data/test/test_mechanize_file_connection.rb +21 -3
- data/test/test_mechanize_file_response.rb +26 -2
- data/test/test_mechanize_form.rb +27 -11
- data/test/test_mechanize_form_check_box.rb +10 -0
- data/test/test_mechanize_form_encoding.rb +1 -1
- data/test/test_mechanize_form_keygen.rb +1 -0
- data/test/test_mechanize_form_multi_select_list.rb +5 -1
- data/test/test_mechanize_http_agent.rb +116 -8
- data/test/test_mechanize_http_auth_challenge.rb +14 -0
- data/test/test_mechanize_http_auth_realm.rb +7 -1
- data/test/test_mechanize_http_auth_store.rb +37 -0
- data/test/test_mechanize_http_content_disposition_parser.rb +35 -1
- data/test/test_mechanize_http_www_authenticate_parser.rb +16 -0
- data/test/test_mechanize_link.rb +47 -4
- data/test/test_mechanize_page.rb +29 -1
- data/test/test_mechanize_page_encoding.rb +23 -1
- data/test/test_mechanize_page_image.rb +1 -1
- data/test/test_mechanize_page_link.rb +3 -3
- data/test/test_mechanize_page_meta_refresh.rb +1 -1
- data/test/test_mechanize_parser.rb +12 -2
- data/test/test_mechanize_util.rb +1 -1
- metadata +105 -81
- data/.travis.yml +0 -25
- data/Manifest.txt +0 -204
- data/README.rdoc +0 -77
- data/test/htdocs/robots.txt +0 -2
data/lib/mechanize/form/field.rb
CHANGED
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
1
2
|
##
|
|
2
3
|
# This class represents a field in a form. It handles the following input
|
|
3
4
|
# tags found in a form:
|
|
@@ -14,6 +15,8 @@
|
|
|
14
15
|
# field.value = "foo"
|
|
15
16
|
|
|
16
17
|
class Mechanize::Form::Field
|
|
18
|
+
extend Forwardable
|
|
19
|
+
|
|
17
20
|
attr_accessor :name, :value, :node, :type
|
|
18
21
|
|
|
19
22
|
# This fields value before it's sent through Util.html_unescape.
|
|
@@ -67,6 +70,50 @@ class Mechanize::Form::Field
|
|
|
67
70
|
node['class']
|
|
68
71
|
end
|
|
69
72
|
|
|
73
|
+
##
|
|
74
|
+
# :method: search
|
|
75
|
+
#
|
|
76
|
+
# Shorthand for +node.search+.
|
|
77
|
+
#
|
|
78
|
+
# See Nokogiri::XML::Node#search for details.
|
|
79
|
+
|
|
80
|
+
##
|
|
81
|
+
# :method: css
|
|
82
|
+
#
|
|
83
|
+
# Shorthand for +node.css+.
|
|
84
|
+
#
|
|
85
|
+
# See also Nokogiri::XML::Node#css for details.
|
|
86
|
+
|
|
87
|
+
##
|
|
88
|
+
# :method: xpath
|
|
89
|
+
#
|
|
90
|
+
# Shorthand for +node.xpath+.
|
|
91
|
+
#
|
|
92
|
+
# See also Nokogiri::XML::Node#xpath for details.
|
|
93
|
+
|
|
94
|
+
##
|
|
95
|
+
# :method: at
|
|
96
|
+
#
|
|
97
|
+
# Shorthand for +node.at+.
|
|
98
|
+
#
|
|
99
|
+
# See also Nokogiri::XML::Node#at for details.
|
|
100
|
+
|
|
101
|
+
##
|
|
102
|
+
# :method: at_css
|
|
103
|
+
#
|
|
104
|
+
# Shorthand for +node.at_css+.
|
|
105
|
+
#
|
|
106
|
+
# See also Nokogiri::XML::Node#at_css for details.
|
|
107
|
+
|
|
108
|
+
##
|
|
109
|
+
# :method: at_xpath
|
|
110
|
+
#
|
|
111
|
+
# Shorthand for +node.at_xpath+.
|
|
112
|
+
#
|
|
113
|
+
# See also Nokogiri::XML::Node#at_xpath for details.
|
|
114
|
+
|
|
115
|
+
def_delegators :node, :search, :css, :xpath, :at, :at_css, :at_xpath
|
|
116
|
+
|
|
70
117
|
def inspect # :nodoc:
|
|
71
118
|
"[%s:0x%x type: %s name: %s value: %s]" % [
|
|
72
119
|
self.class.name.sub(/Mechanize::Form::/, '').downcase,
|
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
1
2
|
##
|
|
2
3
|
# This class represents a select list where multiple values can be selected.
|
|
3
4
|
# MultiSelectList#value= accepts an array, and those values are used as
|
|
@@ -12,19 +13,15 @@
|
|
|
12
13
|
# list.value = 'one'
|
|
13
14
|
|
|
14
15
|
class Mechanize::Form::MultiSelectList < Mechanize::Form::Field
|
|
15
|
-
|
|
16
16
|
extend Mechanize::ElementMatcher
|
|
17
17
|
|
|
18
18
|
attr_accessor :options
|
|
19
19
|
|
|
20
20
|
def initialize node
|
|
21
21
|
value = []
|
|
22
|
-
@options =
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
node.search('option').each do |n|
|
|
26
|
-
@options << Mechanize::Form::Option.new(n, self)
|
|
27
|
-
end
|
|
22
|
+
@options = node.search('option').map { |n|
|
|
23
|
+
Mechanize::Form::Option.new(n, self)
|
|
24
|
+
}
|
|
28
25
|
|
|
29
26
|
super node, value
|
|
30
27
|
end
|
|
@@ -64,18 +61,18 @@ class Mechanize::Form::MultiSelectList < Mechanize::Form::Field
|
|
|
64
61
|
# Select no options
|
|
65
62
|
def select_none
|
|
66
63
|
@value = []
|
|
67
|
-
options.each
|
|
64
|
+
options.each(&:untick)
|
|
68
65
|
end
|
|
69
66
|
|
|
70
67
|
# Select all options
|
|
71
68
|
def select_all
|
|
72
69
|
@value = []
|
|
73
|
-
options.each
|
|
70
|
+
options.each(&:tick)
|
|
74
71
|
end
|
|
75
72
|
|
|
76
73
|
# Get a list of all selected options
|
|
77
74
|
def selected_options
|
|
78
|
-
@options.find_all
|
|
75
|
+
@options.find_all(&:selected?)
|
|
79
76
|
end
|
|
80
77
|
|
|
81
78
|
def value=(values)
|
|
@@ -91,10 +88,7 @@ class Mechanize::Form::MultiSelectList < Mechanize::Form::Field
|
|
|
91
88
|
end
|
|
92
89
|
|
|
93
90
|
def value
|
|
94
|
-
value
|
|
95
|
-
value.concat @value
|
|
96
|
-
value.concat selected_options.map { |o| o.value }
|
|
97
|
-
value
|
|
91
|
+
@value + selected_options.map(&:value)
|
|
98
92
|
end
|
|
99
93
|
|
|
100
94
|
end
|
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
1
2
|
##
|
|
2
3
|
# This class contains an option found within SelectList. A SelectList can
|
|
3
4
|
# have many Option classes associated with it. An option can be selected by
|
|
@@ -8,12 +9,13 @@
|
|
|
8
9
|
# select_list.first.tick
|
|
9
10
|
|
|
10
11
|
class Mechanize::Form::Option
|
|
11
|
-
attr_reader :value, :selected, :text, :select_list
|
|
12
|
+
attr_reader :value, :selected, :text, :select_list, :node
|
|
12
13
|
|
|
13
14
|
alias :to_s :value
|
|
14
15
|
alias :selected? :selected
|
|
15
16
|
|
|
16
17
|
def initialize(node, select_list)
|
|
18
|
+
@node = node
|
|
17
19
|
@text = node.inner_text
|
|
18
20
|
@value = Mechanize::Util.html_unescape(node['value'] || node.inner_text)
|
|
19
21
|
@selected = node.has_attribute? 'selected'
|
data/lib/mechanize/form/reset.rb
CHANGED
data/lib/mechanize/form/text.rb
CHANGED
data/lib/mechanize/headers.rb
CHANGED
data/lib/mechanize/history.rb
CHANGED
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
1
2
|
##
|
|
2
3
|
# This class manages history for your mechanize object.
|
|
3
4
|
|
|
@@ -16,7 +17,7 @@ class Mechanize::History < Array
|
|
|
16
17
|
end
|
|
17
18
|
|
|
18
19
|
def inspect # :nodoc:
|
|
19
|
-
uris = map
|
|
20
|
+
uris = map(&:uri).join ', '
|
|
20
21
|
|
|
21
22
|
"[#{uris}]"
|
|
22
23
|
end
|
data/lib/mechanize/http.rb
CHANGED
data/lib/mechanize/http/agent.rb
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
1
2
|
require 'tempfile'
|
|
2
3
|
require 'net/ntlm'
|
|
3
|
-
require 'kconv'
|
|
4
4
|
require 'webrobots'
|
|
5
5
|
|
|
6
6
|
##
|
|
@@ -9,6 +9,9 @@ require 'webrobots'
|
|
|
9
9
|
|
|
10
10
|
class Mechanize::HTTP::Agent
|
|
11
11
|
|
|
12
|
+
CREDENTIAL_HEADERS = ['Authorization', 'Cookie']
|
|
13
|
+
POST_HEADERS = ['Content-Length', 'Content-MD5', 'Content-Type']
|
|
14
|
+
|
|
12
15
|
# :section: Headers
|
|
13
16
|
|
|
14
17
|
# Disables If-Modified-Since conditional requests (enabled by default)
|
|
@@ -71,7 +74,10 @@ class Mechanize::HTTP::Agent
|
|
|
71
74
|
|
|
72
75
|
# :section: Allowed error codes
|
|
73
76
|
|
|
74
|
-
# List of error codes to handle without
|
|
77
|
+
# List of error codes (in String or Integer) to handle without
|
|
78
|
+
# raising Mechanize::ResponseCodeError, defaulted to an empty array.
|
|
79
|
+
# Note that 2xx, 3xx and 401 status codes will be handled without
|
|
80
|
+
# checking this list.
|
|
75
81
|
|
|
76
82
|
attr_accessor :allowed_error_codes
|
|
77
83
|
|
|
@@ -80,6 +86,9 @@ class Mechanize::HTTP::Agent
|
|
|
80
86
|
# When true, this agent will consult the site's robots.txt for each access.
|
|
81
87
|
attr_reader :robots
|
|
82
88
|
|
|
89
|
+
# Mutex used when fetching robots.txt
|
|
90
|
+
attr_reader :robots_mutex
|
|
91
|
+
|
|
83
92
|
# :section: SSL
|
|
84
93
|
|
|
85
94
|
# OpenSSL key password
|
|
@@ -148,6 +157,7 @@ class Mechanize::HTTP::Agent
|
|
|
148
157
|
@redirection_limit = 20
|
|
149
158
|
@request_headers = {}
|
|
150
159
|
@robots = false
|
|
160
|
+
@robots_mutex = Mutex.new
|
|
151
161
|
@user_agent = nil
|
|
152
162
|
@webrobots = nil
|
|
153
163
|
|
|
@@ -176,7 +186,13 @@ class Mechanize::HTTP::Agent
|
|
|
176
186
|
@scheme_handlers['relative'] = @scheme_handlers['http']
|
|
177
187
|
@scheme_handlers['file'] = @scheme_handlers['http']
|
|
178
188
|
|
|
179
|
-
@http =
|
|
189
|
+
@http =
|
|
190
|
+
if defined?(Net::HTTP::Persistent::DEFAULT_POOL_SIZE)
|
|
191
|
+
Net::HTTP::Persistent.new(name: connection_name)
|
|
192
|
+
else
|
|
193
|
+
# net-http-persistent < 3.0
|
|
194
|
+
Net::HTTP::Persistent.new(connection_name)
|
|
195
|
+
end
|
|
180
196
|
@http.idle_timeout = 5
|
|
181
197
|
@http.keep_alive = 300
|
|
182
198
|
end
|
|
@@ -523,8 +539,8 @@ class Mechanize::HTTP::Agent
|
|
|
523
539
|
|
|
524
540
|
def request_auth request, uri
|
|
525
541
|
base_uri = uri + '/'
|
|
526
|
-
base_uri.user
|
|
527
|
-
base_uri.password
|
|
542
|
+
base_uri.user &&= nil
|
|
543
|
+
base_uri.password &&= nil
|
|
528
544
|
schemes = @authenticate_methods[base_uri]
|
|
529
545
|
|
|
530
546
|
if realm = schemes[:digest].find { |r| r.uri == base_uri } then
|
|
@@ -637,14 +653,6 @@ class Mechanize::HTTP::Agent
|
|
|
637
653
|
scheme = uri.relative? ? 'relative' : uri.scheme.downcase
|
|
638
654
|
uri = @scheme_handlers[scheme].call(uri, referer)
|
|
639
655
|
|
|
640
|
-
if referer_uri
|
|
641
|
-
if uri.path.length == 0 && uri.relative?
|
|
642
|
-
uri.path = referer_uri.path
|
|
643
|
-
end
|
|
644
|
-
end
|
|
645
|
-
|
|
646
|
-
uri.path = '/' if uri.path.length == 0
|
|
647
|
-
|
|
648
656
|
if uri.relative?
|
|
649
657
|
raise ArgumentError, "absolute URL needed (not #{uri})" unless
|
|
650
658
|
referer_uri
|
|
@@ -674,6 +682,13 @@ class Mechanize::HTTP::Agent
|
|
|
674
682
|
raise ArgumentError, "unsupported scheme: #{uri.scheme}"
|
|
675
683
|
end
|
|
676
684
|
|
|
685
|
+
case uri.path
|
|
686
|
+
when nil
|
|
687
|
+
raise ArgumentError, "hierarchical URL needed (not #{uri})"
|
|
688
|
+
when ''.freeze
|
|
689
|
+
uri.path = '/'
|
|
690
|
+
end
|
|
691
|
+
|
|
677
692
|
uri
|
|
678
693
|
end
|
|
679
694
|
|
|
@@ -810,7 +825,7 @@ class Mechanize::HTTP::Agent
|
|
|
810
825
|
return body_io if length.zero?
|
|
811
826
|
|
|
812
827
|
out_io = case response['Content-Encoding']
|
|
813
|
-
when nil, 'none', '7bit', "" then
|
|
828
|
+
when nil, 'none', '7bit', 'identity', "" then
|
|
814
829
|
body_io
|
|
815
830
|
when 'deflate' then
|
|
816
831
|
content_encoding_inflate body_io
|
|
@@ -826,7 +841,7 @@ class Mechanize::HTTP::Agent
|
|
|
826
841
|
|
|
827
842
|
out_io
|
|
828
843
|
rescue Zlib::Error => e
|
|
829
|
-
message = "error handling content-encoding #{response['Content-Encoding']}:"
|
|
844
|
+
message = String.new("error handling content-encoding #{response['Content-Encoding']}:")
|
|
830
845
|
message << " #{e.message} (#{e.class})"
|
|
831
846
|
raise Mechanize::Error, message
|
|
832
847
|
ensure
|
|
@@ -974,25 +989,46 @@ class Mechanize::HTTP::Agent
|
|
|
974
989
|
|
|
975
990
|
redirect_method = method == :head ? :head : :get
|
|
976
991
|
|
|
992
|
+
new_uri = secure_resolve!(response['Location'].to_s, page)
|
|
993
|
+
@history.push(page, page.uri)
|
|
994
|
+
|
|
977
995
|
# Make sure we are not copying over the POST headers from the original request
|
|
978
|
-
|
|
979
|
-
headers.
|
|
996
|
+
POST_HEADERS.each do |key|
|
|
997
|
+
headers.delete_if { |h| h.casecmp?(key) }
|
|
980
998
|
end
|
|
981
999
|
|
|
982
|
-
|
|
983
|
-
|
|
984
|
-
|
|
1000
|
+
# Make sure we clear credential headers if being redirected to another site
|
|
1001
|
+
if new_uri.host != page.uri.host
|
|
1002
|
+
CREDENTIAL_HEADERS.each do |ch|
|
|
1003
|
+
headers.delete_if { |h| h.casecmp?(ch) }
|
|
1004
|
+
end
|
|
1005
|
+
end
|
|
985
1006
|
|
|
986
1007
|
fetch new_uri, redirect_method, headers, [], referer, redirects + 1
|
|
987
1008
|
end
|
|
988
1009
|
|
|
989
1010
|
# :section: Robots
|
|
990
1011
|
|
|
1012
|
+
RobotsKey = :__mechanize_get_robots__
|
|
1013
|
+
|
|
991
1014
|
def get_robots(uri) # :nodoc:
|
|
992
|
-
|
|
993
|
-
|
|
994
|
-
|
|
995
|
-
|
|
1015
|
+
robots_mutex.synchronize do
|
|
1016
|
+
Thread.current[RobotsKey] = true
|
|
1017
|
+
begin
|
|
1018
|
+
fetch(uri).body
|
|
1019
|
+
rescue Mechanize::ResponseCodeError => e
|
|
1020
|
+
case e.response_code
|
|
1021
|
+
when /\A4\d\d\z/
|
|
1022
|
+
''
|
|
1023
|
+
else
|
|
1024
|
+
raise e
|
|
1025
|
+
end
|
|
1026
|
+
rescue Mechanize::RedirectLimitReachedError
|
|
1027
|
+
''
|
|
1028
|
+
ensure
|
|
1029
|
+
Thread.current[RobotsKey] = false
|
|
1030
|
+
end
|
|
1031
|
+
end
|
|
996
1032
|
end
|
|
997
1033
|
|
|
998
1034
|
def robots= value
|
|
@@ -1006,7 +1042,7 @@ class Mechanize::HTTP::Agent
|
|
|
1006
1042
|
# robots.txt.
|
|
1007
1043
|
|
|
1008
1044
|
def robots_allowed? uri
|
|
1009
|
-
return true if
|
|
1045
|
+
return true if Thread.current[RobotsKey]
|
|
1010
1046
|
|
|
1011
1047
|
webrobots.allowed? uri
|
|
1012
1048
|
end
|
|
@@ -1190,31 +1226,39 @@ class Mechanize::HTTP::Agent
|
|
|
1190
1226
|
end
|
|
1191
1227
|
|
|
1192
1228
|
##
|
|
1193
|
-
# Sets the proxy address, port, user, and password +addr+
|
|
1194
|
-
#
|
|
1195
|
-
# number string.
|
|
1196
|
-
|
|
1197
|
-
def set_proxy addr, port, user = nil, pass = nil
|
|
1198
|
-
|
|
1229
|
+
# Sets the proxy address, port, user, and password. +addr+ may be
|
|
1230
|
+
# an HTTP URL/URI or a host name, +port+ may be a port number, service
|
|
1231
|
+
# name or port number string.
|
|
1232
|
+
|
|
1233
|
+
def set_proxy addr, port = nil, user = nil, pass = nil
|
|
1234
|
+
case addr
|
|
1235
|
+
when URI::HTTP
|
|
1236
|
+
proxy_uri = addr.dup
|
|
1237
|
+
when %r{\Ahttps?://}i
|
|
1238
|
+
proxy_uri = URI addr
|
|
1239
|
+
when String
|
|
1240
|
+
proxy_uri = URI "http://#{addr}"
|
|
1241
|
+
when nil
|
|
1199
1242
|
@http.proxy = nil
|
|
1200
|
-
|
|
1201
1243
|
return
|
|
1202
1244
|
end
|
|
1203
1245
|
|
|
1204
|
-
|
|
1246
|
+
case port
|
|
1247
|
+
when Integer
|
|
1248
|
+
proxy_uri.port = port
|
|
1249
|
+
when nil
|
|
1250
|
+
else
|
|
1205
1251
|
begin
|
|
1206
|
-
port = Socket.getservbyname port
|
|
1252
|
+
proxy_uri.port = Socket.getservbyname port
|
|
1207
1253
|
rescue SocketError
|
|
1208
1254
|
begin
|
|
1209
|
-
port = Integer port
|
|
1255
|
+
proxy_uri.port = Integer port
|
|
1210
1256
|
rescue ArgumentError
|
|
1211
1257
|
raise ArgumentError, "invalid value for port: #{port.inspect}"
|
|
1212
1258
|
end
|
|
1213
1259
|
end
|
|
1214
1260
|
end
|
|
1215
1261
|
|
|
1216
|
-
proxy_uri = URI "http://#{addr}"
|
|
1217
|
-
proxy_uri.port = port
|
|
1218
1262
|
proxy_uri.user = user if user
|
|
1219
1263
|
proxy_uri.password = pass if pass
|
|
1220
1264
|
|
|
@@ -1243,4 +1287,3 @@ class Mechanize::HTTP::Agent
|
|
|
1243
1287
|
end
|
|
1244
1288
|
|
|
1245
1289
|
require 'mechanize/http/auth_store'
|
|
1246
|
-
|