aai10-mechanize 2.0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/.autotest +6 -0
- data/.gitignore +9 -0
- data/CHANGELOG.rdoc +652 -0
- data/EXAMPLES.rdoc +187 -0
- data/FAQ.rdoc +11 -0
- data/GUIDE.rdoc +163 -0
- data/LICENSE.rdoc +20 -0
- data/Manifest.txt +172 -0
- data/README.rdoc +63 -0
- data/Rakefile +36 -0
- data/aai10-mechanize.gemspec +20 -0
- data/examples/flickr_upload.rb +22 -0
- data/examples/mech-dump.rb +5 -0
- data/examples/proxy_req.rb +7 -0
- data/examples/rubyforge.rb +20 -0
- data/examples/spider.rb +21 -0
- data/lib/mechanize.rb +664 -0
- data/lib/mechanize/content_type_error.rb +14 -0
- data/lib/mechanize/cookie.rb +116 -0
- data/lib/mechanize/cookie_jar.rb +202 -0
- data/lib/mechanize/element_matcher.rb +35 -0
- data/lib/mechanize/file.rb +80 -0
- data/lib/mechanize/file_connection.rb +17 -0
- data/lib/mechanize/file_request.rb +26 -0
- data/lib/mechanize/file_response.rb +74 -0
- data/lib/mechanize/file_saver.rb +37 -0
- data/lib/mechanize/form.rb +478 -0
- data/lib/mechanize/form/button.rb +9 -0
- data/lib/mechanize/form/check_box.rb +11 -0
- data/lib/mechanize/form/field.rb +44 -0
- data/lib/mechanize/form/file_upload.rb +23 -0
- data/lib/mechanize/form/image_button.rb +20 -0
- data/lib/mechanize/form/multi_select_list.rb +83 -0
- data/lib/mechanize/form/option.rb +49 -0
- data/lib/mechanize/form/radio_button.rb +48 -0
- data/lib/mechanize/form/select_list.rb +40 -0
- data/lib/mechanize/headers.rb +25 -0
- data/lib/mechanize/history.rb +83 -0
- data/lib/mechanize/http.rb +3 -0
- data/lib/mechanize/http/agent.rb +738 -0
- data/lib/mechanize/inspect.rb +88 -0
- data/lib/mechanize/monkey_patch.rb +37 -0
- data/lib/mechanize/page.rb +408 -0
- data/lib/mechanize/page/base.rb +8 -0
- data/lib/mechanize/page/frame.rb +27 -0
- data/lib/mechanize/page/image.rb +30 -0
- data/lib/mechanize/page/label.rb +20 -0
- data/lib/mechanize/page/link.rb +82 -0
- data/lib/mechanize/page/meta_refresh.rb +56 -0
- data/lib/mechanize/pluggable_parsers.rb +101 -0
- data/lib/mechanize/redirect_limit_reached_error.rb +16 -0
- data/lib/mechanize/redirect_not_get_or_head_error.rb +19 -0
- data/lib/mechanize/response_code_error.rb +22 -0
- data/lib/mechanize/response_read_error.rb +27 -0
- data/lib/mechanize/robots_disallowed_error.rb +29 -0
- data/lib/mechanize/unsupported_scheme_error.rb +8 -0
- data/lib/mechanize/util.rb +113 -0
- data/test/data/htpasswd +1 -0
- data/test/data/server.crt +16 -0
- data/test/data/server.csr +12 -0
- data/test/data/server.key +15 -0
- data/test/data/server.pem +15 -0
- data/test/helper.rb +175 -0
- data/test/htdocs/alt_text.html +10 -0
- data/test/htdocs/bad_form_test.html +9 -0
- data/test/htdocs/button.jpg +0 -0
- data/test/htdocs/canonical_uri.html +9 -0
- data/test/htdocs/dir with spaces/foo.html +1 -0
- data/test/htdocs/empty_form.html +6 -0
- data/test/htdocs/file_upload.html +26 -0
- data/test/htdocs/find_link.html +41 -0
- data/test/htdocs/form_multi_select.html +16 -0
- data/test/htdocs/form_multival.html +37 -0
- data/test/htdocs/form_no_action.html +18 -0
- data/test/htdocs/form_no_input_name.html +16 -0
- data/test/htdocs/form_select.html +16 -0
- data/test/htdocs/form_select_all.html +16 -0
- data/test/htdocs/form_select_none.html +17 -0
- data/test/htdocs/form_select_noopts.html +10 -0
- data/test/htdocs/form_set_fields.html +14 -0
- data/test/htdocs/form_test.html +188 -0
- data/test/htdocs/frame_referer_test.html +10 -0
- data/test/htdocs/frame_test.html +30 -0
- data/test/htdocs/google.html +13 -0
- data/test/htdocs/iframe_test.html +16 -0
- data/test/htdocs/index.html +6 -0
- data/test/htdocs/link with space.html +5 -0
- data/test/htdocs/meta_cookie.html +11 -0
- data/test/htdocs/no_title_test.html +6 -0
- data/test/htdocs/nofollow.html +9 -0
- data/test/htdocs/noindex.html +9 -0
- data/test/htdocs/norobots.html +8 -0
- data/test/htdocs/rails_3_encoding_hack_form_test.html +27 -0
- data/test/htdocs/rel_nofollow.html +8 -0
- data/test/htdocs/relative/tc_relative_links.html +21 -0
- data/test/htdocs/robots.html +8 -0
- data/test/htdocs/robots.txt +2 -0
- data/test/htdocs/tc_bad_charset.html +9 -0
- data/test/htdocs/tc_bad_links.html +5 -0
- data/test/htdocs/tc_base_images.html +10 -0
- data/test/htdocs/tc_base_link.html +8 -0
- data/test/htdocs/tc_blank_form.html +11 -0
- data/test/htdocs/tc_charset.html +6 -0
- data/test/htdocs/tc_checkboxes.html +19 -0
- data/test/htdocs/tc_encoded_links.html +5 -0
- data/test/htdocs/tc_field_precedence.html +11 -0
- data/test/htdocs/tc_follow_meta.html +8 -0
- data/test/htdocs/tc_form_action.html +48 -0
- data/test/htdocs/tc_images.html +8 -0
- data/test/htdocs/tc_links.html +18 -0
- data/test/htdocs/tc_meta_in_body.html +9 -0
- data/test/htdocs/tc_no_attributes.html +16 -0
- data/test/htdocs/tc_pretty_print.html +17 -0
- data/test/htdocs/tc_radiobuttons.html +17 -0
- data/test/htdocs/tc_referer.html +16 -0
- data/test/htdocs/tc_relative_links.html +19 -0
- data/test/htdocs/tc_textarea.html +23 -0
- data/test/htdocs/test_bad_encoding.html +52 -0
- data/test/htdocs/test_click.html +11 -0
- data/test/htdocs/unusual______.html +5 -0
- data/test/servlets.rb +402 -0
- data/test/ssl_server.rb +48 -0
- data/test/test_cookies.rb +129 -0
- data/test/test_form_action.rb +52 -0
- data/test/test_form_as_hash.rb +59 -0
- data/test/test_form_button.rb +46 -0
- data/test/test_frames.rb +34 -0
- data/test/test_headers.rb +33 -0
- data/test/test_history.rb +118 -0
- data/test/test_history_added.rb +16 -0
- data/test/test_html_unscape_forms.rb +46 -0
- data/test/test_if_modified_since.rb +20 -0
- data/test/test_images.rb +19 -0
- data/test/test_mechanize.rb +852 -0
- data/test/test_mechanize_cookie.rb +345 -0
- data/test/test_mechanize_cookie_jar.rb +433 -0
- data/test/test_mechanize_file.rb +53 -0
- data/test/test_mechanize_file_request.rb +19 -0
- data/test/test_mechanize_file_response.rb +21 -0
- data/test/test_mechanize_form.rb +576 -0
- data/test/test_mechanize_form_check_box.rb +37 -0
- data/test/test_mechanize_form_encoding.rb +120 -0
- data/test/test_mechanize_form_field.rb +21 -0
- data/test/test_mechanize_form_image_button.rb +12 -0
- data/test/test_mechanize_form_textarea.rb +51 -0
- data/test/test_mechanize_http_agent.rb +697 -0
- data/test/test_mechanize_link.rb +84 -0
- data/test/test_mechanize_page_encoding.rb +147 -0
- data/test/test_mechanize_page_link.rb +382 -0
- data/test/test_mechanize_page_meta_refresh.rb +115 -0
- data/test/test_mechanize_redirect_not_get_or_head_error.rb +18 -0
- data/test/test_mechanize_subclass.rb +22 -0
- data/test/test_mechanize_util.rb +92 -0
- data/test/test_multi_select.rb +118 -0
- data/test/test_no_attributes.rb +13 -0
- data/test/test_option.rb +18 -0
- data/test/test_pluggable_parser.rb +136 -0
- data/test/test_post_form.rb +37 -0
- data/test/test_pretty_print.rb +22 -0
- data/test/test_radiobutton.rb +75 -0
- data/test/test_redirect_limit_reached.rb +39 -0
- data/test/test_redirect_ok.rb +25 -0
- data/test/test_referer.rb +81 -0
- data/test/test_relative_links.rb +40 -0
- data/test/test_request.rb +13 -0
- data/test/test_response_code.rb +53 -0
- data/test/test_robots.rb +72 -0
- data/test/test_save_file.rb +48 -0
- data/test/test_scheme.rb +48 -0
- data/test/test_select.rb +119 -0
- data/test/test_select_all.rb +15 -0
- data/test/test_select_none.rb +15 -0
- data/test/test_select_noopts.rb +18 -0
- data/test/test_set_fields.rb +44 -0
- data/test/test_ssl_server.rb +20 -0
- metadata +360 -0
@@ -0,0 +1,27 @@
|
|
1
|
+
# A 'frame' tag found on a page.  Frame is a Link subclass, so a Frame can be
# used wherever a Link is expected: #src is an alias of #href (the value of
# the tag's src attribute) and #name is an alias of #text (the value of the
# tag's name attribute).

class Mechanize::Page::Frame < Mechanize::Page::Link

  # The value of the frame's name attribute.  This replaces the inherited
  # reader, so it must be declared before the #name alias below.
  attr_reader :text

  alias :src  :href
  alias :name :text

  # Builds a Frame from the frame +node+, the +mech+ agent used to fetch its
  # content, and the +referer+ page that contains the frame.
  def initialize(node, mech, referer)
    # The superclass stores node, mech and the referring page.
    super

    @text    = node['name']
    @href    = node['src']
    @content = nil
  end

  # Fetches what the frame points at through the agent, passing the
  # containing page as the third argument of the get call.  The result is
  # memoized after the first call.
  def content
    @content ||= @mech.get(@href, [], page)
  end

end
|
27
|
+
|
@@ -0,0 +1,30 @@
|
|
1
|
+
class Mechanize
  class Page < Mechanize::File
    # Wraps an image node together with the page it was found on.
    class Image
      # The wrapped node.
      attr_reader :node
      # The page the node belongs to.
      attr_reader :page

      def initialize(node, page)
        @node, @page = node, page
      end

      # The raw value of the node's src attribute.
      def src
        @node['src']
      end

      # The location of the image as a String.
      #
      # A src that already starts with http:// or https:// is returned
      # untouched.  Anything else is appended (with +) to the href of the
      # page's first base when there is one, otherwise to the page's uri.
      def url
        return src if src =~ %r{^https?://}

        base = page.bases[0]
        base ? (base.href + src).to_s : (page.uri + src).to_s
      end
    end
  end
end
|
@@ -0,0 +1,20 @@
|
|
1
|
+
class Mechanize
  class Page < Mechanize::File
    # Wraps a label node together with the page it was found on.
    class Label
      # The wrapped node.
      attr_reader :node
      # The node's inner text, captured at construction time.
      attr_reader :text
      # The page the node belongs to.
      attr_reader :page

      alias :to_s :text

      def initialize(node, page)
        @node = node
        @page = page
        @text = node.inner_text
      end

      # Searches the page for the element whose id matches this label's
      # 'for' attribute.  Returns nil when the label has no 'for' attribute.
      def for
        target_id = @node['for']
        return nil unless target_id

        page.search("##{target_id}") || nil
      end
    end
  end
end
|
@@ -0,0 +1,82 @@
|
|
1
|
+
##
# This class encapsulates links.  It contains the text and the URI for
# 'a' tags parsed out of an HTML page.  If the link has no text of its own
# but contains images, the images' alt text is used instead.
#
# For example, the text for the following links will both be 'Hello World':
#
#   <a href="http://example">Hello World</a>
#   <a href="http://example"><img src="test.jpg" alt="Hello World"></a>

class Mechanize::Page::Link
  # The node this link wraps.
  attr_reader :node
  # The raw value of the href attribute.
  attr_reader :href
  # The same object as #node; attributes are read through it with [].
  attr_reader :attributes
  # The page this link was found on.
  attr_reader :page

  alias :referer :page

  def initialize(node, mech, page)
    @node       = node
    @attributes = node
    @href       = node['href']
    @mech       = mech
    @page       = page

    # Both values are computed lazily by #text and #uri.
    @text = nil
    @uri  = nil
  end

  # Click on this link
  def click
    @mech.click self
  end

  # This method is a shorthand to get the link's DOM id.
  # Common usage:
  #   page.link_with(:dom_id => "links_exact_id")
  def dom_id
    node['id']
  end

  # A list of words in the rel attribute, all lower-cased.  Empty when the
  # attribute is missing.
  def rel
    @rel ||= begin
      value = attributes['rel']
      value ? value.downcase.split(' ') : []
    end
  end

  # Test if the rel attribute includes +kind+.
  def rel? kind
    rel.include? kind
  end

  # The text content of this link
  def text
    return @text if @text

    @text = @node.inner_text

    # If there is no text, fall back to the joined alt text of any images
    # inside the link.
    if @text.nil? || @text.empty?
      images = @node.search('img')
      @text = images.map { |image| image['alt'] }.join if images
    end

    @text
  end

  alias :to_s :text

  # A URI for the #href for this link.  The link is first parsed as a raw
  # link.  If that fails, parsing an escaped link is attempted.  Returns nil
  # when there is no href.

  def uri
    return @uri if @uri
    return nil unless @href

    @uri = begin
      URI.parse @href
    rescue URI::InvalidURIError
      URI.parse WEBrick::HTTPUtils.escape(@href)
    end
  end

end
|
82
|
+
|
@@ -0,0 +1,56 @@
|
|
1
|
+
##
# This class encapsulates a meta element with a refresh http-equiv.  Mechanize
# treats meta refresh elements just like 'a' tags.  MetaRefresh objects will
# contain links, but most likely will have no text.

class Mechanize::Page::MetaRefresh < Mechanize::Page::Link

  ##
  # The refresh delay, converted with to_i.  CONTENT_REGEXP accepts a
  # fractional delay such as "0.5"; to_i truncates it to whole seconds.

  attr_reader :delay

  ##
  # Matches the content attribute of a meta refresh element.  After the match:
  #
  # group 1:: delay
  # group 3:: url

  CONTENT_REGEXP = /^\s*(\d+\.?\d*)(;|;\s*url=\s*['"]?(\S*?)['"]?)?\s*$/i

  ##
  # Parses the delay and url from the content attribute of a meta refresh
  # element.  Parse requires the uri of the current page (+base_uri+) to
  # infer a url when no url is specified.
  #
  # Returns a two-element array: the delay exactly as matched (a String) and
  # the destination, which is +base_uri+ itself when +content+ names no url
  # and <tt>base_uri + url</tt> otherwise.
  #
  # Returns nil if +content+ does not match CONTENT_REGEXP.

  def self.parse content, base_uri
    match = CONTENT_REGEXP.match content
    return unless match

    delay       = match[1]
    refresh_uri = match[3]

    dest = refresh_uri ? base_uri + refresh_uri : base_uri

    return delay, dest
  end

  ##
  # Builds a MetaRefresh from a meta +node+ found on +page+, using +uri+ as
  # the base for the destination.
  #
  # Returns nil when the node's http-equiv is not "refresh" (compared
  # case-insensitively) or when its content attribute cannot be parsed.

  def self.from_node node, page, uri
    http_equiv = node['http-equiv']
    return unless http_equiv && http_equiv.downcase == 'refresh'

    delay, dest = parse node['content'], uri

    return unless delay

    new node, page, delay, dest.to_s
  end

  ##
  # Creates a MetaRefresh for +node+ on +page+ with the given +delay+ and
  # destination +href+.  The agent handed to the superclass is page.mech.

  def initialize node, page, delay, href
    super node, page.mech, page

    @delay = delay.to_i
    @href  = href
  end

end
|
56
|
+
|
@@ -0,0 +1,101 @@
|
|
1
|
+
require 'mechanize/file'
|
2
|
+
require 'mechanize/file_saver'
|
3
|
+
require 'mechanize/page'
|
4
|
+
|
5
|
+
class Mechanize
  # = Synopsis
  # This class is used to register and maintain pluggable parsers for
  # Mechanize to use.
  #
  # A Pluggable Parser is a parser that Mechanize uses for any particular
  # content type.  Mechanize will ask PluggableParser for the class it
  # should initialize given any content type.  This class allows users to
  # register their own pluggable parsers, or modify existing pluggable
  # parsers.
  #
  # PluggableParser returns the default parser (Mechanize::File unless
  # changed) for content types that it does not know how to handle.
  # Mechanize::File provides basic functionality for any content type, so it
  # is a good class to extend when building your own parsers.
  # == Example
  # To create your own parser, just create a class that takes four
  # parameters in the constructor.  Here is an example of registering
  # a pluggable parser that handles CSV files:
  #  class CSVParser < Mechanize::File
  #    attr_reader :csv
  #    def initialize(uri=nil, response=nil, body=nil, code=nil)
  #      super(uri, response, body, code)
  #      @csv = CSV.parse(body)
  #    end
  #  end
  #  agent = Mechanize.new
  #  agent.pluggable_parser.csv = CSVParser
  #  agent.get('http://example.com/test.csv')  # => CSVParser
  # Now any page that returns the content type of 'text/csv' will initialize
  # a CSVParser and return that object to the caller.
  #
  # To register a pluggable parser for a content type that pluggable parser
  # does not know about, just use the hash syntax:
  #  agent.pluggable_parser['text/something'] = SomeClass
  #
  # To set the default parser, just use the 'default' method:
  #  agent.pluggable_parser.default = SomeClass
  # Now all unknown content types will be instances of SomeClass.
  class PluggableParser
    # Short names for the content types that have a dedicated writer below.
    # Frozen so the shared table cannot be modified by accident.
    CONTENT_TYPES = {
      :html  => 'text/html',
      :wap   => 'application/vnd.wap.xhtml+xml',
      :xhtml => 'application/xhtml+xml',
      :pdf   => 'application/pdf',
      :csv   => 'text/csv',
      :xml   => 'text/xml',
    }.freeze

    # The parser class returned for content types without a registration.
    attr_accessor :default

    # Starts with Page registered for the html, xhtml and wap content types
    # and File as the default parser.
    def initialize
      @parsers = {
        CONTENT_TYPES[:html]  => Page,
        CONTENT_TYPES[:xhtml] => Page,
        CONTENT_TYPES[:wap]   => Page,
      }
      @default = File
    end

    # Returns the parser class for +content_type+.
    #
    # The exact string is looked up first.  If nothing is registered under
    # it, the lookup is retried with any parameters (everything from the
    # first ';') removed and the media type lower-cased, so that
    # "Text/HTML; charset=UTF-8" finds the parser registered for
    # "text/html".  Falls back to #default when +content_type+ is nil or
    # nothing matches.
    def parser(content_type)
      return default if content_type.nil?

      @parsers[content_type] || @parsers[media_type(content_type)] || default
    end

    # Registers +klass+ as the parser for +content_type+.
    def register_parser(content_type, klass)
      @parsers[content_type] = klass
    end

    # Registers +klass+ for both text/html and application/xhtml+xml.
    def html=(klass)
      register_parser(CONTENT_TYPES[:html], klass)
      register_parser(CONTENT_TYPES[:xhtml], klass)
    end

    # Registers +klass+ for application/xhtml+xml.
    def xhtml=(klass)
      register_parser(CONTENT_TYPES[:xhtml], klass)
    end

    # Registers +klass+ for application/pdf.
    def pdf=(klass)
      register_parser(CONTENT_TYPES[:pdf], klass)
    end

    # Registers +klass+ for text/csv.
    def csv=(klass)
      register_parser(CONTENT_TYPES[:csv], klass)
    end

    # Registers +klass+ for text/xml.
    def xml=(klass)
      register_parser(CONTENT_TYPES[:xml], klass)
    end

    # Returns the parser registered for exactly +content_type+, or nil.
    # Unlike #parser this neither normalizes nor falls back to #default.
    def [](content_type)
      @parsers[content_type]
    end

    # Registers +klass+ as the parser for +content_type+.
    def []=(content_type, klass)
      @parsers[content_type] = klass
    end

    private

    # Reduces a Content-Type value to its bare, lower-cased media type.
    def media_type(content_type)
      content_type.to_s.split(';', 2).first.to_s.strip.downcase
    end
  end
end
|
@@ -0,0 +1,16 @@
|
|
1
|
+
class Mechanize
  # Raised when too many redirects are sent.
  class RedirectLimitReachedError < Mechanize::Error
    # The page passed to the constructor.
    attr_reader :page
    # The code of that page, read from page.code.
    attr_reader :response_code
    # The redirect count passed to the constructor; shown in the message.
    attr_reader :redirects

    def initialize(page, redirects)
      @page, @redirects = page, redirects
      @response_code    = page.code
    end

    def to_s
      "Maximum redirect limit (#{redirects}) reached"
    end

    alias_method :inspect, :to_s
  end
end
|
@@ -0,0 +1,19 @@
|
|
1
|
+
class Mechanize
  # Raised when a POST, PUT, or DELETE request results in a redirect;
  # see RFC 2616 10.3.2, 10.3.3 http://www.ietf.org/rfc/rfc2616.txt
  class RedirectNotGetOrHeadError < Mechanize::Error
    # The page passed to the constructor.
    attr_reader :page
    # The code of that page, read from page.code.
    attr_reader :response_code
    # The request verb passed to the constructor.
    attr_reader :verb
    # The uri of that page, read from page.uri.
    attr_reader :uri

    def initialize(page, verb)
      @page, @verb   = page, verb
      @uri           = page.uri
      @response_code = page.code
    end

    # The message names the response code and the upper-cased verb.
    def to_s
      "#{@response_code} redirect received after a #{@verb.to_s.upcase} request"
    end

    alias_method :inspect, :to_s
  end
end
|
@@ -0,0 +1,22 @@
|
|
1
|
+
class Mechanize
  # = Synopsis
  # This error is raised when Mechanize encounters a response code it does
  # not know how to handle.  Currently, this exception will be raised
  # if Mechanize encounters response codes other than 200, 301, or 302.
  # Any other response code is up to the user to handle.
  class ResponseCodeError < Mechanize::Error
    # The page's code as a String (page.code.to_s).
    attr_reader :response_code
    # The page passed to the constructor.
    attr_reader :page

    def initialize(page)
      @page          = page
      @response_code = page.code.to_s
    end

    # Shows the code next to whatever Net::HTTPResponse::CODE_TO_OBJ maps
    # it to.
    def to_s
      response_class = Net::HTTPResponse::CODE_TO_OBJ[@response_code]
      "#{@response_code} => #{response_class}"
    end

    # Delegates to #to_s at call time rather than aliasing it.
    def inspect
      to_s
    end
  end
end
|
@@ -0,0 +1,27 @@
|
|
1
|
+
##
# Raised when Mechanize encounters an error while reading the response body
# from the server.  Carries the original error, the response, and the
# body_io holding the content read before the error.

class Mechanize::ResponseReadError < Mechanize::Error

  # IO for the content read so far
  attr_reader :body_io

  # The error that was originally raised
  attr_reader :error

  # The response that was being read
  attr_reader :response

  ##
  # Creates a new ResponseReadError with the +error+ raised, the +response+
  # and the +body_io+ for content read so far.

  def initialize error, response, body_io
    @error, @response, @body_io = error, response, body_io
  end

  # The wrapped error's message followed by this class's name.
  def message # :nodoc:
    "#{@error.message} (#{self.class})"
  end

end
|
27
|
+
|
@@ -0,0 +1,29 @@
|
|
1
|
+
class Mechanize
  # Exception that is raised when an access to a resource is
  # disallowed by robots.txt or by HTML document itself.
  class RobotsDisallowedError < Mechanize::Error
    # Accepts the offending +url+ either as a URI object or as anything
    # that responds to to_s.
    def initialize(url)
      # Keep a URI argument as-is so #uri does not have to re-parse it.
      @uri = url if url.is_a?(URI)
      @url = url.to_s
    end

    # Returns the URL (string) of the resource that caused this error.
    attr_reader :url

    # Returns the URL (URI object) of the resource that caused this
    # error.  When the error was built from something other than a URI,
    # the string is parsed on first use, so URI::InvalidURIError may be
    # raised if the URL happens to be invalid or not understood by the
    # URI library.
    def uri
      @uri ||= URI.parse(url)
    end

    def to_s
      "Robots access is disallowed for URL: #{url}"
    end

    alias_method :inspect, :to_s
  end
end
|