aai10-mechanize 2.0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.autotest +6 -0
- data/.gitignore +9 -0
- data/CHANGELOG.rdoc +652 -0
- data/EXAMPLES.rdoc +187 -0
- data/FAQ.rdoc +11 -0
- data/GUIDE.rdoc +163 -0
- data/LICENSE.rdoc +20 -0
- data/Manifest.txt +172 -0
- data/README.rdoc +63 -0
- data/Rakefile +36 -0
- data/aai10-mechanize.gemspec +20 -0
- data/examples/flickr_upload.rb +22 -0
- data/examples/mech-dump.rb +5 -0
- data/examples/proxy_req.rb +7 -0
- data/examples/rubyforge.rb +20 -0
- data/examples/spider.rb +21 -0
- data/lib/mechanize.rb +664 -0
- data/lib/mechanize/content_type_error.rb +14 -0
- data/lib/mechanize/cookie.rb +116 -0
- data/lib/mechanize/cookie_jar.rb +202 -0
- data/lib/mechanize/element_matcher.rb +35 -0
- data/lib/mechanize/file.rb +80 -0
- data/lib/mechanize/file_connection.rb +17 -0
- data/lib/mechanize/file_request.rb +26 -0
- data/lib/mechanize/file_response.rb +74 -0
- data/lib/mechanize/file_saver.rb +37 -0
- data/lib/mechanize/form.rb +478 -0
- data/lib/mechanize/form/button.rb +9 -0
- data/lib/mechanize/form/check_box.rb +11 -0
- data/lib/mechanize/form/field.rb +44 -0
- data/lib/mechanize/form/file_upload.rb +23 -0
- data/lib/mechanize/form/image_button.rb +20 -0
- data/lib/mechanize/form/multi_select_list.rb +83 -0
- data/lib/mechanize/form/option.rb +49 -0
- data/lib/mechanize/form/radio_button.rb +48 -0
- data/lib/mechanize/form/select_list.rb +40 -0
- data/lib/mechanize/headers.rb +25 -0
- data/lib/mechanize/history.rb +83 -0
- data/lib/mechanize/http.rb +3 -0
- data/lib/mechanize/http/agent.rb +738 -0
- data/lib/mechanize/inspect.rb +88 -0
- data/lib/mechanize/monkey_patch.rb +37 -0
- data/lib/mechanize/page.rb +408 -0
- data/lib/mechanize/page/base.rb +8 -0
- data/lib/mechanize/page/frame.rb +27 -0
- data/lib/mechanize/page/image.rb +30 -0
- data/lib/mechanize/page/label.rb +20 -0
- data/lib/mechanize/page/link.rb +82 -0
- data/lib/mechanize/page/meta_refresh.rb +56 -0
- data/lib/mechanize/pluggable_parsers.rb +101 -0
- data/lib/mechanize/redirect_limit_reached_error.rb +16 -0
- data/lib/mechanize/redirect_not_get_or_head_error.rb +19 -0
- data/lib/mechanize/response_code_error.rb +22 -0
- data/lib/mechanize/response_read_error.rb +27 -0
- data/lib/mechanize/robots_disallowed_error.rb +29 -0
- data/lib/mechanize/unsupported_scheme_error.rb +8 -0
- data/lib/mechanize/util.rb +113 -0
- data/test/data/htpasswd +1 -0
- data/test/data/server.crt +16 -0
- data/test/data/server.csr +12 -0
- data/test/data/server.key +15 -0
- data/test/data/server.pem +15 -0
- data/test/helper.rb +175 -0
- data/test/htdocs/alt_text.html +10 -0
- data/test/htdocs/bad_form_test.html +9 -0
- data/test/htdocs/button.jpg +0 -0
- data/test/htdocs/canonical_uri.html +9 -0
- data/test/htdocs/dir with spaces/foo.html +1 -0
- data/test/htdocs/empty_form.html +6 -0
- data/test/htdocs/file_upload.html +26 -0
- data/test/htdocs/find_link.html +41 -0
- data/test/htdocs/form_multi_select.html +16 -0
- data/test/htdocs/form_multival.html +37 -0
- data/test/htdocs/form_no_action.html +18 -0
- data/test/htdocs/form_no_input_name.html +16 -0
- data/test/htdocs/form_select.html +16 -0
- data/test/htdocs/form_select_all.html +16 -0
- data/test/htdocs/form_select_none.html +17 -0
- data/test/htdocs/form_select_noopts.html +10 -0
- data/test/htdocs/form_set_fields.html +14 -0
- data/test/htdocs/form_test.html +188 -0
- data/test/htdocs/frame_referer_test.html +10 -0
- data/test/htdocs/frame_test.html +30 -0
- data/test/htdocs/google.html +13 -0
- data/test/htdocs/iframe_test.html +16 -0
- data/test/htdocs/index.html +6 -0
- data/test/htdocs/link with space.html +5 -0
- data/test/htdocs/meta_cookie.html +11 -0
- data/test/htdocs/no_title_test.html +6 -0
- data/test/htdocs/nofollow.html +9 -0
- data/test/htdocs/noindex.html +9 -0
- data/test/htdocs/norobots.html +8 -0
- data/test/htdocs/rails_3_encoding_hack_form_test.html +27 -0
- data/test/htdocs/rel_nofollow.html +8 -0
- data/test/htdocs/relative/tc_relative_links.html +21 -0
- data/test/htdocs/robots.html +8 -0
- data/test/htdocs/robots.txt +2 -0
- data/test/htdocs/tc_bad_charset.html +9 -0
- data/test/htdocs/tc_bad_links.html +5 -0
- data/test/htdocs/tc_base_images.html +10 -0
- data/test/htdocs/tc_base_link.html +8 -0
- data/test/htdocs/tc_blank_form.html +11 -0
- data/test/htdocs/tc_charset.html +6 -0
- data/test/htdocs/tc_checkboxes.html +19 -0
- data/test/htdocs/tc_encoded_links.html +5 -0
- data/test/htdocs/tc_field_precedence.html +11 -0
- data/test/htdocs/tc_follow_meta.html +8 -0
- data/test/htdocs/tc_form_action.html +48 -0
- data/test/htdocs/tc_images.html +8 -0
- data/test/htdocs/tc_links.html +18 -0
- data/test/htdocs/tc_meta_in_body.html +9 -0
- data/test/htdocs/tc_no_attributes.html +16 -0
- data/test/htdocs/tc_pretty_print.html +17 -0
- data/test/htdocs/tc_radiobuttons.html +17 -0
- data/test/htdocs/tc_referer.html +16 -0
- data/test/htdocs/tc_relative_links.html +19 -0
- data/test/htdocs/tc_textarea.html +23 -0
- data/test/htdocs/test_bad_encoding.html +52 -0
- data/test/htdocs/test_click.html +11 -0
- data/test/htdocs/unusual______.html +5 -0
- data/test/servlets.rb +402 -0
- data/test/ssl_server.rb +48 -0
- data/test/test_cookies.rb +129 -0
- data/test/test_form_action.rb +52 -0
- data/test/test_form_as_hash.rb +59 -0
- data/test/test_form_button.rb +46 -0
- data/test/test_frames.rb +34 -0
- data/test/test_headers.rb +33 -0
- data/test/test_history.rb +118 -0
- data/test/test_history_added.rb +16 -0
- data/test/test_html_unscape_forms.rb +46 -0
- data/test/test_if_modified_since.rb +20 -0
- data/test/test_images.rb +19 -0
- data/test/test_mechanize.rb +852 -0
- data/test/test_mechanize_cookie.rb +345 -0
- data/test/test_mechanize_cookie_jar.rb +433 -0
- data/test/test_mechanize_file.rb +53 -0
- data/test/test_mechanize_file_request.rb +19 -0
- data/test/test_mechanize_file_response.rb +21 -0
- data/test/test_mechanize_form.rb +576 -0
- data/test/test_mechanize_form_check_box.rb +37 -0
- data/test/test_mechanize_form_encoding.rb +120 -0
- data/test/test_mechanize_form_field.rb +21 -0
- data/test/test_mechanize_form_image_button.rb +12 -0
- data/test/test_mechanize_form_textarea.rb +51 -0
- data/test/test_mechanize_http_agent.rb +697 -0
- data/test/test_mechanize_link.rb +84 -0
- data/test/test_mechanize_page_encoding.rb +147 -0
- data/test/test_mechanize_page_link.rb +382 -0
- data/test/test_mechanize_page_meta_refresh.rb +115 -0
- data/test/test_mechanize_redirect_not_get_or_head_error.rb +18 -0
- data/test/test_mechanize_subclass.rb +22 -0
- data/test/test_mechanize_util.rb +92 -0
- data/test/test_multi_select.rb +118 -0
- data/test/test_no_attributes.rb +13 -0
- data/test/test_option.rb +18 -0
- data/test/test_pluggable_parser.rb +136 -0
- data/test/test_post_form.rb +37 -0
- data/test/test_pretty_print.rb +22 -0
- data/test/test_radiobutton.rb +75 -0
- data/test/test_redirect_limit_reached.rb +39 -0
- data/test/test_redirect_ok.rb +25 -0
- data/test/test_referer.rb +81 -0
- data/test/test_relative_links.rb +40 -0
- data/test/test_request.rb +13 -0
- data/test/test_response_code.rb +53 -0
- data/test/test_robots.rb +72 -0
- data/test/test_save_file.rb +48 -0
- data/test/test_scheme.rb +48 -0
- data/test/test_select.rb +119 -0
- data/test/test_select_all.rb +15 -0
- data/test/test_select_none.rb +15 -0
- data/test/test_select_noopts.rb +18 -0
- data/test/test_set_fields.rb +44 -0
- data/test/test_ssl_server.rb +20 -0
- metadata +360 -0
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
##
# Wraps a 'frame' tag so that it can be used wherever a Link is expected.
#
# A Frame exposes #src (the value of the node's 'src' attribute) and #name
# (the value of the node's 'name' attribute).  #src is an alias of #href and
# #name is an alias of #text, which is what lets a Frame stand in for a Link.

class Mechanize::Page::Frame < Mechanize::Page::Link

  # The frame's name, read from the node's 'name' attribute.  This reader
  # replaces the lazily computed Link#text.
  attr_reader :text

  alias_method :src,  :href
  alias_method :name, :text

  ##
  # Builds a Frame from +node+.  +mech+ and +referer+ are handed to the Link
  # constructor unchanged; afterwards the text and href are taken from the
  # node's 'name' and 'src' attributes rather than from the link defaults.

  def initialize(node, mech, referer)
    super

    @node    = node
    @text    = node['name']
    @href    = node['src']
    @content = nil
  end

  ##
  # The result of requesting this frame's src through the agent.  The request
  # is made with an empty parameter list and the owning page (presumably used
  # as the referer -- confirm against Mechanize#get).  A truthy result is
  # cached, so later calls do not repeat the request.

  def content
    return @content if @content

    @content = @mech.get(@href, [], page)
  end

end
|
|
27
|
+
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
class Mechanize
  class Page < Mechanize::File
    ##
    # Wraps an image node together with the page it was found on.
    class Image
      # The underlying node this image was built from.
      attr_reader :node

      # The page that contains this image.
      attr_reader :page

      ##
      # Creates an Image for +node+, which belongs to +page+.
      def initialize(node, page)
        @node = node
        @page = page
      end

      ##
      # The raw value of the node's 'src' attribute (nil when it is absent).
      def src
        @node['src']
      end

      ##
      # The location of the image as a String.
      #
      # * Returns nil when the node has no 'src' attribute, instead of
      #   failing while trying to combine nil with a base.
      # * Returns #src untouched when it already starts with an http or
      #   https scheme.  The scheme is matched case-insensitively, so
      #   'HTTP://...' is no longer glued onto the base as if it were
      #   relative.
      # * Otherwise prepends the href of the page's first base
      #   (page.bases[0]) when there is one, and falls back to combining
      #   page.uri with #src.
      def url
        source = src
        return nil if source.nil?

        # '^' (not '\A') is kept on purpose: values with a leading line
        # break keep being treated exactly as before.
        return source if source =~ %r{^https?://}i

        base = page.bases[0]

        if base
          (base.href + source).to_s
        else
          (page.uri + source).to_s
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
class Mechanize
  class Page < Mechanize::File
    ##
    # Wraps a label node together with the page it was found on.  The
    # label's text is captured once, at construction time.
    class Label
      attr_reader :node, :text, :page

      alias_method :to_s, :text

      ##
      # Creates a Label for +node+ on +page+; #text is the node's inner text.
      def initialize(node, page)
        @node, @page = node, page
        @text = node.inner_text
      end

      ##
      # Looks up what this label points at: searches the page for the
      # selector "#<id>", where <id> is the node's 'for' attribute.
      # Returns nil when the node has no 'for' attribute (or when the
      # search itself yields a falsy value).
      def for
        target_id = @node['for']
        return nil unless target_id

        page.search("##{target_id}") || nil
      end
    end
  end
end
|
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
##
# This class encapsulates links.  It holds the text and the URI of an 'a'
# tag parsed out of an HTML page.  When the link has no text of its own but
# contains images, the images' alt text is used instead.
#
# For example, the text for the following links will both be 'Hello World':
#
#   <a href="http://example">Hello World</a>
#   <a href="http://example"><img src="test.jpg" alt="Hello World"></a>

class Mechanize::Page::Link
  attr_reader :node, :href, :attributes, :page

  alias_method :referer, :page

  ##
  # Creates a link for +node+.  +mech+ is the agent used by #click and
  # +page+ is the page the link was found on (also available as #referer).
  # The href is read from the node's 'href' attribute; #attributes is the
  # node itself.

  def initialize(node, mech, page)
    @node       = node
    @attributes = node
    @href       = node['href']
    @mech       = mech
    @page       = page
    @text       = nil
    @uri        = nil
  end

  # Click on this link (delegates to the agent's #click).
  def click
    @mech.click(self)
  end

  # Shorthand for the link's DOM id.
  # Common usage:
  #   page.link_with(:dom_id => "links_exact_id")
  def dom_id
    node['id']
  end

  # The words of the rel attribute, all lower-cased.  An empty Array when
  # the attribute is missing.  The result is memoized.
  def rel
    return @rel if @rel

    raw = attributes['rel']
    @rel = raw ? raw.downcase.split(' ') : []
  end

  # Test if the rel attribute includes +kind+.  The comparison is exact, so
  # +kind+ has to be given in lower case to match.
  def rel?(kind)
    rel.include?(kind)
  end

  # The text content of this link.  Falls back to the concatenated alt text
  # of any images inside the link when the link has no text of its own.
  def text
    return @text if @text

    @text = @node.inner_text

    if @text.nil? || @text.empty?
      images = @node.search('img')
      @text = images.map { |image| image['alt'] }.join if images
    end

    @text
  end

  alias_method :to_s, :text

  # A URI for the #href of this link, or nil when there is no href.  The
  # href is first parsed as-is; if that raises URI::InvalidURIError, it is
  # escaped with WEBrick::HTTPUtils.escape and parsed again.
  def uri
    return @uri if @uri
    return nil unless @href

    @uri = begin
      URI.parse(@href)
    rescue URI::InvalidURIError
      URI.parse(WEBrick::HTTPUtils.escape(@href))
    end
  end

end
|
|
82
|
+
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
##
# This class encapsulates a meta element with a refresh http-equiv.
# Mechanize treats meta refresh elements just like 'a' tags.  MetaRefresh
# objects carry a link, but most likely have no text.

class Mechanize::Page::MetaRefresh < Mechanize::Page::Link

  # The refresh delay as an Integer (the parsed value passed through #to_i,
  # so a fractional part is dropped).
  attr_reader :delay

  ##
  # Matches the content attribute of a meta refresh element.  After a match:
  #
  # $1:: delay
  # $3:: url

  CONTENT_REGEXP = /^\s*(\d+\.?\d*)(;|;\s*url=\s*['"]?(\S*?)['"]?)?\s*$/i

  ##
  # Parses the delay and url out of the content attribute of a meta refresh
  # element.  +base_uri+ is the uri of the current page; it is the
  # destination when +content+ names no url, and the value the url is
  # combined with (via +) when it does.
  #
  # Returns a two-element Array: the delay exactly as written in +content+
  # (a String) and the destination.
  #
  # Returns nil if +content+ does not match CONTENT_REGEXP.

  def self.parse(content, base_uri)
    return unless content =~ CONTENT_REGEXP

    match  = Regexp.last_match
    delay  = match[1]
    target = match[3]

    destination = target ? base_uri + target : base_uri

    [delay, destination]
  end

  ##
  # Builds a MetaRefresh from +node+ found on +page+, using +uri+ as the
  # base for the refresh destination.
  #
  # Returns nil unless the node's http-equiv attribute is 'refresh'
  # (compared case-insensitively) and its content attribute can be parsed.

  def self.from_node(node, page, uri)
    http_equiv = node['http-equiv']
    return unless http_equiv && http_equiv.downcase == 'refresh'

    delay, destination = parse(node['content'], uri)
    return unless delay

    new(node, page, delay, destination.to_s)
  end

  ##
  # Creates a MetaRefresh for +node+ on +page+.  The agent is taken from
  # page.mech, +delay+ is converted with #to_i and +href+ replaces the href
  # the Link constructor read from the node.

  def initialize(node, page, delay, href)
    super(node, page.mech, page)

    @delay = delay.to_i
    @href  = href
  end

end
|
|
56
|
+
|
|
@@ -0,0 +1,101 @@
|
|
|
1
|
+
require 'mechanize/file'
|
|
2
|
+
require 'mechanize/file_saver'
|
|
3
|
+
require 'mechanize/page'
|
|
4
|
+
|
|
5
|
+
class Mechanize
  # = Synopsis
  # This class is used to register and maintain pluggable parsers for
  # Mechanize to use.
  #
  # A Pluggable Parser is a parser that Mechanize uses for any particular
  # content type.  Mechanize will ask PluggableParser for the class it
  # should initialize given any content type.  This class allows users to
  # register their own pluggable parsers, or modify existing pluggable
  # parsers.
  #
  # PluggableParser returns a Mechanize::File object for content types
  # that it does not know how to handle.  Mechanize::File provides
  # basic functionality for any content type, so it is a good class to
  # extend when building your own parsers.
  # == Example
  # To create your own parser, just create a class that takes four
  # parameters in the constructor.  Here is an example of registering
  # a pluggable parser that handles CSV files:
  #  class CSVParser < Mechanize::File
  #    attr_reader :csv
  #    def initialize(uri=nil, response=nil, body=nil, code=nil)
  #      super(uri, response, body, code)
  #      @csv = CSV.parse(body)
  #    end
  #  end
  #  agent = Mechanize.new
  #  agent.pluggable_parser.csv = CSVParser
  #  agent.get('http://example.com/test.csv')  # => CSVParser
  # Now any page that returns the content type of 'text/csv' will initialize
  # a CSVParser and return that object to the caller.
  #
  # To register a pluggable parser for a content type that pluggable parser
  # does not know about, just use the hash syntax:
  #  agent.pluggable_parser['text/something'] = SomeClass
  #
  # To set the default parser, just use the 'default' method:
  #  agent.pluggable_parser.default = SomeClass
  # Now all unknown content types will be instances of SomeClass.
  class PluggableParser
    # Short names for the content types that have a dedicated setter.
    CONTENT_TYPES = {
      :html  => 'text/html',
      :wap   => 'application/vnd.wap.xhtml+xml',
      :xhtml => 'application/xhtml+xml',
      :pdf   => 'application/pdf',
      :csv   => 'text/csv',
      :xml   => 'text/xml',
    }

    # The class returned by #parser for content types with no registration.
    attr_accessor :default

    # Starts with Page registered for the html, xhtml and wap content types
    # and File as the default for everything else.
    def initialize
      @parsers = { CONTENT_TYPES[:html]  => Page,
                   CONTENT_TYPES[:xhtml] => Page,
                   CONTENT_TYPES[:wap]   => Page,
      }
      @default = File
    end

    # The class registered for +content_type+, or #default when
    # +content_type+ is nil or has no registration.  The lookup is an exact
    # match on the given key.
    def parser(content_type)
      content_type.nil? ? default : @parsers[content_type] || default
    end

    # Registers +klass+ as the parser for +content_type+.
    def register_parser(content_type, klass)
      @parsers[content_type] = klass
    end

    # Registers +klass+ for both 'text/html' and 'application/xhtml+xml'.
    # The wap content type is not touched; use #wap= for that.
    def html=(klass)
      register_parser(CONTENT_TYPES[:html], klass)
      register_parser(CONTENT_TYPES[:xhtml], klass)
    end

    # Registers +klass+ for 'application/xhtml+xml'.
    def xhtml=(klass)
      register_parser(CONTENT_TYPES[:xhtml], klass)
    end

    # Registers +klass+ for 'application/vnd.wap.xhtml+xml'.  Every other
    # entry of CONTENT_TYPES already had a setter; this one completes the
    # set so the wap registration made in #initialize can be replaced by
    # name as well.
    def wap=(klass)
      register_parser(CONTENT_TYPES[:wap], klass)
    end

    # Registers +klass+ for 'application/pdf'.
    def pdf=(klass)
      register_parser(CONTENT_TYPES[:pdf], klass)
    end

    # Registers +klass+ for 'text/csv'.
    def csv=(klass)
      register_parser(CONTENT_TYPES[:csv], klass)
    end

    # Registers +klass+ for 'text/xml'.
    def xml=(klass)
      register_parser(CONTENT_TYPES[:xml], klass)
    end

    # The class registered for +content_type+, or nil when there is none.
    # Unlike #parser this does not fall back to #default.
    def [](content_type)
      @parsers[content_type]
    end

    # Registers +klass+ as the parser for +content_type+ (hash syntax).
    def []=(content_type, klass)
      @parsers[content_type] = klass
    end
  end
end
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
class Mechanize
  # Raised when too many redirects are sent.  Carries the page that was
  # being handled, that page's response code and the redirect count passed
  # in by the caller.
  class RedirectLimitReachedError < Mechanize::Error
    attr_reader :page
    attr_reader :response_code
    attr_reader :redirects

    # +page+ is the page whose code becomes #response_code; +redirects+ is
    # the number reported in the error message.
    def initialize(page, redirects)
      @page          = page
      @response_code = page.code
      @redirects     = redirects
    end

    # Human readable description; also used for #inspect.
    def to_s
      "Maximum redirect limit (#{redirects}) reached"
    end

    alias_method :inspect, :to_s
  end
end
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
class Mechanize
  # Raised when a POST, PUT, or DELETE request results in a redirect.
  # See RFC 2616 10.3.2, 10.3.3 http://www.ietf.org/rfc/rfc2616.txt
  class RedirectNotGetOrHeadError < Mechanize::Error
    attr_reader :page
    attr_reader :response_code
    attr_reader :verb
    attr_reader :uri

    # +page+ supplies #uri and #response_code (from page.uri and
    # page.code); +verb+ is the request method that was redirected.
    def initialize(page, verb)
      @page          = page
      @verb          = verb
      @response_code = page.code
      @uri           = page.uri
    end

    # Human readable description with the verb upper-cased; also used for
    # #inspect.
    def to_s
      http_method = @verb.to_s.upcase

      "#{@response_code} redirect received after a #{http_method} request"
    end

    alias_method :inspect, :to_s
  end
end
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
class Mechanize
  # = Synopsis
  # Raised when Mechanize encounters a response code it does not know how
  # to handle.  According to the original notes this covers response codes
  # other than 200, 301, or 302 (not verifiable from this class alone);
  # any other response code is up to the user to handle.
  class ResponseCodeError < Mechanize::Error
    attr_reader :response_code, :page

    # Remembers +page+ and its code, stored as a String.
    def initialize(page)
      @response_code = page.code.to_s
      @page          = page
    end

    # The response code followed by whatever Net::HTTPResponse::CODE_TO_OBJ
    # maps that code to.
    def to_s
      mapped = Net::HTTPResponse::CODE_TO_OBJ[@response_code]

      "#{@response_code} => #{mapped}"
    end

    # Same text as #to_s.
    def inspect
      to_s
    end
  end
end
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
##
# Raised when Mechanize encounters an error while reading the response body
# from the server.  Holds the original error, the response, and an IO with
# the part of the body that was read before the error occurred.

class Mechanize::ResponseReadError < Mechanize::Error

  attr_reader :body_io, :error, :response

  ##
  # Creates a new ResponseReadError from the +error+ that was raised, the
  # +response+ being read, and the +body_io+ holding the content read so far.

  def initialize(error, response, body_io)
    @error, @response, @body_io = error, response, body_io
  end

  # The wrapped error's message followed by this error's class name in
  # parentheses.
  def message # :nodoc:
    "#{@error.message} (#{self.class})"
  end

end
|
|
27
|
+
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
class Mechanize
  # Exception that is raised when an access to a resource is
  # disallowed by robots.txt or by HTML document itself.
  class RobotsDisallowedError < Mechanize::Error
    # Returns the URL (string) of the resource that caused this error.
    attr_reader :url

    # +url+ may be a URI object or anything that responds to #to_s.  The
    # string form is always stored; a URI object is additionally kept as-is
    # so #uri does not have to parse it again.
    def initialize(url)
      @url = url.to_s
      @uri = url if url.is_a?(URI)
    end

    # Returns the URL (URI object) of the resource that caused this
    # error.  URI::InvalidURIError may be raised if the URL happens to
    # be invalid or not understood by the URI library.
    def uri
      return @uri if @uri

      @uri = URI.parse(url)
    end

    # Human readable description; also used for #inspect.
    def to_s
      "Robots access is disallowed for URL: #{url}"
    end

    alias_method :inspect, :to_s
  end
end
|