diamond-mechanize 2.1 → 2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- metadata +222 -167
- data/Rakefile +0 -49
- data/lib/mechanize/content_type_error.rb +0 -13
- data/lib/mechanize/cookie.rb +0 -232
- data/lib/mechanize/cookie_jar.rb +0 -194
- data/lib/mechanize/download.rb +0 -59
- data/lib/mechanize/element_matcher.rb +0 -36
- data/lib/mechanize/file.rb +0 -65
- data/lib/mechanize/file_connection.rb +0 -17
- data/lib/mechanize/file_request.rb +0 -26
- data/lib/mechanize/file_response.rb +0 -74
- data/lib/mechanize/file_saver.rb +0 -39
- data/lib/mechanize/form/button.rb +0 -6
- data/lib/mechanize/form/check_box.rb +0 -12
- data/lib/mechanize/form/field.rb +0 -54
- data/lib/mechanize/form/file_upload.rb +0 -21
- data/lib/mechanize/form/hidden.rb +0 -3
- data/lib/mechanize/form/image_button.rb +0 -19
- data/lib/mechanize/form/keygen.rb +0 -34
- data/lib/mechanize/form/multi_select_list.rb +0 -94
- data/lib/mechanize/form/option.rb +0 -50
- data/lib/mechanize/form/radio_button.rb +0 -55
- data/lib/mechanize/form/reset.rb +0 -3
- data/lib/mechanize/form/select_list.rb +0 -44
- data/lib/mechanize/form/submit.rb +0 -3
- data/lib/mechanize/form/text.rb +0 -3
- data/lib/mechanize/form/textarea.rb +0 -3
- data/lib/mechanize/form.rb +0 -543
- data/lib/mechanize/headers.rb +0 -23
- data/lib/mechanize/history.rb +0 -82
- data/lib/mechanize/http/agent.rb +0 -1004
- data/lib/mechanize/http/auth_challenge.rb +0 -59
- data/lib/mechanize/http/auth_realm.rb +0 -31
- data/lib/mechanize/http/content_disposition_parser.rb +0 -188
- data/lib/mechanize/http/www_authenticate_parser.rb +0 -155
- data/lib/mechanize/http.rb +0 -8
- data/lib/mechanize/monkey_patch.rb +0 -16
- data/lib/mechanize/page/base.rb +0 -7
- data/lib/mechanize/page/frame.rb +0 -27
- data/lib/mechanize/page/image.rb +0 -30
- data/lib/mechanize/page/label.rb +0 -20
- data/lib/mechanize/page/link.rb +0 -98
- data/lib/mechanize/page/meta_refresh.rb +0 -68
- data/lib/mechanize/page.rb +0 -440
- data/lib/mechanize/parser.rb +0 -173
- data/lib/mechanize/pluggable_parsers.rb +0 -144
- data/lib/mechanize/redirect_limit_reached_error.rb +0 -19
- data/lib/mechanize/redirect_not_get_or_head_error.rb +0 -21
- data/lib/mechanize/response_code_error.rb +0 -21
- data/lib/mechanize/response_read_error.rb +0 -27
- data/lib/mechanize/robots_disallowed_error.rb +0 -28
- data/lib/mechanize/test_case.rb +0 -663
- data/lib/mechanize/unauthorized_error.rb +0 -3
- data/lib/mechanize/unsupported_scheme_error.rb +0 -6
- data/lib/mechanize/util.rb +0 -101
- data/lib/mechanize.rb +0 -1079
- data/test/data/htpasswd +0 -1
- data/test/data/server.crt +0 -16
- data/test/data/server.csr +0 -12
- data/test/data/server.key +0 -15
- data/test/data/server.pem +0 -15
- data/test/htdocs/alt_text.html +0 -10
- data/test/htdocs/bad_form_test.html +0 -9
- data/test/htdocs/button.jpg +0 -0
- data/test/htdocs/canonical_uri.html +0 -9
- data/test/htdocs/dir with spaces/foo.html +0 -1
- data/test/htdocs/empty_form.html +0 -6
- data/test/htdocs/file_upload.html +0 -26
- data/test/htdocs/find_link.html +0 -41
- data/test/htdocs/form_multi_select.html +0 -16
- data/test/htdocs/form_multival.html +0 -37
- data/test/htdocs/form_no_action.html +0 -18
- data/test/htdocs/form_no_input_name.html +0 -16
- data/test/htdocs/form_order_test.html +0 -11
- data/test/htdocs/form_select.html +0 -16
- data/test/htdocs/form_set_fields.html +0 -14
- data/test/htdocs/form_test.html +0 -188
- data/test/htdocs/frame_referer_test.html +0 -10
- data/test/htdocs/frame_test.html +0 -30
- data/test/htdocs/google.html +0 -13
- data/test/htdocs/index.html +0 -6
- data/test/htdocs/link with space.html +0 -5
- data/test/htdocs/meta_cookie.html +0 -11
- data/test/htdocs/no_title_test.html +0 -6
- data/test/htdocs/noindex.html +0 -9
- data/test/htdocs/rails_3_encoding_hack_form_test.html +0 -27
- data/test/htdocs/relative/tc_relative_links.html +0 -21
- data/test/htdocs/robots.html +0 -8
- data/test/htdocs/robots.txt +0 -2
- data/test/htdocs/tc_bad_charset.html +0 -9
- data/test/htdocs/tc_bad_links.html +0 -5
- data/test/htdocs/tc_base_link.html +0 -8
- data/test/htdocs/tc_blank_form.html +0 -11
- data/test/htdocs/tc_charset.html +0 -6
- data/test/htdocs/tc_checkboxes.html +0 -19
- data/test/htdocs/tc_encoded_links.html +0 -5
- data/test/htdocs/tc_field_precedence.html +0 -11
- data/test/htdocs/tc_follow_meta.html +0 -8
- data/test/htdocs/tc_form_action.html +0 -48
- data/test/htdocs/tc_links.html +0 -19
- data/test/htdocs/tc_meta_in_body.html +0 -9
- data/test/htdocs/tc_pretty_print.html +0 -17
- data/test/htdocs/tc_referer.html +0 -16
- data/test/htdocs/tc_relative_links.html +0 -19
- data/test/htdocs/tc_textarea.html +0 -23
- data/test/htdocs/test_click.html +0 -11
- data/test/htdocs/unusual______.html +0 -5
- data/test/test_mechanize.rb +0 -1164
- data/test/test_mechanize_cookie.rb +0 -451
- data/test/test_mechanize_cookie_jar.rb +0 -483
- data/test/test_mechanize_download.rb +0 -43
- data/test/test_mechanize_file.rb +0 -61
- data/test/test_mechanize_file_connection.rb +0 -21
- data/test/test_mechanize_file_request.rb +0 -19
- data/test/test_mechanize_file_saver.rb +0 -21
- data/test/test_mechanize_form.rb +0 -875
- data/test/test_mechanize_form_check_box.rb +0 -38
- data/test/test_mechanize_form_encoding.rb +0 -114
- data/test/test_mechanize_form_field.rb +0 -63
- data/test/test_mechanize_form_file_upload.rb +0 -20
- data/test/test_mechanize_form_image_button.rb +0 -12
- data/test/test_mechanize_form_keygen.rb +0 -32
- data/test/test_mechanize_form_multi_select_list.rb +0 -84
- data/test/test_mechanize_form_option.rb +0 -55
- data/test/test_mechanize_form_radio_button.rb +0 -78
- data/test/test_mechanize_form_select_list.rb +0 -76
- data/test/test_mechanize_form_textarea.rb +0 -52
- data/test/test_mechanize_headers.rb +0 -35
- data/test/test_mechanize_history.rb +0 -103
- data/test/test_mechanize_http_agent.rb +0 -1225
- data/test/test_mechanize_http_auth_challenge.rb +0 -39
- data/test/test_mechanize_http_auth_realm.rb +0 -49
- data/test/test_mechanize_http_content_disposition_parser.rb +0 -118
- data/test/test_mechanize_http_www_authenticate_parser.rb +0 -146
- data/test/test_mechanize_link.rb +0 -80
- data/test/test_mechanize_page.rb +0 -118
- data/test/test_mechanize_page_encoding.rb +0 -182
- data/test/test_mechanize_page_frame.rb +0 -16
- data/test/test_mechanize_page_link.rb +0 -390
- data/test/test_mechanize_page_meta_refresh.rb +0 -127
- data/test/test_mechanize_parser.rb +0 -289
- data/test/test_mechanize_pluggable_parser.rb +0 -52
- data/test/test_mechanize_redirect_limit_reached_error.rb +0 -24
- data/test/test_mechanize_redirect_not_get_or_head_error.rb +0 -14
- data/test/test_mechanize_subclass.rb +0 -22
- data/test/test_mechanize_util.rb +0 -103
- data/test/test_multi_select.rb +0 -119
data/lib/mechanize/page/base.rb
DELETED
data/lib/mechanize/page/frame.rb
DELETED
@@ -1,27 +0,0 @@
|
|
1
|
-
# A Frame object wraps a frame HTML element. Frame objects can be treated
|
2
|
-
# just like Link objects. They contain #src, the #link they refer to and a
|
3
|
-
# #name, the name of the frame they refer to. #src and #name are aliased to
|
4
|
-
# #href and #text respectively so that a Frame object can be treated just like
|
5
|
-
# a Link.
|
6
|
-
|
7
|
-
class Mechanize::Page::Frame < Mechanize::Page::Link
|
8
|
-
|
9
|
-
alias :src :href
|
10
|
-
|
11
|
-
attr_reader :text
|
12
|
-
alias :name :text
|
13
|
-
|
14
|
-
def initialize(node, mech, referer)
|
15
|
-
super(node, mech, referer)
|
16
|
-
@node = node
|
17
|
-
@text = node['name']
|
18
|
-
@href = node['src']
|
19
|
-
@content = nil
|
20
|
-
end
|
21
|
-
|
22
|
-
def content
|
23
|
-
@content ||= @mech.get @href, [], page
|
24
|
-
end
|
25
|
-
|
26
|
-
end
|
27
|
-
|
data/lib/mechanize/page/image.rb
DELETED
@@ -1,30 +0,0 @@
|
|
1
|
-
##
|
2
|
-
# An image element on an HTML page
|
3
|
-
|
4
|
-
class Mechanize::Page::Image
|
5
|
-
attr_reader :node
|
6
|
-
attr_reader :page
|
7
|
-
|
8
|
-
def initialize(node, page)
|
9
|
-
@node = node
|
10
|
-
@page = page
|
11
|
-
end
|
12
|
-
|
13
|
-
def src
|
14
|
-
@node['src']
|
15
|
-
end
|
16
|
-
|
17
|
-
def url
|
18
|
-
case src
|
19
|
-
when %r{^https?://}
|
20
|
-
src
|
21
|
-
else
|
22
|
-
if page.bases[0]
|
23
|
-
(page.bases[0].href + src).to_s
|
24
|
-
else
|
25
|
-
(page.uri + src).to_s
|
26
|
-
end
|
27
|
-
end
|
28
|
-
end
|
29
|
-
end
|
30
|
-
|
data/lib/mechanize/page/label.rb
DELETED
@@ -1,20 +0,0 @@
|
|
1
|
-
##
|
2
|
-
# A form label on an HTML page
|
3
|
-
|
4
|
-
class Mechanize::Page::Label
|
5
|
-
attr_reader :node
|
6
|
-
attr_reader :text
|
7
|
-
attr_reader :page
|
8
|
-
alias :to_s :text
|
9
|
-
|
10
|
-
def initialize(node, page)
|
11
|
-
@node = node
|
12
|
-
@text = node.inner_text
|
13
|
-
@page = page
|
14
|
-
end
|
15
|
-
|
16
|
-
def for
|
17
|
-
(id = @node['for']) && page.search("##{id}") || nil
|
18
|
-
end
|
19
|
-
end
|
20
|
-
|
data/lib/mechanize/page/link.rb
DELETED
@@ -1,98 +0,0 @@
|
|
1
|
-
##
|
2
|
-
# This class encapsulates links. It contains the text and the URI for
|
3
|
-
# 'a' tags parsed out of an HTML page. If the link contains an image,
|
4
|
-
# the alt text will be used for that image.
|
5
|
-
#
|
6
|
-
# For example, the text for the following links will both be 'Hello World':
|
7
|
-
#
|
8
|
-
# <a href="http://example">Hello World</a>
|
9
|
-
# <a href="http://example"><img src="test.jpg" alt="Hello World"></a>
|
10
|
-
|
11
|
-
class Mechanize::Page::Link
|
12
|
-
attr_reader :node
|
13
|
-
attr_reader :href
|
14
|
-
attr_reader :attributes
|
15
|
-
attr_reader :page
|
16
|
-
alias :referer :page
|
17
|
-
|
18
|
-
def initialize(node, mech, page)
|
19
|
-
@node = node
|
20
|
-
@attributes = node
|
21
|
-
@href = node['href']
|
22
|
-
@mech = mech
|
23
|
-
@page = page
|
24
|
-
@text = nil
|
25
|
-
@uri = nil
|
26
|
-
end
|
27
|
-
|
28
|
-
# Click on this link
|
29
|
-
def click
|
30
|
-
@mech.click self
|
31
|
-
end
|
32
|
-
|
33
|
-
# This method is a shorthand to get link's DOM id.
|
34
|
-
# Common usage:
|
35
|
-
# page.link_with(:dom_id => "links_exact_id")
|
36
|
-
def dom_id
|
37
|
-
node['id']
|
38
|
-
end
|
39
|
-
|
40
|
-
# This method is a shorthand to get a link's DOM class
|
41
|
-
# Common usage:
|
42
|
-
# page.link_with(:dom_class => "links_exact_class")
|
43
|
-
def dom_class
|
44
|
-
node['class']
|
45
|
-
end
|
46
|
-
|
47
|
-
def pretty_print(q) # :nodoc:
|
48
|
-
q.object_group(self) {
|
49
|
-
q.breakable; q.pp text
|
50
|
-
q.breakable; q.pp href
|
51
|
-
}
|
52
|
-
end
|
53
|
-
|
54
|
-
alias inspect pretty_inspect # :nodoc:
|
55
|
-
|
56
|
-
# A list of words in the rel attribute, all lower-cased.
|
57
|
-
def rel
|
58
|
-
@rel ||= (val = attributes['rel']) ? val.downcase.split(' ') : []
|
59
|
-
end
|
60
|
-
|
61
|
-
# Test if the rel attribute includes +kind+.
|
62
|
-
def rel? kind
|
63
|
-
rel.include? kind
|
64
|
-
end
|
65
|
-
|
66
|
-
# The text content of this link
|
67
|
-
def text
|
68
|
-
return @text if @text
|
69
|
-
|
70
|
-
@text = @node.inner_text
|
71
|
-
|
72
|
-
# If there is no text, try to find an image and use its alt text
|
73
|
-
if (@text.nil? or @text.empty?) and imgs = @node.search('img') then
|
74
|
-
@text = imgs.map do |e|
|
75
|
-
e['alt']
|
76
|
-
end.join
|
77
|
-
end
|
78
|
-
|
79
|
-
@text
|
80
|
-
end
|
81
|
-
|
82
|
-
alias :to_s :text
|
83
|
-
|
84
|
-
# A URI for the #href for this link. The link is first parsed as a raw
|
85
|
-
# link. If that fails, parsing an escaped link is attempted.
|
86
|
-
|
87
|
-
def uri
|
88
|
-
@uri ||= if @href then
|
89
|
-
begin
|
90
|
-
URI.parse @href
|
91
|
-
rescue URI::InvalidURIError
|
92
|
-
URI.parse WEBrick::HTTPUtils.escape @href
|
93
|
-
end
|
94
|
-
end
|
95
|
-
end
|
96
|
-
|
97
|
-
end
|
98
|
-
|
data/lib/mechanize/page/meta_refresh.rb
DELETED
@@ -1,68 +0,0 @@
|
|
1
|
-
##
|
2
|
-
# This class encapsulates a meta element with a refresh http-equiv. Mechanize
|
3
|
-
# treats meta refresh elements just like 'a' tags. MetaRefresh objects will
|
4
|
-
# contain links, but most likely will have no text.
|
5
|
-
|
6
|
-
class Mechanize::Page::MetaRefresh < Mechanize::Page::Link
|
7
|
-
|
8
|
-
##
|
9
|
-
# Time to wait before next refresh
|
10
|
-
|
11
|
-
attr_reader :delay
|
12
|
-
|
13
|
-
##
|
14
|
-
# This MetaRefresh link did not contain a url= in the content attribute and
|
15
|
-
# links to itself.
|
16
|
-
|
17
|
-
attr_reader :link_self
|
18
|
-
|
19
|
-
##
|
20
|
-
# Matches the content attribute of a meta refresh element. After the match:
|
21
|
-
#
|
22
|
-
# $1:: delay
|
23
|
-
# $3:: url
|
24
|
-
|
25
|
-
CONTENT_REGEXP = /^\s*(\d+\.?\d*)(;|;\s*url=\s*['"]?(\S*?)['"]?)?\s*$/i
|
26
|
-
|
27
|
-
##
|
28
|
-
# Parses the delay and url from the content attribute of a meta refresh
|
29
|
-
# element. Parse requires the uri of the current page to infer a url when
|
30
|
-
# no url is specified.
|
31
|
-
#
|
32
|
-
# Returns an array of [delay, url, link_self]. (delay is a string)
|
33
|
-
#
|
34
|
-
# Returns nil if the delay and url cannot be parsed.
|
35
|
-
|
36
|
-
def self.parse content, base_uri
|
37
|
-
return unless content =~ CONTENT_REGEXP
|
38
|
-
|
39
|
-
link_self = $3.nil? || $3.empty?
|
40
|
-
delay, refresh_uri = $1, $3
|
41
|
-
|
42
|
-
dest = base_uri
|
43
|
-
dest += refresh_uri if refresh_uri
|
44
|
-
|
45
|
-
return delay, dest, link_self
|
46
|
-
end
|
47
|
-
|
48
|
-
def self.from_node node, page, uri
|
49
|
-
http_equiv = node['http-equiv']
|
50
|
-
return unless http_equiv and http_equiv.downcase == 'refresh'
|
51
|
-
|
52
|
-
delay, uri, link_self = parse node['content'], uri
|
53
|
-
|
54
|
-
return unless delay
|
55
|
-
|
56
|
-
new node, page, delay, uri.to_s, link_self
|
57
|
-
end
|
58
|
-
|
59
|
-
def initialize node, page, delay, href, link_self = false
|
60
|
-
super node, page.mech, page
|
61
|
-
|
62
|
-
@delay = delay =~ /\./ ? delay.to_f : delay.to_i
|
63
|
-
@href = href
|
64
|
-
@link_self = link_self
|
65
|
-
end
|
66
|
-
|
67
|
-
end
|
68
|
-
|
data/lib/mechanize/page.rb
DELETED
@@ -1,440 +0,0 @@
|
|
1
|
-
##
|
2
|
-
# This class encapsulates an HTML page. If Mechanize finds a content
|
3
|
-
# type of 'text/html', this class will be instantiated and returned.
|
4
|
-
#
|
5
|
-
# Example:
|
6
|
-
#
|
7
|
-
# require 'mechanize'
|
8
|
-
#
|
9
|
-
# agent = Mechanize.new
|
10
|
-
# agent.get('http://google.com/').class # => Mechanize::Page
|
11
|
-
|
12
|
-
class Mechanize::Page < Mechanize::File
|
13
|
-
extend Forwardable
|
14
|
-
extend Mechanize::ElementMatcher
|
15
|
-
|
16
|
-
attr_accessor :mech
|
17
|
-
|
18
|
-
##
|
19
|
-
# Possible encodings for this page based on HTTP headers and meta elements
|
20
|
-
|
21
|
-
attr_reader :encodings
|
22
|
-
|
23
|
-
def initialize(uri=nil, response=nil, body=nil, code=nil, mech=nil)
|
24
|
-
raise Mechanize::ContentTypeError, response['content-type'] unless
|
25
|
-
response['content-type'] =~ /^(text\/html)|(application\/xhtml\+xml)/i
|
26
|
-
|
27
|
-
@meta_content_type = nil
|
28
|
-
@encoding = nil
|
29
|
-
@encodings = [nil]
|
30
|
-
raise 'no' if mech and not Mechanize === mech
|
31
|
-
@mech = mech
|
32
|
-
|
33
|
-
reset
|
34
|
-
|
35
|
-
@encodings << Mechanize::Util.detect_charset(body) if body
|
36
|
-
|
37
|
-
@encodings.concat self.class.response_header_charset(response)
|
38
|
-
|
39
|
-
if body
|
40
|
-
# Force the encoding to be 8BIT so we can perform regular expressions.
|
41
|
-
# We'll set it to the detected encoding later
|
42
|
-
body.force_encoding 'ASCII-8BIT' if body.respond_to? :force_encoding
|
43
|
-
|
44
|
-
@encodings.concat self.class.meta_charset body
|
45
|
-
|
46
|
-
meta_content_type = self.class.meta_content_type body
|
47
|
-
@meta_content_type = meta_content_type if meta_content_type
|
48
|
-
end
|
49
|
-
|
50
|
-
@encodings << mech.default_encoding if mech and mech.default_encoding
|
51
|
-
|
52
|
-
super uri, response, body, code
|
53
|
-
end
|
54
|
-
|
55
|
-
def title
|
56
|
-
@title ||=
|
57
|
-
if doc = parser
|
58
|
-
title = doc.search('title').inner_text
|
59
|
-
title.empty? ? nil : title
|
60
|
-
end
|
61
|
-
end
|
62
|
-
|
63
|
-
def response_header_charset
|
64
|
-
self.class.response_header_charset(response)
|
65
|
-
end
|
66
|
-
|
67
|
-
def meta_charset
|
68
|
-
self.class.meta_charset(body)
|
69
|
-
end
|
70
|
-
|
71
|
-
def detected_encoding
|
72
|
-
Mechanize::Util.detect_charset(body)
|
73
|
-
end
|
74
|
-
|
75
|
-
def encoding=(encoding)
|
76
|
-
reset
|
77
|
-
|
78
|
-
@encoding = encoding
|
79
|
-
|
80
|
-
if @parser
|
81
|
-
parser_encoding = @parser.encoding
|
82
|
-
if (parser_encoding && parser_encoding.downcase) != (encoding && encoding.downcase)
|
83
|
-
# lazy reinitialize the parser with the new encoding
|
84
|
-
@parser = nil
|
85
|
-
end
|
86
|
-
end
|
87
|
-
|
88
|
-
encoding
|
89
|
-
end
|
90
|
-
|
91
|
-
def encoding
|
92
|
-
parser.respond_to?(:encoding) ? parser.encoding : nil
|
93
|
-
end
|
94
|
-
|
95
|
-
# Return whether parser result has errors related to encoding or not.
|
96
|
-
# false indicates only that the parser has no encoding errors, not that the encoding is valid.
|
97
|
-
def encoding_error?(parser=nil)
|
98
|
-
parser = self.parser unless parser
|
99
|
-
return false if parser.errors.empty?
|
100
|
-
parser.errors.any? do |error|
|
101
|
-
error.message =~ /(indicate\ encoding)|
|
102
|
-
(Invalid\ char)|
|
103
|
-
(input\ conversion\ failed)/x
|
104
|
-
end
|
105
|
-
end
|
106
|
-
|
107
|
-
def parser
|
108
|
-
return @parser if @parser
|
109
|
-
return nil unless @body
|
110
|
-
|
111
|
-
if @encoding then
|
112
|
-
@parser = @mech.html_parser.parse html_body, nil, @encoding
|
113
|
-
elsif mech.force_default_encoding then
|
114
|
-
@parser = @mech.html_parser.parse html_body, nil, @mech.default_encoding
|
115
|
-
else
|
116
|
-
@encodings.reverse_each do |encoding|
|
117
|
-
@parser = @mech.html_parser.parse html_body, nil, encoding
|
118
|
-
|
119
|
-
break unless encoding_error? @parser
|
120
|
-
end
|
121
|
-
end
|
122
|
-
|
123
|
-
@parser
|
124
|
-
end
|
125
|
-
|
126
|
-
alias :root :parser
|
127
|
-
|
128
|
-
def pretty_print(q) # :nodoc:
|
129
|
-
q.object_group(self) {
|
130
|
-
q.breakable
|
131
|
-
q.group(1, '{url', '}') {q.breakable; q.pp uri }
|
132
|
-
q.breakable
|
133
|
-
q.group(1, '{meta_refresh', '}') {
|
134
|
-
meta_refresh.each { |link| q.breakable; q.pp link }
|
135
|
-
}
|
136
|
-
q.breakable
|
137
|
-
q.group(1, '{title', '}') { q.breakable; q.pp title }
|
138
|
-
q.breakable
|
139
|
-
q.group(1, '{iframes', '}') {
|
140
|
-
iframes.each { |link| q.breakable; q.pp link }
|
141
|
-
}
|
142
|
-
q.breakable
|
143
|
-
q.group(1, '{frames', '}') {
|
144
|
-
frames.each { |link| q.breakable; q.pp link }
|
145
|
-
}
|
146
|
-
q.breakable
|
147
|
-
q.group(1, '{links', '}') {
|
148
|
-
links.each { |link| q.breakable; q.pp link }
|
149
|
-
}
|
150
|
-
q.breakable
|
151
|
-
q.group(1, '{forms', '}') {
|
152
|
-
forms.each { |form| q.breakable; q.pp form }
|
153
|
-
}
|
154
|
-
}
|
155
|
-
end
|
156
|
-
|
157
|
-
alias inspect pretty_inspect # :nodoc:
|
158
|
-
|
159
|
-
def reset
|
160
|
-
@bases = nil
|
161
|
-
@forms = nil
|
162
|
-
@frames = nil
|
163
|
-
@iframes = nil
|
164
|
-
@links = nil
|
165
|
-
@labels = nil
|
166
|
-
@labels_hash = nil
|
167
|
-
@meta_refresh = nil
|
168
|
-
@parser = nil
|
169
|
-
@title = nil
|
170
|
-
end
|
171
|
-
|
172
|
-
# Return the canonical URI for the page if there is a link tag
|
173
|
-
# with rel="canonical".
|
174
|
-
def canonical_uri
|
175
|
-
link = at('link[@rel="canonical"][@href]')
|
176
|
-
return unless link
|
177
|
-
href = link['href']
|
178
|
-
|
179
|
-
URI href
|
180
|
-
rescue URI::InvalidURIError
|
181
|
-
URI Mechanize::Util.uri_escape href
|
182
|
-
end
|
183
|
-
|
184
|
-
# Get the content type
|
185
|
-
def content_type
|
186
|
-
@meta_content_type || response['content-type']
|
187
|
-
end
|
188
|
-
|
189
|
-
# Search through the page like HPricot
|
190
|
-
def_delegator :parser, :search, :search
|
191
|
-
def_delegator :parser, :/, :/
|
192
|
-
def_delegator :parser, :at, :at
|
193
|
-
|
194
|
-
##
|
195
|
-
# :method: form_with(criteria)
|
196
|
-
#
|
197
|
-
# Find a single form matching +criteria+.
|
198
|
-
# Example:
|
199
|
-
# page.form_with(:action => '/post/login.php') do |f|
|
200
|
-
# ...
|
201
|
-
# end
|
202
|
-
|
203
|
-
##
|
204
|
-
# :method: forms_with(criteria)
|
205
|
-
#
|
206
|
-
# Find all forms matching +criteria+.
|
207
|
-
# Example:
|
208
|
-
# page.forms_with(:action => '/post/login.php').each do |f|
|
209
|
-
# ...
|
210
|
-
# end
|
211
|
-
|
212
|
-
elements_with :form
|
213
|
-
|
214
|
-
##
|
215
|
-
# :method: link_with(criteria)
|
216
|
-
#
|
217
|
-
# Find a single link matching +criteria+.
|
218
|
-
# Example:
|
219
|
-
# page.link_with(:href => /foo/).click
|
220
|
-
|
221
|
-
##
|
222
|
-
# :method: links_with(criteria)
|
223
|
-
#
|
224
|
-
# Find all links matching +criteria+.
|
225
|
-
# Example:
|
226
|
-
# page.links_with(:href => /foo/).each do |link|
|
227
|
-
# puts link.href
|
228
|
-
# end
|
229
|
-
|
230
|
-
elements_with :link
|
231
|
-
|
232
|
-
##
|
233
|
-
# :method: base_with(criteria)
|
234
|
-
#
|
235
|
-
# Find a single base tag matching +criteria+.
|
236
|
-
# Example:
|
237
|
-
# page.base_with(:href => /foo/).click
|
238
|
-
|
239
|
-
##
|
240
|
-
# :method: bases_with(criteria)
|
241
|
-
#
|
242
|
-
# Find all base tags matching +criteria+.
|
243
|
-
# Example:
|
244
|
-
# page.bases_with(:href => /foo/).each do |base|
|
245
|
-
# puts base.href
|
246
|
-
# end
|
247
|
-
|
248
|
-
elements_with :base
|
249
|
-
|
250
|
-
##
|
251
|
-
# :method: frame_with(criteria)
|
252
|
-
#
|
253
|
-
# Find a single frame tag matching +criteria+.
|
254
|
-
# Example:
|
255
|
-
# page.frame_with(:src => /foo/).click
|
256
|
-
|
257
|
-
##
|
258
|
-
# :method: frames_with(criteria)
|
259
|
-
#
|
260
|
-
# Find all frame tags matching +criteria+.
|
261
|
-
# Example:
|
262
|
-
# page.frames_with(:src => /foo/).each do |frame|
|
263
|
-
# p frame.src
|
264
|
-
# end
|
265
|
-
|
266
|
-
elements_with :frame
|
267
|
-
|
268
|
-
##
|
269
|
-
# :method: iframe_with(criteria)
|
270
|
-
#
|
271
|
-
# Find a single iframe tag matching +criteria+.
|
272
|
-
# Example:
|
273
|
-
# page.iframe_with(:src => /foo/).click
|
274
|
-
|
275
|
-
##
|
276
|
-
# :method: iframes_with(criteria)
|
277
|
-
#
|
278
|
-
# Find all iframe tags matching +criteria+.
|
279
|
-
# Example:
|
280
|
-
# page.iframes_with(:src => /foo/).each do |iframe|
|
281
|
-
# p iframe.src
|
282
|
-
# end
|
283
|
-
|
284
|
-
elements_with :iframe
|
285
|
-
|
286
|
-
##
|
287
|
-
# Return a list of all links (a and area tags)
|
288
|
-
def links
|
289
|
-
@links ||= %w{ a area }.map do |tag|
|
290
|
-
search(tag).map do |node|
|
291
|
-
Link.new(node, @mech, self)
|
292
|
-
end
|
293
|
-
end.flatten
|
294
|
-
end
|
295
|
-
|
296
|
-
##
|
297
|
-
# Return a list of all form tags
|
298
|
-
def forms
|
299
|
-
@forms ||= search('form').map do |html_form|
|
300
|
-
form = Mechanize::Form.new(html_form, @mech, self)
|
301
|
-
form.action ||= @uri.to_s
|
302
|
-
form
|
303
|
-
end
|
304
|
-
end
|
305
|
-
|
306
|
-
##
|
307
|
-
# Return a list of all meta refresh elements
|
308
|
-
|
309
|
-
def meta_refresh
|
310
|
-
query = @mech.follow_meta_refresh == :anywhere ? 'meta' : 'head > meta'
|
311
|
-
|
312
|
-
@meta_refresh ||= search(query).map do |node|
|
313
|
-
MetaRefresh.from_node node, self, uri
|
314
|
-
end.compact
|
315
|
-
end
|
316
|
-
|
317
|
-
##
|
318
|
-
# Return a list of all base tags
|
319
|
-
def bases
|
320
|
-
@bases ||=
|
321
|
-
search('base').map { |node| Base.new(node, @mech, self) }
|
322
|
-
end
|
323
|
-
|
324
|
-
##
|
325
|
-
# Return a list of all frame tags
|
326
|
-
def frames
|
327
|
-
@frames ||=
|
328
|
-
search('frame').map { |node| Frame.new(node, @mech, self) }
|
329
|
-
end
|
330
|
-
|
331
|
-
##
|
332
|
-
# Return a list of all iframe tags
|
333
|
-
def iframes
|
334
|
-
@iframes ||=
|
335
|
-
search('iframe').map { |node| Frame.new(node, @mech, self) }
|
336
|
-
end
|
337
|
-
|
338
|
-
##
|
339
|
-
# Return a list of all img tags
|
340
|
-
def images
|
341
|
-
@images ||=
|
342
|
-
search('img').map { |node| Image.new(node, self) }
|
343
|
-
end
|
344
|
-
|
345
|
-
def image_urls
|
346
|
-
@image_urls ||= images.map(&:url).uniq
|
347
|
-
end
|
348
|
-
|
349
|
-
##
|
350
|
-
# Return a list of all label tags
|
351
|
-
def labels
|
352
|
-
@labels ||=
|
353
|
-
search('label').map { |node| Label.new(node, self) }
|
354
|
-
end
|
355
|
-
|
356
|
-
def labels_hash
|
357
|
-
unless @labels_hash
|
358
|
-
hash = {}
|
359
|
-
labels.each do |label|
|
360
|
-
hash[label.node['for']] = label if label.for
|
361
|
-
end
|
362
|
-
@labels_hash = hash
|
363
|
-
end
|
364
|
-
return @labels_hash
|
365
|
-
end
|
366
|
-
|
367
|
-
def self.charset content_type
|
368
|
-
charset = content_type[/charset=([^; ]+)/i, 1]
|
369
|
-
return nil if charset == 'none'
|
370
|
-
charset
|
371
|
-
end
|
372
|
-
|
373
|
-
def self.response_header_charset response
|
374
|
-
charsets = []
|
375
|
-
response.each do |header, value|
|
376
|
-
next unless header == 'content-type'
|
377
|
-
next unless value =~ /charset/i
|
378
|
-
charsets << charset(value)
|
379
|
-
end
|
380
|
-
charsets
|
381
|
-
end
|
382
|
-
|
383
|
-
##
|
384
|
-
# Retrieves all charsets from +meta+ tags in +body+
|
385
|
-
|
386
|
-
def self.meta_charset body
|
387
|
-
# HACK use .map
|
388
|
-
body.scan(/<meta .*?>/i).map do |meta|
|
389
|
-
if meta =~ /charset\s*=\s*(["'])?\s*(.+)\s*\1/i then
|
390
|
-
$2
|
391
|
-
elsif meta =~ /http-equiv\s*=\s*(["'])?content-type\1/i then
|
392
|
-
meta =~ /content\s*=\s*(["'])?(.*?)\1/i
|
393
|
-
|
394
|
-
m_charset = charset $2 if $2
|
395
|
-
|
396
|
-
m_charset if m_charset
|
397
|
-
end
|
398
|
-
end.compact
|
399
|
-
end
|
400
|
-
|
401
|
-
##
|
402
|
-
# Retrieves the last <tt>content-type</tt> set by a +meta+ tag in +body+
|
403
|
-
|
404
|
-
def self.meta_content_type body
|
405
|
-
body.scan(/<meta .*?>/i).reverse.map do |meta|
|
406
|
-
if meta =~ /http-equiv\s*=\s*(["'])?content-type\1/i then
|
407
|
-
meta =~ /content=(["'])?(.*?)\1/i
|
408
|
-
|
409
|
-
return $2
|
410
|
-
end
|
411
|
-
end
|
412
|
-
|
413
|
-
nil
|
414
|
-
end
|
415
|
-
|
416
|
-
private
|
417
|
-
|
418
|
-
def html_body
|
419
|
-
if @body
|
420
|
-
@body.empty? ? '<html></html>' : @body
|
421
|
-
else
|
422
|
-
''
|
423
|
-
end
|
424
|
-
end
|
425
|
-
|
426
|
-
def self.charset_from_content_type content_type
|
427
|
-
charset = content_type[/charset=([^; ]+)/i, 1]
|
428
|
-
return nil if charset == 'none'
|
429
|
-
charset
|
430
|
-
end
|
431
|
-
end
|
432
|
-
|
433
|
-
require 'mechanize/headers'
|
434
|
-
require 'mechanize/page/image'
|
435
|
-
require 'mechanize/page/label'
|
436
|
-
require 'mechanize/page/link'
|
437
|
-
require 'mechanize/page/base'
|
438
|
-
require 'mechanize/page/frame'
|
439
|
-
require 'mechanize/page/meta_refresh'
|
440
|
-
|