neocoin-mechanize 2.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.autotest +6 -0
- data/.gemtest +0 -0
- data/CHANGELOG.rdoc +638 -0
- data/EXAMPLES.rdoc +187 -0
- data/FAQ.rdoc +11 -0
- data/GUIDE.rdoc +163 -0
- data/LICENSE.rdoc +20 -0
- data/Manifest.txt +172 -0
- data/README.rdoc +63 -0
- data/Rakefile +36 -0
- data/examples/flickr_upload.rb +22 -0
- data/examples/mech-dump.rb +5 -0
- data/examples/proxy_req.rb +7 -0
- data/examples/rubyforge.rb +20 -0
- data/examples/spider.rb +21 -0
- data/lib/mechanize.rb +662 -0
- data/lib/mechanize/content_type_error.rb +14 -0
- data/lib/mechanize/cookie.rb +85 -0
- data/lib/mechanize/cookie_jar.rb +241 -0
- data/lib/mechanize/element_matcher.rb +35 -0
- data/lib/mechanize/file.rb +80 -0
- data/lib/mechanize/file_connection.rb +17 -0
- data/lib/mechanize/file_request.rb +26 -0
- data/lib/mechanize/file_response.rb +74 -0
- data/lib/mechanize/file_saver.rb +37 -0
- data/lib/mechanize/form.rb +478 -0
- data/lib/mechanize/form/button.rb +9 -0
- data/lib/mechanize/form/check_box.rb +11 -0
- data/lib/mechanize/form/field.rb +44 -0
- data/lib/mechanize/form/file_upload.rb +23 -0
- data/lib/mechanize/form/image_button.rb +20 -0
- data/lib/mechanize/form/multi_select_list.rb +83 -0
- data/lib/mechanize/form/option.rb +49 -0
- data/lib/mechanize/form/radio_button.rb +48 -0
- data/lib/mechanize/form/select_list.rb +40 -0
- data/lib/mechanize/headers.rb +25 -0
- data/lib/mechanize/history.rb +83 -0
- data/lib/mechanize/http.rb +3 -0
- data/lib/mechanize/http/agent.rb +738 -0
- data/lib/mechanize/inspect.rb +88 -0
- data/lib/mechanize/monkey_patch.rb +37 -0
- data/lib/mechanize/page.rb +408 -0
- data/lib/mechanize/page/base.rb +8 -0
- data/lib/mechanize/page/frame.rb +27 -0
- data/lib/mechanize/page/image.rb +30 -0
- data/lib/mechanize/page/label.rb +20 -0
- data/lib/mechanize/page/link.rb +82 -0
- data/lib/mechanize/page/meta_refresh.rb +56 -0
- data/lib/mechanize/pluggable_parsers.rb +101 -0
- data/lib/mechanize/redirect_limit_reached_error.rb +16 -0
- data/lib/mechanize/redirect_not_get_or_head_error.rb +19 -0
- data/lib/mechanize/response_code_error.rb +22 -0
- data/lib/mechanize/response_read_error.rb +27 -0
- data/lib/mechanize/robots_disallowed_error.rb +29 -0
- data/lib/mechanize/unsupported_scheme_error.rb +8 -0
- data/lib/mechanize/util.rb +113 -0
- data/test/data/htpasswd +1 -0
- data/test/data/server.crt +16 -0
- data/test/data/server.csr +12 -0
- data/test/data/server.key +15 -0
- data/test/data/server.pem +15 -0
- data/test/helper.rb +175 -0
- data/test/htdocs/alt_text.html +10 -0
- data/test/htdocs/bad_form_test.html +9 -0
- data/test/htdocs/button.jpg +0 -0
- data/test/htdocs/canonical_uri.html +9 -0
- data/test/htdocs/dir with spaces/foo.html +1 -0
- data/test/htdocs/empty_form.html +6 -0
- data/test/htdocs/file_upload.html +26 -0
- data/test/htdocs/find_link.html +41 -0
- data/test/htdocs/form_multi_select.html +16 -0
- data/test/htdocs/form_multival.html +37 -0
- data/test/htdocs/form_no_action.html +18 -0
- data/test/htdocs/form_no_input_name.html +16 -0
- data/test/htdocs/form_select.html +16 -0
- data/test/htdocs/form_select_all.html +16 -0
- data/test/htdocs/form_select_none.html +17 -0
- data/test/htdocs/form_select_noopts.html +10 -0
- data/test/htdocs/form_set_fields.html +14 -0
- data/test/htdocs/form_test.html +188 -0
- data/test/htdocs/frame_referer_test.html +10 -0
- data/test/htdocs/frame_test.html +30 -0
- data/test/htdocs/google.html +13 -0
- data/test/htdocs/iframe_test.html +16 -0
- data/test/htdocs/index.html +6 -0
- data/test/htdocs/link with space.html +5 -0
- data/test/htdocs/meta_cookie.html +11 -0
- data/test/htdocs/no_title_test.html +6 -0
- data/test/htdocs/nofollow.html +9 -0
- data/test/htdocs/noindex.html +9 -0
- data/test/htdocs/norobots.html +8 -0
- data/test/htdocs/rails_3_encoding_hack_form_test.html +27 -0
- data/test/htdocs/rel_nofollow.html +8 -0
- data/test/htdocs/relative/tc_relative_links.html +21 -0
- data/test/htdocs/robots.html +8 -0
- data/test/htdocs/robots.txt +2 -0
- data/test/htdocs/tc_bad_charset.html +9 -0
- data/test/htdocs/tc_bad_links.html +5 -0
- data/test/htdocs/tc_base_images.html +10 -0
- data/test/htdocs/tc_base_link.html +8 -0
- data/test/htdocs/tc_blank_form.html +11 -0
- data/test/htdocs/tc_charset.html +6 -0
- data/test/htdocs/tc_checkboxes.html +19 -0
- data/test/htdocs/tc_encoded_links.html +5 -0
- data/test/htdocs/tc_field_precedence.html +11 -0
- data/test/htdocs/tc_follow_meta.html +8 -0
- data/test/htdocs/tc_form_action.html +48 -0
- data/test/htdocs/tc_images.html +8 -0
- data/test/htdocs/tc_links.html +18 -0
- data/test/htdocs/tc_meta_in_body.html +9 -0
- data/test/htdocs/tc_no_attributes.html +16 -0
- data/test/htdocs/tc_pretty_print.html +17 -0
- data/test/htdocs/tc_radiobuttons.html +17 -0
- data/test/htdocs/tc_referer.html +16 -0
- data/test/htdocs/tc_relative_links.html +19 -0
- data/test/htdocs/tc_textarea.html +23 -0
- data/test/htdocs/test_bad_encoding.html +52 -0
- data/test/htdocs/test_click.html +11 -0
- data/test/htdocs/unusual______.html +5 -0
- data/test/servlets.rb +402 -0
- data/test/ssl_server.rb +48 -0
- data/test/test_cookies.rb +129 -0
- data/test/test_form_action.rb +52 -0
- data/test/test_form_as_hash.rb +59 -0
- data/test/test_form_button.rb +46 -0
- data/test/test_frames.rb +34 -0
- data/test/test_headers.rb +33 -0
- data/test/test_history.rb +118 -0
- data/test/test_history_added.rb +16 -0
- data/test/test_html_unscape_forms.rb +46 -0
- data/test/test_if_modified_since.rb +20 -0
- data/test/test_images.rb +19 -0
- data/test/test_mechanize.rb +842 -0
- data/test/test_mechanize_cookie.rb +345 -0
- data/test/test_mechanize_cookie_jar.rb +401 -0
- data/test/test_mechanize_file.rb +53 -0
- data/test/test_mechanize_file_request.rb +19 -0
- data/test/test_mechanize_file_response.rb +21 -0
- data/test/test_mechanize_form.rb +576 -0
- data/test/test_mechanize_form_check_box.rb +37 -0
- data/test/test_mechanize_form_encoding.rb +120 -0
- data/test/test_mechanize_form_field.rb +21 -0
- data/test/test_mechanize_form_image_button.rb +12 -0
- data/test/test_mechanize_form_textarea.rb +51 -0
- data/test/test_mechanize_http_agent.rb +697 -0
- data/test/test_mechanize_link.rb +84 -0
- data/test/test_mechanize_page_encoding.rb +147 -0
- data/test/test_mechanize_page_link.rb +382 -0
- data/test/test_mechanize_page_meta_refresh.rb +115 -0
- data/test/test_mechanize_redirect_not_get_or_head_error.rb +18 -0
- data/test/test_mechanize_subclass.rb +22 -0
- data/test/test_mechanize_util.rb +92 -0
- data/test/test_multi_select.rb +118 -0
- data/test/test_no_attributes.rb +13 -0
- data/test/test_option.rb +18 -0
- data/test/test_pluggable_parser.rb +136 -0
- data/test/test_post_form.rb +37 -0
- data/test/test_pretty_print.rb +22 -0
- data/test/test_radiobutton.rb +75 -0
- data/test/test_redirect_limit_reached.rb +39 -0
- data/test/test_referer.rb +81 -0
- data/test/test_relative_links.rb +40 -0
- data/test/test_request.rb +13 -0
- data/test/test_response_code.rb +53 -0
- data/test/test_robots.rb +72 -0
- data/test/test_save_file.rb +48 -0
- data/test/test_scheme.rb +48 -0
- data/test/test_select.rb +119 -0
- data/test/test_select_all.rb +15 -0
- data/test/test_select_none.rb +15 -0
- data/test/test_select_noopts.rb +18 -0
- data/test/test_set_fields.rb +44 -0
- data/test/test_ssl_server.rb +20 -0
- metadata +354 -0
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
require 'pp'
|
|
2
|
+
|
|
3
|
+
# :stopdoc:
|
|
4
|
+
class Mechanize
|
|
5
|
+
# Pretty-print hook: emits the cookie jar followed by the current page,
# wrapped in the printer's object group for this agent.
#
# q:: the pretty printer (must respond to object_group, breakable and pp)
def pretty_print(q)
  q.object_group(self) do
    [:cookie_jar, :current_page].each do |reader|
      q.breakable
      q.pp send(reader)
    end
  end
end
|
|
13
|
+
|
|
14
|
+
class Page
|
|
15
|
+
# Pretty-print hook for a page. Emits, in order, one labelled group each for
# url, meta_refresh, title, iframes, frames, links and forms.
#
# q:: the pretty printer (must respond to object_group, group, breakable
#     and pp)
def pretty_print(q)
  # One "{label value}" group holding a single value.
  single = lambda do |opener, reader|
    q.breakable
    q.group(1, opener, '}') do
      q.breakable
      q.pp send(reader)
    end
  end

  # One "{label item item ...}" group holding every element of a collection.
  listing = lambda do |opener, reader|
    q.breakable
    q.group(1, opener, '}') do
      send(reader).each do |item|
        q.breakable
        q.pp item
      end
    end
  end

  q.object_group(self) do
    single.call('{url', :uri)
    listing.call('{meta_refresh', :meta_refresh)
    single.call('{title', :title)
    listing.call('{iframes', :iframes)
    listing.call('{frames', :frames)
    listing.call('{links', :links)
    listing.call('{forms', :forms)
  end
end
|
|
43
|
+
|
|
44
|
+
class Link
|
|
45
|
+
# Pretty-print hook for a link: emits the link text and then its href inside
# the printer's object group.
#
# q:: the pretty printer (must respond to object_group, breakable and pp)
def pretty_print(q)
  q.object_group(self) do
    [:text, :href].each do |reader|
      q.breakable
      q.pp send(reader)
    end
  end
end
|
|
51
|
+
end
|
|
52
|
+
end
|
|
53
|
+
|
|
54
|
+
class Form
|
|
55
|
+
# Pretty-print hook for a form. Emits, in order, one labelled group each for
# name, method, action, fields, radiobuttons, checkboxes, file_uploads and
# buttons.
#
# q:: the pretty printer (must respond to object_group, group, breakable
#     and pp)
def pretty_print(q)
  # One "{label value}" group holding a single value.
  single = lambda do |opener, reader|
    q.breakable
    q.group(1, opener, '}') do
      q.breakable
      q.pp send(reader)
    end
  end

  # One "{label item item ...}" group holding every element of a collection.
  listing = lambda do |opener, reader|
    q.breakable
    q.group(1, opener, '}') do
      send(reader).each do |item|
        q.breakable
        q.pp item
      end
    end
  end

  q.object_group(self) do
    single.call('{name', :name)
    single.call('{method', :method)
    single.call('{action', :action)
    listing.call('{fields', :fields)
    listing.call('{radiobuttons', :radiobuttons)
    listing.call('{checkboxes', :checkboxes)
    listing.call('{file_uploads', :file_uploads)
    listing.call('{buttons', :buttons)
  end
end
|
|
80
|
+
|
|
81
|
+
class RadioButton
|
|
82
|
+
# Names of the instance variables reported for a radio button:
# @checked, @name and @value, in that order.
def pretty_print_instance_variables
  %w[@checked @name @value].map { |ivar| ivar.to_sym }
end
|
|
85
|
+
end
|
|
86
|
+
end
|
|
87
|
+
end
|
|
88
|
+
# :startdoc:
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
# Replacement for Net::HTTP#keep_alive?, installed only when RUBY_VERSION
# compares (as a string) below '1.9'.
if RUBY_VERSION < '1.9'
  module Net
    class HTTP
      # Keep the stock implementation reachable under its old name.
      alias :old_keep_alive? :keep_alive?

      # Decides whether the connection can be reused after this exchange.
      #
      # req:: the request; only its 'connection' header is consulted
      # res:: the response; its 'connection' header is consulted first,
      #       then 'proxy-connection'
      #
      # A "close" token always wins over "keep-alive" within a header. When
      # no header decides, the result is whether @curr_http_version is '1.1'.
      def keep_alive?(req, res)
        return false if /close/i =~ req['connection'].to_s
        return false if @seems_1_0_server

        %w[connection proxy-connection].each do |header|
          value = res[header].to_s
          return false if /close/i =~ value
          return true if /keep-alive/i =~ value
        end

        @curr_http_version == '1.1'
      end
    end
  end
end
|
|
17
|
+
|
|
18
|
+
# Monkey patch for ruby 1.8.4
|
|
19
|
+
# Version-dependent patching.
#
# When RUBY_VERSION compares above "1.8.4", #inspect on Mechanize::Form,
# Mechanize::Page and Mechanize::Page::Link becomes an alias of
# #pretty_inspect. Otherwise Net::HTTPResponse::CODE_TO_OBJ gets an entry
# mapping '500' to HTTPInternalServerError.
if RUBY_VERSION > "1.8.4"
  class Mechanize
    class Form
      alias_method :inspect, :pretty_inspect
    end

    class Page
      alias_method :inspect, :pretty_inspect

      class Link
        alias_method :inspect, :pretty_inspect
      end
    end
  end
else
  module Net # :nodoc:
    class HTTPResponse # :nodoc:
      CODE_TO_OBJ['500'] = HTTPInternalServerError
    end
  end
end
|
|
@@ -0,0 +1,408 @@
|
|
|
1
|
+
##
|
|
2
|
+
# This class encapsulates an HTML page. If Mechanize finds a content
|
|
3
|
+
# type of 'text/html', this class will be instantiated and returned.
|
|
4
|
+
#
|
|
5
|
+
# Example:
|
|
6
|
+
#
|
|
7
|
+
# require 'mechanize'
|
|
8
|
+
#
|
|
9
|
+
# agent = Mechanize.new
|
|
10
|
+
# agent.get('http://google.com/').class # => Mechanize::Page
|
|
11
|
+
|
|
12
|
+
class Mechanize::Page < Mechanize::File
|
|
13
|
+
extend Forwardable
|
|
14
|
+
extend Mechanize::ElementMatcher
|
|
15
|
+
|
|
16
|
+
attr_accessor :mech
|
|
17
|
+
|
|
18
|
+
##
|
|
19
|
+
# Possible encodings for this page based on HTTP headers and meta elements
|
|
20
|
+
|
|
21
|
+
attr_reader :encodings
|
|
22
|
+
|
|
23
|
+
# Builds a page and collects the candidate encodings for parsing it.
#
# uri::      passed through to the superclass initializer
# response:: header collection answering #[] and #each; when omitted a
#            minimal 'text/html' header set is used
# body::     page body or nil; when present it is forced to ASCII-8BIT
#            (where supported) and scanned for meta charset and
#            content-type declarations
# code::     passed through to the superclass initializer
# mech::     a Mechanize instance or nil
#
# Raises Mechanize::ContentTypeError unless the content type starts with
# text/html or application/xhtml+xml. Raises RuntimeError when +mech+ is
# given but is not a Mechanize.
def initialize(uri=nil, response=nil, body=nil, code=nil, mech=nil)
  # Every argument is optional, so a missing response must not blow up with
  # NoMethodError on nil.
  response ||= { 'content-type' => 'text/html' }

  # Both alternatives are anchored at the start of the value. Previously the
  # anchor applied to text/html only, so application/xhtml+xml was accepted
  # anywhere inside the header.
  content_type = response['content-type']
  unless content_type =~ /\A(?:text\/html|application\/xhtml\+xml)/i
    raise Mechanize::ContentTypeError, content_type
  end

  @meta_content_type = nil
  @encoding = nil
  @encodings = [nil]
  raise 'no' if mech and not Mechanize === mech
  @mech = mech

  reset

  # Candidates are appended in this order; #parser tries them last to first.
  @encodings << Mechanize::Util.detect_charset(body) if body

  @encodings.concat self.class.response_header_charset(response)

  if body
    # Force the encoding to be 8BIT so we can perform regular expressions.
    # We'll set it to the detected encoding later
    body.force_encoding 'ASCII-8BIT' if body.respond_to? :force_encoding

    @encodings.concat self.class.meta_charset body

    meta_content_type = self.class.meta_content_type body
    @meta_content_type = meta_content_type if meta_content_type
  end

  @encodings << mech.default_encoding if mech and mech.default_encoding

  super uri, response, body, code
end
|
|
54
|
+
|
|
55
|
+
# Text of the document's title element(s), memoized in @title.
#
# Returns nil when there is no parser or the title text is empty.
def title
  return @title if @title

  document = parser
  return @title = nil unless document

  text = document.search('title').inner_text
  @title = text.empty? ? nil : text
end
|
|
62
|
+
|
|
63
|
+
# Instance-level shortcut: runs the class-level response_header_charset on
# this page's response.
def response_header_charset
  headers = response
  self.class.response_header_charset(headers)
end
|
|
66
|
+
|
|
67
|
+
# Instance-level shortcut: runs the class-level meta_charset on this page's
# body.
def meta_charset
  markup = body
  self.class.meta_charset(markup)
end
|
|
70
|
+
|
|
71
|
+
# Result of Mechanize::Util.detect_charset applied to this page's body.
def detected_encoding
  markup = body
  Mechanize::Util.detect_charset(markup)
end
|
|
74
|
+
|
|
75
|
+
# Sets the encoding used the next time the page is parsed.
#
# All memoized state is discarded through #reset first, so the document is
# re-parsed lazily on the next #parser call using the new @encoding.
#
# encoding:: the encoding to parse with, or nil to fall back to detection
#
# Returns the +encoding+ argument.
def encoding=(encoding)
  # reset already clears @parser, so comparing against an existing parser's
  # encoding could never run; that unreachable branch has been removed.
  reset

  @encoding = encoding
end
|
|
90
|
+
|
|
91
|
+
# Encoding reported by the parser, or nil when the parser (possibly nil)
# does not respond to #encoding.
def encoding
  return nil unless parser.respond_to?(:encoding)

  parser.encoding
end
|
|
94
|
+
|
|
95
|
+
# Return whether parser result has errors related to encoding or not.
|
|
96
|
+
# A false result only means the parser reported no encoding errors; it does not guarantee that the encoding is valid.
|
|
97
|
+
# parser:: the parsed document to inspect; defaults to this page's parser
#
# Returns true when any of the document's errors has a message containing
# "indicate encoding", "Invalid char" or "input conversion failed", and
# false otherwise.
def encoding_error?(parser=nil)
  document = parser || self.parser
  problems = document.errors
  return false if problems.empty?

  problems.any? do |problem|
    problem.message =~ /(indicate\ encoding)|
                        (Invalid\ char)|
                        (input\ conversion\ failed)/x
  end
end
|
|
106
|
+
|
|
107
|
+
# Lazily parses the page body and memoizes the result in @parser.
#
# The encoding is chosen in this order:
# 1. @encoding, when set;
# 2. the agent's default_encoding, when the agent's force_default_encoding
#    is set;
# 3. otherwise each entry of @encodings, last to first, stopping at the
#    first parse for which encoding_error? is false.
#
# Returns nil when there is no body.
def parser
  return @parser if @parser
  return nil unless @body

  parse_as = lambda { |enc| @mech.html_parser.parse(html_body, nil, enc) }

  if @encoding
    @parser = parse_as.call(@encoding)
  elsif mech.force_default_encoding
    @parser = parse_as.call(@mech.default_encoding)
  else
    @encodings.reverse_each do |candidate|
      @parser = parse_as.call(candidate)

      break unless encoding_error?(@parser)
    end
  end

  @parser
end
|
|
125
|
+
|
|
126
|
+
alias :root :parser
|
|
127
|
+
|
|
128
|
+
# Discards every memoized value derived from the parsed document, and the
# parser itself, so each is rebuilt on next access.
#
# @images and @image_urls are cleared as well. They are memoized from parser
# nodes like the other collections and would otherwise survive a re-parse.
#
# Returns nil.
def reset
  @bases = nil
  @forms = nil
  @frames = nil
  @iframes = nil
  @images = nil
  @image_urls = nil
  @links = nil
  @labels = nil
  @labels_hash = nil
  @meta_refresh = nil
  @parser = nil
  @title = nil
end
|
|
140
|
+
|
|
141
|
+
# Return the canonical URI for the page if there is a link tag
|
|
142
|
+
# with rel="canonical".
|
|
143
|
+
# Looks up the first link element carrying rel="canonical" and an href.
#
# Returns the href as a URI, or nil when no such element exists. An href
# that URI rejects is passed through Mechanize::Util.uri_escape and retried.
def canonical_uri
  node = at('link[@rel="canonical"][@href]')
  return unless node

  target = node['href']

  URI(target)
rescue URI::InvalidURIError
  URI(Mechanize::Util.uri_escape(target))
end
|
|
152
|
+
|
|
153
|
+
# Get the content type
|
|
154
|
+
# The content type declared by a meta tag when one was recorded in
# @meta_content_type, otherwise the response's 'content-type' header.
def content_type
  return @meta_content_type if @meta_content_type

  response['content-type']
end
|
|
157
|
+
|
|
158
|
+
# Search through the page like HPricot
|
|
159
|
+
# search, / and at are forwarded to the parser under the same names.
def_delegators :parser, :search, :/, :at
|
|
162
|
+
|
|
163
|
+
##
|
|
164
|
+
# :method: form_with(criteria)
|
|
165
|
+
#
|
|
166
|
+
# Find a single form matching +criteria+.
|
|
167
|
+
# Example:
|
|
168
|
+
# page.form_with(:action => '/post/login.php') do |f|
|
|
169
|
+
# ...
|
|
170
|
+
# end
|
|
171
|
+
|
|
172
|
+
##
|
|
173
|
+
# :method: forms_with(criteria)
|
|
174
|
+
#
|
|
175
|
+
# Find all forms matching +criteria+.
|
|
176
|
+
# Example:
|
|
177
|
+
# page.forms_with(:action => '/post/login.php').each do |f|
|
|
178
|
+
# ...
|
|
179
|
+
# end
|
|
180
|
+
|
|
181
|
+
elements_with :form
|
|
182
|
+
|
|
183
|
+
##
|
|
184
|
+
# :method: link_with(criteria)
|
|
185
|
+
#
|
|
186
|
+
# Find a single link matching +criteria+.
|
|
187
|
+
# Example:
|
|
188
|
+
# page.link_with(:href => /foo/).click
|
|
189
|
+
|
|
190
|
+
##
|
|
191
|
+
# :method: links_with(criteria)
|
|
192
|
+
#
|
|
193
|
+
# Find all links matching +criteria+.
|
|
194
|
+
# Example:
|
|
195
|
+
# page.links_with(:href => /foo/).each do |link|
|
|
196
|
+
# puts link.href
|
|
197
|
+
# end
|
|
198
|
+
|
|
199
|
+
elements_with :link
|
|
200
|
+
|
|
201
|
+
##
|
|
202
|
+
# :method: base_with(criteria)
|
|
203
|
+
#
|
|
204
|
+
# Find a single base tag matching +criteria+.
|
|
205
|
+
# Example:
|
|
206
|
+
# page.base_with(:href => /foo/).click
|
|
207
|
+
|
|
208
|
+
##
|
|
209
|
+
# :method: bases_with(criteria)
|
|
210
|
+
#
|
|
211
|
+
# Find all base tags matching +criteria+.
|
|
212
|
+
# Example:
|
|
213
|
+
# page.bases_with(:href => /foo/).each do |base|
|
|
214
|
+
# puts base.href
|
|
215
|
+
# end
|
|
216
|
+
|
|
217
|
+
elements_with :base
|
|
218
|
+
|
|
219
|
+
##
|
|
220
|
+
# :method: frame_with(criteria)
|
|
221
|
+
#
|
|
222
|
+
# Find a single frame tag matching +criteria+.
|
|
223
|
+
# Example:
|
|
224
|
+
# page.frame_with(:src => /foo/).click
|
|
225
|
+
|
|
226
|
+
##
|
|
227
|
+
# :method: frames_with(criteria)
|
|
228
|
+
#
|
|
229
|
+
# Find all frame tags matching +criteria+.
|
|
230
|
+
# Example:
|
|
231
|
+
# page.frames_with(:src => /foo/).each do |frame|
|
|
232
|
+
# p frame.src
|
|
233
|
+
# end
|
|
234
|
+
|
|
235
|
+
elements_with :frame
|
|
236
|
+
|
|
237
|
+
##
|
|
238
|
+
# :method: iframe_with(criteria)
|
|
239
|
+
#
|
|
240
|
+
# Find a single iframe tag matching +criteria+.
|
|
241
|
+
# Example:
|
|
242
|
+
# page.iframe_with(:src => /foo/).click
|
|
243
|
+
|
|
244
|
+
##
|
|
245
|
+
# :method: iframes_with(criteria)
|
|
246
|
+
#
|
|
247
|
+
# Find all iframe tags matching +criteria+.
|
|
248
|
+
# Example:
|
|
249
|
+
# page.iframes_with(:src => /foo/).each do |iframe|
|
|
250
|
+
# p iframe.src
|
|
251
|
+
# end
|
|
252
|
+
|
|
253
|
+
elements_with :iframe
|
|
254
|
+
|
|
255
|
+
##
|
|
256
|
+
# Return a list of links built from all a and area tags
|
|
257
|
+
# Builds one Link per a element, then one per area element, and memoizes
# the combined list in @links.
def links
  return @links if @links

  found = []
  %w{ a area }.each do |tag|
    search(tag).each do |node|
      found << Link.new(node, @mech, self)
    end
  end
  @links = found
end
|
|
264
|
+
|
|
265
|
+
##
|
|
266
|
+
# Return a list of all form tags
|
|
267
|
+
# Builds one Mechanize::Form per form element and memoizes the list in
# @forms. A form with no action gets this page's URI (as a string) as its
# action.
def forms
  return @forms if @forms

  built = []
  search('form').each do |node|
    form = Mechanize::Form.new(node, @mech, self)
    form.action = @uri.to_s unless form.action
    built << form
  end
  @forms = built
end
|
|
274
|
+
|
|
275
|
+
##
|
|
276
|
+
# Return a list of all meta refresh elements
|
|
277
|
+
|
|
278
|
+
# Memoized list of the non-nil results of MetaRefresh.from_node for the
# page's meta elements.
#
# All meta elements are searched when the agent's follow_meta_refresh is
# :anywhere; otherwise only those directly under head.
def meta_refresh
  selector = if @mech.follow_meta_refresh == :anywhere
               'meta'
             else
               'head > meta'
             end
  return @meta_refresh if @meta_refresh

  refreshes = []
  search(selector).each do |node|
    refresh = MetaRefresh.from_node(node, self, uri)
    refreshes << refresh unless refresh.nil?
  end
  @meta_refresh = refreshes
end
|
|
285
|
+
|
|
286
|
+
##
|
|
287
|
+
# Return a list of all base tags
|
|
288
|
+
# Builds one Base per base element and memoizes the list in @bases.
def bases
  return @bases if @bases

  @bases = search('base').map do |node|
    Base.new(node, @mech, self)
  end
end
|
|
292
|
+
|
|
293
|
+
##
|
|
294
|
+
# Return a list of all frame tags
|
|
295
|
+
# Builds one Frame per frame element and memoizes the list in @frames.
def frames
  return @frames if @frames

  @frames = search('frame').map do |node|
    Frame.new(node, @mech, self)
  end
end
|
|
299
|
+
|
|
300
|
+
##
|
|
301
|
+
# Return a list of all iframe tags
|
|
302
|
+
# Builds one Frame per iframe element and memoizes the list in @iframes.
def iframes
  return @iframes if @iframes

  @iframes = search('iframe').map do |node|
    Frame.new(node, @mech, self)
  end
end
|
|
306
|
+
|
|
307
|
+
##
|
|
308
|
+
# Return a list of all img tags
|
|
309
|
+
# Builds one Image per img element and memoizes the list in @images.
def images
  return @images if @images

  @images = search('img').map do |node|
    Image.new(node, self)
  end
end
|
|
313
|
+
|
|
314
|
+
# The url of every image, with duplicates removed; memoized in @image_urls.
def image_urls
  return @image_urls if @image_urls

  @image_urls = images.map { |image| image.url }.uniq
end
|
|
317
|
+
|
|
318
|
+
##
|
|
319
|
+
# Return a list of all label tags
|
|
320
|
+
# Builds one Label per label element and memoizes the list in @labels.
def labels
  return @labels if @labels

  @labels = search('label').map do |node|
    Label.new(node, self)
  end
end
|
|
324
|
+
|
|
325
|
+
# Index of labels keyed by the value of each label node's 'for' attribute;
# memoized in @labels_hash. Labels whose #for is falsy are left out.
def labels_hash
  @labels_hash ||= labels.inject({}) do |index, label|
    index[label.node['for']] = label if label.for
    index
  end
end
|
|
335
|
+
|
|
336
|
+
# Extracts the charset parameter from a content-type value.
#
# content_type:: a header or meta content value such as
#                "text/html; charset=utf-8", or nil
#
# Whitespace around "=" and single or double quotes around the value are
# tolerated; the quotes are not part of the result.
#
# Returns the charset name, or nil when +content_type+ is nil, carries no
# charset parameter, or names the charset "none".
def self.charset content_type
  # A meta tag without a usable content attribute reaches here as nil.
  return nil if content_type.nil?

  charset = content_type[/charset\s*=\s*["']?([^;"'\s]+)/i, 1]
  return nil if charset == 'none'
  charset
end
|
|
341
|
+
|
|
342
|
+
# Collects, in iteration order, the result of charset for every header
# value in +response+ that mentions "charset" (case-insensitively).
#
# response:: header collection whose #each yields name and value
#
# Returns an Array; entries are nil where charset returned nil.
def self.response_header_charset response
  found = []
  response.each do |_name, text|
    found << charset(text) if text =~ /charset/i
  end
  found
end
|
|
350
|
+
|
|
351
|
+
##
|
|
352
|
+
# Retrieves all charsets from +meta+ tags in +body+
|
|
353
|
+
|
|
354
|
+
# Scans +body+ for meta tags and returns, in document order, every charset
# they declare.
#
# A tag contributes either through a quoted charset attribute or, for an
# http-equiv content-type tag, through the charset parameter of its content
# attribute. Tags that declare no charset contribute nothing.
#
# body:: the page markup as a String
def self.meta_charset body
  body.scan(/<meta .*?>/i).map do |meta|
    if meta =~ /charset\s*=\s*(["'])?\s*(.+)\s*\1/i
      $2
    elsif meta =~ /http-equiv\s*=\s*(["'])?content-type\1/i
      # Skip a content-type tag whose content attribute is missing or
      # unparsable; passing nil on used to raise NoMethodError.
      next unless meta =~ /content=(["'])?(.*?)\1/i

      charset $2
    end
  end.compact
end
|
|
368
|
+
|
|
369
|
+
##
|
|
370
|
+
# Retrieves the last <tt>content-type</tt> set by a +meta+ tag in +body+
|
|
371
|
+
|
|
372
|
+
# Walks the meta tags of +body+ from last to first and returns the content
# attribute of the first http-equiv content-type tag encountered. The value
# is nil when that tag's content attribute does not match.
#
# Returns nil when no such tag exists.
def self.meta_content_type body
  body.scan(/<meta .*?>/i).reverse_each do |meta|
    next unless meta =~ /http-equiv\s*=\s*(["'])?content-type\1/i

    meta =~ /content=(["'])?(.*?)\1/i
    return $2
  end

  nil
end
|
|
383
|
+
|
|
384
|
+
private
|
|
385
|
+
|
|
386
|
+
# Markup handed to the HTML parser: an empty string when there is no body,
# a bare html element when the body is empty, otherwise the body itself.
def html_body
  return '' unless @body

  @body.empty? ? '<html></html>' : @body
end
|
|
393
|
+
|
|
394
|
+
# Extracts the charset parameter from a content-type value.
#
# content_type:: a header or meta content value such as
#                "text/html; charset=utf-8", or nil
#
# Whitespace around "=" and single or double quotes around the value are
# tolerated; the quotes are not part of the result.
#
# Returns the charset name, or nil when +content_type+ is nil, carries no
# charset parameter, or names the charset "none".
#
# NOTE: a bare +private+ does not apply to methods defined with
# <tt>def self.</tt>, so this method is publicly callable even though it
# appears below +private+.
def self.charset_from_content_type content_type
  return nil if content_type.nil?

  charset = content_type[/charset\s*=\s*["']?([^;"'\s]+)/i, 1]
  return nil if charset == 'none'
  charset
end
|
|
399
|
+
end
|
|
400
|
+
|
|
401
|
+
require 'mechanize/headers'
|
|
402
|
+
require 'mechanize/page/image'
|
|
403
|
+
require 'mechanize/page/label'
|
|
404
|
+
require 'mechanize/page/link'
|
|
405
|
+
require 'mechanize/page/base'
|
|
406
|
+
require 'mechanize/page/frame'
|
|
407
|
+
require 'mechanize/page/meta_refresh'
|
|
408
|
+
|