diamond-mechanize 2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGELOG.rdoc +718 -0
- data/EXAMPLES.rdoc +187 -0
- data/FAQ.rdoc +11 -0
- data/GUIDE.rdoc +163 -0
- data/LICENSE.rdoc +20 -0
- data/Manifest.txt +159 -0
- data/README.rdoc +64 -0
- data/Rakefile +49 -0
- data/lib/mechanize.rb +1079 -0
- data/lib/mechanize/content_type_error.rb +13 -0
- data/lib/mechanize/cookie.rb +232 -0
- data/lib/mechanize/cookie_jar.rb +194 -0
- data/lib/mechanize/download.rb +59 -0
- data/lib/mechanize/element_matcher.rb +36 -0
- data/lib/mechanize/file.rb +65 -0
- data/lib/mechanize/file_connection.rb +17 -0
- data/lib/mechanize/file_request.rb +26 -0
- data/lib/mechanize/file_response.rb +74 -0
- data/lib/mechanize/file_saver.rb +39 -0
- data/lib/mechanize/form.rb +543 -0
- data/lib/mechanize/form/button.rb +6 -0
- data/lib/mechanize/form/check_box.rb +12 -0
- data/lib/mechanize/form/field.rb +54 -0
- data/lib/mechanize/form/file_upload.rb +21 -0
- data/lib/mechanize/form/hidden.rb +3 -0
- data/lib/mechanize/form/image_button.rb +19 -0
- data/lib/mechanize/form/keygen.rb +34 -0
- data/lib/mechanize/form/multi_select_list.rb +94 -0
- data/lib/mechanize/form/option.rb +50 -0
- data/lib/mechanize/form/radio_button.rb +55 -0
- data/lib/mechanize/form/reset.rb +3 -0
- data/lib/mechanize/form/select_list.rb +44 -0
- data/lib/mechanize/form/submit.rb +3 -0
- data/lib/mechanize/form/text.rb +3 -0
- data/lib/mechanize/form/textarea.rb +3 -0
- data/lib/mechanize/headers.rb +23 -0
- data/lib/mechanize/history.rb +82 -0
- data/lib/mechanize/http.rb +8 -0
- data/lib/mechanize/http/agent.rb +1004 -0
- data/lib/mechanize/http/auth_challenge.rb +59 -0
- data/lib/mechanize/http/auth_realm.rb +31 -0
- data/lib/mechanize/http/content_disposition_parser.rb +188 -0
- data/lib/mechanize/http/www_authenticate_parser.rb +155 -0
- data/lib/mechanize/monkey_patch.rb +16 -0
- data/lib/mechanize/page.rb +440 -0
- data/lib/mechanize/page/base.rb +7 -0
- data/lib/mechanize/page/frame.rb +27 -0
- data/lib/mechanize/page/image.rb +30 -0
- data/lib/mechanize/page/label.rb +20 -0
- data/lib/mechanize/page/link.rb +98 -0
- data/lib/mechanize/page/meta_refresh.rb +68 -0
- data/lib/mechanize/parser.rb +173 -0
- data/lib/mechanize/pluggable_parsers.rb +144 -0
- data/lib/mechanize/redirect_limit_reached_error.rb +19 -0
- data/lib/mechanize/redirect_not_get_or_head_error.rb +21 -0
- data/lib/mechanize/response_code_error.rb +21 -0
- data/lib/mechanize/response_read_error.rb +27 -0
- data/lib/mechanize/robots_disallowed_error.rb +28 -0
- data/lib/mechanize/test_case.rb +663 -0
- data/lib/mechanize/unauthorized_error.rb +3 -0
- data/lib/mechanize/unsupported_scheme_error.rb +6 -0
- data/lib/mechanize/util.rb +101 -0
- data/test/data/htpasswd +1 -0
- data/test/data/server.crt +16 -0
- data/test/data/server.csr +12 -0
- data/test/data/server.key +15 -0
- data/test/data/server.pem +15 -0
- data/test/htdocs/alt_text.html +10 -0
- data/test/htdocs/bad_form_test.html +9 -0
- data/test/htdocs/button.jpg +0 -0
- data/test/htdocs/canonical_uri.html +9 -0
- data/test/htdocs/dir with spaces/foo.html +1 -0
- data/test/htdocs/empty_form.html +6 -0
- data/test/htdocs/file_upload.html +26 -0
- data/test/htdocs/find_link.html +41 -0
- data/test/htdocs/form_multi_select.html +16 -0
- data/test/htdocs/form_multival.html +37 -0
- data/test/htdocs/form_no_action.html +18 -0
- data/test/htdocs/form_no_input_name.html +16 -0
- data/test/htdocs/form_order_test.html +11 -0
- data/test/htdocs/form_select.html +16 -0
- data/test/htdocs/form_set_fields.html +14 -0
- data/test/htdocs/form_test.html +188 -0
- data/test/htdocs/frame_referer_test.html +10 -0
- data/test/htdocs/frame_test.html +30 -0
- data/test/htdocs/google.html +13 -0
- data/test/htdocs/index.html +6 -0
- data/test/htdocs/link with space.html +5 -0
- data/test/htdocs/meta_cookie.html +11 -0
- data/test/htdocs/no_title_test.html +6 -0
- data/test/htdocs/noindex.html +9 -0
- data/test/htdocs/rails_3_encoding_hack_form_test.html +27 -0
- data/test/htdocs/relative/tc_relative_links.html +21 -0
- data/test/htdocs/robots.html +8 -0
- data/test/htdocs/robots.txt +2 -0
- data/test/htdocs/tc_bad_charset.html +9 -0
- data/test/htdocs/tc_bad_links.html +5 -0
- data/test/htdocs/tc_base_link.html +8 -0
- data/test/htdocs/tc_blank_form.html +11 -0
- data/test/htdocs/tc_charset.html +6 -0
- data/test/htdocs/tc_checkboxes.html +19 -0
- data/test/htdocs/tc_encoded_links.html +5 -0
- data/test/htdocs/tc_field_precedence.html +11 -0
- data/test/htdocs/tc_follow_meta.html +8 -0
- data/test/htdocs/tc_form_action.html +48 -0
- data/test/htdocs/tc_links.html +19 -0
- data/test/htdocs/tc_meta_in_body.html +9 -0
- data/test/htdocs/tc_pretty_print.html +17 -0
- data/test/htdocs/tc_referer.html +16 -0
- data/test/htdocs/tc_relative_links.html +19 -0
- data/test/htdocs/tc_textarea.html +23 -0
- data/test/htdocs/test_click.html +11 -0
- data/test/htdocs/unusual______.html +5 -0
- data/test/test_mechanize.rb +1164 -0
- data/test/test_mechanize_cookie.rb +451 -0
- data/test/test_mechanize_cookie_jar.rb +483 -0
- data/test/test_mechanize_download.rb +43 -0
- data/test/test_mechanize_file.rb +61 -0
- data/test/test_mechanize_file_connection.rb +21 -0
- data/test/test_mechanize_file_request.rb +19 -0
- data/test/test_mechanize_file_saver.rb +21 -0
- data/test/test_mechanize_form.rb +875 -0
- data/test/test_mechanize_form_check_box.rb +38 -0
- data/test/test_mechanize_form_encoding.rb +114 -0
- data/test/test_mechanize_form_field.rb +63 -0
- data/test/test_mechanize_form_file_upload.rb +20 -0
- data/test/test_mechanize_form_image_button.rb +12 -0
- data/test/test_mechanize_form_keygen.rb +32 -0
- data/test/test_mechanize_form_multi_select_list.rb +84 -0
- data/test/test_mechanize_form_option.rb +55 -0
- data/test/test_mechanize_form_radio_button.rb +78 -0
- data/test/test_mechanize_form_select_list.rb +76 -0
- data/test/test_mechanize_form_textarea.rb +52 -0
- data/test/test_mechanize_headers.rb +35 -0
- data/test/test_mechanize_history.rb +103 -0
- data/test/test_mechanize_http_agent.rb +1225 -0
- data/test/test_mechanize_http_auth_challenge.rb +39 -0
- data/test/test_mechanize_http_auth_realm.rb +49 -0
- data/test/test_mechanize_http_content_disposition_parser.rb +118 -0
- data/test/test_mechanize_http_www_authenticate_parser.rb +146 -0
- data/test/test_mechanize_link.rb +80 -0
- data/test/test_mechanize_page.rb +118 -0
- data/test/test_mechanize_page_encoding.rb +182 -0
- data/test/test_mechanize_page_frame.rb +16 -0
- data/test/test_mechanize_page_link.rb +390 -0
- data/test/test_mechanize_page_meta_refresh.rb +127 -0
- data/test/test_mechanize_parser.rb +289 -0
- data/test/test_mechanize_pluggable_parser.rb +52 -0
- data/test/test_mechanize_redirect_limit_reached_error.rb +24 -0
- data/test/test_mechanize_redirect_not_get_or_head_error.rb +14 -0
- data/test/test_mechanize_subclass.rb +22 -0
- data/test/test_mechanize_util.rb +103 -0
- data/test/test_multi_select.rb +119 -0
- metadata +216 -0
|
@@ -0,0 +1,440 @@
|
|
|
1
|
+
##
|
|
2
|
+
# This class encapsulates an HTML page. If Mechanize finds a content
|
|
3
|
+
# type of 'text/html', this class will be instantiated and returned.
|
|
4
|
+
#
|
|
5
|
+
# Example:
|
|
6
|
+
#
|
|
7
|
+
# require 'mechanize'
|
|
8
|
+
#
|
|
9
|
+
# agent = Mechanize.new
|
|
10
|
+
# agent.get('http://google.com/').class # => Mechanize::Page
|
|
11
|
+
|
|
12
|
+
class Mechanize::Page < Mechanize::File
|
|
13
|
+
extend Forwardable
|
|
14
|
+
extend Mechanize::ElementMatcher
|
|
15
|
+
|
|
16
|
+
attr_accessor :mech
|
|
17
|
+
|
|
18
|
+
##
|
|
19
|
+
# Possible encodings for this page based on HTTP headers and meta elements
|
|
20
|
+
|
|
21
|
+
attr_reader :encodings
|
|
22
|
+
|
|
23
|
+
# Builds a page from a response and its body.
#
# uri::      passed through to Mechanize::File#initialize
# response:: header collection, read with <tt>response['content-type']</tt>.
#            NOTE: the parameter defaults to nil but is dereferenced
#            immediately, so callers must supply it.
# body::     page content; when it responds to +force_encoding+ it is
#            switched to ASCII-8BIT in place
# code::     passed through to Mechanize::File#initialize
# mech::     owning Mechanize instance, or nil
#
# Raises Mechanize::ContentTypeError unless the content type starts with
# text/html or application/xhtml+xml, and RuntimeError when +mech+ is
# given but is not a Mechanize.
def initialize(uri=nil, response=nil, body=nil, code=nil, mech=nil)
  # Both alternatives sit inside one group so the ^ anchor applies to
  # each of them, not only to text/html.
  raise Mechanize::ContentTypeError, response['content-type'] unless
    response['content-type'] =~ /^(text\/html|application\/xhtml\+xml)/i

  @meta_content_type = nil
  @encoding = nil
  # nil stays in the list as the lowest-priority candidate; #parser walks
  # the list in reverse.
  @encodings = [nil]
  raise 'no' if mech && !(Mechanize === mech)
  @mech = mech

  reset

  # Charset detection runs before the body is forced to binary below.
  @encodings << Mechanize::Util.detect_charset(body) if body

  @encodings.concat self.class.response_header_charset(response)

  if body
    # Force the encoding to be 8BIT so we can perform regular expressions.
    # We'll set it to the detected encoding later
    body.force_encoding 'ASCII-8BIT' if body.respond_to? :force_encoding

    @encodings.concat self.class.meta_charset(body)

    meta_content_type = self.class.meta_content_type(body)
    @meta_content_type = meta_content_type if meta_content_type
  end

  @encodings << mech.default_encoding if mech && mech.default_encoding

  super uri, response, body, code
end
|
|
54
|
+
|
|
55
|
+
# Text of the page's title element(s), or nil when there is no parsed
# document or the text is empty.  A non-nil result is memoized.
def title
  return @title if @title

  doc = parser
  @title =
    if doc
      text = doc.search('title').inner_text
      text unless text.empty?
    end
end
|
|
62
|
+
|
|
63
|
+
# Instance-level shortcut: hands this page's response to the class method
# of the same name and returns its result.
def response_header_charset
  headers = response
  self.class.response_header_charset headers
end
|
|
66
|
+
|
|
67
|
+
# Instance-level shortcut: hands this page's body to the class method of
# the same name and returns its result.
def meta_charset
  raw = body
  self.class.meta_charset raw
end
|
|
70
|
+
|
|
71
|
+
# Asks Mechanize::Util.detect_charset for the charset of this page's body.
def detected_encoding
  raw = body
  Mechanize::Util.detect_charset raw
end
|
|
74
|
+
|
|
75
|
+
# Sets the encoding #parser uses the next time the body is parsed.
#
# Returns +encoding+.
def encoding=(encoding)
  # reset discards the current parser together with the memoized element
  # lists, so the next #parser call re-parses with the new encoding.  The
  # old "compare with @parser.encoding" branch ran after reset had already
  # set @parser to nil and could never execute, so it has been removed.
  reset

  @encoding = encoding
end
|
|
90
|
+
|
|
91
|
+
# Encoding reported by the parsed document, or nil when the parser does not
# respond to +encoding+ (which includes the case where there is no parser).
def encoding
  return nil unless parser.respond_to?(:encoding)

  parser.encoding
end
|
|
94
|
+
|
|
95
|
+
# Return whether parser result has errors related to encoding or not.
|
|
96
|
+
# A false result only means the parser reported no encoding errors; it does not guarantee that the encoding is valid.
|
|
97
|
+
def encoding_error?(parser=nil)
  # Fall back to this page's own parser when none is supplied.
  parser ||= self.parser

  # An empty error list yields false, exactly like any other list without
  # a matching message.
  parser.errors.any? do |error|
    error.message =~ /(indicate\ encoding)|
                      (Invalid\ char)|
                      (input\ conversion\ failed)/x
  end
end
|
|
106
|
+
|
|
107
|
+
# Lazily parses the body with the agent's HTML parser and memoizes the
# resulting document.  Returns nil when there is no body.
#
# Encoding choice, in order: an explicitly assigned @encoding, then the
# agent's default encoding when force_default_encoding is set, otherwise
# each entry of @encodings from last to first until one parses without
# encoding errors.
def parser
  return @parser if @parser
  return nil unless @body

  if @encoding
    @parser = @mech.html_parser.parse(html_body, nil, @encoding)
    return @parser
  end

  if mech.force_default_encoding
    @parser = @mech.html_parser.parse(html_body, nil, @mech.default_encoding)
    return @parser
  end

  @encodings.reverse_each do |candidate|
    @parser = @mech.html_parser.parse(html_body, nil, candidate)

    # Keep the first candidate that parses without encoding errors; if
    # every candidate fails, the result of the final attempt is kept.
    break unless encoding_error?(@parser)
  end

  @parser
end
|
|
125
|
+
|
|
126
|
+
alias :root :parser
|
|
127
|
+
|
|
128
|
+
# Emits the page to the pretty-printer +q+ as an object group holding, in
# order: url, meta_refresh, title, iframes, frames, links and forms.
def pretty_print(q) # :nodoc:
  q.object_group(self) do
    q.breakable
    q.group(1, '{url', '}') do
      q.breakable
      q.pp uri
    end
    q.breakable
    q.group(1, '{meta_refresh', '}') do
      meta_refresh.each { |entry| q.breakable; q.pp entry }
    end
    q.breakable
    q.group(1, '{title', '}') do
      q.breakable
      q.pp title
    end
    q.breakable
    q.group(1, '{iframes', '}') do
      iframes.each { |entry| q.breakable; q.pp entry }
    end
    q.breakable
    q.group(1, '{frames', '}') do
      frames.each { |entry| q.breakable; q.pp entry }
    end
    q.breakable
    q.group(1, '{links', '}') do
      links.each { |entry| q.breakable; q.pp entry }
    end
    q.breakable
    q.group(1, '{forms', '}') do
      forms.each { |entry| q.breakable; q.pp entry }
    end
  end
end
|
|
156
|
+
|
|
157
|
+
alias inspect pretty_inspect # :nodoc:
|
|
158
|
+
|
|
159
|
+
# Discards the parsed document and every value memoized from it, so the
# next access re-parses the body and rebuilds the element lists.
def reset
  @bases        = nil
  @forms        = nil
  @frames       = nil
  @iframes      = nil
  # #images and #image_urls are memoized from the parser as well; leaving
  # them set would return nodes of a discarded document after a re-parse.
  @images       = nil
  @image_urls   = nil
  @links        = nil
  @labels       = nil
  @labels_hash  = nil
  @meta_refresh = nil
  @parser       = nil
  @title        = nil
end
|
|
171
|
+
|
|
172
|
+
# Return the canonical URI for the page if there is a link tag
|
|
173
|
+
# with rel="canonical".
|
|
174
|
+
def canonical_uri
  node = at('link[@rel="canonical"][@href]')
  return unless node

  target = node['href']

  URI target
rescue URI::InvalidURIError
  # The raw href was rejected by URI; retry with the escaped form.
  URI Mechanize::Util.uri_escape(target)
end
|
|
183
|
+
|
|
184
|
+
# Get the content type
|
|
185
|
+
def content_type
  # A content type found in a meta tag wins over the response header.
  return @meta_content_type if @meta_content_type

  response['content-type']
end
|
|
188
|
+
|
|
189
|
+
# Search through the page like HPricot
|
|
190
|
+
def_delegator :parser, :search, :search
|
|
191
|
+
def_delegator :parser, :/, :/
|
|
192
|
+
def_delegator :parser, :at, :at
|
|
193
|
+
|
|
194
|
+
##
|
|
195
|
+
# :method: form_with(criteria)
|
|
196
|
+
#
|
|
197
|
+
# Find a single form matching +criteria+.
|
|
198
|
+
# Example:
|
|
199
|
+
# page.form_with(:action => '/post/login.php') do |f|
|
|
200
|
+
# ...
|
|
201
|
+
# end
|
|
202
|
+
|
|
203
|
+
##
|
|
204
|
+
# :method: forms_with(criteria)
|
|
205
|
+
#
|
|
206
|
+
# Find all forms matching +criteria+.
|
|
207
|
+
# Example:
|
|
208
|
+
# page.forms_with(:action => '/post/login.php').each do |f|
|
|
209
|
+
# ...
|
|
210
|
+
# end
|
|
211
|
+
|
|
212
|
+
elements_with :form
|
|
213
|
+
|
|
214
|
+
##
|
|
215
|
+
# :method: link_with(criteria)
|
|
216
|
+
#
|
|
217
|
+
# Find a single link matching +criteria+.
|
|
218
|
+
# Example:
|
|
219
|
+
# page.link_with(:href => /foo/).click
|
|
220
|
+
|
|
221
|
+
##
|
|
222
|
+
# :method: links_with(criteria)
|
|
223
|
+
#
|
|
224
|
+
# Find all links matching +criteria+.
|
|
225
|
+
# Example:
|
|
226
|
+
# page.links_with(:href => /foo/).each do |link|
|
|
227
|
+
# puts link.href
|
|
228
|
+
# end
|
|
229
|
+
|
|
230
|
+
elements_with :link
|
|
231
|
+
|
|
232
|
+
##
|
|
233
|
+
# :method: base_with(criteria)
|
|
234
|
+
#
|
|
235
|
+
# Find a single base tag matching +criteria+.
|
|
236
|
+
# Example:
|
|
237
|
+
# page.base_with(:href => /foo/).click
|
|
238
|
+
|
|
239
|
+
##
|
|
240
|
+
# :method: bases_with(criteria)
|
|
241
|
+
#
|
|
242
|
+
# Find all base tags matching +criteria+.
|
|
243
|
+
# Example:
|
|
244
|
+
# page.bases_with(:href => /foo/).each do |base|
|
|
245
|
+
# puts base.href
|
|
246
|
+
# end
|
|
247
|
+
|
|
248
|
+
elements_with :base
|
|
249
|
+
|
|
250
|
+
##
|
|
251
|
+
# :method: frame_with(criteria)
|
|
252
|
+
#
|
|
253
|
+
# Find a single frame tag matching +criteria+.
|
|
254
|
+
# Example:
|
|
255
|
+
# page.frame_with(:src => /foo/).click
|
|
256
|
+
|
|
257
|
+
##
|
|
258
|
+
# :method: frames_with(criteria)
|
|
259
|
+
#
|
|
260
|
+
# Find all frame tags matching +criteria+.
|
|
261
|
+
# Example:
|
|
262
|
+
# page.frames_with(:src => /foo/).each do |frame|
|
|
263
|
+
# p frame.src
|
|
264
|
+
# end
|
|
265
|
+
|
|
266
|
+
elements_with :frame
|
|
267
|
+
|
|
268
|
+
##
|
|
269
|
+
# :method: iframe_with(criteria)
|
|
270
|
+
#
|
|
271
|
+
# Find a single iframe tag matching +criteria+.
|
|
272
|
+
# Example:
|
|
273
|
+
# page.iframe_with(:src => /foo/).click
|
|
274
|
+
|
|
275
|
+
##
|
|
276
|
+
# :method: iframes_with(criteria)
|
|
277
|
+
#
|
|
278
|
+
# Find all iframe tags matching +criteria+.
|
|
279
|
+
# Example:
|
|
280
|
+
# page.iframes_with(:src => /foo/).each do |iframe|
|
|
281
|
+
# p iframe.src
|
|
282
|
+
# end
|
|
283
|
+
|
|
284
|
+
elements_with :iframe
|
|
285
|
+
|
|
286
|
+
##
|
|
287
|
+
# Return a list of all link and area tags
|
|
288
|
+
def links
  return @links if @links

  # All <a> nodes first, then all <area> nodes, each wrapped in a Link.
  collected = []
  %w{ a area }.each do |tag|
    search(tag).each do |node|
      collected << Link.new(node, @mech, self)
    end
  end

  @links = collected
end
|
|
295
|
+
|
|
296
|
+
##
|
|
297
|
+
# Return a list of all form tags
|
|
298
|
+
def forms
  return @forms if @forms

  @forms = search('form').map do |node|
    built = Mechanize::Form.new(node, @mech, self)
    # A form without an action is given this page's URI as its action.
    built.action = @uri.to_s unless built.action
    built
  end
end
|
|
305
|
+
|
|
306
|
+
##
|
|
307
|
+
# Return a list of all meta refresh elements
|
|
308
|
+
|
|
309
|
+
def meta_refresh
  # With follow_meta_refresh set to :anywhere every meta element is
  # considered; otherwise only those directly under head.
  selector =
    if @mech.follow_meta_refresh == :anywhere
      'meta'
    else
      'head > meta'
    end

  return @meta_refresh if @meta_refresh

  refreshes = search(selector).map do |node|
    MetaRefresh.from_node(node, self, uri)
  end

  # from_node results that are nil are dropped.
  @meta_refresh = refreshes.compact
end
|
|
316
|
+
|
|
317
|
+
##
|
|
318
|
+
# Return a list of all base tags
|
|
319
|
+
def bases
  return @bases if @bases

  # One Base wrapper per <base> node, memoized.
  @bases = search('base').map do |node|
    Base.new(node, @mech, self)
  end
end
|
|
323
|
+
|
|
324
|
+
##
|
|
325
|
+
# Return a list of all frame tags
|
|
326
|
+
def frames
  return @frames if @frames

  # One Frame wrapper per <frame> node, memoized.
  @frames = search('frame').map do |node|
    Frame.new(node, @mech, self)
  end
end
|
|
330
|
+
|
|
331
|
+
##
|
|
332
|
+
# Return a list of all iframe tags
|
|
333
|
+
def iframes
  return @iframes if @iframes

  # <iframe> nodes are wrapped in the same Frame class as <frame> nodes.
  @iframes = search('iframe').map do |node|
    Frame.new(node, @mech, self)
  end
end
|
|
337
|
+
|
|
338
|
+
##
|
|
339
|
+
# Return a list of all img tags
|
|
340
|
+
def images
  return @images if @images

  # One Image wrapper per <img> node, memoized.
  @images = search('img').map do |node|
    Image.new(node, self)
  end
end
|
|
344
|
+
|
|
345
|
+
# The +url+ of every image on the page, duplicates removed, memoized.
def image_urls
  return @image_urls if @image_urls

  urls = images.map { |image| image.url }
  @image_urls = urls.uniq
end
|
|
348
|
+
|
|
349
|
+
##
|
|
350
|
+
# Return a list of all label tags
|
|
351
|
+
def labels
  return @labels if @labels

  # One Label wrapper per <label> node, memoized.
  @labels = search('label').map do |node|
    Label.new(node, self)
  end
end
|
|
355
|
+
|
|
356
|
+
# Maps the +for+ attribute of each label to the label itself.  Labels whose
# #for is falsy are skipped.  The hash is built once and memoized.
def labels_hash
  return @labels_hash if @labels_hash

  by_target = {}
  labels.each do |label|
    next unless label.for

    by_target[label.node['for']] = label
  end

  @labels_hash = by_target
end
|
|
366
|
+
|
|
367
|
+
# Extracts the charset parameter from a content-type string.
#
# Returns nil when no charset parameter is present or when its value is
# exactly 'none'.
def self.charset content_type
  found = content_type[/charset=([^; ]+)/i, 1]
  found == 'none' ? nil : found
end
|
|
372
|
+
|
|
373
|
+
# Collects the charset of every 'content-type' header in +response+ whose
# value mentions a charset.  +response+ must yield header/value pairs
# from #each.
def self.response_header_charset response
  found = []

  response.each do |header, value|
    found << charset(value) if header == 'content-type' && value =~ /charset/i
  end

  found
end
|
|
382
|
+
|
|
383
|
+
##
|
|
384
|
+
# Retrieves all charsets from +meta+ tags in +body+
|
|
385
|
+
|
|
386
|
+
# Scans +body+ for meta tags and returns the charsets they declare, in
# document order.  Tags that declare none contribute nothing.
def self.meta_charset body
  # HACK use .map
  body.scan(/<meta .*?>/i).map do |meta|
    # The value is matched lazily so it stops at the first closing quote;
    # a greedy match ran on to the last quote in the tag and swallowed any
    # attributes that followed the charset.
    if meta =~ /charset\s*=\s*(["'])?\s*(.+?)\s*\1/i then
      $2
    elsif meta =~ /http-equiv\s*=\s*(["'])?content-type\1/i then
      # A content-type meta that did not match the pattern above: pull
      # the charset out of its content attribute instead.
      meta =~ /content\s*=\s*(["'])?(.*?)\1/i

      charset $2 if $2
    end
  end.compact
end
|
|
400
|
+
|
|
401
|
+
##
|
|
402
|
+
# Retrieves the last <tt>content-type</tt> set by a +meta+ tag in +body+
|
|
403
|
+
|
|
404
|
+
def self.meta_content_type body
  # Walk the meta tags from last to first; the first http-equiv
  # content-type tag met this way decides the result.
  body.scan(/<meta .*?>/i).reverse_each do |meta|
    next unless meta =~ /http-equiv\s*=\s*(["'])?content-type\1/i

    meta =~ /content=(["'])?(.*?)\1/i

    # $2 is nil here when the content attribute did not match.
    return $2
  end

  nil
end
|
|
415
|
+
|
|
416
|
+
private
|
|
417
|
+
|
|
418
|
+
# Text handed to the HTML parser: '' when there is no body, a minimal
# '<html></html>' document when the body is empty, else the body itself.
def html_body
  return '' unless @body

  @body.empty? ? '<html></html>' : @body
end
|
|
425
|
+
|
|
426
|
+
# Extracts the charset parameter from a content-type string; nil when it
# is absent or exactly 'none'.
def self.charset_from_content_type content_type
  found = content_type[/charset=([^; ]+)/i, 1]
  found == 'none' ? nil : found
end
|
|
431
|
+
end
|
|
432
|
+
|
|
433
|
+
require 'mechanize/headers'
|
|
434
|
+
require 'mechanize/page/image'
|
|
435
|
+
require 'mechanize/page/label'
|
|
436
|
+
require 'mechanize/page/link'
|
|
437
|
+
require 'mechanize/page/base'
|
|
438
|
+
require 'mechanize/page/frame'
|
|
439
|
+
require 'mechanize/page/meta_refresh'
|
|
440
|
+
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
# A Frame object wraps a frame HTML element.  Frame objects can be treated
|
|
2
|
+
# just like Link objects. They contain #src, the #link they refer to and a
|
|
3
|
+
# #name, the name of the frame they refer to. #src and #name are aliased to
|
|
4
|
+
# #href and #text respectively so that a Frame object can be treated just like
|
|
5
|
+
# a Link.
|
|
6
|
+
|
|
7
|
+
class Mechanize::Page::Frame < Mechanize::Page::Link

  alias :src :href

  attr_reader :text
  alias :name :text

  # Wraps +node+.  After the Link initializer has run, the text is taken
  # from the node's +name+ attribute and the href from its +src+ attribute.
  def initialize(node, mech, referer)
    super(node, mech, referer)

    @node    = node
    @text    = node['name']
    @href    = node['src']
    @content = nil
  end

  # Fetches the frame's target through the agent and memoizes the result.
  def content
    return @content if @content

    @content = @mech.get(@href, [], page)
  end

end
|
|
27
|
+
|