kitamomonga-mechanize 0.9.3.20090724215219
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGELOG.rdoc +504 -0
- data/EXAMPLES.rdoc +171 -0
- data/FAQ.rdoc +11 -0
- data/GUIDE.rdoc +122 -0
- data/LICENSE.rdoc +340 -0
- data/Manifest.txt +176 -0
- data/README.rdoc +60 -0
- data/Rakefile +33 -0
- data/examples/flickr_upload.rb +23 -0
- data/examples/mech-dump.rb +7 -0
- data/examples/proxy_req.rb +9 -0
- data/examples/rubyforge.rb +21 -0
- data/examples/spider.rb +11 -0
- data/lib/mechanize.rb +666 -0
- data/lib/mechanize/chain.rb +34 -0
- data/lib/mechanize/chain/auth_headers.rb +78 -0
- data/lib/mechanize/chain/body_decoding_handler.rb +46 -0
- data/lib/mechanize/chain/connection_resolver.rb +76 -0
- data/lib/mechanize/chain/custom_headers.rb +21 -0
- data/lib/mechanize/chain/handler.rb +9 -0
- data/lib/mechanize/chain/header_resolver.rb +51 -0
- data/lib/mechanize/chain/parameter_resolver.rb +22 -0
- data/lib/mechanize/chain/post_connect_hook.rb +0 -0
- data/lib/mechanize/chain/post_page_hook.rb +18 -0
- data/lib/mechanize/chain/pre_connect_hook.rb +20 -0
- data/lib/mechanize/chain/request_resolver.rb +30 -0
- data/lib/mechanize/chain/response_body_parser.rb +38 -0
- data/lib/mechanize/chain/response_header_handler.rb +48 -0
- data/lib/mechanize/chain/response_reader.rb +39 -0
- data/lib/mechanize/chain/ssl_resolver.rb +40 -0
- data/lib/mechanize/chain/uri_resolver.rb +75 -0
- data/lib/mechanize/content_type_error.rb +14 -0
- data/lib/mechanize/cookie.rb +70 -0
- data/lib/mechanize/cookie_jar.rb +188 -0
- data/lib/mechanize/file.rb +71 -0
- data/lib/mechanize/file_response.rb +60 -0
- data/lib/mechanize/file_saver.rb +37 -0
- data/lib/mechanize/form.rb +364 -0
- data/lib/mechanize/form/button.rb +7 -0
- data/lib/mechanize/form/check_box.rb +11 -0
- data/lib/mechanize/form/field.rb +26 -0
- data/lib/mechanize/form/file_upload.rb +22 -0
- data/lib/mechanize/form/image_button.rb +21 -0
- data/lib/mechanize/form/multi_select_list.rb +67 -0
- data/lib/mechanize/form/option.rb +49 -0
- data/lib/mechanize/form/radio_button.rb +36 -0
- data/lib/mechanize/form/select_list.rb +43 -0
- data/lib/mechanize/headers.rb +11 -0
- data/lib/mechanize/history.rb +65 -0
- data/lib/mechanize/inspect.rb +88 -0
- data/lib/mechanize/monkey_patch.rb +35 -0
- data/lib/mechanize/page.rb +279 -0
- data/lib/mechanize/page/base.rb +8 -0
- data/lib/mechanize/page/encoding.rb +61 -0
- data/lib/mechanize/page/frame.rb +20 -0
- data/lib/mechanize/page/link.rb +53 -0
- data/lib/mechanize/page/meta.rb +50 -0
- data/lib/mechanize/pluggable_parsers.rb +101 -0
- data/lib/mechanize/redirect_limit_reached_error.rb +16 -0
- data/lib/mechanize/redirect_not_get_or_head_error.rb +18 -0
- data/lib/mechanize/response_code_error.rb +22 -0
- data/lib/mechanize/unsupported_scheme_error.rb +8 -0
- data/lib/mechanize/util.rb +73 -0
- data/test/chain/test_argument_validator.rb +14 -0
- data/test/chain/test_auth_headers.rb +25 -0
- data/test/chain/test_custom_headers.rb +18 -0
- data/test/chain/test_header_resolver.rb +28 -0
- data/test/chain/test_parameter_resolver.rb +35 -0
- data/test/chain/test_request_resolver.rb +29 -0
- data/test/chain/test_response_reader.rb +24 -0
- data/test/data/htpasswd +1 -0
- data/test/data/server.crt +16 -0
- data/test/data/server.csr +12 -0
- data/test/data/server.key +15 -0
- data/test/data/server.pem +15 -0
- data/test/helper.rb +129 -0
- data/test/htdocs/alt_text.html +10 -0
- data/test/htdocs/bad_form_test.html +9 -0
- data/test/htdocs/button.jpg +0 -0
- data/test/htdocs/empty_form.html +6 -0
- data/test/htdocs/file_upload.html +26 -0
- data/test/htdocs/find_link.html +41 -0
- data/test/htdocs/form_multi_select.html +16 -0
- data/test/htdocs/form_multival.html +37 -0
- data/test/htdocs/form_no_action.html +18 -0
- data/test/htdocs/form_no_input_name.html +16 -0
- data/test/htdocs/form_select.html +16 -0
- data/test/htdocs/form_select_all.html +16 -0
- data/test/htdocs/form_select_none.html +17 -0
- data/test/htdocs/form_select_noopts.html +10 -0
- data/test/htdocs/form_set_fields.html +14 -0
- data/test/htdocs/form_test.html +188 -0
- data/test/htdocs/frame_test.html +30 -0
- data/test/htdocs/google.html +13 -0
- data/test/htdocs/iframe_test.html +16 -0
- data/test/htdocs/index.html +6 -0
- data/test/htdocs/link with space.html +5 -0
- data/test/htdocs/meta_cookie.html +11 -0
- data/test/htdocs/no_title_test.html +6 -0
- data/test/htdocs/relative/tc_relative_links.html +21 -0
- data/test/htdocs/tc_bad_charset.html +9 -0
- data/test/htdocs/tc_bad_links.html +5 -0
- data/test/htdocs/tc_base_link.html +8 -0
- data/test/htdocs/tc_blank_form.html +11 -0
- data/test/htdocs/tc_charset.html +6 -0
- data/test/htdocs/tc_checkboxes.html +19 -0
- data/test/htdocs/tc_encoded_links.html +5 -0
- data/test/htdocs/tc_follow_meta.html +8 -0
- data/test/htdocs/tc_form_action.html +48 -0
- data/test/htdocs/tc_links.html +19 -0
- data/test/htdocs/tc_no_attributes.html +16 -0
- data/test/htdocs/tc_pretty_print.html +17 -0
- data/test/htdocs/tc_radiobuttons.html +17 -0
- data/test/htdocs/tc_referer.html +10 -0
- data/test/htdocs/tc_relative_links.html +19 -0
- data/test/htdocs/tc_textarea.html +23 -0
- data/test/htdocs/test_bad_encoding.html +52 -0
- data/test/htdocs/unusual______.html +5 -0
- data/test/servlets.rb +365 -0
- data/test/ssl_server.rb +48 -0
- data/test/test_authenticate.rb +71 -0
- data/test/test_bad_links.rb +25 -0
- data/test/test_blank_form.rb +16 -0
- data/test/test_checkboxes.rb +61 -0
- data/test/test_content_type.rb +13 -0
- data/test/test_cookie_class.rb +338 -0
- data/test/test_cookie_jar.rb +362 -0
- data/test/test_cookies.rb +123 -0
- data/test/test_encoded_links.rb +20 -0
- data/test/test_errors.rb +49 -0
- data/test/test_follow_meta.rb +108 -0
- data/test/test_form_action.rb +52 -0
- data/test/test_form_as_hash.rb +61 -0
- data/test/test_form_button.rb +38 -0
- data/test/test_form_no_inputname.rb +15 -0
- data/test/test_forms.rb +577 -0
- data/test/test_frames.rb +25 -0
- data/test/test_get_headers.rb +73 -0
- data/test/test_gzipping.rb +22 -0
- data/test/test_hash_api.rb +45 -0
- data/test/test_history.rb +142 -0
- data/test/test_history_added.rb +16 -0
- data/test/test_html_unscape_forms.rb +39 -0
- data/test/test_if_modified_since.rb +20 -0
- data/test/test_keep_alive.rb +31 -0
- data/test/test_links.rb +127 -0
- data/test/test_mech.rb +289 -0
- data/test/test_mechanize_file.rb +72 -0
- data/test/test_meta.rb +65 -0
- data/test/test_multi_select.rb +106 -0
- data/test/test_no_attributes.rb +13 -0
- data/test/test_option.rb +18 -0
- data/test/test_page.rb +127 -0
- data/test/test_page_encoding.rb +298 -0
- data/test/test_pluggable_parser.rb +145 -0
- data/test/test_post_form.rb +34 -0
- data/test/test_pretty_print.rb +22 -0
- data/test/test_radiobutton.rb +75 -0
- data/test/test_redirect_limit_reached.rb +39 -0
- data/test/test_redirect_verb_handling.rb +43 -0
- data/test/test_referer.rb +39 -0
- data/test/test_relative_links.rb +40 -0
- data/test/test_request.rb +13 -0
- data/test/test_response_code.rb +52 -0
- data/test/test_save_file.rb +103 -0
- data/test/test_scheme.rb +63 -0
- data/test/test_select.rb +106 -0
- data/test/test_select_all.rb +15 -0
- data/test/test_select_none.rb +15 -0
- data/test/test_select_noopts.rb +16 -0
- data/test/test_set_fields.rb +44 -0
- data/test/test_ssl_server.rb +20 -0
- data/test/test_subclass.rb +14 -0
- data/test/test_textarea.rb +45 -0
- data/test/test_upload.rb +109 -0
- data/test/test_verbs.rb +25 -0
- metadata +320 -0
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
module Net
  class HTTP
    # Keep the stock implementation reachable under its old name.
    alias :old_keep_alive? :keep_alive?

    # Decides whether the connection may be reused after +req+/+res+.
    #
    # Both arguments only need to answer #[] with a header value (or nil).
    # Checks, in order:
    # 1. the request asked for "close"             -> false
    # 2. @seems_1_0_server is set                  -> false
    # 3. response Connection header: close / keep-alive
    # 4. response Proxy-Connection header: close / keep-alive
    # 5. otherwise reuse only when @curr_http_version is '1.1'
    def keep_alive?(req, res)
      return false if req['connection'].to_s =~ /close/i
      return false if @seems_1_0_server

      ['connection', 'proxy-connection'].each do |header|
        value = res[header].to_s
        # "close" wins over "keep-alive" when a header carries both.
        return false if value =~ /close/i
        return true  if value =~ /keep-alive/i
      end

      @curr_http_version == '1.1'
    end
  end
end
|
|
15
|
+
|
|
16
|
+
# Version-dependent patches.
#
# NOTE: RUBY_VERSION is compared as a plain string here.
# * Interpreters newer than 1.8.4: make #inspect an alias of #pretty_inspect
#   on Mechanize::Form, Mechanize::Page and Mechanize::Page::Link.
# * 1.8.4 and older: register HTTPInternalServerError for status code '500'
#   in Net::HTTPResponse::CODE_TO_OBJ.
if RUBY_VERSION > "1.8.4"
  class Mechanize
    class Form
      alias inspect pretty_inspect
    end

    class Page
      alias inspect pretty_inspect

      class Link
        alias inspect pretty_inspect
      end
    end
  end
else
  module Net # :nodoc:
    class HTTPResponse # :nodoc:
      CODE_TO_OBJ['500'] = HTTPInternalServerError
    end
  end
end
|
|
@@ -0,0 +1,279 @@
|
|
|
1
|
+
require 'mechanize/page/link'
|
|
2
|
+
require 'mechanize/page/meta'
|
|
3
|
+
require 'mechanize/page/base'
|
|
4
|
+
require 'mechanize/page/frame'
|
|
5
|
+
require 'mechanize/headers'
|
|
6
|
+
require 'mechanize/page/encoding'
|
|
7
|
+
|
|
8
|
+
class Mechanize
|
|
9
|
+
# = Synopsis
|
|
10
|
+
# This class encapsulates an HTML page. If Mechanize finds a content
|
|
11
|
+
# type of 'text/html', this class will be instantiated and returned.
|
|
12
|
+
#
|
|
13
|
+
# == Example
|
|
14
|
+
# require 'rubygems'
|
|
15
|
+
# require 'mechanize'
|
|
16
|
+
#
|
|
17
|
+
# agent = Mechanize.new
|
|
18
|
+
# agent.get('http://google.com/').class #=> Mechanize::Page
|
|
19
|
+
#
|
|
20
|
+
class Page < Mechanize::File
|
|
21
|
+
extend Forwardable
|
|
22
|
+
include PageEncoding
|
|
23
|
+
|
|
24
|
+
attr_accessor :mech
|
|
25
|
+
|
|
26
|
+
# Builds a page object for an HTML (or XHTML) response.
#
# All five arguments are forwarded unchanged to the superclass constructor.
# +response+ must answer #[] with header values; its 'content-type' decides
# whether the page is accepted.
#
# Raises Mechanize::ContentTypeError (built from the content-type value) when
# the content type does not start with text/html or application/xhtml+xml.
#
# NOTE(review): +response+ defaults to nil but is indexed unconditionally
# below - callers presumably always pass one; confirm.
def initialize(uri=nil, response=nil, body=nil, code=nil, mech=nil)
  # Force the encoding to be 8BIT so we can perform regular expressions.
  # We'll set it to the detected encoding later. This mutates the string
  # that the caller passed in.
  body.force_encoding('ASCII-8BIT') if defined?(Encoding) && body

  super(uri, response, body, code, mech)
  @mech ||= mech

  # Both media types must be anchored at the start of the header value.
  # The earlier pattern /^(text\/html)|(application\/xhtml\+xml)/i anchored
  # only the first alternative, so any value merely *containing*
  # "application/xhtml+xml" was accepted.
  type_header = response['content-type']
  unless type_header =~ /\A\s*(?:text\/html|application\/xhtml\+xml)/i
    raise Mechanize::ContentTypeError.new(type_header)
  end

  reset_parser
  after_page_chain.handle(self)
end
|
|
41
|
+
|
|
42
|
+
# Builds the Chain that is run right after the page has been constructed.
# The page's own encoding hook always comes first; the agent's
# post_page_hook is appended only when an agent (@mech) is attached.
def after_page_chain
  hooks = if @mech
            [page_encoding_hook, @mech.post_page_hook]
          else
            [page_encoding_hook]
          end
  Chain.new(hooks)
end
|
|
47
|
+
|
|
48
|
+
# Drops the cached parser and every value memoized from it (element
# collections, title, HTTP/meta encodings) so they are rebuilt on next use.
# @encoding is left untouched. Returns nil.
def reset_parser
  @parser        = nil
  @links         = nil
  @forms         = nil
  @meta          = nil
  @bases         = nil
  @frames        = nil
  @iframes       = nil
  @title         = nil
  @http_encoding = nil
  @meta_encoding = nil
end
|
|
54
|
+
|
|
55
|
+
# Returns the text of the page's <title> element, memoized in @title.
# Returns nil when there is no parser or the title text is empty.
def title
  return @title if @title
  return nil unless parser

  text = search('title').inner_text
  @title = text.length > 0 ? text : nil
end
|
|
60
|
+
|
|
61
|
+
# Returns the parsed document, building it lazily on first use.
#
# Nothing is parsed (and the current @parser value is returned) unless both
# body and response are present. When the agent's html_parser is
# Nokogiri::HTML, @encoding is passed along as the external encoding;
# any other parser only receives the HTML string.
def parser
  return @parser if @parser
  return @parser unless body && response

  builder = mech.html_parser
  @parser =
    if builder == Nokogiri::HTML
      logger = mech.log
      logger.debug("parser: using external encoding #{@encoding}") if logger && @encoding
      builder.parse(html_body, nil, @encoding)
    else
      builder.parse(html_body)
    end
end
|
|
75
|
+
alias :root :parser
|
|
76
|
+
|
|
77
|
+
# Returns the value of the response's 'content-type' header.
def content_type
  header_name = 'content-type'
  response[header_name]
end
|
|
81
|
+
|
|
82
|
+
# Search through the page like HPricot
|
|
83
|
+
def_delegator :parser, :search, :search
|
|
84
|
+
def_delegator :parser, :/, :/
|
|
85
|
+
def_delegator :parser, :at, :at
|
|
86
|
+
|
|
87
|
+
# # types_with generator. run me on irb
|
|
88
|
+
#
|
|
89
|
+
# [:form, :link, :base, :frame, :iframe].each do |type|
|
|
90
|
+
# puts(<<-eomethod)
|
|
91
|
+
# def #{type}s_with(criteria)
|
|
92
|
+
# criteria = {:name => criteria} if String === criteria
|
|
93
|
+
# f = #{type}s.find_all do |thing|
|
|
94
|
+
# criteria.all? { |k,v|
|
|
95
|
+
# case k
|
|
96
|
+
# when :class then v === thing.attribute_class
|
|
97
|
+
# when :id then v === thing.attribute_id
|
|
98
|
+
# else v === thing.__send__(k)
|
|
99
|
+
# end }
|
|
100
|
+
# end
|
|
101
|
+
# yield f if block_given?
|
|
102
|
+
# f
|
|
103
|
+
# end
|
|
104
|
+
#
|
|
105
|
+
# def #{type}_with(criteria)
|
|
106
|
+
# f = #{type}s_with(criteria).first
|
|
107
|
+
# yield f if block_given?
|
|
108
|
+
# f
|
|
109
|
+
# end
|
|
110
|
+
# alias :#{type} :#{type}_with
|
|
111
|
+
#
|
|
112
|
+
# eomethod
|
|
113
|
+
# end
|
|
114
|
+
|
|
115
|
+
# don't modify by hand start >>>
|
|
116
|
+
|
|
117
|
+
# Returns every form matching +criteria+.
#
# criteria:: a String is shorthand for {:name => string}; otherwise a
#            Hash of attribute => expected. :class and :id are read via
#            attribute_class / attribute_id, any other key is sent to the
#            form as a method. Values are compared with ===, and all
#            pairs must match.
#
# Yields the resulting array when a block is given, then returns it.
def forms_with(criteria)
  criteria = {:name => criteria} if String === criteria
  matches = forms.select do |candidate|
    criteria.all? do |key, expected|
      actual = case key
               when :class then candidate.attribute_class
               when :id    then candidate.attribute_id
               else             candidate.__send__(key)
               end
      expected === actual
    end
  end
  yield matches if block_given?
  matches
end

# Returns the first form matching +criteria+ (see forms_with), or nil.
# The result is yielded (even when nil) if a block is given.
def form_with(criteria)
  first_match = forms_with(criteria).first
  yield first_match if block_given?
  first_match
end
alias :form :form_with
|
|
137
|
+
|
|
138
|
+
# Returns every link matching +criteria+.
#
# criteria:: a String is shorthand for {:name => string}; otherwise a
#            Hash of attribute => expected. :class and :id are read via
#            attribute_class / attribute_id, any other key is sent to the
#            link as a method. Values are compared with ===, and all
#            pairs must match.
#
# Yields the resulting array when a block is given, then returns it.
def links_with(criteria)
  criteria = {:name => criteria} if String === criteria
  matches = links.select do |candidate|
    criteria.all? do |key, expected|
      actual = case key
               when :class then candidate.attribute_class
               when :id    then candidate.attribute_id
               else             candidate.__send__(key)
               end
      expected === actual
    end
  end
  yield matches if block_given?
  matches
end

# Returns the first link matching +criteria+ (see links_with), or nil.
# The result is yielded (even when nil) if a block is given.
def link_with(criteria)
  first_match = links_with(criteria).first
  yield first_match if block_given?
  first_match
end
alias :link :link_with
|
|
158
|
+
|
|
159
|
+
# Returns every base element matching +criteria+.
#
# criteria:: a String is shorthand for {:name => string}; otherwise a
#            Hash of attribute => expected. :class and :id are read via
#            attribute_class / attribute_id, any other key is sent to the
#            element as a method. Values are compared with ===, and all
#            pairs must match.
#
# Yields the resulting array when a block is given, then returns it.
def bases_with(criteria)
  criteria = {:name => criteria} if String === criteria
  matches = bases.select do |candidate|
    criteria.all? do |key, expected|
      actual = case key
               when :class then candidate.attribute_class
               when :id    then candidate.attribute_id
               else             candidate.__send__(key)
               end
      expected === actual
    end
  end
  yield matches if block_given?
  matches
end

# Returns the first base element matching +criteria+ (see bases_with), or
# nil. The result is yielded (even when nil) if a block is given.
def base_with(criteria)
  first_match = bases_with(criteria).first
  yield first_match if block_given?
  first_match
end
alias :base :base_with
|
|
179
|
+
|
|
180
|
+
# Returns every frame matching +criteria+.
#
# criteria:: a String is shorthand for {:name => string}; otherwise a
#            Hash of attribute => expected. :class and :id are read via
#            attribute_class / attribute_id, any other key is sent to the
#            frame as a method. Values are compared with ===, and all
#            pairs must match.
#
# Yields the resulting array when a block is given, then returns it.
def frames_with(criteria)
  criteria = {:name => criteria} if String === criteria
  matches = frames.select do |candidate|
    criteria.all? do |key, expected|
      actual = case key
               when :class then candidate.attribute_class
               when :id    then candidate.attribute_id
               else             candidate.__send__(key)
               end
      expected === actual
    end
  end
  yield matches if block_given?
  matches
end

# Returns the first frame matching +criteria+ (see frames_with), or nil.
# The result is yielded (even when nil) if a block is given.
def frame_with(criteria)
  first_match = frames_with(criteria).first
  yield first_match if block_given?
  first_match
end
alias :frame :frame_with
|
|
200
|
+
|
|
201
|
+
# Returns every iframe matching +criteria+.
#
# criteria:: a String is shorthand for {:name => string}; otherwise a
#            Hash of attribute => expected. :class and :id are read via
#            attribute_class / attribute_id, any other key is sent to the
#            iframe as a method. Values are compared with ===, and all
#            pairs must match.
#
# Yields the resulting array when a block is given, then returns it.
def iframes_with(criteria)
  criteria = {:name => criteria} if String === criteria
  matches = iframes.select do |candidate|
    criteria.all? do |key, expected|
      actual = case key
               when :class then candidate.attribute_class
               when :id    then candidate.attribute_id
               else             candidate.__send__(key)
               end
      expected === actual
    end
  end
  yield matches if block_given?
  matches
end

# Returns the first iframe matching +criteria+ (see iframes_with), or nil.
# The result is yielded (even when nil) if a block is given.
def iframe_with(criteria)
  first_match = iframes_with(criteria).first
  yield first_match if block_given?
  first_match
end
alias :iframe :iframe_with
|
|
221
|
+
|
|
222
|
+
# don't modify by hand end <<<
|
|
223
|
+
|
|
224
|
+
# Returns a Link for every <a> element followed by every <area> element
# found on the page. The array is built once and memoized in @links.
def links
  return @links if @links

  collected = []
  ['a', 'area'].each do |tag|
    search(tag).each { |node| collected << Link.new(node, @mech, self) }
  end
  @links = collected
end
|
|
231
|
+
|
|
232
|
+
# Returns a Form for every <form> element on the page, memoized in @forms.
# A form without an action gets this page's URI (as a string) as its action.
def forms
  return @forms if @forms

  @forms = search('form').map do |element|
    built = Form.new(element, @mech, self)
    built.action = @uri.to_s unless built.action
    built
  end
end
|
|
239
|
+
|
|
240
|
+
# Returns the page's meta-refresh entries, memoized in @meta.
#
# Only <meta> elements carrying both http-equiv and content are considered,
# and only those whose http-equiv is "refresh" (case-insensitive). For each,
# Meta.parse is called with the content and this page's uri; inside its
# block the node gets 'delay' and 'href' attributes and is wrapped in a
# Meta. nil results from Meta.parse are dropped.
def meta
  return @meta if @meta

  refreshes = []
  search('meta').each do |node|
    equiv = node['http-equiv']
    next unless equiv
    content = node['content']
    next unless content
    next unless equiv.downcase == 'refresh'

    parsed = Meta.parse(content, uri) do |delay, href|
      node['delay'] = delay
      node['href'] = href
      Meta.new(node, @mech, self)
    end
    refreshes << parsed unless parsed.nil?
  end
  @meta = refreshes
end
|
|
253
|
+
|
|
254
|
+
# Returns a Base for every <base> element on the page, memoized in @bases.
def bases
  return @bases if @bases

  @bases = search('base').map do |node|
    Base.new(node, @mech, self)
  end
end
|
|
258
|
+
|
|
259
|
+
# Returns a Frame for every <frame> element on the page, memoized in @frames.
def frames
  return @frames if @frames

  @frames = search('frame').map do |node|
    Frame.new(node, @mech, self)
  end
end
|
|
263
|
+
|
|
264
|
+
# Returns a Frame for every <iframe> element on the page, memoized in
# @iframes.
def iframes
  return @iframes if @iframes

  @iframes = search('iframe').map do |node|
    Frame.new(node, @mech, self)
  end
end
|
|
268
|
+
|
|
269
|
+
private
|
|
270
|
+
|
|
271
|
+
# Returns the HTML string to hand to the parser:
# * '' when there is no body at all,
# * '<html></html>' when the body is empty,
# * the body itself otherwise.
def html_body
  return '' unless body
  return '<html></html>' unless body.length > 0

  body
end
|
|
278
|
+
end
|
|
279
|
+
end
|
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
class Mechanize
  # Character-encoding helpers for pages.
  #
  # The including class must provide +parser+, +reset_parser+, +response+
  # and +html_body+; the methods below call them.
  module PageEncoding

    # Sets the encoding that will be handed to the HTML parser.
    #
    # If a parser has already been built and reports a different encoding
    # (compared case-insensitively), reset_parser is called so the document
    # is re-parsed lazily. Returns +encoding+.
    def encoding=(encoding)
      @encoding = encoding

      if @parser
        parser_encoding = @parser.encoding
        if (parser_encoding && parser_encoding.downcase) != (encoding && encoding.downcase)
          # lazy reinitialize the parser with the new encoding
          reset_parser
        end
      end

      encoding
    end

    # Returns the encoding reported by the parser, or nil when the parser
    # does not respond to #encoding.
    def encoding
      parser.respond_to?(:encoding) ? parser.encoding : nil
    end

    # Returns the charset announced in the response headers, or nil.
    #
    # Every header value is scanned for a "charset=..." parameter; if
    # several headers carry one, the last wins. The value "none" is ignored.
    # The result is memoized in @http_encoding.
    #
    # Only the charset parameter itself is extracted: trailing parameters
    # ("charset=utf-8; boundary=x") and surrounding quotes
    # ('charset="utf-8"') are not part of the result, and a value that
    # mentions "charset" without "=" is skipped.
    def http_charset
      unless @http_encoding
        iterator = response.respond_to?(:each_header) ? :each_header : :each
        response.send(iterator) do |header, value|
          next unless String === value &&
                      value =~ /charset\s*=\s*["']?([^\s;"']+)/i
          charset = $1
          @http_encoding = charset unless charset == 'none'
        end
      end
      @http_encoding
    end
    alias :http_encoding :http_charset

    # Returns the encoding the parser found in a <meta> tag, memoized in
    # @meta_encoding. Best effort: any error raised while asking the parser
    # (for example a parser without #meta_encoding) yields nil.
    def meta_charset
      @meta_encoding ||= (parser.meta_encoding rescue nil)
    end
    alias :meta_encoding :meta_charset

    # Returns the charset detected from the body by Util.detect_charset,
    # memoized in @body_encoding.
    def body_charset
      @body_encoding ||= Util.detect_charset(html_body)
    end
    alias :body_encoding :body_charset

    # Returns the encoding to pass to the parser, or nil to let it decide.
    def default_encoding
      # If body has <meta> charset, we rely on Nokogiri's auto detection of encoding for the moment
      # If no <meta>, Nokogiri would need encoding argument for the correct parsing
      body_has_meta_charset? ? nil : http_encoding || body_encoding
    end

    # Truthy (a match index) when the body contains a <meta ...charset...>
    # tag, nil otherwise.
    def body_has_meta_charset?
      html_body =~ /<meta[^>]*charset[^>]*>/i
    end

    # Returns a Chain::PostPageHook whose single step stores
    # default_encoding in @encoding.
    def page_encoding_hook
      # "@encoding" means "encoding for Nokogiri parsing"
      Chain::PostPageHook.new([lambda{|page| @encoding = default_encoding}])
    end

  end
end
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
class Mechanize
|
|
2
|
+
class Page < Mechanize::File
|
|
3
|
+
# Wraps a 'frame' tag so that it can be used like a Link.
#
# A frame's link target comes from its 'src' attribute and its label from
# its 'name' attribute; #src and #name are aliases of Link's #href and
# #text, so either vocabulary works.
class Frame < Link
  alias :src :href
  alias :name :text

  # node::    element answering #[] with attribute values
  # mech::    passed through to Link#initialize
  # referer:: passed through to Link#initialize as the owning page
  def initialize(node, mech, referer)
    super
    # Link#initialize filled these from 'href' and the inner text;
    # a frame uses 'name' and 'src' instead.
    @node, @text, @href = node, node['name'], node['src']
  end
end
|
|
19
|
+
end
|
|
20
|
+
end
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
class Mechanize
|
|
2
|
+
class Page < Mechanize::File
|
|
3
|
+
# Wraps a link element parsed out of an HTML page, exposing its text and
# its target. When the element has no text of its own, the alt text of
# the images it contains is used instead.
#
# For example, the text for the following links will both be 'Hello World':
#
# <a href="http://rubyforge.org">Hello World</a>
# <a href="http://rubyforge.org"><img src="test.jpg" alt="Hello World"></a>
class Link
  attr_reader :node, :href, :text, :attributes, :page
  alias :to_s :text
  alias :referer :page

  # node:: element answering #[], #inner_text and #search
  # mech:: agent used by #click
  # page:: page the link was found on (also available as #referer)
  def initialize(node, mech, page)
    @node       = node
    @href       = node['href']
    @text       = node.inner_text
    @page       = page
    @mech       = mech
    @attributes = node

    # No text of its own: fall back to the concatenated alt texts of the
    # images inside the link (images without alt contribute nothing).
    if @text.nil? || @text.length == 0
      images = node.search('img')
      if images.length > 0
        @text = ''
        images.each { |img| @text << (img['alt'] || '') }
      end
    end
  end

  # Returns class attribute (<a class="***"> of ***). If no class, returns nil.
  def attribute_class
    node['class']
  end

  # Returns id attribute (<a id="***"> of ***). If no id, returns nil.
  def attribute_id
    node['id']
  end

  # Returns the href parsed with URI.parse; when there is no href the
  # (falsy) href value itself is returned.
  def uri
    @href ? URI.parse(@href) : @href
  end

  # Click on this link: hands the link to the agent's #click.
  def click
    @mech.click(self)
  end
end
|
|
52
|
+
end
|
|
53
|
+
end
|