knu-mechanize 0.9.3.20090623142847
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGELOG.rdoc +504 -0
- data/EXAMPLES.rdoc +171 -0
- data/FAQ.rdoc +11 -0
- data/GUIDE.rdoc +122 -0
- data/LICENSE.rdoc +340 -0
- data/Manifest.txt +169 -0
- data/README.rdoc +60 -0
- data/Rakefile +43 -0
- data/examples/flickr_upload.rb +23 -0
- data/examples/mech-dump.rb +7 -0
- data/examples/proxy_req.rb +9 -0
- data/examples/rubyforge.rb +21 -0
- data/examples/spider.rb +11 -0
- data/lib/mechanize.rb +7 -0
- data/lib/www/mechanize.rb +619 -0
- data/lib/www/mechanize/chain.rb +34 -0
- data/lib/www/mechanize/chain/auth_headers.rb +80 -0
- data/lib/www/mechanize/chain/body_decoding_handler.rb +48 -0
- data/lib/www/mechanize/chain/connection_resolver.rb +78 -0
- data/lib/www/mechanize/chain/custom_headers.rb +23 -0
- data/lib/www/mechanize/chain/handler.rb +9 -0
- data/lib/www/mechanize/chain/header_resolver.rb +53 -0
- data/lib/www/mechanize/chain/parameter_resolver.rb +24 -0
- data/lib/www/mechanize/chain/post_connect_hook.rb +0 -0
- data/lib/www/mechanize/chain/pre_connect_hook.rb +22 -0
- data/lib/www/mechanize/chain/request_resolver.rb +32 -0
- data/lib/www/mechanize/chain/response_body_parser.rb +40 -0
- data/lib/www/mechanize/chain/response_header_handler.rb +50 -0
- data/lib/www/mechanize/chain/response_reader.rb +41 -0
- data/lib/www/mechanize/chain/ssl_resolver.rb +42 -0
- data/lib/www/mechanize/chain/uri_resolver.rb +77 -0
- data/lib/www/mechanize/content_type_error.rb +16 -0
- data/lib/www/mechanize/cookie.rb +72 -0
- data/lib/www/mechanize/cookie_jar.rb +191 -0
- data/lib/www/mechanize/file.rb +73 -0
- data/lib/www/mechanize/file_response.rb +62 -0
- data/lib/www/mechanize/file_saver.rb +39 -0
- data/lib/www/mechanize/form.rb +360 -0
- data/lib/www/mechanize/form/button.rb +8 -0
- data/lib/www/mechanize/form/check_box.rb +13 -0
- data/lib/www/mechanize/form/field.rb +28 -0
- data/lib/www/mechanize/form/file_upload.rb +24 -0
- data/lib/www/mechanize/form/image_button.rb +23 -0
- data/lib/www/mechanize/form/multi_select_list.rb +69 -0
- data/lib/www/mechanize/form/option.rb +51 -0
- data/lib/www/mechanize/form/radio_button.rb +38 -0
- data/lib/www/mechanize/form/select_list.rb +45 -0
- data/lib/www/mechanize/headers.rb +12 -0
- data/lib/www/mechanize/history.rb +67 -0
- data/lib/www/mechanize/inspect.rb +90 -0
- data/lib/www/mechanize/monkey_patch.rb +37 -0
- data/lib/www/mechanize/page.rb +181 -0
- data/lib/www/mechanize/page/base.rb +10 -0
- data/lib/www/mechanize/page/frame.rb +22 -0
- data/lib/www/mechanize/page/link.rb +50 -0
- data/lib/www/mechanize/page/meta.rb +51 -0
- data/lib/www/mechanize/pluggable_parsers.rb +103 -0
- data/lib/www/mechanize/redirect_limit_reached_error.rb +18 -0
- data/lib/www/mechanize/redirect_not_get_or_head_error.rb +20 -0
- data/lib/www/mechanize/response_code_error.rb +25 -0
- data/lib/www/mechanize/unsupported_scheme_error.rb +10 -0
- data/lib/www/mechanize/util.rb +76 -0
- data/mechanize.gemspec +41 -0
- data/test/chain/test_argument_validator.rb +14 -0
- data/test/chain/test_auth_headers.rb +25 -0
- data/test/chain/test_custom_headers.rb +18 -0
- data/test/chain/test_header_resolver.rb +28 -0
- data/test/chain/test_parameter_resolver.rb +35 -0
- data/test/chain/test_request_resolver.rb +29 -0
- data/test/chain/test_response_reader.rb +24 -0
- data/test/data/htpasswd +1 -0
- data/test/data/server.crt +16 -0
- data/test/data/server.csr +12 -0
- data/test/data/server.key +15 -0
- data/test/data/server.pem +15 -0
- data/test/helper.rb +129 -0
- data/test/htdocs/alt_text.html +10 -0
- data/test/htdocs/bad_form_test.html +9 -0
- data/test/htdocs/button.jpg +0 -0
- data/test/htdocs/empty_form.html +6 -0
- data/test/htdocs/file_upload.html +26 -0
- data/test/htdocs/find_link.html +41 -0
- data/test/htdocs/form_multi_select.html +16 -0
- data/test/htdocs/form_multival.html +37 -0
- data/test/htdocs/form_no_action.html +18 -0
- data/test/htdocs/form_no_input_name.html +16 -0
- data/test/htdocs/form_select.html +16 -0
- data/test/htdocs/form_select_all.html +16 -0
- data/test/htdocs/form_select_none.html +17 -0
- data/test/htdocs/form_select_noopts.html +10 -0
- data/test/htdocs/form_set_fields.html +14 -0
- data/test/htdocs/form_test.html +188 -0
- data/test/htdocs/frame_test.html +30 -0
- data/test/htdocs/google.html +13 -0
- data/test/htdocs/iframe_test.html +16 -0
- data/test/htdocs/index.html +6 -0
- data/test/htdocs/link with space.html +5 -0
- data/test/htdocs/meta_cookie.html +11 -0
- data/test/htdocs/no_title_test.html +6 -0
- data/test/htdocs/relative/tc_relative_links.html +21 -0
- data/test/htdocs/tc_bad_links.html +5 -0
- data/test/htdocs/tc_base_link.html +8 -0
- data/test/htdocs/tc_blank_form.html +11 -0
- data/test/htdocs/tc_checkboxes.html +19 -0
- data/test/htdocs/tc_encoded_links.html +5 -0
- data/test/htdocs/tc_follow_meta.html +8 -0
- data/test/htdocs/tc_form_action.html +48 -0
- data/test/htdocs/tc_links.html +18 -0
- data/test/htdocs/tc_no_attributes.html +16 -0
- data/test/htdocs/tc_pretty_print.html +17 -0
- data/test/htdocs/tc_radiobuttons.html +17 -0
- data/test/htdocs/tc_referer.html +10 -0
- data/test/htdocs/tc_relative_links.html +19 -0
- data/test/htdocs/tc_textarea.html +23 -0
- data/test/htdocs/unusual______.html +5 -0
- data/test/servlets.rb +365 -0
- data/test/ssl_server.rb +48 -0
- data/test/test_authenticate.rb +71 -0
- data/test/test_bad_links.rb +25 -0
- data/test/test_blank_form.rb +16 -0
- data/test/test_checkboxes.rb +61 -0
- data/test/test_content_type.rb +13 -0
- data/test/test_cookie_class.rb +338 -0
- data/test/test_cookie_jar.rb +362 -0
- data/test/test_cookies.rb +123 -0
- data/test/test_encoded_links.rb +20 -0
- data/test/test_errors.rb +49 -0
- data/test/test_follow_meta.rb +108 -0
- data/test/test_form_action.rb +52 -0
- data/test/test_form_as_hash.rb +61 -0
- data/test/test_form_button.rb +38 -0
- data/test/test_form_no_inputname.rb +15 -0
- data/test/test_forms.rb +564 -0
- data/test/test_frames.rb +25 -0
- data/test/test_get_headers.rb +52 -0
- data/test/test_gzipping.rb +22 -0
- data/test/test_hash_api.rb +45 -0
- data/test/test_history.rb +142 -0
- data/test/test_history_added.rb +16 -0
- data/test/test_html_unscape_forms.rb +39 -0
- data/test/test_if_modified_since.rb +20 -0
- data/test/test_keep_alive.rb +31 -0
- data/test/test_links.rb +120 -0
- data/test/test_mech.rb +268 -0
- data/test/test_mechanize_file.rb +47 -0
- data/test/test_meta.rb +65 -0
- data/test/test_multi_select.rb +106 -0
- data/test/test_no_attributes.rb +13 -0
- data/test/test_option.rb +18 -0
- data/test/test_page.rb +119 -0
- data/test/test_pluggable_parser.rb +145 -0
- data/test/test_post_form.rb +34 -0
- data/test/test_pretty_print.rb +22 -0
- data/test/test_radiobutton.rb +75 -0
- data/test/test_redirect_limit_reached.rb +41 -0
- data/test/test_redirect_verb_handling.rb +45 -0
- data/test/test_referer.rb +39 -0
- data/test/test_relative_links.rb +40 -0
- data/test/test_request.rb +13 -0
- data/test/test_response_code.rb +52 -0
- data/test/test_save_file.rb +48 -0
- data/test/test_scheme.rb +48 -0
- data/test/test_select.rb +106 -0
- data/test/test_select_all.rb +15 -0
- data/test/test_select_none.rb +15 -0
- data/test/test_select_noopts.rb +16 -0
- data/test/test_set_fields.rb +44 -0
- data/test/test_ssl_server.rb +20 -0
- data/test/test_subclass.rb +14 -0
- data/test/test_textarea.rb +45 -0
- data/test/test_upload.rb +109 -0
- data/test/test_verbs.rb +25 -0
- metadata +314 -0
@@ -0,0 +1,37 @@
|
|
1
|
+
module Net
  class HTTP
    # Keep the previous implementation reachable under another name.
    alias :old_keep_alive? :keep_alive?

    # Decides whether the connection can be reused after a request.
    #
    # req:: request object, indexed by header name
    # res:: response object, indexed by header name
    #
    # A "close" token in the request's Connection header, or a truthy
    # @seems_1_0_server, rules reuse out immediately. Otherwise the
    # response's Connection header and then its Proxy-Connection header are
    # consulted ("close" => false, "keep-alive" => true). With no explicit
    # hint the answer is whether @curr_http_version equals '1.1'.
    #
    # NOTE(review): @seems_1_0_server and @curr_http_version are not assigned
    # in this file; presumably Net::HTTP maintains them - confirm.
    def keep_alive?(req, res)
      return false if req['connection'].to_s =~ /close/i
      return false if @seems_1_0_server

      %w[connection proxy-connection].each do |header|
        value = res[header].to_s
        return false if value =~ /close/i
        return true if value =~ /keep-alive/i
      end

      @curr_http_version == '1.1'
    end
  end
end
|
15
|
+
|
16
|
+
# Version-dependent patches.
#
# On interpreters newer than 1.8.4, Form, Page and Page::Link get +inspect+
# aliased to +pretty_inspect+. On 1.8.4 and older, the '500' entry of
# Net::HTTPResponse::CODE_TO_OBJ is set to HTTPInternalServerError instead.
# NOTE(review): presumably that entry is missing or wrong in the 1.8.4
# standard library - confirm.
if RUBY_VERSION > "1.8.4"
  module WWW
    class Mechanize
      class Form
        alias_method :inspect, :pretty_inspect
      end

      class Page
        alias_method :inspect, :pretty_inspect

        class Link
          alias_method :inspect, :pretty_inspect
        end
      end
    end
  end
else
  # Monkey patch for ruby 1.8.4
  module Net # :nodoc:
    class HTTPResponse # :nodoc:
      CODE_TO_OBJ['500'] = HTTPInternalServerError
    end
  end
end
|
@@ -0,0 +1,181 @@
|
|
1
|
+
require 'www/mechanize/page/link'
|
2
|
+
require 'www/mechanize/page/meta'
|
3
|
+
require 'www/mechanize/page/base'
|
4
|
+
require 'www/mechanize/page/frame'
|
5
|
+
require 'www/mechanize/headers'
|
6
|
+
|
7
|
+
module WWW
|
8
|
+
class Mechanize
|
9
|
+
# = Synopsis
|
10
|
+
# This class encapsulates an HTML page. If Mechanize finds a content
|
11
|
+
# type of 'text/html', this class will be instantiated and returned.
|
12
|
+
#
|
13
|
+
# == Example
|
14
|
+
# require 'rubygems'
|
15
|
+
# require 'mechanize'
|
16
|
+
#
|
17
|
+
# agent = WWW::Mechanize.new
|
18
|
+
# agent.get('http://google.com/').class #=> WWW::Mechanize::Page
|
19
|
+
#
|
20
|
+
class Page < WWW::Mechanize::File
|
21
|
+
extend Forwardable
|
22
|
+
|
23
|
+
attr_accessor :mech
|
24
|
+
|
25
|
+
# Builds a page from a response and its body.
#
# uri, response, body, code:: forwarded unchanged to the superclass
#                             constructor
# response:: also walked for headers (via +each_header+ when it responds to
#            it, otherwise +each+) and indexed by 'content-type'
# mech::     stored in @mech unless the superclass already set one
#
# Raises Mechanize::ContentTypeError unless the Content-Type header names an
# HTML or XHTML media type (text/html, application/xhtml+xml or
# application/vnd.wap.xhtml+xml).
def initialize(uri=nil, response=nil, body=nil, code=nil, mech=nil)
  @encoding = nil

  method = response.respond_to?(:each_header) ? :each_header : :each
  response.send(method) do |header,v|
    # Capture the charset parameter itself, so quoted values and parameters
    # that follow it ("charset=\"utf-8\"; foo=bar") do not leak into the
    # encoding name.
    next unless v =~ /charset\s*=\s*["']?([^\s;"']+)/i
    encoding = $1
    @encoding = encoding unless encoding == 'none'
  end

  # Force the encoding to be 8BIT so we can perform regular expressions.
  # We'll set it to the detected encoding later
  body.force_encoding('ASCII-8BIT') if defined?(Encoding) && body

  @encoding ||= Util.detect_charset(body)

  super(uri, response, body, code)
  @mech ||= mech

  # A <meta ... charset ...> tag in the document discards the encoding
  # chosen above.
  # NOTE(review): presumably so the HTML parser can pick the charset up from
  # the document itself - confirm.
  @encoding = nil if html_body =~ /<meta[^>]*charset[^>]*>/i

  # The alternation is grouped so the anchor applies to every media type,
  # and the WAP XHTML type is accepted because PluggableParser routes it to
  # this class.
  raise Mechanize::ContentTypeError.new(response['content-type']) unless
    response['content-type'] =~ /\A\s*(?:text\/html|application\/(?:vnd\.wap\.)?xhtml\+xml)/i
  @parser = @links = @forms = @meta = @bases = @frames = @iframes = nil
end
|
50
|
+
|
51
|
+
# Text of the document's <title> element, memoized in @title.
# Returns nil when there is no parser or the title text is empty.
def title
  return @title if @title
  return @title = nil unless parser

  text = search('title').inner_text
  @title = text.length > 0 ? text : nil
end
|
56
|
+
|
57
|
+
# Records +encoding+ in @encoding. If a parser has already been built and
# its encoding differs from the new one (compared case-insensitively), the
# parser is dropped so that it is rebuilt lazily on next use.
# Returns +encoding+.
def encoding=(encoding)
  @encoding = encoding

  if @parser
    current = @parser.encoding
    current = current.downcase if current
    wanted = encoding
    wanted = wanted.downcase if wanted

    # lazy reinitialize the parser with the new encoding
    @parser = nil unless current == wanted
  end

  encoding
end
|
70
|
+
|
71
|
+
# Encoding reported by the parser, or nil when the parser does not respond
# to +encoding+.
def encoding
  document = parser
  return nil unless document.respond_to?(:encoding)

  document.encoding
end
|
74
|
+
|
75
|
+
# Returns the parsed document, building and memoizing it on first use.
# Nothing is built unless both +body+ and +response+ are present. When the
# agent's html_parser is Nokogiri::HTML, @encoding is passed along as the
# third argument; any other parser receives only the HTML text.
def parser
  return @parser if @parser
  return @parser unless body && response

  builder = mech.html_parser
  arguments = [html_body]
  arguments.push(nil, @encoding) if builder == Nokogiri::HTML
  @parser = builder.parse(*arguments)
end
alias :root :parser
|
89
|
+
|
90
|
+
# Value of the response's 'content-type' header.
def content_type
  header_name = 'content-type'
  response[header_name]
end
|
94
|
+
|
95
|
+
# Node lookup (+search+, +/+ and +at+) is forwarded straight to the parser.
def_delegators :parser, :search, :/, :at
|
99
|
+
|
100
|
+
# Generates two finders plus an alias for each of form, link, base, frame
# and iframe:
#
#   <type>s_with(criteria)  every element matching +criteria+
#   <type>_with(criteria)   the first such element, or nil
#   <type>                  alias for <type>_with
#
# +criteria+ maps attribute names to expected values; each value is compared
# with === against the result of sending the attribute name to the element.
# A String is shorthand for {:name => string}. When a block is given the
# result is yielded to it before being returned.
#
# Example:
#   page.form_with(:action => '/post/login.php') do |f|
#     ...
#   end
[:form, :link, :base, :frame, :iframe].each do |type|
  eval(<<-eomethod)
    def #{type}s_with(criteria)
      criteria = { :name => criteria } if criteria.is_a?(String)
      matches = #{type}s.select do |candidate|
        criteria.all? { |attribute, wanted| wanted === candidate.send(attribute) }
      end
      yield matches if block_given?
      matches
    end

    def #{type}_with(criteria)
      match = #{type}s_with(criteria).first
      yield match if block_given?
      match
    end
    alias :#{type} :#{type}_with
  eomethod
end
|
124
|
+
|
125
|
+
# Link objects for every 'a' element followed by every 'area' element,
# memoized in @links.
def links
  return @links if @links

  collected = []
  %w{ a area }.each do |tag|
    search(tag).each do |node|
      collected << Link.new(node, @mech, self)
    end
  end
  @links = collected
end
|
132
|
+
|
133
|
+
# Form objects for every 'form' element, memoized in @forms. A form without
# an action gets this page's URI (as a string) as its action.
def forms
  return @forms if @forms

  @forms = search('form').collect do |element|
    built = Form.new(element, @mech, self)
    built.action = @uri.to_s unless built.action
    built
  end
end
|
140
|
+
|
141
|
+
# Meta objects for every 'meta' element that has both an http-equiv and a
# content attribute, whose http-equiv is "refresh" (any case) and whose
# content Meta.parse accepts. The parsed delay and target are written back
# onto the node as 'delay' and 'href' before the Meta is built. Memoized in
# @meta.
def meta
  return @meta if @meta

  refreshes = []
  search('meta').each do |node|
    equiv = node['http-equiv']
    content = node['content']
    next unless equiv && content
    next unless equiv.downcase == 'refresh'

    tag = Meta.parse(content, uri) do |delay, href|
      node['delay'] = delay
      node['href'] = href
      Meta.new(node, @mech, self)
    end
    refreshes << tag unless tag.nil?
  end
  @meta = refreshes
end
|
154
|
+
|
155
|
+
# Base objects for every 'base' element, memoized in @bases.
def bases
  return @bases if @bases

  @bases = search('base').collect do |element|
    Base.new(element, @mech, self)
  end
end
|
159
|
+
|
160
|
+
# Frame objects for every 'frame' element, memoized in @frames.
def frames
  return @frames if @frames

  @frames = search('frame').collect do |element|
    Frame.new(element, @mech, self)
  end
end
|
164
|
+
|
165
|
+
# Frame objects for every 'iframe' element, memoized in @iframes.
def iframes
  return @iframes if @iframes

  @iframes = search('iframe').collect do |element|
    Frame.new(element, @mech, self)
  end
end
|
169
|
+
|
170
|
+
private
|
171
|
+
|
172
|
+
# HTML text handed to the parser: the body itself when it is non-empty, an
# empty '<html></html>' document when the body is empty, and '' when there
# is no body at all.
def html_body
  return '' unless body

  body.length > 0 ? body : '<html></html>'
end
|
179
|
+
end
|
180
|
+
end
|
181
|
+
end
|
@@ -0,0 +1,10 @@
|
|
1
|
+
module WWW
|
2
|
+
class Mechanize
|
3
|
+
class Page < WWW::Mechanize::File
|
4
|
+
# Wraps a 'base' tag. Base adds nothing of its own to Link, so a Base object
# offers exactly the Link interface; like other links it carries an href,
# but it will most likely have no text.
class Base < Link
end
|
8
|
+
end
|
9
|
+
end
|
10
|
+
end
|
@@ -0,0 +1,22 @@
|
|
1
|
+
module WWW
|
2
|
+
class Mechanize
|
3
|
+
class Page < WWW::Mechanize::File
|
4
|
+
# Wraps a 'frame' tag so that it can be used wherever a Link is expected.
# The link target is taken from the tag's 'src' attribute and the text from
# its 'name' attribute; +src+ and +name+ are aliases for +href+ and +text+.
class Frame < Link
  alias_method :src, :href
  alias_method :name, :text

  # node::    element for the frame, indexed by attribute name
  # mech::    passed through to Link
  # referer:: passed through to Link
  def initialize(node, mech, referer)
    super
    @node = node
    # Replace the values Link derived from the node with the frame's own
    # attributes.
    @href = node['src']
    @text = node['name']
  end
end
|
20
|
+
end
|
21
|
+
end
|
22
|
+
end
|
@@ -0,0 +1,50 @@
|
|
1
|
+
module WWW
|
2
|
+
class Mechanize
|
3
|
+
class Page < WWW::Mechanize::File
|
4
|
+
# This class encapsulates links. It contains the text and the URI for
|
5
|
+
# 'a' tags parsed out of an HTML page. If the link contains an image,
|
6
|
+
# the alt text will be used for that image.
|
7
|
+
#
|
8
|
+
# For example, the text for the following links will both be 'Hello World':
|
9
|
+
#
|
10
|
+
# <a href="http://rubyforge.org">Hello World</a>
|
11
|
+
# <a href="http://rubyforge.org"><img src="test.jpg" alt="Hello World"></a>
|
12
|
+
class Link
  # Characters that must be percent-escaped before an href can be parsed as
  # a URI. '%' and '#' are left alone so that existing escapes and fragments
  # survive.
  UNSAFE_HREF_CHARS = /[^\-_.!~*'()a-zA-Z0-9;\/?:@&=+$,\[\]%#]/

  attr_reader :node
  attr_reader :href
  attr_reader :text
  attr_reader :attributes
  attr_reader :page
  alias :to_s :text
  alias :referer :page

  # node:: element for the link; must answer +[]+, +inner_text+ and +search+
  # mech:: agent used by +click+
  # page:: page the link was found on (also available as +referer+)
  def initialize(node, mech, page)
    @node = node
    @href = node['href']
    @text = node.inner_text
    @page = page
    @mech = mech
    @attributes = node

    # If there is no text, try to find an image and use its alt text
    if @text.nil? || @text.length == 0
      images = node.search('img')
      @text = images.map { |image| image['alt'] || '' }.join if images.length > 0
    end
  end

  # The href parsed into a URI, or nil when the link has no href. An href
  # that is not a valid URI as written (for example one containing spaces or
  # non-ASCII characters) is percent-escaped and parsed again;
  # URI::InvalidURIError is raised only if that also fails.
  def uri
    @href && URI.parse(@href)
  rescue URI::InvalidURIError
    URI.parse(escape_href(@href))
  end

  # Click on this link
  def click
    @mech.click self
  end

  private

  # Percent-escapes, byte by byte, every character in +href+ matched by
  # UNSAFE_HREF_CHARS.
  def escape_href(href)
    href.gsub(UNSAFE_HREF_CHARS) do |char|
      char.unpack('C*').map { |byte| '%%%02X' % byte }.join
    end
  end
end
|
48
|
+
end
|
49
|
+
end
|
50
|
+
end
|
@@ -0,0 +1,51 @@
|
|
1
|
+
module WWW
|
2
|
+
class Mechanize
|
3
|
+
class Page < WWW::Mechanize::File
|
4
|
+
# This class encapsulates a Meta tag. Mechanize treats meta tags just
|
5
|
+
# like 'a' tags. Meta objects will contain links, but most likely will
|
6
|
+
# have no text.
|
7
|
+
class Meta < Link
|
8
|
+
|
9
|
+
# Matches the content attribute of a meta tag. After the match:
|
10
|
+
#
|
11
|
+
# $1:: delay
|
12
|
+
# $3:: url
|
13
|
+
#
|
14
|
+
CONTENT_REGEXP = /^\s*(\d+\.?\d*)(;|;\s*url=\s*['"]?(\S*?)['"]?)?\s*$/i
|
15
|
+
|
16
|
+
class << self
|
17
|
+
# Parses the delay and url from the content attribute of a meta tag.
# Parse requires the uri of the current page to infer a url when no
# url is specified, and to resolve a url that is not absolute. If a block
# is given, the parsed delay and url will be passed to it and the block's
# result is returned.
#
# Returns nil if the delay and url cannot be parsed.
#
#   # <meta http-equiv="refresh" content="5;url=http://example.com/" />
#   uri = URI.parse('http://current.com/dir/page.html')
#
#   Meta.parse("5;url=http://example.com/", uri)  # => ['5', 'http://example.com/']
#   Meta.parse("5;url=", uri)                     # => ['5', 'http://current.com/dir/page.html']
#   Meta.parse("5", uri)                          # => ['5', 'http://current.com/dir/page.html']
#   Meta.parse("5;url=/top", uri)                 # => ['5', 'http://current.com/top']
#   Meta.parse("5;url=next.html", uri)            # => ['5', 'http://current.com/dir/next.html']
#   Meta.parse("invalid content", uri)            # => nil
#
def parse(content, uri)
  return nil unless content && content =~ CONTENT_REGEXP

  delay, url = $1, $3

  url = case url
        when nil, "" then uri.to_s
        when /^http/i then url
        else
          begin
            # Resolve against the current page so the scheme, port and
            # directory of the page are honoured.
            uri.merge(url).to_s
          rescue URI::Error
            # Target cannot be merged (for example it is not a valid URI
            # reference): fall back to plain concatenation onto the host.
            "http://#{uri.host}#{url}"
          end
        end

  block_given? ? yield(delay, url) : [delay, url]
end
|
47
|
+
end
|
48
|
+
end
|
49
|
+
end
|
50
|
+
end
|
51
|
+
end
|
@@ -0,0 +1,103 @@
|
|
1
|
+
require 'www/mechanize/file'
|
2
|
+
require 'www/mechanize/file_saver'
|
3
|
+
require 'www/mechanize/page'
|
4
|
+
|
5
|
+
module WWW
|
6
|
+
class Mechanize
|
7
|
+
# = Synopsis
|
8
|
+
# This class is used to register and maintain pluggable parsers for
|
9
|
+
# Mechanize to use.
|
10
|
+
#
|
11
|
+
# A Pluggable Parser is a parser that Mechanize uses for any particular
|
12
|
+
# content type. Mechanize will ask PluggableParser for the class it
|
13
|
+
# should initialize given any content type. This class allows users to
|
14
|
+
# register their own pluggable parsers, or modify existing pluggable
|
15
|
+
# parsers.
|
16
|
+
#
|
17
|
+
# PluggableParser returns a WWW::Mechanize::File object for content types
|
18
|
+
# that it does not know how to handle. WWW::Mechanize::File provides
|
19
|
+
# basic functionality for any content type, so it is a good class to
|
20
|
+
# extend when building your own parsers.
|
21
|
+
# == Example
|
22
|
+
# To create your own parser, just create a class that takes four
|
23
|
+
# parameters in the constructor. Here is an example of registering
|
24
|
+
# a pluggable parser that handles CSV files:
|
25
|
+
# class CSVParser < WWW::Mechanize::File
|
26
|
+
# attr_reader :csv
|
27
|
+
# def initialize(uri=nil, response=nil, body=nil, code=nil)
|
28
|
+
# super(uri, response, body, code)
|
29
|
+
# @csv = CSV.parse(body)
|
30
|
+
# end
|
31
|
+
# end
|
32
|
+
# agent = WWW::Mechanize.new
|
33
|
+
# agent.pluggable_parser.csv = CSVParser
|
34
|
+
# agent.get('http://example.com/test.csv') # => CSVParser
|
35
|
+
# Now any page that returns the content type of 'text/csv' will initialize
|
36
|
+
# a CSVParser and return that object to the caller.
|
37
|
+
#
|
38
|
+
# To register a pluggable parser for a content type that pluggable parser
|
39
|
+
# does not know about, just use the hash syntax:
|
40
|
+
# agent.pluggable_parser['text/something'] = SomeClass
|
41
|
+
#
|
42
|
+
# To set the default parser, just use the 'default' method:
|
43
|
+
# agent.pluggable_parser.default = SomeClass
|
44
|
+
# Now all unknown content types will be instances of SomeClass.
|
45
|
+
class PluggableParser
  # Short names for the content types that have a dedicated setter.
  CONTENT_TYPES = {
    :html  => 'text/html',
    :wap   => 'application/vnd.wap.xhtml+xml',
    :xhtml => 'application/xhtml+xml',
    :pdf   => 'application/pdf',
    :csv   => 'text/csv',
    :xml   => 'text/xml',
  }

  # Class returned by +parser+ for content types with no registered parser.
  attr_accessor :default

  # Registers Page for the html, xhtml and wap content types and makes File
  # the default.
  def initialize
    @parsers = {}
    [:html, :xhtml, :wap].each do |kind|
      register_parser(CONTENT_TYPES[kind], Page)
    end
    @default = File
  end

  # Class to instantiate for +content_type+; the default when the type is
  # nil or has no registered parser. Letter case, surrounding whitespace and
  # parameters such as "; charset=utf-8" are ignored.
  def parser(content_type)
    content_type.nil? ? default : @parsers[normalize(content_type)] || default
  end

  # Registers +klass+ as the parser for +content_type+. Returns +klass+.
  def register_parser(content_type, klass)
    @parsers[normalize(content_type)] = klass
  end

  # Registers +klass+ for both the html and the xhtml content type.
  def html=(klass)
    register_parser(CONTENT_TYPES[:html], klass)
    register_parser(CONTENT_TYPES[:xhtml], klass)
  end

  def xhtml=(klass)
    register_parser(CONTENT_TYPES[:xhtml], klass)
  end

  def wap=(klass)
    register_parser(CONTENT_TYPES[:wap], klass)
  end

  def pdf=(klass)
    register_parser(CONTENT_TYPES[:pdf], klass)
  end

  def csv=(klass)
    register_parser(CONTENT_TYPES[:csv], klass)
  end

  def xml=(klass)
    register_parser(CONTENT_TYPES[:xml], klass)
  end

  # Parser registered for +content_type+, or nil (the default is not
  # consulted here).
  def [](content_type)
    @parsers[normalize(content_type)]
  end

  def []=(content_type, klass)
    @parsers[normalize(content_type)] = klass
  end

  private

  # Reduces a content type string to its bare lower-case media type, so that
  # registration and lookup agree on one key. Non-String keys are returned
  # untouched.
  def normalize(content_type)
    return content_type unless String === content_type

    content_type.split(';').first.to_s.strip.downcase
  end
end
|
102
|
+
end
|
103
|
+
end
|