tenderlove-mechanize 0.9.3.20090623142847 → 0.9.3.20090911221705

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (165) hide show
  1. data/Manifest.txt +55 -48
  2. data/Rakefile +12 -22
  3. data/lib/mechanize.rb +618 -4
  4. data/lib/mechanize/chain.rb +33 -0
  5. data/lib/mechanize/chain/auth_headers.rb +78 -0
  6. data/lib/mechanize/chain/body_decoding_handler.rb +46 -0
  7. data/lib/mechanize/chain/connection_resolver.rb +76 -0
  8. data/lib/mechanize/chain/custom_headers.rb +21 -0
  9. data/lib/{www/mechanize → mechanize}/chain/handler.rb +1 -1
  10. data/lib/mechanize/chain/header_resolver.rb +51 -0
  11. data/lib/mechanize/chain/parameter_resolver.rb +22 -0
  12. data/lib/{www/mechanize → mechanize}/chain/post_connect_hook.rb +0 -0
  13. data/lib/mechanize/chain/pre_connect_hook.rb +20 -0
  14. data/lib/mechanize/chain/request_resolver.rb +30 -0
  15. data/lib/mechanize/chain/response_body_parser.rb +38 -0
  16. data/lib/mechanize/chain/response_header_handler.rb +48 -0
  17. data/lib/mechanize/chain/response_reader.rb +39 -0
  18. data/lib/mechanize/chain/ssl_resolver.rb +40 -0
  19. data/lib/mechanize/chain/uri_resolver.rb +75 -0
  20. data/lib/mechanize/content_type_error.rb +14 -0
  21. data/lib/mechanize/cookie.rb +70 -0
  22. data/lib/mechanize/cookie_jar.rb +188 -0
  23. data/lib/mechanize/file.rb +71 -0
  24. data/lib/mechanize/file_response.rb +60 -0
  25. data/lib/mechanize/file_saver.rb +37 -0
  26. data/lib/mechanize/form.rb +378 -0
  27. data/lib/mechanize/form/button.rb +9 -0
  28. data/lib/mechanize/form/check_box.rb +11 -0
  29. data/lib/mechanize/form/field.rb +30 -0
  30. data/lib/mechanize/form/file_upload.rb +22 -0
  31. data/lib/mechanize/form/image_button.rb +21 -0
  32. data/lib/mechanize/form/multi_select_list.rb +67 -0
  33. data/lib/mechanize/form/option.rb +49 -0
  34. data/lib/mechanize/form/radio_button.rb +49 -0
  35. data/lib/mechanize/form/select_list.rb +43 -0
  36. data/lib/mechanize/headers.rb +11 -0
  37. data/lib/mechanize/history.rb +65 -0
  38. data/lib/mechanize/inspect.rb +88 -0
  39. data/lib/{www/mechanize → mechanize}/monkey_patch.rb +4 -6
  40. data/lib/mechanize/page.rb +206 -0
  41. data/lib/mechanize/page/base.rb +8 -0
  42. data/lib/mechanize/page/frame.rb +20 -0
  43. data/lib/mechanize/page/image.rb +26 -0
  44. data/lib/mechanize/page/label.rb +20 -0
  45. data/lib/mechanize/page/link.rb +48 -0
  46. data/lib/mechanize/page/meta.rb +50 -0
  47. data/lib/mechanize/pluggable_parsers.rb +101 -0
  48. data/lib/mechanize/redirect_limit_reached_error.rb +16 -0
  49. data/lib/mechanize/redirect_not_get_or_head_error.rb +18 -0
  50. data/lib/mechanize/response_code_error.rb +22 -0
  51. data/lib/mechanize/unsupported_scheme_error.rb +8 -0
  52. data/lib/mechanize/util.rb +67 -0
  53. data/mechanize.gemspec +8 -8
  54. data/test/chain/test_argument_validator.rb +2 -2
  55. data/test/chain/test_auth_headers.rb +2 -2
  56. data/test/chain/test_custom_headers.rb +2 -2
  57. data/test/chain/test_header_resolver.rb +3 -3
  58. data/test/chain/test_parameter_resolver.rb +4 -4
  59. data/test/chain/test_request_resolver.rb +4 -4
  60. data/test/chain/test_response_reader.rb +3 -3
  61. data/test/helper.rb +1 -1
  62. data/test/htdocs/tc_bad_charset.html +9 -0
  63. data/test/htdocs/tc_charset.html +6 -0
  64. data/test/htdocs/test_bad_encoding.html +52 -0
  65. data/test/test_authenticate.rb +3 -3
  66. data/test/test_bad_links.rb +1 -1
  67. data/test/test_blank_form.rb +1 -1
  68. data/test/test_checkboxes.rb +1 -1
  69. data/test/test_content_type.rb +2 -2
  70. data/test/test_cookie_class.rb +12 -12
  71. data/test/test_cookie_jar.rb +13 -13
  72. data/test/test_cookies.rb +1 -1
  73. data/test/test_encoded_links.rb +1 -1
  74. data/test/test_errors.rb +2 -2
  75. data/test/test_follow_meta.rb +3 -3
  76. data/test/test_form_action.rb +1 -1
  77. data/test/test_form_as_hash.rb +1 -1
  78. data/test/test_form_button.rb +2 -2
  79. data/test/test_form_no_inputname.rb +1 -1
  80. data/test/test_forms.rb +1 -1
  81. data/test/test_frames.rb +1 -1
  82. data/test/test_get_headers.rb +1 -1
  83. data/test/test_gzipping.rb +2 -2
  84. data/test/test_hash_api.rb +1 -1
  85. data/test/test_history.rb +7 -7
  86. data/test/test_history_added.rb +1 -1
  87. data/test/test_html_unscape_forms.rb +7 -7
  88. data/test/test_if_modified_since.rb +1 -1
  89. data/test/test_keep_alive.rb +1 -1
  90. data/test/test_links.rb +2 -2
  91. data/test/test_mech.rb +2 -2
  92. data/test/test_mechanize_file.rb +7 -7
  93. data/test/test_meta.rb +2 -2
  94. data/test/test_multi_select.rb +1 -1
  95. data/test/test_no_attributes.rb +1 -1
  96. data/test/test_option.rb +1 -1
  97. data/test/test_page.rb +3 -3
  98. data/test/test_pluggable_parser.rb +14 -14
  99. data/test/test_post_form.rb +1 -1
  100. data/test/test_pretty_print.rb +2 -2
  101. data/test/test_radiobutton.rb +1 -1
  102. data/test/test_redirect_limit_reached.rb +1 -3
  103. data/test/test_redirect_verb_handling.rb +1 -3
  104. data/test/test_referer.rb +1 -1
  105. data/test/test_relative_links.rb +1 -1
  106. data/test/test_request.rb +1 -1
  107. data/test/test_response_code.rb +3 -3
  108. data/test/test_save_file.rb +3 -3
  109. data/test/test_scheme.rb +3 -3
  110. data/test/test_select.rb +2 -2
  111. data/test/test_select_all.rb +1 -1
  112. data/test/test_select_none.rb +1 -1
  113. data/test/test_select_noopts.rb +1 -1
  114. data/test/test_set_fields.rb +1 -1
  115. data/test/test_ssl_server.rb +1 -1
  116. data/test/test_subclass.rb +1 -1
  117. data/test/test_textarea.rb +1 -1
  118. data/test/test_upload.rb +1 -1
  119. data/test/test_verbs.rb +1 -1
  120. metadata +61 -56
  121. data/lib/www/mechanize.rb +0 -619
  122. data/lib/www/mechanize/chain.rb +0 -34
  123. data/lib/www/mechanize/chain/auth_headers.rb +0 -80
  124. data/lib/www/mechanize/chain/body_decoding_handler.rb +0 -48
  125. data/lib/www/mechanize/chain/connection_resolver.rb +0 -78
  126. data/lib/www/mechanize/chain/custom_headers.rb +0 -23
  127. data/lib/www/mechanize/chain/header_resolver.rb +0 -53
  128. data/lib/www/mechanize/chain/parameter_resolver.rb +0 -24
  129. data/lib/www/mechanize/chain/pre_connect_hook.rb +0 -22
  130. data/lib/www/mechanize/chain/request_resolver.rb +0 -32
  131. data/lib/www/mechanize/chain/response_body_parser.rb +0 -40
  132. data/lib/www/mechanize/chain/response_header_handler.rb +0 -50
  133. data/lib/www/mechanize/chain/response_reader.rb +0 -41
  134. data/lib/www/mechanize/chain/ssl_resolver.rb +0 -42
  135. data/lib/www/mechanize/chain/uri_resolver.rb +0 -77
  136. data/lib/www/mechanize/content_type_error.rb +0 -16
  137. data/lib/www/mechanize/cookie.rb +0 -72
  138. data/lib/www/mechanize/cookie_jar.rb +0 -191
  139. data/lib/www/mechanize/file.rb +0 -73
  140. data/lib/www/mechanize/file_response.rb +0 -62
  141. data/lib/www/mechanize/file_saver.rb +0 -39
  142. data/lib/www/mechanize/form.rb +0 -360
  143. data/lib/www/mechanize/form/button.rb +0 -8
  144. data/lib/www/mechanize/form/check_box.rb +0 -13
  145. data/lib/www/mechanize/form/field.rb +0 -28
  146. data/lib/www/mechanize/form/file_upload.rb +0 -24
  147. data/lib/www/mechanize/form/image_button.rb +0 -23
  148. data/lib/www/mechanize/form/multi_select_list.rb +0 -69
  149. data/lib/www/mechanize/form/option.rb +0 -51
  150. data/lib/www/mechanize/form/radio_button.rb +0 -38
  151. data/lib/www/mechanize/form/select_list.rb +0 -45
  152. data/lib/www/mechanize/headers.rb +0 -12
  153. data/lib/www/mechanize/history.rb +0 -67
  154. data/lib/www/mechanize/inspect.rb +0 -90
  155. data/lib/www/mechanize/page.rb +0 -181
  156. data/lib/www/mechanize/page/base.rb +0 -10
  157. data/lib/www/mechanize/page/frame.rb +0 -22
  158. data/lib/www/mechanize/page/link.rb +0 -50
  159. data/lib/www/mechanize/page/meta.rb +0 -51
  160. data/lib/www/mechanize/pluggable_parsers.rb +0 -103
  161. data/lib/www/mechanize/redirect_limit_reached_error.rb +0 -18
  162. data/lib/www/mechanize/redirect_not_get_or_head_error.rb +0 -20
  163. data/lib/www/mechanize/response_code_error.rb +0 -25
  164. data/lib/www/mechanize/unsupported_scheme_error.rb +0 -10
  165. data/lib/www/mechanize/util.rb +0 -76
@@ -15,13 +15,12 @@ end
15
15
 
16
16
  # Monkey patch for ruby 1.8.4
17
17
  unless RUBY_VERSION > "1.8.4"
18
- module Net # :nodoc:
19
- class HTTPResponse # :nodoc:
20
- CODE_TO_OBJ['500'] = HTTPInternalServerError
18
+ module Net # :nodoc:
19
+ class HTTPResponse # :nodoc:
20
+ CODE_TO_OBJ['500'] = HTTPInternalServerError
21
+ end
21
22
  end
22
- end
23
23
  else
24
- module WWW
25
24
  class Mechanize
26
25
  class Form
27
26
  alias :inspect :pretty_inspect
@@ -34,4 +33,3 @@ module WWW
34
33
  end
35
34
  end
36
35
  end
37
- end
@@ -0,0 +1,206 @@
1
+ require 'mechanize/page/link'
2
+ require 'mechanize/page/meta'
3
+ require 'mechanize/page/base'
4
+ require 'mechanize/page/frame'
5
+ require 'mechanize/page/image'
6
+ require 'mechanize/page/label'
7
+ require 'mechanize/headers'
8
+
9
+ class Mechanize
10
# = Synopsis
# This class encapsulates an HTML page.  If Mechanize finds a content
# type of 'text/html', this class will be instantiated and returned.
#
# == Example
#   require 'rubygems'
#   require 'mechanize'
#
#   agent = Mechanize.new
#   agent.get('http://google.com/').class  #=> Mechanize::Page
#
class Page < Mechanize::File
  extend Forwardable

  # Content types a Page accepts.  The whole alternation is anchored at the
  # start of the header value, and the WAP XHTML type is accepted alongside
  # the other (X)HTML types.
  ACCEPTED_CONTENT_TYPE =
    /\A(?:text\/html|application\/xhtml\+xml|application\/vnd\.wap\.xhtml\+xml)/i

  # Captures the value of a +charset=+ parameter: optional quotes are
  # skipped and the value stops at whitespace, a quote or the next ';'.
  CHARSET_PARAM = /charset\s*=\s*["']?([^\s;"']+)/i

  attr_accessor :mech

  # Builds a page from a response.
  #
  # uri::      handed to the superclass constructor
  # response:: header container; iterated with +each_header+ when it
  #            responds to it, otherwise with +each+, and indexed with
  #            'content-type'
  # body::     page body; forced to ASCII-8BIT when encodings are available
  # code::     handed to the superclass constructor
  # mech::     agent stored in +mech+ unless one is already set
  #
  # Raises Mechanize::ContentTypeError when the content type does not
  # match ACCEPTED_CONTENT_TYPE.
  def initialize(uri=nil, response=nil, body=nil, code=nil, mech=nil)
    @encoding = nil

    # Look for a charset parameter in the header values; the last header
    # that carries one wins, and the literal value 'none' is ignored.
    iterator = response.respond_to?(:each_header) ? :each_header : :each
    response.send(iterator) do |_name, value|
      next unless value =~ CHARSET_PARAM
      charset = $1
      @encoding = charset unless charset == 'none'
    end

    # Force the encoding to be 8BIT so we can perform regular expressions.
    # We'll set it to the detected encoding later
    body.force_encoding('ASCII-8BIT') if defined?(Encoding) && body

    @encoding ||= Util.detect_charset(body)

    super(uri, response, body, code)
    @mech ||= mech

    # A <meta ... charset ...> tag in the document takes over: drop the
    # encoding found so far.
    @encoding = nil if html_body =~ /<meta[^>]*charset[^>]*>/i

    raise Mechanize::ContentTypeError.new(response['content-type']) unless
      response['content-type'] =~ ACCEPTED_CONTENT_TYPE

    # Lazily computed caches, filled in by the accessors below.
    @parser = @links = @forms = @meta = @bases = @frames = @iframes = nil
    @images = @image_urls = @labels = @labels_hash = @title = nil
  end

  # Text of the <title> element, or nil when there is no parser or the
  # title text is empty.
  def title
    return @title if @title
    return nil unless parser

    text = search('title').inner_text
    @title = text if text.length > 0
  end

  # Sets the encoding used for parsing.  When a parser already exists and
  # its encoding differs (compared case-insensitively), the parser is
  # discarded so that it is rebuilt on next use.  Returns +encoding+.
  def encoding=(encoding)
    @encoding = encoding

    if @parser
      current = @parser.encoding
      if (current && current.downcase) != (encoding && encoding.downcase)
        # lazy reinitialize the parser with the new encoding
        @parser = nil
      end
    end

    encoding
  end

  # Encoding reported by the parser, or nil when the parser does not
  # respond to +encoding+.
  def encoding
    parser.respond_to?(:encoding) ? parser.encoding : nil
  end

  # Returns the parsed document, building it on first use.  Nothing is
  # parsed (and nil is returned) unless both body and response are present.
  # Only Nokogiri::HTML is given the encoding; any other parser receives
  # just the markup.
  def parser
    return @parser if @parser
    return nil unless body && response

    engine = mech.html_parser
    @parser =
      if engine == Nokogiri::HTML
        engine.parse(html_body, nil, @encoding)
      else
        engine.parse(html_body)
      end
  end
  alias :root :parser

  # Get the content type
  def content_type
    response['content-type']
  end

  # Search through the page like HPricot
  def_delegator :parser, :search, :search
  def_delegator :parser, :/, :/
  def_delegator :parser, :at, :at

  # Find a form matching +criteria+.
  # Example:
  #   page.form_with(:action => '/post/login.php') do |f|
  #     ...
  #   end
  #
  # For each of form, link, base, frame and iframe this defines:
  #   <type>s_with(criteria):: every element whose attributes all match
  #                            (values are compared with ===); a String
  #                            criteria is shorthand for :name => criteria
  #   <type>_with(criteria)::  the first such element, or nil
  #   <type>::                 alias of <type>_with
  # The source is a fixed template over a fixed list of names, so no
  # external input reaches the evaluated string.
  [:form, :link, :base, :frame, :iframe].each do |type|
    class_eval(<<-eomethod, __FILE__, __LINE__ + 1)
      def #{type}s_with(criteria)
        criteria = {:name => criteria} if String === criteria
        f = #{type}s.find_all do |thing|
          criteria.all? { |k,v| v === thing.send(k) }
        end
        yield f if block_given?
        f
      end

      def #{type}_with(criteria)
        f = #{type}s_with(criteria).first
        yield f if block_given?
        f
      end
      alias :#{type} :#{type}_with
    eomethod
  end

  # Link objects for every 'a' element followed by every 'area' element.
  def links
    @links ||= %w{ a area }.map do |tag|
      search(tag).map do |node|
        Link.new(node, @mech, self)
      end
    end.flatten
  end

  # Form objects for every 'form' element; a form without an action
  # falls back to this page's URI.
  def forms
    @forms ||= search('form').map do |html_form|
      form = Form.new(html_form, @mech, self)
      form.action ||= @uri.to_s
      form
    end
  end

  # Meta objects for every meta tag that has both http-equiv and content
  # attributes, is a refresh, and whose content Meta.parse can read.  The
  # parsed delay and href are written back onto the node as attributes.
  def meta
    @meta ||= search('meta').map do |node|
      next unless node['http-equiv'] && node['content']
      (equiv, content) = node['http-equiv'], node['content']
      if equiv && equiv.downcase == 'refresh'
        Meta.parse(content, uri) do |delay, href|
          node['delay'] = delay
          node['href'] = href
          Meta.new(node, @mech, self)
        end
      end
    end.compact
  end

  # Base objects for every 'base' element.
  def bases
    @bases ||=
      search('base').map { |node| Base.new(node, @mech, self) }
  end

  # Frame objects for every 'frame' element.
  def frames
    @frames ||=
      search('frame').map { |node| Frame.new(node, @mech, self) }
  end

  # Frame objects for every 'iframe' element.
  def iframes
    @iframes ||=
      search('iframe').map { |node| Frame.new(node, @mech, self) }
  end

  # Image objects for every 'img' element.
  def images
    @images ||=
      search('img').map { |node| Image.new(node, self) }
  end

  # The distinct URLs of all images on the page.
  def image_urls
    @image_urls ||= images.map(&:url).uniq
  end

  # Label objects for every 'label' element.
  def labels
    @labels ||=
      search('label').map { |node| Label.new(node, self) }
  end

  # Labels keyed by the value of their 'for' attribute; labels whose
  # +for+ lookup yields nothing are left out.
  def labels_hash
    @labels_hash ||= labels.inject({}) do |index, label|
      index[label.node['for']] = label if label.for
      index
    end
  end

  private

  # Markup handed to the parser: the body itself, a minimal document when
  # the body is empty, or an empty string when there is no body.
  def html_body
    return '' unless body
    body.length > 0 ? body : '<html></html>'
  end
end
206
+ end
@@ -0,0 +1,8 @@
1
+ class Mechanize
2
+ class Page < Mechanize::File
3
# Wraps a 'base' tag.  A Base is handled exactly like a Link: it carries
# the tag's href, and will usually have no text of its own.
class Base < Link
end
7
+ end
8
+ end
@@ -0,0 +1,20 @@
1
+ class Mechanize
2
+ class Page < Mechanize::File
3
# Wraps a 'frame' tag so that it can be used wherever a Link is expected.
# The frame's 'src' attribute is exposed as +href+ (and +src+), and its
# 'name' attribute as +text+ (and +name+).
class Frame < Link
  alias_method :src, :href
  alias_method :name, :text

  # node::    the frame element
  # mech::    passed on to Link
  # referer:: passed on to Link as the page
  def initialize(node, mech, referer)
    super
    # Replace what Link derived from the node with the frame attributes.
    @node, @text, @href = node, node['name'], node['src']
  end
end
19
+ end
20
+ end
@@ -0,0 +1,26 @@
1
+ class Mechanize
2
+ class Page < Mechanize::File
3
# Wraps an 'img' element together with the page it was found on.
class Image
  attr_reader :node
  attr_reader :page

  # node:: the image element; must answer to +[]+ with attribute names
  # page:: the page containing the image; its +uri+ is used to resolve
  #        relative sources
  def initialize(node, page)
    @node = node
    @page = page
  end

  # The raw value of the 'src' attribute, or nil when it is absent.
  def src
    @node['src']
  end

  # The image location as a String.  A source that already starts with
  # http:// or https:// is returned untouched; anything else is resolved
  # against the page URI.  Returns nil when the element has no 'src', so
  # that one malformed tag does not raise while collecting a page's images.
  def url
    source = src
    return nil if source.nil?
    return source if source =~ %r{^https?://}

    (page.uri + source).to_s
  end
end
25
+ end
26
+ end
@@ -0,0 +1,20 @@
1
+ class Mechanize
2
+ class Page < Mechanize::File
3
# Wraps a 'label' element together with the page it was found on.
class Label
  attr_reader :node, :text, :page
  alias_method :to_s, :text

  # node:: the label element; its inner text becomes +text+
  # page:: the page containing the label
  def initialize(node, page)
    @page = page
    @node = node
    @text = node.inner_text
  end

  # Searches the page for the element whose id equals this label's 'for'
  # attribute.  Returns nil when the label has no 'for' attribute.
  def for
    target_id = @node['for']
    return nil unless target_id

    page.search("##{target_id}") || nil
  end
end
19
+ end
20
+ end
@@ -0,0 +1,48 @@
1
+ class Mechanize
2
+ class Page < Mechanize::File
3
# Represents a link: the text and the target of an 'a' tag found in an
# HTML page.  When the tag has no text of its own but wraps one or more
# images, the images' alt text is used as the link text.
#
# For example, the text of both of these links is 'Hello World':
#
#   <a href="http://rubyforge.org">Hello World</a>
#   <a href="http://rubyforge.org"><img src="test.jpg" alt="Hello World"></a>
class Link
  attr_reader :node, :href, :text, :attributes, :page
  alias_method :to_s, :text
  alias_method :referer, :page

  # node:: the element; also exposed as +attributes+
  # mech:: the agent used by +click+
  # page:: the page the link was found on
  def initialize(node, mech, page)
    @mech       = mech
    @page       = page
    @node       = node
    @attributes = node
    @href       = node['href']
    @text       = node.inner_text

    # Only links without any text fall back to image alt text.
    return unless @text.nil? || @text.length == 0

    images = node.search('img')
    return unless images.length > 0

    @text = ''
    images.each { |image| @text << (image['alt'] || '') }
  end

  # The href parsed into a URI; when there is no href, the (empty) href
  # value itself is returned.
  def uri
    @href ? URI.parse(@href) : @href
  end

  # Click on this link
  def click
    @mech.click(self)
  end
end
47
+ end
48
+ end
@@ -0,0 +1,50 @@
1
+ class Mechanize
2
+ class Page < Mechanize::File
3
+ # This class encapsulates a Meta tag. Mechanize treats meta tags just
4
+ # like 'a' tags. Meta objects will contain links, but most likely will
5
+ # have no text.
6
+ class Meta < Link
7
+
8
+ # Matches the content attribute of a meta tag. After the match:
9
+ #
10
+ # $1:: delay
11
+ # $3:: url
12
+ #
13
+ CONTENT_REGEXP = /^\s*(\d+\.?\d*)(;|;\s*url=\s*['"]?(\S*?)['"]?)?\s*$/i
14
+
15
+ class << self
16
# Parses the delay and url from the content attribute of a meta tag.
# Parse requires the uri of the current page to infer a url when no
# url is specified, and to resolve a url that is not absolute.  If a
# block is given, the parsed delay and url will be passed to it for
# further processing and the block's result is returned.
#
# content:: value of the tag's content attribute (may be nil)
# uri::     URI object of the current page
#
# Returns nil if the delay and url cannot be parsed.
#
#   # <meta http-equiv="refresh" content="5;url=http://example.com/" />
#   uri = URI.parse('http://current.com/')
#
#   Meta.parse("5;url=http://example.com/", uri)  # => ['5', 'http://example.com/']
#   Meta.parse("5;url=/next", uri)                # => ['5', 'http://current.com/next']
#   Meta.parse("5;url=", uri)                     # => ['5', 'http://current.com/']
#   Meta.parse("5", uri)                          # => ['5', 'http://current.com/']
#   Meta.parse("invalid content", uri)            # => nil
#
def parse(content, uri)
  return nil unless content && content =~ CONTENT_REGEXP

  delay, url = $1, $3

  url =
    if url.nil? || url.empty?
      # No target given: refresh the current page.
      uri.to_s
    elsif url =~ %r{\Ahttps?://}i
      # Already absolute: hand it back verbatim.
      url
    else
      begin
        # Resolve against the current page so that the scheme, port and
        # directory of the page are honoured.
        uri.merge(url).to_s
      rescue URI::Error
        # The target is not a valid URI reference; fall back to plain
        # concatenation with the host rather than failing the parse.
        "http://#{uri.host}#{url}"
      end
    end

  block_given? ? yield(delay, url) : [delay, url]
end
46
+ end
47
+ end
48
+ end
49
+ end
50
+
@@ -0,0 +1,101 @@
1
+ require 'mechanize/file'
2
+ require 'mechanize/file_saver'
3
+ require 'mechanize/page'
4
+
5
+ class Mechanize
6
# = Synopsis
# Registry of the pluggable parsers that Mechanize uses.
#
# A pluggable parser is the class Mechanize instantiates for a given
# content type.  Mechanize asks PluggableParser which class to use for
# the content type it received; users may register their own parsers or
# replace the ones registered here.
#
# For content types it has no entry for, PluggableParser answers with the
# default parser, which starts out as Mechanize::File.  Mechanize::File
# provides basic functionality for any content type, so it is a good
# class to extend when building your own parsers.
#
# == Example
# To create your own parser, just create a class that takes four
# parameters in the constructor.  Here is an example of registering
# a pluggable parser that handles CSV files:
#
#   class CSVParser < Mechanize::File
#     attr_reader :csv
#     def initialize(uri=nil, response=nil, body=nil, code=nil)
#       super(uri, response, body, code)
#       @csv = CSV.parse(body)
#     end
#   end
#   agent = Mechanize.new
#   agent.pluggable_parser.csv = CSVParser
#   agent.get('http://example.com/test.csv')  # => CSVParser
#
# Now any page that returns the content type of 'text/csv' will initialize
# a CSVParser and return that object to the caller.
#
# To register a pluggable parser for a content type that pluggable parser
# does not know about, just use the hash syntax:
#
#   agent.pluggable_parser['text/something'] = SomeClass
#
# To set the default parser, just use the 'default' method:
#
#   agent.pluggable_parser.default = SomeClass
#
# Now all unknown content types will be instances of SomeClass.
class PluggableParser
  # Short names for the content types that have a dedicated setter or an
  # initial registration.
  CONTENT_TYPES = {
    :html  => 'text/html',
    :wap   => 'application/vnd.wap.xhtml+xml',
    :xhtml => 'application/xhtml+xml',
    :pdf   => 'application/pdf',
    :csv   => 'text/csv',
    :xml   => 'text/xml',
  }

  # Class used when no parser is registered for a content type.
  attr_accessor :default

  # Starts with Page registered for the HTML, XHTML and WAP content types
  # and File as the default.
  def initialize
    @parsers = {}
    [:html, :xhtml, :wap].each do |kind|
      @parsers[CONTENT_TYPES[kind]] = Page
    end
    @default = File
  end

  # The class to instantiate for +content_type+: the registered parser if
  # there is one, the default otherwise (including for a nil content type).
  def parser(content_type)
    return default if content_type.nil?

    @parsers[content_type] || default
  end

  # Registers +klass+ as the parser for +content_type+.
  def register_parser(content_type, klass)
    @parsers[content_type] = klass
  end

  # Registers +klass+ for both the HTML and the XHTML content types.
  def html=(klass)
    [:html, :xhtml].each do |kind|
      register_parser(CONTENT_TYPES[kind], klass)
    end
  end

  # Registers +klass+ for the XHTML content type.
  def xhtml=(klass)
    register_parser(CONTENT_TYPES[:xhtml], klass)
  end

  # Registers +klass+ for the PDF content type.
  def pdf=(klass)
    register_parser(CONTENT_TYPES[:pdf], klass)
  end

  # Registers +klass+ for the CSV content type.
  def csv=(klass)
    register_parser(CONTENT_TYPES[:csv], klass)
  end

  # Registers +klass+ for the XML content type.
  def xml=(klass)
    register_parser(CONTENT_TYPES[:xml], klass)
  end

  # The parser registered for +content_type+, or nil; unlike +parser+
  # this does not fall back to the default.
  def [](content_type)
    @parsers[content_type]
  end

  # Registers +klass+ as the parser for +content_type+.
  def []=(content_type, klass)
    @parsers[content_type] = klass
  end
end
101
+ end