aai10-mechanize 2.0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (176) hide show
  1. data/.autotest +6 -0
  2. data/.gitignore +9 -0
  3. data/CHANGELOG.rdoc +652 -0
  4. data/EXAMPLES.rdoc +187 -0
  5. data/FAQ.rdoc +11 -0
  6. data/GUIDE.rdoc +163 -0
  7. data/LICENSE.rdoc +20 -0
  8. data/Manifest.txt +172 -0
  9. data/README.rdoc +63 -0
  10. data/Rakefile +36 -0
  11. data/aai10-mechanize.gemspec +20 -0
  12. data/examples/flickr_upload.rb +22 -0
  13. data/examples/mech-dump.rb +5 -0
  14. data/examples/proxy_req.rb +7 -0
  15. data/examples/rubyforge.rb +20 -0
  16. data/examples/spider.rb +21 -0
  17. data/lib/mechanize.rb +664 -0
  18. data/lib/mechanize/content_type_error.rb +14 -0
  19. data/lib/mechanize/cookie.rb +116 -0
  20. data/lib/mechanize/cookie_jar.rb +202 -0
  21. data/lib/mechanize/element_matcher.rb +35 -0
  22. data/lib/mechanize/file.rb +80 -0
  23. data/lib/mechanize/file_connection.rb +17 -0
  24. data/lib/mechanize/file_request.rb +26 -0
  25. data/lib/mechanize/file_response.rb +74 -0
  26. data/lib/mechanize/file_saver.rb +37 -0
  27. data/lib/mechanize/form.rb +478 -0
  28. data/lib/mechanize/form/button.rb +9 -0
  29. data/lib/mechanize/form/check_box.rb +11 -0
  30. data/lib/mechanize/form/field.rb +44 -0
  31. data/lib/mechanize/form/file_upload.rb +23 -0
  32. data/lib/mechanize/form/image_button.rb +20 -0
  33. data/lib/mechanize/form/multi_select_list.rb +83 -0
  34. data/lib/mechanize/form/option.rb +49 -0
  35. data/lib/mechanize/form/radio_button.rb +48 -0
  36. data/lib/mechanize/form/select_list.rb +40 -0
  37. data/lib/mechanize/headers.rb +25 -0
  38. data/lib/mechanize/history.rb +83 -0
  39. data/lib/mechanize/http.rb +3 -0
  40. data/lib/mechanize/http/agent.rb +738 -0
  41. data/lib/mechanize/inspect.rb +88 -0
  42. data/lib/mechanize/monkey_patch.rb +37 -0
  43. data/lib/mechanize/page.rb +408 -0
  44. data/lib/mechanize/page/base.rb +8 -0
  45. data/lib/mechanize/page/frame.rb +27 -0
  46. data/lib/mechanize/page/image.rb +30 -0
  47. data/lib/mechanize/page/label.rb +20 -0
  48. data/lib/mechanize/page/link.rb +82 -0
  49. data/lib/mechanize/page/meta_refresh.rb +56 -0
  50. data/lib/mechanize/pluggable_parsers.rb +101 -0
  51. data/lib/mechanize/redirect_limit_reached_error.rb +16 -0
  52. data/lib/mechanize/redirect_not_get_or_head_error.rb +19 -0
  53. data/lib/mechanize/response_code_error.rb +22 -0
  54. data/lib/mechanize/response_read_error.rb +27 -0
  55. data/lib/mechanize/robots_disallowed_error.rb +29 -0
  56. data/lib/mechanize/unsupported_scheme_error.rb +8 -0
  57. data/lib/mechanize/util.rb +113 -0
  58. data/test/data/htpasswd +1 -0
  59. data/test/data/server.crt +16 -0
  60. data/test/data/server.csr +12 -0
  61. data/test/data/server.key +15 -0
  62. data/test/data/server.pem +15 -0
  63. data/test/helper.rb +175 -0
  64. data/test/htdocs/alt_text.html +10 -0
  65. data/test/htdocs/bad_form_test.html +9 -0
  66. data/test/htdocs/button.jpg +0 -0
  67. data/test/htdocs/canonical_uri.html +9 -0
  68. data/test/htdocs/dir with spaces/foo.html +1 -0
  69. data/test/htdocs/empty_form.html +6 -0
  70. data/test/htdocs/file_upload.html +26 -0
  71. data/test/htdocs/find_link.html +41 -0
  72. data/test/htdocs/form_multi_select.html +16 -0
  73. data/test/htdocs/form_multival.html +37 -0
  74. data/test/htdocs/form_no_action.html +18 -0
  75. data/test/htdocs/form_no_input_name.html +16 -0
  76. data/test/htdocs/form_select.html +16 -0
  77. data/test/htdocs/form_select_all.html +16 -0
  78. data/test/htdocs/form_select_none.html +17 -0
  79. data/test/htdocs/form_select_noopts.html +10 -0
  80. data/test/htdocs/form_set_fields.html +14 -0
  81. data/test/htdocs/form_test.html +188 -0
  82. data/test/htdocs/frame_referer_test.html +10 -0
  83. data/test/htdocs/frame_test.html +30 -0
  84. data/test/htdocs/google.html +13 -0
  85. data/test/htdocs/iframe_test.html +16 -0
  86. data/test/htdocs/index.html +6 -0
  87. data/test/htdocs/link with space.html +5 -0
  88. data/test/htdocs/meta_cookie.html +11 -0
  89. data/test/htdocs/no_title_test.html +6 -0
  90. data/test/htdocs/nofollow.html +9 -0
  91. data/test/htdocs/noindex.html +9 -0
  92. data/test/htdocs/norobots.html +8 -0
  93. data/test/htdocs/rails_3_encoding_hack_form_test.html +27 -0
  94. data/test/htdocs/rel_nofollow.html +8 -0
  95. data/test/htdocs/relative/tc_relative_links.html +21 -0
  96. data/test/htdocs/robots.html +8 -0
  97. data/test/htdocs/robots.txt +2 -0
  98. data/test/htdocs/tc_bad_charset.html +9 -0
  99. data/test/htdocs/tc_bad_links.html +5 -0
  100. data/test/htdocs/tc_base_images.html +10 -0
  101. data/test/htdocs/tc_base_link.html +8 -0
  102. data/test/htdocs/tc_blank_form.html +11 -0
  103. data/test/htdocs/tc_charset.html +6 -0
  104. data/test/htdocs/tc_checkboxes.html +19 -0
  105. data/test/htdocs/tc_encoded_links.html +5 -0
  106. data/test/htdocs/tc_field_precedence.html +11 -0
  107. data/test/htdocs/tc_follow_meta.html +8 -0
  108. data/test/htdocs/tc_form_action.html +48 -0
  109. data/test/htdocs/tc_images.html +8 -0
  110. data/test/htdocs/tc_links.html +18 -0
  111. data/test/htdocs/tc_meta_in_body.html +9 -0
  112. data/test/htdocs/tc_no_attributes.html +16 -0
  113. data/test/htdocs/tc_pretty_print.html +17 -0
  114. data/test/htdocs/tc_radiobuttons.html +17 -0
  115. data/test/htdocs/tc_referer.html +16 -0
  116. data/test/htdocs/tc_relative_links.html +19 -0
  117. data/test/htdocs/tc_textarea.html +23 -0
  118. data/test/htdocs/test_bad_encoding.html +52 -0
  119. data/test/htdocs/test_click.html +11 -0
  120. data/test/htdocs/unusual______.html +5 -0
  121. data/test/servlets.rb +402 -0
  122. data/test/ssl_server.rb +48 -0
  123. data/test/test_cookies.rb +129 -0
  124. data/test/test_form_action.rb +52 -0
  125. data/test/test_form_as_hash.rb +59 -0
  126. data/test/test_form_button.rb +46 -0
  127. data/test/test_frames.rb +34 -0
  128. data/test/test_headers.rb +33 -0
  129. data/test/test_history.rb +118 -0
  130. data/test/test_history_added.rb +16 -0
  131. data/test/test_html_unscape_forms.rb +46 -0
  132. data/test/test_if_modified_since.rb +20 -0
  133. data/test/test_images.rb +19 -0
  134. data/test/test_mechanize.rb +852 -0
  135. data/test/test_mechanize_cookie.rb +345 -0
  136. data/test/test_mechanize_cookie_jar.rb +433 -0
  137. data/test/test_mechanize_file.rb +53 -0
  138. data/test/test_mechanize_file_request.rb +19 -0
  139. data/test/test_mechanize_file_response.rb +21 -0
  140. data/test/test_mechanize_form.rb +576 -0
  141. data/test/test_mechanize_form_check_box.rb +37 -0
  142. data/test/test_mechanize_form_encoding.rb +120 -0
  143. data/test/test_mechanize_form_field.rb +21 -0
  144. data/test/test_mechanize_form_image_button.rb +12 -0
  145. data/test/test_mechanize_form_textarea.rb +51 -0
  146. data/test/test_mechanize_http_agent.rb +697 -0
  147. data/test/test_mechanize_link.rb +84 -0
  148. data/test/test_mechanize_page_encoding.rb +147 -0
  149. data/test/test_mechanize_page_link.rb +382 -0
  150. data/test/test_mechanize_page_meta_refresh.rb +115 -0
  151. data/test/test_mechanize_redirect_not_get_or_head_error.rb +18 -0
  152. data/test/test_mechanize_subclass.rb +22 -0
  153. data/test/test_mechanize_util.rb +92 -0
  154. data/test/test_multi_select.rb +118 -0
  155. data/test/test_no_attributes.rb +13 -0
  156. data/test/test_option.rb +18 -0
  157. data/test/test_pluggable_parser.rb +136 -0
  158. data/test/test_post_form.rb +37 -0
  159. data/test/test_pretty_print.rb +22 -0
  160. data/test/test_radiobutton.rb +75 -0
  161. data/test/test_redirect_limit_reached.rb +39 -0
  162. data/test/test_redirect_ok.rb +25 -0
  163. data/test/test_referer.rb +81 -0
  164. data/test/test_relative_links.rb +40 -0
  165. data/test/test_request.rb +13 -0
  166. data/test/test_response_code.rb +53 -0
  167. data/test/test_robots.rb +72 -0
  168. data/test/test_save_file.rb +48 -0
  169. data/test/test_scheme.rb +48 -0
  170. data/test/test_select.rb +119 -0
  171. data/test/test_select_all.rb +15 -0
  172. data/test/test_select_none.rb +15 -0
  173. data/test/test_select_noopts.rb +18 -0
  174. data/test/test_set_fields.rb +44 -0
  175. data/test/test_ssl_server.rb +20 -0
  176. metadata +360 -0
@@ -0,0 +1,88 @@
1
+ require 'pp'
2
+
3
+ # :stopdoc:
4
+ class Mechanize
5
+ def pretty_print(q)
6
+ q.object_group(self) {
7
+ q.breakable
8
+ q.pp cookie_jar
9
+ q.breakable
10
+ q.pp current_page
11
+ }
12
+ end
13
+
14
+ class Page
15
+ def pretty_print(q)
16
+ q.object_group(self) {
17
+ q.breakable
18
+ q.group(1, '{url', '}') {q.breakable; q.pp uri }
19
+ q.breakable
20
+ q.group(1, '{meta_refresh', '}') {
21
+ meta_refresh.each { |link| q.breakable; q.pp link }
22
+ }
23
+ q.breakable
24
+ q.group(1, '{title', '}') { q.breakable; q.pp title }
25
+ q.breakable
26
+ q.group(1, '{iframes', '}') {
27
+ iframes.each { |link| q.breakable; q.pp link }
28
+ }
29
+ q.breakable
30
+ q.group(1, '{frames', '}') {
31
+ frames.each { |link| q.breakable; q.pp link }
32
+ }
33
+ q.breakable
34
+ q.group(1, '{links', '}') {
35
+ links.each { |link| q.breakable; q.pp link }
36
+ }
37
+ q.breakable
38
+ q.group(1, '{forms', '}') {
39
+ forms.each { |form| q.breakable; q.pp form }
40
+ }
41
+ }
42
+ end
43
+
44
+ class Link
45
+ def pretty_print(q)
46
+ q.object_group(self) {
47
+ q.breakable; q.pp text
48
+ q.breakable; q.pp href
49
+ }
50
+ end
51
+ end
52
+ end
53
+
54
+ class Form
55
+ def pretty_print(q)
56
+ q.object_group(self) {
57
+ q.breakable; q.group(1, '{name', '}') { q.breakable; q.pp name }
58
+ q.breakable; q.group(1, '{method', '}') { q.breakable; q.pp method }
59
+ q.breakable; q.group(1, '{action', '}') { q.breakable; q.pp action }
60
+ q.breakable; q.group(1, '{fields', '}') {
61
+ fields.each do |field|
62
+ q.breakable
63
+ q.pp field
64
+ end
65
+ }
66
+ q.breakable; q.group(1, '{radiobuttons', '}') {
67
+ radiobuttons.each { |b| q.breakable; q.pp b }
68
+ }
69
+ q.breakable; q.group(1, '{checkboxes', '}') {
70
+ checkboxes.each { |b| q.breakable; q.pp b }
71
+ }
72
+ q.breakable; q.group(1, '{file_uploads', '}') {
73
+ file_uploads.each { |b| q.breakable; q.pp b }
74
+ }
75
+ q.breakable; q.group(1, '{buttons', '}') {
76
+ buttons.each { |b| q.breakable; q.pp b }
77
+ }
78
+ }
79
+ end
80
+
81
+ class RadioButton
82
+ def pretty_print_instance_variables
83
+ [:@checked, :@name, :@value]
84
+ end
85
+ end
86
+ end
87
+ end
88
+ # :startdoc:
@@ -0,0 +1,37 @@
1
+ if RUBY_VERSION < '1.9' then
2
+ module Net
3
+ class HTTP
4
+ alias :old_keep_alive? :keep_alive?
5
+ def keep_alive?(req, res)
6
+ return false if /close/i =~ req['connection'].to_s
7
+ return false if @seems_1_0_server
8
+ return false if /close/i =~ res['connection'].to_s
9
+ return true if /keep-alive/i =~ res['connection'].to_s
10
+ return false if /close/i =~ res['proxy-connection'].to_s
11
+ return true if /keep-alive/i =~ res['proxy-connection'].to_s
12
+ (@curr_http_version == '1.1')
13
+ end
14
+ end
15
+ end
16
+ end
17
+
18
+ # Monkey patch for ruby 1.8.4
19
+ unless RUBY_VERSION > "1.8.4"
20
+ module Net # :nodoc:
21
+ class HTTPResponse # :nodoc:
22
+ CODE_TO_OBJ['500'] = HTTPInternalServerError
23
+ end
24
+ end
25
+ else
26
+ class Mechanize
27
+ class Form
28
+ alias :inspect :pretty_inspect
29
+ end
30
+ class Page
31
+ alias :inspect :pretty_inspect
32
+ class Link
33
+ alias :inspect :pretty_inspect
34
+ end
35
+ end
36
+ end
37
+ end
@@ -0,0 +1,408 @@
1
+ ##
2
+ # This class encapsulates an HTML page. If Mechanize finds a content
3
+ # type of 'text/html', this class will be instantiated and returned.
4
+ #
5
+ # Example:
6
+ #
7
+ # require 'mechanize'
8
+ #
9
+ # agent = Mechanize.new
10
+ # agent.get('http://google.com/').class # => Mechanize::Page
11
+
12
+ class Mechanize::Page < Mechanize::File
13
+ extend Forwardable
14
+ extend Mechanize::ElementMatcher
15
+
16
+ attr_accessor :mech
17
+
18
+ ##
19
+ # Possible encodings for this page based on HTTP headers and meta elements
20
+
21
+ attr_reader :encodings
22
+
23
##
# Creates a page for +uri+ from +response+ and +body+.
#
# Raises Mechanize::ContentTypeError unless the response's content-type
# header starts with text/html or application/xhtml+xml, and raises a
# RuntimeError when +mech+ is given but is not a Mechanize instance.
#
# Candidate encodings are appended to #encodings in this order: nil, the
# charset detected from +body+, charsets found in the response headers,
# charsets found in meta tags, and finally the agent's default encoding.

def initialize(uri=nil, response=nil, body=nil, code=nil, mech=nil)
  # The alternatives are grouped so the ^ anchor applies to both media
  # types.  Without the group, "application/xhtml+xml" was accepted anywhere
  # in the header value (e.g. inside a parameter of an unrelated type).
  raise Mechanize::ContentTypeError, response['content-type'] unless
    response['content-type'] =~ /^(?:text\/html|application\/xhtml\+xml)/i

  @meta_content_type = nil
  @encoding = nil
  @encodings = [nil]
  raise 'no' if mech and not Mechanize === mech
  @mech = mech

  reset

  # Detection runs before the body is re-tagged as binary below.
  @encodings << Mechanize::Util.detect_charset(body) if body

  @encodings.concat self.class.response_header_charset(response)

  if body
    # Force the encoding to be 8BIT so we can perform regular expressions.
    # We'll set it to the detected encoding later.
    # Note that this re-tags the caller's string in place.
    body.force_encoding 'ASCII-8BIT' if body.respond_to? :force_encoding

    @encodings.concat self.class.meta_charset body

    meta_content_type = self.class.meta_content_type body
    @meta_content_type = meta_content_type if meta_content_type
  end

  @encodings << mech.default_encoding if mech and mech.default_encoding

  super uri, response, body, code
end
54
+
55
+ def title
56
+ @title ||=
57
+ if doc = parser
58
+ title = doc.search('title').inner_text
59
+ title.empty? ? nil : title
60
+ end
61
+ end
62
+
63
+ def response_header_charset
64
+ self.class.response_header_charset(response)
65
+ end
66
+
67
+ def meta_charset
68
+ self.class.meta_charset(body)
69
+ end
70
+
71
+ def detected_encoding
72
+ Mechanize::Util.detect_charset(body)
73
+ end
74
+
75
+ def encoding=(encoding)
76
+ reset
77
+
78
+ @encoding = encoding
79
+
80
+ if @parser
81
+ parser_encoding = @parser.encoding
82
+ if (parser_encoding && parser_encoding.downcase) != (encoding && encoding.downcase)
83
+ # lazy reinitialize the parser with the new encoding
84
+ @parser = nil
85
+ end
86
+ end
87
+
88
+ encoding
89
+ end
90
+
91
+ def encoding
92
+ parser.respond_to?(:encoding) ? parser.encoding : nil
93
+ end
94
+
95
+ # Return whether parser result has errors related to encoding or not.
96
+ # A false result only means the parser reported no encoding-related errors; it does not mean the encoding is valid.
97
+ def encoding_error?(parser=nil)
98
+ parser = self.parser unless parser
99
+ return false if parser.errors.empty?
100
+ parser.errors.any? do |error|
101
+ error.message =~ /(indicate\ encoding)|
102
+ (Invalid\ char)|
103
+ (input\ conversion\ failed)/x
104
+ end
105
+ end
106
+
107
+ def parser
108
+ return @parser if @parser
109
+ return nil unless @body
110
+
111
+ if @encoding then
112
+ @parser = @mech.html_parser.parse html_body, nil, @encoding
113
+ elsif mech.force_default_encoding then
114
+ @parser = @mech.html_parser.parse html_body, nil, @mech.default_encoding
115
+ else
116
+ @encodings.reverse_each do |encoding|
117
+ @parser = @mech.html_parser.parse html_body, nil, encoding
118
+
119
+ break unless encoding_error? @parser
120
+ end
121
+ end
122
+
123
+ @parser
124
+ end
125
+
126
+ alias :root :parser
127
+
128
##
# Drops the parser and every value memoized from it so they are rebuilt
# lazily on next access.  Called from #initialize and from #encoding=.

def reset
  @bases = nil
  @forms = nil
  @frames = nil
  @iframes = nil
  # @images and @image_urls are memoized by #images and #image_urls just
  # like the other collections; clearing them keeps them from pointing at
  # nodes of a parser that has been discarded.
  @images = nil
  @image_urls = nil
  @links = nil
  @labels = nil
  @labels_hash = nil
  @meta_refresh = nil
  @parser = nil
  @title = nil
end
140
+
141
+ # Return the canonical URI for the page if there is a link tag
142
+ # with rel="canonical".
143
+ def canonical_uri
144
+ link = at('link[@rel="canonical"][@href]')
145
+ return unless link
146
+ href = link['href']
147
+
148
+ URI href
149
+ rescue URI::InvalidURIError
150
+ URI Mechanize::Util.uri_escape href
151
+ end
152
+
153
+ # Get the content type
154
+ def content_type
155
+ @meta_content_type || response['content-type']
156
+ end
157
+
158
+ # Search through the page like HPricot
159
+ def_delegator :parser, :search, :search
160
+ def_delegator :parser, :/, :/
161
+ def_delegator :parser, :at, :at
162
+
163
+ ##
164
+ # :method: form_with(criteria)
165
+ #
166
+ # Find a single form matching +criteria+.
167
+ # Example:
168
+ # page.form_with(:action => '/post/login.php') do |f|
169
+ # ...
170
+ # end
171
+
172
+ ##
173
+ # :method: forms_with(criteria)
174
+ #
175
+ # Find all forms matching +criteria+.
176
+ # Example:
177
+ # page.forms_with(:action => '/post/login.php').each do |f|
178
+ # ...
179
+ # end
180
+
181
+ elements_with :form
182
+
183
+ ##
184
+ # :method: link_with(criteria)
185
+ #
186
+ # Find a single link matching +criteria+.
187
+ # Example:
188
+ # page.link_with(:href => /foo/).click
189
+
190
+ ##
191
+ # :method: links_with(criteria)
192
+ #
193
+ # Find all links matching +criteria+.
194
+ # Example:
195
+ # page.links_with(:href => /foo/).each do |link|
196
+ # puts link.href
197
+ # end
198
+
199
+ elements_with :link
200
+
201
+ ##
202
+ # :method: base_with(criteria)
203
+ #
204
+ # Find a single base tag matching +criteria+.
205
+ # Example:
206
+ # page.base_with(:href => /foo/).click
207
+
208
+ ##
209
+ # :method: bases_with(criteria)
210
+ #
211
+ # Find all base tags matching +criteria+.
212
+ # Example:
213
+ # page.bases_with(:href => /foo/).each do |base|
214
+ # puts base.href
215
+ # end
216
+
217
+ elements_with :base
218
+
219
+ ##
220
+ # :method: frame_with(criteria)
221
+ #
222
+ # Find a single frame tag matching +criteria+.
223
+ # Example:
224
+ # page.frame_with(:src => /foo/).click
225
+
226
+ ##
227
+ # :method: frames_with(criteria)
228
+ #
229
+ # Find all frame tags matching +criteria+.
230
+ # Example:
231
+ # page.frames_with(:src => /foo/).each do |frame|
232
+ # p frame.src
233
+ # end
234
+
235
+ elements_with :frame
236
+
237
+ ##
238
+ # :method: iframe_with(criteria)
239
+ #
240
+ # Find a single iframe tag matching +criteria+.
241
+ # Example:
242
+ # page.iframe_with(:src => /foo/).click
243
+
244
+ ##
245
+ # :method: iframes_with(criteria)
246
+ #
247
+ # Find all iframe tags matching +criteria+.
248
+ # Example:
249
+ # page.iframes_with(:src => /foo/).each do |iframe|
250
+ # p iframe.src
251
+ # end
252
+
253
+ elements_with :iframe
254
+
255
+ ##
256
+ # Return a list of all link and area tags
257
+ def links
258
+ @links ||= %w{ a area }.map do |tag|
259
+ search(tag).map do |node|
260
+ Link.new(node, @mech, self)
261
+ end
262
+ end.flatten
263
+ end
264
+
265
+ ##
266
+ # Return a list of all form tags
267
+ def forms
268
+ @forms ||= search('form').map do |html_form|
269
+ form = Mechanize::Form.new(html_form, @mech, self)
270
+ form.action ||= @uri.to_s
271
+ form
272
+ end
273
+ end
274
+
275
+ ##
276
+ # Return a list of all meta refresh elements
277
+
278
+ def meta_refresh
279
+ query = @mech.follow_meta_refresh == :anywhere ? 'meta' : 'head > meta'
280
+
281
+ @meta_refresh ||= search(query).map do |node|
282
+ MetaRefresh.from_node node, self, uri
283
+ end.compact
284
+ end
285
+
286
+ ##
287
+ # Return a list of all base tags
288
+ def bases
289
+ @bases ||=
290
+ search('base').map { |node| Base.new(node, @mech, self) }
291
+ end
292
+
293
+ ##
294
+ # Return a list of all frame tags
295
+ def frames
296
+ @frames ||=
297
+ search('frame').map { |node| Frame.new(node, @mech, self) }
298
+ end
299
+
300
+ ##
301
+ # Return a list of all iframe tags
302
+ def iframes
303
+ @iframes ||=
304
+ search('iframe').map { |node| Frame.new(node, @mech, self) }
305
+ end
306
+
307
+ ##
308
+ # Return a list of all img tags
309
+ def images
310
+ @images ||=
311
+ search('img').map { |node| Image.new(node, self) }
312
+ end
313
+
314
+ def image_urls
315
+ @image_urls ||= images.map(&:url).uniq
316
+ end
317
+
318
+ ##
319
+ # Return a list of all label tags
320
+ def labels
321
+ @labels ||=
322
+ search('label').map { |node| Label.new(node, self) }
323
+ end
324
+
325
+ def labels_hash
326
+ unless @labels_hash
327
+ hash = {}
328
+ labels.each do |label|
329
+ hash[label.node['for']] = label if label.for
330
+ end
331
+ @labels_hash = hash
332
+ end
333
+ return @labels_hash
334
+ end
335
+
336
##
# Extracts the value of the +charset+ parameter from a content-type style
# string such as "text/html; charset=utf-8".
#
# Returns nil when +content_type+ is nil, when it carries no charset
# parameter, or when the charset is the literal "none".  Quotes around the
# value (charset="utf-8") are not part of the returned name.

def self.charset content_type
  return nil unless content_type

  # An optional opening quote is skipped and quotes are excluded from the
  # captured value, so a quoted parameter yields a bare charset name.
  charset = content_type[/charset=["']?([^;"' ]+)/i, 1]
  return nil if charset == 'none'
  charset
end
341
+
342
+ def self.response_header_charset response
343
+ charsets = []
344
+ response.each do |header, value|
345
+ next unless value =~ /charset/i
346
+ charsets << charset(value)
347
+ end
348
+ charsets
349
+ end
350
+
351
+ ##
352
+ # Retrieves all charsets from +meta+ tags in +body+
353
+
354
def self.meta_charset body
  body.scan(/<meta .*?>/i).map do |meta|
    # A quoted charset attribute, e.g. <meta charset="utf-8">.  The capture
    # is lazy so it stops at the first matching closing quote instead of
    # running on to the last quote of a later attribute.
    if meta =~ /charset\s*=\s*(["'])\s*(.+?)\s*\1/i then
      $2
    elsif meta =~ /http-equiv\s*=\s*(["'])?content-type\1/i then
      # Take the charset from the content attribute.  A tag without a
      # usable content attribute contributes nothing rather than passing
      # nil on to charset.
      content = meta[/content\s*=\s*(["'])?(.*?)\1/i, 2]

      charset content if content
    end
  end.compact
end
368
+
369
+ ##
370
+ # Retrieves the last <tt>content-type</tt> set by a +meta+ tag in +body+
371
+
372
def self.meta_content_type body
  # Walk the meta tags last-to-first so the first hit is the last
  # content-type declared in the document.
  body.scan(/<meta .*?>/i).reverse_each do |meta|
    next unless meta =~ /http-equiv\s*=\s*(["'])?content-type\1/i

    # Whitespace around "=" is tolerated, matching meta_charset.  A tag
    # without a usable content attribute is skipped so an earlier
    # declaration can still be returned.
    content = meta[/content\s*=\s*(["'])?(.*?)\1/i, 2]

    return content if content
  end

  nil
end
383
+
384
+ private
385
+
386
+ def html_body
387
+ if @body
388
+ @body.empty? ? '<html></html>' : @body
389
+ else
390
+ ''
391
+ end
392
+ end
393
+
394
+ def self.charset_from_content_type content_type
395
+ charset = content_type[/charset=([^; ]+)/i, 1]
396
+ return nil if charset == 'none'
397
+ charset
398
+ end
399
+ end
400
+
401
+ require 'mechanize/headers'
402
+ require 'mechanize/page/image'
403
+ require 'mechanize/page/label'
404
+ require 'mechanize/page/link'
405
+ require 'mechanize/page/base'
406
+ require 'mechanize/page/frame'
407
+ require 'mechanize/page/meta_refresh'
408
+