neocoin-mechanize 2.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (174) hide show
  1. data/.autotest +6 -0
  2. data/.gemtest +0 -0
  3. data/CHANGELOG.rdoc +638 -0
  4. data/EXAMPLES.rdoc +187 -0
  5. data/FAQ.rdoc +11 -0
  6. data/GUIDE.rdoc +163 -0
  7. data/LICENSE.rdoc +20 -0
  8. data/Manifest.txt +172 -0
  9. data/README.rdoc +63 -0
  10. data/Rakefile +36 -0
  11. data/examples/flickr_upload.rb +22 -0
  12. data/examples/mech-dump.rb +5 -0
  13. data/examples/proxy_req.rb +7 -0
  14. data/examples/rubyforge.rb +20 -0
  15. data/examples/spider.rb +21 -0
  16. data/lib/mechanize.rb +662 -0
  17. data/lib/mechanize/content_type_error.rb +14 -0
  18. data/lib/mechanize/cookie.rb +85 -0
  19. data/lib/mechanize/cookie_jar.rb +241 -0
  20. data/lib/mechanize/element_matcher.rb +35 -0
  21. data/lib/mechanize/file.rb +80 -0
  22. data/lib/mechanize/file_connection.rb +17 -0
  23. data/lib/mechanize/file_request.rb +26 -0
  24. data/lib/mechanize/file_response.rb +74 -0
  25. data/lib/mechanize/file_saver.rb +37 -0
  26. data/lib/mechanize/form.rb +478 -0
  27. data/lib/mechanize/form/button.rb +9 -0
  28. data/lib/mechanize/form/check_box.rb +11 -0
  29. data/lib/mechanize/form/field.rb +44 -0
  30. data/lib/mechanize/form/file_upload.rb +23 -0
  31. data/lib/mechanize/form/image_button.rb +20 -0
  32. data/lib/mechanize/form/multi_select_list.rb +83 -0
  33. data/lib/mechanize/form/option.rb +49 -0
  34. data/lib/mechanize/form/radio_button.rb +48 -0
  35. data/lib/mechanize/form/select_list.rb +40 -0
  36. data/lib/mechanize/headers.rb +25 -0
  37. data/lib/mechanize/history.rb +83 -0
  38. data/lib/mechanize/http.rb +3 -0
  39. data/lib/mechanize/http/agent.rb +738 -0
  40. data/lib/mechanize/inspect.rb +88 -0
  41. data/lib/mechanize/monkey_patch.rb +37 -0
  42. data/lib/mechanize/page.rb +408 -0
  43. data/lib/mechanize/page/base.rb +8 -0
  44. data/lib/mechanize/page/frame.rb +27 -0
  45. data/lib/mechanize/page/image.rb +30 -0
  46. data/lib/mechanize/page/label.rb +20 -0
  47. data/lib/mechanize/page/link.rb +82 -0
  48. data/lib/mechanize/page/meta_refresh.rb +56 -0
  49. data/lib/mechanize/pluggable_parsers.rb +101 -0
  50. data/lib/mechanize/redirect_limit_reached_error.rb +16 -0
  51. data/lib/mechanize/redirect_not_get_or_head_error.rb +19 -0
  52. data/lib/mechanize/response_code_error.rb +22 -0
  53. data/lib/mechanize/response_read_error.rb +27 -0
  54. data/lib/mechanize/robots_disallowed_error.rb +29 -0
  55. data/lib/mechanize/unsupported_scheme_error.rb +8 -0
  56. data/lib/mechanize/util.rb +113 -0
  57. data/test/data/htpasswd +1 -0
  58. data/test/data/server.crt +16 -0
  59. data/test/data/server.csr +12 -0
  60. data/test/data/server.key +15 -0
  61. data/test/data/server.pem +15 -0
  62. data/test/helper.rb +175 -0
  63. data/test/htdocs/alt_text.html +10 -0
  64. data/test/htdocs/bad_form_test.html +9 -0
  65. data/test/htdocs/button.jpg +0 -0
  66. data/test/htdocs/canonical_uri.html +9 -0
  67. data/test/htdocs/dir with spaces/foo.html +1 -0
  68. data/test/htdocs/empty_form.html +6 -0
  69. data/test/htdocs/file_upload.html +26 -0
  70. data/test/htdocs/find_link.html +41 -0
  71. data/test/htdocs/form_multi_select.html +16 -0
  72. data/test/htdocs/form_multival.html +37 -0
  73. data/test/htdocs/form_no_action.html +18 -0
  74. data/test/htdocs/form_no_input_name.html +16 -0
  75. data/test/htdocs/form_select.html +16 -0
  76. data/test/htdocs/form_select_all.html +16 -0
  77. data/test/htdocs/form_select_none.html +17 -0
  78. data/test/htdocs/form_select_noopts.html +10 -0
  79. data/test/htdocs/form_set_fields.html +14 -0
  80. data/test/htdocs/form_test.html +188 -0
  81. data/test/htdocs/frame_referer_test.html +10 -0
  82. data/test/htdocs/frame_test.html +30 -0
  83. data/test/htdocs/google.html +13 -0
  84. data/test/htdocs/iframe_test.html +16 -0
  85. data/test/htdocs/index.html +6 -0
  86. data/test/htdocs/link with space.html +5 -0
  87. data/test/htdocs/meta_cookie.html +11 -0
  88. data/test/htdocs/no_title_test.html +6 -0
  89. data/test/htdocs/nofollow.html +9 -0
  90. data/test/htdocs/noindex.html +9 -0
  91. data/test/htdocs/norobots.html +8 -0
  92. data/test/htdocs/rails_3_encoding_hack_form_test.html +27 -0
  93. data/test/htdocs/rel_nofollow.html +8 -0
  94. data/test/htdocs/relative/tc_relative_links.html +21 -0
  95. data/test/htdocs/robots.html +8 -0
  96. data/test/htdocs/robots.txt +2 -0
  97. data/test/htdocs/tc_bad_charset.html +9 -0
  98. data/test/htdocs/tc_bad_links.html +5 -0
  99. data/test/htdocs/tc_base_images.html +10 -0
  100. data/test/htdocs/tc_base_link.html +8 -0
  101. data/test/htdocs/tc_blank_form.html +11 -0
  102. data/test/htdocs/tc_charset.html +6 -0
  103. data/test/htdocs/tc_checkboxes.html +19 -0
  104. data/test/htdocs/tc_encoded_links.html +5 -0
  105. data/test/htdocs/tc_field_precedence.html +11 -0
  106. data/test/htdocs/tc_follow_meta.html +8 -0
  107. data/test/htdocs/tc_form_action.html +48 -0
  108. data/test/htdocs/tc_images.html +8 -0
  109. data/test/htdocs/tc_links.html +18 -0
  110. data/test/htdocs/tc_meta_in_body.html +9 -0
  111. data/test/htdocs/tc_no_attributes.html +16 -0
  112. data/test/htdocs/tc_pretty_print.html +17 -0
  113. data/test/htdocs/tc_radiobuttons.html +17 -0
  114. data/test/htdocs/tc_referer.html +16 -0
  115. data/test/htdocs/tc_relative_links.html +19 -0
  116. data/test/htdocs/tc_textarea.html +23 -0
  117. data/test/htdocs/test_bad_encoding.html +52 -0
  118. data/test/htdocs/test_click.html +11 -0
  119. data/test/htdocs/unusual______.html +5 -0
  120. data/test/servlets.rb +402 -0
  121. data/test/ssl_server.rb +48 -0
  122. data/test/test_cookies.rb +129 -0
  123. data/test/test_form_action.rb +52 -0
  124. data/test/test_form_as_hash.rb +59 -0
  125. data/test/test_form_button.rb +46 -0
  126. data/test/test_frames.rb +34 -0
  127. data/test/test_headers.rb +33 -0
  128. data/test/test_history.rb +118 -0
  129. data/test/test_history_added.rb +16 -0
  130. data/test/test_html_unscape_forms.rb +46 -0
  131. data/test/test_if_modified_since.rb +20 -0
  132. data/test/test_images.rb +19 -0
  133. data/test/test_mechanize.rb +842 -0
  134. data/test/test_mechanize_cookie.rb +345 -0
  135. data/test/test_mechanize_cookie_jar.rb +401 -0
  136. data/test/test_mechanize_file.rb +53 -0
  137. data/test/test_mechanize_file_request.rb +19 -0
  138. data/test/test_mechanize_file_response.rb +21 -0
  139. data/test/test_mechanize_form.rb +576 -0
  140. data/test/test_mechanize_form_check_box.rb +37 -0
  141. data/test/test_mechanize_form_encoding.rb +120 -0
  142. data/test/test_mechanize_form_field.rb +21 -0
  143. data/test/test_mechanize_form_image_button.rb +12 -0
  144. data/test/test_mechanize_form_textarea.rb +51 -0
  145. data/test/test_mechanize_http_agent.rb +697 -0
  146. data/test/test_mechanize_link.rb +84 -0
  147. data/test/test_mechanize_page_encoding.rb +147 -0
  148. data/test/test_mechanize_page_link.rb +382 -0
  149. data/test/test_mechanize_page_meta_refresh.rb +115 -0
  150. data/test/test_mechanize_redirect_not_get_or_head_error.rb +18 -0
  151. data/test/test_mechanize_subclass.rb +22 -0
  152. data/test/test_mechanize_util.rb +92 -0
  153. data/test/test_multi_select.rb +118 -0
  154. data/test/test_no_attributes.rb +13 -0
  155. data/test/test_option.rb +18 -0
  156. data/test/test_pluggable_parser.rb +136 -0
  157. data/test/test_post_form.rb +37 -0
  158. data/test/test_pretty_print.rb +22 -0
  159. data/test/test_radiobutton.rb +75 -0
  160. data/test/test_redirect_limit_reached.rb +39 -0
  161. data/test/test_referer.rb +81 -0
  162. data/test/test_relative_links.rb +40 -0
  163. data/test/test_request.rb +13 -0
  164. data/test/test_response_code.rb +53 -0
  165. data/test/test_robots.rb +72 -0
  166. data/test/test_save_file.rb +48 -0
  167. data/test/test_scheme.rb +48 -0
  168. data/test/test_select.rb +119 -0
  169. data/test/test_select_all.rb +15 -0
  170. data/test/test_select_none.rb +15 -0
  171. data/test/test_select_noopts.rb +18 -0
  172. data/test/test_set_fields.rb +44 -0
  173. data/test/test_ssl_server.rb +20 -0
  174. metadata +354 -0
@@ -0,0 +1,88 @@
1
+ require 'pp'
2
+
3
+ # :stopdoc:
4
class Mechanize
  # Pretty-prints the agent as its cookie jar followed by its current page.
  def pretty_print(q)
    q.object_group(self) do
      q.breakable
      q.pp cookie_jar
      q.breakable
      q.pp current_page
    end
  end

  class Page
    # Pretty-prints the page as labelled groups, in this order: url,
    # meta_refresh, title, iframes, frames, links and forms.
    def pretty_print(q)
      q.object_group(self) do
        q.breakable
        q.group(1, '{url', '}') do
          q.breakable
          q.pp uri
        end
        q.breakable
        q.group(1, '{meta_refresh', '}') do
          meta_refresh.each do |refresh|
            q.breakable
            q.pp refresh
          end
        end
        q.breakable
        q.group(1, '{title', '}') do
          q.breakable
          q.pp title
        end
        q.breakable
        q.group(1, '{iframes', '}') do
          iframes.each do |iframe|
            q.breakable
            q.pp iframe
          end
        end
        q.breakable
        q.group(1, '{frames', '}') do
          frames.each do |frame|
            q.breakable
            q.pp frame
          end
        end
        q.breakable
        q.group(1, '{links', '}') do
          links.each do |link|
            q.breakable
            q.pp link
          end
        end
        q.breakable
        q.group(1, '{forms', '}') do
          forms.each do |form|
            q.breakable
            q.pp form
          end
        end
      end
    end

    class Link
      # Pretty-prints the link as its text followed by its href.
      def pretty_print(q)
        q.object_group(self) do
          q.breakable
          q.pp text
          q.breakable
          q.pp href
        end
      end
    end
  end

  class Form
    # Pretty-prints the form as labelled groups, in this order: name, method,
    # action, fields, radiobuttons, checkboxes, file_uploads and buttons.
    def pretty_print(q)
      q.object_group(self) do
        q.breakable
        q.group(1, '{name', '}') do
          q.breakable
          q.pp name
        end
        q.breakable
        q.group(1, '{method', '}') do
          q.breakable
          q.pp method
        end
        q.breakable
        q.group(1, '{action', '}') do
          q.breakable
          q.pp action
        end
        q.breakable
        q.group(1, '{fields', '}') do
          fields.each do |field|
            q.breakable
            q.pp field
          end
        end
        q.breakable
        q.group(1, '{radiobuttons', '}') do
          radiobuttons.each do |radio|
            q.breakable
            q.pp radio
          end
        end
        q.breakable
        q.group(1, '{checkboxes', '}') do
          checkboxes.each do |box|
            q.breakable
            q.pp box
          end
        end
        q.breakable
        q.group(1, '{file_uploads', '}') do
          file_uploads.each do |upload|
            q.breakable
            q.pp upload
          end
        end
        q.breakable
        q.group(1, '{buttons', '}') do
          buttons.each do |button|
            q.breakable
            q.pp button
          end
        end
      end
    end

    class RadioButton
      # Limits pretty-printed state to the three listed instance variables.
      def pretty_print_instance_variables
        [:@checked, :@name, :@value]
      end
    end
  end
end
88
+ # :startdoc:
@@ -0,0 +1,37 @@
1
if RUBY_VERSION < '1.9'
  module Net
    class HTTP
      alias old_keep_alive? keep_alive?

      # Decides whether the connection may be reused after +req+/+res+.
      # A "close" on the request, or a server flagged as HTTP/1.0, always
      # wins; otherwise the response's Connection and then Proxy-Connection
      # headers are consulted, and HTTP/1.1 is the fallback.
      def keep_alive?(req, res)
        return false if req['connection'].to_s =~ /close/i
        return false if @seems_1_0_server

        %w[connection proxy-connection].each do |field|
          return false if res[field].to_s =~ /close/i
          return true  if res[field].to_s =~ /keep-alive/i
        end

        @curr_http_version == '1.1'
      end
    end
  end
end
17
+
18
# On interpreters newer than 1.8.4, make #inspect on forms, pages and links
# use their pretty-printed representation.  On 1.8.4 and older, register the
# response class for HTTP status 500 with Net::HTTPResponse instead.
if RUBY_VERSION > "1.8.4"
  class Mechanize
    class Form
      alias inspect pretty_inspect
    end

    class Page
      alias inspect pretty_inspect

      class Link
        alias inspect pretty_inspect
      end
    end
  end
else
  module Net # :nodoc:
    class HTTPResponse # :nodoc:
      CODE_TO_OBJ['500'] = HTTPInternalServerError
    end
  end
end
@@ -0,0 +1,408 @@
1
+ ##
2
+ # This class encapsulates an HTML page. If Mechanize finds a content
3
+ # type of 'text/html', this class will be instantiated and returned.
4
+ #
5
+ # Example:
6
+ #
7
+ # require 'mechanize'
8
+ #
9
+ # agent = Mechanize.new
10
+ # agent.get('http://google.com/').class # => Mechanize::Page
11
+
12
+ class Mechanize::Page < Mechanize::File
13
+ extend Forwardable
14
+ extend Mechanize::ElementMatcher
15
+
16
+ attr_accessor :mech
17
+
18
+ ##
19
+ # Possible encodings for this page based on HTTP headers and meta elements
20
+
21
+ attr_reader :encodings
22
+
23
##
# Builds a page for +uri+ from an HTTP +response+, its +body+ and status
# +code+.  +mech+, when given, must be a Mechanize instance.
#
# Raises Mechanize::ContentTypeError unless the response's content-type
# starts with text/html or application/xhtml+xml.
#
# Candidate encodings are appended to @encodings in this order: a leading
# nil, the charset detected from the body, charsets from the response
# headers, charsets from +meta+ tags in the body, and finally the agent's
# default encoding.
#
# NOTE(review): +response+ defaults to nil but is dereferenced immediately,
# so callers must always pass one -- confirm whether a default is intended.

def initialize(uri=nil, response=nil, body=nil, code=nil, mech=nil)
  # The alternatives are grouped so the anchor applies to both media types;
  # an ungrouped /^(a)|(b)/ would accept the second one anywhere in the value.
  raise Mechanize::ContentTypeError, response['content-type'] unless
    response['content-type'] =~ /\A(?:text\/html|application\/xhtml\+xml)/i

  @meta_content_type = nil
  @encoding = nil
  @encodings = [nil]
  raise 'no' if mech and not Mechanize === mech
  @mech = mech

  reset

  @encodings << Mechanize::Util.detect_charset(body) if body

  @encodings.concat self.class.response_header_charset(response)

  if body
    # Force the encoding to be 8BIT so we can perform regular expressions.
    # We'll set it to the detected encoding later
    body.force_encoding 'ASCII-8BIT' if body.respond_to? :force_encoding

    @encodings.concat self.class.meta_charset body

    meta_content_type = self.class.meta_content_type body
    @meta_content_type = meta_content_type if meta_content_type
  end

  @encodings << mech.default_encoding if mech and mech.default_encoding

  super uri, response, body, code
end
54
+
55
# Returns the inner text of the document's +title+ element(s), memoized in
# @title, or nil when there is no parser or the text is empty.
def title
  return @title if @title

  document = parser
  text = document.search('title').inner_text if document
  @title = (text.nil? || text.empty?) ? nil : text
end
62
+
63
# Charsets found in this page's response headers; delegates to the
# class-level method of the same name.
def response_header_charset
  page_class = self.class
  page_class.response_header_charset(response)
end
66
+
67
# Charsets declared by +meta+ tags in this page's body; delegates to the
# class-level method of the same name.
def meta_charset
  page_class = self.class
  page_class.meta_charset(body)
end
70
+
71
# Result of Mechanize::Util.detect_charset applied to this page's body.
def detected_encoding
  raw = body
  Mechanize::Util.detect_charset(raw)
end
74
+
75
# Sets the encoding used the next time the document is parsed and returns
# +encoding+.
#
# +reset+ discards all memoized state, including @parser, so the document is
# lazily re-parsed with the new encoding on the next call to #parser.
def encoding=(encoding)
  # Because reset clears @parser, a later "if @parser" check comparing the
  # old parser's encoding could never run; that dead branch is omitted.
  reset

  @encoding = encoding
end
90
+
91
# Encoding reported by the parsed document, or nil when the parser does not
# respond to +encoding+ (which includes the case of no parser at all).
def encoding
  return nil unless parser.respond_to?(:encoding)

  parser.encoding
end
94
+
95
# Reports whether +parser+ (this page's own parser when omitted) recorded any
# error whose message points at an encoding problem.  A false result only
# means no such error was reported; it does not prove the encoding is valid.
def encoding_error?(parser=nil)
  parser ||= self.parser
  return false if parser.errors.empty?

  parser.errors.any? do |problem|
    problem.message =~ /(indicate\ encoding)|
                        (Invalid\ char)|
                        (input\ conversion\ failed)/x
  end
end
106
+
107
# Returns the parsed document, building and memoizing it on first use.
# Returns nil when the page has no body.
#
# An explicitly set @encoding is used first; otherwise the agent's default
# encoding when it forces one; otherwise each entry of @encodings is tried
# from last to first until a parse reports no encoding error (the result of
# the last attempt is kept if none succeeds).
def parser
  return @parser if @parser
  return nil unless @body

  build = lambda { |charset| @mech.html_parser.parse(html_body, nil, charset) }

  if @encoding
    @parser = build.call(@encoding)
  elsif mech.force_default_encoding
    @parser = build.call(@mech.default_encoding)
  else
    @encodings.reverse_each do |candidate|
      @parser = build.call(candidate)
      break unless encoding_error?(@parser)
    end
  end

  @parser
end
125
+
126
+ alias :root :parser
127
+
128
# Discards the parsed document and every collection memoized from it, so
# they are rebuilt on next access.  Returns nil.
def reset
  @bases = nil
  @forms = nil
  @frames = nil
  @iframes = nil
  # #images and #image_urls memoize just like the other collections; without
  # clearing them they would keep nodes from a discarded parse.
  @images = nil
  @image_urls = nil
  @links = nil
  @labels = nil
  @labels_hash = nil
  @meta_refresh = nil
  @parser = nil
  @title = nil
end
140
+
141
# Returns the page's canonical URI, taken from the href of a +link+ element
# with rel="canonical", or nil when there is no such element.  An href that
# fails to parse is retried after Mechanize::Util.uri_escape.
def canonical_uri
  canonical = at('link[@rel="canonical"][@href]')
  return unless canonical

  href = canonical['href']
  URI(href)
rescue URI::InvalidURIError
  URI(Mechanize::Util.uri_escape(href))
end
152
+
153
# Content type of the page: the one declared by a +meta+ tag when present,
# otherwise the response's content-type header.
def content_type
  return @meta_content_type if @meta_content_type

  response['content-type']
end
157
+
158
# Node lookups (+search+, +/+ and +at+) are forwarded to the parsed
# document, so the page can be searched like HPricot.
def_delegators :parser, :search, :/, :at
162
+
163
+ ##
164
+ # :method: form_with(criteria)
165
+ #
166
+ # Find a single form matching +criteria+.
167
+ # Example:
168
+ # page.form_with(:action => '/post/login.php') do |f|
169
+ # ...
170
+ # end
171
+
172
+ ##
173
+ # :method: forms_with(criteria)
174
+ #
175
+ # Find all forms matching +criteria+.
176
+ # Example:
177
+ # page.forms_with(:action => '/post/login.php').each do |f|
178
+ # ...
179
+ # end
180
+
181
+ elements_with :form
182
+
183
+ ##
184
+ # :method: link_with(criteria)
185
+ #
186
+ # Find a single link matching +criteria+.
187
+ # Example:
188
+ # page.link_with(:href => /foo/).click
189
+
190
+ ##
191
+ # :method: links_with(criteria)
192
+ #
193
+ # Find all links matching +criteria+.
194
+ # Example:
195
+ # page.links_with(:href => /foo/).each do |link|
196
+ # puts link.href
197
+ # end
198
+
199
+ elements_with :link
200
+
201
+ ##
202
+ # :method: base_with(criteria)
203
+ #
204
+ # Find a single base tag matching +criteria+.
205
+ # Example:
206
+ # page.base_with(:href => /foo/).click
207
+
208
+ ##
209
+ # :method: bases_with(criteria)
210
+ #
211
+ # Find all base tags matching +criteria+.
212
+ # Example:
213
+ # page.bases_with(:href => /foo/).each do |base|
214
+ # puts base.href
215
+ # end
216
+
217
+ elements_with :base
218
+
219
+ ##
220
+ # :method: frame_with(criteria)
221
+ #
222
+ # Find a single frame tag matching +criteria+.
223
+ # Example:
224
+ # page.frame_with(:src => /foo/).click
225
+
226
+ ##
227
+ # :method: frames_with(criteria)
228
+ #
229
+ # Find all frame tags matching +criteria+.
230
+ # Example:
231
+ # page.frames_with(:src => /foo/).each do |frame|
232
+ # p frame.src
233
+ # end
234
+
235
+ elements_with :frame
236
+
237
+ ##
238
+ # :method: iframe_with(criteria)
239
+ #
240
+ # Find a single iframe tag matching +criteria+.
241
+ # Example:
242
+ # page.iframe_with(:src => /foo/).click
243
+
244
+ ##
245
+ # :method: iframes_with(criteria)
246
+ #
247
+ # Find all iframe tags matching +criteria+.
248
+ # Example:
249
+ # page.iframes_with(:src => /foo/).each do |iframe|
250
+ # p iframe.src
251
+ # end
252
+
253
+ elements_with :iframe
254
+
255
##
# Returns (and memoizes) a Link for every +a+ element followed by every
# +area+ element in the document.
def links
  return @links if @links

  collected = []
  %w{ a area }.each do |tag|
    search(tag).each do |node|
      collected << Link.new(node, @mech, self)
    end
  end
  @links = collected
end
264
+
265
##
# Returns (and memoizes) a Mechanize::Form for every +form+ element.  A form
# without an action gets this page's URI (as a string) as its action.
def forms
  return @forms if @forms

  @forms = search('form').map do |node|
    built = Mechanize::Form.new(node, @mech, self)
    built.action = @uri.to_s unless built.action
    built
  end
end
274
+
275
##
# Returns (and memoizes) the meta refresh elements of the page.  +meta+ tags
# are searched anywhere in the document when the agent's
# +follow_meta_refresh+ is :anywhere, otherwise only directly under +head+.
# Nodes for which MetaRefresh.from_node returns nil are dropped.

def meta_refresh
  # The agent is consulted on every call, before the memo check.
  selector =
    if @mech.follow_meta_refresh == :anywhere
      'meta'
    else
      'head > meta'
    end
  return @meta_refresh if @meta_refresh

  refreshes = []
  search(selector).each do |node|
    refresh = MetaRefresh.from_node(node, self, uri)
    refreshes << refresh unless refresh.nil?
  end
  @meta_refresh = refreshes
end
285
+
286
##
# Returns (and memoizes) a Base for every +base+ element in the document.
def bases
  return @bases if @bases

  @bases = search('base').map do |element|
    Base.new(element, @mech, self)
  end
end
292
+
293
##
# Returns (and memoizes) a Frame for every +frame+ element in the document.
def frames
  return @frames if @frames

  @frames = search('frame').map do |element|
    Frame.new(element, @mech, self)
  end
end
299
+
300
##
# Returns (and memoizes) a Frame for every +iframe+ element in the document.
def iframes
  return @iframes if @iframes

  @iframes = search('iframe').map do |element|
    Frame.new(element, @mech, self)
  end
end
306
+
307
##
# Returns (and memoizes) an Image for every +img+ element in the document.
def images
  return @images if @images

  @images = search('img').map do |element|
    Image.new(element, self)
  end
end
313
+
314
# Returns (and memoizes) the distinct URLs of the page's images, in
# first-seen order.
def image_urls
  return @image_urls if @image_urls

  urls = images.map { |image| image.url }
  @image_urls = urls.uniq
end
317
+
318
##
# Returns (and memoizes) a Label for every +label+ element in the document.
def labels
  return @labels if @labels

  @labels = search('label').map do |element|
    Label.new(element, self)
  end
end
324
+
325
# Returns (and memoizes) a hash mapping each label's +for+ attribute to the
# label itself.  Labels whose #for is nil or false are left out; a later
# label with the same key replaces an earlier one.
def labels_hash
  return @labels_hash if @labels_hash

  by_target = {}
  labels.each do |label|
    next unless label.for

    by_target[label.node['for']] = label
  end
  @labels_hash = by_target
end
335
+
336
# Extracts the value of the +charset+ parameter from a content-type string.
#
# Returns nil when +content_type+ is nil, when it has no charset parameter,
# or when the charset is the literal 'none'.
def self.charset content_type
  # meta_charset passes the capture of a regex that may not have matched,
  # so nil has to be tolerated here rather than raising NoMethodError.
  return nil unless content_type

  charset = content_type[/charset=([^; ]+)/i, 1]
  return nil if charset == 'none'
  charset
end
341
+
342
# Collects the charsets declared by the content-type header(s) of +response+.
#
# +response+ must respond to +each+, yielding header name and value pairs
# (presumably a Net::HTTP response or a Hash -- confirm against callers).
# Returns an array, possibly empty; an entry is nil when the header mentions
# a charset that +charset+ rejects (such as 'none').
def self.response_header_charset response
  charsets = []
  response.each do |header, value|
    # Only Content-Type describes the body's charset.  Other headers can
    # mention the word too (e.g. "Vary: Accept-Charset") and must not add
    # bogus or nil candidates.
    next unless header.to_s.downcase == 'content-type'
    next unless value =~ /charset/i
    charsets << charset(value)
  end
  charsets
end
350
+
351
##
# Retrieves all charsets from +meta+ tags in +body+
#
# Two forms are recognised per tag: a quoted +charset+ attribute, and an
# http-equiv content-type tag whose quoted +content+ attribute carries a
# charset parameter.  Tags that declare neither contribute nothing.

def self.meta_charset body
  body.scan(/<meta .*?>/i).map do |meta|
    # The value is matched lazily so it stops at the closing quote instead
    # of swallowing any attributes that follow the charset.
    if meta =~ /charset\s*=\s*(["'])?\s*(.+?)\s*\1/i then
      $2
    elsif meta =~ /http-equiv\s*=\s*(["'])?content-type\1/i then
      # A content-type tag may lack a content attribute; skip it rather
      # than handing nil to charset.
      content = meta[/content=(["'])?(.*?)\1/i, 2]

      charset content if content
    end
  end.compact
end
368
+
369
##
# Retrieves the last <tt>content-type</tt> set by a +meta+ tag in +body+
#
# Tags are examined from last to first.  An http-equiv content-type tag
# without a quoted +content+ attribute sets nothing, so it is skipped and
# the search continues with earlier tags.  Returns nil when no tag
# declares a content type.

def self.meta_content_type body
  body.scan(/<meta .*?>/i).reverse_each do |meta|
    next unless meta =~ /http-equiv\s*=\s*(["'])?content-type\1/i

    content = meta[/content=(["'])?(.*?)\1/i, 2]
    return content if content
  end

  nil
end
383
+
384
+ private
385
+
386
# Markup handed to the HTML parser: an empty string when there is no body,
# a minimal '<html></html>' document when the body is empty, and the body
# itself otherwise.
def html_body
  return '' unless @body

  @body.empty? ? '<html></html>' : @body
end
393
+
394
# Extracts the value of the +charset+ parameter from a content-type string.
# Same extraction as +charset+.
#
# Returns nil when +content_type+ is nil, when it has no charset parameter,
# or when the charset is the literal 'none'.
#
# NOTE: a bare +private+ does not apply to methods defined with
# <tt>def self.</tt>, so this remains publicly callable.
def self.charset_from_content_type content_type
  return nil unless content_type

  charset = content_type[/charset=([^; ]+)/i, 1]
  return nil if charset == 'none'
  charset
end
399
+ end
400
+
401
+ require 'mechanize/headers'
402
+ require 'mechanize/page/image'
403
+ require 'mechanize/page/label'
404
+ require 'mechanize/page/link'
405
+ require 'mechanize/page/base'
406
+ require 'mechanize/page/frame'
407
+ require 'mechanize/page/meta_refresh'
408
+