diamond-mechanize 2.1 → 2.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (147) hide show
  1. metadata +222 -167
  2. data/Rakefile +0 -49
  3. data/lib/mechanize/content_type_error.rb +0 -13
  4. data/lib/mechanize/cookie.rb +0 -232
  5. data/lib/mechanize/cookie_jar.rb +0 -194
  6. data/lib/mechanize/download.rb +0 -59
  7. data/lib/mechanize/element_matcher.rb +0 -36
  8. data/lib/mechanize/file.rb +0 -65
  9. data/lib/mechanize/file_connection.rb +0 -17
  10. data/lib/mechanize/file_request.rb +0 -26
  11. data/lib/mechanize/file_response.rb +0 -74
  12. data/lib/mechanize/file_saver.rb +0 -39
  13. data/lib/mechanize/form/button.rb +0 -6
  14. data/lib/mechanize/form/check_box.rb +0 -12
  15. data/lib/mechanize/form/field.rb +0 -54
  16. data/lib/mechanize/form/file_upload.rb +0 -21
  17. data/lib/mechanize/form/hidden.rb +0 -3
  18. data/lib/mechanize/form/image_button.rb +0 -19
  19. data/lib/mechanize/form/keygen.rb +0 -34
  20. data/lib/mechanize/form/multi_select_list.rb +0 -94
  21. data/lib/mechanize/form/option.rb +0 -50
  22. data/lib/mechanize/form/radio_button.rb +0 -55
  23. data/lib/mechanize/form/reset.rb +0 -3
  24. data/lib/mechanize/form/select_list.rb +0 -44
  25. data/lib/mechanize/form/submit.rb +0 -3
  26. data/lib/mechanize/form/text.rb +0 -3
  27. data/lib/mechanize/form/textarea.rb +0 -3
  28. data/lib/mechanize/form.rb +0 -543
  29. data/lib/mechanize/headers.rb +0 -23
  30. data/lib/mechanize/history.rb +0 -82
  31. data/lib/mechanize/http/agent.rb +0 -1004
  32. data/lib/mechanize/http/auth_challenge.rb +0 -59
  33. data/lib/mechanize/http/auth_realm.rb +0 -31
  34. data/lib/mechanize/http/content_disposition_parser.rb +0 -188
  35. data/lib/mechanize/http/www_authenticate_parser.rb +0 -155
  36. data/lib/mechanize/http.rb +0 -8
  37. data/lib/mechanize/monkey_patch.rb +0 -16
  38. data/lib/mechanize/page/base.rb +0 -7
  39. data/lib/mechanize/page/frame.rb +0 -27
  40. data/lib/mechanize/page/image.rb +0 -30
  41. data/lib/mechanize/page/label.rb +0 -20
  42. data/lib/mechanize/page/link.rb +0 -98
  43. data/lib/mechanize/page/meta_refresh.rb +0 -68
  44. data/lib/mechanize/page.rb +0 -440
  45. data/lib/mechanize/parser.rb +0 -173
  46. data/lib/mechanize/pluggable_parsers.rb +0 -144
  47. data/lib/mechanize/redirect_limit_reached_error.rb +0 -19
  48. data/lib/mechanize/redirect_not_get_or_head_error.rb +0 -21
  49. data/lib/mechanize/response_code_error.rb +0 -21
  50. data/lib/mechanize/response_read_error.rb +0 -27
  51. data/lib/mechanize/robots_disallowed_error.rb +0 -28
  52. data/lib/mechanize/test_case.rb +0 -663
  53. data/lib/mechanize/unauthorized_error.rb +0 -3
  54. data/lib/mechanize/unsupported_scheme_error.rb +0 -6
  55. data/lib/mechanize/util.rb +0 -101
  56. data/lib/mechanize.rb +0 -1079
  57. data/test/data/htpasswd +0 -1
  58. data/test/data/server.crt +0 -16
  59. data/test/data/server.csr +0 -12
  60. data/test/data/server.key +0 -15
  61. data/test/data/server.pem +0 -15
  62. data/test/htdocs/alt_text.html +0 -10
  63. data/test/htdocs/bad_form_test.html +0 -9
  64. data/test/htdocs/button.jpg +0 -0
  65. data/test/htdocs/canonical_uri.html +0 -9
  66. data/test/htdocs/dir with spaces/foo.html +0 -1
  67. data/test/htdocs/empty_form.html +0 -6
  68. data/test/htdocs/file_upload.html +0 -26
  69. data/test/htdocs/find_link.html +0 -41
  70. data/test/htdocs/form_multi_select.html +0 -16
  71. data/test/htdocs/form_multival.html +0 -37
  72. data/test/htdocs/form_no_action.html +0 -18
  73. data/test/htdocs/form_no_input_name.html +0 -16
  74. data/test/htdocs/form_order_test.html +0 -11
  75. data/test/htdocs/form_select.html +0 -16
  76. data/test/htdocs/form_set_fields.html +0 -14
  77. data/test/htdocs/form_test.html +0 -188
  78. data/test/htdocs/frame_referer_test.html +0 -10
  79. data/test/htdocs/frame_test.html +0 -30
  80. data/test/htdocs/google.html +0 -13
  81. data/test/htdocs/index.html +0 -6
  82. data/test/htdocs/link with space.html +0 -5
  83. data/test/htdocs/meta_cookie.html +0 -11
  84. data/test/htdocs/no_title_test.html +0 -6
  85. data/test/htdocs/noindex.html +0 -9
  86. data/test/htdocs/rails_3_encoding_hack_form_test.html +0 -27
  87. data/test/htdocs/relative/tc_relative_links.html +0 -21
  88. data/test/htdocs/robots.html +0 -8
  89. data/test/htdocs/robots.txt +0 -2
  90. data/test/htdocs/tc_bad_charset.html +0 -9
  91. data/test/htdocs/tc_bad_links.html +0 -5
  92. data/test/htdocs/tc_base_link.html +0 -8
  93. data/test/htdocs/tc_blank_form.html +0 -11
  94. data/test/htdocs/tc_charset.html +0 -6
  95. data/test/htdocs/tc_checkboxes.html +0 -19
  96. data/test/htdocs/tc_encoded_links.html +0 -5
  97. data/test/htdocs/tc_field_precedence.html +0 -11
  98. data/test/htdocs/tc_follow_meta.html +0 -8
  99. data/test/htdocs/tc_form_action.html +0 -48
  100. data/test/htdocs/tc_links.html +0 -19
  101. data/test/htdocs/tc_meta_in_body.html +0 -9
  102. data/test/htdocs/tc_pretty_print.html +0 -17
  103. data/test/htdocs/tc_referer.html +0 -16
  104. data/test/htdocs/tc_relative_links.html +0 -19
  105. data/test/htdocs/tc_textarea.html +0 -23
  106. data/test/htdocs/test_click.html +0 -11
  107. data/test/htdocs/unusual______.html +0 -5
  108. data/test/test_mechanize.rb +0 -1164
  109. data/test/test_mechanize_cookie.rb +0 -451
  110. data/test/test_mechanize_cookie_jar.rb +0 -483
  111. data/test/test_mechanize_download.rb +0 -43
  112. data/test/test_mechanize_file.rb +0 -61
  113. data/test/test_mechanize_file_connection.rb +0 -21
  114. data/test/test_mechanize_file_request.rb +0 -19
  115. data/test/test_mechanize_file_saver.rb +0 -21
  116. data/test/test_mechanize_form.rb +0 -875
  117. data/test/test_mechanize_form_check_box.rb +0 -38
  118. data/test/test_mechanize_form_encoding.rb +0 -114
  119. data/test/test_mechanize_form_field.rb +0 -63
  120. data/test/test_mechanize_form_file_upload.rb +0 -20
  121. data/test/test_mechanize_form_image_button.rb +0 -12
  122. data/test/test_mechanize_form_keygen.rb +0 -32
  123. data/test/test_mechanize_form_multi_select_list.rb +0 -84
  124. data/test/test_mechanize_form_option.rb +0 -55
  125. data/test/test_mechanize_form_radio_button.rb +0 -78
  126. data/test/test_mechanize_form_select_list.rb +0 -76
  127. data/test/test_mechanize_form_textarea.rb +0 -52
  128. data/test/test_mechanize_headers.rb +0 -35
  129. data/test/test_mechanize_history.rb +0 -103
  130. data/test/test_mechanize_http_agent.rb +0 -1225
  131. data/test/test_mechanize_http_auth_challenge.rb +0 -39
  132. data/test/test_mechanize_http_auth_realm.rb +0 -49
  133. data/test/test_mechanize_http_content_disposition_parser.rb +0 -118
  134. data/test/test_mechanize_http_www_authenticate_parser.rb +0 -146
  135. data/test/test_mechanize_link.rb +0 -80
  136. data/test/test_mechanize_page.rb +0 -118
  137. data/test/test_mechanize_page_encoding.rb +0 -182
  138. data/test/test_mechanize_page_frame.rb +0 -16
  139. data/test/test_mechanize_page_link.rb +0 -390
  140. data/test/test_mechanize_page_meta_refresh.rb +0 -127
  141. data/test/test_mechanize_parser.rb +0 -289
  142. data/test/test_mechanize_pluggable_parser.rb +0 -52
  143. data/test/test_mechanize_redirect_limit_reached_error.rb +0 -24
  144. data/test/test_mechanize_redirect_not_get_or_head_error.rb +0 -14
  145. data/test/test_mechanize_subclass.rb +0 -22
  146. data/test/test_mechanize_util.rb +0 -103
  147. data/test/test_multi_select.rb +0 -119
@@ -1,7 +0,0 @@
1
- ##
2
- # A base element on an HTML page. Mechanize treats base tags just like 'a'
3
- # tags. Base objects will contain links, but most likely will have no text.
4
-
5
- class Mechanize::Page::Base < Mechanize::Page::Link
6
- end
7
-
@@ -1,27 +0,0 @@
1
- # A Frame object wraps a frame HTML element. Frame objects can be treated
2
- # just like Link objects. They contain #src, the #link they refer to and a
3
- # #name, the name of the frame they refer to. #src and #name are aliased to
4
- # #href and #text respectively so that a Frame object can be treated just like
5
- # a Link.
6
-
7
- class Mechanize::Page::Frame < Mechanize::Page::Link
8
-
9
- alias :src :href
10
-
11
- attr_reader :text
12
- alias :name :text
13
-
14
- def initialize(node, mech, referer)
15
- super(node, mech, referer)
16
- @node = node
17
- @text = node['name']
18
- @href = node['src']
19
- @content = nil
20
- end
21
-
22
- def content
23
- @content ||= @mech.get @href, [], page
24
- end
25
-
26
- end
27
-
@@ -1,30 +0,0 @@
1
- ##
2
- # An image element on an HTML page
3
-
4
- class Mechanize::Page::Image
5
- attr_reader :node
6
- attr_reader :page
7
-
8
- def initialize(node, page)
9
- @node = node
10
- @page = page
11
- end
12
-
13
- def src
14
- @node['src']
15
- end
16
-
17
- def url
18
- case src
19
- when %r{^https?://}
20
- src
21
- else
22
- if page.bases[0]
23
- (page.bases[0].href + src).to_s
24
- else
25
- (page.uri + src).to_s
26
- end
27
- end
28
- end
29
- end
30
-
@@ -1,20 +0,0 @@
1
- ##
2
- # A form label on an HTML page
3
-
4
- class Mechanize::Page::Label
5
- attr_reader :node
6
- attr_reader :text
7
- attr_reader :page
8
- alias :to_s :text
9
-
10
- def initialize(node, page)
11
- @node = node
12
- @text = node.inner_text
13
- @page = page
14
- end
15
-
16
- def for
17
- (id = @node['for']) && page.search("##{id}") || nil
18
- end
19
- end
20
-
@@ -1,98 +0,0 @@
1
- ##
2
- # This class encapsulates links. It contains the text and the URI for
3
- # 'a' tags parsed out of an HTML page. If the link contains an image,
4
- # the alt text will be used for that image.
5
- #
6
- # For example, the text for the following links will both be 'Hello World':
7
- #
8
- # <a href="http://example">Hello World</a>
9
- # <a href="http://example"><img src="test.jpg" alt="Hello World"></a>
10
-
11
- class Mechanize::Page::Link
12
- attr_reader :node
13
- attr_reader :href
14
- attr_reader :attributes
15
- attr_reader :page
16
- alias :referer :page
17
-
18
- def initialize(node, mech, page)
19
- @node = node
20
- @attributes = node
21
- @href = node['href']
22
- @mech = mech
23
- @page = page
24
- @text = nil
25
- @uri = nil
26
- end
27
-
28
- # Click on this link
29
- def click
30
- @mech.click self
31
- end
32
-
33
- # This method is a shorthand to get link's DOM id.
34
- # Common usage:
35
- # page.link_with(:dom_id => "links_exact_id")
36
- def dom_id
37
- node['id']
38
- end
39
-
40
- # This method is a shorthand to get a link's DOM class
41
- # Common usage:
42
- # page.link_with(:dom_class => "links_exact_class")
43
- def dom_class
44
- node['class']
45
- end
46
-
47
- def pretty_print(q) # :nodoc:
48
- q.object_group(self) {
49
- q.breakable; q.pp text
50
- q.breakable; q.pp href
51
- }
52
- end
53
-
54
- alias inspect pretty_inspect # :nodoc:
55
-
56
- # A list of words in the rel attribute, all lower-cased.
57
- def rel
58
- @rel ||= (val = attributes['rel']) ? val.downcase.split(' ') : []
59
- end
60
-
61
- # Test if the rel attribute includes +kind+.
62
- def rel? kind
63
- rel.include? kind
64
- end
65
-
66
- # The text content of this link
67
- def text
68
- return @text if @text
69
-
70
- @text = @node.inner_text
71
-
72
- # If there is no text, try to find an image and use its alt text
73
- if (@text.nil? or @text.empty?) and imgs = @node.search('img') then
74
- @text = imgs.map do |e|
75
- e['alt']
76
- end.join
77
- end
78
-
79
- @text
80
- end
81
-
82
- alias :to_s :text
83
-
84
- # A URI for the #href for this link. The link is first parsed as a raw
85
- # link. If that fails, parsing an escaped link is attempted.
86
-
87
- def uri
88
- @uri ||= if @href then
89
- begin
90
- URI.parse @href
91
- rescue URI::InvalidURIError
92
- URI.parse WEBrick::HTTPUtils.escape @href
93
- end
94
- end
95
- end
96
-
97
- end
98
-
@@ -1,68 +0,0 @@
1
- ##
2
- # This class encapsulates a meta element with a refresh http-equiv. Mechanize
3
- # treats meta refresh elements just like 'a' tags. MetaRefresh objects will
4
- # contain links, but most likely will have no text.
5
-
6
- class Mechanize::Page::MetaRefresh < Mechanize::Page::Link
7
-
8
- ##
9
- # Time to wait before next refresh
10
-
11
- attr_reader :delay
12
-
13
- ##
14
- # This MetaRefresh link did not contain a url= in the content attribute and
15
- # links to itself.
16
-
17
- attr_reader :link_self
18
-
19
- ##
20
- # Matches the content attribute of a meta refresh element. After the match:
21
- #
22
- # $1:: delay
23
- # $3:: url
24
-
25
- CONTENT_REGEXP = /^\s*(\d+\.?\d*)(;|;\s*url=\s*['"]?(\S*?)['"]?)?\s*$/i
26
-
27
- ##
28
- # Parses the delay and url from the content attribute of a meta refresh
29
- # element. Parse requires the uri of the current page to infer a url when
30
- # no url is specified.
31
- #
32
- # Returns an array of [delay, url, link_self]. (delay is a string)
33
- #
34
- # Returns nil if the delay and url cannot be parsed.
35
-
36
- def self.parse content, base_uri
37
- return unless content =~ CONTENT_REGEXP
38
-
39
- link_self = $3.nil? || $3.empty?
40
- delay, refresh_uri = $1, $3
41
-
42
- dest = base_uri
43
- dest += refresh_uri if refresh_uri
44
-
45
- return delay, dest, link_self
46
- end
47
-
48
- def self.from_node node, page, uri
49
- http_equiv = node['http-equiv']
50
- return unless http_equiv and http_equiv.downcase == 'refresh'
51
-
52
- delay, uri, link_self = parse node['content'], uri
53
-
54
- return unless delay
55
-
56
- new node, page, delay, uri.to_s, link_self
57
- end
58
-
59
- def initialize node, page, delay, href, link_self = false
60
- super node, page.mech, page
61
-
62
- @delay = delay =~ /\./ ? delay.to_f : delay.to_i
63
- @href = href
64
- @link_self = link_self
65
- end
66
-
67
- end
68
-
@@ -1,440 +0,0 @@
1
- ##
2
- # This class encapsulates an HTML page. If Mechanize finds a content
3
- # type of 'text/html', this class will be instantiated and returned.
4
- #
5
- # Example:
6
- #
7
- # require 'mechanize'
8
- #
9
- # agent = Mechanize.new
10
- # agent.get('http://google.com/').class # => Mechanize::Page
11
-
12
- class Mechanize::Page < Mechanize::File
13
- extend Forwardable
14
- extend Mechanize::ElementMatcher
15
-
16
- attr_accessor :mech
17
-
18
- ##
19
- # Possible encodings for this page based on HTTP headers and meta elements
20
-
21
- attr_reader :encodings
22
-
23
- def initialize(uri=nil, response=nil, body=nil, code=nil, mech=nil)
24
- raise Mechanize::ContentTypeError, response['content-type'] unless
25
- response['content-type'] =~ /^(text\/html)|(application\/xhtml\+xml)/i
26
-
27
- @meta_content_type = nil
28
- @encoding = nil
29
- @encodings = [nil]
30
- raise 'no' if mech and not Mechanize === mech
31
- @mech = mech
32
-
33
- reset
34
-
35
- @encodings << Mechanize::Util.detect_charset(body) if body
36
-
37
- @encodings.concat self.class.response_header_charset(response)
38
-
39
- if body
40
- # Force the encoding to be 8BIT so we can perform regular expressions.
41
- # We'll set it to the detected encoding later
42
- body.force_encoding 'ASCII-8BIT' if body.respond_to? :force_encoding
43
-
44
- @encodings.concat self.class.meta_charset body
45
-
46
- meta_content_type = self.class.meta_content_type body
47
- @meta_content_type = meta_content_type if meta_content_type
48
- end
49
-
50
- @encodings << mech.default_encoding if mech and mech.default_encoding
51
-
52
- super uri, response, body, code
53
- end
54
-
55
- def title
56
- @title ||=
57
- if doc = parser
58
- title = doc.search('title').inner_text
59
- title.empty? ? nil : title
60
- end
61
- end
62
-
63
- def response_header_charset
64
- self.class.response_header_charset(response)
65
- end
66
-
67
- def meta_charset
68
- self.class.meta_charset(body)
69
- end
70
-
71
- def detected_encoding
72
- Mechanize::Util.detect_charset(body)
73
- end
74
-
75
- def encoding=(encoding)
76
- reset
77
-
78
- @encoding = encoding
79
-
80
- if @parser
81
- parser_encoding = @parser.encoding
82
- if (parser_encoding && parser_encoding.downcase) != (encoding && encoding.downcase)
83
- # lazy reinitialize the parser with the new encoding
84
- @parser = nil
85
- end
86
- end
87
-
88
- encoding
89
- end
90
-
91
- def encoding
92
- parser.respond_to?(:encoding) ? parser.encoding : nil
93
- end
94
-
95
- # Return whether parser result has errors related to encoding or not.
96
- # false indicates only that the parser has no encoding errors, not that the encoding is valid.
97
- def encoding_error?(parser=nil)
98
- parser = self.parser unless parser
99
- return false if parser.errors.empty?
100
- parser.errors.any? do |error|
101
- error.message =~ /(indicate\ encoding)|
102
- (Invalid\ char)|
103
- (input\ conversion\ failed)/x
104
- end
105
- end
106
-
107
- def parser
108
- return @parser if @parser
109
- return nil unless @body
110
-
111
- if @encoding then
112
- @parser = @mech.html_parser.parse html_body, nil, @encoding
113
- elsif mech.force_default_encoding then
114
- @parser = @mech.html_parser.parse html_body, nil, @mech.default_encoding
115
- else
116
- @encodings.reverse_each do |encoding|
117
- @parser = @mech.html_parser.parse html_body, nil, encoding
118
-
119
- break unless encoding_error? @parser
120
- end
121
- end
122
-
123
- @parser
124
- end
125
-
126
- alias :root :parser
127
-
128
- def pretty_print(q) # :nodoc:
129
- q.object_group(self) {
130
- q.breakable
131
- q.group(1, '{url', '}') {q.breakable; q.pp uri }
132
- q.breakable
133
- q.group(1, '{meta_refresh', '}') {
134
- meta_refresh.each { |link| q.breakable; q.pp link }
135
- }
136
- q.breakable
137
- q.group(1, '{title', '}') { q.breakable; q.pp title }
138
- q.breakable
139
- q.group(1, '{iframes', '}') {
140
- iframes.each { |link| q.breakable; q.pp link }
141
- }
142
- q.breakable
143
- q.group(1, '{frames', '}') {
144
- frames.each { |link| q.breakable; q.pp link }
145
- }
146
- q.breakable
147
- q.group(1, '{links', '}') {
148
- links.each { |link| q.breakable; q.pp link }
149
- }
150
- q.breakable
151
- q.group(1, '{forms', '}') {
152
- forms.each { |form| q.breakable; q.pp form }
153
- }
154
- }
155
- end
156
-
157
- alias inspect pretty_inspect # :nodoc:
158
-
159
- def reset
160
- @bases = nil
161
- @forms = nil
162
- @frames = nil
163
- @iframes = nil
164
- @links = nil
165
- @labels = nil
166
- @labels_hash = nil
167
- @meta_refresh = nil
168
- @parser = nil
169
- @title = nil
170
- end
171
-
172
- # Return the canonical URI for the page if there is a link tag
173
- # with rel="canonical".
174
- def canonical_uri
175
- link = at('link[@rel="canonical"][@href]')
176
- return unless link
177
- href = link['href']
178
-
179
- URI href
180
- rescue URI::InvalidURIError
181
- URI Mechanize::Util.uri_escape href
182
- end
183
-
184
- # Get the content type
185
- def content_type
186
- @meta_content_type || response['content-type']
187
- end
188
-
189
- # Search through the page like HPricot
190
- def_delegator :parser, :search, :search
191
- def_delegator :parser, :/, :/
192
- def_delegator :parser, :at, :at
193
-
194
- ##
195
- # :method: form_with(criteria)
196
- #
197
- # Find a single form matching +criteria+.
198
- # Example:
199
- # page.form_with(:action => '/post/login.php') do |f|
200
- # ...
201
- # end
202
-
203
- ##
204
- # :method: forms_with(criteria)
205
- #
206
- # Find all forms matching +criteria+.
207
- # Example:
208
- # page.forms_with(:action => '/post/login.php').each do |f|
209
- # ...
210
- # end
211
-
212
- elements_with :form
213
-
214
- ##
215
- # :method: link_with(criteria)
216
- #
217
- # Find a single link matching +criteria+.
218
- # Example:
219
- # page.link_with(:href => /foo/).click
220
-
221
- ##
222
- # :method: links_with(criteria)
223
- #
224
- # Find all links matching +criteria+.
225
- # Example:
226
- # page.links_with(:href => /foo/).each do |link|
227
- # puts link.href
228
- # end
229
-
230
- elements_with :link
231
-
232
- ##
233
- # :method: base_with(criteria)
234
- #
235
- # Find a single base tag matching +criteria+.
236
- # Example:
237
- # page.base_with(:href => /foo/).click
238
-
239
- ##
240
- # :method: bases_with(criteria)
241
- #
242
- # Find all base tags matching +criteria+.
243
- # Example:
244
- # page.bases_with(:href => /foo/).each do |base|
245
- # puts base.href
246
- # end
247
-
248
- elements_with :base
249
-
250
- ##
251
- # :method: frame_with(criteria)
252
- #
253
- # Find a single frame tag matching +criteria+.
254
- # Example:
255
- # page.frame_with(:src => /foo/).click
256
-
257
- ##
258
- # :method: frames_with(criteria)
259
- #
260
- # Find all frame tags matching +criteria+.
261
- # Example:
262
- # page.frames_with(:src => /foo/).each do |frame|
263
- # p frame.src
264
- # end
265
-
266
- elements_with :frame
267
-
268
- ##
269
- # :method: iframe_with(criteria)
270
- #
271
- # Find a single iframe tag matching +criteria+.
272
- # Example:
273
- # page.iframe_with(:src => /foo/).click
274
-
275
- ##
276
- # :method: iframes_with(criteria)
277
- #
278
- # Find all iframe tags matching +criteria+.
279
- # Example:
280
- # page.iframes_with(:src => /foo/).each do |iframe|
281
- # p iframe.src
282
- # end
283
-
284
- elements_with :iframe
285
-
286
- ##
287
- # Return a list of all link and area tags
288
- def links
289
- @links ||= %w{ a area }.map do |tag|
290
- search(tag).map do |node|
291
- Link.new(node, @mech, self)
292
- end
293
- end.flatten
294
- end
295
-
296
- ##
297
- # Return a list of all form tags
298
- def forms
299
- @forms ||= search('form').map do |html_form|
300
- form = Mechanize::Form.new(html_form, @mech, self)
301
- form.action ||= @uri.to_s
302
- form
303
- end
304
- end
305
-
306
- ##
307
- # Return a list of all meta refresh elements
308
-
309
- def meta_refresh
310
- query = @mech.follow_meta_refresh == :anywhere ? 'meta' : 'head > meta'
311
-
312
- @meta_refresh ||= search(query).map do |node|
313
- MetaRefresh.from_node node, self, uri
314
- end.compact
315
- end
316
-
317
- ##
318
- # Return a list of all base tags
319
- def bases
320
- @bases ||=
321
- search('base').map { |node| Base.new(node, @mech, self) }
322
- end
323
-
324
- ##
325
- # Return a list of all frame tags
326
- def frames
327
- @frames ||=
328
- search('frame').map { |node| Frame.new(node, @mech, self) }
329
- end
330
-
331
- ##
332
- # Return a list of all iframe tags
333
- def iframes
334
- @iframes ||=
335
- search('iframe').map { |node| Frame.new(node, @mech, self) }
336
- end
337
-
338
- ##
339
- # Return a list of all img tags
340
- def images
341
- @images ||=
342
- search('img').map { |node| Image.new(node, self) }
343
- end
344
-
345
- def image_urls
346
- @image_urls ||= images.map(&:url).uniq
347
- end
348
-
349
- ##
350
- # Return a list of all label tags
351
- def labels
352
- @labels ||=
353
- search('label').map { |node| Label.new(node, self) }
354
- end
355
-
356
- def labels_hash
357
- unless @labels_hash
358
- hash = {}
359
- labels.each do |label|
360
- hash[label.node['for']] = label if label.for
361
- end
362
- @labels_hash = hash
363
- end
364
- return @labels_hash
365
- end
366
-
367
- def self.charset content_type
368
- charset = content_type[/charset=([^; ]+)/i, 1]
369
- return nil if charset == 'none'
370
- charset
371
- end
372
-
373
- def self.response_header_charset response
374
- charsets = []
375
- response.each do |header, value|
376
- next unless header == 'content-type'
377
- next unless value =~ /charset/i
378
- charsets << charset(value)
379
- end
380
- charsets
381
- end
382
-
383
- ##
384
- # Retrieves all charsets from +meta+ tags in +body+
385
-
386
- def self.meta_charset body
387
- # HACK use .map
388
- body.scan(/<meta .*?>/i).map do |meta|
389
- if meta =~ /charset\s*=\s*(["'])?\s*(.+)\s*\1/i then
390
- $2
391
- elsif meta =~ /http-equiv\s*=\s*(["'])?content-type\1/i then
392
- meta =~ /content\s*=\s*(["'])?(.*?)\1/i
393
-
394
- m_charset = charset $2 if $2
395
-
396
- m_charset if m_charset
397
- end
398
- end.compact
399
- end
400
-
401
- ##
402
- # Retrieves the last <tt>content-type</tt> set by a +meta+ tag in +body+
403
-
404
- def self.meta_content_type body
405
- body.scan(/<meta .*?>/i).reverse.map do |meta|
406
- if meta =~ /http-equiv\s*=\s*(["'])?content-type\1/i then
407
- meta =~ /content=(["'])?(.*?)\1/i
408
-
409
- return $2
410
- end
411
- end
412
-
413
- nil
414
- end
415
-
416
- private
417
-
418
- def html_body
419
- if @body
420
- @body.empty? ? '<html></html>' : @body
421
- else
422
- ''
423
- end
424
- end
425
-
426
- def self.charset_from_content_type content_type
427
- charset = content_type[/charset=([^; ]+)/i, 1]
428
- return nil if charset == 'none'
429
- charset
430
- end
431
- end
432
-
433
- require 'mechanize/headers'
434
- require 'mechanize/page/image'
435
- require 'mechanize/page/label'
436
- require 'mechanize/page/link'
437
- require 'mechanize/page/base'
438
- require 'mechanize/page/frame'
439
- require 'mechanize/page/meta_refresh'
440
-