diamond-mechanize 2.1 → 2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (147) hide show
  1. metadata +222 -167
  2. data/Rakefile +0 -49
  3. data/lib/mechanize/content_type_error.rb +0 -13
  4. data/lib/mechanize/cookie.rb +0 -232
  5. data/lib/mechanize/cookie_jar.rb +0 -194
  6. data/lib/mechanize/download.rb +0 -59
  7. data/lib/mechanize/element_matcher.rb +0 -36
  8. data/lib/mechanize/file.rb +0 -65
  9. data/lib/mechanize/file_connection.rb +0 -17
  10. data/lib/mechanize/file_request.rb +0 -26
  11. data/lib/mechanize/file_response.rb +0 -74
  12. data/lib/mechanize/file_saver.rb +0 -39
  13. data/lib/mechanize/form/button.rb +0 -6
  14. data/lib/mechanize/form/check_box.rb +0 -12
  15. data/lib/mechanize/form/field.rb +0 -54
  16. data/lib/mechanize/form/file_upload.rb +0 -21
  17. data/lib/mechanize/form/hidden.rb +0 -3
  18. data/lib/mechanize/form/image_button.rb +0 -19
  19. data/lib/mechanize/form/keygen.rb +0 -34
  20. data/lib/mechanize/form/multi_select_list.rb +0 -94
  21. data/lib/mechanize/form/option.rb +0 -50
  22. data/lib/mechanize/form/radio_button.rb +0 -55
  23. data/lib/mechanize/form/reset.rb +0 -3
  24. data/lib/mechanize/form/select_list.rb +0 -44
  25. data/lib/mechanize/form/submit.rb +0 -3
  26. data/lib/mechanize/form/text.rb +0 -3
  27. data/lib/mechanize/form/textarea.rb +0 -3
  28. data/lib/mechanize/form.rb +0 -543
  29. data/lib/mechanize/headers.rb +0 -23
  30. data/lib/mechanize/history.rb +0 -82
  31. data/lib/mechanize/http/agent.rb +0 -1004
  32. data/lib/mechanize/http/auth_challenge.rb +0 -59
  33. data/lib/mechanize/http/auth_realm.rb +0 -31
  34. data/lib/mechanize/http/content_disposition_parser.rb +0 -188
  35. data/lib/mechanize/http/www_authenticate_parser.rb +0 -155
  36. data/lib/mechanize/http.rb +0 -8
  37. data/lib/mechanize/monkey_patch.rb +0 -16
  38. data/lib/mechanize/page/base.rb +0 -7
  39. data/lib/mechanize/page/frame.rb +0 -27
  40. data/lib/mechanize/page/image.rb +0 -30
  41. data/lib/mechanize/page/label.rb +0 -20
  42. data/lib/mechanize/page/link.rb +0 -98
  43. data/lib/mechanize/page/meta_refresh.rb +0 -68
  44. data/lib/mechanize/page.rb +0 -440
  45. data/lib/mechanize/parser.rb +0 -173
  46. data/lib/mechanize/pluggable_parsers.rb +0 -144
  47. data/lib/mechanize/redirect_limit_reached_error.rb +0 -19
  48. data/lib/mechanize/redirect_not_get_or_head_error.rb +0 -21
  49. data/lib/mechanize/response_code_error.rb +0 -21
  50. data/lib/mechanize/response_read_error.rb +0 -27
  51. data/lib/mechanize/robots_disallowed_error.rb +0 -28
  52. data/lib/mechanize/test_case.rb +0 -663
  53. data/lib/mechanize/unauthorized_error.rb +0 -3
  54. data/lib/mechanize/unsupported_scheme_error.rb +0 -6
  55. data/lib/mechanize/util.rb +0 -101
  56. data/lib/mechanize.rb +0 -1079
  57. data/test/data/htpasswd +0 -1
  58. data/test/data/server.crt +0 -16
  59. data/test/data/server.csr +0 -12
  60. data/test/data/server.key +0 -15
  61. data/test/data/server.pem +0 -15
  62. data/test/htdocs/alt_text.html +0 -10
  63. data/test/htdocs/bad_form_test.html +0 -9
  64. data/test/htdocs/button.jpg +0 -0
  65. data/test/htdocs/canonical_uri.html +0 -9
  66. data/test/htdocs/dir with spaces/foo.html +0 -1
  67. data/test/htdocs/empty_form.html +0 -6
  68. data/test/htdocs/file_upload.html +0 -26
  69. data/test/htdocs/find_link.html +0 -41
  70. data/test/htdocs/form_multi_select.html +0 -16
  71. data/test/htdocs/form_multival.html +0 -37
  72. data/test/htdocs/form_no_action.html +0 -18
  73. data/test/htdocs/form_no_input_name.html +0 -16
  74. data/test/htdocs/form_order_test.html +0 -11
  75. data/test/htdocs/form_select.html +0 -16
  76. data/test/htdocs/form_set_fields.html +0 -14
  77. data/test/htdocs/form_test.html +0 -188
  78. data/test/htdocs/frame_referer_test.html +0 -10
  79. data/test/htdocs/frame_test.html +0 -30
  80. data/test/htdocs/google.html +0 -13
  81. data/test/htdocs/index.html +0 -6
  82. data/test/htdocs/link with space.html +0 -5
  83. data/test/htdocs/meta_cookie.html +0 -11
  84. data/test/htdocs/no_title_test.html +0 -6
  85. data/test/htdocs/noindex.html +0 -9
  86. data/test/htdocs/rails_3_encoding_hack_form_test.html +0 -27
  87. data/test/htdocs/relative/tc_relative_links.html +0 -21
  88. data/test/htdocs/robots.html +0 -8
  89. data/test/htdocs/robots.txt +0 -2
  90. data/test/htdocs/tc_bad_charset.html +0 -9
  91. data/test/htdocs/tc_bad_links.html +0 -5
  92. data/test/htdocs/tc_base_link.html +0 -8
  93. data/test/htdocs/tc_blank_form.html +0 -11
  94. data/test/htdocs/tc_charset.html +0 -6
  95. data/test/htdocs/tc_checkboxes.html +0 -19
  96. data/test/htdocs/tc_encoded_links.html +0 -5
  97. data/test/htdocs/tc_field_precedence.html +0 -11
  98. data/test/htdocs/tc_follow_meta.html +0 -8
  99. data/test/htdocs/tc_form_action.html +0 -48
  100. data/test/htdocs/tc_links.html +0 -19
  101. data/test/htdocs/tc_meta_in_body.html +0 -9
  102. data/test/htdocs/tc_pretty_print.html +0 -17
  103. data/test/htdocs/tc_referer.html +0 -16
  104. data/test/htdocs/tc_relative_links.html +0 -19
  105. data/test/htdocs/tc_textarea.html +0 -23
  106. data/test/htdocs/test_click.html +0 -11
  107. data/test/htdocs/unusual______.html +0 -5
  108. data/test/test_mechanize.rb +0 -1164
  109. data/test/test_mechanize_cookie.rb +0 -451
  110. data/test/test_mechanize_cookie_jar.rb +0 -483
  111. data/test/test_mechanize_download.rb +0 -43
  112. data/test/test_mechanize_file.rb +0 -61
  113. data/test/test_mechanize_file_connection.rb +0 -21
  114. data/test/test_mechanize_file_request.rb +0 -19
  115. data/test/test_mechanize_file_saver.rb +0 -21
  116. data/test/test_mechanize_form.rb +0 -875
  117. data/test/test_mechanize_form_check_box.rb +0 -38
  118. data/test/test_mechanize_form_encoding.rb +0 -114
  119. data/test/test_mechanize_form_field.rb +0 -63
  120. data/test/test_mechanize_form_file_upload.rb +0 -20
  121. data/test/test_mechanize_form_image_button.rb +0 -12
  122. data/test/test_mechanize_form_keygen.rb +0 -32
  123. data/test/test_mechanize_form_multi_select_list.rb +0 -84
  124. data/test/test_mechanize_form_option.rb +0 -55
  125. data/test/test_mechanize_form_radio_button.rb +0 -78
  126. data/test/test_mechanize_form_select_list.rb +0 -76
  127. data/test/test_mechanize_form_textarea.rb +0 -52
  128. data/test/test_mechanize_headers.rb +0 -35
  129. data/test/test_mechanize_history.rb +0 -103
  130. data/test/test_mechanize_http_agent.rb +0 -1225
  131. data/test/test_mechanize_http_auth_challenge.rb +0 -39
  132. data/test/test_mechanize_http_auth_realm.rb +0 -49
  133. data/test/test_mechanize_http_content_disposition_parser.rb +0 -118
  134. data/test/test_mechanize_http_www_authenticate_parser.rb +0 -146
  135. data/test/test_mechanize_link.rb +0 -80
  136. data/test/test_mechanize_page.rb +0 -118
  137. data/test/test_mechanize_page_encoding.rb +0 -182
  138. data/test/test_mechanize_page_frame.rb +0 -16
  139. data/test/test_mechanize_page_link.rb +0 -390
  140. data/test/test_mechanize_page_meta_refresh.rb +0 -127
  141. data/test/test_mechanize_parser.rb +0 -289
  142. data/test/test_mechanize_pluggable_parser.rb +0 -52
  143. data/test/test_mechanize_redirect_limit_reached_error.rb +0 -24
  144. data/test/test_mechanize_redirect_not_get_or_head_error.rb +0 -14
  145. data/test/test_mechanize_subclass.rb +0 -22
  146. data/test/test_mechanize_util.rb +0 -103
  147. data/test/test_multi_select.rb +0 -119
@@ -1,7 +0,0 @@
1
- ##
2
- # A base element on an HTML page. Mechanize treats base tags just like 'a'
3
- # tags. Base objects will contain links, but most likely will have no text.
4
-
5
- class Mechanize::Page::Base < Mechanize::Page::Link
6
- end
7
-
@@ -1,27 +0,0 @@
1
- # A Frame object wraps a frame HTML element. Frame objects can be treated
2
- # just like Link objects. They contain #src, the #link they refer to and a
3
- # #name, the name of the frame they refer to. #src and #name are aliased to
4
- # #href and #text respectively so that a Frame object can be treated just like
5
- # a Link.
6
-
7
- class Mechanize::Page::Frame < Mechanize::Page::Link
8
-
9
- alias :src :href
10
-
11
- attr_reader :text
12
- alias :name :text
13
-
14
- def initialize(node, mech, referer)
15
- super(node, mech, referer)
16
- @node = node
17
- @text = node['name']
18
- @href = node['src']
19
- @content = nil
20
- end
21
-
22
- def content
23
- @content ||= @mech.get @href, [], page
24
- end
25
-
26
- end
27
-
@@ -1,30 +0,0 @@
1
- ##
2
- # An image element on an HTML page
3
-
4
- class Mechanize::Page::Image
5
- attr_reader :node
6
- attr_reader :page
7
-
8
- def initialize(node, page)
9
- @node = node
10
- @page = page
11
- end
12
-
13
- def src
14
- @node['src']
15
- end
16
-
17
- def url
18
- case src
19
- when %r{^https?://}
20
- src
21
- else
22
- if page.bases[0]
23
- (page.bases[0].href + src).to_s
24
- else
25
- (page.uri + src).to_s
26
- end
27
- end
28
- end
29
- end
30
-
@@ -1,20 +0,0 @@
1
- ##
2
- # A form label on an HTML page
3
-
4
- class Mechanize::Page::Label
5
- attr_reader :node
6
- attr_reader :text
7
- attr_reader :page
8
- alias :to_s :text
9
-
10
- def initialize(node, page)
11
- @node = node
12
- @text = node.inner_text
13
- @page = page
14
- end
15
-
16
- def for
17
- (id = @node['for']) && page.search("##{id}") || nil
18
- end
19
- end
20
-
@@ -1,98 +0,0 @@
1
- ##
2
- # This class encapsulates links. It contains the text and the URI for
3
- # 'a' tags parsed out of an HTML page. If the link contains an image,
4
- # the alt text will be used for that image.
5
- #
6
- # For example, the text for the following links will both be 'Hello World':
7
- #
8
- # <a href="http://example">Hello World</a>
9
- # <a href="http://example"><img src="test.jpg" alt="Hello World"></a>
10
-
11
- class Mechanize::Page::Link
12
- attr_reader :node
13
- attr_reader :href
14
- attr_reader :attributes
15
- attr_reader :page
16
- alias :referer :page
17
-
18
- def initialize(node, mech, page)
19
- @node = node
20
- @attributes = node
21
- @href = node['href']
22
- @mech = mech
23
- @page = page
24
- @text = nil
25
- @uri = nil
26
- end
27
-
28
- # Click on this link
29
- def click
30
- @mech.click self
31
- end
32
-
33
- # This method is a shorthand to get link's DOM id.
34
- # Common usage:
35
- # page.link_with(:dom_id => "links_exact_id")
36
- def dom_id
37
- node['id']
38
- end
39
-
40
- # This method is a shorthand to get a link's DOM class
41
- # Common usage:
42
- # page.link_with(:dom_class => "links_exact_class")
43
- def dom_class
44
- node['class']
45
- end
46
-
47
- def pretty_print(q) # :nodoc:
48
- q.object_group(self) {
49
- q.breakable; q.pp text
50
- q.breakable; q.pp href
51
- }
52
- end
53
-
54
- alias inspect pretty_inspect # :nodoc:
55
-
56
- # A list of words in the rel attribute, all lower-cased.
57
- def rel
58
- @rel ||= (val = attributes['rel']) ? val.downcase.split(' ') : []
59
- end
60
-
61
- # Test if the rel attribute includes +kind+.
62
- def rel? kind
63
- rel.include? kind
64
- end
65
-
66
- # The text content of this link
67
- def text
68
- return @text if @text
69
-
70
- @text = @node.inner_text
71
-
72
- # If there is no text, try to find an image and use its alt text
73
- if (@text.nil? or @text.empty?) and imgs = @node.search('img') then
74
- @text = imgs.map do |e|
75
- e['alt']
76
- end.join
77
- end
78
-
79
- @text
80
- end
81
-
82
- alias :to_s :text
83
-
84
- # A URI for the #href for this link. The link is first parsed as a raw
85
- # link. If that fails, parsing an escaped link is attempted.
86
-
87
- def uri
88
- @uri ||= if @href then
89
- begin
90
- URI.parse @href
91
- rescue URI::InvalidURIError
92
- URI.parse WEBrick::HTTPUtils.escape @href
93
- end
94
- end
95
- end
96
-
97
- end
98
-
@@ -1,68 +0,0 @@
1
- ##
2
- # This class encapsulates a meta element with a refresh http-equiv. Mechanize
3
- # treats meta refresh elements just like 'a' tags. MetaRefresh objects will
4
- # contain links, but most likely will have no text.
5
-
6
- class Mechanize::Page::MetaRefresh < Mechanize::Page::Link
7
-
8
- ##
9
- # Time to wait before next refresh
10
-
11
- attr_reader :delay
12
-
13
- ##
14
- # This MetaRefresh link did not contain a url= in the content attribute and
15
- # links to itself.
16
-
17
- attr_reader :link_self
18
-
19
- ##
20
- # Matches the content attribute of a meta refresh element. After the match:
21
- #
22
- # $1:: delay
23
- # $3:: url
24
-
25
- CONTENT_REGEXP = /^\s*(\d+\.?\d*)(;|;\s*url=\s*['"]?(\S*?)['"]?)?\s*$/i
26
-
27
- ##
28
- # Parses the delay and url from the content attribute of a meta refresh
29
- # element. Parse requires the uri of the current page to infer a url when
30
- # no url is specified.
31
- #
32
- # Returns an array of [delay, url, link_self]. (delay is a string)
33
- #
34
- # Returns nil if the delay and url cannot be parsed.
35
-
36
- def self.parse content, base_uri
37
- return unless content =~ CONTENT_REGEXP
38
-
39
- link_self = $3.nil? || $3.empty?
40
- delay, refresh_uri = $1, $3
41
-
42
- dest = base_uri
43
- dest += refresh_uri if refresh_uri
44
-
45
- return delay, dest, link_self
46
- end
47
-
48
- def self.from_node node, page, uri
49
- http_equiv = node['http-equiv']
50
- return unless http_equiv and http_equiv.downcase == 'refresh'
51
-
52
- delay, uri, link_self = parse node['content'], uri
53
-
54
- return unless delay
55
-
56
- new node, page, delay, uri.to_s, link_self
57
- end
58
-
59
- def initialize node, page, delay, href, link_self = false
60
- super node, page.mech, page
61
-
62
- @delay = delay =~ /\./ ? delay.to_f : delay.to_i
63
- @href = href
64
- @link_self = link_self
65
- end
66
-
67
- end
68
-
@@ -1,440 +0,0 @@
1
- ##
2
- # This class encapsulates an HTML page. If Mechanize finds a content
3
- # type of 'text/html', this class will be instantiated and returned.
4
- #
5
- # Example:
6
- #
7
- # require 'mechanize'
8
- #
9
- # agent = Mechanize.new
10
- # agent.get('http://google.com/').class # => Mechanize::Page
11
-
12
- class Mechanize::Page < Mechanize::File
13
- extend Forwardable
14
- extend Mechanize::ElementMatcher
15
-
16
- attr_accessor :mech
17
-
18
- ##
19
- # Possible encodings for this page based on HTTP headers and meta elements
20
-
21
- attr_reader :encodings
22
-
23
- def initialize(uri=nil, response=nil, body=nil, code=nil, mech=nil)
24
- raise Mechanize::ContentTypeError, response['content-type'] unless
25
- response['content-type'] =~ /^(text\/html)|(application\/xhtml\+xml)/i
26
-
27
- @meta_content_type = nil
28
- @encoding = nil
29
- @encodings = [nil]
30
- raise 'no' if mech and not Mechanize === mech
31
- @mech = mech
32
-
33
- reset
34
-
35
- @encodings << Mechanize::Util.detect_charset(body) if body
36
-
37
- @encodings.concat self.class.response_header_charset(response)
38
-
39
- if body
40
- # Force the encoding to be 8BIT so we can perform regular expressions.
41
- # We'll set it to the detected encoding later
42
- body.force_encoding 'ASCII-8BIT' if body.respond_to? :force_encoding
43
-
44
- @encodings.concat self.class.meta_charset body
45
-
46
- meta_content_type = self.class.meta_content_type body
47
- @meta_content_type = meta_content_type if meta_content_type
48
- end
49
-
50
- @encodings << mech.default_encoding if mech and mech.default_encoding
51
-
52
- super uri, response, body, code
53
- end
54
-
55
- def title
56
- @title ||=
57
- if doc = parser
58
- title = doc.search('title').inner_text
59
- title.empty? ? nil : title
60
- end
61
- end
62
-
63
- def response_header_charset
64
- self.class.response_header_charset(response)
65
- end
66
-
67
- def meta_charset
68
- self.class.meta_charset(body)
69
- end
70
-
71
- def detected_encoding
72
- Mechanize::Util.detect_charset(body)
73
- end
74
-
75
- def encoding=(encoding)
76
- reset
77
-
78
- @encoding = encoding
79
-
80
- if @parser
81
- parser_encoding = @parser.encoding
82
- if (parser_encoding && parser_encoding.downcase) != (encoding && encoding.downcase)
83
- # lazy reinitialize the parser with the new encoding
84
- @parser = nil
85
- end
86
- end
87
-
88
- encoding
89
- end
90
-
91
- def encoding
92
- parser.respond_to?(:encoding) ? parser.encoding : nil
93
- end
94
-
95
- # Return whether parser result has errors related to encoding or not.
96
- # false only indicates that the parser reported no encoding errors, not that the encoding is valid.
97
- def encoding_error?(parser=nil)
98
- parser = self.parser unless parser
99
- return false if parser.errors.empty?
100
- parser.errors.any? do |error|
101
- error.message =~ /(indicate\ encoding)|
102
- (Invalid\ char)|
103
- (input\ conversion\ failed)/x
104
- end
105
- end
106
-
107
- def parser
108
- return @parser if @parser
109
- return nil unless @body
110
-
111
- if @encoding then
112
- @parser = @mech.html_parser.parse html_body, nil, @encoding
113
- elsif mech.force_default_encoding then
114
- @parser = @mech.html_parser.parse html_body, nil, @mech.default_encoding
115
- else
116
- @encodings.reverse_each do |encoding|
117
- @parser = @mech.html_parser.parse html_body, nil, encoding
118
-
119
- break unless encoding_error? @parser
120
- end
121
- end
122
-
123
- @parser
124
- end
125
-
126
- alias :root :parser
127
-
128
- def pretty_print(q) # :nodoc:
129
- q.object_group(self) {
130
- q.breakable
131
- q.group(1, '{url', '}') {q.breakable; q.pp uri }
132
- q.breakable
133
- q.group(1, '{meta_refresh', '}') {
134
- meta_refresh.each { |link| q.breakable; q.pp link }
135
- }
136
- q.breakable
137
- q.group(1, '{title', '}') { q.breakable; q.pp title }
138
- q.breakable
139
- q.group(1, '{iframes', '}') {
140
- iframes.each { |link| q.breakable; q.pp link }
141
- }
142
- q.breakable
143
- q.group(1, '{frames', '}') {
144
- frames.each { |link| q.breakable; q.pp link }
145
- }
146
- q.breakable
147
- q.group(1, '{links', '}') {
148
- links.each { |link| q.breakable; q.pp link }
149
- }
150
- q.breakable
151
- q.group(1, '{forms', '}') {
152
- forms.each { |form| q.breakable; q.pp form }
153
- }
154
- }
155
- end
156
-
157
- alias inspect pretty_inspect # :nodoc:
158
-
159
- def reset
160
- @bases = nil
161
- @forms = nil
162
- @frames = nil
163
- @iframes = nil
164
- @links = nil
165
- @labels = nil
166
- @labels_hash = nil
167
- @meta_refresh = nil
168
- @parser = nil
169
- @title = nil
170
- end
171
-
172
- # Return the canonical URI for the page if there is a link tag
173
- # with rel="canonical".
174
- def canonical_uri
175
- link = at('link[@rel="canonical"][@href]')
176
- return unless link
177
- href = link['href']
178
-
179
- URI href
180
- rescue URI::InvalidURIError
181
- URI Mechanize::Util.uri_escape href
182
- end
183
-
184
- # Get the content type
185
- def content_type
186
- @meta_content_type || response['content-type']
187
- end
188
-
189
- # Search through the page like HPricot
190
- def_delegator :parser, :search, :search
191
- def_delegator :parser, :/, :/
192
- def_delegator :parser, :at, :at
193
-
194
- ##
195
- # :method: form_with(criteria)
196
- #
197
- # Find a single form matching +criteria+.
198
- # Example:
199
- # page.form_with(:action => '/post/login.php') do |f|
200
- # ...
201
- # end
202
-
203
- ##
204
- # :method: forms_with(criteria)
205
- #
206
- # Find all forms matching +criteria+.
207
- # Example:
208
- # page.forms_with(:action => '/post/login.php').each do |f|
209
- # ...
210
- # end
211
-
212
- elements_with :form
213
-
214
- ##
215
- # :method: link_with(criteria)
216
- #
217
- # Find a single link matching +criteria+.
218
- # Example:
219
- # page.link_with(:href => /foo/).click
220
-
221
- ##
222
- # :method: links_with(criteria)
223
- #
224
- # Find all links matching +criteria+.
225
- # Example:
226
- # page.links_with(:href => /foo/).each do |link|
227
- # puts link.href
228
- # end
229
-
230
- elements_with :link
231
-
232
- ##
233
- # :method: base_with(criteria)
234
- #
235
- # Find a single base tag matching +criteria+.
236
- # Example:
237
- # page.base_with(:href => /foo/).click
238
-
239
- ##
240
- # :method: bases_with(criteria)
241
- #
242
- # Find all base tags matching +criteria+.
243
- # Example:
244
- # page.bases_with(:href => /foo/).each do |base|
245
- # puts base.href
246
- # end
247
-
248
- elements_with :base
249
-
250
- ##
251
- # :method: frame_with(criteria)
252
- #
253
- # Find a single frame tag matching +criteria+.
254
- # Example:
255
- # page.frame_with(:src => /foo/).click
256
-
257
- ##
258
- # :method: frames_with(criteria)
259
- #
260
- # Find all frame tags matching +criteria+.
261
- # Example:
262
- # page.frames_with(:src => /foo/).each do |frame|
263
- # p frame.src
264
- # end
265
-
266
- elements_with :frame
267
-
268
- ##
269
- # :method: iframe_with(criteria)
270
- #
271
- # Find a single iframe tag matching +criteria+.
272
- # Example:
273
- # page.iframe_with(:src => /foo/).click
274
-
275
- ##
276
- # :method: iframes_with(criteria)
277
- #
278
- # Find all iframe tags matching +criteria+.
279
- # Example:
280
- # page.iframes_with(:src => /foo/).each do |iframe|
281
- # p iframe.src
282
- # end
283
-
284
- elements_with :iframe
285
-
286
- ##
287
- # Return a list of all link and area tags
288
- def links
289
- @links ||= %w{ a area }.map do |tag|
290
- search(tag).map do |node|
291
- Link.new(node, @mech, self)
292
- end
293
- end.flatten
294
- end
295
-
296
- ##
297
- # Return a list of all form tags
298
- def forms
299
- @forms ||= search('form').map do |html_form|
300
- form = Mechanize::Form.new(html_form, @mech, self)
301
- form.action ||= @uri.to_s
302
- form
303
- end
304
- end
305
-
306
- ##
307
- # Return a list of all meta refresh elements
308
-
309
- def meta_refresh
310
- query = @mech.follow_meta_refresh == :anywhere ? 'meta' : 'head > meta'
311
-
312
- @meta_refresh ||= search(query).map do |node|
313
- MetaRefresh.from_node node, self, uri
314
- end.compact
315
- end
316
-
317
- ##
318
- # Return a list of all base tags
319
- def bases
320
- @bases ||=
321
- search('base').map { |node| Base.new(node, @mech, self) }
322
- end
323
-
324
- ##
325
- # Return a list of all frame tags
326
- def frames
327
- @frames ||=
328
- search('frame').map { |node| Frame.new(node, @mech, self) }
329
- end
330
-
331
- ##
332
- # Return a list of all iframe tags
333
- def iframes
334
- @iframes ||=
335
- search('iframe').map { |node| Frame.new(node, @mech, self) }
336
- end
337
-
338
- ##
339
- # Return a list of all img tags
340
- def images
341
- @images ||=
342
- search('img').map { |node| Image.new(node, self) }
343
- end
344
-
345
- def image_urls
346
- @image_urls ||= images.map(&:url).uniq
347
- end
348
-
349
- ##
350
- # Return a list of all label tags
351
- def labels
352
- @labels ||=
353
- search('label').map { |node| Label.new(node, self) }
354
- end
355
-
356
- def labels_hash
357
- unless @labels_hash
358
- hash = {}
359
- labels.each do |label|
360
- hash[label.node['for']] = label if label.for
361
- end
362
- @labels_hash = hash
363
- end
364
- return @labels_hash
365
- end
366
-
367
- def self.charset content_type
368
- charset = content_type[/charset=([^; ]+)/i, 1]
369
- return nil if charset == 'none'
370
- charset
371
- end
372
-
373
- def self.response_header_charset response
374
- charsets = []
375
- response.each do |header, value|
376
- next unless header == 'content-type'
377
- next unless value =~ /charset/i
378
- charsets << charset(value)
379
- end
380
- charsets
381
- end
382
-
383
- ##
384
- # Retrieves all charsets from +meta+ tags in +body+
385
-
386
- def self.meta_charset body
387
- # HACK use .map
388
- body.scan(/<meta .*?>/i).map do |meta|
389
- if meta =~ /charset\s*=\s*(["'])?\s*(.+)\s*\1/i then
390
- $2
391
- elsif meta =~ /http-equiv\s*=\s*(["'])?content-type\1/i then
392
- meta =~ /content\s*=\s*(["'])?(.*?)\1/i
393
-
394
- m_charset = charset $2 if $2
395
-
396
- m_charset if m_charset
397
- end
398
- end.compact
399
- end
400
-
401
- ##
402
- # Retrieves the last <tt>content-type</tt> set by a +meta+ tag in +body+
403
-
404
- def self.meta_content_type body
405
- body.scan(/<meta .*?>/i).reverse.map do |meta|
406
- if meta =~ /http-equiv\s*=\s*(["'])?content-type\1/i then
407
- meta =~ /content=(["'])?(.*?)\1/i
408
-
409
- return $2
410
- end
411
- end
412
-
413
- nil
414
- end
415
-
416
- private
417
-
418
- def html_body
419
- if @body
420
- @body.empty? ? '<html></html>' : @body
421
- else
422
- ''
423
- end
424
- end
425
-
426
- def self.charset_from_content_type content_type
427
- charset = content_type[/charset=([^; ]+)/i, 1]
428
- return nil if charset == 'none'
429
- charset
430
- end
431
- end
432
-
433
- require 'mechanize/headers'
434
- require 'mechanize/page/image'
435
- require 'mechanize/page/label'
436
- require 'mechanize/page/link'
437
- require 'mechanize/page/base'
438
- require 'mechanize/page/frame'
439
- require 'mechanize/page/meta_refresh'
440
-