tenderlove-mechanize 0.9.3.20090617085936

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (173) hide show
  1. data/CHANGELOG.rdoc +496 -0
  2. data/EXAMPLES.rdoc +171 -0
  3. data/FAQ.rdoc +11 -0
  4. data/GUIDE.rdoc +122 -0
  5. data/LICENSE.rdoc +340 -0
  6. data/Manifest.txt +169 -0
  7. data/README.rdoc +60 -0
  8. data/Rakefile +43 -0
  9. data/examples/flickr_upload.rb +23 -0
  10. data/examples/mech-dump.rb +7 -0
  11. data/examples/proxy_req.rb +9 -0
  12. data/examples/rubyforge.rb +21 -0
  13. data/examples/spider.rb +11 -0
  14. data/lib/mechanize.rb +7 -0
  15. data/lib/www/mechanize/chain/auth_headers.rb +80 -0
  16. data/lib/www/mechanize/chain/body_decoding_handler.rb +48 -0
  17. data/lib/www/mechanize/chain/connection_resolver.rb +78 -0
  18. data/lib/www/mechanize/chain/custom_headers.rb +23 -0
  19. data/lib/www/mechanize/chain/handler.rb +9 -0
  20. data/lib/www/mechanize/chain/header_resolver.rb +53 -0
  21. data/lib/www/mechanize/chain/parameter_resolver.rb +24 -0
  22. data/lib/www/mechanize/chain/post_connect_hook.rb +0 -0
  23. data/lib/www/mechanize/chain/pre_connect_hook.rb +22 -0
  24. data/lib/www/mechanize/chain/request_resolver.rb +32 -0
  25. data/lib/www/mechanize/chain/response_body_parser.rb +40 -0
  26. data/lib/www/mechanize/chain/response_header_handler.rb +50 -0
  27. data/lib/www/mechanize/chain/response_reader.rb +41 -0
  28. data/lib/www/mechanize/chain/ssl_resolver.rb +42 -0
  29. data/lib/www/mechanize/chain/uri_resolver.rb +77 -0
  30. data/lib/www/mechanize/chain.rb +34 -0
  31. data/lib/www/mechanize/content_type_error.rb +16 -0
  32. data/lib/www/mechanize/cookie.rb +72 -0
  33. data/lib/www/mechanize/cookie_jar.rb +191 -0
  34. data/lib/www/mechanize/file.rb +73 -0
  35. data/lib/www/mechanize/file_response.rb +62 -0
  36. data/lib/www/mechanize/file_saver.rb +39 -0
  37. data/lib/www/mechanize/form/button.rb +8 -0
  38. data/lib/www/mechanize/form/check_box.rb +13 -0
  39. data/lib/www/mechanize/form/field.rb +28 -0
  40. data/lib/www/mechanize/form/file_upload.rb +24 -0
  41. data/lib/www/mechanize/form/image_button.rb +23 -0
  42. data/lib/www/mechanize/form/multi_select_list.rb +69 -0
  43. data/lib/www/mechanize/form/option.rb +51 -0
  44. data/lib/www/mechanize/form/radio_button.rb +38 -0
  45. data/lib/www/mechanize/form/select_list.rb +45 -0
  46. data/lib/www/mechanize/form.rb +360 -0
  47. data/lib/www/mechanize/headers.rb +12 -0
  48. data/lib/www/mechanize/history.rb +67 -0
  49. data/lib/www/mechanize/inspect.rb +90 -0
  50. data/lib/www/mechanize/monkey_patch.rb +37 -0
  51. data/lib/www/mechanize/page/base.rb +10 -0
  52. data/lib/www/mechanize/page/frame.rb +22 -0
  53. data/lib/www/mechanize/page/link.rb +50 -0
  54. data/lib/www/mechanize/page/meta.rb +51 -0
  55. data/lib/www/mechanize/page.rb +176 -0
  56. data/lib/www/mechanize/pluggable_parsers.rb +103 -0
  57. data/lib/www/mechanize/redirect_limit_reached_error.rb +18 -0
  58. data/lib/www/mechanize/redirect_not_get_or_head_error.rb +20 -0
  59. data/lib/www/mechanize/response_code_error.rb +25 -0
  60. data/lib/www/mechanize/unsupported_scheme_error.rb +10 -0
  61. data/lib/www/mechanize/util.rb +76 -0
  62. data/lib/www/mechanize.rb +619 -0
  63. data/mechanize.gemspec +41 -0
  64. data/test/chain/test_argument_validator.rb +14 -0
  65. data/test/chain/test_auth_headers.rb +25 -0
  66. data/test/chain/test_custom_headers.rb +18 -0
  67. data/test/chain/test_header_resolver.rb +28 -0
  68. data/test/chain/test_parameter_resolver.rb +35 -0
  69. data/test/chain/test_request_resolver.rb +29 -0
  70. data/test/chain/test_response_reader.rb +24 -0
  71. data/test/data/htpasswd +1 -0
  72. data/test/data/server.crt +16 -0
  73. data/test/data/server.csr +12 -0
  74. data/test/data/server.key +15 -0
  75. data/test/data/server.pem +15 -0
  76. data/test/helper.rb +129 -0
  77. data/test/htdocs/alt_text.html +10 -0
  78. data/test/htdocs/bad_form_test.html +9 -0
  79. data/test/htdocs/button.jpg +0 -0
  80. data/test/htdocs/empty_form.html +6 -0
  81. data/test/htdocs/file_upload.html +26 -0
  82. data/test/htdocs/find_link.html +41 -0
  83. data/test/htdocs/form_multi_select.html +16 -0
  84. data/test/htdocs/form_multival.html +37 -0
  85. data/test/htdocs/form_no_action.html +18 -0
  86. data/test/htdocs/form_no_input_name.html +16 -0
  87. data/test/htdocs/form_select.html +16 -0
  88. data/test/htdocs/form_select_all.html +16 -0
  89. data/test/htdocs/form_select_none.html +17 -0
  90. data/test/htdocs/form_select_noopts.html +10 -0
  91. data/test/htdocs/form_set_fields.html +14 -0
  92. data/test/htdocs/form_test.html +188 -0
  93. data/test/htdocs/frame_test.html +30 -0
  94. data/test/htdocs/google.html +13 -0
  95. data/test/htdocs/iframe_test.html +16 -0
  96. data/test/htdocs/index.html +6 -0
  97. data/test/htdocs/link with space.html +5 -0
  98. data/test/htdocs/meta_cookie.html +11 -0
  99. data/test/htdocs/no_title_test.html +6 -0
  100. data/test/htdocs/relative/tc_relative_links.html +21 -0
  101. data/test/htdocs/tc_bad_links.html +5 -0
  102. data/test/htdocs/tc_base_link.html +8 -0
  103. data/test/htdocs/tc_blank_form.html +11 -0
  104. data/test/htdocs/tc_checkboxes.html +19 -0
  105. data/test/htdocs/tc_encoded_links.html +5 -0
  106. data/test/htdocs/tc_follow_meta.html +8 -0
  107. data/test/htdocs/tc_form_action.html +48 -0
  108. data/test/htdocs/tc_links.html +18 -0
  109. data/test/htdocs/tc_no_attributes.html +16 -0
  110. data/test/htdocs/tc_pretty_print.html +17 -0
  111. data/test/htdocs/tc_radiobuttons.html +17 -0
  112. data/test/htdocs/tc_referer.html +10 -0
  113. data/test/htdocs/tc_relative_links.html +19 -0
  114. data/test/htdocs/tc_textarea.html +23 -0
  115. data/test/htdocs/unusual______.html +5 -0
  116. data/test/servlets.rb +365 -0
  117. data/test/ssl_server.rb +48 -0
  118. data/test/test_authenticate.rb +71 -0
  119. data/test/test_bad_links.rb +25 -0
  120. data/test/test_blank_form.rb +16 -0
  121. data/test/test_checkboxes.rb +61 -0
  122. data/test/test_content_type.rb +13 -0
  123. data/test/test_cookie_class.rb +338 -0
  124. data/test/test_cookie_jar.rb +362 -0
  125. data/test/test_cookies.rb +123 -0
  126. data/test/test_encoded_links.rb +20 -0
  127. data/test/test_errors.rb +49 -0
  128. data/test/test_follow_meta.rb +108 -0
  129. data/test/test_form_action.rb +44 -0
  130. data/test/test_form_as_hash.rb +61 -0
  131. data/test/test_form_button.rb +38 -0
  132. data/test/test_form_no_inputname.rb +15 -0
  133. data/test/test_forms.rb +564 -0
  134. data/test/test_frames.rb +25 -0
  135. data/test/test_get_headers.rb +52 -0
  136. data/test/test_gzipping.rb +22 -0
  137. data/test/test_hash_api.rb +45 -0
  138. data/test/test_history.rb +142 -0
  139. data/test/test_history_added.rb +16 -0
  140. data/test/test_html_unscape_forms.rb +39 -0
  141. data/test/test_if_modified_since.rb +20 -0
  142. data/test/test_keep_alive.rb +31 -0
  143. data/test/test_links.rb +120 -0
  144. data/test/test_mech.rb +268 -0
  145. data/test/test_mechanize_file.rb +47 -0
  146. data/test/test_meta.rb +65 -0
  147. data/test/test_multi_select.rb +106 -0
  148. data/test/test_no_attributes.rb +13 -0
  149. data/test/test_option.rb +18 -0
  150. data/test/test_page.rb +119 -0
  151. data/test/test_pluggable_parser.rb +145 -0
  152. data/test/test_post_form.rb +34 -0
  153. data/test/test_pretty_print.rb +22 -0
  154. data/test/test_radiobutton.rb +75 -0
  155. data/test/test_redirect_limit_reached.rb +41 -0
  156. data/test/test_redirect_verb_handling.rb +45 -0
  157. data/test/test_referer.rb +39 -0
  158. data/test/test_relative_links.rb +40 -0
  159. data/test/test_request.rb +13 -0
  160. data/test/test_response_code.rb +52 -0
  161. data/test/test_save_file.rb +48 -0
  162. data/test/test_scheme.rb +48 -0
  163. data/test/test_select.rb +106 -0
  164. data/test/test_select_all.rb +15 -0
  165. data/test/test_select_none.rb +15 -0
  166. data/test/test_select_noopts.rb +16 -0
  167. data/test/test_set_fields.rb +44 -0
  168. data/test/test_ssl_server.rb +20 -0
  169. data/test/test_subclass.rb +14 -0
  170. data/test/test_textarea.rb +45 -0
  171. data/test/test_upload.rb +109 -0
  172. data/test/test_verbs.rb +25 -0
  173. metadata +314 -0
@@ -0,0 +1,67 @@
module WWW
  class Mechanize
    ##
    # This class manages history for your mechanize object.
    #
    # It is an Array of pages plus a Hash index keyed by URI string, so that
    # +visited?+ / +visited_page+ are hash lookups rather than array scans.
    # When +max_size+ is set, the oldest pages are shifted out as soon as the
    # history grows past that size.
    class History < Array
      # Maximum number of pages kept in the history; +nil+ means unlimited.
      attr_accessor :max_size

      # max_size:: optional cap on the number of pages retained.
      def initialize(max_size = nil)
        @max_size = max_size
        @history_index = {}
      end

      # Gives a dup/clone its own copy of the index so that removing pages
      # from the copy does not affect the original's lookups.
      def initialize_copy(orig)
        super
        @history_index = orig.instance_variable_get(:@history_index).dup
      end

      # Appends +page+ and indexes it under +uri+ (or under page.uri when no
      # +uri+ is given). Trims the oldest entries when over +max_size+.
      # Returns self so calls can be chained.
      def push(page, uri = nil)
        super(page)
        @history_index[(uri || page.uri).to_s] = page
        # The empty? guard keeps a negative max_size from looping forever.
        shift while @max_size && length > @max_size && !empty?
        self
      end
      alias :<< :push

      # True when a page has been indexed under +url+ (see +visited_page+).
      def visited?(url)
        !visited_page(url).nil?
      end

      # Returns the page indexed under +url+, or nil. +url+ may be anything
      # that responds to +uri+ (its uri is used) or anything whose +to_s+ is
      # the URI string.
      def visited_page(url)
        key = url.respond_to?(:uri) ? url.uri : url
        @history_index[key.to_s]
      end

      # Empties both the index and the array.
      def clear
        @history_index.clear
        super
      end

      # Removes and returns the oldest page (nil when empty), dropping its
      # index entries.
      def shift
        return nil if empty?
        page = super
        remove_from_index(page)
        page
      end

      # Removes and returns the newest page (nil when empty), dropping its
      # index entries.
      def pop
        return nil if empty?
        page = super
        remove_from_index(page)
        page
      end

      private

      # Drops every index entry whose value == +page+. delete_if is used
      # because it is the supported way to remove entries while walking a
      # Hash.
      def remove_from_index(page)
        @history_index.delete_if { |_, indexed| indexed == page }
      end
    end
  end
end
@@ -0,0 +1,90 @@
1
+ require 'pp'
2
+
module WWW
  # :stopdoc:
  class Mechanize
    # Pretty-prints the agent as its cookie jar followed by its current page.
    def pretty_print(q)
      q.object_group(self) do
        %w[cookie_jar current_page].each do |part|
          q.breakable
          q.pp __send__(part)
        end
      end
    end

    class Page
      # Pretty-prints the page as a sequence of labelled groups: url, meta,
      # title, iframes, frames, links and forms (in that order).
      def pretty_print(q)
        q.object_group(self) do
          q.breakable
          q.group(1, '{url', '}') do
            q.breakable
            q.pp uri
          end

          q.breakable
          q.group(1, '{meta', '}') do
            meta.each do |tag|
              q.breakable
              q.pp tag
            end
          end

          q.breakable
          q.group(1, '{title', '}') do
            q.breakable
            q.pp title
          end

          # The remaining sections are all "label + list of items".
          %w[iframes frames links forms].each do |section|
            q.breakable
            q.group(1, "{#{section}", '}') do
              __send__(section).each do |item|
                q.breakable
                q.pp item
              end
            end
          end
        end
      end

      class Link
        # Pretty-prints the link as its text followed by its href.
        def pretty_print(q)
          q.object_group(self) do
            [text, href].each do |part|
              q.breakable
              q.pp part
            end
          end
        end
      end
    end

    class Form
      # Pretty-prints the form: name, method and action as single values,
      # then fields, radiobuttons, checkboxes, file_uploads and buttons as
      # lists.
      def pretty_print(q)
        q.object_group(self) do
          %w[name method action].each do |attribute|
            q.breakable
            q.group(1, "{#{attribute}", '}') do
              q.breakable
              q.pp __send__(attribute)
            end
          end

          %w[fields radiobuttons checkboxes file_uploads buttons].each do |section|
            q.breakable
            q.group(1, "{#{section}", '}') do
              __send__(section).each do |item|
                q.breakable
                q.pp item
              end
            end
          end
        end
      end

      class RadioButton
        # Restricts pretty-printing to these three instance variables.
        def pretty_print_instance_variables
          [:@checked, :@name, :@value]
        end
      end
    end
  end
  # :startdoc:
end
@@ -0,0 +1,37 @@
module Net
  class HTTP
    # The stock implementation stays reachable under this name.
    alias :old_keep_alive? :keep_alive?

    # Decides whether the connection may be reused after this exchange.
    #
    # Checked in order:
    # 1. a "close" in the request's Connection header    -> false
    # 2. @seems_1_0_server is set                        -> false
    # 3. the response's Connection header                -> close: false, keep-alive: true
    # 4. the response's Proxy-Connection header          -> close: false, keep-alive: true
    # 5. otherwise, true only when @curr_http_version is '1.1'
    def keep_alive?(req, res)
      return false if req['connection'].to_s =~ /close/i
      return false if @seems_1_0_server

      %w[connection proxy-connection].each do |header|
        value = res[header].to_s
        return false if value =~ /close/i
        return true if value =~ /keep-alive/i
      end

      @curr_http_version == '1.1'
    end
  end
end
15
+
# Version-dependent patches.
#
# Ruby 1.8.4 and older: register HTTPInternalServerError for status '500' in
# Net::HTTPResponse::CODE_TO_OBJ.
# Newer Rubies: make #inspect on Form, Page and Page::Link return the
# pretty-printed representation.
#
# The version is compared segment by segment as integers; a plain String
# comparison would order "1.10.0" before "1.8.4".
if (RUBY_VERSION.split('.').map { |segment| segment.to_i } <=> [1, 8, 4]) <= 0
  module Net # :nodoc:
    class HTTPResponse # :nodoc:
      CODE_TO_OBJ['500'] = HTTPInternalServerError
    end
  end
else
  module WWW
    class Mechanize
      class Form
        alias :inspect :pretty_inspect
      end
      class Page
        alias :inspect :pretty_inspect
        class Link
          alias :inspect :pretty_inspect
        end
      end
    end
  end
end
@@ -0,0 +1,10 @@
module WWW
  class Mechanize
    class Page < WWW::Mechanize::File
      # Wraps a <base> tag. A Base is handled exactly like a Link (it
      # inherits everything from Link and adds nothing); it carries an href
      # but will usually have no text.
      class Base < Link
      end
    end
  end
end
@@ -0,0 +1,22 @@
module WWW
  class Mechanize
    class Page < WWW::Mechanize::File
      # Wraps a 'frame' tag so it can be used anywhere a Link can. The tag's
      # 'src' attribute is stored as the href and its 'name' attribute as the
      # text; +src+ and +name+ are provided as aliases of +href+ and +text+.
      class Frame < Link
        alias_method :src, :href
        alias_method :name, :text

        # node::    the parsed frame element
        # mech::    the owning agent
        # referer:: the page the frame was found on
        def initialize(node, mech, referer)
          super
          # Link#initialize derives href/text from 'href' and the inner text;
          # a frame takes them from 'src' and 'name' instead.
          @node = node
          @href = node['src']
          @text = node['name']
        end
      end
    end
  end
end
@@ -0,0 +1,50 @@
module WWW
  class Mechanize
    class Page < WWW::Mechanize::File
      # This class encapsulates links. It holds the text and the href of an
      # element parsed out of an HTML page. If the link has no text of its
      # own but contains images, the images' alt text is used instead.
      #
      # For example, the text for the following links will both be
      # 'Hello World':
      #
      #   <a href="http://rubyforge.org">Hello World</a>
      #   <a href="http://rubyforge.org"><img src="test.jpg" alt="Hello World"></a>
      class Link
        attr_reader :node, :href, :text, :attributes, :page

        alias_method :to_s, :text
        alias_method :referer, :page

        # node:: the parsed element; also exposed as +attributes+
        # mech:: the agent used by +click+
        # page:: the page this link was found on (also +referer+)
        def initialize(node, mech, page)
          @node       = node
          @attributes = node
          @mech       = mech
          @page       = page
          @href       = node['href']
          @text       = node.inner_text

          # Only fall back to image alt text when the link has no text.
          return unless @text.nil? || @text.length == 0

          images = node.search('img')
          return unless images.length > 0

          # Concatenate the alt text of every image; a missing alt counts
          # as an empty string.
          @text = ''
          images.each { |image| @text << (image['alt'] || '') }
        end

        # The href parsed into a URI, or nil when there is no href.
        def uri
          @href && URI.parse(@href)
        end

        # Click on this link
        def click
          @mech.click(self)
        end
      end
    end
  end
end
@@ -0,0 +1,51 @@
module WWW
  class Mechanize
    class Page < WWW::Mechanize::File
      # This class encapsulates a Meta tag. Mechanize treats meta tags just
      # like 'a' tags. Meta objects will contain links, but most likely will
      # have no text.
      class Meta < Link

        # Matches the content attribute of a meta tag. After the match:
        #
        #   $1:: delay
        #   $3:: url
        #
        CONTENT_REGEXP = /^\s*(\d+\.?\d*)(;|;\s*url=\s*['"]?(\S*?)['"]?)?\s*$/i

        class << self
          # Parses the delay and url from the content attribute of a meta tag.
          # Parse requires the uri of the current page to infer a url when no
          # url is specified, and to resolve a url that does not start with
          # "http". If a block is given, the parsed delay and url are passed
          # to it and the block's value is returned.
          #
          # Returns nil if the delay and url cannot be parsed.
          #
          #   # <meta http-equiv="refresh" content="5;url=http://example.com/" />
          #   uri = URI.parse('http://current.com/')
          #
          #   Meta.parse("5;url=http://example.com/", uri)  # => ['5', 'http://example.com/']
          #   Meta.parse("5;url=/next", uri)                # => ['5', 'http://current.com/next']
          #   Meta.parse("5;url=", uri)                     # => ['5', 'http://current.com/']
          #   Meta.parse("5", uri)                          # => ['5', 'http://current.com/']
          #   Meta.parse("invalid content", uri)            # => nil
          #
          def parse(content, uri)
            return nil unless content && content =~ CONTENT_REGEXP
            delay, url = $1, $3

            url = case url
                  when nil, "" then uri.to_s
                  when /^http/i then url
                  else resolve_relative(url, uri)
                  end

            block_given? ? yield(delay, url) : [delay, url]
          end

          private

          # Resolves +url+ against the current page's +uri+ so that the
          # scheme, port and directory of the current page are honoured.
          # If the reference cannot be merged, falls back to the historical
          # "http://host" + url concatenation rather than raising.
          def resolve_relative(url, uri)
            uri.merge(url).to_s
          rescue URI::Error
            "http://#{uri.host}#{url}"
          end
        end
      end
    end
  end
end
@@ -0,0 +1,176 @@
1
+ require 'www/mechanize/page/link'
2
+ require 'www/mechanize/page/meta'
3
+ require 'www/mechanize/page/base'
4
+ require 'www/mechanize/page/frame'
5
+ require 'www/mechanize/headers'
6
+
module WWW
  class Mechanize
    # = Synopsis
    # This class encapsulates an HTML page.  If Mechanize finds a content
    # type of 'text/html', this class will be instantiated and returned.
    #
    # The document is parsed lazily: +parser+ builds it on first use and the
    # link/form/frame collections are memoized after their first call.
    #
    # == Example
    #  require 'rubygems'
    #  require 'mechanize'
    #
    #  agent = WWW::Mechanize.new
    #  agent.get('http://google.com/').class  #=> WWW::Mechanize::Page
    #
    class Page < WWW::Mechanize::File
      extend Forwardable

      attr_accessor :mech

      # Raises Mechanize::ContentTypeError unless the response's content
      # type is text/html, application/xhtml+xml or
      # application/vnd.wap.xhtml+xml (the types PluggableParser maps to
      # Page).
      def initialize(uri=nil, response=nil, body=nil, code=nil, mech=nil)
        @encoding = nil

        # Take the charset from the first/last header value mentioning one;
        # a declared charset of 'none' is ignored.
        method = response.respond_to?(:each_header) ? :each_header : :each
        response.send(method) do |header,v|
          next unless v =~ /charset/i
          encoding = v.split('=').last.strip
          @encoding = encoding unless encoding == 'none'
        end

        # Force the encoding to be 8BIT so we can perform regular expressions.
        # We'll set it to the detected encoding later
        body.force_encoding('ASCII-8BIT') if defined?(Encoding) && body

        @encoding ||= Util.detect_charset(body)

        super(uri, response, body, code)
        @mech ||= mech

        # A <meta ... charset ...> tag in the body overrides the above: no
        # explicit encoding is handed to the parser in that case.
        @encoding = nil if html_body =~ /<meta[^>]*charset[^>]*>/i

        # The whole alternation is anchored (previously only text/html was).
        raise Mechanize::ContentTypeError.new(response['content-type']) unless
          response['content-type'] =~ %r{^\s*(?:text/html|application/xhtml\+xml|application/vnd\.wap\.xhtml\+xml)}i
        @parser = @links = @forms = @meta = @bases = @frames = @iframes = nil
      end

      # The inner text of the <title> element, or nil when it is empty or
      # the page could not be parsed.
      def title
        @title ||= if parser && search('title').inner_text.length > 0
          search('title').inner_text
        end
      end

      # Sets the encoding used for the next parse. An already-built parser
      # is discarded when its encoding differs (case-insensitively), so the
      # document is re-parsed on next access. Nil encodings on either side
      # are tolerated.
      def encoding=(encoding)
        @encoding = encoding

        return unless @parser
        current = @parser.respond_to?(:encoding) ? @parser.encoding : nil
        if current.to_s.downcase != encoding.to_s.downcase
          # lazy reinitialize the parser with the new encoding
          @parser = nil
        end
      end

      # The parser's encoding, or nil when the parser does not expose one.
      def encoding
        parser.respond_to?(:encoding) ? parser.encoding : nil
      end

      # The parsed document, built on first use with mech.html_parser.
      # Returns nil when there is no body or no response.
      def parser
        return @parser if @parser

        if body && response
          if mech.html_parser == Nokogiri::HTML
            @parser = mech.html_parser.parse(html_body, nil, @encoding)
          else
            @parser = mech.html_parser.parse(html_body)
          end
        end

        @parser
      end
      alias :root :parser

      # Get the content type
      def content_type
        response['content-type']
      end

      # Search through the page like HPricot
      def_delegator :parser, :search, :search
      def_delegator :parser, :/, :/
      def_delegator :parser, :at, :at

      # Find a form matching +criteria+.
      # Example:
      #   page.form_with(:action => '/post/login.php') do |f|
      #     ...
      #   end
      #
      # For each of form, link, base, frame and iframe this generates
      # <type>s_with (all matches), <type>_with (first match) and <type>
      # (alias of <type>_with). A String criteria is shorthand for
      # :name => criteria; values are matched with ===.
      # The eval'd text is built only from the fixed symbol list below.
      [:form, :link, :base, :frame, :iframe].each do |type|
        eval(<<-eomethod)
          def #{type}s_with(criteria)
            criteria = {:name => criteria} if String === criteria
            f = #{type}s.find_all do |thing|
              criteria.all? { |k,v| v === thing.send(k) }
            end
            yield f if block_given?
            f
          end

          def #{type}_with(criteria)
            f = #{type}s_with(criteria).first
            yield f if block_given?
            f
          end
          alias :#{type} :#{type}_with
        eomethod
      end

      # Link objects for every 'a' element followed by every 'area' element.
      def links
        @links ||= %w{ a area }.map do |tag|
          search(tag).map do |node|
            Link.new(node, @mech, self)
          end
        end.flatten
      end

      # Form objects for every 'form' element; a form without an action
      # gets this page's uri as its action.
      def forms
        @forms ||= search('form').map do |html_form|
          form = Form.new(html_form, @mech, self)
          form.action ||= @uri.to_s
          form
        end
      end

      # Meta objects for every parseable <meta http-equiv="refresh"> tag.
      # The parsed delay and href are written back onto the node as 'delay'
      # and 'href' attributes before the Meta is built.
      def meta
        @meta ||= search('meta').map do |node|
          next unless node['http-equiv'] && node['content']
          (equiv, content) = node['http-equiv'], node['content']
          if equiv && equiv.downcase == 'refresh'
            Meta.parse(content, uri) do |delay, href|
              node['delay'] = delay
              node['href'] = href
              Meta.new(node, @mech, self)
            end
          end
        end.compact
      end

      # Base objects for every 'base' element.
      def bases
        @bases ||=
          search('base').map { |node| Base.new(node, @mech, self) }
      end

      # Frame objects for every 'frame' element.
      def frames
        @frames ||=
          search('frame').map { |node| Frame.new(node, @mech, self) }
      end

      # Frame objects for every 'iframe' element.
      def iframes
        @iframes ||=
          search('iframe').map { |node| Frame.new(node, @mech, self) }
      end

      private

      # The text handed to the parser: the body itself, a minimal
      # '<html></html>' document when the body is empty, or '' when there
      # is no body at all.
      def html_body
        if body
          body.length > 0 ? body : '<html></html>'
        else
          ''
        end
      end
    end
  end
end
@@ -0,0 +1,103 @@
1
+ require 'www/mechanize/file'
2
+ require 'www/mechanize/file_saver'
3
+ require 'www/mechanize/page'
4
+
module WWW
  class Mechanize
    # = Synopsis
    # This class is used to register and maintain pluggable parsers for
    # Mechanize to use.
    #
    # A Pluggable Parser is a parser that Mechanize uses for any particular
    # content type.  Mechanize will ask PluggableParser for the class it
    # should initialize given any content type.  This class allows users to
    # register their own pluggable parsers, or modify existing pluggable
    # parsers.
    #
    # PluggableParser returns a WWW::Mechanize::File object for content types
    # that it does not know how to handle.  WWW::Mechanize::File provides
    # basic functionality for any content type, so it is a good class to
    # extend when building your own parsers.
    # == Example
    # To create your own parser, just create a class that takes four
    # parameters in the constructor.  Here is an example of registering
    # a pluggable parser that handles CSV files:
    #  class CSVParser < WWW::Mechanize::File
    #    attr_reader :csv
    #    def initialize(uri=nil, response=nil, body=nil, code=nil)
    #      super(uri, response, body, code)
    #      @csv = CSV.parse(body)
    #    end
    #  end
    #  agent = WWW::Mechanize.new
    #  agent.pluggable_parser.csv = CSVParser
    #  agent.get('http://example.com/test.csv')  # => CSVParser
    # Now any page that returns the content type of 'text/csv' will initialize
    # a CSVParser and return that object to the caller.
    #
    # To register a pluggable parser for a content type that pluggable parser
    # does not know about, just use the hash syntax:
    #  agent.pluggable_parser['text/something'] = SomeClass
    #
    # To set the default parser, just use the 'default' method:
    #  agent.pluggable_parser.default = SomeClass
    # Now all unknown content types will be instances of SomeClass.
    class PluggableParser
      # Short names for the content types that have a dedicated setter.
      CONTENT_TYPES = {
        :html   => 'text/html',
        :wap    => 'application/vnd.wap.xhtml+xml',
        :xhtml  => 'application/xhtml+xml',
        :pdf    => 'application/pdf',
        :csv    => 'text/csv',
        :xml    => 'text/xml',
      }

      # The class returned for content types with no registered parser.
      attr_accessor :default

      # Starts with Page registered for HTML, XHTML and WAP, and File as the
      # default for everything else.
      def initialize
        @parsers = { CONTENT_TYPES[:html]   => Page,
                     CONTENT_TYPES[:xhtml]  => Page,
                     CONTENT_TYPES[:wap]    => Page,
        }
        @default = File
      end

      # Returns the class registered for +content_type+, or +default+ when
      # the type is nil or unregistered.
      def parser(content_type)
        content_type.nil? ? default : @parsers[content_type] || default
      end

      # Registers +klass+ as the parser for +content_type+.
      def register_parser(content_type, klass)
        @parsers[content_type] = klass
      end

      # Replaces the parser for every HTML-family type that initialize maps
      # to Page: HTML, XHTML and WAP.
      def html=(klass)
        register_parser(CONTENT_TYPES[:html], klass)
        register_parser(CONTENT_TYPES[:xhtml], klass)
        register_parser(CONTENT_TYPES[:wap], klass)
      end

      # Replaces the parser for application/xhtml+xml only.
      def xhtml=(klass)
        register_parser(CONTENT_TYPES[:xhtml], klass)
      end

      # Registers the parser for application/pdf.
      def pdf=(klass)
        register_parser(CONTENT_TYPES[:pdf], klass)
      end

      # Registers the parser for text/csv.
      def csv=(klass)
        register_parser(CONTENT_TYPES[:csv], klass)
      end

      # Registers the parser for text/xml.
      def xml=(klass)
        register_parser(CONTENT_TYPES[:xml], klass)
      end

      # Returns the registered class for +content_type+, or nil (the default
      # is NOT applied here; use +parser+ for that).
      def [](content_type)
        @parsers[content_type]
      end

      # Same as register_parser.
      def []=(content_type, klass)
        @parsers[content_type] = klass
      end
    end
  end
end
@@ -0,0 +1,18 @@
module WWW
  class Mechanize
    # Raised when the number of redirects followed reaches the limit.
    #
    # page::          the page the error was built with
    # redirects::     the redirect count the error was built with
    # response_code:: that page's code
    class RedirectLimitReachedError < RuntimeError
      attr_reader :page
      attr_reader :response_code
      attr_reader :redirects

      def initialize(page, redirects)
        @page, @redirects = page, redirects
        @response_code = page.code
      end

      # Human-readable description; also used for +inspect+.
      def to_s
        "Maximum redirect limit (#{redirects}) reached"
      end
      alias_method :inspect, :to_s
    end
  end
end
@@ -0,0 +1,20 @@
module WWW
  class Mechanize
    # Raised when a POST, PUT, or DELETE request results in a redirect
    # see RFC 2616 10.3.2, 10.3.3 http://www.ietf.org/rfc/rfc2616.txt
    #
    # page::          the page the error was built with
    # verb::          the request verb the error was built with
    # uri::           that page's uri
    # response_code:: that page's code
    class RedirectNotGetOrHeadError < RuntimeError
      attr_reader :page
      attr_reader :response_code
      attr_reader :verb
      attr_reader :uri

      def initialize(page, verb)
        @page, @verb = page, verb
        @uri = page.uri
        @response_code = page.code
      end

      # Human-readable description; also used for +inspect+.
      def to_s
        "#{@response_code} redirect received after a #{@verb} request"
      end
      alias_method :inspect, :to_s
    end
  end
end
@@ -0,0 +1,25 @@
module WWW
  class Mechanize
    # =Synopsis
    # This error is thrown when Mechanize encounters a response code it does
    # not know how to handle.  Currently, this exception will be thrown
    # if Mechanize encounters response codes other than 200, 301, or 302.
    # Any other response code is up to the user to handle.
    class ResponseCodeError < RuntimeError
      # The code of the page that triggered the error (page.code, unchanged).
      attr_reader :response_code
      # The page that triggered the error.
      attr_reader :page

      def initialize(page)
        @page          = page
        @response_code = page.code
      end

      # "<code> => <Net::HTTP response class>". CODE_TO_OBJ is keyed by
      # String, so the code is converted before the lookup; an unknown code
      # leaves the class part empty.
      def to_s
        "#{response_code} => #{Net::HTTPResponse::CODE_TO_OBJ[response_code.to_s]}"
      end

      def inspect; to_s; end
    end
  end
end
@@ -0,0 +1,10 @@
module WWW
  class Mechanize
    # Error carrying the URI scheme it was built with; +scheme+ is readable
    # and writable.
    class UnsupportedSchemeError < RuntimeError
      attr_reader :scheme
      attr_writer :scheme

      def initialize(scheme)
        @scheme = scheme
      end
    end
  end
end