mechanize 0.4.7 → 0.5.0

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of mechanize might be problematic. Click here for more details.

Files changed (48) hide show
  1. data/CHANGELOG +17 -0
  2. data/EXAMPLES +23 -44
  3. data/NOTES +49 -0
  4. data/lib/mechanize.rb +95 -80
  5. data/lib/mechanize/cookie.rb +147 -148
  6. data/lib/mechanize/cookie.rb.rej +16 -0
  7. data/lib/mechanize/errors.rb +29 -0
  8. data/lib/mechanize/form.rb +211 -186
  9. data/lib/mechanize/form_elements.rb +31 -71
  10. data/lib/mechanize/list.rb +34 -0
  11. data/lib/mechanize/mech_version.rb +3 -1
  12. data/lib/mechanize/module.rb +1 -1
  13. data/lib/mechanize/page.rb +162 -180
  14. data/lib/mechanize/page_elements.rb +53 -40
  15. data/lib/mechanize/parsing.rb +11 -3
  16. data/lib/mechanize/pluggable_parsers.rb +147 -0
  17. data/test/data/server.crt +14 -0
  18. data/test/data/server.csr +11 -0
  19. data/test/data/server.key +18 -0
  20. data/test/data/server.pem +15 -0
  21. data/test/htdocs/no_title_test.html +6 -0
  22. data/test/parse.rb +39 -0
  23. data/test/proxy.rb +30 -0
  24. data/test/server.rb +2 -0
  25. data/test/servlets.rb +8 -0
  26. data/test/ssl_server.rb +49 -0
  27. data/test/tc_authenticate.rb +8 -6
  28. data/test/tc_cookie_class.rb +28 -18
  29. data/test/tc_cookie_jar.rb +88 -27
  30. data/test/tc_cookies.rb +41 -44
  31. data/test/tc_errors.rb +9 -23
  32. data/test/tc_forms.rb +36 -32
  33. data/test/tc_frames.rb +6 -4
  34. data/test/tc_links.rb +7 -6
  35. data/test/tc_mech.rb +43 -46
  36. data/test/tc_page.rb +24 -0
  37. data/test/tc_pluggable_parser.rb +103 -0
  38. data/test/tc_post_form.rb +41 -0
  39. data/test/tc_proxy.rb +25 -0
  40. data/test/tc_response_code.rb +13 -10
  41. data/test/tc_save_file.rb +25 -0
  42. data/test/tc_ssl_server.rb +27 -0
  43. data/test/tc_upload.rb +8 -6
  44. data/test/tc_watches.rb +5 -2
  45. data/test/test_includes.rb +3 -3
  46. data/test/ts_mech.rb +11 -2
  47. metadata +100 -86
  48. data/test/tc_filter.rb +0 -34
@@ -1,51 +1,64 @@
1
1
  module WWW
2
- class Link
3
- attr_reader :node
4
- attr_reader :href
5
- attr_reader :text
6
- alias :to_s :text
7
-
8
- def initialize(node)
9
- @node = node
10
- @href = node.attributes['href']
11
- @text = node.all_text
2
+ class Mechanize
3
+ # This class encapsulates links. It contains the text and the URI for
4
+ # 'a' tags parsed out of an HTML page. If the link contains an image,
5
+ # the alt text will be used for that image.
6
+ #
7
+ # For example, the text for the following links will both be 'Hello World':
8
+ #
9
+ # <a href="http://rubyforge.org">Hello World</a>
10
+ # <a href="http://rubyforge.org"><img src="test.jpg" alt="Hello World"></a>
11
+ class Link
12
+ attr_reader :node
13
+ attr_reader :href
14
+ attr_reader :text
15
+ alias :to_s :text
16
+
17
+ def initialize(node)
18
+ @node = node
19
+ @href = node.attributes['href']
20
+ @text = node.all_text
12
21
 
13
- # If there is no text, try to find an image and use it's alt text
14
- if (@text.nil? || @text.length == 0) && @node.has_elements?
15
- @text = ''
16
- @node.each_element { |e|
17
- if e.name == 'img'
18
- @text << (e.has_attributes? ? e.attributes['alt'] || '' : '')
19
- end
20
- }
22
+ # If there is no text, try to find an image and use its alt text
23
+ if (@text.nil? || @text.length == 0) && @node.has_elements?
24
+ @text = ''
25
+ @node.each_element { |e|
26
+ if e.name == 'img'
27
+ @text << (e.has_attributes? ? e.attributes['alt'] || '' : '')
28
+ end
29
+ }
30
+ end
21
31
  end
22
- end
23
32
 
24
- def uri
25
- URI.parse(@href)
26
- end
33
+ def uri
34
+ URI.parse(@href)
35
+ end
27
36
 
28
- def inspect
29
- "'#{@text}' -> #{@href}\n"
37
+ def inspect
38
+ "'#{@text}' -> #{@href}"
39
+ end
30
40
  end
31
- end
32
-
33
- class Meta < Link
34
- end
35
-
36
- class Frame
37
- attr_reader :node
38
- attr_reader :name
39
- attr_reader :src
40
-
41
- def initialize(node)
42
- @node = node
43
- @name = node.attributes['name']
44
- @src = node.attributes['src']
41
+
42
+ # This class encapsulates a Meta tag. Mechanize treats meta tags just
43
+ # like 'a' tags. Meta objects will contain links, but most likely will
44
+ # have no text.
45
+ class Meta < Link
45
46
  end
46
47
 
47
- def inspect
48
- "'#{@name}' -> #{@src}\n"
48
+ # This class encapsulates a 'frame' tag. Frame objects can be treated
49
+ # just like Link objects. They contain src, the link they refer to,
50
+ # and name, the name of the frame. 'src' and 'name' are aliased to 'href'
51
+ # and 'text' respectively so that a Frame object can be treated just
52
+ # like a Link.
53
+ class Frame < Link
54
+ alias :src :href
55
+ alias :name :text
56
+
57
+ def initialize(node)
58
+ @node = node
59
+ @text = node.attributes['name']
60
+ @href = node.attributes['src']
61
+ end
49
62
  end
50
63
  end
51
64
  end
@@ -19,6 +19,14 @@ end
19
19
 
20
20
  module REXML::Node
21
21
 
22
+ # Aliasing functions to get rid of warnings. Remove when support for 1.8.2
23
+ # is dropped.
24
+ if RUBY_VERSION > "1.8.2"
25
+ alias :old_each_recursive :each_recursive
26
+ alias :old_find_first_recursive :find_first_recursive
27
+ alias :old_index_in_parent :index_in_parent
28
+ end
29
+
22
30
  # Visit all subnodes of +self+ recursively
23
31
 
24
32
  def each_recursive(&block) # :yields: node
@@ -54,7 +62,7 @@ module REXML::Node
54
62
  # node == node.parent.elements[node.index_in_parent]
55
63
 
56
64
  def index_in_parent
57
- parent.elements.index(self)
65
+ parent.index(self)+1
58
66
  end
59
67
 
60
68
  # Recursively collects all text strings starting into an array.
@@ -161,9 +169,9 @@ def extract_from_table(root_node, headers, header_tags = %w(td th))
161
169
  # for each row we collect the elements at the same positions as the header_nodes.
162
170
  # this is what we finally return from the method.
163
171
 
164
- (header_row.index_in_parent+1 .. table.elements.size).collect do |inx|
172
+ (header_row.index_in_parent .. table.elements.size).collect do |inx|
165
173
  row = table.elements[inx]
166
- header_nodes.collect { |n| row.elements[ n.index_in_parent ].text }
174
+ header_nodes.collect { |n| row.elements[ n.parent.elements.index(n) ].text }
167
175
  end
168
176
  end
169
177
 
@@ -0,0 +1,147 @@
1
+ module WWW
2
+ class Mechanize
3
+ # = Synopsis
4
+ # This is the default (and base) class for the Pluggable Parsers. If
5
+ # Mechanize cannot find an appropriate class to use for the content type,
6
+ # this class will be used. For example, if you download a JPG, Mechanize
7
+ # will not know how to parse it, so this class will be instantiated.
8
+ #
9
+ # This is a good class to use as the base class for building your own
10
+ # pluggable parsers.
11
+ #
12
+ # == Example
13
+ # require 'rubygems'
14
+ # require 'mechanize'
15
+ #
16
+ # agent = WWW::Mechanize.new
17
+ # agent.get('http://example.com/foo.jpg').class #=> WWW::Mechanize::File
18
+ #
19
+ class File
20
+ attr_accessor :uri, :response, :body, :code
21
+
22
+ alias :content :body
23
+
24
+ def initialize(uri=nil, response=nil, body=nil, code=nil)
25
+ @uri, @response, @body, @code = uri, response, body, code
26
+ end
27
+
28
+ # Use this method to save the content of this object to filename
29
+ def save_as(filename)
30
+ ::File::open(filename, "w") { |f|
31
+ f.write body
32
+ }
33
+ end
34
+ end
35
+
36
+ # = Synopsis
37
+ # This is a pluggable parser that automatically saves every file
38
+ # it encounters. It saves the files as a tree, reflecting the
39
+ # host and file path.
40
+ #
41
+ # == Example to save all PDFs
42
+ # require 'rubygems'
43
+ # require 'mechanize'
44
+ #
45
+ # agent = WWW::Mechanize.new
46
+ # agent.pluggable_parser.pdf = WWW::Mechanize::FileSaver
47
+ # agent.get('http://example.com/foo.pdf')
48
+ #
49
+ class FileSaver < File
50
+ def initialize(uri=nil, response=nil, body=nil, code=nil)
51
+ @uri, @response, @body, @code = uri, response, body, code
52
+ path = uri.path == '/' ? '/index.html' : uri.path
53
+ path =~ /^(.*)\/([^\/]*)$/
54
+ filename = $2
55
+ path = "#{uri.host}#{$1}"
56
+ FileUtils.mkdir_p(path)
57
+ save_as("#{path}/#{filename}")
58
+ end
59
+ end
60
+
61
+ # = Synopsis
62
+ # This class is used to register and maintain pluggable parsers for
63
+ # Mechanize to use.
64
+ #
65
+ # A Pluggable Parser is a parser that Mechanize uses for any particular
66
+ # content type. Mechanize will ask PluggableParser for the class it
67
+ # should initialize given any content type. This class allows users to
68
+ # register their own pluggable parsers, or modify existing pluggable
69
+ # parsers.
70
+ #
71
+ # PluggableParser returns a WWW::Mechanize::File object for content types
72
+ # that it does not know how to handle. WWW::Mechanize::File provides
73
+ # basic functionality for any content type, so it is a good class to
74
+ # extend when building your own parsers.
75
+ # == Example
76
+ # To create your own parser, just create a class that takes four
77
+ # parameters in the constructor. Here is an example of registering
78
+ # a pluggable parser that handles CSV files:
79
+ # class CSVParser < WWW::Mechanize::File
80
+ # attr_reader :csv
81
+ # def initialize(uri=nil, response=nil, body=nil, code=nil)
82
+ # super(uri, response, body, code)
83
+ # @csv = CSV.parse(body)
84
+ # end
85
+ # end
86
+ # agent = WWW::Mechanize.new
87
+ # agent.pluggable_parser.csv = CSVParser
88
+ # agent.get('http://example.com/test.csv') # => CSVParser
89
+ # Now any page that returns the content type of 'text/csv' will initialize
90
+ # a CSVParser and return that object to the caller.
91
+ #
92
+ # To register a pluggable parser for a content type that pluggable parser
93
+ # does not know about, just use the hash syntax:
94
+ # agent.pluggable_parser['text/something'] = SomeClass
95
+ #
96
+ # To set the default parser, just use the 'default' method:
97
+ # agent.pluggable_parser.default = SomeClass
98
+ # Now all unknown content types will be instances of SomeClass.
99
+ class PluggableParser
100
+ CONTENT_TYPES = {
101
+ :html => 'text/html',
102
+ :pdf => 'application/pdf',
103
+ :csv => 'text/csv',
104
+ :xml => 'text/xml',
105
+ }
106
+
107
+ attr_accessor :default
108
+
109
+ def initialize
110
+ @parsers = { CONTENT_TYPES[:html] => Page }
111
+ @default = File
112
+ end
113
+
114
+ def parser(content_type)
115
+ content_type.nil? ? default : @parsers[content_type] || default
116
+ end
117
+
118
+ def register_parser(content_type, klass)
119
+ @parsers[content_type] = klass
120
+ end
121
+
122
+ def html=(klass)
123
+ register_parser(CONTENT_TYPES[:html], klass)
124
+ end
125
+
126
+ def pdf=(klass)
127
+ register_parser(CONTENT_TYPES[:pdf], klass)
128
+ end
129
+
130
+ def csv=(klass)
131
+ register_parser(CONTENT_TYPES[:csv], klass)
132
+ end
133
+
134
+ def xml=(klass)
135
+ register_parser(CONTENT_TYPES[:xml], klass)
136
+ end
137
+
138
+ def [](content_type)
139
+ @parsers[content_type]
140
+ end
141
+
142
+ def []=(content_type, klass)
143
+ @parsers[content_type] = klass
144
+ end
145
+ end
146
+ end
147
+ end
@@ -0,0 +1,14 @@
1
+ -----BEGIN CERTIFICATE-----
2
+ MIICLzCCAZgCCQDS5ue63ULFQDANBgkqhkiG9w0BAQUFADBcMQswCQYDVQQGEwJV
3
+ UzETMBEGA1UECBMKV2FzaGluZ3RvbjEQMA4GA1UEBxMHU2VhdHRsZTESMBAGA1UE
4
+ ChMJTWVjaGFuaXplMRIwEAYDVQQDEwlsb2NhbGhvc3QwHhcNMDYwNjI5MjEzMjIy
5
+ WhcNMDYwNzI5MjEzMjIyWjBcMQswCQYDVQQGEwJVUzETMBEGA1UECBMKV2FzaGlu
6
+ Z3RvbjEQMA4GA1UEBxMHU2VhdHRsZTESMBAGA1UEChMJTWVjaGFuaXplMRIwEAYD
7
+ VQQDEwlsb2NhbGhvc3QwgZ8wDQYJKoZIhvcNAQEBBQADgY0AMIGJAoGBAL3qHTIQ
8
+ 32nJtgwvL86UYhO3W8WPAPWEmY1FgsAxboWDdoHr/klGCKuPXR5tUovymD26/G3Z
9
+ yAN+ev7IJUIA6E++jCIQ9v7l22NOJyN/7bS9gNXFfKeNWJXSd0D6DWypgPURHVhi
10
+ A4viRFYiv/Q1XDU/UxNTqu2/OoQo/KjWEGQvAgMBAAEwDQYJKoZIhvcNAQEFBQAD
11
+ gYEAZhacqH+7sknkjnptrBkYjbmmlj8STYXwKs9+xWsUW1NSW01jT61e7qlqOTR3
12
+ 26tUms1aq4OTBovGSBboNKI2NqWSHD0stdudjPMyNj0eZBJVLlaYiS7/1AqV6fM/
13
+ OGmX/Alaaa3fTytbuocHtQfm9ue18dTzabfIw2Wp6Hscm/Q=
14
+ -----END CERTIFICATE-----
@@ -0,0 +1,11 @@
1
+ -----BEGIN CERTIFICATE REQUEST-----
2
+ MIIBnDCCAQUCAQAwXDELMAkGA1UEBhMCVVMxEzARBgNVBAgTCldhc2hpbmd0b24x
3
+ EDAOBgNVBAcTB1NlYXR0bGUxEjAQBgNVBAoTCU1lY2hhbml6ZTESMBAGA1UEAxMJ
4
+ bG9jYWxob3N0MIGfMA0GCSqGSIb3DQEBAQUAA4GNADCBiQKBgQC96h0yEN9pybYM
5
+ Ly/OlGITt1vFjwD1hJmNRYLAMW6Fg3aB6/5JRgirj10ebVKL8pg9uvxt2cgDfnr+
6
+ yCVCAOhPvowiEPb+5dtjTicjf+20vYDVxXynjViV0ndA+g1sqYD1ER1YYgOL4kRW
7
+ Ir/0NVw1P1MTU6rtvzqEKPyo1hBkLwIDAQABoAAwDQYJKoZIhvcNAQEFBQADgYEA
8
+ BdaUO9CUvFe6RIXPxJfeHnU39SDrzXAgQ4zoi9EwbJO1rs/cid3qcF6sjGgtSLgF
9
+ qJqpJplLa0wezecjHtDKAIwchNYrv+MrchWCKWlVtxYdCX6kjn796Tpjl0w7CUfm
10
+ mYhE04+mqjhS3SMMCiIyxnM/zGDiMmxsxyhUF+WUppo=
11
+ -----END CERTIFICATE REQUEST-----
@@ -0,0 +1,18 @@
1
+ -----BEGIN RSA PRIVATE KEY-----
2
+ Proc-Type: 4,ENCRYPTED
3
+ DEK-Info: DES-EDE3-CBC,A32B4789D9FBDA52
4
+
5
+ Y937xENBSIMLD1Vkh95DjHW30UZc00xkpLa/JrPQPVQWY8gyFX47bbvE2SkyPp9D
6
+ vsriBwTOfNB7PZpRWo5DRQMK+ZRWvfUstkvXqbG/dQfg8hWdwUbTiaiZC8pTqdVv
7
+ qupHruzu6LLzlWqvd03lb/TlcOveFnvgeqwICJ23LMvwexJWzvOaY2+q7YKq1XSg
8
+ /rpMTxPmXrx8+QRfI7bHkJk/aPaGFMvzgckf3pSFSARWm/1n7cYgiBTYCbgQOHEP
9
+ 99JG0WbmFQF0/Rfb8+V5eCeIEBvSatoBjTs/Mb61Ah4bNo1DIgeSvKAShfumu/Tb
10
+ rB9wa164/eHfA0u65gKF8HS7rGzP994KEfzGQ62zRm2V1UW0aOz+3/CaCe/0bkAo
11
+ Wy4V72psOZ5QhUH3yuyoCDze/wiuoBz3zBX3JZzQ9Za4Z7u2eQP7qhh8Dz4IOYU0
12
+ lhM67QhIRkgGQqgXM80NbSi6VKF1EwAF3BaRIhEnjRC9spUwmKfMvxxJDjRU1YVC
13
+ X7cSZp2budXcr13jf8g3+fSKN1vD3byliJwdnVtB9a+t1gStxJ5FSuQ2A/I0JszY
14
+ eL2kNvb8GwakOxV1PL4T8UIobHVsaP+evrbk3iwloI2CfCya3t9T5JYubCoo6rK3
15
+ /0PAxOR6x+HzRBDc/5cMA6WUNWrGK/QQPKmGFGyZW8oSYTCEmeiKxFSHNCv38bVq
16
+ 11TlZ5Lj1/+jYbz88pN4qkVYNIN7tqUUOxIowc3atZLBQzn21toJcRygv9abkRSh
17
+ XmgzQYK6N3laTSr1R7KsmIA90+yDXTo44064gOu6VaTMeOJoMegetA==
18
+ -----END RSA PRIVATE KEY-----
@@ -0,0 +1,15 @@
1
+ -----BEGIN RSA PRIVATE KEY-----
2
+ MIICXAIBAAKBgQC96h0yEN9pybYMLy/OlGITt1vFjwD1hJmNRYLAMW6Fg3aB6/5J
3
+ Rgirj10ebVKL8pg9uvxt2cgDfnr+yCVCAOhPvowiEPb+5dtjTicjf+20vYDVxXyn
4
+ jViV0ndA+g1sqYD1ER1YYgOL4kRWIr/0NVw1P1MTU6rtvzqEKPyo1hBkLwIDAQAB
5
+ AoGAJP/XcEmZPSLvNLB8v+JouS6f0n1p0mti/wLr1kXLInfaPrWbdOsQBVyI58Sk
6
+ sF8zCdPEGiKztPqPO9QCbjl/CLPn0AmQ/L+MAc/4RnKdB/noC2l9zRBs6A/vxPuR
7
+ 8H8fYDgfVZTITaWqaFZMIScpQpVjDf50hR5REsvHtnC3vwECQQDynzUxX5bJjOPr
8
+ MMwFG9fWyWmi/GP6xktTluvI2t4BfQVuHO/ds+N1X+Jhpv0gSmW3HB2NRoxpyW7W
9
+ rjSnqbQFAkEAyGLmwCHd1Y39OLwl26HZ3/qvJs66oCqAdCH/oakDFg2Sxkx8ANMj
10
+ VU/1/Fs4x95JdsdAz/c8YnhwfguuuA/BowJAK8fS32/yU7hawGvzxNA7aIS7zS9i
11
+ EzSJDzJipcHo4bffrPkHx6fYWavNu/VYI+WI6TvnkUCzwbsZhx+0XLTgyQJAXOuv
12
+ wOxzZ7Hd+E8W5w/Ybkpaqg428EqFIlUZqycqbTGCHD4fSmqd+dOIZ6sNW3Eyc8fV
13
+ wBoPgEIFOA3GyUdVLQJBAMI/b3c0zYYXTDYeU/nGmqheQlT8MRqN9a2am3Zq74wO
14
+ xUZTU/SbjWH/qor1EUtF8is0MxPoh9Gmx2Gt3QDbnM4=
15
+ -----END RSA PRIVATE KEY-----
@@ -0,0 +1,6 @@
1
+ <html>
2
+ <title></title>
3
+ <body>
4
+ No title in the title tag
5
+ </body>
6
+ </html>
data/test/parse.rb ADDED
@@ -0,0 +1,39 @@
1
+ require 'rubygems'
2
+
3
+ require 'web/htmltools/xmltree'
4
+
5
+ parser = HTMLTree::XMLParser.new
6
+ parser.feed(DATA.read.chomp)
7
+ root = parser.document
8
+
9
+ root.each_recursive { |node|
10
+ name = node.name.downcase
11
+ case name
12
+ when 'form'
13
+ node.each_recursive { |n|
14
+ puts n.name.downcase
15
+ }
16
+ end
17
+ }
18
+
19
+ __END__
20
+ <html>
21
+ <body>
22
+ <table>
23
+ <tr>
24
+ <td>
25
+ <form name="foo">
26
+ <table>
27
+ <tr><td><h1>Header</h1></td></tr>
28
+ <tr>
29
+ <td>
30
+ <input type="text" name="hey" value="" />
31
+ </td>
32
+ </tr>
33
+ </table>
34
+ </form>
35
+ </td>
36
+ </tr>
37
+ </table>
38
+ </body>
39
+ </html>
data/test/proxy.rb ADDED
@@ -0,0 +1,30 @@
1
+ # This is a simple proxy that assumes the destination server will
2
+ # close the connection after sending data, otherwise it will get blocked
3
+ # on reads.
4
+
5
+ require 'rubygems'
6
+ require 'eventmachine'
7
+ require 'socket'
8
+
9
+ module HttpProxy
10
+ include Socket::Constants
11
+
12
+ def receive_data(data)
13
+ if data =~ /Host: (.*)$/
14
+ (host, port) = $1.chomp.split(/:/)
15
+ port ||= 80
16
+ socket = Socket.new( AF_INET, SOCK_STREAM, 0 )
17
+ puts port.to_i
18
+ puts host
19
+ sockaddr = Socket.pack_sockaddr_in( port.to_i, host )
20
+ socket.connect(sockaddr)
21
+ socket.write(data)
22
+ results = socket.read
23
+ send_data results
24
+ end
25
+ end
26
+ end
27
+
28
+ EventMachine::run {
29
+ EventMachine::start_server "127.0.0.1", 2001, HttpProxy
30
+ }
data/test/server.rb CHANGED
@@ -1,4 +1,5 @@
1
1
  require 'webrick'
2
+ require 'base64'
2
3
  require 'servlets'
3
4
  require 'logger'
4
5
 
@@ -20,6 +21,7 @@ s.mount("/form post", FormTest)
20
21
  s.mount("/response_code", ResponseCodeTest)
21
22
  s.mount("/file_upload", FileUploadTest)
22
23
  s.mount("/bad_content_type", BadContentTypeTest)
24
+ s.mount("/content_type_test", ContentTypeTest)
23
25
 
24
26
  htpasswd = WEBrick::HTTPAuth::Htpasswd.new(base_dir + '/data/htpasswd')
25
27
  auth = WEBrick::HTTPAuth::BasicAuth.new(