tap-mechanize 0.5.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,30 @@
1
+ require 'tap/mechanize/request'
2
+
3
+ module Tap
4
+ module Mechanize
5
+ # :startdoc::manifest submits a captured http request
6
+ #
7
+ # Performs a series of HTTP requests and returns the content of the final
8
+ # page. Requests must be hashes that at least specify the uri of the next
9
+ # request, but more typically also specify the request method, parameters,
10
+ # and even headers.
11
+ #
12
+ # All requests are performed in the same session, so logins and subsequent
13
+ # actions are allowed. The easiest way to capture HTTP requests is to
14
+ # use the capture server distributed with tap-mechanize. Simply start a
15
+ # tap server and work through the tutorial (http://localhost:8080/capture/tutorial).
16
+ #
17
+ # % tap server
18
+ #
19
+ # Once you have a request file:
20
+ #
21
+ # % tap run -- load/yaml --:s submit --: dump < request.yml
22
+ #
23
+ # Remember, only the results of the final page are returned!
24
class Submit < Request
  # Passes the requests to the inherited process method as a single array
  # and returns the content of whatever that call returns.
  def process(*requests)
    result = super(requests)
    result.content
  end
end
29
+ end
30
+ end
@@ -0,0 +1,59 @@
1
+ require 'rack'
2
+ require 'webrick'
3
+ require 'thread'
4
+ require 'stringio'
5
+ require 'tap/mechanize/test/mock_server'
6
+ require 'tap/mechanize/test/echo_server'
7
+
8
+ module Tap
9
+ module Mechanize
10
+
11
+ # Mechanize::Test allows you to mock out remote servers to test HTTP
12
+ # clients. The mechanize_test method launches a WEBrick server hosting
13
+ # a Rack application for the duration of the block. Typically the
14
+ # application is constructed to send back an expected response.
15
+ #
16
+ # class SampleTest < Test::Unit::TestCase
17
+ # include Tap::Mechanize::Test
18
+ #
19
+ # def test_mechanize_test
20
+ # m = MockServer.new {|env| ['success'] }
21
+ # a = WWW::Mechanize.new
22
+ # mechanize_test(m) do
23
+ # assert_equal 'success', a.get('http://localhost:2000/').body
24
+ # end
25
+ # end
26
+ # end
27
+ #
28
+ module Test
29
+
30
# Builds the default WEBrick config for mechanize_test. The server is set
# to port 2000, and one WARN-level WEBrick::Log writing to log_dev is used
# as both the :Logger and the :AccessLog. By default log_dev is a fresh,
# empty StringIO.
def default_webrick_config(log_dev=StringIO.new(''))
  logger = WEBrick::Log.new(log_dev, WEBrick::Log::WARN)
  {:Port => 2000, :Logger => logger, :AccessLog => logger}
end
40
+
41
# Sets up a local WEBrick server that runs the Rack app and yields to the
# block. The server is started on its own thread and is shut down once the
# block returns or raises. Returns the result of the block. See
# default_webrick_config for setup information.
#
# The server is created before the begin/ensure so that a failure to
# construct it (for example a port that cannot be bound) propagates as-is,
# rather than being masked by a NoMethodError from calling shutdown on nil.
def mechanize_test(app=EchoServer, config=default_webrick_config)
  server = ::WEBrick::HTTPServer.new(config)

  begin
    server.mount("/", Rack::Handler::WEBrick, app)
    Thread.new { server.start }
    yield
  ensure
    server.shutdown
  end
end
54
+ end
55
+ end
56
+ end
57
+
58
+
59
+
@@ -0,0 +1,20 @@
1
+ module Tap
2
+ module Mechanize
3
+ module Test
4
# EchoServer is a Rack application that simply echoes back the request
# environment. Every env entry whose key does not start with 'rack' is
# returned as a header, with its value wrapped in a one-element array.
# The response is:
#
#   [200, {env key => [env value]}, [request body]]
#
class EchoServer
  class << self
    def call(env)
      content = env['rack.input'].read

      echoed = {}
      env.each_pair do |name, value|
        next if name =~ /^rack/
        echoed[name] = [value]
      end

      [200, echoed, [content]]
    end
  end
end
18
+ end
19
+ end
20
+ end
@@ -0,0 +1,31 @@
1
+ module Tap
2
+ module Mechanize
3
+ module Test
4
+
5
# MockServer is a small Rack application whose response content comes
# from a block evaluated on every call. The status and headers of the
# response are fixed when the server is created.
#
#   env = Rack::MockRequest.env_for('http://localhost:2000/')
#
#   m = MockServer.new {|env| ['yo'] }
#   m.call(env)   # => [200, {'Content-Type' => 'text/html'}, ['yo']]
#
class MockServer
  def initialize(status=200, headers={'Content-Type' => 'text/html'}, &block)
    @status, @headers, @block = status, headers, block
  end

  # Calls the initialization block with env; whatever the block returns is
  # used as the content of the response.
  #
  # Returns: [status, headers, block-return]
  def call(env)
    content = @block.call(env)
    [@status, @headers, content]
  end
end
29
+ end
30
+ end
31
+ end
@@ -0,0 +1,296 @@
1
+ autoload(:WEBrick, 'webrick')
2
+ autoload(:StringIO, 'stringio')
3
+ require 'rack'
4
+
5
+ module Tap
6
+ module Mechanize
7
+ module Utils
8
+ module_function
9
+
10
+ # Parses a WEBrick::HTTPRequest from the input socket into a hash that
11
+ # may be resubmitted by Dispatch. Sockets can be any kind of IO (File,
12
+ # StringIO, etc..) and should be positioned such that the next line is
13
+ # the start of an HTTP request. Strings used as sockets are converted
14
+ # into StringIO objects.
15
+ #
16
+ # parse_http_request("GET /path HTTP/1.1\n")
17
+ # # => {
18
+ # # :request_method => "GET",
19
+ # # :uri => "/path",
20
+ # # :version => "1.1",
21
+ # # :headers => {},
22
+ # # :params => {},
23
+ # # }
24
+ #
25
+ # ==== WEBrick parsing of HTTP format
26
+ #
27
+ # WEBrick will parse headers then the body of a request, and currently
28
+ # (1.8.6) considers an empty line as a break between the headers and
29
+ # body. In general header parsing is forgiving with end-line
30
+ # characters (ie "\r\n" and "\n" are both acceptable) but parsing of
31
+ # multipart/form data IS NOT.
32
+ #
33
+ # Multipart/form data REQUIRES that the end-line characters are "\r\n".
34
+ # A boundary is always started with "--" and the last boundary completed
35
+ # with "--". As always, the content-length must be correct.
36
+ #
37
+ # # Notice an empty line between the last header
38
+ # # (in this case 'Content-Length') and the body.
39
+ # msg = <<-_end_of_message_
40
+ # POST /path HTTP/1.1
41
+ # Host: localhost:8080
42
+ # Content-Type: multipart/form-data; boundary=1234567890
43
+ # Content-Length: 158
44
+ #
45
+ # --1234567890
46
+ # Content-Disposition: form-data; name="one"
47
+ #
48
+ # value one
49
+ # --1234567890
50
+ # Content-Disposition: form-data; name="two"
51
+ #
52
+ # value two
53
+ # --1234567890--
54
+ # _end_of_message_
55
+ #
56
+ # # ensure the end of line characters are correct...
57
+ # socket = StringIO.new msg.gsub(/\n/, "\r\n")
58
+ #
59
+ # Tap::Mechanize::Utils.parse_http_request(socket)
60
+ # # => {
61
+ # # :request_method => "POST",
62
+ # # :uri => "http://localhost:8080/path",
63
+ # # :version => "1.1",
64
+ # # :headers => {
65
+ # # "Host" => "localhost:8080",
66
+ # # "Content-Type" => "multipart/form-data; boundary=1234567890",
67
+ # # "Content-Length" => "158"},
68
+ # # :params => {
69
+ # # "one" => "value one",
70
+ # # "two" => "value two"}}
71
+ #
72
+ #--
73
+ # TODO: check if there are other headers to capture from
74
+ # a multipart/form file. Currently only
75
+ # 'Filename' and 'Content-Type' are added
76
def parse_http_request(socket, keep_content=true)
  # Strings are wrapped in a StringIO so they can be read like a socket.
  io = socket.kind_of?(String) ? StringIO.new(socket) : socket

  request = WEBrick::HTTPRequest.new(WEBrick::Config::HTTP)
  request.parse(io)

  parse_webrick_request(request, keep_content)
end
84
+
85
# Parses a WEBrick::HTTPRequest, with the same activity as
# parse_http_request. Returns a hash with the keys :uri, :request_method,
# :version, :headers, and :params. When keep_content is false the
# 'Content' of file parameters is left out.
def parse_webrick_request(req, keep_content=true)
  headers = {}
  if req.header
    req.header.each_pair do |name, values|
      headers[headerize(name)] = splat(values)
    end
  end

  params = {}
  if req.query
    req.query.each_pair do |name, form_data|
      # NOTE(review): it is unclear how robust this is. In tests each query
      # value appears to always be a WEBrick::HTTPUtils::FormData, as does
      # each data it yields -- confirm. A data with a filename is treated
      # as a file; its content type is only reachable through [].
      entries = []
      form_data.each_data do |data|
        if data.filename
          file = {'Filename' => data.filename, 'Content-Type' => data['Content-Type']}
          file['Content'] = data.to_a.join("\n") if keep_content
          entries << file
        else
          entries << data.to_s
        end
      end

      params[name] = splat(entries)
    end
  end

  # Without a Host header only the request path is available as the uri.
  host = headers['Host']
  uri = host ? File.join("http://", host, req.path_info) : req.path_info

  {
    :uri => uri,
    :request_method => req.request_method,
    :version => req.http_version.to_s,
    :headers => headers,
    :params => params
  }
end
121
+
122
# Parses a Rack::Request, with the same activity as parse_http_request.
#
# Headers are rebuilt from the HTTP_* entries of request.env (HTTP_VERSION
# excluded) plus CONTENT_TYPE. Uploaded files, i.e. hash params that carry
# a :tempfile, are converted into hashes with 'Content-Type', 'Filename'
# and, when keep_content is true, 'Content'. All other params, including
# nested hash params, are passed through unchanged.
#
# As in parse_webrick_request, the :uri is just the request path when the
# request has no Host header. The :version is a float when HTTP_VERSION
# looks like "HTTP/x.y", and the raw HTTP_VERSION value otherwise.
def parse_rack_request(request, keep_content=true)
  headers = {}
  request.env.each_pair do |key, value|
    name = case key
           when "HTTP_VERSION" then nil
           when /^HTTP_(.*)/ then $1
           when 'CONTENT_TYPE' then key
           end
    next unless name

    headers[headerize(name)] = value
  end

  params = {}
  request.params.each_pair do |key, value|
    params[key] = each_member(value) do |obj|
      # Only hashes holding a :tempfile are uploads; treating every hash
      # as a file would call read on nil for nested params.
      if obj.kind_of?(Hash) && obj.key?(:tempfile)
        file = {'Content-Type' => obj[:type], 'Filename' => obj[:filename]}
        file['Content'] = obj[:tempfile].read if keep_content
        file
      else
        obj
      end
    end
  end

  host = headers['Host']
  path = request.env['PATH_INFO']
  version = request.env['HTTP_VERSION']

  {
    :uri => host ? File.join("http://", host, path) : path,
    :request_method => request.request_method,
    :version => (version =~ /^HTTP\/(.*)$/ ? $1.to_f : version),
    :headers => headers,
    :params => params
  }
end
157
+
158
# Maps the block over the members of an array and returns the collected
# results. Any object that is not an array is itself yielded to the block,
# and the block result is returned directly.
def each_member(obj)
  return yield(obj) unless obj.kind_of?(Array)

  obj.map {|member| yield(member) }
end
168
+
169
# Collapses arrays of length <= 1 to their first member (nil for an empty
# array) and returns longer arrays, and any non-array input, as they are.
# Splat is useful to simplify hashes of http headers and parameters that
# may have multiple values, but typically only have one.
#
#   splat([])             # => nil
#   splat([:one])         # => :one
#   splat([:one, :two])   # => [:one, :two]
#
def splat(array)
  return array unless array.kind_of?(Array)

  array.length > 1 ? array : array.first
end
187
+
188
# Headerizes an underscored string. The input is converted to a string
# using to_s, split on runs of whitespace, underscores, and dashes, and
# each word is capitalized before being joined with dashes. Leading,
# trailing, and repeated separators are ignored rather than producing
# empty words (which previously raised a NoMethodError).
#
#   headerize('SOME_STRING')    # => 'Some-String'
#   headerize('some string')    # => 'Some-String'
#   headerize('Some-String')    # => 'Some-String'
#   headerize('some__string')   # => 'Some-String'
#
def headerize(str)
  words = str.to_s.split(/[\s_-]+/).reject {|word| word.empty? }
  words.map {|word| word.capitalize }.join("-")
end
201
+
202
+ EOL = Rack::Utils::Multipart::EOL
203
+ # Parses the multipart/form-data body of a Rack env into a hash of params. Lifted from Rack::Utils::Multipart, and modified to collect
204
+ # overloaded params and params with names suffixed by '[]' as arrays. Returns nil unless CONTENT_TYPE is multipart/form-data with a boundary.
205
+ # Parts with a filename become hashes (:filename, :type, :name, :tempfile, :head); raises EOFError ("bad content body") on a malformed or truncated body.
206
+ def parse_multipart(env)
207
+ unless env['CONTENT_TYPE'] =~
208
+ %r|\Amultipart/form-data.*boundary=\"?([^\";,]+)\"?|n
209
+ nil
210
+ else
211
+ boundary = "--#{$1}" # $1 is the boundary captured from CONTENT_TYPE
212
+
213
+ params = {}
214
+ buf = "" # holds data read from the input but not yet consumed
215
+ content_length = env['CONTENT_LENGTH'].to_i # decremented as the body is read
216
+ input = env['rack.input']
217
+
218
+ boundary_size = boundary.size + EOL.size
219
+ bufsize = 16384 # maximum number of bytes requested per read
220
+
221
+ content_length -= boundary_size
222
+
223
+ status = input.read(boundary_size) # the body must open with the boundary followed by EOL
224
+ raise EOFError, "bad content body" unless status == boundary + EOL
225
+
226
+ rx = /(?:#{EOL})?#{Regexp.quote boundary}(#{EOL}|--)/ # a boundary, capturing either EOL or the "--" that closes the final boundary
227
+
228
+ loop {
229
+ head = nil
230
+ body = ''
231
+ filename = content_type = name = nil
232
+
233
+ until head && buf =~ rx # read until the part head is parsed and a boundary is in the buffer
234
+ if !head && i = buf.index("\r\n\r\n")
235
+ head = buf.slice!(0, i+2) # First \r\n
236
+ buf.slice!(0, 2) # Second \r\n
237
+
238
+ filename = head[/Content-Disposition:.* filename="?([^\";]*)"?/ni, 1]
239
+ content_type = head[/Content-Type: (.*)\r\n/ni, 1]
240
+ name = head[/Content-Disposition:.* name="?([^\";]*)"?/ni, 1]
241
+
242
+ if filename
243
+ body = Tempfile.new("RackMultipart") # parts with a filename are written to a Tempfile instead of a String
244
+ body.binmode if body.respond_to?(:binmode)
245
+ end
246
+
247
+ next
248
+ end
249
+
250
+ # Save the read body part, holding back the last boundary_size+4 bytes (presumably so a boundary split across reads is never written to the body).
251
+ if head && (boundary_size+4 < buf.size)
252
+ body << buf.slice!(0, buf.size - (boundary_size+4))
253
+ end
254
+
255
+ c = input.read(bufsize < content_length ? bufsize : content_length)
256
+ raise EOFError, "bad content body" if c.nil? || c.empty?
257
+ buf << c
258
+ content_length -= c.size
259
+ end
260
+
261
+ # Save the rest: everything before the boundary match belongs to this part, then boundary_size+2 bytes are dropped from the buffer.
262
+ if i = buf.index(rx)
263
+ body << buf.slice!(0, i)
264
+ buf.slice!(0, boundary_size+2)
265
+
266
+ content_length = -1 if $1 == "--" # final boundary seen; -1 tells the outer loop to stop
267
+ end
268
+
269
+ if filename
270
+ body.rewind
271
+ data = {:filename => filename, :type => content_type,
272
+ :name => name, :tempfile => body, :head => head}
273
+ else
274
+ data = body
275
+ end
276
+
277
+ if name
278
+ case current = params[name]
279
+ when nil
280
+ params[name] = (name =~ /\[\]\z/ ? [data] : data) # names ending in '[]' always start an array
281
+ when Array
282
+ params[name] << data
283
+ else
284
+ params[name] = [current, data] # a repeated name turns the existing value into an array
285
+ end
286
+ end
287
+
288
+ break if buf.empty? || content_length == -1
289
+ }
290
+
291
+ params
292
+ end
293
+ end
294
+ end
295
+ end
296
+ end
data/tap.yml ADDED
@@ -0,0 +1 @@
1
+ gems: tap-tasks
@@ -0,0 +1,31 @@
1
+ <h1>Parse HTTP Parameters</h1>
2
+
3
+ <p>Enter an HTTP request, like the ones you can capture using the
4
+ <a href='https://addons.mozilla.org/en-US/firefox/addon/3829'>LiveHTTPHeaders</a> addon for
5
+ <a href='http://www.mozilla.com/en-US/firefox/'>Firefox</a>.
6
+ </p>
7
+
8
+ <form action='<%= uri(:http) %>' method='post'>
9
+ <textarea rows='20' cols='60' name='http'></textarea>
10
+ <br/>
11
+ <input type='checkbox' name='keep_content' value='true' checked='true'> Keep File Content
12
+ <input type='submit' value='Parse'>
13
+ </form>
14
+
15
+ <p>Note the request must be properly formatted. For example:</p>
16
+
17
+ <pre>
18
+ GET / HTTP/1.1
19
+ Host: tap.rubyforge.org
20
+ User-Agent: Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.12) Gecko/20080201 Firefox/2.0.0.12
21
+ Accept: text/xml,application/xml,application/xhtml+xml,text/html;q=0.9,text/plain;q=0.8,image/png,*/*;q=0.5
22
+ Accept-Language: en-us,en;q=0.5
23
+ Accept-Encoding: gzip,deflate
24
+ Accept-Charset: ISO-8859-1,utf-8;q=0.7,*;q=0.7
25
+ Keep-Alive: 300
26
+ Connection: keep-alive
27
+ </pre>
28
+
29
+ <p>Proper formatting for POST requires an empty line between the headers and body.
30
+ Without it you get a WEBrick::HTTPStatus::BadRequest error.</p>
31
+