tap-mechanize 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,30 @@
1
+ require 'tap/mechanize/request'
2
+
3
module Tap
  module Mechanize
    # :startdoc::manifest submits a captured http request
    #
    # Performs a series of HTTP requests and returns the content of the final
    # page. Requests must be hashes that at least specify the uri of the next
    # request, but more typically also specify the request method, parameters,
    # and even headers.
    #
    # All requests are performed in the same session, so logins and subsequent
    # actions are allowed. The easiest way to capture HTTP requests is to
    # use the capture server distributed with tap-mechanize. Simply start a
    # tap server and work through the tutorial (http://localhost:8080/capture/tutorial).
    #
    #   % tap server
    #
    # Once you have a request file:
    #
    #   % tap run -- load/yaml --:s submit --: dump < request.yml
    #
    # Remember, only the results of the final page are returned!
    class Submit < Request
      def process(*requests)
        # The splatted requests are handed to Request#process as one array;
        # only the +content+ of what that call returns is passed along.
        final_page = super(requests)
        final_page.content
      end
    end
  end
end
@@ -0,0 +1,59 @@
1
+ require 'rack'
2
+ require 'webrick'
3
+ require 'thread'
4
+ require 'stringio'
5
+ require 'tap/mechanize/test/mock_server'
6
+ require 'tap/mechanize/test/echo_server'
7
+
8
module Tap
  module Mechanize

    # Mechanize::Test allows you to mock out remote servers to test HTTP
    # clients.  The mechanize_test method launches a WEBrick server hosting
    # a Rack application for the duration of the block.  Typically the
    # application is constructed to send back an expected response.
    #
    #   class SampleTest < Test::Unit::TestCase
    #     include Tap::Mechanize::Test
    #
    #     def test_mechanize_test
    #       m = MockServer.new {|env| ['success'] }
    #       a = WWW::Mechanize.new
    #       mechanize_test(m) do
    #         assert_equal 'success', a.get('http://localhost:2000/').body
    #       end
    #     end
    #   end
    #
    module Test

      # The default WEBrick config for a mechanize_test.  The returned hash
      # sets the port to 2000 and uses a single WARN-level WEBrick::Log,
      # writing to log_dev, as both the :Logger and the :AccessLog.
      def default_webrick_config(log_dev=StringIO.new(''))
        common_logger = WEBrick::Log.new(log_dev, WEBrick::Log.const_get(:WARN))
        {
          :Port => 2000,
          :Logger => common_logger,
          :AccessLog => common_logger
        }
      end

      # Sets up a local WEBrick server that runs the Rack app and yields to the
      # block.  The server is started on its own thread and is shut down when
      # the block completes, whether it returns normally or raises.  See
      # default_webrick_config for setup information.
      def mechanize_test(app=EchoServer, config=default_webrick_config)
        # Build the server before entering the begin/ensure: if construction
        # fails (for example the port is already taken) there is nothing to
        # shut down, and the original error must surface rather than being
        # masked by a NoMethodError from calling shutdown on nil.
        server = ::WEBrick::HTTPServer.new(config)

        begin
          server.mount("/", Rack::Handler::WEBrick, app)
          Thread.new { server.start }
          yield
        ensure
          server.shutdown
        end
      end
    end
  end
end
57
+
58
+
59
+
@@ -0,0 +1,20 @@
1
module Tap
  module Mechanize
    module Test
      # EchoServer is a Rack application that reflects the request back to
      # the client.  The response is:
      #
      #   [200, {request headers}, [request body]]
      #
      # where the headers are every env entry whose key does not match
      # /^rack/, each value wrapped in a one-element array, and the body is
      # whatever is read from env['rack.input'].
      class EchoServer
        def self.call(env)
          request_body = env['rack.input'].read

          echoed = {}
          env.each_pair do |name, value|
            next if name =~ /^rack/
            echoed[name] = [value]
          end

          [200, echoed, [request_body]]
        end
      end
    end
  end
end
@@ -0,0 +1,31 @@
1
module Tap
  module Mechanize
    module Test

      # MockServer is a small Rack application whose response content is
      # produced lazily by a block.  The status and headers are fixed when
      # the server is created; the block is only run when a request arrives.
      #
      #   env = Rack::MockRequest.env_for('http://localhost:2000/')
      #
      #   m = MockServer.new {|env| ['yo'] }
      #   m.call(env)        # => [200, {'Content-Type' => 'text/html'}, ['yo']]
      #
      class MockServer
        # Stores the status, headers, and content block for later use by call.
        def initialize(status=200, headers={'Content-Type' => 'text/html'}, &block)
          @status, @headers, @block = status, headers, block
        end

        # Runs the initialization block with env and uses its return value as
        # the content of the response.
        #
        # Returns: [status, headers, block-return]
        def call(env)
          content = @block.call(env)
          [@status, @headers, content]
        end
      end
    end
  end
end
@@ -0,0 +1,296 @@
1
+ autoload(:WEBrick, 'webrick')
2
+ autoload(:StringIO, 'stringio')
3
+ require 'rack'
4
+
5
module Tap
  module Mechanize
    # Utils collects helpers that turn HTTP requests (raw text, WEBrick
    # requests, or Rack requests) into plain hashes of the form:
    #
    #   {:uri => ..., :request_method => ..., :version => ..., :headers => {...}, :params => {...}}
    #
    # All methods are module functions and may be called directly, for
    # example Tap::Mechanize::Utils.headerize('content_type').
    module Utils
      module_function

      # Parses a WEBrick::HTTPRequest from the input socket into a hash
      # describing the request.  Sockets can be any kind of IO (File,
      # StringIO, etc..) and should be positioned such that the next line is
      # the start of an HTTP request.  Strings used as sockets are converted
      # into StringIO objects.
      #
      #   parse_http_request("GET /path HTTP/1.1\n")
      #   # => {
      #   # :uri => "/path",
      #   # :request_method => "GET",
      #   # :version => "1.1",
      #   # :headers => {},
      #   # :params => {},
      #   # }
      #
      # ==== WEBrick parsing of HTTP format
      #
      # WEBrick will parse headers then the body of a request, and currently
      # (1.8.6) considers an empty line as a break between the headers and
      # body.  In general header parsing is forgiving with end-line
      # characters (ie "\r\n" and "\n" are both acceptable) but parsing of
      # multipart/form data IS NOT.
      #
      # Multipart/form data REQUIRES that the end-line characters are "\r\n".
      # A boundary is always started with "--" and the last boundary completed
      # with "--".  As always, the content-length must be correct.
      #
      #   # Notice an empty line between the last header
      #   # (in this case 'Content-Length') and the body.
      #   msg = <<-_end_of_message_
      #   POST /path HTTP/1.1
      #   Host: localhost:8080
      #   Content-Type: multipart/form-data; boundary=1234567890
      #   Content-Length: 158
      #
      #   --1234567890
      #   Content-Disposition: form-data; name="one"
      #
      #   value one
      #   --1234567890
      #   Content-Disposition: form-data; name="two"
      #
      #   value two
      #   --1234567890--
      #   _end_of_message_
      #
      #   # ensure the end of line characters are correct...
      #   socket = StringIO.new msg.gsub(/\n/, "\r\n")
      #
      #   Tap::Mechanize::Utils.parse_http_request(socket)
      #   # => {
      #   # :uri => "http://localhost:8080/path",
      #   # :request_method => "POST",
      #   # :version => "1.1",
      #   # :headers => {
      #   #   "Host" => "localhost:8080",
      #   #   "Content-Type" => "multipart/form-data; boundary=1234567890",
      #   #   "Content-Length" => "158"},
      #   # :params => {
      #   #   "one" => "value one",
      #   #   "two" => "value two"}}
      #
      # When keep_content is false the 'Content' of uploaded files is left
      # out of the params.
      #
      #--
      # TODO: check if there are other headers to capture from
      # a multipart/form file.  Currently only
      # 'Filename' and 'Content-Type' are added
      def parse_http_request(socket, keep_content=true)
        socket = StringIO.new(socket) if socket.kind_of?(String)

        req = WEBrick::HTTPRequest.new(WEBrick::Config::HTTP)
        req.parse(socket)

        parse_webrick_request(req, keep_content)
      end

      # Parses a WEBrick::HTTPRequest, with the same activity as
      # parse_http_request.
      #
      # Header names are normalized with headerize and both header and
      # parameter values are simplified with splat.  The :uri is built from
      # the Host header when one is present, otherwise it is just the
      # request path.
      def parse_webrick_request(req, keep_content=true)
        headers = {}
        (req.header || {}).each_pair do |key, values|
          headers[headerize(key)] = splat(values)
        end

        params = {}
        (req.query || {}).each_pair do |key, value|
          # In tests value is (always?) a WEBrick::HTTPUtils::FormData, and
          # each_data yields further FormData.  A FormData that represents a
          # file has a filename; its content type is looked up with [].
          values = []
          value.each_data do |data|
            values << if data.filename
              file = {'Filename' => data.filename, 'Content-Type' => data['Content-Type']}
              # to_s yields the raw content.  (String#to_a, used here
              # previously, does not exist on Ruby >= 1.9 and re-joined the
              # lines with extra newlines on 1.8.)
              file['Content'] = data.to_s if keep_content
              file
            else
              data.to_s
            end
          end

          params[key] = splat(values)
        end

        host = headers['Host']
        {
          :uri => host ? File.join("http://", host, req.path_info) : req.path_info,
          :request_method => req.request_method,
          :version => req.http_version.to_s,
          :headers => headers,
          :params => params
        }
      end

      # Parses a Rack::Request, with the same activity as parse_http_request.
      #
      # Headers are taken from the HTTP_* entries of the request env (other
      # than HTTP_VERSION) plus CONTENT_TYPE.  Uploaded files, which appear
      # in the params as hashes, are converted to hashes with 'Content-Type',
      # 'Filename' and (when keep_content is true) 'Content' keys.  As in
      # parse_webrick_request, the :uri falls back to the bare PATH_INFO when
      # the request has no Host header.
      def parse_rack_request(request, keep_content=true)
        headers = {}
        request.env.each_pair do |key, value|
          name = case key
          when "HTTP_VERSION" then nil
          when /^HTTP_(.*)/   then $1
          when 'CONTENT_TYPE' then key
          end

          headers[headerize(name)] = value if name
        end

        params = {}
        request.params.each_pair do |key, value|
          params[key] = each_member(value) do |obj|
            if obj.kind_of?(Hash)
              file = {'Content-Type' => obj[:type], 'Filename' => obj[:filename]}
              file['Content'] = obj[:tempfile].read if keep_content
              file
            else
              obj
            end
          end
        end

        host = headers['Host']
        path = request.env['PATH_INFO']
        version = request.env['HTTP_VERSION']

        {
          :uri => host ? File.join("http://", host, path) : path,
          :request_method => request.request_method,
          :version => version =~ /^HTTP\/(.*)$/ ? $1.to_f : version,
          :headers => headers,
          :params => params
        }
      end

      # Yields each member of an input array to the block and collects the
      # result.  If obj is not an array, the value is simply yielded to the
      # block and the block result is returned.
      def each_member(obj)
        if obj.kind_of?(Array)
          obj.collect {|value| yield(value) }
        else
          yield(obj)
        end
      end

      # Returns the first member of arrays length <= 1, or the array in all
      # other cases.  Splat is useful to simplify hashes of http headers
      # and parameters that may have multiple values, but typically only
      # have one.  Non-array inputs are returned unchanged.
      #
      #   splat([])                  # => nil
      #   splat([:one])              # => :one
      #   splat([:one, :two])        # => [:one, :two]
      #
      def splat(array)
        return array unless array.kind_of?(Array)

        case array.length
        when 0 then nil
        when 1 then array.first
        else array
        end
      end

      # Headerizes an underscored string.  The input is converted to
      # a string using to_s, split on whitespace, dashes and underscores,
      # and each word is capitalized before being joined with dashes.
      # Empty words (from leading or repeated separators) are dropped.
      #
      #   headerize('SOME_STRING')   # => 'Some-String'
      #   headerize('some string')   # => 'Some-String'
      #   headerize('Some-String')   # => 'Some-String'
      #   headerize('_some__string') # => 'Some-String'
      #
      def headerize(str)
        words = str.to_s.split(/[\s_-]/).reject {|word| word.empty? }
        words.collect {|word| word.capitalize }.join("-")
      end

      # The end-of-line sequence used within multipart bodies.  This is the
      # same CRLF value Rack uses for multipart parsing, written out here so
      # the module does not depend on where Rack defines its constant.
      EOL = "\r\n"

      # Lifted from Rack::Utils::Multipart, and modified to collect
      # overloaded params and params with names suffixed by '[]' as
      # arrays.
      #
      # Reads the multipart body from env['rack.input'], using the boundary
      # named in env['CONTENT_TYPE'] and the length in env['CONTENT_LENGTH'].
      # Returns a hash of params, or nil when the content type is not
      # multipart/form-data.  Parts with a filename are stored as hashes
      # (:filename, :type, :name, :tempfile, :head) backed by a Tempfile;
      # other parts are stored as strings.  Raises EOFError if the body does
      # not start with the boundary or ends prematurely.
      def parse_multipart(env)
        return nil unless env['CONTENT_TYPE'] =~
          %r|\Amultipart/form-data.*boundary=\"?([^\";,]+)\"?|n

        boundary = "--#{$1}"

        params = {}
        buf = ""
        content_length = env['CONTENT_LENGTH'].to_i
        input = env['rack.input']

        boundary_size = boundary.size + EOL.size
        bufsize = 16384

        content_length -= boundary_size

        status = input.read(boundary_size)
        raise EOFError, "bad content body"  unless status == boundary + EOL

        rx = /(?:#{EOL})?#{Regexp.quote boundary}(#{EOL}|--)/

        # Each pass of the outer loop consumes one part: its head (the part
        # headers) and then its body up to the next boundary.
        loop do
          head = nil
          body = ''
          filename = content_type = name = nil

          until head && buf =~ rx
            if !head && i = buf.index("\r\n\r\n")
              head = buf.slice!(0, i+2) # First \r\n
              buf.slice!(0, 2)          # Second \r\n

              filename = head[/Content-Disposition:.* filename="?([^\";]*)"?/ni, 1]
              content_type = head[/Content-Type: (.*)\r\n/ni, 1]
              name = head[/Content-Disposition:.* name="?([^\";]*)"?/ni, 1]

              if filename
                body = Tempfile.new("RackMultipart")
                body.binmode  if body.respond_to?(:binmode)
              end

              next
            end

            # Save the read body part, holding back enough of the buffer
            # that a boundary split across reads can still be matched.
            if head && (boundary_size+4 < buf.size)
              body << buf.slice!(0, buf.size - (boundary_size+4))
            end

            c = input.read(bufsize < content_length ? bufsize : content_length)
            raise EOFError, "bad content body"  if c.nil? || c.empty?
            buf << c
            content_length -= c.size
          end

          # Save the rest.
          if i = buf.index(rx)
            body << buf.slice!(0, i)
            buf.slice!(0, boundary_size+2)

            # a boundary followed by "--" closes the whole body
            content_length = -1  if $1 == "--"
          end

          if filename
            body.rewind
            data = {:filename => filename, :type => content_type,
                    :name => name, :tempfile => body, :head => head}
          else
            data = body
          end

          if name
            case current = params[name]
            when nil
              params[name] = (name =~ /\[\]\z/ ? [data] : data)
            when Array
              params[name] << data
            else
              params[name] = [current, data]
            end
          end

          break  if buf.empty? || content_length == -1
        end

        params
      end
    end
  end
end
data/tap.yml ADDED
@@ -0,0 +1 @@
1
+ gems: tap-tasks
@@ -0,0 +1,31 @@
1
+ <h1>Parse HTTP Parameters</h1>
2
+
3
+ <p>Enter an HTTP request, like the ones you can capture using the
4
+ <a href='https://addons.mozilla.org/en-US/firefox/addon/3829'>LiveHTTPHeaders</a> addon for
5
+ <a href='http://www.mozilla.com/en-US/firefox/'>Firefox</a>.
6
+ </p>
7
+
8
+ <form action='<%= uri(:http) %>' method='post'>
9
+ <textarea rows='20' cols='60' name='http'></textarea>
10
+ <br/>
11
+ <input type='checkbox' name='keep_content' value='true' checked='true'> Keep File Content
12
+ <input type='submit' value='Parse'>
13
+ </form>
14
+
15
+ <p>Note the request must be properly formatted.  For example:</p>
16
+
17
+ <pre>
18
+ GET / HTTP/1.1
19
+ Host: tap.rubyforge.org
20
+ User-Agent: Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.12) Gecko/20080201 Firefox/2.0.0.12
21
+ Accept: text/xml,application/xml,application/xhtml+xml,text/html;q=0.9,text/plain;q=0.8,image/png,*/*;q=0.5
22
+ Accept-Language: en-us,en;q=0.5
23
+ Accept-Encoding: gzip,deflate
24
+ Accept-Charset: ISO-8859-1,utf-8;q=0.7,*;q=0.7
25
+ Keep-Alive: 300
26
+ Connection: keep-alive
27
+ </pre>
28
+
29
+ <p>Proper formatting for POST requires an empty line between the headers and body.
30
+ Without it you get a WEBrick::HTTPStatus::BadRequest error.</p>
31
+