tap-mechanize 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/History +41 -0
- data/MIT-LICENSE +19 -0
- data/README +58 -0
- data/lib/tap/mechanize/agent.rb +101 -0
- data/lib/tap/mechanize/capture.rb +194 -0
- data/lib/tap/mechanize/get.rb +20 -0
- data/lib/tap/mechanize/request.rb +33 -0
- data/lib/tap/mechanize/submit.rb +30 -0
- data/lib/tap/mechanize/test.rb +59 -0
- data/lib/tap/mechanize/test/echo_server.rb +20 -0
- data/lib/tap/mechanize/test/mock_server.rb +31 -0
- data/lib/tap/mechanize/utils.rb +296 -0
- data/tap.yml +1 -0
- data/views/tap/mechanize/capture/http.erb +31 -0
- data/views/tap/mechanize/capture/index.erb +11 -0
- data/views/tap/mechanize/capture/redirect.css +28 -0
- data/views/tap/mechanize/capture/redirect.js +184 -0
- data/views/tap/mechanize/capture/redirect_http.erb +15 -0
- data/views/tap/mechanize/capture/test.erb +108 -0
- data/views/tap/mechanize/capture/tutorial.erb +57 -0
- metadata +108 -0
@@ -0,0 +1,30 @@
|
|
1
|
+
require 'tap/mechanize/request'
|
2
|
+
|
3
|
+
module Tap
|
4
|
+
module Mechanize
|
5
|
+
# :startdoc::manifest submits a captured http request
|
6
|
+
#
|
7
|
+
# Performs a series of HTTP requests and returns the content of the final
|
8
|
+
# page. Requests must be hashes that at least specify the uri of the next
|
9
|
+
# request, but more typically also specify the request method, parameters,
|
10
|
+
# and even headers.
|
11
|
+
#
|
12
|
+
# All requests are performed in the same session, so logins and subsequent
|
13
|
+
# actions are allowed. The easiest way to capture HTTP requests is to
|
14
|
+
# use the capture server distributed with tap-mechanize. Simply start a
|
15
|
+
# tap server and work through the tutorial (http://localhost:8080/capture/tutorial).
|
16
|
+
#
|
17
|
+
# % tap server
|
18
|
+
#
|
19
|
+
# Once you have a request file:
|
20
|
+
#
|
21
|
+
# % tap run -- load/yaml --:s submit --: dump < request.yml
|
22
|
+
#
|
23
|
+
# Remember, only the results of the final page are returned!
|
24
|
+
class Submit < Request
  # Hands the collected requests (as a single array) to Request#process
  # and returns the content of whatever that call returns.
  def process(*requests)
    final_page = super(requests)
    final_page.content
  end
end
|
29
|
+
end
|
30
|
+
end
|
@@ -0,0 +1,59 @@
|
|
1
|
+
require 'rack'
|
2
|
+
require 'webrick'
|
3
|
+
require 'thread'
|
4
|
+
require 'stringio'
|
5
|
+
require 'tap/mechanize/test/mock_server'
|
6
|
+
require 'tap/mechanize/test/echo_server'
|
7
|
+
|
8
|
+
module Tap
|
9
|
+
module Mechanize
|
10
|
+
|
11
|
+
# Mechanize::Test allows you to mock out remote servers to test HTTP
|
12
|
+
# clients. The mechanize_test method launches a WEBrick server hosting
|
13
|
+
# a Rack application for the duration of the block. Typically the
|
14
|
+
# application is constructed to send back an expected response.
|
15
|
+
#
|
16
|
+
# class SampleTest < Test::Unit::TestCase
|
17
|
+
# include Tap::Mechanize::Test
|
18
|
+
#
|
19
|
+
# def test_mechanize_test
|
20
|
+
# m = MockServer.new {|env| ['success'] }
|
21
|
+
# a = WWW::Mechanize.new
|
22
|
+
# mechanize_test(m) do
|
23
|
+
# assert_equal 'success', a.get('http://localhost:2000/').body
|
24
|
+
# end
|
25
|
+
# end
|
26
|
+
# end
|
27
|
+
#
|
28
|
+
module Test
|
29
|
+
|
30
|
+
# The default WEBrick config for a mechanize_test. By default the server runs
|
31
|
+
# on port 2000 and logs all data to the input log device.
|
32
|
+
def default_webrick_config(log_dev=StringIO.new(''))
  # One WARN-level logger writing to log_dev is shared by the server log
  # and the access log.
  logger = WEBrick::Log.new(log_dev, WEBrick::Log::WARN)

  config = {}
  config[:Port] = 2000
  config[:Logger] = logger
  config[:AccessLog] = logger
  config
end
|
40
|
+
|
41
|
+
# Sets up a local WEBRick server that runs the Rack app and yields to the
|
42
|
+
# block. The server runs on its own thread and will be shutdown after
|
43
|
+
# the test completes. See default_webrick_config for setup information.
|
44
|
+
def mechanize_test(app=EchoServer, config=default_webrick_config)
  # Builds a WEBrick::HTTPServer from config, mounts the Rack app at "/",
  # starts the server on a new thread and yields to the block.  The server
  # is shut down when the block finishes, whether or not it raised.
  #
  # app::    the Rack application to mount (defaults to EchoServer)
  # config:: the WEBrick configuration hash (see default_webrick_config)
  #
  # Returns the value of the block.  Errors raised while constructing or
  # mounting the server propagate unchanged.
  server = nil
  begin
    server = ::WEBrick::HTTPServer.new(config)
    server.mount("/", Rack::Handler::WEBrick, app)
    Thread.new { server.start }
    yield
  ensure
    # server is still nil if HTTPServer.new raised (e.g. the port is in
    # use); calling shutdown on nil would mask the original error.
    server.shutdown if server
  end
end
|
54
|
+
end
|
55
|
+
end
|
56
|
+
end
|
57
|
+
|
58
|
+
|
59
|
+
|
@@ -0,0 +1,20 @@
|
|
1
|
+
module Tap
|
2
|
+
module Mechanize
|
3
|
+
module Test
|
4
|
+
# EchoServer is a Rack application that simply echos back the request
|
5
|
+
# environment. The response is:
|
6
|
+
#
|
7
|
+
# [200, {request headers}, [request body]]
|
8
|
+
#
|
9
|
+
class EchoServer
  # Rack entry point.  Responds with status 200, every env entry whose key
  # does not start with 'rack' (each value wrapped in a one-element array)
  # as the headers, and the request body read from 'rack.input'.
  def self.call(env)
    echoed = {}
    env.each_pair do |name, value|
      next if name =~ /^rack/
      echoed[name] = [value]
    end

    [200, echoed, [env['rack.input'].read]]
  end
end
|
18
|
+
end
|
19
|
+
end
|
20
|
+
end
|
@@ -0,0 +1,31 @@
|
|
1
|
+
module Tap
|
2
|
+
module Mechanize
|
3
|
+
module Test
|
4
|
+
|
5
|
+
# MockServer allows easy creation of a lazy Rack application that calls
|
6
|
+
# the block for content. The status and headers of the response are
|
7
|
+
# setup during initialize.
|
8
|
+
#
|
9
|
+
# env = Rack::MockRequest.env_for('http://localhost:2000/')
|
10
|
+
#
|
11
|
+
# m = MockServer.new {|env| ['yo'] }
|
12
|
+
# m.call(env) # => [200, {'Content-Type' => 'text/html'}, ['yo']]
|
13
|
+
#
|
14
|
+
class MockServer
  # Records the status and headers to respond with, and the block that
  # will produce the response content.
  def initialize(status=200, headers={'Content-Type' => 'text/html'}, &block)
    @status, @headers, @block = status, headers, block
  end

  # Rack entry point.  Calls the initialization block with env and uses
  # its return value as the response content.
  #
  # Returns: [status, headers, block-return]
  def call(env)
    content = @block.call(env)
    [@status, @headers, content]
  end
end
|
29
|
+
end
|
30
|
+
end
|
31
|
+
end
|
@@ -0,0 +1,296 @@
|
|
1
|
+
autoload(:WEBrick, 'webrick')
|
2
|
+
autoload(:StringIO, 'stringio')
|
3
|
+
require 'rack'
|
4
|
+
|
5
|
+
module Tap
|
6
|
+
module Mechanize
|
7
|
+
module Utils
|
8
|
+
module_function
|
9
|
+
|
10
|
+
# Parses a WEBrick::HTTPRequest from the input socket into a hash that
|
11
|
+
# may be resubmitted by Dispatch. Sockets can be any kind of IO (File,
|
12
|
+
# StringIO, etc..) and should be positioned such that the next line is
|
13
|
+
# the start of an HTTP request. Strings used as sockets are converted
|
14
|
+
# into StringIO objects.
|
15
|
+
#
|
16
|
+
# parse_http_request("GET /path HTTP/1.1\n")
|
17
|
+
# # => {
|
18
|
+
# # :request_method => "GET",
|
19
|
+
# # :uri => "/path",
|
20
|
+
# # :version => "1.1",
|
21
|
+
# # :headers => {},
|
22
|
+
# # :params => {},
|
23
|
+
# # }
|
24
|
+
#
|
25
|
+
# ==== WEBrick parsing of HTTP format
|
26
|
+
#
|
27
|
+
# WEBrick will parse headers then the body of a request, and currently
|
28
|
+
# (1.8.6) considers an empty line as a break between the headers and
|
29
|
+
# body. In general header parsing is forgiving with end-line
|
30
|
+
# characters (ie "\r\n" and "\n" are both acceptable) but parsing of
|
31
|
+
# multipart/form data IS NOT.
|
32
|
+
#
|
33
|
+
# Multipart/form data REQUIRES that the end-line characters are "\r\n".
|
34
|
+
# A boundary is always started with "--" and the last boundary completed
|
35
|
+
# with "--". As always, the content-length must be correct.
|
36
|
+
#
|
37
|
+
# # Notice an empty line between the last header
|
38
|
+
# # (in this case 'Content-Length') and the body.
|
39
|
+
# msg = <<-_end_of_message_
|
40
|
+
# POST /path HTTP/1.1
|
41
|
+
# Host: localhost:8080
|
42
|
+
# Content-Type: multipart/form-data; boundary=1234567890
|
43
|
+
# Content-Length: 158
|
44
|
+
#
|
45
|
+
# --1234567890
|
46
|
+
# Content-Disposition: form-data; name="one"
|
47
|
+
#
|
48
|
+
# value one
|
49
|
+
# --1234567890
|
50
|
+
# Content-Disposition: form-data; name="two"
|
51
|
+
#
|
52
|
+
# value two
|
53
|
+
# --1234567890--
|
54
|
+
# _end_of_message_
|
55
|
+
#
|
56
|
+
# # ensure the end of line characters are correct...
|
57
|
+
# socket = StringIO.new msg.gsub(/\n/, "\r\n")
|
58
|
+
#
|
59
|
+
# Tap::Mechanize::Utils.parse_http_request(socket)
|
60
|
+
# # => {
|
61
|
+
# # :request_method => "POST",
|
62
|
+
# # :uri => "http://localhost:8080/path",
|
63
|
+
# # :version => "1.1",
|
64
|
+
# # :headers => {
|
65
|
+
# # "Host" => "localhost:8080",
|
66
|
+
# # "Content-Type" => "multipart/form-data; boundary=1234567890",
|
67
|
+
# # "Content-Length" => "158"},
|
68
|
+
# # :params => {
|
69
|
+
# # "one" => "value one",
|
70
|
+
# # "two" => "value two"}}
|
71
|
+
#
|
72
|
+
#--
|
73
|
+
# TODO: check if there are other headers to capture from
|
74
|
+
# a multipart/form file. Currently only
|
75
|
+
# 'Filename' and 'Content-Type' are added
|
76
|
+
def parse_http_request(socket, keep_content=true)
  # Strings are wrapped in a StringIO so they can be read like any other IO.
  io = socket.kind_of?(String) ? StringIO.new(socket) : socket

  request = WEBrick::HTTPRequest.new(WEBrick::Config::HTTP)
  request.parse(io)

  # The conversion to a plain hash is shared with parse_webrick_request.
  parse_webrick_request(request, keep_content)
end
|
84
|
+
|
85
|
+
# Parses a WEBrick::HTTPRequest, with the same activity as
|
86
|
+
# parse_http_request.
|
87
|
+
def parse_webrick_request(req, keep_content=true)
  # Header names are normalized with headerize; single-valued headers are
  # unwrapped by splat.
  headers = {}
  if req.header
    req.header.each_pair do |name, values|
      headers[headerize(name)] = splat(values)
    end
  end

  # Each query value is walked with each_data.  Entries with a filename
  # become a hash describing the file (content included only when
  # keep_content is set); all other entries are reduced to strings.
  params = {}
  if req.query
    req.query.each_pair do |name, form_data|
      entries = []
      form_data.each_data do |part|
        if part.filename
          upload = {'Filename' => part.filename, 'Content-Type' => part['Content-Type']}
          upload['Content'] = part.to_a.join("\n") if keep_content
          entries << upload
        else
          entries << part.to_s
        end
      end

      params[name] = splat(entries)
    end
  end

  # Without a Host header only the path is available for the uri.
  host = headers['Host']
  uri = host ? File.join("http://", host, req.path_info) : req.path_info

  {
    :uri => uri,
    :request_method => req.request_method,
    :version => req.http_version.to_s,
    :headers => headers,
    :params => params
  }
end
|
121
|
+
|
122
|
+
# Parses a Rack::Request, with the same activity as parse_http_request.
|
123
|
+
def parse_rack_request(request, keep_content=true)
  # Converts a Rack::Request into the same hash layout produced by
  # parse_webrick_request (:uri, :request_method, :version, :headers,
  # :params).
  #
  # request::      responds to env, params and request_method
  # keep_content:: when true, the content of uploaded files is read from
  #                their tempfile and stored under 'Content'
  #
  # Note :version is a Float when HTTP_VERSION looks like "HTTP/x.y",
  # otherwise the raw HTTP_VERSION value.

  # Only HTTP_* entries (except HTTP_VERSION) and CONTENT_TYPE are headers.
  headers = {}
  request.env.each_pair do |key, value|
    key = case key
    when "HTTP_VERSION" then next
    when /^HTTP_(.*)/ then $1
    when 'CONTENT_TYPE' then key
    else next
    end

    headers[headerize(key)] = value
  end

  params = {}
  request.params.each_pair do |key, value|
    params[key] = each_member(value) do |obj|
      # Only hashes carrying a :tempfile are uploads; other hashes (for
      # example nested parameters) are passed through untouched.
      if obj.kind_of?(Hash) && obj.has_key?(:tempfile)
        file = {'Content-Type' => obj[:type], 'Filename' => obj[:filename]}
        file['Content'] = obj[:tempfile].read if keep_content
        file
      else
        obj
      end
    end
  end

  # Without a Host header fall back to the bare path, as
  # parse_webrick_request does, rather than failing in File.join.
  host = headers['Host']
  path = request.env['PATH_INFO']

  {
    :uri => host ? File.join("http://", host, path) : path,
    :request_method => request.request_method,
    :version => request.env['HTTP_VERSION'] =~ /^HTTP\/(.*)$/ ? $1.to_f : request.env['HTTP_VERSION'],
    :headers => headers,
    :params => params
  }
end
|
157
|
+
|
158
|
+
# Yields each member of an input array to the block and collects the
|
159
|
+
# result. If obj is not an array, the value is simply yielded to the
|
160
|
+
# block.
|
161
|
+
def each_member(obj)
  # Anything that is not an Array is yielded as-is and the block result is
  # returned directly; Arrays are mapped member by member.
  return yield(obj) unless obj.kind_of?(Array)

  obj.map {|member| yield(member) }
end
|
168
|
+
|
169
|
+
# Returns the first member of arrays length <= 1, or the array in all
|
170
|
+
# other cases. Splat is useful to simplify hashes of http headers
|
171
|
+
# and parameters that may have multiple values, but typically only
|
172
|
+
# have one.
|
173
|
+
#
|
174
|
+
# splat([]) # => nil
|
175
|
+
# splat([:one]) # => :one
|
176
|
+
# splat([:one, :two]) # => [:one, :two]
|
177
|
+
#
|
178
|
+
def splat(array)
  # Non-arrays pass straight through.
  return array unless array.kind_of?(Array)
  return nil if array.empty?

  # A lone member is unwrapped; longer arrays are returned unchanged.
  array.length == 1 ? array.first : array
end
|
187
|
+
|
188
|
+
# Headerizes an underscored string.  The input is converted to
|
189
|
+
# a string using to_s.
|
190
|
+
#
|
191
|
+
# headerize('SOME_STRING') # => 'Some-String'
|
192
|
+
# headerize('some string') # => 'Some-String'
|
193
|
+
# headerize('Some-String') # => 'Some-String'
|
194
|
+
#
|
195
|
+
def headerize(str)
  # Converts str (via to_s) into header form: whitespace, hyphens and
  # underscores all act as separators, each segment gets an upper-case
  # first character and a lower-case remainder, and the segments are
  # joined with '-'.
  #
  #   headerize('SOME_STRING')   # => 'Some-String'
  #   headerize('SOME__STRING')  # => 'Some--String'
  #   headerize('')              # => ''
  #
  str.to_s.gsub(/\s|-/, "_").split("_").collect do |segment|
    # Doubled or leading separators produce empty segments; keep them
    # as-is rather than failing on a missing first character.
    next segment if segment.empty?

    segment[0, 1].upcase + segment[1..-1].downcase
  end.join("-")
end
|
201
|
+
|
202
|
+
EOL = Rack::Utils::Multipart::EOL
|
203
|
+
# Lifted from Rack::Utils::Multipart, and modified to collect
|
204
|
+
# overloaded params and params with names suffixed by '[]' as
|
205
|
+
# arrays.
|
206
|
+
def parse_multipart(env)
# Parses a multipart/form-data body from a Rack env into a params hash.
# Returns nil unless env['CONTENT_TYPE'] is multipart/form-data with a
# boundary.  The body is read from env['rack.input'], using
# env['CONTENT_LENGTH'] to bound the reads.  Raises EOFError
# ("bad content body") if the body does not start with the boundary or
# if a read returns nothing before the part is complete.
|
207
|
+
unless env['CONTENT_TYPE'] =~
|
208
|
+
%r|\Amultipart/form-data.*boundary=\"?([^\";,]+)\"?|n
|
209
|
+
nil
|
210
|
+
else
|
211
|
+
boundary = "--#{$1}"
|
212
|
+
|
213
|
+
params = {}
|
214
|
+
buf = ""
|
215
|
+
content_length = env['CONTENT_LENGTH'].to_i
|
216
|
+
input = env['rack.input']
|
217
|
+
|
218
|
+
boundary_size = boundary.size + EOL.size
|
219
|
+
bufsize = 16384
|
220
|
+
|
221
|
+
content_length -= boundary_size
|
222
|
+
|
223
|
+
# The body must open with the boundary followed by EOL.
status = input.read(boundary_size)
|
224
|
+
raise EOFError, "bad content body" unless status == boundary + EOL
|
225
|
+
|
226
|
+
# Matches the next boundary; the capture is EOL for an intermediate
# boundary and "--" for the closing one.
rx = /(?:#{EOL})?#{Regexp.quote boundary}(#{EOL}|--)/
|
227
|
+
|
228
|
+
# Each pass of this loop consumes one part of the body.
loop {
|
229
|
+
head = nil
|
230
|
+
body = ''
|
231
|
+
filename = content_type = name = nil
|
232
|
+
|
233
|
+
until head && buf =~ rx
|
234
|
+
if !head && i = buf.index("\r\n\r\n")
|
235
|
+
head = buf.slice!(0, i+2) # First \r\n
|
236
|
+
buf.slice!(0, 2) # Second \r\n
|
237
|
+
|
238
|
+
filename = head[/Content-Disposition:.* filename="?([^\";]*)"?/ni, 1]
|
239
|
+
content_type = head[/Content-Type: (.*)\r\n/ni, 1]
|
240
|
+
name = head[/Content-Disposition:.* name="?([^\";]*)"?/ni, 1]
|
241
|
+
|
242
|
+
# Parts with a filename are collected in a Tempfile instead of a String.
if filename
|
243
|
+
body = Tempfile.new("RackMultipart")
|
244
|
+
body.binmode if body.respond_to?(:binmode)
|
245
|
+
end
|
246
|
+
|
247
|
+
next
|
248
|
+
end
|
249
|
+
|
250
|
+
# Save the read body part.
|
251
|
+
if head && (boundary_size+4 < buf.size)
|
252
|
+
body << buf.slice!(0, buf.size - (boundary_size+4))
|
253
|
+
end
|
254
|
+
|
255
|
+
c = input.read(bufsize < content_length ? bufsize : content_length)
|
256
|
+
raise EOFError, "bad content body" if c.nil? || c.empty?
|
257
|
+
buf << c
|
258
|
+
content_length -= c.size
|
259
|
+
end
|
260
|
+
|
261
|
+
# Save the rest.
|
262
|
+
if i = buf.index(rx)
|
263
|
+
body << buf.slice!(0, i)
|
264
|
+
buf.slice!(0, boundary_size+2)
|
265
|
+
|
266
|
+
# -1 is a sentinel marking that the closing boundary was seen.
content_length = -1 if $1 == "--"
|
267
|
+
end
|
268
|
+
|
269
|
+
if filename
|
270
|
+
body.rewind
|
271
|
+
data = {:filename => filename, :type => content_type,
|
272
|
+
:name => name, :tempfile => body, :head => head}
|
273
|
+
else
|
274
|
+
data = body
|
275
|
+
end
|
276
|
+
|
277
|
+
# The first value for a name is stored directly, or as a one-element
# array when the name ends in '[]'; repeated names accumulate in an array.
if name
|
278
|
+
case current = params[name]
|
279
|
+
when nil
|
280
|
+
params[name] = (name =~ /\[\]\z/ ? [data] : data)
|
281
|
+
when Array
|
282
|
+
params[name] << data
|
283
|
+
else
|
284
|
+
params[name] = [current, data]
|
285
|
+
end
|
286
|
+
end
|
287
|
+
|
288
|
+
break if buf.empty? || content_length == -1
|
289
|
+
}
|
290
|
+
|
291
|
+
params
|
292
|
+
end
|
293
|
+
end
|
294
|
+
end
|
295
|
+
end
|
296
|
+
end
|
data/tap.yml
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
gems: tap-tasks
|
@@ -0,0 +1,31 @@
|
|
1
|
+
<h1>Parse HTTP Parameters</h1>
|
2
|
+
|
3
|
+
<p>Enter an HTTP request, like the ones you can capture using the
|
4
|
+
<a href='https://addons.mozilla.org/en-US/firefox/addon/3829'>LiveHTTPHeaders</a> addon for
|
5
|
+
<a href='http://www.mozilla.com/en-US/firefox/'>Firefox</a>.
|
6
|
+
</p>
|
7
|
+
|
8
|
+
<form action='<%= uri(:http) %>' method='post'>
|
9
|
+
<textarea rows='20' cols='60' name='http'></textarea>
|
10
|
+
<br/>
|
11
|
+
<input type='checkbox' name='keep_content' value='true' checked='true'> Keep File Content
|
12
|
+
<input type='submit' value='Parse'>
|
13
|
+
</form>
|
14
|
+
|
15
|
+
<p>Note the request must be properly formatted.  For example:</p>
|
16
|
+
|
17
|
+
<pre>
|
18
|
+
GET / HTTP/1.1
|
19
|
+
Host: tap.rubyforge.org
|
20
|
+
User-Agent: Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.12) Gecko/20080201 Firefox/2.0.0.12
|
21
|
+
Accept: text/xml,application/xml,application/xhtml+xml,text/html;q=0.9,text/plain;q=0.8,image/png,*/*;q=0.5
|
22
|
+
Accept-Language: en-us,en;q=0.5
|
23
|
+
Accept-Encoding: gzip,deflate
|
24
|
+
Accept-Charset: ISO-8859-1,utf-8;q=0.7,*;q=0.7
|
25
|
+
Keep-Alive: 300
|
26
|
+
Connection: keep-alive
|
27
|
+
</pre>
|
28
|
+
|
29
|
+
<p>Proper formatting for POST requires an empty line between the headers and body.
|
30
|
+
Without it you get a WEBrick::HTTPStatus::BadRequest error.</p>
|
31
|
+
|