tap-mechanize 0.5.0
Sign up to get free protection for your applications and to get access to all the features.
- data/History +41 -0
- data/MIT-LICENSE +19 -0
- data/README +58 -0
- data/lib/tap/mechanize/agent.rb +101 -0
- data/lib/tap/mechanize/capture.rb +194 -0
- data/lib/tap/mechanize/get.rb +20 -0
- data/lib/tap/mechanize/request.rb +33 -0
- data/lib/tap/mechanize/submit.rb +30 -0
- data/lib/tap/mechanize/test.rb +59 -0
- data/lib/tap/mechanize/test/echo_server.rb +20 -0
- data/lib/tap/mechanize/test/mock_server.rb +31 -0
- data/lib/tap/mechanize/utils.rb +296 -0
- data/tap.yml +1 -0
- data/views/tap/mechanize/capture/http.erb +31 -0
- data/views/tap/mechanize/capture/index.erb +11 -0
- data/views/tap/mechanize/capture/redirect.css +28 -0
- data/views/tap/mechanize/capture/redirect.js +184 -0
- data/views/tap/mechanize/capture/redirect_http.erb +15 -0
- data/views/tap/mechanize/capture/test.erb +108 -0
- data/views/tap/mechanize/capture/tutorial.erb +57 -0
- metadata +108 -0
@@ -0,0 +1,30 @@
|
|
1
|
+
require 'tap/mechanize/request'
|
2
|
+
|
3
|
+
module Tap
|
4
|
+
module Mechanize
|
5
|
+
# :startdoc::manifest submits a captured http request
|
6
|
+
#
|
7
|
+
# Performs a series of HTTP requests and returns the content of the final
|
8
|
+
# page. Requests must be hashes that at least specify the uri of the next
|
9
|
+
# request, but more typically also specify the request method, parameters,
|
10
|
+
# and even headers.
|
11
|
+
#
|
12
|
+
# All requests are performed in the same session, so logins and subsequent
|
13
|
+
# actions are allowed. The easiest way to capture HTTP requests is to
|
14
|
+
# use the capture server distributed with tap-mechanize. Simply start a
|
15
|
+
# tap server and work through the tutorial (http://localhost:8080/capture/tutorial).
|
16
|
+
#
|
17
|
+
# % tap server
|
18
|
+
#
|
19
|
+
# Once you have a request file:
|
20
|
+
#
|
21
|
+
# % tap run -- load/yaml --:s submit --: dump < request.yml
|
22
|
+
#
|
23
|
+
# Remember, only the results of the final page are returned!
|
24
|
+
class Submit < Request
  # Collects the given requests into a single array, hands that array to the
  # superclass implementation of process, and returns the +content+ of
  # whatever the superclass returns.
  def process(*requests)
    final_page = super(requests)
    final_page.content
  end
end
|
29
|
+
end
|
30
|
+
end
|
@@ -0,0 +1,59 @@
|
|
1
|
+
require 'rack'
|
2
|
+
require 'webrick'
|
3
|
+
require 'thread'
|
4
|
+
require 'stringio'
|
5
|
+
require 'tap/mechanize/test/mock_server'
|
6
|
+
require 'tap/mechanize/test/echo_server'
|
7
|
+
|
8
|
+
module Tap
|
9
|
+
module Mechanize
|
10
|
+
|
11
|
+
# Mechanize::Test allows you to mock out remote servers to test HTTP
|
12
|
+
# clients. The mechanize_test method launches a WEBrick server hosting
|
13
|
+
# a Rack application for the duration of the block. Typically the
|
14
|
+
# application is constructed to send back an expected response.
|
15
|
+
#
|
16
|
+
# class SampleTest < Test::Unit::TestCase
|
17
|
+
# include Tap::Mechanize::Test
|
18
|
+
#
|
19
|
+
# def test_mechanize_test
|
20
|
+
# m = MockServer.new {|env| ['success'] }
|
21
|
+
# a = WWW::Mechanize.new
|
22
|
+
# mechanize_test(m) do
|
23
|
+
# assert_equal 'success', a.get('http://localhost:2000/').body
|
24
|
+
# end
|
25
|
+
# end
|
26
|
+
# end
|
27
|
+
#
|
28
|
+
module Test

  # Returns the default WEBrick config for mechanize_test. By default the
  # server runs on port 2000 and a single WEBrick::Log, created at the WARN
  # level and writing to log_dev, is used for both :Logger and :AccessLog.
  #
  # NOTE(review): WEBrick documents :AccessLog as an array of
  # [logger, format] pairs; confirm a bare logger is acceptable here.
  def default_webrick_config(log_dev=StringIO.new(''))
    common_logger = WEBrick::Log.new(log_dev, WEBrick::Log::WARN)
    {
      :Port => 2000,
      :Logger => common_logger,
      :AccessLog => common_logger
    }
  end

  # Sets up a local WEBrick server that mounts the Rack app at "/" and yields
  # to the block. The server is started on its own thread and is shut down
  # when the block completes (normally or by raising). Returns the result of
  # the block. See default_webrick_config for setup information.
  #
  # If the server cannot be created (for example the port is unavailable)
  # the original error propagates and the block is never called.
  def mechanize_test(app=EchoServer, config=default_webrick_config)
    server = nil
    begin
      server = ::WEBrick::HTTPServer.new(config)
      server.mount("/", Rack::Handler::WEBrick, app)
      Thread.new { server.start }
      yield
    ensure
      # server is still nil when HTTPServer.new raised; calling shutdown on
      # nil would replace the real error with a NoMethodError.
      server.shutdown if server
    end
  end
end
|
55
|
+
end
|
56
|
+
end
|
57
|
+
|
58
|
+
|
59
|
+
|
@@ -0,0 +1,20 @@
|
|
1
|
+
module Tap
|
2
|
+
module Mechanize
|
3
|
+
module Test
|
4
|
+
# EchoServer is a Rack application that simply echoes back the request
|
5
|
+
# environment. The response is:
|
6
|
+
#
|
7
|
+
# [200, {request headers}, [request body]]
|
8
|
+
#
|
9
|
+
class EchoServer
  # Rack entry point. Reads the request body from env['rack.input'] and
  # returns a 200 response whose headers are every env entry whose key does
  # not match /^rack/ (each value wrapped in a one-element array) and whose
  # body is the request body wrapped in an array.
  def self.call(env)
    payload = env['rack.input'].read

    echoed = {}
    env.each_pair do |name, value|
      next if name =~ /^rack/
      echoed[name] = [value]
    end

    [200, echoed, [payload]]
  end
end
|
18
|
+
end
|
19
|
+
end
|
20
|
+
end
|
@@ -0,0 +1,31 @@
|
|
1
|
+
module Tap
|
2
|
+
module Mechanize
|
3
|
+
module Test
|
4
|
+
|
5
|
+
# MockServer allows easy creation of a lazy Rack application that calls
|
6
|
+
# the block for content. The status and headers of the response are
|
7
|
+
# setup during initialize.
|
8
|
+
#
|
9
|
+
# env = Rack::MockRequest.env_for('http://localhost:2000/')
|
10
|
+
#
|
11
|
+
# m = MockServer.new {|env| ['yo'] }
|
12
|
+
# m.call(env) # => [200, {'Content-Type' => 'text/html'}, ['yo']]
|
13
|
+
#
|
14
|
+
class MockServer
  # Stores the status, headers and content block used to build responses.
  # Status defaults to 200 and headers to {'Content-Type' => 'text/html'}.
  def initialize(status=200, headers={'Content-Type' => 'text/html'}, &block)
    @status, @headers, @block = status, headers, block
  end

  # Calls the initialization block with env and uses its return value as the
  # content of the response.
  #
  # Returns: [status, headers, block-return]
  def call(env)
    content = @block.call(env)
    [@status, @headers, content]
  end
end
|
29
|
+
end
|
30
|
+
end
|
31
|
+
end
|
@@ -0,0 +1,296 @@
|
|
1
|
+
autoload(:WEBrick, 'webrick')
|
2
|
+
autoload(:StringIO, 'stringio')
|
3
|
+
require 'rack'
|
4
|
+
|
5
|
+
module Tap
|
6
|
+
module Mechanize
|
7
|
+
module Utils
|
8
|
+
module_function
|
9
|
+
|
10
|
+
# Parses a WEBrick::HTTPRequest from the input socket into a hash that
|
11
|
+
# may be resubmitted by Dispatch. Sockets can be any kind of IO (File,
|
12
|
+
# StringIO, etc..) and should be positioned such that the next line is
|
13
|
+
# the start of an HTTP request. Strings used as sockets are converted
|
14
|
+
# into StringIO objects.
|
15
|
+
#
|
16
|
+
# parse_http_request("GET /path HTTP/1.1\n")
|
17
|
+
# # => {
|
18
|
+
# # :request_method => "GET",
|
19
|
+
# # :uri => "/path",
|
20
|
+
# # :version => "1.1",
|
21
|
+
# # :headers => {},
|
22
|
+
# # :params => {},
|
23
|
+
# # }
|
24
|
+
#
|
25
|
+
# ==== WEBrick parsing of HTTP format
|
26
|
+
#
|
27
|
+
# WEBrick will parse headers then the body of a request, and currently
|
28
|
+
# (1.8.6) considers an empty line as a break between the headers and
|
29
|
+
# body. In general header parsing is forgiving with end-line
|
30
|
+
# characters (ie "\r\n" and "\n" are both acceptable) but parsing of
|
31
|
+
# multipart/form data IS NOT.
|
32
|
+
#
|
33
|
+
# Multipart/form data REQUIRES that the end-line characters are "\r\n".
|
34
|
+
# A boundary is always started with "--" and the last boundary completed
|
35
|
+
# with "--". As always, the content-length must be correct.
|
36
|
+
#
|
37
|
+
# # Notice an empty line between the last header
|
38
|
+
# # (in this case 'Content-Length') and the body.
|
39
|
+
# msg = <<-_end_of_message_
|
40
|
+
# POST /path HTTP/1.1
|
41
|
+
# Host: localhost:8080
|
42
|
+
# Content-Type: multipart/form-data; boundary=1234567890
|
43
|
+
# Content-Length: 158
|
44
|
+
#
|
45
|
+
# --1234567890
|
46
|
+
# Content-Disposition: form-data; name="one"
|
47
|
+
#
|
48
|
+
# value one
|
49
|
+
# --1234567890
|
50
|
+
# Content-Disposition: form-data; name="two"
|
51
|
+
#
|
52
|
+
# value two
|
53
|
+
# --1234567890--
|
54
|
+
# _end_of_message_
|
55
|
+
#
|
56
|
+
# # ensure the end of line characters are correct...
|
57
|
+
# socket = StringIO.new msg.gsub(/\n/, "\r\n")
|
58
|
+
#
|
59
|
+
# Tap::Mechanize::Utils.parse_http_request(socket)
|
60
|
+
# # => {
|
61
|
+
# # :request_method => "POST",
|
62
|
+
# # :uri => "http://localhost:8080/path",
|
63
|
+
# # :version => "1.1",
|
64
|
+
# # :headers => {
|
65
|
+
# # "Host" => "localhost:8080",
|
66
|
+
# # "Content-Type" => "multipart/form-data; boundary=1234567890",
|
67
|
+
# # "Content-Length" => "158"},
|
68
|
+
# # :params => {
|
69
|
+
# # "one" => "value one",
|
70
|
+
# # "two" => "value two"}}
|
71
|
+
#
|
72
|
+
#--
|
73
|
+
# TODO: check if there are other headers to capture from
|
74
|
+
# a multipart/form file. Currently only
|
75
|
+
# 'Filename' and 'Content-Type' are added
|
76
|
+
# Parses an HTTP request read from socket into a request hash. A String
# socket is wrapped in a StringIO first; anything else is handed to
# WEBrick::HTTPRequest#parse as-is. The parsed request and keep_content are
# then passed to parse_webrick_request, whose result is returned.
def parse_http_request(socket, keep_content=true)
  io = socket.kind_of?(String) ? StringIO.new(socket) : socket

  request = WEBrick::HTTPRequest.new(WEBrick::Config::HTTP)
  request.parse(io)

  parse_webrick_request(request, keep_content)
end
|
84
|
+
|
85
|
+
# Parses a WEBrick::HTTPRequest, with the same activity as
|
86
|
+
# parse_http_request.
|
87
|
+
# Converts a parsed WEBrick request into a hash with the keys :uri,
# :request_method, :version, :headers and :params.
#
# Header names are normalized with headerize, and header/param values are
# passed through splat. Query values that report a filename become hashes
# with 'Filename' and 'Content-Type' (plus 'Content' when keep_content is
# true); all other values are converted with to_s. The uri is prefixed with
# "http://<Host>" when a Host header is present, otherwise it is just
# req.path_info.
def parse_webrick_request(req, keep_content=true)
  headers = {}
  if req.header
    req.header.each_pair do |name, values|
      headers[headerize(name)] = splat(values)
    end
  end

  params = {}
  if req.query
    req.query.each_pair do |name, form_data|
      # Each query value is walked with each_data; in the original author's
      # tests these were WEBrick::HTTPUtils::FormData objects.
      entries = []
      form_data.each_data do |data|
        if data.filename
          file = {'Filename' => data.filename, 'Content-Type' => data['Content-Type']}
          # NOTE(review): String#to_a was removed in Ruby 1.9; if data is a
          # String subclass without its own to_a this raises NoMethodError
          # on newer Rubies -- confirm the intended content extraction.
          file['Content'] = data.to_a.join("\n") if keep_content
          entries << file
        else
          entries << data.to_s
        end
      end

      params[name] = splat(entries)
    end
  end

  host = headers['Host']
  uri = host ? File.join("http://", host, req.path_info) : req.path_info

  {
    :uri => uri,
    :request_method => req.request_method,
    :version => req.http_version.to_s,
    :headers => headers,
    :params => params
  }
end
|
121
|
+
|
122
|
+
# Parses a Rack::Request, with the same activity as parse_http_request.
|
123
|
+
# Converts a Rack::Request-like object into a hash with the keys :uri,
# :request_method, :version, :headers and :params.
#
# Headers are taken from request.env: every HTTP_* entry except
# HTTP_VERSION, plus CONTENT_TYPE, with names normalized by headerize.
# Params that are hashes (file uploads) become hashes with 'Content-Type'
# and 'Filename', plus 'Content' (read from the :tempfile entry) when
# keep_content is true; other params are passed through unchanged.
#
# The uri is "http://<Host><PATH_INFO>" when a Host header is present and
# just PATH_INFO otherwise, mirroring parse_webrick_request. The version is
# a Float (e.g. 1.1) when HTTP_VERSION looks like "HTTP/x.y"; otherwise the
# raw HTTP_VERSION value is returned.
def parse_rack_request(request, keep_content=true)
  headers = {}
  request.env.each_pair do |key, value|
    key = case key
    when "HTTP_VERSION" then next
    when /^HTTP_(.*)/ then $1
    when 'CONTENT_TYPE' then key
    else next
    end

    headers[headerize(key)] = value
  end

  params = {}
  request.params.each_pair do |key, value|
    params[key] = each_member(value) do |obj|
      if obj.kind_of?(Hash)
        file = {'Content-Type' => obj[:type], 'Filename' => obj[:filename]}
        file['Content'] = obj[:tempfile].read if keep_content
        file
      else
        obj
      end
    end
  end

  host = headers['Host']
  path = request.env['PATH_INFO']
  version = request.env['HTTP_VERSION']

  {
    # Without a Host header File.join would raise a TypeError on nil, so
    # fall back to the bare path.
    :uri => host ? File.join("http://", host, path) : path,
    :request_method => request.request_method,
    :version => version =~ /^HTTP\/(.*)$/ ? $1.to_f : version,
    :headers => headers,
    :params => params
  }
end
|
157
|
+
|
158
|
+
# Yields each member of an input array to the block and collects the
|
159
|
+
# result. If obj is not an array, the value is simply yielded to the
|
160
|
+
# block.
|
161
|
+
# Applies the block to obj. An Array is mapped member-by-member and the
# collected results are returned; any other object is yielded directly and
# the block's result is returned.
def each_member(obj)
  return yield(obj) unless obj.kind_of?(Array)

  obj.map { |member| yield(member) }
end
|
168
|
+
|
169
|
+
# Returns the first member of arrays length <= 1, or the array in all
|
170
|
+
# other cases. Splat is useful to simplify hashes of http headers
|
171
|
+
# and parameters that may have multiple values, but typically only
|
172
|
+
# have one.
|
173
|
+
#
|
174
|
+
# splat([]) # => nil
|
175
|
+
# splat([:one]) # => :one
|
176
|
+
# splat([:one, :two]) # => [:one, :two]
|
177
|
+
#
|
178
|
+
# Collapses arrays of zero or one member to that member (nil for an empty
# array) and returns longer arrays, and any non-array input, unchanged.
def splat(array)
  return array unless array.kind_of?(Array)

  array.length > 1 ? array : array.first
end
|
187
|
+
|
188
|
+
# Headerizes an underscored string. The input is converted to
|
189
|
+
# a string using to_s.
|
190
|
+
#
|
191
|
+
# headerize('SOME_STRING') # => 'Some-String'
|
192
|
+
# headerize('some string') # => 'Some-String'
|
193
|
+
# headerize('Some-String') # => 'Some-String'
|
194
|
+
#
|
195
|
+
# Converts str (via to_s) into header form: the input is split on
# whitespace, dashes and underscores, each word gets an upper-case first
# character and lower-case remainder, and the words are joined with "-".
#
# Empty words produced by leading or repeated separators are skipped, so
# inputs like '_some__string' no longer raise.
#
#   headerize('SOME_STRING')    # => 'Some-String'
#   headerize('some string')    # => 'Some-String'
#   headerize('_some__string')  # => 'Some-String'
def headerize(str)
  words = str.to_s.split(/[\s_-]/).reject { |word| word.empty? }
  words.map { |word| word[0, 1].upcase + word[1..-1].downcase }.join("-")
end
|
201
|
+
|
202
|
+
EOL = Rack::Utils::Multipart::EOL
|
203
|
+
# Lifted from Rack::Utils::Multipart, and modified to collect
|
204
|
+
# overloaded params and params with names suffixed by '[]' as
|
205
|
+
# arrays.
|
206
|
+
# Parses a multipart/form-data body out of a Rack env.
#
# Returns nil unless env['CONTENT_TYPE'] is multipart/form-data with a
# boundary. Otherwise reads the body from env['rack.input'] and returns a
# hash of part name => value, where a part with a filename becomes a hash
# (:filename, :type, :name, :tempfile, :head) and any other part is the
# body string. Names ending in '[]' are always collected into arrays, and a
# name that appears more than once is converted into an array of its values.
#
# Raises EOFError ("bad content body") if the body does not start with the
# boundary line, or if a read returns nil or an empty string before the
# current part is complete.
def parse_multipart(env)
  unless env['CONTENT_TYPE'] =~
    %r|\Amultipart/form-data.*boundary=\"?([^\";,]+)\"?|n
    nil
  else
    # $1 is the boundary token captured from CONTENT_TYPE above.
    boundary = "--#{$1}"

    params = {}
    buf = ""
    content_length = env['CONTENT_LENGTH'].to_i
    input = env['rack.input']

    boundary_size = boundary.size + EOL.size
    bufsize = 16384

    # Account for the opening boundary line that is consumed just below.
    content_length -= boundary_size

    status = input.read(boundary_size)
    raise EOFError, "bad content body" unless status == boundary + EOL

    # Matches a boundary, optionally preceded by EOL, followed by either EOL
    # (another part follows) or "--" (closing boundary).
    rx = /(?:#{EOL})?#{Regexp.quote boundary}(#{EOL}|--)/

    # One iteration per part.
    loop {
      head = nil
      body = ''
      filename = content_type = name = nil

      # Keep reading until the part's headers have been found and the buffer
      # contains the next boundary.
      until head && buf =~ rx
        if !head && i = buf.index("\r\n\r\n")
          head = buf.slice!(0, i+2) # First \r\n
          buf.slice!(0, 2) # Second \r\n

          filename = head[/Content-Disposition:.* filename="?([^\";]*)"?/ni, 1]
          content_type = head[/Content-Type: (.*)\r\n/ni, 1]
          name = head[/Content-Disposition:.* name="?([^\";]*)"?/ni, 1]

          # File parts are accumulated in a Tempfile rather than a String.
          if filename
            body = Tempfile.new("RackMultipart")
            body.binmode if body.respond_to?(:binmode)
          end

          next
        end

        # Save the read body part.
        # All but the last boundary_size+4 bytes are moved into body;
        # presumably the tail is kept so a boundary split across two reads
        # can still be matched -- TODO confirm.
        if head && (boundary_size+4 < buf.size)
          body << buf.slice!(0, buf.size - (boundary_size+4))
        end

        c = input.read(bufsize < content_length ? bufsize : content_length)
        raise EOFError, "bad content body" if c.nil? || c.empty?
        buf << c
        content_length -= c.size
      end

      # Save the rest.
      if i = buf.index(rx)
        body << buf.slice!(0, i)
        buf.slice!(0, boundary_size+2)

        # $1 comes from the buf.index(rx) match; "--" marks the closing
        # boundary, and -1 is the sentinel checked by the break below.
        content_length = -1 if $1 == "--"
      end

      if filename
        body.rewind
        data = {:filename => filename, :type => content_type,
          :name => name, :tempfile => body, :head => head}
      else
        data = body
      end

      # Parts without a name are dropped.
      if name
        case current = params[name]
        when nil
          # First value: names ending in '[]' start out as arrays.
          params[name] = (name =~ /\[\]\z/ ? [data] : data)
        when Array
          params[name] << data
        else
          # Second value for a plain name: promote to an array.
          params[name] = [current, data]
        end
      end

      break if buf.empty? || content_length == -1
    }

    params
  end
end
|
294
|
+
end
|
295
|
+
end
|
296
|
+
end
|
data/tap.yml
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
gems: tap-tasks
|
@@ -0,0 +1,31 @@
|
|
1
|
+
<h1>Parse HTTP Parameters</h1>
|
2
|
+
|
3
|
+
<p>Enter an HTTP request, like the ones you can capture using the
|
4
|
+
<a href='https://addons.mozilla.org/en-US/firefox/addon/3829'>LiveHTTPHeaders</a> addon for
|
5
|
+
<a href='http://www.mozilla.com/en-US/firefox/'>Firefox</a>.
|
6
|
+
</p>
|
7
|
+
|
8
|
+
<form action='<%= uri(:http) %>' method='post'>
|
9
|
+
<textarea rows='20' cols='60' name='http'></textarea>
|
10
|
+
<br/>
|
11
|
+
<input type='checkbox' name='keep_content' value='true' checked='true'> Keep File Content
|
12
|
+
<input type='submit' value='Parse'>
|
13
|
+
</form>
|
14
|
+
|
15
|
+
<p>Note the request must be properly formatted. For example:</p>
|
16
|
+
|
17
|
+
<pre>
|
18
|
+
GET / HTTP/1.1
|
19
|
+
Host: tap.rubyforge.org
|
20
|
+
User-Agent: Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.12) Gecko/20080201 Firefox/2.0.0.12
|
21
|
+
Accept: text/xml,application/xml,application/xhtml+xml,text/html;q=0.9,text/plain;q=0.8,image/png,*/*;q=0.5
|
22
|
+
Accept-Language: en-us,en;q=0.5
|
23
|
+
Accept-Encoding: gzip,deflate
|
24
|
+
Accept-Charset: ISO-8859-1,utf-8;q=0.7,*;q=0.7
|
25
|
+
Keep-Alive: 300
|
26
|
+
Connection: keep-alive
|
27
|
+
</pre>
|
28
|
+
|
29
|
+
<p>Proper formatting for POST requires an empty line between the headers and body.
|
30
|
+
Without it you get a WEBrick::HTTPStatus::BadRequest error.</p>
|
31
|
+
|