http_parser 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,5 @@
1
+ README.rdoc
2
+ lib/**/*.rb
3
+ bin/*
4
+ features/**/*.feature
5
+ LICENSE
@@ -0,0 +1,22 @@
1
+ ## MAC OS
2
+ .DS_Store
3
+
4
+ ## TEXTMATE
5
+ *.tmproj
6
+ tmtags
7
+
8
+ ## EMACS
9
+ *~
10
+ \#*
11
+ .\#*
12
+
13
+ ## VIM
14
+ *.swp
15
+
16
+ ## PROJECT::GENERAL
17
+ coverage
18
+ rdoc
19
+ pkg
20
+
21
+ ## PROJECT::SPECIFIC
22
+ *.rbc
data/LICENSE ADDED
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2009 Graham Batty
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,72 @@
1
+ = http_parser
2
+
3
+ This gem provides a (hopefully) high quality http parser library that can
4
+ build request information iteratively as data comes over the line without
5
+ requiring the caller to maintain the entire body of the request as a single
6
+ string in memory.
7
+
8
+ It will also have a full set of specs and a Ruby-native reference library
9
+ so that it can be used in implementations or environments that do not
10
+ support C extensions.
11
+
12
+ Simple usage example:
13
+
14
+ p = Http::Parser.new
15
+ p.parse("GET / HTTP/1.1\r\n")
16
+ p.parse("Host: blah.com\r\n")
17
+ p.parse("Cookie: blorp=blah\r\n")
18
+ p.parse("\r\n")
19
+
20
+ p.method => "GET"
21
+ p.version => [1,1]
22
+ p.path => "/"
23
+ p.headers["HOST"] => "blah.com"
24
+ p.headers["COOKIE"] => "blorp=blah"
25
+
26
+ If the request is a type that has a body, the body will be available
27
+ as a stream object via p.body:
28
+
29
+ p = Http::Parser.new
30
+ p.parse("PUT / HTTP/1.1\r\n")
31
+ p.parse("Host: blah.com\r\n")
32
+ p.parse("Content-Type: text/text\r\n")
33
+ p.parse("Content-Length: 5\r\n")
34
+ p.parse("\r\n")
35
+ p.parse("stuff")
36
+
37
+ p.body.read => "stuff"
38
+
39
+ If you use p.parse!, any trailing text that isn't immediately parseable
40
+ will be left in the string object you pass in while what was parsed will be
41
+ removed. This allows you to hand the parser a large blob of data and let
42
+ it figure out what it needs and what it doesn't. When you get more data, you
43
+ can append it to your existing string and pass that in again until the request
44
+ is done. You can test if the request is done by using p.done?
45
+
46
+ p = Http::Parser.new
47
+ s = "GET / HTTP/1.1\r\nHost:"
48
+ p.parse!(s)
49
+ s => "Host:"
50
+ p.done? => false
51
+ s << " blah.com\r\n"
52
+ p.parse!(s)
53
+ s => ""
54
+ p.done? => false
55
+ s << "\r\n"
56
+ p.parse!(s)
57
+ s => ""
58
+ p.done? => true
59
+
60
+ == Note on Patches/Pull Requests
61
+
62
+ * Fork the project.
63
+ * Make your feature addition or bug fix.
64
+ * Add tests for it. This is important so I don't break it in a
65
+ future version unintentionally.
66
+ * Commit, but do not change the Rakefile, version, or history.
67
+ (If you want to have your own version, that is fine, but bump the version in a separate commit that I can ignore when I pull.)
68
+ * Send me a pull request. Bonus points for topic branches.
69
+
70
+ == Copyright
71
+
72
+ Copyright (c) 2010 Graham Batty. See LICENSE for details.
@@ -0,0 +1,48 @@
1
require 'rubygems'
require 'rake'

# Gem packaging tasks are provided by Jeweler. Jeweler is optional: when it
# (or one of its dependencies) cannot be loaded we only print a hint and
# carry on, so the remaining tasks stay usable.
begin
  require 'jeweler'
  Jeweler::Tasks.new do |gem|
    gem.name = "http_parser"
    gem.summary = %Q{HTTP Parser Library}
    gem.description = %Q{This gem provides a (hopefully) high quality http parser library that can
build request information iteratively as data comes over the line without
requiring the caller to maintain the entire body of the request as a single
string in memory.}
    gem.email = "graham@stormbrew.ca"
    gem.homepage = "http://github.com/stormbrew/http_parser"
    gem.authors = ["Graham Batty"]
    gem.add_development_dependency "rspec", ">= 1.2.9"
    # gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
  end
  Jeweler::GemcutterTasks.new
rescue LoadError
  puts "Jeweler (or a dependency) not available. Install it with: gem install jeweler"
end

# Spec tasks (RSpec 1.x rake integration).
require 'spec/rake/spectask'
Spec::Rake::SpecTask.new(:spec) do |spec|
  spec.libs << 'lib' << 'spec'
  spec.spec_files = FileList['spec/**/*_spec.rb']
end

Spec::Rake::SpecTask.new(:rcov) do |spec|
  spec.libs << 'lib' << 'spec'
  spec.pattern = 'spec/**/*_spec.rb'
  spec.rcov = true
end

# check_dependencies is defined by the Jeweler tasks above. Only hook it in
# when it exists; otherwise `rake spec` would abort with an unknown-task
# error whenever Jeweler is missing.
task :spec => :check_dependencies if Rake::Task.task_defined?(:check_dependencies)

task :default => :spec

# API documentation.
require 'rake/rdoctask'
Rake::RDocTask.new do |rdoc|
  # Strip the trailing newline of the VERSION file so it does not end up in
  # the generated title.
  version = File.exist?('VERSION') ? File.read('VERSION').strip : ""

  rdoc.rdoc_dir = 'rdoc'
  rdoc.title = "http_parser #{version}"
  rdoc.rdoc_files.include('README*')
  rdoc.rdoc_files.include('lib/**/*.rb')
end
data/VERSION ADDED
@@ -0,0 +1 @@
1
+ 0.1.0
@@ -0,0 +1,5 @@
1
+ $LOAD_PATH.unshift(File.dirname(__FILE__))
2
+ $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
3
+
4
+ require 'rubygems'
5
+ require 'benchmark'
@@ -0,0 +1,31 @@
1
# Micro-benchmark comparing Http::NativeParser against Mongrel's C parser
# (when the latter can be loaded) on one small GET request.
require File.expand_path(File.dirname(__FILE__) + '/bench_helper')

require 'http/parser'

request_body = <<REQ
GET /blakjsdfkas HTTP/1.1\r
Host: blooperblorp\r
Cookie: blah=woop\r
\r
REQ

#File.read(File.expand_path(File.dirname(__FILE__) + '/sample_request.http'))

# Number of parses per report. The previous `0.upto(100000)` loops ran one
# extra iteration (100,001).
iterations = 100_000

Benchmark.bmbm(20) do |bm|
  bm.report("Http::NativeParser") do
    iterations.times do
      # parse duplicates the string itself, so the fixture is never consumed.
      Http::NativeParser.new.parse(request_body)
    end
  end
  begin
    require 'http11'
    bm.report("Mongrel::HttpParser") do
      iterations.times do
        Mongrel::HttpParser.new.execute({}, request_body.dup, 0)
      end
    end
  rescue LoadError
    puts("Can't benchmark Mongrel::HttpParser as it couldn't be loaded.")
  end
end
31
+
@@ -0,0 +1,307 @@
1
+ require 'stringio'
2
+ require 'tempfile'
3
+ require 'strscan'
4
+
5
module Http
  # This is a native ruby implementation of the http parser. It is also
  # the reference implementation for this library. Later there will be one
  # written in C for performance reasons, and it will have to pass the same
  # specs as this one.
  #
  # The parser is a small state machine. Its states are :request_line,
  # :headers, :body_identity, :body_chunked, :body_chunked_tail and :done;
  # parse! dispatches to the private parse_<state> method for the current
  # state until the input is exhausted or no further progress can be made.
  class NativeParser
    # The HTTP method string used, exactly as sent by the client.
    # Recognised values are the keys of Methods ("OPTIONS", "GET", "HEAD",
    # "POST", "PUT", "DELETE", "TRACE", "CONNECT"). Any other value raises
    # Http::ParserError::NotImplemented since then we don't know whether
    # the request has a body.
    #
    # Note that this reader replaces Object#method on parser instances.
    attr_reader :method

    # The path given by the client as a string. No processing is done on
    # this and nearly anything is considered valid.
    attr_reader :path

    # The HTTP version of the request as an array of two integers.
    # [1,0] and [1,1] are the most likely values currently.
    attr_reader :version

    # A hash of headers passed to the server with the request. All
    # headers will be normalized to ALLCAPS_WITH_UNDERSCORES for
    # consistency's sake.
    attr_reader :headers

    # The body of the request as a stream object. May be either
    # a StringIO or a TempFile, depending on request length. nil when the
    # request has no body or the method does not allow one.
    attr_reader :body

    # The default set of parse options for the request.
    DefaultOptions = {
      # maximum length of an individual header line.
      # (declared for callers; not currently consulted by this class)
      :max_header_length => 10240,
      # maximum number of headers that can be passed to the server
      # (declared for callers; not currently consulted by this class)
      :max_headers => 100,
      # the size of the request body before it will be spilled
      # to a tempfile instead of being stored in memory.
      :min_tempfile_size => 1048576,
      # the class to use to create and manage the temporary file.
      # Must conform to the same interface as the stdlib Tempfile class
      :tempfile_class => Tempfile,
    }

    # Constants for method information
    MethodInfo = Struct.new(:must_have_body, :can_have_body)
    Methods = {
      "OPTIONS" => MethodInfo[false, true],
      "GET" => MethodInfo[false, false],
      "HEAD" => MethodInfo[false, false],
      "POST" => MethodInfo[true, true],
      "PUT" => MethodInfo[true, true],
      "DELETE" => MethodInfo[false, false],
      "TRACE" => MethodInfo[false, false],
      "CONNECT" => MethodInfo[false, false],
    }

    # Regex used to match the Request-Line
    RequestLineMatch = %r{^([a-zA-Z]+) (.+) HTTP/([0-9]+)\.([0-9]+)\r?\n}
    # Regex used to match a header line. Lines suspected of
    # being headers are also checked against the HeaderContinueMatch
    # to deal with multiline headers
    HeaderLineMatch = %r{^([a-zA-Z-]+):[ \t]*([[:print:]]+)\r?\n}
    HeaderContinueMatch = %r{^[ \t]+([[:print:]]+)\r?\n}
    EmptyLineMatch = %r{^\r?\n}

    # Regex used to match a size specification for a chunked segment.
    # NOTE(review): RFC 2616 sec 3.6.1 defines chunk-size as hexadecimal,
    # but this parser (and the specs shipped with it) read it as decimal.
    # Changing that requires updating the specs at the same time.
    ChunkSizeLineMatch = %r{^[0-9]+\r?\n}

    # Used as a fallback in error detection for a malformed request line or header.
    AnyLineMatch = %r{^.+?\r?\n}

    # Creates a parser waiting for a request line.
    #
    # options:: a hash overriding any of the keys in DefaultOptions.
    def initialize(options = DefaultOptions)
      @method = nil
      @path = nil
      @version = nil
      @headers = {}
      @body = nil
      @last_header = nil
      @body_length = 0
      @body_read = 0
      @chunk_remain = nil
      @state = :request_line
      @options = DefaultOptions.merge(options)
    end

    # Returns true if the http method being parsed (if
    # known at this point in the parse) must have a body.
    # If the method hasn't been determined yet, returns false.
    def must_have_body?
      info = Methods[@method]
      info ? info.must_have_body : false
    end

    # Returns true if the http method being parsed (if
    # known at this point in the parse) can have a body.
    # If the method hasn't been determined yet, returns false.
    def can_have_body?
      info = Methods[@method]
      info ? info.can_have_body : false
    end

    # Returns true if the request has a body stream, false otherwise.
    def has_body?
      !!@body
    end

    # Takes a string and runs it through the parser. Note that
    # it does not consume anything it can't completely parse, so
    # you should always pass complete request chunks (lines or body data)
    # to this method. It's mostly for testing and convenience.
    # In practical use, you want to use parse!, which will remove parsed
    # data from the string you pass in.
    #
    # The argument itself is never modified; a copy is parsed and whatever
    # parse! leaves of that copy is returned.
    def parse(str)
      parse!(str.dup)
    end

    # Consumes as much of str as it can and then removes it from str. This
    # allows you to iteratively pass data into the parser as it comes from
    # the client.
    #
    # Returns str (now holding only the unconsumed remainder). The consumed
    # prefix is removed even when a parser error is raised.
    def parse!(str)
      scanner = StringScanner.new(str)
      begin
        until scanner.eos?
          start_pos = scanner.pos
          send(:"parse_#{@state}", scanner)
          # if we didn't move forward, we've run out of useful string so
          # leave the rest for the caller.
          break if scanner.pos == start_pos
        end
      ensure
        # clear out whatever we managed to scan. scanner.pos is a byte
        # offset, so take the remainder from the scanner rather than
        # slicing str by character index.
        str.replace(scanner.rest)
      end
      str
    end

    # Returns true if the request is completely done.
    def done?
      @state == :done
    end

    # Returns true if the request has parsed the request-line (GET / HTTP/1.1)
    def done_request_line?
      [:headers, :body_identity, :body_chunked, :body_chunked_tail, :done].include?(@state)
    end

    # Returns true if all the headers from the request have been consumed.
    def done_headers?
      [:body_identity, :body_chunked, :body_chunked_tail, :done].include?(@state)
    end

    # Returns true if the request's body has been consumed (really the same as done?)
    def done_body?
      done?
    end

    private

    # State :request_line. Reads the method, path and version and moves on
    # to :headers. Raises NotImplemented for a method not listed in Methods
    # and BadRequest for any other complete line that is not a request line.
    def parse_request_line(scanner)
      if scanner.scan(RequestLineMatch)
        @method = scanner[1]
        @path = scanner[2]
        @version = [scanner[3].to_i, scanner[4].to_i]

        @state = :headers

        raise Http::ParserError::NotImplemented unless Methods[@method]
      elsif scanner.scan(EmptyLineMatch)
        # ignore an empty line before a request line.
      elsif scanner.scan(AnyLineMatch)
        raise Http::ParserError::BadRequest
      end
    end

    # State :headers. Consumes one header line, continuation line or the
    # empty line terminating the header section.
    def parse_headers(scanner)
      if scanner.scan(HeaderLineMatch)
        header = normalize_header(scanner[1])
        if @headers[header]
          # repeated headers are merged in order, comma separated.
          @headers[header] << "," << scanner[2]
        else
          @headers[header] = scanner[2]
        end
        @last_header = header
      elsif @last_header && scanner.scan(HeaderContinueMatch)
        @headers[@last_header] << " " << scanner[1]
      elsif scanner.scan(EmptyLineMatch)
        finish_headers
      elsif scanner.scan(AnyLineMatch)
        raise Http::ParserError::BadRequest
      end
    end

    # Called on the empty line after the headers: decides how (and whether)
    # a body follows and sets up the body stream.
    def finish_headers
      transfer_encoding = @headers["TRANSFER_ENCODING"]
      chunked = transfer_encoding && transfer_encoding.strip.downcase != 'identity'

      if chunked
        # a non-identity transfer encoding wins over Content-Length.
        @state = :body_chunked
        @body_length = 0 # this will get updated as we go.
        @body_read = 0
        @chunk_remain = nil
      elsif @headers["CONTENT_LENGTH"]
        @body_length = @headers["CONTENT_LENGTH"].to_i
        @body_read = 0
        @state = (@body_length > 0) ? :body_identity : :done
      else
        # No usable body length was declared (this includes an identity
        # transfer encoding sent without a Content-Length).
        # For methods that need a body we assume it has one and the client
        # just didn't tell us how big it was. This is more useful than
        # BadRequest.
        raise ParserError::LengthRequired if must_have_body?
        @state = :done
        return
      end

      # Methods that cannot have a body still get their entity consumed,
      # but it is not kept.
      @body = can_have_body? ? new_body_stream(@body_length) : nil
    end

    # State :body_identity. Copies up to the remaining Content-Length bytes
    # into the body stream.
    def parse_body_identity(scanner)
      remain = @body_length - @body_read
      # Content-Length and scanner.pos both count bytes, so slice by bytes.
      addition = scanner.string.byteslice(scanner.pos, remain)
      scanner.pos += addition.bytesize
      @body_read += addition.bytesize

      @body << addition if @body

      if @body_read >= @body_length
        @body.rewind if @body
        @state = :done
      end
    end

    # State :body_chunked. Alternates between reading a chunk-size line,
    # the chunk data and the empty line that closes each chunk.
    def parse_body_chunked(scanner)
      if @chunk_remain.nil?
        if scanner.scan(ChunkSizeLineMatch)
          @chunk_remain = scanner[0].to_i
          @state = :body_chunked_tail if @chunk_remain < 1
        elsif scanner.scan(AnyLineMatch)
          raise ParserError::BadRequest
        end
      elsif @chunk_remain > 0
        read_chunk_data(scanner)
      elsif scanner.scan(EmptyLineMatch)
        # the chunk is done.
        @chunk_remain = nil
      elsif scanner.scan(AnyLineMatch)
        # there was a line with stuff in it,
        # which is invalid here.
        raise ParserError::BadRequest
      end
    end

    # Copies as much of the current chunk as is available into the body,
    # moving an in-memory body to a tempfile once it grows large enough.
    def read_chunk_data(scanner)
      addition = scanner.string.byteslice(scanner.pos, @chunk_remain)
      scanner.pos += addition.bytesize
      @chunk_remain -= addition.bytesize
      @body_length += addition.bytesize

      # body-less methods have no stream; their data is simply dropped.
      return unless @body

      @body << addition

      if @body.kind_of?(StringIO) && @body.length >= @options[:min_tempfile_size]
        buffered = @body.string
        @body = new_tempfile
        @body << buffered
      end
    end

    # State :body_chunked_tail.
    def parse_body_chunked_tail(scanner)
      # It's not actually clear if tail headers are even
      # legal in a chunked request entity. The docs seem
      # to indicate that they should only be sent if the other
      # end is known to accept them, and there's no way to ensure
      # that when the client is the originator. As such, we'll
      # just ignore them for now. We'll do this by ignoring
      # any line until we hit an empty line, which will be treated
      # as the end of the entity.
      if scanner.scan(EmptyLineMatch)
        @state = :done
        @body.rewind if @body
      elsif scanner.scan(AnyLineMatch)
        # ignore the line.
      end
    end

    # State :done.
    def parse_done(scanner)
      # do nothing, the parse is done.
    end

    # Picks the body storage for an entity of the given expected length.
    def new_body_stream(expected_length)
      if expected_length >= @options[:min_tempfile_size]
        new_tempfile
      else
        StringIO.new
      end
    end

    # Creates the temporary file used for large bodies.
    def new_tempfile
      file = @options[:tempfile_class].new("http_parser")
      file.unlink # unlink immediately so we don't rely on the caller to do it.
      file
    end

    # Normalizes a header name to be UPPERCASE_WITH_UNDERSCORES
    def normalize_header(str)
      str.upcase.gsub('-', '_')
    end
  end
end
@@ -0,0 +1,32 @@
1
+ module Http
2
+ require 'http/native_parser'
3
+ begin
4
+ require 'http/fast_parser'
5
+ Parser = FastParser
6
+ rescue LoadError => e
7
+ Parser = NativeParser
8
+ end
9
+
10
+ # An exception class for HTTP parser errors. Includes
11
+ # an HTTP Error Code number that corresponds to the
12
+ # difficulty parsing (ie. 414 for Request-URI Too Long)
13
# An exception class for HTTP parser errors. Includes
# an HTTP Error Code number that corresponds to the
# difficulty parsing (ie. 414 for Request-URI Too Long)
#
# Every subclass can be raised bare (`raise ParserError::LengthRequired`),
# with a custom message (`raise ParserError::LengthRequired, "msg"`), or
# built explicitly with a message, code and reply headers.
class ParserError < RuntimeError
  # The error code that corresponds to the parsing error.
  attr_reader :code
  # Headers that should be sent back with the error reply as a hash.
  attr_reader :headers

  # string::  human readable message (also the exception message).
  # code::    HTTP status code for the error reply.
  # headers:: hash of headers to send with the error reply.
  def initialize(string = "Bad Request", code = 400, headers = {})
    super(string)
    @code = code
    @headers = headers
  end

  # 400: the request line or a header line was malformed.
  class BadRequest < ParserError; end

  # 408
  class RequestTimeout < ParserError
    def initialize(string = "Request Timeout", code = 408, headers = {})
      super
    end
  end

  # 411: a method that requires a body did not declare its length.
  class LengthRequired < ParserError
    def initialize(string = "Length Required", code = 411, headers = {})
      super
    end
  end

  # 413
  class RequestEntityTooLarge < ParserError
    def initialize(string = "Request Entity Too Large", code = 413, headers = {})
      super
    end
  end

  # 414
  class RequestURITooLong < ParserError
    def initialize(string = "Request-URI Too Long", code = 414, headers = {})
      super
    end
  end

  # 501: unrecognized request method. Callers should send an Allow header
  # with the reply; it can be supplied through the headers argument.
  class NotImplemented < ParserError
    def initialize(string = "Method Not Implemented", code = 501, headers = {})
      super
    end
  end
end
32
+ end
@@ -0,0 +1,428 @@
1
+ require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
2
+
3
+ require 'http/parser'
4
+
5
+ test_parsers = [Http::NativeParser]
6
+ test_parsers << Http::FastParser if Http.const_defined? :FastParser
7
+
8
+ describe Http::Parser do
9
+ it "should be a reference to Http::NativeParser, or if present Http::FastParser" do
10
+ Http.const_defined?(:Parser).should be_true
11
+ if (Http.const_defined?(:FastParser))
12
+ Http::Parser.should == Http::FastParser
13
+ else
14
+ Http::Parser.should == Http::NativeParser
15
+ end
16
+ end
17
+ end
18
+
19
+ test_parsers.each do |parser|
20
+ describe parser do
21
+ it "should be able to parse a simple GET request" do
22
+ p = parser.new
23
+ p.parse("GET / HTTP/1.1\r\n")
24
+ p.parse("Host: blah.com\r\n")
25
+ p.parse("Cookie: blorp=blah\r\n")
26
+ p.parse("\r\n")
27
+
28
+ p.done?.should be_true
29
+ p.method.should == "GET"
30
+ p.version.should == [1,1]
31
+ p.path.should == "/"
32
+ p.headers["HOST"].should == "blah.com"
33
+ p.headers["COOKIE"].should == "blorp=blah"
34
+ end
35
+
36
+ it "should raise an error on a malformed request line" do
37
+ p = parser.new
38
+ proc {
39
+ p.parse("GET / HTTx/balh.blorp\r\n")
40
+ }.should raise_error(Http::ParserError::BadRequest)
41
+ proc {
42
+ p.parse("GET HTTP/1.1\r\n")
43
+ }.should raise_error(Http::ParserError::BadRequest)
44
+ end
45
+
46
+ it "should raise an error on a malformed header line" do
47
+ p = parser.new
48
+ p.parse("GET / HTTP/1.1\r\n")
49
+ proc {
50
+ p.parse("Stuff\r\n")
51
+ }.should raise_error(Http::ParserError::BadRequest)
52
+ end
53
+
54
+ it "should be able to parse a request with a body defined by a Content-Length (ie. PUT)" do
55
+ p = parser.new
56
+ p.parse("PUT / HTTP/1.1\r\n")
57
+ p.parse("Host: blah.com\r\n")
58
+ p.parse("Content-Type: text/text\r\n")
59
+ p.parse("Content-Length: 5\r\n")
60
+ p.parse("\r\n")
61
+ p.parse("stuff")
62
+
63
+ p.body.read.should == "stuff"
64
+ end
65
+
66
+ it "should be able to parse two simple requests from the same string" do
67
+ req = <<REQ
68
+ GET /first HTTP/1.1\r
69
+ Host: blah.com\r
70
+ \r
71
+ GET /second HTTP/1.1\r
72
+ Host: blorp.com\r
73
+ \r
74
+ REQ
75
+ p = parser.new
76
+ p.parse!(req)
77
+ p.done?.should be_true
78
+ p.method.should == "GET"
79
+ p.version.should == [1,1]
80
+ p.path.should == "/first"
81
+ p.headers["HOST"].should == "blah.com"
82
+ p.has_body?.should be_false
83
+
84
+ p = parser.new
85
+ p.parse!(req)
86
+ p.done?.should be_true
87
+ p.method.should == "GET"
88
+ p.version.should == [1,1]
89
+ p.path.should == "/second"
90
+ p.headers["HOST"].should == "blorp.com"
91
+ p.has_body?.should be_false
92
+ end
93
+
94
+ it "should be able to parse two requests with length-prefixed entities from the same string" do
95
+ req = <<REQ
96
+ POST /first HTTP/1.1\r
97
+ Host: blah.com\r
98
+ Content-Length: 5\r
99
+ \r
100
+ test
101
+ POST /second HTTP/1.1\r
102
+ Host: blorp.com\r
103
+ Content-Length: 5\r
104
+ \r
105
+ haha
106
+ REQ
107
+ p = parser.new
108
+ p.parse!(req)
109
+ p.done?.should be_true
110
+ p.method.should == "POST"
111
+ p.version.should == [1,1]
112
+ p.path.should == "/first"
113
+ p.headers["HOST"].should == "blah.com"
114
+ p.body.read.should == "test\n"
115
+
116
+ p = parser.new
117
+ p.parse!(req)
118
+ p.done?.should be_true
119
+ p.method.should == "POST"
120
+ p.version.should == [1,1]
121
+ p.path.should == "/second"
122
+ p.headers["HOST"].should == "blorp.com"
123
+ p.body.read.should == "haha\n"
124
+ end
125
+
126
+ it "should be able to parse two requests with chunked entities from the same string" do
127
+ req = <<REQ
128
+ POST /first HTTP/1.1\r
129
+ Host: blah.com\r
130
+ Transfer-Encoding: chunked\r
131
+ \r
132
+ 5\r
133
+ test
134
+ \r
135
+ 0\r
136
+ \r
137
+ POST /second HTTP/1.1\r
138
+ Host: blorp.com\r
139
+ Transfer-Encoding: chunked\r
140
+ \r
141
+ 5\r
142
+ haha
143
+ \r
144
+ 0\r
145
+ \r
146
+ REQ
147
+ p = parser.new
148
+ p.parse!(req)
149
+ p.done?.should be_true
150
+ p.method.should == "POST"
151
+ p.version.should == [1,1]
152
+ p.path.should == "/first"
153
+ p.headers["HOST"].should == "blah.com"
154
+ p.body.read.should == "test\n"
155
+
156
+ p = parser.new
157
+ p.parse!(req)
158
+ p.done?.should be_true
159
+ p.method.should == "POST"
160
+ p.version.should == [1,1]
161
+ p.path.should == "/second"
162
+ p.headers["HOST"].should == "blorp.com"
163
+ p.body.read.should == "haha\n"
164
+ end
165
+
166
+ it "should be able to parse a request with a body defined by Transfer-Encoding: chunked" do
167
+ p = parser.new
168
+ p.parse(<<REQ)
169
+ POST / HTTP/1.1\r
170
+ Host: blah.com\r
171
+ Transfer-Encoding: chunked\r
172
+ \r
173
+ 10\r
174
+ stuffstuff\r
175
+ 0\r
176
+ \r
177
+ REQ
178
+ p.done?.should be_true
179
+ p.body.read.should == "stuffstuff"
180
+ end
181
+
182
+ it "should deal with a properly set 0 length body on a PUT/POST request" do
183
+ p = parser.new
184
+ p.parse <<REQ
185
+ PUT / HTTP/1.1\r
186
+ Host: blah.com\r
187
+ Content-Length: 0\r
188
+ \r
189
+ REQ
190
+ p.done?.should be_true
191
+ p.body.read.should == ""
192
+ end
193
+
194
+ it "should handle a body that's too long to store in memory with a Content-Length by putting it out to a tempfile." do
195
+ p = parser.new(:min_tempfile_size => 1024)
196
+ p.parse <<REQ
197
+ POST / HTTP/1.1\r
198
+ Host: blah.com\r
199
+ Content-Length: 2048\r
200
+ \r
201
+ REQ
202
+ p.parse("x"*2048)
203
+ p.done?.should be_true
204
+ p.body.should be_kind_of(Tempfile)
205
+ p.body.read.should == "x" * 2048
206
+ end
207
+
208
+ it "should handle a body that's too long to store in memory with a Transfer-Encoding of chunked by putting it out to a tempfile" do
209
+ p = parser.new(:min_tempfile_size => 1024)
210
+ p.parse <<REQ
211
+ POST / HTTP/1.1\r
212
+ Host: blah.com\r
213
+ Transfer-Encoding: chunked\r
214
+ \r
215
+ REQ
216
+ 1.upto(200) do
217
+ p.parse("10\r\n")
218
+ p.parse("x"*10 + "\r\n")
219
+ end
220
+ p.parse("0\r\n\r\n")
221
+ p.done?.should be_true
222
+ p.body.should be_kind_of(Tempfile)
223
+ p.body.read.should == "x" * 2000
224
+ end
225
+
226
+ it "Should be able to incrementally parse a request with arbitrarily placed string endings" do
227
+ p = parser.new
228
+ s = "GET / HTT"
229
+ p.parse!(s)
230
+ s.should == "GET / HTT"
231
+ p.done_request_line?.should be_false
232
+ p.done_headers?.should be_false
233
+ p.done?.should be_false
234
+
235
+ s << "P/1.1\r\nHost:"
236
+ p.parse!(s)
237
+ s.should == "Host:"
238
+ p.method.should == "GET"
239
+ p.path.should == "/"
240
+ p.version.should == [1,1]
241
+ p.done_request_line?.should be_true
242
+ p.done_headers?.should be_false
243
+ p.done?.should be_false
244
+
245
+ s << " blah.com\r\n"
246
+ p.parse!(s)
247
+ s.should == ""
248
+ p.headers["HOST"].should == "blah.com"
249
+ p.done_headers?.should be_false
250
+ p.done?.should be_false
251
+
252
+ s << "\r\n"
253
+ p.parse!(s)
254
+ s.should == ""
255
+ p.done_headers?.should be_true
256
+ p.done?.should be_true
257
+ end
258
+
259
+ describe "RFC2616 sec 3.1 (HTTP Version)" do
260
+ it "MUST accept arbitrary numbers for the version string" do
261
+ p = parser.new
262
+ p.parse("GET / HTTP/12.3445\r\n")
263
+
264
+ p.done_request_line?.should be_true
265
+ p.version.should == [12,3445]
266
+ end
267
+ end
268
+
269
+ describe "RFC2616 sec 4.1 (Message Type)" do
270
+ it "SHOULD ignore leading whitespace lines before a request-line" do
271
+ p = parser.new
272
+ p.parse("\r\n")
273
+ p.parse("GET / HTTP/1.1\r\n")
274
+
275
+ p.done_request_line?.should be_true
276
+ end
277
+ end
278
+
279
+ describe "RFC2616 sec 4.2 (Message Headers)" do
280
+ it "MUST ignore leading spaces on header values" do
281
+ p = parser.new
282
+ p.parse("GET / HTTP/1.1\r\n")
283
+ p.parse("Blah: wat?\r\n")
284
+ p.parse("\r\n")
285
+
286
+ p.done?.should be_true
287
+ p.headers["BLAH"].should == "wat?"
288
+ end
289
+
290
+ it "MUST be able to handle a header that spans more then one line" do
291
+ p = parser.new
292
+ p.parse("GET / HTTP/1.1\r\n")
293
+ p.parse("Blah: blorp\r\n")
294
+ p.parse(" woop\r\n")
295
+ p.parse("\r\n")
296
+
297
+ p.done?.should be_true
298
+ p.headers["BLAH"].should == "blorp woop"
299
+ end
300
+
301
+ it "MUST ignore any amount of leading whitespace on multiline headers" do
302
+ p = parser.new
303
+ p.parse("GET / HTTP/1.1\r\n")
304
+ p.parse("Blah: blorp\r\n")
305
+ p.parse(" \t woop\r\n")
306
+ p.parse("\r\n")
307
+
308
+ p.done?.should be_true
309
+ p.headers["BLAH"].should == "blorp woop"
310
+ end
311
+
312
+ it "MUST be able to merge multiple headers into one comma separated header with order preserved" do
313
+ p = parser.new
314
+ p.parse("GET / HTTP/1.1\r\n")
315
+ p.parse("Blah: blorp\r\n")
316
+ p.parse("Blah: woop\r\n")
317
+ p.parse("Woop: bloop\r\n")
318
+ p.parse("Woop: noop\r\n")
319
+ p.parse("\r\n")
320
+
321
+ p.done?.should be_true
322
+ p.headers["BLAH"].should == "blorp,woop"
323
+ p.headers["WOOP"].should == "bloop,noop"
324
+ end
325
+ end
326
+
327
+ describe "RFC2616 sec 4.3 (Message Body)" do
328
+ ["GET","DELETE","HEAD","TRACE","CONNECT"].each do |method|
329
+ it "MUST NOT require a body on #{method} requests" do
330
+ p = parser.new
331
+ p.parse("#{method} / HTTP/1.1\r\n")
332
+ p.parse("Host: blah.com\r\n")
333
+ p.parse("\r\n")
334
+
335
+ p.done?.should be_true
336
+ p.body.should be_nil
337
+ end
338
+
339
+ it "SHOULD accept (but ignore) a message body on #{method} requests" do
340
+ p = parser.new
341
+ req = <<REQ
342
+ #{method} / HTTP/1.1\r
343
+ Content-Length: 6\r
344
+ \r
345
+ stuff
346
+ REQ
347
+ p.parse!(req)
348
+
349
+ p.done?.should be_true
350
+ p.headers["CONTENT_LENGTH"].should == "6"
351
+ p.body.should be_nil
352
+
353
+ req.should == ""
354
+ end
355
+ end
356
+
357
+ ["POST","PUT"].each do |method|
358
+ it "MUST accept a body on #{method} requests" do
359
+ p = parser.new
360
+ p.parse("#{method} / HTTP/1.1\r\n")
361
+ p.parse("Content-Length: 5\r\n")
362
+ p.parse("\r\n")
363
+ p.parse("stuff")
364
+
365
+ p.done?.should be_true
366
+ p.body.should_not be_nil
367
+ p.body.read.should == "stuff"
368
+ end
369
+
370
+ it "MUST require a body on #{method} requests" do
371
+ p = parser.new
372
+ proc {
373
+ p.parse("#{method} / HTTP/1.1\r\n")
374
+ p.parse("Host: blah.com\r\n")
375
+ p.parse("\r\n")
376
+ }.should raise_error(Http::ParserError::LengthRequired)
377
+ end
378
+ end
379
+
380
+ it "SHOULD accept and allow a body on OPTIONS requests" do
381
+ p = parser.new
382
+ p.parse("OPTIONS / HTTP/1.1\r\n")
383
+ p.parse("Content-Length: 5\r\n")
384
+ p.parse("\r\n")
385
+ p.parse("stuff")
386
+
387
+ p.done?.should be_true
388
+ p.body.should_not be_nil
389
+ p.body.read.should == "stuff"
390
+ end
391
+
392
+ it "MUST accept an OPTIONS request with no body" do
393
+ p = parser.new
394
+ p.parse("OPTIONS / HTTP/1.1\r\n")
395
+ p.parse("\r\n")
396
+
397
+ p.done?.should be_true
398
+ p.body.should be_nil
399
+ end
400
+
401
+ it "MUST choose chunked-encoding length over content-length header" do
402
+ p = parser.new
403
+ p.parse(<<REQ)
404
+ POST / HTTP/1.1\r
405
+ Content-Length: 5
406
+ Transfer-Encoding: chunked
407
+
408
+ 10
409
+ stuffstuff
410
+ 0
411
+
412
+ REQ
413
+ p.done?.should be_true
414
+ p.body.should_not be_nil
415
+ p.body.read.should == "stuffstuff"
416
+ end
417
+ end
418
+
419
+ describe "RFC2616 sec 5.1" do
420
+ it "SHOULD raise a 501 error if given an unrecognized method" do
421
+ p = parser.new
422
+ proc {
423
+ p.parse("OOGABOOGAH / HTTP/1.1\r\n")
424
+ }.should raise_error(Http::ParserError::NotImplemented)
425
+ end
426
+ end
427
+ end
428
+ end
@@ -0,0 +1 @@
1
+ --color
@@ -0,0 +1,8 @@
1
+ $LOAD_PATH.unshift(File.dirname(__FILE__))
2
+ $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
3
+ require 'spec'
4
+ require 'spec/autorun'
5
+
6
+ Spec::Runner.configure do |config|
7
+
8
+ end
metadata ADDED
@@ -0,0 +1,93 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: http_parser
3
+ version: !ruby/object:Gem::Version
4
+ prerelease: false
5
+ segments:
6
+ - 0
7
+ - 1
8
+ - 0
9
+ version: 0.1.0
10
+ platform: ruby
11
+ authors:
12
+ - Graham Batty
13
+ autorequire:
14
+ bindir: bin
15
+ cert_chain: []
16
+
17
+ date: 2010-03-07 00:00:00 -07:00
18
+ default_executable:
19
+ dependencies:
20
+ - !ruby/object:Gem::Dependency
21
+ name: rspec
22
+ prerelease: false
23
+ requirement: &id001 !ruby/object:Gem::Requirement
24
+ requirements:
25
+ - - ">="
26
+ - !ruby/object:Gem::Version
27
+ segments:
28
+ - 1
29
+ - 2
30
+ - 9
31
+ version: 1.2.9
32
+ type: :development
33
+ version_requirements: *id001
34
+ description: |-
35
+ This gem provides a (hopefully) high quality http parser library that can
36
+ build request information iteratively as data comes over the line without
37
+ requiring the caller to maintain the entire body of the request as a single
38
+ string in memory.
39
+ email: graham@stormbrew.ca
40
+ executables: []
41
+
42
+ extensions: []
43
+
44
+ extra_rdoc_files:
45
+ - LICENSE
46
+ - README.rdoc
47
+ files:
48
+ - .document
49
+ - .gitignore
50
+ - LICENSE
51
+ - README.rdoc
52
+ - Rakefile
53
+ - VERSION
54
+ - bench/bench_helper.rb
55
+ - bench/http_parser_bench.rb
56
+ - lib/http/native_parser.rb
57
+ - lib/http/parser.rb
58
+ - spec/http_parser_spec.rb
59
+ - spec/spec.opts
60
+ - spec/spec_helper.rb
61
+ has_rdoc: true
62
+ homepage: http://github.com/stormbrew/http_parser
63
+ licenses: []
64
+
65
+ post_install_message:
66
+ rdoc_options:
67
+ - --charset=UTF-8
68
+ require_paths:
69
+ - lib
70
+ required_ruby_version: !ruby/object:Gem::Requirement
71
+ requirements:
72
+ - - ">="
73
+ - !ruby/object:Gem::Version
74
+ segments:
75
+ - 0
76
+ version: "0"
77
+ required_rubygems_version: !ruby/object:Gem::Requirement
78
+ requirements:
79
+ - - ">="
80
+ - !ruby/object:Gem::Version
81
+ segments:
82
+ - 0
83
+ version: "0"
84
+ requirements: []
85
+
86
+ rubyforge_project:
87
+ rubygems_version: 1.3.6
88
+ signing_key:
89
+ specification_version: 3
90
+ summary: HTTP Parser Library
91
+ test_files:
92
+ - spec/http_parser_spec.rb
93
+ - spec/spec_helper.rb