http_parser 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,5 @@
1
+ README.rdoc
2
+ lib/**/*.rb
3
+ bin/*
4
+ features/**/*.feature
5
+ LICENSE
@@ -0,0 +1,22 @@
1
+ ## MAC OS
2
+ .DS_Store
3
+
4
+ ## TEXTMATE
5
+ *.tmproj
6
+ tmtags
7
+
8
+ ## EMACS
9
+ *~
10
+ \#*
11
+ .\#*
12
+
13
+ ## VIM
14
+ *.swp
15
+
16
+ ## PROJECT::GENERAL
17
+ coverage
18
+ rdoc
19
+ pkg
20
+
21
+ ## PROJECT::SPECIFIC
22
+ *.rbc
data/LICENSE ADDED
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2009 Graham Batty
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,72 @@
1
+ = http_parser
2
+
3
+ This gem provides a (hopefully) high quality http parser library that can
4
+ build request information iteratively as data comes over the line without
5
+ requiring the caller to maintain the entire body of the request as a single
6
+ string in memory.
7
+
8
+ It will also have a full set of specs and a Ruby-native reference library
9
+ so that it can be used in implementations or environments that do not
10
+ support C extensions.
11
+
12
+ Simple usage example:
13
+
14
+ p = Http::Parser.new
15
+ p.parse("GET / HTTP/1.1\r\n")
16
+ p.parse("Host: blah.com\r\n")
17
+ p.parse("Cookie: blorp=blah\r\n")
18
+ p.parse("\r\n")
19
+
20
+ p.method => "GET"
21
+ p.version => [1,1]
22
+ p.path => "/"
23
+ p.headers["HOST"] => "blah.com"
24
+ p.headers["COOKIE"] => "blorp=blah"
25
+
26
+ If the request is a type that has a body, the body will be available
27
+ as a stream object via p.body:
28
+
29
+ p = Http::Parser.new
30
+ p.parse("PUT / HTTP/1.1\r\n")
31
+ p.parse("Host: blah.com\r\n")
32
+ p.parse("Content-Type: text/text\r\n")
33
+ p.parse("Content-Length: 5\r\n")
34
+ p.parse("\r\n")
35
+ p.parse("stuff")
36
+
37
+ p.body.read => "stuff"
38
+
39
+ If you use p.parse!, any trailing text that isn't immediately parseable
40
+ will be left in the string object you pass in while what was parsed will be
41
+ removed. This allows you to hand the parser a large glob of data and allow
42
+ it to figure out what it needs and what it doesn't. When you get more data, you
43
+ can append it to your existing string and pass that in again until the request
44
+ is done. You can test if the request is done by using p.done?
45
+
46
+ p = Http::Parser.new
47
+ s = "GET / HTTP/1.1\r\nHost:"
48
+ p.parse!(s)
49
+ s => "Host:"
50
+ p.done? => false
51
+ s << " blah.com\r\n"
52
+ p.parse!(s)
53
+ s => ""
54
+ p.done? => false
55
+ s << "\r\n"
56
+ p.parse!(s)
57
+ s => ""
58
+ p.done? => true
59
+
60
+ == Note on Patches/Pull Requests
61
+
62
+ * Fork the project.
63
+ * Make your feature addition or bug fix.
64
+ * Add tests for it. This is important so I don't break it in a
65
+ future version unintentionally.
66
+ * Commit, do not mess with rakefile, version, or history.
67
+ (if you want to have your own version, that is fine but bump version in a commit by itself I can ignore when I pull)
68
+ * Send me a pull request. Bonus points for topic branches.
69
+
70
+ == Copyright
71
+
72
+ Copyright (c) 2010 Graham Batty. See LICENSE for details.
@@ -0,0 +1,48 @@
1
require 'rubygems'
require 'rake'

# Gem packaging tasks come from Jeweler. If Jeweler cannot be loaded we only
# print a hint and carry on so the remaining tasks are still defined.
begin
  require 'jeweler'
  Jeweler::Tasks.new do |gemspec|
    gemspec.name = "http_parser"
    gemspec.summary = %Q{HTTP Parser Library}
    gemspec.description = %Q{This gem provides a (hopefully) high quality http parser library that can
build request information iteratively as data comes over the line without
requiring the caller to maintain the entire body of the request as a single
string in memory.}
    gemspec.email = "graham@stormbrew.ca"
    gemspec.homepage = "http://github.com/stormbrew/http_parser"
    gemspec.authors = ["Graham Batty"]
    gemspec.add_development_dependency "rspec", ">= 1.2.9"
    # gemspec is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
  end
  Jeweler::GemcutterTasks.new
rescue LoadError
  puts "Jeweler (or a dependency) not available. Install it with: gem install jeweler"
end

require 'spec/rake/spectask'

# `rake spec`: run every *_spec.rb file below spec/.
Spec::Rake::SpecTask.new(:spec) do |t|
  t.libs << 'lib'
  t.libs << 'spec'
  t.spec_files = FileList['spec/**/*_spec.rb']
end

# `rake rcov`: run the same specs with rcov coverage enabled.
Spec::Rake::SpecTask.new(:rcov) do |t|
  t.libs << 'lib'
  t.libs << 'spec'
  t.pattern = 'spec/**/*_spec.rb'
  t.rcov = true
end

# The specs depend on the :check_dependencies task.
# NOTE(review): that task is presumably defined by Jeweler::Tasks above - confirm.
task :spec => :check_dependencies

task :default => :spec

require 'rake/rdoctask'

# `rake rdoc`: build API docs into rdoc/, titled with the contents of the
# VERSION file when that file exists.
Rake::RDocTask.new do |doc|
  version = File.exist?('VERSION') ? File.read('VERSION') : ""

  doc.rdoc_dir = 'rdoc'
  doc.title = "http_parser #{version}"
  ['README*', 'lib/**/*.rb'].each do |glob|
    doc.rdoc_files.include(glob)
  end
end
data/VERSION ADDED
@@ -0,0 +1 @@
1
+ 0.1.0
@@ -0,0 +1,5 @@
1
+ $LOAD_PATH.unshift(File.dirname(__FILE__))
2
+ $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
3
+
4
+ require 'rubygems'
5
+ require 'benchmark'
@@ -0,0 +1,31 @@
1
require File.expand_path(File.dirname(__FILE__) + '/bench_helper')

require 'http/parser'

# A small GET request (request line, two headers, terminating blank line)
# that every benchmarked parser is fed on each iteration.
request_body = "GET /blakjsdfkas HTTP/1.1\r\n" +
               "Host: blooperblorp\r\n" +
               "Cookie: blah=woop\r\n" +
               "\r\n"

#File.read(File.expand_path(File.dirname(__FILE__) + '/sample_request.http'))

# Inclusive range, so each report parses the request 100001 times.
iterations = (0..100000)

Benchmark.bmbm(20) do |bm|
  bm.report("Http::NativeParser") do
    iterations.each do
      Http::NativeParser.new.parse(request_body)
    end
  end

  # The Mongrel parser is only benchmarked when its http11 library can be
  # loaded; otherwise a notice is printed and the report is skipped.
  begin
    require 'http11'
    bm.report("Mongrel::HttpParser") do
      iterations.each do
        Mongrel::HttpParser.new.execute({}, request_body.dup, 0)
      end
    end
  rescue LoadError
    puts("Can't benchmark Mongrel::HttpParser as it couldn't be loaded.")
  end
end
31
+
@@ -0,0 +1,307 @@
1
+ require 'stringio'
2
+ require 'tempfile'
3
+ require 'strscan'
4
+
5
module Http
  # This is a native ruby implementation of the http parser. It is also
  # the reference implementation for this library. Later there will be one
  # written in C for performance reasons, and it will have to pass the same
  # specs as this one.
  #
  # The parser is a small state machine. @state is one of :request_line,
  # :headers, :body_identity, :body_chunked, :body_chunked_tail or :done,
  # and parse! dispatches each step to the private parse_<state> method.
  class NativeParser
    # The HTTP method string taken from the request line, or nil before the
    # request line has been parsed. Accepted values are the keys of Methods:
    # "OPTIONS", "GET", "HEAD", "POST", "PUT", "DELETE", "TRACE", "CONNECT".
    # Any other value raises Http::ParserError::NotImplemented since then we
    # don't know whether the request has a body.
    attr_reader :method

    # The path given by the client as a string. No processing is done on
    # this and nearly anything is considered valid.
    attr_reader :path

    # The HTTP version of the request as an array of two integers.
    # [1,0] and [1,1] are the most likely values currently.
    attr_reader :version

    # A hash of headers passed to the server with the request. All
    # header names are normalized to ALLCAPS_WITH_UNDERSCORES for
    # consistency's sake. Repeated headers are joined with ",".
    attr_reader :headers

    # The body of the request as a stream object, or nil when the request
    # has no body (or the method cannot carry one). May be either a
    # StringIO or an instance of the :tempfile_class option, depending on
    # request length.
    attr_reader :body

    # The default set of parse options for the request.
    DefaultOptions = {
      # maximum length of an individual header line.
      # NOTE(review): not consulted anywhere in this class - confirm whether
      # enforcement is still to be written.
      :max_header_length => 10240,
      # maximum number of headers that can be passed to the server
      # NOTE(review): not consulted anywhere in this class either.
      :max_headers => 100,
      # the size of the request body before it will be spilled
      # to a tempfile instead of being stored in memory.
      :min_tempfile_size => 1048576,
      # the class to use to create and manage the temporary file.
      # Must conform to the same interface as the stdlib Tempfile class
      :tempfile_class => Tempfile,
    }

    # Constants for method information
    MethodInfo = Struct.new(:must_have_body, :can_have_body)
    Methods = {
      "OPTIONS" => MethodInfo[false, true],
      "GET" => MethodInfo[false, false],
      "HEAD" => MethodInfo[false, false],
      "POST" => MethodInfo[true, true],
      "PUT" => MethodInfo[true, true],
      "DELETE" => MethodInfo[false, false],
      "TRACE" => MethodInfo[false, false],
      "CONNECT" => MethodInfo[false, false],
    }

    # Regex used to match the Request-Line
    RequestLineMatch = %r{^([a-zA-Z]+) (.+) HTTP/([0-9]+)\.([0-9]+)\r?\n}
    # Regex used to match a header line. Lines suspected of
    # being headers are also checked against the HeaderContinueMatch
    # to deal with multiline headers
    HeaderLineMatch = %r{^([a-zA-Z-]+):[ \t]*([[:print:]]+)\r?\n}
    HeaderContinueMatch = %r{^[ \t]+([[:print:]]+)\r?\n}
    EmptyLineMatch = %r{^\r?\n}

    # Regex used to match a size specification for a chunked segment
    # NOTE(review): only decimal digits are accepted and the size is read
    # with to_i, whereas RFC 2616 sec 3.6.1 defines chunk-size as hex. Left
    # as is because the specs shipped with this version use decimal sizes.
    ChunkSizeLineMatch = %r{^[0-9]+\r?\n}

    # Used as a fallback in error detection for a malformed request line or header.
    AnyLineMatch = %r{^.+?\r?\n}

    # Creates a parser waiting for a request line. options is merged over
    # DefaultOptions, so callers only pass the keys they want to change.
    def initialize(options = DefaultOptions)
      @method = nil
      @path = nil
      @version = nil
      @headers = {}
      @body = nil
      @state = :request_line
      @options = DefaultOptions.merge(options)

      # Book-keeping used while reading headers and bodies.
      @last_header = nil
      @body_length = nil
      @body_read = nil
      @chunk_remain = nil
    end

    # Returns true if the http method being parsed (if
    # known at this point in the parse) must have a body.
    # If the method hasn't been determined yet (or is not in Methods),
    # returns false.
    def must_have_body?
      info = Methods[@method]
      info ? info.must_have_body : false
    end

    # Returns true if the http method being parsed (if
    # known at this point in the parse) can have a body.
    # If the method hasn't been determined yet (or is not in Methods),
    # returns false.
    def can_have_body?
      info = Methods[@method]
      info ? info.can_have_body : false
    end

    # Returns true if a body stream has been created for the request.
    def has_body?
      !@body.nil?
    end

    # Takes a string and runs it through the parser. Note that
    # it does not consume anything it can't completely parse, so
    # you should always pass complete request chunks (lines or body data)
    # to this method. It's mostly for testing and convenience.
    # In practical use, you want to use parse!, which will remove parsed
    # data from the string you pass in.
    def parse(str)
      parse!(str.dup)
    end

    # Consumes the Request-Line. Blank lines before it are skipped; any
    # other complete line raises BadRequest. An incomplete line (no newline
    # yet) is left in the scanner untouched.
    def parse_request_line(scanner)
      if scanner.scan(RequestLineMatch)
        @method = scanner[1]
        @path = scanner[2]
        @version = [scanner[3].to_i, scanner[4].to_i]

        @state = :headers

        raise Http::ParserError::NotImplemented unless Methods[@method]
      elsif scanner.scan(EmptyLineMatch)
        # ignore an empty line before a request line.
      elsif scanner.scan(AnyLineMatch)
        raise Http::ParserError::BadRequest
      end
    end
    private :parse_request_line

    # Consumes one header line, one continuation line, or the blank line
    # that ends the header section. Any other complete line raises
    # BadRequest.
    def parse_headers(scanner)
      if scanner.scan(HeaderLineMatch)
        header = normalize_header(scanner[1])
        if @headers[header]
          # repeated header: merge into one comma separated value.
          @headers[header] << "," << scanner[2]
        else
          @headers[header] = scanner[2]
        end
        @last_header = header
      elsif @last_header && scanner.scan(HeaderContinueMatch)
        # folded header: append to the most recently seen header.
        @headers[@last_header] << " " << scanner[1]
      elsif scanner.scan(EmptyLineMatch)
        finish_headers
      elsif scanner.scan(AnyLineMatch)
        raise Http::ParserError::BadRequest
      end
    end
    private :parse_headers

    # Called on the blank line after the headers. Decides how the body (if
    # any) is framed, picks the next state and creates the body stream.
    # Raises LengthRequired when the method must have a body but the
    # request carries no usable framing information.
    def finish_headers
      transfer_encoding = @headers["TRANSFER_ENCODING"]
      content_length = @headers["CONTENT_LENGTH"]

      if transfer_encoding && transfer_encoding != 'identity'
        # a non-identity transfer encoding wins over Content-Length.
        @state = :body_chunked
        @body_length = 0 # this will get updated as we go.
        @body_read = 0
        @chunk_remain = nil
      elsif content_length
        @body_length = content_length.to_i
        @body_read = 0
        @state = (@body_length > 0) ? :body_identity : :done
      else
        # No framing at all. This also covers "Transfer-Encoding: identity"
        # without a Content-Length, which says nothing about the length.
        if must_have_body?
          # we assume it has a body and the client just didn't tell us
          # how big it was. This is more useful than BadRequest.
          raise Http::ParserError::LengthRequired
        end
        @state = :done
        return
      end

      if can_have_body?
        if @body_length >= @options[:min_tempfile_size]
          @body = new_body_tempfile
        else
          @body = StringIO.new
        end
      else
        # the body is still read off the wire, but thrown away.
        @body = nil
      end
    end
    private :finish_headers

    # Builds the temporary file used for large bodies.
    def new_body_tempfile
      file = @options[:tempfile_class].new("http_parser")
      file.unlink # unlink immediately so we don't rely on the caller to do it.
      file
    end
    private :new_body_tempfile

    # Reads up to the remaining Content-Length from the scanner into the
    # body and finishes the request once everything has arrived.
    # NOTE(review): scanner.pos is a byte offset while String#[] and
    # String#length count characters on Ruby 1.9+; this presumably relies
    # on callers passing binary-encoded data - confirm.
    def parse_body_identity(scanner)
      remain = @body_length - @body_read
      addition = scanner.string[scanner.pos, remain]
      scanner.pos += addition.length
      @body_read += addition.length

      @body << addition if @body

      if @body_read >= @body_length
        @body.rewind if @body
        @state = :done
      end
    end
    private :parse_body_identity

    # Reads one piece of a chunked body: a chunk-size line, chunk data, or
    # the line break that follows the chunk data. A size below 1 switches
    # to the :body_chunked_tail state.
    def parse_body_chunked(scanner)
      if @chunk_remain
        if @chunk_remain > 0
          addition = scanner.string[scanner.pos, @chunk_remain]
          scanner.pos += addition.length
          @chunk_remain -= addition.length
          @body_length += addition.length

          # @body is nil for methods that cannot carry a body; their data
          # is consumed and dropped.
          if @body
            @body << addition
            if @body.kind_of?(StringIO) && @body.length >= @options[:min_tempfile_size]
              spill_body_to_tempfile
            end
          end
        else
          if scanner.scan(EmptyLineMatch)
            # the chunk is done.
            @chunk_remain = nil
          elsif scanner.scan(AnyLineMatch)
            # there was a line with stuff in it,
            # which is invalid here.
            raise Http::ParserError::BadRequest
          end
        end
      elsif scanner.scan(ChunkSizeLineMatch)
        @chunk_remain = scanner[0].to_i
        @state = :body_chunked_tail if @chunk_remain < 1
      elsif scanner.scan(AnyLineMatch)
        raise Http::ParserError::BadRequest
      end
    end
    private :parse_body_chunked

    # Moves an in-memory body that has grown past :min_tempfile_size into
    # a temporary file and keeps writing there.
    def spill_body_to_tempfile
      buffered = @body.string
      @body = new_body_tempfile
      @body << buffered
    end
    private :spill_body_to_tempfile

    def parse_body_chunked_tail(scanner)
      # It's not actually clear if tail headers are even
      # legal in a chunked request entity. The docs seem
      # to indicate that they should only be sent if the other
      # end is known to accept them, and there's no way to ensure
      # that when the client is the originator. As such, we'll
      # just ignore them for now. We'll do this by ignoring
      # any line until we hit an empty line, which will be treated
      # as the end of the entity.
      if scanner.scan(EmptyLineMatch)
        @state = :done
        @body.rewind if @body
      else
        # ignore the line (if a complete one is available).
        scanner.scan(AnyLineMatch)
      end
    end
    private :parse_body_chunked_tail

    def parse_done(scanner)
      # do nothing, the parse is done.
    end
    private :parse_done

    # Consumes as much of str as it can and then removes it from str. This
    # allows you to iteratively pass data into the parser as it comes from
    # the client.
    #
    # Returns str (with the parsed prefix removed) when it stops because
    # nothing more can be parsed yet, and nil when all of str was consumed.
    # Parser errors propagate to the caller; whatever was scanned before
    # the error is still removed from str.
    def parse!(str)
      scanner = StringScanner.new(str)
      begin
        while !scanner.eos?
          start_pos = scanner.pos
          send(:"parse_#{@state}", scanner)
          if scanner.pos == start_pos
            # if we didn't move forward, we've run out of useful string so throw it back.
            return str
          end
        end
      ensure
        # clear out whatever we managed to scan.
        str[0, scanner.pos] = ""
      end
    end

    # Normalizes a header name to be UPPERCASE_WITH_UNDERSCORES
    def normalize_header(str)
      str.upcase.gsub('-', '_')
    end
    private :normalize_header

    # Returns true if the request is completely done.
    def done?
      @state == :done
    end

    # Returns true if the request has parsed the request-line (GET / HTTP/1.1)
    def done_request_line?
      [:headers, :body_identity, :body_chunked, :body_chunked_tail, :done].include?(@state)
    end

    # Returns true if all the headers from the request have been consumed.
    def done_headers?
      [:body_identity, :body_chunked, :body_chunked_tail, :done].include?(@state)
    end

    # Returns true if the request's body has been consumed (really the same as done?)
    def done_body?
      done?
    end
  end
end
@@ -0,0 +1,32 @@
1
+ module Http
2
+ require 'http/native_parser'
3
+ begin
4
+ require 'http/fast_parser'
5
+ Parser = FastParser
6
+ rescue LoadError => e
7
+ Parser = NativeParser
8
+ end
9
+
10
+ # An exception class for HTTP parser errors. Includes
11
+ # an HTTP Error Code number that corresponds to the
12
+ # difficulty parsing (ie. 414 for Request-URI Too Long)
13
class ParserError < RuntimeError
  # The error code that corresponds to the parsing error.
  attr_reader :code
  # Headers that should be sent back with the error reply as a hash.
  attr_reader :headers

  # string:: the exception message
  # code:: the numeric HTTP status code to reply with
  # headers:: extra reply headers as a hash
  def initialize(string = "Bad Request", code = 400, headers = {})
    super(string)
    @code = code
    @headers = headers
  end

  # Each subclass below only changes the default message and status code.
  # Their constructors keep the same optional arguments as ParserError so
  # that the usual `raise SomeError, "message"` form works; calling them
  # with no arguments behaves exactly as before.

  # 400: uses the ParserError defaults.
  class BadRequest < ParserError; end

  # 408
  class RequestTimeout < ParserError
    def initialize(string = "Request Timeout", code = 408, headers = {})
      super(string, code, headers)
    end
  end

  # 411
  class LengthRequired < ParserError
    def initialize(string = "Length Required", code = 411, headers = {})
      super(string, code, headers)
    end
  end

  # 413
  class RequestEntityTooLarge < ParserError
    def initialize(string = "Request Entity Too Large", code = 413, headers = {})
      super(string, code, headers)
    end
  end

  # 414
  class RequestURITooLong < ParserError
    def initialize(string = "Request-URI Too Long", code = 414, headers = {})
      super(string, code, headers)
    end
  end

  # 501
  class NotImplemented < ParserError # Send Allow header
    def initialize(string = "Method Not Implemented", code = 501, headers = {})
      super(string, code, headers)
    end
  end
end
32
+ end
@@ -0,0 +1,428 @@
1
+ require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
2
+
3
+ require 'http/parser'
4
+
5
+ test_parsers = [Http::NativeParser]
6
+ test_parsers << Http::FastParser if Http.const_defined? :FastParser
7
+
8
+ describe Http::Parser do
9
+ it "should be a reference to Http::NativeParser, or if present Http::FastParser" do
10
+ Http.const_defined?(:Parser).should be_true
11
+ if (Http.const_defined?(:FastParser))
12
+ Http::Parser.should == Http::FastParser
13
+ else
14
+ Http::Parser.should == Http::NativeParser
15
+ end
16
+ end
17
+ end
18
+
19
+ test_parsers.each do |parser|
20
+ describe parser do
21
+ it "should be able to parse a simple GET request" do
22
+ p = parser.new
23
+ p.parse("GET / HTTP/1.1\r\n")
24
+ p.parse("Host: blah.com\r\n")
25
+ p.parse("Cookie: blorp=blah\r\n")
26
+ p.parse("\r\n")
27
+
28
+ p.done?.should be_true
29
+ p.method.should == "GET"
30
+ p.version.should == [1,1]
31
+ p.path.should == "/"
32
+ p.headers["HOST"].should == "blah.com"
33
+ p.headers["COOKIE"].should == "blorp=blah"
34
+ end
35
+
36
+ it "should raise an error on a malformed request line" do
37
+ p = parser.new
38
+ proc {
39
+ p.parse("GET / HTTx/balh.blorp\r\n")
40
+ }.should raise_error(Http::ParserError::BadRequest)
41
+ proc {
42
+ p.parse("GET HTTP/1.1\r\n")
43
+ }.should raise_error(Http::ParserError::BadRequest)
44
+ end
45
+
46
+ it "should raise an error on a malformed header line" do
47
+ p = parser.new
48
+ p.parse("GET / HTTP/1.1\r\n")
49
+ proc {
50
+ p.parse("Stuff\r\n")
51
+ }.should raise_error(Http::ParserError::BadRequest)
52
+ end
53
+
54
+ it "should be able to parse a request with a body defined by a Content-Length (ie. PUT)" do
55
+ p = parser.new
56
+ p.parse("PUT / HTTP/1.1\r\n")
57
+ p.parse("Host: blah.com\r\n")
58
+ p.parse("Content-Type: text/text\r\n")
59
+ p.parse("Content-Length: 5\r\n")
60
+ p.parse("\r\n")
61
+ p.parse("stuff")
62
+
63
+ p.body.read.should == "stuff"
64
+ end
65
+
66
+ it "should be able to parse two simple requests from the same string" do
67
+ req = <<REQ
68
+ GET /first HTTP/1.1\r
69
+ Host: blah.com\r
70
+ \r
71
+ GET /second HTTP/1.1\r
72
+ Host: blorp.com\r
73
+ \r
74
+ REQ
75
+ p = parser.new
76
+ p.parse!(req)
77
+ p.done?.should be_true
78
+ p.method.should == "GET"
79
+ p.version.should == [1,1]
80
+ p.path.should == "/first"
81
+ p.headers["HOST"].should == "blah.com"
82
+ p.has_body?.should be_false
83
+
84
+ p = parser.new
85
+ p.parse!(req)
86
+ p.done?.should be_true
87
+ p.method.should == "GET"
88
+ p.version.should == [1,1]
89
+ p.path.should == "/second"
90
+ p.headers["HOST"].should == "blorp.com"
91
+ p.has_body?.should be_false
92
+ end
93
+
94
+ it "should be able to parse two requests with length-prefixed entities from the same string" do
95
+ req = <<REQ
96
+ POST /first HTTP/1.1\r
97
+ Host: blah.com\r
98
+ Content-Length: 5\r
99
+ \r
100
+ test
101
+ POST /second HTTP/1.1\r
102
+ Host: blorp.com\r
103
+ Content-Length: 5\r
104
+ \r
105
+ haha
106
+ REQ
107
+ p = parser.new
108
+ p.parse!(req)
109
+ p.done?.should be_true
110
+ p.method.should == "POST"
111
+ p.version.should == [1,1]
112
+ p.path.should == "/first"
113
+ p.headers["HOST"].should == "blah.com"
114
+ p.body.read.should == "test\n"
115
+
116
+ p = parser.new
117
+ p.parse!(req)
118
+ p.done?.should be_true
119
+ p.method.should == "POST"
120
+ p.version.should == [1,1]
121
+ p.path.should == "/second"
122
+ p.headers["HOST"].should == "blorp.com"
123
+ p.body.read.should == "haha\n"
124
+ end
125
+
126
+ it "should be able to parse two requests with chunked entities from the same string" do
127
+ req = <<REQ
128
+ POST /first HTTP/1.1\r
129
+ Host: blah.com\r
130
+ Transfer-Encoding: chunked\r
131
+ \r
132
+ 5\r
133
+ test
134
+ \r
135
+ 0\r
136
+ \r
137
+ POST /second HTTP/1.1\r
138
+ Host: blorp.com\r
139
+ Transfer-Encoding: chunked\r
140
+ \r
141
+ 5\r
142
+ haha
143
+ \r
144
+ 0\r
145
+ \r
146
+ REQ
147
+ p = parser.new
148
+ p.parse!(req)
149
+ p.done?.should be_true
150
+ p.method.should == "POST"
151
+ p.version.should == [1,1]
152
+ p.path.should == "/first"
153
+ p.headers["HOST"].should == "blah.com"
154
+ p.body.read.should == "test\n"
155
+
156
+ p = parser.new
157
+ p.parse!(req)
158
+ p.done?.should be_true
159
+ p.method.should == "POST"
160
+ p.version.should == [1,1]
161
+ p.path.should == "/second"
162
+ p.headers["HOST"].should == "blorp.com"
163
+ p.body.read.should == "haha\n"
164
+ end
165
+
166
+ it "should be able to parse a request with a body defined by Transfer-Encoding: chunked" do
167
+ p = parser.new
168
+ p.parse(<<REQ)
169
+ POST / HTTP/1.1\r
170
+ Host: blah.com\r
171
+ Transfer-Encoding: chunked\r
172
+ \r
173
+ 10\r
174
+ stuffstuff\r
175
+ 0\r
176
+ \r
177
+ REQ
178
+ p.done?.should be_true
179
+ p.body.read.should == "stuffstuff"
180
+ end
181
+
182
+ it "should deal with a properly set 0 length body on a PUT/POST request" do
183
+ p = parser.new
184
+ p.parse <<REQ
185
+ PUT / HTTP/1.1\r
186
+ Host: blah.com\r
187
+ Content-Length: 0\r
188
+ \r
189
+ REQ
190
+ p.done?.should be_true
191
+ p.body.read.should == ""
192
+ end
193
+
194
+ it "should handle a body that's too long to store in memory with a Content-Length by putting it out to a tempfile." do
195
+ p = parser.new(:min_tempfile_size => 1024)
196
+ p.parse <<REQ
197
+ POST / HTTP/1.1\r
198
+ Host: blah.com\r
199
+ Content-Length: 2048\r
200
+ \r
201
+ REQ
202
+ p.parse("x"*2048)
203
+ p.done?.should be_true
204
+ p.body.should be_kind_of(Tempfile)
205
+ p.body.read.should == "x" * 2048
206
+ end
207
+
208
+ it "should handle a body that's too long to store in memory with a Transfer-Encoding of chunked by putting it out to a tempfile" do
209
+ p = parser.new(:min_tempfile_size => 1024)
210
+ p.parse <<REQ
211
+ POST / HTTP/1.1\r
212
+ Host: blah.com\r
213
+ Transfer-Encoding: chunked\r
214
+ \r
215
+ REQ
216
+ 1.upto(200) do
217
+ p.parse("10\r\n")
218
+ p.parse("x"*10 + "\r\n")
219
+ end
220
+ p.parse("0\r\n\r\n")
221
+ p.done?.should be_true
222
+ p.body.should be_kind_of(Tempfile)
223
+ p.body.read.should == "x" * 2000
224
+ end
225
+
226
+ it "Should be able to incrementally parse a request with arbitrarily placed string endings" do
227
+ p = parser.new
228
+ s = "GET / HTT"
229
+ p.parse!(s)
230
+ s.should == "GET / HTT"
231
+ p.done_request_line?.should be_false
232
+ p.done_headers?.should be_false
233
+ p.done?.should be_false
234
+
235
+ s << "P/1.1\r\nHost:"
236
+ p.parse!(s)
237
+ s.should == "Host:"
238
+ p.method.should == "GET"
239
+ p.path.should == "/"
240
+ p.version.should == [1,1]
241
+ p.done_request_line?.should be_true
242
+ p.done_headers?.should be_false
243
+ p.done?.should be_false
244
+
245
+ s << " blah.com\r\n"
246
+ p.parse!(s)
247
+ s.should == ""
248
+ p.headers["HOST"].should == "blah.com"
249
+ p.done_headers?.should be_false
250
+ p.done?.should be_false
251
+
252
+ s << "\r\n"
253
+ p.parse!(s)
254
+ s.should == ""
255
+ p.done_headers?.should be_true
256
+ p.done?.should be_true
257
+ end
258
+
259
+ describe "RFC2616 sec 3.1 (HTTP Version)" do
260
+ it "MUST accept arbitrary numbers for the version string" do
261
+ p = parser.new
262
+ p.parse("GET / HTTP/12.3445\r\n")
263
+
264
+ p.done_request_line?.should be_true
265
+ p.version.should == [12,3445]
266
+ end
267
+ end
268
+
269
+ describe "RFC2616 sec 4.1 (Message Type)" do
270
+ it "SHOULD ignore leading whitespace lines before a request-line" do
271
+ p = parser.new
272
+ p.parse("\r\n")
273
+ p.parse("GET / HTTP/1.1\r\n")
274
+
275
+ p.done_request_line?.should be_true
276
+ end
277
+ end
278
+
279
+ describe "RFC2616 sec 4.2 (Message Headers)" do
280
+ it "MUST ignore leading spaces on header values" do
281
+ p = parser.new
282
+ p.parse("GET / HTTP/1.1\r\n")
283
+ p.parse("Blah: wat?\r\n")
284
+ p.parse("\r\n")
285
+
286
+ p.done?.should be_true
287
+ p.headers["BLAH"].should == "wat?"
288
+ end
289
+
290
+ it "MUST be able to handle a header that spans more then one line" do
291
+ p = parser.new
292
+ p.parse("GET / HTTP/1.1\r\n")
293
+ p.parse("Blah: blorp\r\n")
294
+ p.parse(" woop\r\n")
295
+ p.parse("\r\n")
296
+
297
+ p.done?.should be_true
298
+ p.headers["BLAH"].should == "blorp woop"
299
+ end
300
+
301
+ it "MUST ignore any amount of leading whitespace on multiline headers" do
302
+ p = parser.new
303
+ p.parse("GET / HTTP/1.1\r\n")
304
+ p.parse("Blah: blorp\r\n")
305
+ p.parse(" \t woop\r\n")
306
+ p.parse("\r\n")
307
+
308
+ p.done?.should be_true
309
+ p.headers["BLAH"].should == "blorp woop"
310
+ end
311
+
312
+ it "MUST be able to merge multiple headers into one comma separated header with order preserved" do
313
+ p = parser.new
314
+ p.parse("GET / HTTP/1.1\r\n")
315
+ p.parse("Blah: blorp\r\n")
316
+ p.parse("Blah: woop\r\n")
317
+ p.parse("Woop: bloop\r\n")
318
+ p.parse("Woop: noop\r\n")
319
+ p.parse("\r\n")
320
+
321
+ p.done?.should be_true
322
+ p.headers["BLAH"].should == "blorp,woop"
323
+ p.headers["WOOP"].should == "bloop,noop"
324
+ end
325
+ end
326
+
327
+ describe "RFC2616 sec 4.3 (Message Body)" do
328
+ ["GET","DELETE","HEAD","TRACE","CONNECT"].each do |method|
329
+ it "MUST NOT require a body on #{method} requests" do
330
+ p = parser.new
331
+ p.parse("#{method} / HTTP/1.1\r\n")
332
+ p.parse("Host: blah.com\r\n")
333
+ p.parse("\r\n")
334
+
335
+ p.done?.should be_true
336
+ p.body.should be_nil
337
+ end
338
+
339
+ it "SHOULD accept (but ignore) a message body on #{method} requests" do
340
+ p = parser.new
341
+ req = <<REQ
342
+ #{method} / HTTP/1.1\r
343
+ Content-Length: 6\r
344
+ \r
345
+ stuff
346
+ REQ
347
+ p.parse!(req)
348
+
349
+ p.done?.should be_true
350
+ p.headers["CONTENT_LENGTH"].should == "6"
351
+ p.body.should be_nil
352
+
353
+ req.should == ""
354
+ end
355
+ end
356
+
357
+ ["POST","PUT"].each do |method|
358
+ it "MUST accept a body on #{method} requests" do
359
+ p = parser.new
360
+ p.parse("#{method} / HTTP/1.1\r\n")
361
+ p.parse("Content-Length: 5\r\n")
362
+ p.parse("\r\n")
363
+ p.parse("stuff")
364
+
365
+ p.done?.should be_true
366
+ p.body.should_not be_nil
367
+ p.body.read.should == "stuff"
368
+ end
369
+
370
+ it "MUST require a body on #{method} requests" do
371
+ p = parser.new
372
+ proc {
373
+ p.parse("#{method} / HTTP/1.1\r\n")
374
+ p.parse("Host: blah.com\r\n")
375
+ p.parse("\r\n")
376
+ }.should raise_error(Http::ParserError::LengthRequired)
377
+ end
378
+ end
379
+
380
+ it "SHOULD accept and allow a body on OPTIONS requests" do
381
+ p = parser.new
382
+ p.parse("OPTIONS / HTTP/1.1\r\n")
383
+ p.parse("Content-Length: 5\r\n")
384
+ p.parse("\r\n")
385
+ p.parse("stuff")
386
+
387
+ p.done?.should be_true
388
+ p.body.should_not be_nil
389
+ p.body.read.should == "stuff"
390
+ end
391
+
392
+ it "MUST accept an OPTIONS request with no body" do
393
+ p = parser.new
394
+ p.parse("OPTIONS / HTTP/1.1\r\n")
395
+ p.parse("\r\n")
396
+
397
+ p.done?.should be_true
398
+ p.body.should be_nil
399
+ end
400
+
401
+ it "MUST choose chunked-encoding length over content-length header" do
402
+ p = parser.new
403
+ p.parse(<<REQ)
404
+ POST / HTTP/1.1\r
405
+ Content-Length: 5
406
+ Transfer-Encoding: chunked
407
+
408
+ 10
409
+ stuffstuff
410
+ 0
411
+
412
+ REQ
413
+ p.done?.should be_true
414
+ p.body.should_not be_nil
415
+ p.body.read.should == "stuffstuff"
416
+ end
417
+ end
418
+
419
+ describe "RFC2616 sec 5.1" do
420
+ it "SHOULD raise a 501 error if given an unrecognized method" do
421
+ p = parser.new
422
+ proc {
423
+ p.parse("OOGABOOGAH / HTTP/1.1\r\n")
424
+ }.should raise_error(Http::ParserError::NotImplemented)
425
+ end
426
+ end
427
+ end
428
+ end
@@ -0,0 +1 @@
1
+ --color
@@ -0,0 +1,8 @@
1
+ $LOAD_PATH.unshift(File.dirname(__FILE__))
2
+ $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
3
+ require 'spec'
4
+ require 'spec/autorun'
5
+
6
+ Spec::Runner.configure do |config|
7
+
8
+ end
metadata ADDED
@@ -0,0 +1,93 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: http_parser
3
+ version: !ruby/object:Gem::Version
4
+ prerelease: false
5
+ segments:
6
+ - 0
7
+ - 1
8
+ - 0
9
+ version: 0.1.0
10
+ platform: ruby
11
+ authors:
12
+ - Graham Batty
13
+ autorequire:
14
+ bindir: bin
15
+ cert_chain: []
16
+
17
+ date: 2010-03-07 00:00:00 -07:00
18
+ default_executable:
19
+ dependencies:
20
+ - !ruby/object:Gem::Dependency
21
+ name: rspec
22
+ prerelease: false
23
+ requirement: &id001 !ruby/object:Gem::Requirement
24
+ requirements:
25
+ - - ">="
26
+ - !ruby/object:Gem::Version
27
+ segments:
28
+ - 1
29
+ - 2
30
+ - 9
31
+ version: 1.2.9
32
+ type: :development
33
+ version_requirements: *id001
34
+ description: |-
35
+ This gem provides a (hopefully) high quality http parser library that can
36
+ build request information iteratively as data comes over the line without
37
+ requiring the caller to maintain the entire body of the request as a single
38
+ string in memory.
39
+ email: graham@stormbrew.ca
40
+ executables: []
41
+
42
+ extensions: []
43
+
44
+ extra_rdoc_files:
45
+ - LICENSE
46
+ - README.rdoc
47
+ files:
48
+ - .document
49
+ - .gitignore
50
+ - LICENSE
51
+ - README.rdoc
52
+ - Rakefile
53
+ - VERSION
54
+ - bench/bench_helper.rb
55
+ - bench/http_parser_bench.rb
56
+ - lib/http/native_parser.rb
57
+ - lib/http/parser.rb
58
+ - spec/http_parser_spec.rb
59
+ - spec/spec.opts
60
+ - spec/spec_helper.rb
61
+ has_rdoc: true
62
+ homepage: http://github.com/stormbrew/http_parser
63
+ licenses: []
64
+
65
+ post_install_message:
66
+ rdoc_options:
67
+ - --charset=UTF-8
68
+ require_paths:
69
+ - lib
70
+ required_ruby_version: !ruby/object:Gem::Requirement
71
+ requirements:
72
+ - - ">="
73
+ - !ruby/object:Gem::Version
74
+ segments:
75
+ - 0
76
+ version: "0"
77
+ required_rubygems_version: !ruby/object:Gem::Requirement
78
+ requirements:
79
+ - - ">="
80
+ - !ruby/object:Gem::Version
81
+ segments:
82
+ - 0
83
+ version: "0"
84
+ requirements: []
85
+
86
+ rubyforge_project:
87
+ rubygems_version: 1.3.6
88
+ signing_key:
89
+ specification_version: 3
90
+ summary: HTTP Parser Library
91
+ test_files:
92
+ - spec/http_parser_spec.rb
93
+ - spec/spec_helper.rb