RubyGems - http_parser - Versions diffs - 0.1.0 - Mend

http_parser 0.1.0

Files changed (14) hide show

data/.document ADDED

@@ -0,0 +1,5 @@
+README.rdoc
+lib/**/*.rb
+bin/*
+features/**/*.feature
+LICENSE

data/.gitignore ADDED

@@ -0,0 +1,22 @@
+## MAC OS
+.DS_Store
+## TEXTMATE
+*.tmproj
+tmtags
+## EMACS
+*~
+\#*
+.\#*
+## VIM
+*.swp
+## PROJECT::GENERAL
+coverage
+rdoc
+pkg
+## PROJECT::SPECIFIC
+*.rbc

data/LICENSE ADDED

@@ -0,0 +1,20 @@
+Copyright (c) 2009 Graham Batty
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

data/README.rdoc ADDED

@@ -0,0 +1,72 @@
+= http_parser
+This gem provides a (hopefully) high quality http parser library that can
+build request information iteratively as data comes over the line without
+requiring the caller to maintain the entire body of the request as a single
+string in memory.
+It will also have a full set of specs and a Ruby-native reference library
+so that it can be used in implementations or environments that do not
+support C extensions.
+Simple usage example:
+	p = Http::Parser.new
+	p.parse("GET / HTTP/1.1\r\n")
+	p.parse("Host: blah.com\r\n")
+	p.parse("Cookie: blorp=blah\r\n")
+	p.parse("\r\n")
+	p.method => "GET"
+	p.version => [1,1]
+	p.path => "/"
+	p.headers["HOST"] => "blah.com"
+	p.headers["COOKIE"] => "blorp=blah"
+If the request is a type that has a body, the body will be available
+as a stream object via p.body:
+	p = Http::Parser.new
+	p.parse("PUT / HTTP/1.1\r\n")
+	p.parse("Host: blah.com\r\n")
+	p.parse("Content-Type: text/text\r\n")
+	p.parse("Content-Length: 5\r\n")
+	p.parse("\r\n")
+	p.parse("stuff")
+	p.body.read => "stuff"
+If you use p.parse!, any trailing text that isn't immediately parseable
+will be left in the string object you pass in, while what was parsed will be
+removed. This allows you to hand the parser a large blob of data and let
+it figure out what it needs and what it doesn't. When you get more data, you
+can append it to your existing string and pass that in again until the request
+is done. You can test whether the request is done by using p.done?
+	p = Http::Parser.new
+	s = "GET / HTTP/1.1\r\nHost:"
+	p.parse!(s)
+	s => "Host:"
+	p.done? => false
+	s << " blah.com\r\n"
+	p.parse!(s)
+	s => ""
+	p.done? => false
+	s << "\r\n"
+	p.parse!(s)
+	s => ""
+	p.done? => true
+== Note on Patches/Pull Requests
+* Fork the project.
+* Make your feature addition or bug fix.
+* Add tests for it. This is important so I don't break it in a
+  future version unintentionally.
+* Commit, do not mess with rakefile, version, or history.
+  (If you want to have your own version, that is fine, but bump the version in a separate commit that I can ignore when I pull.)
+* Send me a pull request. Bonus points for topic branches.
+== Copyright
+Copyright (c) 2010 Graham Batty. See LICENSE for details.

data/Rakefile ADDED

@@ -0,0 +1,48 @@
+require 'rubygems'
+require 'rake'
+begin
+  require 'jeweler'
+  Jeweler::Tasks.new do |gem|
+    gem.name = "http_parser"
+    gem.summary = %Q{HTTP Parser Library}
+    gem.description = %Q{This gem provides a (hopefully) high quality http parser library that can
+    build request information iteratively as data comes over the line without
+    requiring the caller to maintain the entire body of the request as a single
+    string in memory.}
+    gem.email = "graham@stormbrew.ca"
+    gem.homepage = "http://github.com/stormbrew/http_parser"
+    gem.authors = ["Graham Batty"]
+    gem.add_development_dependency "rspec", ">= 1.2.9"
+    # gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
+  end
+  Jeweler::GemcutterTasks.new
+rescue LoadError
+  puts "Jeweler (or a dependency) not available. Install it with: gem install jeweler"
+end
+require 'spec/rake/spectask'
+Spec::Rake::SpecTask.new(:spec) do |spec|
+  spec.libs << 'lib' << 'spec'
+  spec.spec_files = FileList['spec/**/*_spec.rb']
+end
+Spec::Rake::SpecTask.new(:rcov) do |spec|
+  spec.libs << 'lib' << 'spec'
+  spec.pattern = 'spec/**/*_spec.rb'
+  spec.rcov = true
+end
+task :spec => :check_dependencies
+task :default => :spec
+require 'rake/rdoctask'
+Rake::RDocTask.new do |rdoc|
+  version = File.exist?('VERSION') ? File.read('VERSION') : ""
+  rdoc.rdoc_dir = 'rdoc'
+  rdoc.title = "http_parser #{version}"
+  rdoc.rdoc_files.include('README*')
+  rdoc.rdoc_files.include('lib/**/*.rb')
+end

data/VERSION ADDED

	@@ -0,0 +1 @@
1	+ 0.1.0

data/bench/bench_helper.rb ADDED

@@ -0,0 +1,5 @@
+$LOAD_PATH.unshift(File.dirname(__FILE__))
+$LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
+require 'rubygems'
+require 'benchmark'

data/bench/http_parser_bench.rb ADDED

@@ -0,0 +1,31 @@
+require File.expand_path(File.dirname(__FILE__) + '/bench_helper')
+require 'http/parser'
+request_body = <<REQ
+GET /blakjsdfkas HTTP/1.1\r
+Host: blooperblorp\r
+Cookie: blah=woop\r
+\r
+REQ
+#File.read(File.expand_path(File.dirname(__FILE__) + '/sample_request.http'))
+Benchmark.bmbm(20) do |bm|
+  bm.report("Http::NativeParser") do
+    0.upto(100000) do
+      Http::NativeParser.new.parse(request_body)
+    end
+  end
+  begin
+    require 'http11'
+    bm.report("Mongrel::HttpParser") do
+      0.upto(100000) do
+        Mongrel::HttpParser.new.execute({}, request_body.dup, 0)
+      end
+    end
+  rescue LoadError
+    puts("Can't benchmark Mongrel::HttpParser as it couldn't be loaded.")
+  end
+end

data/lib/http/native_parser.rb ADDED

@@ -0,0 +1,307 @@
+require 'stringio'
+require 'tempfile'
+require 'strscan'
+module Http
+  # This is a native ruby implementation of the http parser. It is also
+  # the reference implementation for this library. Later there will be one
+  # written in C for performance reasons, and it will have to pass the same
+  # specs as this one.
+  class NativeParser
+    # The HTTP method string used. Will always be a string and all-capsed.
+    # Valid values are: "GET", "HEAD", "POST", "PUT", "DELETE".
+    # Other values will cause an exception since then we don't know
+    # whether the request has a body.
+    attr_reader :method
+    # The path given by the client as a string. No processing is done on
+    # this and nearly anything is considered valid.
+    attr_reader :path
+    # The HTTP version of the request as an array of two integers.
+    # [1,0] and [1,1] are the most likely values currently.
+    attr_reader :version
+    # A hash of headers passed to the server with the request. All
+    # headers will be normalized to ALLCAPS_WITH_UNDERSCORES for
+    # consistency's sake.
+    attr_reader :headers
+    # The body of the request as a stream object. May be either
+    # a StringIO or a TempFile, depending on request length.
+    attr_reader :body
+    # The default set of parse options for the request.
+    DefaultOptions = {
+      # maximum length of an individual header line.
+      :max_header_length => 10240,
+      # maximum number of headers that can be passed to the server
+      :max_headers => 100,
+      # the size of the request body before it will be spilled
+      # to a tempfile instead of being stored in memory.
+      :min_tempfile_size => 1048576,
+      # the class to use to create and manage the temporary file.
+      # Must conform to the same interface as the stdlib Tempfile class
+      :tempfile_class => Tempfile,
+    }
+    # Constants for method information
+    MethodInfo = Struct.new(:must_have_body, :can_have_body)
+    Methods = {
+      "OPTIONS" => MethodInfo[false, true],
+      "GET" => MethodInfo[false, false],
+      "HEAD" => MethodInfo[false, false],
+      "POST" => MethodInfo[true, true],
+      "PUT" => MethodInfo[true, true],
+      "DELETE" => MethodInfo[false, false],
+      "TRACE" => MethodInfo[false, false],
+      "CONNECT" => MethodInfo[false, false],
+    }
+    # Regex used to match the Request-Line
+    RequestLineMatch = %r{^([a-zA-Z]+) (.+) HTTP/([0-9]+)\.([0-9]+)\r?\n}
+    # Regex used to match a header line. Lines suspected of
+    # being headers are also checked against the HeaderContinueMatch
+    # to deal with multiline headers
+    HeaderLineMatch = %r{^([a-zA-Z-]+):[ \t]*([[:print:]]+)\r?\n}
+    HeaderContinueMatch = %r{^[ \t]+([[:print:]]+)\r?\n}
+    EmptyLineMatch = %r{^\r?\n}
+    # Regex used to match a size specification for a chunked segment
+    ChunkSizeLineMatch = %r{^[0-9]+\r?\n}
+    # Used as a fallback in error detection for a malformed request line or header.
+    AnyLineMatch = %r{^.+?\r?\n}
+    def initialize(options = DefaultOptions)
+      @method = nil
+      @path = nil
+      @version = nil
+      @headers = {}
+      @body = nil
+      @state = :request_line
+      @options = DefaultOptions.merge(options)
+    end
+    # Returns true if the http method being parsed (if
+    # known at this point in the parse) must have a body.
+    # If the method hasn't been determined yet, returns false.
+    def must_have_body?
+      Methods[@method].must_have_body
+    end
+    # Returns true if the http method being parsed (if
+    # known at this point in the parse) can have a body.
+    # If the method hasn't been determined yet, returns false.
+    def can_have_body?
+      Methods[@method].can_have_body
+    end
+    # Returns true if the request has a body.
+    def has_body?
+      @body
+    end
+    # Takes a string and runs it through the parser. Note that
+    # it does not consume anything it can't completely parse, so
+    # you should always pass complete request chunks (lines or body data)
+    # to this method. It's mostly for testing and convenience.
+    # In practical use, you want to use parse!, which will remove parsed
+    # data from the string you pass in.
+    def parse(str)
+      parse!(str.dup)
+    end
+    def parse_request_line(scanner)
+      if (scanner.scan(RequestLineMatch))
+        @method = scanner[1]
+        @path = scanner[2]
+        @version = [scanner[3].to_i, scanner[4].to_i]
+        @state = :headers
+        if (!Methods[@method])
+          raise Http::ParserError::NotImplemented
+        end
+      elsif (scanner.scan(EmptyLineMatch))
+        # ignore an empty line before a request line.
+      elsif (scanner.scan(AnyLineMatch))
+        raise Http::ParserError::BadRequest
+      end
+    end
+    private :parse_request_line
+    def parse_headers(scanner)
+      if (scanner.scan(HeaderLineMatch))
+        header = normalize_header(scanner[1])
+        if (@headers[header])
+          @headers[header] << "," << scanner[2]
+        else
+          @headers[header] = scanner[2]
+        end
+        @last_header = header
+      elsif (@last_header && scanner.scan(HeaderContinueMatch))
+        @headers[@last_header] << " " << scanner[1]
+      elsif (scanner.scan(EmptyLineMatch))
+        req_has_body = @headers["CONTENT_LENGTH"] || @headers["TRANSFER_ENCODING"]
+        if (req_has_body)
+          if (@headers["TRANSFER_ENCODING"] && @headers["TRANSFER_ENCODING"] != 'identity')
+            @state = :body_chunked
+            @body_length = 0 # this will get updated as we go.
+            @body_read = 0
+            @chunk_remain = nil
+          elsif (@headers["CONTENT_LENGTH"])
+            @body_length = @headers["CONTENT_LENGTH"].to_i
+            @body_read = 0
+            if (@body_length > 0)
+              @state = :body_identity
+            else
+              @state = :done
+            end
+          end
+          if (can_have_body?)
+            if (@body_length >= @options[:min_tempfile_size])
+              @body = @options[:tempfile_class].new("http_parser")
+              @body.unlink # unlink immediately so we don't rely on the caller to do it.
+            else
+              @body = StringIO.new
+            end
+          else
+            @body = nil
+          end
+        else
+          if (must_have_body?)
+            # we assume it has a body and the client just didn't tell us
+            # how big it was. This is more useful than BadRequest.
+            raise ParserError::LengthRequired
+          else
+            @state = :done
+          end
+        end
+      elsif (scanner.scan(AnyLineMatch))
+        raise Http::ParserError::BadRequest
+      end
+    end
+    private :parse_headers
+    def parse_body_identity(scanner)
+      remain = @body_length - @body_read
+      addition = scanner.string[scanner.pos, remain]
+      scanner.pos += addition.length
+      @body_read += addition.length
+      @body << addition if @body
+      if (@body_read >= @body_length)
+        @body.rewind if (@body)
+        @state = :done
+      end
+    end
+    private :parse_body_identity
+    def parse_body_chunked(scanner)
+      if (@chunk_remain)
+        if (@chunk_remain > 0)
+          addition = scanner.string[scanner.pos, @chunk_remain]
+          scanner.pos += addition.length
+          @chunk_remain -= addition.length
+          @body_length += addition.length
+          @body << addition if @body
+          if (@body.length >= @options[:min_tempfile_size] && @body.kind_of?(StringIO))
+            @body_str = @body.string
+            @body = @options[:tempfile_class].new("http_parser")
+            @body.unlink # unlink immediately so we don't rely on the caller to do it.
+            @body << @body_str
+          end
+        else
+          if (scanner.scan(EmptyLineMatch))
+            # the chunk is done.
+            @chunk_remain = nil
+          elsif (scanner.scan(AnyLineMatch))
+            # there was a line with stuff in it,
+            # which is invalid here.
+            raise ParserError::BadRequest
+          end
+        end
+      elsif (scanner.scan(ChunkSizeLineMatch))
+        @chunk_remain = scanner[0].to_i
+        if (@chunk_remain < 1)
+          @state = :body_chunked_tail
+        end
+      elsif (scanner.scan(AnyLineMatch))
+        raise ParserError::BadRequest
+      end
+    end
+    private :parse_body_chunked
+    def parse_body_chunked_tail(scanner)
+      # It's not actually clear if tail headers are even
+      # legal in a chunked request entity. The docs seem
+      # to indicate that they should only be sent if the other
+      # end is known to accept them, and there's no way to ensure
+      # that when the client is the originator. As such, we'll
+      # just ignore them for now. We'll do this by ignoring
+      # any line until we hit an empty line, which will be treated
+      # as the end of the entity.
+      if (scanner.scan(EmptyLineMatch))
+        @state = :done
+        @body.rewind
+      elsif (scanner.scan(AnyLineMatch))
+        # ignore the line.
+      end
+    end
+    private :parse_body_chunked_tail
+    def parse_done(scanner)
+      # do nothing, the parse is done.
+    end
+    private :parse_done
+    # Consumes as much of str as it can and then removes it from str. This
+    # allows you to iteratively pass data into the parser as it comes from
+    # the client.
+    def parse!(str)
+      scanner = StringScanner.new(str)
+      begin
+        while (!scanner.eos?)
+          start_pos = scanner.pos
+          send(:"parse_#{@state}", scanner)
+          if (scanner.pos == start_pos)
+            # if we didn't move forward, we've run out of useful string so throw it back.
+            return str
+          end
+        end
+      ensure
+        # clear out whatever we managed to scan.
+        str[0, scanner.pos] = ""
+      end
+    end
+    # Normalizes a header name to be UPPERCASE_WITH_UNDERSCORES
+    def normalize_header(str)
+      str.upcase.gsub('-', '_')
+    end
+    private :normalize_header
+    # Returns true if the request is completely done.
+    def done?
+      @state == :done
+    end
+    # Returns true if the request has parsed the request-line (GET / HTTP/1.1)
+    def done_request_line?
+      [:headers, :body_identity, :body_chunked, :body_chunked_tail, :done].include?(@state)
+    end
+    # Returns true if all the headers from the request have been consumed.
+    def done_headers?
+      [:body_identity, :body_chunked, :body_chunked_tail, :done].include?(@state)
+    end
+    # Returns true if the request's body has been consumed (really the same as done?)
+    def done_body?
+      done?
+    end
+  end
+end

data/lib/http/parser.rb ADDED

@@ -0,0 +1,32 @@
+module Http
+  require 'http/native_parser'
+  # Http::Parser is FastParser when 'http/fast_parser' can be loaded and
+  # the pure-Ruby NativeParser otherwise.
+  begin
+    require 'http/fast_parser'
+    Parser = FastParser
+  rescue LoadError
+    Parser = NativeParser
+  end
+  # An exception class for HTTP parser errors. Includes
+  # an HTTP Error Code number that corresponds to the
+  # difficulty parsing (ie. 414 for Request-URI Too Long)
+  class ParserError < RuntimeError
+    # The error code that corresponds to the parsing error.
+    attr_reader :code
+    # Headers that should be sent back with the error reply as a hash.
+    attr_reader :headers
+    def initialize(string = "Bad Request", code = 400, headers = {})
+      super(string)
+      @code = code
+      @headers = headers
+    end
+    # Each subclass below only changes the default message and code. They
+    # accept the same optional arguments as ParserError so that
+    # `raise SomeError, "custom message"` works instead of failing with an
+    # ArgumentError; calling them without arguments behaves as before.
+    class BadRequest < ParserError; end
+    class RequestTimeout < ParserError
+      def initialize(string = "Request Timeout", code = 408, headers = {})
+        super
+      end
+    end
+    class LengthRequired < ParserError
+      def initialize(string = "Length Required", code = 411, headers = {})
+        super
+      end
+    end
+    class RequestEntityTooLarge < ParserError
+      def initialize(string = "Request Entity Too Large", code = 413, headers = {})
+        super
+      end
+    end
+    class RequestURITooLong < ParserError
+      def initialize(string = "Request-URI Too Long", code = 414, headers = {})
+        super
+      end
+    end
+    # Send Allow header
+    class NotImplemented < ParserError
+      def initialize(string = "Method Not Implemented", code = 501, headers = {})
+        super
+      end
+    end
+  end
+end

data/spec/http_parser_spec.rb ADDED

@@ -0,0 +1,428 @@
+require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
+require 'http/parser'
+test_parsers = [Http::NativeParser]
+test_parsers << Http::FastParser if Http.const_defined? :FastParser
+describe Http::Parser do
+  it "should be a reference to Http::NativeParser, or if present Http::FastParser" do
+    Http.const_defined?(:Parser).should be_true
+    if (Http.const_defined?(:FastParser))
+      Http::Parser.should == Http::FastParser
+    else
+      Http::Parser.should == Http::NativeParser
+    end
+  end
+end
+test_parsers.each do |parser|
+  describe parser do
+    it "should be able to parse a simple GET request" do
+      p = parser.new
+    	p.parse("GET / HTTP/1.1\r\n")
+    	p.parse("Host: blah.com\r\n")
+    	p.parse("Cookie: blorp=blah\r\n")
+    	p.parse("\r\n")
+      p.done?.should be_true
+    	p.method.should == "GET"
+    	p.version.should == [1,1]
+    	p.path.should == "/"
+    	p.headers["HOST"].should == "blah.com"
+    	p.headers["COOKIE"].should == "blorp=blah"
+  	end
+    it "should raise an error on a malformed request line" do
+      p = parser.new
+      proc {
+        p.parse("GET / HTTx/balh.blorp\r\n")
+      }.should raise_error(Http::ParserError::BadRequest)
+      proc {
+        p.parse("GET HTTP/1.1\r\n")
+      }.should raise_error(Http::ParserError::BadRequest)
+    end
+    it "should raise an error on a malformed header line" do
+      p = parser.new
+      p.parse("GET / HTTP/1.1\r\n")
+      proc {
+        p.parse("Stuff\r\n")
+      }.should raise_error(Http::ParserError::BadRequest)
+    end
+  	it "should be able to parse a request with a body defined by a Content-Length (ie. PUT)" do
+  	  p = parser.new
+    	p.parse("PUT / HTTP/1.1\r\n")
+    	p.parse("Host: blah.com\r\n")
+    	p.parse("Content-Type: text/text\r\n")
+    	p.parse("Content-Length: 5\r\n")
+    	p.parse("\r\n")
+    	p.parse("stuff")
+    	p.body.read.should == "stuff"
+  	end
+  	it "should be able to parse two simple requests from the same string" do
+  	  req = <<REQ
+GET /first HTTP/1.1\r
+Host: blah.com\r
+\r
+GET /second HTTP/1.1\r
+Host: blorp.com\r
+\r
+REQ
+      p = parser.new
+      p.parse!(req)
+      p.done?.should be_true
+      p.method.should == "GET"
+      p.version.should == [1,1]
+      p.path.should == "/first"
+      p.headers["HOST"].should == "blah.com"
+      p.has_body?.should be_false
+      p = parser.new
+      p.parse!(req)
+      p.done?.should be_true
+      p.method.should == "GET"
+      p.version.should == [1,1]
+      p.path.should == "/second"
+      p.headers["HOST"].should == "blorp.com"
+      p.has_body?.should be_false
+    end
+  	it "should be able to parse two requests with length-prefixed entities from the same string" do
+  	  req = <<REQ
+POST /first HTTP/1.1\r
+Host: blah.com\r
+Content-Length: 5\r
+\r
+test
+POST /second HTTP/1.1\r
+Host: blorp.com\r
+Content-Length: 5\r
+\r
+haha
+REQ
+      p = parser.new
+      p.parse!(req)
+      p.done?.should be_true
+      p.method.should == "POST"
+      p.version.should == [1,1]
+      p.path.should == "/first"
+      p.headers["HOST"].should == "blah.com"
+      p.body.read.should == "test\n"
+      p = parser.new
+      p.parse!(req)
+      p.done?.should be_true
+      p.method.should == "POST"
+      p.version.should == [1,1]
+      p.path.should == "/second"
+      p.headers["HOST"].should == "blorp.com"
+      p.body.read.should == "haha\n"
+    end
+  	it "should be able to parse two requests with chunked entities from the same string" do
+  	  req = <<REQ
+POST /first HTTP/1.1\r
+Host: blah.com\r
+Transfer-Encoding: chunked\r
+\r
+5\r
+test
+\r
+0\r
+\r
+POST /second HTTP/1.1\r
+Host: blorp.com\r
+Transfer-Encoding: chunked\r
+\r
+5\r
+haha
+\r
+0\r
+\r
+REQ
+      p = parser.new
+      p.parse!(req)
+      p.done?.should be_true
+      p.method.should == "POST"
+      p.version.should == [1,1]
+      p.path.should == "/first"
+      p.headers["HOST"].should == "blah.com"
+      p.body.read.should == "test\n"
+      p = parser.new
+      p.parse!(req)
+      p.done?.should be_true
+      p.method.should == "POST"
+      p.version.should == [1,1]
+      p.path.should == "/second"
+      p.headers["HOST"].should == "blorp.com"
+      p.body.read.should == "haha\n"
+    end
+  	it "should be able to parse a request with a body defined by Transfer-Encoding: chunked" do
+  	  p = parser.new
+  	  p.parse(<<REQ)
+POST / HTTP/1.1\r
+Host: blah.com\r
+Transfer-Encoding: chunked\r
+\r
+10\r
+stuffstuff\r
+0\r
+\r
+REQ
+      p.done?.should be_true
+      p.body.read.should == "stuffstuff"
+    end
+    it "should deal with a properly set 0 length body on a PUT/POST request" do
+      p = parser.new
+      p.parse <<REQ
+PUT / HTTP/1.1\r
+Host: blah.com\r
+Content-Length: 0\r
+\r
+REQ
+      p.done?.should be_true
+      p.body.read.should == ""
+    end
+    it "should handle a body that's too long to store in memory with a Content-Length by putting it out to a tempfile." do
+      p = parser.new(:min_tempfile_size => 1024)
+      p.parse <<REQ
+POST / HTTP/1.1\r
+Host: blah.com\r
+Content-Length: 2048\r
+\r
+REQ
+      p.parse("x"*2048)
+      p.done?.should be_true
+      p.body.should be_kind_of(Tempfile)
+      p.body.read.should == "x" * 2048
+    end
+    it "should handle a body that's too long to store in memory with a Transfer-Encoding of chunked by putting it out to a tempfile" do
+      p = parser.new(:min_tempfile_size => 1024)
+      p.parse <<REQ
+POST / HTTP/1.1\r
+Host: blah.com\r
+Transfer-Encoding: chunked\r
+\r
+REQ
+      1.upto(200) do
+        p.parse("10\r\n")
+        p.parse("x"*10 + "\r\n")
+      end
+      p.parse("0\r\n\r\n")
+      p.done?.should be_true
+      p.body.should be_kind_of(Tempfile)
+      p.body.read.should == "x" * 2000
+    end
+  	it "Should be able to incrementally parse a request with arbitrarily placed string endings" do
+  	  p = parser.new
+    	s = "GET / HTT"
+    	p.parse!(s)
+    	s.should == "GET / HTT"
+    	p.done_request_line?.should be_false
+    	p.done_headers?.should be_false
+    	p.done?.should be_false
+    	s << "P/1.1\r\nHost:"
+    	p.parse!(s)
+    	s.should == "Host:"
+    	p.method.should == "GET"
+    	p.path.should == "/"
+    	p.version.should == [1,1]
+    	p.done_request_line?.should be_true
+    	p.done_headers?.should be_false
+    	p.done?.should be_false
+    	s << " blah.com\r\n"
+    	p.parse!(s)
+    	s.should == ""
+    	p.headers["HOST"].should == "blah.com"
+    	p.done_headers?.should be_false
+    	p.done?.should be_false
+    	s << "\r\n"
+    	p.parse!(s)
+    	s.should == ""
+    	p.done_headers?.should be_true
+    	p.done?.should be_true
+  	end
+  	describe "RFC2616 sec 3.1 (HTTP Version)" do
+  	  it "MUST accept arbitrary numbers for the version string" do
+  	    p = parser.new
+  	    p.parse("GET / HTTP/12.3445\r\n")
+  	    p.done_request_line?.should be_true
+  	    p.version.should == [12,3445]
+	    end
+    end
+    describe "RFC2616 sec 4.1 (Message Type)" do
+      it "SHOULD ignore leading whitespace lines before a request-line" do
+        p = parser.new
+        p.parse("\r\n")
+        p.parse("GET / HTTP/1.1\r\n")
+        p.done_request_line?.should be_true
+      end
+    end
+  	describe "RFC2616 sec 4.2 (Message Headers)" do
+    	it "MUST ignore leading spaces on header values" do
+  	    p = parser.new
+  	    p.parse("GET / HTTP/1.1\r\n")
+  	    p.parse("Blah:    wat?\r\n")
+  	    p.parse("\r\n")
+  	    p.done?.should be_true
+  	    p.headers["BLAH"].should == "wat?"
+      end
+    	it "MUST be able to handle a header that spans more then one line" do
+    	  p = parser.new
+    	  p.parse("GET / HTTP/1.1\r\n")
+    	  p.parse("Blah: blorp\r\n")
+    	  p.parse(" woop\r\n")
+    	  p.parse("\r\n")
+    	  p.done?.should be_true
+    	  p.headers["BLAH"].should == "blorp woop"
+  	  end
+    	it "MUST ignore any amount of leading whitespace on multiline headers" do
+    	  p = parser.new
+    	  p.parse("GET / HTTP/1.1\r\n")
+    	  p.parse("Blah: blorp\r\n")
+    	  p.parse(" \t woop\r\n")
+    	  p.parse("\r\n")
+    	  p.done?.should be_true
+    	  p.headers["BLAH"].should == "blorp woop"
+  	  end
+  	  it "MUST be able to merge multiple headers into one comma separated header with order preserved" do
+  	    p = parser.new
+  	    p.parse("GET / HTTP/1.1\r\n")
+  	    p.parse("Blah: blorp\r\n")
+  	    p.parse("Blah: woop\r\n")
+  	    p.parse("Woop: bloop\r\n")
+  	    p.parse("Woop: noop\r\n")
+  	    p.parse("\r\n")
+  	    p.done?.should be_true
+  	    p.headers["BLAH"].should == "blorp,woop"
+  	    p.headers["WOOP"].should == "bloop,noop"
+	    end
+	  end
+	  describe "RFC2616 sec 4.3 (Message Body)" do
+      ["GET","DELETE","HEAD","TRACE","CONNECT"].each do |method|
+  	    it "MUST NOT require a body on #{method} requests" do
+  	      p = parser.new
+  	      p.parse("#{method} / HTTP/1.1\r\n")
+  	      p.parse("Host: blah.com\r\n")
+  	      p.parse("\r\n")
+  	      p.done?.should be_true
+  	      p.body.should be_nil
+	      end
+  	    it "SHOULD accept (but ignore) a message body on #{method} requests" do
+  	      p = parser.new
+  	      req = <<REQ
+#{method} / HTTP/1.1\r
+Content-Length: 6\r
+\r
+stuff
+REQ
+          p.parse!(req)
+          p.done?.should be_true
+  	      p.headers["CONTENT_LENGTH"].should == "6"
+  	      p.body.should be_nil
+  	      req.should == ""
+        end
+      end
+      ["POST","PUT"].each do |method|
+        it "MUST accept a body on #{method} requests" do
+          p = parser.new
+          p.parse("#{method} / HTTP/1.1\r\n")
+          p.parse("Content-Length: 5\r\n")
+          p.parse("\r\n")
+          p.parse("stuff")
+          p.done?.should be_true
+          p.body.should_not be_nil
+          p.body.read.should == "stuff"
+        end
+        it "MUST require a body on #{method} requests" do
+          p = parser.new
+          proc {
+            p.parse("#{method} / HTTP/1.1\r\n")
+            p.parse("Host: blah.com\r\n")
+            p.parse("\r\n")
+          }.should raise_error(Http::ParserError::LengthRequired)
+        end
+      end
+      it "SHOULD accept and allow a body on OPTIONS requests" do
+        p = parser.new
+        p.parse("OPTIONS / HTTP/1.1\r\n")
+        p.parse("Content-Length: 5\r\n")
+        p.parse("\r\n")
+        p.parse("stuff")
+        p.done?.should be_true
+        p.body.should_not be_nil
+        p.body.read.should == "stuff"
+      end
+      it "MUST accept an OPTIONS request with no body" do
+        p = parser.new
+        p.parse("OPTIONS / HTTP/1.1\r\n")
+        p.parse("\r\n")
+        p.done?.should be_true
+        p.body.should be_nil
+      end
+      it "MUST choose chunked-encoding length over content-length header" do
+        p = parser.new
+        p.parse(<<REQ)
+POST / HTTP/1.1\r
+Content-Length: 5
+Transfer-Encoding: chunked
+10
+stuffstuff
+0
+REQ
+        p.done?.should be_true
+        p.body.should_not be_nil
+        p.body.read.should == "stuffstuff"
+      end
+    end
+    describe "RFC2616 sec 5.1" do
+      it "SHOULD raise a 501 error if given an unrecognized method" do
+        p = parser.new
+        proc {
+          p.parse("OOGABOOGAH / HTTP/1.1\r\n")
+        }.should raise_error(Http::ParserError::NotImplemented)
+      end
+    end
+  end
+end

data/spec/spec.opts ADDED

	@@ -0,0 +1 @@
1	+ --color

data/spec/spec_helper.rb ADDED

@@ -0,0 +1,8 @@
+$LOAD_PATH.unshift(File.dirname(__FILE__))
+$LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
+require 'spec'
+require 'spec/autorun'
+Spec::Runner.configure do |config|
+end

metadata ADDED

@@ -0,0 +1,93 @@
+--- !ruby/object:Gem::Specification
+name: http_parser
+version: !ruby/object:Gem::Version
+  prerelease: false
+  segments:
+  - 0
+  - 1
+  - 0
+  version: 0.1.0
+platform: ruby
+authors:
+- Graham Batty
+autorequire:
+bindir: bin
+cert_chain: []
+date: 2010-03-07 00:00:00 -07:00
+default_executable:
+dependencies:
+- !ruby/object:Gem::Dependency
+  name: rspec
+  prerelease: false
+  requirement: &id001 !ruby/object:Gem::Requirement
+    requirements:
+    - - ">="
+      - !ruby/object:Gem::Version
+        segments:
+        - 1
+        - 2
+        - 9
+        version: 1.2.9
+  type: :development
+  version_requirements: *id001
+description: |-
+  This gem provides a (hopefully) high quality http parser library that can
+      build request information iteratively as data comes over the line without
+      requiring the caller to maintain the entire body of the request as a single
+      string in memory.
+email: graham@stormbrew.ca
+executables: []
+extensions: []
+extra_rdoc_files:
+- LICENSE
+- README.rdoc
+files:
+- .document
+- .gitignore
+- LICENSE
+- README.rdoc
+- Rakefile
+- VERSION
+- bench/bench_helper.rb
+- bench/http_parser_bench.rb
+- lib/http/native_parser.rb
+- lib/http/parser.rb
+- spec/http_parser_spec.rb
+- spec/spec.opts
+- spec/spec_helper.rb
+has_rdoc: true
+homepage: http://github.com/stormbrew/http_parser
+licenses: []
+post_install_message:
+rdoc_options:
+- --charset=UTF-8
+require_paths:
+- lib
+required_ruby_version: !ruby/object:Gem::Requirement
+  requirements:
+  - - ">="
+    - !ruby/object:Gem::Version
+      segments:
+      - 0
+      version: "0"
+required_rubygems_version: !ruby/object:Gem::Requirement
+  requirements:
+  - - ">="
+    - !ruby/object:Gem::Version
+      segments:
+      - 0
+      version: "0"
+requirements: []
+rubyforge_project:
+rubygems_version: 1.3.6
+signing_key:
+specification_version: 3
+summary: HTTP Parser Library
+test_files:
+- spec/http_parser_spec.rb
+- spec/spec_helper.rb