unicorn 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,253 @@
+ require 'unicorn/socket'
+ require 'unicorn/const'
+ require 'logger'
+
+ module Unicorn
+
+   # Implements a simple DSL for configuring a unicorn server.
+   #
+   # Example (when used with the unicorn config file):
+   #   worker_processes 4
+   #   listeners %w(0.0.0.0:9292 /tmp/my_app.sock)
+   #   timeout 10
+   #   pid "/tmp/my_app.pid"
+   #   after_fork do |server,worker_nr|
+   #     server.listen("127.0.0.1:#{9293 + worker_nr}") rescue nil
+   #   end
+   class Configurator
+     include ::Unicorn::SocketHelper
+
+     # The default logger writes its output to $stderr
+     DEFAULT_LOGGER = Logger.new($stderr) unless defined?(DEFAULT_LOGGER)
+
+     # Default settings for Unicorn
+     DEFAULTS = {
+       :timeout => 60,
+       :listeners => [ Const::DEFAULT_LISTEN ],
+       :logger => DEFAULT_LOGGER,
+       :worker_processes => 1,
+       :after_fork => lambda { |server, worker_nr|
+           server.logger.info("worker=#{worker_nr} spawned pid=#{$$}")
+
+           # per-process listener ports for debugging/admin:
+           # "rescue nil" statement is needed because USR2 will
+           # cause the master process to reexecute itself and the
+           # per-worker ports can be taken, necessitating another
+           # HUP after QUIT-ing the original master:
+           # server.listen("127.0.0.1:#{8081 + worker_nr}") rescue nil
+         },
+       :before_fork => lambda { |server, worker_nr|
+           server.logger.info("worker=#{worker_nr} spawning...")
+         },
+       :before_exec => lambda { |server|
+           server.logger.info("forked child re-executing...")
+         },
+       :pid => nil,
+       :backlog => 1024,
+       :preload_app => false,
+       :stderr_path => nil,
+       :stdout_path => nil,
+     }
+
+     attr_reader :config_file #:nodoc:
+
+     def initialize(defaults = {}) #:nodoc:
+       @set = Hash.new(:unset)
+       use_defaults = defaults.delete(:use_defaults)
+       @config_file = defaults.delete(:config_file)
+       @config_file.freeze
+       @set.merge!(DEFAULTS) if use_defaults
+       defaults.each { |key, value| self.send(key, value) }
+       reload
+     end
+
+     def reload #:nodoc:
+       instance_eval(File.read(@config_file)) if @config_file
+     end
+
+     def commit!(server, options = {}) #:nodoc:
+       skip = options[:skip] || []
+       @set.each do |key, value|
+         (Symbol === value && value == :unset) and next
+         skip.include?(key) and next
+         setter = "#{key}="
+         if server.respond_to?(setter)
+           server.send(setter, value)
+         else
+           server.instance_variable_set("@#{key}", value)
+         end
+       end
+     end
+
+     def [](key) # :nodoc:
+       @set[key]
+     end
+
+     # Changes the listen() syscall backlog to +nr+ for yet-to-be-created
+     # sockets. Due to limitations of the OS, this cannot affect
+     # existing listener sockets in any way; sockets must be completely
+     # closed and rebound (inherited sockets preserve their existing
+     # backlog setting). Some operating systems allow negative values
+     # here to specify the maximum allowable value. See the listen(2)
+     # syscall documentation of your OS for the exact semantics of this.
+     #
+     # If you are running unicorn on multiple machines, lowering this number
+     # can help your load balancer detect when a machine is overloaded
+     # and give requests to a different machine.
+     def backlog(nr)
+       Integer === nr or raise ArgumentError,
+         "not an integer: backlog=#{nr.inspect}"
+       @set[:backlog] = nr
+     end
+
+     # sets the logger to the +new+ Logger-like object. The new logger-like
+     # object must respond to the following methods:
+     # +debug+, +info+, +warn+, +error+, +fatal+, +close+
+     def logger(new)
+       %w(debug info warn error fatal close).each do |m|
+         new.respond_to?(m) and next
+         raise ArgumentError, "logger=#{new} does not respond to method=#{m}"
+       end
+
+       @set[:logger] = new
+     end
+
+     # sets the after_fork hook to a given block. This block will be called by
+     # the worker after forking. The following is an example hook which adds
+     # a per-process listener to every worker:
+     #
+     #   after_fork do |server,worker_nr|
+     #     # per-process listener ports for debugging/admin:
+     #     # "rescue nil" statement is needed because USR2 will
+     #     # cause the master process to reexecute itself and the
+     #     # per-worker ports can be taken, necessitating another
+     #     # HUP after QUIT-ing the original master:
+     #     server.listen("127.0.0.1:#{9293 + worker_nr}") rescue nil
+     #   end
+     def after_fork(&block)
+       set_hook(:after_fork, block)
+     end
+
+     # sets the before_fork hook to a given Proc object. This Proc
+     # object will be called by the master process before forking
+     # each worker.
+     def before_fork(&block)
+       set_hook(:before_fork, block)
+     end
+
+     # sets the before_exec hook to a given Proc object. This
+     # Proc object will be called by the master process right
+     # before exec()-ing the new unicorn binary. This is useful
+     # for freeing certain OS resources that you do NOT wish to
+     # share with the re-executed child process.
+     # There is no corresponding after_exec hook (for obvious reasons).
+     def before_exec(&block)
+       set_hook(:before_exec, block, 1)
+     end
+
+     # sets the timeout of worker processes to +seconds+. Workers
+     # handling the request/app.call/response cycle taking longer than
+     # this time period will be forcibly killed (via SIGKILL). This
+     # timeout is enforced by the master process itself and not subject
+     # to the scheduling limitations of the worker process.
+     def timeout(seconds)
+       Numeric === seconds or raise ArgumentError,
+         "not numeric: timeout=#{seconds.inspect}"
+       seconds > 0 or raise ArgumentError,
+         "not positive: timeout=#{seconds.inspect}"
+       @set[:timeout] = seconds
+     end
+
+     # sets the current number of worker_processes to +nr+. Each worker
+     # process will serve exactly one client at a time.
+     def worker_processes(nr)
+       Integer === nr or raise ArgumentError,
+         "not an integer: worker_processes=#{nr.inspect}"
+       nr >= 0 or raise ArgumentError,
+         "not non-negative: worker_processes=#{nr.inspect}"
+       @set[:worker_processes] = nr
+     end
+
+     # sets listeners to the given +addresses+, replacing or augmenting the
+     # current set. This is for the global listener pool shared by all
+     # worker processes. For per-worker listeners, see the after_fork example.
+     def listeners(addresses)
+       Array === addresses or addresses = Array(addresses)
+       @set[:listeners] = addresses
+     end
+
+     # adds an +address+ to the existing listener set
+     def listen(address)
+       @set[:listeners] = [] unless Array === @set[:listeners]
+       @set[:listeners] << address
+     end
+
+     # sets the +path+ for the PID file of the unicorn master process
+     def pid(path); set_path(:pid, path); end
+
+     # Enabling this preloads an application before forking worker
+     # processes. This allows memory savings when using a
+     # copy-on-write-friendly GC but can cause bad things to happen when
+     # resources like sockets are opened at load time by the master
+     # process and shared by multiple children. People enabling this are
+     # highly encouraged to look at the before_fork/after_fork hooks to
+     # properly close/reopen sockets. Files opened for logging do not
+     # have to be reopened as (unbuffered-in-userspace) files opened with
+     # the File::APPEND flag are written to atomically on UNIX.
+     def preload_app(bool)
+       case bool
+       when TrueClass, FalseClass
+         @set[:preload_app] = bool
+       else
+         raise ArgumentError, "preload_app=#{bool.inspect} not a boolean"
+       end
+     end
+
+     # Allow redirecting $stderr to a given path. Unlike doing this from
+     # the shell, this allows the unicorn process to know the path it's
+     # writing to and rotate the file if it is used for logging. The
+     # file will be opened with the File::APPEND flag and writes
+     # synchronized to the kernel (but not necessarily to _disk_) so
+     # multiple processes can safely append to it.
+     def stderr_path(path)
+       set_path(:stderr_path, path)
+     end
+
+     # Same as stderr_path, except for $stdout
+     def stdout_path(path)
+       set_path(:stdout_path, path)
+     end
+
+     private
+
+     def set_path(var, path) #:nodoc:
+       case path
+       when NilClass
+       when String
+         path = File.expand_path(path)
+         File.writable?(File.dirname(path)) or \
+           raise ArgumentError, "directory for #{var}=#{path} not writable"
+       else
+         raise ArgumentError
+       end
+       @set[var] = path
+     end
+
+     def set_hook(var, my_proc, req_arity = 2) #:nodoc:
+       case my_proc
+       when Proc
+         arity = my_proc.arity
+         (arity == req_arity) or \
+           raise ArgumentError,
+             "#{var}=#{my_proc.inspect} has invalid arity: " \
+             "#{arity} (need #{req_arity})"
+       when NilClass
+         my_proc = DEFAULTS[var]
+       else
+         raise ArgumentError, "invalid type: #{var}=#{my_proc.inspect}"
+       end
+       @set[var] = my_proc
+     end
+
+   end
+ end
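
For orientation, the DSL above can be exercised with a config file along these lines. The sketch below is not shipped with the package; the worker count, paths, and ports are illustrative placeholders, and the hook blocks must take exactly the arity that set_hook enforces (two arguments for before_fork/after_fork, one for before_exec).

    # unicorn config file -- illustrative values only
    worker_processes 4
    listeners %w(0.0.0.0:9292 /tmp/my_app.sock)
    timeout 30
    pid "/tmp/my_app.pid"
    preload_app false
    stderr_path "/tmp/unicorn.stderr.log"
    stdout_path "/tmp/unicorn.stdout.log"

    before_fork do |server, worker_nr|
      server.logger.info("about to fork worker=#{worker_nr}")
    end

    after_fork do |server, worker_nr|
      # per-process listener for debugging/admin; the port may already be
      # taken after a USR2 re-exec, hence the "rescue nil"
      server.listen("127.0.0.1:#{9293 + worker_nr}") rescue nil
    end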
@@ -0,0 +1,116 @@
+
+ module Unicorn
+
+   # Every standard HTTP code mapped to the appropriate message. These are
+   # used so frequently that they are placed directly in Unicorn for easy
+   # access rather than Unicorn::Const itself.
+   HTTP_STATUS_CODES = {
+     100 => 'Continue',
+     101 => 'Switching Protocols',
+     200 => 'OK',
+     201 => 'Created',
+     202 => 'Accepted',
+     203 => 'Non-Authoritative Information',
+     204 => 'No Content',
+     205 => 'Reset Content',
+     206 => 'Partial Content',
+     300 => 'Multiple Choices',
+     301 => 'Moved Permanently',
+     302 => 'Moved Temporarily',
+     303 => 'See Other',
+     304 => 'Not Modified',
+     305 => 'Use Proxy',
+     400 => 'Bad Request',
+     401 => 'Unauthorized',
+     402 => 'Payment Required',
+     403 => 'Forbidden',
+     404 => 'Not Found',
+     405 => 'Method Not Allowed',
+     406 => 'Not Acceptable',
+     407 => 'Proxy Authentication Required',
+     408 => 'Request Time-out',
+     409 => 'Conflict',
+     410 => 'Gone',
+     411 => 'Length Required',
+     412 => 'Precondition Failed',
+     413 => 'Request Entity Too Large',
+     414 => 'Request-URI Too Large',
+     415 => 'Unsupported Media Type',
+     500 => 'Internal Server Error',
+     501 => 'Not Implemented',
+     502 => 'Bad Gateway',
+     503 => 'Service Unavailable',
+     504 => 'Gateway Time-out',
+     505 => 'HTTP Version not supported'
+   }
+
+   # Frequently used constants when constructing requests or responses. Many times
+   # the constant just refers to a string with the same contents. Using these constants
+   # gave about a 3% to 10% performance improvement over using the strings directly.
+   # Symbols did not really improve things much compared to constants.
+   #
+   # While Unicorn does try to emulate the CGI/1.2 protocol, it does not use the REMOTE_IDENT,
+   # REMOTE_USER, or REMOTE_HOST parameters since those are either a security problem or
+   # too taxing on performance.
+   module Const
+     DATE = "Date".freeze
+
+     # This is the part of the path after the SCRIPT_NAME.
+     PATH_INFO = "PATH_INFO".freeze
+
+     # Request body
+     HTTP_BODY = "HTTP_BODY".freeze
+
+     # This is the initial part that your handler is identified as by URIClassifier.
+     SCRIPT_NAME = "SCRIPT_NAME".freeze
+
+     # The original URI requested by the client. Passed to URIClassifier to build PATH_INFO and SCRIPT_NAME.
+     REQUEST_URI = 'REQUEST_URI'.freeze
+     REQUEST_PATH = 'REQUEST_PATH'.freeze
+
+     UNICORN_VERSION = "0.1.0".freeze
+
+     UNICORN_TMP_BASE = "unicorn".freeze
+
+     DEFAULT_HOST = "0.0.0.0".freeze # default TCP listen host address
+     DEFAULT_PORT = "8080".freeze    # default TCP listen port
+     DEFAULT_LISTEN = "#{DEFAULT_HOST}:#{DEFAULT_PORT}".freeze
+
+     # The standard empty 404 response for bad requests. Use Error404Handler for custom stuff.
+     ERROR_404_RESPONSE = "HTTP/1.1 404 Not Found\r\nConnection: close\r\nServer: Unicorn #{UNICORN_VERSION}\r\n\r\nNOT FOUND".freeze
+
+     CONTENT_LENGTH = "CONTENT_LENGTH".freeze
+
+     # A common header for indicating the server is too busy. Not used yet.
+     ERROR_503_RESPONSE = "HTTP/1.1 503 Service Unavailable\r\n\r\nBUSY".freeze
+
+     # The basic max request size we'll try to read.
+     CHUNK_SIZE = (16 * 1024)
+
+     # This is the maximum header size that is allowed before a client is booted.
+     # The parser detects this, but we'd like to enforce it here as well.
+     MAX_HEADER = 1024 * (80 + 32)
+
+     # Maximum request body size before it is moved out of memory and into a tempfile for reading.
+     MAX_BODY = MAX_HEADER
+
+     # A frozen format for this is about 15% faster
+     CONTENT_TYPE = "Content-Type".freeze
+     LAST_MODIFIED = "Last-Modified".freeze
+     ETAG = "ETag".freeze
+     REQUEST_METHOD = "REQUEST_METHOD".freeze
+     GET = "GET".freeze
+     HEAD = "HEAD".freeze
+     # ETag is based on the apache standard of hex mtime-size-inode (inode is 0 on win32)
+     ETAG_FORMAT = "\"%x-%x-%x\"".freeze
+     LINE_END = "\r\n".freeze
+     REMOTE_ADDR = "REMOTE_ADDR".freeze
+     HTTP_X_FORWARDED_FOR = "HTTP_X_FORWARDED_FOR".freeze
+     HTTP_IF_MODIFIED_SINCE = "HTTP_IF_MODIFIED_SINCE".freeze
+     HTTP_IF_NONE_MATCH = "HTTP_IF_NONE_MATCH".freeze
+     REDIRECT = "HTTP/1.1 302 Found\r\nLocation: %s\r\nConnection: close\r\n\r\n".freeze
+     HOST = "HOST".freeze
+     CONNECTION = "Connection".freeze
+   end
+
+ end
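
As a usage note (not part of the package): HTTP_STATUS_CODES lives directly in the Unicorn namespace while the header and response fragments live in Unicorn::Const, so a hand-rolled response head could be assembled roughly as below, assuming this hunk corresponds to unicorn/const.rb. The status and "text/plain" body are made up for illustration, and the response header name Content-Length is spelled out literally because Const::CONTENT_LENGTH is the CGI-style request key, not a response header name.

    require 'unicorn/const' # assumed to be the file shown in this hunk

    status = 200
    body   = "hello"
    head   = "HTTP/1.1 #{status} #{Unicorn::HTTP_STATUS_CODES[status]}#{Unicorn::Const::LINE_END}" \
             "#{Unicorn::Const::CONTENT_TYPE}: text/plain#{Unicorn::Const::LINE_END}" \
             "Content-Length: #{body.size}#{Unicorn::Const::LINE_END}" \
             "#{Unicorn::Const::LINE_END}"
    # head + body is now a complete, minimal HTTP/1.1 response string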
@@ -0,0 +1,178 @@
+ require 'tempfile'
+ require 'uri'
+ require 'stringio'
+
+ # compiled extension
+ require 'unicorn/http11'
+
+ module Unicorn
+   #
+   # An HttpRequest will convert any request body that is larger than
+   # Const::MAX_BODY into a Tempfile and use that as the body. Otherwise it uses
+   # a StringIO object. To be safe, you should assume it works like a file.
+   #
+   class HttpRequest
+
+     def initialize(logger)
+       @logger = logger
+       @body = nil
+       @buffer = ' ' * Const::CHUNK_SIZE # initial size, may grow
+       @parser = HttpParser.new
+       @params = Hash.new
+     end
+
+     def reset
+       @parser.reset
+       @params.clear
+       @body.close rescue nil
+       @body = nil
+     end
+
+     #
+     # Does the majority of the IO processing. It has been written in
+     # Ruby using about 7 different IO processing strategies and no
+     # matter how it's done the performance just does not improve. It is
+     # currently carefully constructed to make sure that it gets the best
+     # possible performance, but anyone who thinks they can make it
+     # faster is more than welcome to take a crack at it.
+     #
+     # returns an environment hash suitable for Rack if successful
+     # This does minimal exception trapping and it is up to the caller
+     # to handle any socket errors (e.g. user aborted upload).
+     def read(socket)
+       data = String.new(read_socket(socket))
+       nparsed = 0
+
+       # Assumption: nparsed will always be less since data will get
+       # filled with more after each parsing. If it doesn't get more
+       # then there was a problem with the read operation on the client
+       # socket. Effect is to stop processing when the socket can't
+       # fill the buffer for further parsing.
+       while nparsed < data.length
+         nparsed = @parser.execute(@params, data, nparsed)
+
+         if @parser.finished?
+           # From http://www.ietf.org/rfc/rfc3875:
+           # "Script authors should be aware that the REMOTE_ADDR and
+           #  REMOTE_HOST meta-variables (see sections 4.1.8 and 4.1.9)
+           #  may not identify the ultimate source of the request. They
+           #  identify the client for the immediate request to the server;
+           #  that client may be a proxy, gateway, or other intermediary
+           #  acting on behalf of the actual source client."
+           @params[Const::REMOTE_ADDR] = socket.unicorn_peeraddr
+
+           handle_body(socket) and return rack_env # success!
+           return nil # fail
+         else
+           # Parser is not done, queue up more data to read and continue
+           # parsing
+           data << read_socket(socket)
+           if data.length >= Const::MAX_HEADER
+             raise HttpParserError.new("HEADER is longer than allowed, " \
+                                       "aborting client early.")
+           end
+         end
+       end
+       nil # XXX bug?
+     rescue HttpParserError => e
+       @logger.error "HTTP parse error, malformed request " \
+                     "(#{@params[Const::HTTP_X_FORWARDED_FOR] ||
+                         socket.unicorn_peeraddr}): #{e.inspect}"
+       @logger.error "REQUEST DATA: #{data.inspect}\n---\n" \
+                     "PARAMS: #{@params.inspect}\n---\n"
+       socket.closed? or socket.close rescue nil
+       nil
+     end
+
+     private
+
+     # Handles dealing with the rest of the request
+     # returns true if successful, false if not
+     def handle_body(socket)
+       http_body = @params[Const::HTTP_BODY]
+       content_length = @params[Const::CONTENT_LENGTH].to_i
+       remain = content_length - http_body.length
+
+       # must read more data to complete body
+       if remain < Const::MAX_BODY
+         # small body, just use that
+         @body = StringIO.new(http_body)
+       else # huge body, put it in a tempfile
+         @body = Tempfile.new(Const::UNICORN_TMP_BASE)
+         @body.binmode
+         @body.sync = true
+         @body.syswrite(http_body)
+       end
+
+       # Some clients (like FF1.0) report 0 for body and then send a body.
+       # This will probably truncate them but at least the request goes through
+       # usually.
+       if remain > 0
+         read_body(socket, remain) or return false # fail!
+       end
+       @body.rewind
+       @body.sysseek(0) if @body.respond_to?(:sysseek)
+
+       # in case read_body overread because the client tried to pipeline
+       # another request, we'll truncate it. Again, we don't do pipelining
+       # or keepalive
+       @body.truncate(content_length)
+       true
+     end
+
+     # Returns an environment which is rackable:
+     # http://rack.rubyforge.org/doc/files/SPEC.html
+     # Based on Rack's old Mongrel handler.
+     def rack_env
+       # It might be a dumbass full host request header
+       @params[Const::REQUEST_PATH] ||=
+         URI.parse(@params[Const::REQUEST_URI]).path
+       raise "No REQUEST PATH" unless @params[Const::REQUEST_PATH]
+
+       @params["QUERY_STRING"] ||= ''
+       @params.delete "HTTP_CONTENT_TYPE"
+       @params.delete "HTTP_CONTENT_LENGTH"
+       @params.update({ "rack.version" => [0,1],
+                        "rack.input" => @body,
+                        "rack.errors" => $stderr,
+                        "rack.multithread" => false,
+                        "rack.multiprocess" => true,
+                        "rack.run_once" => false,
+                        "rack.url_scheme" => "http",
+                        Const::PATH_INFO => @params[Const::REQUEST_PATH],
+                        Const::SCRIPT_NAME => "",
+                      })
+     end
+
+     # Does the heavy lifting of properly reading the larger body requests in
+     # small chunks. It expects @body to be an IO object and socket to be valid.
+     # It also expects any initial part of the body that has been read to be in
+     # @body already. It will return true if successful and false if not.
+     def read_body(socket, remain)
+       while remain > 0
+         # writes always write the requested amount on a POSIX filesystem
+         remain -= @body.syswrite(read_socket(socket))
+       end
+       true # success!
+     rescue Object => e
+       @logger.error "Error reading HTTP body: #{e.inspect}"
+       socket.closed? or socket.close rescue nil
+
+       # Any error means we should delete the file, including if the file
+       # is dumped. Truncate it ASAP to help avoid page flushes to disk.
+       @body.truncate(0) rescue nil
+       reset
+       false
+     end
+
+     # read(2) on "slow" devices like sockets can be interrupted by signals
+     def read_socket(socket)
+       begin
+         socket.sysread(Const::CHUNK_SIZE, @buffer)
+       rescue Errno::EINTR
+         retry
+       end
+     end
+
+   end
+ end
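
To show how these pieces are meant to fit together, here is a sketch of a driver loop for HttpRequest. Everything outside Unicorn::HttpRequest is hypothetical: the listener and app objects are assumed to exist, and the client socket must respond to unicorn_peeraddr (and sysread) via the helpers from unicorn/socket, which are not part of this hunk.

    require 'logger'
    require 'unicorn/http_request' # path assumed from this hunk

    request = Unicorn::HttpRequest.new(Logger.new($stderr))

    loop do
      client = listener.accept               # hypothetical listener socket
      env = request.read(client)             # nil if the request was malformed
      if env
        status, headers, body = app.call(env) # hypothetical Rack app
        # ... write the response to client here ...
        client.close unless client.closed?
      end
      request.reset                           # drop parser state and the body object
    end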