unicorn 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,253 @@
+ require 'unicorn/socket'
+ require 'unicorn/const'
+ require 'logger'
+
+ module Unicorn
+
+   # Implements a simple DSL for configuring a unicorn server.
+   #
+   # Example (when used with the unicorn config file):
+   #   worker_processes 4
+   #   listeners %w(0.0.0.0:9292 /tmp/my_app.sock)
+   #   timeout 10
+   #   pid "/tmp/my_app.pid"
+   #   after_fork do |server,worker_nr|
+   #     server.listen("127.0.0.1:#{9293 + worker_nr}") rescue nil
+   #   end
+   class Configurator
+     include ::Unicorn::SocketHelper
+
+     # The default logger writes its output to $stderr
+     DEFAULT_LOGGER = Logger.new($stderr) unless defined?(DEFAULT_LOGGER)
+
+     # Default settings for Unicorn
+     DEFAULTS = {
+       :timeout => 60,
+       :listeners => [ Const::DEFAULT_LISTEN ],
+       :logger => DEFAULT_LOGGER,
+       :worker_processes => 1,
+       :after_fork => lambda { |server, worker_nr|
+           server.logger.info("worker=#{worker_nr} spawned pid=#{$$}")
+
+           # per-process listener ports for debugging/admin:
+           # "rescue nil" statement is needed because USR2 will
+           # cause the master process to reexecute itself and the
+           # per-worker ports can be taken, necessitating another
+           # HUP after QUIT-ing the original master:
+           # server.listen("127.0.0.1:#{8081 + worker_nr}") rescue nil
+         },
+       :before_fork => lambda { |server, worker_nr|
+           server.logger.info("worker=#{worker_nr} spawning...")
+         },
+       :before_exec => lambda { |server|
+           server.logger.info("forked child re-executing...")
+         },
+       :pid => nil,
+       :backlog => 1024,
+       :preload_app => false,
+       :stderr_path => nil,
+       :stdout_path => nil,
+     }
+
+     attr_reader :config_file #:nodoc:
+
+     def initialize(defaults = {}) #:nodoc:
+       @set = Hash.new(:unset)
+       use_defaults = defaults.delete(:use_defaults)
+       @config_file = defaults.delete(:config_file)
+       @config_file.freeze
+       @set.merge!(DEFAULTS) if use_defaults
+       defaults.each { |key, value| self.send(key, value) }
+       reload
+     end
+
+     def reload #:nodoc:
+       instance_eval(File.read(@config_file)) if @config_file
+     end
+
+     def commit!(server, options = {}) #:nodoc:
+       skip = options[:skip] || []
+       @set.each do |key, value|
+         (Symbol === value && value == :unset) and next
+         skip.include?(key) and next
+         setter = "#{key}="
+         if server.respond_to?(setter)
+           server.send(setter, value)
+         else
+           server.instance_variable_set("@#{key}", value)
+         end
+       end
+     end
+
+     def [](key) # :nodoc:
+       @set[key]
+     end
+
+     # Changes the listen() syscall backlog to +nr+ for yet-to-be-created
+     # sockets. Due to limitations of the OS, this cannot affect
+     # existing listener sockets in any way; sockets must be completely
+     # closed and rebound (inherited sockets preserve their existing
+     # backlog setting). Some operating systems allow negative values
+     # here to specify the maximum allowable value. See the listen(2)
+     # syscall documentation of your OS for the exact semantics of this.
+     #
+     # If you are running unicorn on multiple machines, lowering this number
+     # can help your load balancer detect when a machine is overloaded
+     # and give requests to a different machine.
+     def backlog(nr)
+       Integer === nr or raise ArgumentError,
+         "not an integer: backlog=#{nr.inspect}"
+       @set[:backlog] = nr
+     end
+
+     # sets the logger to the +new+ Logger-like object. The new logger-like
+     # object must respond to the following methods:
+     # +debug+, +info+, +warn+, +error+, +fatal+, +close+
+     def logger(new)
+       %w(debug info warn error fatal close).each do |m|
+         new.respond_to?(m) and next
+         raise ArgumentError, "logger=#{new} does not respond to method=#{m}"
+       end
+
+       @set[:logger] = new
+     end
+
+     # sets the after_fork hook to a given block. This block will be called by
+     # the worker after forking. The following is an example hook which adds
+     # a per-process listener to every worker:
+     #
+     #   after_fork do |server,worker_nr|
+     #     # per-process listener ports for debugging/admin:
+     #     # "rescue nil" statement is needed because USR2 will
+     #     # cause the master process to reexecute itself and the
+     #     # per-worker ports can be taken, necessitating another
+     #     # HUP after QUIT-ing the original master:
+     #     server.listen("127.0.0.1:#{9293 + worker_nr}") rescue nil
+     #   end
+     def after_fork(&block)
+       set_hook(:after_fork, block)
+     end
+
+     # sets the before_fork hook to a given Proc object. This Proc
+     # object will be called by the master process before forking
+     # each worker.
+     def before_fork(&block)
+       set_hook(:before_fork, block)
+     end
+
+     # sets the before_exec hook to a given Proc object. This
+     # Proc object will be called by the master process right
+     # before exec()-ing the new unicorn binary. This is useful
+     # for freeing certain OS resources that you do NOT wish to
+     # share with the re-executed child process.
+     # There is no corresponding after_exec hook (for obvious reasons).
+     def before_exec(&block)
+       set_hook(:before_exec, block, 1)
+     end
+
+     # sets the timeout of worker processes to +seconds+. Workers
+     # handling the request/app.call/response cycle taking longer than
+     # this time period will be forcibly killed (via SIGKILL). This
+     # timeout is enforced by the master process itself and not subject
+     # to the scheduling limitations of the worker process.
+     def timeout(seconds)
+       Numeric === seconds or raise ArgumentError,
+         "not numeric: timeout=#{seconds.inspect}"
+       seconds > 0 or raise ArgumentError,
+         "not positive: timeout=#{seconds.inspect}"
+       @set[:timeout] = seconds
+     end
+
+     # sets the current number of worker_processes to +nr+. Each worker
+     # process will serve exactly one client at a time.
+     def worker_processes(nr)
+       Integer === nr or raise ArgumentError,
+         "not an integer: worker_processes=#{nr.inspect}"
+       nr >= 0 or raise ArgumentError,
+         "not non-negative: worker_processes=#{nr.inspect}"
+       @set[:worker_processes] = nr
+     end
+
+     # sets listeners to the given +addresses+, replacing or augmenting the
+     # current set. This is for the global listener pool shared by all
+     # worker processes. For per-worker listeners, see the after_fork example.
+     def listeners(addresses)
+       Array === addresses or addresses = Array(addresses)
+       @set[:listeners] = addresses
+     end
+
+     # adds an +address+ to the existing listener set
+     def listen(address)
+       @set[:listeners] = [] unless Array === @set[:listeners]
+       @set[:listeners] << address
+     end
+
+     # sets the +path+ for the PID file of the unicorn master process
+     def pid(path); set_path(:pid, path); end
+
+     # Enabling this preloads an application before forking worker
+     # processes. This allows memory savings when using a
+     # copy-on-write-friendly GC but can cause bad things to happen when
+     # resources like sockets are opened at load time by the master
+     # process and shared by multiple children. People enabling this are
+     # highly encouraged to look at the before_fork/after_fork hooks to
+     # properly close/reopen sockets. Files opened for logging do not
+     # have to be reopened as (unbuffered-in-userspace) files opened with
+     # the File::APPEND flag are written to atomically on UNIX.
+     def preload_app(bool)
+       case bool
+       when TrueClass, FalseClass
+         @set[:preload_app] = bool
+       else
+         raise ArgumentError, "preload_app=#{bool.inspect} not a boolean"
+       end
+     end
+
+     # Allow redirecting $stderr to a given path. Unlike doing this from
+     # the shell, this allows the unicorn process to know the path it's
+     # writing to and rotate the file if it is used for logging. The
+     # file will be opened with the File::APPEND flag and writes
+     # synchronized to the kernel (but not necessarily to _disk_) so
+     # multiple processes can safely append to it.
+     def stderr_path(path)
+       set_path(:stderr_path, path)
+     end
+
+     # Same as stderr_path, except for $stdout
+     def stdout_path(path)
+       set_path(:stdout_path, path)
+     end
+
+     private
+
+     def set_path(var, path) #:nodoc:
+       case path
+       when NilClass
+       when String
+         path = File.expand_path(path)
+         File.writable?(File.dirname(path)) or \
+           raise ArgumentError, "directory for #{var}=#{path} not writable"
+       else
+         raise ArgumentError
+       end
+       @set[var] = path
+     end
+
+     def set_hook(var, my_proc, req_arity = 2) #:nodoc:
+       case my_proc
+       when Proc
+         arity = my_proc.arity
+         (arity == req_arity) or \
+           raise ArgumentError,
+             "#{var}=#{my_proc.inspect} has invalid arity: " \
+             "#{arity} (need #{req_arity})"
+       when NilClass
+         my_proc = DEFAULTS[var]
+       else
+         raise ArgumentError, "invalid type: #{var}=#{my_proc.inspect}"
+       end
+       @set[var] = my_proc
+     end
+
+   end
+ end
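
For orientation, the DSL above can be exercised with a config file along these lines. The sketch below is not shipped with the package; the worker count, paths, and ports are illustrative placeholders, and the hook blocks must take exactly the arity that set_hook enforces (two arguments for before_fork/after_fork, one for before_exec).

    # unicorn config file -- illustrative values only
    worker_processes 4
    listeners %w(0.0.0.0:9292 /tmp/my_app.sock)
    timeout 30
    pid "/tmp/my_app.pid"
    preload_app false
    stderr_path "/tmp/unicorn.stderr.log"
    stdout_path "/tmp/unicorn.stdout.log"

    before_fork do |server, worker_nr|
      server.logger.info("about to fork worker=#{worker_nr}")
    end

    after_fork do |server, worker_nr|
      # per-process listener for debugging/admin; the port may already be
      # taken after a USR2 re-exec, hence the "rescue nil"
      server.listen("127.0.0.1:#{9293 + worker_nr}") rescue nil
    end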
@@ -0,0 +1,116 @@
+
+ module Unicorn
+
+   # Every standard HTTP code mapped to the appropriate message. These are
+   # used so frequently that they are placed directly in Unicorn for easy
+   # access rather than Unicorn::Const itself.
+   HTTP_STATUS_CODES = {
+     100 => 'Continue',
+     101 => 'Switching Protocols',
+     200 => 'OK',
+     201 => 'Created',
+     202 => 'Accepted',
+     203 => 'Non-Authoritative Information',
+     204 => 'No Content',
+     205 => 'Reset Content',
+     206 => 'Partial Content',
+     300 => 'Multiple Choices',
+     301 => 'Moved Permanently',
+     302 => 'Moved Temporarily',
+     303 => 'See Other',
+     304 => 'Not Modified',
+     305 => 'Use Proxy',
+     400 => 'Bad Request',
+     401 => 'Unauthorized',
+     402 => 'Payment Required',
+     403 => 'Forbidden',
+     404 => 'Not Found',
+     405 => 'Method Not Allowed',
+     406 => 'Not Acceptable',
+     407 => 'Proxy Authentication Required',
+     408 => 'Request Time-out',
+     409 => 'Conflict',
+     410 => 'Gone',
+     411 => 'Length Required',
+     412 => 'Precondition Failed',
+     413 => 'Request Entity Too Large',
+     414 => 'Request-URI Too Large',
+     415 => 'Unsupported Media Type',
+     500 => 'Internal Server Error',
+     501 => 'Not Implemented',
+     502 => 'Bad Gateway',
+     503 => 'Service Unavailable',
+     504 => 'Gateway Time-out',
+     505 => 'HTTP Version not supported'
+   }
+
+   # Frequently used constants when constructing requests or responses. Many times
+   # the constant just refers to a string with the same contents. Using these constants
+   # gave about a 3% to 10% performance improvement over using the strings directly.
+   # Symbols did not really improve things much compared to constants.
+   #
+   # While Unicorn does try to emulate the CGI/1.2 protocol, it does not use the REMOTE_IDENT,
+   # REMOTE_USER, or REMOTE_HOST parameters since those are either a security problem or
+   # too taxing on performance.
+   module Const
+     DATE = "Date".freeze
+
+     # This is the part of the path after the SCRIPT_NAME.
+     PATH_INFO = "PATH_INFO".freeze
+
+     # Request body
+     HTTP_BODY = "HTTP_BODY".freeze
+
+     # This is the initial part that your handler is identified as by URIClassifier.
+     SCRIPT_NAME = "SCRIPT_NAME".freeze
+
+     # The original URI requested by the client. Passed to URIClassifier to build PATH_INFO and SCRIPT_NAME.
+     REQUEST_URI = 'REQUEST_URI'.freeze
+     REQUEST_PATH = 'REQUEST_PATH'.freeze
+
+     UNICORN_VERSION = "0.1.0".freeze
+
+     UNICORN_TMP_BASE = "unicorn".freeze
+
+     DEFAULT_HOST = "0.0.0.0".freeze # default TCP listen host address
+     DEFAULT_PORT = "8080".freeze    # default TCP listen port
+     DEFAULT_LISTEN = "#{DEFAULT_HOST}:#{DEFAULT_PORT}".freeze
+
+     # The standard empty 404 response for bad requests. Use Error404Handler for custom stuff.
+     ERROR_404_RESPONSE = "HTTP/1.1 404 Not Found\r\nConnection: close\r\nServer: Unicorn #{UNICORN_VERSION}\r\n\r\nNOT FOUND".freeze
+
+     CONTENT_LENGTH = "CONTENT_LENGTH".freeze
+
+     # A common header for indicating the server is too busy. Not used yet.
+     ERROR_503_RESPONSE = "HTTP/1.1 503 Service Unavailable\r\n\r\nBUSY".freeze
+
+     # The basic max request size we'll try to read.
+     CHUNK_SIZE = (16 * 1024)
+
+     # This is the maximum header size that is allowed before a client is booted.
+     # The parser detects this, but we'd like to enforce it here as well.
+     MAX_HEADER = 1024 * (80 + 32)
+
+     # Maximum request body size before it is moved out of memory and into a tempfile for reading.
+     MAX_BODY = MAX_HEADER
+
+     # A frozen format for this is about 15% faster
+     CONTENT_TYPE = "Content-Type".freeze
+     LAST_MODIFIED = "Last-Modified".freeze
+     ETAG = "ETag".freeze
+     REQUEST_METHOD = "REQUEST_METHOD".freeze
+     GET = "GET".freeze
+     HEAD = "HEAD".freeze
+     # ETag is based on the apache standard of hex mtime-size-inode (inode is 0 on win32)
+     ETAG_FORMAT = "\"%x-%x-%x\"".freeze
+     LINE_END = "\r\n".freeze
+     REMOTE_ADDR = "REMOTE_ADDR".freeze
+     HTTP_X_FORWARDED_FOR = "HTTP_X_FORWARDED_FOR".freeze
+     HTTP_IF_MODIFIED_SINCE = "HTTP_IF_MODIFIED_SINCE".freeze
+     HTTP_IF_NONE_MATCH = "HTTP_IF_NONE_MATCH".freeze
+     REDIRECT = "HTTP/1.1 302 Found\r\nLocation: %s\r\nConnection: close\r\n\r\n".freeze
+     HOST = "HOST".freeze
+     CONNECTION = "Connection".freeze
+   end
+
+ end
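
As a usage note (not part of the package): HTTP_STATUS_CODES lives directly in the Unicorn namespace while the header and response fragments live in Unicorn::Const, so a hand-rolled response head could be assembled roughly as below, assuming this hunk corresponds to unicorn/const.rb. The status and "text/plain" body are made up for illustration, and the response header name Content-Length is spelled out literally because Const::CONTENT_LENGTH is the CGI-style request key, not a response header name.

    require 'unicorn/const' # assumed to be the file shown in this hunk

    status = 200
    body   = "hello"
    head   = "HTTP/1.1 #{status} #{Unicorn::HTTP_STATUS_CODES[status]}#{Unicorn::Const::LINE_END}" \
             "#{Unicorn::Const::CONTENT_TYPE}: text/plain#{Unicorn::Const::LINE_END}" \
             "Content-Length: #{body.size}#{Unicorn::Const::LINE_END}" \
             "#{Unicorn::Const::LINE_END}"
    # head + body is now a complete, minimal HTTP/1.1 response string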
@@ -0,0 +1,178 @@
+ require 'tempfile'
+ require 'uri'
+ require 'stringio'
+
+ # compiled extension
+ require 'unicorn/http11'
+
+ module Unicorn
+   #
+   # An HttpRequest will convert any request body that is larger than
+   # Const::MAX_BODY into a Tempfile and use that as the body. Otherwise it uses
+   # a StringIO object. To be safe, you should assume it works like a file.
+   #
+   class HttpRequest
+
+     def initialize(logger)
+       @logger = logger
+       @body = nil
+       @buffer = ' ' * Const::CHUNK_SIZE # initial size, may grow
+       @parser = HttpParser.new
+       @params = Hash.new
+     end
+
+     def reset
+       @parser.reset
+       @params.clear
+       @body.close rescue nil
+       @body = nil
+     end
+
+     #
+     # Does the majority of the IO processing. It has been written in
+     # Ruby using about 7 different IO processing strategies and no
+     # matter how it's done the performance just does not improve. It is
+     # currently carefully constructed to make sure that it gets the best
+     # possible performance, but anyone who thinks they can make it
+     # faster is more than welcome to take a crack at it.
+     #
+     # returns an environment hash suitable for Rack if successful
+     # This does minimal exception trapping and it is up to the caller
+     # to handle any socket errors (e.g. user aborted upload).
+     def read(socket)
+       data = String.new(read_socket(socket))
+       nparsed = 0
+
+       # Assumption: nparsed will always be less since data will get
+       # filled with more after each parsing. If it doesn't get more
+       # then there was a problem with the read operation on the client
+       # socket. Effect is to stop processing when the socket can't
+       # fill the buffer for further parsing.
+       while nparsed < data.length
+         nparsed = @parser.execute(@params, data, nparsed)
+
+         if @parser.finished?
+           # From http://www.ietf.org/rfc/rfc3875:
+           # "Script authors should be aware that the REMOTE_ADDR and
+           #  REMOTE_HOST meta-variables (see sections 4.1.8 and 4.1.9)
+           #  may not identify the ultimate source of the request. They
+           #  identify the client for the immediate request to the server;
+           #  that client may be a proxy, gateway, or other intermediary
+           #  acting on behalf of the actual source client."
+           @params[Const::REMOTE_ADDR] = socket.unicorn_peeraddr
+
+           handle_body(socket) and return rack_env # success!
+           return nil # fail
+         else
+           # Parser is not done, queue up more data to read and continue
+           # parsing
+           data << read_socket(socket)
+           if data.length >= Const::MAX_HEADER
+             raise HttpParserError.new("HEADER is longer than allowed, " \
+                                       "aborting client early.")
+           end
+         end
+       end
+       nil # XXX bug?
+     rescue HttpParserError => e
+       @logger.error "HTTP parse error, malformed request " \
+                     "(#{@params[Const::HTTP_X_FORWARDED_FOR] ||
+                         socket.unicorn_peeraddr}): #{e.inspect}"
+       @logger.error "REQUEST DATA: #{data.inspect}\n---\n" \
+                     "PARAMS: #{@params.inspect}\n---\n"
+       socket.closed? or socket.close rescue nil
+       nil
+     end
+
+     private
+
+     # Handles dealing with the rest of the request
+     # returns true if successful, false if not
+     def handle_body(socket)
+       http_body = @params[Const::HTTP_BODY]
+       content_length = @params[Const::CONTENT_LENGTH].to_i
+       remain = content_length - http_body.length
+
+       # must read more data to complete body
+       if remain < Const::MAX_BODY
+         # small body, just use that
+         @body = StringIO.new(http_body)
+       else # huge body, put it in a tempfile
+         @body = Tempfile.new(Const::UNICORN_TMP_BASE)
+         @body.binmode
+         @body.sync = true
+         @body.syswrite(http_body)
+       end
+
+       # Some clients (like FF1.0) report 0 for body and then send a body.
+       # This will probably truncate them but at least the request goes through
+       # usually.
+       if remain > 0
+         read_body(socket, remain) or return false # fail!
+       end
+       @body.rewind
+       @body.sysseek(0) if @body.respond_to?(:sysseek)
+
+       # in case read_body overread because the client tried to pipeline
+       # another request, we'll truncate it. Again, we don't do pipelining
+       # or keepalive
+       @body.truncate(content_length)
+       true
+     end
+
+     # Returns an environment which is rackable:
+     # http://rack.rubyforge.org/doc/files/SPEC.html
+     # Based on Rack's old Mongrel handler.
+     def rack_env
+       # It might be a dumbass full host request header
+       @params[Const::REQUEST_PATH] ||=
+         URI.parse(@params[Const::REQUEST_URI]).path
+       raise "No REQUEST PATH" unless @params[Const::REQUEST_PATH]
+
+       @params["QUERY_STRING"] ||= ''
+       @params.delete "HTTP_CONTENT_TYPE"
+       @params.delete "HTTP_CONTENT_LENGTH"
+       @params.update({ "rack.version" => [0,1],
+                        "rack.input" => @body,
+                        "rack.errors" => $stderr,
+                        "rack.multithread" => false,
+                        "rack.multiprocess" => true,
+                        "rack.run_once" => false,
+                        "rack.url_scheme" => "http",
+                        Const::PATH_INFO => @params[Const::REQUEST_PATH],
+                        Const::SCRIPT_NAME => "",
+                      })
+     end
+
+     # Does the heavy lifting of properly reading the larger body requests in
+     # small chunks. It expects @body to be an IO object and socket to be valid.
+     # It also expects any initial part of the body that has been read to be in
+     # @body already. It will return true if successful and false if not.
+     def read_body(socket, remain)
+       while remain > 0
+         # writes always write the requested amount on a POSIX filesystem
+         remain -= @body.syswrite(read_socket(socket))
+       end
+       true # success!
+     rescue Object => e
+       @logger.error "Error reading HTTP body: #{e.inspect}"
+       socket.closed? or socket.close rescue nil
+
+       # Any error means we should delete the file, including if the file
+       # is dumped. Truncate it ASAP to help avoid page flushes to disk.
+       @body.truncate(0) rescue nil
+       reset
+       false
+     end
+
+     # read(2) on "slow" devices like sockets can be interrupted by signals
+     def read_socket(socket)
+       begin
+         socket.sysread(Const::CHUNK_SIZE, @buffer)
+       rescue Errno::EINTR
+         retry
+       end
+     end
+
+   end
+ end
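
To show how these pieces are meant to fit together, here is a sketch of a driver loop for HttpRequest. Everything outside Unicorn::HttpRequest is hypothetical: the listener and app objects are assumed to exist, and the client socket must respond to unicorn_peeraddr (and sysread) via the helpers from unicorn/socket, which are not part of this hunk.

    require 'logger'
    require 'unicorn/http_request' # path assumed from this hunk

    request = Unicorn::HttpRequest.new(Logger.new($stderr))

    loop do
      client = listener.accept               # hypothetical listener socket
      env = request.read(client)             # nil if the request was malformed
      if env
        status, headers, body = app.call(env) # hypothetical Rack app
        # ... write the response to client here ...
        client.close unless client.closed?
      end
      request.reset                           # drop parser state and the body object
    end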