unicorn 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,253 @@
1
+ require 'unicorn/socket'
2
+ require 'unicorn/const'
3
+ require 'logger'
4
+
5
+ module Unicorn
6
+
7
+ # Implements a simple DSL for configuring a unicorn server.
8
+ #
9
+ # Example (when used with the unicorn config file):
10
+ # worker_processes 4
11
+ # listeners %w(0.0.0.0:9292 /tmp/my_app.sock)
12
+ # timeout 10
13
+ # pid "/tmp/my_app.pid"
14
+ # after_fork do |server,worker_nr|
15
+ # server.listen("127.0.0.1:#{9293 + worker_nr}") rescue nil
16
+ # end
17
class Configurator
  include ::Unicorn::SocketHelper

  # The default logger writes its output to $stderr
  DEFAULT_LOGGER = Logger.new($stderr) unless defined?(DEFAULT_LOGGER)

  # Default settings for Unicorn
  DEFAULTS = {
    :timeout => 60,
    :listeners => [ Const::DEFAULT_LISTEN ],
    :logger => DEFAULT_LOGGER,
    :worker_processes => 1,
    :after_fork => lambda { |server, worker_nr|
        server.logger.info("worker=#{worker_nr} spawned pid=#{$$}")

        # per-process listener ports for debugging/admin:
        # "rescue nil" statement is needed because USR2 will
        # cause the master process to reexecute itself and the
        # per-worker ports can be taken, necessitating another
        # HUP after QUIT-ing the original master:
        # server.listen("127.0.0.1:#{8081 + worker_nr}") rescue nil
      },
    :before_fork => lambda { |server, worker_nr|
        server.logger.info("worker=#{worker_nr} spawning...")
      },
    :before_exec => lambda { |server|
        server.logger.info("forked child re-executing...")
      },
    :pid => nil,
    :backlog => 1024,
    :preload_app => false,
    :stderr_path => nil,
    :stdout_path => nil,
  }

  attr_reader :config_file #:nodoc:

  # Builds the configuration from the +defaults+ hash. Two keys are
  # consumed here rather than dispatched as settings:
  #
  # :use_defaults:: when true, DEFAULTS is merged in first
  # :config_file::  path of a config file evaluated by +reload+
  #
  # Every other key/value pair is applied by calling the method named
  # after the key with the value as its argument.
  def initialize(defaults = {}) #:nodoc:
    # work on a copy so the caller's hash is not stripped of its keys
    defaults = defaults.dup
    @set = Hash.new(:unset)
    use_defaults = defaults.delete(:use_defaults)
    @config_file = defaults.delete(:config_file)
    @config_file.freeze
    @set.merge!(DEFAULTS) if use_defaults
    defaults.each { |key, value| self.send(key, value) }
    reload
  end

  # Evaluates the config file (if one was given) in the context of this
  # object so the DSL methods below are callable from it.
  def reload #:nodoc:
    # passing the file name makes syntax errors and backtraces point
    # at the config file instead of an anonymous "(eval)"
    instance_eval(File.read(@config_file), @config_file.to_s) if @config_file
  end

  # Copies every explicitly-set value onto +server+, using the
  # "key=" writer when +server+ has one and falling back to setting the
  # "@key" instance variable otherwise. Keys listed in
  # options[:skip] are left alone.
  def commit!(server, options = {}) #:nodoc:
    skip = options[:skip] || []
    @set.each do |key, value|
      (Symbol === value && value == :unset) and next
      skip.include?(key) and next
      setter = "#{key}="
      if server.respond_to?(setter)
        server.send(setter, value)
      else
        server.instance_variable_set("@#{key}", value)
      end
    end
  end

  # Returns the value stored for +key+, or :unset if it was never set.
  def [](key) # :nodoc:
    @set[key]
  end

  # Changes the listen() syscall backlog to +nr+ for yet-to-be-created
  # sockets.  Due to limitations of the OS, this cannot affect
  # existing listener sockets in any way, sockets must be completely
  # closed and rebound (inherited sockets preserve their existing
  # backlog setting).  Some operating systems allow negative values
  # here to specify the maximum allowable value.  See the listen(2)
  # syscall documentation of your OS for the exact semantics of this.
  #
  # If you are running unicorn on multiple machines, lowering this number
  # can help your load balancer detect when a machine is overloaded
  # and give requests to a different machine.
  def backlog(nr)
    Integer === nr or raise ArgumentError,
                            "not an integer: backlog=#{nr.inspect}"
    @set[:backlog] = nr
  end

  # sets object to the +new+ Logger-like object.  The new logger-like
  # object must respond to the following methods:
  #  +debug+, +info+, +warn+, +error+, +fatal+, +close+
  def logger(new)
    %w(debug info warn error fatal close).each do |m|
      new.respond_to?(m) and next
      raise ArgumentError, "logger=#{new} does not respond to method=#{m}"
    end

    @set[:logger] = new
  end

  # sets after_fork hook to a given block.  This block will be called by
  # the worker after forking.  The hook may also be passed as a Proc
  # argument (this is what lets it be given through the hash passed to
  # Configurator.new).  The following is an example hook which adds
  # a per-process listener to every worker:
  #
  #  after_fork do |server,worker_nr|
  #    # per-process listener ports for debugging/admin:
  #    # "rescue nil" statement is needed because USR2 will
  #    # cause the master process to reexecute itself and the
  #    # per-worker ports can be taken, necessitating another
  #    # HUP after QUIT-ing the original master:
  #    server.listen("127.0.0.1:#{9293 + worker_nr}") rescue nil
  #  end
  def after_fork(*args, &block)
    set_hook(:after_fork, block_given? ? block : args[0])
  end

  # sets before_fork to be a given Proc object (block or argument).
  # This Proc object will be called by the master process before forking
  # each worker.
  def before_fork(*args, &block)
    set_hook(:before_fork, block_given? ? block : args[0])
  end

  # sets the before_exec hook to a given Proc object (block or
  # argument).  This Proc object will be called by the master process
  # right before exec()-ing the new unicorn binary.  This is useful
  # for freeing certain OS resources that you do NOT wish to
  # share with the reexeced child process.
  # There is no corresponding after_exec hook (for obvious reasons).
  def before_exec(*args, &block)
    set_hook(:before_exec, block_given? ? block : args[0], 1)
  end

  # sets the timeout of worker processes to +seconds+.  Workers
  # handling the request/app.call/response cycle taking longer than
  # this time period will be forcibly killed (via SIGKILL).  This
  # timeout is enforced by the master process itself and not subject
  # to the scheduling limitations by the worker process.
  def timeout(seconds)
    Numeric === seconds or raise ArgumentError,
                                "not numeric: timeout=#{seconds.inspect}"
    seconds > 0 or raise ArgumentError,
                                "not positive: timeout=#{seconds.inspect}"
    @set[:timeout] = seconds
  end

  # sets the current number of worker_processes to +nr+.  Each worker
  # process will serve exactly one client at a time.
  def worker_processes(nr)
    Integer === nr or raise ArgumentError,
                           "not an integer: worker_processes=#{nr.inspect}"
    nr >= 0 or raise ArgumentError,
                           "not non-negative: worker_processes=#{nr.inspect}"
    @set[:worker_processes] = nr
  end

  # sets listeners to the given +addresses+, replacing the
  # current set.  This is for the global listener pool shared by all
  # worker processes.  For per-worker listeners, see the after_fork example
  def listeners(addresses)
    Array === addresses or addresses = Array(addresses)
    @set[:listeners] = addresses
  end

  # adds an +address+ to the existing listener set
  def listen(address)
    current = @set[:listeners]
    current = [] unless Array === current
    # Build a new array instead of appending in place: the current
    # array may be the one stored in DEFAULTS (shared by every
    # Configurator) or one owned by the caller of +listeners+.
    @set[:listeners] = current + [ address ]
  end

  # sets the +path+ for the PID file of the unicorn master process
  def pid(path); set_path(:pid, path); end

  # Enabling this preloads an application before forking worker
  # processes.  This allows memory savings when using a
  # copy-on-write-friendly GC but can cause bad things to happen when
  # resources like sockets are opened at load time by the master
  # process and shared by multiple children.  People enabling this are
  # highly encouraged to look at the before_fork/after_fork hooks to
  # properly close/reopen sockets.  Files opened for logging do not
  # have to be reopened as (unbuffered-in-userspace) files opened with
  # the File::APPEND flag are written to atomically on UNIX.
  def preload_app(bool)
    case bool
    when TrueClass, FalseClass
      @set[:preload_app] = bool
    else
      raise ArgumentError, "preload_app=#{bool.inspect} not a boolean"
    end
  end

  # Allow redirecting $stderr to a given path.  Unlike doing this from
  # the shell, this allows the unicorn process to know the path its
  # writing to and rotate the file if it is used for logging.  The
  # file will be opened with the File::APPEND flag and writes
  # synchronized to the kernel (but not necessarily to _disk_) so
  # multiple processes can safely append to it.
  def stderr_path(path)
    set_path(:stderr_path, path)
  end

  # Same as stderr_path, except for $stdout
  def stdout_path(path)
    set_path(:stdout_path, path)
  end

  private

  # Stores +path+ under +var+. nil is stored as-is; a String is
  # expanded to an absolute path and its directory must be writable.
  # Anything else raises ArgumentError.
  def set_path(var, path) #:nodoc:
    case path
    when NilClass
    when String
      path = File.expand_path(path)
      File.writable?(File.dirname(path)) or \
             raise ArgumentError, "directory for #{var}=#{path} not writable"
    else
      raise ArgumentError,
            "#{var}=#{path.inspect} must be a String path or nil"
    end
    @set[var] = path
  end

  # Stores +my_proc+ as the hook named +var+. A Proc must take exactly
  # +req_arity+ arguments; nil restores the hook from DEFAULTS.
  # Anything else raises ArgumentError.
  def set_hook(var, my_proc, req_arity = 2) #:nodoc:
    case my_proc
    when Proc
      arity = my_proc.arity
      (arity == req_arity) or \
        raise ArgumentError,
              "#{var}=#{my_proc.inspect} has invalid arity: " \
              "#{arity} (need #{req_arity})"
    when NilClass
      my_proc = DEFAULTS[var]
    else
      raise ArgumentError, "invalid type: #{var}=#{my_proc.inspect}"
    end
    @set[var] = my_proc
  end

end
253
+ end
@@ -0,0 +1,116 @@
1
+
2
+ module Unicorn
3
+
4
+ # Every standard HTTP code mapped to the appropriate message. These are
5
+ # used so frequently that they are placed directly in Unicorn for easy
6
+ # access rather than Unicorn::Const itself.
7
# Maps integer status codes to their reason phrases. The table is frozen
# because it is a shared constant; 307, 416 and 417 (RFC 2616) are
# included so that lookups for those codes do not come back nil.
HTTP_STATUS_CODES = {
  100  => 'Continue',
  101  => 'Switching Protocols',
  200  => 'OK',
  201  => 'Created',
  202  => 'Accepted',
  203  => 'Non-Authoritative Information',
  204  => 'No Content',
  205  => 'Reset Content',
  206  => 'Partial Content',
  300  => 'Multiple Choices',
  301  => 'Moved Permanently',
  302  => 'Moved Temporarily',
  303  => 'See Other',
  304  => 'Not Modified',
  305  => 'Use Proxy',
  307  => 'Temporary Redirect',
  400  => 'Bad Request',
  401  => 'Unauthorized',
  402  => 'Payment Required',
  403  => 'Forbidden',
  404  => 'Not Found',
  405  => 'Method Not Allowed',
  406  => 'Not Acceptable',
  407  => 'Proxy Authentication Required',
  408  => 'Request Time-out',
  409  => 'Conflict',
  410  => 'Gone',
  411  => 'Length Required',
  412  => 'Precondition Failed',
  413  => 'Request Entity Too Large',
  414  => 'Request-URI Too Large',
  415  => 'Unsupported Media Type',
  416  => 'Requested range not satisfiable',
  417  => 'Expectation Failed',
  500  => 'Internal Server Error',
  501  => 'Not Implemented',
  502  => 'Bad Gateway',
  503  => 'Service Unavailable',
  504  => 'Gateway Time-out',
  505  => 'HTTP Version not supported'
}.freeze
46
+
47
+ # Frequently used constants when constructing requests or responses. Many times
48
+ # the constant just refers to a string with the same contents. Using these constants
49
+ # gave about a 3% to 10% performance improvement over using the strings directly.
50
+ # Symbols did not really improve things much compared to constants.
51
+ #
52
+ # While Unicorn does try to emulate the CGI/1.2 protocol, it does not use the REMOTE_IDENT,
53
+ # REMOTE_USER, or REMOTE_HOST parameters since those are either a security problem or
54
+ # too taxing on performance.
55
module Const
  DATE = 'Date'.freeze

  # The portion of the request path that follows SCRIPT_NAME.
  PATH_INFO = 'PATH_INFO'.freeze

  # Key under which the request body is stored.
  HTTP_BODY = 'HTTP_BODY'.freeze

  # The leading portion of the path that identifies the handler.
  SCRIPT_NAME = 'SCRIPT_NAME'.freeze

  # The URI exactly as the client requested it, and its path component.
  REQUEST_URI  = "REQUEST_URI".freeze
  REQUEST_PATH = "REQUEST_PATH".freeze

  UNICORN_VERSION = '0.1.0'.freeze

  UNICORN_TMP_BASE = 'unicorn'.freeze

  # Default TCP listen host address and port, and the two joined as
  # "host:port".
  DEFAULT_HOST   = '0.0.0.0'.freeze
  DEFAULT_PORT   = '8080'.freeze
  DEFAULT_LISTEN = "#{DEFAULT_HOST}:#{DEFAULT_PORT}".freeze

  # Canned, complete 404 response for bad requests.
  ERROR_404_RESPONSE = "HTTP/1.1 404 Not Found\r\nConnection: close\r\nServer: Unicorn #{UNICORN_VERSION}\r\n\r\nNOT FOUND".freeze

  CONTENT_LENGTH = 'CONTENT_LENGTH'.freeze

  # Canned response saying the server is too busy.
  ERROR_503_RESPONSE = "HTTP/1.1 503 Service Unavailable\r\n\r\nBUSY".freeze

  # Number of bytes requested per socket read.
  CHUNK_SIZE = 16 * 1024

  # Largest header accepted before the client is dropped.
  MAX_HEADER = 1024 * (80 + 32)

  # Bodies at least this large are spooled to a tempfile instead of memory.
  MAX_BODY = MAX_HEADER

  # Frozen header names, request keys and format strings.
  CONTENT_TYPE           = 'Content-Type'.freeze
  LAST_MODIFIED          = 'Last-Modified'.freeze
  ETAG                   = 'ETag'.freeze
  REQUEST_METHOD         = 'REQUEST_METHOD'.freeze
  GET                    = 'GET'.freeze
  HEAD                   = 'HEAD'.freeze
  # Three hex fields, wrapped in literal double quotes.
  ETAG_FORMAT            = '"%x-%x-%x"'.freeze
  LINE_END               = "\r\n".freeze
  REMOTE_ADDR            = 'REMOTE_ADDR'.freeze
  HTTP_X_FORWARDED_FOR   = 'HTTP_X_FORWARDED_FOR'.freeze
  HTTP_IF_MODIFIED_SINCE = 'HTTP_IF_MODIFIED_SINCE'.freeze
  HTTP_IF_NONE_MATCH     = 'HTTP_IF_NONE_MATCH'.freeze
  REDIRECT               = "HTTP/1.1 302 Found\r\nLocation: %s\r\nConnection: close\r\n\r\n".freeze
  HOST                   = 'HOST'.freeze
  CONNECTION             = 'Connection'.freeze
end
115
+
116
+ end
@@ -0,0 +1,178 @@
1
+ require 'tempfile'
2
+ require 'uri'
3
+ require 'stringio'
4
+
5
+ # compiled extension
6
+ require 'unicorn/http11'
7
+
8
+ module Unicorn
9
+ #
10
+ # The HttpRequest.initialize method will convert any request that is larger than
11
+ # Const::MAX_BODY into a Tempfile and use that as the body. Otherwise it uses
12
+ # a StringIO object. To be safe, you should assume it works like a file.
13
+ #
14
class HttpRequest

  # +logger+ only needs to respond to +error+ as far as this class is
  # concerned; it is used to report parse and body-read failures.
  def initialize(logger)
    @logger = logger
    @body = nil
    @buffer = ' ' * Const::CHUNK_SIZE # initial size, may grow
    @parser = HttpParser.new
    @params = Hash.new
  end

  # Returns this object to its pre-request state: the parser is reset,
  # the parameters are cleared and the body (if any) is closed and
  # dropped.
  def reset
    @parser.reset
    @params.clear
    @body.close rescue nil
    @body = nil
  end

  #
  # Does the majority of the IO processing.  It has been written in
  # Ruby using about 7 different IO processing strategies and no
  # matter how it's done the performance just does not improve.  It is
  # currently carefully constructed to make sure that it gets the best
  # possible performance, but anyone who thinks they can make it
  # faster is more than welcome to take a crack at it.
  #
  # returns an environment hash suitable for Rack if successful
  # This does minimal exception trapping and it is up to the caller
  # to handle any socket errors (e.g. user aborted upload).
  def read(socket)
    # copy: read_socket hands back the shared, reused @buffer
    data = String.new(read_socket(socket))
    nparsed = 0

    # Assumption: nparsed will always be less since data will get
    # filled with more after each parsing.  If it doesn't get more
    # then there was a problem with the read operation on the client
    # socket.  Effect is to stop processing when the socket can't
    # fill the buffer for further parsing.
    while nparsed < data.length
      nparsed = @parser.execute(@params, data, nparsed)

      if @parser.finished?
        # From http://www.ietf.org/rfc/rfc3875:
        # "Script authors should be aware that the REMOTE_ADDR and
        #  REMOTE_HOST meta-variables (see sections 4.1.8 and 4.1.9)
        #  may not identify the ultimate source of the request.  They
        #  identify the client for the immediate request to the server;
        #  that client may be a proxy, gateway, or other intermediary
        #  acting on behalf of the actual source client."
        @params[Const::REMOTE_ADDR] = socket.unicorn_peeraddr

        handle_body(socket) and return rack_env # success!
        return nil # fail
      else
        # Parser is not done, queue up more data to read and continue
        # parsing
        data << read_socket(socket)
        if data.length >= Const::MAX_HEADER
          raise HttpParserError.new("HEADER is longer than allowed, " \
                                    "aborting client early.")
        end
      end
    end
    nil # XXX bug?
    rescue HttpParserError => e
      @logger.error "HTTP parse error, malformed request " \
                    "(#{@params[Const::HTTP_X_FORWARDED_FOR] ||
                        socket.unicorn_peeraddr}): #{e.inspect}"
      @logger.error "REQUEST DATA: #{data.inspect}\n---\n" \
                    "PARAMS: #{@params.inspect}\n---\n"
      socket.closed? or socket.close rescue nil
      nil
  end

  private

  # Handles dealing with the rest of the request
  # returns true if successful, false if not
  def handle_body(socket)
    http_body = @params[Const::HTTP_BODY]
    content_length = @params[Const::CONTENT_LENGTH].to_i
    remain = content_length - http_body.length

    # Small bodies stay in memory, huge ones go to a tempfile.  In both
    # cases the part of the body that arrived with the headers is
    # *written* into the IO so the write offset ends up after it.
    # (StringIO.new(http_body) would leave the offset at 0 and the
    # writes done by read_body would clobber the start of the body.)
    if remain < Const::MAX_BODY
      @body = StringIO.new
    else
      @body = Tempfile.new(Const::UNICORN_TMP_BASE)
    end
    @body.binmode
    @body.sync = true
    @body.syswrite(http_body)

    # Some clients (like FF1.0) report 0 for body and then send a body.
    # This will probably truncate them but at least the request goes through
    # usually.
    if remain > 0
      read_body(socket, remain) or return false # fail!
    end
    @body.rewind
    @body.sysseek(0) if @body.respond_to?(:sysseek)

    # in case read_body overread because the client tried to pipeline
    # another request, we'll truncate it.  Again, we don't do pipelining
    # or keepalive
    @body.truncate(content_length)
    true
  end

  # Returns an environment which is rackable:
  # http://rack.rubyforge.org/doc/files/SPEC.html
  # Based on Rack's old Mongrel handler.
  def rack_env
    # It might be a dumbass full host request header
    @params[Const::REQUEST_PATH] ||=
                         URI.parse(@params[Const::REQUEST_URI]).path
    raise "No REQUEST PATH" unless @params[Const::REQUEST_PATH]

    @params["QUERY_STRING"] ||= ''
    @params.delete "HTTP_CONTENT_TYPE"
    @params.delete "HTTP_CONTENT_LENGTH"
    @params.update({ "rack.version" => [0,1],
                    "rack.input" => @body,
                    "rack.errors" => $stderr,
                    "rack.multithread" => false,
                    "rack.multiprocess" => true,
                    "rack.run_once" => false,
                    "rack.url_scheme" => "http",
                    Const::PATH_INFO => @params[Const::REQUEST_PATH],
                    Const::SCRIPT_NAME => "",
                  })
  end

  # Does the heavy lifting of properly reading the larger body requests in
  # small chunks.  It expects @body to be an IO object, socket to be valid,
  # It also expects any initial part of the body that has been read to be in
  # the @body already.  It will return true if successful and false if not.
  def read_body(socket, remain)
    while remain > 0
      # writes always write the requested amount on a POSIX filesystem
      remain -= @body.syswrite(read_socket(socket))
    end
    true # success!
  rescue Object => e
    # must go through @logger: this class defines no +logger+ method, so
    # a bare call would raise NameError from inside the handler
    @logger.error "Error reading HTTP body: #{e.inspect}"
    socket.closed? or socket.close rescue nil

    # Any errors means we should delete the file, including if the file
    # is dumped.  Truncate it ASAP to help avoid page flushes to disk.
    @body.truncate(0) rescue nil
    reset
    false
  end

  # read(2) on "slow" devices like sockets can be interrupted by signals
  def read_socket(socket)
    begin
      socket.sysread(Const::CHUNK_SIZE, @buffer)
    rescue Errno::EINTR
      retry
    end
  end

end
178
+ end