spider-gazelle 0.1.6 → 0.1.7

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,198 @@
1
+ # Thanks to Puma https://github.com/puma/puma/blob/master/lib/puma/const.rb
2
+ require "rack"
3
+
4
+ module SpiderGazelle
5
+ # UnsupportedOption = Class.new(RuntimeError)
6
+
7
+ # Every standard HTTP code mapped to the appropriate message. These are
8
+ # used so frequently that they are placed directly in SpiderGazelle for easy
9
+ # access rather than SpiderGazelle::Const itself.
10
+ HTTP_STATUS_CODES = Rack::Utils::HTTP_STATUS_CODES
11
+
12
+ # # For some HTTP status codes the client only expects headers.
13
+ # STATUS_WITH_NO_ENTITY_BODY = Hash[Rack::Utils::STATUS_WITH_NO_ENTITY_BODY.map { |s|
14
+ # [s, true]
15
+ # }]
16
+
17
+ # Based on http://rack.rubyforge.org/doc/SPEC.html
18
+ # Frequently used constants when constructing requests or responses. Many times
19
+ # the constant just refers to a string with the same contents. Using these constants
20
+ # gave about a 3% to 10% performance improvement over using the strings directly.
21
+ #
22
+ # The constants are frozen because Hash#[]= when called with a String key dups
23
+ # the String UNLESS the String is frozen. This saves us therefore 2 object
24
+ # allocations when creating the env hash later.
25
+ #
26
+ # While SpiderGazelle does try to emulate the CGI/1.2 protocol, it does not use the REMOTE_IDENT,
27
+ # REMOTE_USER, or REMOTE_HOST parameters since those are either a security problem or
28
+ # too taxing on performance.
29
+ module Const
30
+ SPIDER_GAZELLE_VERSION = VERSION = "0.1.7".freeze
31
+ # CODE_NAME = "Earl of Sandwich Partition"
32
+ SERVER = "SpiderGazelle".freeze
33
+
34
+ # FAST_TRACK_KA_TIMEOUT = 0.2
35
+
36
+ # # The default number of seconds for another request within a persistent
37
+ # # session.
38
+ # PERSISTENT_TIMEOUT = 20
39
+
40
+ # # The default number of seconds to wait until we get the first data
41
+ # # for the request
42
+ # FIRST_DATA_TIMEOUT = 30
43
+
44
+ # # How long to wait when getting some write blocking on the socket when
45
+ # # sending data back
46
+ # WRITE_TIMEOUT = 10
47
+
48
+ # DATE = "Date".freeze
49
+
50
+ SCRIPT_NAME = "SCRIPT_NAME".freeze
51
+
52
+ # The original URI requested by the client.
53
+ REQUEST_URI= "REQUEST_URI".freeze
54
+ REQUEST_PATH = "REQUEST_PATH".freeze
55
+
56
+ PATH_INFO = "PATH_INFO".freeze
57
+
58
+ # SPIDER_GAZELLE_TMP_BASE = "spider-gazelle".freeze
59
+
60
+ # # Indicate that we couldn't parse the request
61
+ ERROR_400_RESPONSE = "HTTP/1.1 400 Bad Request\r\n\r\n"
62
+
63
+ # The standard empty 404 response for bad requests. Use Error404Handler for custom stuff.
64
+ ERROR_404_RESPONSE = "HTTP/1.1 404 Not Found\r\nConnection: close\r\nServer: #{SERVER} #{SPIDER_GAZELLE_VERSION}\r\n\r\nNOT FOUND".freeze
65
+
66
+ # The standard empty 408 response for requests that timed out.
67
+ ERROR_408_RESPONSE = "HTTP/1.1 408 Request Timeout\r\nConnection: close\r\nServer: #{SERVER} #{SPIDER_GAZELLE_VERSION}\r\n\r\n".freeze
68
+
69
+ # Indicate that there was an internal error, obviously.
70
+ ERROR_500_RESPONSE = "HTTP/1.1 500 Internal Server Error\r\n\r\n"
71
+
72
+ # A common header for indicating the server is too busy. Not used yet.
73
+ ERROR_503_RESPONSE = "HTTP/1.1 503 Service Unavailable\r\n\r\nBUSY".freeze
74
+
75
+ # # The basic max request size we'll try to read.
76
+ # CHUNK_SIZE = 16 * 1024
77
+ HEX_SIZE_CHUNKED_RESPONSE = 16
78
+ INTERNAL_PIPE_BACKLOG = 16
79
+
80
+ # # This is the maximum header that is allowed before a client is booted. The parser detects
81
+ # # this, but we'd also like to do this as well.
82
+ # MAX_HEADER = 1024 * (80 + 32)
83
+
84
+ # # Maximum request body size before it is moved out of memory and into a tempfile for reading.
85
+ # MAX_BODY = MAX_HEADER
86
+
87
+ # # A frozen format for this is about 15% faster
88
+ # STATUS_FORMAT = "HTTP/1.1 %d %s\r\nConnection: close\r\n".freeze
89
+
90
+ CONTENT_TYPE = "CONTENT_TYPE".freeze
91
+ HTTP_CONTENT_TYPE = "HTTP_CONTENT_TYPE".freeze
92
+ DEFAULT_TYPE = "text/plain".freeze
93
+
94
+ # LAST_MODIFIED = "Last-Modified".freeze
95
+ # ETAG = "ETag".freeze
96
+ # SLASH = "/".freeze
97
+ REQUEST_METHOD = "REQUEST_METHOD".freeze
98
+ # GET = "GET".freeze
99
+ HEAD = "HEAD".freeze
100
+ # # ETag is based on the apache standard of hex mtime-size-inode (inode is 0 on win32)
101
+ # ETAG_FORMAT = "\"%x-%x-%x\"".freeze
102
+ LINE_END = "\r\n".freeze
103
+ REMOTE_ADDR = "REMOTE_ADDR".freeze
104
+ # HTTP_X_FORWARDED_FOR = "HTTP_X_FORWARDED_FOR".freeze
105
+ # HTTP_IF_MODIFIED_SINCE = "HTTP_IF_MODIFIED_SINCE".freeze
106
+ # HTTP_IF_NONE_MATCH = "HTTP_IF_NONE_MATCH".freeze
107
+ # REDIRECT = "HTTP/1.1 302 Found\r\nLocation: %s\r\nConnection: close\r\n\r\n".freeze
108
+ # HOST = "HOST".freeze
109
+
110
+ HTTP_META = "HTTP_".freeze
111
+ # Portion of the request following a "?" (empty if none)
112
+ QUERY_STRING = "QUERY_STRING".freeze
113
+ # Required although HTTP_HOST takes priority if set
114
+ SERVER_NAME = "SERVER_NAME".freeze
115
+ # Required (set in spider.rb init)
116
+ SERVER_PORT = "SERVER_PORT".freeze
117
+ HTTP_HOST = "HTTP_HOST".freeze
118
+ PORT_80 = "80".freeze
119
+ PORT_443 = "443".freeze
120
+ LOCALHOST = "localhost".freeze
121
+
122
+ HTTP_STATUS_DEFAULT = proc { "CUSTOM" }
123
+ SERVER_PROTOCOL = "SERVER_PROTOCOL".freeze
124
+ HTTP_11 = "HTTP/1.1".freeze
125
+ # HTTP_10 = "HTTP/1.0".freeze
126
+
127
+ SERVER_SOFTWARE = "SERVER_SOFTWARE".freeze
128
+ GATEWAY_INTERFACE = "GATEWAY_INTERFACE".freeze
129
+ CGI_VER = "CGI/1.2".freeze
130
+
131
+ # STOP_COMMAND = "?".freeze
132
+ # HALT_COMMAND = "!".freeze
133
+ # RESTART_COMMAND = "R".freeze
134
+
135
+ RACK = "rack".freeze
136
+ RACK_VERSION = "rack.version".freeze
137
+ RACK_ERRORS = "rack.errors".freeze
138
+ RACK_MULTITHREAD = "rack.multithread".freeze
139
+ RACK_MULTIPROCESS = "rack.multiprocess".freeze
140
+ RACK_RUN_ONCE = "rack.run_once".freeze
141
+
142
+ # An IO like object containing all the request body
143
+ RACK_INPUT = "rack.input".freeze
144
+ # http or https
145
+ RACK_URL_SCHEME = "rack.url_scheme".freeze
146
+ # RACK_AFTER_REPLY = "rack.after_reply".freeze
147
+ # SPIDER_GAZELLE_SOCKET = "spider-gazelle.socket".freeze
148
+ # SPIDER_GAZELLE_CONFIG = "spider-gazelle.config".freeze
149
+
150
+ HTTP = "http".freeze
151
+ HTTPS = "https".freeze
152
+
153
+ # HTTPS_KEY = "HTTPS".freeze
154
+
155
+ # HTTP_VERSION = "HTTP_VERSION".freeze
156
+ # HTTP_CONNECTION = "HTTP_CONNECTION".freeze
157
+
158
+ # HTTP_11_200 = "HTTP/1.1 200 OK\r\n".freeze
159
+ # HTTP_10_200 = "HTTP/1.0 200 OK\r\n".freeze
160
+
161
+ CLOSE = "close".freeze
162
+ KEEP_ALIVE = "Keep-Alive".freeze
163
+
164
+ CONTENT_LENGTH = "CONTENT_LENGTH".freeze
165
+ HTTP_CONTENT_LENGTH = "HTTP_CONTENT_LENGTH".freeze
166
+ CONTENT_LENGTH2 = "Content-Length".freeze
167
+ # CONTENT_LENGTH_S = "Content-Length: ".freeze
168
+ TRANSFER_ENCODING = "Transfer-Encoding".freeze
169
+
170
+ CONNECTION = "Connection".freeze
171
+ # CONNECTION_CLOSE = "Connection: close\r\n".freeze
172
+ # CONNECTION_KEEP_ALIVE = "Connection: Keep-Alive\r\n".freeze
173
+
174
+ CHUNKED = "chunked".freeze
175
+ # TRANSFER_ENCODING_CHUNKED = "Transfer-Encoding: chunked\r\n".freeze
176
+ CLOSE_CHUNKED = "0\r\n\r\n".freeze
177
+
178
+ COMMA = ", ".freeze
179
+ COLON = ": ".freeze
180
+ NEWLINE = "\n".freeze
181
+ EMPTY = "".freeze
182
+
183
+ ZERO = "0".freeze
184
+
185
+ # Hijacking IO is supported
186
+ HIJACK_P = "rack.hijack?".freeze
187
+ # Callback for indicating that this socket will be hijacked
188
+ HIJACK = "rack.hijack".freeze
189
+ # The object for performing IO on after hijack is called
190
+ HIJACK_IO = "rack.hijack_io".freeze
191
+
192
+ ASYNC = "async.callback".freeze
193
+
194
+ USE_TLS = 'T'.freeze
195
+ NO_TLS = 'F'.freeze
196
+ KILL_GAZELLE = 'k'.freeze
197
+ end
198
+ end
@@ -1,172 +1,154 @@
1
+ require 'spider-gazelle/const'
1
2
  require 'set'
2
3
 
3
-
4
4
  module SpiderGazelle
5
- class Gazelle
6
-
7
-
8
- HTTP_META = 'HTTP_'.freeze
9
- REQUEST_METHOD = 'REQUEST_METHOD'.freeze # GET, POST, etc
10
- COMMA = ', '.freeze
11
-
5
+ class Gazelle
6
+ include Const
12
7
 
13
- attr_reader :parser_cache, :connections, :logger
8
+ attr_reader :parser_cache, :connections, :logger
14
9
 
10
+ def set_instance_type(inst)
11
+ inst.type = :request
12
+ end
15
13
 
16
- def set_instance_type(inst)
17
- inst.type = :request
18
- end
19
-
20
-
14
+ def initialize(loop, logger, mode)
15
+ @gazelle = loop
16
+ # Set of active connections on this thread
17
+ @connections = Set.new
18
+ # Stale parser objects cached for reuse
19
+ @parser_cache = []
21
20
 
22
- def initialize(loop, logger, mode)
23
- @gazelle = loop
24
- @connections = Set.new # Set of active connections on this thread
25
- @parser_cache = [] # Stale parser objects cached for reuse
21
+ @mode = mode
22
+ @logger = logger
23
+ @app_cache = {}
24
+ @connection_queue = ::Libuv::Q::ResolvedPromise.new @gazelle, true
26
25
 
27
- @mode = mode
28
- @logger = logger
29
- @app_cache = {}
30
- @connection_queue = ::Libuv::Q::ResolvedPromise.new(@gazelle, true)
26
+ # A single parser instance for processing requests for each gazelle
27
+ @parser = ::HttpParser::Parser.new self
28
+ @set_instance_type = method :set_instance_type
31
29
 
32
- # A single parser instance for processing requests for each gazelle
33
- @parser = ::HttpParser::Parser.new(self)
34
- @set_instance_type = method(:set_instance_type)
30
+ # Single progress callback for each gazelle
31
+ @on_progress = method :on_progress
32
+ end
35
33
 
36
- # Single progress callback for each gazelle
37
- @on_progress = method(:on_progress)
34
+ def run
35
+ @gazelle.run do |logger|
36
+ logger.progress do |level, errorid, error|
37
+ begin
38
+ msg = "Gazelle log: #{level}: #{errorid}\n#{error.message}\n#{error.backtrace.join("\n") if error.backtrace}\n"
39
+ @logger.error msg
40
+ rescue Exception
41
+ puts 'error in gazelle logger'
42
+ end
38
43
  end
39
44
 
40
- def run
41
- @gazelle.run do |logger|
42
- logger.progress do |level, errorid, error|
43
- begin
44
- msg = "Gazelle log: #{level}: #{errorid}\n#{error.message}\n#{error.backtrace.join("\n") if error.backtrace}\n"
45
- @logger.error msg
46
- puts msg
47
- rescue Exception
48
- p 'error in gazelle logger'
49
- end
50
- end
51
-
52
- unless @mode == :no_ipc
53
- # A pipe used to forward connections to different threads
54
- @socket_server = @gazelle.pipe(true)
55
- @socket_server.connect(DELEGATE_PIPE) do
56
- @socket_server.progress &method(:new_connection)
57
- @socket_server.start_read2
58
- end
59
-
60
- # A pipe used to signal various control commands (shutdown, etc)
61
- @signal_server = @gazelle.pipe
62
- @signal_server.connect(SIGNAL_PIPE) do
63
- @signal_server.progress &method(:process_signal)
64
- @signal_server.start_read
65
- end
66
- end
67
- end
45
+ unless @mode == :no_ipc
46
+ # A pipe used to forward connections to different threads
47
+ @socket_server = @gazelle.pipe true
48
+ @socket_server.connect(DELEGATE_PIPE) do
49
+ @socket_server.progress &method(:new_connection)
50
+ @socket_server.start_read2
51
+ end
52
+
53
+ # A pipe used to signal various control commands (shutdown, etc)
54
+ @signal_server = @gazelle.pipe
55
+ @signal_server.connect(SIGNAL_PIPE) do
56
+ @signal_server.progress &method(:process_signal)
57
+ @signal_server.start_read
58
+ end
68
59
  end
60
+ end
61
+ end
69
62
 
63
+ # HTTP Parser callbacks:
64
+ def on_message_begin(parser)
65
+ @connection.start_parsing
66
+ end
70
67
 
71
- # HTTP Parser callbacks:
72
- def on_message_begin(parser)
73
- @connection.start_parsing
74
- end
68
+ def on_url(parser, url)
69
+ @connection.parsing.url << url
70
+ end
75
71
 
76
- def on_url(parser, url)
77
- @connection.parsing.url << url
78
- end
72
+ def on_header_field(parser, header)
73
+ req = @connection.parsing
74
+ req.header.frozen? ? req.header = header : req.header << header
75
+ end
79
76
 
80
- def on_header_field(parser, header)
81
- req = @connection.parsing
82
- if req.header.frozen?
83
- req.header = header
84
- else
85
- req.header << header
86
- end
77
+ def on_header_value(parser, value)
78
+ req = @connection.parsing
79
+ if req.header.frozen?
80
+ req.env[req.header] << value
81
+ else
82
+ header = req.header
83
+ header.upcase!
84
+ header.gsub!('-', '_')
85
+ header.prepend(HTTP_META)
86
+ header.freeze
87
+ if req.env[header]
88
+ req.env[header] << COMMA
89
+ req.env[header] << value
90
+ else
91
+ req.env[header] = value
87
92
  end
93
+ end
94
+ end
88
95
 
89
- def on_header_value(parser, value)
90
- req = @connection.parsing
91
- if req.header.frozen?
92
- req.env[req.header] << value
93
- else
94
- header = req.header
95
- header.upcase!
96
- header.gsub!('-', '_')
97
- header.prepend(HTTP_META)
98
- header.freeze
99
- if req.env[header]
100
- req.env[header] << COMMA
101
- req.env[header] << value
102
- else
103
- req.env[header] = value
104
- end
105
- end
106
- end
96
+ def on_headers_complete(parser)
97
+ @connection.parsing.env[REQUEST_METHOD] = @connection.state.http_method.to_s
98
+ end
107
99
 
108
- def on_headers_complete(parser)
109
- @connection.parsing.env[REQUEST_METHOD] = @connection.state.http_method.to_s
110
- end
100
+ def on_body(parser, data)
101
+ @connection.parsing.body << data
102
+ end
111
103
 
112
- def on_body(parser, data)
113
- @connection.parsing.body << data
114
- end
104
+ def on_message_complete(parser)
105
+ @connection.finished_parsing
106
+ end
115
107
 
116
- def on_message_complete(parser)
117
- @connection.finished_parsing
118
- end
108
+ def discard(connection)
109
+ @connections.delete(connection)
110
+ state = connection.state
111
+ state.reset!
112
+ @parser_cache << state
113
+ end
119
114
 
120
- def discard(connection)
121
- @connections.delete(connection)
122
- @parser_cache << connection.state
123
- end
115
+ protected
124
116
 
117
+ def on_progress(data, socket)
118
+ # Keep track of which connection we are processing for the callbacks
119
+ @connection = socket.storage
125
120
 
126
- protected
121
+ # Check for errors during the parsing of the request
122
+ @connection.parsing_error if @parser.parse(@connection.state, data)
123
+ end
127
124
 
125
+ def new_connection(data, socket)
126
+ # Data == "TLS_indicator Port APP_ID"
127
+ tls, port, app_id = data.split(' ', 3)
128
+ app = @app_cache[app_id.to_sym] ||= AppStore.get(app_id)
129
+ inst = @parser_cache.pop || ::HttpParser::Parser.new_instance(&@set_instance_type)
128
130
 
129
- def on_progress(data, socket)
130
- # Keep track of which connection we are processing for the callbacks
131
- @connection = socket.storage
131
+ # process any data coming from the socket
132
+ socket.progress @on_progress
133
+ # TODO:: Allow some globals for supplying the certs
134
+ socket.start_tls(:server => true) if tls == 'T'
132
135
 
133
- # Check for errors during the parsing of the request
134
- if @parser.parse(@connection.state, data)
135
- @connection.parsing_error
136
- end
137
- end
136
+ # Keep track of the connection
137
+ connection = Connection.new self, @gazelle, socket, port, inst, app, @connection_queue
138
+ @connections.add connection
139
+ # This allows us to re-use the one proc for parsing
140
+ socket.storage = connection
138
141
 
139
- def new_connection(data, socket)
140
- # Data == "TLS_indicator Port APP_ID"
141
- tls, port, app_id = data.split(' ', 3)
142
- app = @app_cache[app_id.to_sym] ||= AppStore.get(app_id)
143
- inst = @parser_cache.pop || ::HttpParser::Parser.new_instance(&@set_instance_type)
144
-
145
- # process any data coming from the socket
146
- socket.progress @on_progress
147
- if tls == 'T'
148
- # TODO:: Allow some globals for supplying the certs
149
- socket.start_tls(:server => true)
150
- end
151
-
152
- # Keep track of the connection
153
- connection = Connection.new self, @gazelle, socket, port, inst, app, @connection_queue
154
- @connections.add connection
155
- socket.storage = connection # This allows us to re-use the one proc for parsing
156
-
157
- socket.start_read
158
- end
142
+ socket.start_read
143
+ end
159
144
 
160
- def process_signal(data, pipe)
161
- if data == Spider::KILL_GAZELLE
162
- shutdown
163
- end
164
- end
145
+ def process_signal(data, pipe)
146
+ shutdown if data == KILL_GAZELLE
147
+ end
165
148
 
166
- def shutdown
167
- # TODO:: do this nicely
168
- # Need to signal the connections to close
169
- @gazelle.stop
170
- end
149
+ def shutdown
150
+ # TODO:: do this nicely. Need to signal the connections to close
151
+ @gazelle.stop
171
152
  end
153
+ end
172
154
  end