spider-gazelle 0.1.6 → 0.1.7
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +7 -0
- data/lib/rack/handler/spider-gazelle.rb +30 -32
- data/lib/spider-gazelle.rb +4 -6
- data/lib/spider-gazelle/app_store.rb +48 -53
- data/lib/spider-gazelle/binding.rb +48 -54
- data/lib/spider-gazelle/connection.rb +286 -322
- data/lib/spider-gazelle/const.rb +198 -0
- data/lib/spider-gazelle/gazelle.rb +121 -139
- data/lib/spider-gazelle/request.rb +95 -141
- data/lib/spider-gazelle/spider.rb +335 -351
- data/lib/spider-gazelle/upgrades/websocket.rb +88 -98
- data/spider-gazelle.gemspec +8 -3
- metadata +3 -4
- data/lib/spider-gazelle/error.rb +0 -16
- data/lib/spider-gazelle/version.rb +0 -3
@@ -0,0 +1,198 @@
|
|
1
|
+
# Thanks to Puma https://github.com/puma/puma/blob/master/lib/puma/const.rb
|
2
|
+
require "rack"
|
3
|
+
|
4
|
+
module SpiderGazelle
|
5
|
+
# UnsupportedOption = Class.new(RuntimeError)
|
6
|
+
|
7
|
+
# Every standard HTTP code mapped to the appropriate message. These are
|
8
|
+
# used so frequently that they are placed directly in SpiderGazelle for easy
|
9
|
+
# access rather than SpiderGazelle::Const itself.
|
10
|
+
HTTP_STATUS_CODES = Rack::Utils::HTTP_STATUS_CODES
|
11
|
+
|
12
|
+
# # For some HTTP status codes the client only expects headers.
|
13
|
+
# STATUS_WITH_NO_ENTITY_BODY = Hash[Rack::Utils::STATUS_WITH_NO_ENTITY_BODY.map { |s|
|
14
|
+
# [s, true]
|
15
|
+
# }]
|
16
|
+
|
17
|
+
# Based on http://rack.rubyforge.org/doc/SPEC.html
|
18
|
+
# Frequently used constants when constructing requests or responses. Many times
|
19
|
+
# the constant just refers to a string with the same contents. Using these constants
|
20
|
+
# gave about a 3% to 10% performance improvement over using the strings directly.
|
21
|
+
#
|
22
|
+
# The constants are frozen because Hash#[]= when called with a String key dups
|
23
|
+
# the String UNLESS the String is frozen. This therefore saves us 2 object
|
24
|
+
# allocations when creating the env hash later.
|
25
|
+
#
|
26
|
+
# While SpiderGazelle does try to emulate the CGI/1.2 protocol, it does not use the REMOTE_IDENT,
|
27
|
+
# REMOTE_USER, or REMOTE_HOST parameters since those are either a security problem or
|
28
|
+
# too taxing on performance.
|
29
|
+
module Const
|
30
|
+
SPIDER_GAZELLE_VERSION = VERSION = "0.1.7".freeze
|
31
|
+
# CODE_NAME = "Earl of Sandwich Partition"
|
32
|
+
SERVER = "SpiderGazelle".freeze
|
33
|
+
|
34
|
+
# FAST_TRACK_KA_TIMEOUT = 0.2
|
35
|
+
|
36
|
+
# # The default number of seconds for another request within a persistent
|
37
|
+
# # session.
|
38
|
+
# PERSISTENT_TIMEOUT = 20
|
39
|
+
|
40
|
+
# # The default number of seconds to wait until we get the first data
|
41
|
+
# # for the request
|
42
|
+
# FIRST_DATA_TIMEOUT = 30
|
43
|
+
|
44
|
+
# # How long to wait when getting some write blocking on the socket when
|
45
|
+
# # sending data back
|
46
|
+
# WRITE_TIMEOUT = 10
|
47
|
+
|
48
|
+
# DATE = "Date".freeze
|
49
|
+
|
50
|
+
SCRIPT_NAME = "SCRIPT_NAME".freeze
|
51
|
+
|
52
|
+
# The original URI requested by the client.
|
53
|
+
REQUEST_URI= "REQUEST_URI".freeze
|
54
|
+
REQUEST_PATH = "REQUEST_PATH".freeze
|
55
|
+
|
56
|
+
PATH_INFO = "PATH_INFO".freeze
|
57
|
+
|
58
|
+
# SPIDER_GAZELLE_TMP_BASE = "spider-gazelle".freeze
|
59
|
+
|
60
|
+
# # Indicate that we couldn't parse the request
|
61
|
+
ERROR_400_RESPONSE = "HTTP/1.1 400 Bad Request\r\n\r\n"
|
62
|
+
|
63
|
+
# The standard empty 404 response for bad requests. Use Error404Handler for custom stuff.
|
64
|
+
ERROR_404_RESPONSE = "HTTP/1.1 404 Not Found\r\nConnection: close\r\nServer: #{SERVER} #{SPIDER_GAZELLE_VERSION}\r\n\r\nNOT FOUND".freeze
|
65
|
+
|
66
|
+
# The standard empty 408 response for requests that timed out.
|
67
|
+
ERROR_408_RESPONSE = "HTTP/1.1 408 Request Timeout\r\nConnection: close\r\nServer: #{SERVER} #{SPIDER_GAZELLE_VERSION}\r\n\r\n".freeze
|
68
|
+
|
69
|
+
# Indicate that there was an internal error, obviously.
|
70
|
+
ERROR_500_RESPONSE = "HTTP/1.1 500 Internal Server Error\r\n\r\n"
|
71
|
+
|
72
|
+
# A common header for indicating the server is too busy. Not used yet.
|
73
|
+
ERROR_503_RESPONSE = "HTTP/1.1 503 Service Unavailable\r\n\r\nBUSY".freeze
|
74
|
+
|
75
|
+
# # The basic max request size we'll try to read.
|
76
|
+
# CHUNK_SIZE = 16 * 1024
|
77
|
+
HEX_SIZE_CHUNKED_RESPONSE = 16
|
78
|
+
INTERNAL_PIPE_BACKLOG = 16
|
79
|
+
|
80
|
+
# # This is the maximum header that is allowed before a client is booted. The parser detects
|
81
|
+
# # this, but we'd like to do this as well.
|
82
|
+
# MAX_HEADER = 1024 * (80 + 32)
|
83
|
+
|
84
|
+
# # Maximum request body size before it is moved out of memory and into a tempfile for reading.
|
85
|
+
# MAX_BODY = MAX_HEADER
|
86
|
+
|
87
|
+
# # A frozen format for this is about 15% faster
|
88
|
+
# STATUS_FORMAT = "HTTP/1.1 %d %s\r\nConnection: close\r\n".freeze
|
89
|
+
|
90
|
+
CONTENT_TYPE = "CONTENT_TYPE".freeze
|
91
|
+
HTTP_CONTENT_TYPE = "HTTP_CONTENT_TYPE".freeze
|
92
|
+
DEFAULT_TYPE = "text/plain".freeze
|
93
|
+
|
94
|
+
# LAST_MODIFIED = "Last-Modified".freeze
|
95
|
+
# ETAG = "ETag".freeze
|
96
|
+
# SLASH = "/".freeze
|
97
|
+
REQUEST_METHOD = "REQUEST_METHOD".freeze
|
98
|
+
# GET = "GET".freeze
|
99
|
+
HEAD = "HEAD".freeze
|
100
|
+
# # ETag is based on the apache standard of hex mtime-size-inode (inode is 0 on win32)
|
101
|
+
# ETAG_FORMAT = "\"%x-%x-%x\"".freeze
|
102
|
+
LINE_END = "\r\n".freeze
|
103
|
+
REMOTE_ADDR = "REMOTE_ADDR".freeze
|
104
|
+
# HTTP_X_FORWARDED_FOR = "HTTP_X_FORWARDED_FOR".freeze
|
105
|
+
# HTTP_IF_MODIFIED_SINCE = "HTTP_IF_MODIFIED_SINCE".freeze
|
106
|
+
# HTTP_IF_NONE_MATCH = "HTTP_IF_NONE_MATCH".freeze
|
107
|
+
# REDIRECT = "HTTP/1.1 302 Found\r\nLocation: %s\r\nConnection: close\r\n\r\n".freeze
|
108
|
+
# HOST = "HOST".freeze
|
109
|
+
|
110
|
+
HTTP_META = "HTTP_".freeze
|
111
|
+
# Portion of the request following a "?" (empty if none)
|
112
|
+
QUERY_STRING = "QUERY_STRING".freeze
|
113
|
+
# Required although HTTP_HOST takes priority if set
|
114
|
+
SERVER_NAME = "SERVER_NAME".freeze
|
115
|
+
# Required (set in spider.rb init)
|
116
|
+
SERVER_PORT = "SERVER_PORT".freeze
|
117
|
+
HTTP_HOST = "HTTP_HOST".freeze
|
118
|
+
PORT_80 = "80".freeze
|
119
|
+
PORT_443 = "443".freeze
|
120
|
+
LOCALHOST = "localhost".freeze
|
121
|
+
|
122
|
+
HTTP_STATUS_DEFAULT = proc { "CUSTOM" }
|
123
|
+
SERVER_PROTOCOL = "SERVER_PROTOCOL".freeze
|
124
|
+
HTTP_11 = "HTTP/1.1".freeze
|
125
|
+
# HTTP_10 = "HTTP/1.0".freeze
|
126
|
+
|
127
|
+
SERVER_SOFTWARE = "SERVER_SOFTWARE".freeze
|
128
|
+
GATEWAY_INTERFACE = "GATEWAY_INTERFACE".freeze
|
129
|
+
CGI_VER = "CGI/1.2".freeze
|
130
|
+
|
131
|
+
# STOP_COMMAND = "?".freeze
|
132
|
+
# HALT_COMMAND = "!".freeze
|
133
|
+
# RESTART_COMMAND = "R".freeze
|
134
|
+
|
135
|
+
RACK = "rack".freeze
|
136
|
+
RACK_VERSION = "rack.version".freeze
|
137
|
+
RACK_ERRORS = "rack.errors".freeze
|
138
|
+
RACK_MULTITHREAD = "rack.multithread".freeze
|
139
|
+
RACK_MULTIPROCESS = "rack.multiprocess".freeze
|
140
|
+
RACK_RUN_ONCE = "rack.run_once".freeze
|
141
|
+
|
142
|
+
# An IO like object containing all the request body
|
143
|
+
RACK_INPUT = "rack.input".freeze
|
144
|
+
# http or https
|
145
|
+
RACK_URL_SCHEME = "rack.url_scheme".freeze
|
146
|
+
# RACK_AFTER_REPLY = "rack.after_reply".freeze
|
147
|
+
# SPIDER_GAZELLE_SOCKET = "spider-gazelle.socket".freeze
|
148
|
+
# SPIDER_GAZELLE_CONFIG = "spider-gazelle.config".freeze
|
149
|
+
|
150
|
+
HTTP = "http".freeze
|
151
|
+
HTTPS = "https".freeze
|
152
|
+
|
153
|
+
# HTTPS_KEY = "HTTPS".freeze
|
154
|
+
|
155
|
+
# HTTP_VERSION = "HTTP_VERSION".freeze
|
156
|
+
# HTTP_CONNECTION = "HTTP_CONNECTION".freeze
|
157
|
+
|
158
|
+
# HTTP_11_200 = "HTTP/1.1 200 OK\r\n".freeze
|
159
|
+
# HTTP_10_200 = "HTTP/1.0 200 OK\r\n".freeze
|
160
|
+
|
161
|
+
CLOSE = "close".freeze
|
162
|
+
KEEP_ALIVE = "Keep-Alive".freeze
|
163
|
+
|
164
|
+
CONTENT_LENGTH = "CONTENT_LENGTH".freeze
|
165
|
+
HTTP_CONTENT_LENGTH = "HTTP_CONTENT_LENGTH".freeze
|
166
|
+
CONTENT_LENGTH2 = "Content-Length".freeze
|
167
|
+
# CONTENT_LENGTH_S = "Content-Length: ".freeze
|
168
|
+
TRANSFER_ENCODING = "Transfer-Encoding".freeze
|
169
|
+
|
170
|
+
CONNECTION = "Connection".freeze
|
171
|
+
# CONNECTION_CLOSE = "Connection: close\r\n".freeze
|
172
|
+
# CONNECTION_KEEP_ALIVE = "Connection: Keep-Alive\r\n".freeze
|
173
|
+
|
174
|
+
CHUNKED = "chunked".freeze
|
175
|
+
# TRANSFER_ENCODING_CHUNKED = "Transfer-Encoding: chunked\r\n".freeze
|
176
|
+
CLOSE_CHUNKED = "0\r\n\r\n".freeze
|
177
|
+
|
178
|
+
COMMA = ", ".freeze
|
179
|
+
COLON = ": ".freeze
|
180
|
+
NEWLINE = "\n".freeze
|
181
|
+
EMPTY = "".freeze
|
182
|
+
|
183
|
+
ZERO = "0".freeze
|
184
|
+
|
185
|
+
# Hijacking IO is supported
|
186
|
+
HIJACK_P = "rack.hijack?".freeze
|
187
|
+
# Callback for indicating that this socket will be hijacked
|
188
|
+
HIJACK = "rack.hijack".freeze
|
189
|
+
# The object for performing IO on after hijack is called
|
190
|
+
HIJACK_IO = "rack.hijack_io".freeze
|
191
|
+
|
192
|
+
ASYNC = "async.callback".freeze
|
193
|
+
|
194
|
+
USE_TLS = 'T'.freeze
|
195
|
+
NO_TLS = 'F'.freeze
|
196
|
+
KILL_GAZELLE = 'k'.freeze
|
197
|
+
end
|
198
|
+
end
|
@@ -1,172 +1,154 @@
|
|
1
|
+
require 'spider-gazelle/const'
|
1
2
|
require 'set'
|
2
3
|
|
3
|
-
|
4
4
|
module SpiderGazelle
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
HTTP_META = 'HTTP_'.freeze
|
9
|
-
REQUEST_METHOD = 'REQUEST_METHOD'.freeze # GET, POST, etc
|
10
|
-
COMMA = ', '.freeze
|
11
|
-
|
5
|
+
class Gazelle
|
6
|
+
include Const
|
12
7
|
|
13
|
-
|
8
|
+
attr_reader :parser_cache, :connections, :logger
|
14
9
|
|
10
|
+
def set_instance_type(inst)
|
11
|
+
inst.type = :request
|
12
|
+
end
|
15
13
|
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
14
|
+
def initialize(loop, logger, mode)
|
15
|
+
@gazelle = loop
|
16
|
+
# Set of active connections on this thread
|
17
|
+
@connections = Set.new
|
18
|
+
# Stale parser objects cached for reuse
|
19
|
+
@parser_cache = []
|
21
20
|
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
21
|
+
@mode = mode
|
22
|
+
@logger = logger
|
23
|
+
@app_cache = {}
|
24
|
+
@connection_queue = ::Libuv::Q::ResolvedPromise.new @gazelle, true
|
26
25
|
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
@connection_queue = ::Libuv::Q::ResolvedPromise.new(@gazelle, true)
|
26
|
+
# A single parser instance for processing requests for each gazelle
|
27
|
+
@parser = ::HttpParser::Parser.new self
|
28
|
+
@set_instance_type = method :set_instance_type
|
31
29
|
|
32
|
-
|
33
|
-
|
34
|
-
|
30
|
+
# Single progress callback for each gazelle
|
31
|
+
@on_progress = method :on_progress
|
32
|
+
end
|
35
33
|
|
36
|
-
|
37
|
-
|
34
|
+
def run
|
35
|
+
@gazelle.run do |logger|
|
36
|
+
logger.progress do |level, errorid, error|
|
37
|
+
begin
|
38
|
+
msg = "Gazelle log: #{level}: #{errorid}\n#{error.message}\n#{error.backtrace.join("\n") if error.backtrace}\n"
|
39
|
+
@logger.error msg
|
40
|
+
rescue Exception
|
41
|
+
puts 'error in gazelle logger'
|
42
|
+
end
|
38
43
|
end
|
39
44
|
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
@socket_server = @gazelle.pipe(true)
|
55
|
-
@socket_server.connect(DELEGATE_PIPE) do
|
56
|
-
@socket_server.progress &method(:new_connection)
|
57
|
-
@socket_server.start_read2
|
58
|
-
end
|
59
|
-
|
60
|
-
# A pipe used to signal various control commands (shutdown, etc)
|
61
|
-
@signal_server = @gazelle.pipe
|
62
|
-
@signal_server.connect(SIGNAL_PIPE) do
|
63
|
-
@signal_server.progress &method(:process_signal)
|
64
|
-
@signal_server.start_read
|
65
|
-
end
|
66
|
-
end
|
67
|
-
end
|
45
|
+
unless @mode == :no_ipc
|
46
|
+
# A pipe used to forward connections to different threads
|
47
|
+
@socket_server = @gazelle.pipe true
|
48
|
+
@socket_server.connect(DELEGATE_PIPE) do
|
49
|
+
@socket_server.progress &method(:new_connection)
|
50
|
+
@socket_server.start_read2
|
51
|
+
end
|
52
|
+
|
53
|
+
# A pipe used to signal various control commands (shutdown, etc)
|
54
|
+
@signal_server = @gazelle.pipe
|
55
|
+
@signal_server.connect(SIGNAL_PIPE) do
|
56
|
+
@signal_server.progress &method(:process_signal)
|
57
|
+
@signal_server.start_read
|
58
|
+
end
|
68
59
|
end
|
60
|
+
end
|
61
|
+
end
|
69
62
|
|
63
|
+
# HTTP Parser callbacks:
|
64
|
+
def on_message_begin(parser)
|
65
|
+
@connection.start_parsing
|
66
|
+
end
|
70
67
|
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
end
|
68
|
+
def on_url(parser, url)
|
69
|
+
@connection.parsing.url << url
|
70
|
+
end
|
75
71
|
|
76
|
-
|
77
|
-
|
78
|
-
|
72
|
+
def on_header_field(parser, header)
|
73
|
+
req = @connection.parsing
|
74
|
+
req.header.frozen? ? req.header = header : req.header << header
|
75
|
+
end
|
79
76
|
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
77
|
+
def on_header_value(parser, value)
|
78
|
+
req = @connection.parsing
|
79
|
+
if req.header.frozen?
|
80
|
+
req.env[req.header] << value
|
81
|
+
else
|
82
|
+
header = req.header
|
83
|
+
header.upcase!
|
84
|
+
header.gsub!('-', '_')
|
85
|
+
header.prepend(HTTP_META)
|
86
|
+
header.freeze
|
87
|
+
if req.env[header]
|
88
|
+
req.env[header] << COMMA
|
89
|
+
req.env[header] << value
|
90
|
+
else
|
91
|
+
req.env[header] = value
|
87
92
|
end
|
93
|
+
end
|
94
|
+
end
|
88
95
|
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
req.env[req.header] << value
|
93
|
-
else
|
94
|
-
header = req.header
|
95
|
-
header.upcase!
|
96
|
-
header.gsub!('-', '_')
|
97
|
-
header.prepend(HTTP_META)
|
98
|
-
header.freeze
|
99
|
-
if req.env[header]
|
100
|
-
req.env[header] << COMMA
|
101
|
-
req.env[header] << value
|
102
|
-
else
|
103
|
-
req.env[header] = value
|
104
|
-
end
|
105
|
-
end
|
106
|
-
end
|
96
|
+
def on_headers_complete(parser)
|
97
|
+
@connection.parsing.env[REQUEST_METHOD] = @connection.state.http_method.to_s
|
98
|
+
end
|
107
99
|
|
108
|
-
|
109
|
-
|
110
|
-
|
100
|
+
def on_body(parser, data)
|
101
|
+
@connection.parsing.body << data
|
102
|
+
end
|
111
103
|
|
112
|
-
|
113
|
-
|
114
|
-
|
104
|
+
def on_message_complete(parser)
|
105
|
+
@connection.finished_parsing
|
106
|
+
end
|
115
107
|
|
116
|
-
|
117
|
-
|
118
|
-
|
108
|
+
def discard(connection)
|
109
|
+
@connections.delete(connection)
|
110
|
+
state = connection.state
|
111
|
+
state.reset!
|
112
|
+
@parser_cache << state
|
113
|
+
end
|
119
114
|
|
120
|
-
|
121
|
-
@connections.delete(connection)
|
122
|
-
@parser_cache << connection.state
|
123
|
-
end
|
115
|
+
protected
|
124
116
|
|
117
|
+
def on_progress(data, socket)
|
118
|
+
# Keep track of which connection we are processing for the callbacks
|
119
|
+
@connection = socket.storage
|
125
120
|
|
126
|
-
|
121
|
+
# Check for errors during the parsing of the request
|
122
|
+
@connection.parsing_error if @parser.parse(@connection.state, data)
|
123
|
+
end
|
127
124
|
|
125
|
+
def new_connection(data, socket)
|
126
|
+
# Data == "TLS_indicator Port APP_ID"
|
127
|
+
tls, port, app_id = data.split(' ', 3)
|
128
|
+
app = @app_cache[app_id.to_sym] ||= AppStore.get(app_id)
|
129
|
+
inst = @parser_cache.pop || ::HttpParser::Parser.new_instance(&@set_instance_type)
|
128
130
|
|
129
|
-
|
130
|
-
|
131
|
-
|
131
|
+
# process any data coming from the socket
|
132
|
+
socket.progress @on_progress
|
133
|
+
# TODO:: Allow some globals for supplying the certs
|
134
|
+
socket.start_tls(:server => true) if tls == 'T'
|
132
135
|
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
136
|
+
# Keep track of the connection
|
137
|
+
connection = Connection.new self, @gazelle, socket, port, inst, app, @connection_queue
|
138
|
+
@connections.add connection
|
139
|
+
# This allows us to re-use the one proc for parsing
|
140
|
+
socket.storage = connection
|
138
141
|
|
139
|
-
|
140
|
-
|
141
|
-
tls, port, app_id = data.split(' ', 3)
|
142
|
-
app = @app_cache[app_id.to_sym] ||= AppStore.get(app_id)
|
143
|
-
inst = @parser_cache.pop || ::HttpParser::Parser.new_instance(&@set_instance_type)
|
144
|
-
|
145
|
-
# process any data coming from the socket
|
146
|
-
socket.progress @on_progress
|
147
|
-
if tls == 'T'
|
148
|
-
# TODO:: Allow some globals for supplying the certs
|
149
|
-
socket.start_tls(:server => true)
|
150
|
-
end
|
151
|
-
|
152
|
-
# Keep track of the connection
|
153
|
-
connection = Connection.new self, @gazelle, socket, port, inst, app, @connection_queue
|
154
|
-
@connections.add connection
|
155
|
-
socket.storage = connection # This allows us to re-use the one proc for parsing
|
156
|
-
|
157
|
-
socket.start_read
|
158
|
-
end
|
142
|
+
socket.start_read
|
143
|
+
end
|
159
144
|
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
end
|
164
|
-
end
|
145
|
+
def process_signal(data, pipe)
|
146
|
+
shutdown if data == KILL_GAZELLE
|
147
|
+
end
|
165
148
|
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
@gazelle.stop
|
170
|
-
end
|
149
|
+
def shutdown
|
150
|
+
# TODO:: do this nicely. Need to signal the connections to close
|
151
|
+
@gazelle.stop
|
171
152
|
end
|
153
|
+
end
|
172
154
|
end
|