spider-gazelle 1.2.0 → 2.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/bin/sg +1 -64
- data/lib/rack/handler/spider-gazelle.rb +17 -26
- data/lib/rack/lock_patch.rb +27 -27
- data/lib/spider-gazelle.rb +165 -16
- data/lib/spider-gazelle/gazelle.rb +151 -134
- data/lib/spider-gazelle/gazelle/app_store.rb +86 -0
- data/lib/spider-gazelle/gazelle/http1.rb +496 -0
- data/lib/spider-gazelle/gazelle/request.rb +155 -0
- data/lib/spider-gazelle/logger.rb +122 -0
- data/lib/spider-gazelle/options.rb +213 -0
- data/lib/spider-gazelle/reactor.rb +69 -0
- data/lib/spider-gazelle/signaller.rb +214 -0
- data/lib/spider-gazelle/signaller/signal_parser.rb +66 -0
- data/lib/spider-gazelle/spider.rb +305 -343
- data/lib/spider-gazelle/spider/binding.rb +80 -0
- data/lib/spider-gazelle/upgrades/websocket.rb +92 -88
- data/spec/http1_spec.rb +173 -0
- data/spec/rack_lock_spec.rb +97 -97
- data/spider-gazelle.gemspec +6 -6
- metadata +24 -17
- data/lib/spider-gazelle/app_store.rb +0 -64
- data/lib/spider-gazelle/binding.rb +0 -53
- data/lib/spider-gazelle/connection.rb +0 -371
- data/lib/spider-gazelle/const.rb +0 -206
- data/lib/spider-gazelle/request.rb +0 -103
data/lib/spider-gazelle/const.rb
DELETED
@@ -1,206 +0,0 @@
|
|
1
|
-
# Thanks to Puma https://github.com/puma/puma/blob/master/lib/puma/const.rb
|
2
|
-
require "rack"
|
3
|
-
|
4
|
-
module SpiderGazelle
  # UnsupportedOption = Class.new(RuntimeError)

  # Status code => reason phrase table, taken directly from Rack. It is used so
  # frequently that it lives on SpiderGazelle itself for easy access rather than
  # inside SpiderGazelle::Const.
  HTTP_STATUS_CODES = Rack::Utils::HTTP_STATUS_CODES

  # # For some HTTP status codes the client only expects headers.
  # STATUS_WITH_NO_ENTITY_BODY = Hash[Rack::Utils::STATUS_WITH_NO_ENTITY_BODY.map { |s|
  #   [s, true]
  # }]

  # Based on http://rack.rubyforge.org/doc/SPEC.html
  #
  # Shared, frozen constants used while building requests and responses. Most of
  # them simply name a string with the same contents; using the constants gave
  # about a 3% to 10% performance improvement over using string literals.
  #
  # They are frozen because Hash#[]= dups a String key UNLESS it is frozen, so
  # this saves two object allocations when the env hash is created later.
  #
  # While SpiderGazelle does try to emulate the CGI/1.2 protocol, it does not use
  # the REMOTE_IDENT, REMOTE_USER, or REMOTE_HOST parameters since those are
  # either a security problem or too taxing on performance.
  module Const
    # --- Server identity ---------------------------------------------------
    VERSION = '1.2.0'.freeze
    SPIDER_GAZELLE_VERSION = VERSION
    # CODE_NAME = "Earl of Sandwich Partition"
    SERVER = 'SpiderGazelle'.freeze

    # --- Timeouts (currently disabled) -------------------------------------
    # FAST_TRACK_KA_TIMEOUT = 0.2

    # # The default number of seconds for another request within a persistent
    # # session.
    # PERSISTENT_TIMEOUT = 20

    # # The default number of seconds to wait until we get the first data
    # # for the request
    # FIRST_DATA_TIMEOUT = 30

    # # How long to wait when getting some write blocking on the socket when
    # # sending data back
    # WRITE_TIMEOUT = 10

    # DATE = "Date".freeze

    # --- Request location env keys -----------------------------------------
    SCRIPT_NAME = 'SCRIPT_NAME'.freeze

    # The original URI requested by the client.
    REQUEST_URI = 'REQUEST_URI'.freeze
    REQUEST_PATH = 'REQUEST_PATH'.freeze

    PATH_INFO = 'PATH_INFO'.freeze

    # SPIDER_GAZELLE_TMP_BASE = "spider-gazelle".freeze

    # --- Canned responses ---------------------------------------------------
    # Indicate that we couldn't parse the request
    ERROR_400_RESPONSE = "HTTP/1.1 400 Bad Request\r\n\r\n".freeze

    # The standard empty 404 response for bad requests. (The comment inherited
    # from Puma points at an Error404Handler for custom responses.)
    ERROR_404_RESPONSE = "HTTP/1.1 404 Not Found\r\nConnection: close\r\nServer: #{SERVER} #{SPIDER_GAZELLE_VERSION}\r\n\r\nNOT FOUND".freeze

    # The standard empty 408 response for requests that timed out.
    ERROR_408_RESPONSE = "HTTP/1.1 408 Request Timeout\r\nConnection: close\r\nServer: #{SERVER} #{SPIDER_GAZELLE_VERSION}\r\n\r\n".freeze

    # Indicate that there was an internal error, obviously.
    ERROR_500_RESPONSE = "HTTP/1.1 500 Internal Server Error\r\n\r\n".freeze
    EMPTY_RESPONSE = [''.freeze].freeze

    # A common header for indicating the server is too busy. Not used yet.
    ERROR_503_RESPONSE = "HTTP/1.1 503 Service Unavailable\r\n\r\nBUSY".freeze

    # --- Sizes ---------------------------------------------------------------
    # # The basic max request size we'll try to read.
    # CHUNK_SIZE = 16 * 1024
    HEX_SIZE_CHUNKED_RESPONSE = 16
    INTERNAL_PIPE_BACKLOG = 16

    # # This is the maximum header that is allowed before a client is booted. The parser detects
    # # this, but we'd also like to do this as well.
    # MAX_HEADER = 1024 * (80 + 32)

    # # Maximum request body size before it is moved out of memory and into a tempfile for reading.
    # MAX_BODY = MAX_HEADER

    # # A frozen format for this is about 15% faster
    # STATUS_FORMAT = "HTTP/1.1 %d %s\r\nConnection: close\r\n".freeze

    # --- Content / method env keys ------------------------------------------
    CONTENT_TYPE = 'CONTENT_TYPE'.freeze
    HTTP_CONTENT_TYPE = 'HTTP_CONTENT_TYPE'.freeze
    DEFAULT_TYPE = 'text/plain'.freeze

    # LAST_MODIFIED = "Last-Modified".freeze
    ETAG = 'ETag'.freeze
    # SLASH = "/".freeze
    REQUEST_METHOD = 'REQUEST_METHOD'.freeze
    # GET = "GET".freeze
    HEAD = 'HEAD'.freeze
    # # ETag is based on the apache standard of hex mtime-size-inode (inode is 0 on win32)
    # ETAG_FORMAT = "\"%x-%x-%x\"".freeze
    LINE_END = "\r\n".freeze
    REMOTE_ADDR = 'REMOTE_ADDR'.freeze
    # HTTP_X_FORWARDED_FOR = "HTTP_X_FORWARDED_FOR".freeze
    # HTTP_IF_MODIFIED_SINCE = "HTTP_IF_MODIFIED_SINCE".freeze
    # HTTP_IF_NONE_MATCH = "HTTP_IF_NONE_MATCH".freeze
    # REDIRECT = "HTTP/1.1 302 Found\r\nLocation: %s\r\nConnection: close\r\n\r\n".freeze
    # HOST = "HOST".freeze

    HTTP_META = 'HTTP_'.freeze
    # Portion of the request following a "?" (empty if none)
    QUERY_STRING = 'QUERY_STRING'.freeze
    # Required although HTTP_HOST takes priority if set
    SERVER_NAME = 'SERVER_NAME'.freeze
    # Required (set in spider.rb init)
    SERVER_PORT = 'SERVER_PORT'.freeze
    HTTP_HOST = 'HTTP_HOST'.freeze
    PORT_80 = '80'.freeze
    PORT_443 = '443'.freeze
    LOCALHOST = 'localhost'.freeze

    HTTP_STATUS_DEFAULT = proc { "CUSTOM" }
    SERVER_PROTOCOL = 'SERVER_PROTOCOL'.freeze
    HTTP_11 = 'HTTP/1.1'.freeze
    # HTTP_10 = "HTTP/1.0".freeze

    SERVER_SOFTWARE = 'SERVER_SOFTWARE'.freeze
    GATEWAY_INTERFACE = 'GATEWAY_INTERFACE'.freeze
    CGI_VER = 'CGI/1.2'.freeze

    # STOP_COMMAND = "?".freeze
    # HALT_COMMAND = "!".freeze
    # RESTART_COMMAND = "R".freeze

    # --- Rack env keys --------------------------------------------------------
    RACK = 'rack'.freeze
    RACK_VERSION = 'rack.version'.freeze
    RACK_ERRORS = 'rack.errors'.freeze
    RACK_MULTITHREAD = 'rack.multithread'.freeze
    RACK_MULTIPROCESS = 'rack.multiprocess'.freeze
    RACK_RUN_ONCE = 'rack.run_once'.freeze

    # An IO like object containing all the request body
    RACK_INPUT = 'rack.input'.freeze
    # http or https
    RACK_URL_SCHEME = 'rack.url_scheme'.freeze
    # RACK_AFTER_REPLY = "rack.after_reply".freeze
    # SPIDER_GAZELLE_SOCKET = "spider-gazelle.socket".freeze
    # SPIDER_GAZELLE_CONFIG = "spider-gazelle.config".freeze

    ASCII_8BIT = 'ASCII-8BIT'.freeze

    HTTP = 'http'.freeze
    HTTPS = 'https'.freeze

    # HTTPS_KEY = "HTTPS".freeze

    # HTTP_VERSION = "HTTP_VERSION".freeze
    # HTTP_CONNECTION = "HTTP_CONNECTION".freeze

    # HTTP_11_200 = "HTTP/1.1 200 OK\r\n".freeze
    # HTTP_10_200 = "HTTP/1.0 200 OK\r\n".freeze

    # --- Response header names and values ------------------------------------
    CLOSE = 'close'.freeze
    KEEP_ALIVE = 'Keep-Alive'.freeze

    CONTENT_LENGTH = 'CONTENT_LENGTH'.freeze
    HTTP_CONTENT_LENGTH = 'HTTP_CONTENT_LENGTH'.freeze
    CONTENT_LENGTH2 = 'Content-Length'.freeze
    # CONTENT_LENGTH_S = "Content-Length: ".freeze
    TRANSFER_ENCODING = 'Transfer-Encoding'.freeze

    CONNECTION = 'Connection'.freeze
    # CONNECTION_CLOSE = "Connection: close\r\n".freeze
    # CONNECTION_KEEP_ALIVE = "Connection: Keep-Alive\r\n".freeze

    CHUNKED = 'chunked'.freeze
    # TRANSFER_ENCODING_CHUNKED = "Transfer-Encoding: chunked\r\n".freeze
    CLOSE_CHUNKED = "0\r\n\r\n".freeze

    # --- Small string fragments -----------------------------------------------
    COMMA = ', '.freeze
    COLON_SPACE = ': '.freeze
    COLON = ':'.freeze
    DASH = '-'.freeze
    UNDERSCORE = '_'.freeze
    SPACE = ' '.freeze
    NEWLINE = "\n".freeze
    EMPTY = ''.freeze
    QUESTION_MARK = '?'.freeze

    ZERO = '0'.freeze

    # --- Hijack / async env keys ----------------------------------------------
    # Hijacking IO is supported
    HIJACK_P = 'rack.hijack?'.freeze
    # Callback for indicating that this socket will be hijacked
    HIJACK = 'rack.hijack'.freeze
    # The object for performing IO on after hijack is called
    HIJACK_IO = 'rack.hijack_io'.freeze

    ASYNC = 'async.callback'.freeze

    # --- Single-character flags -----------------------------------------------
    USE_TLS = 'T'.freeze
    NO_TLS = 'F'.freeze
    KILL_GAZELLE = 'k'.freeze
  end
end
|
@@ -1,103 +0,0 @@
|
|
1
|
-
require 'spider-gazelle/const'
|
2
|
-
require 'stringio'
|
3
|
-
|
4
|
-
module SpiderGazelle
  # State for a single HTTP request. The url, header and body text are filled
  # in through the accessors; #execute! then completes the Rack env, calls the
  # app and records its response.
  class Request
    include Const

    # TODO:: Add HTTP headers to the env and capitalise them and prefix them with HTTP_
    # convert - signs to underscores
    #
    # Template env; every request starts from a dup of this hash.
    PROTO_ENV = {
      RACK_VERSION => ::Rack::VERSION,  # Should be an array of integers
      RACK_ERRORS => $stderr,           # An error stream that supports: puts, write and flush
      RACK_MULTITHREAD => true,         # can the app be simultaneously invoked by another thread?
      RACK_MULTIPROCESS => false,       # will the app be simultaneously invoked in a separate process?
      RACK_RUN_ONCE => false,           # this isn't CGI so will always be false

      SCRIPT_NAME => ENV['SCRIPT_NAME'] || EMPTY, # The virtual path of the app base (empty if root)
      SERVER_PROTOCOL => HTTP_11,

      GATEWAY_INTERFACE => CGI_VER,
      SERVER_SOFTWARE => SERVER
    }

    attr_accessor :env, :url, :header, :body, :keep_alive, :upgrade, :deferred
    attr_reader :hijacked, :response

    # @param connection object supplying #loop, #port, #remote_ip, #tls and
    #   #async_callback for the socket this request arrived on
    # @param app the Rack application (anything responding to #call(env))
    def initialize(connection, app)
      @app = app
      @body = ''
      @header = ''
      @url = ''
      @env = PROTO_ENV.dup
      @loop = connection.loop
      @env[SERVER_PORT] = connection.port
      @env[REMOTE_ADDR] = connection.remote_ip
      @env[RACK_URL_SCHEME] = connection.tls ? HTTPS : HTTP
      @env[ASYNC] = connection.async_callback
    end

    # Finishes building the Rack env and invokes the app.
    #
    # If the app throws :async or returns a status of -1 the response is
    # treated as deferred: a deferrable from the loop is stored in #deferred
    # and any body already returned is closed.
    #
    # @return the app's response, or nil when the app threw :async
    def execute!
      # Rack expects CGI-style env values to be Strings of digits, and the
      # length must be in bytes. String#length would hand the app an Integer
      # counting characters in whatever encoding the body currently has.
      @env[CONTENT_LENGTH] = @env.delete(HTTP_CONTENT_LENGTH) || @body.bytesize.to_s
      @env[CONTENT_TYPE] = @env.delete(HTTP_CONTENT_TYPE) || DEFAULT_TYPE
      @env[REQUEST_URI] = @url.freeze

      # For Rack::Lint on 1.9, ensure that the encoding is always for spec
      @body.force_encoding(ASCII_8BIT) if @body.respond_to?(:force_encoding)
      @env[RACK_INPUT] = StringIO.new @body

      assign_path_and_query
      assign_server_name_and_port

      # Provide hijack options if this is an upgrade request
      if @upgrade == true
        @env[HIJACK_P] = true
        @env[HIJACK] = method :hijack
      end

      call_app
    end

    protected

    # Rack hijack callback: creates a deferrable on the loop and exposes its
    # promise to the app as the hijack IO.
    def hijack
      @hijacked = @loop.defer
      @env[HIJACK_IO] = @hijacked.promise
    end

    private

    # Splits the request URL at the first "?" into PATH_INFO / REQUEST_PATH
    # and QUERY_STRING (empty when there is no query).
    def assign_path_and_query
      query_start = @url.index QUESTION_MARK
      if query_start
        path = @url[0...query_start].freeze
        @env[PATH_INFO] = path
        @env[REQUEST_PATH] = path
        @env[QUERY_STRING] = @url[query_start + 1..-1].freeze
      else
        @env[PATH_INFO] = @url
        @env[REQUEST_PATH] = @url
        @env[QUERY_STRING] = EMPTY
      end
    end

    # Derives SERVER_NAME / SERVER_PORT from the Host header, falling back to
    # localhost and the default port when the header is missing.
    def assign_server_name_and_port
      host = @env[HTTP_HOST]
      unless host
        @env[SERVER_NAME] = LOCALHOST
        @env[SERVER_PORT] = default_server_port
        return
      end

      colon = host.index(COLON)
      if colon
        @env[SERVER_NAME] = host[0, colon]
        @env[SERVER_PORT] = host[colon + 1, host.bytesize]
      else
        @env[SERVER_NAME] = host
        @env[SERVER_PORT] = default_server_port
      end
    end

    # Port used when the Host header does not name one. PROTO_ENV as defined
    # in this file has no SERVER_PORT entry, so keep the connection's port
    # from #initialize rather than overwriting it with nil.
    # NOTE(review): presumably another file populates PROTO_ENV[SERVER_PORT] - confirm.
    def default_server_port
      PROTO_ENV[SERVER_PORT] || @env[SERVER_PORT]
    end

    # Calls the app and handles the deferred (async) response conventions.
    def call_app
      @response = catch(:async) { @app.call @env }
      if @response.nil? || @response[0] == -1
        @deferred = @loop.defer

        # close the body for deferred responses
        unless @response.nil?
          body = @response[2]
          body.close if body.respond_to?(:close)
        end
      end
      @response
    end
  end
end
|