log-courier 1.1 → 1.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/log-courier/client.rb +23 -16
- data/lib/log-courier/client_tls.rb +66 -46
- data/lib/log-courier/event_queue.rb +34 -32
- data/lib/log-courier/server.rb +27 -10
- data/lib/log-courier/server_tcp.rb +95 -54
- data/lib/log-courier/server_zmq.rb +280 -127
- metadata +18 -4
data/lib/log-courier/server.rb
CHANGED
@@ -40,6 +40,7 @@ module LogCourier
|
|
40
40
|
}.merge!(options)
|
41
41
|
|
42
42
|
@logger = @options[:logger]
|
43
|
+
@logger['plugin'] = 'input/courier'
|
43
44
|
|
44
45
|
case @options[:transport]
|
45
46
|
when 'tcp', 'tls'
|
@@ -49,11 +50,12 @@ module LogCourier
|
|
49
50
|
require 'log-courier/server_zmq'
|
50
51
|
@server = ServerZmq.new(@options)
|
51
52
|
else
|
52
|
-
|
53
|
+
fail 'input/courier: \'transport\' must be tcp, tls, plainzmq or zmq'
|
53
54
|
end
|
54
55
|
|
55
|
-
# Grab the port back
|
56
|
+
# Grab the port back and update the logger context
|
56
57
|
@port = @server.port
|
58
|
+
@logger['port'] = @port unless @logger.nil?
|
57
59
|
|
58
60
|
# Load the json adapter
|
59
61
|
@json_adapter = MultiJson.adapter.instance
|
@@ -75,7 +77,11 @@ module LogCourier
|
|
75
77
|
when 'JDAT'
|
76
78
|
process_jdat message, comm, event_queue
|
77
79
|
else
|
78
|
-
|
80
|
+
if comm.peer.nil?
|
81
|
+
@logger.warn 'Unknown message received', :from => 'unknown' unless @logger.nil?
|
82
|
+
else
|
83
|
+
@logger.warn 'Unknown message received', :from => comm.peer unless @logger.nil?
|
84
|
+
end
|
79
85
|
# Don't kill a client that sends a bad message
|
80
86
|
# Just reject it and let it send it again, potentially to another server
|
81
87
|
comm.send '????', ''
|
@@ -93,17 +99,21 @@ module LogCourier
|
|
93
99
|
server_thread.join
|
94
100
|
end
|
95
101
|
end
|
102
|
+
return
|
96
103
|
end
|
97
104
|
|
105
|
+
private
|
106
|
+
|
98
107
|
def process_ping(message, comm)
|
99
108
|
# Size of message should be 0
|
100
109
|
if message.length != 0
|
101
|
-
|
110
|
+
fail ProtocolError, "unexpected data attached to ping message (#{message.length})"
|
102
111
|
end
|
103
112
|
|
104
113
|
# PONG!
|
105
114
|
# NOTE: comm.send can raise a Timeout::Error of its own
|
106
115
|
comm.send 'PONG', ''
|
116
|
+
return
|
107
117
|
end
|
108
118
|
|
109
119
|
def process_jdat(message, comm, event_queue)
|
@@ -114,11 +124,17 @@ module LogCourier
|
|
114
124
|
# This allows the client to know what is being acknowledged
|
115
125
|
# Nonce is 16 so check we have enough
|
116
126
|
if message.length < 17
|
117
|
-
|
127
|
+
fail ProtocolError, "JDAT message too small (#{message.length})"
|
118
128
|
end
|
119
129
|
|
120
130
|
nonce = message[0...16]
|
121
131
|
|
132
|
+
if !@logger.nil? && @logger.debug?
|
133
|
+
nonce_str = nonce.each_byte.map do |b|
|
134
|
+
b.to_s(16).rjust(2, '0')
|
135
|
+
end
|
136
|
+
end
|
137
|
+
|
122
138
|
# The remainder of the message is the compressed data block
|
123
139
|
message = StringIO.new Zlib::Inflate.inflate(message[16...message.length])
|
124
140
|
|
@@ -136,7 +152,7 @@ module LogCourier
|
|
136
152
|
# Finished!
|
137
153
|
break
|
138
154
|
elsif length_buf.length < 4
|
139
|
-
|
155
|
+
fail ProtocolError, "JDAT length extraction failed (#{ret} #{length_buf.length})"
|
140
156
|
end
|
141
157
|
|
142
158
|
length = length_buf.unpack('N').first
|
@@ -145,7 +161,7 @@ module LogCourier
|
|
145
161
|
ret = message.read length, data_buf
|
146
162
|
if ret.nil? or data_buf.length < length
|
147
163
|
@logger.warn()
|
148
|
-
|
164
|
+
fail ProtocolError, "JDAT message extraction failed #{ret} #{data_buf.length}"
|
149
165
|
end
|
150
166
|
|
151
167
|
data_buf.force_encoding('utf-8')
|
@@ -161,7 +177,7 @@ module LogCourier
|
|
161
177
|
begin
|
162
178
|
event = @json_adapter.load(data_buf, @json_options)
|
163
179
|
rescue MultiJson::ParseError => e
|
164
|
-
@logger.warn
|
180
|
+
@logger.warn e, :hint => 'JSON parse failure, falling back to plain-text' unless @logger.nil?
|
165
181
|
event = { 'message' => data_buf }
|
166
182
|
end
|
167
183
|
|
@@ -171,7 +187,7 @@ module LogCourier
|
|
171
187
|
rescue TimeoutError
|
172
188
|
# Full pipeline, partial ack
|
173
189
|
# NOTE: comm.send can raise a Timeout::Error of its own
|
174
|
-
@logger.debug
|
190
|
+
@logger.debug 'Partially acknowledging message', :nonce => nonce_str.join, :sequence => sequence if !@logger.nil? && @logger.debug?
|
175
191
|
comm.send 'ACKN', [nonce, sequence].pack('A*N')
|
176
192
|
ack_timeout = Time.now.to_i + 5
|
177
193
|
retry
|
@@ -182,8 +198,9 @@ module LogCourier
|
|
182
198
|
|
183
199
|
# Acknowledge the full message
|
184
200
|
# NOTE: comm.send can raise a Timeout::Error
|
185
|
-
@logger.debug
|
201
|
+
@logger.debug 'Acknowledging message', :nonce => nonce_str.join, :sequence => sequence if !@logger.nil? && @logger.debug?
|
186
202
|
comm.send 'ACKN', [nonce, sequence].pack('A*N')
|
203
|
+
return
|
187
204
|
end
|
188
205
|
end
|
189
206
|
end
|
@@ -24,13 +24,25 @@ require 'thread'
|
|
24
24
|
module LogCourier
|
25
25
|
# Wrap around TCPServer to grab last error for use in reporting which peer had an error
|
26
26
|
class ExtendedTCPServer < TCPServer
|
27
|
-
|
27
|
+
attr_reader :peer
|
28
|
+
|
29
|
+
def initialise
|
30
|
+
reset_peer
|
31
|
+
super
|
32
|
+
end
|
33
|
+
|
34
|
+
# Save the peer
|
28
35
|
def accept
|
29
36
|
sock = super
|
30
37
|
peer = sock.peeraddr(:numeric)
|
31
|
-
|
38
|
+
@peer = "#{peer[2]}:#{peer[1]}"
|
32
39
|
return sock
|
33
40
|
end
|
41
|
+
|
42
|
+
def reset_peer
|
43
|
+
@peer = 'unknown'
|
44
|
+
return
|
45
|
+
end
|
34
46
|
end
|
35
47
|
|
36
48
|
# TLS transport implementation for server
|
@@ -57,11 +69,11 @@ module LogCourier
|
|
57
69
|
|
58
70
|
if @options[:transport] == 'tls'
|
59
71
|
[:ssl_certificate, :ssl_key].each do |k|
|
60
|
-
|
72
|
+
fail "input/courier: '#{k}' is required" if @options[k].nil?
|
61
73
|
end
|
62
74
|
|
63
75
|
if @options[:ssl_verify] and (!@options[:ssl_verify_default_ca] && @options[:ssl_verify_ca].nil?)
|
64
|
-
|
76
|
+
fail 'input/courier: Either \'ssl_verify_default_ca\' or \'ssl_verify_ca\' must be specified when ssl_verify is true'
|
65
77
|
end
|
66
78
|
end
|
67
79
|
|
@@ -94,16 +106,18 @@ module LogCourier
|
|
94
106
|
ssl.verify_mode = OpenSSL::SSL::VERIFY_PEER | OpenSSL::SSL::VERIFY_FAIL_IF_NO_PEER_CERT
|
95
107
|
end
|
96
108
|
|
109
|
+
# Create the OpenSSL server - set start_immediately to false so we can multithread handshake
|
97
110
|
@server = OpenSSL::SSL::SSLServer.new(@tcp_server, ssl)
|
111
|
+
@server.start_immediately = false
|
98
112
|
else
|
99
113
|
@server = @tcp_server
|
100
114
|
end
|
101
115
|
|
102
116
|
if @options[:port] == 0
|
103
|
-
@logger.warn '
|
117
|
+
@logger.warn 'Ephemeral port allocated', :transport => @options[:transport], :port => @port unless @logger.nil?
|
104
118
|
end
|
105
119
|
rescue => e
|
106
|
-
raise "
|
120
|
+
raise "input/courier: Failed to initialise: #{e}"
|
107
121
|
end
|
108
122
|
end # def initialize
|
109
123
|
|
@@ -111,20 +125,20 @@ module LogCourier
|
|
111
125
|
client_threads = {}
|
112
126
|
|
113
127
|
loop do
|
114
|
-
#
|
128
|
+
# Because start_immediately is false, TCP accept is single threaded but
|
129
|
+
# handshake is essentially multithreaded as we defer it to the thread
|
130
|
+
@tcp_server.reset_peer
|
131
|
+
client = nil
|
115
132
|
begin
|
116
133
|
client = @server.accept
|
117
134
|
rescue EOFError, OpenSSL::SSL::SSLError, IOError => e
|
118
|
-
#
|
119
|
-
peer
|
120
|
-
|
121
|
-
client.close rescue nil
|
135
|
+
# Accept failure or other issue
|
136
|
+
@logger.warn 'Connection failed to accept', :error => e.message, :peer => @tcp_server.peer unless @logger.nil
|
137
|
+
client.close rescue nil unless client.nil?
|
122
138
|
next
|
123
139
|
end
|
124
140
|
|
125
|
-
|
126
|
-
|
127
|
-
@logger.info "[LogCourierServer] New connection from #{peer}" unless @logger.nil?
|
141
|
+
@logger.info 'New connection', :peer => @tcp_server.peer unless @logger.nil?
|
128
142
|
|
129
143
|
# Clear up finished threads
|
130
144
|
client_threads.delete_if do |_, thr|
|
@@ -132,17 +146,16 @@ module LogCourier
|
|
132
146
|
end
|
133
147
|
|
134
148
|
# Start a new connection thread
|
135
|
-
client_threads[client] = Thread.new(client, peer) do |client_copy, peer_copy|
|
136
|
-
|
149
|
+
client_threads[client] = Thread.new(client, @tcp_server.peer) do |client_copy, peer_copy|
|
150
|
+
run_thread client_copy, peer_copy, &block
|
137
151
|
end
|
138
152
|
end
|
153
|
+
return
|
139
154
|
rescue ShutdownSignal
|
140
|
-
|
141
|
-
0
|
155
|
+
return
|
142
156
|
rescue StandardError, NativeException => e
|
143
157
|
# Some other unknown problem
|
144
|
-
@logger.warn
|
145
|
-
@logger.warn("[LogCourierServer] #{e.backtrace}: #{e.message} (#{e.class})") unless @logger.nil?
|
158
|
+
@logger.warn e, :hint => 'Unknown error, shutting down' unless @logger.nil?
|
146
159
|
raise e
|
147
160
|
ensure
|
148
161
|
# Raise shutdown in all client threads and join them
|
@@ -154,6 +167,24 @@ module LogCourier
|
|
154
167
|
|
155
168
|
@tcp_server.close
|
156
169
|
end
|
170
|
+
|
171
|
+
private
|
172
|
+
|
173
|
+
def run_thread(client, peer, &block)
|
174
|
+
# Perform the handshake inside the new thread so we don't block TCP accept
|
175
|
+
if @options[:transport] == 'tls'
|
176
|
+
begin
|
177
|
+
client.accept
|
178
|
+
rescue EOFError, OpenSSL::SSL::SSLError, IOError => e
|
179
|
+
# Handshake failure or other issue
|
180
|
+
@logger.warn 'Connection failed to initialise', :error => e.message, :peer => peer unless @logger.nil?
|
181
|
+
client.close
|
182
|
+
return
|
183
|
+
end
|
184
|
+
end
|
185
|
+
|
186
|
+
ConnectionTcp.new(@logger, client, peer, @options).run(&block)
|
187
|
+
end
|
157
188
|
end
|
158
189
|
|
159
190
|
# Representation of a single connected client
|
@@ -179,7 +210,7 @@ module LogCourier
|
|
179
210
|
|
180
211
|
# Sanity
|
181
212
|
if length > @options[:max_packet_size]
|
182
|
-
|
213
|
+
fail ProtocolError, "packet too large (#{length} > #{@options[:max_packet_size]})"
|
183
214
|
end
|
184
215
|
|
185
216
|
# While we're processing, EOF is bad as it may occur during send
|
@@ -198,32 +229,61 @@ module LogCourier
|
|
198
229
|
# If we EOF next it's a graceful close
|
199
230
|
@in_progress = false
|
200
231
|
end
|
232
|
+
return
|
201
233
|
rescue TimeoutError
|
202
234
|
# Timeout of the connection, we were idle too long without a ping/pong
|
203
|
-
@logger.warn
|
235
|
+
@logger.warn 'Connection timed out', :peer => @peer unless @logger.nil?
|
236
|
+
return
|
204
237
|
rescue EOFError
|
205
238
|
if @in_progress
|
206
|
-
@logger.warn
|
239
|
+
@logger.warn 'Unexpected EOF', :peer => @peer unless @logger.nil?
|
207
240
|
else
|
208
|
-
@logger.info
|
241
|
+
@logger.info 'Connection closed', :peer => @peer unless @logger.nil?
|
209
242
|
end
|
243
|
+
return
|
210
244
|
rescue OpenSSL::SSL::SSLError, IOError, Errno::ECONNRESET => e
|
211
245
|
# Read errors, only action is to shutdown which we'll do in ensure
|
212
|
-
@logger.warn
|
246
|
+
@logger.warn 'SSL error, connection aborted', :error => e.message, :peer => @peer unless @logger.nil?
|
247
|
+
return
|
213
248
|
rescue ProtocolError => e
|
214
249
|
# Connection abort request due to a protocol error
|
215
|
-
@logger.warn
|
250
|
+
@logger.warn 'Protocol error, connection aborted', :error => e.message, :peer => @peer unless @logger.nil?
|
251
|
+
return
|
216
252
|
rescue ShutdownSignal
|
217
253
|
# Shutting down
|
218
|
-
@logger.
|
219
|
-
|
254
|
+
@logger.info 'Server shutting down, closing connection', :peer => @peer unless @logger.nil?
|
255
|
+
return
|
256
|
+
rescue StandardError, NativeException => e
|
220
257
|
# Some other unknown problem
|
221
|
-
@logger.warn
|
222
|
-
|
258
|
+
@logger.warn e, :hint => 'Unknown error, connection aborted', :peer => @peer unless @logger.nil?
|
259
|
+
return
|
223
260
|
ensure
|
224
261
|
@fd.close rescue nil
|
225
262
|
end
|
226
263
|
|
264
|
+
def send(signature, message)
|
265
|
+
reset_timeout
|
266
|
+
data = signature + [message.length].pack('N') + message
|
267
|
+
done = 0
|
268
|
+
loop do
|
269
|
+
begin
|
270
|
+
written = @fd.write_nonblock(data[done...data.length])
|
271
|
+
rescue IO::WaitReadable
|
272
|
+
fail TimeoutError if IO.select([@fd], nil, [@fd], @timeout - Time.now.to_i).nil?
|
273
|
+
retry
|
274
|
+
rescue IO::WaitWritable
|
275
|
+
fail TimeoutError if IO.select(nil, [@fd], [@fd], @timeout - Time.now.to_i).nil?
|
276
|
+
retry
|
277
|
+
end
|
278
|
+
fail ProtocolError, "write failure (#{done}/#{data.length})" if written == 0
|
279
|
+
done += written
|
280
|
+
break if done >= data.length
|
281
|
+
end
|
282
|
+
return
|
283
|
+
end
|
284
|
+
|
285
|
+
private
|
286
|
+
|
227
287
|
def recv(need)
|
228
288
|
reset_timeout
|
229
289
|
have = ''
|
@@ -231,16 +291,16 @@ module LogCourier
|
|
231
291
|
begin
|
232
292
|
buffer = @fd.read_nonblock need - have.length
|
233
293
|
rescue IO::WaitReadable
|
234
|
-
|
294
|
+
fail TimeoutError if IO.select([@fd], nil, [@fd], @timeout - Time.now.to_i).nil?
|
235
295
|
retry
|
236
296
|
rescue IO::WaitWritable
|
237
|
-
|
297
|
+
fail TimeoutError if IO.select(nil, [@fd], [@fd], @timeout - Time.now.to_i).nil?
|
238
298
|
retry
|
239
299
|
end
|
240
300
|
if buffer.nil?
|
241
|
-
|
301
|
+
fail EOFError
|
242
302
|
elsif buffer.length == 0
|
243
|
-
|
303
|
+
fail ProtocolError, "read failure (#{have.length}/#{need})"
|
244
304
|
end
|
245
305
|
if have.length == 0
|
246
306
|
have = buffer
|
@@ -252,29 +312,10 @@ module LogCourier
|
|
252
312
|
have
|
253
313
|
end
|
254
314
|
|
255
|
-
def send(signature, message)
|
256
|
-
reset_timeout
|
257
|
-
data = signature + [message.length].pack('N') + message
|
258
|
-
done = 0
|
259
|
-
loop do
|
260
|
-
begin
|
261
|
-
written = @fd.write_nonblock(data[done...data.length])
|
262
|
-
rescue IO::WaitReadable
|
263
|
-
raise TimeoutError if IO.select([@fd], nil, [@fd], @timeout - Time.now.to_i).nil?
|
264
|
-
retry
|
265
|
-
rescue IO::WaitWritable
|
266
|
-
raise TimeoutError if IO.select(nil, [@fd], [@fd], @timeout - Time.now.to_i).nil?
|
267
|
-
retry
|
268
|
-
end
|
269
|
-
raise ProtocolError, "write failure (#{done}/#{data.length})" if written == 0
|
270
|
-
done += written
|
271
|
-
break if done >= data.length
|
272
|
-
end
|
273
|
-
end
|
274
|
-
|
275
315
|
def reset_timeout
|
276
316
|
# TODO: Make configurable
|
277
317
|
@timeout = Time.now.to_i + 1_800
|
318
|
+
return
|
278
319
|
end
|
279
320
|
end
|
280
321
|
end
|
@@ -14,20 +14,31 @@
|
|
14
14
|
# See the License for the specific language governing permissions and
|
15
15
|
# limitations under the License.
|
16
16
|
|
17
|
-
|
18
|
-
|
19
|
-
require 'ffi-rzmq-core/version'
|
20
|
-
require 'ffi-rzmq'
|
21
|
-
require 'ffi-rzmq/version'
|
22
|
-
rescue LoadError => e
|
23
|
-
raise "[LogCourierServer] Could not initialise: #{e}"
|
24
|
-
end
|
17
|
+
require 'thread'
|
18
|
+
require 'log-courier/zmq_qpoll'
|
25
19
|
|
26
20
|
module LogCourier
|
27
21
|
# ZMQ transport implementation for the server
|
28
22
|
class ServerZmq
|
29
23
|
class ZMQError < StandardError; end
|
30
24
|
|
25
|
+
class << self
|
26
|
+
@print_zmq_versions = false
|
27
|
+
|
28
|
+
def print_zmq_versions(logger)
|
29
|
+
return if @print_zmq_versions || logger.nil?
|
30
|
+
|
31
|
+
libversion = LibZMQ.version
|
32
|
+
libversion = "#{libversion[:major]}.#{libversion[:minor]}.#{libversion[:patch]}"
|
33
|
+
|
34
|
+
logger.info 'libzmq', :version => libversion
|
35
|
+
logger.info 'ffi-rzmq-core', :version => LibZMQ::VERSION
|
36
|
+
logger.info 'ffi-rzmq', :version => ZMQ.version
|
37
|
+
|
38
|
+
@print_zmq_versions = true
|
39
|
+
end
|
40
|
+
end
|
41
|
+
|
31
42
|
attr_reader :port
|
32
43
|
|
33
44
|
def initialize(options = {})
|
@@ -38,19 +49,19 @@ module LogCourier
|
|
38
49
|
address: '0.0.0.0',
|
39
50
|
curve_secret_key: nil,
|
40
51
|
max_packet_size: 10_485_760,
|
52
|
+
peer_recv_queue: 10,
|
41
53
|
}.merge!(options)
|
42
54
|
|
43
55
|
@logger = @options[:logger]
|
44
56
|
|
45
|
-
|
46
|
-
libversion = "#{libversion[:major]}.#{libversion[:minor]}.#{libversion[:patch]}"
|
57
|
+
self.class.print_zmq_versions @logger
|
47
58
|
|
48
59
|
if @options[:transport] == 'zmq'
|
49
|
-
|
60
|
+
fail "input/courier: Transport 'zmq' requires libzmq version >= 4" unless LibZMQ.version4?
|
50
61
|
|
51
|
-
|
62
|
+
fail 'input/courier: \'curve_secret_key\' is required' if @options[:curve_secret_key].nil?
|
52
63
|
|
53
|
-
|
64
|
+
fail 'input/courier: \'curve_secret_key\' must be a valid 40 character Z85 encoded string' if @options[:curve_secret_key].length != 40 || !z85validate(@options[:curve_secret_key])
|
54
65
|
end
|
55
66
|
|
56
67
|
begin
|
@@ -60,128 +71,306 @@ module LogCourier
|
|
60
71
|
|
61
72
|
if @options[:transport] == 'zmq'
|
62
73
|
rc = @socket.setsockopt(ZMQ::CURVE_SERVER, 1)
|
63
|
-
|
74
|
+
fail 'setsockopt CURVE_SERVER failure: ' + ZMQ::Util.error_string unless ZMQ::Util.resultcode_ok?(rc)
|
64
75
|
|
65
76
|
rc = @socket.setsockopt(ZMQ::CURVE_SECRETKEY, @options[:curve_secret_key])
|
66
|
-
|
77
|
+
fail 'setsockopt CURVE_SECRETKEY failure: ' + ZMQ::Util.error_string unless ZMQ::Util.resultcode_ok?(rc)
|
67
78
|
end
|
68
79
|
|
69
80
|
bind = 'tcp://' + @options[:address] + (@options[:port] == 0 ? ':*' : ':' + @options[:port].to_s)
|
70
81
|
rc = @socket.bind(bind)
|
71
|
-
|
82
|
+
fail 'failed to bind at ' + bind + ': ' + rZMQ::Util.error_string unless ZMQ::Util.resultcode_ok?(rc)
|
72
83
|
|
73
84
|
# Lookup port number that was allocated in case it was set to 0
|
74
85
|
endpoint = ''
|
75
86
|
rc = @socket.getsockopt(ZMQ::LAST_ENDPOINT, endpoint)
|
76
|
-
|
87
|
+
fail 'getsockopt LAST_ENDPOINT failure: ' + ZMQ::Util.error_string unless ZMQ::Util.resultcode_ok?(rc) && %r{\Atcp://(?:.*):(?<endpoint_port>\d+)\0\z} =~ endpoint
|
77
88
|
@port = endpoint_port.to_i
|
78
89
|
|
79
|
-
@poller = ZMQ::Poller.new
|
80
|
-
|
81
90
|
if @options[:port] == 0
|
82
|
-
@logger.warn '
|
91
|
+
@logger.warn 'Ephemeral port allocated', :transport => @options[:transport], :port => @port unless @logger.nil?
|
83
92
|
end
|
84
93
|
rescue => e
|
85
|
-
raise "
|
94
|
+
raise "input/courier: Failed to initialise: #{e}"
|
86
95
|
end
|
87
96
|
|
88
|
-
@logger.info "[LogCourierServer] libzmq version #{libversion}" unless @logger.nil?
|
89
|
-
@logger.info "[LogCourierServer] ffi-rzmq-core version #{LibZMQ::VERSION}" unless @logger.nil?
|
90
|
-
@logger.info "[LogCourierServer] ffi-rzmq version #{ZMQ.version}" unless @logger.nil?
|
91
|
-
|
92
97
|
# TODO: Implement workers option by receiving on a ROUTER and proxying to a DEALER, with workers connecting to the DEALER
|
93
98
|
|
94
|
-
|
99
|
+
# TODO: Make this send queue configurable?
|
100
|
+
@send_queue = EventQueue.new 2
|
101
|
+
@factory = ClientFactoryZmq.new(@options, @send_queue)
|
102
|
+
|
103
|
+
# Setup poller
|
104
|
+
@poller = ZMQPoll::ZMQPoll.new(@context)
|
105
|
+
@poller.register_socket @socket, ZMQ::POLLIN
|
106
|
+
@poller.register_queue_to_socket @send_queue, @socket
|
107
|
+
|
108
|
+
# Register a finaliser that sets @context to nil
|
109
|
+
# This allows us to detect the JRuby bug where during "exit!" finalisers
|
110
|
+
# are run but threads are not killed - which leaves us in a situation of
|
111
|
+
# a terminated @context (it has a terminate finalizer) and an IO thread
|
112
|
+
# looping retries
|
113
|
+
# JRuby will still crash and burn, but at least we don't spam STDOUT with
|
114
|
+
# errors
|
115
|
+
ObjectSpace.define_finalizer(self, Proc.new do
|
116
|
+
@context = nil
|
117
|
+
end)
|
118
|
+
end
|
119
|
+
|
120
|
+
def run(&block)
|
121
|
+
loop do
|
122
|
+
begin
|
123
|
+
@poller.poll(5_000) do |socket, r, w|
|
124
|
+
next if socket != @socket
|
125
|
+
next if !r
|
95
126
|
|
96
|
-
|
127
|
+
receive &block
|
128
|
+
end
|
129
|
+
rescue ZMQPoll::ZMQError => e
|
130
|
+
# Detect JRuby bug
|
131
|
+
fail e if @context.nil?
|
132
|
+
@logger.warn e, :hint => 'ZMQ recv_string failure' unless @logger.nil?
|
133
|
+
next
|
134
|
+
rescue ZMQPoll::TimeoutError
|
135
|
+
# We'll let ZeroMQ manage reconnections and new connections
|
136
|
+
# There is no point in us doing any form of reconnect ourselves
|
137
|
+
next
|
138
|
+
end
|
139
|
+
end
|
140
|
+
return
|
141
|
+
rescue ShutdownSignal
|
142
|
+
# Shutting down
|
143
|
+
@logger.warn 'Server shutting down' unless @logger.nil?
|
144
|
+
return
|
145
|
+
rescue StandardError, NativeException => e
|
146
|
+
# Some other unknown problem
|
147
|
+
@logger.warn e, :hint => 'Unknown error, shutting down' unless @logger.nil?
|
148
|
+
raise e
|
149
|
+
ensure
|
150
|
+
@poller.shutdown
|
151
|
+
@factory.shutdown
|
152
|
+
@socket.close
|
153
|
+
@context.terminate
|
97
154
|
end
|
98
155
|
|
156
|
+
private
|
157
|
+
|
99
158
|
def z85validate(z85)
|
100
159
|
# ffi-rzmq does not implement decode - but we want to validate during startup
|
101
160
|
decoded = FFI::MemoryPointer.from_string(' ' * (8 * z85.length / 10))
|
102
161
|
ret = LibZMQ.zmq_z85_decode decoded, z85
|
103
162
|
return false if ret.nil?
|
104
|
-
|
105
163
|
true
|
106
164
|
end
|
107
165
|
|
108
|
-
def
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
rc = @socket.recv_strings(data, ZMQ::DONTWAIT)
|
116
|
-
unless ZMQ::Util.resultcode_ok?(rc)
|
117
|
-
raise ZMQError, 'recv_string error: ' + ZMQ::Util.error_string if ZMQ::Util.errno != ZMQ::EAGAIN
|
118
|
-
|
119
|
-
# Wait for a message to arrive, handling timeouts
|
120
|
-
@poller.deregister @socket, ZMQ::POLLIN | ZMQ::POLLOUT
|
121
|
-
@poller.register @socket, ZMQ::POLLIN
|
122
|
-
while @poller.poll(1_000) == 0
|
123
|
-
# Using this inner while triggers pollThreadEvents in JRuby which checks for Thread.raise immediately
|
124
|
-
raise TimeoutError while Time.now.to_i >= @timeout
|
125
|
-
end
|
126
|
-
next
|
127
|
-
end
|
128
|
-
rescue ZMQError => e
|
129
|
-
@logger.warn "[LogCourierServer] ZMQ recv_string failed: #{e}" unless @logger.nil?
|
130
|
-
next
|
131
|
-
end
|
166
|
+
def receive(&block)
|
167
|
+
# Try to receive a message
|
168
|
+
data = []
|
169
|
+
rc = @socket.recv_strings(data, ZMQ::DONTWAIT)
|
170
|
+
unless ZMQ::Util.resultcode_ok?(rc)
|
171
|
+
fail ZMQError, 'recv_string error: ' + ZMQ::Util.error_string if ZMQ::Util.errno != ZMQ::EAGAIN
|
172
|
+
end
|
132
173
|
|
133
|
-
|
134
|
-
|
135
|
-
|
174
|
+
# Save the source information that appears before the null messages
|
175
|
+
source = []
|
176
|
+
source.push data.shift until data.length == 0 || data[0] == ''
|
136
177
|
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
178
|
+
if data.length == 0
|
179
|
+
@logger.warn 'Invalid message: no data', :source_length => source.length unless @logger.nil?
|
180
|
+
return
|
181
|
+
elsif data.length == 1
|
182
|
+
@logger.warn 'Invalid message: empty data', :source_length => source.length unless @logger.nil?
|
183
|
+
return
|
184
|
+
end
|
185
|
+
|
186
|
+
# Drop the null message separator
|
187
|
+
data.shift
|
188
|
+
|
189
|
+
if data.length != 1
|
190
|
+
@logger.warn 'Invalid message: multipart unexpected', :source_length => source.length, :data_length => data.length unless @logger.nil?
|
191
|
+
if !@logger.nil? && @logger.debug?
|
192
|
+
i = 0
|
193
|
+
parts = {}
|
194
|
+
data.each do |msg|
|
195
|
+
i += 1
|
196
|
+
parts[i] = "#{part.length}:[#{msg[0..31].gsub(/[^[:print:]]/, '.')}]"
|
143
197
|
end
|
198
|
+
@logger.debug 'Data', parts
|
199
|
+
end
|
200
|
+
return
|
201
|
+
end
|
202
|
+
|
203
|
+
@factory.deliver source, data.first, &block
|
204
|
+
return
|
205
|
+
end
|
206
|
+
end
|
207
|
+
|
208
|
+
class ClientFactoryZmq
|
209
|
+
attr_reader :options
|
210
|
+
attr_reader :send_queue
|
211
|
+
|
212
|
+
def initialize(options, send_queue)
|
213
|
+
@options = options
|
214
|
+
@logger = @options[:logger]
|
215
|
+
|
216
|
+
@send_queue = send_queue
|
217
|
+
@index = {}
|
218
|
+
@client_threads = {}
|
219
|
+
@mutex = Mutex.new
|
220
|
+
end
|
221
|
+
|
222
|
+
def shutdown
|
223
|
+
# Stop other threads from try_drop collisions
|
224
|
+
client_threads = @mutex.synchronize do
|
225
|
+
client_threads = @client_threads
|
226
|
+
@client_threads = {}
|
227
|
+
client_threads
|
228
|
+
end
|
229
|
+
|
230
|
+
client_threads.each_value do |thr|
|
231
|
+
thr.raise ShutdownSignal
|
232
|
+
end
|
233
|
+
|
234
|
+
client_threads.each_value(&:join)
|
235
|
+
return
|
236
|
+
end
|
144
237
|
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
238
|
+
def deliver(source, data, &block)
|
239
|
+
# Find the handling thread
|
240
|
+
# We separate each source into threads so that each thread can respond
|
241
|
+
# with partial ACKs if we hit a slow down
|
242
|
+
# If we processed in a single thread, we'd only be able to respond to
|
243
|
+
# a single client with partial ACKs
|
244
|
+
@mutex.synchronize do
|
245
|
+
index = @index
|
246
|
+
source.each do |identity|
|
247
|
+
index[identity] = {} if !index.key?(identity)
|
248
|
+
index = index[identity]
|
249
|
+
end
|
250
|
+
|
251
|
+
if !index.key?('')
|
252
|
+
source_str = source.map do |s|
|
253
|
+
s.each_byte.map do |b|
|
254
|
+
b.to_s(16).rjust(2, '0')
|
157
255
|
end
|
158
|
-
|
159
|
-
|
256
|
+
end.join
|
257
|
+
|
258
|
+
@logger.info 'New source', :source => source_str unless @logger.nil?
|
259
|
+
|
260
|
+
# Create the client and associated thread
|
261
|
+
client = ClientZmq.new(self, source, source_str) do
|
262
|
+
try_drop(source)
|
160
263
|
end
|
264
|
+
|
265
|
+
thread = Thread.new do
|
266
|
+
client.run &block
|
267
|
+
end
|
268
|
+
|
269
|
+
@client_threads[thread] = thread
|
270
|
+
|
271
|
+
index[''] = {
|
272
|
+
'client' => client,
|
273
|
+
'thread' => thread,
|
274
|
+
}
|
275
|
+
end
|
276
|
+
|
277
|
+
# Existing thread, throw on the queue, if not enough room drop the message
|
278
|
+
index['']['client'].push data, 0
|
279
|
+
end
|
280
|
+
return
|
281
|
+
end
|
282
|
+
|
283
|
+
private
|
284
|
+
|
285
|
+
def try_drop(source, source_str)
|
286
|
+
# This is called when a client goes idle, to cleanup resources
|
287
|
+
# We may tie this into zmq monitor
|
288
|
+
@mutex.synchronize do
|
289
|
+
index = @index
|
290
|
+
parents = []
|
291
|
+
source.each do |identity|
|
292
|
+
if !index.key?(identity)
|
293
|
+
@logger.warn 'Unknown idle source failed to shutdown', :source => source_str unless @logger.nil?
|
294
|
+
break
|
295
|
+
end
|
296
|
+
parents.push [index, identity]
|
297
|
+
index = index[identity]
|
298
|
+
end
|
299
|
+
|
300
|
+
if !index.key?('')
|
301
|
+
@logger.warn 'Unknown idle source failed to shutdown', :source => source_str unless @logger.nil?
|
302
|
+
break
|
303
|
+
end
|
304
|
+
|
305
|
+
# Don't allow drop if we have messages in the queue
|
306
|
+
if index['']['client'].length != 0
|
307
|
+
@logger.warn 'Failed idle source shutdown as message queue is not empty', :source => source_str unless @logger.nil?
|
308
|
+
return false
|
309
|
+
end
|
310
|
+
|
311
|
+
@logger.info 'Idle source shutting down', :source => source_str unless @logger.nil?
|
312
|
+
|
313
|
+
# Delete the entry
|
314
|
+
@client_threads.delete(index['']['thread'])
|
315
|
+
index.delete('')
|
316
|
+
|
317
|
+
# Clean up orphaned leaves
|
318
|
+
parents.reverse_each do |path|
|
319
|
+
path[0].delete(path[1]) if path[0][path[1]].length == 0
|
320
|
+
end
|
321
|
+
end
|
322
|
+
|
323
|
+
return true
|
324
|
+
end
|
325
|
+
end
|
326
|
+
|
327
|
+
class ClientZmq < EventQueue
|
328
|
+
def initialize(factory, source, source_str, &try_drop)
|
329
|
+
@factory = factory
|
330
|
+
@logger = @factory.options[:logger]
|
331
|
+
@send_queue = @factory.send_queue
|
332
|
+
@source = source
|
333
|
+
@source_str = source_str
|
334
|
+
@try_drop = try_drop
|
335
|
+
|
336
|
+
# Setup the queue for receiving events to process
|
337
|
+
super @factory.options[:peer_recv_queue]
|
338
|
+
end
|
339
|
+
|
340
|
+
def run(&block)
|
341
|
+
loop do
|
342
|
+
begin
|
343
|
+
# TODO: Make timeout configurable?
|
344
|
+
data = self.pop(30)
|
345
|
+
recv(data, &block)
|
161
346
|
rescue TimeoutError
|
162
|
-
#
|
163
|
-
|
164
|
-
|
165
|
-
reset_timeout
|
166
|
-
next
|
347
|
+
# Try to clean up resources - if we fail, new messages have arrived
|
348
|
+
retry if !@try_drop.call(@source)
|
349
|
+
break
|
167
350
|
end
|
168
351
|
end
|
352
|
+
return
|
169
353
|
rescue ShutdownSignal
|
170
354
|
# Shutting down
|
171
|
-
@logger.
|
355
|
+
@logger.info 'Source shutting down', :source => @source_str unless @logger.nil?
|
356
|
+
return
|
172
357
|
rescue StandardError, NativeException => e
|
173
358
|
# Some other unknown problem
|
174
|
-
@logger.warn
|
175
|
-
@logger.warn("[LogCourierServer] #{e.backtrace}: #{e.message} (#{e.class})") unless @logger.nil?
|
359
|
+
@logger.warn e, :hint => 'Unknown error, connection aborted', :source => @source_str unless @logger.nil?
|
176
360
|
raise e
|
177
|
-
ensure
|
178
|
-
@socket.close
|
179
|
-
@context.terminate
|
180
361
|
end
|
181
362
|
|
363
|
+
def send(signature, message)
|
364
|
+
data = signature + [message.length].pack('N') + message
|
365
|
+
@send_queue.push @source + ['', data]
|
366
|
+
return
|
367
|
+
end
|
368
|
+
|
369
|
+
private
|
370
|
+
|
182
371
|
def recv(data)
|
183
372
|
if data.length < 8
|
184
|
-
@logger.warn '
|
373
|
+
@logger.warn 'Invalid message: not enough data', :data_length => data.length, :source => @source_str unless @logger.nil?
|
185
374
|
return
|
186
375
|
end
|
187
376
|
|
@@ -190,52 +379,16 @@ module LogCourier
|
|
190
379
|
|
191
380
|
# Verify length
|
192
381
|
if data.length - 8 != length
|
193
|
-
@logger.warn
|
382
|
+
@logger.warn 'Invalid message: data has invalid length', :data_length => data.length - 8, :encoded_length => length, :source => @source_str unless @logger.nil?
|
194
383
|
return
|
195
|
-
elsif length > @options[:max_packet_size]
|
196
|
-
@logger.warn
|
384
|
+
elsif length > @factory.options[:max_packet_size]
|
385
|
+
@logger.warn 'Invalid message: packet too large', :size => length, :max_packet_size => @options[:max_packet_size], :source => @source_str unless @logger.nil?
|
197
386
|
return
|
198
387
|
end
|
199
388
|
|
200
389
|
# Yield the parts
|
201
390
|
yield signature, data[8, length], self
|
202
|
-
|
203
|
-
|
204
|
-
def send(signature, message)
|
205
|
-
data = signature + [message.length].pack('N') + message
|
206
|
-
|
207
|
-
# Send the return route and then the message
|
208
|
-
reset_timeout
|
209
|
-
@return_route.each do |msg|
|
210
|
-
send_with_poll msg, true
|
211
|
-
end
|
212
|
-
send_with_poll '', true
|
213
|
-
send_with_poll data
|
214
|
-
end
|
215
|
-
|
216
|
-
def send_with_poll(data, more = false)
|
217
|
-
loop do
|
218
|
-
# Try to send a message but never block
|
219
|
-
rc = @socket.send_string(data, (more ? ZMQ::SNDMORE : 0) | ZMQ::DONTWAIT)
|
220
|
-
break if ZMQ::Util.resultcode_ok?(rc)
|
221
|
-
if ZMQ::Util.errno != ZMQ::EAGAIN
|
222
|
-
@logger.warn "[LogCourierServer] Message send failed: #{ZMQ::Util.error_string}" unless @logger.nil?
|
223
|
-
raise TimeoutError
|
224
|
-
end
|
225
|
-
|
226
|
-
# Wait for send to become available, handling timeouts
|
227
|
-
@poller.deregister @socket, ZMQ::POLLIN | ZMQ::POLLOUT
|
228
|
-
@poller.register @socket, ZMQ::POLLOUT
|
229
|
-
while @poller.poll(1_000) == 0
|
230
|
-
# Using this inner while triggers pollThreadEvents in JRuby which checks for Thread.raise immediately
|
231
|
-
raise TimeoutError while Time.now.to_i >= @timeout
|
232
|
-
end
|
233
|
-
end
|
234
|
-
end
|
235
|
-
|
236
|
-
def reset_timeout
|
237
|
-
# TODO: Make configurable?
|
238
|
-
@timeout = Time.now.to_i + 1_800
|
391
|
+
return
|
239
392
|
end
|
240
393
|
end
|
241
394
|
end
|