log-courier 1.1 → 1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/log-courier/client.rb +23 -16
- data/lib/log-courier/client_tls.rb +66 -46
- data/lib/log-courier/event_queue.rb +34 -32
- data/lib/log-courier/server.rb +27 -10
- data/lib/log-courier/server_tcp.rb +95 -54
- data/lib/log-courier/server_zmq.rb +280 -127
- metadata +18 -4
data/lib/log-courier/server.rb
CHANGED
@@ -40,6 +40,7 @@ module LogCourier
|
|
40
40
|
}.merge!(options)
|
41
41
|
|
42
42
|
@logger = @options[:logger]
|
43
|
+
@logger['plugin'] = 'input/courier'
|
43
44
|
|
44
45
|
case @options[:transport]
|
45
46
|
when 'tcp', 'tls'
|
@@ -49,11 +50,12 @@ module LogCourier
|
|
49
50
|
require 'log-courier/server_zmq'
|
50
51
|
@server = ServerZmq.new(@options)
|
51
52
|
else
|
52
|
-
|
53
|
+
fail 'input/courier: \'transport\' must be tcp, tls, plainzmq or zmq'
|
53
54
|
end
|
54
55
|
|
55
|
-
# Grab the port back
|
56
|
+
# Grab the port back and update the logger context
|
56
57
|
@port = @server.port
|
58
|
+
@logger['port'] = @port unless @logger.nil?
|
57
59
|
|
58
60
|
# Load the json adapter
|
59
61
|
@json_adapter = MultiJson.adapter.instance
|
@@ -75,7 +77,11 @@ module LogCourier
|
|
75
77
|
when 'JDAT'
|
76
78
|
process_jdat message, comm, event_queue
|
77
79
|
else
|
78
|
-
|
80
|
+
if comm.peer.nil?
|
81
|
+
@logger.warn 'Unknown message received', :from => 'unknown' unless @logger.nil?
|
82
|
+
else
|
83
|
+
@logger.warn 'Unknown message received', :from => comm.peer unless @logger.nil?
|
84
|
+
end
|
79
85
|
# Don't kill a client that sends a bad message
|
80
86
|
# Just reject it and let it send it again, potentially to another server
|
81
87
|
comm.send '????', ''
|
@@ -93,17 +99,21 @@ module LogCourier
|
|
93
99
|
server_thread.join
|
94
100
|
end
|
95
101
|
end
|
102
|
+
return
|
96
103
|
end
|
97
104
|
|
105
|
+
private
|
106
|
+
|
98
107
|
def process_ping(message, comm)
|
99
108
|
# Size of message should be 0
|
100
109
|
if message.length != 0
|
101
|
-
|
110
|
+
fail ProtocolError, "unexpected data attached to ping message (#{message.length})"
|
102
111
|
end
|
103
112
|
|
104
113
|
# PONG!
|
105
114
|
# NOTE: comm.send can raise a Timeout::Error of its own
|
106
115
|
comm.send 'PONG', ''
|
116
|
+
return
|
107
117
|
end
|
108
118
|
|
109
119
|
def process_jdat(message, comm, event_queue)
|
@@ -114,11 +124,17 @@ module LogCourier
|
|
114
124
|
# This allows the client to know what is being acknowledged
|
115
125
|
# Nonce is 16 so check we have enough
|
116
126
|
if message.length < 17
|
117
|
-
|
127
|
+
fail ProtocolError, "JDAT message too small (#{message.length})"
|
118
128
|
end
|
119
129
|
|
120
130
|
nonce = message[0...16]
|
121
131
|
|
132
|
+
if !@logger.nil? && @logger.debug?
|
133
|
+
nonce_str = nonce.each_byte.map do |b|
|
134
|
+
b.to_s(16).rjust(2, '0')
|
135
|
+
end
|
136
|
+
end
|
137
|
+
|
122
138
|
# The remainder of the message is the compressed data block
|
123
139
|
message = StringIO.new Zlib::Inflate.inflate(message[16...message.length])
|
124
140
|
|
@@ -136,7 +152,7 @@ module LogCourier
|
|
136
152
|
# Finished!
|
137
153
|
break
|
138
154
|
elsif length_buf.length < 4
|
139
|
-
|
155
|
+
fail ProtocolError, "JDAT length extraction failed (#{ret} #{length_buf.length})"
|
140
156
|
end
|
141
157
|
|
142
158
|
length = length_buf.unpack('N').first
|
@@ -145,7 +161,7 @@ module LogCourier
|
|
145
161
|
ret = message.read length, data_buf
|
146
162
|
if ret.nil? or data_buf.length < length
|
147
163
|
@logger.warn()
|
148
|
-
|
164
|
+
fail ProtocolError, "JDAT message extraction failed #{ret} #{data_buf.length}"
|
149
165
|
end
|
150
166
|
|
151
167
|
data_buf.force_encoding('utf-8')
|
@@ -161,7 +177,7 @@ module LogCourier
|
|
161
177
|
begin
|
162
178
|
event = @json_adapter.load(data_buf, @json_options)
|
163
179
|
rescue MultiJson::ParseError => e
|
164
|
-
@logger.warn
|
180
|
+
@logger.warn e, :hint => 'JSON parse failure, falling back to plain-text' unless @logger.nil?
|
165
181
|
event = { 'message' => data_buf }
|
166
182
|
end
|
167
183
|
|
@@ -171,7 +187,7 @@ module LogCourier
|
|
171
187
|
rescue TimeoutError
|
172
188
|
# Full pipeline, partial ack
|
173
189
|
# NOTE: comm.send can raise a Timeout::Error of its own
|
174
|
-
@logger.debug
|
190
|
+
@logger.debug 'Partially acknowledging message', :nonce => nonce_str.join, :sequence => sequence if !@logger.nil? && @logger.debug?
|
175
191
|
comm.send 'ACKN', [nonce, sequence].pack('A*N')
|
176
192
|
ack_timeout = Time.now.to_i + 5
|
177
193
|
retry
|
@@ -182,8 +198,9 @@ module LogCourier
|
|
182
198
|
|
183
199
|
# Acknowledge the full message
|
184
200
|
# NOTE: comm.send can raise a Timeout::Error
|
185
|
-
@logger.debug
|
201
|
+
@logger.debug 'Acknowledging message', :nonce => nonce_str.join, :sequence => sequence if !@logger.nil? && @logger.debug?
|
186
202
|
comm.send 'ACKN', [nonce, sequence].pack('A*N')
|
203
|
+
return
|
187
204
|
end
|
188
205
|
end
|
189
206
|
end
|
@@ -24,13 +24,25 @@ require 'thread'
|
|
24
24
|
module LogCourier
|
25
25
|
# Wrap around TCPServer to grab last error for use in reporting which peer had an error
|
26
26
|
class ExtendedTCPServer < TCPServer
|
27
|
-
|
27
|
+
attr_reader :peer
|
28
|
+
|
29
|
+
def initialise
|
30
|
+
reset_peer
|
31
|
+
super
|
32
|
+
end
|
33
|
+
|
34
|
+
# Save the peer
|
28
35
|
def accept
|
29
36
|
sock = super
|
30
37
|
peer = sock.peeraddr(:numeric)
|
31
|
-
|
38
|
+
@peer = "#{peer[2]}:#{peer[1]}"
|
32
39
|
return sock
|
33
40
|
end
|
41
|
+
|
42
|
+
def reset_peer
|
43
|
+
@peer = 'unknown'
|
44
|
+
return
|
45
|
+
end
|
34
46
|
end
|
35
47
|
|
36
48
|
# TLS transport implementation for server
|
@@ -57,11 +69,11 @@ module LogCourier
|
|
57
69
|
|
58
70
|
if @options[:transport] == 'tls'
|
59
71
|
[:ssl_certificate, :ssl_key].each do |k|
|
60
|
-
|
72
|
+
fail "input/courier: '#{k}' is required" if @options[k].nil?
|
61
73
|
end
|
62
74
|
|
63
75
|
if @options[:ssl_verify] and (!@options[:ssl_verify_default_ca] && @options[:ssl_verify_ca].nil?)
|
64
|
-
|
76
|
+
fail 'input/courier: Either \'ssl_verify_default_ca\' or \'ssl_verify_ca\' must be specified when ssl_verify is true'
|
65
77
|
end
|
66
78
|
end
|
67
79
|
|
@@ -94,16 +106,18 @@ module LogCourier
|
|
94
106
|
ssl.verify_mode = OpenSSL::SSL::VERIFY_PEER | OpenSSL::SSL::VERIFY_FAIL_IF_NO_PEER_CERT
|
95
107
|
end
|
96
108
|
|
109
|
+
# Create the OpenSSL server - set start_immediately to false so we can multithread handshake
|
97
110
|
@server = OpenSSL::SSL::SSLServer.new(@tcp_server, ssl)
|
111
|
+
@server.start_immediately = false
|
98
112
|
else
|
99
113
|
@server = @tcp_server
|
100
114
|
end
|
101
115
|
|
102
116
|
if @options[:port] == 0
|
103
|
-
@logger.warn '
|
117
|
+
@logger.warn 'Ephemeral port allocated', :transport => @options[:transport], :port => @port unless @logger.nil?
|
104
118
|
end
|
105
119
|
rescue => e
|
106
|
-
raise "
|
120
|
+
raise "input/courier: Failed to initialise: #{e}"
|
107
121
|
end
|
108
122
|
end # def initialize
|
109
123
|
|
@@ -111,20 +125,20 @@ module LogCourier
|
|
111
125
|
client_threads = {}
|
112
126
|
|
113
127
|
loop do
|
114
|
-
#
|
128
|
+
# Because start_immediately is false, TCP accept is single thread but
|
129
|
+
# handshake is essentially multithreaded as we defer it to the thread
|
130
|
+
@tcp_server.reset_peer
|
131
|
+
client = nil
|
115
132
|
begin
|
116
133
|
client = @server.accept
|
117
134
|
rescue EOFError, OpenSSL::SSL::SSLError, IOError => e
|
118
|
-
#
|
119
|
-
peer
|
120
|
-
|
121
|
-
client.close rescue nil
|
135
|
+
# Accept failure or other issue
|
136
|
+
@logger.warn 'Connection failed to accept', :error => e.message, :peer => @tcp_server.peer unless @logger.nil
|
137
|
+
client.close rescue nil unless client.nil?
|
122
138
|
next
|
123
139
|
end
|
124
140
|
|
125
|
-
|
126
|
-
|
127
|
-
@logger.info "[LogCourierServer] New connection from #{peer}" unless @logger.nil?
|
141
|
+
@logger.info 'New connection', :peer => @tcp_server.peer unless @logger.nil?
|
128
142
|
|
129
143
|
# Clear up finished threads
|
130
144
|
client_threads.delete_if do |_, thr|
|
@@ -132,17 +146,16 @@ module LogCourier
|
|
132
146
|
end
|
133
147
|
|
134
148
|
# Start a new connection thread
|
135
|
-
client_threads[client] = Thread.new(client, peer) do |client_copy, peer_copy|
|
136
|
-
|
149
|
+
client_threads[client] = Thread.new(client, @tcp_server.peer) do |client_copy, peer_copy|
|
150
|
+
run_thread client_copy, peer_copy, &block
|
137
151
|
end
|
138
152
|
end
|
153
|
+
return
|
139
154
|
rescue ShutdownSignal
|
140
|
-
|
141
|
-
0
|
155
|
+
return
|
142
156
|
rescue StandardError, NativeException => e
|
143
157
|
# Some other unknown problem
|
144
|
-
@logger.warn
|
145
|
-
@logger.warn("[LogCourierServer] #{e.backtrace}: #{e.message} (#{e.class})") unless @logger.nil?
|
158
|
+
@logger.warn e, :hint => 'Unknown error, shutting down' unless @logger.nil?
|
146
159
|
raise e
|
147
160
|
ensure
|
148
161
|
# Raise shutdown in all client threads and join them
|
@@ -154,6 +167,24 @@ module LogCourier
|
|
154
167
|
|
155
168
|
@tcp_server.close
|
156
169
|
end
|
170
|
+
|
171
|
+
private
|
172
|
+
|
173
|
+
def run_thread(client, peer, &block)
|
174
|
+
# Perform the handshake inside the new thread so we don't block TCP accept
|
175
|
+
if @options[:transport] == 'tls'
|
176
|
+
begin
|
177
|
+
client.accept
|
178
|
+
rescue EOFError, OpenSSL::SSL::SSLError, IOError => e
|
179
|
+
# Handshake failure or other issue
|
180
|
+
@logger.warn 'Connection failed to initialise', :error => e.message, :peer => peer unless @logger.nil?
|
181
|
+
client.close
|
182
|
+
return
|
183
|
+
end
|
184
|
+
end
|
185
|
+
|
186
|
+
ConnectionTcp.new(@logger, client, peer, @options).run(&block)
|
187
|
+
end
|
157
188
|
end
|
158
189
|
|
159
190
|
# Representation of a single connected client
|
@@ -179,7 +210,7 @@ module LogCourier
|
|
179
210
|
|
180
211
|
# Sanity
|
181
212
|
if length > @options[:max_packet_size]
|
182
|
-
|
213
|
+
fail ProtocolError, "packet too large (#{length} > #{@options[:max_packet_size]})"
|
183
214
|
end
|
184
215
|
|
185
216
|
# While we're processing, EOF is bad as it may occur during send
|
@@ -198,32 +229,61 @@ module LogCourier
|
|
198
229
|
# If we EOF next it's a graceful close
|
199
230
|
@in_progress = false
|
200
231
|
end
|
232
|
+
return
|
201
233
|
rescue TimeoutError
|
202
234
|
# Timeout of the connection, we were idle too long without a ping/pong
|
203
|
-
@logger.warn
|
235
|
+
@logger.warn 'Connection timed out', :peer => @peer unless @logger.nil?
|
236
|
+
return
|
204
237
|
rescue EOFError
|
205
238
|
if @in_progress
|
206
|
-
@logger.warn
|
239
|
+
@logger.warn 'Unexpected EOF', :peer => @peer unless @logger.nil?
|
207
240
|
else
|
208
|
-
@logger.info
|
241
|
+
@logger.info 'Connection closed', :peer => @peer unless @logger.nil?
|
209
242
|
end
|
243
|
+
return
|
210
244
|
rescue OpenSSL::SSL::SSLError, IOError, Errno::ECONNRESET => e
|
211
245
|
# Read errors, only action is to shutdown which we'll do in ensure
|
212
|
-
@logger.warn
|
246
|
+
@logger.warn 'SSL error, connection aborted', :error => e.message, :peer => @peer unless @logger.nil?
|
247
|
+
return
|
213
248
|
rescue ProtocolError => e
|
214
249
|
# Connection abort request due to a protocol error
|
215
|
-
@logger.warn
|
250
|
+
@logger.warn 'Protocol error, connection aborted', :error => e.message, :peer => @peer unless @logger.nil?
|
251
|
+
return
|
216
252
|
rescue ShutdownSignal
|
217
253
|
# Shutting down
|
218
|
-
@logger.
|
219
|
-
|
254
|
+
@logger.info 'Server shutting down, closing connection', :peer => @peer unless @logger.nil?
|
255
|
+
return
|
256
|
+
rescue StandardError, NativeException => e
|
220
257
|
# Some other unknown problem
|
221
|
-
@logger.warn
|
222
|
-
|
258
|
+
@logger.warn e, :hint => 'Unknown error, connection aborted', :peer => @peer unless @logger.nil?
|
259
|
+
return
|
223
260
|
ensure
|
224
261
|
@fd.close rescue nil
|
225
262
|
end
|
226
263
|
|
264
|
+
def send(signature, message)
|
265
|
+
reset_timeout
|
266
|
+
data = signature + [message.length].pack('N') + message
|
267
|
+
done = 0
|
268
|
+
loop do
|
269
|
+
begin
|
270
|
+
written = @fd.write_nonblock(data[done...data.length])
|
271
|
+
rescue IO::WaitReadable
|
272
|
+
fail TimeoutError if IO.select([@fd], nil, [@fd], @timeout - Time.now.to_i).nil?
|
273
|
+
retry
|
274
|
+
rescue IO::WaitWritable
|
275
|
+
fail TimeoutError if IO.select(nil, [@fd], [@fd], @timeout - Time.now.to_i).nil?
|
276
|
+
retry
|
277
|
+
end
|
278
|
+
fail ProtocolError, "write failure (#{done}/#{data.length})" if written == 0
|
279
|
+
done += written
|
280
|
+
break if done >= data.length
|
281
|
+
end
|
282
|
+
return
|
283
|
+
end
|
284
|
+
|
285
|
+
private
|
286
|
+
|
227
287
|
def recv(need)
|
228
288
|
reset_timeout
|
229
289
|
have = ''
|
@@ -231,16 +291,16 @@ module LogCourier
|
|
231
291
|
begin
|
232
292
|
buffer = @fd.read_nonblock need - have.length
|
233
293
|
rescue IO::WaitReadable
|
234
|
-
|
294
|
+
fail TimeoutError if IO.select([@fd], nil, [@fd], @timeout - Time.now.to_i).nil?
|
235
295
|
retry
|
236
296
|
rescue IO::WaitWritable
|
237
|
-
|
297
|
+
fail TimeoutError if IO.select(nil, [@fd], [@fd], @timeout - Time.now.to_i).nil?
|
238
298
|
retry
|
239
299
|
end
|
240
300
|
if buffer.nil?
|
241
|
-
|
301
|
+
fail EOFError
|
242
302
|
elsif buffer.length == 0
|
243
|
-
|
303
|
+
fail ProtocolError, "read failure (#{have.length}/#{need})"
|
244
304
|
end
|
245
305
|
if have.length == 0
|
246
306
|
have = buffer
|
@@ -252,29 +312,10 @@ module LogCourier
|
|
252
312
|
have
|
253
313
|
end
|
254
314
|
|
255
|
-
def send(signature, message)
|
256
|
-
reset_timeout
|
257
|
-
data = signature + [message.length].pack('N') + message
|
258
|
-
done = 0
|
259
|
-
loop do
|
260
|
-
begin
|
261
|
-
written = @fd.write_nonblock(data[done...data.length])
|
262
|
-
rescue IO::WaitReadable
|
263
|
-
raise TimeoutError if IO.select([@fd], nil, [@fd], @timeout - Time.now.to_i).nil?
|
264
|
-
retry
|
265
|
-
rescue IO::WaitWritable
|
266
|
-
raise TimeoutError if IO.select(nil, [@fd], [@fd], @timeout - Time.now.to_i).nil?
|
267
|
-
retry
|
268
|
-
end
|
269
|
-
raise ProtocolError, "write failure (#{done}/#{data.length})" if written == 0
|
270
|
-
done += written
|
271
|
-
break if done >= data.length
|
272
|
-
end
|
273
|
-
end
|
274
|
-
|
275
315
|
def reset_timeout
|
276
316
|
# TODO: Make configurable
|
277
317
|
@timeout = Time.now.to_i + 1_800
|
318
|
+
return
|
278
319
|
end
|
279
320
|
end
|
280
321
|
end
|
@@ -14,20 +14,31 @@
|
|
14
14
|
# See the License for the specific language governing permissions and
|
15
15
|
# limitations under the License.
|
16
16
|
|
17
|
-
|
18
|
-
|
19
|
-
require 'ffi-rzmq-core/version'
|
20
|
-
require 'ffi-rzmq'
|
21
|
-
require 'ffi-rzmq/version'
|
22
|
-
rescue LoadError => e
|
23
|
-
raise "[LogCourierServer] Could not initialise: #{e}"
|
24
|
-
end
|
17
|
+
require 'thread'
|
18
|
+
require 'log-courier/zmq_qpoll'
|
25
19
|
|
26
20
|
module LogCourier
|
27
21
|
# ZMQ transport implementation for the server
|
28
22
|
class ServerZmq
|
29
23
|
class ZMQError < StandardError; end
|
30
24
|
|
25
|
+
class << self
|
26
|
+
@print_zmq_versions = false
|
27
|
+
|
28
|
+
def print_zmq_versions(logger)
|
29
|
+
return if @print_zmq_versions || logger.nil?
|
30
|
+
|
31
|
+
libversion = LibZMQ.version
|
32
|
+
libversion = "#{libversion[:major]}.#{libversion[:minor]}.#{libversion[:patch]}"
|
33
|
+
|
34
|
+
logger.info 'libzmq', :version => libversion
|
35
|
+
logger.info 'ffi-rzmq-core', :version => LibZMQ::VERSION
|
36
|
+
logger.info 'ffi-rzmq', :version => ZMQ.version
|
37
|
+
|
38
|
+
@print_zmq_versions = true
|
39
|
+
end
|
40
|
+
end
|
41
|
+
|
31
42
|
attr_reader :port
|
32
43
|
|
33
44
|
def initialize(options = {})
|
@@ -38,19 +49,19 @@ module LogCourier
|
|
38
49
|
address: '0.0.0.0',
|
39
50
|
curve_secret_key: nil,
|
40
51
|
max_packet_size: 10_485_760,
|
52
|
+
peer_recv_queue: 10,
|
41
53
|
}.merge!(options)
|
42
54
|
|
43
55
|
@logger = @options[:logger]
|
44
56
|
|
45
|
-
|
46
|
-
libversion = "#{libversion[:major]}.#{libversion[:minor]}.#{libversion[:patch]}"
|
57
|
+
self.class.print_zmq_versions @logger
|
47
58
|
|
48
59
|
if @options[:transport] == 'zmq'
|
49
|
-
|
60
|
+
fail "input/courier: Transport 'zmq' requires libzmq version >= 4" unless LibZMQ.version4?
|
50
61
|
|
51
|
-
|
62
|
+
fail 'input/courier: \'curve_secret_key\' is required' if @options[:curve_secret_key].nil?
|
52
63
|
|
53
|
-
|
64
|
+
fail 'input/courier: \'curve_secret_key\' must be a valid 40 character Z85 encoded string' if @options[:curve_secret_key].length != 40 || !z85validate(@options[:curve_secret_key])
|
54
65
|
end
|
55
66
|
|
56
67
|
begin
|
@@ -60,128 +71,306 @@ module LogCourier
|
|
60
71
|
|
61
72
|
if @options[:transport] == 'zmq'
|
62
73
|
rc = @socket.setsockopt(ZMQ::CURVE_SERVER, 1)
|
63
|
-
|
74
|
+
fail 'setsockopt CURVE_SERVER failure: ' + ZMQ::Util.error_string unless ZMQ::Util.resultcode_ok?(rc)
|
64
75
|
|
65
76
|
rc = @socket.setsockopt(ZMQ::CURVE_SECRETKEY, @options[:curve_secret_key])
|
66
|
-
|
77
|
+
fail 'setsockopt CURVE_SECRETKEY failure: ' + ZMQ::Util.error_string unless ZMQ::Util.resultcode_ok?(rc)
|
67
78
|
end
|
68
79
|
|
69
80
|
bind = 'tcp://' + @options[:address] + (@options[:port] == 0 ? ':*' : ':' + @options[:port].to_s)
|
70
81
|
rc = @socket.bind(bind)
|
71
|
-
|
82
|
+
fail 'failed to bind at ' + bind + ': ' + rZMQ::Util.error_string unless ZMQ::Util.resultcode_ok?(rc)
|
72
83
|
|
73
84
|
# Lookup port number that was allocated in case it was set to 0
|
74
85
|
endpoint = ''
|
75
86
|
rc = @socket.getsockopt(ZMQ::LAST_ENDPOINT, endpoint)
|
76
|
-
|
87
|
+
fail 'getsockopt LAST_ENDPOINT failure: ' + ZMQ::Util.error_string unless ZMQ::Util.resultcode_ok?(rc) && %r{\Atcp://(?:.*):(?<endpoint_port>\d+)\0\z} =~ endpoint
|
77
88
|
@port = endpoint_port.to_i
|
78
89
|
|
79
|
-
@poller = ZMQ::Poller.new
|
80
|
-
|
81
90
|
if @options[:port] == 0
|
82
|
-
@logger.warn '
|
91
|
+
@logger.warn 'Ephemeral port allocated', :transport => @options[:transport], :port => @port unless @logger.nil?
|
83
92
|
end
|
84
93
|
rescue => e
|
85
|
-
raise "
|
94
|
+
raise "input/courier: Failed to initialise: #{e}"
|
86
95
|
end
|
87
96
|
|
88
|
-
@logger.info "[LogCourierServer] libzmq version #{libversion}" unless @logger.nil?
|
89
|
-
@logger.info "[LogCourierServer] ffi-rzmq-core version #{LibZMQ::VERSION}" unless @logger.nil?
|
90
|
-
@logger.info "[LogCourierServer] ffi-rzmq version #{ZMQ.version}" unless @logger.nil?
|
91
|
-
|
92
97
|
# TODO: Implement workers option by receiving on a ROUTER and proxying to a DEALER, with workers connecting to the DEALER
|
93
98
|
|
94
|
-
|
99
|
+
# TODO: Make this send queue configurable?
|
100
|
+
@send_queue = EventQueue.new 2
|
101
|
+
@factory = ClientFactoryZmq.new(@options, @send_queue)
|
102
|
+
|
103
|
+
# Setup poller
|
104
|
+
@poller = ZMQPoll::ZMQPoll.new(@context)
|
105
|
+
@poller.register_socket @socket, ZMQ::POLLIN
|
106
|
+
@poller.register_queue_to_socket @send_queue, @socket
|
107
|
+
|
108
|
+
# Register a finaliser that sets @context to nil
|
109
|
+
# This allows us to detect the JRuby bug where during "exit!" finalisers
|
110
|
+
# are run but threads are not killed - which leaves us in a situation of
|
111
|
+
# a terminated @context (it has a terminate finalizer) and an IO thread
|
112
|
+
# looping retries
|
113
|
+
# JRuby will still crash and burn, but at least we don't spam STDOUT with
|
114
|
+
# errors
|
115
|
+
ObjectSpace.define_finalizer(self, Proc.new do
|
116
|
+
@context = nil
|
117
|
+
end)
|
118
|
+
end
|
119
|
+
|
120
|
+
def run(&block)
|
121
|
+
loop do
|
122
|
+
begin
|
123
|
+
@poller.poll(5_000) do |socket, r, w|
|
124
|
+
next if socket != @socket
|
125
|
+
next if !r
|
95
126
|
|
96
|
-
|
127
|
+
receive &block
|
128
|
+
end
|
129
|
+
rescue ZMQPoll::ZMQError => e
|
130
|
+
# Detect JRuby bug
|
131
|
+
fail e if @context.nil?
|
132
|
+
@logger.warn e, :hint => 'ZMQ recv_string failure' unless @logger.nil?
|
133
|
+
next
|
134
|
+
rescue ZMQPoll::TimeoutError
|
135
|
+
# We'll let ZeroMQ manage reconnections and new connections
|
136
|
+
# There is no point in us doing any form of reconnect ourselves
|
137
|
+
next
|
138
|
+
end
|
139
|
+
end
|
140
|
+
return
|
141
|
+
rescue ShutdownSignal
|
142
|
+
# Shutting down
|
143
|
+
@logger.warn 'Server shutting down' unless @logger.nil?
|
144
|
+
return
|
145
|
+
rescue StandardError, NativeException => e
|
146
|
+
# Some other unknown problem
|
147
|
+
@logger.warn e, :hint => 'Unknown error, shutting down' unless @logger.nil?
|
148
|
+
raise e
|
149
|
+
ensure
|
150
|
+
@poller.shutdown
|
151
|
+
@factory.shutdown
|
152
|
+
@socket.close
|
153
|
+
@context.terminate
|
97
154
|
end
|
98
155
|
|
156
|
+
private
|
157
|
+
|
99
158
|
def z85validate(z85)
|
100
159
|
# ffi-rzmq does not implement decode - but we want to validate during startup
|
101
160
|
decoded = FFI::MemoryPointer.from_string(' ' * (8 * z85.length / 10))
|
102
161
|
ret = LibZMQ.zmq_z85_decode decoded, z85
|
103
162
|
return false if ret.nil?
|
104
|
-
|
105
163
|
true
|
106
164
|
end
|
107
165
|
|
108
|
-
def
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
rc = @socket.recv_strings(data, ZMQ::DONTWAIT)
|
116
|
-
unless ZMQ::Util.resultcode_ok?(rc)
|
117
|
-
raise ZMQError, 'recv_string error: ' + ZMQ::Util.error_string if ZMQ::Util.errno != ZMQ::EAGAIN
|
118
|
-
|
119
|
-
# Wait for a message to arrive, handling timeouts
|
120
|
-
@poller.deregister @socket, ZMQ::POLLIN | ZMQ::POLLOUT
|
121
|
-
@poller.register @socket, ZMQ::POLLIN
|
122
|
-
while @poller.poll(1_000) == 0
|
123
|
-
# Using this inner while triggers pollThreadEvents in JRuby which checks for Thread.raise immediately
|
124
|
-
raise TimeoutError while Time.now.to_i >= @timeout
|
125
|
-
end
|
126
|
-
next
|
127
|
-
end
|
128
|
-
rescue ZMQError => e
|
129
|
-
@logger.warn "[LogCourierServer] ZMQ recv_string failed: #{e}" unless @logger.nil?
|
130
|
-
next
|
131
|
-
end
|
166
|
+
def receive(&block)
|
167
|
+
# Try to receive a message
|
168
|
+
data = []
|
169
|
+
rc = @socket.recv_strings(data, ZMQ::DONTWAIT)
|
170
|
+
unless ZMQ::Util.resultcode_ok?(rc)
|
171
|
+
fail ZMQError, 'recv_string error: ' + ZMQ::Util.error_string if ZMQ::Util.errno != ZMQ::EAGAIN
|
172
|
+
end
|
132
173
|
|
133
|
-
|
134
|
-
|
135
|
-
|
174
|
+
# Save the source information that appears before the null messages
|
175
|
+
source = []
|
176
|
+
source.push data.shift until data.length == 0 || data[0] == ''
|
136
177
|
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
178
|
+
if data.length == 0
|
179
|
+
@logger.warn 'Invalid message: no data', :source_length => source.length unless @logger.nil?
|
180
|
+
return
|
181
|
+
elsif data.length == 1
|
182
|
+
@logger.warn 'Invalid message: empty data', :source_length => source.length unless @logger.nil?
|
183
|
+
return
|
184
|
+
end
|
185
|
+
|
186
|
+
# Drop the null message separator
|
187
|
+
data.shift
|
188
|
+
|
189
|
+
if data.length != 1
|
190
|
+
@logger.warn 'Invalid message: multipart unexpected', :source_length => source.length, :data_length => data.length unless @logger.nil?
|
191
|
+
if !@logger.nil? && @logger.debug?
|
192
|
+
i = 0
|
193
|
+
parts = {}
|
194
|
+
data.each do |msg|
|
195
|
+
i += 1
|
196
|
+
parts[i] = "#{part.length}:[#{msg[0..31].gsub(/[^[:print:]]/, '.')}]"
|
143
197
|
end
|
198
|
+
@logger.debug 'Data', parts
|
199
|
+
end
|
200
|
+
return
|
201
|
+
end
|
202
|
+
|
203
|
+
@factory.deliver source, data.first, &block
|
204
|
+
return
|
205
|
+
end
|
206
|
+
end
|
207
|
+
|
208
|
+
class ClientFactoryZmq
|
209
|
+
attr_reader :options
|
210
|
+
attr_reader :send_queue
|
211
|
+
|
212
|
+
def initialize(options, send_queue)
|
213
|
+
@options = options
|
214
|
+
@logger = @options[:logger]
|
215
|
+
|
216
|
+
@send_queue = send_queue
|
217
|
+
@index = {}
|
218
|
+
@client_threads = {}
|
219
|
+
@mutex = Mutex.new
|
220
|
+
end
|
221
|
+
|
222
|
+
def shutdown
|
223
|
+
# Stop other threads from try_drop collisions
|
224
|
+
client_threads = @mutex.synchronize do
|
225
|
+
client_threads = @client_threads
|
226
|
+
@client_threads = {}
|
227
|
+
client_threads
|
228
|
+
end
|
229
|
+
|
230
|
+
client_threads.each_value do |thr|
|
231
|
+
thr.raise ShutdownSignal
|
232
|
+
end
|
233
|
+
|
234
|
+
client_threads.each_value(&:join)
|
235
|
+
return
|
236
|
+
end
|
144
237
|
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
238
|
+
def deliver(source, data, &block)
|
239
|
+
# Find the handling thread
|
240
|
+
# We separate each source into threads so that each thread can respond
|
241
|
+
# with partial ACKs if we hit a slow down
|
242
|
+
# If we processed in a single thread, we'd only be able to respond to
|
243
|
+
# a single client with partial ACKs
|
244
|
+
@mutex.synchronize do
|
245
|
+
index = @index
|
246
|
+
source.each do |identity|
|
247
|
+
index[identity] = {} if !index.key?(identity)
|
248
|
+
index = index[identity]
|
249
|
+
end
|
250
|
+
|
251
|
+
if !index.key?('')
|
252
|
+
source_str = source.map do |s|
|
253
|
+
s.each_byte.map do |b|
|
254
|
+
b.to_s(16).rjust(2, '0')
|
157
255
|
end
|
158
|
-
|
159
|
-
|
256
|
+
end.join
|
257
|
+
|
258
|
+
@logger.info 'New source', :source => source_str unless @logger.nil?
|
259
|
+
|
260
|
+
# Create the client and associated thread
|
261
|
+
client = ClientZmq.new(self, source, source_str) do
|
262
|
+
try_drop(source)
|
160
263
|
end
|
264
|
+
|
265
|
+
thread = Thread.new do
|
266
|
+
client.run &block
|
267
|
+
end
|
268
|
+
|
269
|
+
@client_threads[thread] = thread
|
270
|
+
|
271
|
+
index[''] = {
|
272
|
+
'client' => client,
|
273
|
+
'thread' => thread,
|
274
|
+
}
|
275
|
+
end
|
276
|
+
|
277
|
+
# Existing thread, throw on the queue, if not enough room drop the message
|
278
|
+
index['']['client'].push data, 0
|
279
|
+
end
|
280
|
+
return
|
281
|
+
end
|
282
|
+
|
283
|
+
private
|
284
|
+
|
285
|
+
def try_drop(source, source_str)
|
286
|
+
# This is called when a client goes idle, to cleanup resources
|
287
|
+
# We may tie this into zmq monitor
|
288
|
+
@mutex.synchronize do
|
289
|
+
index = @index
|
290
|
+
parents = []
|
291
|
+
source.each do |identity|
|
292
|
+
if !index.key?(identity)
|
293
|
+
@logger.warn 'Unknown idle source failed to shutdown', :source => source_str unless @logger.nil?
|
294
|
+
break
|
295
|
+
end
|
296
|
+
parents.push [index, identity]
|
297
|
+
index = index[identity]
|
298
|
+
end
|
299
|
+
|
300
|
+
if !index.key?('')
|
301
|
+
@logger.warn 'Unknown idle source failed to shutdown', :source => source_str unless @logger.nil?
|
302
|
+
break
|
303
|
+
end
|
304
|
+
|
305
|
+
# Don't allow drop if we have messages in the queue
|
306
|
+
if index['']['client'].length != 0
|
307
|
+
@logger.warn 'Failed idle source shutdown as message queue is not empty', :source => source_str unless @logger.nil?
|
308
|
+
return false
|
309
|
+
end
|
310
|
+
|
311
|
+
@logger.info 'Idle source shutting down', :source => source_str unless @logger.nil?
|
312
|
+
|
313
|
+
# Delete the entry
|
314
|
+
@client_threads.delete(index['']['thread'])
|
315
|
+
index.delete('')
|
316
|
+
|
317
|
+
# Cleanup orphaned leafs
|
318
|
+
parents.reverse_each do |path|
|
319
|
+
path[0].delete(path[1]) if path[0][path[1]].length == 0
|
320
|
+
end
|
321
|
+
end
|
322
|
+
|
323
|
+
return true
|
324
|
+
end
|
325
|
+
end
|
326
|
+
|
327
|
+
class ClientZmq < EventQueue
|
328
|
+
def initialize(factory, source, source_str, &try_drop)
|
329
|
+
@factory = factory
|
330
|
+
@logger = @factory.options[:logger]
|
331
|
+
@send_queue = @factory.send_queue
|
332
|
+
@source = source
|
333
|
+
@source_str = source_str
|
334
|
+
@try_drop = try_drop
|
335
|
+
|
336
|
+
# Setup the queue for receiving events to process
|
337
|
+
super @factory.options[:peer_recv_queue]
|
338
|
+
end
|
339
|
+
|
340
|
+
def run(&block)
|
341
|
+
loop do
|
342
|
+
begin
|
343
|
+
# TODO: Make timeout configurable?
|
344
|
+
data = self.pop(30)
|
345
|
+
recv(data, &block)
|
161
346
|
rescue TimeoutError
|
162
|
-
#
|
163
|
-
|
164
|
-
|
165
|
-
reset_timeout
|
166
|
-
next
|
347
|
+
# Try to clean up resources - if we fail, new messages have arrived
|
348
|
+
retry if !@try_drop.call(@source)
|
349
|
+
break
|
167
350
|
end
|
168
351
|
end
|
352
|
+
return
|
169
353
|
rescue ShutdownSignal
|
170
354
|
# Shutting down
|
171
|
-
@logger.
|
355
|
+
@logger.info 'Source shutting down', :source => @source_str unless @logger.nil?
|
356
|
+
return
|
172
357
|
rescue StandardError, NativeException => e
|
173
358
|
# Some other unknown problem
|
174
|
-
@logger.warn
|
175
|
-
@logger.warn("[LogCourierServer] #{e.backtrace}: #{e.message} (#{e.class})") unless @logger.nil?
|
359
|
+
@logger.warn e, :hint => 'Unknown error, connection aborted', :source => @source_str unless @logger.nil?
|
176
360
|
raise e
|
177
|
-
ensure
|
178
|
-
@socket.close
|
179
|
-
@context.terminate
|
180
361
|
end
|
181
362
|
|
363
|
+
def send(signature, message)
|
364
|
+
data = signature + [message.length].pack('N') + message
|
365
|
+
@send_queue.push @source + ['', data]
|
366
|
+
return
|
367
|
+
end
|
368
|
+
|
369
|
+
private
|
370
|
+
|
182
371
|
def recv(data)
|
183
372
|
if data.length < 8
|
184
|
-
@logger.warn '
|
373
|
+
@logger.warn 'Invalid message: not enough data', :data_length => data.length, :source => @source_str unless @logger.nil?
|
185
374
|
return
|
186
375
|
end
|
187
376
|
|
@@ -190,52 +379,16 @@ module LogCourier
|
|
190
379
|
|
191
380
|
# Verify length
|
192
381
|
if data.length - 8 != length
|
193
|
-
@logger.warn
|
382
|
+
@logger.warn 'Invalid message: data has invalid length', :data_length => data.length - 8, :encoded_length => length, :source => @source_str unless @logger.nil?
|
194
383
|
return
|
195
|
-
elsif length > @options[:max_packet_size]
|
196
|
-
@logger.warn
|
384
|
+
elsif length > @factory.options[:max_packet_size]
|
385
|
+
@logger.warn 'Invalid message: packet too large', :size => length, :max_packet_size => @options[:max_packet_size], :source => @source_str unless @logger.nil?
|
197
386
|
return
|
198
387
|
end
|
199
388
|
|
200
389
|
# Yield the parts
|
201
390
|
yield signature, data[8, length], self
|
202
|
-
|
203
|
-
|
204
|
-
def send(signature, message)
|
205
|
-
data = signature + [message.length].pack('N') + message
|
206
|
-
|
207
|
-
# Send the return route and then the message
|
208
|
-
reset_timeout
|
209
|
-
@return_route.each do |msg|
|
210
|
-
send_with_poll msg, true
|
211
|
-
end
|
212
|
-
send_with_poll '', true
|
213
|
-
send_with_poll data
|
214
|
-
end
|
215
|
-
|
216
|
-
def send_with_poll(data, more = false)
|
217
|
-
loop do
|
218
|
-
# Try to send a message but never block
|
219
|
-
rc = @socket.send_string(data, (more ? ZMQ::SNDMORE : 0) | ZMQ::DONTWAIT)
|
220
|
-
break if ZMQ::Util.resultcode_ok?(rc)
|
221
|
-
if ZMQ::Util.errno != ZMQ::EAGAIN
|
222
|
-
@logger.warn "[LogCourierServer] Message send failed: #{ZMQ::Util.error_string}" unless @logger.nil?
|
223
|
-
raise TimeoutError
|
224
|
-
end
|
225
|
-
|
226
|
-
# Wait for send to become available, handling timeouts
|
227
|
-
@poller.deregister @socket, ZMQ::POLLIN | ZMQ::POLLOUT
|
228
|
-
@poller.register @socket, ZMQ::POLLOUT
|
229
|
-
while @poller.poll(1_000) == 0
|
230
|
-
# Using this inner while triggers pollThreadEvents in JRuby which checks for Thread.raise immediately
|
231
|
-
raise TimeoutError while Time.now.to_i >= @timeout
|
232
|
-
end
|
233
|
-
end
|
234
|
-
end
|
235
|
-
|
236
|
-
def reset_timeout
|
237
|
-
# TODO: Make configurable?
|
238
|
-
@timeout = Time.now.to_i + 1_800
|
391
|
+
return
|
239
392
|
end
|
240
393
|
end
|
241
394
|
end
|