log-courier 1.1 → 1.2

Sign up to get free protection for your applications and to get access to all the features.
@@ -40,6 +40,7 @@ module LogCourier
40
40
  }.merge!(options)
41
41
 
42
42
  @logger = @options[:logger]
43
+ @logger['plugin'] = 'input/courier'
43
44
 
44
45
  case @options[:transport]
45
46
  when 'tcp', 'tls'
@@ -49,11 +50,12 @@ module LogCourier
49
50
  require 'log-courier/server_zmq'
50
51
  @server = ServerZmq.new(@options)
51
52
  else
52
- raise '[LogCourierServer] \'transport\' must be tcp, tls, plainzmq or zmq'
53
+ fail 'input/courier: \'transport\' must be tcp, tls, plainzmq or zmq'
53
54
  end
54
55
 
55
- # Grab the port back
56
+ # Grab the port back and update the logger context
56
57
  @port = @server.port
58
+ @logger['port'] = @port unless @logger.nil?
57
59
 
58
60
  # Load the json adapter
59
61
  @json_adapter = MultiJson.adapter.instance
@@ -75,7 +77,11 @@ module LogCourier
75
77
  when 'JDAT'
76
78
  process_jdat message, comm, event_queue
77
79
  else
78
- @logger.warn("[LogCourierServer] Unknown message received from #{comm.peer}") unless @logger.nil?
80
+ if comm.peer.nil?
81
+ @logger.warn 'Unknown message received', :from => 'unknown' unless @logger.nil?
82
+ else
83
+ @logger.warn 'Unknown message received', :from => comm.peer unless @logger.nil?
84
+ end
79
85
  # Don't kill a client that sends a bad message
80
86
  # Just reject it and let it send it again, potentially to another server
81
87
  comm.send '????', ''
@@ -93,17 +99,21 @@ module LogCourier
93
99
  server_thread.join
94
100
  end
95
101
  end
102
+ return
96
103
  end
97
104
 
105
+ private
106
+
98
107
  def process_ping(message, comm)
99
108
  # Size of message should be 0
100
109
  if message.length != 0
101
- raise ProtocolError, "unexpected data attached to ping message (#{message.length})"
110
+ fail ProtocolError, "unexpected data attached to ping message (#{message.length})"
102
111
  end
103
112
 
104
113
  # PONG!
105
114
  # NOTE: comm.send can raise a Timeout::Error of its own
106
115
  comm.send 'PONG', ''
116
+ return
107
117
  end
108
118
 
109
119
  def process_jdat(message, comm, event_queue)
@@ -114,11 +124,17 @@ module LogCourier
114
124
  # This allows the client to know what is being acknowledged
115
125
  # Nonce is 16 so check we have enough
116
126
  if message.length < 17
117
- raise ProtocolError, "JDAT message too small (#{message.length})"
127
+ fail ProtocolError, "JDAT message too small (#{message.length})"
118
128
  end
119
129
 
120
130
  nonce = message[0...16]
121
131
 
132
+ if !@logger.nil? && @logger.debug?
133
+ nonce_str = nonce.each_byte.map do |b|
134
+ b.to_s(16).rjust(2, '0')
135
+ end
136
+ end
137
+
122
138
  # The remainder of the message is the compressed data block
123
139
  message = StringIO.new Zlib::Inflate.inflate(message[16...message.length])
124
140
 
@@ -136,7 +152,7 @@ module LogCourier
136
152
  # Finished!
137
153
  break
138
154
  elsif length_buf.length < 4
139
- raise ProtocolError, "JDAT length extraction failed (#{ret} #{length_buf.length})"
155
+ fail ProtocolError, "JDAT length extraction failed (#{ret} #{length_buf.length})"
140
156
  end
141
157
 
142
158
  length = length_buf.unpack('N').first
@@ -145,7 +161,7 @@ module LogCourier
145
161
  ret = message.read length, data_buf
146
162
  if ret.nil? or data_buf.length < length
147
163
  @logger.warn()
148
- raise ProtocolError, "JDAT message extraction failed #{ret} #{data_buf.length}"
164
+ fail ProtocolError, "JDAT message extraction failed #{ret} #{data_buf.length}"
149
165
  end
150
166
 
151
167
  data_buf.force_encoding('utf-8')
@@ -161,7 +177,7 @@ module LogCourier
161
177
  begin
162
178
  event = @json_adapter.load(data_buf, @json_options)
163
179
  rescue MultiJson::ParseError => e
164
- @logger.warn("[LogCourierServer] JSON parse failure, falling back to plain-text: #{e}") unless @logger.nil?
180
+ @logger.warn e, :hint => 'JSON parse failure, falling back to plain-text' unless @logger.nil?
165
181
  event = { 'message' => data_buf }
166
182
  end
167
183
 
@@ -171,7 +187,7 @@ module LogCourier
171
187
  rescue TimeoutError
172
188
  # Full pipeline, partial ack
173
189
  # NOTE: comm.send can raise a Timeout::Error of its own
174
- @logger.debug "[LogCourierServer] Partially acknowledging message #{nonce.hash} sequence #{sequence}" unless @logger.nil?
190
+ @logger.debug 'Partially acknowledging message', :nonce => nonce_str.join, :sequence => sequence if !@logger.nil? && @logger.debug?
175
191
  comm.send 'ACKN', [nonce, sequence].pack('A*N')
176
192
  ack_timeout = Time.now.to_i + 5
177
193
  retry
@@ -182,8 +198,9 @@ module LogCourier
182
198
 
183
199
  # Acknowledge the full message
184
200
  # NOTE: comm.send can raise a Timeout::Error
185
- @logger.debug "[LogCourierServer] Acknowledging message #{nonce.hash} sequence #{sequence}" unless @logger.nil?
201
+ @logger.debug 'Acknowledging message', :nonce => nonce_str.join, :sequence => sequence if !@logger.nil? && @logger.debug?
186
202
  comm.send 'ACKN', [nonce, sequence].pack('A*N')
203
+ return
187
204
  end
188
205
  end
189
206
  end
@@ -24,13 +24,25 @@ require 'thread'
24
24
  module LogCourier
25
25
  # Wrap around TCPServer to grab last error for use in reporting which peer had an error
26
26
  class ExtendedTCPServer < TCPServer
27
- # Yield the peer
27
+ attr_reader :peer
28
+
29
+ def initialise
30
+ reset_peer
31
+ super
32
+ end
33
+
34
+ # Save the peer
28
35
  def accept
29
36
  sock = super
30
37
  peer = sock.peeraddr(:numeric)
31
- Thread.current['LogCourierPeer'] = "#{peer[2]}:#{peer[1]}"
38
+ @peer = "#{peer[2]}:#{peer[1]}"
32
39
  return sock
33
40
  end
41
+
42
+ def reset_peer
43
+ @peer = 'unknown'
44
+ return
45
+ end
34
46
  end
35
47
 
36
48
  # TLS transport implementation for server
@@ -57,11 +69,11 @@ module LogCourier
57
69
 
58
70
  if @options[:transport] == 'tls'
59
71
  [:ssl_certificate, :ssl_key].each do |k|
60
- raise "[LogCourierServer] '#{k}' is required" if @options[k].nil?
72
+ fail "input/courier: '#{k}' is required" if @options[k].nil?
61
73
  end
62
74
 
63
75
  if @options[:ssl_verify] and (!@options[:ssl_verify_default_ca] && @options[:ssl_verify_ca].nil?)
64
- raise '[LogCourierServer] Either \'ssl_verify_default_ca\' or \'ssl_verify_ca\' must be specified when ssl_verify is true'
76
+ fail 'input/courier: Either \'ssl_verify_default_ca\' or \'ssl_verify_ca\' must be specified when ssl_verify is true'
65
77
  end
66
78
  end
67
79
 
@@ -94,16 +106,18 @@ module LogCourier
94
106
  ssl.verify_mode = OpenSSL::SSL::VERIFY_PEER | OpenSSL::SSL::VERIFY_FAIL_IF_NO_PEER_CERT
95
107
  end
96
108
 
109
+ # Create the OpenSSL server - set start_immediately to false so we can multithread handshake
97
110
  @server = OpenSSL::SSL::SSLServer.new(@tcp_server, ssl)
111
+ @server.start_immediately = false
98
112
  else
99
113
  @server = @tcp_server
100
114
  end
101
115
 
102
116
  if @options[:port] == 0
103
- @logger.warn '[LogCourierServer] Transport ' + @options[:transport] + ' is listening on ephemeral port ' + @port.to_s unless @logger.nil?
117
+ @logger.warn 'Ephemeral port allocated', :transport => @options[:transport], :port => @port unless @logger.nil?
104
118
  end
105
119
  rescue => e
106
- raise "[LogCourierServer] Failed to initialise: #{e}"
120
+ raise "input/courier: Failed to initialise: #{e}"
107
121
  end
108
122
  end # def initialize
109
123
 
@@ -111,20 +125,20 @@ module LogCourier
111
125
  client_threads = {}
112
126
 
113
127
  loop do
114
- # This means ssl accepting is single-threaded.
128
+ # Because start_immediately is false, TCP accept is single-threaded but
129
+ # handshake is essentially multithreaded as we defer it to the thread
130
+ @tcp_server.reset_peer
131
+ client = nil
115
132
  begin
116
133
  client = @server.accept
117
134
  rescue EOFError, OpenSSL::SSL::SSLError, IOError => e
118
- # Handshake failure or other issue
119
- peer = Thread.current['LogCourierPeer'] || 'unknown'
120
- @logger.warn "[LogCourierServer] Connection from #{peer} failed to initialise: #{e}" unless @logger.nil?
121
- client.close rescue nil
135
+ # Accept failure or other issue
136
+ @logger.warn 'Connection failed to accept', :error => e.message, :peer => @tcp_server.peer unless @logger.nil
137
+ client.close rescue nil unless client.nil?
122
138
  next
123
139
  end
124
140
 
125
- peer = Thread.current['LogCourierPeer'] || 'unknown'
126
-
127
- @logger.info "[LogCourierServer] New connection from #{peer}" unless @logger.nil?
141
+ @logger.info 'New connection', :peer => @tcp_server.peer unless @logger.nil?
128
142
 
129
143
  # Clear up finished threads
130
144
  client_threads.delete_if do |_, thr|
@@ -132,17 +146,16 @@ module LogCourier
132
146
  end
133
147
 
134
148
  # Start a new connection thread
135
- client_threads[client] = Thread.new(client, peer) do |client_copy, peer_copy|
136
- ConnectionTcp.new(@logger, client_copy, peer_copy, @options).run(&block)
149
+ client_threads[client] = Thread.new(client, @tcp_server.peer) do |client_copy, peer_copy|
150
+ run_thread client_copy, peer_copy, &block
137
151
  end
138
152
  end
153
+ return
139
154
  rescue ShutdownSignal
140
- # Capture shutting down signal
141
- 0
155
+ return
142
156
  rescue StandardError, NativeException => e
143
157
  # Some other unknown problem
144
- @logger.warn("[LogCourierServer] Unknown error: #{e}") unless @logger.nil?
145
- @logger.warn("[LogCourierServer] #{e.backtrace}: #{e.message} (#{e.class})") unless @logger.nil?
158
+ @logger.warn e, :hint => 'Unknown error, shutting down' unless @logger.nil?
146
159
  raise e
147
160
  ensure
148
161
  # Raise shutdown in all client threads and join them
@@ -154,6 +167,24 @@ module LogCourier
154
167
 
155
168
  @tcp_server.close
156
169
  end
170
+
171
+ private
172
+
173
+ def run_thread(client, peer, &block)
174
+ # Perform the handshake inside the new thread so we don't block TCP accept
175
+ if @options[:transport] == 'tls'
176
+ begin
177
+ client.accept
178
+ rescue EOFError, OpenSSL::SSL::SSLError, IOError => e
179
+ # Handshake failure or other issue
180
+ @logger.warn 'Connection failed to initialise', :error => e.message, :peer => peer unless @logger.nil?
181
+ client.close
182
+ return
183
+ end
184
+ end
185
+
186
+ ConnectionTcp.new(@logger, client, peer, @options).run(&block)
187
+ end
157
188
  end
158
189
 
159
190
  # Representation of a single connected client
@@ -179,7 +210,7 @@ module LogCourier
179
210
 
180
211
  # Sanity
181
212
  if length > @options[:max_packet_size]
182
- raise ProtocolError, "packet too large (#{length} > #{@options[:max_packet_size]})"
213
+ fail ProtocolError, "packet too large (#{length} > #{@options[:max_packet_size]})"
183
214
  end
184
215
 
185
216
  # While we're processing, EOF is bad as it may occur during send
@@ -198,32 +229,61 @@ module LogCourier
198
229
  # If we EOF next it's a graceful close
199
230
  @in_progress = false
200
231
  end
232
+ return
201
233
  rescue TimeoutError
202
234
  # Timeout of the connection, we were idle too long without a ping/pong
203
- @logger.warn("[LogCourierServer] Connection from #{@peer} timed out") unless @logger.nil?
235
+ @logger.warn 'Connection timed out', :peer => @peer unless @logger.nil?
236
+ return
204
237
  rescue EOFError
205
238
  if @in_progress
206
- @logger.warn("[LogCourierServer] Premature connection close on connection from #{@peer}") unless @logger.nil?
239
+ @logger.warn 'Unexpected EOF', :peer => @peer unless @logger.nil?
207
240
  else
208
- @logger.info("[LogCourierServer] Connection from #{@peer} closed") unless @logger.nil?
241
+ @logger.info 'Connection closed', :peer => @peer unless @logger.nil?
209
242
  end
243
+ return
210
244
  rescue OpenSSL::SSL::SSLError, IOError, Errno::ECONNRESET => e
211
245
  # Read errors, only action is to shutdown which we'll do in ensure
212
- @logger.warn("[LogCourierServer] SSL error on connection from #{@peer}: #{e}") unless @logger.nil?
246
+ @logger.warn 'SSL error, connection aborted', :error => e.message, :peer => @peer unless @logger.nil?
247
+ return
213
248
  rescue ProtocolError => e
214
249
  # Connection abort request due to a protocol error
215
- @logger.warn("[LogCourierServer] Protocol error on connection from #{@peer}: #{e}") unless @logger.nil?
250
+ @logger.warn 'Protocol error, connection aborted', :error => e.message, :peer => @peer unless @logger.nil?
251
+ return
216
252
  rescue ShutdownSignal
217
253
  # Shutting down
218
- @logger.warn("[LogCourierServer] Closing connecting from #{@peer}: server shutting down") unless @logger.nil?
219
- rescue => e
254
+ @logger.info 'Server shutting down, closing connection', :peer => @peer unless @logger.nil?
255
+ return
256
+ rescue StandardError, NativeException => e
220
257
  # Some other unknown problem
221
- @logger.warn("[LogCourierServer] Unknown error on connection from #{@peer}: #{e}") unless @logger.nil?
222
- @logger.warn("[LogCourierServer] #{e.backtrace}: #{e.message} (#{e.class})") unless @logger.nil?
258
+ @logger.warn e, :hint => 'Unknown error, connection aborted', :peer => @peer unless @logger.nil?
259
+ return
223
260
  ensure
224
261
  @fd.close rescue nil
225
262
  end
226
263
 
264
+ def send(signature, message)
265
+ reset_timeout
266
+ data = signature + [message.length].pack('N') + message
267
+ done = 0
268
+ loop do
269
+ begin
270
+ written = @fd.write_nonblock(data[done...data.length])
271
+ rescue IO::WaitReadable
272
+ fail TimeoutError if IO.select([@fd], nil, [@fd], @timeout - Time.now.to_i).nil?
273
+ retry
274
+ rescue IO::WaitWritable
275
+ fail TimeoutError if IO.select(nil, [@fd], [@fd], @timeout - Time.now.to_i).nil?
276
+ retry
277
+ end
278
+ fail ProtocolError, "write failure (#{done}/#{data.length})" if written == 0
279
+ done += written
280
+ break if done >= data.length
281
+ end
282
+ return
283
+ end
284
+
285
+ private
286
+
227
287
  def recv(need)
228
288
  reset_timeout
229
289
  have = ''
@@ -231,16 +291,16 @@ module LogCourier
231
291
  begin
232
292
  buffer = @fd.read_nonblock need - have.length
233
293
  rescue IO::WaitReadable
234
- raise TimeoutError if IO.select([@fd], nil, [@fd], @timeout - Time.now.to_i).nil?
294
+ fail TimeoutError if IO.select([@fd], nil, [@fd], @timeout - Time.now.to_i).nil?
235
295
  retry
236
296
  rescue IO::WaitWritable
237
- raise TimeoutError if IO.select(nil, [@fd], [@fd], @timeout - Time.now.to_i).nil?
297
+ fail TimeoutError if IO.select(nil, [@fd], [@fd], @timeout - Time.now.to_i).nil?
238
298
  retry
239
299
  end
240
300
  if buffer.nil?
241
- raise EOFError
301
+ fail EOFError
242
302
  elsif buffer.length == 0
243
- raise ProtocolError, "read failure (#{have.length}/#{need})"
303
+ fail ProtocolError, "read failure (#{have.length}/#{need})"
244
304
  end
245
305
  if have.length == 0
246
306
  have = buffer
@@ -252,29 +312,10 @@ module LogCourier
252
312
  have
253
313
  end
254
314
 
255
- def send(signature, message)
256
- reset_timeout
257
- data = signature + [message.length].pack('N') + message
258
- done = 0
259
- loop do
260
- begin
261
- written = @fd.write_nonblock(data[done...data.length])
262
- rescue IO::WaitReadable
263
- raise TimeoutError if IO.select([@fd], nil, [@fd], @timeout - Time.now.to_i).nil?
264
- retry
265
- rescue IO::WaitWritable
266
- raise TimeoutError if IO.select(nil, [@fd], [@fd], @timeout - Time.now.to_i).nil?
267
- retry
268
- end
269
- raise ProtocolError, "write failure (#{done}/#{data.length})" if written == 0
270
- done += written
271
- break if done >= data.length
272
- end
273
- end
274
-
275
315
  def reset_timeout
276
316
  # TODO: Make configurable
277
317
  @timeout = Time.now.to_i + 1_800
318
+ return
278
319
  end
279
320
  end
280
321
  end
@@ -14,20 +14,31 @@
14
14
  # See the License for the specific language governing permissions and
15
15
  # limitations under the License.
16
16
 
17
- begin
18
- require 'ffi-rzmq-core'
19
- require 'ffi-rzmq-core/version'
20
- require 'ffi-rzmq'
21
- require 'ffi-rzmq/version'
22
- rescue LoadError => e
23
- raise "[LogCourierServer] Could not initialise: #{e}"
24
- end
17
+ require 'thread'
18
+ require 'log-courier/zmq_qpoll'
25
19
 
26
20
  module LogCourier
27
21
  # ZMQ transport implementation for the server
28
22
  class ServerZmq
29
23
  class ZMQError < StandardError; end
30
24
 
25
+ class << self
26
+ @print_zmq_versions = false
27
+
28
+ def print_zmq_versions(logger)
29
+ return if @print_zmq_versions || logger.nil?
30
+
31
+ libversion = LibZMQ.version
32
+ libversion = "#{libversion[:major]}.#{libversion[:minor]}.#{libversion[:patch]}"
33
+
34
+ logger.info 'libzmq', :version => libversion
35
+ logger.info 'ffi-rzmq-core', :version => LibZMQ::VERSION
36
+ logger.info 'ffi-rzmq', :version => ZMQ.version
37
+
38
+ @print_zmq_versions = true
39
+ end
40
+ end
41
+
31
42
  attr_reader :port
32
43
 
33
44
  def initialize(options = {})
@@ -38,19 +49,19 @@ module LogCourier
38
49
  address: '0.0.0.0',
39
50
  curve_secret_key: nil,
40
51
  max_packet_size: 10_485_760,
52
+ peer_recv_queue: 10,
41
53
  }.merge!(options)
42
54
 
43
55
  @logger = @options[:logger]
44
56
 
45
- libversion = LibZMQ.version
46
- libversion = "#{libversion[:major]}.#{libversion[:minor]}.#{libversion[:patch]}"
57
+ self.class.print_zmq_versions @logger
47
58
 
48
59
  if @options[:transport] == 'zmq'
49
- raise "[LogCourierServer] Transport 'zmq' requires libzmq version >= 4 (the current version is #{libversion})" unless LibZMQ.version4?
60
+ fail "input/courier: Transport 'zmq' requires libzmq version >= 4" unless LibZMQ.version4?
50
61
 
51
- raise '[LogCourierServer] \'curve_secret_key\' is required' if @options[:curve_secret_key].nil?
62
+ fail 'input/courier: \'curve_secret_key\' is required' if @options[:curve_secret_key].nil?
52
63
 
53
- raise '[LogCourierServer] \'curve_secret_key\' must be a valid 40 character Z85 encoded string' if @options[:curve_secret_key].length != 40 || !z85validate(@options[:curve_secret_key])
64
+ fail 'input/courier: \'curve_secret_key\' must be a valid 40 character Z85 encoded string' if @options[:curve_secret_key].length != 40 || !z85validate(@options[:curve_secret_key])
54
65
  end
55
66
 
56
67
  begin
@@ -60,128 +71,306 @@ module LogCourier
60
71
 
61
72
  if @options[:transport] == 'zmq'
62
73
  rc = @socket.setsockopt(ZMQ::CURVE_SERVER, 1)
63
- raise ZMQError, 'setsockopt CURVE_SERVER failure: ' + ZMQ::Util.error_string unless ZMQ::Util.resultcode_ok?(rc)
74
+ fail 'setsockopt CURVE_SERVER failure: ' + ZMQ::Util.error_string unless ZMQ::Util.resultcode_ok?(rc)
64
75
 
65
76
  rc = @socket.setsockopt(ZMQ::CURVE_SECRETKEY, @options[:curve_secret_key])
66
- raise ZMQError, 'setsockopt CURVE_SECRETKEY failure: ' + ZMQ::Util.error_string unless ZMQ::Util.resultcode_ok?(rc)
77
+ fail 'setsockopt CURVE_SECRETKEY failure: ' + ZMQ::Util.error_string unless ZMQ::Util.resultcode_ok?(rc)
67
78
  end
68
79
 
69
80
  bind = 'tcp://' + @options[:address] + (@options[:port] == 0 ? ':*' : ':' + @options[:port].to_s)
70
81
  rc = @socket.bind(bind)
71
- raise ZMQError, 'failed to bind at ' + bind + ': ' + rZMQ::Util.error_string unless ZMQ::Util.resultcode_ok?(rc)
82
+ fail 'failed to bind at ' + bind + ': ' + rZMQ::Util.error_string unless ZMQ::Util.resultcode_ok?(rc)
72
83
 
73
84
  # Lookup port number that was allocated in case it was set to 0
74
85
  endpoint = ''
75
86
  rc = @socket.getsockopt(ZMQ::LAST_ENDPOINT, endpoint)
76
- raise ZMQError, 'getsockopt LAST_ENDPOINT failure: ' + ZMQ::Util.error_string unless ZMQ::Util.resultcode_ok?(rc) && %r{\Atcp://(?:.*):(?<endpoint_port>\d+)\0\z} =~ endpoint
87
+ fail 'getsockopt LAST_ENDPOINT failure: ' + ZMQ::Util.error_string unless ZMQ::Util.resultcode_ok?(rc) && %r{\Atcp://(?:.*):(?<endpoint_port>\d+)\0\z} =~ endpoint
77
88
  @port = endpoint_port.to_i
78
89
 
79
- @poller = ZMQ::Poller.new
80
-
81
90
  if @options[:port] == 0
82
- @logger.warn '[LogCourierServer] Transport ' + @options[:transport] + ' is listening on ephemeral port ' + @port.to_s unless @logger.nil?
91
+ @logger.warn 'Ephemeral port allocated', :transport => @options[:transport], :port => @port unless @logger.nil?
83
92
  end
84
93
  rescue => e
85
- raise "[LogCourierServer] Failed to initialise: #{e}"
94
+ raise "input/courier: Failed to initialise: #{e}"
86
95
  end
87
96
 
88
- @logger.info "[LogCourierServer] libzmq version #{libversion}" unless @logger.nil?
89
- @logger.info "[LogCourierServer] ffi-rzmq-core version #{LibZMQ::VERSION}" unless @logger.nil?
90
- @logger.info "[LogCourierServer] ffi-rzmq version #{ZMQ.version}" unless @logger.nil?
91
-
92
97
  # TODO: Implement workers option by receiving on a ROUTER and proxying to a DEALER, with workers connecting to the DEALER
93
98
 
94
- @return_route = []
99
+ # TODO: Make this send queue configurable?
100
+ @send_queue = EventQueue.new 2
101
+ @factory = ClientFactoryZmq.new(@options, @send_queue)
102
+
103
+ # Setup poller
104
+ @poller = ZMQPoll::ZMQPoll.new(@context)
105
+ @poller.register_socket @socket, ZMQ::POLLIN
106
+ @poller.register_queue_to_socket @send_queue, @socket
107
+
108
+ # Register a finaliser that sets @context to nil
109
+ # This allows us to detect the JRuby bug where during "exit!" finalisers
110
+ # are run but threads are not killed - which leaves us in a situation of
111
+ # a terminated @context (it has a terminate finalizer) and an IO thread
112
+ # looping retries
113
+ # JRuby will still crash and burn, but at least we don't spam STDOUT with
114
+ # errors
115
+ ObjectSpace.define_finalizer(self, Proc.new do
116
+ @context = nil
117
+ end)
118
+ end
119
+
120
+ def run(&block)
121
+ loop do
122
+ begin
123
+ @poller.poll(5_000) do |socket, r, w|
124
+ next if socket != @socket
125
+ next if !r
95
126
 
96
- reset_timeout
127
+ receive &block
128
+ end
129
+ rescue ZMQPoll::ZMQError => e
130
+ # Detect JRuby bug
131
+ fail e if @context.nil?
132
+ @logger.warn e, :hint => 'ZMQ recv_string failure' unless @logger.nil?
133
+ next
134
+ rescue ZMQPoll::TimeoutError
135
+ # We'll let ZeroMQ manage reconnections and new connections
136
+ # There is no point in us doing any form of reconnect ourselves
137
+ next
138
+ end
139
+ end
140
+ return
141
+ rescue ShutdownSignal
142
+ # Shutting down
143
+ @logger.warn 'Server shutting down' unless @logger.nil?
144
+ return
145
+ rescue StandardError, NativeException => e
146
+ # Some other unknown problem
147
+ @logger.warn e, :hint => 'Unknown error, shutting down' unless @logger.nil?
148
+ raise e
149
+ ensure
150
+ @poller.shutdown
151
+ @factory.shutdown
152
+ @socket.close
153
+ @context.terminate
97
154
  end
98
155
 
156
+ private
157
+
99
158
  def z85validate(z85)
100
159
  # ffi-rzmq does not implement decode - but we want to validate during startup
101
160
  decoded = FFI::MemoryPointer.from_string(' ' * (8 * z85.length / 10))
102
161
  ret = LibZMQ.zmq_z85_decode decoded, z85
103
162
  return false if ret.nil?
104
-
105
163
  true
106
164
  end
107
165
 
108
- def run(&block)
109
- loop do
110
- begin
111
- begin
112
- # Try to receive a message
113
- reset_timeout
114
- data = []
115
- rc = @socket.recv_strings(data, ZMQ::DONTWAIT)
116
- unless ZMQ::Util.resultcode_ok?(rc)
117
- raise ZMQError, 'recv_string error: ' + ZMQ::Util.error_string if ZMQ::Util.errno != ZMQ::EAGAIN
118
-
119
- # Wait for a message to arrive, handling timeouts
120
- @poller.deregister @socket, ZMQ::POLLIN | ZMQ::POLLOUT
121
- @poller.register @socket, ZMQ::POLLIN
122
- while @poller.poll(1_000) == 0
123
- # Using this inner while triggers pollThreadEvents in JRuby which checks for Thread.raise immediately
124
- raise TimeoutError while Time.now.to_i >= @timeout
125
- end
126
- next
127
- end
128
- rescue ZMQError => e
129
- @logger.warn "[LogCourierServer] ZMQ recv_string failed: #{e}" unless @logger.nil?
130
- next
131
- end
166
+ def receive(&block)
167
+ # Try to receive a message
168
+ data = []
169
+ rc = @socket.recv_strings(data, ZMQ::DONTWAIT)
170
+ unless ZMQ::Util.resultcode_ok?(rc)
171
+ fail ZMQError, 'recv_string error: ' + ZMQ::Util.error_string if ZMQ::Util.errno != ZMQ::EAGAIN
172
+ end
132
173
 
133
- # Save the routing information that appears before the null messages
134
- @return_route = []
135
- @return_route.push data.shift until data.length == 0 || data[0] == ''
174
+ # Save the source information that appears before the null messages
175
+ source = []
176
+ source.push data.shift until data.length == 0 || data[0] == ''
136
177
 
137
- if data.length == 0
138
- @logger.warn '[LogCourierServer] Invalid message: no data' unless @logger.nil?
139
- next
140
- elsif data.length == 1
141
- @logger.warn '[LogCourierServer] Invalid message: empty data' unless @logger.nil?
142
- next
178
+ if data.length == 0
179
+ @logger.warn 'Invalid message: no data', :source_length => source.length unless @logger.nil?
180
+ return
181
+ elsif data.length == 1
182
+ @logger.warn 'Invalid message: empty data', :source_length => source.length unless @logger.nil?
183
+ return
184
+ end
185
+
186
+ # Drop the null message separator
187
+ data.shift
188
+
189
+ if data.length != 1
190
+ @logger.warn 'Invalid message: multipart unexpected', :source_length => source.length, :data_length => data.length unless @logger.nil?
191
+ if !@logger.nil? && @logger.debug?
192
+ i = 0
193
+ parts = {}
194
+ data.each do |msg|
195
+ i += 1
196
+ parts[i] = "#{part.length}:[#{msg[0..31].gsub(/[^[:print:]]/, '.')}]"
143
197
  end
198
+ @logger.debug 'Data', parts
199
+ end
200
+ return
201
+ end
202
+
203
+ @factory.deliver source, data.first, &block
204
+ return
205
+ end
206
+ end
207
+
208
+ class ClientFactoryZmq
209
+ attr_reader :options
210
+ attr_reader :send_queue
211
+
212
+ def initialize(options, send_queue)
213
+ @options = options
214
+ @logger = @options[:logger]
215
+
216
+ @send_queue = send_queue
217
+ @index = {}
218
+ @client_threads = {}
219
+ @mutex = Mutex.new
220
+ end
221
+
222
+ def shutdown
223
+ # Stop other threads from try_drop collisions
224
+ client_threads = @mutex.synchronize do
225
+ client_threads = @client_threads
226
+ @client_threads = {}
227
+ client_threads
228
+ end
229
+
230
+ client_threads.each_value do |thr|
231
+ thr.raise ShutdownSignal
232
+ end
233
+
234
+ client_threads.each_value(&:join)
235
+ return
236
+ end
144
237
 
145
- # Drop the null message separator
146
- data.shift
147
-
148
- if data.length != 1
149
- @logger.warn "[LogCourierServer] Invalid message: multipart unexpected (#{data.length})" unless @logger.nil?
150
- if !@logger.nil? && @logger.debug?
151
- i = 0
152
- data.each do |msg|
153
- i += 1
154
- part = msg[0..31].gsub(/[^[:print:]]/, '.')
155
- @logger.debug "[LogCourierServer] Part #{i}: #{part.length}:[#{part}]"
156
- end
238
+ def deliver(source, data, &block)
239
+ # Find the handling thread
240
+ # We separate each source into threads so that each thread can respond
241
+ # with partial ACKs if we hit a slow down
242
+ # If we processed in a single thread, we'd only be able to respond to
243
+ # a single client with partial ACKs
244
+ @mutex.synchronize do
245
+ index = @index
246
+ source.each do |identity|
247
+ index[identity] = {} if !index.key?(identity)
248
+ index = index[identity]
249
+ end
250
+
251
+ if !index.key?('')
252
+ source_str = source.map do |s|
253
+ s.each_byte.map do |b|
254
+ b.to_s(16).rjust(2, '0')
157
255
  end
158
- else
159
- recv(data.first, &block)
256
+ end.join
257
+
258
+ @logger.info 'New source', :source => source_str unless @logger.nil?
259
+
260
+ # Create the client and associated thread
261
+ client = ClientZmq.new(self, source, source_str) do
262
+ try_drop(source)
160
263
  end
264
+
265
+ thread = Thread.new do
266
+ client.run &block
267
+ end
268
+
269
+ @client_threads[thread] = thread
270
+
271
+ index[''] = {
272
+ 'client' => client,
273
+ 'thread' => thread,
274
+ }
275
+ end
276
+
277
+ # Existing thread, throw on the queue, if not enough room drop the message
278
+ index['']['client'].push data, 0
279
+ end
280
+ return
281
+ end
282
+
283
+ private
284
+
285
+ def try_drop(source, source_str)
286
+ # This is called when a client goes idle, to clean up resources
287
+ # We may tie this into zmq monitor
288
+ @mutex.synchronize do
289
+ index = @index
290
+ parents = []
291
+ source.each do |identity|
292
+ if !index.key?(identity)
293
+ @logger.warn 'Unknown idle source failed to shutdown', :source => source_str unless @logger.nil?
294
+ break
295
+ end
296
+ parents.push [index, identity]
297
+ index = index[identity]
298
+ end
299
+
300
+ if !index.key?('')
301
+ @logger.warn 'Unknown idle source failed to shutdown', :source => source_str unless @logger.nil?
302
+ break
303
+ end
304
+
305
+ # Don't allow drop if we have messages in the queue
306
+ if index['']['client'].length != 0
307
+ @logger.warn 'Failed idle source shutdown as message queue is not empty', :source => source_str unless @logger.nil?
308
+ return false
309
+ end
310
+
311
+ @logger.info 'Idle source shutting down', :source => source_str unless @logger.nil?
312
+
313
+ # Delete the entry
314
+ @client_threads.delete(index['']['thread'])
315
+ index.delete('')
316
+
317
+ # Clean up orphaned leaves
318
+ parents.reverse_each do |path|
319
+ path[0].delete(path[1]) if path[0][path[1]].length == 0
320
+ end
321
+ end
322
+
323
+ return true
324
+ end
325
+ end
326
+
327
+ class ClientZmq < EventQueue
328
+ def initialize(factory, source, source_str, &try_drop)
329
+ @factory = factory
330
+ @logger = @factory.options[:logger]
331
+ @send_queue = @factory.send_queue
332
+ @source = source
333
+ @source_str = source_str
334
+ @try_drop = try_drop
335
+
336
+ # Setup the queue for receiving events to process
337
+ super @factory.options[:peer_recv_queue]
338
+ end
339
+
340
+ def run(&block)
341
+ loop do
342
+ begin
343
+ # TODO: Make timeout configurable?
344
+ data = self.pop(30)
345
+ recv(data, &block)
161
346
  rescue TimeoutError
162
- # We'll let ZeroMQ manage reconnections and new connections
163
- # There is no point in us doing any form of reconnect ourselves
164
- # We will keep this timeout in however, for shutdown checks
165
- reset_timeout
166
- next
347
+ # Try to clean up resources - if we fail, new messages have arrived
348
+ retry if !@try_drop.call(@source)
349
+ break
167
350
  end
168
351
  end
352
+ return
169
353
  rescue ShutdownSignal
170
354
  # Shutting down
171
- @logger.warn('[LogCourierServer] Server shutting down') unless @logger.nil?
355
+ @logger.info 'Source shutting down', :source => @source_str unless @logger.nil?
356
+ return
172
357
  rescue StandardError, NativeException => e
173
358
  # Some other unknown problem
174
- @logger.warn("[LogCourierServer] Unknown error: #{e}") unless @logger.nil?
175
- @logger.warn("[LogCourierServer] #{e.backtrace}: #{e.message} (#{e.class})") unless @logger.nil?
359
+ @logger.warn e, :hint => 'Unknown error, connection aborted', :source => @source_str unless @logger.nil?
176
360
  raise e
177
- ensure
178
- @socket.close
179
- @context.terminate
180
361
  end
181
362
 
363
+ def send(signature, message)
364
+ data = signature + [message.length].pack('N') + message
365
+ @send_queue.push @source + ['', data]
366
+ return
367
+ end
368
+
369
+ private
370
+
182
371
  def recv(data)
183
372
  if data.length < 8
184
- @logger.warn '[LogCourierServer] Invalid message: not enough data' unless @logger.nil?
373
+ @logger.warn 'Invalid message: not enough data', :data_length => data.length, :source => @source_str unless @logger.nil?
185
374
  return
186
375
  end
187
376
 
@@ -190,52 +379,16 @@ module LogCourier
190
379
 
191
380
  # Verify length
192
381
  if data.length - 8 != length
193
- @logger.warn "[LogCourierServer] Invalid message: data has invalid length (#{data.length - 8} != #{length})" unless @logger.nil?
382
+ @logger.warn 'Invalid message: data has invalid length', :data_length => data.length - 8, :encoded_length => length, :source => @source_str unless @logger.nil?
194
383
  return
195
- elsif length > @options[:max_packet_size]
196
- @logger.warn "[LogCourierServer] Invalid message: packet too large (#{length} > #{@options[:max_packet_size]})" unless @logger.nil?
384
+ elsif length > @factory.options[:max_packet_size]
385
+ @logger.warn 'Invalid message: packet too large', :size => length, :max_packet_size => @options[:max_packet_size], :source => @source_str unless @logger.nil?
197
386
  return
198
387
  end
199
388
 
200
389
  # Yield the parts
201
390
  yield signature, data[8, length], self
202
- end
203
-
204
- def send(signature, message)
205
- data = signature + [message.length].pack('N') + message
206
-
207
- # Send the return route and then the message
208
- reset_timeout
209
- @return_route.each do |msg|
210
- send_with_poll msg, true
211
- end
212
- send_with_poll '', true
213
- send_with_poll data
214
- end
215
-
216
- def send_with_poll(data, more = false)
217
- loop do
218
- # Try to send a message but never block
219
- rc = @socket.send_string(data, (more ? ZMQ::SNDMORE : 0) | ZMQ::DONTWAIT)
220
- break if ZMQ::Util.resultcode_ok?(rc)
221
- if ZMQ::Util.errno != ZMQ::EAGAIN
222
- @logger.warn "[LogCourierServer] Message send failed: #{ZMQ::Util.error_string}" unless @logger.nil?
223
- raise TimeoutError
224
- end
225
-
226
- # Wait for send to become available, handling timeouts
227
- @poller.deregister @socket, ZMQ::POLLIN | ZMQ::POLLOUT
228
- @poller.register @socket, ZMQ::POLLOUT
229
- while @poller.poll(1_000) == 0
230
- # Using this inner while triggers pollThreadEvents in JRuby which checks for Thread.raise immediately
231
- raise TimeoutError while Time.now.to_i >= @timeout
232
- end
233
- end
234
- end
235
-
236
- def reset_timeout
237
- # TODO: Make configurable?
238
- @timeout = Time.now.to_i + 1_800
391
+ return
239
392
  end
240
393
  end
241
394
  end