log-courier 1.1 → 1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -40,6 +40,7 @@ module LogCourier
40
40
  }.merge!(options)
41
41
 
42
42
  @logger = @options[:logger]
43
+ @logger['plugin'] = 'input/courier'
43
44
 
44
45
  case @options[:transport]
45
46
  when 'tcp', 'tls'
@@ -49,11 +50,12 @@ module LogCourier
49
50
  require 'log-courier/server_zmq'
50
51
  @server = ServerZmq.new(@options)
51
52
  else
52
- raise '[LogCourierServer] \'transport\' must be tcp, tls, plainzmq or zmq'
53
+ fail 'input/courier: \'transport\' must be tcp, tls, plainzmq or zmq'
53
54
  end
54
55
 
55
- # Grab the port back
56
+ # Grab the port back and update the logger context
56
57
  @port = @server.port
58
+ @logger['port'] = @port unless @logger.nil?
57
59
 
58
60
  # Load the json adapter
59
61
  @json_adapter = MultiJson.adapter.instance
@@ -75,7 +77,11 @@ module LogCourier
75
77
  when 'JDAT'
76
78
  process_jdat message, comm, event_queue
77
79
  else
78
- @logger.warn("[LogCourierServer] Unknown message received from #{comm.peer}") unless @logger.nil?
80
+ if comm.peer.nil?
81
+ @logger.warn 'Unknown message received', :from => 'unknown' unless @logger.nil?
82
+ else
83
+ @logger.warn 'Unknown message received', :from => comm.peer unless @logger.nil?
84
+ end
79
85
  # Don't kill a client that sends a bad message
80
86
  # Just reject it and let it send it again, potentially to another server
81
87
  comm.send '????', ''
@@ -93,17 +99,21 @@ module LogCourier
93
99
  server_thread.join
94
100
  end
95
101
  end
102
+ return
96
103
  end
97
104
 
105
+ private
106
+
98
107
  def process_ping(message, comm)
99
108
  # Size of message should be 0
100
109
  if message.length != 0
101
- raise ProtocolError, "unexpected data attached to ping message (#{message.length})"
110
+ fail ProtocolError, "unexpected data attached to ping message (#{message.length})"
102
111
  end
103
112
 
104
113
  # PONG!
105
114
  # NOTE: comm.send can raise a Timeout::Error of its own
106
115
  comm.send 'PONG', ''
116
+ return
107
117
  end
108
118
 
109
119
  def process_jdat(message, comm, event_queue)
@@ -114,11 +124,17 @@ module LogCourier
114
124
  # This allows the client to know what is being acknowledged
115
125
  # Nonce is 16 so check we have enough
116
126
  if message.length < 17
117
- raise ProtocolError, "JDAT message too small (#{message.length})"
127
+ fail ProtocolError, "JDAT message too small (#{message.length})"
118
128
  end
119
129
 
120
130
  nonce = message[0...16]
121
131
 
132
+ if !@logger.nil? && @logger.debug?
133
+ nonce_str = nonce.each_byte.map do |b|
134
+ b.to_s(16).rjust(2, '0')
135
+ end
136
+ end
137
+
122
138
  # The remainder of the message is the compressed data block
123
139
  message = StringIO.new Zlib::Inflate.inflate(message[16...message.length])
124
140
 
@@ -136,7 +152,7 @@ module LogCourier
136
152
  # Finished!
137
153
  break
138
154
  elsif length_buf.length < 4
139
- raise ProtocolError, "JDAT length extraction failed (#{ret} #{length_buf.length})"
155
+ fail ProtocolError, "JDAT length extraction failed (#{ret} #{length_buf.length})"
140
156
  end
141
157
 
142
158
  length = length_buf.unpack('N').first
@@ -145,7 +161,7 @@ module LogCourier
145
161
  ret = message.read length, data_buf
146
162
  if ret.nil? or data_buf.length < length
147
163
  @logger.warn()
148
- raise ProtocolError, "JDAT message extraction failed #{ret} #{data_buf.length}"
164
+ fail ProtocolError, "JDAT message extraction failed #{ret} #{data_buf.length}"
149
165
  end
150
166
 
151
167
  data_buf.force_encoding('utf-8')
@@ -161,7 +177,7 @@ module LogCourier
161
177
  begin
162
178
  event = @json_adapter.load(data_buf, @json_options)
163
179
  rescue MultiJson::ParseError => e
164
- @logger.warn("[LogCourierServer] JSON parse failure, falling back to plain-text: #{e}") unless @logger.nil?
180
+ @logger.warn e, :hint => 'JSON parse failure, falling back to plain-text' unless @logger.nil?
165
181
  event = { 'message' => data_buf }
166
182
  end
167
183
 
@@ -171,7 +187,7 @@ module LogCourier
171
187
  rescue TimeoutError
172
188
  # Full pipeline, partial ack
173
189
  # NOTE: comm.send can raise a Timeout::Error of its own
174
- @logger.debug "[LogCourierServer] Partially acknowledging message #{nonce.hash} sequence #{sequence}" unless @logger.nil?
190
+ @logger.debug 'Partially acknowledging message', :nonce => nonce_str.join, :sequence => sequence if !@logger.nil? && @logger.debug?
175
191
  comm.send 'ACKN', [nonce, sequence].pack('A*N')
176
192
  ack_timeout = Time.now.to_i + 5
177
193
  retry
@@ -182,8 +198,9 @@ module LogCourier
182
198
 
183
199
  # Acknowledge the full message
184
200
  # NOTE: comm.send can raise a Timeout::Error
185
- @logger.debug "[LogCourierServer] Acknowledging message #{nonce.hash} sequence #{sequence}" unless @logger.nil?
201
+ @logger.debug 'Acknowledging message', :nonce => nonce_str.join, :sequence => sequence if !@logger.nil? && @logger.debug?
186
202
  comm.send 'ACKN', [nonce, sequence].pack('A*N')
203
+ return
187
204
  end
188
205
  end
189
206
  end
@@ -24,13 +24,25 @@ require 'thread'
24
24
  module LogCourier
25
25
  # Wrap around TCPServer to grab last error for use in reporting which peer had an error
26
26
  class ExtendedTCPServer < TCPServer
27
- # Yield the peer
27
+ attr_reader :peer
28
+
29
+ def initialise
30
+ reset_peer
31
+ super
32
+ end
33
+
34
+ # Save the peer
28
35
  def accept
29
36
  sock = super
30
37
  peer = sock.peeraddr(:numeric)
31
- Thread.current['LogCourierPeer'] = "#{peer[2]}:#{peer[1]}"
38
+ @peer = "#{peer[2]}:#{peer[1]}"
32
39
  return sock
33
40
  end
41
+
42
+ def reset_peer
43
+ @peer = 'unknown'
44
+ return
45
+ end
34
46
  end
35
47
 
36
48
  # TLS transport implementation for server
@@ -57,11 +69,11 @@ module LogCourier
57
69
 
58
70
  if @options[:transport] == 'tls'
59
71
  [:ssl_certificate, :ssl_key].each do |k|
60
- raise "[LogCourierServer] '#{k}' is required" if @options[k].nil?
72
+ fail "input/courier: '#{k}' is required" if @options[k].nil?
61
73
  end
62
74
 
63
75
  if @options[:ssl_verify] and (!@options[:ssl_verify_default_ca] && @options[:ssl_verify_ca].nil?)
64
- raise '[LogCourierServer] Either \'ssl_verify_default_ca\' or \'ssl_verify_ca\' must be specified when ssl_verify is true'
76
+ fail 'input/courier: Either \'ssl_verify_default_ca\' or \'ssl_verify_ca\' must be specified when ssl_verify is true'
65
77
  end
66
78
  end
67
79
 
@@ -94,16 +106,18 @@ module LogCourier
94
106
  ssl.verify_mode = OpenSSL::SSL::VERIFY_PEER | OpenSSL::SSL::VERIFY_FAIL_IF_NO_PEER_CERT
95
107
  end
96
108
 
109
+ # Create the OpenSSL server - set start_immediately to false so we can multithread handshake
97
110
  @server = OpenSSL::SSL::SSLServer.new(@tcp_server, ssl)
111
+ @server.start_immediately = false
98
112
  else
99
113
  @server = @tcp_server
100
114
  end
101
115
 
102
116
  if @options[:port] == 0
103
- @logger.warn '[LogCourierServer] Transport ' + @options[:transport] + ' is listening on ephemeral port ' + @port.to_s unless @logger.nil?
117
+ @logger.warn 'Ephemeral port allocated', :transport => @options[:transport], :port => @port unless @logger.nil?
104
118
  end
105
119
  rescue => e
106
- raise "[LogCourierServer] Failed to initialise: #{e}"
120
+ raise "input/courier: Failed to initialise: #{e}"
107
121
  end
108
122
  end # def initialize
109
123
 
@@ -111,20 +125,20 @@ module LogCourier
111
125
  client_threads = {}
112
126
 
113
127
  loop do
114
- # This means ssl accepting is single-threaded.
128
+ # Because start_immediately is false, TCP accept is single thread but
129
+ # handshake is essentially multithreaded as we defer it to the thread
130
+ @tcp_server.reset_peer
131
+ client = nil
115
132
  begin
116
133
  client = @server.accept
117
134
  rescue EOFError, OpenSSL::SSL::SSLError, IOError => e
118
- # Handshake failure or other issue
119
- peer = Thread.current['LogCourierPeer'] || 'unknown'
120
- @logger.warn "[LogCourierServer] Connection from #{peer} failed to initialise: #{e}" unless @logger.nil?
121
- client.close rescue nil
135
+ # Accept failure or other issue
136
+ @logger.warn 'Connection failed to accept', :error => e.message, :peer => @tcp_server.peer unless @logger.nil
137
+ client.close rescue nil unless client.nil?
122
138
  next
123
139
  end
124
140
 
125
- peer = Thread.current['LogCourierPeer'] || 'unknown'
126
-
127
- @logger.info "[LogCourierServer] New connection from #{peer}" unless @logger.nil?
141
+ @logger.info 'New connection', :peer => @tcp_server.peer unless @logger.nil?
128
142
 
129
143
  # Clear up finished threads
130
144
  client_threads.delete_if do |_, thr|
@@ -132,17 +146,16 @@ module LogCourier
132
146
  end
133
147
 
134
148
  # Start a new connection thread
135
- client_threads[client] = Thread.new(client, peer) do |client_copy, peer_copy|
136
- ConnectionTcp.new(@logger, client_copy, peer_copy, @options).run(&block)
149
+ client_threads[client] = Thread.new(client, @tcp_server.peer) do |client_copy, peer_copy|
150
+ run_thread client_copy, peer_copy, &block
137
151
  end
138
152
  end
153
+ return
139
154
  rescue ShutdownSignal
140
- # Capture shutting down signal
141
- 0
155
+ return
142
156
  rescue StandardError, NativeException => e
143
157
  # Some other unknown problem
144
- @logger.warn("[LogCourierServer] Unknown error: #{e}") unless @logger.nil?
145
- @logger.warn("[LogCourierServer] #{e.backtrace}: #{e.message} (#{e.class})") unless @logger.nil?
158
+ @logger.warn e, :hint => 'Unknown error, shutting down' unless @logger.nil?
146
159
  raise e
147
160
  ensure
148
161
  # Raise shutdown in all client threads and join them
@@ -154,6 +167,24 @@ module LogCourier
154
167
 
155
168
  @tcp_server.close
156
169
  end
170
+
171
+ private
172
+
173
+ def run_thread(client, peer, &block)
174
+ # Perform the handshake inside the new thread so we don't block TCP accept
175
+ if @options[:transport] == 'tls'
176
+ begin
177
+ client.accept
178
+ rescue EOFError, OpenSSL::SSL::SSLError, IOError => e
179
+ # Handshake failure or other issue
180
+ @logger.warn 'Connection failed to initialise', :error => e.message, :peer => peer unless @logger.nil?
181
+ client.close
182
+ return
183
+ end
184
+ end
185
+
186
+ ConnectionTcp.new(@logger, client, peer, @options).run(&block)
187
+ end
157
188
  end
158
189
 
159
190
  # Representation of a single connected client
@@ -179,7 +210,7 @@ module LogCourier
179
210
 
180
211
  # Sanity
181
212
  if length > @options[:max_packet_size]
182
- raise ProtocolError, "packet too large (#{length} > #{@options[:max_packet_size]})"
213
+ fail ProtocolError, "packet too large (#{length} > #{@options[:max_packet_size]})"
183
214
  end
184
215
 
185
216
  # While we're processing, EOF is bad as it may occur during send
@@ -198,32 +229,61 @@ module LogCourier
198
229
  # If we EOF next it's a graceful close
199
230
  @in_progress = false
200
231
  end
232
+ return
201
233
  rescue TimeoutError
202
234
  # Timeout of the connection, we were idle too long without a ping/pong
203
- @logger.warn("[LogCourierServer] Connection from #{@peer} timed out") unless @logger.nil?
235
+ @logger.warn 'Connection timed out', :peer => @peer unless @logger.nil?
236
+ return
204
237
  rescue EOFError
205
238
  if @in_progress
206
- @logger.warn("[LogCourierServer] Premature connection close on connection from #{@peer}") unless @logger.nil?
239
+ @logger.warn 'Unexpected EOF', :peer => @peer unless @logger.nil?
207
240
  else
208
- @logger.info("[LogCourierServer] Connection from #{@peer} closed") unless @logger.nil?
241
+ @logger.info 'Connection closed', :peer => @peer unless @logger.nil?
209
242
  end
243
+ return
210
244
  rescue OpenSSL::SSL::SSLError, IOError, Errno::ECONNRESET => e
211
245
  # Read errors, only action is to shutdown which we'll do in ensure
212
- @logger.warn("[LogCourierServer] SSL error on connection from #{@peer}: #{e}") unless @logger.nil?
246
+ @logger.warn 'SSL error, connection aborted', :error => e.message, :peer => @peer unless @logger.nil?
247
+ return
213
248
  rescue ProtocolError => e
214
249
  # Connection abort request due to a protocol error
215
- @logger.warn("[LogCourierServer] Protocol error on connection from #{@peer}: #{e}") unless @logger.nil?
250
+ @logger.warn 'Protocol error, connection aborted', :error => e.message, :peer => @peer unless @logger.nil?
251
+ return
216
252
  rescue ShutdownSignal
217
253
  # Shutting down
218
- @logger.warn("[LogCourierServer] Closing connecting from #{@peer}: server shutting down") unless @logger.nil?
219
- rescue => e
254
+ @logger.info 'Server shutting down, closing connection', :peer => @peer unless @logger.nil?
255
+ return
256
+ rescue StandardError, NativeException => e
220
257
  # Some other unknown problem
221
- @logger.warn("[LogCourierServer] Unknown error on connection from #{@peer}: #{e}") unless @logger.nil?
222
- @logger.warn("[LogCourierServer] #{e.backtrace}: #{e.message} (#{e.class})") unless @logger.nil?
258
+ @logger.warn e, :hint => 'Unknown error, connection aborted', :peer => @peer unless @logger.nil?
259
+ return
223
260
  ensure
224
261
  @fd.close rescue nil
225
262
  end
226
263
 
264
+ def send(signature, message)
265
+ reset_timeout
266
+ data = signature + [message.length].pack('N') + message
267
+ done = 0
268
+ loop do
269
+ begin
270
+ written = @fd.write_nonblock(data[done...data.length])
271
+ rescue IO::WaitReadable
272
+ fail TimeoutError if IO.select([@fd], nil, [@fd], @timeout - Time.now.to_i).nil?
273
+ retry
274
+ rescue IO::WaitWritable
275
+ fail TimeoutError if IO.select(nil, [@fd], [@fd], @timeout - Time.now.to_i).nil?
276
+ retry
277
+ end
278
+ fail ProtocolError, "write failure (#{done}/#{data.length})" if written == 0
279
+ done += written
280
+ break if done >= data.length
281
+ end
282
+ return
283
+ end
284
+
285
+ private
286
+
227
287
  def recv(need)
228
288
  reset_timeout
229
289
  have = ''
@@ -231,16 +291,16 @@ module LogCourier
231
291
  begin
232
292
  buffer = @fd.read_nonblock need - have.length
233
293
  rescue IO::WaitReadable
234
- raise TimeoutError if IO.select([@fd], nil, [@fd], @timeout - Time.now.to_i).nil?
294
+ fail TimeoutError if IO.select([@fd], nil, [@fd], @timeout - Time.now.to_i).nil?
235
295
  retry
236
296
  rescue IO::WaitWritable
237
- raise TimeoutError if IO.select(nil, [@fd], [@fd], @timeout - Time.now.to_i).nil?
297
+ fail TimeoutError if IO.select(nil, [@fd], [@fd], @timeout - Time.now.to_i).nil?
238
298
  retry
239
299
  end
240
300
  if buffer.nil?
241
- raise EOFError
301
+ fail EOFError
242
302
  elsif buffer.length == 0
243
- raise ProtocolError, "read failure (#{have.length}/#{need})"
303
+ fail ProtocolError, "read failure (#{have.length}/#{need})"
244
304
  end
245
305
  if have.length == 0
246
306
  have = buffer
@@ -252,29 +312,10 @@ module LogCourier
252
312
  have
253
313
  end
254
314
 
255
- def send(signature, message)
256
- reset_timeout
257
- data = signature + [message.length].pack('N') + message
258
- done = 0
259
- loop do
260
- begin
261
- written = @fd.write_nonblock(data[done...data.length])
262
- rescue IO::WaitReadable
263
- raise TimeoutError if IO.select([@fd], nil, [@fd], @timeout - Time.now.to_i).nil?
264
- retry
265
- rescue IO::WaitWritable
266
- raise TimeoutError if IO.select(nil, [@fd], [@fd], @timeout - Time.now.to_i).nil?
267
- retry
268
- end
269
- raise ProtocolError, "write failure (#{done}/#{data.length})" if written == 0
270
- done += written
271
- break if done >= data.length
272
- end
273
- end
274
-
275
315
  def reset_timeout
276
316
  # TODO: Make configurable
277
317
  @timeout = Time.now.to_i + 1_800
318
+ return
278
319
  end
279
320
  end
280
321
  end
@@ -14,20 +14,31 @@
14
14
  # See the License for the specific language governing permissions and
15
15
  # limitations under the License.
16
16
 
17
- begin
18
- require 'ffi-rzmq-core'
19
- require 'ffi-rzmq-core/version'
20
- require 'ffi-rzmq'
21
- require 'ffi-rzmq/version'
22
- rescue LoadError => e
23
- raise "[LogCourierServer] Could not initialise: #{e}"
24
- end
17
+ require 'thread'
18
+ require 'log-courier/zmq_qpoll'
25
19
 
26
20
  module LogCourier
27
21
  # ZMQ transport implementation for the server
28
22
  class ServerZmq
29
23
  class ZMQError < StandardError; end
30
24
 
25
+ class << self
26
+ @print_zmq_versions = false
27
+
28
+ def print_zmq_versions(logger)
29
+ return if @print_zmq_versions || logger.nil?
30
+
31
+ libversion = LibZMQ.version
32
+ libversion = "#{libversion[:major]}.#{libversion[:minor]}.#{libversion[:patch]}"
33
+
34
+ logger.info 'libzmq', :version => libversion
35
+ logger.info 'ffi-rzmq-core', :version => LibZMQ::VERSION
36
+ logger.info 'ffi-rzmq', :version => ZMQ.version
37
+
38
+ @print_zmq_versions = true
39
+ end
40
+ end
41
+
31
42
  attr_reader :port
32
43
 
33
44
  def initialize(options = {})
@@ -38,19 +49,19 @@ module LogCourier
38
49
  address: '0.0.0.0',
39
50
  curve_secret_key: nil,
40
51
  max_packet_size: 10_485_760,
52
+ peer_recv_queue: 10,
41
53
  }.merge!(options)
42
54
 
43
55
  @logger = @options[:logger]
44
56
 
45
- libversion = LibZMQ.version
46
- libversion = "#{libversion[:major]}.#{libversion[:minor]}.#{libversion[:patch]}"
57
+ self.class.print_zmq_versions @logger
47
58
 
48
59
  if @options[:transport] == 'zmq'
49
- raise "[LogCourierServer] Transport 'zmq' requires libzmq version >= 4 (the current version is #{libversion})" unless LibZMQ.version4?
60
+ fail "input/courier: Transport 'zmq' requires libzmq version >= 4" unless LibZMQ.version4?
50
61
 
51
- raise '[LogCourierServer] \'curve_secret_key\' is required' if @options[:curve_secret_key].nil?
62
+ fail 'input/courier: \'curve_secret_key\' is required' if @options[:curve_secret_key].nil?
52
63
 
53
- raise '[LogCourierServer] \'curve_secret_key\' must be a valid 40 character Z85 encoded string' if @options[:curve_secret_key].length != 40 || !z85validate(@options[:curve_secret_key])
64
+ fail 'input/courier: \'curve_secret_key\' must be a valid 40 character Z85 encoded string' if @options[:curve_secret_key].length != 40 || !z85validate(@options[:curve_secret_key])
54
65
  end
55
66
 
56
67
  begin
@@ -60,128 +71,306 @@ module LogCourier
60
71
 
61
72
  if @options[:transport] == 'zmq'
62
73
  rc = @socket.setsockopt(ZMQ::CURVE_SERVER, 1)
63
- raise ZMQError, 'setsockopt CURVE_SERVER failure: ' + ZMQ::Util.error_string unless ZMQ::Util.resultcode_ok?(rc)
74
+ fail 'setsockopt CURVE_SERVER failure: ' + ZMQ::Util.error_string unless ZMQ::Util.resultcode_ok?(rc)
64
75
 
65
76
  rc = @socket.setsockopt(ZMQ::CURVE_SECRETKEY, @options[:curve_secret_key])
66
- raise ZMQError, 'setsockopt CURVE_SECRETKEY failure: ' + ZMQ::Util.error_string unless ZMQ::Util.resultcode_ok?(rc)
77
+ fail 'setsockopt CURVE_SECRETKEY failure: ' + ZMQ::Util.error_string unless ZMQ::Util.resultcode_ok?(rc)
67
78
  end
68
79
 
69
80
  bind = 'tcp://' + @options[:address] + (@options[:port] == 0 ? ':*' : ':' + @options[:port].to_s)
70
81
  rc = @socket.bind(bind)
71
- raise ZMQError, 'failed to bind at ' + bind + ': ' + rZMQ::Util.error_string unless ZMQ::Util.resultcode_ok?(rc)
82
+ fail 'failed to bind at ' + bind + ': ' + rZMQ::Util.error_string unless ZMQ::Util.resultcode_ok?(rc)
72
83
 
73
84
  # Lookup port number that was allocated in case it was set to 0
74
85
  endpoint = ''
75
86
  rc = @socket.getsockopt(ZMQ::LAST_ENDPOINT, endpoint)
76
- raise ZMQError, 'getsockopt LAST_ENDPOINT failure: ' + ZMQ::Util.error_string unless ZMQ::Util.resultcode_ok?(rc) && %r{\Atcp://(?:.*):(?<endpoint_port>\d+)\0\z} =~ endpoint
87
+ fail 'getsockopt LAST_ENDPOINT failure: ' + ZMQ::Util.error_string unless ZMQ::Util.resultcode_ok?(rc) && %r{\Atcp://(?:.*):(?<endpoint_port>\d+)\0\z} =~ endpoint
77
88
  @port = endpoint_port.to_i
78
89
 
79
- @poller = ZMQ::Poller.new
80
-
81
90
  if @options[:port] == 0
82
- @logger.warn '[LogCourierServer] Transport ' + @options[:transport] + ' is listening on ephemeral port ' + @port.to_s unless @logger.nil?
91
+ @logger.warn 'Ephemeral port allocated', :transport => @options[:transport], :port => @port unless @logger.nil?
83
92
  end
84
93
  rescue => e
85
- raise "[LogCourierServer] Failed to initialise: #{e}"
94
+ raise "input/courier: Failed to initialise: #{e}"
86
95
  end
87
96
 
88
- @logger.info "[LogCourierServer] libzmq version #{libversion}" unless @logger.nil?
89
- @logger.info "[LogCourierServer] ffi-rzmq-core version #{LibZMQ::VERSION}" unless @logger.nil?
90
- @logger.info "[LogCourierServer] ffi-rzmq version #{ZMQ.version}" unless @logger.nil?
91
-
92
97
  # TODO: Implement workers option by receiving on a ROUTER and proxying to a DEALER, with workers connecting to the DEALER
93
98
 
94
- @return_route = []
99
+ # TODO: Make this send queue configurable?
100
+ @send_queue = EventQueue.new 2
101
+ @factory = ClientFactoryZmq.new(@options, @send_queue)
102
+
103
+ # Setup poller
104
+ @poller = ZMQPoll::ZMQPoll.new(@context)
105
+ @poller.register_socket @socket, ZMQ::POLLIN
106
+ @poller.register_queue_to_socket @send_queue, @socket
107
+
108
+ # Register a finaliser that sets @context to nil
109
+ # This allows us to detect the JRuby bug where during "exit!" finalisers
110
+ # are run but threads are not killed - which leaves us in a situation of
111
+ # a terminated @context (it has a terminate finalizer) and an IO thread
112
+ # looping retries
113
+ # JRuby will still crash and burn, but at least we don't spam STDOUT with
114
+ # errors
115
+ ObjectSpace.define_finalizer(self, Proc.new do
116
+ @context = nil
117
+ end)
118
+ end
119
+
120
+ def run(&block)
121
+ loop do
122
+ begin
123
+ @poller.poll(5_000) do |socket, r, w|
124
+ next if socket != @socket
125
+ next if !r
95
126
 
96
- reset_timeout
127
+ receive &block
128
+ end
129
+ rescue ZMQPoll::ZMQError => e
130
+ # Detect JRuby bug
131
+ fail e if @context.nil?
132
+ @logger.warn e, :hint => 'ZMQ recv_string failure' unless @logger.nil?
133
+ next
134
+ rescue ZMQPoll::TimeoutError
135
+ # We'll let ZeroMQ manage reconnections and new connections
136
+ # There is no point in us doing any form of reconnect ourselves
137
+ next
138
+ end
139
+ end
140
+ return
141
+ rescue ShutdownSignal
142
+ # Shutting down
143
+ @logger.warn 'Server shutting down' unless @logger.nil?
144
+ return
145
+ rescue StandardError, NativeException => e
146
+ # Some other unknown problem
147
+ @logger.warn e, :hint => 'Unknown error, shutting down' unless @logger.nil?
148
+ raise e
149
+ ensure
150
+ @poller.shutdown
151
+ @factory.shutdown
152
+ @socket.close
153
+ @context.terminate
97
154
  end
98
155
 
156
+ private
157
+
99
158
  def z85validate(z85)
100
159
  # ffi-rzmq does not implement decode - but we want to validate during startup
101
160
  decoded = FFI::MemoryPointer.from_string(' ' * (8 * z85.length / 10))
102
161
  ret = LibZMQ.zmq_z85_decode decoded, z85
103
162
  return false if ret.nil?
104
-
105
163
  true
106
164
  end
107
165
 
108
- def run(&block)
109
- loop do
110
- begin
111
- begin
112
- # Try to receive a message
113
- reset_timeout
114
- data = []
115
- rc = @socket.recv_strings(data, ZMQ::DONTWAIT)
116
- unless ZMQ::Util.resultcode_ok?(rc)
117
- raise ZMQError, 'recv_string error: ' + ZMQ::Util.error_string if ZMQ::Util.errno != ZMQ::EAGAIN
118
-
119
- # Wait for a message to arrive, handling timeouts
120
- @poller.deregister @socket, ZMQ::POLLIN | ZMQ::POLLOUT
121
- @poller.register @socket, ZMQ::POLLIN
122
- while @poller.poll(1_000) == 0
123
- # Using this inner while triggers pollThreadEvents in JRuby which checks for Thread.raise immediately
124
- raise TimeoutError while Time.now.to_i >= @timeout
125
- end
126
- next
127
- end
128
- rescue ZMQError => e
129
- @logger.warn "[LogCourierServer] ZMQ recv_string failed: #{e}" unless @logger.nil?
130
- next
131
- end
166
+ def receive(&block)
167
+ # Try to receive a message
168
+ data = []
169
+ rc = @socket.recv_strings(data, ZMQ::DONTWAIT)
170
+ unless ZMQ::Util.resultcode_ok?(rc)
171
+ fail ZMQError, 'recv_string error: ' + ZMQ::Util.error_string if ZMQ::Util.errno != ZMQ::EAGAIN
172
+ end
132
173
 
133
- # Save the routing information that appears before the null messages
134
- @return_route = []
135
- @return_route.push data.shift until data.length == 0 || data[0] == ''
174
+ # Save the source information that appears before the null messages
175
+ source = []
176
+ source.push data.shift until data.length == 0 || data[0] == ''
136
177
 
137
- if data.length == 0
138
- @logger.warn '[LogCourierServer] Invalid message: no data' unless @logger.nil?
139
- next
140
- elsif data.length == 1
141
- @logger.warn '[LogCourierServer] Invalid message: empty data' unless @logger.nil?
142
- next
178
+ if data.length == 0
179
+ @logger.warn 'Invalid message: no data', :source_length => source.length unless @logger.nil?
180
+ return
181
+ elsif data.length == 1
182
+ @logger.warn 'Invalid message: empty data', :source_length => source.length unless @logger.nil?
183
+ return
184
+ end
185
+
186
+ # Drop the null message separator
187
+ data.shift
188
+
189
+ if data.length != 1
190
+ @logger.warn 'Invalid message: multipart unexpected', :source_length => source.length, :data_length => data.length unless @logger.nil?
191
+ if !@logger.nil? && @logger.debug?
192
+ i = 0
193
+ parts = {}
194
+ data.each do |msg|
195
+ i += 1
196
+ parts[i] = "#{part.length}:[#{msg[0..31].gsub(/[^[:print:]]/, '.')}]"
143
197
  end
198
+ @logger.debug 'Data', parts
199
+ end
200
+ return
201
+ end
202
+
203
+ @factory.deliver source, data.first, &block
204
+ return
205
+ end
206
+ end
207
+
208
+ class ClientFactoryZmq
209
+ attr_reader :options
210
+ attr_reader :send_queue
211
+
212
+ def initialize(options, send_queue)
213
+ @options = options
214
+ @logger = @options[:logger]
215
+
216
+ @send_queue = send_queue
217
+ @index = {}
218
+ @client_threads = {}
219
+ @mutex = Mutex.new
220
+ end
221
+
222
+ def shutdown
223
+ # Stop other threads from try_drop collisions
224
+ client_threads = @mutex.synchronize do
225
+ client_threads = @client_threads
226
+ @client_threads = {}
227
+ client_threads
228
+ end
229
+
230
+ client_threads.each_value do |thr|
231
+ thr.raise ShutdownSignal
232
+ end
233
+
234
+ client_threads.each_value(&:join)
235
+ return
236
+ end
144
237
 
145
- # Drop the null message separator
146
- data.shift
147
-
148
- if data.length != 1
149
- @logger.warn "[LogCourierServer] Invalid message: multipart unexpected (#{data.length})" unless @logger.nil?
150
- if !@logger.nil? && @logger.debug?
151
- i = 0
152
- data.each do |msg|
153
- i += 1
154
- part = msg[0..31].gsub(/[^[:print:]]/, '.')
155
- @logger.debug "[LogCourierServer] Part #{i}: #{part.length}:[#{part}]"
156
- end
238
+ def deliver(source, data, &block)
239
+ # Find the handling thread
240
+ # We separate each source into threads so that each thread can respond
241
+ # with partial ACKs if we hit a slow down
242
+ # If we processed in a single thread, we'd only be able to respond to
243
+ # a single client with partial ACKs
244
+ @mutex.synchronize do
245
+ index = @index
246
+ source.each do |identity|
247
+ index[identity] = {} if !index.key?(identity)
248
+ index = index[identity]
249
+ end
250
+
251
+ if !index.key?('')
252
+ source_str = source.map do |s|
253
+ s.each_byte.map do |b|
254
+ b.to_s(16).rjust(2, '0')
157
255
  end
158
- else
159
- recv(data.first, &block)
256
+ end.join
257
+
258
+ @logger.info 'New source', :source => source_str unless @logger.nil?
259
+
260
+ # Create the client and associated thread
261
+ client = ClientZmq.new(self, source, source_str) do
262
+ try_drop(source)
160
263
  end
264
+
265
+ thread = Thread.new do
266
+ client.run &block
267
+ end
268
+
269
+ @client_threads[thread] = thread
270
+
271
+ index[''] = {
272
+ 'client' => client,
273
+ 'thread' => thread,
274
+ }
275
+ end
276
+
277
+ # Existing thread, throw on the queue, if not enough room drop the message
278
+ index['']['client'].push data, 0
279
+ end
280
+ return
281
+ end
282
+
283
+ private
284
+
285
+ def try_drop(source, source_str)
286
+ # This is called when a client goes idle, to cleanup resources
287
+ # We may tie this into zmq monitor
288
+ @mutex.synchronize do
289
+ index = @index
290
+ parents = []
291
+ source.each do |identity|
292
+ if !index.key?(identity)
293
+ @logger.warn 'Unknown idle source failed to shutdown', :source => source_str unless @logger.nil?
294
+ break
295
+ end
296
+ parents.push [index, identity]
297
+ index = index[identity]
298
+ end
299
+
300
+ if !index.key?('')
301
+ @logger.warn 'Unknown idle source failed to shutdown', :source => source_str unless @logger.nil?
302
+ break
303
+ end
304
+
305
+ # Don't allow drop if we have messages in the queue
306
+ if index['']['client'].length != 0
307
+ @logger.warn 'Failed idle source shutdown as message queue is not empty', :source => source_str unless @logger.nil?
308
+ return false
309
+ end
310
+
311
+ @logger.info 'Idle source shutting down', :source => source_str unless @logger.nil?
312
+
313
+ # Delete the entry
314
+ @client_threads.delete(index['']['thread'])
315
+ index.delete('')
316
+
317
+ # Cleanup orphaned leafs
318
+ parents.reverse_each do |path|
319
+ path[0].delete(path[1]) if path[0][path[1]].length == 0
320
+ end
321
+ end
322
+
323
+ return true
324
+ end
325
+ end
326
+
327
+ class ClientZmq < EventQueue
328
+ def initialize(factory, source, source_str, &try_drop)
329
+ @factory = factory
330
+ @logger = @factory.options[:logger]
331
+ @send_queue = @factory.send_queue
332
+ @source = source
333
+ @source_str = source_str
334
+ @try_drop = try_drop
335
+
336
+ # Setup the queue for receiving events to process
337
+ super @factory.options[:peer_recv_queue]
338
+ end
339
+
340
+ def run(&block)
341
+ loop do
342
+ begin
343
+ # TODO: Make timeout configurable?
344
+ data = self.pop(30)
345
+ recv(data, &block)
161
346
  rescue TimeoutError
162
- # We'll let ZeroMQ manage reconnections and new connections
163
- # There is no point in us doing any form of reconnect ourselves
164
- # We will keep this timeout in however, for shutdown checks
165
- reset_timeout
166
- next
347
+ # Try to clean up resources - if we fail, new messages have arrived
348
+ retry if !@try_drop.call(@source)
349
+ break
167
350
  end
168
351
  end
352
+ return
169
353
  rescue ShutdownSignal
170
354
  # Shutting down
171
- @logger.warn('[LogCourierServer] Server shutting down') unless @logger.nil?
355
+ @logger.info 'Source shutting down', :source => @source_str unless @logger.nil?
356
+ return
172
357
  rescue StandardError, NativeException => e
173
358
  # Some other unknown problem
174
- @logger.warn("[LogCourierServer] Unknown error: #{e}") unless @logger.nil?
175
- @logger.warn("[LogCourierServer] #{e.backtrace}: #{e.message} (#{e.class})") unless @logger.nil?
359
+ @logger.warn e, :hint => 'Unknown error, connection aborted', :source => @source_str unless @logger.nil?
176
360
  raise e
177
- ensure
178
- @socket.close
179
- @context.terminate
180
361
  end
181
362
 
363
+ def send(signature, message)
364
+ data = signature + [message.length].pack('N') + message
365
+ @send_queue.push @source + ['', data]
366
+ return
367
+ end
368
+
369
+ private
370
+
182
371
  def recv(data)
183
372
  if data.length < 8
184
- @logger.warn '[LogCourierServer] Invalid message: not enough data' unless @logger.nil?
373
+ @logger.warn 'Invalid message: not enough data', :data_length => data.length, :source => @source_str unless @logger.nil?
185
374
  return
186
375
  end
187
376
 
@@ -190,52 +379,16 @@ module LogCourier
190
379
 
191
380
  # Verify length
192
381
  if data.length - 8 != length
193
- @logger.warn "[LogCourierServer] Invalid message: data has invalid length (#{data.length - 8} != #{length})" unless @logger.nil?
382
+ @logger.warn 'Invalid message: data has invalid length', :data_length => data.length - 8, :encoded_length => length, :source => @source_str unless @logger.nil?
194
383
  return
195
- elsif length > @options[:max_packet_size]
196
- @logger.warn "[LogCourierServer] Invalid message: packet too large (#{length} > #{@options[:max_packet_size]})" unless @logger.nil?
384
+ elsif length > @factory.options[:max_packet_size]
385
+ @logger.warn 'Invalid message: packet too large', :size => length, :max_packet_size => @options[:max_packet_size], :source => @source_str unless @logger.nil?
197
386
  return
198
387
  end
199
388
 
200
389
  # Yield the parts
201
390
  yield signature, data[8, length], self
202
- end
203
-
204
- def send(signature, message)
205
- data = signature + [message.length].pack('N') + message
206
-
207
- # Send the return route and then the message
208
- reset_timeout
209
- @return_route.each do |msg|
210
- send_with_poll msg, true
211
- end
212
- send_with_poll '', true
213
- send_with_poll data
214
- end
215
-
216
- def send_with_poll(data, more = false)
217
- loop do
218
- # Try to send a message but never block
219
- rc = @socket.send_string(data, (more ? ZMQ::SNDMORE : 0) | ZMQ::DONTWAIT)
220
- break if ZMQ::Util.resultcode_ok?(rc)
221
- if ZMQ::Util.errno != ZMQ::EAGAIN
222
- @logger.warn "[LogCourierServer] Message send failed: #{ZMQ::Util.error_string}" unless @logger.nil?
223
- raise TimeoutError
224
- end
225
-
226
- # Wait for send to become available, handling timeouts
227
- @poller.deregister @socket, ZMQ::POLLIN | ZMQ::POLLOUT
228
- @poller.register @socket, ZMQ::POLLOUT
229
- while @poller.poll(1_000) == 0
230
- # Using this inner while triggers pollThreadEvents in JRuby which checks for Thread.raise immediately
231
- raise TimeoutError while Time.now.to_i >= @timeout
232
- end
233
- end
234
- end
235
-
236
- def reset_timeout
237
- # TODO: Make configurable?
238
- @timeout = Time.now.to_i + 1_800
391
+ return
239
392
  end
240
393
  end
241
394
  end