net_tcp_client 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/LICENSE.txt +201 -0
- data/README.md +101 -0
- data/Rakefile +28 -0
- data/lib/net/tcp_client.rb +5 -0
- data/lib/net/tcp_client/exceptions.rb +35 -0
- data/lib/net/tcp_client/logging.rb +191 -0
- data/lib/net/tcp_client/tcp_client.rb +604 -0
- data/lib/net/tcp_client/version.rb +5 -0
- data/test/simple_tcp_server.rb +114 -0
- data/test/tcp_client_test.rb +190 -0
- metadata +58 -0
@@ -0,0 +1,604 @@
|
|
1
|
+
require 'socket'
|
2
|
+
module Net
|
3
|
+
|
4
|
+
# Make Socket calls resilient by adding timeouts, retries and specific
|
5
|
+
# exception categories
|
6
|
+
#
|
7
|
+
# TCP Client with:
|
8
|
+
# * Connection Timeouts
|
9
|
+
# Ability to timeout if a connect does not complete within a reasonable time
|
10
|
+
# For example, this can occur when the server is turned off without shutting down
|
11
|
+
# causing clients to hang creating new connections
|
12
|
+
#
|
13
|
+
# * Automatic retries on startup connection failure
|
14
|
+
# For example, the server is being restarted while the client is starting
|
15
|
+
# Gives the server a few seconds to restart
|
16
|
+
#
|
17
|
+
# * Automatic retries on active connection failures
|
18
|
+
# If the server is restarted during
|
19
|
+
#
|
20
|
+
# Connection and Read Timeouts are fully configurable
|
21
|
+
#
|
22
|
+
# Raises Net::TCPClient::ConnectionTimeout when the connection timeout is exceeded
|
23
|
+
# Raises Net::TCPClient::ReadTimeout when the read timeout is exceeded
|
24
|
+
# Raises Net::TCPClient::ConnectionFailure when a network error occurs whilst reading or writing
|
25
|
+
#
|
26
|
+
# Note: Only the following methods currently have auto-reconnect enabled:
|
27
|
+
# * read
|
28
|
+
# * write
|
29
|
+
#
|
30
|
+
# Future:
|
31
|
+
# * Add auto-reconnect feature to sysread, syswrite, etc...
|
32
|
+
# * To be a drop-in replacement to TCPSocket should also need to implement the
|
33
|
+
# following TCPSocket instance methods: :addr, :peeraddr
|
34
|
+
#
|
35
|
+
# Design Notes:
|
36
|
+
# * Does not inherit from Socket or TCP Socket because the socket instance
|
37
|
+
# has to be completely destroyed and recreated after a connection failure
|
38
|
+
#
|
39
|
+
class TCPClient
|
40
|
+
# Supports embedding user supplied data along with this connection
|
41
|
+
# such as sequence number and other connection specific information
|
42
|
+
attr_accessor :user_data
|
43
|
+
|
44
|
+
# Returns [String] Name of the server connected to including the port number
|
45
|
+
#
|
46
|
+
# Example:
|
47
|
+
# localhost:2000
|
48
|
+
attr_reader :server
|
49
|
+
|
50
|
+
attr_accessor :read_timeout, :connect_timeout, :connect_retry_count,
|
51
|
+
:retry_count, :connect_retry_interval, :server_selector, :close_on_error
|
52
|
+
|
53
|
+
# Returns [TrueClass|FalseClass] Whether send buffering is enabled for this connection
|
54
|
+
attr_reader :buffered
|
55
|
+
|
56
|
+
# Returns the logger being used by the TCPClient instance
|
57
|
+
attr_reader :logger
|
58
|
+
|
59
|
+
# Exception classes that are regarded as recoverable connection problems.
# The list is consulted through Net::TCPClient.reconnect_on_errors.
@@reconnect_on_errors = [
  Errno::ECONNABORTED,
  Errno::ECONNREFUSED,
  Errno::ECONNRESET,
  Errno::EHOSTUNREACH,
  Errno::EIO,
  Errno::ENETDOWN,
  Errno::ENETRESET,
  Errno::EPIPE,
  Errno::ETIMEDOUT,
  EOFError
]

# Returns the array of exception classes that result in an automatic
# connection retry.
#
# The array itself is returned, not a copy, so further errors can be
# appended to the standard list:
#   Net::TCPClient.reconnect_on_errors << Errno::EPROTO
def self.reconnect_on_errors
  @@reconnect_on_errors
end
|
78
|
+
|
79
|
+
# Create a connection, call the supplied block and close the connection on
|
80
|
+
# completion of the block
|
81
|
+
#
|
82
|
+
# See #initialize for the list of parameters
|
83
|
+
#
|
84
|
+
# Example
|
85
|
+
# Net::TCPClient.connect(
|
86
|
+
# server: 'server:3300',
|
87
|
+
# connect_retry_interval: 0.1,
|
88
|
+
# connect_retry_count: 5
|
89
|
+
# ) do |client|
|
90
|
+
# client.retry_on_connection_failure do
|
91
|
+
# client.write('Update the database')
|
92
|
+
# end
|
93
|
+
# response = client.read(20)
|
94
|
+
# puts "Received: #{response}"
|
95
|
+
# end
|
96
|
+
#
|
97
|
+
def self.connect(params={})
  # Build the connection, hand it to the caller's block and return whatever
  # the block returns.
  connection = new(params)
  yield(connection)
ensure
  # Runs whether the block returned or raised. `connection` is still nil
  # when the constructor itself failed, in which case there is nothing to close.
  connection.close if connection
end
|
105
|
+
|
106
|
+
# Create a new TCP Client connection
|
107
|
+
#
|
108
|
+
# Parameters:
|
109
|
+
# :server [String]
|
110
|
+
# URL of the server to connect to with port number
|
111
|
+
# 'localhost:2000'
|
112
|
+
#
|
113
|
+
# :servers [Array of String]
|
114
|
+
# Array of URL's of servers to connect to with port numbers
|
115
|
+
# ['server1:2000', 'server2:2000']
|
116
|
+
#
|
117
|
+
# The second server will only be attempted once the first server
|
118
|
+
# cannot be connected to or has timed out on connect
|
119
|
+
# A read failure or timeout will not result in switching to the second
|
120
|
+
# server, only a connection failure or during an automatic reconnect
|
121
|
+
#
|
122
|
+
# :read_timeout [Float]
|
123
|
+
# Time in seconds to timeout on read
|
124
|
+
# Can be overridden by supplying a timeout in the read call
|
125
|
+
# Default: 60
|
126
|
+
#
|
127
|
+
# :connect_timeout [Float]
|
128
|
+
# Time in seconds to timeout when trying to connect to the server
|
129
|
+
# A value of -1 will cause the connect wait time to be infinite
|
130
|
+
# Default: Half of the :read_timeout ( 30 seconds )
|
131
|
+
#
|
132
|
+
# :logger [Logger]
|
133
|
+
# Set the logger to which to write log messages to
|
134
|
+
# Note: Additional methods will be mixed into this logger to make it
|
135
|
+
# compatible with the SemanticLogger extensions if it is not already
|
136
|
+
# a SemanticLogger logger instance
|
137
|
+
#
|
138
|
+
# :log_level [Symbol]
|
139
|
+
# Set the logging level for the TCPClient
|
140
|
+
# Any valid SemanticLogger log level:
|
141
|
+
# :trace, :debug, :info, :warn, :error, :fatal
|
142
|
+
# Default: SemanticLogger.default_level
|
143
|
+
#
|
144
|
+
# :buffered [Boolean]
|
145
|
+
# Whether to use Nagle's Buffering algorithm (http://en.wikipedia.org/wiki/Nagle's_algorithm)
|
146
|
+
# Recommend disabling for RPC style invocations where we don't want to wait for an
|
147
|
+
# ACK from the server before sending the last partial segment
|
148
|
+
# Buffering is recommended in a browser or file transfer style environment
|
149
|
+
# where multiple sends are expected during a single response
|
150
|
+
# Default: true
|
151
|
+
#
|
152
|
+
# :connect_retry_count [Fixnum]
|
153
|
+
# Number of times to retry connecting when a connection fails
|
154
|
+
# Default: 10
|
155
|
+
#
|
156
|
+
# :connect_retry_interval [Float]
|
157
|
+
# Number of seconds between connection retry attempts after the first failed attempt
|
158
|
+
# Default: 0.5
|
159
|
+
#
|
160
|
+
# :retry_count [Fixnum]
|
161
|
+
# Number of times to retry when calling #retry_on_connection_failure
|
162
|
+
# This is independent of :connect_retry_count which still applies with
|
163
|
+
# connection failures. This retry controls upto how many times to retry the
|
164
|
+
# supplied block should a connection failure occur during the block
|
165
|
+
# Default: 3
|
166
|
+
#
|
167
|
+
# :on_connect [Proc]
|
168
|
+
# Directly after a connection is established and before it is made available
|
169
|
+
# for use this Block is invoked.
|
170
|
+
# Typical Use Cases:
|
171
|
+
# - Initialize per connection session sequence numbers
|
172
|
+
# - Pass any authentication information to the server
|
173
|
+
# - Perform a handshake with the server
|
174
|
+
#
|
175
|
+
# :server_selector [Symbol|Proc]
|
176
|
+
# When multiple servers are supplied using :servers, this option will
|
177
|
+
# determine which server is selected from the list
|
178
|
+
# :ordered
|
179
|
+
# Select a server in the order supplied in the array, with the first
|
180
|
+
# having the highest priority. The second server will only be connected
|
181
|
+
# to if the first server is unreachable
|
182
|
+
# :random
|
183
|
+
# Randomly select a server from the list every time a connection
|
184
|
+
# is established, including during automatic connection recovery.
|
185
|
+
# :nearest
|
186
|
+
# FUTURE - Not implemented yet
|
187
|
+
# The server with an IP address that most closely matches the
|
188
|
+
# local ip address will be attempted first
|
189
|
+
# This will result in connections to servers on the localhost
|
190
|
+
# first prior to looking at remote servers
|
191
|
+
# :ping_time
|
192
|
+
# FUTURE - Not implemented yet
|
193
|
+
# The server with the lowest ping time will be selected first
|
194
|
+
# Proc:
|
195
|
+
# When a Proc is supplied, it will be called passing in the list
|
196
|
+
# of servers. The Proc must return one server name
|
197
|
+
# Example:
|
198
|
+
# :server_selector => Proc.new do |servers|
|
199
|
+
# servers.last
|
200
|
+
# end
|
201
|
+
# Default: :ordered
|
202
|
+
#
|
203
|
+
# :close_on_error [True|False]
|
204
|
+
# To prevent the connection from going into an inconsistent state
|
205
|
+
# automatically close the connection if an error occurs
|
206
|
+
# This includes a Read Timeout
|
207
|
+
# Default: true
|
208
|
+
#
|
209
|
+
# Example
|
210
|
+
# client = Net::TCPClient.new(
|
211
|
+
# server: 'server:3300',
|
212
|
+
# connect_retry_interval: 0.1,
|
213
|
+
# connect_retry_count: 5
|
214
|
+
# )
|
215
|
+
#
|
216
|
+
# client.retry_on_connection_failure do
|
217
|
+
# client.write('Update the database')
|
218
|
+
# end
|
219
|
+
#
|
220
|
+
# # Read upto 20 characters from the server
|
221
|
+
# response = client.read(20)
|
222
|
+
#
|
223
|
+
# puts "Received: #{response}"
|
224
|
+
# client.close
|
225
|
+
def initialize(parameters={})
  # Work on a copy: every recognised option is deleted from the hash below
  # so that whatever is left over can be reported as unknown.
  params = parameters.dup

  @read_timeout           = (params.delete(:read_timeout) || 60.0).to_f
  # The connect timeout defaults to half of the read timeout
  @connect_timeout        = (params.delete(:connect_timeout) || (@read_timeout / 2)).to_f
  @connect_retry_count    = params.delete(:connect_retry_count) || 10
  @connect_retry_interval = (params.delete(:connect_retry_interval) || 0.5).to_f
  @retry_count            = params.delete(:retry_count) || 3
  @on_connect             = params.delete(:on_connect)
  @server_selector        = params.delete(:server_selector) || :ordered

  # Boolean options: only an absent (nil) value selects the default of true,
  # an explicit false must be preserved.
  @buffered = params.delete(:buffered)
  @buffered = true if @buffered.nil?
  close_on_error  = params.delete(:close_on_error)
  @close_on_error = close_on_error.nil? ? true : close_on_error

  @logger = params.delete(:logger)

  # :servers takes precedence; otherwise fall back to the single :server
  @servers = params.delete(:servers)
  if !@servers
    server = params.delete(:server)
    raise "Missing mandatory :server or :servers" unless server
    @servers = [server]
  end

  # Wrap the supplied logger (possibly nil) via Logging.new_logger
  @logger = Logging.new_logger(logger, "#{self.class.name} #{@servers.inspect}", params.delete(:log_level))

  # Anything still in the hash was not recognised above
  params.each_pair do |k, v|
    logger.warn "Ignoring unknown option #{k} = #{v}"
  end

  # Connect to the server straight away
  connect
end
|
253
|
+
|
254
|
+
# Connect to the TCP server
|
255
|
+
#
|
256
|
+
# Raises Net::TCPClient::ConnectionTimeout when the time taken to create a connection
|
257
|
+
# exceeds the :connect_timeout
|
258
|
+
# Raises Net::TCPClient::ConnectionFailure whenever Socket raises an error such as Error::EACCESS etc, see Socket#connect for more information
|
259
|
+
#
|
260
|
+
# Error handling is implemented as follows:
|
261
|
+
# 1. TCP Socket Connect failure:
|
262
|
+
# Cannot reach server
|
263
|
+
# Server is being restarted, or is not running
|
264
|
+
# Retry up to :connect_retry_count times (default 10), waiting :connect_retry_interval seconds (default 0.5) between attempts, before raising a Net::TCPClient::ConnectionFailure
|
265
|
+
# - Means all calls to #connect will take at least 5 seconds before failing if the server is not running
|
266
|
+
# - Allows hot restart of server process if it restarts within 5 seconds
|
267
|
+
#
|
268
|
+
# 2. TCP Socket Connect timeout:
|
269
|
+
# Timed out after :connect_timeout seconds trying to connect to the server
|
270
|
+
# Usually means server is busy or the remote server disappeared off the network recently
|
271
|
+
# No retry, just raise a Net::TCPClient::ConnectionTimeout
|
272
|
+
#
|
273
|
+
# Note: When multiple servers are supplied it will only try to connect to
|
274
|
+
# the subsequent servers once the retry count has been exceeded
|
275
|
+
#
|
276
|
+
# Note: Calling #connect on an open connection will close the current connection
|
277
|
+
# and create a new connection
|
278
|
+
def connect
  # Connect (or re-connect) to one of the configured servers.
  #
  # Any socket that is still open is closed first. With a single server it
  # is connected to directly. With several servers @server_selector decides:
  #   Proc     - called with the server list, must return the server to use
  #   :ordered - servers are tried in the order supplied
  #   :random  - servers are tried in random order, duplicates removed
  # For :ordered and :random the first server that connects wins and no
  # further servers are attempted. Only when every candidate has raised
  # Net::TCPClient::ConnectionFailure is the last such failure re-raised.
  #
  # Raises ArgumentError for an unknown :server_selector value.
  # Returns true once connected and the :on_connect callback (if any) has run.
  @socket.close if @socket && !@socket.closed?

  if @servers.size > 1
    case @server_selector
    when Proc
      connect_to_server(@server_selector.call(@servers))

    when :ordered, :random
      candidates   = @server_selector == :random ? @servers.uniq.shuffle : @servers
      last_failure = nil
      # any? stops at the first server that connects, so no additional
      # sockets are opened once a connection has been established.
      connected = candidates.any? do |server|
        begin
          connect_to_server(server)
          true
        rescue Net::TCPClient::ConnectionFailure => exc
          last_failure = exc
          false
        end
      end
      # Raise the most recent failure once every server has been tried
      raise(last_failure) unless connected

    else
      raise ArgumentError.new("Invalid or unknown value for parameter :server_selector => #{@server_selector}")
    end
  else
    connect_to_server(@servers.first)
  end

  # Invoke user supplied Block every time a new connection has been established
  @on_connect.call(self) if @on_connect
  true
end
|
330
|
+
|
331
|
+
# Send data to the server
|
332
|
+
#
|
333
|
+
# Use #retry_on_connection_failure to add resilience to the #write method
|
334
|
+
#
|
335
|
+
# Raises Net::TCPClient::ConnectionFailure whenever the send fails
|
336
|
+
# For a description of the errors, see Socket#write
|
337
|
+
#
|
338
|
+
def write(data)
  # Writes data to the socket and returns whatever the logger's
  # benchmark_debug call returns for the block.
  #
  # A SystemCallError is logged and converted into
  # Net::TCPClient::ConnectionFailure; any other exception is re-raised
  # untouched. In both cases the connection is closed first when
  # close_on_error is set.
  logger.trace("#write ==> sending", data)

  # Handed to the logger before the block runs and filled in by the block
  payload = {}
  logger.benchmark_debug("#write ==> complete", payload) do
    begin
      payload[:bytes_sent] = @socket.write(data)
    rescue SystemCallError => exception
      failure = "#{exception.class}: #{exception.message}"
      logger.warn "#write Connection failure: #{failure}"
      close if close_on_error
      raise Net::TCPClient::ConnectionFailure.new("Send Connection failure: #{failure}", @server, exception)
    rescue Exception
      # The connection may now be in an inconsistent state, so drop it
      # before passing the exception on unchanged
      close if close_on_error
      raise
    end
  end
end
|
356
|
+
|
357
|
+
# Returns a response from the server
|
358
|
+
#
|
359
|
+
# Raises Net::TCPClient::ConnectionTimeout when the time taken to create a connection
|
360
|
+
# exceeds the :connect_timeout
|
361
|
+
# Connection is closed
|
362
|
+
# Raises Net::TCPClient::ConnectionFailure whenever Socket raises an error such as
|
363
|
+
# Error::EACCESS etc, see Socket#connect for more information
|
364
|
+
# Connection is closed
|
365
|
+
# Raises Net::TCPClient::ReadTimeout if the timeout has been exceeded waiting for the
|
366
|
+
# requested number of bytes from the server
|
367
|
+
# Partial data will not be returned
|
368
|
+
# When :close_on_error is false the connection is _not_ closed and #read can be called again later
|
369
|
+
# to read the response from the connection
|
370
|
+
#
|
371
|
+
# Parameters
|
372
|
+
# length [Fixnum]
|
373
|
+
# The number of bytes to return
|
374
|
+
# #read will not return until 'length' bytes have been received from
|
375
|
+
# the server
|
376
|
+
#
|
377
|
+
# timeout [Float]
|
378
|
+
# Optional: Override the default read timeout for this read
|
379
|
+
# Number of seconds before raising Net::TCPClient::ReadTimeout when no data has
|
380
|
+
# been returned
|
381
|
+
# A value of -1 will wait forever for a response on the socket
|
382
|
+
# Default: :read_timeout supplied to #initialize
|
383
|
+
#
|
384
|
+
# Note: After a Net::TCPClient::ReadTimeout #read can be called again on
|
385
|
+
# the same socket to read the response later.
|
386
|
+
# If the application no longer wants the connection after a
|
387
|
+
# Net::TCPClient::ReadTimeout, then the #close method _must_ be called
|
388
|
+
# before calling _connect_ or _retry_on_connection_failure_ to create
|
389
|
+
# a new connection
|
390
|
+
#
|
391
|
+
def read(length, buffer=nil, timeout=nil)
  # Reads exactly `length` bytes from the server and returns them.
  #
  # Parameters
  #   length  - number of bytes to read
  #   buffer  - optional String passed through to the socket's #read to
  #             receive the data
  #   timeout - seconds to wait for data to become available; defaults to the
  #             configured read timeout. An effective value of -1, whether
  #             passed in or configured, skips the wait and blocks until
  #             data arrives.
  #
  # Raises Net::TCPClient::ReadTimeout when no data became available in time.
  # Raises Net::TCPClient::ConnectionFailure on an IOError or SystemCallError,
  # or when fewer than `length` bytes were received (end of file).
  # Whenever an error is raised the connection is closed first if
  # close_on_error is set.
  result = nil
  # Resolve the effective timeout once so that the -1 check below also
  # applies to the configured default, not only to an explicit argument.
  wait_time = timeout || @read_timeout

  logger.benchmark_debug("#read <== read #{length} bytes") do
    if wait_time != -1
      # Block until data is available to read, or wait_time seconds elapsed
      ready = begin
        IO.select([@socket], nil, [@socket], wait_time)
      rescue IOError => exception
        logger.warn "#read Connection failure while waiting for data: #{exception.class}: #{exception.message}"
        close if close_on_error
        raise Net::TCPClient::ConnectionFailure.new("#{exception.class}: #{exception.message}", @server, exception)
      rescue Exception
        # Close the connection on any other exception since the connection
        # will now be in an inconsistent state
        close if close_on_error
        raise
      end

      # IO.select returns nil when the timeout expired
      unless ready
        close if close_on_error
        logger.warn "#read Timeout waiting for server to reply"
        raise Net::TCPClient::ReadTimeout.new("Timedout after #{wait_time} seconds trying to read from #{@server}")
      end
    end

    # Read data from socket
    begin
      result = buffer.nil? ? @socket.read(length) : @socket.read(length, buffer)
      logger.trace("#read <== received", result.inspect)

      # EOF before all the data was returned
      if result.nil? || (result.length < length)
        close if close_on_error
        logger.warn "#read server closed the connection before #{length} bytes were returned"
        raise Net::TCPClient::ConnectionFailure.new("Connection lost while reading data", @server, EOFError.new("end of file reached"))
      end
    rescue SystemCallError, IOError => exception
      close if close_on_error
      logger.warn "#read Connection failure while reading data: #{exception.class}: #{exception.message}"
      raise Net::TCPClient::ConnectionFailure.new("#{exception.class}: #{exception.message}", @server, exception)
    rescue Exception
      # Close the connection on any other exception since the connection
      # will now be in an inconsistent state
      close if close_on_error
      raise
    end
  end
  result
end
|
439
|
+
|
440
|
+
# Send and/or receive data with automatic retry on connection failure
|
441
|
+
#
|
442
|
+
# On a connection failure, it will create a new connection and retry the block.
|
443
|
+
# Returns immediately on exception Net::TCPClient::ReadTimeout
|
444
|
+
# The connection is always closed on Net::TCPClient::ConnectionFailure regardless of close_on_error
|
445
|
+
#
|
446
|
+
# 1. Example of a resilient _readonly_ request:
|
447
|
+
#
|
448
|
+
# When reading data from a server that does not change state on the server
|
449
|
+
# Wrap both the send and the read with #retry_on_connection_failure
|
450
|
+
# since it is safe to send the same data twice to the server
|
451
|
+
#
|
452
|
+
# # Since the send can be sent many times it is safe to also put the receive
|
453
|
+
# # inside the retry block
|
454
|
+
# value = client.retry_on_connection_failure do
|
455
|
+
# client.write("GETVALUE:count\n")
|
456
|
+
# client.read(20).strip.to_i
|
457
|
+
# end
|
458
|
+
#
|
459
|
+
# 2. Example of a resilient request that _modifies_ data on the server:
|
460
|
+
#
|
461
|
+
# When changing state on the server, for example when updating a value
|
462
|
+
# Wrap _only_ the send with #retry_on_connection_failure
|
463
|
+
# The read must be outside the #retry_on_connection_failure since we must
|
464
|
+
# not retry the send if the connection fails during the #read
|
465
|
+
#
|
466
|
+
# value = 45
|
467
|
+
# # Only the send is within the retry block since we cannot re-send once
|
468
|
+
# # the send was successful since the server may have made the change
|
469
|
+
# client.retry_on_connection_failure do
|
470
|
+
# client.write("SETVALUE:#{value}\n")
|
471
|
+
# end
|
472
|
+
# # Server returns "SAVED" if the call was successful
|
473
|
+
# result = client.read(20).strip
|
474
|
+
#
|
475
|
+
# Error handling is implemented as follows:
|
476
|
+
# If a network failure occurs during the block invocation the block
|
477
|
+
# will be called again with a new connection to the server.
|
478
|
+
# It will only be retried up to :retry_count times (default 3)
|
479
|
+
# The re-connect will independently retry and timeout using all the
|
480
|
+
# rules of #connect
|
481
|
+
def retry_on_connection_failure
  # Yields this client to the block and returns the block's value.
  # When the block raises Net::TCPClient::ConnectionFailure whose cause is
  # listed in reconnect_on_errors, a new connection is made and the block is
  # run again, at most @retry_count times.
  attempts = 0
  begin
    connect if closed?
    yield(self)
  rescue Net::TCPClient::ConnectionFailure => exception
    cause   = exception.cause
    exc_str = cause ? "#{cause.class}: #{cause.message}" : exception.message

    # Failures that are not configured for retry go straight back to the caller
    unless self.class.reconnect_on_errors.include?(cause.class)
      logger.warn "#retry_on_connection_failure not configured to retry: #{exc_str}"
      raise exception
    end

    # Retries exhausted: report the total and give up
    if attempts >= @retry_count
      logger.error "#retry_on_connection_failure Connection failure: #{exception.class}: #{exception.message}. Giving up after #{attempts} retries"
      raise Net::TCPClient::ConnectionFailure.new("After #{attempts} retries to host '#{server}': #{exc_str}", @server, cause)
    end

    attempts += 1
    logger.warn "#retry_on_connection_failure retry #{attempts} due to #{exception.class}: #{exception.message}"
    # Re-connect using the rules of #connect, then run the block again
    connect
    retry
  end
end
|
502
|
+
|
503
|
+
# Close the socket only if it is not already closed
|
504
|
+
#
|
505
|
+
# Logs a warning if an error occurs trying to close the socket
|
506
|
+
def close
  # Closes the underlying socket when it is still open; a socket that is
  # already closed is left alone. An IOError raised while closing is logged
  # as a warning instead of being propagated.
  begin
    unless @socket.closed?
      @socket.close
    end
  rescue IOError => exception
    logger.warn "IOError when attempting to close socket: #{exception.class}: #{exception.message}"
  end
end
|
511
|
+
|
512
|
+
# Returns whether the socket is closed
|
513
|
+
def closed?
  # Pure delegation: reports the state of the current underlying socket
  @socket.closed?
end
|
516
|
+
|
517
|
+
# Returns whether the connection to the server is alive
|
518
|
+
#
|
519
|
+
# It is useful to call this method before making a call to the server
|
520
|
+
# that would change data on the server
|
521
|
+
#
|
522
|
+
# Note: This method is only useful if the server closed the connection or
|
523
|
+
# if a previous connection failure occurred.
|
524
|
+
# If the server is hard killed this will still return true until one
|
525
|
+
# or more writes are attempted
|
526
|
+
#
|
527
|
+
# Note: In testing the overhead of this call is rather low, with the ability to
|
528
|
+
# make about 120,000 calls per second against an active connection.
|
529
|
+
# I.e. About 8.3 micro seconds per call
|
530
|
+
def alive?
  # Returns false for a closed socket, true when nothing is waiting to be
  # read, and otherwise whether the readable socket is not yet at end of file.
  return false if @socket.closed?

  # Zero timeout: only poll, never wait. Nothing readable means no EOF is pending.
  return true unless IO.select([@socket], nil, nil, 0)

  begin
    !@socket.eof?
  rescue StandardError
    # Any error while probing for EOF is treated as a dead connection
    false
  end
rescue IOError
  false
end
|
541
|
+
|
542
|
+
# See: Socket#setsockopt
|
543
|
+
def setsockopt(level, optname, optval)
  # Forwards the option unchanged to the current underlying socket and
  # returns that call's result
  @socket.setsockopt(level, optname, optval)
end
|
546
|
+
|
547
|
+
#############################################
|
548
|
+
protected
|
549
|
+
|
550
|
+
# Try connecting to a single server
|
551
|
+
# Returns the connected socket
|
552
|
+
#
|
553
|
+
# Raises Net::TCPClient::ConnectionTimeout when the connection timeout has been exceeded
|
554
|
+
# Raises Net::TCPClient::ConnectionFailure
|
555
|
+
def connect_to_server(server)
  # Connects @socket to a single server given as 'host:port'.
  #
  # Connection errors listed in reconnect_on_errors are retried up to
  # @connect_retry_count times, sleeping @connect_retry_interval seconds
  # between attempts. A socket whose connect failed or timed out is closed
  # before retrying or raising, so failed attempts do not leave open sockets.
  #
  # Returns the server name; @server is only updated once connected.
  #
  # Raises Net::TCPClient::ConnectionTimeout when the connection timeout has been exceeded
  # Raises Net::TCPClient::ConnectionFailure, carrying the server that was
  # being attempted, for any other SystemCallError once retries are exhausted
  #
  # Have to use Socket internally instead of TCPSocket since TCPSocket
  # does not offer async connect API amongst others:
  # :accept, :accept_nonblock, :bind, :connect, :connect_nonblock, :getpeereid,
  # :ipv6only!, :listen, :recvfrom_nonblock, :sysaccept
  retries = 0
  logger.benchmark_info "Connected to #{server}" do
    host_name, port = server.split(":")
    port = port.to_i

    address        = Socket.getaddrinfo(host_name, nil, Socket::AF_INET)
    socket_address = Socket.pack_sockaddr_in(port, address[0][3])

    begin
      @socket = Socket.new(Socket.const_get(address[0][0]), Socket::SOCK_STREAM, 0)
      # TCP_NODELAY switches off send buffering (Nagle's algorithm)
      @socket.setsockopt(Socket::IPPROTO_TCP, Socket::TCP_NODELAY, 1) unless buffered
      if @connect_timeout == -1
        # Timeout of -1 means wait forever for a connection
        @socket.connect(socket_address)
      else
        # Start the connect without blocking; EINPROGRESS is the expected
        # "still connecting" result
        begin
          @socket.connect_nonblock(socket_address)
        rescue Errno::EINPROGRESS
        end

        # Wait until the socket becomes writable, or the timeout expires
        unless IO.select(nil, [@socket], nil, @connect_timeout)
          @socket.close
          raise(Net::TCPClient::ConnectionTimeout.new("Timedout after #{@connect_timeout} seconds trying to connect to #{server}"))
        end

        # A second connect call surfaces the outcome: EISCONN means success,
        # any other error is handled by the rescue below
        begin
          @socket.connect_nonblock(socket_address)
        rescue Errno::EISCONN
        end
      end
      # Connected: falling off the end of the block lets benchmark_info
      # finish normally
    rescue SystemCallError => exception
      # Release the socket of the failed attempt before retrying or raising
      @socket.close if @socket && !@socket.closed?

      if retries < @connect_retry_count && self.class.reconnect_on_errors.include?(exception.class)
        retries += 1
        logger.warn "Connection failure: #{exception.class}: #{exception.message}. Retry: #{retries}"
        sleep @connect_retry_interval
        retry
      end
      logger.error "Connection failure: #{exception.class}: #{exception.message}. Giving up after #{retries} retries"
      raise Net::TCPClient::ConnectionFailure.new("After #{retries} connection attempts to host '#{server}': #{exception.class}: #{exception.message}", server, exception)
    end
  end
  @server = server
end
|
602
|
+
|
603
|
+
end
|
604
|
+
end
|