right_support 2.10.1 → 2.11.2

Sign up to get free protection for your applications and to get access to all the features.
@@ -20,6 +20,8 @@
20
20
  # OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
21
21
  # WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
22
22
 
23
+ require 'thread'
24
+
23
25
  module RightSupport::Net
24
26
  # Raised to indicate the (uncommon) error condition where a RequestBalancer rotated
25
27
  # through EVERY URL in a list without getting a non-nil, non-timeout response.
@@ -110,32 +112,55 @@ module RightSupport::Net
110
112
  end
111
113
  end
112
114
 
115
+ # no-op health-check
113
116
  DEFAULT_HEALTH_CHECK_PROC = Proc.new do |endpoint|
114
117
  true
115
118
  end
116
119
 
120
+ # debug mode
121
+ DEFAULT_DEBUG_MODE = ::ENV['DEBUG_MODE'] == 'true'
122
+
123
+ # default options
117
124
  DEFAULT_OPTIONS = {
118
- :policy => nil,
119
- :retry => DEFAULT_RETRY_PROC,
120
- :fatal => DEFAULT_FATAL_PROC,
121
- :on_exception => nil,
122
- :health_check => DEFAULT_HEALTH_CHECK_PROC
125
+ :policy => nil,
126
+ :retry => DEFAULT_RETRY_PROC,
127
+ :fatal => DEFAULT_FATAL_PROC,
128
+ :on_exception => nil,
129
+ :health_check => DEFAULT_HEALTH_CHECK_PROC,
130
+ :resolve => nil, # not resolving DNS to IP(s) by default; rely on consul, etc.
131
+ :thread_safe => false, # not thread-safe by default
132
+ :debug_mode => nil # infer from DEBUG_MODE
123
133
  }
124
134
 
125
135
  attr_reader :endpoints
126
136
 
127
137
  # Return the actual, potentially DNS-resolved endpoints that are used for requests.
128
- # If the balancer was constructed with :resolve=>false, return self.endpoints.
138
+ # If the balancer was constructed with :resolve=>nil, return self.endpoints.
129
139
  #
130
140
  # @return [Array] collection of endpoints
131
141
  def resolved_endpoints
132
- (@ips.nil? || @ips.empty?) ? @endpoints : @ips
142
+ @synchronize.call do
143
+ (@ips.nil? || @ips.empty?) ? @endpoints : @ips
144
+ end
133
145
  end
134
146
 
135
147
  def self.request(endpoints, options={}, &block)
136
148
  new(endpoints, options).request(&block)
137
149
  end
138
150
 
151
+ # encapsulates exponential backoff/retry logic in a callback for use as the
152
+ # :retry option to request balancer.
153
+ def self.backoff_retry_callback(max_attempts)
154
+ lambda do |_, n|
155
+ if n < max_attempts
156
+ sleep 2 ** n
157
+ true
158
+ else
159
+ false
160
+ end
161
+ end
162
+ end
163
+
139
164
  # Constructor. Accepts a sequence of request endpoints which it shuffles randomly at
140
165
  # creation time; however, the ordering of the endpoints does not change thereafter
141
166
  # and the sequence is tried from the beginning for every request.
@@ -147,24 +172,45 @@ module RightSupport::Net
147
172
  # :resolve option allows the balancer to treat each backing server as a distinct
148
173
  # endpoint with its own health state, etc.
149
174
  #
150
- # === Parameters
151
- # endpoints(Array):: a set of network endpoints (e.g. HTTP URLs) to be load-balanced
152
- #
153
- # === Options
154
- # retry:: a Class, array of Class or decision Proc to determine whether to keep retrying; default is to try all endpoints
155
- # fatal:: a Class, array of Class, or decision Proc to determine whether an exception is fatal and should not be retried
156
- # resolve(Integer):: how often to re-resolve DNS hostnames of endpoints; default is nil (never resolve)
157
- # on_exception(Proc):: notification hook that accepts three arguments: whether the exception is fatal, the exception itself,
158
- # and the endpoint for which the exception happened
159
- # health_check(Proc):: callback that allows balancer to check an endpoint health; should raise an exception if the endpoint
160
- # is not healthy
161
- # on_health_change(Proc):: callback that is made when the overall health of the endpoints transition to a different level;
162
- # its single argument contains the new minimum health level
163
- #
175
+ # @param [String|Array] endpoints (e.g. HTTP URLs) for balancing
176
+ # @param [Hash] options
177
+ # @option options [Integer|Proc] :retry callback to determine whether to
178
+ # keep retrying; default is to try each endpoint at most once. can also be
179
+ # passed as an integer which provides a set number of attempts with no
180
+ # backoff. for retry with backoff use the backoff_retry_callback method.
181
+ # @option options [Integer] :resolve as a timeout in seconds to re-resolve
182
+ # DNS hostnames of endpoints to IP addresses; default is nil (never).
183
+ # @option options [TrueClass|FalseClass] :thread_safe as true to guard the
184
+ # balancer state with a mutex, false to be free-threaded (default). Ruby (MRI) is
185
+ # often treated as thread-safe because its global interpreter lock prevents truly parallel execution and/or apps
186
+ # consistently use the Rainbows gem to ensure one process per API handler.
187
+ # @option options [TrueClass|FalseClass] :debug_mode as true to log
188
+ # additional error information as failures occur, false to only log error
189
+ # summary after all retries fail or nil to infer from DEBUG_MODE (default).
190
+ # @option options [Proc] :fatal callback to determine whether an exception
191
+ # is fatal and should not be retried.
192
+ # @option options [Proc] :on_exception notification hook that accepts three
193
+ # arguments: whether the exception is fatal, the exception itself, and the
194
+ # endpoint for which the exception happened
195
+ # @option options [Proc] :health_check callback that allows balancer to
196
+ # check an endpoint health; should raise an exception if the endpoint is
197
+ # not healthy
198
+ # @option options [Proc] :on_health_change callback that is made when the
199
+ # overall health of the endpoints transitions to a different level; its
200
+ # single argument contains the new minimum health level
164
201
  def initialize(endpoints, options={})
165
202
  @options = DEFAULT_OPTIONS.merge(options)
166
203
 
167
- unless endpoints && !endpoints.empty?
204
+ # provide thread-safety only when specified.
205
+ if @options[:thread_safe]
206
+ @mutex = ::Mutex.new
207
+ @synchronize = @mutex.method(:synchronize)
208
+ else
209
+ @synchronize = self.method(:free_threaded)
210
+ end
211
+
212
+ endpoints = Array(endpoints)
213
+ if endpoints.empty?
168
214
  raise ArgumentError, "Must specify at least one endpoint"
169
215
  end
170
216
 
@@ -172,11 +218,32 @@ module RightSupport::Net
172
218
  @policy = @options[:policy]
173
219
  @policy = @policy.new(options) if @policy.is_a?(Class)
174
220
 
221
+ if (@debug_mode = @options.delete(:debug_mode)).nil?
222
+ @debug_mode = DEFAULT_DEBUG_MODE
223
+ end
224
+
225
+ # convert retry counter to a simple retry callback, if necessary.
226
+ @retry = @options.delete(:retry) || DEFAULT_RETRY_PROC
227
+ unless @retry.kind_of?(Proc)
228
+ # ensure that the count is captured by callback for safety.
229
+ @retry = Integer(@retry)
230
+ retry_proc = lambda do |max_attempts|
231
+ lambda do |ep, n|
232
+ n < max_attempts
233
+ end
234
+ end.call(@retry)
235
+ @retry = retry_proc # and now the type is always Proc
236
+ end
237
+
175
238
  unless test_policy_duck_type(@policy)
176
239
  raise ArgumentError, ":policy must be a class/object that responds to :next, :good and :bad"
177
240
  end
178
241
 
179
- unless test_callable_arity(options[:retry], 2)
242
+ # note @retry is now always defined as a callback. the legacy code always
243
+ # had a default retry but it could have been defined later during actual
244
+ # execution instead of being concretely defined on initialization. now it
245
+ # is always defined on initialization.
246
+ unless test_callable_arity(@retry, 2, false)
180
247
  raise ArgumentError, ":retry callback must accept two parameters"
181
248
  end
182
249
 
@@ -209,13 +276,17 @@ module RightSupport::Net
209
276
 
210
277
  # Un-resolve an IP address.
211
278
  #
212
- # === Parameters
213
- # endpoint:: a network endpoint (e.g. HTTP URL) to be un-resolved
279
+ # @param [String] endpoint (e.g. HTTP URL) to be un-resolved
214
280
  #
215
- # === Return
216
- # Return the first hostname that resolved to the IP (there should only ever be one)
281
+ # @return [String] the first hostname that resolved to the IP (there should be at most one) or nil
217
282
  def lookup_hostname(endpoint)
218
- @resolved_hostnames.select{ |k,v| v.addresses.include?(endpoint) }.shift[0]
283
+ result = nil
284
+ @synchronize.call do
285
+ if resolved_hostname = @resolved_hostnames && @resolved_hostnames.select{ |k, v| v.addresses.include?(endpoint) }
286
+ result = resolved_hostname.shift[0]
287
+ end
288
+ end
289
+ result
219
290
  end
220
291
 
221
292
  # Perform a request.
@@ -233,8 +304,9 @@ module RightSupport::Net
233
304
  # Return the first non-nil value provided by the block.
234
305
  def request
235
306
  raise ArgumentError, "Must call this method with a block" unless block_given?
236
-
237
- resolve if need_resolve?
307
+ @synchronize.call do
308
+ resolve if need_resolve?
309
+ end
238
310
 
239
311
  exceptions = {}
240
312
  result = nil
@@ -243,12 +315,27 @@ module RightSupport::Net
243
315
 
244
316
  loop do
245
317
  if n > 0
246
- do_retry = @options[:retry] || DEFAULT_RETRY_PROC
247
- do_retry = do_retry.call((@ips.nil? || @ips.empty?) ? @endpoints : @ips, n) if do_retry.respond_to?(:call)
248
- break if (do_retry.is_a?(Integer) && n >= do_retry) || [nil, false].include?(do_retry)
318
+ retry_result = nil
319
+ @synchronize.call do
320
+ retry_result = @retry.call((@ips.nil? || @ips.empty?) ? @endpoints : @ips, n)
321
+ end
322
+
323
+ # FIX: this integer result logic is odd but is left for legacy support
324
+ # reasons. technically any retry proc could return integer and invoke
325
+ # this odd side-effect, which was only intended to support :retry as
326
+ # a literal integer. retry proc implementations should now only return
327
+ # boolean to avoid this weirdness. this logic should be removed in v3.
328
+ if retry_result.is_a?(Integer) && n >= retry_result
329
+ retry_result = false
330
+ end
331
+ break unless retry_result
249
332
  end
250
333
 
251
- endpoint, need_health_check = @policy.next
334
+ endpoint = nil
335
+ need_health_check = false
336
+ @synchronize.call do
337
+ endpoint, need_health_check = @policy.next
338
+ end
252
339
  break unless endpoint
253
340
 
254
341
  n += 1
@@ -257,41 +344,57 @@ module RightSupport::Net
257
344
  # Perform health check if necessary. Note that we guard this with a rescue, because the
258
345
  # health check may raise an exception and we want to log the exception info if this happens.
259
346
  if need_health_check
260
- begin
261
- unless @policy.health_check(endpoint)
262
- logger.error "RequestBalancer: health check failed to #{endpoint} because of non-true return value"
263
- next
347
+ hc_result = false
348
+ hc_exception = nil
349
+ @synchronize.call do
350
+ begin
351
+ # note that health-check can update the policy's good/bad state
352
+ # for endpoints.
353
+ hc_result = @policy.health_check(endpoint)
354
+ rescue Exception => e
355
+ hc_exception = e
264
356
  end
265
- rescue Exception => e
266
- logger.error "RequestBalancer: health check failed to #{endpoint} because of #{e.class.name}: #{e.message}"
267
- if fatal_exception?(e)
357
+ end
358
+ if hc_result
359
+ logger.info "RequestBalancer: health check succeeded to #{endpoint}"
360
+ elsif hc_exception
361
+ logger.error "RequestBalancer: health check failed to #{endpoint} because of #{hc_exception.class.name}: #{hc_exception.message}"
362
+ if fatal_exception?(hc_exception)
268
363
  # Fatal exceptions should still raise, even if only during a health check
269
- raise
364
+ raise hc_exception
270
365
  else
271
366
  # Nonfatal exceptions: keep on truckin'
367
+ exceptions[endpoint] ||= []
368
+ exceptions[endpoint] << hc_exception
369
+ debug_exception(hc_exception) if @debug_mode
272
370
  next
273
371
  end
372
+ else
373
+ logger.error "RequestBalancer: health check failed to #{endpoint} because of non-true return value"
374
+ next
274
375
  end
275
-
276
- logger.info "RequestBalancer: health check succeeded to #{endpoint}"
277
376
  end
278
377
 
279
378
  begin
280
- result = yield(endpoint)
281
- @policy.good(endpoint, t0, Time.now)
379
+ result = yield(endpoint)
380
+ @synchronize.call do
381
+ @policy.good(endpoint, t0, Time.now)
382
+ end
282
383
  complete = true
283
384
  break
284
385
  rescue Exception => e
285
386
  if to_raise = handle_exception(endpoint, e, t0)
286
387
  raise(to_raise)
287
388
  else
288
- @policy.bad(endpoint, t0, Time.now)
389
+ @synchronize.call do
390
+ @policy.bad(endpoint, t0, Time.now)
391
+ end
289
392
  exceptions[endpoint] ||= []
290
393
  exceptions[endpoint] << e
394
+ debug_exception(e) if @debug_mode
291
395
  end
292
396
  end
293
-
294
- end
397
+ end # loop
295
398
 
296
399
  return result if complete
297
400
 
@@ -302,15 +405,13 @@ module RightSupport::Net
302
405
  summary = []
303
406
  list.each { |e| summary << e.class }
304
407
  health = stats[endpoint] if stats[endpoint] != 'n/a'
305
- if @resolved_hostnames
306
- hostname = lookup_hostname(endpoint)
408
+ if hostname = lookup_hostname(endpoint)
307
409
  msg << "'#{hostname}' (#{endpoint}#{", "+health if health}) => [#{summary.uniq.join(', ')}]"
308
410
  else
309
411
  msg << "'#{endpoint}' #{"("+health+")" if health} => [#{summary.uniq.join(', ')}]"
310
412
  end
311
413
  end
312
- message = "Request failed after #{n} tries to #{exceptions.keys.size} endpoints: (#{msg.join(', ')})"
313
-
414
+ message = "Request failed after #{n} tries to #{exceptions.size} endpoints: (#{msg.join(', ')})"
314
415
  logger.error "RequestBalancer: #{message}"
315
416
  raise NoResult.new(message, exceptions)
316
417
  end
@@ -332,10 +433,15 @@ module RightSupport::Net
332
433
  #
333
434
  # {2 => "n/a", 1 => "n/a", 3 => "n/a"}
334
435
  def get_stats
335
- stats = {}
336
- @endpoints.each { |endpoint| stats[endpoint] = 'n/a' }
337
- stats = @policy.get_stats if @policy.respond_to?(:get_stats)
338
- stats
436
+ result = nil
437
+ if @policy.respond_to?(:get_stats)
438
+ @synchronize.call do
439
+ result = @policy.get_stats
440
+ end
441
+ else
442
+ result = @endpoints.inject({}) { |h, endpoint| h[endpoint] = 'n/a'; h }
443
+ end
444
+ result
339
445
  end
340
446
 
341
447
  protected
@@ -345,9 +451,13 @@ module RightSupport::Net
345
451
  def handle_exception(endpoint, e, t0)
346
452
  fatal = fatal_exception?(e)
347
453
  duration = sprintf('%.4f', Time.now - t0)
348
- ept = @resolved_hostnames ? "#{lookup_hostname(endpoint)}(#{endpoint})" : "#{endpoint}"
349
- msg = "RequestBalancer: rescued #{fatal ? 'fatal' : 'retryable'} #{e.class.name} " +
350
- "during request to #{ept}: #{e.message} after #{duration} seconds"
454
+ if hostname = lookup_hostname(endpoint)
455
+ ept = "#{hostname}(#{endpoint})"
456
+ else
457
+ ept = endpoint
458
+ end
459
+ msg = "RequestBalancer: rescued #{fatal ? 'fatal' : 'retryable'} #{e.class.name} " +
460
+ "during request to #{ept}: #{e.message} after #{duration} seconds"
351
461
  logger.error msg
352
462
  @options[:on_exception].call(fatal, e, endpoint) if @options[:on_exception]
353
463
 
@@ -385,6 +495,7 @@ module RightSupport::Net
385
495
  @ips = resolved_endpoints
386
496
  @policy.set_endpoints(@ips)
387
497
  @resolved_at = Time.now.to_i
498
+ true
388
499
  end
389
500
 
390
501
  def need_resolve?
@@ -402,6 +513,19 @@ module RightSupport::Net
402
513
  return true if optional && !callable.respond_to?(:call)
403
514
  return callable.respond_to?(:arity) && (callable.arity == arity)
404
515
  end
516
+
517
+ # free-threaded invocation of the provided callback.
518
+ def free_threaded
519
+ yield
520
+ end
521
+
522
+ # logs exception with backtrace truncation for debug purposes only.
523
+ def debug_exception(e)
524
+ if (lines = e.backtrace || []).size > 7
525
+ lines = lines[0, 7] << '...'
526
+ end
527
+ logger.debug((["#{e.class}: #{e.message}"] + lines).join("\n"))
528
+ end
405
529
  end # RequestBalancer
406
530
 
407
531
  end # RightSupport::Net