right_support 2.10.1 → 2.11.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -20,6 +20,8 @@
20
20
  # OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
21
21
  # WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
22
22
 
23
+ require 'thread'
24
+
23
25
  module RightSupport::Net
24
26
  # Raised to indicate the (uncommon) error condition where a RequestBalancer rotated
25
27
  # through EVERY URL in a list without getting a non-nil, non-timeout response.
@@ -110,32 +112,55 @@ module RightSupport::Net
110
112
  end
111
113
  end
112
114
 
115
+ # no-op health-check
113
116
  DEFAULT_HEALTH_CHECK_PROC = Proc.new do |endpoint|
114
117
  true
115
118
  end
116
119
 
120
+ # default debug mode; enabled when the DEBUG_MODE environment variable is 'true'
121
+ DEFAULT_DEBUG_MODE = ::ENV['DEBUG_MODE'] == 'true'
122
+
123
+ # default options
117
124
  DEFAULT_OPTIONS = {
118
- :policy => nil,
119
- :retry => DEFAULT_RETRY_PROC,
120
- :fatal => DEFAULT_FATAL_PROC,
121
- :on_exception => nil,
122
- :health_check => DEFAULT_HEALTH_CHECK_PROC
125
+ :policy => nil,
126
+ :retry => DEFAULT_RETRY_PROC,
127
+ :fatal => DEFAULT_FATAL_PROC,
128
+ :on_exception => nil,
129
+ :health_check => DEFAULT_HEALTH_CHECK_PROC,
130
+ :resolve => nil, # not resolving DNS to IP(s) by default; rely on consul, etc.
131
+ :thread_safe => false, # not thread-safe by default,
132
+ :debug_mode => nil # infer from DEBUG_MODE
123
133
  }
124
134
 
125
135
  attr_reader :endpoints
126
136
 
127
137
  # Return the actual, potentially DNS-resolved endpoints that are used for requests.
128
- # If the balancer was constructed with :resolve=>false, return self.endpoints.
138
+ # If the balancer was constructed with :resolve=>nil, return self.endpoints.
129
139
  #
130
140
  # @return [Array] collection of endpoints
131
141
  def resolved_endpoints
132
- (@ips.nil? || @ips.empty?) ? @endpoints : @ips
142
+ @synchronize.call do
143
+ (@ips.nil? || @ips.empty?) ? @endpoints : @ips
144
+ end
133
145
  end
134
146
 
135
147
  def self.request(endpoints, options={}, &block)
136
148
  new(endpoints, options).request(&block)
137
149
  end
138
150
 
151
+ # encapsulates exponential backoff/retry logic in a callback for use as the
152
+ # :retry option to request balancer.
153
+ def self.backoff_retry_callback(max_attempts)
154
+ lambda do |_, n|
155
+ if n < max_attempts
156
+ sleep 2 ** n
157
+ true
158
+ else
159
+ false
160
+ end
161
+ end
162
+ end
163
+
139
164
  # Constructor. Accepts a sequence of request endpoints which it shuffles randomly at
140
165
  # creation time; however, the ordering of the endpoints does not change thereafter
141
166
  # and the sequence is tried from the beginning for every request.
@@ -147,24 +172,45 @@ module RightSupport::Net
147
172
  # :resolve option allows the balancer to treat each backing server as a distinct
148
173
  # endpoint with its own health state, etc.
149
174
  #
150
- # === Parameters
151
- # endpoints(Array):: a set of network endpoints (e.g. HTTP URLs) to be load-balanced
152
- #
153
- # === Options
154
- # retry:: a Class, array of Class or decision Proc to determine whether to keep retrying; default is to try all endpoints
155
- # fatal:: a Class, array of Class, or decision Proc to determine whether an exception is fatal and should not be retried
156
- # resolve(Integer):: how often to re-resolve DNS hostnames of endpoints; default is nil (never resolve)
157
- # on_exception(Proc):: notification hook that accepts three arguments: whether the exception is fatal, the exception itself,
158
- # and the endpoint for which the exception happened
159
- # health_check(Proc):: callback that allows balancer to check an endpoint health; should raise an exception if the endpoint
160
- # is not healthy
161
- # on_health_change(Proc):: callback that is made when the overall health of the endpoints transition to a different level;
162
- # its single argument contains the new minimum health level
163
- #
175
+ # @param [String|Array] endpoints (e.g. HTTP URLs) for balancing
176
+ # @param [Hash] options
177
+ # @option options [Integer|Proc] :retry callback to determine whether to
178
+ # keep retrying; default is to try each endpoint at most once. can also be
179
+ # passed as an integer which provides a set number of attempts with no
180
+ # backoff. for retry with backoff use the backoff_retry_callback method.
181
+ # @option options [Integer] :resolve as a timeout in seconds to re-resolve
182
+ # DNS hostnames of endpoints to IP addresses; default is nil (never).
183
+ # @option options [TrueClass|FalseClass] :thread_safe as true to guard the
184
+ # balancer state with a mutex, false to be free-threaded (default). The
185
+ # default is only safe when the balancer is never shared between threads,
186
+ # e.g. apps that use the Rainbows gem with one process per API handler.
187
+ # @option options [TrueClass|FalseClass] :debug_mode as true to log
188
+ # additional error information as failures occur, false to only log error
189
+ # summary after all retries fail or nil to infer from the DEBUG_MODE environment variable (default).
190
+ # @option options [Proc] :fatal callback to determine whether an exception
191
+ # is fatal and should not be retried.
192
+ # @option options [Proc] :on_exception notification hook that accepts three
193
+ # arguments: whether the exception is fatal, the exception itself, and the
194
+ # endpoint for which the exception happened
195
+ # @option options [Proc] :health_check callback that allows balancer to
196
+ # check an endpoint health; should raise an exception if the endpoint is
197
+ # not healthy
198
+ # @option options [Proc] :on_health_change callback that is made when the
199
+ # overall health of the endpoints transition to a different level; its
200
+ # single argument contains the new minimum health level
164
201
  def initialize(endpoints, options={})
165
202
  @options = DEFAULT_OPTIONS.merge(options)
166
203
 
167
- unless endpoints && !endpoints.empty?
204
+ # provide thread-safety only when specified.
205
+ if @options[:thread_safe]
206
+ @mutex = ::Mutex.new
207
+ @synchronize = @mutex.method(:synchronize)
208
+ else
209
+ @synchronize = self.method(:free_threaded)
210
+ end
211
+
212
+ endpoints = Array(endpoints)
213
+ if endpoints.empty?
168
214
  raise ArgumentError, "Must specify at least one endpoint"
169
215
  end
170
216
 
@@ -172,11 +218,32 @@ module RightSupport::Net
172
218
  @policy = @options[:policy]
173
219
  @policy = @policy.new(options) if @policy.is_a?(Class)
174
220
 
221
+ if (@debug_mode = @options.delete(:debug_mode)).nil?
222
+ @debug_mode = DEFAULT_DEBUG_MODE
223
+ end
224
+
225
+ # convert retry counter to a simple retry callback, if necessary.
226
+ @retry = @options.delete(:retry) || DEFAULT_RETRY_PROC
227
+ unless @retry.kind_of?(Proc)
228
+ # ensure that the count is captured by callback for safety.
229
+ @retry = Integer(@retry)
230
+ retry_proc = lambda do |max_attempts|
231
+ lambda do |ep, n|
232
+ n < max_attempts
233
+ end
234
+ end.call(@retry)
235
+ @retry = retry_proc # and now the type always Proc
236
+ end
237
+
175
238
  unless test_policy_duck_type(@policy)
176
239
  raise ArgumentError, ":policy must be a class/object that responds to :next, :good and :bad"
177
240
  end
178
241
 
179
- unless test_callable_arity(options[:retry], 2)
242
+ # note @retry is now always defined as a callback. the legacy code always
243
+ # had a default retry but it could have been defined later during actual
244
+ # execution instead of being concretely defined on initialization. now it
245
+ # is always defined on initialization.
246
+ unless test_callable_arity(@retry, 2, false)
180
247
  raise ArgumentError, ":retry callback must accept two parameters"
181
248
  end
182
249
 
@@ -209,13 +276,17 @@ module RightSupport::Net
209
276
 
210
277
  # Un-resolve an IP address.
211
278
  #
212
- # === Parameters
213
- # endpoint:: a network endpoint (e.g. HTTP URL) to be un-resolved
279
+ # @param [String] endpoint (e.g. HTTP URL) to be un-resolved
214
280
  #
215
- # === Return
216
- # Return the first hostname that resolved to the IP (there should only ever be one)
281
+ # @return [String, nil] the first hostname that resolved to the IP (there should be at most one), or nil if @resolved_hostnames is not set
217
282
  def lookup_hostname(endpoint)
218
- @resolved_hostnames.select{ |k,v| v.addresses.include?(endpoint) }.shift[0]
283
+ result = nil
284
+ @synchronize.call do
285
+ if resolved_hostname = @resolved_hostnames && @resolved_hostnames.select{ |k, v| v.addresses.include?(endpoint) }
286
+ result = resolved_hostname.shift[0]
287
+ end
288
+ end
289
+ result
219
290
  end
220
291
 
221
292
  # Perform a request.
@@ -233,8 +304,9 @@ module RightSupport::Net
233
304
  # Return the first non-nil value provided by the block.
234
305
  def request
235
306
  raise ArgumentError, "Must call this method with a block" unless block_given?
236
-
237
- resolve if need_resolve?
307
+ @synchronize.call do
308
+ resolve if need_resolve?
309
+ end
238
310
 
239
311
  exceptions = {}
240
312
  result = nil
@@ -243,12 +315,27 @@ module RightSupport::Net
243
315
 
244
316
  loop do
245
317
  if n > 0
246
- do_retry = @options[:retry] || DEFAULT_RETRY_PROC
247
- do_retry = do_retry.call((@ips.nil? || @ips.empty?) ? @endpoints : @ips, n) if do_retry.respond_to?(:call)
248
- break if (do_retry.is_a?(Integer) && n >= do_retry) || [nil, false].include?(do_retry)
318
+ retry_result = nil
319
+ @synchronize.call do
320
+ retry_result = @retry.call((@ips.nil? || @ips.empty?) ? @endpoints : @ips, n)
321
+ end
322
+
323
+ # FIX: this integer result logic is odd but is left for legacy support
324
+ # reasons. technically any retry proc could return integer and invoke
325
+ # this odd side-effect, which was only intended to support :retry as
326
+ # a literal integer. retry proc implementations should now only return
327
+ # boolean to avoid this weirdness. this logic should be removed in v3.
328
+ if retry_result.is_a?(Integer) && n >= retry_result
329
+ retry_result = false
330
+ end
331
+ break unless retry_result
249
332
  end
250
333
 
251
- endpoint, need_health_check = @policy.next
334
+ endpoint = nil
335
+ need_health_check = false
336
+ @synchronize.call do
337
+ endpoint, need_health_check = @policy.next
338
+ end
252
339
  break unless endpoint
253
340
 
254
341
  n += 1
@@ -257,41 +344,57 @@ module RightSupport::Net
257
344
  # Perform health check if necessary. Note that we guard this with a rescue, because the
258
345
  # health check may raise an exception and we want to log the exception info if this happens.
259
346
  if need_health_check
260
- begin
261
- unless @policy.health_check(endpoint)
262
- logger.error "RequestBalancer: health check failed to #{endpoint} because of non-true return value"
263
- next
347
+ hc_result = false
348
+ hc_exception = nil
349
+ @synchronize.call do
350
+ begin
351
+ # note that health-check can update the policy's good/bad state
352
+ # for endpoints.
353
+ hc_result = @policy.health_check(endpoint)
354
+ rescue Exception => e
355
+ hc_exception = e
264
356
  end
265
- rescue Exception => e
266
- logger.error "RequestBalancer: health check failed to #{endpoint} because of #{e.class.name}: #{e.message}"
267
- if fatal_exception?(e)
357
+ end
358
+ if hc_result
359
+ logger.info "RequestBalancer: health check succeeded to #{endpoint}"
360
+ elsif hc_exception
361
+ logger.error "RequestBalancer: health check failed to #{endpoint} because of #{hc_exception.class.name}: #{hc_exception.message}"
362
+ if fatal_exception?(hc_exception)
268
363
  # Fatal exceptions should still raise, even if only during a health check
269
- raise
364
+ raise hc_exception
270
365
  else
271
366
  # Nonfatal exceptions: keep on truckin'
367
+ exceptions[endpoint] ||= []
368
+ exceptions[endpoint] << hc_exception
369
+ debug_exception(hc_exception) if @debug_mode
272
370
  next
273
371
  end
372
+ else
373
+ logger.error "RequestBalancer: health check failed to #{endpoint} because of non-true return value"
374
+ next
274
375
  end
275
-
276
- logger.info "RequestBalancer: health check succeeded to #{endpoint}"
277
376
  end
278
377
 
279
378
  begin
280
- result = yield(endpoint)
281
- @policy.good(endpoint, t0, Time.now)
379
+ result = yield(endpoint)
380
+ @synchronize.call do
381
+ @policy.good(endpoint, t0, Time.now)
382
+ end
282
383
  complete = true
283
384
  break
284
385
  rescue Exception => e
285
386
  if to_raise = handle_exception(endpoint, e, t0)
286
387
  raise(to_raise)
287
388
  else
288
- @policy.bad(endpoint, t0, Time.now)
389
+ @synchronize.call do
390
+ @policy.bad(endpoint, t0, Time.now)
391
+ end
289
392
  exceptions[endpoint] ||= []
290
393
  exceptions[endpoint] << e
394
+ debug_exception(e) if @debug_mode
291
395
  end
292
396
  end
293
-
294
- end
397
+ end # loop
295
398
 
296
399
  return result if complete
297
400
 
@@ -302,15 +405,13 @@ module RightSupport::Net
302
405
  summary = []
303
406
  list.each { |e| summary << e.class }
304
407
  health = stats[endpoint] if stats[endpoint] != 'n/a'
305
- if @resolved_hostnames
306
- hostname = lookup_hostname(endpoint)
408
+ if hostname = lookup_hostname(endpoint)
307
409
  msg << "'#{hostname}' (#{endpoint}#{", "+health if health}) => [#{summary.uniq.join(', ')}]"
308
410
  else
309
411
  msg << "'#{endpoint}' #{"("+health+")" if health} => [#{summary.uniq.join(', ')}]"
310
412
  end
311
413
  end
312
- message = "Request failed after #{n} tries to #{exceptions.keys.size} endpoints: (#{msg.join(', ')})"
313
-
414
+ message = "Request failed after #{n} tries to #{exceptions.size} endpoints: (#{msg.join(', ')})"
314
415
  logger.error "RequestBalancer: #{message}"
315
416
  raise NoResult.new(message, exceptions)
316
417
  end
@@ -332,10 +433,15 @@ module RightSupport::Net
332
433
  #
333
434
  # {2 => "n/a", 1 => "n/a", 3 => "n/a"}
334
435
  def get_stats
335
- stats = {}
336
- @endpoints.each { |endpoint| stats[endpoint] = 'n/a' }
337
- stats = @policy.get_stats if @policy.respond_to?(:get_stats)
338
- stats
436
+ result = nil
437
+ if @policy.respond_to?(:get_stats)
438
+ @synchronize.call do
439
+ result = @policy.get_stats
440
+ end
441
+ else
442
+ result = @endpoints.inject({}) { |h, endpoint| h[endpoint] = 'n/a'; h }
443
+ end
444
+ result
339
445
  end
340
446
 
341
447
  protected
@@ -345,9 +451,13 @@ module RightSupport::Net
345
451
  def handle_exception(endpoint, e, t0)
346
452
  fatal = fatal_exception?(e)
347
453
  duration = sprintf('%.4f', Time.now - t0)
348
- ept = @resolved_hostnames ? "#{lookup_hostname(endpoint)}(#{endpoint})" : "#{endpoint}"
349
- msg = "RequestBalancer: rescued #{fatal ? 'fatal' : 'retryable'} #{e.class.name} " +
350
- "during request to #{ept}: #{e.message} after #{duration} seconds"
454
+ if hostname = lookup_hostname(endpoint)
455
+ ept = "#{hostname}(#{endpoint})"
456
+ else
457
+ ept = endpoint
458
+ end
459
+ msg = "RequestBalancer: rescued #{fatal ? 'fatal' : 'retryable'} #{e.class.name} " +
460
+ "during request to #{ept}: #{e.message} after #{duration} seconds"
351
461
  logger.error msg
352
462
  @options[:on_exception].call(fatal, e, endpoint) if @options[:on_exception]
353
463
 
@@ -385,6 +495,7 @@ module RightSupport::Net
385
495
  @ips = resolved_endpoints
386
496
  @policy.set_endpoints(@ips)
387
497
  @resolved_at = Time.now.to_i
498
+ true
388
499
  end
389
500
 
390
501
  def need_resolve?
@@ -402,6 +513,19 @@ module RightSupport::Net
402
513
  return true if optional && !callable.respond_to?(:call)
403
514
  return callable.respond_to?(:arity) && (callable.arity == arity)
404
515
  end
516
+
517
+ # free-threaded invocation of the provided callback.
518
+ def free_threaded
519
+ yield
520
+ end
521
+
522
+ # logs exception with backtrace truncation for debug purposes only.
523
+ def debug_exception(e)
524
+ if (lines = e.backtrace || []).size > 7
525
+ lines = lines[0, 7] << '...'
526
+ end
527
+ logger.debug((["#{e.class}: #{e.message}"] + lines).join("\n"))
528
+ end
405
529
  end # RequestBalancer
406
530
 
407
531
  end # RightScale