right_support 2.10.1 → 2.11.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/VERSION +1 -1
- data/lib/right_support/data/token.rb +51 -0
- data/lib/right_support/data.rb +1 -0
- data/lib/right_support/net/lb/base.rb +96 -0
- data/lib/right_support/net/lb/health_check.rb +22 -46
- data/lib/right_support/net/lb/round_robin.rb +5 -23
- data/lib/right_support/net/lb/sticky.rb +10 -27
- data/lib/right_support/net/lb.rb +2 -1
- data/lib/right_support/net/request_balancer.rb +184 -60
- data/lib/right_support/rack/request_logger.rb +221 -42
- data/lib/right_support/rack/request_tracker.rb +112 -25
- data/right_support.gemspec +11 -6
- data/spec/data/token_spec.rb +21 -0
- data/spec/net/{balancing → lb}/health_check_spec.rb +56 -21
- data/spec/net/{balancing → lb}/round_robin_spec.rb +0 -0
- data/spec/net/{balancing/sticky_policy_spec.rb → lb/sticky_spec.rb} +1 -1
- data/spec/net/request_balancer_spec.rb +161 -57
- data/spec/rack/request_logger_spec.rb +91 -27
- data/spec/rack/request_tracker_spec.rb +111 -1
- metadata +8 -5
@@ -20,6 +20,8 @@
|
|
20
20
|
# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
21
21
|
# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
22
22
|
|
23
|
+
require 'thread'
|
24
|
+
|
23
25
|
module RightSupport::Net
|
24
26
|
# Raised to indicate the (uncommon) error condition where a RequestBalancer rotated
|
25
27
|
# through EVERY URL in a list without getting a non-nil, non-timeout response.
|
@@ -110,32 +112,55 @@ module RightSupport::Net
|
|
110
112
|
end
|
111
113
|
end
|
112
114
|
|
115
|
+
# no-op health-check
|
113
116
|
DEFAULT_HEALTH_CHECK_PROC = Proc.new do |endpoint|
|
114
117
|
true
|
115
118
|
end
|
116
119
|
|
120
|
+
# debug mode
|
121
|
+
DEFAULT_DEBUG_MODE = ::ENV['DEBUG_MODE'] == 'true'
|
122
|
+
|
123
|
+
# default options
|
117
124
|
DEFAULT_OPTIONS = {
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
125
|
+
:policy => nil,
|
126
|
+
:retry => DEFAULT_RETRY_PROC,
|
127
|
+
:fatal => DEFAULT_FATAL_PROC,
|
128
|
+
:on_exception => nil,
|
129
|
+
:health_check => DEFAULT_HEALTH_CHECK_PROC,
|
130
|
+
:resolve => nil, # not resolving DNS to IP(s) by default; rely on consul, etc.
|
131
|
+
:thread_safe => false, # not thread-safe by default
|
132
|
+
:debug_mode => nil # infer from DEBUG_MODE
|
123
133
|
}
|
124
134
|
|
125
135
|
attr_reader :endpoints
|
126
136
|
|
127
137
|
# Return the actual, potentially DNS-resolved endpoints that are used for requests.
|
128
|
-
# If the balancer was constructed with :resolve=>
|
138
|
+
# If the balancer was constructed with :resolve=>nil, return self.endpoints.
|
129
139
|
#
|
130
140
|
# @return [Array] collection of endpoints
|
131
141
|
def resolved_endpoints
|
132
|
-
|
142
|
+
@synchronize.call do
|
143
|
+
(@ips.nil? || @ips.empty?) ? @endpoints : @ips
|
144
|
+
end
|
133
145
|
end
|
134
146
|
|
135
147
|
def self.request(endpoints, options={}, &block)
|
136
148
|
new(endpoints, options).request(&block)
|
137
149
|
end
|
138
150
|
|
151
|
+
# encapsulates exponential backoff/retry logic in a callback for use as the
|
152
|
+
# :retry option to request balancer.
|
153
|
+
def self.backoff_retry_callback(max_attempts)
|
154
|
+
lambda do |_, n|
|
155
|
+
if n < max_attempts
|
156
|
+
sleep 2 ** n
|
157
|
+
true
|
158
|
+
else
|
159
|
+
false
|
160
|
+
end
|
161
|
+
end
|
162
|
+
end
|
163
|
+
|
139
164
|
# Constructor. Accepts a sequence of request endpoints which it shuffles randomly at
|
140
165
|
# creation time; however, the ordering of the endpoints does not change thereafter
|
141
166
|
# and the sequence is tried from the beginning for every request.
|
@@ -147,24 +172,45 @@ module RightSupport::Net
|
|
147
172
|
# :resolve option allows the balancer to treat each backing server as a distinct
|
148
173
|
# endpoint with its own health state, etc.
|
149
174
|
#
|
150
|
-
#
|
151
|
-
#
|
152
|
-
#
|
153
|
-
#
|
154
|
-
#
|
155
|
-
#
|
156
|
-
#
|
157
|
-
#
|
158
|
-
#
|
159
|
-
#
|
160
|
-
#
|
161
|
-
#
|
162
|
-
#
|
163
|
-
#
|
175
|
+
# @param [String|Array] endpoints (e.g. HTTP URLs) for balancing
|
176
|
+
# @param [Hash] options
|
177
|
+
# @option options [Integer|Proc] :retry callback to determine whether to
|
178
|
+
# keep retrying; default is to try each endpoint at most once. can also be
|
179
|
+
# passed as an integer which provides a set number of attempts with no
|
180
|
+
# backoff. for retry with backoff use the backoff_retry_callback method.
|
181
|
+
# @option options [Integer] :resolve as a timeout in seconds to re-resolve
|
182
|
+
# DNS hostnames of endpoints to IP addresses; default is nil (never).
|
183
|
+
# @option options [TrueClass|FalseClass] :thread_safe as true to guard the
|
184
|
+
# balancer state with a mutex, false to be free-threaded (default). The
|
185
|
+
# default assumes no truly concurrent access: MRI does not run Ruby threads in
|
186
|
+
# parallel and/or apps use the Rainbows gem to ensure one process per API handler.
|
187
|
+
# @option options [TrueClass|FalseClass] :debug_mode as true to log
|
188
|
+
# additional error information as failures occur, false to only log error
|
189
|
+
# summary after all retries fail or nil to infer from DEBUG_MODE (default).
|
190
|
+
# @option options [Proc] :fatal callback to determine whether an exception
|
191
|
+
# is fatal and should not be retried.
|
192
|
+
# @option options [Proc] :on_exception notification hook that accepts three
|
193
|
+
# arguments: whether the exception is fatal, the exception itself, and the
|
194
|
+
# endpoint for which the exception happened
|
195
|
+
# @option options [Proc] :health_check callback that allows balancer to
|
196
|
+
# check an endpoint health; should raise an exception if the endpoint is
|
197
|
+
# not healthy
|
198
|
+
# @option options [Proc] :on_health_change callback that is made when the
|
199
|
+
# overall health of the endpoints transitions to a different level; its
|
200
|
+
# single argument contains the new minimum health level
|
164
201
|
def initialize(endpoints, options={})
|
165
202
|
@options = DEFAULT_OPTIONS.merge(options)
|
166
203
|
|
167
|
-
|
204
|
+
# provide thread-safety only when specified.
|
205
|
+
if @options[:thread_safe]
|
206
|
+
@mutex = ::Mutex.new
|
207
|
+
@synchronize = @mutex.method(:synchronize)
|
208
|
+
else
|
209
|
+
@synchronize = self.method(:free_threaded)
|
210
|
+
end
|
211
|
+
|
212
|
+
endpoints = Array(endpoints)
|
213
|
+
if endpoints.empty?
|
168
214
|
raise ArgumentError, "Must specify at least one endpoint"
|
169
215
|
end
|
170
216
|
|
@@ -172,11 +218,32 @@ module RightSupport::Net
|
|
172
218
|
@policy = @options[:policy]
|
173
219
|
@policy = @policy.new(options) if @policy.is_a?(Class)
|
174
220
|
|
221
|
+
if (@debug_mode = @options.delete(:debug_mode)).nil?
|
222
|
+
@debug_mode = DEFAULT_DEBUG_MODE
|
223
|
+
end
|
224
|
+
|
225
|
+
# convert retry counter to a simple retry callback, if necessary.
|
226
|
+
@retry = @options.delete(:retry) || DEFAULT_RETRY_PROC
|
227
|
+
unless @retry.kind_of?(Proc)
|
228
|
+
# ensure that the count is captured by callback for safety.
|
229
|
+
@retry = Integer(@retry)
|
230
|
+
retry_proc = lambda do |max_attempts|
|
231
|
+
lambda do |ep, n|
|
232
|
+
n < max_attempts
|
233
|
+
end
|
234
|
+
end.call(@retry)
|
235
|
+
@retry = retry_proc # and now the type is always Proc
|
236
|
+
end
|
237
|
+
|
175
238
|
unless test_policy_duck_type(@policy)
|
176
239
|
raise ArgumentError, ":policy must be a class/object that responds to :next, :good and :bad"
|
177
240
|
end
|
178
241
|
|
179
|
-
|
242
|
+
# note @retry is now always defined as a callback. the legacy code always
|
243
|
+
# had a default retry but it could have been defined later during actual
|
244
|
+
# execution instead of being concretely defined on initialization. now it
|
245
|
+
# is always defined on initialization.
|
246
|
+
unless test_callable_arity(@retry, 2, false)
|
180
247
|
raise ArgumentError, ":retry callback must accept two parameters"
|
181
248
|
end
|
182
249
|
|
@@ -209,13 +276,17 @@ module RightSupport::Net
|
|
209
276
|
|
210
277
|
# Un-resolve an IP address.
|
211
278
|
#
|
212
|
-
#
|
213
|
-
# endpoint:: a network endpoint (e.g. HTTP URL) to be un-resolved
|
279
|
+
# @param [String] endpoint (e.g. HTTP URL) to be un-resolved
|
214
280
|
#
|
215
|
-
#
|
216
|
-
# Return the first hostname that resolved to the IP (there should only ever be one)
|
281
|
+
# @return [String] the first hostname that resolved to the IP (there should be at most one) or nil
|
217
282
|
def lookup_hostname(endpoint)
|
218
|
-
|
283
|
+
result = nil
|
284
|
+
@synchronize.call do
|
285
|
+
if resolved_hostname = @resolved_hostnames && @resolved_hostnames.select{ |k, v| v.addresses.include?(endpoint) }
|
286
|
+
result = resolved_hostname.shift[0]
|
287
|
+
end
|
288
|
+
end
|
289
|
+
result
|
219
290
|
end
|
220
291
|
|
221
292
|
# Perform a request.
|
@@ -233,8 +304,9 @@ module RightSupport::Net
|
|
233
304
|
# Return the first non-nil value provided by the block.
|
234
305
|
def request
|
235
306
|
raise ArgumentError, "Must call this method with a block" unless block_given?
|
236
|
-
|
237
|
-
|
307
|
+
@synchronize.call do
|
308
|
+
resolve if need_resolve?
|
309
|
+
end
|
238
310
|
|
239
311
|
exceptions = {}
|
240
312
|
result = nil
|
@@ -243,12 +315,27 @@ module RightSupport::Net
|
|
243
315
|
|
244
316
|
loop do
|
245
317
|
if n > 0
|
246
|
-
|
247
|
-
|
248
|
-
|
318
|
+
retry_result = nil
|
319
|
+
@synchronize.call do
|
320
|
+
retry_result = @retry.call((@ips.nil? || @ips.empty?) ? @endpoints : @ips, n)
|
321
|
+
end
|
322
|
+
|
323
|
+
# FIX: this integer result logic is odd but is left for legacy support
|
324
|
+
# reasons. technically any retry proc could return integer and invoke
|
325
|
+
# this odd side-effect, which was only intended to support :retry as
|
326
|
+
# a literal integer. retry proc implementations should now only return
|
327
|
+
# boolean to avoid this weirdness. this logic should be removed in v3.
|
328
|
+
if retry_result.is_a?(Integer) && n >= retry_result
|
329
|
+
retry_result = false
|
330
|
+
end
|
331
|
+
break unless retry_result
|
249
332
|
end
|
250
333
|
|
251
|
-
endpoint
|
334
|
+
endpoint = nil
|
335
|
+
need_health_check = false
|
336
|
+
@synchronize.call do
|
337
|
+
endpoint, need_health_check = @policy.next
|
338
|
+
end
|
252
339
|
break unless endpoint
|
253
340
|
|
254
341
|
n += 1
|
@@ -257,41 +344,57 @@ module RightSupport::Net
|
|
257
344
|
# Perform health check if necessary. Note that we guard this with a rescue, because the
|
258
345
|
# health check may raise an exception and we want to log the exception info if this happens.
|
259
346
|
if need_health_check
|
260
|
-
|
261
|
-
|
262
|
-
|
263
|
-
|
347
|
+
hc_result = false
|
348
|
+
hc_exception = nil
|
349
|
+
@synchronize.call do
|
350
|
+
begin
|
351
|
+
# note that health-check can update the policy's good/bad state
|
352
|
+
# for endpoints.
|
353
|
+
hc_result = @policy.health_check(endpoint)
|
354
|
+
rescue Exception => e
|
355
|
+
hc_exception = e
|
264
356
|
end
|
265
|
-
|
266
|
-
|
267
|
-
|
357
|
+
end
|
358
|
+
if hc_result
|
359
|
+
logger.info "RequestBalancer: health check succeeded to #{endpoint}"
|
360
|
+
elsif hc_exception
|
361
|
+
logger.error "RequestBalancer: health check failed to #{endpoint} because of #{hc_exception.class.name}: #{hc_exception.message}"
|
362
|
+
if fatal_exception?(hc_exception)
|
268
363
|
# Fatal exceptions should still raise, even if only during a health check
|
269
|
-
raise
|
364
|
+
raise hc_exception
|
270
365
|
else
|
271
366
|
# Nonfatal exceptions: keep on truckin'
|
367
|
+
exceptions[endpoint] ||= []
|
368
|
+
exceptions[endpoint] << hc_exception
|
369
|
+
debug_exception(hc_exception) if @debug_mode
|
272
370
|
next
|
273
371
|
end
|
372
|
+
else
|
373
|
+
logger.error "RequestBalancer: health check failed to #{endpoint} because of non-true return value"
|
374
|
+
next
|
274
375
|
end
|
275
|
-
|
276
|
-
logger.info "RequestBalancer: health check succeeded to #{endpoint}"
|
277
376
|
end
|
278
377
|
|
279
378
|
begin
|
280
|
-
result
|
281
|
-
@
|
379
|
+
result = yield(endpoint)
|
380
|
+
@synchronize.call do
|
381
|
+
@policy.good(endpoint, t0, Time.now)
|
382
|
+
end
|
282
383
|
complete = true
|
283
384
|
break
|
284
385
|
rescue Exception => e
|
285
386
|
if to_raise = handle_exception(endpoint, e, t0)
|
286
387
|
raise(to_raise)
|
287
388
|
else
|
288
|
-
@
|
389
|
+
@synchronize.call do
|
390
|
+
@policy.bad(endpoint, t0, Time.now)
|
391
|
+
end
|
289
392
|
exceptions[endpoint] ||= []
|
290
393
|
exceptions[endpoint] << e
|
394
|
+
debug_exception(e) if @debug_mode
|
291
395
|
end
|
292
396
|
end
|
293
|
-
|
294
|
-
end
|
397
|
+
end # loop
|
295
398
|
|
296
399
|
return result if complete
|
297
400
|
|
@@ -302,15 +405,13 @@ module RightSupport::Net
|
|
302
405
|
summary = []
|
303
406
|
list.each { |e| summary << e.class }
|
304
407
|
health = stats[endpoint] if stats[endpoint] != 'n/a'
|
305
|
-
if
|
306
|
-
hostname = lookup_hostname(endpoint)
|
408
|
+
if hostname = lookup_hostname(endpoint)
|
307
409
|
msg << "'#{hostname}' (#{endpoint}#{", "+health if health}) => [#{summary.uniq.join(', ')}]"
|
308
410
|
else
|
309
411
|
msg << "'#{endpoint}' #{"("+health+")" if health} => [#{summary.uniq.join(', ')}]"
|
310
412
|
end
|
311
413
|
end
|
312
|
-
message = "Request failed after #{n} tries to #{exceptions.
|
313
|
-
|
414
|
+
message = "Request failed after #{n} tries to #{exceptions.size} endpoints: (#{msg.join(', ')})"
|
314
415
|
logger.error "RequestBalancer: #{message}"
|
315
416
|
raise NoResult.new(message, exceptions)
|
316
417
|
end
|
@@ -332,10 +433,15 @@ module RightSupport::Net
|
|
332
433
|
#
|
333
434
|
# {2 => "n/a", 1 => "n/a", 3 => "n/a"}
|
334
435
|
def get_stats
|
335
|
-
|
336
|
-
@
|
337
|
-
|
338
|
-
|
436
|
+
result = nil
|
437
|
+
if @policy.respond_to?(:get_stats)
|
438
|
+
@synchronize.call do
|
439
|
+
result = @policy.get_stats
|
440
|
+
end
|
441
|
+
else
|
442
|
+
result = @endpoints.inject({}) { |h, endpoint| h[endpoint] = 'n/a'; h }
|
443
|
+
end
|
444
|
+
result
|
339
445
|
end
|
340
446
|
|
341
447
|
protected
|
@@ -345,9 +451,13 @@ module RightSupport::Net
|
|
345
451
|
def handle_exception(endpoint, e, t0)
|
346
452
|
fatal = fatal_exception?(e)
|
347
453
|
duration = sprintf('%.4f', Time.now - t0)
|
348
|
-
|
349
|
-
|
350
|
-
|
454
|
+
if hostname = lookup_hostname(endpoint)
|
455
|
+
ept = "#{hostname}(#{endpoint})"
|
456
|
+
else
|
457
|
+
ept = endpoint
|
458
|
+
end
|
459
|
+
msg = "RequestBalancer: rescued #{fatal ? 'fatal' : 'retryable'} #{e.class.name} " +
|
460
|
+
"during request to #{ept}: #{e.message} after #{duration} seconds"
|
351
461
|
logger.error msg
|
352
462
|
@options[:on_exception].call(fatal, e, endpoint) if @options[:on_exception]
|
353
463
|
|
@@ -385,6 +495,7 @@ module RightSupport::Net
|
|
385
495
|
@ips = resolved_endpoints
|
386
496
|
@policy.set_endpoints(@ips)
|
387
497
|
@resolved_at = Time.now.to_i
|
498
|
+
true
|
388
499
|
end
|
389
500
|
|
390
501
|
def need_resolve?
|
@@ -402,6 +513,19 @@ module RightSupport::Net
|
|
402
513
|
return true if optional && !callable.respond_to?(:call)
|
403
514
|
return callable.respond_to?(:arity) && (callable.arity == arity)
|
404
515
|
end
|
516
|
+
|
517
|
+
# free-threaded invocation of the provided callback.
|
518
|
+
def free_threaded
|
519
|
+
yield
|
520
|
+
end
|
521
|
+
|
522
|
+
# logs exception with backtrace truncation for debug purposes only.
|
523
|
+
def debug_exception(e)
|
524
|
+
if (lines = e.backtrace || []).size > 7
|
525
|
+
lines = lines[0, 7] << '...'
|
526
|
+
end
|
527
|
+
logger.debug((["#{e.class}: #{e.message}"] + lines).join("\n"))
|
528
|
+
end
|
405
529
|
end # RequestBalancer
|
406
530
|
|
407
531
|
end # RightSupport::Net
|