right_support 2.10.1 → 2.11.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/VERSION +1 -1
- data/lib/right_support/data/token.rb +51 -0
- data/lib/right_support/data.rb +1 -0
- data/lib/right_support/net/lb/base.rb +96 -0
- data/lib/right_support/net/lb/health_check.rb +22 -46
- data/lib/right_support/net/lb/round_robin.rb +5 -23
- data/lib/right_support/net/lb/sticky.rb +10 -27
- data/lib/right_support/net/lb.rb +2 -1
- data/lib/right_support/net/request_balancer.rb +184 -60
- data/lib/right_support/rack/request_logger.rb +221 -42
- data/lib/right_support/rack/request_tracker.rb +112 -25
- data/right_support.gemspec +11 -6
- data/spec/data/token_spec.rb +21 -0
- data/spec/net/{balancing → lb}/health_check_spec.rb +56 -21
- data/spec/net/{balancing → lb}/round_robin_spec.rb +0 -0
- data/spec/net/{balancing/sticky_policy_spec.rb → lb/sticky_spec.rb} +1 -1
- data/spec/net/request_balancer_spec.rb +161 -57
- data/spec/rack/request_logger_spec.rb +91 -27
- data/spec/rack/request_tracker_spec.rb +111 -1
- metadata +8 -5
@@ -20,6 +20,8 @@
|
|
20
20
|
# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
21
21
|
# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
22
22
|
|
23
|
+
require 'thread'
|
24
|
+
|
23
25
|
module RightSupport::Net
|
24
26
|
# Raised to indicate the (uncommon) error condition where a RequestBalancer rotated
|
25
27
|
# through EVERY URL in a list without getting a non-nil, non-timeout response.
|
@@ -110,32 +112,55 @@ module RightSupport::Net
|
|
110
112
|
end
|
111
113
|
end
|
112
114
|
|
115
|
+
# no-op health-check
|
113
116
|
DEFAULT_HEALTH_CHECK_PROC = Proc.new do |endpoint|
|
114
117
|
true
|
115
118
|
end
|
116
119
|
|
120
|
+
# debug mode
|
121
|
+
DEFAULT_DEBUG_MODE = ::ENV['DEBUG_MODE'] == 'true'
|
122
|
+
|
123
|
+
# default options
|
117
124
|
DEFAULT_OPTIONS = {
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
125
|
+
:policy => nil,
|
126
|
+
:retry => DEFAULT_RETRY_PROC,
|
127
|
+
:fatal => DEFAULT_FATAL_PROC,
|
128
|
+
:on_exception => nil,
|
129
|
+
:health_check => DEFAULT_HEALTH_CHECK_PROC,
|
130
|
+
:resolve => nil, # not resolving DNS to IP(s) by default; rely on consul, etc.
|
131
|
+
:thread_safe => false, # not thread-safe by default
|
132
|
+
:debug_mode => nil # infer from DEBUG_MODE
|
123
133
|
}
|
124
134
|
|
125
135
|
attr_reader :endpoints
|
126
136
|
|
127
137
|
# Return the actual, potentially DNS-resolved endpoints that are used for requests.
|
128
|
-
# If the balancer was constructed with :resolve=>
|
138
|
+
# If the balancer was constructed with :resolve=>nil, return self.endpoints.
|
129
139
|
#
|
130
140
|
# @return [Array] collection of endpoints
|
131
141
|
def resolved_endpoints
|
132
|
-
|
142
|
+
@synchronize.call do
|
143
|
+
(@ips.nil? || @ips.empty?) ? @endpoints : @ips
|
144
|
+
end
|
133
145
|
end
|
134
146
|
|
135
147
|
def self.request(endpoints, options={}, &block)
|
136
148
|
new(endpoints, options).request(&block)
|
137
149
|
end
|
138
150
|
|
151
|
+
# encapsulates exponential backoff/retry logic in a callback for use as the
|
152
|
+
# :retry option to request balancer.
|
153
|
+
def self.backoff_retry_callback(max_attempts)
|
154
|
+
lambda do |_, n|
|
155
|
+
if n < max_attempts
|
156
|
+
sleep 2 ** n
|
157
|
+
true
|
158
|
+
else
|
159
|
+
false
|
160
|
+
end
|
161
|
+
end
|
162
|
+
end
|
163
|
+
|
139
164
|
# Constructor. Accepts a sequence of request endpoints which it shuffles randomly at
|
140
165
|
# creation time; however, the ordering of the endpoints does not change thereafter
|
141
166
|
# and the sequence is tried from the beginning for every request.
|
@@ -147,24 +172,45 @@ module RightSupport::Net
|
|
147
172
|
# :resolve option allows the balancer to treat each backing server as a distinct
|
148
173
|
# endpoint with its own health state, etc.
|
149
174
|
#
|
150
|
-
#
|
151
|
-
#
|
152
|
-
#
|
153
|
-
#
|
154
|
-
#
|
155
|
-
#
|
156
|
-
#
|
157
|
-
#
|
158
|
-
#
|
159
|
-
#
|
160
|
-
#
|
161
|
-
#
|
162
|
-
#
|
163
|
-
#
|
175
|
+
# @param [String|Array] endpoints (e.g. HTTP URLs) for balancing
|
176
|
+
# @param [Hash] options
|
177
|
+
# @option options [Integer|Proc] :retry callback to determine whether to
|
178
|
+
# keep retrying; default is to try each endpoint at most once. can also be
|
179
|
+
# passed as an integer which provides a set number of attempts with no
|
180
|
+
# backoff. for retry with backoff use the backoff_retry_callback method.
|
181
|
+
# @option options [Integer] :resolve as a timeout in seconds to re-resolve
|
182
|
+
# DNS hostnames of endpoints to IP addresses; default is nil (never).
|
183
|
+
# @option options [TrueClass|FalseClass] :thread_safe as true to guard the
|
184
|
+
# balancer state with a mutex, false to be free-threaded (default). Ruby is
|
185
|
+
# generally thread-safe because real concurrency does not exist and/or apps
|
186
|
+
# consistently use the Rainbows gem to ensure one process per API handler.
|
187
|
+
# @option options [TrueClass|FalseClass] :debug_mode as true to log
|
188
|
+
# additional error information as failures occur, false to only log error
|
189
|
+
# summary after all retries fail or nil to infer from DEBUG_MODE (default).
|
190
|
+
# @option options [Proc] :fatal callback to determine whether an exception
|
191
|
+
# is fatal and should not be retried.
|
192
|
+
# @option options [Proc] :on_exception notification hook that accepts three
|
193
|
+
# arguments: whether the exception is fatal, the exception itself, and the
|
194
|
+
# endpoint for which the exception happened
|
195
|
+
# @option options [Proc] :health_check callback that allows balancer to
|
196
|
+
# check an endpoint health; should raise an exception if the endpoint is
|
197
|
+
# not healthy
|
198
|
+
# @option options [Proc] :on_health_change callback that is made when the
|
199
|
+
# overall health of the endpoints transitions to a different level; its
|
200
|
+
# single argument contains the new minimum health level
|
164
201
|
def initialize(endpoints, options={})
|
165
202
|
@options = DEFAULT_OPTIONS.merge(options)
|
166
203
|
|
167
|
-
|
204
|
+
# provide thread-safety only when specified.
|
205
|
+
if @options[:thread_safe]
|
206
|
+
@mutex = ::Mutex.new
|
207
|
+
@synchronize = @mutex.method(:synchronize)
|
208
|
+
else
|
209
|
+
@synchronize = self.method(:free_threaded)
|
210
|
+
end
|
211
|
+
|
212
|
+
endpoints = Array(endpoints)
|
213
|
+
if endpoints.empty?
|
168
214
|
raise ArgumentError, "Must specify at least one endpoint"
|
169
215
|
end
|
170
216
|
|
@@ -172,11 +218,32 @@ module RightSupport::Net
|
|
172
218
|
@policy = @options[:policy]
|
173
219
|
@policy = @policy.new(options) if @policy.is_a?(Class)
|
174
220
|
|
221
|
+
if (@debug_mode = @options.delete(:debug_mode)).nil?
|
222
|
+
@debug_mode = DEFAULT_DEBUG_MODE
|
223
|
+
end
|
224
|
+
|
225
|
+
# convert retry counter to a simple retry callback, if necessary.
|
226
|
+
@retry = @options.delete(:retry) || DEFAULT_RETRY_PROC
|
227
|
+
unless @retry.kind_of?(Proc)
|
228
|
+
# ensure that the count is captured by callback for safety.
|
229
|
+
@retry = Integer(@retry)
|
230
|
+
retry_proc = lambda do |max_attempts|
|
231
|
+
lambda do |ep, n|
|
232
|
+
n < max_attempts
|
233
|
+
end
|
234
|
+
end.call(@retry)
|
235
|
+
@retry = retry_proc # and now the type is always Proc
|
236
|
+
end
|
237
|
+
|
175
238
|
unless test_policy_duck_type(@policy)
|
176
239
|
raise ArgumentError, ":policy must be a class/object that responds to :next, :good and :bad"
|
177
240
|
end
|
178
241
|
|
179
|
-
|
242
|
+
# note @retry is now always defined as a callback. the legacy code always
|
243
|
+
# had a default retry but it could have been defined later during actual
|
244
|
+
# execution instead of being concretely defined on initialization. now it
|
245
|
+
# is always defined on initialization.
|
246
|
+
unless test_callable_arity(@retry, 2, false)
|
180
247
|
raise ArgumentError, ":retry callback must accept two parameters"
|
181
248
|
end
|
182
249
|
|
@@ -209,13 +276,17 @@ module RightSupport::Net
|
|
209
276
|
|
210
277
|
# Un-resolve an IP address.
|
211
278
|
#
|
212
|
-
#
|
213
|
-
# endpoint:: a network endpoint (e.g. HTTP URL) to be un-resolved
|
279
|
+
# @param [String] endpoint (e.g. HTTP URL) to be un-resolved
|
214
280
|
#
|
215
|
-
#
|
216
|
-
# Return the first hostname that resolved to the IP (there should only ever be one)
|
281
|
+
# @return [String] the first hostname that resolved to the IP (there should be at most one) or nil
|
217
282
|
def lookup_hostname(endpoint)
|
218
|
-
|
283
|
+
result = nil
|
284
|
+
@synchronize.call do
|
285
|
+
if resolved_hostname = @resolved_hostnames && @resolved_hostnames.select{ |k, v| v.addresses.include?(endpoint) }
|
286
|
+
result = resolved_hostname.shift[0]
|
287
|
+
end
|
288
|
+
end
|
289
|
+
result
|
219
290
|
end
|
220
291
|
|
221
292
|
# Perform a request.
|
@@ -233,8 +304,9 @@ module RightSupport::Net
|
|
233
304
|
# Return the first non-nil value provided by the block.
|
234
305
|
def request
|
235
306
|
raise ArgumentError, "Must call this method with a block" unless block_given?
|
236
|
-
|
237
|
-
|
307
|
+
@synchronize.call do
|
308
|
+
resolve if need_resolve?
|
309
|
+
end
|
238
310
|
|
239
311
|
exceptions = {}
|
240
312
|
result = nil
|
@@ -243,12 +315,27 @@ module RightSupport::Net
|
|
243
315
|
|
244
316
|
loop do
|
245
317
|
if n > 0
|
246
|
-
|
247
|
-
|
248
|
-
|
318
|
+
retry_result = nil
|
319
|
+
@synchronize.call do
|
320
|
+
retry_result = @retry.call((@ips.nil? || @ips.empty?) ? @endpoints : @ips, n)
|
321
|
+
end
|
322
|
+
|
323
|
+
# FIX: this integer result logic is odd but is left for legacy support
|
324
|
+
# reasons. technically any retry proc could return integer and invoke
|
325
|
+
# this odd side-effect, which was only intended to support :retry as
|
326
|
+
# a literal integer. retry proc implementations should now only return
|
327
|
+
# boolean to avoid this weirdness. this logic should be removed in v3.
|
328
|
+
if retry_result.is_a?(Integer) && n >= retry_result
|
329
|
+
retry_result = false
|
330
|
+
end
|
331
|
+
break unless retry_result
|
249
332
|
end
|
250
333
|
|
251
|
-
endpoint
|
334
|
+
endpoint = nil
|
335
|
+
need_health_check = false
|
336
|
+
@synchronize.call do
|
337
|
+
endpoint, need_health_check = @policy.next
|
338
|
+
end
|
252
339
|
break unless endpoint
|
253
340
|
|
254
341
|
n += 1
|
@@ -257,41 +344,57 @@ module RightSupport::Net
|
|
257
344
|
# Perform health check if necessary. Note that we guard this with a rescue, because the
|
258
345
|
# health check may raise an exception and we want to log the exception info if this happens.
|
259
346
|
if need_health_check
|
260
|
-
|
261
|
-
|
262
|
-
|
263
|
-
|
347
|
+
hc_result = false
|
348
|
+
hc_exception = nil
|
349
|
+
@synchronize.call do
|
350
|
+
begin
|
351
|
+
# note that health-check can update the policy's good/bad state
|
352
|
+
# for endpoints.
|
353
|
+
hc_result = @policy.health_check(endpoint)
|
354
|
+
rescue Exception => e
|
355
|
+
hc_exception = e
|
264
356
|
end
|
265
|
-
|
266
|
-
|
267
|
-
|
357
|
+
end
|
358
|
+
if hc_result
|
359
|
+
logger.info "RequestBalancer: health check succeeded to #{endpoint}"
|
360
|
+
elsif hc_exception
|
361
|
+
logger.error "RequestBalancer: health check failed to #{endpoint} because of #{hc_exception.class.name}: #{hc_exception.message}"
|
362
|
+
if fatal_exception?(hc_exception)
|
268
363
|
# Fatal exceptions should still raise, even if only during a health check
|
269
|
-
raise
|
364
|
+
raise hc_exception
|
270
365
|
else
|
271
366
|
# Nonfatal exceptions: keep on truckin'
|
367
|
+
exceptions[endpoint] ||= []
|
368
|
+
exceptions[endpoint] << hc_exception
|
369
|
+
debug_exception(hc_exception) if @debug_mode
|
272
370
|
next
|
273
371
|
end
|
372
|
+
else
|
373
|
+
logger.error "RequestBalancer: health check failed to #{endpoint} because of non-true return value"
|
374
|
+
next
|
274
375
|
end
|
275
|
-
|
276
|
-
logger.info "RequestBalancer: health check succeeded to #{endpoint}"
|
277
376
|
end
|
278
377
|
|
279
378
|
begin
|
280
|
-
result
|
281
|
-
@
|
379
|
+
result = yield(endpoint)
|
380
|
+
@synchronize.call do
|
381
|
+
@policy.good(endpoint, t0, Time.now)
|
382
|
+
end
|
282
383
|
complete = true
|
283
384
|
break
|
284
385
|
rescue Exception => e
|
285
386
|
if to_raise = handle_exception(endpoint, e, t0)
|
286
387
|
raise(to_raise)
|
287
388
|
else
|
288
|
-
@
|
389
|
+
@synchronize.call do
|
390
|
+
@policy.bad(endpoint, t0, Time.now)
|
391
|
+
end
|
289
392
|
exceptions[endpoint] ||= []
|
290
393
|
exceptions[endpoint] << e
|
394
|
+
debug_exception(e) if @debug_mode
|
291
395
|
end
|
292
396
|
end
|
293
|
-
|
294
|
-
end
|
397
|
+
end # loop
|
295
398
|
|
296
399
|
return result if complete
|
297
400
|
|
@@ -302,15 +405,13 @@ module RightSupport::Net
|
|
302
405
|
summary = []
|
303
406
|
list.each { |e| summary << e.class }
|
304
407
|
health = stats[endpoint] if stats[endpoint] != 'n/a'
|
305
|
-
if
|
306
|
-
hostname = lookup_hostname(endpoint)
|
408
|
+
if hostname = lookup_hostname(endpoint)
|
307
409
|
msg << "'#{hostname}' (#{endpoint}#{", "+health if health}) => [#{summary.uniq.join(', ')}]"
|
308
410
|
else
|
309
411
|
msg << "'#{endpoint}' #{"("+health+")" if health} => [#{summary.uniq.join(', ')}]"
|
310
412
|
end
|
311
413
|
end
|
312
|
-
message = "Request failed after #{n} tries to #{exceptions.
|
313
|
-
|
414
|
+
message = "Request failed after #{n} tries to #{exceptions.size} endpoints: (#{msg.join(', ')})"
|
314
415
|
logger.error "RequestBalancer: #{message}"
|
315
416
|
raise NoResult.new(message, exceptions)
|
316
417
|
end
|
@@ -332,10 +433,15 @@ module RightSupport::Net
|
|
332
433
|
#
|
333
434
|
# {2 => "n/a", 1 => "n/a", 3 => "n/a"}
|
334
435
|
def get_stats
|
335
|
-
|
336
|
-
@
|
337
|
-
|
338
|
-
|
436
|
+
result = nil
|
437
|
+
if @policy.respond_to?(:get_stats)
|
438
|
+
@synchronize.call do
|
439
|
+
result = @policy.get_stats
|
440
|
+
end
|
441
|
+
else
|
442
|
+
result = @endpoints.inject({}) { |h, endpoint| h[endpoint] = 'n/a'; h }
|
443
|
+
end
|
444
|
+
result
|
339
445
|
end
|
340
446
|
|
341
447
|
protected
|
@@ -345,9 +451,13 @@ module RightSupport::Net
|
|
345
451
|
def handle_exception(endpoint, e, t0)
|
346
452
|
fatal = fatal_exception?(e)
|
347
453
|
duration = sprintf('%.4f', Time.now - t0)
|
348
|
-
|
349
|
-
|
350
|
-
|
454
|
+
if hostname = lookup_hostname(endpoint)
|
455
|
+
ept = "#{hostname}(#{endpoint})"
|
456
|
+
else
|
457
|
+
ept = endpoint
|
458
|
+
end
|
459
|
+
msg = "RequestBalancer: rescued #{fatal ? 'fatal' : 'retryable'} #{e.class.name} " +
|
460
|
+
"during request to #{ept}: #{e.message} after #{duration} seconds"
|
351
461
|
logger.error msg
|
352
462
|
@options[:on_exception].call(fatal, e, endpoint) if @options[:on_exception]
|
353
463
|
|
@@ -385,6 +495,7 @@ module RightSupport::Net
|
|
385
495
|
@ips = resolved_endpoints
|
386
496
|
@policy.set_endpoints(@ips)
|
387
497
|
@resolved_at = Time.now.to_i
|
498
|
+
true
|
388
499
|
end
|
389
500
|
|
390
501
|
def need_resolve?
|
@@ -402,6 +513,19 @@ module RightSupport::Net
|
|
402
513
|
return true if optional && !callable.respond_to?(:call)
|
403
514
|
return callable.respond_to?(:arity) && (callable.arity == arity)
|
404
515
|
end
|
516
|
+
|
517
|
+
# free-threaded invocation of the provided callback.
|
518
|
+
def free_threaded
|
519
|
+
yield
|
520
|
+
end
|
521
|
+
|
522
|
+
# logs exception with backtrace truncation for debug purposes only.
|
523
|
+
def debug_exception(e)
|
524
|
+
if (lines = e.backtrace || []).size > 7
|
525
|
+
lines = lines[0, 7] << '...'
|
526
|
+
end
|
527
|
+
logger.debug((["#{e.class}: #{e.message}"] + lines).join("\n"))
|
528
|
+
end
|
405
529
|
end # RequestBalancer
|
406
530
|
|
407
531
|
end # RightSupport::Net
|