right_agent 2.0.8-x86-mingw32 → 2.1.0-x86-mingw32

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -71,18 +71,23 @@ module RightScale
71
71
  # Default time to wait for an event or to ping WebSocket
72
72
  DEFAULT_LISTEN_TIMEOUT = 60
73
73
 
74
+ # Maximum number of repeated listen failures allowed before giving up listening
75
+ MAX_LISTEN_FAILURES = 10
76
+
74
77
  # Create RightNet router client
75
78
  #
76
79
  # @param [AuthClient] auth_client providing authorization session for HTTP requests
77
80
  #
78
81
  # @option options [Numeric] :open_timeout maximum wait for connection; defaults to DEFAULT_OPEN_TIMEOUT
79
82
  # @option options [Numeric] :request_timeout maximum wait for response; defaults to DEFAULT_REQUEST_TIMEOUT
80
- # @option options [Numeric] :listen_timeout maximum wait for event; defaults to DEFAULT_POLL_TIMEOUT
83
+ # @option options [Numeric] :listen_timeout maximum wait for event; defaults to DEFAULT_LISTEN_TIMEOUT
81
84
  # @option options [Boolean] :long_polling_only never attempt to create a WebSocket, always long-polling instead
82
85
  # @option options [Numeric] :retry_timeout maximum before stop retrying; defaults to DEFAULT_RETRY_TIMEOUT
83
86
  # @option options [Array] :retry_intervals between successive retries; defaults to DEFAULT_RETRY_INTERVALS
84
87
  # @option options [Boolean] :retry_enabled for requests that fail to connect or that return a retry result
85
88
  # @option options [Numeric] :reconnect_interval for reconnect attempts after losing connectivity
89
+ # @option options [Boolean] :non_blocking i/o is to be used for HTTP requests by applying
90
+ # EM::HttpRequest and fibers instead of RestClient; requests remain synchronous
86
91
  # @option options [Proc] :exception_callback for unexpected exceptions
87
92
  #
88
93
  # @raise [ArgumentError] auth client does not support this client type
@@ -199,8 +204,6 @@ module RightScale
199
204
  end
200
205
 
201
206
  # Receive events via an HTTP WebSocket if available, otherwise via an HTTP long-polling
202
- # This is a blocking call and therefore should be used from a thread different than
203
- # otherwise used with this object, e.g., EM.defer thread
204
207
  #
205
208
  # @param [Array, NilClass] routing_keys for event sources of interest with nil meaning all
206
209
  #
@@ -219,28 +222,14 @@ module RightScale
219
222
  def listen(routing_keys, &handler)
220
223
  raise ArgumentError, "Block missing" unless block_given?
221
224
 
225
+ @event_uuids = nil
226
+ @listen_interval = 0
227
+ @listen_state = :choose
228
+ @listen_failures = 0
222
229
  @connect_interval = CONNECT_INTERVAL
223
- @last_connect_time = Time.now - @connect_interval
224
230
  @reconnect_interval = RECONNECT_INTERVAL
225
231
 
226
- uuids = nil
227
- retries = 0
228
- until [:closing, :closed].include?(state) do
229
- if @websocket
230
- @connect_interval = CONNECT_INTERVAL
231
- @reconnect_interval = RECONNECT_INTERVAL
232
- sleep(CHECK_INTERVAL)
233
- next
234
- elsif retry_connect?
235
- @last_connect_time = Time.now
236
- @close_code = @close_reason = nil
237
- @stats["reconnects"].update("websocket") if (retries += 1) > 1
238
- next if try_connect(routing_keys, &handler)
239
- end
240
-
241
- # Resort to long-polling if WebSocket not usable
242
- uuids = try_long_poll(routing_keys, uuids, &handler) if @websocket.nil?
243
- end
232
+ listen_loop(routing_keys, &handler)
244
233
  true
245
234
  end
246
235
 
@@ -253,6 +242,7 @@ module RightScale
253
242
  # @return [TrueClass] always true
254
243
  def close(scope = :all)
255
244
  super
245
+ update_listen_state(:cancel)
256
246
  @websocket.close(SHUTDOWN_CLOSE, "Agent terminating") if @websocket
257
247
  end
258
248
 
@@ -285,56 +275,148 @@ module RightScale
285
275
  true
286
276
  end
287
277
 
288
- # Determine whether should retry creation of WebSocket connection
289
- # Should only retry if (1) WebSocket is enabled, (2) there is none currently,
290
- # (3) previous closure was for acceptable reasons (normal, router shutdown,
291
- # router inaccessible), or (4) enough time has elapsed to make another attempt
292
- #
293
- # @return [Boolean] true if should try, otherwise false
294
- def retry_connect?
295
- unless @options[:long_polling_only]
296
- if @websocket.nil?
297
- if (Time.now - @last_connect_time) > @connect_interval
298
- true
299
- elsif [NORMAL_CLOSE, SHUTDOWN_CLOSE].include?(@close_code)
300
- true
301
- elsif router_not_responding?
302
- true
303
- end
304
- end
305
- end
306
- end
307
-
308
- # Try to create WebSocket connection
278
+ # Perform listen action, then wait prescribed time for next action
279
+ # A periodic timer is not effective here because it does not wait for the listen action to complete
309
280
  #
310
281
  # @param [Array, NilClass] routing_keys for event sources of interest with nil meaning all
311
282
  #
312
283
  # @yield [event] required block called each time event received
313
284
  # @yieldparam [Hash] event received
314
285
  #
315
- # @return [Boolean] true if should not try long-polling, otherwise false
316
- def try_connect(routing_keys, &handler)
286
+ # @return [Boolean] false if failed or terminating, otherwise true
287
+ def listen_loop(routing_keys, &handler)
288
+ @listen_timer = nil
289
+
317
290
  begin
318
- connect(routing_keys, &handler)
319
- CHECK_INTERVAL.times do
320
- # Allow for possibility of asynchronous handshake failure resulting in close
291
+ # Perform listen action based on current state
292
+ case @listen_state
293
+ when :choose
294
+ # Choose listen method or continue as is if already listening
295
+ # or want to delay choosing
296
+ choose_listen_method
297
+ when :check
298
+ # Check whether really got connected, given the possibility of an
299
+ # asynchronous WebSocket handshake failure that resulted in a close
300
+ # Continue to use WebSockets if still connected or if connect failed
301
+ # due to unresponsive server
321
302
  if @websocket.nil?
322
303
  if router_not_responding?
323
- sleep(backoff_reconnect_interval)
304
+ update_listen_state(:connect, backoff_reconnect_interval)
324
305
  else
325
306
  backoff_connect_interval
307
+ update_listen_state(:long_poll)
326
308
  end
327
- break
309
+ elsif (@listen_checks += 1) > CHECK_INTERVAL
310
+ @reconnect_interval = RECONNECT_INTERVAL
311
+ update_listen_state(:choose, @connect_interval = CONNECT_INTERVAL)
328
312
  end
329
- sleep(1)
313
+ when :connect
314
+ # Use of WebSockets is enabled and it is again time to try to connect
315
+ @stats["reconnects"].update("websocket") if @attempted_connect_at
316
+ try_connect(routing_keys, &handler)
317
+ when :long_poll
318
+ # Resorting to long-polling
319
+ # Need to long-poll on separate thread if cannot use non-blocking HTTP i/o
320
+ # Will still periodically retry WebSockets if not restricted to just long-polling
321
+ if @options[:non_blocking]
322
+ @event_uuids = process_long_poll(try_long_poll(routing_keys, @event_uuids, &handler))
323
+ else
324
+ update_listen_state(:wait, 1)
325
+ try_deferred_long_poll(routing_keys, @event_uuids, &handler)
326
+ end
327
+ when :wait
328
+ # Deferred long-polling is expected to break out of this state eventually
329
+ when :cancel
330
+ return false
330
331
  end
331
- @websocket.nil?
332
+ @listen_failures = 0
332
333
  rescue Exception => e
333
- Log.error("Failed creating WebSocket", e)
334
- @stats["exceptions"].track("websocket", e)
335
- backoff_connect_interval
336
- false
334
+ Log.error("Failed to listen", e, :trace)
335
+ @stats["exceptions"].track("listen", e)
336
+ @listen_failures += 1
337
+ if @listen_failures > MAX_LISTEN_FAILURES
338
+ Log.error("Exceeded maximum repeated listen failures (#{MAX_LISTEN_FAILURES}), stopping listening")
339
+ @listen_state = :cancel
340
+ self.state = :failed
341
+ return false
342
+ end
343
+ @listen_state = :choose
344
+ @listen_interval = CHECK_INTERVAL
345
+ end
346
+
347
+ # Loop using next_tick or timer
348
+ if @listen_interval == 0
349
+ EM_S.next_tick { listen_loop(routing_keys, &handler) }
350
+ else
351
+ @listen_timer = EM_S::Timer.new(@listen_interval) { listen_loop(routing_keys, &handler) }
337
352
  end
353
+ true
354
+ end
355
+
356
+ # Update listen state
357
+ #
358
+ # @param [Symbol] state next
359
+ # @param [Integer] interval before next listen action
360
+ #
361
+ # @return [TrueClass] always true
362
+ #
363
+ # @raise [ArgumentError] invalid state
364
+ def update_listen_state(state, interval = 0)
365
+ if state == :cancel
366
+ @listen_timer.cancel if @listen_timer
367
+ @listen_timer = nil
368
+ @listen_state = state
369
+ elsif [:choose, :check, :connect, :long_poll, :wait].include?(state)
370
+ @listen_checks = 0 if state == :check && @listen_state != :check
371
+ @listen_state = state
372
+ @listen_interval = interval
373
+ else
374
+ raise ArgumentError, "Invalid listen state: #{state.inspect}"
375
+ end
376
+ true
377
+ end
378
+
379
+ # Choose listen method, including whether to retry creation of WebSocket connection now
380
+ # Should only retry if (1) WebSocket is enabled, (2) there is none currently,
381
+ # (3) previous closure was for acceptable reasons (normal, router shutdown,
382
+ # router inaccessible), or (4) enough time has elapsed to make another attempt
383
+ #
384
+ # @return [TrueClass] always true
385
+ def choose_listen_method
386
+ if @options[:long_polling_only]
387
+ update_listen_state(:long_poll)
388
+ @connect_interval = MAX_CONNECT_INTERVAL
389
+ elsif @websocket
390
+ update_listen_state(:choose, @connect_interval)
391
+ else
392
+ if @attempted_connect_at.nil?
393
+ interval = 0
394
+ elsif (interval = @connect_interval - (Time.now - @attempted_connect_at)) < 0 ||
395
+ [NORMAL_CLOSE, SHUTDOWN_CLOSE].include?(@close_code) ||
396
+ router_not_responding?
397
+ interval = 0
398
+ end
399
+ update_listen_state(:connect, interval)
400
+ end
401
+ true
402
+ end
403
+
404
+ # Try to create WebSocket connection
405
+ #
406
+ # @param [Array, NilClass] routing_keys for event sources of interest with nil meaning all
407
+ #
408
+ # @yield [event] required block called each time event received
409
+ # @yieldparam [Hash] event received
410
+ #
411
+ # @return [TrueClass] always true
412
+ def try_connect(routing_keys, &handler)
413
+ connect(routing_keys, &handler)
414
+ update_listen_state(:check, 1)
415
+ rescue Exception => e
416
+ Log.error("Failed creating WebSocket", e)
417
+ @stats["exceptions"].track("websocket", e)
418
+ backoff_connect_interval
419
+ update_listen_state(:long_poll)
338
420
  end
339
421
 
340
422
  # Connect to RightNet router using WebSocket for receiving events
@@ -353,6 +435,9 @@ module RightScale
353
435
  def connect(routing_keys, &handler)
354
436
  raise ArgumentError, "Block missing" unless block_given?
355
437
 
438
+ @attempted_connect_at = Time.now
439
+ @close_code = @close_reason = nil
440
+
356
441
  options = {
357
442
  # Limit to .auth_header here (rather than .headers) to keep WebSockets happy
358
443
  :headers => {"X-API-Version" => API_VERSION}.merge(@auth_client.auth_header),
@@ -392,11 +477,8 @@ module RightScale
392
477
  # Acknowledge event
393
478
  @websocket.send(JSON.dump({:ack => event[:uuid]}))
394
479
 
395
- # Send response, if any
396
- if (result = handler.call(event))
397
- Log.info("Sending EVENT <#{result[:uuid]}> #{result[:type]} #{result[:path]} to #{result[:from]}")
398
- @websocket.send(JSON.dump({:event => result, :routing_keys => [event[:from]]}))
399
- end
480
+ # Handle event
481
+ handler.call(event)
400
482
  @communicated_callbacks.each { |callback| callback.call } if @communicated_callbacks
401
483
  rescue Exception => e
402
484
  Log.error("Failed handling WebSocket event", e, :trace)
@@ -410,30 +492,44 @@ module RightScale
410
492
  # Try to make long-polling request to receive events
411
493
  #
412
494
  # @param [Array, NilClass] routing_keys for event sources of interest with nil meaning all
413
- # @param [Array, NilClass] uuids for events received on previous poll
495
+ # @param [Array, NilClass] event_uuids from previous poll
414
496
  #
415
497
  # @yield [event] required block called each time event received
416
498
  # @yieldparam [Hash] event received
417
499
  #
418
- # @return [Array, NilClass] UUIDs of events received, or nil if none
419
- def try_long_poll(routing_keys, uuids, &handler)
420
- result = nil
500
+ # @return [Array, NilClass, Exception] UUIDs of events received, or nil if none, or Exception if failed
501
+ def try_long_poll(routing_keys, event_uuids, &handler)
421
502
  begin
422
- result = long_poll(routing_keys, uuids, &handler)
423
- @reconnect_interval = RECONNECT_INTERVAL
424
- @communicated_callbacks.each { |callback| callback.call } if @communicated_callbacks
425
- rescue Exceptions::Unauthorized, Exceptions::ConnectivityFailure, Exceptions::RetryableError => e
426
- Log.error("Failed long-polling", e, :no_trace)
427
- sleep(backoff_reconnect_interval)
503
+ long_poll(routing_keys, event_uuids, &handler)
428
504
  rescue Exception => e
429
- Log.error("Failed long-polling", e, :trace)
430
- @stats["exceptions"].track("long-polling", e)
431
- sleep(backoff_reconnect_interval)
505
+ e
432
506
  end
433
- result
507
+ end
508
+
509
+ # Try to make long-polling request to receive events using EM defer thread
510
+ # Repeat long-polling until there is an error or the stop time has been reached
511
+ #
512
+ # @param [Array, NilClass] routing_keys for event sources of interest with nil meaning all
513
+ # @param [Array, NilClass] event_uuids from previous poll
514
+ #
515
+ # @yield [event] required block called each time event received
516
+ # @yieldparam [Hash] event received
517
+ #
518
+ # @return [TrueClass] always true
519
+ def try_deferred_long_poll(routing_keys, event_uuids, &handler)
520
+ # Proc for running long-poll in EM defer thread since this is a blocking call
521
+ @defer_operation_proc = Proc.new { try_long_poll(routing_keys, event_uuids, &handler) }
522
+
523
+ # Proc that runs in main EM reactor thread to handle result from above operation proc
524
+ @defer_callback_proc = Proc.new { |result| @event_uuids = process_long_poll(result) }
525
+
526
+ # Use EM defer thread since the long-poll will block
527
+ EM.defer(@defer_operation_proc, @defer_callback_proc)
528
+ true
434
529
  end
435
530
 
436
531
  # Make long-polling request to receive one or more events
532
+ # Do not return until an event is received or the polling times out or fails
437
533
  # Limit logging unless in debug mode
438
534
  #
439
535
  # @param [Array, NilClass] routing_keys as strings to assist router in delivering
@@ -455,18 +551,47 @@ module RightScale
455
551
  params[:routing_keys] = routing_keys if routing_keys
456
552
  params[:ack] = ack if ack && ack.any?
457
553
 
458
- uuids = []
459
- if (events = make_request(:get, "/listen", params, "listen", nil, :log_level => :debug,
460
- :request_timeout => @options[:listen_timeout]))
554
+ options = {
555
+ :log_level => :debug,
556
+ :request_timeout => @connect_interval,
557
+ :poll_timeout => @options[:listen_timeout] }
558
+
559
+ event_uuids = []
560
+ events = make_request(:poll, "/listen", params, "listen", nil, options)
561
+ if events
461
562
  events.each do |event|
462
563
  event = SerializationHelper.symbolize_keys(event)
463
564
  Log.info("Received EVENT <#{event[:uuid]}> #{event[:type]} #{event[:path]} from #{event[:from]}")
464
565
  @stats["events"].update("#{event[:type]} #{event[:path]}")
465
- uuids << event[:uuid]
566
+ event_uuids << event[:uuid]
466
567
  handler.call(event)
467
568
  end
468
569
  end
469
- uuids if uuids.any?
570
+ event_uuids if event_uuids.any?
571
+ end
572
+
573
+ # Process result from long-polling attempt
574
+ #
575
+ # @param [Array, NilClass, Exception] result from long-polling attempt
576
+ #
577
+ # @return [Array, NilClass] result from long-polling attempt, or nil if it failed
578
+ def process_long_poll(result)
579
+ case result
580
+ when Exceptions::Unauthorized, Exceptions::ConnectivityFailure, Exceptions::RetryableError, Exceptions::InternalServerError
581
+ Log.error("Failed long-polling", result, :no_trace)
582
+ update_listen_state(:choose, backoff_reconnect_interval)
583
+ result = nil
584
+ when Exception
585
+ Log.error("Failed long-polling", result, :trace)
586
+ @stats["exceptions"].track("long-polling", result)
587
+ update_listen_state(:choose, backoff_reconnect_interval)
588
+ result = nil
589
+ else
590
+ @reconnect_interval = RECONNECT_INTERVAL
591
+ @communicated_callbacks.each { |callback| callback.call } if @communicated_callbacks
592
+ update_listen_state(:choose)
593
+ end
594
+ result
470
595
  end
471
596
 
472
597
  # Exponentially increase WebSocket connect attempt interval after failing to connect
@@ -35,7 +35,7 @@ module RightScale
35
35
  # Default maximum number of consecutive ping timeouts before attempt to reconnect
36
36
  MAX_PING_TIMEOUTS = 3
37
37
 
38
- # (EM::Timer) Timer while waiting for RightNet router ping response
38
+ # Timer while waiting for RightNet router ping response
39
39
  attr_accessor :ping_timer
40
40
 
41
41
  def initialize(sender, check_interval, ping_stats, exception_stats)
@@ -0,0 +1,70 @@
1
+ #
2
+ # Copyright (c) 2014 RightScale Inc
3
+ #
4
+ # Permission is hereby granted, free of charge, to any person obtaining
5
+ # a copy of this software and associated documentation files (the
6
+ # "Software"), to deal in the Software without restriction, including
7
+ # without limitation the rights to use, copy, modify, merge, publish,
8
+ # distribute, sublicense, and/or sell copies of the Software, and to
9
+ # permit persons to whom the Software is furnished to do so, subject to
10
+ # the following conditions:
11
+ #
12
+ # The above copyright notice and this permission notice shall be
13
+ # included in all copies or substantial portions of the Software.
14
+ #
15
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
16
+ # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
17
+ # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
18
+ # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
19
+ # LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
20
+ # OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
21
+ # WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
22
+
23
+ # Wrap EventMachine to support automatically spawning a fiber before executing
24
+ # associated block so that if block yields its fiber it is not the root fiber
25
+ module EventMachineSpawn
26
+ @fiber_pool = nil
27
+
28
+ def self.fiber_pool
29
+ @fiber_pool
30
+ end
31
+
32
+ def self.fiber_pool=(value)
33
+ @fiber_pool = value
34
+ end
35
+
36
+ def self.execute(&block)
37
+ @fiber_pool ? @fiber_pool.spawn(&block) : yield
38
+ end
39
+
40
+ def self.run(*args, &block)
41
+ EM.run(*args) { @fiber_pool ? @fiber_pool.spawn(&block) : yield }
42
+ end
43
+
44
+ def self.next_tick(*args, &block)
45
+ EM.next_tick(*args) { @fiber_pool ? @fiber_pool.spawn(&block) : yield }
46
+ end
47
+
48
+ def self.add_timer(*args, &block)
49
+ EM.add_timer(*args) { @fiber_pool ? @fiber_pool.spawn(&block) : yield }
50
+ end
51
+
52
+ def self.add_periodic_timer(*args, &block)
53
+ EM.add_periodic_timer(*args) { @fiber_pool ? @fiber_pool.spawn(&block) : yield }
54
+ end
55
+
56
+ class Timer
57
+ def self.new(*args, &block)
58
+ EM::Timer.new(*args) { EM_S.fiber_pool ? EM_S.fiber_pool.spawn(&block) : yield }
59
+ end
60
+ end
61
+
62
+ class PeriodicTimer
63
+ def self.new(*args, &block)
64
+ EM::PeriodicTimer.new(*args) { EM_S.fiber_pool ? EM_S.fiber_pool.spawn(&block) : yield }
65
+ end
66
+ end
67
+ end
68
+
69
+ # Alias for EventMachineSpawn
70
+ EM_S = EventMachineSpawn