right_agent 0.13.5 → 0.14.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,5 +1,5 @@
1
1
  #
2
- # Copyright (c) 2009-2011 RightScale Inc
2
+ # Copyright (c) 2009-2012 RightScale Inc
3
3
  #
4
4
  # Permission is hereby granted, free of charge, to any person obtaining
5
5
  # a copy of this software and associated documentation files (the
@@ -129,9 +129,6 @@ module RightScale
129
129
  # :abnormal_terminate_callback(Proc):: Called at end of termination when terminate abnormally (no argument)
130
130
  # :services(Symbol):: List of services provided by this agent. Defaults to all methods exposed by actors.
131
131
  # :secure(Boolean):: true indicates to use security features of RabbitMQ to restrict agents to themselves
132
- # :single_threaded(Boolean):: true indicates to run all operations in one thread; false indicates
133
- # to do requested work on EM defer thread and all else on main thread
134
- # :threadpool_size(Integer):: Number of threads in EM thread pool
135
132
  # :vhost(String):: AMQP broker virtual host
136
133
  # :user(String):: AMQP broker user
137
134
  # :pass(String):: AMQP broker password
@@ -164,8 +161,6 @@ module RightScale
164
161
  @tags << opts[:tag] if opts[:tag]
165
162
  @tags.flatten!
166
163
  @options.freeze
167
- @deferred_tasks = []
168
- @history = History.new(@identity)
169
164
  @last_stat_reset_time = Time.now
170
165
  reset_agent_stats
171
166
  true
@@ -200,38 +195,13 @@ module RightScale
200
195
  # Initiate AMQP broker connection, wait for connection before proceeding
201
196
  # otherwise messages published on failed connection will be lost
202
197
  @broker = RightAMQP::HABrokerClient.new(Serializer.new(:secure), @options)
203
- @all_setup.each { |s| @remaining_setup[s] = @broker.all }
198
+ @queues.each { |s| @remaining_queue_setup[s] = @broker.all }
204
199
  @broker.connection_status(:one_off => @options[:connect_timeout]) do |status|
205
200
  if status == :connected
206
201
  # Need to give EM (on Windows) a chance to respond to the AMQP handshake
207
202
  # before doing anything interesting to prevent AMQP handshake from
208
203
  # timing-out; delay post-connected activity a second.
209
- EM.add_timer(1) do
210
- begin
211
- @registry = ActorRegistry.new
212
- @dispatcher = create_dispatcher
213
- @sender = create_sender
214
- load_actors
215
- setup_traps
216
- setup_queues
217
- @history.update("run")
218
- start_console if @options[:console] && !@options[:daemonize]
219
-
220
- # Need to keep reconnect interval at least :connect_timeout in size,
221
- # otherwise connection_status callback will not timeout prior to next
222
- # reconnect attempt, which can result in repeated attempts to setup
223
- # queues when finally do connect
224
- interval = [@options[:check_interval], @options[:connect_timeout]].max
225
- @check_status_count = 0
226
- @check_status_brokers = @broker.all
227
- EM.next_tick { @options[:ready_callback].call } if @options[:ready_callback]
228
- @check_status_timer = EM::PeriodicTimer.new(interval) { check_status }
229
- rescue SystemExit
230
- raise
231
- rescue Exception => e
232
- terminate("failed startup after connecting to a broker", e, &terminate_callback)
233
- end
234
- end
204
+ EM.add_timer(1) { start_service(&terminate_callback) }
235
205
  elsif status == :failed
236
206
  terminate("failed to connect to any brokers during startup", &terminate_callback)
237
207
  elsif status == :timeout
@@ -265,58 +235,6 @@ module RightScale
265
235
  @registry.register(actor, prefix)
266
236
  end
267
237
 
268
- # Tune connection heartbeat frequency for all brokers
269
- # Causes a reconnect to each broker
270
- #
271
- # === Parameters
272
- # heartbeat(Integer):: Number of seconds between AMQP connection heartbeats used to keep
273
- # connection alive (e.g., when AMQP broker is behind a firewall), nil or 0 means disable
274
- #
275
- # === Return
276
- # res(String|nil):: Error message if failed, otherwise nil
277
- def tune_heartbeat(heartbeat)
278
- res = nil
279
- begin
280
- Log.info("[setup] Reconnecting each broker to tune heartbeat to #{heartbeat}")
281
- @broker.heartbeat = heartbeat
282
- update_configuration(:heartbeat => heartbeat)
283
- ids = []
284
- all = @broker.all
285
- all.each do |id|
286
- begin
287
- host, port, index, priority = @broker.identity_parts(id)
288
- @broker.connect(host, port, index, priority, force = true) do |id|
289
- @broker.connection_status(:one_off => @options[:connect_timeout], :brokers => [id]) do |status|
290
- begin
291
- if status == :connected
292
- setup_queues([id])
293
- tuned = (heartbeat && heartbeat != 0) ? "Tuned heartbeat to #{heartbeat} seconds" : "Disabled heartbeat"
294
- Log.info("[setup] #{tuned} for broker #{id}")
295
- else
296
- Log.error("Failed to reconnect to broker #{id} to tune heartbeat, status #{status.inspect}")
297
- end
298
- rescue Exception => e
299
- Log.error("Failed to setup queues for broker #{id} when tuning heartbeat", e, :trace)
300
- @exceptions.track("tune heartbeat", e)
301
- end
302
- end
303
- end
304
- ids << id
305
- rescue Exception => e
306
- res = Log.format("Failed to reconnect to broker #{id} to tune heartbeat", e)
307
- Log.error("Failed to reconnect to broker #{id} to tune heartbeat", e, :trace)
308
- @exceptions.track("tune heartbeat", e)
309
- end
310
- end
311
- res = "Failed to tune heartbeat for brokers #{(all - ids).inspect}" unless (all - ids).empty?
312
- rescue Exception => e
313
- res = Log.format("Failed tuning broker connection heartbeat", e)
314
- Log.error("Failed tuning broker connection heartbeat", e, :trace)
315
- @exceptions.track("tune heartbeat", e)
316
- end
317
- res
318
- end
319
-
320
238
  # Connect to an additional broker or reconnect it if connection has failed
321
239
  # Subscribe to identity queue on this broker
322
240
  # Update config file if this is a new broker
@@ -333,7 +251,7 @@ module RightScale
333
251
  # === Return
334
252
  # res(String|nil):: Error message if failed, otherwise nil
335
253
  def connect(host, port, index, priority = nil, force = false)
336
- @connect_requests.update("connect b#{index}")
254
+ @connect_request_stats.update("connect b#{index}")
337
255
  even_if = " even if already connected" if force
338
256
  Log.info("Connecting to broker at host #{host.inspect} port #{port.inspect} " +
339
257
  "index #{index.inspect} priority #{priority.inspect}#{even_if}")
@@ -346,7 +264,7 @@ module RightScale
346
264
  if status == :connected
347
265
  setup_queues([id])
348
266
  remaining = 0
349
- @remaining_setup.each_value { |ids| remaining += ids.size }
267
+ @remaining_queue_setup.each_value { |ids| remaining += ids.size }
350
268
  Log.info("[setup] Finished subscribing to queues after reconnecting to broker #{id}") if remaining == 0
351
269
  unless update_configuration(:host => @broker.hosts, :port => @broker.ports)
352
270
  Log.warning("Successfully connected to broker #{id} but failed to update config file")
@@ -356,13 +274,13 @@ module RightScale
356
274
  end
357
275
  rescue Exception => e
358
276
  Log.error("Failed to connect to broker #{id}, status #{status.inspect}", e)
359
- @exceptions.track("connect", e)
277
+ @exception_stats.track("connect", e)
360
278
  end
361
279
  end
362
280
  end
363
281
  rescue Exception => e
364
282
  res = Log.format("Failed to connect to broker at host #{host.inspect} and port #{port.inspect}", e)
365
- @exceptions.track("connect", e)
283
+ @exception_stats.track("connect", e)
366
284
  end
367
285
  Log.error(res) if res
368
286
  res
@@ -384,7 +302,7 @@ module RightScale
384
302
  Log.info("Disconnecting#{and_remove} broker at host #{host.inspect} port #{port.inspect}")
385
303
  Log.info("Current broker configuration: #{@broker.status.inspect}")
386
304
  id = RightAMQP::HABrokerClient.identity(host, port)
387
- @connect_requests.update("disconnect #{@broker.alias_(id)}")
305
+ @connect_request_stats.update("disconnect #{@broker.alias_(id)}")
388
306
  connected = @broker.connected
389
307
  res = nil
390
308
  if connected.include?(id) && connected.size == 1
@@ -402,7 +320,7 @@ module RightScale
402
320
  end
403
321
  rescue Exception => e
404
322
  res = Log.format("Failed to disconnect from broker #{id}", e)
405
- @exceptions.track("disconnect", e)
323
+ @exception_stats.track("disconnect", e)
406
324
  end
407
325
  else
408
326
  res = "Cannot disconnect from broker #{id} because not configured for this agent"
@@ -422,7 +340,7 @@ module RightScale
422
340
  # res(String|nil):: Error message if failed, otherwise nil
423
341
  def connect_failed(ids)
424
342
  aliases = @broker.aliases(ids).join(", ")
425
- @connect_requests.update("enroll failed #{aliases}")
343
+ @connect_request_stats.update("enroll failed #{aliases}")
426
344
  res = nil
427
345
  begin
428
346
  Log.info("Received indication that service initialization for this agent for brokers #{ids.inspect} has failed")
@@ -434,49 +352,11 @@ module RightScale
434
352
  rescue Exception => e
435
353
  res = Log.format("Failed handling broker connection failure indication for #{ids.inspect}", e)
436
354
  Log.error(res)
437
- @exceptions.track("connect failed", e)
355
+ @exception_stats.track("connect failed", e)
438
356
  end
439
357
  res
440
358
  end
441
359
 
442
- # Handle packet received
443
- # Delegate packet acknowledgement to dispatcher/sender
444
- # Ignore requests if in the process of terminating but continue to accept responses
445
- #
446
- # === Parameters
447
- # packet(Request|Push|Result):: Packet received
448
- # header(AMQP::Frame::Header|nil):: Request header containing ack control
449
- #
450
- # === Return
451
- # true:: Always return true
452
- def receive(packet, header = nil)
453
- begin
454
- case packet
455
- when Push, Request then @dispatcher.dispatch(packet, header) unless @terminating
456
- when Result then @sender.handle_response(packet, header)
457
- else header.ack if header
458
- end
459
- @sender.message_received
460
- rescue RightAMQP::HABrokerClient::NoConnectedBrokers => e
461
- Log.error("Identity queue processing error", e)
462
- rescue Exception => e
463
- Log.error("Identity queue processing error", e, :trace)
464
- @exceptions.track("identity queue", e, packet)
465
- end
466
- true
467
- end
468
-
469
- # Defer task until next status check
470
- #
471
- # === Block
472
- # Required block to be activated on next status check
473
- #
474
- # === Return
475
- # true:: Always return true
476
- def defer_task(&task)
477
- @deferred_tasks << task
478
- end
479
-
480
360
  # Gracefully terminate execution by allowing unfinished tasks to complete
481
361
  # Immediately terminate if called a second time
482
362
  # Report reason for termination if it is abnormal
@@ -506,45 +386,8 @@ module RightScale
506
386
  @terminating = true
507
387
  @check_status_timer.cancel if @check_status_timer
508
388
  @check_status_timer = nil
509
- timeout = @options[:grace_timeout]
510
389
  Log.info("[stop] Agent #{@identity} terminating")
511
-
512
- stop_gracefully(timeout) do
513
- if @sender
514
- dispatch_age = @dispatcher.dispatch_age
515
- request_count, request_age = @sender.terminate
516
-
517
- finish = lambda do
518
- request_count, request_age = @sender.terminate
519
- Log.info("[stop] The following #{request_count} requests initiated as recently as #{request_age} " +
520
- "seconds ago are being dropped:\n " + @sender.dump_requests.join("\n ")) if request_age
521
- @broker.close { block.call }
522
- end
523
-
524
- wait_time = [timeout - (request_age || timeout), timeout - (dispatch_age || timeout), 0].max
525
- if wait_time == 0
526
- finish.call
527
- else
528
- reason = ""
529
- reason = "completion of #{request_count} requests initiated as recently as #{request_age} seconds ago" if request_age
530
- reason += " and " if request_age && dispatch_age
531
- reason += "requests received as recently as #{dispatch_age} seconds ago" if dispatch_age
532
- Log.info("[stop] Termination waiting #{wait_time} seconds for #{reason}")
533
- @termination_timer = EM::Timer.new(wait_time) do
534
- begin
535
- Log.info("[stop] Continuing with termination")
536
- finish.call
537
- rescue Exception => e
538
- Log.error("Failed while finishing termination", e, :trace)
539
- begin block.call; rescue Exception; end
540
- end
541
- end
542
- end
543
- else
544
- block.call
545
- end
546
- @history.update("graceful exit")
547
- end
390
+ stop_gracefully(@options[:grace_timeout], &block)
548
391
  end
549
392
  rescue SystemExit
550
393
  raise
@@ -604,11 +447,17 @@ module RightScale
604
447
  # "recent"(Array):: Most recent as a hash of "count", "type", "message", "when", and "where"
605
448
  # "non-deliveries"(Hash):: Message non-delivery activity stats with keys "total", "percent", "last", and "rate"
606
449
  # with percentage breakdown by request type, or nil if none
450
+ # "request failures"(Hash|nil):: Request dispatch failure activity stats with keys "total", "percent", "last", and "rate"
451
+ # with percentage breakdown per failure type, or nil if none
452
+ # "response failures"(Hash|nil):: Response delivery failure activity stats with keys "total", "percent", "last", and "rate"
453
+ # with percentage breakdown per failure type, or nil if none
607
454
  def agent_stats(reset = false)
608
455
  stats = {
609
- "connect requests" => @connect_requests.all,
610
- "exceptions" => @exceptions.stats,
611
- "non-deliveries" => @non_deliveries.all
456
+ "connect requests" => @connect_request_stats.all,
457
+ "exceptions" => @exception_stats.stats,
458
+ "non-deliveries" => @non_delivery_stats.all,
459
+ "request failures" => @request_failure_stats.all,
460
+ "response failures" => @response_failure_stats.all
612
461
  }
613
462
  reset_agent_stats if reset
614
463
  stats
@@ -619,9 +468,11 @@ module RightScale
619
468
  # === Return
620
469
  # true:: Always return true
621
470
  def reset_agent_stats
622
- @connect_requests = RightSupport::Stats::Activity.new(measure_rate = false)
623
- @non_deliveries = RightSupport::Stats::Activity.new
624
- @exceptions = RightSupport::Stats::Exceptions.new(self, @options[:exception_callback])
471
+ @connect_request_stats = RightSupport::Stats::Activity.new(measure_rate = false)
472
+ @non_delivery_stats = RightSupport::Stats::Activity.new
473
+ @request_failure_stats = RightSupport::Stats::Activity.new
474
+ @response_failure_stats = RightSupport::Stats::Activity.new
475
+ @exception_stats = RightSupport::Stats::Exceptions.new(self, @options[:exception_callback])
625
476
  true
626
477
  end
627
478
 
@@ -631,7 +482,7 @@ module RightScale
631
482
  # opts(Hash):: Configuration options
632
483
  #
633
484
  # === Return
634
- # (String):: Serialized agent identity
485
+ # true:: Always return true
635
486
  def set_configuration(opts)
636
487
  @options = DEFAULT_OPTIONS.clone
637
488
  @options.update(opts)
@@ -651,10 +502,9 @@ module RightScale
651
502
  @agent_name = @options[:agent_name]
652
503
  @stats_routing_key = "stats.#{@agent_type}.#{parsed_identity.base_id}"
653
504
  @revision = revision
654
-
655
- @remaining_setup = {}
656
- @all_setup = [:setup_identity_queue]
657
- @identity
505
+ @queues = [@identity]
506
+ @remaining_queue_setup = {}
507
+ @history = History.new(@identity)
658
508
  end
659
509
 
660
510
  # Update agent's persisted configuration
@@ -679,6 +529,39 @@ module RightScale
679
529
  false
680
530
  end
681
531
 
532
+ # Start service now that connected to at least one broker
533
+ #
534
+ # === Block
535
+ # Optional block to be executed if terminate abnormally
536
+ #
537
+ # === Return
538
+ # true:: Always return true
539
+ def start_service(&terminate_callback)
540
+ begin
541
+ @registry = ActorRegistry.new
542
+ @dispatcher = create_dispatcher
543
+ @sender = create_sender
544
+ load_actors
545
+ setup_traps
546
+ setup_non_delivery
547
+ setup_queues
548
+ @history.update("run")
549
+ start_console if @options[:console] && !@options[:daemonize]
550
+ EM.next_tick { @options[:ready_callback].call } if @options[:ready_callback]
551
+
552
+ # Need to keep reconnect interval at least :connect_timeout in size,
553
+ # otherwise connection_status callback will not timeout prior to next
554
+ # reconnect attempt, which can result in repeated attempts to setup
555
+ # queues when finally do connect
556
+ setup_status_checks([@options[:check_interval], @options[:connect_timeout]].max)
557
+ rescue SystemExit
558
+ raise
559
+ rescue Exception => e
560
+ terminate("failed startup after connecting to a broker", e, &terminate_callback)
561
+ end
562
+ true
563
+ end
564
+
682
565
  # Create dispatcher for handling incoming requests
683
566
  #
684
567
  # === Return
@@ -726,18 +609,36 @@ module RightScale
726
609
  true
727
610
  end
728
611
 
729
- # Setup the queues on the specified brokers for this agent
730
- # Also configure message non-delivery handling
612
+ # Setup signal traps
731
613
  #
732
- # === Parameters
733
- # ids(Array):: Identity of brokers for which to subscribe, defaults to all usable
614
+ # === Return
615
+ # true:: Always return true
616
+ def setup_traps
617
+ ['INT', 'TERM'].each do |sig|
618
+ old = trap(sig) do
619
+ EM.next_tick do
620
+ begin
621
+ terminate do
622
+ DEFAULT_TERMINATE_BLOCK.call
623
+ old.call if old.is_a? Proc
624
+ end
625
+ rescue Exception => e
626
+ Log.error("Failed in termination", e, :trace)
627
+ end
628
+ end
629
+ end
630
+ end
631
+ true
632
+ end
633
+
634
+ # Setup non-delivery handler
734
635
  #
735
636
  # === Return
736
637
  # true:: Always return true
737
- def setup_queues(ids = nil)
638
+ def setup_non_delivery
738
639
  @broker.non_delivery do |reason, type, token, from, to|
739
640
  begin
740
- @non_deliveries.update(type)
641
+ @non_delivery_stats.update(type)
741
642
  reason = case reason
742
643
  when "NO_ROUTE" then OperationResult::NO_ROUTE_TO_TARGET
743
644
  when "NO_CONSUMERS" then OperationResult::TARGET_NOT_CONNECTED
@@ -747,46 +648,108 @@ module RightScale
747
648
  @sender.handle_response(result)
748
649
  rescue Exception => e
749
650
  Log.error("Failed handling non-delivery for <#{token}>", e, :trace)
750
- @exceptions.track("message return", e)
651
+ @exception_stats.track("message return", e)
751
652
  end
752
653
  end
753
- # Do the setup regardless of whether remaining setup is empty since may be reconnecting
754
- @all_setup.each { |setup| @remaining_setup[setup] -= self.__send__(setup, ids) }
654
+ end
655
+
656
+ # Setup the queues on the specified brokers for this agent
657
+ # Do the setup regardless of whether remaining setup is empty since may be reconnecting
658
+ #
659
+ # === Parameters
660
+ # ids(Array):: Identity of brokers for which to subscribe, defaults to all usable
661
+ #
662
+ # === Return
663
+ # true:: Always return true
664
+ def setup_queues(ids = nil)
665
+ @queues.each { |q| @remaining_queue_setup[q] -= setup_queue(q, ids) }
755
666
  true
756
667
  end
757
668
 
758
- # Setup identity queue for this agent
669
+ # Setup queue for this agent
759
670
  #
760
671
  # === Parameters
672
+ # name(String):: Queue name
761
673
  # ids(Array):: Identity of brokers for which to subscribe, defaults to all usable
762
674
  #
763
675
  # === Return
764
- # ids(Array):: Identity of brokers to which subscribe submitted (although may still fail)
765
- def setup_identity_queue(ids = nil)
766
- queue = {:name => @identity, :options => {:durable => true, :no_declare => @options[:secure]}}
676
+ # (Array):: Identity of brokers to which subscribe submitted (although may still fail)
677
+ def setup_queue(name, ids = nil)
678
+ queue = {:name => name, :options => {:durable => true, :no_declare => @options[:secure]}}
767
679
  filter = [:from, :tags, :tries, :persistent]
768
- options = {:ack => true, Request => filter, Push => filter, Result => [:from], :brokers => ids}
769
- ids = @broker.subscribe(queue, nil, options) { |_, packet, header| receive(packet, header) }
680
+ options = {:ack => true, Push => filter, Request => filter, Result => [:from], :brokers => ids}
681
+ @broker.subscribe(queue, nil, options) { |_, packet, header| handle_packet(name, packet, header) }
770
682
  end
771
683
 
772
- # Setup signal traps
684
+ # Handle packet from queue
685
+ #
686
+ # === Parameters
687
+ # queue(String):: Name of queue from which message was received
688
+ # packet(Packet):: Packet received
689
+ # header(AMQP::Frame::Header):: Packet header containing ack control
773
690
  #
774
691
  # === Return
775
692
  # true:: Always return true
776
- def setup_traps
777
- ['INT', 'TERM'].each do |sig|
778
- old = trap(sig) do
779
- EM.next_tick do
780
- begin
781
- terminate do
782
- DEFAULT_TERMINATE_BLOCK.call
783
- old.call if old.is_a? Proc
784
- end
785
- rescue Exception => e
786
- Log.error("Failed in termination", e, :trace)
787
- end
788
- end
693
+ def handle_packet(queue, packet, header)
694
+ begin
695
+ # Continue to dispatch/ack requests even when terminating otherwise will block results
696
+ # Ideally would reject requests when terminating but broker client does not yet support that
697
+ case packet
698
+ when Push, Request then dispatch_request(packet, queue)
699
+ when Result then deliver_response(packet)
789
700
  end
701
+ @sender.message_received
702
+ rescue Exception => e
703
+ Log.error("#{queue} queue processing error", e, :trace)
704
+ @exception_stats.track("#{queue} queue", e, packet)
705
+ ensure
706
+ # Relying on fact that all dispatches/deliveries are synchronous and therefore
707
+ # need to have completed or failed by now, thus allowing packet acknowledgement
708
+ header.ack
709
+ end
710
+ true
711
+ end
712
+
713
+ # Dispatch request and then send response if any
714
+ #
715
+ # === Parameters
716
+ # request(Push|Request):: Packet containing request
717
+ # queue(String):: Name of queue from which message was received
718
+ #
719
+ # === Return
720
+ # true:: Always return true
721
+ def dispatch_request(request, queue)
722
+ begin
723
+ if result = @dispatcher.dispatch(request)
724
+ exchange = {:type => :queue, :name => "response", :options => {:durable => true, :no_declare => @options[:secure]}}
725
+ @broker.publish(exchange, result, :persistent => true, :mandatory => true, :log_filter => [:tries, :persistent, :duration])
726
+ end
727
+ rescue Dispatcher::DuplicateRequest
728
+ rescue RightAMQP::HABrokerClient::NoConnectedBrokers => e
729
+ Log.error("Failed to publish result of dispatched request #{request.trace} from queue #{queue}", e)
730
+ @request_failure_stats.update("NoConnectedBrokers")
731
+ rescue Exception => e
732
+ Log.error("Failed to dispatch request #{request.trace} from queue #{queue}", e, :trace)
733
+ @request_failure_stats.update(e.class.name)
734
+ @exception_stats.track("request", e)
735
+ end
736
+ true
737
+ end
738
+
739
+ # Deliver response to request sender
740
+ #
741
+ # === Parameters
742
+ # result(Result):: Packet containing response
743
+ #
744
+ # === Return
745
+ # true:: Always return true
746
+ def deliver_response(result)
747
+ begin
748
+ @sender.handle_response(result)
749
+ rescue Exception => e
750
+ Log.error("Failed to deliver response #{result.trace}", e, :trace)
751
+ @response_failure_stats.update(e.class.name)
752
+ @exception_stats.track("response", e)
790
753
  end
791
754
  true
792
755
  end
@@ -804,45 +767,51 @@ module RightScale
804
767
  true
805
768
  end
806
769
 
807
- # Check status of agent by gathering current operation statistics and publishing them,
808
- # finishing any queue setup, and executing any deferred tasks
770
+ # Setup periodic status check
771
+ #
772
+ # === Parameters
773
+ # interval(Integer):: Number of seconds between status checks
774
+ #
775
+ # === Return
776
+ # true:: Always return true
777
+ def setup_status_checks(interval)
778
+ @check_status_count = 0
779
+ @check_status_brokers = @broker.all
780
+ @check_status_timer = EM::PeriodicTimer.new(interval) { check_status }
781
+ true
782
+ end
783
+
784
+ # Check status of agent by gathering current operation statistics and publishing them
785
+ # and finishing any queue setup
786
+ # Although agent termination cancels the check_status_timer, this method could induce
787
+ # termination, therefore the termination status needs to be checked before each step
809
788
  #
810
789
  # === Return
811
790
  # true:: Always return true
812
791
  def check_status
813
792
  begin
814
- finish_setup
793
+ finish_setup unless @terminating
815
794
  rescue Exception => e
816
795
  Log.error("Failed finishing setup", e)
817
- @exceptions.track("check status", e)
796
+ @exception_stats.track("check status", e)
818
797
  end
819
798
 
820
799
  begin
821
- if @stats_routing_key
800
+ if @stats_routing_key && !@terminating
822
801
  exchange = {:type => :topic, :name => "stats", :options => {:no_declare => true}}
823
802
  @broker.publish(exchange, Stats.new(stats.content, @identity), :no_log => true,
824
803
  :routing_key => @stats_routing_key, :brokers => @check_status_brokers.rotate!)
825
804
  end
826
805
  rescue Exception => e
827
806
  Log.error("Failed publishing stats", e)
828
- @exceptions.track("check status", e)
829
- end
830
-
831
- @deferred_tasks.reject! do |t|
832
- begin
833
- t.call
834
- rescue Exception => e
835
- Log.error("Failed to perform deferred task", e)
836
- @exceptions.track("check status", e)
837
- end
838
- true
807
+ @exception_stats.track("check status", e)
839
808
  end
840
809
 
841
810
  begin
842
- check_other(@check_status_count)
811
+ check_other(@check_status_count) unless @terminating
843
812
  rescue Exception => e
844
813
  Log.error("Failed to perform other check status check", e)
845
- @exceptions.track("check status", e)
814
+ @exception_stats.track("check status", e)
846
815
  end
847
816
 
848
817
  @check_status_count += 1
@@ -879,13 +848,57 @@ module RightScale
879
848
  # terminating regardless of whether there are still unfinished requests
880
849
  #
881
850
  # === Block
882
- # Required block to be executed after stopping message receipt wherever possible
851
+ # Optional block to be executed after stopping message processing wherever possible
883
852
  #
884
853
  # === Return
885
854
  # true:: Always return true
886
- def stop_gracefully(timeout)
855
+ def stop_gracefully(timeout, &block)
887
856
  @broker.unusable.each { |id| @broker.close_one(id, propagate = false) }
888
- yield
857
+ finish_terminating(timeout, &block)
858
+ end
859
+
860
+ # Finish termination after all requests have been processed
861
+ #
862
+ # === Parameters
863
+ # timeout(Integer):: Maximum number of seconds to wait after last request received before
864
+ # terminating regardless of whether there are still unfinished requests
865
+ #
866
+ # === Block
867
+ # Optional block to be executed after stopping message processing wherever possible
868
+ #
869
+ # === Return
870
+ # true:: Always return true
871
+ def finish_terminating(timeout, &block)
872
+ if @sender
873
+ request_count, request_age = @sender.terminate
874
+
875
+ finish = lambda do
876
+ request_count, request_age = @sender.terminate
877
+ Log.info("[stop] The following #{request_count} requests initiated as recently as #{request_age} " +
878
+ "seconds ago are being dropped:\n " + @sender.dump_requests.join("\n ")) if request_age
879
+ @broker.close { block.call }
880
+ end
881
+
882
+ if (wait_time = [timeout - (request_age || timeout), 0].max) > 0
883
+ Log.info("[stop] Termination waiting #{wait_time} seconds for completion of #{request_count} " +
884
+ "requests initiated as recently as #{request_age} seconds ago")
885
+ @termination_timer = EM::Timer.new(wait_time) do
886
+ begin
887
+ Log.info("[stop] Continuing with termination")
888
+ finish.call
889
+ rescue Exception => e
890
+ Log.error("Failed while finishing termination", e, :trace)
891
+ begin block.call; rescue Exception; end
892
+ end
893
+ end
894
+ else
895
+ finish.call
896
+ end
897
+ else
898
+ block.call
899
+ end
900
+ @history.update("graceful exit")
901
+ true
889
902
  end
890
903
 
891
904
  # Determine current revision of software