right_agent 0.10.13 → 0.13.5

Sign up to get free protection for your applications and to get access to all the features.
Files changed (39)
  1. data/lib/right_agent.rb +2 -0
  2. data/lib/right_agent/actor.rb +45 -10
  3. data/lib/right_agent/actor_registry.rb +5 -5
  4. data/lib/right_agent/actors/agent_manager.rb +4 -4
  5. data/lib/right_agent/agent.rb +97 -37
  6. data/lib/right_agent/agent_tag_manager.rb +1 -2
  7. data/lib/right_agent/command/command_io.rb +1 -3
  8. data/lib/right_agent/command/command_runner.rb +9 -3
  9. data/lib/right_agent/dispatched_cache.rb +110 -0
  10. data/lib/right_agent/dispatcher.rb +119 -180
  11. data/lib/right_agent/history.rb +136 -0
  12. data/lib/right_agent/log.rb +6 -3
  13. data/lib/right_agent/monkey_patches/ruby_patch.rb +0 -1
  14. data/lib/right_agent/pid_file.rb +1 -1
  15. data/lib/right_agent/platform.rb +2 -2
  16. data/lib/right_agent/platform/linux.rb +8 -1
  17. data/lib/right_agent/platform/windows.rb +1 -1
  18. data/lib/right_agent/sender.rb +57 -41
  19. data/right_agent.gemspec +4 -4
  20. data/spec/actor_registry_spec.rb +7 -8
  21. data/spec/actor_spec.rb +87 -24
  22. data/spec/agent_spec.rb +107 -8
  23. data/spec/command/command_runner_spec.rb +12 -1
  24. data/spec/dispatched_cache_spec.rb +142 -0
  25. data/spec/dispatcher_spec.rb +110 -129
  26. data/spec/history_spec.rb +234 -0
  27. data/spec/idempotent_request_spec.rb +1 -1
  28. data/spec/log_spec.rb +15 -0
  29. data/spec/operation_result_spec.rb +4 -2
  30. data/spec/platform/darwin_spec.rb +13 -0
  31. data/spec/platform/linux_spec.rb +38 -0
  32. data/spec/platform/platform_spec.rb +46 -51
  33. data/spec/platform/windows_spec.rb +13 -0
  34. data/spec/sender_spec.rb +81 -38
  35. metadata +12 -9
  36. data/lib/right_agent/monkey_patches/ruby_patch/singleton_patch.rb +0 -45
  37. data/spec/platform/darwin.rb +0 -11
  38. data/spec/platform/linux.rb +0 -23
  39. data/spec/platform/windows.rb +0 -11
data/lib/right_agent.rb CHANGED
@@ -47,7 +47,9 @@ require File.normalize_path(File.join(RIGHT_AGENT_BASE_DIR, 'agent_tag_manager')
47
47
  require File.normalize_path(File.join(RIGHT_AGENT_BASE_DIR, 'actor'))
48
48
  require File.normalize_path(File.join(RIGHT_AGENT_BASE_DIR, 'actor_registry'))
49
49
  require File.normalize_path(File.join(RIGHT_AGENT_BASE_DIR, 'dispatcher'))
50
+ require File.normalize_path(File.join(RIGHT_AGENT_BASE_DIR, 'dispatched_cache'))
50
51
  require File.normalize_path(File.join(RIGHT_AGENT_BASE_DIR, 'sender'))
51
52
  require File.normalize_path(File.join(RIGHT_AGENT_BASE_DIR, 'secure_identity'))
52
53
  require File.normalize_path(File.join(RIGHT_AGENT_BASE_DIR, 'idempotent_request'))
54
+ require File.normalize_path(File.join(RIGHT_AGENT_BASE_DIR, 'history'))
53
55
  require File.normalize_path(File.join(RIGHT_AGENT_BASE_DIR, 'agent'))
data/lib/right_agent/actor.rb CHANGED
@@ -59,21 +59,45 @@ module RightScale
59
59
  prefix = to_s.to_const_path
60
60
  end
61
61
 
62
+ # Add methods to list of services supported by actor and mark these methods
63
+ # as idempotent
64
+ #
65
+ # === Parameters
66
+ # methods(Array):: Symbol names for methods being exposed as actor idempotent services
67
+ #
68
+ # === Return
69
+ # true:: Always return true
70
+ def expose_idempotent(*methods)
71
+ @exposed ||= {}
72
+ methods.each do |m|
73
+ if @exposed[m] == false
74
+ Log.warning("Method #{m} declared both idempotent and non-idempotent, assuming non-idempotent")
75
+ else
76
+ @exposed[m] = true
77
+ end
78
+ end
79
+ true
80
+ end
81
+
62
82
  # Add methods to list of services supported by actor
83
+ # By default these methods are not idempotent
63
84
  #
64
85
  # === Parameters
65
86
  # meths(Array):: Symbol names for methods being exposed as actor services
66
87
  #
67
88
  # === Return
68
- # @exposed(Array):: List of unique methods exposed
69
- def expose(*meths)
70
- @exposed ||= []
71
- meths.each do |meth|
72
- @exposed << meth unless @exposed.include?(meth)
89
+ # true:: Always return true
90
+ def expose_non_idempotent(*methods)
91
+ @exposed ||= {}
92
+ methods.each do |m|
93
+ Log.warning("Method #{m} declared both idempotent and non-idempotent, assuming non-idempotent") if @exposed[m]
94
+ @exposed[m] = false
73
95
  end
74
- @exposed
96
+ true
75
97
  end
76
98
 
99
+ alias :expose :expose_non_idempotent
100
+
77
101
  # Get /prefix/method paths that actor responds to
78
102
  #
79
103
  # === Parameters
@@ -83,14 +107,25 @@ module RightScale
83
107
  # (Array):: /prefix/method strings
84
108
  def provides_for(prefix)
85
109
  return [] unless @exposed
86
- @exposed.select do |meth|
87
- if instance_methods.include?(meth.to_s) or instance_methods.include?(meth.to_sym)
110
+ @exposed.each_key.select do |method|
111
+ if instance_methods.include?(method.to_s) or instance_methods.include?(method.to_sym)
88
112
  true
89
113
  else
90
- Log.warning("Exposing non-existing method #{meth} in actor #{name}")
114
+ Log.warning("Exposing non-existing method #{method} in actor #{prefix}")
91
115
  false
92
116
  end
93
- end.map {|meth| "/#{prefix}/#{meth}".squeeze('/')}
117
+ end.map { |method| "/#{prefix}/#{method}".squeeze('/') }
118
+ end
119
+
120
+ # Determine whether actor method is idempotent
121
+ #
122
+ # === Parameters
123
+ # method(Symbol):: Name for actor method
124
+ #
125
+ # === Return
126
+ # (Boolean):: true if idempotent, false otherwise
127
+ def idempotent?(method)
128
+ @exposed[method] if @exposed
94
129
  end
95
130
 
96
131
  # Set method called when dispatching to this actor fails
data/lib/right_agent/actor_registry.rb CHANGED
@@ -49,15 +49,15 @@ module RightScale
49
49
  log_msg += ", prefix #{prefix}" if prefix && !prefix.empty?
50
50
  Log.info(log_msg)
51
51
  prefix ||= actor.class.default_prefix
52
- actors[prefix.to_s] = actor
52
+ @actors[prefix.to_s] = actor
53
53
  end
54
54
 
55
55
  # Retrieve services provided by all of the registered actors
56
56
  #
57
57
  # === Return
58
- # services(Array):: List of unique /prefix/method path strings
58
+ # (Array):: List of unique /prefix/method path strings
59
59
  def services
60
- actors.map {|prefix, actor| actor.class.provides_for(prefix) }.flatten.uniq
60
+ @actors.map { |prefix, actor| actor.class.provides_for(prefix) }.flatten.uniq
61
61
  end
62
62
 
63
63
  # Retrieve actor by prefix
@@ -66,9 +66,9 @@ module RightScale
66
66
  # prefix(String):: Prefix identifying actor
67
67
  #
68
68
  # === Return
69
- # actor(Actor):: Retrieved actor, may be nil
69
+ # (Actor|nil):: Retrieved actor, or nil if unknown
70
70
  def actor_for(prefix)
71
- actor = actors[prefix]
71
+ @actors[prefix]
72
72
  end
73
73
 
74
74
  end # ActorRegistry
data/lib/right_agent/actors/agent_manager.rb CHANGED
@@ -30,8 +30,8 @@ class AgentManager
30
30
 
31
31
  on_exception { |meth, deliverable, e| RightScale::ExceptionMailer.deliver_notification(meth, deliverable, e) }
32
32
 
33
- expose :ping, :stats, :profile, :set_log_level, :execute, :connect, :disconnect, :connect_failed,
34
- :tune_heartbeat, :terminate
33
+ expose_idempotent :ping, :stats, :profile, :set_log_level, :connect, :disconnect, :connect_failed, :tune_heartbeat
34
+ expose_non_idempotent :execute, :terminate
35
35
 
36
36
  # Valid log levels
37
37
  LEVELS = [:debug, :info, :warn, :error, :fatal]
@@ -255,8 +255,8 @@ class AgentManager
255
255
  # true
256
256
  def terminate(options = nil)
257
257
  RightScale::CommandRunner.stop
258
- # Delay terminate a bit to give reply a chance to be sent
259
- EM.next_tick { @agent.terminate }
258
+ # Delay terminate a bit to give request message a chance to be ack'd and reply to be sent
259
+ EM.add_timer(1) { @agent.terminate }
260
260
  true
261
261
  end
262
262
 
data/lib/right_agent/agent.rb CHANGED
@@ -73,9 +73,12 @@ module RightScale
73
73
  :check_interval => 5 * 60,
74
74
  :grace_timeout => 30,
75
75
  :prefetch => 1,
76
- :heartbeat => 60
76
+ :heartbeat => 0
77
77
  }
78
78
 
79
+ # Default block to be activated when finish terminating
80
+ DEFAULT_TERMINATE_BLOCK = lambda { EM.stop if EM.reactor_running? }
81
+
79
82
  # Initializes a new agent and establishes an AMQP connection.
80
83
  # This must be used inside EM.run block or if EventMachine reactor
81
84
  # is already started, for instance, by a Thin server that your Merb/Rails
@@ -112,6 +115,7 @@ module RightScale
112
115
  # :grace_timeout(Integer):: Maximum number of seconds to wait after last request received before
113
116
  # terminating regardless of whether there are still unfinished requests
114
117
  # :dup_check(Boolean):: Whether to check for and reject duplicate requests, e.g., due to retries
118
+ # or redelivery by broker after server failure
115
119
  # :prefetch(Integer):: Maximum number of messages the AMQP broker is to prefetch for this agent
116
120
  # before it receives an ack. Value 1 ensures that only last unacknowledged gets redelivered
117
121
  # if the agent crashes. Value 0 means unlimited prefetch.
@@ -119,9 +123,10 @@ module RightScale
119
123
  # exception(Exception):: Exception
120
124
  # message(Packet):: Message being processed
121
125
  # agent(Agent):: Reference to agent
122
- # :ready_callback(Proc):: Called once agent is connected ready to service (no argument)
123
- # :restart_callback(Proc):: Callback that is activated on each restart vote with votes being initiated
124
- # by offline queue exceeding MAX_QUEUED_REQUESTS or by repeated failures to access mapper when online
126
+ # :ready_callback(Proc):: Called once agent is connected to broker and ready for service (no argument)
127
+ # :restart_callback(Proc):: Called on each restart vote with votes being initiated by offline queue
128
+ # exceeding MAX_QUEUED_REQUESTS or by repeated failures to access mapper when online (no argument)
129
+ # :abnormal_terminate_callback(Proc):: Called at end of termination when terminate abnormally (no argument)
125
130
  # :services(Symbol):: List of services provided by this agent. Defaults to all methods exposed by actors.
126
131
  # :secure(Boolean):: true indicates to use security features of RabbitMQ to restrict agents to themselves
127
132
  # :single_threaded(Boolean):: true indicates to run all operations in one thread; false indicates
@@ -160,8 +165,8 @@ module RightScale
160
165
  @tags.flatten!
161
166
  @options.freeze
162
167
  @deferred_tasks = []
163
- @terminating = false
164
- @last_stat_reset_time = @service_start_time = Time.now
168
+ @history = History.new(@identity)
169
+ @last_stat_reset_time = Time.now
165
170
  reset_agent_stats
166
171
  true
167
172
  end
@@ -174,11 +179,15 @@ module RightScale
174
179
  Log.init(@identity, @options[:log_path], :print => true)
175
180
  Log.level = @options[:log_level] if @options[:log_level]
176
181
  RightSupport::Log::Mixin.default_logger = Log
182
+ @history.update("start")
177
183
  now = Time.now
178
184
  Log.info("[start] Agent #{@identity} starting; time: #{now.utc}; utc_offset: #{now.utc_offset}")
179
185
  Log.debug("Start options:")
180
- log_opts = @options.inject([]){ |t, (k, v)| t << "- #{k}: #{v.respond_to?(:each) ? v.inspect : v}" }
186
+ log_opts = @options.inject([]) do |t, (k, v)|
187
+ t << "- #{k}: #{k.to_s =~ /pass/ ? '****' : (v.respond_to?(:each) ? v.inspect : v)}"
188
+ end
181
189
  log_opts.each { |l| Log.debug(l) }
190
+ terminate_callback = @options[:abnormal_terminate_callback]
182
191
 
183
192
  begin
184
193
  # Capture process id in file after optional daemonize
@@ -200,11 +209,12 @@ module RightScale
200
209
  EM.add_timer(1) do
201
210
  begin
202
211
  @registry = ActorRegistry.new
203
- @dispatcher = Dispatcher.new(self)
204
- @sender = Sender.new(self)
212
+ @dispatcher = create_dispatcher
213
+ @sender = create_sender
205
214
  load_actors
206
215
  setup_traps
207
216
  setup_queues
217
+ @history.update("run")
208
218
  start_console if @options[:console] && !@options[:daemonize]
209
219
 
210
220
  # Need to keep reconnect interval at least :connect_timeout in size,
@@ -216,29 +226,29 @@ module RightScale
216
226
  @check_status_brokers = @broker.all
217
227
  EM.next_tick { @options[:ready_callback].call } if @options[:ready_callback]
218
228
  @check_status_timer = EM::PeriodicTimer.new(interval) { check_status }
229
+ rescue SystemExit
230
+ raise
219
231
  rescue Exception => e
220
- Log.error("Agent failed startup", e, :trace) unless e.message == "exit"
221
- EM.stop
232
+ terminate("failed startup after connecting to a broker", e, &terminate_callback)
222
233
  end
223
234
  end
224
235
  elsif status == :failed
225
- Log.error("Agent failed to connect to any brokers during startup")
226
- EM.stop
236
+ terminate("failed to connect to any brokers during startup", &terminate_callback)
227
237
  elsif status == :timeout
228
- Log.error("Agent failed to connect to any brokers after #{@options[:connect_timeout]} seconds during startup")
229
- EM.stop
238
+ terminate("failed to connect to any brokers after #{@options[:connect_timeout]} seconds during startup",
239
+ &terminate_callback)
230
240
  else
231
- Log.error("Agent broker connect attempt failed unexpectedly with status #{status} during startup")
232
- EM.stop
241
+ terminate("broker connect attempt failed unexpectedly with status #{status} during startup",
242
+ &terminate_callback)
233
243
  end
234
244
  end
235
- rescue SystemExit => e
236
- raise e
245
+ rescue SystemExit
246
+ raise
237
247
  rescue PidFile::AlreadyRunning
248
+ EM.stop if EM.reactor_running?
238
249
  raise
239
250
  rescue Exception => e
240
- Log.error("Agent failed startup", e, :trace) unless e.message == "exit"
241
- raise e
251
+ terminate("failed startup", e, &terminate_callback)
242
252
  end
243
253
  true
244
254
  end
@@ -430,17 +440,21 @@ module RightScale
430
440
  end
431
441
 
432
442
  # Handle packet received
443
+ # Delegate packet acknowledgement to dispatcher/sender
444
+ # Ignore requests if in the process of terminating but continue to accept responses
433
445
  #
434
446
  # === Parameters
435
447
  # packet(Request|Push|Result):: Packet received
448
+ # header(AMQP::Frame::Header|nil):: Request header containing ack control
436
449
  #
437
450
  # === Return
438
451
  # true:: Always return true
439
- def receive(packet)
452
+ def receive(packet, header = nil)
440
453
  begin
441
454
  case packet
442
- when Push, Request then @dispatcher.dispatch(packet) unless @terminating
443
- when Result then @sender.handle_response(packet)
455
+ when Push, Request then @dispatcher.dispatch(packet, header) unless @terminating
456
+ when Result then @sender.handle_response(packet, header)
457
+ else header.ack if header
444
458
  end
445
459
  @sender.message_received
446
460
  rescue RightAMQP::HABrokerClient::NoConnectedBrokers => e
@@ -465,20 +479,29 @@ module RightScale
465
479
 
466
480
  # Gracefully terminate execution by allowing unfinished tasks to complete
467
481
  # Immediately terminate if called a second time
482
+ # Report reason for termination if it is abnormal
483
+ #
484
+ # === Parameters
485
+ # reason(String):: Reason for abnormal termination, if any
486
+ # exception(Exception|String):: Exception or other parenthetical error information, if any
468
487
  #
469
488
  # === Block
470
489
  # Optional block to be executed after termination is complete
471
490
  #
472
491
  # === Return
473
492
  # true:: Always return true
474
- def terminate(&block)
493
+ def terminate(reason = nil, exception = nil, &block)
494
+ block ||= DEFAULT_TERMINATE_BLOCK
475
495
  begin
476
- if @terminating
477
- Log.info("[stop] Terminating immediately")
496
+ @history.update("stop")
497
+ Log.error("[stop] Terminating because #{reason}", exception, :trace) if reason
498
+ if @terminating || @broker.nil?
499
+ @terminating = true
478
500
  @termination_timer.cancel if @termination_timer
479
501
  @termination_timer = nil
480
- block.call if block
481
- EM.stop if EM.reactor_running?
502
+ Log.info("[stop] Terminating immediately")
503
+ block.call
504
+ @history.update("graceful exit") if @broker.nil?
482
505
  else
483
506
  @terminating = true
484
507
  @check_status_timer.cancel if @check_status_timer
@@ -495,7 +518,7 @@ module RightScale
495
518
  request_count, request_age = @sender.terminate
496
519
  Log.info("[stop] The following #{request_count} requests initiated as recently as #{request_age} " +
497
520
  "seconds ago are being dropped:\n " + @sender.dump_requests.join("\n ")) if request_age
498
- @broker.close { block.call if block; EM.stop if EM.reactor_running? }
521
+ @broker.close { block.call }
499
522
  end
500
523
 
501
524
  wait_time = [timeout - (request_age || timeout), timeout - (dispatch_age || timeout), 0].max
@@ -513,19 +536,21 @@ module RightScale
513
536
  finish.call
514
537
  rescue Exception => e
515
538
  Log.error("Failed while finishing termination", e, :trace)
516
- EM.stop if EM.reactor_running?
539
+ begin block.call; rescue Exception; end
517
540
  end
518
541
  end
519
542
  end
520
543
  else
521
- block.call if block
522
- EM.stop if EM.reactor_running?
544
+ block.call
523
545
  end
546
+ @history.update("graceful exit")
524
547
  end
525
548
  end
549
+ rescue SystemExit
550
+ raise
526
551
  rescue Exception => e
527
552
  Log.error("Failed to terminate gracefully", e, :trace)
528
- EM.stop if EM.reactor_running?
553
+ begin block.call; rescue Exception; end
529
554
  end
530
555
  true
531
556
  end
@@ -553,7 +578,7 @@ module RightScale
553
578
  "send stats" => @sender.stats(reset),
554
579
  "last reset time" => @last_stat_reset_time.to_i,
555
580
  "stat time" => now.to_i,
556
- "service uptime" => (now - @service_start_time).to_i,
581
+ "service uptime" => @history.analyze_service,
557
582
  "machine uptime" => Platform.shell.uptime
558
583
  }
559
584
  stats["revision"] = @revision if @revision
@@ -654,6 +679,23 @@ module RightScale
654
679
  false
655
680
  end
656
681
 
682
+ # Create dispatcher for handling incoming requests
683
+ #
684
+ # === Return
685
+ # (Dispatcher):: New dispatcher
686
+ def create_dispatcher
687
+ cache = DispatchedCache.new(@identity) if @options[:dup_check]
688
+ Dispatcher.new(self, cache)
689
+ end
690
+
691
+ # Create manager for outgoing requests
692
+ #
693
+ # === Return
694
+ # (Sender):: New sender
695
+ def create_sender
696
+ Sender.new(self)
697
+ end
698
+
657
699
  # Load the ruby code for the actors
658
700
  #
659
701
  # === Return
@@ -724,7 +766,7 @@ module RightScale
724
766
  queue = {:name => @identity, :options => {:durable => true, :no_declare => @options[:secure]}}
725
767
  filter = [:from, :tags, :tries, :persistent]
726
768
  options = {:ack => true, Request => filter, Push => filter, Result => [:from], :brokers => ids}
727
- ids = @broker.subscribe(queue, nil, options) { |_, packet| receive(packet) }
769
+ ids = @broker.subscribe(queue, nil, options) { |_, packet, header| receive(packet, header) }
728
770
  end
729
771
 
730
772
  # Setup signal traps
@@ -737,7 +779,7 @@ module RightScale
737
779
  EM.next_tick do
738
780
  begin
739
781
  terminate do
740
- EM.stop if EM.reactor_running?
782
+ DEFAULT_TERMINATE_BLOCK.call
741
783
  old.call if old.is_a? Proc
742
784
  end
743
785
  rescue Exception => e
@@ -796,10 +838,28 @@ module RightScale
796
838
  true
797
839
  end
798
840
 
841
+ begin
842
+ check_other(@check_status_count)
843
+ rescue Exception => e
844
+ Log.error("Failed to perform other check status check", e)
845
+ @exceptions.track("check status", e)
846
+ end
847
+
799
848
  @check_status_count += 1
800
849
  true
801
850
  end
802
851
 
852
+ # Allow derived classes to perform any other useful periodic checks
853
+ #
854
+ # === Parameters
855
+ # check_status_count(Integer):: Counter that is incremented for each status check
856
+ #
857
+ # === Return
858
+ # true:: Always return true
859
+ def check_other(check_status_count)
860
+ true
861
+ end
862
+
803
863
  # Store unique tags
804
864
  #
805
865
  # === Parameters
data/lib/right_agent/agent_tag_manager.rb CHANGED
@@ -20,13 +20,12 @@
20
20
  # OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
21
21
  # WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
22
22
 
23
- require 'singleton'
24
23
 
25
24
  module RightScale
26
25
 
27
26
  # Agent tags management
28
27
  class AgentTagManager
29
- include Singleton
28
+ include RightSupport::Ruby::EasySingleton
30
29
 
31
30
  # (Agent) Agent being managed
32
31
  attr_accessor :agent