right_agent 0.5.1 → 0.5.10

Sign up to get free protection for your applications and to get access to all the features.
Files changed (36) hide show
  1. data/lib/right_agent.rb +3 -13
  2. data/lib/right_agent/actors/agent_manager.rb +78 -4
  3. data/lib/right_agent/agent.rb +81 -4
  4. data/lib/right_agent/agent_config.rb +17 -1
  5. data/lib/right_agent/agent_tags_manager.rb +2 -2
  6. data/lib/right_agent/broker_client.rb +32 -34
  7. data/lib/right_agent/command/agent_manager_commands.rb +16 -0
  8. data/lib/right_agent/command/command_constants.rb +0 -9
  9. data/lib/right_agent/dispatcher.rb +6 -3
  10. data/lib/right_agent/ha_broker_client.rb +63 -14
  11. data/lib/right_agent/log.rb +1 -1
  12. data/lib/right_agent/minimal.rb +43 -0
  13. data/lib/right_agent/monkey_patches/amqp_patch.rb +91 -182
  14. data/lib/right_agent/packets.rb +10 -5
  15. data/lib/right_agent/platform.rb +8 -0
  16. data/lib/right_agent/platform/darwin.rb +14 -0
  17. data/lib/right_agent/platform/linux.rb +23 -0
  18. data/lib/right_agent/platform/windows.rb +31 -0
  19. data/lib/right_agent/scripts/agent_controller.rb +16 -8
  20. data/lib/right_agent/scripts/agent_deployer.rb +6 -0
  21. data/lib/right_agent/scripts/log_level_manager.rb +4 -5
  22. data/lib/right_agent/scripts/stats_manager.rb +9 -1
  23. data/lib/right_agent/sender.rb +623 -371
  24. data/lib/right_agent/stats_helper.rb +15 -1
  25. data/lib/right_agent/tracer.rb +1 -1
  26. data/right_agent.gemspec +14 -15
  27. data/spec/agent_config_spec.rb +9 -0
  28. data/spec/agent_spec.rb +154 -18
  29. data/spec/broker_client_spec.rb +171 -170
  30. data/spec/dispatcher_spec.rb +24 -8
  31. data/spec/ha_broker_client_spec.rb +55 -33
  32. data/spec/monkey_patches/amqp_patch_spec.rb +12 -0
  33. data/spec/packets_spec.rb +2 -0
  34. data/spec/sender_spec.rb +140 -69
  35. data/spec/stats_helper_spec.rb +5 -0
  36. metadata +54 -53
@@ -380,7 +380,8 @@ module RightScale
380
380
  # Packet for a work request for an actor node that has no result, i.e., one-way request
381
381
  class Push < Packet
382
382
 
383
- attr_accessor :from, :scope, :payload, :type, :token, :selector, :target, :persistent, :expires_at, :tags
383
+ attr_accessor :from, :scope, :payload, :type, :token, :selector, :target, :persistent, :confirm,
384
+ :expires_at, :tags
384
385
 
385
386
  DEFAULT_OPTIONS = {:selector => :any}
386
387
 
@@ -397,6 +398,8 @@ module RightScale
397
398
  # :target(String):: Target recipient
398
399
  # :persistent(Boolean):: Indicates if this request should be saved to persistent storage
399
400
  # by the AMQP broker
401
+ # :confirm(Boolean):: Whether to require a confirmation response from the mapper containing the targets
402
+ # to which request was published but not necessarily delivered
400
403
  # :expires_at(Integer|nil):: Time in seconds in Unix-epoch when this request expires and
401
404
  # is to be ignored by the receiver; value 0 means never expire; defaults to 0
402
405
  # :tags(Array(Symbol)):: List of tags to be used for selecting target for this request
@@ -416,6 +419,7 @@ module RightScale
416
419
  @selector = :any if ["least_loaded", "random"].include?(@selector.to_s)
417
420
  @target = opts[:target]
418
421
  @persistent = opts[:persistent]
422
+ @confirm = opts[:confirm]
419
423
  @expires_at = opts[:expires_at] || 0
420
424
  @tags = opts[:tags] || []
421
425
  @version = version
@@ -448,10 +452,11 @@ module RightScale
448
452
  # (Push):: New packet
449
453
  def self.create(o)
450
454
  i = o['data']
451
- new(i['type'], i['payload'], { :from => self.compatible(i['from']), :scope => i['scope'],
452
- :token => i['token'], :selector => i['selector'],
453
- :target => self.compatible(i['target']), :persistent => i['persistent'],
454
- :tags => i['tags'], :expires_at => i['expires_at'] },
455
+ new(i['type'], i['payload'], { :from => self.compatible(i['from']), :scope => i['scope'],
456
+ :token => i['token'], :selector => i['selector'],
457
+ :target => self.compatible(i['target']), :persistent => i['persistent'],
458
+ :confirm => i['confirm'], :expires_at => i['expires_at'],
459
+ :tags => i['tags']},
455
460
  i['version'] || [DEFAULT_VERSION, DEFAULT_VERSION], o['size'])
456
461
  end
457
462
 
@@ -234,6 +234,14 @@ module RightScale
234
234
  platform_service(:rng)
235
235
  end
236
236
 
237
+ # Platform process facilities.
238
+ #
239
+ # === Return
240
+ # (Object):: Platform-specific process facilities object
241
+ def process
242
+ platform_service(:process) # delegates to platform_service using the :process key
243
+ end
244
+
237
245
  private
238
246
 
239
247
  # Load platform specific implementation
@@ -222,6 +222,20 @@ module RightScale
222
222
  end
223
223
  end
224
224
 
225
class Process
  # Queries resident set size (current working set size in Windows).
  #
  # === Parameters
  # pid(Fixnum):: process ID or nil for current process
  #
  # === Return
  # result(Fixnum):: current set size in KB as reported by ps; 0 if ps
  #   produced no numeric output
  #
  # === Raise
  # ArgumentError, TypeError:: if pid cannot be converted to an integer
  def resident_set_size(pid=nil)
    # Coerce to an integer before interpolating into the shell command so
    # that a non-numeric pid can never be interpreted as shell text
    pid = pid ? Integer(pid) : $$
    `ps -o rss= -p #{pid}`.to_i
  end
end
238
+
225
239
  end # Platform
226
240
 
227
241
  end # RightScale
@@ -75,6 +75,15 @@ module RightScale
75
75
  @flavor =~ /suse/
76
76
  end
77
77
 
78
+ # Is this machine running rhel?
79
+ #
80
+ # === Return
81
+ # true:: If Linux flavor is rhel
82
+ # false:: Otherwise
83
+ def rhel?
84
+ @flavor =~ /redhatenterpriseserver/ # =~ yields the match offset (truthy) or nil, not a strict true/false
85
+ end
86
+
78
87
  class Filesystem
79
88
 
80
89
  # Is given command available in the PATH?
@@ -263,6 +272,20 @@ module RightScale
263
272
  end
264
273
  end
265
274
 
275
class Process
  # Queries resident set size (current working set size in Windows).
  #
  # === Parameters
  # pid(Fixnum):: process ID or nil for current process
  #
  # === Return
  # result(Fixnum):: current set size in KB as reported by ps; 0 if ps
  #   produced no numeric output
  #
  # === Raise
  # ArgumentError, TypeError:: if pid cannot be converted to an integer
  def resident_set_size(pid=nil)
    # Coerce to an integer before interpolating into the shell command so
    # that a non-numeric pid can never be interpreted as shell text
    pid = pid ? Integer(pid) : $$
    `ps -o rss= -p #{pid}`.to_i
  end
end
288
+
266
289
  end # Platform
267
290
 
268
291
  end # RightScale
@@ -1183,6 +1183,37 @@ EOF
1183
1183
  end
1184
1184
  end
1185
1185
 
1186
class Process
  include ::Windows::Process

  # Lazily created Win32 API binding for psapi!GetProcessMemoryInfo
  @@get_process_memory_info = nil

  # see PROCESS_MEMORY_COUNTERS structure: "http://msdn.microsoft.com/en-us/library/ms684877%28VS.85%29.aspx"
  # NOTE(review): 10 * 4 bytes matches the structure only when its SIZE_T
  # members are 4 bytes wide (32-bit process) -- confirm before running
  # under a 64-bit Ruby
  SIZEOF_PROCESS_MEMORY_COUNTERS = 10 * 4

  # Queries resident set size (current working set size in Windows).
  #
  # === Parameters
  # pid(Fixnum):: process ID or nil for current process
  #
  # === Return
  # result(Fixnum):: current set size in KB
  #
  # === Raise
  # NotImplementedError:: if a pid is given (only the current process is supported)
  # RightScale::Win32Error:: if the GetProcessMemoryInfo call reports failure
  def resident_set_size(pid=nil)
    @@get_process_memory_info ||= ::Win32::API.new("GetProcessMemoryInfo", 'LPL', 'B', 'psapi')

    # FIX: call OpenProcess and ensure proper access and close if given PID.
    raise NotImplementedError.new("pid != nil not yet implemented") if pid
    process_handle = GetCurrentProcess()
    process_memory_counters = "\0" * SIZEOF_PROCESS_MEMORY_COUNTERS
    result = @@get_process_memory_info.call(process_handle, process_memory_counters, process_memory_counters.size)
    # note that the 'B' return type is a Fixnum (i.e. not TrueClass or FalseClass) of 'zero' on failure or 'non-zero' on success
    raise ::RightScale::Win32Error.new("Failed to get resident set size for process") if 0 == result

    # current .WorkingSetSize (bytes) is equivalent of Linux' ps resident set size (KB)
    # the member is read as exactly 4 bytes starting at offset 12 (after three
    # preceding 4-byte members in the 32-bit layout)
    return process_memory_counters[12, 4].unpack("L")[0] / 1024 # bytes to KB
  end
end
1216
+
1186
1217
  protected
1187
1218
 
1188
1219
  # internal class for querying OS version, etc.
@@ -73,10 +73,9 @@
73
73
 
74
74
  require 'rubygems'
75
75
  require 'optparse'
76
- require 'fileutils'
77
- require File.expand_path(File.join(File.dirname(__FILE__), 'usage'))
78
- require File.expand_path(File.join(File.dirname(__FILE__), '..', '..', 'right_agent'))
79
- require File.expand_path(File.join(File.dirname(__FILE__), 'common_parser'))
76
+ require File.expand_path(File.join(File.dirname(__FILE__), '..', 'minimal'))
77
+ require File.normalize_path(File.join(File.dirname(__FILE__), 'usage'))
78
+ require File.normalize_path(File.join(File.dirname(__FILE__), 'common_parser'))
80
79
 
81
80
  module RightScale
82
81
 
@@ -115,6 +114,7 @@ module RightScale
115
114
  # === Return
116
115
  # true:: Always return true
117
116
  def control(options)
117
+
118
118
  # Initialize directory settings
119
119
  AgentConfig.cfg_dir = options[:cfg_dir]
120
120
  AgentConfig.pid_dir = options[:pid_dir]
@@ -169,7 +169,7 @@ module RightScale
169
169
  # === Return
170
170
  # options(Hash):: Parsed options
171
171
  def parse_args
172
- options = {}
172
+ options = {:thin_command_client => false}
173
173
 
174
174
  opts = OptionParser.new do |opts|
175
175
  parse_common(opts, options)
@@ -194,7 +194,7 @@ module RightScale
194
194
  options[:pid_file] = file
195
195
  options[:action] = 'kill'
196
196
  end
197
-
197
+
198
198
  opts.on("-K", "--killall") do
199
199
  options[:action] = 'killall'
200
200
  end
@@ -228,7 +228,7 @@ module RightScale
228
228
 
229
229
  opts.on("-f", "--foreground") do
230
230
  options[:daemonize] = false
231
- #Squelch Ruby VM warnings about various things
231
+ #Squelch Ruby VM warnings about various things
232
232
  $VERBOSE = nil
233
233
  end
234
234
 
@@ -249,6 +249,14 @@ module RightScale
249
249
  exit 0 if e.is_a?(SystemExit)
250
250
  fail(e.message, print_usage = true)
251
251
  end
252
+
253
+ # allow specific arguments to use a thin command client for faster
254
+ # execution (on Windows, etc.)
255
+ unless options[:thin_command_client]
256
+ # require full right_agent for any commands which do not specify thin
257
+ # command client.
258
+ require File.normalize_path(File.join(File.dirname(__FILE__), '..', '..', 'right_agent'))
259
+ end
252
260
  resolve_identity(options)
253
261
  options
254
262
  end
@@ -344,7 +352,7 @@ module RightScale
344
352
  end
345
353
  true
346
354
  end
347
-
355
+
348
356
  # Stop agent process
349
357
  #
350
358
  # === Parameters
@@ -34,6 +34,7 @@
34
34
  # --vhost, -v VHOST Set agent AMQP virtual host
35
35
  # --host, -h HOST Set AMQP broker host
36
36
  # --port, -P PORT Set AMQP broker port
37
+ # --heartbeat, -b SEC Set number of seconds between AMQP broker connection heartbeats, 0 means disable
37
38
  # --prefetch COUNT Set maximum requests AMQP broker is to prefetch before current is ack'd
38
39
  # --http-proxy PROXY Use a proxy for all agent-originated HTTP traffic
39
40
  # --http-no-proxy NOPROXY Comma-separated list of proxy exceptions (e.g. metadata server)
@@ -186,6 +187,10 @@ module RightScale
186
187
  options[:prefetch] = count.to_i
187
188
  end
188
189
 
190
+ opts.on('-b', '--heartbeat SEC') do |sec|
191
+ options[:heartbeat] = sec.to_i
192
+ end
193
+
189
194
  opts.on('-o', '--options OPT') do |e|
190
195
  fail("Invalid option definition #{e}' (use '=' to separate name and value)") unless e.include?('=')
191
196
  key, val = e.split(/=/)
@@ -282,6 +287,7 @@ module RightScale
282
287
  cfg[:port] = options[:port] if options[:port]
283
288
  cfg[:host] = options[:host] if options[:host]
284
289
  cfg[:prefetch] = options[:prefetch] || 1
290
+ cfg[:heartbeat] = options[:heartbeat] if options[:heartbeat]
285
291
  cfg[:time_to_live] = options[:time_to_live] || 60
286
292
  cfg[:retry_timeout] = options[:retry_timeout] || 2 * 60
287
293
  cfg[:retry_interval] = options[:retry_interval] || 15
@@ -94,8 +94,8 @@ module RightScale
94
94
  opts = OptionParser.new do |opts|
95
95
 
96
96
  opts.on('-l', '--log-level LEVEL') do |l|
97
- fail("Invalid log level '#{l}'") unless AgentManager::LEVELS.include?(l.to_sym)
98
- options[:level] = l
97
+ fail("Invalid log level '#{l}'") unless AgentManager::LEVELS.include?(l.downcase.to_sym)
98
+ options[:level] = l.downcase
99
99
  end
100
100
 
101
101
  opts.on("-c", "--cfg-dir DIR") do |d|
@@ -136,12 +136,11 @@ module RightScale
136
136
  def request_log_level(agent_name, command, options)
137
137
  res = false
138
138
  config_options = AgentConfig.agent_options(agent_name)
139
- unless config_options.empty?
140
- listen_port = config_options[:listen_port]
139
+ unless config_options.empty? || (listen_port = config_options[:listen_port]).nil?
141
140
  fail("Could not retrieve #{agent_name} agent listen port") unless listen_port
142
141
  client = CommandClient.new(listen_port, config_options[:cookie])
143
142
  begin
144
- client.send_command(command, options[:verbose]) do |level|
143
+ client.send_command(command, options[:verbose], timeout = 5) do |level|
145
144
  puts "Agent #{agent_name} log level: #{level.to_s.upcase}"
146
145
  end
147
146
  res = true
@@ -14,6 +14,9 @@
14
14
  # rstat AGENT --json
15
15
  # rstat AGENT --j
16
16
  #
17
+ # Log details of statistics retrieval
18
+ # rstat AGENT -v
19
+ #
17
20
  # === Usage:
18
21
  # rstat [AGENT] [options]
19
22
  #
@@ -21,6 +24,7 @@
21
24
  # --reset, -r As part of gathering the stats from an agent also reset the stats
22
25
  # --timeout, -t SEC Override default timeout in seconds to wait for a response from an agent
23
26
  # --json, -j Display the stats data in JSON format
27
+ # --verbose, -v Log debug information
24
28
  # --cfg-dir, -c DIR Set directory containing configuration for all agents
25
29
  # --help Display help
26
30
 
@@ -61,7 +65,7 @@ module RightScale
61
65
  # === Return
62
66
  # true:: Always return true
63
67
  def manage(options)
64
- init_log
68
+ init_log if options[:verbose]
65
69
  AgentConfig.cfg_dir = options[:cfg_dir]
66
70
  options[:timeout] ||= DEFAULT_TIMEOUT
67
71
  request_stats(options)
@@ -92,6 +96,10 @@ module RightScale
92
96
  options[:json] = true
93
97
  end
94
98
 
99
+ opts.on('-v', '--verbose') do
100
+ options[:verbose] = true
101
+ end
102
+
95
103
  opts.on("-c", "--cfg-dir DIR") do |d|
96
104
  options[:cfg_dir] = d
97
105
  end
@@ -30,40 +30,504 @@ module RightScale
30
30
 
31
31
  include StatsHelper
32
32
 
33
- # Minimum number of seconds between restarts of the inactivity timer
34
- MIN_RESTART_INACTIVITY_TIMER_INTERVAL = 60
33
+ # Request that is waiting for a response
34
class PendingRequest

  # Read-only attributes fixed at creation time:
  #   kind(Symbol):: Kind of send request
  #   receive_time(Time):: Time when request message was received
  #   response_handler(Proc):: Block to be activated when response is received
  attr_reader :kind, :receive_time, :response_handler

  # (String) Token for parent request in a retry situation; nil until assigned
  attr_accessor :retry_parent

  # Create pending request record
  #
  # === Parameters
  # kind(Symbol):: Kind of send request
  # receive_time(Time):: Time when request message was received
  # response_handler(Proc):: Block to be activated when response is received
  def initialize(kind, receive_time, response_handler)
    @kind, @receive_time, @response_handler = kind, receive_time, response_handler
    @retry_parent = nil
  end

end # PendingRequest
56
+
57
+ # Cache for requests that are waiting for a response
58
+ # Automatically deletes push requests when they get too old
59
+ # Retains non-push requests until explicitly deleted
60
class PendingRequests < Hash

  # Kinds of send requests
  REQUEST_KINDS = [:send_retryable_request, :send_persistent_request]

  # Kinds of send pushes
  PUSH_KINDS = [:send_push, :send_persistent_push]

  # Maximum number of seconds to retain send pushes in cache
  MAX_PUSH_AGE = 2 * 60

  # Minimum number of seconds between push cleanups
  MIN_CLEANUP_INTERVAL = 15

  # Create empty cache and start the cleanup interval clock
  def initialize
    @last_cleanup = Time.now
    super
  end

  # Store pending request, first purging pushes older than MAX_PUSH_AGE
  # if more than MIN_CLEANUP_INTERVAL seconds have passed since the last purge
  #
  # === Parameters
  # token(String):: Generated message identifier
  # request(PendingRequest):: Pending request
  #
  # === Return
  # (PendingRequest):: Stored request
  def []=(token, request)
    current = Time.now
    if (current - @last_cleanup) > MIN_CLEANUP_INTERVAL
      delete_if do |_, pending|
        PUSH_KINDS.include?(pending.kind) && (current - pending.receive_time) > MAX_PUSH_AGE
      end
      @last_cleanup = current
    end
    super
  end

  # Select cache entries of the given kinds
  #
  # === Parameters
  # kinds(Array):: Kind of requests to be included
  #
  # === Return
  # (Hash):: Requests of specified kind
  def kind(kinds)
    reject { |_, pending| !kinds.include?(pending.kind) }
  end

  # Get age of youngest pending request
  #
  # === Return
  # age(Integer):: Age in whole seconds of youngest request, or nil if cache is empty
  def youngest_age
    request_ages.min
  end

  # Get age of oldest pending request
  #
  # === Return
  # age(Integer):: Age in whole seconds of oldest request, or nil if cache is empty
  def oldest_age
    request_ages.max
  end

  private

  # Ages in whole seconds of all cached requests, measured from a single "now"
  def request_ages
    current = Time.now
    values.map { |pending| (current - pending.receive_time).to_i }
  end

end # PendingRequests
137
+
138
+ # Queue for storing requests while disconnected from broker and then sending
139
+ # them once successfully reconnected
140
class OfflineHandler

  # Maximum seconds to wait before starting flushing offline queue when disabling offline mode
  MAX_QUEUE_FLUSH_DELAY = 2 * 60

  # Maximum number of offline queued requests before triggering restart vote
  MAX_QUEUED_REQUESTS = 1000

  # Number of seconds that should be spent in offline mode before triggering a restart vote
  RESTART_VOTE_DELAY = 15 * 60

  # (Symbol) Current queue state with possible values:
  #   Value          Description                Action               Next state
  #   :created       Queue created              init                 :initializing
  #   :initializing  Agent still initializing   start                :running
  #   :running       Queue has been started     disable when offline :flushing
  #   :flushing      Sending queued requests    enable               :running
  #   :terminating   Agent terminating
  attr_reader :state

  # (Symbol) Current offline handling mode with possible values:
  #   Value          Description
  #   :initializing  Agent still initializing
  #   :online        Agent connected to broker
  #   :offline       Agent disconnected from broker
  attr_reader :mode

  # (Array) Offline queue
  attr_accessor :queue

  # Create offline queue
  #
  # === Parameters
  # restart_callback(Proc):: Callback that is activated on each restart vote with votes being initiated
  #   by offline queue exceeding MAX_QUEUED_REQUESTS
  # offline_stats(ActivityStats):: Offline queue tracking statistics
  def initialize(restart_callback, offline_stats)
    @restart_vote = restart_callback
    @restart_vote_timer = nil
    @restart_vote_count = 0
    @offline_stats = offline_stats
    @state = :created
    @mode = :initializing
    @queue = []
  end

  # Initialize the offline queue
  # All requests sent prior to running this initialization are queued
  # and then are sent once this initialization has run
  # All requests following this call and prior to calling start
  # are prepended to the request queue
  #
  # === Return
  # true:: Always return true
  def init
    @state = :initializing if @state == :created
    true
  end

  # Switch to online mode and send all buffered messages
  # Only acts when in the :initializing state
  #
  # === Return
  # true:: Always return true
  def start
    if @state == :initializing
      if @mode == :offline
        # Went offline during initialization; keep the queue until disable is called
        @state = :running
      else
        # flush only acts in the :flushing state, so enter it before flushing;
        # flush itself moves the state to :running once the queue is empty
        @state = :flushing
        flush
      end
      @mode = :online if @mode == :initializing
    end
    true
  end

  # Is agent currently offline?
  #
  # === Return
  # (Boolean):: true if agent offline, otherwise false
  def offline?
    @mode == :offline || @state == :created
  end

  # In request queueing mode?
  #
  # === Return
  # (Boolean):: true if should queue request, otherwise false
  def queueing?
    offline? && @state != :flushing
  end

  # Switch to offline mode
  # In this mode requests are queued in memory rather than sent to the mapper
  # Idempotent
  #
  # === Return
  # true:: Always return true
  def enable
    if offline?
      if @state == :flushing
        # If we were in offline mode then switched back to online but are still in the
        # process of flushing the in-memory queue and are now switching to offline mode
        # again then stop the flushing
        @state = :running
      end
    else
      Log.info("[offline] Disconnect from broker detected, entering offline mode")
      Log.info("[offline] Messages will be queued in memory until connection to broker is re-established")
      @offline_stats.update
      @queue ||= []  # ensure queue is valid without losing any messages when going offline
      @mode = :offline
      start_timer
    end
    true
  end

  # Switch back to sending requests to mapper after in-memory queue gets flushed
  # Idempotent
  #
  # === Return
  # true:: Always return true
  def disable
    if offline? && @state != :created
      Log.info("[offline] Connection to broker re-established")
      @offline_stats.finish
      cancel_timer
      @state = :flushing
      # Wait a bit to avoid flooding the mapper
      EM.add_timer(rand(MAX_QUEUE_FLUSH_DELAY)) { flush }
    end
    true
  end

  # Queue given request in memory
  # Triggers a restart vote each time MAX_QUEUED_REQUESTS requests have been
  # queued since the vote count was last reset
  #
  # === Parameters
  # kind(Symbol):: Name of the Sender method to invoke when the request is flushed
  # type(Object):: Request type passed through to that Sender method
  # payload(Object):: Request payload passed through to that Sender method
  # target(Object):: Request target passed through to that Sender method
  # callback(Proc|nil):: Optional handler called with the result when the request is flushed
  #
  # === Return
  # true:: Always return true
  def queue_request(kind, type, payload, target, callback)
    request = {:kind => kind, :type => type, :payload => payload, :target => target, :callback => callback}
    Log.info("[offline] Queuing request: #{request.inspect}")
    vote_to_restart if (@restart_vote_count += 1) >= MAX_QUEUED_REQUESTS
    if @state == :initializing
      # We are in the initialization callback, requests should be put at the head of the queue
      @queue.unshift(request)
    else
      @queue << request
    end
    true
  end

  # Prepare for agent termination
  #
  # === Return
  # true:: Always return true
  def terminate
    @state = :terminating
    cancel_timer
    true
  end

  protected

  # Send any requests that were queued while in offline mode
  # Do this asynchronously to allow for agents to respond to requests
  # Once all in-memory requests have been flushed, switch off offline mode
  # Does nothing unless in the :flushing state
  #
  # === Return
  # true:: Always return true
  def flush
    if @state == :flushing
      Log.info("[offline] Starting to flush request queue of size #{@queue.size}") unless @mode == :initializing
      unless @queue.empty?
        # Send one request per pass; remaining requests are sent on later event loop ticks
        r = @queue.shift
        if r[:callback]
          Sender.instance.__send__(r[:kind], r[:type], r[:payload], r[:target]) { |result| r[:callback].call(result) }
        else
          Sender.instance.__send__(r[:kind], r[:type], r[:payload], r[:target])
        end
      end
      if @queue.empty?
        Log.info("[offline] Request queue flushed, resuming normal operations") unless @mode == :initializing
        @mode = :online
        @state = :running
      else
        EM.next_tick { flush }
      end
    end
    true
  end

  # Vote for restart and reset trigger
  #
  # === Parameters
  # timer_trigger(Boolean):: true if vote was triggered by timer, false if it
  #   was triggered by number of messages in in-memory queue
  #
  # === Return
  # true:: Always return true
  def vote_to_restart(timer_trigger = false)
    if @restart_vote
      @restart_vote.call
      if timer_trigger
        # NOTE(review): start_timer only creates a timer when none is recorded, and the
        # fired timer is still recorded here, so this looks like a no-op -- confirm intent
        start_timer
      else
        @restart_vote_count = 0
      end
    end
    true
  end

  # Start restart vote timer
  # Does nothing if there is no restart callback, if terminating, or if a timer is already recorded
  #
  # === Return
  # true:: Always return true
  def start_timer
    if @restart_vote && @state != :terminating
      @restart_vote_timer ||= EM::Timer.new(RESTART_VOTE_DELAY) { vote_to_restart(timer_trigger = true) }
    end
    true
  end

  # Cancel restart vote timer
  #
  # === Return
  # true:: Always return true
  def cancel_timer
    if @restart_vote_timer
      @restart_vote_timer.cancel
      @restart_vote_timer = nil
      @restart_vote_count = 0
    end
    true
  end

end # OfflineHandler
375
+
376
+ # Broker connectivity checker
377
+ # Checks connectivity when requested
378
class ConnectivityChecker

  # Minimum number of seconds between restarts of the inactivity timer
  MIN_RESTART_INACTIVITY_TIMER_INTERVAL = 60

  # Number of seconds to wait for ping response from a mapper when checking connectivity
  PING_TIMEOUT = 30

  # (EM::Timer) Timer while waiting for mapper ping response
  attr_accessor :ping_timer

  # Create checker and, if check_interval is positive, start the inactivity timer
  #
  # === Parameters
  # sender(Object):: Owner used to reach the broker, agent, pending requests, and publish
  # check_interval(Integer):: Seconds of inactivity before checking connectivity, 0 disables
  # ping_stats(Object):: Receives "timeout" and "success" ping updates
  # exception_stats(Object):: Receives exceptions rescued by this checker
  def initialize(sender, check_interval, ping_stats, exception_stats)
    @sender, @check_interval = sender, check_interval
    @ping_stats, @exception_stats = ping_stats, exception_stats
    @ping_timer = nil
    @last_received = Time.now
    @message_received_callbacks = []
    restart_inactivity_timer if @check_interval > 0
  end

  # Update the time this agent last received a request or response message
  # and restart the inactivity timer thus deferring the next connectivity check
  # Also forward this message receipt notification to any callbacks that have registered
  #
  # === Block
  # Optional block without parameters that is activated when a message is received;
  # when given, the block is only registered and no notification takes place
  #
  # === Return
  # true:: Always return true
  def message_received(&callback)
    if block_given?
      @message_received_callbacks << callback
      return true
    end

    @message_received_callbacks.each { |registered| registered.call }
    return true unless @check_interval > 0

    current = Time.now
    if (current - @last_received) > MIN_RESTART_INACTIVITY_TIMER_INTERVAL
      @last_received = current
      restart_inactivity_timer
    end
    true
  end

  # Check whether broker connection is usable by pinging a mapper via that broker
  # Attempt to reconnect if ping does not respond in PING_TIMEOUT seconds
  # Ignore request if already checking a connection
  # Only to be called from primary thread
  #
  # === Parameters
  # id(String):: Identity of specific broker to use to send ping, defaults to any
  #   currently connected broker
  #
  # === Return
  # true:: Always return true
  def check(id = nil)
    return true if @terminating || @ping_timer || (id && !@sender.broker.connected?(id))

    # The timer block reads id when it fires, i.e., after id has been
    # replaced below by the identity of the broker actually used
    @ping_timer = EM::Timer.new(PING_TIMEOUT) { ping_timed_out(id) }

    handler = lambda { |_| ping_responded }
    request = Request.new("/mapper/ping", nil, {:from => @sender.identity, :token => AgentIdentity.generate})
    @sender.pending_requests[request.token] = PendingRequest.new(:send_persistent_request, Time.now, handler)
    ids = [id] if id
    id = @sender.publish(request, ids).first
    true
  end

  # Prepare for agent termination
  #
  # === Return
  # true:: Always return true
  def terminate
    @terminating = true
    @check_interval = 0
    [@ping_timer, @inactivity_timer].each { |timer| timer.cancel if timer }
    @ping_timer = nil
    @inactivity_timer = nil
    true
  end

  protected

  # Record the ping timeout and force a reconnect to the broker that was pinged
  #
  # === Parameters
  # broker_id(String):: Identity of broker via which the ping was sent
  def ping_timed_out(broker_id)
    @ping_stats.update("timeout")
    @ping_timer = nil
    Log.warning("Mapper ping via broker #{broker_id} timed out after #{PING_TIMEOUT} seconds, attempting to reconnect")
    host, port, index, priority, _ = @sender.broker.identity_parts(broker_id)
    @sender.agent.connect(host, port, index, priority, true)
  rescue Exception => e
    Log.error("Failed to reconnect to broker #{broker_id}", e, :trace)
    @exception_stats.track("ping timeout", e)
  end

  # Record the ping success and cancel the timeout timer if it is still pending
  def ping_responded
    if @ping_timer
      @ping_stats.update("success")
      @ping_timer.cancel
      @ping_timer = nil
    end
  rescue Exception => e
    Log.error("Failed to cancel mapper ping", e, :trace)
    @exception_stats.track("cancel ping", e)
  end

  # Start timer that waits for inactive messaging period to end before checking connectivity
  #
  # === Return
  # true:: Always return true
  def restart_inactivity_timer
    @inactivity_timer.cancel if @inactivity_timer
    @inactivity_timer = EM::Timer.new(@check_interval) do
      begin
        check
      rescue Exception => e
        Log.error("Failed connectivity check", e, :trace)
        @exception_stats.track("check connectivity", e)
      end
    end
    true
  end

end # ConnectivityChecker
509
+
510
+ # Factor used on each retry iteration to achieve exponential backoff
511
+ RETRY_BACKOFF_FACTOR = 4
512
+
513
+ # (PendingRequests) Requests waiting for a response
59
514
  attr_accessor :pending_requests
60
515
 
516
+ # (OfflineHandler) Handler for requests when disconnected from broker
517
+ attr_reader :offline_handler
518
+
519
+ # (ConnectivityChecker) Broker connection checker
520
+ attr_reader :connectivity_checker
521
+
61
522
  # (HABrokerClient) High availability AMQP broker client
62
523
  attr_accessor :broker
63
524
 
64
525
  # (String) Identity of the associated agent
65
526
  attr_reader :identity
66
527
 
528
+ # (Agent) Associated agent
529
+ attr_reader :agent
530
+
67
531
  # Accessor for use by actor
68
532
  #
69
533
  # === Return
@@ -82,6 +546,8 @@ module RightScale
82
546
  # agent(Agent):: Reference to agent
83
547
  # :offline_queueing(Boolean):: Whether to queue request if currently not connected to any brokers,
84
548
  # also requires agent invocation of initialize_offline_queue and start_offline_queue methods below
549
+ # :ping_interval(Integer):: Minimum number of seconds since last message receipt to ping the mapper
550
+ # to check connectivity, defaults to 0 meaning do not ping
85
551
  # :restart_callback(Proc):: Callback that is activated on each restart vote with votes being initiated
86
552
  # by offline queue exceeding MAX_QUEUED_REQUESTS or by repeated failures to access mapper when online
87
553
  # :retry_timeout(Numeric):: Maximum number of seconds to retry request before give up
@@ -98,52 +564,19 @@ module RightScale
98
564
  @broker = @agent.broker
99
565
  @secure = @options[:secure]
100
566
  @single_threaded = @options[:single_threaded]
101
- @queueing_mode = :initializing
102
- @queue_running = false
103
- @queue_initializing = false
104
- @queue = []
105
- @restart_vote_count = 0
106
567
  @retry_timeout = nil_if_zero(@options[:retry_timeout])
107
568
  @retry_interval = nil_if_zero(@options[:retry_interval])
108
- @ping_interval = @options[:ping_interval] || 0
109
569
 
110
570
  # Only to be accessed from primary thread
111
- @pending_requests = {}
112
- @pending_ping = nil
571
+ @pending_requests = PendingRequests.new
113
572
 
114
573
  reset_stats
115
- @last_received = 0
116
- @message_received_callbacks = []
117
- restart_inactivity_timer if @ping_interval > 0
574
+ @offline_handler = OfflineHandler.new(@options[:restart_callback], @offline_stats)
575
+ @connectivity_checker = ConnectivityChecker.new(self, @options[:ping_interval] || 0, @ping_stats, @exception_stats)
118
576
  @@instance = self
119
577
  end
120
578
 
121
- # Update the time this agent last received a request or response message
122
- # and restart the inactivity timer thus deferring the next connectivity check
123
- # Also forward this message receipt notification to any callbacks that have registered
124
- #
125
- # === Block
126
- # Optional block without parameters that is activated when a message is received
127
- #
128
- # === Return
129
- # true:: Always return true
130
- def message_received(&callback)
131
- if block_given?
132
- @message_received_callbacks << callback
133
- else
134
- @message_received_callbacks.each { |c| c.call }
135
- if @ping_interval > 0
136
- now = Time.now.to_i
137
- if (now - @last_received) > MIN_RESTART_INACTIVITY_TIMER_INTERVAL
138
- @last_received = now
139
- restart_inactivity_timer
140
- end
141
- end
142
- end
143
- true
144
- end
145
-
146
- # Initialize the offline queue (should be called once)
579
+ # Initialize the offline queue
147
580
  # All requests sent prior to running this initialization are queued if offline
148
581
  # queueing is enabled and then are sent once this initialization has run
149
582
  # All requests following this call and prior to calling start_offline_queue
@@ -152,11 +585,7 @@ module RightScale
152
585
  # === Return
153
586
  # true:: Always return true
154
587
  def initialize_offline_queue
155
- unless @queue_running || !@options[:offline_queueing]
156
- @queue_running = true
157
- @queue_initializing = true
158
- end
159
- true
588
+ @offline_handler.init if @options[:offline_queueing]
160
589
  end
161
590
 
162
591
  # Switch offline queueing to online mode and flush all buffered messages
@@ -164,12 +593,38 @@ module RightScale
164
593
  # === Return
165
594
  # true:: Always return true
166
595
  def start_offline_queue
167
- if @queue_initializing
168
- @queue_initializing = false
169
- flush_queue unless @queueing_mode == :offline
170
- @queueing_mode = :online if @queueing_mode == :initializing
171
- end
172
- true
596
+ @offline_handler.start if @options[:offline_queueing]
597
+ end
598
+
599
+ # Switch to offline mode
600
+ # In this mode requests are queued in memory rather than sent to the mapper
601
+ # Idempotent
602
+ #
603
+ # === Return
604
+ # true:: Always return true
605
+ def enable_offline_mode
606
+ @offline_handler.enable if @options[:offline_queueing]
607
+ end
608
+
609
+ # Switch back to sending requests to mapper after in memory queue gets flushed
610
+ # Idempotent
611
+ #
612
+ # === Return
613
+ # true:: Always return true
614
+ def disable_offline_mode
615
+ @offline_handler.disable if @options[:offline_queueing]
616
+ end
617
+
618
+ # Update the time this agent last received a request or response message
619
+ # Also forward this message receipt notification to any callbacks that have registered
620
+ #
621
+ # === Block
622
+ # Optional block without parameters that is activated when a message is received
623
+ #
624
+ # === Return
625
+ # true:: Always return true
626
+ def message_received(&callback)
627
+ @connectivity_checker.message_received(&callback)
173
628
  end
174
629
 
175
630
  # Send a request to a single target or multiple targets with no response expected other
@@ -194,9 +649,11 @@ module RightScale
194
649
  # defaults to :any
195
650
  #
196
651
  # === Block
197
- # Optional block used to process routing response failures asynchronously with the following parameter:
198
- # result(Result):: Response with an OperationResult of RETRY, NON_DELIVERY, or ERROR,
199
- # use RightScale::OperationResult.from_results to decode
652
+ # Optional block used to process routing responses asynchronously with the following parameter:
653
+ # result(Result):: Response with an OperationResult of SUCCESS, RETRY, NON_DELIVERY, or ERROR,
654
+ # with an initial SUCCESS response containing the targets to which the mapper published the
655
+ # request and any additional responses indicating any failures to actually route the request
656
+ # to those targets, use RightScale::OperationResult.from_results to decode
200
657
  #
201
658
  # === Return
202
659
  # true:: Always return true
@@ -227,9 +684,11 @@ module RightScale
227
684
  # defaults to :any
228
685
  #
229
686
  # === Block
230
- # Optional block used to process routing response failures asynchronously with the following parameter:
231
- # result(Result):: Response with an OperationResult of RETRY, NON_DELIVERY, or ERROR,
232
- # use RightScale::OperationResult.from_results to decode
687
+ # Optional block used to process routing responses asynchronously with the following parameter:
688
+ # result(Result):: Response with an OperationResult of SUCCESS, RETRY, NON_DELIVERY, or ERROR,
689
+ # with an initial SUCCESS response containing the targets to which the mapper published the
690
+ # request and any additional responses indicating any failures to actually route the request
691
+ # to those targets, use RightScale::OperationResult.from_results to decode
233
692
  #
234
693
  # === Return
235
694
  # true:: Always return true
@@ -316,17 +775,17 @@ module RightScale
316
775
  if response.is_a?(Result)
317
776
  if result = OperationResult.from_results(response)
318
777
  if result.non_delivery?
319
- @non_deliveries.update(result.content.nil? ? "nil" : result.content.inspect)
778
+ @non_delivery_stats.update(result.content.nil? ? "nil" : result.content.inspect)
320
779
  elsif result.error?
321
- @result_errors.update(result.content.nil? ? "nil" : result.content.inspect)
780
+ @result_error_stats.update(result.content.nil? ? "nil" : result.content.inspect)
322
781
  end
323
- @results.update(result.status)
782
+ @result_stats.update(result.status)
324
783
  else
325
- @results.update(response.results.nil? ? "nil" : response.results)
784
+ @result_stats.update(response.results.nil? ? "nil" : response.results)
326
785
  end
327
786
 
328
787
  if handler = @pending_requests[token]
329
- if result && result.non_delivery? && handler[:request_kind] == :send_retryable_request &&
788
+ if result && result.non_delivery? && handler.kind == :send_retryable_request &&
330
789
  [OperationResult::TARGET_NOT_CONNECTED, OperationResult::TTL_EXPIRATION].include?(result.content)
331
790
  # Log and ignore so that timeout retry mechanism continues
332
791
  # Leave purging of associated request until final response, i.e., success response or retry timeout
@@ -343,95 +802,52 @@ module RightScale
343
802
  true
344
803
  end
345
804
 
346
- # Switch to offline mode, in this mode requests are queued in memory
347
- # rather than sent to the mapper
348
- # Idempotent
805
+ # Publish request
806
+ # Use mandatory flag to request return of message if it cannot be delivered
349
807
  #
350
- # === Return
351
- # true:: Always return true
352
- def enable_offline_mode
353
- if offline?
354
- if @flushing_queue
355
- # If we were in offline mode then switched back to online but are still in the
356
- # process of flushing the in memory queue and are now switching to offline mode
357
- # again then stop the flushing
358
- @stop_flushing_queue = true
359
- end
360
- else
361
- Log.info("[offline] Disconnect from broker detected, entering offline mode")
362
- Log.info("[offline] Messages will be queued in memory until connection to broker is re-established")
363
- @offlines.update
364
- @queue ||= [] # ensure queue is valid without losing any messages when going offline
365
- @queueing_mode = :offline
366
- @restart_vote_timer ||= EM::Timer.new(RESTART_VOTE_DELAY) { vote_to_restart(timer_trigger=true) }
367
- end
368
- end
369
-
370
- # Switch back to sending requests to mapper after in memory queue gets flushed
371
- # Idempotent
808
+ # === Parameters
809
+ # request(Push|Request):: Packet to be sent
810
+ # ids(Array|nil):: Identity of specific brokers to choose from, or nil if any okay
372
811
  #
373
812
  # === Return
374
- # true:: Always return true
375
- def disable_offline_mode
376
- if offline? && @queue_running
377
- Log.info("[offline] Connection to broker re-established")
378
- @offlines.finish
379
- @restart_vote_timer.cancel if @restart_vote_timer
380
- @restart_vote_timer = nil
381
- @stop_flushing_queue = false
382
- @flushing_queue = true
383
- # Let's wait a bit not to flood the mapper
384
- EM.add_timer(rand(MAX_QUEUE_FLUSH_DELAY)) { flush_queue } if @queue_running
813
+ # ids(Array):: Identity of brokers published to
814
+ def publish(request, ids = nil)
815
+ begin
816
+ exchange = {:type => :fanout, :name => "request", :options => {:durable => true, :no_declare => @secure}}
817
+ ids = @broker.publish(exchange, request, :persistent => request.persistent, :mandatory => true,
818
+ :log_filter => [:tags, :target, :tries, :persistent], :brokers => ids)
819
+ rescue HABrokerClient::NoConnectedBrokers => e
820
+ Log.error("Failed to publish request #{request.to_s([:tags, :target, :tries])}", e)
821
+ ids = []
822
+ rescue Exception => e
823
+ Log.error("Failed to publish request #{request.to_s([:tags, :target, :tries])}", e, :trace)
824
+ @exception_stats.track("publish", e, request)
825
+ ids = []
385
826
  end
386
- true
387
- end
388
-
389
- # Get age of youngest pending request
390
- #
391
- # === Return
392
- # age(Integer|nil):: Age in seconds of youngest request, or nil if no pending requests
393
- def request_age
394
- time = Time.now
395
- age = nil
396
- @pending_requests.each_value do |request|
397
- seconds = time - request[:receive_time]
398
- age = seconds.to_i if age.nil? || seconds < age
399
- end
400
- age
827
+ ids
401
828
  end
402
829
 
403
830
  # Take any actions necessary to quiesce mapper interaction in preparation
404
831
  # for agent termination but allow message receipt to continue
405
832
  #
406
833
  # === Return
407
- # (Array):: Number of pending requests and age of youngest request
834
+ # (Array):: Number of pending non-push requests and age of youngest request
408
835
  def terminate
409
- @terminating = true
410
- @ping_interval = 0
411
- if @pending_ping
412
- @pending_ping.cancel
413
- @pending_ping = nil
414
- end
415
- if @timer
416
- @timer.cancel
417
- @timer = nil
418
- end
419
- if @restart_vote_timer
420
- @restart_vote_timer.cancel
421
- @restart_vote_timer = nil
422
- end
423
- [@pending_requests.size, request_age]
836
+ @offline_handler.terminate
837
+ @connectivity_checker.terminate
838
+ pending = @pending_requests.kind(PendingRequests::REQUEST_KINDS)
839
+ [pending.size, pending.youngest_age]
424
840
  end
425
841
 
426
- # Create displayable dump of unfinished request information
842
+ # Create displayable dump of unfinished non-push request information
427
843
  # Truncate list if there are more than 50 requests
428
844
  #
429
845
  # === Return
430
846
  # info(Array(String)):: Receive time and token for each request in descending time order
431
847
  def dump_requests
432
848
  info = []
433
- @pending_requests.each do |token, request|
434
- info << "#{request[:receive_time].localtime} <#{token}>"
849
+ @pending_requests.kind(PendingRequests::REQUEST_KINDS).each do |token, request|
850
+ info << "#{request.receive_time.localtime} <#{token}>"
435
851
  end
436
852
  info.sort.reverse
437
853
  info = info[0..49] + ["..."] if info.size > 50
@@ -458,7 +874,8 @@ module RightScale
458
874
  # with percentage breakdown per kind, or nil if none
459
875
  # "requests"(Hash|nil):: Request activity stats with keys "total", "percent", "last", and "rate"
460
876
  # with percentage breakdown per request type, or nil if none
461
- # "requests pending"(Hash|nil):: Number of requests waiting for response and age of oldest, or nil if none
877
+ # "requests pending"(Hash|nil):: Number of requests waiting for response and age of oldest,
878
+ # or nil if none
462
879
  # "response time"(Float):: Average number of seconds to respond to a request recently
463
880
  # "result errors"(Hash|nil):: Error result activity stats with keys "total", "percent", "last",
464
881
  # and 'rate' with percentage breakdown per error, or nil if none
@@ -467,25 +884,28 @@ module RightScale
467
884
  # "retries"(Hash|nil):: Retry activity stats with keys "total", "percent", "last", and "rate"
468
885
  # with percentage breakdown per request type, or nil if none
469
886
  def stats(reset = false)
470
- offlines = @offlines.all
471
- offlines.merge!("duration" => @offlines.avg_duration) if offlines
472
- requests_pending = if @pending_requests.size > 0
473
- now = Time.now.to_i
474
- oldest = @pending_requests.values.inject(0) { |m, r| [m, now - r[:receive_time].to_i].max }
475
- {"total" => @pending_requests.size, "oldest age" => oldest}
887
+ offlines = @offline_stats.all
888
+ offlines.merge!("duration" => @offline_stats.avg_duration) if offlines
889
+ if @pending_requests.size > 0
890
+ pending = {}
891
+ pending["pushes"] = @pending_requests.kind(PendingRequests::PUSH_KINDS).size
892
+ requests = @pending_requests.kind(PendingRequests::REQUEST_KINDS)
893
+ if (pending["requests"] = requests.size) > 0
894
+ pending["oldest age"] = requests.oldest_age
895
+ end
476
896
  end
477
897
  stats = {
478
- "exceptions" => @exceptions.stats,
479
- "non-deliveries" => @non_deliveries.all,
898
+ "exceptions" => @exception_stats.stats,
899
+ "non-deliveries" => @non_delivery_stats.all,
480
900
  "offlines" => offlines,
481
- "pings" => @pings.all,
901
+ "pings" => @ping_stats.all,
482
902
  "request kinds" => @request_kinds.all,
483
- "requests" => @requests.all,
484
- "requests pending" => requests_pending,
485
- "response time" => @requests.avg_duration,
486
- "result errors" => @result_errors.all,
487
- "results" => @results.all,
488
- "retries" => @retries.all
903
+ "requests" => @request_stats.all,
904
+ "requests pending" => pending,
905
+ "response time" => @request_stats.avg_duration,
906
+ "result errors" => @result_error_stats.all,
907
+ "results" => @result_stats.all,
908
+ "retries" => @retry_stats.all
489
909
  }
490
910
  reset_stats if reset
491
911
  stats
@@ -498,15 +918,15 @@ module RightScale
498
918
  # === Return
499
919
  # true:: Always return true
500
920
  def reset_stats
501
- @pings = ActivityStats.new
502
- @retries = ActivityStats.new
503
- @requests = ActivityStats.new
504
- @results = ActivityStats.new
505
- @result_errors = ActivityStats.new
506
- @non_deliveries = ActivityStats.new
507
- @offlines = ActivityStats.new(measure_rate = false)
921
+ @ping_stats = ActivityStats.new
922
+ @retry_stats = ActivityStats.new
923
+ @request_stats = ActivityStats.new
924
+ @result_stats = ActivityStats.new
925
+ @result_error_stats = ActivityStats.new
926
+ @non_delivery_stats = ActivityStats.new
927
+ @offline_stats = ActivityStats.new(measure_rate = false)
508
928
  @request_kinds = ActivityStats.new(measure_rate = false)
509
- @exceptions = ExceptionStats.new(@agent, @options[:exception_callback])
929
+ @exception_stats = ExceptionStats.new(@agent, @options[:exception_callback])
510
930
  true
511
931
  end
512
932
 
@@ -528,9 +948,11 @@ module RightScale
528
948
  # defaults to :any
529
949
  #
530
950
  # === Block
531
- # Optional block used to process routing response failures asynchronously with the following parameter:
532
- # result(Result):: Response with an OperationResult of RETRY, NON_DELIVERY, or ERROR,
533
- # use RightScale::OperationResult.from_results to decode
951
+ # Optional block used to process routing responses asynchronously with the following parameter:
952
+ # result(Result):: Response with an OperationResult of SUCCESS, RETRY, NON_DELIVERY, or ERROR,
953
+ # with an initial SUCCESS response containing the targets to which the mapper published the
954
+ # request and any additional responses indicating any failures to actually route the request
955
+ # to those targets, use RightScale::OperationResult.from_results to decode
534
956
  #
535
957
  # === Return
536
958
  # true:: Always return true
@@ -540,10 +962,10 @@ module RightScale
540
962
  def build_push(kind, type, payload = nil, target = nil, &callback)
541
963
  validate_target(target, allow_selector = true)
542
964
  if should_queue?
543
- queue_request(:kind => kind, :type => type, :payload => payload, :target => target, :callback => callback)
965
+ @offline_handler.queue_request(kind, type, payload, target, callback)
544
966
  else
545
967
  method = type.split('/').last
546
- received_at = @requests.update(method)
968
+ received_at = @request_stats.update(method)
547
969
  push = Push.new(type, payload)
548
970
  push.from = @identity
549
971
  push.token = AgentIdentity.generate
@@ -556,11 +978,10 @@ module RightScale
556
978
  end
557
979
  push.persistent = kind == :send_persistent_push
558
980
  @request_kinds.update((push.selector == :all ? kind.to_s.sub(/push/, "fanout") : kind.to_s)[5..-1])
559
- @pending_requests[push.token] = {
560
- :response_handler => callback,
561
- :receive_time => received_at,
562
- :request_kind => kind
563
- } if callback
981
+ if callback
982
+ push.confirm = true
983
+ @pending_requests[push.token] = PendingRequest.new(kind, received_at, callback)
984
+ end
564
985
  publish(push)
565
986
  end
566
987
  true
@@ -594,12 +1015,12 @@ module RightScale
594
1015
  def build_request(kind, type, payload, target, &callback)
595
1016
  validate_target(target, allow_selector = false)
596
1017
  if should_queue?
597
- queue_request(:kind => kind, :type => type, :payload => payload, :target => target, :callback => callback)
1018
+ @offline_handler.queue_request(kind, type, payload, target, callback)
598
1019
  else
599
1020
  method = type.split('/').last
600
1021
  token = AgentIdentity.generate
601
1022
  non_duplicate = kind == :send_persistent_request
602
- received_at = @requests.update(method, token)
1023
+ received_at = @request_stats.update(method, token)
603
1024
  @request_kinds.update(kind.to_s[5..-1])
604
1025
 
605
1026
  # Using next_tick to ensure on primary thread since using @pending_requests
@@ -617,10 +1038,7 @@ module RightScale
617
1038
  end
618
1039
  request.expires_at = Time.now.to_i + @options[:time_to_live] if !non_duplicate && @options[:time_to_live] && @options[:time_to_live] != 0
619
1040
  request.persistent = non_duplicate
620
- @pending_requests[token] = {
621
- :response_handler => callback,
622
- :receive_time => received_at,
623
- :request_kind => kind}
1041
+ @pending_requests[token] = PendingRequest.new(kind, received_at, callback)
624
1042
  if non_duplicate
625
1043
  publish(request)
626
1044
  else
@@ -628,7 +1046,7 @@ module RightScale
628
1046
  end
629
1047
  rescue Exception => e
630
1048
  Log.error("Failed to send #{type} #{kind.to_s}", e, :trace)
631
- @exceptions.track(kind.to_s, e, request)
1049
+ @exception_stats.track(kind.to_s, e, request)
632
1050
  end
633
1051
  end
634
1052
  end
@@ -695,7 +1113,7 @@ module RightScale
695
1113
  ids = publish(request)
696
1114
 
697
1115
  if @retry_interval && @retry_timeout && parent && !ids.empty?
698
- interval = [(@retry_interval * multiplier) + (@requests.avg_duration || 0), @retry_timeout - elapsed].min
1116
+ interval = [(@retry_interval * multiplier) + (@request_stats.avg_duration || 0), @retry_timeout - elapsed].min
699
1117
  EM.add_timer(interval) do
700
1118
  begin
701
1119
  if handler = @pending_requests[parent]
@@ -704,52 +1122,27 @@ module RightScale
704
1122
  if elapsed < @retry_timeout
705
1123
  request.tries << request.token
706
1124
  request.token = AgentIdentity.generate
707
- @pending_requests[parent][:retry_parent] = parent if count == 1
1125
+ @pending_requests[parent].retry_parent = parent if count == 1
708
1126
  @pending_requests[request.token] = @pending_requests[parent]
709
1127
  publish_with_timeout_retry(request, parent, count, multiplier * RETRY_BACKOFF_FACTOR, elapsed)
710
- @retries.update(request.type.split('/').last)
1128
+ @retry_stats.update(request.type.split('/').last)
711
1129
  else
712
1130
  Log.warning("RE-SEND TIMEOUT after #{elapsed.to_i} seconds for #{request.to_s([:tags, :target, :tries])}")
713
1131
  result = OperationResult.non_delivery(OperationResult::RETRY_TIMEOUT)
714
- @non_deliveries.update(result.content)
1132
+ @non_delivery_stats.update(result.content)
715
1133
  handle_response(Result.new(request.token, request.reply_to, result, @identity))
716
1134
  end
717
- check_connection(ids.first) if count == 1
1135
+ @connectivity_checker.check(ids.first) if count == 1
718
1136
  end
719
1137
  rescue Exception => e
720
1138
  Log.error("Failed retry for #{request.token}", e, :trace)
721
- @exceptions.track("retry", e, request)
1139
+ @exception_stats.track("retry", e, request)
722
1140
  end
723
1141
  end
724
1142
  end
725
1143
  true
726
1144
  end
727
1145
 
728
- # Publish request
729
- # Use mandatory flag to request return of message if it cannot be delivered
730
- #
731
- # === Parameters
732
- # request(Push|Request):: Packet to be sent
733
- # ids(Array|nil):: Identity of specific brokers to choose from, or nil if any okay
734
- #
735
- # === Return
736
- # ids(Array):: Identity of brokers published to
737
- def publish(request, ids = nil)
738
- begin
739
- exchange = {:type => :fanout, :name => "request", :options => {:durable => true, :no_declare => @secure}}
740
- ids = @broker.publish(exchange, request, :persistent => request.persistent, :mandatory => true,
741
- :log_filter => [:tags, :target, :tries, :persistent], :brokers => ids)
742
- rescue HABrokerClient::NoConnectedBrokers => e
743
- Log.error("Failed to publish request #{request.to_s([:tags, :target, :tries])}", e)
744
- ids = []
745
- rescue Exception => e
746
- Log.error("Failed to publish request #{request.to_s([:tags, :target, :tries])}", e, :trace)
747
- @exceptions.track("publish", e, request)
748
- ids = []
749
- end
750
- ids
751
- end
752
-
753
1146
  # Deliver the response and remove associated request(s) from pending
754
1147
  # Use defer thread instead of primary if not single threaded, consistent with dispatcher,
755
1148
  # so that all shared data is accessed from the same thread
@@ -763,113 +1156,22 @@ module RightScale
763
1156
  # === Return
764
1157
  # true:: Always return true
765
1158
  def deliver(response, handler)
766
- @requests.finish(handler[:receive_time], response.token)
1159
+ @request_stats.finish(handler.receive_time, response.token)
767
1160
 
768
- @pending_requests.delete(response.token)
769
- if parent = handler[:retry_parent]
770
- @pending_requests.reject! { |k, v| k == parent || v[:retry_parent] == parent }
1161
+ @pending_requests.delete(response.token) if PendingRequests::REQUEST_KINDS.include?(handler.kind)
1162
+ if parent = handler.retry_parent
1163
+ @pending_requests.reject! { |k, v| k == parent || v.retry_parent == parent }
771
1164
  end
772
1165
 
773
- if handler[:response_handler]
1166
+ if handler.response_handler
774
1167
  EM.__send__(@single_threaded ? :next_tick : :defer) do
775
1168
  begin
776
- handler[:response_handler].call(response)
777
- rescue Exception => e
778
- Log.error("Failed processing response {response.to_s([])}", e, :trace)
779
- @exceptions.track("response", e, response)
780
- end
781
- end
782
- end
783
- true
784
- end
785
-
786
- # Check whether broker connection is usable by pinging a mapper via that broker
787
- # Attempt to reconnect if ping does not respond in PING_TIMEOUT seconds
788
- # Ignore request if already checking a connection
789
- # Only to be called from primary thread
790
- #
791
- # === Parameters
792
- # id(String):: Identity of specific broker to use to send ping, defaults to any
793
- # currently connected broker
794
- #
795
- # === Return
796
- # true:: Always return true
797
- def check_connection(id = nil)
798
- unless @terminating || @pending_ping || (id && !@broker.connected?(id))
799
- @pending_ping = EM::Timer.new(PING_TIMEOUT) do
800
- begin
801
- @pings.update("timeout")
802
- @pending_ping = nil
803
- Log.warning("Mapper ping via broker #{id} timed out after #{PING_TIMEOUT} seconds, attempting to reconnect")
804
- host, port, index, priority, _ = @broker.identity_parts(id)
805
- @agent.connect(host, port, index, priority, force = true)
806
- rescue Exception => e
807
- Log.error("Failed to reconnect to broker #{id}", e, :trace)
808
- @exceptions.track("ping timeout", e)
809
- end
810
- end
811
-
812
- handler = lambda do |_|
813
- begin
814
- if @pending_ping
815
- @pings.update("success")
816
- @pending_ping.cancel
817
- @pending_ping = nil
818
- end
1169
+ handler.response_handler.call(response)
819
1170
  rescue Exception => e
820
- Log.error("Failed to cancel mapper ping", e, :trace)
821
- @exceptions.track("cancel ping", e)
1171
+ Log.error("Failed processing response #{response.to_s([])}", e, :trace)
1172
+ @exception_stats.track("response", e, response)
822
1173
  end
823
1174
  end
824
-
825
- request = Request.new("/mapper/ping", nil, {:from => @identity, :token => AgentIdentity.generate})
826
- @pending_requests[request.token] = {:response_handler => handler, :receive_time => Time.now}
827
- ids = [id] if id
828
- id = publish(request, ids).first
829
- end
830
- true
831
- end
832
-
833
- # Vote for restart and reset trigger
834
- #
835
- # === Parameters
836
- # timer_trigger(Boolean):: true if vote was triggered by timer, false if it
837
- # was triggered by number of messages in in-memory queue
838
- #
839
- # === Return
840
- # true:: Always return true
841
- def vote_to_restart(timer_trigger)
842
- if restart_vote = @options[:restart_callback]
843
- restart_vote.call
844
- if timer_trigger
845
- @restart_vote_timer = EM::Timer.new(RESTART_VOTE_DELAY) { vote_to_restart(timer_trigger = true) }
846
- else
847
- @restart_vote_count = 0
848
- end
849
- end
850
- true
851
- end
852
-
853
- # Is agent currently offline?
854
- #
855
- # === Return
856
- # offline(Boolean):: true if agent is disconnected or not initialized
857
- def offline?
858
- offline = @queueing_mode == :offline || !@queue_running
859
- end
860
-
861
- # Start timer that waits for inactive messaging period to end before checking connectivity
862
- #
863
- # === Return
864
- # true:: Always return true
865
- def restart_inactivity_timer
866
- @timer.cancel if @timer
867
- @timer = EM::Timer.new(@ping_interval) do
868
- begin
869
- check_connection
870
- rescue Exception => e
871
- Log.error("Failed connectivity check", e, :trace)
872
- end
873
1175
  end
874
1176
  true
875
1177
  end
@@ -879,57 +1181,7 @@ module RightScale
879
1181
  # === Return
880
1182
  # (Boolean):: true if should queue request, otherwise false
881
1183
  def should_queue?
882
- @options[:offline_queueing] && offline? && !@flushing_queue
883
- end
884
-
885
- # Queue given request in memory
886
- #
887
- # === Parameters
888
- # request(Hash):: Request to be stored
889
- #
890
- # === Return
891
- # true:: Always return true
892
- def queue_request(request)
893
- Log.info("[offline] Queuing request: #{request.inspect}")
894
- @restart_vote_count += 1 if @queue_running
895
- vote_to_restart(timer_trigger = false) if @restart_vote_count >= MAX_QUEUED_REQUESTS
896
- if @queue_initializing
897
- # We are in the initialization callback, requests should be put at the head of the queue
898
- @queue.unshift(request)
899
- else
900
- @queue << request
901
- end
902
- true
903
- end
904
-
905
- # Flush in memory queue of requests that were stored while in offline mode
906
- # Do this asynchronously to allow for agents to respond to requests
907
- # Once all in-memory requests have been flushed, switch off offline mode
908
- #
909
- # === Return
910
- # true:: Always return true
911
- def flush_queue
912
- if @stop_flushing_queue
913
- @stop_flushing_queue = false
914
- @flushing_queue = false
915
- else
916
- Log.info("[offline] Starting to flush request queue of size #{@queue.size}") unless @queueing_mode == :initializing
917
- unless @queue.empty?
918
- r = @queue.shift
919
- if r[:callback]
920
- Sender.instance.__send__(r[:kind], r[:type], r[:payload], r[:target]) { |res| r[:callback].call(res) }
921
- else
922
- Sender.instance.__send__(r[:kind], r[:type], r[:payload], r[:target])
923
- end
924
- end
925
- if @queue.empty?
926
- Log.info("[offline] Request queue flushed, resuming normal operations") unless @queueing_mode == :initializing
927
- @queueing_mode = :online
928
- @flushing_queue = false
929
- else
930
- EM.next_tick { flush_queue }
931
- end
932
- end
1184
+ @options[:offline_queueing] && @offline_handler.queueing?
933
1185
  end
934
1186
 
935
1187
  end # Sender