right_agent 0.5.1 → 0.5.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (36)
  1. data/lib/right_agent.rb +3 -13
  2. data/lib/right_agent/actors/agent_manager.rb +78 -4
  3. data/lib/right_agent/agent.rb +81 -4
  4. data/lib/right_agent/agent_config.rb +17 -1
  5. data/lib/right_agent/agent_tags_manager.rb +2 -2
  6. data/lib/right_agent/broker_client.rb +32 -34
  7. data/lib/right_agent/command/agent_manager_commands.rb +16 -0
  8. data/lib/right_agent/command/command_constants.rb +0 -9
  9. data/lib/right_agent/dispatcher.rb +6 -3
  10. data/lib/right_agent/ha_broker_client.rb +63 -14
  11. data/lib/right_agent/log.rb +1 -1
  12. data/lib/right_agent/minimal.rb +43 -0
  13. data/lib/right_agent/monkey_patches/amqp_patch.rb +91 -182
  14. data/lib/right_agent/packets.rb +10 -5
  15. data/lib/right_agent/platform.rb +8 -0
  16. data/lib/right_agent/platform/darwin.rb +14 -0
  17. data/lib/right_agent/platform/linux.rb +23 -0
  18. data/lib/right_agent/platform/windows.rb +31 -0
  19. data/lib/right_agent/scripts/agent_controller.rb +16 -8
  20. data/lib/right_agent/scripts/agent_deployer.rb +6 -0
  21. data/lib/right_agent/scripts/log_level_manager.rb +4 -5
  22. data/lib/right_agent/scripts/stats_manager.rb +9 -1
  23. data/lib/right_agent/sender.rb +623 -371
  24. data/lib/right_agent/stats_helper.rb +15 -1
  25. data/lib/right_agent/tracer.rb +1 -1
  26. data/right_agent.gemspec +14 -15
  27. data/spec/agent_config_spec.rb +9 -0
  28. data/spec/agent_spec.rb +154 -18
  29. data/spec/broker_client_spec.rb +171 -170
  30. data/spec/dispatcher_spec.rb +24 -8
  31. data/spec/ha_broker_client_spec.rb +55 -33
  32. data/spec/monkey_patches/amqp_patch_spec.rb +12 -0
  33. data/spec/packets_spec.rb +2 -0
  34. data/spec/sender_spec.rb +140 -69
  35. data/spec/stats_helper_spec.rb +5 -0
  36. metadata +54 -53
@@ -380,7 +380,8 @@ module RightScale
380
380
  # Packet for a work request for an actor node that has no result, i.e., one-way request
381
381
  class Push < Packet
382
382
 
383
- attr_accessor :from, :scope, :payload, :type, :token, :selector, :target, :persistent, :expires_at, :tags
383
+ attr_accessor :from, :scope, :payload, :type, :token, :selector, :target, :persistent, :confirm,
384
+ :expires_at, :tags
384
385
 
385
386
  DEFAULT_OPTIONS = {:selector => :any}
386
387
 
@@ -397,6 +398,8 @@ module RightScale
397
398
  # :target(String):: Target recipient
398
399
  # :persistent(Boolean):: Indicates if this request should be saved to persistent storage
399
400
  # by the AMQP broker
401
+ # :confirm(Boolean):: Whether require confirmation response from mapper containing targets
402
+ # to which request was published but not necessarily delivered
400
403
  # :expires_at(Integer|nil):: Time in seconds in Unix-epoch when this request expires and
401
404
  # is to be ignored by the receiver; value 0 means never expire; defaults to 0
402
405
  # :tags(Array(Symbol)):: List of tags to be used for selecting target for this request
@@ -416,6 +419,7 @@ module RightScale
416
419
  @selector = :any if ["least_loaded", "random"].include?(@selector.to_s)
417
420
  @target = opts[:target]
418
421
  @persistent = opts[:persistent]
422
+ @confirm = opts[:confirm]
419
423
  @expires_at = opts[:expires_at] || 0
420
424
  @tags = opts[:tags] || []
421
425
  @version = version
@@ -448,10 +452,11 @@ module RightScale
448
452
  # (Push):: New packet
449
453
  def self.create(o)
450
454
  i = o['data']
451
- new(i['type'], i['payload'], { :from => self.compatible(i['from']), :scope => i['scope'],
452
- :token => i['token'], :selector => i['selector'],
453
- :target => self.compatible(i['target']), :persistent => i['persistent'],
454
- :tags => i['tags'], :expires_at => i['expires_at'] },
455
+ new(i['type'], i['payload'], { :from => self.compatible(i['from']), :scope => i['scope'],
456
+ :token => i['token'], :selector => i['selector'],
457
+ :target => self.compatible(i['target']), :persistent => i['persistent'],
458
+ :confirm => i['confirm'], :expires_at => i['expires_at'],
459
+ :tags => i['tags']},
455
460
  i['version'] || [DEFAULT_VERSION, DEFAULT_VERSION], o['size'])
456
461
  end
457
462
 
@@ -234,6 +234,14 @@ module RightScale
234
234
  platform_service(:rng)
235
235
  end
236
236
 
237
+ # Platform process facilities.
238
+ #
239
+ # === Return
240
+ # (Object):: Platform-specific process facilities object
241
+ def process
242
+ platform_service(:process)
243
+ end
244
+
237
245
  private
238
246
 
239
247
  # Load platform specific implementation
@@ -222,6 +222,20 @@ module RightScale
222
222
  end
223
223
  end
224
224
 
225
+ class Process
226
+ # queries resident set size (current working set size in Windows).
227
+ #
228
+ # === Parameters
229
+ # pid(Fixnum):: process ID or nil for current process
230
+ #
231
+ # === Return
232
+ # result(Fixnum):: current set size in KB
233
+ def resident_set_size(pid=nil)
234
+ pid = $$ unless pid
235
+ return `ps -o rss= -p #{pid}`.to_i
236
+ end
237
+ end
238
+
225
239
  end # Platform
226
240
 
227
241
  end # RightScale
@@ -75,6 +75,15 @@ module RightScale
75
75
  @flavor =~ /suse/
76
76
  end
77
77
 
78
+ # Is this machine running rhel?
79
+ #
80
+ # === Return
81
+ # true:: If Linux flavor is rhel
82
+ # false:: Otherwise
83
+ def rhel?
84
+ @flavor =~ /redhatenterpriseserver/
85
+ end
86
+
78
87
  class Filesystem
79
88
 
80
89
  # Is given command available in the PATH?
@@ -263,6 +272,20 @@ module RightScale
263
272
  end
264
273
  end
265
274
 
275
+ class Process
276
+ # queries resident set size (current working set size in Windows).
277
+ #
278
+ # === Parameters
279
+ # pid(Fixnum):: process ID or nil for current process
280
+ #
281
+ # === Return
282
+ # result(Fixnum):: current set size in KB
283
+ def resident_set_size(pid=nil)
284
+ pid = $$ unless pid
285
+ return `ps -o rss= -p #{pid}`.to_i
286
+ end
287
+ end
288
+
266
289
  end # Platform
267
290
 
268
291
  end # RightScale
@@ -1183,6 +1183,37 @@ EOF
1183
1183
  end
1184
1184
  end
1185
1185
 
1186
+ class Process
1187
+ include ::Windows::Process
1188
+
1189
+ @@get_process_memory_info = nil
1190
+
1191
+ # see PROCESS_MEMORY_COUNTERS structure: "http://msdn.microsoft.com/en-us/library/ms684877%28VS.85%29.aspx"
1192
+ SIZEOF_PROCESS_MEMORY_COUNTERS = 10 * 4
1193
+
1194
+ # queries resident set size (current working set size in Windows).
1195
+ #
1196
+ # === Parameters
1197
+ # pid(Fixnum):: process ID or nil for current process
1198
+ #
1199
+ # === Return
1200
+ # result(Fixnum):: current set size in KB
1201
+ def resident_set_size(pid=nil)
1202
+ @@get_process_memory_info = ::Win32::API.new("GetProcessMemoryInfo", 'LPL', 'B', 'psapi') unless @@get_process_memory_info
1203
+
1204
+ # FIX: call OpenProcess and ensure proper access and close if given PID.
1205
+ raise NotImplementedError.new("pid != nil not yet implemented") if pid
1206
+ process_handle = GetCurrentProcess()
1207
+ process_memory_counters = "\0" * SIZEOF_PROCESS_MEMORY_COUNTERS
1208
+ result = @@get_process_memory_info.call(process_handle, process_memory_counters, process_memory_counters.size)
1209
+ # note that the 'B' return type is a Fixnum (i.e. not TrueClass or FalseClass) of 'zero' on failure or 'non-zero' on success
1210
+ raise ::RightScale::Win32Error.new("Failed to get resident set size for process") if 0 == result
1211
+
1212
+ # current .WorkingSetSize (bytes) is equivalent of Linux' ps resident set size (KB)
1213
+ return process_memory_counters[12..16].unpack("L")[0] / 1024 # bytes to KB
1214
+ end
1215
+ end
1216
+
1186
1217
  protected
1187
1218
 
1188
1219
  # internal class for querying OS version, etc.
@@ -73,10 +73,9 @@
73
73
 
74
74
  require 'rubygems'
75
75
  require 'optparse'
76
- require 'fileutils'
77
- require File.expand_path(File.join(File.dirname(__FILE__), 'usage'))
78
- require File.expand_path(File.join(File.dirname(__FILE__), '..', '..', 'right_agent'))
79
- require File.expand_path(File.join(File.dirname(__FILE__), 'common_parser'))
76
+ require File.expand_path(File.join(File.dirname(__FILE__), '..', 'minimal'))
77
+ require File.normalize_path(File.join(File.dirname(__FILE__), 'usage'))
78
+ require File.normalize_path(File.join(File.dirname(__FILE__), 'common_parser'))
80
79
 
81
80
  module RightScale
82
81
 
@@ -115,6 +114,7 @@ module RightScale
115
114
  # === Return
116
115
  # true:: Always return true
117
116
  def control(options)
117
+
118
118
  # Initialize directory settings
119
119
  AgentConfig.cfg_dir = options[:cfg_dir]
120
120
  AgentConfig.pid_dir = options[:pid_dir]
@@ -169,7 +169,7 @@ module RightScale
169
169
  # === Return
170
170
  # options(Hash):: Parsed options
171
171
  def parse_args
172
- options = {}
172
+ options = {:thin_command_client => false}
173
173
 
174
174
  opts = OptionParser.new do |opts|
175
175
  parse_common(opts, options)
@@ -194,7 +194,7 @@ module RightScale
194
194
  options[:pid_file] = file
195
195
  options[:action] = 'kill'
196
196
  end
197
-
197
+
198
198
  opts.on("-K", "--killall") do
199
199
  options[:action] = 'killall'
200
200
  end
@@ -228,7 +228,7 @@ module RightScale
228
228
 
229
229
  opts.on("-f", "--foreground") do
230
230
  options[:daemonize] = false
231
- #Squelch Ruby VM warnings about various things
231
+ #Squelch Ruby VM warnings about various things
232
232
  $VERBOSE = nil
233
233
  end
234
234
 
@@ -249,6 +249,14 @@ module RightScale
249
249
  exit 0 if e.is_a?(SystemExit)
250
250
  fail(e.message, print_usage = true)
251
251
  end
252
+
253
+ # allow specific arguments to use a thin command client for faster
254
+ # execution (on Windows, etc.)
255
+ unless options[:thin_command_client]
256
+ # require full right_agent for any commands which do not specify thin
257
+ # command client.
258
+ require File.normalize_path(File.join(File.dirname(__FILE__), '..', '..', 'right_agent'))
259
+ end
252
260
  resolve_identity(options)
253
261
  options
254
262
  end
@@ -344,7 +352,7 @@ module RightScale
344
352
  end
345
353
  true
346
354
  end
347
-
355
+
348
356
  # Stop agent process
349
357
  #
350
358
  # === Parameters
@@ -34,6 +34,7 @@
34
34
  # --vhost, -v VHOST Set agent AMQP virtual host
35
35
  # --host, -h HOST Set AMQP broker host
36
36
  # --port, -P PORT Set AMQP broker port
37
+ # --heartbeat, -b SEC Set number of seconds between AMQP broker connection heartbeats, 0 means disable
37
38
  # --prefetch COUNT Set maximum requests AMQP broker is to prefetch before current is ack'd
38
39
  # --http-proxy PROXY Use a proxy for all agent-originated HTTP traffic
39
40
  # --http-no-proxy NOPROXY Comma-separated list of proxy exceptions (e.g. metadata server)
@@ -186,6 +187,10 @@ module RightScale
186
187
  options[:prefetch] = count.to_i
187
188
  end
188
189
 
190
+ opts.on('-b', '--heartbeat SEC') do |sec|
191
+ options[:heartbeat] = sec.to_i
192
+ end
193
+
189
194
  opts.on('-o', '--options OPT') do |e|
190
195
  fail("Invalid option definition #{e}' (use '=' to separate name and value)") unless e.include?('=')
191
196
  key, val = e.split(/=/)
@@ -282,6 +287,7 @@ module RightScale
282
287
  cfg[:port] = options[:port] if options[:port]
283
288
  cfg[:host] = options[:host] if options[:host]
284
289
  cfg[:prefetch] = options[:prefetch] || 1
290
+ cfg[:heartbeat] = options[:heartbeat] if options[:heartbeat]
285
291
  cfg[:time_to_live] = options[:time_to_live] || 60
286
292
  cfg[:retry_timeout] = options[:retry_timeout] || 2 * 60
287
293
  cfg[:retry_interval] = options[:retry_interval] || 15
@@ -94,8 +94,8 @@ module RightScale
94
94
  opts = OptionParser.new do |opts|
95
95
 
96
96
  opts.on('-l', '--log-level LEVEL') do |l|
97
- fail("Invalid log level '#{l}'") unless AgentManager::LEVELS.include?(l.to_sym)
98
- options[:level] = l
97
+ fail("Invalid log level '#{l}'") unless AgentManager::LEVELS.include?(l.downcase.to_sym)
98
+ options[:level] = l.downcase
99
99
  end
100
100
 
101
101
  opts.on("-c", "--cfg-dir DIR") do |d|
@@ -136,12 +136,11 @@ module RightScale
136
136
  def request_log_level(agent_name, command, options)
137
137
  res = false
138
138
  config_options = AgentConfig.agent_options(agent_name)
139
- unless config_options.empty?
140
- listen_port = config_options[:listen_port]
139
+ unless config_options.empty? || (listen_port = config_options[:listen_port]).nil?
141
140
  fail("Could not retrieve #{agent_name} agent listen port") unless listen_port
142
141
  client = CommandClient.new(listen_port, config_options[:cookie])
143
142
  begin
144
- client.send_command(command, options[:verbose]) do |level|
143
+ client.send_command(command, options[:verbose], timeout = 5) do |level|
145
144
  puts "Agent #{agent_name} log level: #{level.to_s.upcase}"
146
145
  end
147
146
  res = true
@@ -14,6 +14,9 @@
14
14
  # rstat AGENT --json
15
15
  # rstat AGENT --j
16
16
  #
17
+ # Log details of statistics retrieval
18
+ # rstat AGENT -v
19
+ #
17
20
  # === Usage:
18
21
  # rstat [AGENT] [options]
19
22
  #
@@ -21,6 +24,7 @@
21
24
  # --reset, -r As part of gathering the stats from an agent also reset the stats
22
25
  # --timeout, -t SEC Override default timeout in seconds to wait for a response from an agent
23
26
  # --json, -j Display the stats data in JSON format
27
+ # --verbose, -v Log debug information
24
28
  # --cfg-dir, -c DIR Set directory containing configuration for all agents
25
29
  # --help Display help
26
30
 
@@ -61,7 +65,7 @@ module RightScale
61
65
  # === Return
62
66
  # true:: Always return true
63
67
  def manage(options)
64
- init_log
68
+ init_log if options[:verbose]
65
69
  AgentConfig.cfg_dir = options[:cfg_dir]
66
70
  options[:timeout] ||= DEFAULT_TIMEOUT
67
71
  request_stats(options)
@@ -92,6 +96,10 @@ module RightScale
92
96
  options[:json] = true
93
97
  end
94
98
 
99
+ opts.on('-v', '--verbose') do
100
+ options[:verbose] = true
101
+ end
102
+
95
103
  opts.on("-c", "--cfg-dir DIR") do |d|
96
104
  options[:cfg_dir] = d
97
105
  end
@@ -30,40 +30,504 @@ module RightScale
30
30
 
31
31
  include StatsHelper
32
32
 
33
- # Minimum number of seconds between restarts of the inactivity timer
34
- MIN_RESTART_INACTIVITY_TIMER_INTERVAL = 60
33
+ # Request that is waiting for a response
34
+ class PendingRequest
35
35
 
36
- # Number of seconds to wait for ping response from a mapper when checking connectivity
37
- PING_TIMEOUT = 30
36
+ # (Symbol) Kind of send request
37
+ attr_reader :kind
38
38
 
39
- # Factor used on each retry iteration to achieve exponential backoff
40
- RETRY_BACKOFF_FACTOR = 4
39
+ # (Time) Time when request message was received
40
+ attr_reader :receive_time
41
+
42
+ # (Proc) Block to be activated when response is received
43
+ attr_reader :response_handler
44
+
45
+ # (String) Token for parent request in a retry situation
46
+ attr_accessor :retry_parent
47
+
48
+ def initialize(kind, receive_time, response_handler)
49
+ @kind = kind
50
+ @receive_time = receive_time
51
+ @response_handler = response_handler
52
+ @retry_parent = nil
53
+ end
54
+
55
+ end # PendingRequest
56
+
57
+ # Cache for requests that are waiting for a response
58
+ # Automatically deletes push requests when get too old
59
+ # Retains non-push requests until explicitly deleted
60
+ class PendingRequests < Hash
61
+
62
+ # Kinds of send requests
63
+ REQUEST_KINDS = [:send_retryable_request, :send_persistent_request]
64
+
65
+ # Kinds of send pushes
66
+ PUSH_KINDS = [:send_push, :send_persistent_push]
67
+
68
+ # Maximum number of seconds to retain send pushes in cache
69
+ MAX_PUSH_AGE = 2 * 60
70
+
71
+ # Minimum number of seconds between push cleanups
72
+ MIN_CLEANUP_INTERVAL = 15
73
+
74
+ # Create cache
75
+ def initialize
76
+ @last_cleanup = Time.now
77
+ super
78
+ end
79
+
80
+ # Store pending request
81
+ #
82
+ # === Parameters
83
+ # token(String):: Generated message identifier
84
+ # request(PendingRequest):: Pending request
85
+ #
86
+ # === Return
87
+ # (PendingRequest):: Stored request
88
+ def []=(token, request)
89
+ now = Time.now
90
+ if (now - @last_cleanup) > MIN_CLEANUP_INTERVAL
91
+ self.reject! { |t, r| PUSH_KINDS.include?(r.kind) && (now - r.receive_time) > MAX_PUSH_AGE }
92
+ @last_cleanup = now
93
+ end
94
+ super
95
+ end
96
+
97
+ # Select cache entries of the given kinds
98
+ #
99
+ # === Parameters
100
+ # kinds(Array):: Kind of requests to be included
101
+ #
102
+ # === Return
103
+ # (Hash):: Requests of specified kind
104
+ def kind(kinds)
105
+ self.reject { |t, r| !kinds.include?(r.kind) }
106
+ end
107
+
108
+ # Get age of youngest pending request
109
+ #
110
+ # === Return
111
+ # age(Integer):: Age of youngest request
112
+ def youngest_age
113
+ now = Time.now
114
+ age = nil
115
+ self.each_value do |r|
116
+ seconds = (now - r.receive_time).to_i
117
+ age = seconds if age.nil? || seconds < age
118
+ end
119
+ age
120
+ end
121
+
122
+ # Get age of oldest pending request
123
+ #
124
+ # === Return
125
+ # age(Integer):: Age of oldest request
126
+ def oldest_age
127
+ now = Time.now
128
+ age = nil
129
+ self.each_value do |r|
130
+ seconds = (now - r.receive_time).to_i
131
+ age = seconds if age.nil? || seconds > age
132
+ end
133
+ age
134
+ end
135
+
136
+ end # PendingRequests
137
+
138
+ # Queue for storing requests while disconnected from broker and then sending
139
+ # them when successfully reconnect
140
+ class OfflineHandler
141
+
142
+ # Maximum seconds to wait before starting flushing offline queue when disabling offline mode
143
+ MAX_QUEUE_FLUSH_DELAY = 2 * 60
144
+
145
+ # Maximum number of offline queued requests before triggering restart vote
146
+ MAX_QUEUED_REQUESTS = 1000
147
+
148
+ # Number of seconds that should be spent in offline mode before triggering a restart vote
149
+ RESTART_VOTE_DELAY = 15 * 60
150
+
151
+ # (Symbol) Current queue state with possible values:
152
+ # Value Description Action Next state
153
+ # :created Queue created init :initializing
154
+ # :initializing Agent still initializing start :running
155
+ # :running Queue has been started disable when offline :flushing
156
+ # :flushing Sending queued requests enable :running
157
+ # :terminating Agent terminating
158
+ attr_reader :state
159
+
160
+ # (Symbol) Current offline handling mode with possible values:
161
+ # Value Description
162
+ # :initializing Agent still initializing
163
+ # :online Agent connected to broker
164
+ # :offline Agent disconnected from broker
165
+ attr_reader :mode
166
+
167
+ # (Array) Offline queue
168
+ attr_accessor :queue
169
+
170
+ # Create offline queue
171
+ #
172
+ # === Parameters
173
+ # restart_callback(Proc):: Callback that is activated on each restart vote with votes being initiated
174
+ # by offline queue exceeding MAX_QUEUED_REQUESTS
175
+ # offline_stats(ActivityStats):: Offline queue tracking statistics
176
+ def initialize(restart_callback, offline_stats)
177
+ @restart_vote = restart_callback
178
+ @restart_vote_timer = nil
179
+ @restart_vote_count = 0
180
+ @offline_stats = offline_stats
181
+ @state = :created
182
+ @mode = :initializing
183
+ @queue = []
184
+ end
185
+
186
+ # Initialize the offline queue
187
+ # All requests sent prior to running this initialization are queued
188
+ # and then are sent once this initialization has run
189
+ # All requests following this call and prior to calling start
190
+ # are prepended to the request queue
191
+ #
192
+ # === Return
193
+ # true:: Always return true
194
+ def init
195
+ @state = :initializing if @state == :created
196
+ true
197
+ end
41
198
 
42
- # Maximum seconds to wait before starting flushing offline queue when disabling offline mode
43
- MAX_QUEUE_FLUSH_DELAY = 120 # 2 minutes
199
+ # Switch to online mode and send all buffered messages
200
+ #
201
+ # === Return
202
+ # true:: Always return true
203
+ def start
204
+ if @state == :initializing
205
+ @state = :running
206
+ flush unless @mode == :offline
207
+ @mode = :online if @mode == :initializing
208
+ end
209
+ true
210
+ end
211
+
212
+ # Is agent currently offline?
213
+ #
214
+ # === Return
215
+ # (Boolean):: true if agent offline, otherwise false
216
+ def offline?
217
+ @mode == :offline || @state == :created
218
+ end
219
+
220
+ # In request queueing mode?
221
+ #
222
+ # === Return
223
+ # (Boolean):: true if should queue request, otherwise false
224
+ def queueing?
225
+ offline? && @state != :flushing
226
+ end
227
+
228
+ # Switch to offline mode
229
+ # In this mode requests are queued in memory rather than sent to the mapper
230
+ # Idempotent
231
+ #
232
+ # === Return
233
+ # true:: Always return true
234
+ def enable
235
+ if offline?
236
+ if @state == :flushing
237
+ # If we were in offline mode then switched back to online but are still in the
238
+ # process of flushing the in-memory queue and are now switching to offline mode
239
+ # again then stop the flushing
240
+ @state = :running
241
+ end
242
+ else
243
+ Log.info("[offline] Disconnect from broker detected, entering offline mode")
244
+ Log.info("[offline] Messages will be queued in memory until connection to broker is re-established")
245
+ @offline_stats.update
246
+ @queue ||= [] # ensure queue is valid without losing any messages when going offline
247
+ @mode = :offline
248
+ start_timer
249
+ end
250
+ true
251
+ end
252
+
253
+ # Switch back to sending requests to mapper after in-memory queue gets flushed
254
+ # Idempotent
255
+ #
256
+ # === Return
257
+ # true:: Always return true
258
+ def disable
259
+ if offline? && @state != :created
260
+ Log.info("[offline] Connection to broker re-established")
261
+ @offline_stats.finish
262
+ cancel_timer
263
+ @state = :flushing
264
+ # Wait a bit to avoid flooding the mapper
265
+ EM.add_timer(rand(MAX_QUEUE_FLUSH_DELAY)) { flush }
266
+ end
267
+ true
268
+ end
269
+
270
+ # Queue given request in memory
271
+ #
272
+ # === Parameters
273
+ # request(Hash):: Request to be stored
274
+ #
275
+ # === Return
276
+ # true:: Always return true
277
+ def queue_request(kind, type, payload, target, callback)
278
+ request = {:kind => kind, :type => type, :payload => payload, :target => target, :callback => callback}
279
+ Log.info("[offline] Queuing request: #{request.inspect}")
280
+ vote_to_restart if (@restart_vote_count += 1) >= MAX_QUEUED_REQUESTS
281
+ if @state == :initializing
282
+ # We are in the initialization callback, requests should be put at the head of the queue
283
+ @queue.unshift(request)
284
+ else
285
+ @queue << request
286
+ end
287
+ true
288
+ end
289
+
290
+ # Prepare for agent termination
291
+ #
292
+ # === Return
293
+ # true:: Always return true
294
+ def terminate
295
+ @state = :terminating
296
+ cancel_timer
297
+ true
298
+ end
44
299
 
45
- # Maximum number of offline queued requests before triggering restart vote
46
- MAX_QUEUED_REQUESTS = 1000
300
+ protected
47
301
 
48
- # Number of seconds that should be spent in offline mode before triggering a restart vote
49
- RESTART_VOTE_DELAY = 900 # 15 minutes
302
+ # Send any requests that were queued while in offline mode
303
+ # Do this asynchronously to allow for agents to respond to requests
304
+ # Once all in-memory requests have been flushed, switch off offline mode
305
+ #
306
+ # === Return
307
+ # true:: Always return true
308
+ def flush
309
+ if @state == :flushing
310
+ Log.info("[offline] Starting to flush request queue of size #{@queue.size}") unless @mode == :initializing
311
+ unless @queue.empty?
312
+ r = @queue.shift
313
+ if r[:callback]
314
+ Sender.instance.__send__(r[:kind], r[:type], r[:payload], r[:target]) { |result| r[:callback].call(result) }
315
+ else
316
+ Sender.instance.__send__(r[:kind], r[:type], r[:payload], r[:target])
317
+ end
318
+ end
319
+ if @queue.empty?
320
+ Log.info("[offline] Request queue flushed, resuming normal operations") unless @mode == :initializing
321
+ @mode = :online
322
+ @state = :running
323
+ else
324
+ EM.next_tick { flush }
325
+ end
326
+ end
327
+ true
328
+ end
50
329
 
51
- # (EM::Timer) Timer while waiting for mapper ping response
52
- attr_accessor :pending_ping
53
-
54
- # (Hash) Pending requests; key is request token and value is a hash
55
- # :response_handler(Proc):: Block to be activated when response is received
56
- # :receive_time(Time):: Time when message was received
57
- # :request_kind(String):: Kind of Sender request, optional
58
- # :retry_parent(String):: Token for parent request in a retry situation, optional
330
+ # Vote for restart and reset trigger
331
+ #
332
+ # === Parameters
333
+ # timer_trigger(Boolean):: true if vote was triggered by timer, false if it
334
+ # was triggered by number of messages in in-memory queue
335
+ #
336
+ # === Return
337
+ # true:: Always return true
338
+ def vote_to_restart(timer_trigger = false)
339
+ if @restart_vote
340
+ @restart_vote.call
341
+ if timer_trigger
342
+ start_timer
343
+ else
344
+ @restart_vote_count = 0
345
+ end
346
+ end
347
+ true
348
+ end
349
+
350
+ # Start restart vote timer
351
+ #
352
+ # === Return
353
+ # true:: Always return true
354
+ def start_timer
355
+ if @restart_vote && @state != :terminating
356
+ @restart_vote_timer ||= EM::Timer.new(RESTART_VOTE_DELAY) { vote_to_restart(timer_trigger = true) }
357
+ end
358
+ true
359
+ end
360
+
361
+ # Cancel restart vote timer
362
+ #
363
+ # === Return
364
+ # true:: Always return true
365
+ def cancel_timer
366
+ if @restart_vote_timer
367
+ @restart_vote_timer.cancel
368
+ @restart_vote_timer = nil
369
+ @restart_vote_count = 0
370
+ end
371
+ true
372
+ end
373
+
374
+ end # OfflineHandler
375
+
376
+ # Broker connectivity checker
377
+ # Checks connectivity when requested
378
+ class ConnectivityChecker
379
+
380
+ # Minimum number of seconds between restarts of the inactivity timer
381
+ MIN_RESTART_INACTIVITY_TIMER_INTERVAL = 60
382
+
383
+ # Number of seconds to wait for ping response from a mapper when checking connectivity
384
+ PING_TIMEOUT = 30
385
+
386
+ # (EM::Timer) Timer while waiting for mapper ping response
387
+ attr_accessor :ping_timer
388
+
389
+ def initialize(sender, check_interval, ping_stats, exception_stats)
390
+ @sender = sender
391
+ @check_interval = check_interval
392
+ @ping_timer = nil
393
+ @ping_stats = ping_stats
394
+ @exception_stats = exception_stats
395
+ @last_received = Time.now
396
+ @message_received_callbacks = []
397
+ restart_inactivity_timer if @check_interval > 0
398
+ end
399
+
400
+ # Update the time this agent last received a request or response message
401
+ # and restart the inactivity timer thus deferring the next connectivity check
402
+ # Also forward this message receipt notification to any callbacks that have registered
403
+ #
404
+ # === Block
405
+ # Optional block without parameters that is activated when a message is received
406
+ #
407
+ # === Return
408
+ # true:: Always return true
409
+ def message_received(&callback)
410
+ if block_given?
411
+ @message_received_callbacks << callback
412
+ else
413
+ @message_received_callbacks.each { |c| c.call }
414
+ if @check_interval > 0
415
+ now = Time.now
416
+ if (now - @last_received) > MIN_RESTART_INACTIVITY_TIMER_INTERVAL
417
+ @last_received = now
418
+ restart_inactivity_timer
419
+ end
420
+ end
421
+ end
422
+ true
423
+ end
424
+
425
+ # Check whether broker connection is usable by pinging a mapper via that broker
426
+ # Attempt to reconnect if ping does not respond in PING_TIMEOUT seconds
427
+ # Ignore request if already checking a connection
428
+ # Only to be called from primary thread
429
+ #
430
+ # === Parameters
431
+ # id(String):: Identity of specific broker to use to send ping, defaults to any
432
+ # currently connected broker
433
+ #
434
+ # === Return
435
+ # true:: Always return true
436
+ def check(id = nil)
437
+ unless @terminating || @ping_timer || (id && !@sender.broker.connected?(id))
438
+ @ping_timer = EM::Timer.new(PING_TIMEOUT) do
439
+ begin
440
+ @ping_stats.update("timeout")
441
+ @ping_timer = nil
442
+ Log.warning("Mapper ping via broker #{id} timed out after #{PING_TIMEOUT} seconds, attempting to reconnect")
443
+ host, port, index, priority, _ = @sender.broker.identity_parts(id)
444
+ @sender.agent.connect(host, port, index, priority, force = true)
445
+ rescue Exception => e
446
+ Log.error("Failed to reconnect to broker #{id}", e, :trace)
447
+ @exception_stats.track("ping timeout", e)
448
+ end
449
+ end
450
+
451
+ handler = lambda do |_|
452
+ begin
453
+ if @ping_timer
454
+ @ping_stats.update("success")
455
+ @ping_timer.cancel
456
+ @ping_timer = nil
457
+ end
458
+ rescue Exception => e
459
+ Log.error("Failed to cancel mapper ping", e, :trace)
460
+ @exception_stats.track("cancel ping", e)
461
+ end
462
+ end
463
+ request = Request.new("/mapper/ping", nil, {:from => @sender.identity, :token => AgentIdentity.generate})
464
+ @sender.pending_requests[request.token] = PendingRequest.new(:send_persistent_request, Time.now, handler)
465
+ ids = [id] if id
466
+ id = @sender.publish(request, ids).first
467
+ end
468
+ true
469
+ end
470
+
471
+ # Prepare for agent termination
472
+ #
473
+ # === Return
474
+ # true:: Always return true
475
+ def terminate
476
+ @terminating = true
477
+ @check_interval = 0
478
+ if @ping_timer
479
+ @ping_timer.cancel
480
+ @ping_timer = nil
481
+ end
482
+ if @inactivity_timer
483
+ @inactivity_timer.cancel
484
+ @inactivity_timer = nil
485
+ end
486
+ true
487
+ end
488
+
489
+ protected
490
+
491
+ # Start timer that waits for inactive messaging period to end before checking connectivity
492
+ #
493
+ # === Return
494
+ # true:: Always return true
495
+ def restart_inactivity_timer
496
+ @inactivity_timer.cancel if @inactivity_timer
497
+ @inactivity_timer = EM::Timer.new(@check_interval) do
498
+ begin
499
+ check
500
+ rescue Exception => e
501
+ Log.error("Failed connectivity check", e, :trace)
502
+ @exception_stats.track("check connectivity", e)
503
+ end
504
+ end
505
+ true
506
+ end
507
+
508
+ end # ConnectivityChecker
509
+
510
+ # Factor used on each retry iteration to achieve exponential backoff
511
+ RETRY_BACKOFF_FACTOR = 4
512
+
513
+ # (PendingRequests) Requests waiting for a response
59
514
  attr_accessor :pending_requests
60
515
 
516
+ # (OfflineHandler) Handler for requests when disconnected from broker
517
+ attr_reader :offline_handler
518
+
519
+ # (ConnectivityChecker) Broker connection checker
520
+ attr_reader :connectivity_checker
521
+
61
522
  # (HABrokerClient) High availability AMQP broker client
62
523
  attr_accessor :broker
63
524
 
64
525
  # (String) Identity of the associated agent
65
526
  attr_reader :identity
66
527
 
528
+ # (Agent) Associated agent
529
+ attr_reader :agent
530
+
67
531
  # Accessor for use by actor
68
532
  #
69
533
  # === Return
@@ -82,6 +546,8 @@ module RightScale
82
546
  # agent(Agent):: Reference to agent
83
547
  # :offline_queueing(Boolean):: Whether to queue request if currently not connected to any brokers,
84
548
  # also requires agent invocation of initialize_offline_queue and start_offline_queue methods below
549
+ # :ping_interval(Integer):: Minimum number of seconds since last message receipt to ping the mapper
550
+ # to check connectivity, defaults to 0 meaning do not ping
85
551
  # :restart_callback(Proc):: Callback that is activated on each restart vote with votes being initiated
86
552
  # by offline queue exceeding MAX_QUEUED_REQUESTS or by repeated failures to access mapper when online
87
553
  # :retry_timeout(Numeric):: Maximum number of seconds to retry request before give up
@@ -98,52 +564,19 @@ module RightScale
98
564
  @broker = @agent.broker
99
565
  @secure = @options[:secure]
100
566
  @single_threaded = @options[:single_threaded]
101
- @queueing_mode = :initializing
102
- @queue_running = false
103
- @queue_initializing = false
104
- @queue = []
105
- @restart_vote_count = 0
106
567
  @retry_timeout = nil_if_zero(@options[:retry_timeout])
107
568
  @retry_interval = nil_if_zero(@options[:retry_interval])
108
- @ping_interval = @options[:ping_interval] || 0
109
569
 
110
570
  # Only to be accessed from primary thread
111
- @pending_requests = {}
112
- @pending_ping = nil
571
+ @pending_requests = PendingRequests.new
113
572
 
114
573
  reset_stats
115
- @last_received = 0
116
- @message_received_callbacks = []
117
- restart_inactivity_timer if @ping_interval > 0
574
+ @offline_handler = OfflineHandler.new(@options[:restart_callback], @offline_stats)
575
+ @connectivity_checker = ConnectivityChecker.new(self, @options[:ping_interval] || 0, @ping_stats, @exception_stats)
118
576
  @@instance = self
119
577
  end
120
578
 
121
- # Update the time this agent last received a request or response message
122
- # and restart the inactivity timer thus deferring the next connectivity check
123
- # Also forward this message receipt notification to any callbacks that have registered
124
- #
125
- # === Block
126
- # Optional block without parameters that is activated when a message is received
127
- #
128
- # === Return
129
- # true:: Always return true
130
- def message_received(&callback)
131
- if block_given?
132
- @message_received_callbacks << callback
133
- else
134
- @message_received_callbacks.each { |c| c.call }
135
- if @ping_interval > 0
136
- now = Time.now.to_i
137
- if (now - @last_received) > MIN_RESTART_INACTIVITY_TIMER_INTERVAL
138
- @last_received = now
139
- restart_inactivity_timer
140
- end
141
- end
142
- end
143
- true
144
- end
145
-
146
- # Initialize the offline queue (should be called once)
579
+ # Initialize the offline queue
147
580
  # All requests sent prior to running this initialization are queued if offline
148
581
  # queueing is enabled and then are sent once this initialization has run
149
582
  # All requests following this call and prior to calling start_offline_queue
@@ -152,11 +585,7 @@ module RightScale
152
585
  # === Return
153
586
  # true:: Always return true
154
587
  def initialize_offline_queue
155
- unless @queue_running || !@options[:offline_queueing]
156
- @queue_running = true
157
- @queue_initializing = true
158
- end
159
- true
588
+ @offline_handler.init if @options[:offline_queueing]
160
589
  end
161
590
 
162
591
  # Switch offline queueing to online mode and flush all buffered messages
@@ -164,12 +593,38 @@ module RightScale
164
593
  # === Return
165
594
  # true:: Always return true
166
595
  def start_offline_queue
167
- if @queue_initializing
168
- @queue_initializing = false
169
- flush_queue unless @queueing_mode == :offline
170
- @queueing_mode = :online if @queueing_mode == :initializing
171
- end
172
- true
596
+ @offline_handler.start if @options[:offline_queueing]
597
+ end
598
+
599
+ # Switch to offline mode
600
+ # In this mode requests are queued in memory rather than sent to the mapper
601
+ # Idempotent
602
+ #
603
+ # === Return
604
+ # true:: Always return true
605
+ def enable_offline_mode
606
+ @offline_handler.enable if @options[:offline_queueing]
607
+ end
608
+
609
+ # Switch back to sending requests to mapper after in memory queue gets flushed
610
+ # Idempotent
611
+ #
612
+ # === Return
613
+ # true:: Always return true
614
+ def disable_offline_mode
615
+ @offline_handler.disable if @options[:offline_queueing]
616
+ end
617
+
618
+ # Update the time this agent last received a request or response message
619
+ # Also forward this message receipt notification to any callbacks that have registered
620
+ #
621
+ # === Block
622
+ # Optional block without parameters that is activated when a message is received
623
+ #
624
+ # === Return
625
+ # true:: Always return true
626
+ def message_received(&callback)
627
+ @connectivity_checker.message_received(&callback)
173
628
  end
174
629
 
175
630
  # Send a request to a single target or multiple targets with no response expected other
@@ -194,9 +649,11 @@ module RightScale
194
649
  # defaults to :any
195
650
  #
196
651
  # === Block
197
- # Optional block used to process routing response failures asynchronously with the following parameter:
198
- # result(Result):: Response with an OperationResult of RETRY, NON_DELIVERY, or ERROR,
199
- # use RightScale::OperationResult.from_results to decode
652
+ # Optional block used to process routing responses asynchronously with the following parameter:
653
+ # result(Result):: Response with an OperationResult of SUCCESS, RETRY, NON_DELIVERY, or ERROR,
654
+ # with an initial SUCCESS response containing the targets to which the mapper published the
655
+ # request and any additional responses indicating any failures to actually route the request
656
+ # to those targets, use RightScale::OperationResult.from_results to decode
200
657
  #
201
658
  # === Return
202
659
  # true:: Always return true
@@ -227,9 +684,11 @@ module RightScale
227
684
  # defaults to :any
228
685
  #
229
686
  # === Block
230
- # Optional block used to process routing response failures asynchronously with the following parameter:
231
- # result(Result):: Response with an OperationResult of RETRY, NON_DELIVERY, or ERROR,
232
- # use RightScale::OperationResult.from_results to decode
687
+ # Optional block used to process routing responses asynchronously with the following parameter:
688
+ # result(Result):: Response with an OperationResult of SUCCESS, RETRY, NON_DELIVERY, or ERROR,
689
+ # with an initial SUCCESS response containing the targets to which the mapper published the
690
+ # request and any additional responses indicating any failures to actually route the request
691
+ # to those targets, use RightScale::OperationResult.from_results to decode
233
692
  #
234
693
  # === Return
235
694
  # true:: Always return true
@@ -316,17 +775,17 @@ module RightScale
316
775
  if response.is_a?(Result)
317
776
  if result = OperationResult.from_results(response)
318
777
  if result.non_delivery?
319
- @non_deliveries.update(result.content.nil? ? "nil" : result.content.inspect)
778
+ @non_delivery_stats.update(result.content.nil? ? "nil" : result.content.inspect)
320
779
  elsif result.error?
321
- @result_errors.update(result.content.nil? ? "nil" : result.content.inspect)
780
+ @result_error_stats.update(result.content.nil? ? "nil" : result.content.inspect)
322
781
  end
323
- @results.update(result.status)
782
+ @result_stats.update(result.status)
324
783
  else
325
- @results.update(response.results.nil? ? "nil" : response.results)
784
+ @result_stats.update(response.results.nil? ? "nil" : response.results)
326
785
  end
327
786
 
328
787
  if handler = @pending_requests[token]
329
- if result && result.non_delivery? && handler[:request_kind] == :send_retryable_request &&
788
+ if result && result.non_delivery? && handler.kind == :send_retryable_request &&
330
789
  [OperationResult::TARGET_NOT_CONNECTED, OperationResult::TTL_EXPIRATION].include?(result.content)
331
790
  # Log and ignore so that timeout retry mechanism continues
332
791
  # Leave purging of associated request until final response, i.e., success response or retry timeout
@@ -343,95 +802,52 @@ module RightScale
343
802
  true
344
803
  end
345
804
 
346
- # Switch to offline mode, in this mode requests are queued in memory
347
- # rather than sent to the mapper
348
- # Idempotent
805
+ # Publish request
806
+ # Use mandatory flag to request return of message if it cannot be delivered
349
807
  #
350
- # === Return
351
- # true:: Always return true
352
- def enable_offline_mode
353
- if offline?
354
- if @flushing_queue
355
- # If we were in offline mode then switched back to online but are still in the
356
- # process of flushing the in memory queue and are now switching to offline mode
357
- # again then stop the flushing
358
- @stop_flushing_queue = true
359
- end
360
- else
361
- Log.info("[offline] Disconnect from broker detected, entering offline mode")
362
- Log.info("[offline] Messages will be queued in memory until connection to broker is re-established")
363
- @offlines.update
364
- @queue ||= [] # ensure queue is valid without losing any messages when going offline
365
- @queueing_mode = :offline
366
- @restart_vote_timer ||= EM::Timer.new(RESTART_VOTE_DELAY) { vote_to_restart(timer_trigger=true) }
367
- end
368
- end
369
-
370
- # Switch back to sending requests to mapper after in memory queue gets flushed
371
- # Idempotent
808
+ # === Parameters
809
+ # request(Push|Request):: Packet to be sent
810
+ # ids(Array|nil):: Identity of specific brokers to choose from, or nil if any okay
372
811
  #
373
812
  # === Return
374
- # true:: Always return true
375
- def disable_offline_mode
376
- if offline? && @queue_running
377
- Log.info("[offline] Connection to broker re-established")
378
- @offlines.finish
379
- @restart_vote_timer.cancel if @restart_vote_timer
380
- @restart_vote_timer = nil
381
- @stop_flushing_queue = false
382
- @flushing_queue = true
383
- # Let's wait a bit not to flood the mapper
384
- EM.add_timer(rand(MAX_QUEUE_FLUSH_DELAY)) { flush_queue } if @queue_running
813
+ # ids(Array):: Identity of brokers published to
814
+ def publish(request, ids = nil)
815
+ begin
816
+ exchange = {:type => :fanout, :name => "request", :options => {:durable => true, :no_declare => @secure}}
817
+ ids = @broker.publish(exchange, request, :persistent => request.persistent, :mandatory => true,
818
+ :log_filter => [:tags, :target, :tries, :persistent], :brokers => ids)
819
+ rescue HABrokerClient::NoConnectedBrokers => e
820
+ Log.error("Failed to publish request #{request.to_s([:tags, :target, :tries])}", e)
821
+ ids = []
822
+ rescue Exception => e
823
+ Log.error("Failed to publish request #{request.to_s([:tags, :target, :tries])}", e, :trace)
824
+ @exception_stats.track("publish", e, request)
825
+ ids = []
385
826
  end
386
- true
387
- end
388
-
389
- # Get age of youngest pending request
390
- #
391
- # === Return
392
- # age(Integer|nil):: Age in seconds of youngest request, or nil if no pending requests
393
- def request_age
394
- time = Time.now
395
- age = nil
396
- @pending_requests.each_value do |request|
397
- seconds = time - request[:receive_time]
398
- age = seconds.to_i if age.nil? || seconds < age
399
- end
400
- age
827
+ ids
401
828
  end
402
829
 
403
830
  # Take any actions necessary to quiesce mapper interaction in preparation
404
831
  # for agent termination but allow message receipt to continue
405
832
  #
406
833
  # === Return
407
- # (Array):: Number of pending requests and age of youngest request
834
+ # (Array):: Number of pending non-push requests and age of youngest request
408
835
  def terminate
409
- @terminating = true
410
- @ping_interval = 0
411
- if @pending_ping
412
- @pending_ping.cancel
413
- @pending_ping = nil
414
- end
415
- if @timer
416
- @timer.cancel
417
- @timer = nil
418
- end
419
- if @restart_vote_timer
420
- @restart_vote_timer.cancel
421
- @restart_vote_timer = nil
422
- end
423
- [@pending_requests.size, request_age]
836
+ @offline_handler.terminate
837
+ @connectivity_checker.terminate
838
+ pending = @pending_requests.kind(PendingRequests::REQUEST_KINDS)
839
+ [pending.size, pending.youngest_age]
424
840
  end
425
841
 
426
- # Create displayable dump of unfinished request information
842
+ # Create displayable dump of unfinished non-push request information
427
843
  # Truncate list if there are more than 50 requests
428
844
  #
429
845
  # === Return
430
846
  # info(Array(String)):: Receive time and token for each request in descending time order
431
847
  def dump_requests
432
848
  info = []
433
- @pending_requests.each do |token, request|
434
- info << "#{request[:receive_time].localtime} <#{token}>"
849
+ @pending_requests.kind(PendingRequests::REQUEST_KINDS).each do |token, request|
850
+ info << "#{request.receive_time.localtime} <#{token}>"
435
851
  end
436
852
  info.sort.reverse
437
853
  info = info[0..49] + ["..."] if info.size > 50
@@ -458,7 +874,8 @@ module RightScale
458
874
  # with percentage breakdown per kind, or nil if none
459
875
  # "requests"(Hash|nil):: Request activity stats with keys "total", "percent", "last", and "rate"
460
876
  # with percentage breakdown per request type, or nil if none
461
- # "requests pending"(Hash|nil):: Number of requests waiting for response and age of oldest, or nil if none
877
+ # "requests pending"(Hash|nil):: Number of requests waiting for response and age of oldest,
878
+ # or nil if none
462
879
  # "response time"(Float):: Average number of seconds to respond to a request recently
463
880
  # "result errors"(Hash|nil):: Error result activity stats with keys "total", "percent", "last",
464
881
  # and 'rate' with percentage breakdown per error, or nil if none
@@ -467,25 +884,28 @@ module RightScale
467
884
  # "retries"(Hash|nil):: Retry activity stats with keys "total", "percent", "last", and "rate"
468
885
  # with percentage breakdown per request type, or nil if none
469
886
  def stats(reset = false)
470
- offlines = @offlines.all
471
- offlines.merge!("duration" => @offlines.avg_duration) if offlines
472
- requests_pending = if @pending_requests.size > 0
473
- now = Time.now.to_i
474
- oldest = @pending_requests.values.inject(0) { |m, r| [m, now - r[:receive_time].to_i].max }
475
- {"total" => @pending_requests.size, "oldest age" => oldest}
887
+ offlines = @offline_stats.all
888
+ offlines.merge!("duration" => @offline_stats.avg_duration) if offlines
889
+ if @pending_requests.size > 0
890
+ pending = {}
891
+ pending["pushes"] = @pending_requests.kind(PendingRequests::PUSH_KINDS).size
892
+ requests = @pending_requests.kind(PendingRequests::REQUEST_KINDS)
893
+ if (pending["requests"] = requests.size) > 0
894
+ pending["oldest age"] = requests.oldest_age
895
+ end
476
896
  end
477
897
  stats = {
478
- "exceptions" => @exceptions.stats,
479
- "non-deliveries" => @non_deliveries.all,
898
+ "exceptions" => @exception_stats.stats,
899
+ "non-deliveries" => @non_delivery_stats.all,
480
900
  "offlines" => offlines,
481
- "pings" => @pings.all,
901
+ "pings" => @ping_stats.all,
482
902
  "request kinds" => @request_kinds.all,
483
- "requests" => @requests.all,
484
- "requests pending" => requests_pending,
485
- "response time" => @requests.avg_duration,
486
- "result errors" => @result_errors.all,
487
- "results" => @results.all,
488
- "retries" => @retries.all
903
+ "requests" => @request_stats.all,
904
+ "requests pending" => pending,
905
+ "response time" => @request_stats.avg_duration,
906
+ "result errors" => @result_error_stats.all,
907
+ "results" => @result_stats.all,
908
+ "retries" => @retry_stats.all
489
909
  }
490
910
  reset_stats if reset
491
911
  stats
@@ -498,15 +918,15 @@ module RightScale
498
918
  # === Return
499
919
  # true:: Always return true
500
920
  def reset_stats
501
- @pings = ActivityStats.new
502
- @retries = ActivityStats.new
503
- @requests = ActivityStats.new
504
- @results = ActivityStats.new
505
- @result_errors = ActivityStats.new
506
- @non_deliveries = ActivityStats.new
507
- @offlines = ActivityStats.new(measure_rate = false)
921
+ @ping_stats = ActivityStats.new
922
+ @retry_stats = ActivityStats.new
923
+ @request_stats = ActivityStats.new
924
+ @result_stats = ActivityStats.new
925
+ @result_error_stats = ActivityStats.new
926
+ @non_delivery_stats = ActivityStats.new
927
+ @offline_stats = ActivityStats.new(measure_rate = false)
508
928
  @request_kinds = ActivityStats.new(measure_rate = false)
509
- @exceptions = ExceptionStats.new(@agent, @options[:exception_callback])
929
+ @exception_stats = ExceptionStats.new(@agent, @options[:exception_callback])
510
930
  true
511
931
  end
512
932
 
@@ -528,9 +948,11 @@ module RightScale
528
948
  # defaults to :any
529
949
  #
530
950
  # === Block
531
- # Optional block used to process routing response failures asynchronously with the following parameter:
532
- # result(Result):: Response with an OperationResult of RETRY, NON_DELIVERY, or ERROR,
533
- # use RightScale::OperationResult.from_results to decode
951
+ # Optional block used to process routing responses asynchronously with the following parameter:
952
+ # result(Result):: Response with an OperationResult of SUCCESS, RETRY, NON_DELIVERY, or ERROR,
953
+ # with an initial SUCCESS response containing the targets to which the mapper published the
954
+ # request and any additional responses indicating any failures to actually route the request
955
+ # to those targets, use RightScale::OperationResult.from_results to decode
534
956
  #
535
957
  # === Return
536
958
  # true:: Always return true
@@ -540,10 +962,10 @@ module RightScale
540
962
  def build_push(kind, type, payload = nil, target = nil, &callback)
541
963
  validate_target(target, allow_selector = true)
542
964
  if should_queue?
543
- queue_request(:kind => kind, :type => type, :payload => payload, :target => target, :callback => callback)
965
+ @offline_handler.queue_request(kind, type, payload, target, callback)
544
966
  else
545
967
  method = type.split('/').last
546
- received_at = @requests.update(method)
968
+ received_at = @request_stats.update(method)
547
969
  push = Push.new(type, payload)
548
970
  push.from = @identity
549
971
  push.token = AgentIdentity.generate
@@ -556,11 +978,10 @@ module RightScale
556
978
  end
557
979
  push.persistent = kind == :send_persistent_push
558
980
  @request_kinds.update((push.selector == :all ? kind.to_s.sub(/push/, "fanout") : kind.to_s)[5..-1])
559
- @pending_requests[push.token] = {
560
- :response_handler => callback,
561
- :receive_time => received_at,
562
- :request_kind => kind
563
- } if callback
981
+ if callback
982
+ push.confirm = true
983
+ @pending_requests[push.token] = PendingRequest.new(kind, received_at, callback)
984
+ end
564
985
  publish(push)
565
986
  end
566
987
  true
@@ -594,12 +1015,12 @@ module RightScale
594
1015
  def build_request(kind, type, payload, target, &callback)
595
1016
  validate_target(target, allow_selector = false)
596
1017
  if should_queue?
597
- queue_request(:kind => kind, :type => type, :payload => payload, :target => target, :callback => callback)
1018
+ @offline_handler.queue_request(kind, type, payload, target, callback)
598
1019
  else
599
1020
  method = type.split('/').last
600
1021
  token = AgentIdentity.generate
601
1022
  non_duplicate = kind == :send_persistent_request
602
- received_at = @requests.update(method, token)
1023
+ received_at = @request_stats.update(method, token)
603
1024
  @request_kinds.update(kind.to_s[5..-1])
604
1025
 
605
1026
  # Using next_tick to ensure on primary thread since using @pending_requests
@@ -617,10 +1038,7 @@ module RightScale
617
1038
  end
618
1039
  request.expires_at = Time.now.to_i + @options[:time_to_live] if !non_duplicate && @options[:time_to_live] && @options[:time_to_live] != 0
619
1040
  request.persistent = non_duplicate
620
- @pending_requests[token] = {
621
- :response_handler => callback,
622
- :receive_time => received_at,
623
- :request_kind => kind}
1041
+ @pending_requests[token] = PendingRequest.new(kind, received_at, callback)
624
1042
  if non_duplicate
625
1043
  publish(request)
626
1044
  else
@@ -628,7 +1046,7 @@ module RightScale
628
1046
  end
629
1047
  rescue Exception => e
630
1048
  Log.error("Failed to send #{type} #{kind.to_s}", e, :trace)
631
- @exceptions.track(kind.to_s, e, request)
1049
+ @exception_stats.track(kind.to_s, e, request)
632
1050
  end
633
1051
  end
634
1052
  end
@@ -695,7 +1113,7 @@ module RightScale
695
1113
  ids = publish(request)
696
1114
 
697
1115
  if @retry_interval && @retry_timeout && parent && !ids.empty?
698
- interval = [(@retry_interval * multiplier) + (@requests.avg_duration || 0), @retry_timeout - elapsed].min
1116
+ interval = [(@retry_interval * multiplier) + (@request_stats.avg_duration || 0), @retry_timeout - elapsed].min
699
1117
  EM.add_timer(interval) do
700
1118
  begin
701
1119
  if handler = @pending_requests[parent]
@@ -704,52 +1122,27 @@ module RightScale
704
1122
  if elapsed < @retry_timeout
705
1123
  request.tries << request.token
706
1124
  request.token = AgentIdentity.generate
707
- @pending_requests[parent][:retry_parent] = parent if count == 1
1125
+ @pending_requests[parent].retry_parent = parent if count == 1
708
1126
  @pending_requests[request.token] = @pending_requests[parent]
709
1127
  publish_with_timeout_retry(request, parent, count, multiplier * RETRY_BACKOFF_FACTOR, elapsed)
710
- @retries.update(request.type.split('/').last)
1128
+ @retry_stats.update(request.type.split('/').last)
711
1129
  else
712
1130
  Log.warning("RE-SEND TIMEOUT after #{elapsed.to_i} seconds for #{request.to_s([:tags, :target, :tries])}")
713
1131
  result = OperationResult.non_delivery(OperationResult::RETRY_TIMEOUT)
714
- @non_deliveries.update(result.content)
1132
+ @non_delivery_stats.update(result.content)
715
1133
  handle_response(Result.new(request.token, request.reply_to, result, @identity))
716
1134
  end
717
- check_connection(ids.first) if count == 1
1135
+ @connectivity_checker.check(ids.first) if count == 1
718
1136
  end
719
1137
  rescue Exception => e
720
1138
  Log.error("Failed retry for #{request.token}", e, :trace)
721
- @exceptions.track("retry", e, request)
1139
+ @exception_stats.track("retry", e, request)
722
1140
  end
723
1141
  end
724
1142
  end
725
1143
  true
726
1144
  end
727
1145
 
728
- # Publish request
729
- # Use mandatory flag to request return of message if it cannot be delivered
730
- #
731
- # === Parameters
732
- # request(Push|Request):: Packet to be sent
733
- # ids(Array|nil):: Identity of specific brokers to choose from, or nil if any okay
734
- #
735
- # === Return
736
- # ids(Array):: Identity of brokers published to
737
- def publish(request, ids = nil)
738
- begin
739
- exchange = {:type => :fanout, :name => "request", :options => {:durable => true, :no_declare => @secure}}
740
- ids = @broker.publish(exchange, request, :persistent => request.persistent, :mandatory => true,
741
- :log_filter => [:tags, :target, :tries, :persistent], :brokers => ids)
742
- rescue HABrokerClient::NoConnectedBrokers => e
743
- Log.error("Failed to publish request #{request.to_s([:tags, :target, :tries])}", e)
744
- ids = []
745
- rescue Exception => e
746
- Log.error("Failed to publish request #{request.to_s([:tags, :target, :tries])}", e, :trace)
747
- @exceptions.track("publish", e, request)
748
- ids = []
749
- end
750
- ids
751
- end
752
-
753
1146
  # Deliver the response and remove associated request(s) from pending
754
1147
  # Use defer thread instead of primary if not single threaded, consistent with dispatcher,
755
1148
  # so that all shared data is accessed from the same thread
@@ -763,113 +1156,22 @@ module RightScale
763
1156
  # === Return
764
1157
  # true:: Always return true
765
1158
  def deliver(response, handler)
766
- @requests.finish(handler[:receive_time], response.token)
1159
+ @request_stats.finish(handler.receive_time, response.token)
767
1160
 
768
- @pending_requests.delete(response.token)
769
- if parent = handler[:retry_parent]
770
- @pending_requests.reject! { |k, v| k == parent || v[:retry_parent] == parent }
1161
+ @pending_requests.delete(response.token) if PendingRequests::REQUEST_KINDS.include?(handler.kind)
1162
+ if parent = handler.retry_parent
1163
+ @pending_requests.reject! { |k, v| k == parent || v.retry_parent == parent }
771
1164
  end
772
1165
 
773
- if handler[:response_handler]
1166
+ if handler.response_handler
774
1167
  EM.__send__(@single_threaded ? :next_tick : :defer) do
775
1168
  begin
776
- handler[:response_handler].call(response)
777
- rescue Exception => e
778
- Log.error("Failed processing response {response.to_s([])}", e, :trace)
779
- @exceptions.track("response", e, response)
780
- end
781
- end
782
- end
783
- true
784
- end
785
-
786
- # Check whether broker connection is usable by pinging a mapper via that broker
787
- # Attempt to reconnect if ping does not respond in PING_TIMEOUT seconds
788
- # Ignore request if already checking a connection
789
- # Only to be called from primary thread
790
- #
791
- # === Parameters
792
- # id(String):: Identity of specific broker to use to send ping, defaults to any
793
- # currently connected broker
794
- #
795
- # === Return
796
- # true:: Always return true
797
- def check_connection(id = nil)
798
- unless @terminating || @pending_ping || (id && !@broker.connected?(id))
799
- @pending_ping = EM::Timer.new(PING_TIMEOUT) do
800
- begin
801
- @pings.update("timeout")
802
- @pending_ping = nil
803
- Log.warning("Mapper ping via broker #{id} timed out after #{PING_TIMEOUT} seconds, attempting to reconnect")
804
- host, port, index, priority, _ = @broker.identity_parts(id)
805
- @agent.connect(host, port, index, priority, force = true)
806
- rescue Exception => e
807
- Log.error("Failed to reconnect to broker #{id}", e, :trace)
808
- @exceptions.track("ping timeout", e)
809
- end
810
- end
811
-
812
- handler = lambda do |_|
813
- begin
814
- if @pending_ping
815
- @pings.update("success")
816
- @pending_ping.cancel
817
- @pending_ping = nil
818
- end
1169
+ handler.response_handler.call(response)
819
1170
  rescue Exception => e
820
- Log.error("Failed to cancel mapper ping", e, :trace)
821
- @exceptions.track("cancel ping", e)
1171
+ Log.error("Failed processing response #{response.to_s([])}", e, :trace)
1172
+ @exception_stats.track("response", e, response)
822
1173
  end
823
1174
  end
824
-
825
- request = Request.new("/mapper/ping", nil, {:from => @identity, :token => AgentIdentity.generate})
826
- @pending_requests[request.token] = {:response_handler => handler, :receive_time => Time.now}
827
- ids = [id] if id
828
- id = publish(request, ids).first
829
- end
830
- true
831
- end
832
-
833
- # Vote for restart and reset trigger
834
- #
835
- # === Parameters
836
- # timer_trigger(Boolean):: true if vote was triggered by timer, false if it
837
- # was triggered by number of messages in in-memory queue
838
- #
839
- # === Return
840
- # true:: Always return true
841
- def vote_to_restart(timer_trigger)
842
- if restart_vote = @options[:restart_callback]
843
- restart_vote.call
844
- if timer_trigger
845
- @restart_vote_timer = EM::Timer.new(RESTART_VOTE_DELAY) { vote_to_restart(timer_trigger = true) }
846
- else
847
- @restart_vote_count = 0
848
- end
849
- end
850
- true
851
- end
852
-
853
- # Is agent currently offline?
854
- #
855
- # === Return
856
- # offline(Boolean):: true if agent is disconnected or not initialized
857
- def offline?
858
- offline = @queueing_mode == :offline || !@queue_running
859
- end
860
-
861
- # Start timer that waits for inactive messaging period to end before checking connectivity
862
- #
863
- # === Return
864
- # true:: Always return true
865
- def restart_inactivity_timer
866
- @timer.cancel if @timer
867
- @timer = EM::Timer.new(@ping_interval) do
868
- begin
869
- check_connection
870
- rescue Exception => e
871
- Log.error("Failed connectivity check", e, :trace)
872
- end
873
1175
  end
874
1176
  true
875
1177
  end
@@ -879,57 +1181,7 @@ module RightScale
879
1181
  # === Return
880
1182
  # (Boolean):: true if should queue request, otherwise false
881
1183
  def should_queue?
882
- @options[:offline_queueing] && offline? && !@flushing_queue
883
- end
884
-
885
- # Queue given request in memory
886
- #
887
- # === Parameters
888
- # request(Hash):: Request to be stored
889
- #
890
- # === Return
891
- # true:: Always return true
892
- def queue_request(request)
893
- Log.info("[offline] Queuing request: #{request.inspect}")
894
- @restart_vote_count += 1 if @queue_running
895
- vote_to_restart(timer_trigger = false) if @restart_vote_count >= MAX_QUEUED_REQUESTS
896
- if @queue_initializing
897
- # We are in the initialization callback, requests should be put at the head of the queue
898
- @queue.unshift(request)
899
- else
900
- @queue << request
901
- end
902
- true
903
- end
904
-
905
- # Flush in memory queue of requests that were stored while in offline mode
906
- # Do this asynchronously to allow for agents to respond to requests
907
- # Once all in-memory requests have been flushed, switch off offline mode
908
- #
909
- # === Return
910
- # true:: Always return true
911
- def flush_queue
912
- if @stop_flushing_queue
913
- @stop_flushing_queue = false
914
- @flushing_queue = false
915
- else
916
- Log.info("[offline] Starting to flush request queue of size #{@queue.size}") unless @queueing_mode == :initializing
917
- unless @queue.empty?
918
- r = @queue.shift
919
- if r[:callback]
920
- Sender.instance.__send__(r[:kind], r[:type], r[:payload], r[:target]) { |res| r[:callback].call(res) }
921
- else
922
- Sender.instance.__send__(r[:kind], r[:type], r[:payload], r[:target])
923
- end
924
- end
925
- if @queue.empty?
926
- Log.info("[offline] Request queue flushed, resuming normal operations") unless @queueing_mode == :initializing
927
- @queueing_mode = :online
928
- @flushing_queue = false
929
- else
930
- EM.next_tick { flush_queue }
931
- end
932
- end
1184
+ @options[:offline_queueing] && @offline_handler.queueing?
933
1185
  end
934
1186
 
935
1187
  end # Sender