who_can 0.3.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +7 -0
- data/.rspec +4 -0
- data/.rvmrc +1 -0
- data/Gemfile +8 -0
- data/Rakefile +2 -0
- data/lib/who_can.rb +64 -0
- data/lib/who_can/base.rb +72 -0
- data/lib/who_can/connection_manager.rb +268 -0
- data/lib/who_can/connection_wrapper.rb +210 -0
- data/lib/who_can/heartbeater.rb +7 -0
- data/lib/who_can/heartbeater/beat.rb +72 -0
- data/lib/who_can/heartbeater/ekg.rb +169 -0
- data/lib/who_can/logging.rb +12 -0
- data/lib/who_can/pinger.rb +82 -0
- data/lib/who_can/responder.rb +129 -0
- data/lib/who_can/version.rb +3 -0
- data/poc/heartbeater.rb +37 -0
- data/poc/submitter.rb +180 -0
- data/poc/who_can_with_hearbeat.rb +20 -0
- data/scripts/cross_cluster_ping.rb +107 -0
- data/spec/spec_helper.rb +21 -0
- data/spec/support/evented_spec_extensions.rb +14 -0
- data/spec/support/logging.rb +8 -0
- data/spec/support/logging_progress_bar_formatter.rb +14 -0
- data/spec/who_can/base_spec.rb +66 -0
- data/spec/who_can/connection_manager_spec.rb +260 -0
- data/spec/who_can/connection_wrapper_spec.rb +91 -0
- data/spec/who_can/heartbeater/beat_spec.rb +101 -0
- data/spec/who_can/heartbeater/ekg_spec.rb +45 -0
- data/spec/who_can/pinger_responder_integration_spec.rb +63 -0
- data/spec/who_can/pinger_spec.rb +82 -0
- data/spec/who_can/responder_spec.rb +48 -0
- data/who_can.gemspec +32 -0
- metadata +290 -0
@@ -0,0 +1,72 @@
|
|
1
|
+
module WhoCan
  module Heartbeater
    # A single heartbeat probe. A Beat publishes one 'BEAT' message, routed to
    # +queue+ through the channel's default exchange, and is itself a Deferred:
    # it is succeeded when the owner reports the echo via #ping_received!, or
    # when the beat is cancelled via #cancel!.
    class Beat
      include Logging
      include Deferred

      # mainly used for hooking-in tests; succeeded from the publish block in
      # #start! once the message has been handed to the exchange
      deferred_event :after_publish, :prefix => false

      attr_reader :channel, :queue, :beat_id, :timeout_after

      # @param [AMQP::Channel] channel the amqp channel we are to use
      #
      # @param [#name] queue the queue the beat is addressed to; its name is
      #   used as the routing key of the published message
      #
      # @param [String] beat_id a unique identifier for this beat instance,
      #   sent as the message_id of the published message
      #
      # @param [Numeric] timeout how long do we wait until we decide this beat
      #   was a failure. if nil, no timeout will be set. Timeout is started
      #   before we open the channel, and is cancelled on successful return
      #   message
      #
      def initialize(channel, queue, beat_id, timeout=nil)
        @channel       = channel
        @queue         = queue
        @beat_id       = beat_id
        @timeout_after = timeout
        @started       = false
        # initialised explicitly so that #cancelled? answers false (not nil)
        # for a beat that was never cancelled
        @cancelled     = false
      end

      # @return [Boolean] true while this deferred has not been fired yet
      def pending?
        @deferred_status.nil? || (@deferred_status == :unknown)
      end

      # @return [Boolean] the inverse of #pending?
      def fired?
        !pending?
      end

      # Abandons a beat that is still pending: clears its timeout, flags it as
      # cancelled and fires it through +succeed+ (not +fail+), so errbacks are
      # not triggered by a cancellation. Does nothing once the beat has fired.
      def cancel!
        return if fired?

        logger.info { "#{beat_id} cancel called!" }

        cancel_timeout
        @cancelled = true
        succeed
      end

      # @return [Boolean] true once #cancel! has taken effect
      def cancelled?
        @cancelled
      end

      # Called by the owner when the message carrying this beat's id comes
      # back. Succeeds the beat unless it already fired or was cancelled.
      def ping_received!
        succeed if pending? && !cancelled?
      end

      # Arms the timeout (when one was given) and publishes the 'BEAT' message
      # once the channel is open. Only the first call has any effect.
      def start!
        return if @started
        @started = true

        timeout(timeout_after) if timeout_after

        channel.once_opened do
          # exceptions raised while publishing are routed to this beat via
          # errback_on_exception
          errback_on_exception do
            channel.default_exchange.publish('BEAT', :routing_key => queue.name, :message_id => beat_id) do
              logger.debug { "message #{beat_id} sent" }
              after_publish.succeed
            end
          end
        end
      end
    end
  end
end
|
@@ -0,0 +1,169 @@
|
|
1
|
+
module WhoCan
  module Heartbeater
    # Pings the broker at regular intervals. After N number of successive
    # failures, will fire the on_heartbeat_failure callback (note: not the
    # errback, the *callback*). Due to the nature of Deferreds, after a failure
    # an EKG instance must be discarded.
    #
    # a successful beat resets failure_count
    #
    class EKG
      include Logging
      include Deferred

      attr_reader :connection, :queue, :interval, :max_failures, :failure_count

      # fired when the amqp channel has opened and we've started sending beats
      deferred_event :start

      # fired when failure_count >= max_failures
      deferred_event :heartbeat_failure

      # fired once #shutdown! has closed the channel (or given up waiting)
      deferred_event :shutdown


      # Create a new EKG.
      #
      # @param connection the AMQP connection to monitor; used to open the
      #   channel and to hook on_open / on_tcp_connection_loss
      #
      # @option opts [Integer] :max_failures (3) fire the on_heartbeat_failure
      #   callback after this many failures
      #
      # @option opts [Numeric] :interval (2.0) interval between beats
      #
      # @option opts [Numeric] :timeout (5.0) time to live for a single beat
      def initialize(connection, opts={})
        opts = {:interval => 2.0, :max_failures => 3, :timeout => 5.0}.merge(opts)

        @connection         = connection
        @channel            = nil
        @timer              = nil
        @interval           = opts[:interval]
        @max_failures       = opts[:max_failures]
        @beat_timeout_after = opts[:timeout]
        @queue              = nil
        @failure_count      = 0
        @running            = false
        @beat_num           = 0
        @outstanding_beats  = {}
      end

      # Lazily opens (and memoizes) the channel used for heartbeats.
      def channel
        @channel ||= AMQP::Channel.new(connection)
      end

      # Starts beating. The optional block is registered on the on_start
      # deferred, which is succeeded once the reply queue subscription has
      # been confirmed. Calling this while already running only registers
      # the block.
      #
      # @return the on_start deferred
      def start!(&blk)
        on_start(&blk)
        return on_start if @running
        @running = true

        connection.on_tcp_connection_loss do
          logger.error { "on_tcp_connection_loss callback fired!!" }
          heartbeat_failure!
        end

        connection.on_open do
          channel.once_opened do
            channel.on_error { |*a| logger.error { "channel.on_error called with #{a.inspect}" } }
            # server-named, exclusive queue that the beats are routed back to
            channel.queue('', :exclusive => true, :auto_delete => true) do |q|
              @queue = q

              confirmed = lambda do |*|
                logger.debug { "confirmed queue subscription!" }
                on_start.succeed
                do_beat!
              end

              @queue.subscribe(:confirm => confirmed, &method(:handle_message))
            end
          end
        end
        on_start
      end

      # Stops beating: unsubscribes from the queue, cancels the beat timer and
      # closes the channel. The optional block is registered on the
      # on_shutdown deferred. Does nothing but register the block when not
      # running.
      #
      # @return the on_shutdown deferred
      def shutdown!(&blk)
        on_shutdown(&blk)
        return on_shutdown unless @running
        @running = false

        logger.debug { "performing shutdown!" }

        close_failsafe_timer = nil

        @queue.unsubscribe if @queue

        @timer.cancel if @timer

        # The channel is only created inside connection.on_open, so it may not
        # exist yet (e.g. the TCP connection was lost before it ever opened).
        # There is nothing to close in that case.
        unless @channel
          on_shutdown.succeed
          return on_shutdown
        end

        close_failsafe_timer = EM::Timer.new(1.0) do
          logger.warn { "channel.close never called its block, probably hung. calling on_shutdown.succeed" }
          on_shutdown.succeed
        end

        @channel.close {
          close_failsafe_timer.cancel
          logger.debug { "channel has closed!" }
          on_shutdown.succeed
        }

        on_shutdown
      rescue AMQ::Client::ConnectionClosedError
        # the connection is already gone: don't leave the failsafe armed, it
        # would fire a second succeed and a misleading "probably hung" warning
        close_failsafe_timer.cancel if close_failsafe_timer
        on_shutdown.succeed
        on_shutdown
      end

      # Subscription handler for the reply queue: matches the incoming message
      # to an outstanding beat by message_id and marks that beat as received.
      # Messages with an unknown id are ignored.
      def handle_message(header, payload)
        msg_id = header.message_id

        if outstanding_beat = @outstanding_beats.delete(msg_id)
          outstanding_beat.ping_received!
        end
      end

      # Sends one beat and schedules the next one +interval+ seconds from now.
      # A no-op when the EKG is not running.
      def do_beat!
        return unless @running
        @timer = nil

        beat_id = "beat_#{@beat_num}"
        @beat_num += 1

        beat = Beat.new(channel, queue, beat_id, @beat_timeout_after)

        @outstanding_beats[beat_id] = beat

        # drop the bookkeeping entry however the beat ends
        beat.ensure_that { @outstanding_beats.delete(beat_id) }

        beat.callback do
          logger.debug { "beat #{beat_id.inspect} was successful" }
          @failure_count = 0
        end

        beat.errback do |*a|
          handle_failure(beat, *a)
        end

        beat.start!

        # set this up to happen regularly if it hasn't been done already
        @timer = EM::Timer.new(interval) { do_beat! }
      end

      # Declares the heartbeat dead: cancels outstanding beats, succeeds the
      # on_heartbeat_failure deferred and shuts the EKG down.
      def heartbeat_failure!
        logger.warn { "heartbeat_failure!" }
        cancel_outstanding_beats!
        on_heartbeat_failure.succeed
        shutdown!
      end

      # Errback for a single beat: bumps failure_count and triggers
      # #heartbeat_failure! once it reaches max_failures.
      #
      # @param [Beat] beat the beat that failed
      # @param exception whatever the errback was fired with (currently unused)
      def handle_failure(beat, exception=nil)
        logger.warn { "beat errback called! #{beat.beat_id}" }
        @failure_count += 1
        if @failure_count >= @max_failures
          logger.warn { "@failure_count #{@failure_count} >= @max_failures #{@max_failures}, firing heartbeat_failure" }
          heartbeat_failure!
        end
      end

      # @private
      # goes through the outstanding_beats and cancels them
      def cancel_outstanding_beats!
        # swap in a fresh hash first: cancelling a beat fires its ensure_that
        # hook, which deletes from @outstanding_beats while we iterate
        outstanding_beats, @outstanding_beats = @outstanding_beats, {}
        outstanding_beats.each_value { |b| b.cancel! }
      end
    end
  end
end
|
@@ -0,0 +1,82 @@
|
|
1
|
+
module WhoCan
  # Broadcasts 'PING' messages to a fanout exchange and hands the first reply
  # for each ping back to the caller. Replies arrive on a private,
  # server-named queue and are matched to their ping by message_id.
  class Pinger
    include Deferred::Accessors
    include Logging

    # succeeded once the reply queue subscription has been confirmed
    deferred_event :start

    attr_reader :connection, :channel

    # @return [String] a fresh message id of the form "who_can_<uuid>", with
    #   the dashes of the UUID replaced by underscores
    def self.create_msg_id
      "who_can_#{UUIDTools::UUID.random_create.to_s.tr('-', '_')}"
    end

    # @param connection the AMQP connection the pinger opens its channel on
    def initialize(connection)
      @connection  = connection
      @needs_reply = {}   # message_id => deferred still waiting for a reply
      @queue       = nil
    end


    # Opens a channel, declares the exclusive reply queue and subscribes to
    # it. The optional block is registered on the on_start deferred.
    #
    # @return the on_start deferred
    def start!(&blk)
      EM.schedule do
        AMQP::Channel.new(connection) do |ch,_|
          @channel = ch
          logger.debug { "pinger channel is now open" }

          ch.on_error do |*a|
            logger.error { "channel on_error called with: #{a.inspect}" }
          end

          ch.queue('', :exclusive => true, :auto_delete => true) do |q|
            logger.debug { "queue opened: #{q.name} exclusive, auto_delete" }

            @queue = q
            block_callback = lambda do |*args|
              logger.debug {"calling back to with_queue"}
              on_start.succeed
            end

            logger.debug {"subscribing to the response queue"}
            @queue.subscribe(:confirm => block_callback, &method(:handle_response))
          end
        end
      end
      on_start(&blk)
    end

    # returns immediately, will call the given callback with the headers and
    # payload of the "winning" message (the first ping back from the workers we
    # receive).
    #
    # @param [String] exchange name of the fanout exchange to ping
    # @param [Numeric] timeout seconds after which the returned deferred times out
    # @return the deferred tracking this ping; it is failed with the exception
    #   when the ping could not be sent
    def ping!(exchange, timeout=5, &callback)
      logger.debug { "sending a ping to #{exchange}" }
      deferred = Deferred::Default.new
      deferred.callback(&callback)
      deferred.timeout(timeout)
      # channel is nil until start! has completed; treat that as "not opened"
      # rather than letting a NoMethodError on nil stand in for it
      raise "Channel not opened" unless channel && channel.open?
      msg_id = self.class.create_msg_id
      @needs_reply[msg_id] = deferred

      # a ping that times out or fails will never be answered: forget it so
      # @needs_reply does not grow without bound
      deferred.errback { @needs_reply.delete(msg_id) }

      deferred.errback_on_exception do
        logger.debug {"sending the ping to #{exchange} with a reply to of #{@queue.name}"}
        ping_exchange = channel.fanout(exchange)
        # XXX: the block given to publish here appears to be a bug w/ amqp gem, without it an exception is raised
        ping_exchange.publish('PING', :reply_to => @queue.name, :message_id => msg_id) { logger.debug { "actually sent the ping to #{exchange}" } }
      end
      deferred
    rescue StandardError => e
      # StandardError only: SystemExit, signals etc. must not be swallowed here
      logger.error {"received an exception on ping!: #{e.to_std_format}"}
      deferred.fail(e)
      deferred
    end

    # Subscription handler for the reply queue. Succeeds the deferred that is
    # waiting on this message_id with (header, payload); only the first reply
    # per ping finds a waiting deferred, later ones are ignored.
    def handle_response(header, payload)
      logger.debug {"handling a response (#{header.message_id}) with a payload of: #{payload}"}
      if deferred = @needs_reply.delete(header.message_id)
        EM.schedule do
          deferred.succeed(header, payload)
        end
      end
    end
  end
end
|
@@ -0,0 +1,129 @@
|
|
1
|
+
module WhoCan
  # Listens on a fanout "ping" exchange and answers each ping after a delay
  # chosen by the on_ping callback. The reply is published through the default
  # exchange to the ping's reply_to queue, carrying the ping's message_id.
  class Responder
    include Deferred::Accessors
    include Logging

    # succeeded once the queue subscription has been confirmed
    deferred_event :start

    # Passed to the on_ping callback for every incoming ping. The callback
    # configures the reply on it: it must either set #delay= or set
    # #do_not_respond to true.
    class Response
      attr_reader :header, :payload, :delay, :response

      attr_accessor :do_not_respond

      # @param header the AMQP header of the incoming ping
      # @param payload the payload of the incoming ping
      def initialize(header, payload)
        @header, @payload = header, payload
        @response = ''
        @do_not_respond = false
      end

      # Sets the number of seconds to wait before replying.
      #
      # @param val a non-String value responding to #to_f
      # @raise [TypeError] if val is a String or does not respond to #to_f
      def delay=(val)
        if val.kind_of?(String) || !val.respond_to?(:to_f)
          raise TypeError, "return value of on_ping callback should respond_to? :to_f, not be #{val.inspect}"
        end

        @delay = val.to_f
      end

      # Sets the reply payload; the value is converted with #to_s.
      def response=(val)
        @response = val.to_s
      end

      # @raise [DelayMustBeSetError] if no delay has been set
      def validate!
        raise DelayMustBeSetError unless delay
      end
    end

    attr_reader :channel, :ping_exchange, :queue, :ping_exchange_name, :connection

    # @param connection the AMQP connection; the responder opens its own
    #   AMQP::Channel on it in #setup!
    # @param ping_exchange_name name of the fanout exchange to listen on
    def initialize(connection, ping_exchange_name)
      @connection = connection
      @ping_exchange_name = ping_exchange_name
      @on_ping_cb = nil
      @set_up = false
    end

    # wc = WhoCan.new(connection_opts)
    # responder = wc.new_responder(:retriever)
    # responder.set_load(0.1)

    # wc = WhoCan.new(connection_opts)
    # wc.who_can?("retriever") { |payload| yay!}

    # resp = wc.new_responder(ping_exch_name)
    #
    # resp.on_ping do |obj|
    #   obj.header  # msg headers
    #   obj.payload # msg payload
    #
    #   obj.delay = load_avg * num_slots * Math::PI
    #   obj.response = "something here"
    # end
    #
    # wc = WhoCan.new(connection_opts)
    # wc.on_ping("retriever") do |resp|
    #
    # end

    # Opens a channel, declares the fanout exchange and an exclusive queue,
    # binds them and subscribes. The optional block is registered on the
    # on_start deferred; it is registered even when the responder is already
    # set up, so a late caller is not silently ignored.
    #
    # @return the on_start deferred
    def setup!(&blk)
      on_start(&blk)
      return on_start if @set_up
      @set_up = true
      @close_requested = false
      AMQP::Channel.new(connection) do |ch,_|
        @channel = ch
        @ping_exchange = @channel.fanout(ping_exchange_name, :nowait => false)
        @queue = @channel.queue('', :exclusive => true, :auto_delete => true)

        # close! may have been called while the channel was still opening
        unless @close_requested
          @queue.bind(@ping_exchange) do
            #logger.debug {"subscribing to the #{@ping_exchange_name}"}

            confirm_queue = lambda do |*a|
              #logger.debug { "queue subscribed to #{@ping_exchange_name}" }
              on_start.succeed
            end

            @queue.subscribe(:confirm => confirm_queue, &method(:handle_message))
          end #bind
        end # unless
      end
      on_start
    end

    # Subscription handler for incoming pings. Builds a Response, lets the
    # on_ping callback fill it in and, unless do_not_respond was set, publishes
    # the reply after the requested delay. Pings are ignored while no on_ping
    # callback is registered.
    #
    # @raise [DelayMustBeSetError] if the callback neither set a delay nor
    #   set do_not_respond
    def handle_message(header, payload)
      if @on_ping_cb
        resp = Response.new(header, payload)
        logger.debug {"ping request came in, sending off to get response"}
        @on_ping_cb.call(resp)

        if resp.do_not_respond
          logger.debug { "do_not_respond flag set, not sending reply" }
        else
          resp.validate!
          #logger.debug {"adding timer of: #{resp.delay} and then publishing message"}
          EM.add_timer(resp.delay) do
            logger.debug {"publishing ping response to #{header.reply_to} after delay of #{resp.delay} happened"}
            channel.default_exchange.publish(resp.response, :routing_key => header.reply_to, :message_id => header.message_id)
          end
        end
      end
    end

    # Stops answering pings by unsubscribing from the queue.
    # NOTE(review): the channel itself is left open here - confirm that is
    # intended (replies already scheduled by handle_message still publish on it).
    #
    # @return [Boolean] true if the responder was closed by this call, false
    #   if it was not set up or a close had already been requested
    def close!
      return false if !@set_up || @close_requested
      @close_requested = true

      @queue.unsubscribe if @queue
      @queue = nil
      @set_up = false
      true
    end

    # Register the callback invoked for every incoming ping. It receives a
    # Response and must set a numeric delay on it (or set do_not_respond);
    # that delay is used to postpone the reply to the pinger.
    def on_ping(&block)
      @on_ping_cb = block
    end
  end
end
|
129
|
+
|