who_can 0.3.4
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +7 -0
- data/.rspec +4 -0
- data/.rvmrc +1 -0
- data/Gemfile +8 -0
- data/Rakefile +2 -0
- data/lib/who_can.rb +64 -0
- data/lib/who_can/base.rb +72 -0
- data/lib/who_can/connection_manager.rb +268 -0
- data/lib/who_can/connection_wrapper.rb +210 -0
- data/lib/who_can/heartbeater.rb +7 -0
- data/lib/who_can/heartbeater/beat.rb +72 -0
- data/lib/who_can/heartbeater/ekg.rb +169 -0
- data/lib/who_can/logging.rb +12 -0
- data/lib/who_can/pinger.rb +82 -0
- data/lib/who_can/responder.rb +129 -0
- data/lib/who_can/version.rb +3 -0
- data/poc/heartbeater.rb +37 -0
- data/poc/submitter.rb +180 -0
- data/poc/who_can_with_hearbeat.rb +20 -0
- data/scripts/cross_cluster_ping.rb +107 -0
- data/spec/spec_helper.rb +21 -0
- data/spec/support/evented_spec_extensions.rb +14 -0
- data/spec/support/logging.rb +8 -0
- data/spec/support/logging_progress_bar_formatter.rb +14 -0
- data/spec/who_can/base_spec.rb +66 -0
- data/spec/who_can/connection_manager_spec.rb +260 -0
- data/spec/who_can/connection_wrapper_spec.rb +91 -0
- data/spec/who_can/heartbeater/beat_spec.rb +101 -0
- data/spec/who_can/heartbeater/ekg_spec.rb +45 -0
- data/spec/who_can/pinger_responder_integration_spec.rb +63 -0
- data/spec/who_can/pinger_spec.rb +82 -0
- data/spec/who_can/responder_spec.rb +48 -0
- data/who_can.gemspec +32 -0
- metadata +290 -0
@@ -0,0 +1,72 @@
|
|
1
|
+
module WhoCan
|
2
|
+
module Heartbeater
|
3
|
+
class Beat
|
4
|
+
include Logging
|
5
|
+
include Deferred
|
6
|
+
|
7
|
+
# mainly used for hooking-in tests
|
8
|
+
deferred_event :after_publish, :prefix => false
|
9
|
+
|
10
|
+
attr_reader :channel, :queue, :beat_id, :timeout_after
|
11
|
+
|
12
|
+
# Records the collaborators for a single heartbeat message. Nothing is
# published here; that happens in start!.
#
# @param [AMQP::Channel] channel the amqp channel we are to use
#
# @param [#name] queue the queue whose name is used as the routing key when
#   the beat message is published
#
# @param [String] beat_id a unique identifier for this beat instance
#
# @param [Numeric] timeout how long do we wait until we decide this beat
#   was a failure. if nil, no timeout will be set. Timeout is started
#   before we open the channel, and is cancelled on successful return
#   message
#
def initialize(channel, queue, beat_id, timeout=nil)
  @channel = channel
  @queue = queue
  @beat_id = beat_id
  @timeout_after = timeout
  @started = false
  # explicit default so cancelled? answers false (not nil) before cancel!
  @cancelled = false
end
|
28
|
+
|
29
|
+
# True while the deferred status is still unset (nil) or :unknown,
# i.e. neither callbacks nor errbacks have been triggered yet.
#
# @return [Boolean]
def pending?
  [nil, :unknown].include?(@deferred_status)
end
|
32
|
+
|
33
|
+
# Inverse of pending?.
#
# @return [Boolean]
def fired?
  return false if pending?
  true
end
|
36
|
+
|
37
|
+
# Abandons a beat that has not fired yet: cancels its timeout, marks it as
# cancelled and then calls succeed (so errbacks are not triggered by a
# cancellation). Does nothing when the beat has already fired.
def cancel!
  unless fired?
    logger.info { "#{beat_id} cancel called!" }

    cancel_timeout
    @cancelled = true
    succeed
  end
end
|
46
|
+
|
47
|
+
# Whether cancel! has marked this beat as cancelled.
#
# Always answers a real boolean: @cancelled may never have been assigned
# (it is only set by cancel!), in which case this returns false, not nil.
#
# @return [Boolean]
def cancelled?
  @cancelled ? true : false
end
|
50
|
+
|
51
|
+
# Called when the beat message has come back. Marks the beat as succeeded,
# but only if it is still pending and has not been cancelled.
def ping_received!
  return unless pending?
  return if cancelled?

  succeed
end
|
54
|
+
|
55
|
+
# Publishes the 'BEAT' message once the channel is open. Only the first
# call does anything; later calls return nil immediately.
#
# The timeout (when one was given) is armed before waiting on the channel,
# and the publish is wrapped in errback_on_exception.
def start!
  unless @started
    @started = true

    timeout(timeout_after) if timeout_after

    channel.once_opened do
      errback_on_exception do
        # routed through the default exchange straight to our own queue
        channel.default_exchange.publish('BEAT', :routing_key => queue.name, :message_id => beat_id) do
          logger.debug { "message #{beat_id} sent" }
          after_publish.succeed
        end
      end
    end
  end
end
|
70
|
+
end
|
71
|
+
end
|
72
|
+
end
|
@@ -0,0 +1,169 @@
|
|
1
|
+
module WhoCan
|
2
|
+
module Heartbeater
|
3
|
+
# Pings the broker at regular intervals. After N number of successive
|
4
|
+
# failures, will fire the on_heartbeat_failure callback (note: not the
|
5
|
+
# errback, the *callback*). Due to the nature of Deferreds, after a failure
|
6
|
+
# an EKG instance must be discarded.
|
7
|
+
#
|
8
|
+
# a successful beat resets failure_count
|
9
|
+
#
|
10
|
+
class EKG
|
11
|
+
include Logging
|
12
|
+
include Deferred
|
13
|
+
|
14
|
+
attr_reader :connection, :queue, :interval, :max_failures, :failure_count
|
15
|
+
|
16
|
+
# fired when the amqp channel has opened and we've started sending beats
|
17
|
+
deferred_event :start
|
18
|
+
|
19
|
+
# fired when failure_count >= max_failures
|
20
|
+
deferred_event :heartbeat_failure
|
21
|
+
|
22
|
+
deferred_event :shutdown
|
23
|
+
|
24
|
+
|
25
|
+
# Create a new EKG. Only configuration and bookkeeping state are set up
# here; no AMQP calls are made until start!.
#
# @param connection the connection later handed to AMQP::Channel.new and
#   used for the on_open / on_tcp_connection_loss hooks
#
# @option opts [Integer] :max_failures (3) fire the on_heartbeat_failure
#   callback after this many failures
#
# @option opts [Numeric] :interval (2.0) interval between beats
#
# @option opts [Numeric] :timeout (5.0) time to live for a single beat
def initialize(connection, opts={})
  settings = {:interval => 2.0, :max_failures => 3, :timeout => 5.0}.merge(opts)

  @connection = connection
  @interval, @max_failures, @beat_timeout_after =
    settings.values_at(:interval, :max_failures, :timeout)

  @channel = @timer = @queue = nil
  @failure_count = @beat_num = 0
  @running = false
  @outstanding_beats = {}
end
|
48
|
+
|
49
|
+
# Lazily creates, then memoizes, the AMQP::Channel used for heartbeats.
#
# @return [AMQP::Channel]
def channel
  @channel = AMQP::Channel.new(connection) unless @channel
  @channel
end
|
52
|
+
|
53
|
+
# Starts the heartbeat. The given block (if any) is registered on the
# on_start event, which fires once the queue subscription is confirmed;
# the first beat is sent right after that.
#
# Calling start! while already running only registers the block.
#
# @return the on_start deferred event
def start!(&blk)
  on_start(&blk)

  unless @running
    @running = true

    connection.on_tcp_connection_loss do
      logger.error { "on_tcp_connection_loss callback fired!!" }
      heartbeat_failure!
    end

    connection.on_open do
      channel.once_opened do
        channel.on_error do |*args|
          logger.error { "channel.on_error called with #{args.inspect}" }
        end

        # server-named, exclusive queue that our own beats are routed back to
        channel.queue('', :exclusive => true, :auto_delete => true) do |beat_queue|
          @queue = beat_queue

          subscription_confirmed = lambda do |*|
            logger.debug { "confirmed queue subscription!" }
            on_start.succeed
            do_beat!
          end

          @queue.subscribe(:confirm => subscription_confirmed, &method(:handle_message))
        end
      end
    end
  end

  on_start
end
|
81
|
+
|
82
|
+
# Stops the heartbeat: unsubscribes the queue, cancels the beat timer and
# closes the channel. The given block (if any) is registered on the
# on_shutdown event, which is fired when the channel reports closed, when
# the 1 second failsafe expires, when the connection turns out to be
# closed already, or immediately when no channel was ever created.
#
# Calling shutdown! while not running only registers the block.
#
# @return the on_shutdown deferred event (on every path)
def shutdown!(&blk)
  on_shutdown(&blk)
  return on_shutdown unless @running
  @running = false

  logger.debug { "performing shutdown!" }

  @queue.unsubscribe if @queue

  @timer.cancel if @timer

  # start! only creates the channel once the connection opens; if we are
  # shut down before that (e.g. connection loss) there is nothing to close.
  unless @channel
    on_shutdown.succeed
    return on_shutdown
  end

  close_failsafe_timer = EM::Timer.new(1.0) do
    logger.warn { "channel.close never called its block, probably hung. calling on_shutdown.succeed" }
    on_shutdown.succeed
  end

  @channel.close {
    close_failsafe_timer.cancel
    logger.debug { "channel has closed!" }
    on_shutdown.succeed
  }

  on_shutdown
rescue AMQ::Client::ConnectionClosedError
  # the failsafe is pointless once we know the connection is gone; it is
  # nil when the error came from the unsubscribe above
  close_failsafe_timer.cancel if close_failsafe_timer
  on_shutdown.succeed
  on_shutdown
end
|
108
|
+
|
109
|
+
# Subscription handler for the beat queue. Looks up (and removes) the
# outstanding beat matching the message id and tells it that its ping
# arrived. Messages with an unknown id are ignored.
#
# @param header message header; only #message_id is read
# @param payload message body (unused)
def handle_message(header, payload)
  beat = @outstanding_beats.delete(header.message_id)
  beat.ping_received! if beat
end
|
116
|
+
|
117
|
+
# Sends one beat and schedules the next one `interval` seconds later.
# Does nothing unless the EKG is running.
#
# Each beat is tracked in @outstanding_beats until it completes; a
# successful beat resets failure_count, a failed one goes to
# handle_failure.
def do_beat!
  return unless @running
  @timer = nil

  beat_id = "beat_#{@beat_num}"
  @beat_num += 1

  beat = Beat.new(channel, queue, beat_id, @beat_timeout_after)

  @outstanding_beats[beat_id] = beat

  beat.ensure_that { @outstanding_beats.delete(beat_id) }

  beat.callback do
    logger.debug { "beat #{beat_id.inspect} was successful" }
    @failure_count = 0
  end

  beat.errback do |*a|
    handle_failure(beat, *a)
  end

  beat.start!

  # beat.start! may fail synchronously and, via handle_failure, end in
  # shutdown!; in that case do not leave a fresh timer behind.
  @timer = EM::Timer.new(interval) { do_beat! } if @running
end
|
144
|
+
|
145
|
+
# Declares the heartbeat dead: cancels every outstanding beat, fires the
# on_heartbeat_failure event (as a success - see the class comment) and
# then shuts the EKG down.
#
# Invoked from the on_tcp_connection_loss hook and from handle_failure.
#
# @return whatever shutdown! returns
def heartbeat_failure!
  logger.warn { "heartbeat_failure!" }

  cancel_outstanding_beats!

  failure_event = on_heartbeat_failure
  failure_event.succeed

  shutdown!
end
|
151
|
+
|
152
|
+
# Errback for a single beat. Counts the failure and, once the count
# reaches max_failures, escalates to heartbeat_failure!.
#
# @param beat the beat that failed; only #beat_id is read (for logging)
# @param exception optional errback argument (unused)
def handle_failure(beat, exception=nil)
  logger.warn { "beat errback called! #{beat.beat_id}" }
  @failure_count += 1

  # still below the threshold: keep beating
  return if @failure_count < @max_failures

  logger.warn { "@failure_count #{@failure_count} >= @max_failures #{@max_failures}, firing heartbeat_failure" }
  heartbeat_failure!
end
|
160
|
+
|
161
|
+
# @private
# Swaps in a fresh, empty @outstanding_beats hash and then cancels every
# beat that was being tracked.
#
# @return [Array] the beats that were cancelled
def cancel_outstanding_beats!
  in_flight = @outstanding_beats
  @outstanding_beats = {}
  in_flight.values.each(&:cancel!)
end
|
167
|
+
end
|
168
|
+
end
|
169
|
+
end
|
@@ -0,0 +1,82 @@
|
|
1
|
+
module WhoCan
|
2
|
+
class Pinger
|
3
|
+
include Deferred::Accessors
|
4
|
+
include Logging
|
5
|
+
|
6
|
+
deferred_event :start
|
7
|
+
|
8
|
+
attr_reader :connection, :channel
|
9
|
+
|
10
|
+
# Builds a message id of the form "who_can_<uuid>", using a random UUID
# whose dashes are replaced by underscores.
#
# @return [String]
def self.create_msg_id
  uuid = UUIDTools::UUID.random_create.to_s
  "who_can_#{uuid.tr('-', '_')}"
end
|
13
|
+
|
14
|
+
# @param connection the connection later handed to AMQP::Channel.new in
#   start!
def initialize(connection)
  # @needs_reply maps message id => deferred still waiting for a response
  @connection, @needs_reply, @queue = connection, {}, nil
end
|
19
|
+
|
20
|
+
|
21
|
+
# Opens a channel and a server-named, exclusive reply queue, then
# subscribes to it with handle_response. The on_start event fires when the
# subscription is confirmed; the given block (if any) is registered on it.
#
# The AMQP work is wrapped in EM.schedule.
#
# @return whatever on_start(&blk) returns
def start!(&blk)
  EM.schedule do
    AMQP::Channel.new(connection) do |opened_channel, _|
      @channel = opened_channel
      logger.debug { "pinger channel is now open" }

      opened_channel.on_error do |*args|
        logger.error { "channel on_error called with: #{args.inspect}" }
      end

      opened_channel.queue('', :exclusive => true, :auto_delete => true) do |reply_queue|
        logger.debug { "queue opened: #{reply_queue.name} exclusive, auto_delete" }

        @queue = reply_queue
        subscribed = lambda do |*|
          logger.debug {"calling back to with_queue"}
          on_start.succeed
        end

        logger.debug {"subscribing to the response queue"}
        @queue.subscribe(:confirm => subscribed, &method(:handle_response))
      end
    end
  end

  on_start(&blk)
end
|
47
|
+
|
48
|
+
# returns immediately, will call the given callback with the headers and
# payload of the "winning" message (the first ping back from the workers we
# receive).
#
# Problems (channel not open, publish raising, ...) do not propagate to the
# caller; they are reported by failing the returned deferred.
#
# @param exchange name of the fanout exchange the 'PING' is published to
# @param [Numeric] timeout passed to the deferred's timeout
# @return the deferred tracking this ping
def ping!(exchange, timeout=5, &callback)
  logger.debug { "sending a ping to #{exchange}" }
  deferred = Deferred::Default.new
  deferred.callback(&callback)
  deferred.timeout(timeout)
  # channel is nil until start! has opened it
  raise "Channel not opened" unless channel && channel.open?
  msg_id = self.class.create_msg_id
  @needs_reply[msg_id] = deferred

  # handle_response removes the entry on success; without this a ping that
  # times out or fails would stay in @needs_reply forever
  deferred.errback { @needs_reply.delete(msg_id) }

  deferred.errback_on_exception do
    logger.debug {"sending the ping to #{exchange} with a reply to of #{@queue.name}"}
    # XXX: the block given to publish here appears to be a bug w/ amqp gem, without it an exception is raised
    ping_exchange = channel.fanout(exchange)
    ping_exchange.publish('PING', :reply_to => @queue.name, :message_id => msg_id) { logger.debug { "actually sent the ping to #{exchange}" } }
  end
  deferred
rescue StandardError => e
  # nothing to fail if we never got as far as building the deferred
  raise unless deferred
  logger.error {"received an exception on ping!: #{e.to_std_format}"}
  deferred.fail(e)
  deferred
end
|
72
|
+
|
73
|
+
# Subscription handler for the reply queue. If a ping is still waiting on
# this message id, it is removed from @needs_reply and succeeded with the
# header and payload (via EM.schedule). Because the entry is removed, any
# later response carrying the same id is ignored.
def handle_response(header, payload)
  logger.debug {"handling a response (#{header.message_id}) with a payload of: #{payload}"}

  waiting = @needs_reply.delete(header.message_id)
  return unless waiting

  EM.schedule do
    waiting.succeed(header, payload)
  end
end
|
81
|
+
end
|
82
|
+
end
|
@@ -0,0 +1,129 @@
|
|
1
|
+
module WhoCan
|
2
|
+
class Responder
|
3
|
+
include Deferred::Accessors
|
4
|
+
include Logging
|
5
|
+
|
6
|
+
deferred_event :start
|
7
|
+
|
8
|
+
# Object handed to the on_ping callback for each incoming ping. The
# callback reads header/payload and sets delay (required unless
# do_not_respond is set) and, optionally, response.
class Response
  attr_reader :header, :payload, :delay, :response

  attr_accessor :do_not_respond

  def initialize(header, payload)
    @header = header
    @payload = payload
    @response = ''
    @do_not_respond = false
  end

  # Stores the delay as a Float.
  #
  # @raise [TypeError] for Strings and for anything without #to_f
  def delay=(val)
    numeric_like = !val.kind_of?(String) && val.respond_to?(:to_f)

    unless numeric_like
      raise TypeError, "return value of on_ping callback should respond_to? :to_f, not be #{val.inspect}"
    end

    @delay = val.to_f
  end

  # Stores the reply body, coerced with #to_s.
  def response=(val)
    @response = val.to_s
  end

  # @raise [DelayMustBeSetError] when no delay has been assigned
  def validate!
    raise DelayMustBeSetError if !delay
  end
end
|
35
|
+
|
36
|
+
attr_reader :channel, :ping_exchange, :queue, :ping_exchange_name, :connection
|
37
|
+
|
38
|
+
# @param connection the connection handed to AMQP::Channel.new in setup!
# @param ping_exchange_name name of the fanout exchange that setup! binds
#   the responder's queue to
def initialize(connection, ping_exchange_name)
  @connection, @ping_exchange_name = connection, ping_exchange_name
  @on_ping_cb = nil
  @set_up = false
end
|
45
|
+
|
46
|
+
# wc = WhoCan.new(connection_opts)
|
47
|
+
# responder = wc.new_responder(:retriever)
|
48
|
+
# responder.set_load(0.1)
|
49
|
+
|
50
|
+
# wc = WhoCan.new(connection_opts)
|
51
|
+
# wc.who_can?("retriever") { |payload| yay!}
|
52
|
+
|
53
|
+
# resp = wc.new_responder(ping_exch_name)
|
54
|
+
#
|
55
|
+
# resp.on_ping do |obj|
|
56
|
+
# obj.header # msg headers
|
57
|
+
# obj.payload # msg payload
|
58
|
+
#
|
59
|
+
# obj.delay = load_avg * num_slots * Math::PI
|
60
|
+
# obj.response = "something here"
|
61
|
+
# end
|
62
|
+
#
|
63
|
+
# wc = WhoCan.new(connection_opts)
|
64
|
+
# wc.on_ping("retriever") do |resp|
|
65
|
+
#
|
66
|
+
# end
|
67
|
+
|
68
|
+
# Opens a channel, declares the ping fanout exchange and a server-named,
# exclusive queue, binds the two and subscribes with handle_message. The
# on_start event fires once the subscription is confirmed.
#
# The given block (if any) is always registered on on_start, including
# when the responder is already set up (it used to be dropped in that
# case).
#
# @return the on_start deferred event
def setup!(&blk)
  on_start(&blk)
  return on_start if @set_up
  @set_up = true
  @close_requested = false

  AMQP::Channel.new(connection) do |opened_channel, _|
    @channel = opened_channel
    @ping_exchange = @channel.fanout(ping_exchange_name, :nowait => false)
    @queue = @channel.queue('', :exclusive => true, :auto_delete => true)

    # close! may have been called while the channel was still opening
    unless @close_requested
      @queue.bind(@ping_exchange) do
        confirm_queue = lambda do |*|
          on_start.succeed
        end

        @queue.subscribe(:confirm => confirm_queue, &method(:handle_message))
      end
    end
  end

  on_start
end
|
93
|
+
|
94
|
+
# Subscription handler for incoming pings. Ignored entirely when no
# on_ping callback is registered. Otherwise the callback is given a
# Response to fill in; unless it sets do_not_respond, the response is
# validated and published to the sender's reply_to (via the default
# exchange, echoing the message id) after resp.delay seconds.
def handle_message(header, payload)
  return unless @on_ping_cb

  resp = Response.new(header, payload)
  logger.debug {"ping request came in, sending off to get response"}
  @on_ping_cb.call(resp)

  return logger.debug { "do_not_respond flag set, not sending reply" } if resp.do_not_respond

  resp.validate!
  EM.add_timer(resp.delay) do
    logger.debug {"publishing ping response to #{header.reply_to} after delay of #{resp.delay} happened"}
    channel.default_exchange.publish(resp.response, :routing_key => header.reply_to, :message_id => header.message_id)
  end
end
|
112
|
+
|
113
|
+
# Stops answering pings: unsubscribes from the queue (if there is one) and
# marks the responder as no longer set up. The channel itself is left
# untouched.
#
# @return [Boolean] true when this call performed the close, false when
#   there was nothing to do (not set up, or a close was already requested).
#   Previously false was returned in both cases.
def close!
  return false if !@set_up || @close_requested
  @close_requested = true

  @queue.unsubscribe if @queue
  @queue = nil
  @set_up = false
  true
end
|
121
|
+
|
122
|
+
# Registers the block that handle_message calls for every incoming ping.
# The block receives a Response and is expected to set its delay (the
# number of seconds to wait before replying) and optionally its response;
# setting do_not_respond suppresses the reply. A later registration
# replaces the earlier one.
#
# @return [Proc] the registered block
def on_ping(&handler)
  @on_ping_cb = handler
end
|
127
|
+
end
|
128
|
+
end
|
129
|
+
|