zk 0.6.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,411 @@
1
+ module ZK
2
+ # ==== Overview
3
+ #
4
+ # This module implements the "leader election" protocols described
5
+ # {here}[http://hadoop.apache.org/zookeeper/docs/current/recipes.html#sc_leaderElection].
6
+ #
7
+ # There are Candidates and Observers. Candidates take part in elections and
8
+ # all have equal ability and chance to become the leader. When a leader is
9
+ # decided, they hold onto the leadership role until they die. When the leader
10
+ # dies, an election is held and the winner has its +on_winning_election+
11
+ # callbacks fired, and the losers have their +on_losing_election+ callbacks
12
+ # fired. When all of the +on_winning_election+ callbacks have completed
13
+ # (completing whatever steps are necessary to assume the leadership role),
14
+ # the leader will "acknowledge" that it has taken over by creating an
15
+ # ephemeral node at a known location (with optional data that the Observers
16
+ # can then read and take action upon). Note that when this node is created,
17
+ # it means the *leader* has finished taking over, but it does *not* mean that
18
+ # all the slaves have completed *their* tasks.
19
+ #
20
+ # Observers are interested parties in the election, the "constituents" of the
21
+ # process. They can register callbacks to be fired when a new leader has been
22
+ # elected and when a leader has died. The new leader callbacks will only fire
23
+ # once the leader has acknowledged its role, so they can be sure that the
24
+ # leader is ready to perform its duties.
25
+ #
26
+ # ==== Use Case / Example
27
+ #
28
+ # One problem this pattern can be used to solve is failover between two
29
+ # database nodes. Candidates set up callbacks to both take over as master
30
+ # and to follow the master if they lose the election. On the client side,
31
+ # Observers are set up to follow the "leader ack" node. The leader writes its
32
+ # connection info to the "leader ack" node, and the clients can reconnect to
33
+ # the currently active leader.
34
+ #
35
+ #
36
+ # def server
37
+ # candidate = @zk.election_candidate("database_election", "dbhost2.fqdn.tld:4567", :follow => :leader)
38
+ # candidate.on_winning_election { become_master_node! }
39
+ # candidate.on_losing_election { become_slave_of_master! }
40
+ #
41
+ # @zk.on_connected do
42
+ # candidate.vote!
43
+ # end
44
+ # end
45
+ #
46
+ # Note that as soon as vote! is called, either the on_winning_election or
47
+ # on_losing_election callbacks will be called.
48
+ #
49
+ #
50
+ module Election
51
+ VOTE_PREFIX = 'ballot'.freeze
52
+ ROOT_NODE = '/_zkelection'.freeze
53
+
54
+ VALID_FOLLOW_OPTIONS = [:next_node, :leader].freeze
55
+
56
+ DEFAULT_OPTS = {
57
+ :root_election_node => ROOT_NODE,
58
+ }.freeze
59
+
60
+ class Base
61
+ include Logging
62
+
63
+ attr_reader :zk, :vote_path, :root_election_node
64
+
65
# Stores the collaborators shared by every participant in one election.
#
# client - object used for all znode operations (kept in @zk)
# name   - election name; used later to build the election's znode path
# opts   - overrides merged on top of DEFAULT_OPTS; only
#          :root_election_node is read here
def initialize(client, name, opts={})
  settings = DEFAULT_OPTS.merge(opts)

  @zk                 = client
  @name               = name
  @root_election_node = settings[:root_election_node]
  @mutex              = Monitor.new
end
72
+
73
+ # holds the ephemeral nodes of this election
74
# Builds (once) and returns "<root election node>/<name>", with every '/'
# in the election name replaced by '__' so the name stays a single path
# segment. The result is memoized in @root_vote_path.
def root_vote_path #:nodoc:
  return @root_vote_path if @root_vote_path

  flattened_name = @name.gsub('/', '__')
  @root_vote_path = "#{@root_election_node}/#{flattened_name}"
end
77
+
78
+ # this znode will be created as an acknowledgement by the leader
79
+ # that it's aware of its status as the new leader and has run its
80
+ # procedures to become master
81
# Returns the memoized path of the "leader_ack" child under root_vote_path.
def leader_ack_path
  return @leader_ack_path if @leader_ack_path

  election_root = root_vote_path
  @leader_ack_path = "#{election_root}/leader_ack"
end
84
+
85
# Creates this participant's ballot znode (mode :ephemeral_sequential) under
# root_vote_path and remembers the created path in @vote_path.
#
# data - payload stored in the ballot znode
#
# Returns nil without doing anything when a ballot was already cast,
# otherwise the path returned by @zk.create.
#
# NOTE: when the create raises Exceptions::NoNode the whole sequence
# (including create_root_path!) is retried, with no upper bound on attempts.
def cast_ballot!(data)
  begin
    unless @vote_path
      create_root_path!
      ballot_prefix = "#{root_vote_path}/#{VOTE_PREFIX}"
      @vote_path = @zk.create(ballot_prefix, data, :mode => :ephemeral_sequential)
    end
  rescue Exceptions::NoNode
    retry
  end
end
92
+
93
+ # has the leader acknowledged their role?
94
# Asks @zk whether the leader-ack znode exists.
#
# watch - forwarded as the :watch option of the exists? call
def leader_acked?(watch=false)
  ack_node = leader_ack_path
  @zk.exists?(ack_node, :watch => watch)
end
97
+
98
+ # return the data from the current leader or nil if there is no current leader
99
# Reads the leader-ack znode and returns the first element of what @zk.get
# yields; returns nil when the read raises Exceptions::NoNode.
def leader_data
  begin
    response = @zk.get(leader_ack_path)
    response.first
  rescue Exceptions::NoNode
    nil
  end
end
103
+
104
+ # Asynchronously call the block when the leader has acknowledged its
105
+ # role. The given block will *always* be called on a background thread.
106
# Registers a watcher on the leader-ack path and calls +block+ once the ack
# node is seen, from one of two places:
#
# * the watcher, on a created/changed event, or on any other event when a
#   follow-up exists? check shows the node is there;
# * the deferred check below, when the node already exists.
#
# The subscription is unregistered (in an ensure) after each notification.
# Returns whatever @zk.defer returns.
def on_leader_ack(&block)
  creation_sub = @zk.watcher.register(leader_ack_path) do |event|
    event_type = event.type
    ack_event = [Zookeeper::ZOO_CREATED_EVENT, Zookeeper::ZOO_CHANGED_EVENT].any? do |expected|
      expected === event_type
    end

    if ack_event
      begin
        logger.debug { "in #{leader_ack_path} watcher, got creation event, notifying" }
        block.call
      ensure
        creation_sub.unregister
      end
    elsif @zk.exists?(leader_ack_path, :watch => true)
      # the exists? call above also re-arms the watch
      begin
        logger.debug { "in #{leader_ack_path} watcher, node created behind our back, notifying" }
        block.call
      ensure
        creation_sub.unregister
      end
    else
      logger.debug { "in #{leader_ack_path} watcher, got non-creation event, re-watching" }
    end
  end

  @zk.defer do
    next unless @zk.exists?(leader_ack_path, :watch => true)

    logger.debug { "on_leader_ack, #{leader_ack_path} exists, calling block" }
    begin
      block.call
    ensure
      creation_sub.unregister if creation_sub
    end
  end
end
141
+
142
+ protected
143
# Asks @zk to mkdir_p the election's root_vote_path.
def create_root_path!
  election_root = root_vote_path
  @zk.mkdir_p(election_root)
end
146
+
147
# Returns the last path segment of vote_path, or vote_path itself when it
# is nil/false (i.e. no ballot has been cast).
def vote_basename
  ballot = vote_path
  ballot && File.basename(ballot)
end
150
+
151
# Extracts the run of digits at the end of +path+ as an Integer.
# Returns 0 when there are no trailing digits (nil.to_i).
def digit(path)
  trailing_digits = path[/\d+$/]
  trailing_digits.to_i
end
155
+
156
# Invokes each callback in order with no arguments. Anything a callback
# raises (this rescues Exception, not just StandardError) is logged at
# error level and the remaining callbacks still run.
#
# Returns the array of callbacks.
def safe_call(*callbacks)
  callbacks.each do |callback|
    begin
      callback.call
    rescue Exception => err
      logger.error { "Error caught in user supplied callback" }
      logger.error { err.to_std_format }
    end
  end
end
166
+ end
167
+
168
+ # This class is for registering candidates in the leader election. This instance will
169
+ # participate in votes for becoming the leader and will be notified in the
170
+ # case where it needs to take over.
171
+ #
172
+ # if data is given, it will be used as the content of both our ballot and
173
+ # the leader acknowledgement node if and when we become the leader.
174
+ class Candidate < Base
175
# Builds a candidate for the named election.
#
# client, name, opts - passed through to the parent initializer.
# opts[:data]        - stored in @data (defaults to ''); vote! passes it to
#                      cast_ballot! and acknowledge_win! writes it to the
#                      leader-ack node.
def initialize(client, name, opts={})
  super(client, name, opts)
  settings = DEFAULT_OPTS.merge(opts)

  @data      = settings[:data] || ''
  @leader    = nil   # nil until an election result has been determined
  @vote_path = nil

  @winner_callbacks = []
  @loser_callbacks  = []

  # the subscription for next-node failure
  @next_node_ballot_sub = nil
end
188
+
189
# Strict boolean view of @leader: true when it is truthy, false when it is
# nil or false.
def leader?
  @leader ? true : false
end
192
+
193
+ # true if leader has been determined at least once (used in tests)
194
# False only while @leader is still nil.
def voted? #:nodoc:
  @leader.nil? ? false : true
end
197
+
198
+ # When we win the election, we will call the procs registered using this
199
+ # method.
200
# Appends +block+ to @winner_callbacks and returns that array.
def on_winning_election(&block)
  @winner_callbacks.push(block)
end
203
+
204
+ # When we lose the election and are relegated to the shadows, waiting for
205
+ # the leader to make one small misstep, where we can finally claim what
206
+ # is rightfully ours! MWUAHAHAHAHAHA(*cough*)
207
# Appends +block+ to @loser_callbacks and returns that array.
def on_losing_election(&block)
  @loser_callbacks.push(block)
end
210
+
211
+ # These procs should be run in the case of an error when trying to assume
212
+ # the leadership role. This should *probably* be a "hara-kiri" or STONITH
213
+ # type procedure (i.e. kill the candidate)
214
+ #
215
# Placeholder: always raises NotImplementedError.
def on_takeover_error #:nodoc:
  raise(NotImplementedError)
end
218
+
219
+ # volunteer to become the leader. if we win, on_winning_election blocks will
220
+ # be called, otherwise, wait for next election
221
+ #
222
+ # +data+ will be placed in the znode representing our vote
223
# Runs one election round while holding @mutex: drops any existing
# next-node subscription, casts a ballot if none exists yet, then evaluates
# the results. Returns the value of check_election_results!.
def vote!
  @mutex.synchronize {
    clear_next_node_ballot_sub!
    @vote_path || cast_ballot!(@data)
    check_election_results!
  }
end
230
+
231
+ protected
232
+ # the inauguration, as it were
233
# Publishes the leader acknowledgement: creates the ephemeral leader-ack
# znode containing @data.
#
# Returns the result of @zk.create, or nil when the ack node already exists.
# Raises any error other than Exceptions::NodeExists.
#
# The previous `create(...) rescue Exceptions::NodeExists` was a rescue
# *modifier*: it swallowed every StandardError and evaluated to the constant,
# so a failed acknowledgement (e.g. a connection error) went unnoticed.
def acknowledge_win!
  @zk.create(leader_ack_path, @data, :ephemeral => true)
rescue Exceptions::NodeExists
  # an ack node is already present; nothing left to create
  nil
end
236
+
237
+ # return the list of ephemeral vote nodes
238
# Lists the children of root_vote_path whose names start with "ballot",
# ordered ascending by the number that digit extracts from each name.
def get_ballots
  ballots = @zk.children(root_vote_path).grep(/^ballot/)
  ballots.sort! { |left, right| digit(left) <=> digit(right) }
  ballots
end
243
+
244
+ # if +watch_next+ is true, we register a watcher for the next-lowest
245
+ # index number in the list of ballots
246
+ #
247
# Decides the outcome of the current round from the sorted ballot list:
# holding the first (lowest) ballot means we won, anything else is handled
# as a loss.
#
# NOTE(review): when our ballot is not in the list, our_idx is nil and is
# passed to handle_losing_election as-is — confirm that case cannot occur.
def check_election_results!
  #return if leader?         # we already know we're the leader
  ballots = get_ballots
  our_idx = ballots.index(vote_basename)

  unless our_idx == 0
    logger.info { "ZK: we are not the leader, data: #{@data.inspect}" }
    return handle_losing_election(our_idx, ballots)
  end

  # we have the lowest number
  logger.info { "ZK: We have become leader, data: #{@data.inspect}" }
  handle_winning_election
end
261
+
262
# Marks this candidate as leader, runs the winner callbacks, and only then
# publishes the acknowledgement. Returns the result of acknowledge_win!.
def handle_winning_election
  @leader = true
  fire_winning_callbacks!   # runs to completion before the ack is created
  acknowledge_win!
end
267
+
268
# Records the loss and, once the leader has acknowledged (via
# on_leader_ack), fires the loser callbacks and starts following the ballot
# directly ahead of ours.
#
# our_idx - index of our ballot within +ballots+
# ballots - sorted ballot names as returned by get_ballots
def handle_losing_election(our_idx, ballots)
  @leader = false

  on_leader_ack do
    fire_losing_callbacks!

    followed_ballot = File.join(root_vote_path, ballots[our_idx - 1])

    logger.info { "ZK: following #{followed_ballot} for changes, #{@data.inspect}" }

    @next_node_ballot_sub ||= @zk.watcher.register(followed_ballot) do |event|
      if event.node_deleted?
        logger.debug { "#{followed_ballot} was deleted, voting, #{@data.inspect}" }
        vote!
      elsif !@zk.exists?(followed_ballot, :watch => true)
        # covers the race where the followed ballot was deleted before we
        # could re-register for updates; the exists? call re-arms the watch
        logger.debug { "#{followed_ballot} was deleted (detected on re-watch), voting, #{@data.inspect}" }
        vote!
      end
    end

    # the followed node may already be gone before the subscription above
    # sees any event; in that case run the election again right away
    followed_stat = @zk.stat(followed_ballot, :watch => true)
    if !followed_stat.exists?
      logger.debug { "#{@data.inspect}: the node #{followed_ballot} did not exist, retrying" }
      vote!
    end
  end
end
301
+
302
# Unsubscribes and forgets the next-node subscription, if there is one.
# Always returns nil.
def clear_next_node_ballot_sub!
  return unless @next_node_ballot_sub

  @next_node_ballot_sub.unsubscribe
  @next_node_ballot_sub = nil
end
308
+
309
# Runs every registered winner callback through safe_call.
def fire_winning_callbacks!
  winners = @winner_callbacks
  safe_call(*winners)
end
312
+
313
# Runs every registered loser callback through safe_call.
def fire_losing_callbacks!
  losers = @loser_callbacks
  safe_call(*losers)
end
316
+ end
317
+
318
+ class Observer < Base
319
# Builds an observer for the named election; all arguments are handed to
# the parent initializer unchanged.
def initialize(client, name, opts={})
  super

  @new_leader_cbs   = []
  @leader_death_cbs = []

  @creation_sub = nil
  @deletion_sub = nil

  @observing    = false
  @leader_alive = nil   # nil until the leader state has been determined
end
327
+
328
+ # our current idea about the state of the election
329
# Reads @leader_alive while holding @mutex.
def leader_alive #:nodoc:
  @mutex.synchronize do
    @leader_alive
  end
end
332
+
333
+ # register callbacks that should be fired when a leader dies
334
# Appends +blk+ to @leader_death_cbs and returns that array.
def on_leaders_death(&blk)
  @leader_death_cbs.push(blk)
end
337
+
338
+ # register callbacks for when the new leader has acknowledged their role
339
+ # returns the list of registered new-leader callbacks (no subscription object is created)
340
# Appends +blk+ to @new_leader_cbs and returns that array.
def on_new_leader(&blk)
  @new_leader_cbs.push(blk)
end
343
+
344
# Starts observing the election. Does nothing if already observing.
#
# Registers (once) a watcher on the leader-ack path that translates events
# into the_king_is_dead / long_live_the_king, then fires the transition that
# matches the current state of the ack node.
def observe!
  @mutex.synchronize do
    return if @observing
    @observing = true

    @leader_ack_sub ||= @zk.watcher.register(leader_ack_path) do |event|
      next the_king_is_dead   if event.node_deleted?
      next long_live_the_king if event.node_created?

      # Some other event: compare what zookeeper says with what we believe
      # and fire a transition only when the two disagree.
      if leader_acked?(true)
        long_live_the_king unless @leader_alive
      else
        the_king_is_dead if @leader_alive
      end
    end

    if leader_acked?(true)
      long_live_the_king
    else
      the_king_is_dead
    end
  end
end
373
+
374
# Stops observing: unregisters the watcher subscriptions, drops all
# registered callbacks and resets the observer's state. Does nothing when
# the observer is not currently observing.
#
# observe! keeps its subscription in @leader_ack_sub, so that is the one
# that has to be released here. Previously only @deletion_sub and
# @creation_sub were unregistered; neither is ever assigned a subscription,
# so the real watcher subscription stayed registered after close.
def close
  @mutex.synchronize do
    return unless @observing

    [@leader_ack_sub, @deletion_sub, @creation_sub].each do |sub|
      sub.unregister if sub
    end

    # clearing @leader_ack_sub lets a later observe! register a fresh watcher
    @leader_ack_sub = @deletion_sub = @creation_sub = nil

    @leader_death_cbs.clear
    @new_leader_cbs.clear

    @leader_alive = nil
    @observing = false
  end
end
390
+
391
+ protected
392
# Fires the leader-death callbacks and records @leader_alive = false, both
# under @mutex. Afterwards re-checks the ack node (re-arming the watch) and
# switches straight to long_live_the_king if a leader is acked.
def the_king_is_dead
  @mutex.synchronize do
    safe_call(*@leader_death_cbs)
    @leader_alive = false
  end

  if leader_acked?(true)
    long_live_the_king
  end
end
400
+
401
# Fires the new-leader callbacks and records @leader_alive = true, both
# under @mutex. Afterwards re-checks the ack node (re-arming the watch) and
# switches straight to the_king_is_dead if the ack is no longer there.
def long_live_the_king
  @mutex.synchronize do
    safe_call(*@new_leader_cbs)
    @leader_alive = true
  end

  if !leader_acked?(true)
    the_king_is_dead
  end
end
409
+ end
410
+ end
411
+ end
@@ -0,0 +1,202 @@
1
+ module ZK
2
+ # this is the default watcher provided by the zookeeper connection
3
+ # watchers are implemented by adding the :watch => true flag to
4
+ # any #children or #get or #exists calls
5
+ # you never really need to initialize this yourself
6
+ class EventHandler
7
+ include org.apache.zookeeper.Watcher if defined?(JRUBY_VERSION)
8
+ include ZK::Logging
9
+
10
+ VALID_WATCH_TYPES = [:data, :child].freeze
11
+
12
+ ZOOKEEPER_WATCH_TYPE_MAP = {
13
+ Zookeeper::ZOO_CREATED_EVENT => :data,
14
+ Zookeeper::ZOO_DELETED_EVENT => :data,
15
+ Zookeeper::ZOO_CHANGED_EVENT => :data,
16
+ Zookeeper::ZOO_CHILD_EVENT => :child,
17
+ }.freeze
18
+
19
+ attr_accessor :zk # :nodoc:
20
+
21
+ # @private
22
+ # :nodoc:
23
# Sets up an empty handler for the given client.
#
# @callbacks maps a key (node path or state key) to its subscriptions;
# @outstanding_watches holds one Set of paths per entry in VALID_WATCH_TYPES.
def initialize(zookeeper_client)
  @zk        = zookeeper_client
  @mutex     = Monitor.new
  @callbacks = Hash.new { |hash, key| hash[key] = [] }

  @outstanding_watches = {}
  VALID_WATCH_TYPES.each do |watch_type|
    @outstanding_watches[watch_type] = Set.new
  end
end
33
+
34
+ # register a path with the handler
35
+ # your block will be called with all events on that path.
36
+ # aliased as #subscribe
37
+ # @param [String] path the path you want to listen to
38
+ # @param [Block] block the block to execute when a watch event happens
39
+ # @yield [connection, event] We will call your block with the connection the
40
+ # watch event occurred on and the event object
41
+ # @return [ZooKeeper::EventHandlerSubscription] the subscription object
42
+ # you can use to unsubscribe from an event
43
+ # @see ZooKeeper::WatcherEvent
44
+ # @see ZooKeeper::EventHandlerSubscription
45
def register(path, &block)
  logger.debug { "EventHandler#register path=#{path.inspect}" }

  subscription = EventHandlerSubscription.new(self, path, block)
  # the callback table is shared, so append under the handler's lock
  synchronize { @callbacks[path] << subscription }
  subscription
end
51
+ alias :subscribe :register
52
+
53
+ # registers a "state of the connection" handler
54
+ #
55
+ # @param [String] state the state you want to register for
56
+ # @param [Block] block the block to execute on state changes
57
+ # @yield [connection, event] yields your block with
58
def register_state_handler(state, &block)
  # state handlers share the path callback table, keyed by state_key
  key = state_key(state)
  register(key, &block)
end
61
+
62
+ # @deprecated use #unsubscribe on the subscription object
63
+ # @see ZooKeeper::EventHandlerSubscription#unsubscribe
64
def unregister_state_handler(*args)
  target = args.first

  # a subscription object can be handed to unregister directly
  return unregister(target) if target.is_a?(EventHandlerSubscription)

  # otherwise the first argument names a state; translate it to its key
  unregister(state_key(target), args[1])
end
71
+
72
+ # @deprecated use #unsubscribe on the subscription object
73
+ # @see ZooKeeper::EventHandlerSubscription#unsubscribe
74
# Accepted call forms:
#   unregister(subscription)
#   unregister(path_string, subscription)
#   unregister(key, index)   - sets the slot at +index+ to nil
# Always returns nil.
def unregister(*args)
  first, second = args[0], args[1]

  subscription =
    if first.is_a?(EventHandlerSubscription)
      first
    elsif first.is_a?(String) && second.is_a?(EventHandlerSubscription)
      second
    end

  if subscription.nil?
    # index form: blank the slot in place (process compacts nils away)
    synchronize { @callbacks[first][second] = nil }
    return
  end

  synchronize do
    registered = @callbacks[subscription.path]
    position = registered.index(subscription)
    registered.delete_at(position) if position
  end

  nil
end
93
+ alias :unsubscribe :unregister
94
+
95
+ # called from the client-registered callback when an event fires
96
# Dispatches one watcher event to the subscriptions registered for it.
#
# Node events are keyed by event.path and state events by
# state_key(event.state); anything else raises ZKError. For node events
# whose type appears in ZOOKEEPER_WATCH_TYPE_MAP, the path is first removed
# from the matching outstanding-watch set so that a new watch can be set.
#
# Returns the result of safe_call.
def process(event) #:nodoc:
  logger.debug { "EventHandler#process dispatching event: #{event.inspect}" } unless event.type == -1
  event.zk = @zk

  cb_key =
    if event.node_event?
      event.path
    elsif event.state_event?
      state_key(event.state)
    else
      raise ZKError, "don't know how to process event: #{event.inspect}"
    end

  cb_ary = synchronize do
    if event.node_event?
      if watch_type = ZOOKEEPER_WATCH_TYPE_MAP[event.type]
        logger.debug { "re-allowing #{watch_type.inspect} watches on path #{event.path.inspect}" }

        # we received a watch event for this path, now we allow code to set new watchers
        @outstanding_watches[watch_type].delete(event.path)
      end
    end

    # fetch (not []) so the table's default block does not insert an empty
    # entry for every event key that has no subscribers; dup so callbacks
    # run on a snapshot taken under the lock
    @callbacks.fetch(cb_key, []).dup
  end

  # index-style unregister leaves nil placeholders behind
  cb_ary.compact!

  safe_call(cb_ary, event)
end
126
+
127
+ # used during shutdown to clear registered listeners
128
# Empties the callback table under the handler's lock. Returns nil.
def clear! #:nodoc:
  synchronize { @callbacks.clear }
  nil
end
134
+
135
# Runs the given block while holding @mutex and returns the block's value.
def synchronize #:nodoc:
  @mutex.synchronize do
    yield
  end
end
138
+
139
# Returns a memoized lambda. Each call of the lambda obtains a callback
# from watcher_callback, invokes it with the given argument, and returns
# that callback object.
def get_default_watcher_block
  @default_watcher_block ||= lambda { |hash|
    callback = watcher_callback
    callback.call(hash)
    callback
  }
end
146
+
147
+ # implements not only setting up the watcher callback, but deduplicating
148
+ # event delivery. Keeps track of in-flight watcher-type+path requests and
149
+ # doesn't re-register the watcher with the server until a response has been
150
+ # fired. This prevents one event delivery to *every* callback per :watch => true
151
+ # argument.
152
# watch_type - key into @outstanding_watches (fetch raises for unknown keys)
# opts       - request options; :watch is removed from it, :path is read,
#              and :watcher is added when a new watch is actually needed
#
# Returns nil immediately when opts carries no truthy :watch.
def setup_watcher!(watch_type, opts)
  return unless opts.delete(:watch)

  synchronize do
    pending_paths = @outstanding_watches.fetch(watch_type)
    path = opts[:path]

    if pending_paths.add?(path).nil?
      # outstanding watch for path and data pair already exists, so ignore
      logger.debug { "outstanding watch request for path #{path.inspect} and watcher type #{watch_type.inspect}, not re-registering" }
    else
      # this path has no outstanding watchers, let it do its thing
      opts[:watcher] = watcher_callback
    end
  end
end
168
+
169
+ protected
170
# Creates a WatcherCallback whose block hands each event to process.
def watcher_callback
  ZookeeperCallbacks::WatcherCallback.create do |event|
    process(event)
  end
end
173
+
174
# Maps a state designator to its callback-table key "state_<n>".
#
# arg - a String/Symbol naming a ZookeeperConstants::ZOO_<NAME>_STATE
#       constant (case-insensitive), or an Integer used as-is
#
# Raises ArgumentError for unknown names and unsupported argument types.
def state_key(arg)
  begin
    state_id =
      if String === arg || Symbol === arg
        ZookeeperConstants.const_get(:"ZOO_#{arg.to_s.upcase}_STATE")
      elsif Integer === arg
        arg
      else
        raise NameError # ugh lame
      end

    "state_#{state_id}"
  rescue NameError
    # both the explicit raise above and a failed const_get end up here
    raise ArgumentError, "#{arg} is not a valid zookeeper state", caller
  end
end
189
+
190
# Calls every entry of +callbacks+ that responds to #call, passing +args+.
# Anything raised (this rescues Exception, not just StandardError) is
# logged at error level and the remaining callbacks still run.
#
# Returns +callbacks+.
def safe_call(callbacks, *args)
  callbacks.each do |callback|
    begin
      next unless callback.respond_to?(:call)
      callback.call(*args)
    rescue Exception => err
      logger.error { "Error caught in user supplied callback" }
      logger.error { err.to_std_format }
    end
  end
end
200
+ end
201
+ end
202
+
@@ -0,0 +1,29 @@
1
+ module ZK
2
+ # the subscription object that is passed back from subscribing
3
+ # to events.
4
+ # @see ZooKeeperEventHandler#subscribe
5
class EventHandlerSubscription
  attr_accessor :event_handler, :path, :callback

  # @private
  # :nodoc:
  def initialize(event_handler, path, callback)
    @event_handler = event_handler
    @path          = path
    @callback      = callback
  end

  # Removes this subscription by passing itself to the handler's
  # #unregister; returns whatever that call returns.
  def unsubscribe
    @event_handler.unregister(self)
  end
  alias_method :unregister, :unsubscribe

  # @private
  # :nodoc:
  # Forwards the event to the stored callback.
  def call(event)
    callback.call(event)
  end
end
28
+ end
29
+