zk 1.5.1 → 1.5.2
- data/Gemfile +4 -0
- data/Guardfile +9 -5
- data/README.markdown +1 -1
- data/RELEASES.markdown +8 -0
- data/lib/zk/client/threaded.rb +12 -5
- data/lib/zk/fork_hook.rb +3 -0
- data/lib/zk/locker/locker_base.rb +58 -14
- data/lib/zk/version.rb +1 -1
- data/spec/message_queue_spec.rb +3 -2
- data/spec/shared/client_contexts.rb +1 -1
- data/spec/shared/locker_contexts.rb +53 -0
- data/spec/shared/locker_examples.rb +55 -0
- data/spec/support/logging.rb +37 -23
- data/spec/zk/locker/exclusive_locker_spec.rb +122 -0
- data/spec/zk/locker/locker_basic_spec.rb +79 -0
- data/spec/zk/locker/shared_exclusive_integration_spec.rb +157 -0
- data/spec/zk/locker/shared_locker_spec.rb +137 -0
- data/spec/zk/pool_spec.rb +6 -3
- data/spec/zk/watch_spec.rb +0 -1
- data/spec/zk/zookeeper_spec.rb +2 -1
- data/zk.gemspec +1 -1
- metadata +19 -9
- data/spec/zk/locker_spec.rb +0 -552
data/Gemfile
CHANGED
data/Guardfile
CHANGED
@@ -17,14 +17,18 @@ guard 'rspec', :version => 2 do

   watch(%r{^lib/(.+)\.rb$}) do |m|
     case m[1]
-    when
+    when 'zk/event_handler'
      "spec/zk/watch_spec.rb"
-
+
+    when 'zk/client/threaded'
      ["spec/zk/client_spec.rb", "spec/zk/zookeeper_spec.rb"]
-
-
-
+
+    when %r{^(?:zk/locker/locker_base|spec/shared/locker)}
+      Dir["spec/zk/locker/*_spec.rb"]
+
+    when 'zk' # .rb
      'spec' # run all tests
+
    else
      "spec/#{m[1]}_spec.rb"
    end
data/README.markdown
CHANGED
@@ -67,7 +67,7 @@ In addition to all of that, I would like to think that the public API the ZK::Cl
 ## NEWS ##
 ### v1.5.1 ###

-* Added a `:retry_duration` option to the client constructor, which allows the user to specify how long, in the case of a connection loss, an operation should wait for the connection to be re-established before retrying. This can be set at a global level and overridden on a per-call basis. The default is to not retry (which may change at a later date). Generally speaking, a timeout of > 30s is probably excessive, and care should be taken because during a connection loss, the server-side state may change without you being aware of it (i.e. events will not be delivered).
+* Added a `:retry_duration` option to the Threaded client constructor, which allows the user to specify how long, in the case of a connection loss, an operation should wait for the connection to be re-established before retrying. This can be set at a global level and overridden on a per-call basis. The default is to not retry (which may change at a later date). Generally speaking, a timeout of > 30s is probably excessive, and care should be taken because during a connection loss, the server-side state may change without you being aware of it (i.e. events will not be delivered).

 * Small fork-hook implementation fix. Previously we were using WeakRefs so that hooks would not prevent an object from being garbage collected. This has been replaced with a finalizer, which is more deterministic.
data/RELEASES.markdown
CHANGED
@@ -1,5 +1,13 @@
 This file notes feature differences and bugfixes contained between releases.

+### v1.5.2 ###
+
+* Fix locker cleanup code to avoid a nasty race when a session is lost, see [issue #34](https://github.com/slyphon/zk/issues/34)
+
+* Fix a potential deadlock in the ForkHook code so the mutex is unlocked in the case of an exception
+
+* Do not hang forever when shutting down if the shutdown thread does not exit (wait at most 30 seconds).
+
 ### v1.5.1 ###

 * Added a `:retry_duration` option to the client constructor, which allows the user to specify how long, in the case of a connection loss, an operation should wait for the connection to be re-established before retrying. This can be set at a global level and overridden on a per-call basis. The default is to not retry (which may change at a later date). Generally speaking, a timeout of > 30s is probably excessive, and care should be taken because during a connection loss, the server-side state may change without you being aware of it (i.e. events will not be delivered).
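A hedged usage sketch of the `:retry_duration` behavior described above; the host string and znode path are placeholders, and the per-call override follows the changelog's "overridden on a per-call basis" wording:

    # illustrative only -- client-wide: retry operations for up to 10s after a connection loss
    zk = ZK::Client::Threaded.new('localhost:2181', :retry_duration => 10)

    # per-call override (placeholder path)
    zk.get('/some/znode', :retry_duration => 5)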
data/lib/zk/client/threaded.rb
CHANGED
@@ -171,6 +171,8 @@ module ZK

       @retry_duration = opts.fetch(:retry_duration, nil).to_i

+      yield self if block_given?
+
       @fork_subs = [
         ForkHook.prepare_for_fork(method(:pause_before_fork_in_parent)),
         ForkHook.after_fork_in_parent(method(:resume_after_fork_in_parent)),
@@ -179,11 +181,10 @@ module ZK

       ObjectSpace.define_finalizer(self, self.class.finalizer(@fork_subs))

-      yield self if block_given?
-
       connect if opts.fetch(:connect, true)
     end

+    # @private
     def self.finalizer(hooks)
       proc { hooks.each(&:unregister) }
     end
@@ -259,7 +260,11 @@ module ZK
        @cond.broadcast
      end

-
+      # the compact is here because the @cnx *may* be nil when this callback is fired by the
+      # ForkHook (in the case of ZK.open). The race is between the GC calling the finalizer
+      [@event_handler, @threadpool, @cnx].compact.each(&:pause_before_fork_in_parent)
+    ensure
+      logger.debug { "#{self.class}##{__method__} returning" }
    end

    # @private
@@ -270,7 +275,7 @@ module ZK

      logger.debug { "#{self.class}##{__method__}" }

-      [@cnx, @event_handler, @threadpool].each(&:resume_after_fork_in_parent)
+      [@cnx, @event_handler, @threadpool].compact.each(&:resume_after_fork_in_parent)

      @cond.broadcast
    end
@@ -304,6 +309,8 @@ module ZK
      #
      shutdown_thread = Thread.new do
        @threadpool.shutdown(10)
+
+        # this will call #close
        super

        @mutex.synchronize do
@@ -313,7 +320,7 @@ module ZK
        end
      end

-      on_tpool ? shutdown_thread : shutdown_thread.join
+      on_tpool ? shutdown_thread : shutdown_thread.join(30)
    end

    # {see Base#close}
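The `join(30)` change above bounds how long a synchronous close waits for the shutdown thread. A minimal sketch of the `Thread#join` timeout semantics it relies on (names illustrative):

    # Thread#join(limit) returns the thread if it finished within `limit`
    # seconds, or nil if it is still running, so the caller can stop waiting.
    slow = Thread.new { sleep 60 }

    if slow.join(1).nil? # illustrative 1-second limit
      warn "shutdown thread still running; giving up the wait"
    end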
data/lib/zk/fork_hook.rb
CHANGED
data/lib/zk/locker/locker_base.rb
CHANGED
@@ -48,11 +48,14 @@ module ZK
    def initialize(client, name, root_lock_node=nil)
      @zk = client
      @root_lock_node = root_lock_node || Locker.default_root_lock_node
-
-      @
-      @
-      @
+
+      @path = name
+      @locked = false
+      @waiting = false
+      @lock_path = nil
+      @parent_stat = nil
      @root_lock_path = "#{@root_lock_node}/#{@path.gsub("/", "__")}"
+
      @mutex = Monitor.new
      @cond = @mutex.new_cond
      @node_deletion_watcher = nil
@@ -119,19 +122,21 @@ module ZK
    # @return [true] if we held the lock and this method has
    #   unlocked it successfully
    #
-   # @return [false] we did not own the lock
+   # @return [false] if we did not own the lock.
+   #
+   # @note There is more than one way you might not "own the lock"
+   #   see [issue #34](https://github.com/slyphon/zk/issues/34)
    #
    def unlock
+     rval = false
      synchronize do
        if @locked
-         cleanup_lock_path!
+         rval = cleanup_lock_path!
          @locked = false
          @node_deletion_watcher = nil
-         true
-       else
-         false # i know, i know, but be explicit
        end
      end
+     rval
    end

    # (see #unlock)
@@ -220,6 +225,7 @@ module ZK
        raise LockAssertionFailedError, "not connected" unless zk.connected?
        raise LockAssertionFailedError, "lock_path was #{lock_path.inspect}" unless lock_path
        raise LockAssertionFailedError, "the lock path #{lock_path} did not exist!" unless zk.exists?(lock_path)
+       raise LockAssertionFailedError, "the parent node was replaced!" unless root_lock_path_same?
        raise LockAssertionFailedError, "we do not actually hold the lock" unless got_lock?
      end
    end
@@ -248,6 +254,8 @@ module ZK
      end
    end

+   # root_lock_path is /_zklocking/foobar
+   #
    def create_root_path!
      zk.mkdir_p(@root_lock_path)
    end
@@ -262,9 +270,14 @@ module ZK
    # prefix is the string that will appear in front of the sequence num,
    # defaults to 'lock'
    #
+   # this method also saves the stat of root_lock_path at the time of creation
+   # to ensure we don't accidentally remove a lock we don't own. see
+   # [rule #34](https://github.com/slyphon/zk/issues/34)...er, *issue* #34.
+   #
    def create_lock_path!(prefix='lock')
      synchronize do
-       @lock_path = @zk.create("#{root_lock_path}/#{prefix}",
+       @lock_path = @zk.create("#{root_lock_path}/#{prefix}", :mode => :ephemeral_sequential)
+       @parent_stat = @zk.stat(root_lock_path)
      end

      logger.debug { "got lock path #{@lock_path}" }
@@ -274,12 +287,43 @@ module ZK
      retry
    end

+   # if the root_lock_path has the same stat .ctime as the one
+   # we cached when we created our lock path, then we can be sure
+   # that we actually own the lock_path
+   #
+   # see [issue #34](https://github.com/slyphon/zk/issues/34)
+   #
+   def root_lock_path_same?
+     synchronize do
+       return false unless @parent_stat
+
+       cur_stat = zk.stat(root_lock_path)
+       cur_stat.exists? and (cur_stat.ctime == @parent_stat.ctime)
+     end
+   end
+
+   # we make a best-effort to clean up, this case is rife with race
+   # conditions if there is a lot of contention for the locks, so if we
+   # can't remove a path or if that path happens to not be empty we figure
+   # either we got pwned or that someone else will run this same method
+   # later and get to it
+   #
    def cleanup_lock_path!
-
-
+     rval = false
+
+     synchronize do
+       if root_lock_path_same?
+         logger.debug { "removing lock path #{@lock_path}" }
+
+         zk.delete(@lock_path, :ignore => :no_node)
+         zk.delete(root_lock_path, :ignore => [:not_empty, :no_node])
+         rval = true
+       end
+
+       @lock_path = @parent_stat = nil
+     end

-
-      @lock_path = nil
+     rval
    end
  end # LockerBase
end # Locker
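To ground the issue #34 fix above, a minimal usage sketch (the connection string is a placeholder; `locker`, `lock`, `assert!`, and `unlock` are the API exercised by this diff and its specs):

    zk = ZK.new('localhost:2181')
    locker = zk.locker('my_resource') # exclusive lock under /_zklocking/my_resource

    if locker.lock
      begin
        # raises LockAssertionFailedError if the parent node was replaced
        # behind our back (e.g. after a session loss)
        locker.assert!
        # ... critical section ...
      ensure
        # with this fix, returns false (and deletes nothing) if the parent's
        # ctime no longer matches the stat cached at lock time
        locker.unlock
      end
    end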
data/lib/zk/version.rb
CHANGED
data/spec/message_queue_spec.rb
CHANGED
@@ -1,10 +1,11 @@
 require File.join(File.dirname(__FILE__), %w[spec_helper])

 describe ZK::MessageQueue do
+  include_context 'connection opts'

   before(:each) do
-    @zk = ZK.new(
-    @zk2 = ZK.new(
+    @zk = ZK.new(connection_host)
+    @zk2 = ZK.new(connection_host)
     wait_until{ @zk.connected? && @zk2.connected? }
     @queue_name = "_specQueue"
     @consume_queue = @zk.queue(@queue_name)
data/spec/shared/client_contexts.rb
CHANGED
@@ -10,7 +10,7 @@ shared_context 'threaded client connection' do
   before do
     # logger.debug { "threaded client connection - begin before hook" }

-    @connection_string =
+    @connection_string = connection_host
     @base_path = '/zktests'
     @zk = ZK::Client::Threaded.new(*connection_args).tap { |z| wait_until { z.connected? } }
     @threadpool_exception = nil
data/spec/shared/locker_contexts.rb
ADDED
@@ -0,0 +1,53 @@
+shared_context 'locker non-chrooted' do
+  include_context 'connection opts'
+
+  let(:zk)  { ZK.new(*connection_args) }
+  let(:zk2) { ZK.new(*connection_args) }
+  let(:zk3) { ZK.new(*connection_args) }
+
+  let(:connections) { [zk, zk2, zk3] }
+
+  let(:path) { "lock_path" }
+  let(:root_lock_path) { "#{ZK::Locker.default_root_lock_node}/#{path}" }
+
+  before do
+    wait_until{ connections.all?(&:connected?) }
+    zk.rm_rf(ZK::Locker.default_root_lock_node)
+  end
+
+  after do
+    connections.each { |c| c.close! }
+    wait_until { !connections.any?(&:connected?) }
+    ZK.open(*connection_args) { |z| z.rm_rf(ZK::Locker.default_root_lock_node) }
+  end
+end
+
+shared_context 'locker chrooted' do
+  include_context 'connection opts'
+
+  let(:chroot_path) { '/_zk_chroot_' }
+  let(:path) { "lock_path" }
+
+  let(:zk)  { ZK.new("#{connection_host}#{chroot_path}", connection_opts) }
+  let(:zk2) { ZK.new("#{connection_host}#{chroot_path}", connection_opts) }
+  let(:zk3) { ZK.new("#{connection_host}#{chroot_path}", connection_opts) }
+  let(:connections) { [zk, zk2, zk3] }
+  let(:root_lock_path) { "#{ZK::Locker.default_root_lock_node}/#{path}" }
+
+  before do
+    ZK.open(*connection_args) do |zk|
+      zk.mkdir_p(chroot_path)
+    end
+
+    wait_until{ connections.all?(&:connected?) }
+  end
+
+  after do
+    connections.each { |c| c.close! }
+    wait_until { !connections.any?(&:connected?) }
+
+    ZK.open(*connection_args) do |zk|
+      zk.rm_rf(chroot_path)
+    end
+  end
+end
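For context on the chrooted variant above, a sketch of ZooKeeper chroot semantics (host and paths are placeholders):

    # a chrooted connection sees paths relative to its chroot suffix
    zk_root     = ZK.new('localhost:2181')
    zk_chrooted = ZK.new('localhost:2181/_zk_chroot_')

    zk_chrooted.create('/foo', '')      # actually creates /_zk_chroot_/foo
    zk_root.exists?('/_zk_chroot_/foo') # => true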
data/spec/shared/locker_examples.rb
ADDED
@@ -0,0 +1,55 @@
+# basic shared examples for locker specs (both exclusive and shared)
+
+# these assume they're being executed in the 'locker chrooted' or 'locker
+# non-chrooted' contexts
+#
+shared_examples_for 'LockerBase#assert!' do
+  it %[should raise LockAssertionFailedError if its connection is no longer connected?] do
+    zk.close!
+    lambda { locker.assert! }.should raise_error(ZK::Exceptions::LockAssertionFailedError)
+  end
+
+  it %[should raise LockAssertionFailedError if locked? is false] do
+    locker.should_not be_locked
+    lambda { locker.assert! }.should raise_error(ZK::Exceptions::LockAssertionFailedError)
+  end
+
+  it %[should raise LockAssertionFailedError if lock_path does not exist] do
+    locker.lock
+    lambda { locker.assert! }.should_not raise_error
+
+    zk.delete(locker.lock_path)
+    lambda { locker.assert! }.should raise_error(ZK::Exceptions::LockAssertionFailedError)
+  end
+
+  it %[should raise LockAssertionFailedError if our parent node's ctime is different than what we think it should be] do
+    locker.lock.should be_true
+
+    zk.rm_rf(File.dirname(locker.lock_path)) # remove the parent node
+    zk.mkdir_p(locker.lock_path)
+
+    lambda { locker.assert! }.should raise_error(ZK::Exceptions::LockAssertionFailedError)
+  end
+end
+
+shared_examples_for 'LockerBase#unlock' do
+  it %[should not delete a lock path it does not own] do
+    locker.lock.should be_true
+
+    zk.rm_rf(File.dirname(locker.lock_path)) # remove the parent node
+    zk.mkdir_p(File.dirname(locker.lock_path))
+
+    locker2.lock.should be_true
+
+    locker2.lock_path.should == locker.lock_path
+
+    lambda { locker2.assert! }.should_not raise_error
+
+    lock_path = locker.lock_path
+
+    locker.unlock.should be_false
+
+    zk.stat(lock_path).should exist
+  end
+end
+
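A hypothetical consumer of the shared contexts and examples above (the `locker`/`locker2` let-blocks follow the names the examples assume; the wiring is illustrative, not quoted from the spec files):

    describe ZK::Locker::ExclusiveLocker do
      include_context 'locker non-chrooted'

      let(:locker)  { zk.locker(path) }  # hypothetical helper names
      let(:locker2) { zk2.locker(path) }

      it_should_behave_like 'LockerBase#assert!'
      it_should_behave_like 'LockerBase#unlock'
    end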
data/spec/support/logging.rb
CHANGED
@@ -1,35 +1,49 @@
 module ZK
   TEST_LOG_PATH = File.join(ZK::ZK_ROOT, 'test.log')
-end

-
-
-
-
-
-
-
-
-  appender.
-  appender.
-
-
-
-
-
+  def self.logging_gem_setup
+    layout = ::Logging.layouts.pattern(
+      :pattern => '%.1l, [%d #%p] %30.30c{2}: %m\n',
+      :date_pattern => '%Y-%m-%d %H:%M:%S.%6N'
+    )
+
+
+    appender = ENV['ZK_DEBUG'] ? ::Logging.appenders.stderr : ::Logging.appenders.file(ZK::TEST_LOG_PATH)
+    appender.layout = layout
+    appender.immediate_at = "debug,info,warn,error,fatal"
+    # appender.auto_flushing = true
+    appender.auto_flushing = 25
+    appender.flush_period = 5
+
+    %w[ZK ClientForker spec Zookeeper].each do |name|
+      ::Logging.logger[name].tap do |log|
+        log.appenders = [appender]
+        log.level = :debug
+      end
+    end
+
+    # this logger is kinda noisy
+    ::Logging.logger['ZK::EventHandler'].level = :info
+
+    Zookeeper.logger = ::Logging.logger['Zookeeper']
+    Zookeeper.logger.level = ENV['ZOOKEEPER_DEBUG'] ? :debug : :warn
+
+    ZK::ForkHook.after_fork_in_child { ::Logging.reopen }
  end
-end

-# this logger is kinda noisy
-Logging.logger['ZK::EventHandler'].level = :info

-
-
+  def self.stdlib_logger_setup
+    require 'logger'
+    log = ::Logger.new($stderr).tap {|l| l.level = ::Logger::DEBUG }
+    ZK.logger = log
+    Zookeeper.logger = log
+  end
+end

-ZK
+ZK.logging_gem_setup
+# ZK.stdlib_logger_setup

 # Zookeeper.logger = ZK.logger.clone_new_log(:progname => 'zoo')
-
 # Zookeeper.logger = ZK.logger
 # Zookeeper.set_debug_level(4)