zk 1.5.1 → 1.5.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Gemfile +4 -0
- data/Guardfile +9 -5
- data/README.markdown +1 -1
- data/RELEASES.markdown +8 -0
- data/lib/zk/client/threaded.rb +12 -5
- data/lib/zk/fork_hook.rb +3 -0
- data/lib/zk/locker/locker_base.rb +58 -14
- data/lib/zk/version.rb +1 -1
- data/spec/message_queue_spec.rb +3 -2
- data/spec/shared/client_contexts.rb +1 -1
- data/spec/shared/locker_contexts.rb +53 -0
- data/spec/shared/locker_examples.rb +55 -0
- data/spec/support/logging.rb +37 -23
- data/spec/zk/locker/exclusive_locker_spec.rb +122 -0
- data/spec/zk/locker/locker_basic_spec.rb +79 -0
- data/spec/zk/locker/shared_exclusive_integration_spec.rb +157 -0
- data/spec/zk/locker/shared_locker_spec.rb +137 -0
- data/spec/zk/pool_spec.rb +6 -3
- data/spec/zk/watch_spec.rb +0 -1
- data/spec/zk/zookeeper_spec.rb +2 -1
- data/zk.gemspec +1 -1
- metadata +19 -9
- data/spec/zk/locker_spec.rb +0 -552
data/Gemfile
CHANGED
data/Guardfile
CHANGED
@@ -17,14 +17,18 @@ guard 'rspec', :version => 2 do
 
   watch(%r{^lib/(.+)\.rb$}) do |m|
     case m[1]
-    when
+    when 'zk/event_handler'
      "spec/zk/watch_spec.rb"
-
+
+    when 'zk/client/threaded'
      ["spec/zk/client_spec.rb", "spec/zk/zookeeper_spec.rb"]
-
-
-
+
+    when %r{^(?:zk/locker/locker_base|spec/shared/locker)}
+      Dir["spec/zk/locker/*_spec.rb"]
+
+    when 'zk' # .rb
      'spec' # run all tests
+
    else
      "spec/#{m[1]}_spec.rb"
    end
data/README.markdown
CHANGED
@@ -67,7 +67,7 @@ In addition to all of that, I would like to think that the public API the ZK::Cl
 ## NEWS ##
 ### v1.5.1 ###
 
-* Added a `:retry_duration` option to client constructor which will allows the user to specify for how long in the case of a connection loss, should an operation wait for the connection to be re-established before retrying the operation. This can be set at a global level and overridden on a per-call basis. The default is to not retry (which may change at a later date). Generally speaking, a timeout of > 30s is probably excessive, and care should be taken because during a connection loss, the server-side state may change without you being aware of it (i.e. events will not be delivered).
+* Added a `:retry_duration` option to the Threaded client constructor which will allows the user to specify for how long in the case of a connection loss, should an operation wait for the connection to be re-established before retrying the operation. This can be set at a global level and overridden on a per-call basis. The default is to not retry (which may change at a later date). Generally speaking, a timeout of > 30s is probably excessive, and care should be taken because during a connection loss, the server-side state may change without you being aware of it (i.e. events will not be delivered).
 
 * Small fork-hook implementation fix. Previously we were using WeakRefs so that hooks would not prevent an object from being garbage collected. This has been replaced with a finalizer which is more deterministic.
 
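As a quick illustration of the option described in that note, the value is given at construction time. A minimal sketch (the connection string is illustrative):

    require 'zk'

    # sketch: retry operations for up to 10 seconds after a connection loss
    # before giving up; the default is to not retry at all
    zk = ZK.new('localhost:2181', :retry_duration => 10)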
data/RELEASES.markdown
CHANGED
@@ -1,5 +1,13 @@
 This file notes feature differences and bugfixes contained between releases.
 
+### v1.5.2 ###
+
+* Fix locker cleanup code to avoid a nasty race when a session is lost, see [issue #34](https://github.com/slyphon/zk/issues/34)
+
+* Fix potential deadlock in ForkHook code so the mutex is unlocked in the case of an exception
+
+* Do not hang forever when shutting down and the shutdown thread does not exit (wait 30 seconds).
+
 ### v1.5.1 ###
 
 * Added a `:retry_duration` option to client constructor which will allows the user to specify for how long in the case of a connection loss, should an operation wait for the connection to be re-established before retrying the operation. This can be set at a global level and overridden on a per-call basis. The default is to not retry (which may change at a later date). Generally speaking, a timeout of > 30s is probably excessive, and care should be taken because during a connection loss, the server-side state may change without you being aware of it (i.e. events will not be delivered).
data/lib/zk/client/threaded.rb
CHANGED
@@ -171,6 +171,8 @@ module ZK
 
       @retry_duration = opts.fetch(:retry_duration, nil).to_i
 
+      yield self if block_given?
+
       @fork_subs = [
         ForkHook.prepare_for_fork(method(:pause_before_fork_in_parent)),
         ForkHook.after_fork_in_parent(method(:resume_after_fork_in_parent)),
@@ -179,11 +181,10 @@ module ZK
 
       ObjectSpace.define_finalizer(self, self.class.finalizer(@fork_subs))
 
-      yield self if block_given?
-
       connect if opts.fetch(:connect, true)
     end
 
+    # @private
     def self.finalizer(hooks)
       proc { hooks.each(&:unregister) }
     end
@@ -259,7 +260,11 @@ module ZK
       @cond.broadcast
     end
 
-
+      # the compact is here because the @cnx *may* be nil when this callback is fired by the
+      # ForkHook (in the case of ZK.open). The race is between the GC calling the finalizer
+      [@event_handler, @threadpool, @cnx].compact.each(&:pause_before_fork_in_parent)
+    ensure
+      logger.debug { "#{self.class}##{__method__} returning" }
     end
 
     # @private
@@ -270,7 +275,7 @@ module ZK
 
       logger.debug { "#{self.class}##{__method__}" }
 
-      [@cnx, @event_handler, @threadpool].each(&:resume_after_fork_in_parent)
+      [@cnx, @event_handler, @threadpool].compact.each(&:resume_after_fork_in_parent)
 
       @cond.broadcast
     end
@@ -304,6 +309,8 @@ module ZK
       #
       shutdown_thread = Thread.new do
         @threadpool.shutdown(10)
+
+        # this will call #close
         super
 
         @mutex.synchronize do
@@ -313,7 +320,7 @@ module ZK
        end
      end
 
-      on_tpool ? shutdown_thread : shutdown_thread.join
+      on_tpool ? shutdown_thread : shutdown_thread.join(30)
    end
 
    # {see Base#close}
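Two behaviors worth noting in the hunks above: the fork-hook callbacks now `compact` their receiver lists because `@cnx` may legitimately be nil, and shutdown bounds its wait with `Thread#join(30)`, which returns the thread if it finished in time and nil on timeout. A standalone sketch of that bounded-join pattern (not the gem's code):

    # stands in for a shutdown thread that never exits
    worker = Thread.new { sleep }

    if worker.join(30).nil?
      # join(30) timed out; the thread is still alive, but we stop waiting
      warn "shutdown thread did not exit within 30s, continuing anyway"
    end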
data/lib/zk/fork_hook.rb
CHANGED
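The fork_hook.rb change itself is small (+3 -0); per the release notes it ensures the ForkHook mutex is released even when a registered hook raises. A minimal illustrative sketch of that ensure pattern (names are hypothetical, not the gem's code):

    # illustrative only: release the lock on the way out, even if a hook raises
    def fire_hooks_safely(mutex, hooks)
      mutex.lock
      begin
        hooks.each(&:call)
      ensure
        mutex.unlock # an exception above no longer leaves the mutex held
      end
    end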
data/lib/zk/locker/locker_base.rb
CHANGED
@@ -48,11 +48,14 @@ module ZK
      def initialize(client, name, root_lock_node=nil)
        @zk = client
        @root_lock_node = root_lock_node || Locker.default_root_lock_node
-
-        @
-        @
-        @
+
+        @path = name
+        @locked = false
+        @waiting = false
+        @lock_path = nil
+        @parent_stat = nil
        @root_lock_path = "#{@root_lock_node}/#{@path.gsub("/", "__")}"
+
        @mutex = Monitor.new
        @cond = @mutex.new_cond
        @node_deletion_watcher = nil
@@ -119,19 +122,21 @@ module ZK
      # @return [true] if we held the lock and this method has
      #   unlocked it successfully
      #
-      # @return [false] we did not own the lock
+      # @return [false] if we did not own the lock.
+      #
+      # @note There is more than one way you might not "own the lock"
+      #   see [issue #34](https://github.com/slyphon/zk/issues/34)
      #
      def unlock
+        rval = false
        synchronize do
          if @locked
-            cleanup_lock_path!
+            rval = cleanup_lock_path!
            @locked = false
            @node_deletion_watcher = nil
-            true
-          else
-            false # i know, i know, but be explicit
          end
        end
+        rval
      end
 
      # (see #unlock)
@@ -220,6 +225,7 @@ module ZK
          raise LockAssertionFailedError, "not connected" unless zk.connected?
          raise LockAssertionFailedError, "lock_path was #{lock_path.inspect}" unless lock_path
          raise LockAssertionFailedError, "the lock path #{lock_path} did not exist!" unless zk.exists?(lock_path)
+          raise LockAssertionFailedError, "the parent node was replaced!" unless root_lock_path_same?
          raise LockAssertionFailedError, "we do not actually hold the lock" unless got_lock?
        end
      end
@@ -248,6 +254,8 @@ module ZK
        end
      end
 
+      # root_lock_path is /_zklocking/foobar
+      #
      def create_root_path!
        zk.mkdir_p(@root_lock_path)
      end
@@ -262,9 +270,14 @@ module ZK
      # prefix is the string that will appear in front of the sequence num,
      # defaults to 'lock'
      #
+      # this method also saves the stat of root_lock_path at the time of creation
+      # to ensure we don't accidentally remove a lock we don't own. see
+      # [rule #34](https://github.com/slyphon/zk/issues/34)...er, *issue* #34.
+      #
      def create_lock_path!(prefix='lock')
        synchronize do
-          @lock_path = @zk.create("#{root_lock_path}/#{prefix}",
+          @lock_path = @zk.create("#{root_lock_path}/#{prefix}", :mode => :ephemeral_sequential)
+          @parent_stat = @zk.stat(root_lock_path)
        end
 
        logger.debug { "got lock path #{@lock_path}" }
@@ -274,12 +287,43 @@ module ZK
        retry
      end
 
+      # if the root_lock_path has the same stat .ctime as the one
+      # we cached when we created our lock path, then we can be sure
+      # that we actually own the lock_path
+      #
+      # see [issue #34](https://github.com/slyphon/zk/issues/34)
+      #
+      def root_lock_path_same?
+        synchronize do
+          return false unless @parent_stat
+
+          cur_stat = zk.stat(root_lock_path)
+          cur_stat.exists? and (cur_stat.ctime == @parent_stat.ctime)
+        end
+      end
+
+      # we make a best-effort to clean up, this case is rife with race
+      # conditions if there is a lot of contention for the locks, so if we
+      # can't remove a path or if that path happens to not be empty we figure
+      # either we got pwned or that someone else will run this same method
+      # later and get to it
+      #
      def cleanup_lock_path!
-
-
+        rval = false
+
+        synchronize do
+          if root_lock_path_same?
+            logger.debug { "removing lock path #{@lock_path}" }
+
+            zk.delete(@lock_path, :ignore => :no_node)
+            zk.delete(root_lock_path, :ignore => [:not_empty, :no_node])
+            rval = true
+          end
+
+          @lock_path = @parent_stat = nil
+        end
 
-
-        @lock_path = nil
+        rval
      end
    end # LockerBase
  end # Locker
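Taken together: create_lock_path! now records the parent znode's ctime, and unlock/cleanup_lock_path!/assert! refuse to act when the parent was deleted and recreated out from under the session. The public contract is unchanged for callers; a short usage sketch (connection string is illustrative):

    require 'zk'

    zk = ZK.new('localhost:2181')       # illustrative host
    locker = zk.locker('my_resource')   # exclusive locker under /_zklocking

    if locker.lock
      begin
        # raises LockAssertionFailedError if, e.g., the parent node was replaced
        locker.assert!
        # ... critical section ...
      ensure
        # returns false (and leaves the znode alone) if we no longer own the lock
        locker.unlock
      end
    end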
data/lib/zk/version.rb
CHANGED
data/spec/message_queue_spec.rb
CHANGED
@@ -1,10 +1,11 @@
 require File.join(File.dirname(__FILE__), %w[spec_helper])
 
 describe ZK::MessageQueue do
+  include_context 'connection opts'
 
   before(:each) do
-    @zk = ZK.new(
-    @zk2 = ZK.new(
+    @zk = ZK.new(connection_host)
+    @zk2 = ZK.new(connection_host)
     wait_until{ @zk.connected? && @zk2.connected? }
     @queue_name = "_specQueue"
     @consume_queue = @zk.queue(@queue_name)
data/spec/shared/client_contexts.rb
CHANGED
@@ -10,7 +10,7 @@ shared_context 'threaded client connection' do
   before do
     # logger.debug { "threaded client connection - begin before hook" }
 
-    @connection_string =
+    @connection_string = connection_host
     @base_path = '/zktests'
     @zk = ZK::Client::Threaded.new(*connection_args).tap { |z| wait_until { z.connected? } }
     @threadpool_exception = nil
data/spec/shared/locker_contexts.rb
ADDED
@@ -0,0 +1,53 @@
+shared_context 'locker non-chrooted' do
+  include_context 'connection opts'
+
+  let(:zk) { ZK.new(*connection_args) }
+  let(:zk2) { ZK.new(*connection_args) }
+  let(:zk3) { ZK.new(*connection_args) }
+
+  let(:connections) { [zk, zk2, zk3] }
+
+  let(:path) { "lock_path" }
+  let(:root_lock_path) { "#{ZK::Locker.default_root_lock_node}/#{path}" }
+
+  before do
+    wait_until{ connections.all?(&:connected?) }
+    zk.rm_rf(ZK::Locker.default_root_lock_node)
+  end
+
+  after do
+    connections.each { |c| c.close! }
+    wait_until { !connections.any?(&:connected?) }
+    ZK.open(*connection_args) { |z| z.rm_rf(ZK::Locker.default_root_lock_node) }
+  end
+end
+
+shared_context 'locker chrooted' do
+  include_context 'connection opts'
+
+  let(:chroot_path) { '/_zk_chroot_' }
+  let(:path) { "lock_path" }
+
+  let(:zk) { ZK.new("#{connection_host}#{chroot_path}", connection_opts) }
+  let(:zk2) { ZK.new("#{connection_host}#{chroot_path}", connection_opts) }
+  let(:zk3) { ZK.new("#{connection_host}#{chroot_path}", connection_opts) }
+  let(:connections) { [zk, zk2, zk3] }
+  let(:root_lock_path) { "#{ZK::Locker.default_root_lock_node}/#{path}" }
+
+  before do
+    ZK.open(*connection_args) do |zk|
+      zk.mkdir_p(chroot_path)
+    end
+
+    wait_until{ connections.all?(&:connected?) }
+  end
+
+  after do
+    connections.each { |c| c.close! }
+    wait_until { !connections.any?(&:connected?) }
+
+    ZK.open(*connection_args) do |zk|
+      zk.rm_rf(chroot_path)
+    end
+  end
+end
data/spec/shared/locker_examples.rb
ADDED
@@ -0,0 +1,55 @@
+# basic shared exmples for locker specs (both exclusive and shared)
+
+# these assume they're being executed in the 'locker chrooted' or 'locker
+# non-chrooted' contexts
+#
+shared_examples_for 'LockerBase#assert!' do
+  it %[should raise LockAssertionFailedError if its connection is no longer connected?] do
+    zk.close!
+    lambda { locker.assert! }.should raise_error(ZK::Exceptions::LockAssertionFailedError)
+  end
+
+  it %[should raise LockAssertionFailedError if locked? is false] do
+    locker.should_not be_locked
+    lambda { locker.assert! }.should raise_error(ZK::Exceptions::LockAssertionFailedError)
+  end
+
+  it %[should raise LockAssertionFailedError lock_path does not exist] do
+    locker.lock
+    lambda { locker.assert! }.should_not raise_error
+
+    zk.delete(locker.lock_path)
+    lambda { locker.assert! }.should raise_error(ZK::Exceptions::LockAssertionFailedError)
+  end
+
+  it %[should raise LockAssertionFailedError if our parent node's ctime is different than what we think it should be] do
+    locker.lock.should be_true
+
+    zk.rm_rf(File.dirname(locker.lock_path)) # remove the parent node
+    zk.mkdir_p(locker.lock_path)
+
+    lambda { locker.assert! }.should raise_error(ZK::Exceptions::LockAssertionFailedError)
+  end
+end
+
+shared_examples_for 'LockerBase#unlock' do
+  it %[should not delete a lock path it does not own] do
+    locker.lock.should be_true
+
+    zk.rm_rf(File.dirname(locker.lock_path)) # remove the parent node
+    zk.mkdir_p(File.dirname(locker.lock_path))
+
+    locker2.lock.should be_true
+
+    locker2.lock_path.should == locker.lock_path
+
+    lambda { locker2.assert! }.should_not raise_error
+
+    lock_path = locker.lock_path
+
+    locker.unlock.should be_false
+
+    zk.stat(lock_path).should exist
+  end
+end
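These shared examples expect the including spec to define locker (and locker2 for the unlock examples) on top of one of the locker contexts above. A hypothetical inclusion might look like:

    # hypothetical spec showing how the contexts and shared examples combine
    require 'spec_helper'

    describe ZK::Locker::ExclusiveLocker do
      include_context 'locker non-chrooted'

      let(:locker)  { zk.locker(path) }
      let(:locker2) { zk2.locker(path) }

      it_should_behave_like 'LockerBase#assert!'
      it_should_behave_like 'LockerBase#unlock'
    end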
data/spec/support/logging.rb
CHANGED
@@ -1,35 +1,49 @@
 module ZK
   TEST_LOG_PATH = File.join(ZK::ZK_ROOT, 'test.log')
-end
 
-
-
-
-
-
-
-
-
-  appender.
-  appender.
-
-
-
-
-
+  def self.logging_gem_setup
+    layout = ::Logging.layouts.pattern(
+      :pattern => '%.1l, [%d #%p] %30.30c{2}: %m\n',
+      :date_pattern => '%Y-%m-%d %H:%M:%S.%6N'
+    )
+
+
+    appender = ENV['ZK_DEBUG'] ? ::Logging.appenders.stderr : ::Logging.appenders.file(ZK::TEST_LOG_PATH)
+    appender.layout = layout
+    appender.immediate_at = "debug,info,warn,error,fatal"
+    # appender.auto_flushing = true
+    appender.auto_flushing = 25
+    appender.flush_period = 5
+
+    %w[ZK ClientForker spec Zookeeper].each do |name|
+      ::Logging.logger[name].tap do |log|
+        log.appenders = [appender]
+        log.level = :debug
+      end
+    end
+
+    # this logger is kinda noisy
+    ::Logging.logger['ZK::EventHandler'].level = :info
+
+    Zookeeper.logger = ::Logging.logger['Zookeeper']
+    Zookeeper.logger.level = ENV['ZOOKEEPER_DEBUG'] ? :debug : :warn
+
+    ZK::ForkHook.after_fork_in_child { ::Logging.reopen }
   end
-end
 
-# this logger is kinda noisy
-Logging.logger['ZK::EventHandler'].level = :info
 
-
-
+  def self.stdlib_logger_setup
+    require 'logger'
+    log = ::Logger.new($stderr).tap {|l| l.level = ::Logger::DEBUG }
+    ZK.logger = log
+    Zookeeper.logger = log
+  end
+end
 
-ZK
+ZK.logging_gem_setup
+# ZK.stdlib_logger_setup
 
 # Zookeeper.logger = ZK.logger.clone_new_log(:progname => 'zoo')
-
 # Zookeeper.logger = ZK.logger
 # Zookeeper.set_debug_level(4)
 
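As the conditionals in the helper show, output is steered by environment variables: with ZK_DEBUG set the appender writes to stderr instead of test.log, and with ZOOKEEPER_DEBUG set the Zookeeper logger runs at :debug rather than :warn. The file also keeps a stdlib alternative; swapping the calls at the bottom is all that is needed:

    # alternative wiring: route everything through a plain stdlib Logger on
    # stderr instead of the logging gem
    # ZK.logging_gem_setup
    ZK.stdlib_logger_setup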