zk 1.5.1 → 1.5.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/Gemfile CHANGED
@@ -33,6 +33,10 @@ group :development do
33
33
  gem 'guard-shell', :require => false
34
34
  gem 'guard-bundler', :require => false
35
35
  gem 'growl', :require => false
36
+
37
+ if RUBY_PLATFORM =~ /darwin/i
38
+ gem 'rb-readline', :platform => :ruby
39
+ end
36
40
  end
37
41
 
38
42
  group :test do
data/Guardfile CHANGED
@@ -17,14 +17,18 @@ guard 'rspec', :version => 2 do
17
17
 
18
18
  watch(%r{^lib/(.+)\.rb$}) do |m|
19
19
  case m[1]
20
- when %r{^zk/event_handler$}
20
+ when 'zk/event_handler'
21
21
  "spec/zk/watch_spec.rb"
22
- when %r{^zk/client/threaded.rb$}
22
+
23
+ when 'zk/client/threaded'
23
24
  ["spec/zk/client_spec.rb", "spec/zk/zookeeper_spec.rb"]
24
- when %r{^zk/locker/}
25
- "spec/zk/locker_spec.rb"
26
- when %r{^zk\.rb$}
25
+
26
+ when %r{^(?:zk/locker/locker_base|spec/shared/locker)}
27
+ Dir["spec/zk/locker/*_spec.rb"]
28
+
29
+ when 'zk' # .rb
27
30
  'spec' # run all tests
31
+
28
32
  else
29
33
  "spec/#{m[1]}_spec.rb"
30
34
  end
data/README.markdown CHANGED
@@ -67,7 +67,7 @@ In addition to all of that, I would like to think that the public API the ZK::Cl
67
67
  ## NEWS ##
68
68
  ### v1.5.1 ###
69
69
 
70
- * Added a `:retry_duration` option to client constructor which will allows the user to specify for how long in the case of a connection loss, should an operation wait for the connection to be re-established before retrying the operation. This can be set at a global level and overridden on a per-call basis. The default is to not retry (which may change at a later date). Generally speaking, a timeout of > 30s is probably excessive, and care should be taken because during a connection loss, the server-side state may change without you being aware of it (i.e. events will not be delivered).
70
+ * Added a `:retry_duration` option to the Threaded client constructor which allows the user to specify how long, in the case of a connection loss, an operation should wait for the connection to be re-established before retrying the operation. This can be set at a global level and overridden on a per-call basis. The default is to not retry (which may change at a later date). Generally speaking, a timeout of > 30s is probably excessive, and care should be taken because during a connection loss, the server-side state may change without you being aware of it (i.e. events will not be delivered).
71
71
 
72
72
  * Small fork-hook implementation fix. Previously we were using WeakRefs so that hooks would not prevent an object from being garbage collected. This has been replaced with a finalizer which is more deterministic.
73
73
 
data/RELEASES.markdown CHANGED
@@ -1,5 +1,13 @@
1
1
  This file notes feature differences and bugfixes contained between releases.
2
2
 
3
+ ### v1.5.2 ###
4
+
5
+ * Fix locker cleanup code to avoid a nasty race when a session is lost, see [issue #34](https://github.com/slyphon/zk/issues/34)
6
+
7
+ * Fix potential deadlock in ForkHook code so the mutex is unlocked in the case of an exception
8
+
9
+ * Do not hang forever when shutting down and the shutdown thread does not exit (wait 30 seconds).
10
+
3
11
  ### v1.5.1 ###
4
12
 
5
13
  * Added a `:retry_duration` option to client constructor which allows the user to specify how long, in the case of a connection loss, an operation should wait for the connection to be re-established before retrying the operation. This can be set at a global level and overridden on a per-call basis. The default is to not retry (which may change at a later date). Generally speaking, a timeout of > 30s is probably excessive, and care should be taken because during a connection loss, the server-side state may change without you being aware of it (i.e. events will not be delivered).
@@ -171,6 +171,8 @@ module ZK
171
171
 
172
172
  @retry_duration = opts.fetch(:retry_duration, nil).to_i
173
173
 
174
+ yield self if block_given?
175
+
174
176
  @fork_subs = [
175
177
  ForkHook.prepare_for_fork(method(:pause_before_fork_in_parent)),
176
178
  ForkHook.after_fork_in_parent(method(:resume_after_fork_in_parent)),
@@ -179,11 +181,10 @@ module ZK
179
181
 
180
182
  ObjectSpace.define_finalizer(self, self.class.finalizer(@fork_subs))
181
183
 
182
- yield self if block_given?
183
-
184
184
  connect if opts.fetch(:connect, true)
185
185
  end
186
186
 
187
+ # @private
187
188
  def self.finalizer(hooks)
188
189
  proc { hooks.each(&:unregister) }
189
190
  end
@@ -259,7 +260,11 @@ module ZK
259
260
  @cond.broadcast
260
261
  end
261
262
 
262
- [@event_handler, @threadpool, @cnx].each(&:pause_before_fork_in_parent)
263
+ # the compact is here because the @cnx *may* be nil when this callback is fired by the
264
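+ # ForkHook (in the case of ZK.open). The race is between the GC calling the finalizer (which unregisters the fork hooks) and, presumably, the ForkHook firing this callback -- TODO confirm.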
265
+ [@event_handler, @threadpool, @cnx].compact.each(&:pause_before_fork_in_parent)
266
+ ensure
267
+ logger.debug { "#{self.class}##{__method__} returning" }
263
268
  end
264
269
 
265
270
  # @private
@@ -270,7 +275,7 @@ module ZK
270
275
 
271
276
  logger.debug { "#{self.class}##{__method__}" }
272
277
 
273
- [@cnx, @event_handler, @threadpool].each(&:resume_after_fork_in_parent)
278
+ [@cnx, @event_handler, @threadpool].compact.each(&:resume_after_fork_in_parent)
274
279
 
275
280
  @cond.broadcast
276
281
  end
@@ -304,6 +309,8 @@ module ZK
304
309
  #
305
310
  shutdown_thread = Thread.new do
306
311
  @threadpool.shutdown(10)
312
+
313
+ # this will call #close
307
314
  super
308
315
 
309
316
  @mutex.synchronize do
@@ -313,7 +320,7 @@ module ZK
313
320
  end
314
321
  end
315
322
 
316
- on_tpool ? shutdown_thread : shutdown_thread.join
323
+ on_tpool ? shutdown_thread : shutdown_thread.join(30)
317
324
  end
318
325
 
319
326
  # {see Base#close}
data/lib/zk/fork_hook.rb CHANGED
@@ -18,6 +18,9 @@ module ZK
18
18
  @mutex.lock
19
19
  logger.debug { "#{__method__}" }
20
20
  safe_call(@hooks[:prepare])
21
+ rescue Exception => e
22
+ @mutex.unlock rescue nil # if something goes wrong in a hook, then release the lock
23
+ raise e
21
24
  end
22
25
 
23
26
  # @private
@@ -48,11 +48,14 @@ module ZK
48
48
  def initialize(client, name, root_lock_node=nil)
49
49
  @zk = client
50
50
  @root_lock_node = root_lock_node || Locker.default_root_lock_node
51
- @path = name
52
- @locked = false
53
- @waiting = false
54
- @lock_path = nil
51
+
52
+ @path = name
53
+ @locked = false
54
+ @waiting = false
55
+ @lock_path = nil
56
+ @parent_stat = nil
55
57
  @root_lock_path = "#{@root_lock_node}/#{@path.gsub("/", "__")}"
58
+
56
59
  @mutex = Monitor.new
57
60
  @cond = @mutex.new_cond
58
61
  @node_deletion_watcher = nil
@@ -119,19 +122,21 @@ module ZK
119
122
  # @return [true] if we held the lock and this method has
120
123
  # unlocked it successfully
121
124
  #
122
- # @return [false] we did not own the lock
125
+ # @return [false] if we did not own the lock.
126
+ #
127
+ # @note There is more than one way you might not "own the lock"
128
+ # see [issue #34](https://github.com/slyphon/zk/issues/34)
123
129
  #
124
130
  def unlock
131
+ rval = false
125
132
  synchronize do
126
133
  if @locked
127
- cleanup_lock_path!
134
+ rval = cleanup_lock_path!
128
135
  @locked = false
129
136
  @node_deletion_watcher = nil
130
- true
131
- else
132
- false # i know, i know, but be explicit
133
137
  end
134
138
  end
139
+ rval
135
140
  end
136
141
 
137
142
  # (see #unlock)
@@ -220,6 +225,7 @@ module ZK
220
225
  raise LockAssertionFailedError, "not connected" unless zk.connected?
221
226
  raise LockAssertionFailedError, "lock_path was #{lock_path.inspect}" unless lock_path
222
227
  raise LockAssertionFailedError, "the lock path #{lock_path} did not exist!" unless zk.exists?(lock_path)
228
+ raise LockAssertionFailedError, "the parent node was replaced!" unless root_lock_path_same?
223
229
  raise LockAssertionFailedError, "we do not actually hold the lock" unless got_lock?
224
230
  end
225
231
  end
@@ -248,6 +254,8 @@ module ZK
248
254
  end
249
255
  end
250
256
 
257
+ # root_lock_path is /_zklocking/foobar
258
+ #
251
259
  def create_root_path!
252
260
  zk.mkdir_p(@root_lock_path)
253
261
  end
@@ -262,9 +270,14 @@ module ZK
262
270
  # prefix is the string that will appear in front of the sequence num,
263
271
  # defaults to 'lock'
264
272
  #
273
+ # this method also saves the stat of root_lock_path at the time of creation
274
+ # to ensure we don't accidentally remove a lock we don't own. see
275
+ # [rule #34](https://github.com/slyphon/zk/issues/34)...er, *issue* #34.
276
+ #
265
277
  def create_lock_path!(prefix='lock')
266
278
  synchronize do
267
- @lock_path = @zk.create("#{root_lock_path}/#{prefix}", "", :mode => :ephemeral_sequential)
279
+ @lock_path = @zk.create("#{root_lock_path}/#{prefix}", :mode => :ephemeral_sequential)
280
+ @parent_stat = @zk.stat(root_lock_path)
268
281
  end
269
282
 
270
283
  logger.debug { "got lock path #{@lock_path}" }
@@ -274,12 +287,43 @@ module ZK
274
287
  retry
275
288
  end
276
289
 
290
+ # if the root_lock_path has the same stat .ctime as the one
291
+ # we cached when we created our lock path, then we can be sure
292
+ # that we actually own the lock_path
293
+ #
294
+ # see [issue #34](https://github.com/slyphon/zk/issues/34)
295
+ #
296
+ def root_lock_path_same?
297
+ synchronize do
298
+ return false unless @parent_stat
299
+
300
+ cur_stat = zk.stat(root_lock_path)
301
+ cur_stat.exists? and (cur_stat.ctime == @parent_stat.ctime)
302
+ end
303
+ end
304
+
305
+ # we make a best-effort to clean up, this case is rife with race
306
+ # conditions if there is a lot of contention for the locks, so if we
307
+ # can't remove a path or if that path happens to not be empty we figure
308
+ # either we got pwned or that someone else will run this same method
309
+ # later and get to it
310
+ #
277
311
  def cleanup_lock_path!
278
- logger.debug { "removing lock path #{@lock_path}" }
279
- zk.delete(@lock_path)
312
+ rval = false
313
+
314
+ synchronize do
315
+ if root_lock_path_same?
316
+ logger.debug { "removing lock path #{@lock_path}" }
317
+
318
+ zk.delete(@lock_path, :ignore => :no_node)
319
+ zk.delete(root_lock_path, :ignore => [:not_empty, :no_node])
320
+ rval = true
321
+ end
322
+
323
+ @lock_path = @parent_stat = nil
324
+ end
280
325
 
281
- zk.delete(root_lock_path, :ignore => :not_empty)
282
- @lock_path = nil
326
+ rval
283
327
  end
284
328
  end # LockerBase
285
329
  end # Locker
data/lib/zk/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module ZK
2
- VERSION = "1.5.1"
2
+ VERSION = "1.5.2"
3
3
  end
@@ -1,10 +1,11 @@
1
1
  require File.join(File.dirname(__FILE__), %w[spec_helper])
2
2
 
3
3
  describe ZK::MessageQueue do
4
+ include_context 'connection opts'
4
5
 
5
6
  before(:each) do
6
- @zk = ZK.new("localhost:#{ZK.test_port}")
7
- @zk2 = ZK.new("localhost:#{ZK.test_port}")
7
+ @zk = ZK.new(connection_host)
8
+ @zk2 = ZK.new(connection_host)
8
9
  wait_until{ @zk.connected? && @zk2.connected? }
9
10
  @queue_name = "_specQueue"
10
11
  @consume_queue = @zk.queue(@queue_name)
@@ -10,7 +10,7 @@ shared_context 'threaded client connection' do
10
10
  before do
11
11
  # logger.debug { "threaded client connection - begin before hook" }
12
12
 
13
- @connection_string = "localhost:#{ZK.test_port}"
13
+ @connection_string = connection_host
14
14
  @base_path = '/zktests'
15
15
  @zk = ZK::Client::Threaded.new(*connection_args).tap { |z| wait_until { z.connected? } }
16
16
  @threadpool_exception = nil
@@ -0,0 +1,53 @@
1
+ shared_context 'locker non-chrooted' do
2
+ include_context 'connection opts'
3
+
4
+ let(:zk) { ZK.new(*connection_args) }
5
+ let(:zk2) { ZK.new(*connection_args) }
6
+ let(:zk3) { ZK.new(*connection_args) }
7
+
8
+ let(:connections) { [zk, zk2, zk3] }
9
+
10
+ let(:path) { "lock_path" }
11
+ let(:root_lock_path) { "#{ZK::Locker.default_root_lock_node}/#{path}" }
12
+
13
+ before do
14
+ wait_until{ connections.all?(&:connected?) }
15
+ zk.rm_rf(ZK::Locker.default_root_lock_node)
16
+ end
17
+
18
+ after do
19
+ connections.each { |c| c.close! }
20
+ wait_until { !connections.any?(&:connected?) }
21
+ ZK.open(*connection_args) { |z| z.rm_rf(ZK::Locker.default_root_lock_node) }
22
+ end
23
+ end
24
+
25
+ shared_context 'locker chrooted' do
26
+ include_context 'connection opts'
27
+
28
+ let(:chroot_path) { '/_zk_chroot_' }
29
+ let(:path) { "lock_path" }
30
+
31
+ let(:zk) { ZK.new("#{connection_host}#{chroot_path}", connection_opts) }
32
+ let(:zk2) { ZK.new("#{connection_host}#{chroot_path}", connection_opts) }
33
+ let(:zk3) { ZK.new("#{connection_host}#{chroot_path}", connection_opts) }
34
+ let(:connections) { [zk, zk2, zk3] }
35
+ let(:root_lock_path) { "#{ZK::Locker.default_root_lock_node}/#{path}" }
36
+
37
+ before do
38
+ ZK.open(*connection_args) do |zk|
39
+ zk.mkdir_p(chroot_path)
40
+ end
41
+
42
+ wait_until{ connections.all?(&:connected?) }
43
+ end
44
+
45
+ after do
46
+ connections.each { |c| c.close! }
47
+ wait_until { !connections.any?(&:connected?) }
48
+
49
+ ZK.open(*connection_args) do |zk|
50
+ zk.rm_rf(chroot_path)
51
+ end
52
+ end
53
+ end
@@ -0,0 +1,55 @@
1
+ # basic shared examples for locker specs (both exclusive and shared)
2
+
3
+ # these assume they're being executed in the 'locker chrooted' or 'locker
4
+ # non-chrooted' contexts
5
+ #
6
+ shared_examples_for 'LockerBase#assert!' do
7
+ it %[should raise LockAssertionFailedError if its connection is no longer connected?] do
8
+ zk.close!
9
+ lambda { locker.assert! }.should raise_error(ZK::Exceptions::LockAssertionFailedError)
10
+ end
11
+
12
+ it %[should raise LockAssertionFailedError if locked? is false] do
13
+ locker.should_not be_locked
14
+ lambda { locker.assert! }.should raise_error(ZK::Exceptions::LockAssertionFailedError)
15
+ end
16
+
17
+ it %[should raise LockAssertionFailedError lock_path does not exist] do
18
+ locker.lock
19
+ lambda { locker.assert! }.should_not raise_error
20
+
21
+ zk.delete(locker.lock_path)
22
+ lambda { locker.assert! }.should raise_error(ZK::Exceptions::LockAssertionFailedError)
23
+ end
24
+
25
+ it %[should raise LockAssertionFailedError if our parent node's ctime is different than what we think it should be] do
26
+ locker.lock.should be_true
27
+
28
+ zk.rm_rf(File.dirname(locker.lock_path)) # remove the parent node
29
+ zk.mkdir_p(locker.lock_path)
30
+
31
+ lambda { locker.assert! }.should raise_error(ZK::Exceptions::LockAssertionFailedError)
32
+ end
33
+ end
34
+
35
+ shared_examples_for 'LockerBase#unlock' do
36
+ it %[should not delete a lock path it does not own] do
37
+ locker.lock.should be_true
38
+
39
+ zk.rm_rf(File.dirname(locker.lock_path)) # remove the parent node
40
+ zk.mkdir_p(File.dirname(locker.lock_path))
41
+
42
+ locker2.lock.should be_true
43
+
44
+ locker2.lock_path.should == locker.lock_path
45
+
46
+ lambda { locker2.assert! }.should_not raise_error
47
+
48
+ lock_path = locker.lock_path
49
+
50
+ locker.unlock.should be_false
51
+
52
+ zk.stat(lock_path).should exist
53
+ end
54
+ end
55
+
@@ -1,35 +1,49 @@
1
1
  module ZK
2
2
  TEST_LOG_PATH = File.join(ZK::ZK_ROOT, 'test.log')
3
- end
4
3
 
5
- layout = Logging.layouts.pattern(
6
- :pattern => '%.1l, [%d #%p] %30.30c{2}: %m\n',
7
- :date_pattern => '%Y-%m-%d %H:%M:%S.%6N'
8
- )
9
-
10
- appender = ENV['ZK_DEBUG'] ? Logging.appenders.stderr : Logging.appenders.file(ZK::TEST_LOG_PATH)
11
- appender.layout = layout
12
- #appender.immediate_at = "debug,info,warn,error,fatal"
13
- appender.auto_flushing = 25
14
- appender.flush_period = 5
15
-
16
- %w[ZK ClientForker spec Zookeeper].each do |name|
17
- ::Logging.logger[name].tap do |log|
18
- log.appenders = [appender]
19
- log.level = :debug
4
+ def self.logging_gem_setup
5
+ layout = ::Logging.layouts.pattern(
6
+ :pattern => '%.1l, [%d #%p] %30.30c{2}: %m\n',
7
+ :date_pattern => '%Y-%m-%d %H:%M:%S.%6N'
8
+ )
9
+
10
+
11
+ appender = ENV['ZK_DEBUG'] ? ::Logging.appenders.stderr : ::Logging.appenders.file(ZK::TEST_LOG_PATH)
12
+ appender.layout = layout
13
+ appender.immediate_at = "debug,info,warn,error,fatal"
14
+ # appender.auto_flushing = true
15
+ appender.auto_flushing = 25
16
+ appender.flush_period = 5
17
+
18
+ %w[ZK ClientForker spec Zookeeper].each do |name|
19
+ ::Logging.logger[name].tap do |log|
20
+ log.appenders = [appender]
21
+ log.level = :debug
22
+ end
23
+ end
24
+
25
+ # this logger is kinda noisy
26
+ ::Logging.logger['ZK::EventHandler'].level = :info
27
+
28
+ Zookeeper.logger = ::Logging.logger['Zookeeper']
29
+ Zookeeper.logger.level = ENV['ZOOKEEPER_DEBUG'] ? :debug : :warn
30
+
31
+ ZK::ForkHook.after_fork_in_child { ::Logging.reopen }
20
32
  end
21
- end
22
33
 
23
- # this logger is kinda noisy
24
- Logging.logger['ZK::EventHandler'].level = :info
25
34
 
26
- Zookeeper.logger = Logging.logger['Zookeeper']
27
- Zookeeper.logger.level = ENV['ZOOKEEPER_DEBUG'] ? :debug : :warn
35
+ def self.stdlib_logger_setup
36
+ require 'logger'
37
+ log = ::Logger.new($stderr).tap {|l| l.level = ::Logger::DEBUG }
38
+ ZK.logger = log
39
+ Zookeeper.logger = log
40
+ end
41
+ end
28
42
 
29
- ZK::ForkHook.after_fork_in_child { ::Logging.reopen }
43
+ ZK.logging_gem_setup
44
+ # ZK.stdlib_logger_setup
30
45
 
31
46
  # Zookeeper.logger = ZK.logger.clone_new_log(:progname => 'zoo')
32
-
33
47
  # Zookeeper.logger = ZK.logger
34
48
  # Zookeeper.set_debug_level(4)
35
49