zk 1.5.1 → 1.5.2

Sign up to get free protection for your applications and access to all of the features.
data/Gemfile CHANGED
@@ -33,6 +33,10 @@ group :development do
33
33
  gem 'guard-shell', :require => false
34
34
  gem 'guard-bundler', :require => false
35
35
  gem 'growl', :require => false
36
+
37
+ if RUBY_PLATFORM =~ /darwin/i
38
+ gem 'rb-readline', :platform => :ruby
39
+ end
36
40
  end
37
41
 
38
42
  group :test do
data/Guardfile CHANGED
@@ -17,14 +17,18 @@ guard 'rspec', :version => 2 do
17
17
 
18
18
  watch(%r{^lib/(.+)\.rb$}) do |m|
19
19
  case m[1]
20
- when %r{^zk/event_handler$}
20
+ when 'zk/event_handler'
21
21
  "spec/zk/watch_spec.rb"
22
- when %r{^zk/client/threaded.rb$}
22
+
23
+ when 'zk/client/threaded'
23
24
  ["spec/zk/client_spec.rb", "spec/zk/zookeeper_spec.rb"]
24
- when %r{^zk/locker/}
25
- "spec/zk/locker_spec.rb"
26
- when %r{^zk\.rb$}
25
+
26
+ when %r{^(?:zk/locker/locker_base|spec/shared/locker)}
27
+ Dir["spec/zk/locker/*_spec.rb"]
28
+
29
+ when 'zk' # .rb
27
30
  'spec' # run all tests
31
+
28
32
  else
29
33
  "spec/#{m[1]}_spec.rb"
30
34
  end
data/README.markdown CHANGED
@@ -67,7 +67,7 @@ In addition to all of that, I would like to think that the public API the ZK::Cl
67
67
  ## NEWS ##
68
68
  ### v1.5.1 ###
69
69
 
70
- * Added a `:retry_duration` option to client constructor which will allows the user to specify for how long in the case of a connection loss, should an operation wait for the connection to be re-established before retrying the operation. This can be set at a global level and overridden on a per-call basis. The default is to not retry (which may change at a later date). Generally speaking, a timeout of > 30s is probably excessive, and care should be taken because during a connection loss, the server-side state may change without you being aware of it (i.e. events will not be delivered).
70
+ * Added a `:retry_duration` option to the Threaded client constructor which allows the user to specify how long, in the case of a connection loss, an operation should wait for the connection to be re-established before retrying the operation. This can be set at a global level and overridden on a per-call basis. The default is to not retry (which may change at a later date). Generally speaking, a timeout of > 30s is probably excessive, and care should be taken because during a connection loss, the server-side state may change without you being aware of it (i.e. events will not be delivered).
71
71
 
72
72
  * Small fork-hook implementation fix. Previously we were using WeakRefs so that hooks would not prevent an object from being garbage collected. This has been replaced with a finalizer which is more deterministic.
73
73
 
data/RELEASES.markdown CHANGED
@@ -1,5 +1,13 @@
1
1
  This file notes feature differences and bugfixes contained between releases.
2
2
 
3
+ ### v1.5.2 ###
4
+
5
+ * Fix locker cleanup code to avoid a nasty race when a session is lost, see [issue #34](https://github.com/slyphon/zk/issues/34)
6
+
7
+ * Fix potential deadlock in ForkHook code so the mutex is unlocked in the case of an exception
8
+
9
+ * Do not hang forever when shutting down and the shutdown thread does not exit (wait 30 seconds).
10
+
3
11
  ### v1.5.1 ###
4
12
 
5
13
  * Added a `:retry_duration` option to the client constructor which allows the user to specify how long, in the case of a connection loss, an operation should wait for the connection to be re-established before retrying the operation. This can be set at a global level and overridden on a per-call basis. The default is to not retry (which may change at a later date). Generally speaking, a timeout of > 30s is probably excessive, and care should be taken because during a connection loss, the server-side state may change without you being aware of it (i.e. events will not be delivered).
@@ -171,6 +171,8 @@ module ZK
171
171
 
172
172
  @retry_duration = opts.fetch(:retry_duration, nil).to_i
173
173
 
174
+ yield self if block_given?
175
+
174
176
  @fork_subs = [
175
177
  ForkHook.prepare_for_fork(method(:pause_before_fork_in_parent)),
176
178
  ForkHook.after_fork_in_parent(method(:resume_after_fork_in_parent)),
@@ -179,11 +181,10 @@ module ZK
179
181
 
180
182
  ObjectSpace.define_finalizer(self, self.class.finalizer(@fork_subs))
181
183
 
182
- yield self if block_given?
183
-
184
184
  connect if opts.fetch(:connect, true)
185
185
  end
186
186
 
187
+ # @private
187
188
  def self.finalizer(hooks)
188
189
  proc { hooks.each(&:unregister) }
189
190
  end
@@ -259,7 +260,11 @@ module ZK
259
260
  @cond.broadcast
260
261
  end
261
262
 
262
- [@event_handler, @threadpool, @cnx].each(&:pause_before_fork_in_parent)
263
+ # the compact is here because the @cnx *may* be nil when this callback is fired by the
264
+ # ForkHook (in the case of ZK.open). The race is between the GC calling the finalizer and this hook firing.
265
+ [@event_handler, @threadpool, @cnx].compact.each(&:pause_before_fork_in_parent)
266
+ ensure
267
+ logger.debug { "#{self.class}##{__method__} returning" }
263
268
  end
264
269
 
265
270
  # @private
@@ -270,7 +275,7 @@ module ZK
270
275
 
271
276
  logger.debug { "#{self.class}##{__method__}" }
272
277
 
273
- [@cnx, @event_handler, @threadpool].each(&:resume_after_fork_in_parent)
278
+ [@cnx, @event_handler, @threadpool].compact.each(&:resume_after_fork_in_parent)
274
279
 
275
280
  @cond.broadcast
276
281
  end
@@ -304,6 +309,8 @@ module ZK
304
309
  #
305
310
  shutdown_thread = Thread.new do
306
311
  @threadpool.shutdown(10)
312
+
313
+ # this will call #close
307
314
  super
308
315
 
309
316
  @mutex.synchronize do
@@ -313,7 +320,7 @@ module ZK
313
320
  end
314
321
  end
315
322
 
316
- on_tpool ? shutdown_thread : shutdown_thread.join
323
+ on_tpool ? shutdown_thread : shutdown_thread.join(30)
317
324
  end
318
325
 
319
326
  # {see Base#close}
data/lib/zk/fork_hook.rb CHANGED
@@ -18,6 +18,9 @@ module ZK
18
18
  @mutex.lock
19
19
  logger.debug { "#{__method__}" }
20
20
  safe_call(@hooks[:prepare])
21
+ rescue Exception => e
22
+ @mutex.unlock rescue nil # if something goes wrong in a hook, then release the lock
23
+ raise e
21
24
  end
22
25
 
23
26
  # @private
@@ -48,11 +48,14 @@ module ZK
48
48
  def initialize(client, name, root_lock_node=nil)
49
49
  @zk = client
50
50
  @root_lock_node = root_lock_node || Locker.default_root_lock_node
51
- @path = name
52
- @locked = false
53
- @waiting = false
54
- @lock_path = nil
51
+
52
+ @path = name
53
+ @locked = false
54
+ @waiting = false
55
+ @lock_path = nil
56
+ @parent_stat = nil
55
57
  @root_lock_path = "#{@root_lock_node}/#{@path.gsub("/", "__")}"
58
+
56
59
  @mutex = Monitor.new
57
60
  @cond = @mutex.new_cond
58
61
  @node_deletion_watcher = nil
@@ -119,19 +122,21 @@ module ZK
119
122
  # @return [true] if we held the lock and this method has
120
123
  # unlocked it successfully
121
124
  #
122
- # @return [false] we did not own the lock
125
+ # @return [false] if we did not own the lock.
126
+ #
127
+ # @note There is more than one way you might not "own the lock"
128
+ # see [issue #34](https://github.com/slyphon/zk/issues/34)
123
129
  #
124
130
  def unlock
131
+ rval = false
125
132
  synchronize do
126
133
  if @locked
127
- cleanup_lock_path!
134
+ rval = cleanup_lock_path!
128
135
  @locked = false
129
136
  @node_deletion_watcher = nil
130
- true
131
- else
132
- false # i know, i know, but be explicit
133
137
  end
134
138
  end
139
+ rval
135
140
  end
136
141
 
137
142
  # (see #unlock)
@@ -220,6 +225,7 @@ module ZK
220
225
  raise LockAssertionFailedError, "not connected" unless zk.connected?
221
226
  raise LockAssertionFailedError, "lock_path was #{lock_path.inspect}" unless lock_path
222
227
  raise LockAssertionFailedError, "the lock path #{lock_path} did not exist!" unless zk.exists?(lock_path)
228
+ raise LockAssertionFailedError, "the parent node was replaced!" unless root_lock_path_same?
223
229
  raise LockAssertionFailedError, "we do not actually hold the lock" unless got_lock?
224
230
  end
225
231
  end
@@ -248,6 +254,8 @@ module ZK
248
254
  end
249
255
  end
250
256
 
257
+ # root_lock_path is /_zklocking/foobar
258
+ #
251
259
  def create_root_path!
252
260
  zk.mkdir_p(@root_lock_path)
253
261
  end
@@ -262,9 +270,14 @@ module ZK
262
270
  # prefix is the string that will appear in front of the sequence num,
263
271
  # defaults to 'lock'
264
272
  #
273
+ # this method also saves the stat of root_lock_path at the time of creation
274
+ # to ensure we don't accidentally remove a lock we don't own. see
275
+ # [rule #34](https://github.com/slyphon/zk/issues/34)...er, *issue* #34.
276
+ #
265
277
  def create_lock_path!(prefix='lock')
266
278
  synchronize do
267
- @lock_path = @zk.create("#{root_lock_path}/#{prefix}", "", :mode => :ephemeral_sequential)
279
+ @lock_path = @zk.create("#{root_lock_path}/#{prefix}", :mode => :ephemeral_sequential)
280
+ @parent_stat = @zk.stat(root_lock_path)
268
281
  end
269
282
 
270
283
  logger.debug { "got lock path #{@lock_path}" }
@@ -274,12 +287,43 @@ module ZK
274
287
  retry
275
288
  end
276
289
 
290
+ # if the root_lock_path has the same stat .ctime as the one
291
+ # we cached when we created our lock path, then we can be sure
292
+ # that we actually own the lock_path
293
+ #
294
+ # see [issue #34](https://github.com/slyphon/zk/issues/34)
295
+ #
296
+ def root_lock_path_same?
297
+ synchronize do
298
+ return false unless @parent_stat
299
+
300
+ cur_stat = zk.stat(root_lock_path)
301
+ cur_stat.exists? and (cur_stat.ctime == @parent_stat.ctime)
302
+ end
303
+ end
304
+
305
+ # we make a best-effort to clean up, this case is rife with race
306
+ # conditions if there is a lot of contention for the locks, so if we
307
+ # can't remove a path or if that path happens to not be empty we figure
308
+ # either we got pwned or that someone else will run this same method
309
+ # later and get to it
310
+ #
277
311
  def cleanup_lock_path!
278
- logger.debug { "removing lock path #{@lock_path}" }
279
- zk.delete(@lock_path)
312
+ rval = false
313
+
314
+ synchronize do
315
+ if root_lock_path_same?
316
+ logger.debug { "removing lock path #{@lock_path}" }
317
+
318
+ zk.delete(@lock_path, :ignore => :no_node)
319
+ zk.delete(root_lock_path, :ignore => [:not_empty, :no_node])
320
+ rval = true
321
+ end
322
+
323
+ @lock_path = @parent_stat = nil
324
+ end
280
325
 
281
- zk.delete(root_lock_path, :ignore => :not_empty)
282
- @lock_path = nil
326
+ rval
283
327
  end
284
328
  end # LockerBase
285
329
  end # Locker
data/lib/zk/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module ZK
2
- VERSION = "1.5.1"
2
+ VERSION = "1.5.2"
3
3
  end
@@ -1,10 +1,11 @@
1
1
  require File.join(File.dirname(__FILE__), %w[spec_helper])
2
2
 
3
3
  describe ZK::MessageQueue do
4
+ include_context 'connection opts'
4
5
 
5
6
  before(:each) do
6
- @zk = ZK.new("localhost:#{ZK.test_port}")
7
- @zk2 = ZK.new("localhost:#{ZK.test_port}")
7
+ @zk = ZK.new(connection_host)
8
+ @zk2 = ZK.new(connection_host)
8
9
  wait_until{ @zk.connected? && @zk2.connected? }
9
10
  @queue_name = "_specQueue"
10
11
  @consume_queue = @zk.queue(@queue_name)
@@ -10,7 +10,7 @@ shared_context 'threaded client connection' do
10
10
  before do
11
11
  # logger.debug { "threaded client connection - begin before hook" }
12
12
 
13
- @connection_string = "localhost:#{ZK.test_port}"
13
+ @connection_string = connection_host
14
14
  @base_path = '/zktests'
15
15
  @zk = ZK::Client::Threaded.new(*connection_args).tap { |z| wait_until { z.connected? } }
16
16
  @threadpool_exception = nil
@@ -0,0 +1,53 @@
1
+ shared_context 'locker non-chrooted' do
2
+ include_context 'connection opts'
3
+
4
+ let(:zk) { ZK.new(*connection_args) }
5
+ let(:zk2) { ZK.new(*connection_args) }
6
+ let(:zk3) { ZK.new(*connection_args) }
7
+
8
+ let(:connections) { [zk, zk2, zk3] }
9
+
10
+ let(:path) { "lock_path" }
11
+ let(:root_lock_path) { "#{ZK::Locker.default_root_lock_node}/#{path}" }
12
+
13
+ before do
14
+ wait_until{ connections.all?(&:connected?) }
15
+ zk.rm_rf(ZK::Locker.default_root_lock_node)
16
+ end
17
+
18
+ after do
19
+ connections.each { |c| c.close! }
20
+ wait_until { !connections.any?(&:connected?) }
21
+ ZK.open(*connection_args) { |z| z.rm_rf(ZK::Locker.default_root_lock_node) }
22
+ end
23
+ end
24
+
25
+ shared_context 'locker chrooted' do
26
+ include_context 'connection opts'
27
+
28
+ let(:chroot_path) { '/_zk_chroot_' }
29
+ let(:path) { "lock_path" }
30
+
31
+ let(:zk) { ZK.new("#{connection_host}#{chroot_path}", connection_opts) }
32
+ let(:zk2) { ZK.new("#{connection_host}#{chroot_path}", connection_opts) }
33
+ let(:zk3) { ZK.new("#{connection_host}#{chroot_path}", connection_opts) }
34
+ let(:connections) { [zk, zk2, zk3] }
35
+ let(:root_lock_path) { "#{ZK::Locker.default_root_lock_node}/#{path}" }
36
+
37
+ before do
38
+ ZK.open(*connection_args) do |zk|
39
+ zk.mkdir_p(chroot_path)
40
+ end
41
+
42
+ wait_until{ connections.all?(&:connected?) }
43
+ end
44
+
45
+ after do
46
+ connections.each { |c| c.close! }
47
+ wait_until { !connections.any?(&:connected?) }
48
+
49
+ ZK.open(*connection_args) do |zk|
50
+ zk.rm_rf(chroot_path)
51
+ end
52
+ end
53
+ end
@@ -0,0 +1,55 @@
1
+ # basic shared examples for locker specs (both exclusive and shared)
2
+
3
+ # these assume they're being executed in the 'locker chrooted' or 'locker
4
+ # non-chrooted' contexts
5
+ #
6
+ shared_examples_for 'LockerBase#assert!' do
7
+ it %[should raise LockAssertionFailedError if its connection is no longer connected?] do
8
+ zk.close!
9
+ lambda { locker.assert! }.should raise_error(ZK::Exceptions::LockAssertionFailedError)
10
+ end
11
+
12
+ it %[should raise LockAssertionFailedError if locked? is false] do
13
+ locker.should_not be_locked
14
+ lambda { locker.assert! }.should raise_error(ZK::Exceptions::LockAssertionFailedError)
15
+ end
16
+
17
+ it %[should raise LockAssertionFailedError lock_path does not exist] do
18
+ locker.lock
19
+ lambda { locker.assert! }.should_not raise_error
20
+
21
+ zk.delete(locker.lock_path)
22
+ lambda { locker.assert! }.should raise_error(ZK::Exceptions::LockAssertionFailedError)
23
+ end
24
+
25
+ it %[should raise LockAssertionFailedError if our parent node's ctime is different than what we think it should be] do
26
+ locker.lock.should be_true
27
+
28
+ zk.rm_rf(File.dirname(locker.lock_path)) # remove the parent node
29
+ zk.mkdir_p(locker.lock_path)
30
+
31
+ lambda { locker.assert! }.should raise_error(ZK::Exceptions::LockAssertionFailedError)
32
+ end
33
+ end
34
+
35
+ shared_examples_for 'LockerBase#unlock' do
36
+ it %[should not delete a lock path it does not own] do
37
+ locker.lock.should be_true
38
+
39
+ zk.rm_rf(File.dirname(locker.lock_path)) # remove the parent node
40
+ zk.mkdir_p(File.dirname(locker.lock_path))
41
+
42
+ locker2.lock.should be_true
43
+
44
+ locker2.lock_path.should == locker.lock_path
45
+
46
+ lambda { locker2.assert! }.should_not raise_error
47
+
48
+ lock_path = locker.lock_path
49
+
50
+ locker.unlock.should be_false
51
+
52
+ zk.stat(lock_path).should exist
53
+ end
54
+ end
55
+
@@ -1,35 +1,49 @@
1
1
  module ZK
2
2
  TEST_LOG_PATH = File.join(ZK::ZK_ROOT, 'test.log')
3
- end
4
3
 
5
- layout = Logging.layouts.pattern(
6
- :pattern => '%.1l, [%d #%p] %30.30c{2}: %m\n',
7
- :date_pattern => '%Y-%m-%d %H:%M:%S.%6N'
8
- )
9
-
10
- appender = ENV['ZK_DEBUG'] ? Logging.appenders.stderr : Logging.appenders.file(ZK::TEST_LOG_PATH)
11
- appender.layout = layout
12
- #appender.immediate_at = "debug,info,warn,error,fatal"
13
- appender.auto_flushing = 25
14
- appender.flush_period = 5
15
-
16
- %w[ZK ClientForker spec Zookeeper].each do |name|
17
- ::Logging.logger[name].tap do |log|
18
- log.appenders = [appender]
19
- log.level = :debug
4
+ def self.logging_gem_setup
5
+ layout = ::Logging.layouts.pattern(
6
+ :pattern => '%.1l, [%d #%p] %30.30c{2}: %m\n',
7
+ :date_pattern => '%Y-%m-%d %H:%M:%S.%6N'
8
+ )
9
+
10
+
11
+ appender = ENV['ZK_DEBUG'] ? ::Logging.appenders.stderr : ::Logging.appenders.file(ZK::TEST_LOG_PATH)
12
+ appender.layout = layout
13
+ appender.immediate_at = "debug,info,warn,error,fatal"
14
+ # appender.auto_flushing = true
15
+ appender.auto_flushing = 25
16
+ appender.flush_period = 5
17
+
18
+ %w[ZK ClientForker spec Zookeeper].each do |name|
19
+ ::Logging.logger[name].tap do |log|
20
+ log.appenders = [appender]
21
+ log.level = :debug
22
+ end
23
+ end
24
+
25
+ # this logger is kinda noisy
26
+ ::Logging.logger['ZK::EventHandler'].level = :info
27
+
28
+ Zookeeper.logger = ::Logging.logger['Zookeeper']
29
+ Zookeeper.logger.level = ENV['ZOOKEEPER_DEBUG'] ? :debug : :warn
30
+
31
+ ZK::ForkHook.after_fork_in_child { ::Logging.reopen }
20
32
  end
21
- end
22
33
 
23
- # this logger is kinda noisy
24
- Logging.logger['ZK::EventHandler'].level = :info
25
34
 
26
- Zookeeper.logger = Logging.logger['Zookeeper']
27
- Zookeeper.logger.level = ENV['ZOOKEEPER_DEBUG'] ? :debug : :warn
35
+ def self.stdlib_logger_setup
36
+ require 'logger'
37
+ log = ::Logger.new($stderr).tap {|l| l.level = ::Logger::DEBUG }
38
+ ZK.logger = log
39
+ Zookeeper.logger = log
40
+ end
41
+ end
28
42
 
29
- ZK::ForkHook.after_fork_in_child { ::Logging.reopen }
43
+ ZK.logging_gem_setup
44
+ # ZK.stdlib_logger_setup
30
45
 
31
46
  # Zookeeper.logger = ZK.logger.clone_new_log(:progname => 'zoo')
32
-
33
47
  # Zookeeper.logger = ZK.logger
34
48
  # Zookeeper.set_debug_level(4)
35
49