zk 1.5.0 → 1.5.1

Sign up to get free protection for your applications and to get access to all the features.
data/Gemfile CHANGED
@@ -28,10 +28,11 @@ platform :mri_19 do
28
28
  end
29
29
 
30
30
  group :development do
31
- gem 'guard', :require => false
32
- gem 'guard-rspec', :require => false
33
- gem 'guard-shell', :require => false
31
+ gem 'guard', :require => false
32
+ gem 'guard-rspec', :require => false
33
+ gem 'guard-shell', :require => false
34
34
  gem 'guard-bundler', :require => false
35
+ gem 'growl', :require => false
35
36
  end
36
37
 
37
38
  group :test do
@@ -65,6 +65,12 @@ In addition to all of that, I would like to think that the public API the ZK::Cl
65
65
  [zk-eventmachine]: https://github.com/slyphon/zk-eventmachine
66
66
 
67
67
  ## NEWS ##
68
+ ### v1.5.1 ###
69
+
70
+ * Added a `:retry_duration` option to the client constructor which allows the user to specify how long, in the case of a connection loss, an operation should wait for the connection to be re-established before retrying. This can be set at a global level and overridden on a per-call basis. The default is to not retry (which may change at a later date). Generally speaking, a timeout of > 30s is probably excessive, and care should be taken because during a connection loss, the server-side state may change without you being aware of it (i.e. events will not be delivered).
71
+
72
+ * Small fork-hook implementation fix. Previously we were using WeakRefs so that hooks would not prevent an object from being garbage collected. This has been replaced with a finalizer which is more deterministic.
73
+
68
74
  ### v1.5.0 ###
69
75
 
70
76
  Ok, now seriously this time. I think all of the forking issues are done.
@@ -1,5 +1,11 @@
1
1
  This file notes feature differences and bugfixes contained between releases.
2
2
 
3
+ ### v1.5.1 ###
4
+
5
+ * Added a `:retry_duration` option to the client constructor which allows the user to specify how long, in the case of a connection loss, an operation should wait for the connection to be re-established before retrying. This can be set at a global level and overridden on a per-call basis. The default is to not retry (which may change at a later date). Generally speaking, a timeout of > 30s is probably excessive, and care should be taken because during a connection loss, the server-side state may change without you being aware of it (i.e. events will not be delivered).
6
+
7
+ * Small fork-hook implementation fix. Previously we were using WeakRefs so that hooks would not prevent an object from being garbage collected. This has been replaced with a finalizer which is more deterministic.
8
+
3
9
  ### v1.5.0 ###
4
10
 
5
11
  Ok, now seriously this time. I think all of the forking issues are done.
data/lib/zk.rb CHANGED
@@ -12,7 +12,6 @@ require 'monitor'
12
12
  require 'set'
13
13
  require 'time'
14
14
  require 'date'
15
- require 'weakref'
16
15
 
17
16
  module ZK
18
17
  # just like stdlib Monitor but provides the SAME API AS MUTEX, FFS!
@@ -72,6 +72,8 @@ module ZK
72
72
 
73
73
  # returns true if the connection has been closed
74
74
  def closed?
75
+ return true if cnx.nil?
76
+
75
77
  # XXX: should this be *our* idea of closed or ZOO_CLOSED_STATE ?
76
78
  defined?(::JRUBY_VERSION) ? jruby_closed? : mri_closed?
77
79
  end
@@ -600,16 +602,7 @@ module ZK
600
602
  h = { :path => path }.merge(opts)
601
603
 
602
604
  setup_watcher!(:data, h) do
603
- rv = cnx.stat(h)
604
-
605
- return rv if opts[:callback]
606
-
607
- case rv[:rc]
608
- when Zookeeper::ZOK, Zookeeper::ZNONODE
609
- rv[:stat]
610
- else
611
- check_rc(rv, h) # throws the appropriate error
612
- end
605
+ call_and_check_rc(:stat, h.merge(:ignore => :no_node)).fetch(:stat)
613
606
  end
614
607
  end
615
608
 
@@ -1034,6 +1027,9 @@ module ZK
1034
1027
  end
1035
1028
 
1036
1029
  def call_and_check_rc(meth, opts)
1030
+ # TODO: we should not be raising Zookeeper errors, that's not cool.
1031
+ raise Zookeeper::Exceptions::NotConnected if cnx.nil?
1032
+
1037
1033
  scrubbed_opts = opts.dup
1038
1034
  scrubbed_opts.delete(:ignore)
1039
1035
 
@@ -1045,7 +1041,7 @@ module ZK
1045
1041
  # @private
1046
1042
  # XXX: make this actually call the method on cnx
1047
1043
  def check_rc(rv_hash, inputs)
1048
- code = rv_hash[:rc]
1044
+ code = rv_hash[:rc]
1049
1045
 
1050
1046
  if code && (code != Zookeeper::ZOK)
1051
1047
  return rv_hash if ignore_set(inputs[:ignore]).include?(code)
@@ -43,6 +43,15 @@ module ZK
43
43
 
44
44
  DEFAULT_THREADPOOL_SIZE = 1
45
45
 
46
+ # @private
47
+ module Constants
48
+ CLI_RUNNING = :running
49
+ CLI_PAUSED = :paused
50
+ CLI_CLOSE_REQ = :close_requested
51
+ CLI_CLOSED = :closed
52
+ end
53
+ include Constants
54
+
46
55
  # Construct a new threaded client.
47
56
  #
48
57
  # Pay close attention to the `:threaded` option, and have a look at the
@@ -84,6 +93,19 @@ module ZK
84
93
  # that does something application specific, and you want to avoid a
85
94
  # conflict.
86
95
  #
96
+ # @option opts [Fixnum] :retry_duration (nil) for how long (in seconds)
97
+ # should we wait to re-attempt a synchronous operation after we receive a
98
+ # ZK::Exceptions::Retryable error. This exception (or really, group of
99
+ # exceptions) is raised when there has been an unintentional network
100
+ # connection or session loss, so retrying an operation in this situation
101
+ # is like saying "If we are disconnected, How long should we wait for the
102
+ # connection to become available before attempting this operation?"
103
+ #
104
+ # The default `nil` means automatic retry is not attempted.
105
+ #
106
+ # This is a global option, and will be used for all operations on this
107
+ # connection, however it can be overridden for any individual operation.
108
+ #
87
109
  # @option opts [:single,:per_callback] :thread (:single) choose your event
88
110
  # delivery model:
89
111
  #
@@ -140,9 +162,14 @@ module ZK
140
162
  @reconnect = opts.fetch(:reconnect, true)
141
163
 
142
164
  @mutex = Monitor.new
143
- @cond = @mutex.new_cond
165
+ @cond = @mutex.new_cond
144
166
 
145
- @cli_state = :running # this is to distinguish between *our* state and the underlying connection state
167
+ @cli_state = CLI_RUNNING # this is to distinguish between *our* state and the underlying connection state
168
+
169
+ # this is the last status update we've received from the underlying connection
170
+ @last_cnx_state = Zookeeper::ZOO_CLOSED_STATE
171
+
172
+ @retry_duration = opts.fetch(:retry_duration, nil).to_i
146
173
 
147
174
  @fork_subs = [
148
175
  ForkHook.prepare_for_fork(method(:pause_before_fork_in_parent)),
@@ -150,22 +177,22 @@ module ZK
150
177
  ForkHook.after_fork_in_child(method(:reopen)),
151
178
  ]
152
179
 
180
+ ObjectSpace.define_finalizer(self, self.class.finalizer(@fork_subs))
181
+
153
182
  yield self if block_given?
154
183
 
155
- @mutex.synchronize do
156
- connect if opts.fetch(:connect, true)
157
- end
184
+ connect if opts.fetch(:connect, true)
185
+ end
186
+
187
+ def self.finalizer(hooks)
188
+ proc { hooks.each(&:unregister) }
158
189
  end
159
190
 
160
191
  # @option opts [Fixnum] :timeout how long we will wait for the connection
161
192
  # to be established. If timeout is nil, we will wait forever: *use
162
193
  # carefully*.
163
194
  def connect(opts={})
164
- @mutex.synchronize do
165
- return if @cnx
166
- timeout = opts.fetch(:timeout, @connection_timeout)
167
- @cnx = create_connection(@host, timeout, @event_handler.get_default_watcher_block)
168
- end
195
+ @mutex.synchronize { unlocked_connect(opts) }
169
196
  end
170
197
 
171
198
  # (see Base#reopen)
@@ -178,9 +205,10 @@ module ZK
178
205
 
179
206
  logger.debug { "reopening everything, fork detected!" }
180
207
 
181
- @mutex = Monitor.new
182
- @pid = Process.pid
183
- @cli_state = :running # reset state to running if we were paused
208
+ @mutex = Mutex.new
209
+ @cond = ConditionVariable.new
210
+ @pid = Process.pid
211
+ @cli_state = CLI_RUNNING # reset state to running if we were paused
184
212
 
185
213
  old_cnx, @cnx = @cnx, nil
186
214
  old_cnx.close! if old_cnx # && !old_cnx.closed?
@@ -195,11 +223,11 @@ module ZK
195
223
  @event_handler.reopen_after_fork!
196
224
  @threadpool.reopen_after_fork! # prune dead threadpool threads after a fork()
197
225
 
198
- connect
226
+ unlocked_connect
199
227
  end
200
228
  else
201
229
  @mutex.synchronize do
202
- if @cli_state == :paused
230
+ if @cli_state == CLI_PAUSED
203
231
  # XXX: what to do in this case? does it matter?
204
232
  end
205
233
 
@@ -220,23 +248,32 @@ module ZK
220
248
  # before that deadline, or you will have to re-establish your session.
221
249
  #
222
250
  # @raise [InvalidStateError] when called and not in running? state
251
+ # @private
223
252
  def pause_before_fork_in_parent
224
253
  @mutex.synchronize do
225
- raise InvalidStateError, "client must be running? when you call #{__method__}" unless running?
226
- @cli_state = :paused
254
+ raise InvalidStateError, "client must be running? when you call #{__method__}" unless (@cli_state == CLI_RUNNING)
255
+ @cli_state = CLI_PAUSED
256
+
257
+ logger.debug { "#{self.class}##{__method__}" }
258
+
259
+ @cond.broadcast
227
260
  end
228
- logger.debug { "#{self.class}##{__method__}" }
261
+
229
262
  [@event_handler, @threadpool, @cnx].each(&:pause_before_fork_in_parent)
230
263
  end
231
264
 
265
+ # @private
232
266
  def resume_after_fork_in_parent
233
267
  @mutex.synchronize do
234
- raise InvalidStateError, "client must be paused? when you call #{__method__}" unless paused?
235
- @cli_state = :running
236
- end
268
+ raise InvalidStateError, "client must be paused? when you call #{__method__}" unless (@cli_state == CLI_PAUSED)
269
+ @cli_state = CLI_RUNNING
270
+
271
+ logger.debug { "#{self.class}##{__method__}" }
237
272
 
238
- logger.debug { "#{self.class}##{__method__}" }
239
- [@cnx, @event_handler, @threadpool].each(&:resume_after_fork_in_parent)
273
+ [@cnx, @event_handler, @threadpool].each(&:resume_after_fork_in_parent)
274
+
275
+ @cond.broadcast
276
+ end
240
277
  end
241
278
 
242
279
  # (see Base#close!)
@@ -250,8 +287,9 @@ module ZK
250
287
  def close!
251
288
  @mutex.synchronize do
252
289
  return if [:closed, :close_requested].include?(@cli_state)
253
- # logger.debug { "moving to :close_requested state" }
254
- @cli_state = :close_requested
290
+ logger.debug { "moving to :close_requested state" }
291
+ @cli_state = CLI_CLOSE_REQ
292
+ @cond.broadcast
255
293
  end
256
294
 
257
295
  on_tpool = on_threadpool?
@@ -269,8 +307,9 @@ module ZK
269
307
  super
270
308
 
271
309
  @mutex.synchronize do
272
- # logger.debug { "moving to :closed state" }
273
- @cli_state = :closed
310
+ logger.debug { "moving to :closed state" }
311
+ @cli_state = CLI_CLOSED
312
+ @cond.broadcast
274
313
  end
275
314
  end
276
315
 
@@ -298,44 +337,43 @@ module ZK
298
337
  # @private
299
338
  def raw_event_handler(event)
300
339
  return unless event.session_event?
340
+
341
+ @mutex.synchronize do
342
+ @last_cnx_state = event.state
301
343
 
302
- if event.client_invalid?
303
- return unless @reconnect
344
+ if event.client_invalid? and @reconnect and not dead_or_dying?
345
+ logger.error { "Got event #{event.state_name}, calling reopen(0)! things may be messed up until this works itself out!" }
304
346
 
305
- @mutex.synchronize do
306
- unless dead_or_dying? # a legitimate shutdown case
307
-
308
- logger.error { "Got event #{event.state_name}, calling reopen(0)! things may be messed up until this works itself out!" }
309
-
310
- # reopen(0) means that we don't want to wait for the connection
311
- # to reach the connected state before returning as we're on the
312
- # event thread.
313
- reopen(0)
314
- end
347
+ # reopen(0) means that we don't want to wait for the connection
348
+ # to reach the connected state before returning as we're on the
349
+ # event thread.
350
+ reopen(0)
315
351
  end
352
+
353
+ @cond.broadcast # wake anyone waiting for a connection state update
316
354
  end
317
355
  rescue Exception => e
318
356
  logger.error { "BUG: Exception caught in raw_event_handler: #{e.to_std_format}" }
319
357
  end
320
358
 
321
359
  def closed?
322
- return true if @mutex.synchronize { @cli_state == :closed }
360
+ return true if @mutex.synchronize { @cli_state == CLI_CLOSED }
323
361
  super
324
362
  end
325
363
 
326
364
  # are we in running (not-paused) state?
327
365
  def running?
328
- @mutex.synchronize { @cli_state == :running }
366
+ @mutex.synchronize { @cli_state == CLI_RUNNING }
329
367
  end
330
368
 
331
369
  # are we in paused state?
332
370
  def paused?
333
- @mutex.synchronize { @cli_state == :paused }
371
+ @mutex.synchronize { @cli_state == CLI_PAUSED }
334
372
  end
335
373
 
336
374
  # has shutdown time arrived?
337
375
  def close_requested?
338
- @mutex.synchronize { @cli_state == :close_requested }
376
+ @mutex.synchronize { @cli_state == CLI_CLOSE_REQ }
339
377
  end
340
378
 
341
379
  protected
@@ -346,13 +384,51 @@ module ZK
346
384
  @mutex.synchronize { @cnx }
347
385
  end
348
386
 
349
- # @private
387
+ def call_and_check_rc(meth, opts)
388
+ if retry_duration = (opts.delete(:retry_duration) || @retry_duration)
389
+ begin
390
+ super(meth, opts)
391
+ rescue Exceptions::Retryable => e
392
+ time_to_stop = Time.now + retry_duration
393
+
394
+ wait_until_connected_or_dying(retry_duration)
395
+
396
+ if (@last_cnx_state != Zookeeper::ZOO_CONNECTED_STATE) || (Time.now > time_to_stop) || (@cli_state != CLI_RUNNING)
397
+ raise e
398
+ else
399
+ retry
400
+ end
401
+ end
402
+ else
403
+ super
404
+ end
405
+ end
406
+
407
+ def wait_until_connected_or_dying(timeout)
408
+ time_to_stop = Time.now + timeout
409
+
410
+ @mutex.synchronize do
411
+ while (@last_cnx_state != Zookeeper::ZOO_CONNECTED_STATE) && (Time.now < time_to_stop) && (@cli_state == CLI_RUNNING)
412
+ @cond.wait(timeout)
413
+ end
414
+
415
+ logger.debug { "@last_cnx_state: #{@last_cnx_state}, time_left? #{Time.now.to_f < time_to_stop.to_f}, @cli_state: #{@cli_state.inspect}" }
416
+ end
417
+ end
418
+
350
419
  def create_connection(*args)
351
420
  ::Zookeeper.new(*args)
352
421
  end
353
422
 
354
423
  def dead_or_dying?
355
- (@cli_state == :close_requested) || (@cli_state == :closed)
424
+ (@cli_state == CLI_CLOSE_REQ) || (@cli_state == CLI_CLOSED)
425
+ end
426
+
427
+ private
428
+ def unlocked_connect(opts={})
429
+ return if @cnx
430
+ timeout = opts.fetch(:timeout, @connection_timeout)
431
+ @cnx = create_connection(@host, timeout, @event_handler.get_default_watcher_block)
356
432
  end
357
433
  end
358
434
  end
@@ -43,12 +43,15 @@ module ZK
43
43
  module InterruptedSession
44
44
  end
45
45
 
46
+ # mixed into exceptions that may be retried
47
+ module Retryable
48
+ end
49
+
46
50
  class SystemError < KeeperException; end
47
51
  class RunTimeInconsistency < KeeperException; end
48
52
  class DataInconsistency < KeeperException; end
49
53
  class MarshallingError < KeeperException; end
50
54
  class Unimplemented < KeeperException; end
51
- class OperationTimeOut < KeeperException; end
52
55
  class BadArguments < KeeperException; end
53
56
  class ApiError < KeeperException; end
54
57
  class NoNode < KeeperException; end
@@ -61,12 +64,18 @@ module ZK
61
64
  class InvalidACL < KeeperException; end
62
65
  class AuthFailed < KeeperException; end
63
66
 
67
+ class OperationTimeOut < KeeperException
68
+ include Retryable
69
+ end
70
+
64
71
  class ConnectionLoss < KeeperException
65
72
  include InterruptedSession
73
+ include Retryable
66
74
  end
67
75
 
68
76
  class SessionExpired < KeeperException
69
77
  include InterruptedSession
78
+ include Retryable
70
79
  end
71
80
 
72
81
  # mixes in InterruptedSession, and can be raised on its own
@@ -48,3 +48,9 @@ end
48
48
  end
49
49
  end
50
50
 
51
+ [:NotConnected, :SessionExpired, :ConnectionLoss].each do |class_name|
52
+ Zookeeper::Exceptions.const_get(class_name).tap do |klass|
53
+ klass.__send__(:include, ZK::Exceptions::Retryable)
54
+ end
55
+ end
56
+
@@ -16,21 +16,22 @@ module ZK
16
16
  # @private
17
17
  def fire_prepare_hooks!
18
18
  @mutex.lock
19
+ logger.debug { "#{__method__}" }
19
20
  safe_call(@hooks[:prepare])
20
21
  end
21
22
 
22
23
  # @private
23
24
  def fire_after_child_hooks!
24
- safe_call(@hooks[:after_child])
25
- ensure
26
25
  @mutex.unlock rescue nil
26
+ logger.debug { "#{__method__}" }
27
+ safe_call(@hooks[:after_child])
27
28
  end
28
29
 
29
30
  # @private
30
31
  def fire_after_parent_hooks!
31
- safe_call(@hooks[:after_parent])
32
- ensure
33
32
  @mutex.unlock rescue nil
33
+ logger.debug { "#{__method__}" }
34
+ safe_call(@hooks[:after_parent])
34
35
  end
35
36
 
36
37
  # @private
@@ -52,15 +53,10 @@ module ZK
52
53
  def safe_call(callbacks)
53
54
  cbs = callbacks.dup
54
55
 
56
+ # exceptions in these hooks will be raised normally
57
+
55
58
  while cb = cbs.shift
56
- begin
57
- cb.call
58
- rescue WeakRef::RefError
59
- # clean weakrefs out of the original callback arrays if they're bad
60
- callbacks.delete(cb)
61
- rescue Exception => e
62
- logger.error { e.to_std_format }
63
- end
59
+ cb.call
64
60
  end
65
61
  end
66
62
 
@@ -76,7 +72,7 @@ module ZK
76
72
 
77
73
  ForkSubscription.new(hook_type, block).tap do |sub|
78
74
  # use a WeakRef so that the original objects can be GC'd
79
- @mutex.synchronize { @hooks[hook_type] << WeakRef.new(sub) }
75
+ @mutex.synchronize { @hooks[hook_type] << sub }
80
76
  end
81
77
  end
82
78
 
@@ -14,10 +14,13 @@ module ZK
14
14
  ::Logging.logger['ZK'].tap do |ch_root|
15
15
  ::Logging.appenders.stderr.tap do |serr|
16
16
  serr.layout = ::Logging.layouts.pattern(
17
- :pattern => '%.1l, [%d] %c30.30{2}: %m\n',
17
+ :pattern => '%.1l, [%d #p] %c30.30{2}: %m\n',
18
18
  :date_pattern => '%Y-%m-%d %H:%M:%S.%6N'
19
19
  )
20
20
 
21
+ serr.auto_flushing = 25
22
+ serr.flush_period = 5
23
+
21
24
  ch_root.add_appenders(serr)
22
25
  end
23
26
 
@@ -25,12 +28,8 @@ module ZK
25
28
  end
26
29
  end
27
30
 
28
- # cache the logger at the instance level, as that's where most of the
29
- # logging is done, this means that the user should set up the override
30
- # of the ZK.logger early, before creating instances.
31
- #
32
31
  def logger
33
- @logger ||= (::ZK.logger || ::Logging.logger[self.class.logger_name]) # logger_name defined in ::Logging::Utils
32
+ self.class.logger
34
33
  end
35
34
  end
36
35
  end
@@ -124,6 +124,7 @@ module ZK
124
124
 
125
125
  def dispatch_thread_body
126
126
  Thread.current.abort_on_exception = true
127
+ Thread.current[:callback] = @callback
127
128
  while true
128
129
  args = nil
129
130
 
@@ -132,7 +133,7 @@ module ZK
132
133
  @cond.wait(@mutex) while @array.empty? and @state == :running
133
134
 
134
135
  if @state != :running
135
- # logger.warn { "ThreadedCallback, state is #{@state.inspect}, returning" }
136
+ logger.warn { "ThreadedCallback, state is #{@state.inspect}, returning" }
136
137
  return
137
138
  end
138
139
 
@@ -147,8 +148,9 @@ module ZK
147
148
  logger.error { e.to_std_format }
148
149
  end
149
150
  end
150
- # ensure
151
- # logger.debug { "#{self.class}##{__method__} returning" }
151
+ ensure
152
+ Thread.current[:callback] = nil
153
+ logger.debug { "##{__method__} returning" }
152
154
  end
153
155
  end
154
156
  end
@@ -124,9 +124,9 @@ module ZK
124
124
  begin
125
125
  raise InvalidStateError, "invalid state, expected to be :running, was #{@state.inspect}" if @state != :running
126
126
  return false if @state == :paused
127
+ threads = @threadpool.slice!(0, @threadpool.length)
127
128
  @state = :paused
128
129
  @cond.broadcast # wake threads, let them die
129
- threads = @threadpool.slice!(0, @threadpool.length)
130
130
  ensure
131
131
  @mutex.unlock rescue nil
132
132
  end
@@ -1,3 +1,3 @@
1
1
  module ZK
2
- VERSION = "1.5.0"
2
+ VERSION = "1.5.1"
3
3
  end
@@ -302,7 +302,9 @@ shared_examples_for 'client' do
302
302
  m.should_receive(:reopen).with(0).once
303
303
  end
304
304
 
305
- bogus_event = flexmock(:expired_session_event, :session_event? => true, :client_invalid? => true, :state_name => 'ZOO_EXPIRED_SESSION_STATE')
305
+ props = { :session_event? => true, :client_invalid? => true, :state_name => 'ZOO_EXPIRED_SESSION_STATE', :state => Zookeeper::ZOO_EXPIRED_SESSION_STATE }
306
+
307
+ bogus_event = flexmock(:expired_session_event, props)
306
308
 
307
309
  @zk.raw_event_handler(bogus_event)
308
310
  end
@@ -109,4 +109,10 @@ class ::Thread
109
109
  end
110
110
  end
111
111
 
112
+ if RUBY_VERSION == '1.9.3'
113
+ trap('USR1') do
114
+ threads = Thread.list.map { |th| { :inspect => th.inspect, :calback => th[:callback], :backtrace => th.backtrace } }
115
+ pp threads
116
+ end
117
+ end
112
118
 
@@ -12,6 +12,7 @@ class ClientForker
12
12
  @cnx_args = cnx_args
13
13
  @base_path = base_path
14
14
  @pids_root = "#{@base_path}/pid"
15
+ @child_latch = Latch.new
15
16
  end
16
17
 
17
18
  LBORDER = ('-' * 35) << '< '
@@ -63,6 +64,13 @@ class ClientForker
63
64
  end
64
65
  end
65
66
 
67
+ def start_child_exit_thread(pid)
68
+ @child_exit_thread ||= Thread.new do
69
+ _, @stat = Process.wait2(pid)
70
+ @child_latch.release
71
+ end
72
+ end
73
+
66
74
  def run
67
75
  before
68
76
  mark 'BEGIN TEST'
@@ -102,7 +110,6 @@ class ClientForker
102
110
 
103
111
  @pid = fork do
104
112
  Thread.abort_on_exception = true
105
- ::Logging.reopen
106
113
 
107
114
  @zk.wait_until_connected
108
115
 
@@ -169,6 +176,8 @@ class ClientForker
169
176
  end
170
177
  end # forked child
171
178
 
179
+ start_child_exit_thread(@pid)
180
+
172
181
  # replicates deletion watcher inside child
173
182
  child_pid_path = "#{@pids_root}/#{@pid}"
174
183
 
@@ -188,9 +197,10 @@ class ClientForker
188
197
 
189
198
  delete_latch.await if @zk.exists?(child_pid_path, :watch => true)
190
199
 
191
- _, @stat = Process.wait2(@pid)
200
+ @child_latch.await(30) # if we don't get a response in 30 seconds, then we're *definately* hosed
201
+
202
+ raise "Child did not exit after 30 seconds of waiting, something is very wrong" unless @stat
192
203
 
193
- # $stderr.puts "#{@pid} exited with status: #{stat.inspect}"
194
204
  ensure
195
205
  mark "END TEST"
196
206
  kill_child!
@@ -9,6 +9,9 @@ layout = Logging.layouts.pattern(
9
9
 
10
10
  appender = ENV['ZK_DEBUG'] ? Logging.appenders.stderr : Logging.appenders.file(ZK::TEST_LOG_PATH)
11
11
  appender.layout = layout
12
+ #appender.immediate_at = "debug,info,warn,error,fatal"
13
+ appender.auto_flushing = 25
14
+ appender.flush_period = 5
12
15
 
13
16
  %w[ZK ClientForker spec Zookeeper].each do |name|
14
17
  ::Logging.logger[name].tap do |log|
@@ -21,7 +24,9 @@ end
21
24
  Logging.logger['ZK::EventHandler'].level = :info
22
25
 
23
26
  Zookeeper.logger = Logging.logger['Zookeeper']
24
- Zookeeper.logger.level = :info
27
+ Zookeeper.logger.level = ENV['ZOOKEEPER_DEBUG'] ? :debug : :warn
28
+
29
+ ZK::ForkHook.after_fork_in_child { ::Logging.reopen }
25
30
 
26
31
  # Zookeeper.logger = ZK.logger.clone_new_log(:progname => 'zoo')
27
32
 
@@ -40,5 +40,79 @@ describe ZK::Client::Threaded do
40
40
  end
41
41
  end
42
42
  end
43
+
44
+ describe :retry do
45
+ include_context 'connection opts'
46
+
47
+ before do
48
+ @zk = ZK::Client::Threaded.new(connection_host, :reconect => false, :connect => false)
49
+ end
50
+
51
+ after do
52
+ @zk.close! unless @zk.closed?
53
+ end
54
+
55
+ it %[should retry a Retryable operation] do
56
+ # TODO: this is a terrible test. there is no way to guarantee that this
57
+ # has been retried. the join at the end should not raise an error
58
+
59
+ @zk.should_not be_connected
60
+
61
+ th = Thread.new do
62
+ @zk.stat('/path/to/blah', :retry_duration => 30)
63
+ end
64
+
65
+ th.run
66
+
67
+ @zk.connect
68
+ th.join(5).should == th
69
+ end
70
+
71
+ it %[barfs if the connection is closed before the connected event is received] do
72
+ @zk.should_not be_connected
73
+
74
+ exc = nil
75
+
76
+ th = Thread.new do
77
+ # this nonsense is because 1.8.7 is psychotic
78
+ begin
79
+ @zk.stat('/path/to/blah', :retry_duration => 300)
80
+ rescue Exception
81
+ exc = $!
82
+ end
83
+ end
84
+
85
+ th.run
86
+
87
+ @zk.close!
88
+
89
+ th.join(5).should == th
90
+
91
+ exc.should_not be_nil
92
+ exc.should be_kind_of(ZK::Exceptions::Retryable)
93
+ end
94
+
95
+ it %[should barf if the timeout expires] do
96
+ @zk.should_not be_connected
97
+
98
+ exc = nil
99
+
100
+ th = Thread.new do
101
+ # this nonsense is because 1.8.7 is psychotic
102
+ begin
103
+ @zk.stat('/path/to/blah', :retry_duration => 0.001)
104
+ rescue Exception
105
+ exc = $!
106
+ end
107
+ end
108
+
109
+ th.run
110
+
111
+ th.join(5).should == th
112
+
113
+ exc.should_not be_nil
114
+ exc.should be_kind_of(ZK::Exceptions::Retryable)
115
+ end
116
+ end
43
117
  end # ZK::Client::Threaded
44
118
 
@@ -432,7 +432,7 @@ shared_examples_for 'shared-exclusive interaction' do
432
432
  ex_th = Thread.new do
433
433
  begin
434
434
  @ex_lock.lock(true) # blocking lock
435
- Thread.current[:got_lock] = true
435
+ @ex_lock.assert!
436
436
  @array << :ex_lock
437
437
  ensure
438
438
  @ex_lock.unlock
@@ -455,7 +455,7 @@ shared_examples_for 'shared-exclusive interaction' do
455
455
  sh2_th = Thread.new do
456
456
  begin
457
457
  @sh_lock2.lock(true)
458
- Thread.current[:got_lock] = true
458
+ @sh_lock2.assert!
459
459
  @array << :sh_lock2
460
460
  ensure
461
461
  @sh_lock2.unlock
@@ -469,13 +469,12 @@ shared_examples_for 'shared-exclusive interaction' do
469
469
 
470
470
  logger.debug { "@sh_lock2 is waiting" }
471
471
 
472
+ # ok, now unlock the first in the chain
473
+ @sh_lock.assert!
472
474
  @sh_lock.unlock.should be_true
473
475
 
474
476
  ex_th.join(5).should == ex_th
475
- ex_th[:got_lock].should be_true
476
-
477
477
  sh2_th.join(5).should == sh2_th
478
- sh2_th[:got_lock].should be_true
479
478
 
480
479
  @array.length.should == 2
481
480
  @array.should == [:ex_lock, :sh_lock2]
data/zk.gemspec CHANGED
@@ -12,7 +12,7 @@ Gem::Specification.new do |s|
12
12
  s.summary = %q{A high-level wrapper around the zookeeper driver}
13
13
  s.description = s.summary + "\n"
14
14
 
15
- s.add_runtime_dependency 'zookeeper', '~> 1.2.2'
15
+ s.add_runtime_dependency 'zookeeper', '~> 1.2.3'
16
16
  s.add_runtime_dependency 'backports', '~> 2.5.1'
17
17
  s.add_runtime_dependency 'logging', '~> 1.7.2'
18
18
 
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: zk
3
3
  version: !ruby/object:Gem::Version
4
- hash: 3
4
+ hash: 1
5
5
  prerelease:
6
6
  segments:
7
7
  - 1
8
8
  - 5
9
- - 0
10
- version: 1.5.0
9
+ - 1
10
+ version: 1.5.1
11
11
  platform: ruby
12
12
  authors:
13
13
  - Jonathan D. Simms
@@ -16,7 +16,7 @@ autorequire:
16
16
  bindir: bin
17
17
  cert_chain: []
18
18
 
19
- date: 2012-05-22 00:00:00 Z
19
+ date: 2012-05-23 00:00:00 Z
20
20
  dependencies:
21
21
  - !ruby/object:Gem::Dependency
22
22
  name: zookeeper
@@ -26,12 +26,12 @@ dependencies:
26
26
  requirements:
27
27
  - - ~>
28
28
  - !ruby/object:Gem::Version
29
- hash: 27
29
+ hash: 25
30
30
  segments:
31
31
  - 1
32
32
  - 2
33
- - 2
34
- version: 1.2.2
33
+ - 3
34
+ version: 1.2.3
35
35
  type: :runtime
36
36
  version_requirements: *id001
37
37
  - !ruby/object:Gem::Dependency