zeevex_cluster 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49) hide show
  1. data/.gitignore +5 -0
  2. data/Gemfile +22 -0
  3. data/Rakefile +44 -0
  4. data/doc/BUGS-zookeeper.txt +60 -0
  5. data/doc/TODO.txt +85 -0
  6. data/lib/zeevex_cluster/base.rb +95 -0
  7. data/lib/zeevex_cluster/coordinator/base_key_val_store.rb +85 -0
  8. data/lib/zeevex_cluster/coordinator/memcached.rb +118 -0
  9. data/lib/zeevex_cluster/coordinator/mysql.rb +396 -0
  10. data/lib/zeevex_cluster/coordinator/redis.rb +101 -0
  11. data/lib/zeevex_cluster/coordinator.rb +29 -0
  12. data/lib/zeevex_cluster/election.rb +102 -0
  13. data/lib/zeevex_cluster/message.rb +52 -0
  14. data/lib/zeevex_cluster/nil_logger.rb +7 -0
  15. data/lib/zeevex_cluster/serializer/json_hash.rb +67 -0
  16. data/lib/zeevex_cluster/serializer.rb +27 -0
  17. data/lib/zeevex_cluster/static.rb +67 -0
  18. data/lib/zeevex_cluster/strategy/base.rb +92 -0
  19. data/lib/zeevex_cluster/strategy/cas.rb +403 -0
  20. data/lib/zeevex_cluster/strategy/static.rb +55 -0
  21. data/lib/zeevex_cluster/strategy/unclustered.rb +9 -0
  22. data/lib/zeevex_cluster/strategy/zookeeper.rb +163 -0
  23. data/lib/zeevex_cluster/strategy.rb +12 -0
  24. data/lib/zeevex_cluster/synchronized.rb +46 -0
  25. data/lib/zeevex_cluster/unclustered.rb +11 -0
  26. data/lib/zeevex_cluster/util/logging.rb +7 -0
  27. data/lib/zeevex_cluster/util.rb +15 -0
  28. data/lib/zeevex_cluster/version.rb +3 -0
  29. data/lib/zeevex_cluster.rb +29 -0
  30. data/script/election.rb +46 -0
  31. data/script/memc.rb +13 -0
  32. data/script/mysql.rb +25 -0
  33. data/script/redis.rb +14 -0
  34. data/script/repl +10 -0
  35. data/script/repl.rb +8 -0
  36. data/script/ser.rb +11 -0
  37. data/script/static.rb +34 -0
  38. data/script/testall +2 -0
  39. data/spec/cluster_static_spec.rb +49 -0
  40. data/spec/cluster_unclustered_spec.rb +32 -0
  41. data/spec/coordinator/coordinator_memcached_spec.rb +102 -0
  42. data/spec/message_spec.rb +38 -0
  43. data/spec/serializer/json_hash_spec.rb +68 -0
  44. data/spec/shared_master_examples.rb +20 -0
  45. data/spec/shared_member_examples.rb +39 -0
  46. data/spec/shared_non_master_examples.rb +8 -0
  47. data/spec/spec_helper.rb +14 -0
  48. data/zeevex_cluster.gemspec +43 -0
  49. metadata +298 -0
@@ -0,0 +1,67 @@
1
+ require 'json'
2
+ require 'json/add/core'
3
+ require 'date'
4
+
5
+ require 'zeevex_cluster/serializer'
6
+
7
# Serializer that round-trips Ruby values through JSON text.
#
# Hashes are dumped as JSON objects. Bare primitives (numbers, strings,
# booleans, nil) are wrapped by #serialize in a single-key
# {'$primitive' => value} envelope and unwrapped again by #deserialize.
class ZeevexCluster::Serializer::JsonHash
  # Builds a serializer.
  #
  # This was previously declared as an instance method named +new+, which Ruby
  # never invokes during construction; +initialize+ is the constructor hook.
  #
  # @param options [Hash] kept in @options; nothing in this class reads it yet
  def initialize(options = {})
    @options = options
  end

  # Tells whether +key+ looks like a timestamp field, i.e. its string form
  # ends in "_at" or "timestamp".
  #
  # @param key [#to_s] field name
  # @param val [Object] unused
  # @return [MatchData, nil] truthy when the name matches
  def is_time_field(key, val = nil)
    key.to_s.match(/(_at|timestamp)$/)
  end

  # Converts a parsed JSON object back into a plain Hash with symbol keys, or
  # unwraps the '$primitive' envelope produced by #serialize.
  #
  # @param parsed [Hash] result of JSON parsing
  # @return [Object] the wrapped primitive, or a new Hash keyed by symbols
  # @raise [ArgumentError] when +parsed+ is not a Hash
  def untranslate_hash(parsed)
    raise ArgumentError, 'Must be a hash' unless parsed.is_a?(Hash)

    # A lone '$primitive' entry is the envelope for a bare value.
    if parsed.count == 1 && (parsed.key?('$primitive') || parsed.key?(:$primitive))
      return parsed.values.first
    end

    hash = {}
    parsed.each do |key, val|
      # val = Time.at(val).utc if is_time_field(key, val)
      hash[key.to_sym] = val
    end
    hash
  end

  # Prepares a Hash for dumping. Currently this only makes a shallow copy; the
  # time-field conversion is disabled.
  #
  # @param hash [Hash]
  # @return [Hash] a shallow copy of +hash+
  # @raise [ArgumentError] when +hash+ is not a Hash
  def translate_hash(hash)
    raise ArgumentError, 'Must be a hash' unless hash.is_a?(Hash)

    copy = hash.clone
    # copy.keys.each do |key|
    #   copy[key] = copy[key].utc.to_f if is_time_field(key, copy[key])
    # end
    copy
  end

  # Parses JSON text produced by #serialize.
  #
  # NOTE(review): :create_additions => true allows the JSON text to name Ruby
  # classes to be instantiated while parsing; only feed this method data from
  # a trusted source.
  #
  # @param str [String] JSON text
  # @return [Object] a symbol-keyed Hash or unwrapped primitive for JSON
  #   objects; any other parsed value unchanged
  def deserialize(str)
    parsed = JSON.parse(str, :symbolize_names  => true,
                             :object_class     => IndifferentHash,
                             :create_additions => true)
    case parsed
    when Hash then untranslate_hash(parsed)
    else parsed
    end
  end

  # Dumps +obj+ as JSON text. Hashes go through #translate_hash, primitives
  # are wrapped in the '$primitive' envelope, and anything else is dumped
  # as-is.
  #
  # @param obj [Object]
  # @return [String] JSON text
  def serialize(obj)
    payload = case obj
              when Hash then translate_hash(obj)
              when Numeric, String, TrueClass, FalseClass, NilClass
                {'$primitive' => obj}
              else obj
              end
    JSON.dump(payload)
  end

  # Hash used as the JSON object class while parsing; lookups through #[] and
  # #fetch convert the requested key to a Symbol first.
  class IndifferentHash < Hash
    # Looks up +key+ as a Symbol, passing +defaultval+ (nil unless given) to
    # Hash#fetch as the value for a missing key.
    def fetch(key, defaultval = nil)
      super(key.to_sym, defaultval)
    end

    # Looks up +key+ as a Symbol.
    def [](key)
      super(key.to_sym)
    end
  end
end
@@ -0,0 +1,27 @@
1
+
2
module ZeevexCluster
  # Mixin that adds JSON helpers to a class: instances get #to_json and
  # #serializer, and the class itself gets .from_json and .serializer.
  module Serializer
    # Hook invoked by Ruby when this module is included into +base+.
    #
    # It must be a singleton method of the module (+self.included+); defined as
    # a plain instance method it is never called, so neither ClassMethods nor
    # InstanceMethods would be mixed in.
    #
    # @param base [Module] the including class or module
    def self.included(base)
      base.extend(ClassMethods)
      base.class_eval { include ZeevexCluster::Serializer::InstanceMethods }
    end

    module InstanceMethods
      # Serializes this object through #serializer.
      #
      # NOTE(review): this takes no arguments, whereas JSON generators call
      # to_json with a state argument - confirm how JsonHash#serialize is
      # expected to dump an arbitrary including object.
      def to_json
        serializer.serialize(self)
      end

      # Lazily built serializer, cached per instance.
      def serializer
        @_serializer ||= ZeevexCluster::Serializer::JsonHash.new
      end
    end

    module ClassMethods
      # Deserializes +string+ with the class-level serializer.
      #
      # @param string [String] JSON text
      # @return [Object] whatever the serializer's #deserialize returns
      def from_json(string)
        serializer.deserialize(string)
      end

      # Lazily built serializer, cached on the extended class. Needed because
      # InstanceMethods#serializer is not reachable from class-level methods.
      def serializer
        @_serializer ||= ZeevexCluster::Serializer::JsonHash.new
      end
    end
  end
end
26
+
27
+ require 'zeevex_cluster/serializer/json_hash'
@@ -0,0 +1,67 @@
1
module ZeevexCluster
  # Cluster whose master is fixed by configuration rather than elected.
  #
  # Relies on +options+, +nodename+ and +after_initialize+, none of which are
  # defined here (presumably provided by Base - confirm).
  class Static < Base
    # @param options [Hash] must include :master_nodename; the special value
    #   :self is replaced with this node's own nodename
    # @raise [ArgumentError] when :master_nodename is absent
    def initialize(options = {})
      super
      unless @options[:master_nodename]
        raise ArgumentError, "Must supply :master_nodename"
      end
      @options[:master_nodename] = nodename if @options[:master_nodename] == :self
      @member = false
      after_initialize
    end

    # Marks this node as a member; there is nothing else to do for a static
    # cluster.
    #
    # @return [true]
    def join
      @member = true
    end

    # Marks this node as no longer a member.
    #
    # @return [true]
    def leave
      @member = false
      true
    end

    # @return [Boolean] whether #join has been called more recently than #leave
    def member?
      @member ? true : false
    end

    # @return [Boolean] true when this node is a member and its nodename is the
    #   configured master nodename
    def master?
      member? && nodename == options[:master_nodename]
    end

    # The master cannot be changed, so this only succeeds when this node
    # already is the master.
    #
    # @return [true]
    # @raise [ClusterActionFailed] when this node is not the master
    def make_master!
      return true if master?

      raise ClusterActionFailed, "Can not change master"
    end

    # Resigning is not supported because the master is fixed; this method
    # always raises.
    #
    # @raise [NotMaster] when this node is not the master
    # @raise [ClusterPolicyViolation] when this node is the master
    def resign!
      raise NotMaster unless master?

      raise ClusterPolicyViolation, "Current master cannot resign in this implementation."
    end

    # @return [Object] the configured master nodename
    def master
      options[:master_nodename]
    end
  end
end
@@ -0,0 +1,92 @@
1
+ require 'zeevex_cluster/strategy'
2
+ require 'hookem'
3
+
4
+ # require 'zeevex_threadsafe/thread_safer'
5
+
6
module ZeevexCluster::Strategy
  # Shared state and status bookkeeping for cluster strategies.
  #
  # Tracks four pieces of state: the lifecycle @state, this node's
  # @my_cluster_status, the @master_status and the @cluster_status. Each of
  # the three status values fires a hook when it changes.
  #
  # +run_hook+, +add_hooks+ and +_initialize_hook_module+ are not defined here
  # (presumably supplied by Hookem - confirm).
  class Base
    include ZeevexCluster::Util::Logging
    include Hookem
    # include ZeevexThreadsafe::ThreadSafer

    # Current lifecycle state, e.g. :stopped or :started.
    attr_reader :state

    # @param options [Hash] reads :namespace, :cluster_name, :nodename
    #   (defaults to the local hostname), :logger and :hooks
    def initialize(options = {})
      @options      = options
      @namespace    = options[:namespace]
      @cluster_name = options[:cluster_name]
      @nodename     = options[:nodename] || Socket.gethostname
      @hooks        = {}
      @logger       = options[:logger]

      @state = :stopped
      reset_state_vars
      _initialize_hook_module

      initial_hooks = options[:hooks]
      add_hooks initial_hooks if initial_hooks
    end

    # @return [Boolean] whether a current master is recorded
    def has_master?
      @current_master ? true : false
    end

    # @return [Boolean] whether this node's status is :master
    def am_i_master?
      @my_cluster_status == :master
    end

    # @return [Boolean] whether the cluster status is :online
    def online?
      @cluster_status == :online
    end

    # @return [Boolean] same as #online?
    def member?
      online?
    end

    # @return [Boolean] whether the lifecycle state is :started
    def started?
      @state == :started
    end

    # @return [Boolean] whether the lifecycle state is :stopped
    def stopped?
      @state == :stopped
    end

    protected

    # Records this node's new status and fires :status_change with the new
    # status, the previous status and +attrs+. Does nothing when unchanged.
    def change_my_status(status, attrs = {})
      previous = @my_cluster_status
      return if status == previous

      @my_cluster_status = status
      run_hook :status_change, status, previous, attrs
    end

    # Records the master's new status and fires :master_status_change with the
    # new status, the previous status and +attrs+. Does nothing when unchanged.
    def change_master_status(status, attrs = {})
      previous = @master_status
      return if status == previous

      @master_status = status
      run_hook :master_status_change, status, previous, attrs
    end

    # Records the cluster's new status and fires :cluster_status_change with
    # the new status, the previous status and +attrs+. Does nothing when
    # unchanged.
    def change_cluster_status(status, attrs = {})
      previous = @cluster_status
      return if status == previous

      @cluster_status = status
      run_hook :cluster_status_change, status, previous, attrs
    end

    # Puts every state variable back to its stopped/offline value without
    # firing any hooks.
    def reset_state_vars
      @state             = :stopped
      @my_cluster_status = :nonmember
      @master_status     = :none
      @cluster_status    = :offline
    end

    # make_thread_safe :change_my_status, :change_master_status, :change_cluster_status
  end
end
@@ -0,0 +1,403 @@
1
+ require 'zeevex_cluster/strategy/base'
2
+ require 'socket'
3
+ require 'logger'
4
+
5
# Master-election strategy built on a shared key/value coordinator.
#
# #start launches a background thread that repeatedly registers this node in a
# shared member list and campaigns for the "throne" key using the
# coordinator's +cas+ operation. The node whose token sits under that key holds
# the lock; it is treated as master only once #qualifies_for_master? accepts
# the token.
#
# NOTE(review): the coordinator is an external object. From the way results
# are used here, +cas+ appears to return a truthy value on success, +false+ on
# a lost race and +nil+ when the key does not exist - confirm against the
# coordinator implementations.
class ZeevexCluster::Strategy::Cas < ZeevexCluster::Strategy::Base

  attr_accessor :stale_time, :update_period, :server, :nodename, :cluster_name

  # Multiplied by update_period in #master_suspect?: a master token older than
  # that is reported through the :suspect_master hook.
  SUSPECT_MISSED_UPDATE_COUNT = 3
  # Multiplied by update_period in #qualifies_for_master?: a lock must have
  # been held at least that long before its holder counts as master.
  INAUGURATION_UPDATE_DELAY = 2

  # @param options [Hash] in addition to the Base options reads :stale_time
  #   (default 40), :update_period (default 10), :coordinator (a ready-made
  #   coordinator) or :coordinator_type / :server / :port /
  #   :coordinator_options to build one, and :cluster_key
  def initialize(options = {})
    super
    @stale_time    = options.fetch(:stale_time, 40)
    @update_period = options.fetch(:update_period, 10)

    unless (@server = options[:coordinator])
      coordinator_type = options[:coordinator_type] || 'memcached'
      coordinator_opts = {:server     => options[:server],
                          :port       => options[:port],
                          :expiration => @stale_time * 4}
      @server = ZeevexCluster::Coordinator.create(coordinator_type,
                                                  coordinator_opts.merge(options[:coordinator_options] || {}))
    end
    # Wrap the coordinator unless it already is a Synchronized wrapper.
    unless @server.is_a?(ZeevexCluster::Synchronized)
      @server = ZeevexCluster.Synchronized(@server)
    end
  end

  # @return [Boolean] whether this node's status is :master or :master_elect
  def do_i_hold_lock?
    @my_cluster_status == :master || @my_cluster_status == :master_elect
  end

  # @return [Hash, nil] the token of the currently recorded master
  def master_node
    @current_master
  end

  # @return [Object, nil] the nodename stored in the current master's token
  def master_nodename
    @current_master && @current_master[:nodename]
  end

  # Raised into the background thread by #stop to interrupt its sleep.
  class StopException < StandardError; end

  # Starts the background election thread.
  #
  # @return [Thread] the new thread
  # @raise [RuntimeError] when a thread exists or the state is :started
  def start
    raise "Already started" if @thread || @state == :started
    @start_time = time_now
    @state = :started
    @locked_at = nil
    @thread = Thread.new do
      begin
        change_my_status :member
        spin
      rescue => e
        logger.warn "rescued from spin: #{e.inspect}\n#{e.backtrace.join("\n")}"
      ensure
        logger.debug "spin over"
        @state = :stopped
      end
    end
  end

  # Requests the background thread to stop, waits for it, then resets all
  # state.
  #
  # Safe to call when the strategy was never started or is already stopped:
  # the thread is only signalled and joined when one exists (previously a nil
  # thread raised NoMethodError here).
  #
  # @raise [RuntimeError] when the state is not one of :stop_requested,
  #   :stopped or :started
  def stop
    case @state
    when :stop_requested, :stopped
      # nothing to signal
    when :started
      @state = :stop_requested
      @thread.raise(StopException.new('stop')) if @thread
    else
      raise "Bad state: #{@state}"
    end
    @thread.join if @thread
    @thread = nil
    change_my_status :nonmember
    reset_state_vars
  end

  # Gives up the lock for a while, or with a delay of 0 cancels a pending
  # resignation and campaigns again immediately.
  #
  # @param delay [Numeric, nil] seconds to stay resigned; nil selects the
  #   smaller of 6 update periods and the stale time
  def resign(delay = nil)
    # unresign
    if delay == 0
      @resign_until = nil
      campaign
    else
      @resign_until = time_now + (delay || [@update_period*6, @stale_time].min)
      current = nil
      server.cas(key) do |val|
        current = val
        if is_me?(val)
          # Backdate our token by twice the stale time so it reads as stale.
          my_token.merge(:timestamp => time_now - 2*@stale_time)
        else
          raise ZeevexCluster::Coordinator::DontChange
        end
      end
      failed_lock(my_token, current)
    end
  rescue ZeevexCluster::Coordinator::ConnectionError
    failed_lock(my_token, nil)
  end

  # Overwrites the throne key with this node's token unconditionally.
  #
  # @return [Boolean] true on success, false on a coordinator connection error
  def steal_election!
    logger.warn "Stealing election"
    @resign_until = nil
    me = my_token
    server.set(key, me)
    got_lock(me)
    true
  rescue ZeevexCluster::Coordinator::ConnectionError
    false
  end

  # @return [Array] nodenames from the shared member list whose timestamp is
  #   not older than the stale time
  def members
    stale_point = time_now - @stale_time
    list = server.get(key('members')) || make_member_list
    fresh = list[:members].values.reject { |entry| entry[:timestamp].utc < stale_point }
    fresh.map { |entry| entry[:nodename] }
  end

  protected

  # Body of the background thread: registers and campaigns once per cycle
  # until the state leaves :started, then resigns (when holding the lock),
  # unregisters and fires the shutdown hooks.
  def spin
    logger.debug "spin started"
    @state = :started
    run_hook :started
    run_hook :joined_cluster, cluster_name
    while @state == :started
      begin
        register
        campaign
        if @state == :started
          begin
            sleep [@update_period - 1, 1].max
          rescue StopException
            # #stop interrupts the sleep; the loop condition ends the loop.
            logger.debug 'Stopping on stop exception'
          end
        end
      rescue ZeevexCluster::Coordinator::ConnectionError
        connection_error
      end
    end
  ensure
    ignoring_connection_error { resign } if do_i_hold_lock?
    ignoring_connection_error { unregister }
    @state = :stopped
    run_hook :left_cluster, cluster_name
    change_cluster_status :offline
    run_hook :stopped
  end

  # Runs the block, swallowing a coordinator ConnectionError.
  #
  # @return [Object] the block's value, or the rescued exception
  def ignoring_connection_error
    yield
  rescue ZeevexCluster::Coordinator::ConnectionError => e
    logger.debug 'got connection error in ignoring_connection_error'
    e
  end

  # Fires the :connection_error hook and marks the cluster offline.
  def connection_error
    run_hook :connection_error
    change_cluster_status :offline
  end

  # @return [Hash] this node's token: nodename, join time, lock time (now when
  #   no lock is held) and current timestamp
  def my_token
    now = time_now
    {:nodename  => nodename,
     :joined_at => @start_time,
     :locked_at => @locked_at || now,
     :timestamp => now}
  end

  # Builds a coordinator key from the :cluster_key option (or the cluster
  # name) and +subkey+, joined by a colon.
  def key(subkey = 'throne')
    (@options[:cluster_key] || cluster_name) + ":" + subkey
  end

  # @return [Boolean, nil] truthy when +token+ is a Hash carrying this node's
  #   nodename
  def is_me?(token)
    token && token.is_a?(Hash) && token[:nodename] == nodename
  end

  # Updates local state after this node's token was written to the throne key.
  # The node becomes :master once the token qualifies, and :master_elect
  # before that.
  def got_lock(token)
    unless @locked_at
      @locked_at = token[:timestamp]
      token = my_token
      run_hook :election_won
    end
    @my_master_token = token
    if qualifies_for_master?(token)
      change_my_status :master
      if @current_master && is_me?(@current_master)
        run_hook :reelected
      else
        run_hook :became_master
      end
      change_master_status :good
      @current_master = token
    else
      change_my_status :master_elect
      change_master_status :waiting_for_inauguration
      run_hook :waiting_for_inauguration
      @current_master = nil
    end
  end

  # Updates local state after this node did not obtain the lock; +winner+ is
  # the token found under the throne key, if any.
  def failed_lock(me, winner)
    @locked_at = nil

    if qualifies_for_master?(winner)
      @current_master = winner
      change_my_status :member
      change_master_status :good
    elsif !token_invalid?(winner)
      @current_master = winner
      change_master_status :waiting_for_inauguration
    else
      @current_master = nil
      change_master_status :none
    end
    run_hook :election_lost, @current_master

    if @my_cluster_status == :master
      @my_master_token = nil
      change_my_status :lame_duck
      run_hook :lame_duck
    else
      change_my_status :member
    end
  end

  #
  # Must have held lock for INAUGURATION_UPDATE_DELAY update periods
  #
  def qualifies_for_master?(token)
    now = time_now
    !token_invalid?(token) &&
      token[:timestamp] > (now - @stale_time) &&
      token[:locked_at] <= (now - INAUGURATION_UPDATE_DELAY * @update_period)
  end

  # @return [Time] the current time in UTC
  def time_now
    Time.now.utc
  end

  # @return [Boolean] true when +token+ is missing, is not a Hash, lacks
  #   :timestamp, :locked_at or :nodename, or is older than the stale time
  def token_invalid?(token)
    now = time_now
    !token || !token.is_a?(Hash) || !token[:timestamp] ||
      !token[:locked_at] || !token[:nodename] ||
      token[:timestamp].utc < (now - @stale_time)
  end

  # @return [Boolean, nil] truthy while a resignation is still in effect
  def resigned?
    @resign_until && @resign_until > time_now
  end

  # One election round: tries to write this node's token to the throne key.
  #
  # @return [Boolean, nil] true when the lock was obtained, false when not,
  #   nil when the node is deliberately staying resigned
  def campaign
    me = my_token

    act_resigned = resigned?
    compete_for_token = !act_resigned

    hook = nil
    current = nil
    res = server.cas(key) do |val|
      current = val
      if is_me?(val) && !token_invalid?(val) && compete_for_token
        me
      elsif token_invalid?(val) && compete_for_token
        if is_me?(val)
          logger.info "My old token is invalid, refreshing: #{val.inspect}"
        else
          logger.info "CAS: master invalid, stealing: #{val.inspect}"
          # it's necessary to run this outside of the CAS block to be sure we won
          hook = :deposed_master
        end
        me
      else
        run_hook :suspect_master if @master_status != :none && master_suspect?(val)
        raise ZeevexCluster::Coordinator::DontChange
      end
    end

    # if we got a result, we must be online
    change_cluster_status :online

    if act_resigned
      run_hook :staying_resigned
      failed_lock(me, current)
      return
    else
      @resign_until = nil
    end

    if res
      # CAS succeeded so we're the boss
      run_hook hook if hook
      got_lock(me)
      return true
    elsif res.nil?
      failed_lock(me, nil)
      if server.add(key, me)
        logger.debug 'CAS: added frist post!'
        got_lock(me)
        return true
      end
    end

    # didn't get it, somebody else must be boss
    failed_lock(me, current)
    false
  rescue ZeevexCluster::Coordinator::ConnectionError
    connection_error
    failed_lock(me, current)
    false
  end

  # @return [Hash] a member list containing only this node
  def make_member_list
    {:members => {@nodename => my_token}}
  end

  # Writes this node's token under its own member key and into the shared
  # member list, pruning entries older than the stale time. The list update is
  # attempted up to 5 times while +cas+ returns false.
  #
  # @return [Boolean] true normally, false on a coordinator connection error
  # @raise [RuntimeError] when setting the node's own member key fails
  def register
    me = my_token

    self_key = self.key('member:' + @nodename)
    memberlist_key = self.key('members')
    raise "failed to set #{self_key}" unless server.set(self_key, me)

    res = false
    retries = 5

    while retries > 0 && res == false
      stale_point = time_now - @stale_time
      res = server.cas(memberlist_key) do |hash|
        hash[:members] ||= {}
        hash[:members].keys.each do |name|
          hash[:members].delete(name) if hash[:members][name][:timestamp] < stale_point
        end
        hash[:members][@nodename] = me
        hash
      end
      retries -= 1
    end

    # nil from cas: create the list with this node as its only entry.
    if res.nil?
      server.add(memberlist_key, {:members => {@nodename => me}})
    end

    true
  rescue ZeevexCluster::Coordinator::ConnectionError
    connection_error
    false
  end

  # Deletes this node's member key and removes it from the shared member list
  # (up to 5 attempts while +cas+ returns false).
  #
  # @return [Boolean] true normally, false on a coordinator connection error
  def unregister
    self_key = self.key('member:' + @nodename)
    memberlist_key = self.key('members')
    server.delete(self_key)

    res = false
    retries = 5

    while retries > 0 && res == false
      res = server.cas(memberlist_key) do |hash|
        hash[:members] ||= {}
        hash[:members].delete @nodename
        hash
      end
      retries -= 1
    end

    true
  rescue ZeevexCluster::Coordinator::ConnectionError
    connection_error
    false
  end

  #
  # has the master gone without updating suspiciously long?
  #
  # A token that is not a Hash or carries no :timestamp cannot be aged, so it
  # is reported as not suspect instead of raising inside the campaign.
  #
  def master_suspect?(token)
    return false unless token.is_a?(Hash) && token[:timestamp]

    time_now - token[:timestamp] > SUSPECT_MISSED_UPDATE_COUNT * @update_period
  end

  # Resets the Base state plus this strategy's resignation, token and thread
  # bookkeeping.
  def reset_state_vars
    super

    @resign_until = nil
    @my_master_token = nil
    @current_master = nil
    @thread = nil
  end
end
@@ -0,0 +1,55 @@
1
+ require 'zeevex_cluster/strategy/base'
2
+
3
module ZeevexCluster::Strategy
  # Strategy in which the master is named up front and never changes.
  class Static < Base
    # @param options [Hash] must include :master_nodename; :members optionally
    #   supplies the member list returned by #members
    # @raise [ArgumentError] when :master_nodename is absent
    def initialize(options = {})
      super
      configured_master = options[:master_nodename]
      raise ArgumentError, 'Must specify :master_nodename' unless configured_master

      @master_nodename = configured_master
      @members = options[:members]
    end

    # Marks the strategy started and the cluster online. This node becomes
    # :master (master status :good) when its nodename equals the configured
    # master, otherwise :member (master status :unknown).
    def start
      @state = :started
      change_cluster_status :online
      is_master = (@nodename == @master_nodename)
      change_my_status(is_master ? :master : :member)
      change_master_status(is_master ? :good : :unknown)
    end

    # Marks the strategy stopped, this node a nonmember and the cluster
    # offline.
    def stop
      @state = :stopped
      change_my_status :nonmember
      change_master_status :unknown
      change_cluster_status :offline
    end

    # @return [Boolean] true only while started and holding :master status
    def am_i_master?
      return false unless @state == :started

      @my_cluster_status == :master
    end

    # FIXME: this is CAS-specific
    #
    # @return [Hash] a token-like hash holding only the master's nodename
    def master_node
      {:nodename => @master_nodename}
    end

    # @return [Array] the configured :members, or otherwise the master and
    #   this node with duplicates and the literal "none" removed
    def members
      return @members if @members

      [@master_nodename, @nodename].reject { |name| name == "none" }.uniq
    end

    # Resignation is not possible with a fixed master; logs a warning.
    #
    # @param delay [Object] ignored
    # @return [false]
    def resign(delay = nil)
      # master is currently fixed, so we can't resign
      logger.warn 'Current master cannot resign in this implementation.'
      false
    end

    # Succeeds only when this node already is the master.
    #
    # @return [true]
    # @raise [ClusterActionFailed] when this node is not the master
    def steal_election!
      return true if am_i_master?

      raise ClusterActionFailed, 'Can not change master'
    end
  end
end
@@ -0,0 +1,9 @@
1
module ZeevexCluster::Strategy
  # Static strategy for a single node that is always its own master.
  class Unclustered < Static
    # Sets :master_nodename in +options+ to this node's name before delegating
    # to Static, then records this node as the only member.
    #
    # When :nodename is not supplied the local hostname is used, matching the
    # default Base applies to @nodename. Previously a missing :nodename left
    # :master_nodename nil and Static#initialize raised ArgumentError.
    #
    # @param options [Hash] strategy options; :master_nodename is overwritten
    #   in place
    def initialize(options)
      options[:master_nodename] = options[:nodename] || Socket.gethostname
      super
      @members = [@nodename]
    end
  end
end