zeevex_cluster 0.2.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (49) hide show
  1. data/.gitignore +5 -0
  2. data/Gemfile +22 -0
  3. data/Rakefile +44 -0
  4. data/doc/BUGS-zookeeper.txt +60 -0
  5. data/doc/TODO.txt +85 -0
  6. data/lib/zeevex_cluster/base.rb +95 -0
  7. data/lib/zeevex_cluster/coordinator/base_key_val_store.rb +85 -0
  8. data/lib/zeevex_cluster/coordinator/memcached.rb +118 -0
  9. data/lib/zeevex_cluster/coordinator/mysql.rb +396 -0
  10. data/lib/zeevex_cluster/coordinator/redis.rb +101 -0
  11. data/lib/zeevex_cluster/coordinator.rb +29 -0
  12. data/lib/zeevex_cluster/election.rb +102 -0
  13. data/lib/zeevex_cluster/message.rb +52 -0
  14. data/lib/zeevex_cluster/nil_logger.rb +7 -0
  15. data/lib/zeevex_cluster/serializer/json_hash.rb +67 -0
  16. data/lib/zeevex_cluster/serializer.rb +27 -0
  17. data/lib/zeevex_cluster/static.rb +67 -0
  18. data/lib/zeevex_cluster/strategy/base.rb +92 -0
  19. data/lib/zeevex_cluster/strategy/cas.rb +403 -0
  20. data/lib/zeevex_cluster/strategy/static.rb +55 -0
  21. data/lib/zeevex_cluster/strategy/unclustered.rb +9 -0
  22. data/lib/zeevex_cluster/strategy/zookeeper.rb +163 -0
  23. data/lib/zeevex_cluster/strategy.rb +12 -0
  24. data/lib/zeevex_cluster/synchronized.rb +46 -0
  25. data/lib/zeevex_cluster/unclustered.rb +11 -0
  26. data/lib/zeevex_cluster/util/logging.rb +7 -0
  27. data/lib/zeevex_cluster/util.rb +15 -0
  28. data/lib/zeevex_cluster/version.rb +3 -0
  29. data/lib/zeevex_cluster.rb +29 -0
  30. data/script/election.rb +46 -0
  31. data/script/memc.rb +13 -0
  32. data/script/mysql.rb +25 -0
  33. data/script/redis.rb +14 -0
  34. data/script/repl +10 -0
  35. data/script/repl.rb +8 -0
  36. data/script/ser.rb +11 -0
  37. data/script/static.rb +34 -0
  38. data/script/testall +2 -0
  39. data/spec/cluster_static_spec.rb +49 -0
  40. data/spec/cluster_unclustered_spec.rb +32 -0
  41. data/spec/coordinator/coordinator_memcached_spec.rb +102 -0
  42. data/spec/message_spec.rb +38 -0
  43. data/spec/serializer/json_hash_spec.rb +68 -0
  44. data/spec/shared_master_examples.rb +20 -0
  45. data/spec/shared_member_examples.rb +39 -0
  46. data/spec/shared_non_master_examples.rb +8 -0
  47. data/spec/spec_helper.rb +14 -0
  48. data/zeevex_cluster.gemspec +43 -0
  49. metadata +298 -0
@@ -0,0 +1,67 @@
1
+ require 'json'
2
+ require 'json/add/core'
3
+ require 'date'
4
+
5
+ require 'zeevex_cluster/serializer'
6
+
7
##
## JSON serializer for hashes and simple primitive values.
##
## Hashes are dumped as JSON objects. Bare primitives (numbers, strings,
## booleans, nil) are wrapped as {"$primitive": value} so that the document
## root is always an object; #deserialize unwraps them again.
##
class ZeevexCluster::Serializer::JsonHash
  ##
  ## options - Hash kept in @options; no method in this class reads it.
  ##
  ## This must be `initialize`, not an instance method called `new`:
  ## otherwise JsonHash.new(options) raises ArgumentError and @options is
  ## never assigned.
  ##
  def initialize(options = {})
    @options = options
  end

  ##
  ## Returns a MatchData (truthy) when the key name ends in "_at" or
  ## "timestamp", nil otherwise. `val` is accepted but ignored.
  ##
  def is_time_field(key, val = nil)
    key.to_s.match(/(_at|timestamp)$/)
  end

  ##
  ## Turn a parsed JSON object back into the value that was serialized.
  ##
  ## A single-entry hash keyed by '$primitive' (string or symbol) yields the
  ## wrapped value. Any other hash yields a new plain Hash with each
  ## top-level key converted via to_sym; values are passed through as-is.
  ##
  ## Raises ArgumentError unless `parsed` is a Hash.
  ##
  def untranslate_hash(parsed)
    raise ArgumentError, 'Must be a hash' unless parsed.is_a?(Hash)
    if parsed.count == 1 && (parsed.has_key?('$primitive') || parsed.has_key?(:$primitive))
      return parsed.values.first
    end

    hash = {}
    parsed.each do |key, val|
      # val = Time.at(val).utc if is_time_field(key, val)
      hash[key.to_sym] = val
    end
    hash
  end

  ##
  ## Prepare a hash for dumping. Currently this only returns a shallow
  ## clone, so the caller's hash is never handed to the JSON generator.
  ##
  ## Raises ArgumentError unless `hash` is a Hash.
  ##
  def translate_hash(hash)
    raise ArgumentError, 'Must be a hash' unless hash.is_a?(Hash)
    copy = hash.clone
    #copy.keys.each do |key|
    #  copy[key] = copy[key].utc.to_f if is_time_field(key, copy[key])
    #end
    copy
  end

  ##
  ## Parse a JSON string. Objects are built as IndifferentHash with symbol
  ## keys, and json/add-style additions are enabled. A Hash result goes
  ## through #untranslate_hash; anything else is returned untouched.
  ##
  ## NOTE(review): newer json releases appear to reject :symbolize_names
  ## combined with :create_additions - confirm against the json version
  ## this gem is locked to.
  ##
  def deserialize(str)
    parsed = JSON.parse(str, :symbolize_names => true,
                             :object_class => IndifferentHash,
                             :create_additions => true)
    case parsed
    when Hash then untranslate_hash(parsed)
    else parsed
    end
  end

  ##
  ## Dump `obj` as a JSON string. Hashes go through #translate_hash,
  ## primitives are wrapped under '$primitive', and every other object is
  ## passed to JSON.dump unchanged.
  ##
  def serialize(obj)
    payload = case obj
              when Hash then translate_hash(obj)
              when Numeric, String, TrueClass, FalseClass, NilClass then
                {'$primitive' => obj}
              else obj
              end
    JSON.dump(payload)
  end

  ##
  ## Hash whose readers (#[] and #fetch) convert the lookup key with to_sym
  ## first. Writers are not overridden, so keys are stored exactly as given.
  ##
  class IndifferentHash < Hash
    ## Unlike Hash#fetch, a default is always supplied (nil unless given),
    ## so a missing key returns the default rather than raising.
    def fetch(key, defaultval = nil)
      super(key.to_sym, defaultval)
    end

    def [](key)
      super(key.to_sym)
    end
  end
end
@@ -0,0 +1,27 @@
1
+
2
module ZeevexCluster
  ##
  ## Mixin that gives a class JSON round-tripping through
  ## ZeevexCluster::Serializer::JsonHash.
  ##
  ## Including it adds #to_json / #serializer to instances and
  ## .from_json / .serializer to the including class.
  ##
  module Serializer
    ##
    ## Ruby calls this hook on the *module* when it is included, so it has to
    ## be a singleton method. Defined as a plain instance method it is never
    ## invoked and ClassMethods is never attached.
    ##
    def self.included(base)
      base.extend(ClassMethods)
      base.class_eval { include ZeevexCluster::Serializer::InstanceMethods }
    end

    module InstanceMethods
      ## Serialize this object with the per-instance serializer.
      def to_json
        serializer.serialize(self)
      end

      ## Lazily built, memoized JsonHash serializer for this instance.
      def serializer
        @_serializer ||= ZeevexCluster::Serializer::JsonHash.new
      end
    end

    module ClassMethods
      ##
      ## Deserialize `string` with the class-level serializer and return
      ## whatever JsonHash#deserialize produces.
      ##
      def from_json(string)
        # hand over the JSON text itself, not the class
        serializer.deserialize(string)
      end

      ## Lazily built, memoized JsonHash serializer shared by the class.
      def serializer
        @_serializer ||= ZeevexCluster::Serializer::JsonHash.new
      end
    end
  end
end
26
+
27
+ require 'zeevex_cluster/serializer/json_hash'
@@ -0,0 +1,67 @@
1
module ZeevexCluster
  ##
  ## Cluster whose master is fixed at construction time via
  ## options[:master_nodename]. Membership is just a local flag.
  ##
  class Static < Base
    ##
    ## options[:master_nodename] is required. The special value :self is
    ## replaced by this node's own nodename.
    ##
    ## Raises ArgumentError when :master_nodename is missing.
    ##
    def initialize(options = {})
      super
      configured_master = @options[:master_nodename]
      raise ArgumentError, "Must supply :master_nodename" unless configured_master

      @options[:master_nodename] = nodename if configured_master == :self
      @member = false
      after_initialize
    end

    ##
    ## Mark this node as a member; nothing else happens.
    ##
    def join
      @member = true
    end

    ##
    ## Clear the membership flag. Always returns true.
    ##
    def leave
      @member = false
      true
    end

    ##
    ## Strict boolean view of the membership flag.
    ##
    def member?
      @member ? true : false
    end

    ##
    ## True only for a member whose nodename equals the configured master.
    ##
    def master?
      return false unless member?

      nodename == options[:master_nodename]
    end

    ##
    ## The master cannot be changed: returns true if this node already is
    ## the master, otherwise raises ClusterActionFailed.
    ##
    def make_master!
      return true if master?

      raise ClusterActionFailed, "Can not change master"
    end

    ##
    ## Resigning is not supported and this method always raises:
    ## NotMaster when this node is not the master, ClusterPolicyViolation
    ## when it is (the master is fixed).
    ##
    def resign!
      if master?
        raise ClusterPolicyViolation, "Current master cannot resign in this implementation."
      else
        raise NotMaster
      end
    end

    ##
    ## Name of the configured master node.
    ##
    def master
      options[:master_nodename]
    end

  end
end
@@ -0,0 +1,92 @@
1
+ require 'zeevex_cluster/strategy'
2
+ require 'hookem'
3
+
4
+ # require 'zeevex_threadsafe/thread_safer'
5
+
6
module ZeevexCluster::Strategy
  ##
  ## Common state and status bookkeeping shared by the cluster strategies.
  ##
  ## Tracks four values: the run state (@state), this node's role
  ## (@my_cluster_status), the master's condition (@master_status) and the
  ## cluster connection (@cluster_status). Each change_* helper fires a hook
  ## when, and only when, the value actually changes.
  ##
  class Base
    include ZeevexCluster::Util::Logging
    include Hookem
    # include ZeevexThreadsafe::ThreadSafer

    ##
    ## Recognised options: :namespace, :cluster_name, :nodename (falls back
    ## to the local hostname), :logger and :hooks (passed to add_hooks when
    ## present).
    ##
    ## NOTE(review): Socket is referenced here, but this file's visible
    ## requires do not load 'socket' - confirm it is loaded before an
    ## instance is built without :nodename.
    ##
    def initialize(options = {})
      @options      = options
      @namespace    = options[:namespace]
      @cluster_name = options[:cluster_name]
      @nodename     = options[:nodename] || Socket.gethostname
      @hooks        = {}
      @logger       = options[:logger]

      @state = :stopped
      reset_state_vars
      _initialize_hook_module

      initial_hooks = options[:hooks]
      add_hooks initial_hooks if initial_hooks
    end

    ## True when a current master has been recorded in @current_master.
    def has_master?
      @current_master ? true : false
    end

    ## True when this node's own status is :master.
    def am_i_master?
      @my_cluster_status == :master
    end

    ## Current run state symbol.
    def state
      @state
    end

    ## True when the cluster status is :online.
    def online?
      @cluster_status == :online
    end

    ## Membership is defined as being online.
    def member?
      online?
    end

    def started?
      @state == :started
    end

    def stopped?
      @state == :stopped
    end

    protected

    ##
    ## Record this node's new status and run the :status_change hook with
    ## (new, old, attrs). Does nothing when the status is unchanged.
    ##
    def change_my_status(status, attrs = {})
      previous = @my_cluster_status
      return if status == previous

      @my_cluster_status = status
      run_hook :status_change, status, previous, attrs
    end

    ##
    ## Same contract as change_my_status, for the master's status and the
    ## :master_status_change hook.
    ##
    def change_master_status(status, attrs = {})
      previous = @master_status
      return if status == previous

      @master_status = status
      run_hook :master_status_change, status, previous, attrs
    end

    ##
    ## Same contract as change_my_status, for the cluster status and the
    ## :cluster_status_change hook.
    ##
    def change_cluster_status(status, attrs = {})
      previous = @cluster_status
      return if status == previous

      @cluster_status = status
      run_hook :cluster_status_change, status, previous, attrs
    end

    ##
    ## Put every status variable back to its initial value. Assigns the
    ## variables directly, so no hooks are run.
    ##
    def reset_state_vars
      @state             = :stopped
      @my_cluster_status = :nonmember
      @master_status     = :none
      @cluster_status    = :offline
    end

    # make_thread_safe :change_my_status, :change_master_status, :change_cluster_status
  end
end
@@ -0,0 +1,403 @@
1
+ require 'zeevex_cluster/strategy/base'
2
+ require 'socket'
3
+ require 'logger'
4
+
5
##
## Master election on top of a key/value coordinator that offers
## get / set / add / delete / cas.
##
## A background thread periodically registers this node in a shared member
## list and "campaigns" for the master token stored under the throne key.
## The token is a hash of :nodename, :joined_at, :locked_at and :timestamp.
## A token whose :timestamp is older than stale_time seconds is treated as
## invalid and may be taken over by another node.
##
## As used here, the coordinator's cas yields the current value to the block
## and stores the block's result; the block raises
## ZeevexCluster::Coordinator::DontChange to leave the value alone. This
## code treats a truthy cas result as "we wrote", nil as "key is absent"
## and false as "someone else got there first".
##
class ZeevexCluster::Strategy::Cas < ZeevexCluster::Strategy::Base

  attr_accessor :stale_time, :update_period, :server, :nodename, :cluster_name

  # update periods a master may miss before the :suspect_master hook fires
  SUSPECT_MISSED_UPDATE_COUNT = 3
  # update periods a node must hold the lock before it counts as master
  INAUGURATION_UPDATE_DELAY = 2

  ##
  ## Options, in addition to those of Strategy::Base:
  ##   :stale_time          seconds until a token is stale (default 40)
  ##   :update_period       seconds between campaign rounds (default 10)
  ##   :coordinator         ready-made coordinator object, or
  ##   :coordinator_type    type handed to Coordinator.create
  ##                        (default 'memcached') together with :server,
  ##                        :port and :coordinator_options
  ##
  ## The coordinator is wrapped in ZeevexCluster.Synchronized unless it
  ## already is a ZeevexCluster::Synchronized.
  ##
  def initialize(options = {})
    super
    @stale_time = options.fetch(:stale_time, 40)
    @update_period = options.fetch(:update_period, 10)

    unless (@server = options[:coordinator])
      coordinator_type = options[:coordinator_type] || 'memcached'
      @server = ZeevexCluster::Coordinator.create(coordinator_type,
                                                  {:server => options[:server],
                                                   :port => options[:port],
                                                   :expiration => @stale_time * 4}.merge(options[:coordinator_options] || {}))
    end
    unless @server.is_a?(ZeevexCluster::Synchronized)
      @server = ZeevexCluster.Synchronized(@server)
    end
  end

  ## True while this node is master or master-elect.
  def do_i_hold_lock?
    @my_cluster_status == :master || @my_cluster_status == :master_elect
  end

  ## Token of the current master, or nil.
  def master_node
    @current_master
  end

  ## Node name of the current master, or nil when there is none.
  def master_nodename
    @current_master && @current_master[:nodename]
  end

  ## Raised inside the worker thread by #stop to interrupt its sleep.
  class StopException < StandardError; end

  ##
  ## Start the worker thread that runs #spin.
  ##
  ## Raises RuntimeError("Already started") if a thread exists or the state
  ## is :started. Any StandardError escaping #spin is logged, and the state
  ## is always set back to :stopped when the thread ends.
  ##
  def start
    raise "Already started" if @thread || @state == :started
    @start_time = time_now
    @state = :started
    @locked_at = nil
    @thread = Thread.new do
      begin
        change_my_status :member
        spin
      rescue => e
        logger.warn "rescued from spin: #{e.inspect}\n#{e.backtrace.join("\n")}"
      ensure
        logger.debug "spin over"
        @state = :stopped
      end
    end
  end

  ##
  ## Ask the worker thread to stop, wait for it, and reset all state.
  ##
  ## Safe to call when the strategy is already stopped or was never
  ## started: in that case there may be no thread, so the raise and the
  ## join are skipped instead of being sent to nil.
  ##
  ## Raises RuntimeError for an unrecognised state.
  ##
  def stop
    thread = @thread
    case @state
    when :stop_requested, :stopped
      # nothing to signal; just wait for the thread (if any) below
    when :started
      @state = :stop_requested
      thread.raise(StopException.new('stop')) if thread
    else
      raise "Bad state: #{@state}"
    end
    thread.join if thread
    @thread = nil
    change_my_status :nonmember
    reset_state_vars
  end

  ##
  ## Give up the master token and stay out of the election for `delay`
  ## seconds (default: the smaller of 6 update periods and stale_time).
  ##
  ## A delay of 0 cancels a previous resignation and campaigns at once.
  ##
  ## If this node holds the token it is rewritten with a timestamp two
  ## stale_times in the past, which makes it invalid for every node.
  ##
  def resign(delay = nil)
    # unresign
    if delay == 0
      @resign_until = nil
      campaign
    else
      @resign_until = time_now + (delay || [@update_period*6, @stale_time].min)
      current = nil
      server.cas(key) do |val|
        current = val
        if is_me?(val)
          my_token.merge(:timestamp => time_now - 2*@stale_time)
        else
          raise ZeevexCluster::Coordinator::DontChange
        end
      end
      failed_lock(my_token, current)
    end
  rescue ZeevexCluster::Coordinator::ConnectionError
    failed_lock(my_token, nil)
  end

  ##
  ## Overwrite the master token with our own, bypassing cas.
  ## Returns true on success, false on a coordinator connection error.
  ##
  def steal_election!
    logger.warn "Stealing election"
    @resign_until = nil
    me = my_token
    server.set(key, me)
    got_lock(me)
    true
  rescue ZeevexCluster::Coordinator::ConnectionError
    false
  end

  ##
  ## Node names from the shared member list whose timestamp is not older
  ## than stale_time. Falls back to a list containing only this node when
  ## the coordinator has no member list.
  ##
  def members
    stale_point = time_now - @stale_time
    list = server.get(key('members')) || make_member_list
    fresh = list[:members].values.reject { |entry| entry[:timestamp].utc < stale_point }
    fresh.map { |entry| entry[:nodename] }
  end

  protected

  ##
  ## Body of the worker thread: register and campaign once per round, then
  ## sleep for update_period - 1 seconds (at least 1) until the state leaves
  ## :started. On the way out it resigns if it holds the lock, unregisters,
  ## and runs the :left_cluster and :stopped hooks.
  ##
  def spin
    logger.debug "spin started"
    @state = :started
    run_hook :started
    run_hook :joined_cluster, cluster_name
    while @state == :started
      begin
        register
        campaign
        if @state == :started
          begin
            sleep [@update_period - 1, 1].max
          rescue StopException
            logger.debug 'Stopping on stop exception'
          end
        end
      rescue ZeevexCluster::Coordinator::ConnectionError
        connection_error
      end
    end
  ensure
    ignoring_connection_error { resign } if do_i_hold_lock?
    ignoring_connection_error { unregister }
    @state = :stopped
    run_hook :left_cluster, cluster_name
    change_cluster_status :offline
    run_hook :stopped
  end

  ##
  ## Run the block; a coordinator ConnectionError is logged and returned
  ## instead of being raised.
  ##
  def ignoring_connection_error
    yield
  rescue ZeevexCluster::Coordinator::ConnectionError => e
    logger.debug 'got connection error in ignoring_connection_error'
    e
  end

  ## Run the :connection_error hook and mark the cluster offline.
  def connection_error
    run_hook :connection_error
    change_cluster_status :offline
  end

  ##
  ## Fresh token for this node. :locked_at keeps the time the lock was
  ## first won, or "now" when the lock is not held.
  ##
  def my_token
    now = time_now
    {:nodename => nodename,
     :joined_at => @start_time,
     :locked_at => @locked_at || now,
     :timestamp => now}
  end

  ##
  ## Coordinator key: "<cluster_key or cluster_name>:<subkey>".
  ## The default subkey 'throne' is the master token.
  ##
  def key(subkey = 'throne')
    (@options[:cluster_key] || cluster_name) + ":" + subkey
  end

  ## Truthy when `token` is a hash naming this node.
  def is_me?(token)
    token && token.is_a?(Hash) && token[:nodename] == nodename
  end

  ##
  ## Bookkeeping after writing our token. The first win records @locked_at
  ## and fires :election_won. The node becomes :master only once the token
  ## qualifies (see #qualifies_for_master?); until then it is :master_elect.
  ##
  def got_lock(token)
    unless @locked_at
      @locked_at = token[:timestamp]
      token = my_token
      run_hook :election_won
    end
    @my_master_token = token
    if qualifies_for_master?(token)
      change_my_status :master
      if @current_master && is_me?(@current_master)
        run_hook :reelected
      else
        run_hook :became_master
      end
      change_master_status :good
      @current_master = token
    else
      change_my_status :master_elect
      change_master_status :waiting_for_inauguration
      run_hook :waiting_for_inauguration
      @current_master = nil
    end
  end

  ##
  ## Bookkeeping after not getting the lock. `winner` is the token seen in
  ## the coordinator (may be nil). A former master becomes :lame_duck; any
  ## other node becomes :member. `me` is accepted but not used.
  ##
  def failed_lock(me, winner)
    @locked_at = nil

    if qualifies_for_master?(winner)
      @current_master = winner
      change_my_status :member
      change_master_status :good
    elsif ! token_invalid?(winner)
      @current_master = winner
      change_master_status :waiting_for_inauguration
    else
      @current_master = nil
      change_master_status :none
    end
    run_hook :election_lost, @current_master

    if @my_cluster_status == :master
      @my_master_token = nil
      change_my_status :lame_duck
      run_hook :lame_duck
    else
      change_my_status :member
    end
  end

  #
  # Must have held lock for INAUGURATION_UPDATE_DELAY update periods
  #
  def qualifies_for_master?(token)
    now = time_now
    !token_invalid?(token) &&
      token[:timestamp] > (now - @stale_time) &&
      token[:locked_at] <= (now - INAUGURATION_UPDATE_DELAY * @update_period)
  end

  ## Current time in UTC; all token arithmetic goes through this.
  def time_now
    Time.now.utc
  end

  ##
  ## True when the token is missing, not a hash, lacks :timestamp,
  ## :locked_at or :nodename, or has a timestamp older than stale_time.
  ##
  def token_invalid?(token)
    now = time_now
    !token || !token.is_a?(Hash) || !token[:timestamp] ||
      ! token[:locked_at] || ! token[:nodename] ||
      token[:timestamp].utc < (now - @stale_time)
  end

  ## Truthy while a resignation period set by #resign is still running.
  def resigned?
    @resign_until && @resign_until > time_now
  end

  ##
  ## One election round. Tries to refresh our own token or take over an
  ## invalid one via cas, falling back to add when the key does not exist.
  ##
  ## Returns true when this node holds the lock afterwards, false when it
  ## does not, and nil when it is deliberately staying resigned.
  ##
  def campaign
    me = my_token

    act_resigned = resigned?
    compete_for_token = !act_resigned

    hook = nil
    current = nil
    res = server.cas(key) do |val|
      current = val
      if is_me?(val) && !token_invalid?(val) && compete_for_token
        me
      elsif token_invalid?(val) && compete_for_token
        if is_me?(val)
          logger.info "My old token is invalid, refreshing: #{val.inspect}"
        else
          logger.info "CAS: master invalid, stealing: #{val.inspect}"
          # it's necessary to run this outside of the CAS block to be sure we won
          hook = :deposed_master
        end
        me
      else
        run_hook :suspect_master if @master_status != :none && master_suspect?(val)
        raise ZeevexCluster::Coordinator::DontChange
      end
    end

    # if we got a result, we must be online
    change_cluster_status :online

    if act_resigned
      run_hook :staying_resigned
      failed_lock(me, current)
      return
    else
      @resign_until = nil
    end

    # CAS succeeded so we're the boss
    if res
      run_hook hook if hook
      got_lock(me)
      return true
    end

    # nil means there was no token at all: try to create it
    if res.nil?
      failed_lock(me, nil)
      if server.add(key, me)
        logger.debug 'CAS: added frist post!'
        got_lock(me)
        return true
      end
    end

    # didn't get it, somebody else must be boss
    failed_lock(me, current)
    false
  rescue ZeevexCluster::Coordinator::ConnectionError
    connection_error
    failed_lock(me, current)
    false
  end

  ## Member list containing only this node.
  def make_member_list
    {:members => {@nodename => my_token}}
  end

  ##
  ## Publish this node: write its own member key, then cas it into the
  ## shared member list (up to 5 attempts), dropping stale entries on the
  ## way. If the list does not exist (nil result) it is created with add.
  ##
  ## Returns true, or false on a coordinator connection error.
  ## Raises RuntimeError when the per-node key cannot be set.
  ##
  def register
    me = my_token

    self_key = self.key('member:' + @nodename)
    memberlist_key = self.key('members')
    server.set(self_key, me) or raise "failed to set #{self_key}"

    res = false
    retries = 5

    while retries > 0 && res == false
      stale_point = time_now - @stale_time
      res = server.cas(memberlist_key) do |hash|
        hash[:members] ||= {}
        hash[:members].delete_if { |_name, entry| entry[:timestamp] < stale_point }
        hash[:members][@nodename] = me
        hash
      end
      retries -= 1
    end

    if res.nil?
      server.add(memberlist_key, {:members => {@nodename => me}})
    end

    true
  rescue ZeevexCluster::Coordinator::ConnectionError
    connection_error
    false
  end

  ##
  ## Withdraw this node: delete its own member key and cas it out of the
  ## shared member list (up to 5 attempts).
  ##
  ## Returns true, or false on a coordinator connection error.
  ##
  def unregister
    self_key = self.key('member:' + @nodename)
    memberlist_key = self.key('members')
    server.delete(self_key)

    res = false
    retries = 5

    while retries > 0 && res == false
      res = server.cas(memberlist_key) do |hash|
        hash[:members] ||= {}
        hash[:members].delete @nodename
        hash
      end
      retries -= 1
    end

    true
  rescue ZeevexCluster::Coordinator::ConnectionError
    connection_error
    false
  end

  #
  # has the master gone without updating suspiciously long?
  #
  def master_suspect?(token)
    time_now - token[:timestamp] > SUSPECT_MISSED_UPDATE_COUNT * @update_period
  end

  ## Reset the base status variables plus the election state kept here.
  def reset_state_vars
    super

    @resign_until = nil
    @my_master_token = nil
    @current_master = nil
    @thread = nil
  end
end
@@ -0,0 +1,55 @@
1
+ require 'zeevex_cluster/strategy/base'
2
+
3
module ZeevexCluster::Strategy
  ##
  ## Strategy with a master fixed by configuration: no election takes
  ## place, the node named in :master_nodename is the master.
  ##
  class Static < Base
    ##
    ## options[:master_nodename] is required; options[:members] optionally
    ## supplies the member list returned by #members.
    ##
    ## Raises ArgumentError when :master_nodename is missing.
    ##
    def initialize(options = {})
      super
      configured_master = options[:master_nodename]
      raise ArgumentError, 'Must specify :master_nodename' unless configured_master

      @master_nodename = configured_master
      @members = options[:members]
    end

    ##
    ## Go online. This node becomes :master with master status :good when
    ## its nodename matches the configured master; otherwise it becomes a
    ## :member and the master status is :unknown.
    ##
    def start
      @state = :started
      change_cluster_status :online

      this_node_is_master = @nodename == @master_nodename
      change_my_status(this_node_is_master ? :master : :member)
      change_master_status(this_node_is_master ? :good : :unknown)
    end

    ##
    ## Go offline: this node becomes a :nonmember, the master status
    ## :unknown and the cluster status :offline.
    ##
    def stop
      @state = :stopped
      change_my_status :nonmember
      change_master_status :unknown
      change_cluster_status :offline
    end

    ## Master only while started and holding the :master status.
    def am_i_master?
      @state == :started && @my_cluster_status == :master
    end

    # FIXME: this is CAS-specific
    ## Minimal master token: a hash carrying only :nodename.
    def master_node
      {:nodename => @master_nodename}
    end

    ##
    ## The configured member list, or else the distinct names of the master
    ## and this node, leaving out the literal name "none".
    ##
    def members
      return @members if @members

      [@master_nodename, @nodename].reject { |name| name == "none" }.uniq
    end

    ##
    ## Not supported: logs a warning and returns false. `delay` is ignored.
    ##
    def resign(delay = nil)
      # master is currently fixed, so we can't resign
      logger.warn 'Current master cannot resign in this implementation.'
      false
    end

    ##
    ## Returns true when this node already is the master; otherwise raises
    ## ClusterActionFailed because the master cannot be changed.
    ##
    ## NOTE(review): ClusterActionFailed is looked up from the
    ## ZeevexCluster::Strategy scope here - confirm the constant is
    ## reachable from it.
    ##
    def steal_election!
      return true if am_i_master?

      raise ClusterActionFailed, 'Can not change master'
    end

  end
end
@@ -0,0 +1,9 @@
1
module ZeevexCluster::Strategy
  ##
  ## Single-node strategy: a Static strategy in which this node is its own
  ## master and the only member.
  ##
  class Unclustered < Static
    ##
    ## options[:nodename] is used as the master name as well.
    ##
    ## The caller's hash is left untouched: :master_nodename is merged into
    ## a copy rather than written into the argument, so a shared or frozen
    ## options hash is safe to pass.
    ##
    def initialize(options)
      own_options = options.merge(:master_nodename => options[:nodename])
      super(own_options)
      @members = [@nodename]
    end
  end
end