redis_failover 0.9.4 → 0.9.5
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +1 -1
- data/Changes.md +5 -0
- data/README.md +1 -1
- data/lib/redis_failover/errors.rb +7 -0
- data/lib/redis_failover/node.rb +0 -1
- data/lib/redis_failover/node_manager.rb +108 -83
- data/lib/redis_failover/node_watcher.rb +2 -2
- data/lib/redis_failover/runner.rb +7 -9
- data/lib/redis_failover/version.rb +1 -1
- data/spec/node_manager_spec.rb +24 -0
- data/spec/support/node_manager_stub.rb +3 -2
- metadata +4 -4
data/.gitignore
CHANGED
data/Changes.md
CHANGED
data/README.md
CHANGED
@@ -155,7 +155,7 @@ redis_failover uses YARD for its API documentation. Refer to the generated [API
|
|
155
155
|
|
156
156
|
## Requirements
|
157
157
|
|
158
|
-
- redis_failover is actively tested against MRI 1.9.2/1.9.3 and JRuby 1.6.7 (1.9 mode only). Other rubies may work, although I don't actively test against them.
|
158
|
+
- redis_failover is actively tested against MRI 1.8.7/1.9.2/1.9.3 and JRuby 1.6.7 (1.9 mode only). Other rubies may work, although I don't actively test against them.
|
159
159
|
- redis_failover requires a ZooKeeper service cluster to ensure reliability and data consistency. ZooKeeper is very simple and easy to get up and running. Please refer to this [Quick ZooKeeper Guide](https://github.com/ryanlecompte/redis_failover/wiki/Quick-ZooKeeper-Guide) to get up and running quickly if you don't already have ZooKeeper as a part of your environment.
|
160
160
|
|
161
161
|
## Considerations
|
data/lib/redis_failover/errors.rb
CHANGED
@@ -25,6 +25,13 @@ module RedisFailover
|
|
25
25
|
class NoMasterError < Error
|
26
26
|
end
|
27
27
|
|
28
|
+
# Raised when more than one master is found on startup.
|
29
|
+
class MultipleMastersError < Error
|
30
|
+
def initialize(nodes)
|
31
|
+
super("Multiple nodes with master role: #{nodes.map(&:to_s)}")
|
32
|
+
end
|
33
|
+
end
|
34
|
+
|
28
35
|
# Raised when no slave is currently available.
|
29
36
|
class NoSlaveError < Error
|
30
37
|
end
|
data/lib/redis_failover/node.rb
CHANGED
data/lib/redis_failover/node_manager.rb
CHANGED
@@ -32,13 +32,11 @@ module RedisFailover
|
|
32
32
|
@znode = @options[:znode_path] || Util::DEFAULT_ZNODE_PATH
|
33
33
|
@manual_znode = ManualFailover::ZNODE_PATH
|
34
34
|
@mutex = Mutex.new
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
# or fails, another Node Manager process will grab the lock and
|
41
|
-
# become the
|
35
|
+
@shutdown = false
|
36
|
+
@leader = false
|
37
|
+
@master = nil
|
38
|
+
@slaves = []
|
39
|
+
@unavailable = []
|
42
40
|
@lock_path = "#{@znode}_lock".freeze
|
43
41
|
end
|
44
42
|
|
@@ -46,21 +44,22 @@ module RedisFailover
|
|
46
44
|
#
|
47
45
|
# @note This method does not return until the manager terminates.
|
48
46
|
def start
|
47
|
+
return unless running?
|
49
48
|
@queue = Queue.new
|
50
|
-
@leader = false
|
51
49
|
setup_zk
|
52
50
|
logger.info('Waiting to become master Node Manager ...')
|
53
51
|
with_lock do
|
54
52
|
@leader = true
|
55
53
|
logger.info('Acquired master Node Manager lock')
|
56
|
-
discover_nodes
|
57
|
-
|
58
|
-
|
59
|
-
|
54
|
+
if discover_nodes
|
55
|
+
initialize_path
|
56
|
+
spawn_watchers
|
57
|
+
handle_state_reports
|
58
|
+
end
|
60
59
|
end
|
61
60
|
rescue *ZK_ERRORS => ex
|
62
61
|
logger.error("ZK error while attempting to manage nodes: #{ex.inspect}")
|
63
|
-
|
62
|
+
reset
|
64
63
|
retry
|
65
64
|
end
|
66
65
|
|
@@ -73,16 +72,23 @@ module RedisFailover
|
|
73
72
|
@queue << [node, state]
|
74
73
|
end
|
75
74
|
|
76
|
-
# Performs a
|
77
|
-
def
|
78
|
-
@
|
79
|
-
@queue << nil
|
75
|
+
# Performs a reset of the manager.
|
76
|
+
def reset
|
77
|
+
@leader = false
|
80
78
|
@watchers.each(&:shutdown) if @watchers
|
81
|
-
|
79
|
+
@queue.clear
|
82
80
|
@zk.close! if @zk
|
83
81
|
@zk_lock = nil
|
84
82
|
end
|
85
83
|
|
84
|
+
# Initiates a graceful shutdown.
|
85
|
+
def shutdown
|
86
|
+
logger.info('Shutting down ...')
|
87
|
+
@mutex.synchronize do
|
88
|
+
@shutdown = true
|
89
|
+
end
|
90
|
+
end
|
91
|
+
|
86
92
|
private
|
87
93
|
|
88
94
|
# Configures the ZooKeeper client.
|
@@ -92,17 +98,8 @@ module RedisFailover
|
|
92
98
|
@zk.on_expired_session { notify_state(:zk_disconnected, nil) }
|
93
99
|
|
94
100
|
@zk.register(@manual_znode) do |event|
|
95
|
-
|
96
|
-
|
97
|
-
if event.node_created? || event.node_changed?
|
98
|
-
schedule_manual_failover
|
99
|
-
end
|
100
|
-
rescue => ex
|
101
|
-
logger.error("Error scheduling a manual failover: #{ex.inspect}")
|
102
|
-
logger.error(ex.backtrace.join("\n"))
|
103
|
-
ensure
|
104
|
-
@zk.stat(@manual_znode, :watch => true)
|
105
|
-
end
|
101
|
+
if event.node_created? || event.node_changed?
|
102
|
+
perform_manual_failover
|
106
103
|
end
|
107
104
|
end
|
108
105
|
|
@@ -112,23 +109,23 @@ module RedisFailover
|
|
112
109
|
|
113
110
|
# Handles periodic state reports from {RedisFailover::NodeWatcher} instances.
|
114
111
|
def handle_state_reports
|
115
|
-
while state_report = @queue.pop
|
116
|
-
# Ensure that we still have the master lock.
|
117
|
-
@zk_lock.assert!
|
118
|
-
|
112
|
+
while running? && (state_report = @queue.pop)
|
119
113
|
begin
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
114
|
+
@mutex.synchronize do
|
115
|
+
return unless running?
|
116
|
+
@zk_lock.assert!
|
117
|
+
node, state = state_report
|
118
|
+
case state
|
119
|
+
when :unavailable then handle_unavailable(node)
|
120
|
+
when :available then handle_available(node)
|
121
|
+
when :syncing then handle_syncing(node)
|
122
|
+
when :zk_disconnected then raise ZKDisconnectedError
|
123
|
+
else raise InvalidNodeStateError.new(node, state)
|
124
|
+
end
|
129
125
|
|
130
|
-
|
131
|
-
|
126
|
+
# flush current state
|
127
|
+
write_state
|
128
|
+
end
|
132
129
|
rescue *ZK_ERRORS
|
133
130
|
# fail hard if this is a ZK connection-related error
|
134
131
|
raise
|
@@ -204,7 +201,7 @@ module RedisFailover
|
|
204
201
|
logger.info("Handling manual failover")
|
205
202
|
|
206
203
|
# make current master a slave, and promote new master
|
207
|
-
@slaves << @master
|
204
|
+
@slaves << @master if @master
|
208
205
|
@slaves.delete(node)
|
209
206
|
promote_new_master(node)
|
210
207
|
end
|
@@ -234,16 +231,35 @@ module RedisFailover
|
|
234
231
|
end
|
235
232
|
|
236
233
|
# Discovers the current master and slave nodes.
|
234
|
+
# @return [Boolean] true if nodes successfully discovered, false otherwise
|
237
235
|
def discover_nodes
|
238
|
-
|
239
|
-
|
240
|
-
|
241
|
-
|
242
|
-
|
243
|
-
|
244
|
-
|
245
|
-
|
246
|
-
|
236
|
+
@mutex.synchronize do
|
237
|
+
return false unless running?
|
238
|
+
nodes = @options[:nodes].map { |opts| Node.new(opts) }.uniq
|
239
|
+
if @master = find_existing_master
|
240
|
+
logger.info("Using master #{@master} from existing znode config.")
|
241
|
+
elsif @master = guess_master(nodes)
|
242
|
+
logger.info("Guessed master #{@master} from known redis nodes.")
|
243
|
+
end
|
244
|
+
@slaves = nodes - [@master]
|
245
|
+
logger.info("Managing master (#{@master}) and slaves " +
|
246
|
+
"(#{@slaves.map(&:to_s).join(', ')})")
|
247
|
+
# ensure that slaves are correctly pointing to this master
|
248
|
+
redirect_slaves_to(@master)
|
249
|
+
true
|
250
|
+
end
|
251
|
+
rescue NodeUnavailableError, NoMasterError, MultipleMastersError => ex
|
252
|
+
msg = <<-MSG.gsub(/\s+/, ' ')
|
253
|
+
Failed to discover master node: #{ex.inspect}
|
254
|
+
In order to ensure a safe startup, redis_failover requires that all redis
|
255
|
+
nodes be accessible, and only a single node indicating that it's the master.
|
256
|
+
In order to fix this, you can perform a manual failover via redis_failover,
|
257
|
+
or manually fix the individual redis servers. This discovery process will
|
258
|
+
retry in #{TIMEOUT}s.
|
259
|
+
MSG
|
260
|
+
logger.warn(msg)
|
261
|
+
sleep(TIMEOUT)
|
262
|
+
retry
|
247
263
|
end
|
248
264
|
|
249
265
|
# Seeds the initial node master from an existing znode config.
|
@@ -251,7 +267,7 @@ module RedisFailover
|
|
251
267
|
if data = @zk.get(@znode).first
|
252
268
|
nodes = symbolize_keys(decode(data))
|
253
269
|
master = node_from(nodes[:master])
|
254
|
-
logger.info("Master from existing config: #{master || 'none'}")
|
270
|
+
logger.info("Master from existing znode config: #{master || 'none'}")
|
255
271
|
master
|
256
272
|
end
|
257
273
|
rescue ZK::Exceptions::NoNode
|
@@ -281,14 +297,11 @@ module RedisFailover
|
|
281
297
|
#
|
282
298
|
# @param [Array<Node>] nodes the nodes to search
|
283
299
|
# @return [Node] the found master node, nil if not found
|
284
|
-
def
|
285
|
-
nodes.
|
286
|
-
|
287
|
-
|
288
|
-
|
289
|
-
false
|
290
|
-
end
|
291
|
-
end
|
300
|
+
def guess_master(nodes)
|
301
|
+
master_nodes = nodes.select { |node| node.master? }
|
302
|
+
raise NoMasterError if master_nodes.empty?
|
303
|
+
raise MultipleMastersError.new(master_nodes) if master_nodes.size > 1
|
304
|
+
master_nodes.first
|
292
305
|
end
|
293
306
|
|
294
307
|
# Redirects all slaves to the specified node.
|
@@ -378,32 +391,44 @@ module RedisFailover
|
|
378
391
|
# Executes a block wrapped in a ZK exclusive lock.
|
379
392
|
def with_lock
|
380
393
|
@zk_lock = @zk.locker(@lock_path)
|
381
|
-
|
382
|
-
|
394
|
+
while running? && !@zk_lock.lock
|
395
|
+
sleep(TIMEOUT)
|
396
|
+
end
|
397
|
+
|
398
|
+
if running?
|
399
|
+
yield
|
400
|
+
end
|
383
401
|
ensure
|
384
402
|
@zk_lock.unlock! if @zk_lock
|
385
403
|
end
|
386
404
|
|
387
|
-
#
|
388
|
-
def
|
389
|
-
|
390
|
-
|
391
|
-
|
392
|
-
|
393
|
-
|
394
|
-
|
395
|
-
|
396
|
-
|
397
|
-
|
398
|
-
|
399
|
-
|
405
|
+
# Perform a manual failover to a redis node.
|
406
|
+
def perform_manual_failover
|
407
|
+
@mutex.synchronize do
|
408
|
+
return unless running? && @leader && @zk_lock
|
409
|
+
@zk_lock.assert!
|
410
|
+
new_master = @zk.get(@manual_znode, :watch => true).first
|
411
|
+
return unless new_master && new_master.size > 0
|
412
|
+
logger.info("Received manual failover request for: #{new_master}")
|
413
|
+
logger.info("Current nodes: #{current_nodes.inspect}")
|
414
|
+
node = new_master == ManualFailover::ANY_SLAVE ?
|
415
|
+
@slaves.shuffle.first : node_from(new_master)
|
416
|
+
if node
|
417
|
+
handle_manual_failover(node)
|
418
|
+
else
|
419
|
+
logger.error('Failed to perform manual failover, no candidate found.')
|
420
|
+
end
|
400
421
|
end
|
422
|
+
rescue => ex
|
423
|
+
logger.error("Error handling a manual failover: #{ex.inspect}")
|
424
|
+
logger.error(ex.backtrace.join("\n"))
|
425
|
+
ensure
|
426
|
+
@zk.stat(@manual_znode, :watch => true)
|
427
|
+
end
|
401
428
|
|
402
|
-
|
403
|
-
|
404
|
-
|
405
|
-
logger.error('Failed to perform manual failover, no candidate found.')
|
406
|
-
end
|
429
|
+
# @return [Boolean] true if running, false otherwise
|
430
|
+
def running?
|
431
|
+
!@shutdown
|
407
432
|
end
|
408
433
|
end
|
409
434
|
end
|
data/lib/redis_failover/runner.rb
CHANGED
@@ -8,22 +8,20 @@ module RedisFailover
|
|
8
8
|
# Node Manager is gracefully stopped
|
9
9
|
def self.run(options)
|
10
10
|
options = CLI.parse(options)
|
11
|
-
|
12
|
-
trap_signals
|
13
|
-
|
14
|
-
@node_manager_thread.join
|
11
|
+
node_manager = NodeManager.new(options)
|
12
|
+
trap_signals(node_manager)
|
13
|
+
node_manager.start
|
15
14
|
end
|
16
15
|
|
17
16
|
# Traps shutdown signals.
|
18
|
-
|
17
|
+
# @param [NodeManager] node_manager the node manager
|
18
|
+
def self.trap_signals(node_manager)
|
19
19
|
[:INT, :TERM].each do |signal|
|
20
20
|
trap(signal) do
|
21
|
-
|
22
|
-
@node_manager.shutdown
|
23
|
-
@node_manager_thread.join
|
24
|
-
exit(0)
|
21
|
+
node_manager.shutdown
|
25
22
|
end
|
26
23
|
end
|
27
24
|
end
|
25
|
+
private_class_method :trap_signals
|
28
26
|
end
|
29
27
|
end
|
data/spec/node_manager_spec.rb
CHANGED
@@ -108,5 +108,29 @@ module RedisFailover
|
|
108
108
|
end
|
109
109
|
end
|
110
110
|
end
|
111
|
+
|
112
|
+
describe '#guess_master' do
|
113
|
+
let(:node1) { Node.new(:host => 'node1').extend(RedisStubSupport) }
|
114
|
+
let(:node2) { Node.new(:host => 'node2').extend(RedisStubSupport) }
|
115
|
+
let(:node3) { Node.new(:host => 'node3').extend(RedisStubSupport) }
|
116
|
+
|
117
|
+
it 'raises error when no master is found' do
|
118
|
+
node1.make_slave!(node3)
|
119
|
+
node2.make_slave!(node3)
|
120
|
+
expect { manager.guess_master([node1, node2]) }.to raise_error(NoMasterError)
|
121
|
+
end
|
122
|
+
|
123
|
+
it 'raises error when multiple masters found' do
|
124
|
+
node1.make_master!
|
125
|
+
node2.make_master!
|
126
|
+
expect { manager.guess_master([node1, node2]) }.to raise_error(MultipleMastersError)
|
127
|
+
end
|
128
|
+
|
129
|
+
it 'raises error when a node can not be reached' do
|
130
|
+
node1.make_master!
|
131
|
+
node2.redis.make_unavailable!
|
132
|
+
expect { manager.guess_master([node1, node2]) }.to raise_error(NodeUnavailableError)
|
133
|
+
end
|
134
|
+
end
|
111
135
|
end
|
112
136
|
end
|
data/spec/support/node_manager_stub.rb
CHANGED
@@ -1,11 +1,12 @@
|
|
1
1
|
module RedisFailover
|
2
2
|
class NodeManagerStub < NodeManager
|
3
3
|
attr_accessor :master
|
4
|
-
|
4
|
+
# HACK - this will go away once we refactor the tests to use a real ZK/Redis server.
|
5
|
+
public :current_nodes, :guess_master
|
5
6
|
|
6
7
|
def discover_nodes
|
7
8
|
# only discover nodes once in testing
|
8
|
-
return if @nodes_discovered
|
9
|
+
return true if @nodes_discovered
|
9
10
|
|
10
11
|
master = Node.new(:host => 'master')
|
11
12
|
slave = Node.new(:host => 'slave')
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: redis_failover
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.9.4
|
4
|
+
version: 0.9.5
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-
|
12
|
+
date: 2012-09-01 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: redis
|
@@ -189,7 +189,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
189
189
|
version: '0'
|
190
190
|
segments:
|
191
191
|
- 0
|
192
|
-
hash: -
|
192
|
+
hash: -2193925210006995870
|
193
193
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
194
194
|
none: false
|
195
195
|
requirements:
|
@@ -198,7 +198,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
198
198
|
version: '0'
|
199
199
|
segments:
|
200
200
|
- 0
|
201
|
-
hash: -
|
201
|
+
hash: -2193925210006995870
|
202
202
|
requirements: []
|
203
203
|
rubyforge_project:
|
204
204
|
rubygems_version: 1.8.23
|