redis_failover 0.9.4 → 0.9.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +1 -1
- data/Changes.md +5 -0
- data/README.md +1 -1
- data/lib/redis_failover/errors.rb +7 -0
- data/lib/redis_failover/node.rb +0 -1
- data/lib/redis_failover/node_manager.rb +108 -83
- data/lib/redis_failover/node_watcher.rb +2 -2
- data/lib/redis_failover/runner.rb +7 -9
- data/lib/redis_failover/version.rb +1 -1
- data/spec/node_manager_spec.rb +24 -0
- data/spec/support/node_manager_stub.rb +3 -2
- metadata +4 -4
data/.gitignore
CHANGED
data/Changes.md
CHANGED
data/README.md
CHANGED
@@ -155,7 +155,7 @@ redis_failover uses YARD for its API documentation. Refer to the generated [API
|
|
155
155
|
|
156
156
|
## Requirements
|
157
157
|
|
158
|
-
- redis_failover is actively tested against MRI 1.9.2/1.9.3 and JRuby 1.6.7 (1.9 mode only). Other rubies may work, although I don't actively test against them.
|
158
|
+
- redis_failover is actively tested against MRI 1.8.7/1.9.2/1.9.3 and JRuby 1.6.7 (1.9 mode only). Other rubies may work, although I don't actively test against them.
|
159
159
|
- redis_failover requires a ZooKeeper service cluster to ensure reliability and data consistency. ZooKeeper is very simple and easy to get up and running. Please refer to this [Quick ZooKeeper Guide](https://github.com/ryanlecompte/redis_failover/wiki/Quick-ZooKeeper-Guide) to get up and running quickly if you don't already have ZooKeeper as a part of your environment.
|
160
160
|
|
161
161
|
## Considerations
|
@@ -25,6 +25,13 @@ module RedisFailover
|
|
25
25
|
class NoMasterError < Error
|
26
26
|
end
|
27
27
|
|
28
|
+
# Raised when more than one master is found on startup.
|
29
|
+
class MultipleMastersError < Error
|
30
|
+
def initialize(nodes)
|
31
|
+
super("Multiple nodes with master role: #{nodes.map(&:to_s)}")
|
32
|
+
end
|
33
|
+
end
|
34
|
+
|
28
35
|
# Raised when no slave is currently available.
|
29
36
|
class NoSlaveError < Error
|
30
37
|
end
|
data/lib/redis_failover/node.rb
CHANGED
@@ -32,13 +32,11 @@ module RedisFailover
|
|
32
32
|
@znode = @options[:znode_path] || Util::DEFAULT_ZNODE_PATH
|
33
33
|
@manual_znode = ManualFailover::ZNODE_PATH
|
34
34
|
@mutex = Mutex.new
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
# or fails, another Node Manager process will grab the lock and
|
41
|
-
# become the
|
35
|
+
@shutdown = false
|
36
|
+
@leader = false
|
37
|
+
@master = nil
|
38
|
+
@slaves = []
|
39
|
+
@unavailable = []
|
42
40
|
@lock_path = "#{@znode}_lock".freeze
|
43
41
|
end
|
44
42
|
|
@@ -46,21 +44,22 @@ module RedisFailover
|
|
46
44
|
#
|
47
45
|
# @note This method does not return until the manager terminates.
|
48
46
|
def start
|
47
|
+
return unless running?
|
49
48
|
@queue = Queue.new
|
50
|
-
@leader = false
|
51
49
|
setup_zk
|
52
50
|
logger.info('Waiting to become master Node Manager ...')
|
53
51
|
with_lock do
|
54
52
|
@leader = true
|
55
53
|
logger.info('Acquired master Node Manager lock')
|
56
|
-
discover_nodes
|
57
|
-
|
58
|
-
|
59
|
-
|
54
|
+
if discover_nodes
|
55
|
+
initialize_path
|
56
|
+
spawn_watchers
|
57
|
+
handle_state_reports
|
58
|
+
end
|
60
59
|
end
|
61
60
|
rescue *ZK_ERRORS => ex
|
62
61
|
logger.error("ZK error while attempting to manage nodes: #{ex.inspect}")
|
63
|
-
|
62
|
+
reset
|
64
63
|
retry
|
65
64
|
end
|
66
65
|
|
@@ -73,16 +72,23 @@ module RedisFailover
|
|
73
72
|
@queue << [node, state]
|
74
73
|
end
|
75
74
|
|
76
|
-
# Performs a
|
77
|
-
def
|
78
|
-
@
|
79
|
-
@queue << nil
|
75
|
+
# Performs a reset of the manager.
|
76
|
+
def reset
|
77
|
+
@leader = false
|
80
78
|
@watchers.each(&:shutdown) if @watchers
|
81
|
-
|
79
|
+
@queue.clear
|
82
80
|
@zk.close! if @zk
|
83
81
|
@zk_lock = nil
|
84
82
|
end
|
85
83
|
|
84
|
+
# Initiates a graceful shutdown.
|
85
|
+
def shutdown
|
86
|
+
logger.info('Shutting down ...')
|
87
|
+
@mutex.synchronize do
|
88
|
+
@shutdown = true
|
89
|
+
end
|
90
|
+
end
|
91
|
+
|
86
92
|
private
|
87
93
|
|
88
94
|
# Configures the ZooKeeper client.
|
@@ -92,17 +98,8 @@ module RedisFailover
|
|
92
98
|
@zk.on_expired_session { notify_state(:zk_disconnected, nil) }
|
93
99
|
|
94
100
|
@zk.register(@manual_znode) do |event|
|
95
|
-
|
96
|
-
|
97
|
-
if event.node_created? || event.node_changed?
|
98
|
-
schedule_manual_failover
|
99
|
-
end
|
100
|
-
rescue => ex
|
101
|
-
logger.error("Error scheduling a manual failover: #{ex.inspect}")
|
102
|
-
logger.error(ex.backtrace.join("\n"))
|
103
|
-
ensure
|
104
|
-
@zk.stat(@manual_znode, :watch => true)
|
105
|
-
end
|
101
|
+
if event.node_created? || event.node_changed?
|
102
|
+
perform_manual_failover
|
106
103
|
end
|
107
104
|
end
|
108
105
|
|
@@ -112,23 +109,23 @@ module RedisFailover
|
|
112
109
|
|
113
110
|
# Handles periodic state reports from {RedisFailover::NodeWatcher} instances.
|
114
111
|
def handle_state_reports
|
115
|
-
while state_report = @queue.pop
|
116
|
-
# Ensure that we still have the master lock.
|
117
|
-
@zk_lock.assert!
|
118
|
-
|
112
|
+
while running? && (state_report = @queue.pop)
|
119
113
|
begin
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
114
|
+
@mutex.synchronize do
|
115
|
+
return unless running?
|
116
|
+
@zk_lock.assert!
|
117
|
+
node, state = state_report
|
118
|
+
case state
|
119
|
+
when :unavailable then handle_unavailable(node)
|
120
|
+
when :available then handle_available(node)
|
121
|
+
when :syncing then handle_syncing(node)
|
122
|
+
when :zk_disconnected then raise ZKDisconnectedError
|
123
|
+
else raise InvalidNodeStateError.new(node, state)
|
124
|
+
end
|
129
125
|
|
130
|
-
|
131
|
-
|
126
|
+
# flush current state
|
127
|
+
write_state
|
128
|
+
end
|
132
129
|
rescue *ZK_ERRORS
|
133
130
|
# fail hard if this is a ZK connection-related error
|
134
131
|
raise
|
@@ -204,7 +201,7 @@ module RedisFailover
|
|
204
201
|
logger.info("Handling manual failover")
|
205
202
|
|
206
203
|
# make current master a slave, and promote new master
|
207
|
-
@slaves << @master
|
204
|
+
@slaves << @master if @master
|
208
205
|
@slaves.delete(node)
|
209
206
|
promote_new_master(node)
|
210
207
|
end
|
@@ -234,16 +231,35 @@ module RedisFailover
|
|
234
231
|
end
|
235
232
|
|
236
233
|
# Discovers the current master and slave nodes.
|
234
|
+
# @return [Boolean] true if nodes successfully discovered, false otherwise
|
237
235
|
def discover_nodes
|
238
|
-
|
239
|
-
|
240
|
-
|
241
|
-
|
242
|
-
|
243
|
-
|
244
|
-
|
245
|
-
|
246
|
-
|
236
|
+
@mutex.synchronize do
|
237
|
+
return false unless running?
|
238
|
+
nodes = @options[:nodes].map { |opts| Node.new(opts) }.uniq
|
239
|
+
if @master = find_existing_master
|
240
|
+
logger.info("Using master #{@master} from existing znode config.")
|
241
|
+
elsif @master = guess_master(nodes)
|
242
|
+
logger.info("Guessed master #{@master} from known redis nodes.")
|
243
|
+
end
|
244
|
+
@slaves = nodes - [@master]
|
245
|
+
logger.info("Managing master (#{@master}) and slaves " +
|
246
|
+
"(#{@slaves.map(&:to_s).join(', ')})")
|
247
|
+
# ensure that slaves are correctly pointing to this master
|
248
|
+
redirect_slaves_to(@master)
|
249
|
+
true
|
250
|
+
end
|
251
|
+
rescue NodeUnavailableError, NoMasterError, MultipleMastersError => ex
|
252
|
+
msg = <<-MSG.gsub(/\s+/, ' ')
|
253
|
+
Failed to discover master node: #{ex.inspect}
|
254
|
+
In order to ensure a safe startup, redis_failover requires that all redis
|
255
|
+
nodes be accessible, and only a single node indicating that it's the master.
|
256
|
+
In order to fix this, you can perform a manual failover via redis_failover,
|
257
|
+
or manually fix the individual redis servers. This discovery process will
|
258
|
+
retry in #{TIMEOUT}s.
|
259
|
+
MSG
|
260
|
+
logger.warn(msg)
|
261
|
+
sleep(TIMEOUT)
|
262
|
+
retry
|
247
263
|
end
|
248
264
|
|
249
265
|
# Seeds the initial node master from an existing znode config.
|
@@ -251,7 +267,7 @@ module RedisFailover
|
|
251
267
|
if data = @zk.get(@znode).first
|
252
268
|
nodes = symbolize_keys(decode(data))
|
253
269
|
master = node_from(nodes[:master])
|
254
|
-
logger.info("Master from existing config: #{master || 'none'}")
|
270
|
+
logger.info("Master from existing znode config: #{master || 'none'}")
|
255
271
|
master
|
256
272
|
end
|
257
273
|
rescue ZK::Exceptions::NoNode
|
@@ -281,14 +297,11 @@ module RedisFailover
|
|
281
297
|
#
|
282
298
|
# @param [Array<Node>] nodes the nodes to search
|
283
299
|
# @return [Node] the found master node, nil if not found
|
284
|
-
def
|
285
|
-
nodes.
|
286
|
-
|
287
|
-
|
288
|
-
|
289
|
-
false
|
290
|
-
end
|
291
|
-
end
|
300
|
+
def guess_master(nodes)
|
301
|
+
master_nodes = nodes.select { |node| node.master? }
|
302
|
+
raise NoMasterError if master_nodes.empty?
|
303
|
+
raise MultipleMastersError.new(master_nodes) if master_nodes.size > 1
|
304
|
+
master_nodes.first
|
292
305
|
end
|
293
306
|
|
294
307
|
# Redirects all slaves to the specified node.
|
@@ -378,32 +391,44 @@ module RedisFailover
|
|
378
391
|
# Executes a block wrapped in a ZK exclusive lock.
|
379
392
|
def with_lock
|
380
393
|
@zk_lock = @zk.locker(@lock_path)
|
381
|
-
|
382
|
-
|
394
|
+
while running? && !@zk_lock.lock
|
395
|
+
sleep(TIMEOUT)
|
396
|
+
end
|
397
|
+
|
398
|
+
if running?
|
399
|
+
yield
|
400
|
+
end
|
383
401
|
ensure
|
384
402
|
@zk_lock.unlock! if @zk_lock
|
385
403
|
end
|
386
404
|
|
387
|
-
#
|
388
|
-
def
|
389
|
-
|
390
|
-
|
391
|
-
|
392
|
-
|
393
|
-
|
394
|
-
|
395
|
-
|
396
|
-
|
397
|
-
|
398
|
-
|
399
|
-
|
405
|
+
# Perform a manual failover to a redis node.
|
406
|
+
def perform_manual_failover
|
407
|
+
@mutex.synchronize do
|
408
|
+
return unless running? && @leader && @zk_lock
|
409
|
+
@zk_lock.assert!
|
410
|
+
new_master = @zk.get(@manual_znode, :watch => true).first
|
411
|
+
return unless new_master && new_master.size > 0
|
412
|
+
logger.info("Received manual failover request for: #{new_master}")
|
413
|
+
logger.info("Current nodes: #{current_nodes.inspect}")
|
414
|
+
node = new_master == ManualFailover::ANY_SLAVE ?
|
415
|
+
@slaves.shuffle.first : node_from(new_master)
|
416
|
+
if node
|
417
|
+
handle_manual_failover(node)
|
418
|
+
else
|
419
|
+
logger.error('Failed to perform manual failover, no candidate found.')
|
420
|
+
end
|
400
421
|
end
|
422
|
+
rescue => ex
|
423
|
+
logger.error("Error handling a manual failover: #{ex.inspect}")
|
424
|
+
logger.error(ex.backtrace.join("\n"))
|
425
|
+
ensure
|
426
|
+
@zk.stat(@manual_znode, :watch => true)
|
427
|
+
end
|
401
428
|
|
402
|
-
|
403
|
-
|
404
|
-
|
405
|
-
logger.error('Failed to perform manual failover, no candidate found.')
|
406
|
-
end
|
429
|
+
# @return [Boolean] true if running, false otherwise
|
430
|
+
def running?
|
431
|
+
!@shutdown
|
407
432
|
end
|
408
433
|
end
|
409
434
|
end
|
@@ -8,22 +8,20 @@ module RedisFailover
|
|
8
8
|
# Node Manager is gracefully stopped
|
9
9
|
def self.run(options)
|
10
10
|
options = CLI.parse(options)
|
11
|
-
|
12
|
-
trap_signals
|
13
|
-
|
14
|
-
@node_manager_thread.join
|
11
|
+
node_manager = NodeManager.new(options)
|
12
|
+
trap_signals(node_manager)
|
13
|
+
node_manager.start
|
15
14
|
end
|
16
15
|
|
17
16
|
# Traps shutdown signals.
|
18
|
-
|
17
|
+
# @param [NodeManager] node_manager the node manager
|
18
|
+
def self.trap_signals(node_manager)
|
19
19
|
[:INT, :TERM].each do |signal|
|
20
20
|
trap(signal) do
|
21
|
-
|
22
|
-
@node_manager.shutdown
|
23
|
-
@node_manager_thread.join
|
24
|
-
exit(0)
|
21
|
+
node_manager.shutdown
|
25
22
|
end
|
26
23
|
end
|
27
24
|
end
|
25
|
+
private_class_method :trap_signals
|
28
26
|
end
|
29
27
|
end
|
data/spec/node_manager_spec.rb
CHANGED
@@ -108,5 +108,29 @@ module RedisFailover
|
|
108
108
|
end
|
109
109
|
end
|
110
110
|
end
|
111
|
+
|
112
|
+
describe '#guess_master' do
|
113
|
+
let(:node1) { Node.new(:host => 'node1').extend(RedisStubSupport) }
|
114
|
+
let(:node2) { Node.new(:host => 'node2').extend(RedisStubSupport) }
|
115
|
+
let(:node3) { Node.new(:host => 'node3').extend(RedisStubSupport) }
|
116
|
+
|
117
|
+
it 'raises error when no master is found' do
|
118
|
+
node1.make_slave!(node3)
|
119
|
+
node2.make_slave!(node3)
|
120
|
+
expect { manager.guess_master([node1, node2]) }.to raise_error(NoMasterError)
|
121
|
+
end
|
122
|
+
|
123
|
+
it 'raises error when multiple masters found' do
|
124
|
+
node1.make_master!
|
125
|
+
node2.make_master!
|
126
|
+
expect { manager.guess_master([node1, node2]) }.to raise_error(MultipleMastersError)
|
127
|
+
end
|
128
|
+
|
129
|
+
it 'raises error when a node can not be reached' do
|
130
|
+
node1.make_master!
|
131
|
+
node2.redis.make_unavailable!
|
132
|
+
expect { manager.guess_master([node1, node2]) }.to raise_error(NodeUnavailableError)
|
133
|
+
end
|
134
|
+
end
|
111
135
|
end
|
112
136
|
end
|
@@ -1,11 +1,12 @@
|
|
1
1
|
module RedisFailover
|
2
2
|
class NodeManagerStub < NodeManager
|
3
3
|
attr_accessor :master
|
4
|
-
|
4
|
+
# HACK - this will go away once we refactor the tests to use a real ZK/Redis server.
|
5
|
+
public :current_nodes, :guess_master
|
5
6
|
|
6
7
|
def discover_nodes
|
7
8
|
# only discover nodes once in testing
|
8
|
-
return if @nodes_discovered
|
9
|
+
return true if @nodes_discovered
|
9
10
|
|
10
11
|
master = Node.new(:host => 'master')
|
11
12
|
slave = Node.new(:host => 'slave')
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: redis_failover
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.9.4
|
4
|
+
version: 0.9.5
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-
|
12
|
+
date: 2012-09-01 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: redis
|
@@ -189,7 +189,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
189
189
|
version: '0'
|
190
190
|
segments:
|
191
191
|
- 0
|
192
|
-
hash: -
|
192
|
+
hash: -2193925210006995870
|
193
193
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
194
194
|
none: false
|
195
195
|
requirements:
|
@@ -198,7 +198,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
198
198
|
version: '0'
|
199
199
|
segments:
|
200
200
|
- 0
|
201
|
-
hash: -
|
201
|
+
hash: -2193925210006995870
|
202
202
|
requirements: []
|
203
203
|
rubyforge_project:
|
204
204
|
rubygems_version: 1.8.23
|