redis_failover 0.9.7.2 → 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- data/Changes.md +14 -0
- data/README.md +57 -20
- data/examples/config.yml +3 -0
- data/lib/redis_failover.rb +4 -1
- data/lib/redis_failover/cli.rb +25 -2
- data/lib/redis_failover/client.rb +25 -10
- data/lib/redis_failover/errors.rb +0 -4
- data/lib/redis_failover/failover_strategy.rb +25 -0
- data/lib/redis_failover/failover_strategy/latency.rb +21 -0
- data/lib/redis_failover/manual_failover.rb +16 -4
- data/lib/redis_failover/node.rb +2 -1
- data/lib/redis_failover/node_manager.rb +419 -144
- data/lib/redis_failover/node_snapshot.rb +81 -0
- data/lib/redis_failover/node_strategy.rb +34 -0
- data/lib/redis_failover/node_strategy/consensus.rb +18 -0
- data/lib/redis_failover/node_strategy/majority.rb +18 -0
- data/lib/redis_failover/node_strategy/single.rb +17 -0
- data/lib/redis_failover/node_watcher.rb +13 -13
- data/lib/redis_failover/util.rb +12 -4
- data/lib/redis_failover/version.rb +1 -1
- data/redis_failover.gemspec +1 -1
- data/spec/failover_strategy/latency_spec.rb +41 -0
- data/spec/failover_strategy_spec.rb +17 -0
- data/spec/node_snapshot_spec.rb +30 -0
- data/spec/node_strategy/consensus_spec.rb +30 -0
- data/spec/node_strategy/majority_spec.rb +22 -0
- data/spec/node_strategy/single_spec.rb +22 -0
- data/spec/node_strategy_spec.rb +22 -0
- data/spec/node_watcher_spec.rb +2 -2
- data/spec/spec_helper.rb +2 -1
- data/spec/support/node_manager_stub.rb +29 -8
- metadata +35 -8
@@ -0,0 +1,81 @@
|
|
1
|
+
module RedisFailover
  # Represents a snapshot of a particular redis node as seen by all currently running
  # redis node managers.
  #
  # Every node manager is recorded at most once, either as seeing the node
  # (together with the latency it measured) or as not seeing it. When the same
  # manager reports more than once, its most recent report replaces the earlier one.
  class NodeSnapshot
    # @return [Node] the redis node this snapshot describes
    attr_reader :node

    # Creates a new, empty snapshot.
    #
    # @param [Node] node the redis node
    def initialize(node)
      @node = node
      @available = {}    # node manager id => latency reported by that manager
      @unavailable = []  # ids of the node managers that cannot see the node
    end

    # Declares this node available by the specified node manager.
    #
    # Any earlier "unavailable" report from the same manager is discarded so
    # that a manager is never counted on both sides.
    #
    # @param [String] node_manager the node manager id
    # @param [Numeric] latency the latency
    # @return [Numeric] the recorded latency
    def viewable_by(node_manager, latency)
      @unavailable.delete(node_manager)
      @available[node_manager] = latency
    end

    # Determines if this node is viewable by a node manager.
    #
    # @param [String] node_manager the node manager id
    # @return [Boolean] true if the manager reported the node as available
    def viewable_by?(node_manager)
      @available.key?(node_manager)
    end

    # Declares this node unavailable by the specified node manager.
    #
    # Any earlier "available" report from the same manager is discarded, and a
    # repeated report does not increase the unavailable count.
    #
    # @param [String] node_manager the node manager id
    # @return [Array<String>] the managers currently reporting the node as unavailable
    def unviewable_by(node_manager)
      @available.delete(node_manager)
      @unavailable << node_manager unless @unavailable.include?(node_manager)
      @unavailable
    end

    # @return [Integer] the number of node managers saying
    #   this node is available
    def available_count
      @available.size
    end

    # @return [Integer] the number of node managers saying
    #   this node is unavailable
    def unavailable_count
      @unavailable.size
    end

    # @return [Numeric, nil] the average of the reported latencies, or nil when
    #   no node manager reports this node as available. Integer latencies are
    #   averaged with integer division.
    def avg_latency
      return if @available.empty?
      @available.values.inject(0) { |sum, n| sum + n } / @available.size
    end

    # @return [Array<String>] all node managers involved in this snapshot
    def node_managers
      (@available.keys + @unavailable).uniq
    end

    # @return [Boolean] true if all node managers indicated that this
    #   node was viewable (false for an empty snapshot)
    def all_available?
      available_count > 0 && unavailable_count == 0
    end

    # @return [Boolean] true if all node managers indicated that this
    #   node was unviewable (false for an empty snapshot)
    def all_unavailable?
      unavailable_count > 0 && available_count == 0
    end

    # @return [String] a friendly representation of this node snapshot
    def to_s
      'Node %s available by %p, unavailable by %p (%d up, %d down)' %
        [node, @available, @unavailable, available_count, unavailable_count]
    end
  end
end
|
@@ -0,0 +1,34 @@
|
|
1
|
+
module RedisFailover
  # Base class for strategies that determine node availability.
  class NodeStrategy
    include Util

    # Loads and instantiates the strategy with the given name.
    #
    # The lower-cased name selects the file to require under
    # redis_failover/node_strategy/, and the capitalized name selects the
    # constant to instantiate.
    #
    # @param [String, Symbol] name the strategy name
    # @return [Object] a new strategy instance
    # @raise [RuntimeError] if requiring the file or resolving the constant
    #   fails with a LoadError or NameError
    def self.for(name)
      strategy_path = "redis_failover/node_strategy/#{name.downcase}"
      require strategy_path
      strategy_class = const_get(name.capitalize)
      strategy_class.new
    rescue LoadError, NameError
      raise "Failed to find node strategy: #{name}"
    end

    # Returns the state determined by this strategy. Subclasses must override
    # this method; the base implementation always raises.
    #
    # @param [Node] node the node to handle
    # @param [Hash<Node, NodeSnapshot>] snapshots the current set of snapshots
    # @return [Symbol] the status
    # @raise [NotImplementedError] always, in the base class
    def determine_state(node, snapshots)
      raise NotImplementedError
    end

    # Logs, at info level, that this strategy is marking a node as unavailable.
    #
    # @param [Node] node the node
    # @param [NodeSnapshot] snapshot the node snapshot
    def log_unavailable(node, snapshot)
      # NOTE(review): `logger` is presumably provided by Util - confirm.
      message = "#{self.class} marking #{node} as unavailable. Snapshot: #{snapshot}"
      logger.info(message)
    end
  end
end
|
@@ -0,0 +1,18 @@
|
|
1
|
+
module RedisFailover
  class NodeStrategy
    # Strategy that reports a node as unavailable only when its snapshot says
    # that every node manager sees it as down; otherwise the node is available.
    class Consensus < NodeStrategy
      # @see RedisFailover::NodeStrategy#determine_state
      def determine_state(node, snapshots)
        node_snapshot = snapshots[node]
        return :available unless node_snapshot.all_unavailable?

        log_unavailable(node, node_snapshot)
        :unavailable
      end
    end
  end
end
|
@@ -0,0 +1,18 @@
|
|
1
|
+
module RedisFailover
  class NodeStrategy
    # Strategy that reports a node as unavailable only when more node managers
    # in its snapshot see it as down than see it as up. A tie counts as available.
    class Majority < NodeStrategy
      # @see RedisFailover::NodeStrategy#determine_state
      def determine_state(node, snapshots)
        node_snapshot = snapshots[node]
        outvoted = node_snapshot.unavailable_count > node_snapshot.available_count
        return :available unless outvoted

        log_unavailable(node, node_snapshot)
        :unavailable
      end
    end
  end
end
|
@@ -0,0 +1,17 @@
|
|
1
|
+
module RedisFailover
  class NodeStrategy
    # Strategy that reports a node as unavailable as soon as at least one node
    # manager in its snapshot sees it as down.
    class Single < NodeStrategy
      # @see RedisFailover::NodeStrategy#determine_state
      def determine_state(node, snapshots)
        node_snapshot = snapshots[node]
        return :available unless node_snapshot.unavailable_count > 0

        log_unavailable(node, node_snapshot)
        :unavailable
      end
    end
  end
end
|
@@ -33,8 +33,12 @@ module RedisFailover
|
|
33
33
|
# Performs a graceful shutdown of this watcher.
|
34
34
|
def shutdown
|
35
35
|
@done = true
|
36
|
-
|
37
|
-
|
36
|
+
begin
|
37
|
+
@node.wakeup
|
38
|
+
rescue
|
39
|
+
# best effort
|
40
|
+
end
|
41
|
+
@monitor_thread.join
|
38
42
|
rescue => ex
|
39
43
|
logger.warn("Failed to gracefully shutdown watcher for #{@node}")
|
40
44
|
end
|
@@ -48,17 +52,12 @@ module RedisFailover
|
|
48
52
|
|
49
53
|
loop do
|
50
54
|
begin
|
51
|
-
|
55
|
+
break if @done
|
52
56
|
sleep(WATCHER_SLEEP_TIME)
|
53
|
-
@node.ping
|
57
|
+
latency = Benchmark.realtime { @node.ping }
|
54
58
|
failures = 0
|
55
|
-
|
56
|
-
|
57
|
-
notify(:syncing)
|
58
|
-
else
|
59
|
-
notify(:available)
|
60
|
-
@node.wait
|
61
|
-
end
|
59
|
+
notify(:available, latency)
|
60
|
+
@node.wait
|
62
61
|
rescue NodeUnavailableError => ex
|
63
62
|
logger.debug("Failed to communicate with node #{@node}: #{ex.inspect}")
|
64
63
|
failures += 1
|
@@ -76,8 +75,9 @@ module RedisFailover
|
|
76
75
|
# Notifies the manager of a node's state.
|
77
76
|
#
|
78
77
|
# @param [Symbol] state the node's state
|
79
|
-
|
80
|
-
|
78
|
+
# @param [Integer] latency an optional latency
|
79
|
+
def notify(state, latency = nil)
|
80
|
+
@manager.notify_state(@node, state, latency)
|
81
81
|
end
|
82
82
|
end
|
83
83
|
end
|
data/lib/redis_failover/util.rb
CHANGED
@@ -51,8 +51,8 @@ module RedisFailover
|
|
51
51
|
# that abstracts the master/slave servers.
|
52
52
|
UNSUPPORTED_OPS = Set[:select, :dbsize].freeze
|
53
53
|
|
54
|
-
# Default node in ZK
|
55
|
-
|
54
|
+
# Default root node in ZK used for redis_failover.
|
55
|
+
DEFAULT_ROOT_ZNODE_PATH = '/redis_failover'.freeze
|
56
56
|
|
57
57
|
# Connectivity errors that the redis (<3.x) client raises.
|
58
58
|
REDIS_ERRORS = Errno.constants.map { |c| Errno.const_get(c) }
|
@@ -61,11 +61,19 @@ module RedisFailover
|
|
61
61
|
REDIS_ERRORS << Redis::BaseError if Redis.const_defined?('BaseError')
|
62
62
|
REDIS_ERRORS.freeze
|
63
63
|
|
64
|
+
# ZK Errors
|
65
|
+
ZK_ERRORS = [
|
66
|
+
ZK::Exceptions::LockAssertionFailedError,
|
67
|
+
ZK::Exceptions::InterruptedSession,
|
68
|
+
ZK::Exceptions::Retryable,
|
69
|
+
Zookeeper::Exceptions::ContinuationTimeoutError
|
70
|
+
].freeze
|
71
|
+
|
64
72
|
# Full set of errors related to connectivity.
|
65
73
|
CONNECTIVITY_ERRORS = [
|
66
74
|
RedisFailover::Error,
|
67
|
-
|
68
|
-
|
75
|
+
REDIS_ERRORS,
|
76
|
+
ZK_ERRORS
|
69
77
|
].flatten.freeze
|
70
78
|
|
71
79
|
# Symbolizes the keys of the specified hash.
|
data/redis_failover.gemspec
CHANGED
@@ -18,7 +18,7 @@ Gem::Specification.new do |gem|
|
|
18
18
|
gem.add_dependency('redis', ['>= 2.2', '< 4'])
|
19
19
|
gem.add_dependency('redis-namespace')
|
20
20
|
gem.add_dependency('multi_json', '~> 1')
|
21
|
-
gem.add_dependency('zk', '
|
21
|
+
gem.add_dependency('zk', ['>= 1.7.2', '< 1.8'])
|
22
22
|
|
23
23
|
gem.add_development_dependency('rake')
|
24
24
|
gem.add_development_dependency('rspec')
|
@@ -0,0 +1,41 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
module RedisFailover
  class FailoverStrategy
    # NOTE(review): presumably called here so the Latency constant is loaded
    # before the describe below refers to it - confirm against FailoverStrategy.for.
    FailoverStrategy.for(:latency)

    describe Latency do
      let(:strategy) { FailoverStrategy.for(:latency) }
      let(:first_snapshot) { NodeSnapshot.new(Node.new(:host => 'localhost', :port => '123')) }
      let(:second_snapshot) { NodeSnapshot.new(Node.new(:host => 'localhost', :port => '456')) }
      let(:snapshots) do
        { first_snapshot.node => first_snapshot, second_snapshot.node => second_snapshot }
      end

      describe '#find_candidate' do
        it 'returns only candidates seen by all node managers' do
          # Each node is visible to one manager and invisible to the other.
          first_snapshot.viewable_by('nm1', 0)
          first_snapshot.unviewable_by('nm2')

          second_snapshot.viewable_by('nm2', 0)
          second_snapshot.unviewable_by('nm1')

          strategy.find_candidate(snapshots).should be_nil
        end

        it 'returns the candidate with the lowest average latency' do
          { 'nm1' => 5, 'nm2' => 4, 'nm3' => 3 }.each do |manager, latency|
            first_snapshot.viewable_by(manager, latency)
          end

          { 'nm1' => 1, 'nm2' => 1, 'nm3' => 2 }.each do |manager, latency|
            second_snapshot.viewable_by(manager, latency)
          end

          strategy.find_candidate(snapshots).should == second_snapshot.node
        end
      end
    end
  end
end
|
@@ -0,0 +1,17 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
module RedisFailover
  describe FailoverStrategy do

    describe '.for' do
      it 'creates a new latency strategy instance' do
        strategy = FailoverStrategy.for('latency')
        strategy.should be_a_kind_of(RedisFailover::FailoverStrategy::Latency)
      end

      it 'rejects unknown strategies' do
        lookup = lambda { FailoverStrategy.for('foobar') }
        expect(&lookup).to raise_error(RuntimeError)
      end
    end
  end
end
|
@@ -0,0 +1,30 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
module RedisFailover
  describe NodeSnapshot do
    let(:node) { Node.new(:host => 'localhost', :port => '123') }
    let(:snapshot) { NodeSnapshot.new(node) }

    describe '#initialize' do
      it 'creates a new empty snapshot' do
        counts = [snapshot.available_count, snapshot.unavailable_count]
        counts.first.should == 0
        counts.last.should == 0
      end
    end

    describe '#viewable_by' do
      it 'updates the availability count' do
        %w[nm1 nm2].each { |manager| snapshot.viewable_by(manager, 0) }
        snapshot.available_count.should == 2
      end
    end

    describe '#unviewable_by' do
      it 'updates the unavailability count' do
        %w[nm1 nm2].each { |manager| snapshot.unviewable_by(manager) }
        snapshot.unavailable_count.should == 2
      end
    end
  end
end
|
@@ -0,0 +1,30 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
module RedisFailover
  class NodeStrategy
    # NodeStrategy.for requires the strategy file, which makes the Consensus
    # constant available to the describe below.
    NodeStrategy.for(:consensus)

    describe Consensus do
      let(:node) { Node.new(:host => 'localhost', :port => '123') }
      let(:snapshot) { NodeSnapshot.new(node) }
      let(:strategy) { NodeStrategy.for(:consensus) }

      describe '#determine_state' do
        it 'returns the unavailable state if unavailable by all node managers' do
          %w[nm1 nm2 nm3].each { |manager| snapshot.unviewable_by(manager) }

          state = strategy.determine_state(node, node => snapshot)
          state.should == :unavailable
        end

        it 'returns the available state if unavailable by some node managers' do
          %w[nm1 nm2].each { |manager| snapshot.unviewable_by(manager) }
          snapshot.viewable_by('nm3', 0)

          state = strategy.determine_state(node, node => snapshot)
          state.should == :available
        end
      end
    end
  end
end
|
@@ -0,0 +1,22 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
module RedisFailover
  class NodeStrategy
    # NodeStrategy.for requires the strategy file, which makes the Majority
    # constant available to the describe below.
    NodeStrategy.for(:majority)

    describe Majority do
      let(:node) { Node.new(:host => 'localhost', :port => '123') }
      let(:snapshot) { NodeSnapshot.new(node) }
      let(:strategy) { NodeStrategy.for(:majority) }

      describe '#determine_state' do
        it 'returns the unavailable state if unavailable by the majority of node managers' do
          # One manager sees the node, two do not.
          snapshot.viewable_by('nm1', 0)
          %w[nm2 nm3].each { |manager| snapshot.unviewable_by(manager) }

          state = strategy.determine_state(node, node => snapshot)
          state.should == :unavailable
        end
      end
    end
  end
end
|
@@ -0,0 +1,22 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
module RedisFailover
  class NodeStrategy
    # NodeStrategy.for requires the strategy file, which makes the Single
    # constant available to the describe below.
    NodeStrategy.for(:single)

    describe Single do
      let(:node) { Node.new(:host => 'localhost', :port => '123') }
      let(:snapshot) { NodeSnapshot.new(node) }
      let(:strategy) { NodeStrategy.for(:single) }

      describe '#determine_state' do
        it 'returns the unavailable state if any node manager reports as down' do
          # A single dissenting manager is enough for this strategy.
          snapshot.unviewable_by('nm1')
          %w[nm2 nm3].each { |manager| snapshot.viewable_by(manager, 0) }

          state = strategy.determine_state(node, node => snapshot)
          state.should == :unavailable
        end
      end
    end
  end
end
|
@@ -0,0 +1,22 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
module RedisFailover
  describe NodeStrategy do

    describe '.for' do
      it 'creates a new majority strategy instance' do
        strategy = NodeStrategy.for('majority')
        strategy.should be_a_kind_of(RedisFailover::NodeStrategy::Majority)
      end

      it 'creates a new consensus strategy instance' do
        strategy = NodeStrategy.for('consensus')
        strategy.should be_a_kind_of(RedisFailover::NodeStrategy::Consensus)
      end

      it 'rejects unknown strategies' do
        lookup = lambda { NodeStrategy.for('foobar') }
        expect(&lookup).to raise_error(RuntimeError)
      end
    end
  end
end
|