redis_failover 0.9.7.2 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Changes.md +14 -0
- data/README.md +57 -20
- data/examples/config.yml +3 -0
- data/lib/redis_failover.rb +4 -1
- data/lib/redis_failover/cli.rb +25 -2
- data/lib/redis_failover/client.rb +25 -10
- data/lib/redis_failover/errors.rb +0 -4
- data/lib/redis_failover/failover_strategy.rb +25 -0
- data/lib/redis_failover/failover_strategy/latency.rb +21 -0
- data/lib/redis_failover/manual_failover.rb +16 -4
- data/lib/redis_failover/node.rb +2 -1
- data/lib/redis_failover/node_manager.rb +419 -144
- data/lib/redis_failover/node_snapshot.rb +81 -0
- data/lib/redis_failover/node_strategy.rb +34 -0
- data/lib/redis_failover/node_strategy/consensus.rb +18 -0
- data/lib/redis_failover/node_strategy/majority.rb +18 -0
- data/lib/redis_failover/node_strategy/single.rb +17 -0
- data/lib/redis_failover/node_watcher.rb +13 -13
- data/lib/redis_failover/util.rb +12 -4
- data/lib/redis_failover/version.rb +1 -1
- data/redis_failover.gemspec +1 -1
- data/spec/failover_strategy/latency_spec.rb +41 -0
- data/spec/failover_strategy_spec.rb +17 -0
- data/spec/node_snapshot_spec.rb +30 -0
- data/spec/node_strategy/consensus_spec.rb +30 -0
- data/spec/node_strategy/majority_spec.rb +22 -0
- data/spec/node_strategy/single_spec.rb +22 -0
- data/spec/node_strategy_spec.rb +22 -0
- data/spec/node_watcher_spec.rb +2 -2
- data/spec/spec_helper.rb +2 -1
- data/spec/support/node_manager_stub.rb +29 -8
- metadata +35 -8
@@ -0,0 +1,81 @@
|
|
1
|
+
module RedisFailover
  # Represents a snapshot of a particular redis node as seen by all currently
  # running redis node managers.
  #
  # Every node manager holds exactly one vote per snapshot: either "available"
  # (recorded together with the latency it reported) or "unavailable". A later
  # report from the same manager replaces its earlier one.
  class NodeSnapshot
    # @return [Node] the redis node this snapshot describes
    attr_reader :node

    # Creates a new, empty snapshot.
    #
    # @param [Node] node the redis node
    # @see NodeManager#initialize
    def initialize(node)
      @node = node
      @available = {}   # node manager id => latency reported by that manager
      @unavailable = [] # ids of node managers that reported the node as down
    end

    # Declares this node available by the specified node manager.
    #
    # @param [String] node_manager the node manager id
    # @param [Numeric] latency the latency reported by that node manager
    # @return [Numeric] the recorded latency
    def viewable_by(node_manager, latency)
      # Withdraw any earlier "unavailable" vote so the manager is counted once.
      @unavailable.delete(node_manager)
      @available[node_manager] = latency
    end

    # Determines if this node is viewable by a node manager.
    #
    # @param [String] node_manager the node manager id
    # @return [Boolean] true if the manager reported the node as available
    def viewable_by?(node_manager)
      @available.key?(node_manager)
    end

    # Declares this node unavailable by the specified node manager.
    #
    # @param [String] node_manager the node manager id
    # @return [Array<String>] the ids of all managers reporting the node down
    def unviewable_by(node_manager)
      # Withdraw any earlier "available" vote so the manager is counted once.
      @available.delete(node_manager)
      # Repeated reports from the same manager must not inflate the count.
      @unavailable << node_manager unless @unavailable.include?(node_manager)
      @unavailable
    end

    # @return [Integer] the number of node managers saying
    #   this node is available
    def available_count
      @available.size
    end

    # @return [Integer] the number of node managers saying
    #   this node is unavailable
    def unavailable_count
      @unavailable.size
    end

    # @return [Float, nil] the mean of the reported latencies, or nil when no
    #   node manager has reported this node as available
    def avg_latency
      return if @available.empty?

      # Start from 0.0 so integer latencies are not truncated by the division.
      @available.values.inject(0.0) { |sum, latency| sum + latency } / @available.size
    end

    # @return [Array<String>] all node managers involved in this snapshot
    def node_managers
      (@available.keys + @unavailable).uniq
    end

    # @return [Boolean] true if all node managers indicated that this
    #   node was viewable (false for a snapshot without any reports)
    def all_available?
      available_count > 0 && unavailable_count == 0
    end

    # @return [Boolean] true if all node managers indicated that this
    #   node was unviewable (false for a snapshot without any reports)
    def all_unavailable?
      unavailable_count > 0 && available_count == 0
    end

    # @return [String] a friendly representation of this node snapshot
    def to_s
      'Node %s available by %p, unavailable by %p (%d up, %d down)' %
        [node, @available, @unavailable, available_count, unavailable_count]
    end
  end
end
|
@@ -0,0 +1,34 @@
|
|
1
|
+
module RedisFailover
  # Common parent of the strategies that decide whether a node counts as
  # available, based on the snapshots gathered for it.
  class NodeStrategy
    include Util

    # Requires the strategy file matching the given name and instantiates the
    # class whose name is the capitalized form of that name.
    #
    # @param [String, Symbol] name the strategy name
    # @return [Object] a new strategy instance
    # @raise [RuntimeError] if requiring the file or resolving the class fails
    def self.for(name)
      begin
        require "redis_failover/node_strategy/#{name.downcase}"
        strategy_class = const_get(name.capitalize)
        strategy_class.new
      rescue LoadError, NameError
        raise "Failed to find node strategy: #{name}"
      end
    end

    # Returns the state determined by this strategy. Subclasses are expected
    # to override this; the base implementation always raises.
    #
    # @param [Node] node the node to handle
    # @param [Hash<Node, NodeSnapshot>] snapshots the current set of snapshots
    # @return [Symbol] the status
    # @raise [NotImplementedError] always, in this base class
    def determine_state(node, snapshots)
      raise(NotImplementedError)
    end

    # Writes an info-level log entry stating that this strategy is marking
    # the node as unavailable, including the snapshot it based that on.
    #
    # @param [Node] node the node
    # @param [NodeSnapshot] snapshot the node snapshot
    def log_unavailable(node, snapshot)
      message = "#{self.class} marking #{node} as unavailable. Snapshot: #{snapshot}"
      logger.info(message)
    end
  end
end
|
@@ -0,0 +1,18 @@
|
|
1
|
+
module RedisFailover
  class NodeStrategy
    # Consensus strategy: the node is only reported as unavailable when the
    # snapshot says every reporting node manager sees it as down.
    class Consensus < NodeStrategy
      # @see RedisFailover::NodeStrategy#determine_state
      def determine_state(node, snapshots)
        view = snapshots[node]
        # Anything short of unanimous "down" keeps the node available.
        return :available unless view.all_unavailable?

        log_unavailable(node, view)
        :unavailable
      end
    end
  end
end
|
@@ -0,0 +1,18 @@
|
|
1
|
+
module RedisFailover
  class NodeStrategy
    # Majority strategy: the node is only reported as unavailable when more
    # node managers see it as down than see it as up.
    class Majority < NodeStrategy
      # @see RedisFailover::NodeStrategy#determine_state
      def determine_state(node, snapshots)
        view = snapshots[node]
        # The comparison is strict, so a tie leaves the node available.
        return :available unless view.unavailable_count > view.available_count

        log_unavailable(node, view)
        :unavailable
      end
    end
  end
end
|
@@ -0,0 +1,17 @@
|
|
1
|
+
module RedisFailover
  class NodeStrategy
    # Single strategy: one node manager reporting the node as down is enough
    # to mark it unavailable.
    class Single < NodeStrategy
      # @see RedisFailover::NodeStrategy#determine_state
      def determine_state(node, snapshots)
        view = snapshots[node]
        # No "down" report at all keeps the node available.
        return :available unless view.unavailable_count > 0

        log_unavailable(node, view)
        :unavailable
      end
    end
  end
end
|
@@ -33,8 +33,12 @@ module RedisFailover
|
|
33
33
|
# Performs a graceful shutdown of this watcher.
|
34
34
|
def shutdown
|
35
35
|
@done = true
|
36
|
-
|
37
|
-
|
36
|
+
begin
|
37
|
+
@node.wakeup
|
38
|
+
rescue
|
39
|
+
# best effort
|
40
|
+
end
|
41
|
+
@monitor_thread.join
|
38
42
|
rescue => ex
|
39
43
|
logger.warn("Failed to gracefully shutdown watcher for #{@node}")
|
40
44
|
end
|
@@ -48,17 +52,12 @@ module RedisFailover
|
|
48
52
|
|
49
53
|
loop do
|
50
54
|
begin
|
51
|
-
|
55
|
+
break if @done
|
52
56
|
sleep(WATCHER_SLEEP_TIME)
|
53
|
-
@node.ping
|
57
|
+
latency = Benchmark.realtime { @node.ping }
|
54
58
|
failures = 0
|
55
|
-
|
56
|
-
|
57
|
-
notify(:syncing)
|
58
|
-
else
|
59
|
-
notify(:available)
|
60
|
-
@node.wait
|
61
|
-
end
|
59
|
+
notify(:available, latency)
|
60
|
+
@node.wait
|
62
61
|
rescue NodeUnavailableError => ex
|
63
62
|
logger.debug("Failed to communicate with node #{@node}: #{ex.inspect}")
|
64
63
|
failures += 1
|
@@ -76,8 +75,9 @@ module RedisFailover
|
|
76
75
|
# Notifies the manager of a node's state.
|
77
76
|
#
|
78
77
|
# @param [Symbol] state the node's state
|
79
|
-
|
80
|
-
|
78
|
+
# @param [Float, nil] latency the optional ping latency in seconds, as measured by Benchmark.realtime
|
79
|
+
def notify(state, latency = nil)
|
80
|
+
@manager.notify_state(@node, state, latency)
|
81
81
|
end
|
82
82
|
end
|
83
83
|
end
|
data/lib/redis_failover/util.rb
CHANGED
@@ -51,8 +51,8 @@ module RedisFailover
|
|
51
51
|
# that abstracts the master/slave servers.
|
52
52
|
UNSUPPORTED_OPS = Set[:select, :dbsize].freeze
|
53
53
|
|
54
|
-
# Default node in ZK
|
55
|
-
|
54
|
+
# Default root node in ZK used for redis_failover.
|
55
|
+
DEFAULT_ROOT_ZNODE_PATH = '/redis_failover'.freeze
|
56
56
|
|
57
57
|
# Connectivity errors that the redis (<3.x) client raises.
|
58
58
|
REDIS_ERRORS = Errno.constants.map { |c| Errno.const_get(c) }
|
@@ -61,11 +61,19 @@ module RedisFailover
|
|
61
61
|
REDIS_ERRORS << Redis::BaseError if Redis.const_defined?('BaseError')
|
62
62
|
REDIS_ERRORS.freeze
|
63
63
|
|
64
|
+
# ZK Errors
|
65
|
+
ZK_ERRORS = [
|
66
|
+
ZK::Exceptions::LockAssertionFailedError,
|
67
|
+
ZK::Exceptions::InterruptedSession,
|
68
|
+
ZK::Exceptions::Retryable,
|
69
|
+
Zookeeper::Exceptions::ContinuationTimeoutError
|
70
|
+
].freeze
|
71
|
+
|
64
72
|
# Full set of errors related to connectivity.
|
65
73
|
CONNECTIVITY_ERRORS = [
|
66
74
|
RedisFailover::Error,
|
67
|
-
|
68
|
-
|
75
|
+
REDIS_ERRORS,
|
76
|
+
ZK_ERRORS
|
69
77
|
].flatten.freeze
|
70
78
|
|
71
79
|
# Symbolizes the keys of the specified hash.
|
data/redis_failover.gemspec
CHANGED
@@ -18,7 +18,7 @@ Gem::Specification.new do |gem|
|
|
18
18
|
gem.add_dependency('redis', ['>= 2.2', '< 4'])
|
19
19
|
gem.add_dependency('redis-namespace')
|
20
20
|
gem.add_dependency('multi_json', '~> 1')
|
21
|
-
gem.add_dependency('zk', '
|
21
|
+
gem.add_dependency('zk', ['>= 1.7.2', '< 1.8'])
|
22
22
|
|
23
23
|
gem.add_development_dependency('rake')
|
24
24
|
gem.add_development_dependency('rspec')
|
@@ -0,0 +1,41 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
module RedisFailover
  class FailoverStrategy
    # Load the latency strategy up front so that the Latency constant used by
    # the describe block below can be resolved.
    FailoverStrategy.for(:latency)

    describe Latency do
      # Two snapshots for different ports on localhost, keyed by their node.
      let(:first_snapshot)  { NodeSnapshot.new(Node.new(:host => 'localhost', :port => '123')) }
      let(:second_snapshot) { NodeSnapshot.new(Node.new(:host => 'localhost', :port => '456')) }
      let(:snapshots) do
        {first_snapshot.node => first_snapshot, second_snapshot.node => second_snapshot}
      end
      let(:strategy) { FailoverStrategy.for(:latency) }

      describe '#find_candidate' do
        it 'returns only candidates seen by all node managers' do
          # Each node is invisible to one of the two node managers.
          first_snapshot.viewable_by('nm1', 0)
          first_snapshot.unviewable_by('nm2')

          second_snapshot.viewable_by('nm2', 0)
          second_snapshot.unviewable_by('nm1')

          strategy.find_candidate(snapshots).should be_nil
        end

        it 'returns the candidate with the lowest average latency' do
          # Both nodes are seen by everyone; the second one is faster.
          {'nm1' => 5, 'nm2' => 4, 'nm3' => 3}.each do |manager, latency|
            first_snapshot.viewable_by(manager, latency)
          end

          {'nm1' => 1, 'nm2' => 1, 'nm3' => 2}.each do |manager, latency|
            second_snapshot.viewable_by(manager, latency)
          end

          strategy.find_candidate(snapshots).should == second_snapshot.node
        end
      end
    end
  end
end
|
@@ -0,0 +1,17 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
module RedisFailover
  describe FailoverStrategy do
    # Covers the factory method that maps a strategy name to an instance.
    describe '.for' do
      it 'creates a new latency strategy instance' do
        strategy = FailoverStrategy.for('latency')
        strategy.should be_a(RedisFailover::FailoverStrategy::Latency)
      end

      it 'rejects unknown strategies' do
        expect do
          FailoverStrategy.for('foobar')
        end.to raise_error(RuntimeError)
      end
    end
  end
end
|
@@ -0,0 +1,30 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
module RedisFailover
  describe NodeSnapshot do
    let(:snapshot) { NodeSnapshot.new(Node.new(:host => 'localhost', :port => '123')) }

    describe '#initialize' do
      it 'creates a new empty snapshot' do
        snapshot.available_count.should == 0
        snapshot.unavailable_count.should == 0
      end
    end

    describe '#viewable_by' do
      it 'updates the availability count' do
        snapshot.viewable_by('nm1', 0)
        snapshot.viewable_by('nm2', 0)
        snapshot.available_count.should == 2
      end
    end

    describe '#viewable_by?' do
      it 'is true only for node managers that reported the node as available' do
        snapshot.viewable_by('nm1', 0)
        snapshot.viewable_by?('nm1').should == true
        snapshot.viewable_by?('nm2').should == false
      end
    end

    describe '#unviewable_by' do
      it 'updates the unavailability count' do
        snapshot.unviewable_by('nm1')
        snapshot.unviewable_by('nm2')
        snapshot.unavailable_count.should == 2
      end
    end

    describe '#avg_latency' do
      it 'is nil when no node manager reported the node as available' do
        snapshot.avg_latency.should be_nil
      end

      it 'averages the latencies reported by the node managers' do
        snapshot.viewable_by('nm1', 2)
        snapshot.viewable_by('nm2', 4)
        snapshot.avg_latency.should == 3
      end
    end

    describe '#node_managers' do
      it 'lists every node manager that contributed to the snapshot' do
        snapshot.viewable_by('nm1', 0)
        snapshot.unviewable_by('nm2')
        snapshot.node_managers.sort.should == ['nm1', 'nm2']
      end
    end

    describe '#all_available?' do
      it 'is false for an empty snapshot' do
        snapshot.all_available?.should == false
      end

      it 'is true when every report says the node is available' do
        snapshot.viewable_by('nm1', 0)
        snapshot.viewable_by('nm2', 0)
        snapshot.all_available?.should == true
      end

      it 'is false as soon as one node manager reports the node as down' do
        snapshot.viewable_by('nm1', 0)
        snapshot.unviewable_by('nm2')
        snapshot.all_available?.should == false
      end
    end

    describe '#all_unavailable?' do
      it 'is false for an empty snapshot' do
        snapshot.all_unavailable?.should == false
      end

      it 'is true when every report says the node is down' do
        snapshot.unviewable_by('nm1')
        snapshot.unviewable_by('nm2')
        snapshot.all_unavailable?.should == true
      end

      it 'is false as soon as one node manager reports the node as available' do
        snapshot.unviewable_by('nm1')
        snapshot.viewable_by('nm2', 0)
        snapshot.all_unavailable?.should == false
      end
    end
  end
end
|
@@ -0,0 +1,30 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
module RedisFailover
  class NodeStrategy
    # Load the consensus strategy up front so that the Consensus constant used
    # by the describe block below can be resolved.
    NodeStrategy.for(:consensus)

    describe Consensus do
      let(:node) { Node.new(:host => 'localhost', :port => '123') }
      let(:snapshot) { NodeSnapshot.new(node) }
      let(:strategy) { NodeStrategy.for(:consensus) }

      describe '#determine_state' do
        it 'returns the unavailable state if unavailable by all node managers' do
          ['nm1', 'nm2', 'nm3'].each { |manager| snapshot.unviewable_by(manager) }

          state = strategy.determine_state(node, node => snapshot)
          state.should == :unavailable
        end

        it 'returns the available state if unavailable by some node managers' do
          ['nm1', 'nm2'].each { |manager| snapshot.unviewable_by(manager) }
          snapshot.viewable_by('nm3', 0)

          state = strategy.determine_state(node, node => snapshot)
          state.should == :available
        end
      end
    end
  end
end
|
@@ -0,0 +1,22 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
module RedisFailover
  class NodeStrategy
    # Load the majority strategy up front so that the Majority constant used
    # by the describe block below can be resolved.
    NodeStrategy.for(:majority)

    describe Majority do
      let(:node) { Node.new(:host => 'localhost', :port => '123') }
      let(:snapshot) { NodeSnapshot.new(node) }

      describe '#determine_state' do
        it 'returns the unavailable state if unavailable by the majority of node managers' do
          strategy = NodeStrategy.for(:majority)
          snapshot.viewable_by('nm1', 0)
          snapshot.unviewable_by('nm2')
          snapshot.unviewable_by('nm3')
          strategy.determine_state(node, node => snapshot).should == :unavailable
        end

        it 'returns the available state if available by the majority of node managers' do
          strategy = NodeStrategy.for(:majority)
          snapshot.viewable_by('nm1', 0)
          snapshot.viewable_by('nm2', 0)
          snapshot.unviewable_by('nm3')
          strategy.determine_state(node, node => snapshot).should == :available
        end

        # The strategy compares with a strict ">", so an even split is not a
        # majority and the node stays available.
        it 'returns the available state if the node managers are evenly split' do
          strategy = NodeStrategy.for(:majority)
          snapshot.viewable_by('nm1', 0)
          snapshot.unviewable_by('nm2')
          strategy.determine_state(node, node => snapshot).should == :available
        end
      end
    end
  end
end
|
@@ -0,0 +1,22 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
module RedisFailover
  class NodeStrategy
    # Load the single strategy up front so that the Single constant used by
    # the describe block below can be resolved.
    NodeStrategy.for(:single)

    describe Single do
      let(:node) { Node.new(:host => 'localhost', :port => '123') }
      let(:snapshot) { NodeSnapshot.new(node) }

      describe '#determine_state' do
        it 'returns the unavailable state if any node manager reports as down' do
          strategy = NodeStrategy.for(:single)
          snapshot.unviewable_by('nm1')
          snapshot.viewable_by('nm2', 0)
          snapshot.viewable_by('nm3', 0)
          strategy.determine_state(node, node => snapshot).should == :unavailable
        end

        it 'returns the available state if no node manager reports as down' do
          strategy = NodeStrategy.for(:single)
          snapshot.viewable_by('nm1', 0)
          snapshot.viewable_by('nm2', 0)
          strategy.determine_state(node, node => snapshot).should == :available
        end
      end
    end
  end
end
|
@@ -0,0 +1,22 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
module RedisFailover
  describe NodeStrategy do
    # Covers the factory method for every strategy shipped with the gem, plus
    # the failure path for an unknown name.
    describe '.for' do
      it 'creates a new majority strategy instance' do
        s = NodeStrategy.for('majority')
        s.should be_a RedisFailover::NodeStrategy::Majority
      end

      it 'creates a new consensus strategy instance' do
        s = NodeStrategy.for('consensus')
        s.should be_a RedisFailover::NodeStrategy::Consensus
      end

      it 'creates a new single strategy instance' do
        s = NodeStrategy.for('single')
        s.should be_a RedisFailover::NodeStrategy::Single
      end

      it 'rejects unknown strategies' do
        expect { NodeStrategy.for('foobar') }.to raise_error(RuntimeError)
      end
    end
  end
end
|