spbtv_redis_failover 1.0.2.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +19 -0
- data/.travis.yml +7 -0
- data/.yardopts +6 -0
- data/Changes.md +191 -0
- data/Gemfile +2 -0
- data/LICENSE +22 -0
- data/README.md +240 -0
- data/Rakefile +9 -0
- data/bin/redis_node_manager +7 -0
- data/examples/config.yml +17 -0
- data/examples/multiple_environments_config.yml +15 -0
- data/lib/redis_failover.rb +25 -0
- data/lib/redis_failover/cli.rb +142 -0
- data/lib/redis_failover/client.rb +517 -0
- data/lib/redis_failover/errors.rb +54 -0
- data/lib/redis_failover/failover_strategy.rb +25 -0
- data/lib/redis_failover/failover_strategy/latency.rb +21 -0
- data/lib/redis_failover/manual_failover.rb +52 -0
- data/lib/redis_failover/node.rb +190 -0
- data/lib/redis_failover/node_manager.rb +741 -0
- data/lib/redis_failover/node_snapshot.rb +81 -0
- data/lib/redis_failover/node_strategy.rb +34 -0
- data/lib/redis_failover/node_strategy/consensus.rb +18 -0
- data/lib/redis_failover/node_strategy/majority.rb +18 -0
- data/lib/redis_failover/node_strategy/single.rb +17 -0
- data/lib/redis_failover/node_watcher.rb +83 -0
- data/lib/redis_failover/runner.rb +27 -0
- data/lib/redis_failover/util.rb +137 -0
- data/lib/redis_failover/version.rb +3 -0
- data/misc/redis_failover.png +0 -0
- data/spbtv_redis_failover.gemspec +26 -0
- data/spec/cli_spec.rb +75 -0
- data/spec/client_spec.rb +153 -0
- data/spec/failover_strategy/latency_spec.rb +41 -0
- data/spec/failover_strategy_spec.rb +17 -0
- data/spec/node_manager_spec.rb +136 -0
- data/spec/node_snapshot_spec.rb +30 -0
- data/spec/node_spec.rb +84 -0
- data/spec/node_strategy/consensus_spec.rb +30 -0
- data/spec/node_strategy/majority_spec.rb +22 -0
- data/spec/node_strategy/single_spec.rb +22 -0
- data/spec/node_strategy_spec.rb +22 -0
- data/spec/node_watcher_spec.rb +58 -0
- data/spec/spec_helper.rb +21 -0
- data/spec/support/config/multiple_environments.yml +15 -0
- data/spec/support/config/multiple_environments_with_chroot.yml +17 -0
- data/spec/support/config/single_environment.yml +7 -0
- data/spec/support/config/single_environment_with_chroot.yml +8 -0
- data/spec/support/node_manager_stub.rb +87 -0
- data/spec/support/redis_stub.rb +105 -0
- data/spec/util_spec.rb +21 -0
- metadata +207 -0
@@ -0,0 +1,81 @@
|
|
1
|
+
module RedisFailover
|
2
|
+
# Represents a snapshot of a particular redis node as seen by all currently running
|
3
|
+
# redis node managers.
|
4
|
+
class NodeSnapshot
  # @return [String] the redis node
  attr_reader :node

  # Creates a new, empty snapshot for the given redis node.
  #
  # @param [String] node the redis node
  # @see NodeManager#initialize
  def initialize(node)
    @node = node
    # node manager id => latency reported by that node manager
    @available = {}
    # ids of the node managers that reported the node as unavailable
    @unavailable = []
  end

  # Declares this node available by the specified node manager.
  # Reporting again for the same node manager overwrites its latency.
  #
  # @param [String] node_manager the node manager id
  # @param [Integer] latency the latency
  def viewable_by(node_manager, latency)
    @available[node_manager] = latency
  end

  # Determines if this node is viewable by a node manager.
  #
  # @param [String] node_manager the node manager id
  # @return [Boolean] true if the node manager declared the node available
  def viewable_by?(node_manager)
    @available.key?(node_manager)
  end

  # Declares this node unavailable by the specified node manager.
  # A node manager is recorded at most once, so repeated reports from the
  # same manager do not inflate {#unavailable_count}.
  #
  # @param [String] node_manager the node manager id
  # @return [Array<String>] the node managers that reported the node unavailable
  def unviewable_by(node_manager)
    @unavailable << node_manager unless @unavailable.include?(node_manager)
    @unavailable
  end

  # @return [Integer] the number of node managers saying
  #   this node is available
  def available_count
    @available.size
  end

  # @return [Integer] the number of node managers saying
  #   this node is unavailable
  def unavailable_count
    @unavailable.size
  end

  # @return [Numeric, nil] the mean of the reported latencies, or nil when
  #   no node manager has declared the node available
  # @note the division is an integer division when every latency is an Integer
  def avg_latency
    return if @available.empty?
    @available.values.inject(0) { |sum, n| sum + n } / @available.size
  end

  # @return [Array<String>] all node managers involved in this snapshot
  def node_managers
    (@available.keys + @unavailable).uniq
  end

  # @return [Boolean] true if all node managers indicated that this
  #   node was viewable
  def all_available?
    available_count > 0 && unavailable_count == 0
  end

  # @return [Boolean] true if all node managers indicated that this
  #   node was unviewable
  def all_unavailable?
    unavailable_count > 0 && available_count == 0
  end

  # @return [String] a friendly representation of this node snapshot
  def to_s
    'Node %s available by %p, unavailable by %p (%d up, %d down)' %
      [node, @available, @unavailable, available_count, unavailable_count]
  end
end
|
81
|
+
end
|
@@ -0,0 +1,34 @@
|
|
1
|
+
module RedisFailover
|
2
|
+
# Base class for strategies that determine node availability.
|
3
|
+
class NodeStrategy
  include Util

  # Looks up and instantiates the strategy with the given name.
  #
  # The strategy file is required from redis_failover/node_strategy/ using
  # the downcased name, and the class is resolved from the capitalized name.
  #
  # @param [String, Symbol] name the strategy name
  # @return [Object] a new strategy instance
  # @raise [RuntimeError] if the strategy file or class cannot be found
  def self.for(name)
    strategy_path = "redis_failover/node_strategy/#{name.downcase}"
    require strategy_path
    strategy_class = const_get(name.capitalize)
    strategy_class.new
  rescue LoadError, NameError
    raise "Failed to find node strategy: #{name}"
  end

  # Computes the state of a node according to this strategy.
  # Subclasses must override this method.
  #
  # @param [Node] node the node to handle
  # @param [Hash<Node, NodeSnapshot>] snapshots the current set of snapshots
  # @return [Symbol] the status
  # @raise [NotImplementedError] always, in this base class
  def determine_state(node, snapshots)
    raise NotImplementedError
  end

  # Writes an info-level log entry stating that the node is being
  # marked as unavailable.
  #
  # @param [Node] node the node
  # @param [NodeSnapshot] snapshot the node snapshot
  def log_unavailable(node, snapshot)
    logger.info(
      "#{self.class} marking #{node} as unavailable. Snapshot: #{snapshot}"
    )
  end
end
|
34
|
+
end
|
@@ -0,0 +1,18 @@
|
|
1
|
+
module RedisFailover
|
2
|
+
class NodeStrategy
|
3
|
+
# Consensus strategy only marks the node as unavailable if all members of the
|
4
|
+
# snapshot indicate that the node is down.
|
5
|
+
class Consensus < NodeStrategy
  # Reports :unavailable only when the node's snapshot says that every
  # node manager sees the node as down; otherwise reports :available.
  #
  # @see RedisFailover::NodeStrategy#determine_state
  def determine_state(node, snapshots)
    view = snapshots[node]
    return :available unless view.all_unavailable?

    log_unavailable(node, view)
    :unavailable
  end
end
|
17
|
+
end
|
18
|
+
end
|
@@ -0,0 +1,18 @@
|
|
1
|
+
module RedisFailover
|
2
|
+
class NodeStrategy
|
3
|
+
# Majority strategy only marks the node as unavailable if a majority of the
|
4
|
+
# snapshot indicates that the node is down.
|
5
|
+
class Majority < NodeStrategy
  # Reports :unavailable when strictly more node managers see the node as
  # down than as up; a tie counts as :available.
  #
  # @see RedisFailover::NodeStrategy#determine_state
  def determine_state(node, snapshots)
    view = snapshots[node]
    majority_down = view.unavailable_count > view.available_count
    return :available unless majority_down

    log_unavailable(node, view)
    :unavailable
  end
end
|
17
|
+
end
|
18
|
+
end
|
@@ -0,0 +1,17 @@
|
|
1
|
+
module RedisFailover
|
2
|
+
class NodeStrategy
|
3
|
+
# Marks the node as unavailable if any node manager reports the node as down.
|
4
|
+
class Single < NodeStrategy
  # Reports :unavailable as soon as at least one node manager sees the
  # node as down; otherwise reports :available.
  #
  # @see RedisFailover::NodeStrategy#determine_state
  def determine_state(node, snapshots)
    view = snapshots[node]
    return :available unless view.unavailable_count > 0

    log_unavailable(node, view)
    :unavailable
  end
end
|
16
|
+
end
|
17
|
+
end
|
@@ -0,0 +1,83 @@
|
|
1
|
+
module RedisFailover
|
2
|
+
# NodeWatcher periodically monitors a specific redis node for its availability.
|
3
|
+
# NodeWatcher instances periodically report a redis node's current state
|
4
|
+
# to the NodeManager for proper handling.
|
5
|
+
class NodeWatcher
  include Util

  # Time to sleep before checking on the monitored node's status.
  WATCHER_SLEEP_TIME = 2

  # Creates a new instance.
  #
  # @param [NodeManager] manager the node manager
  # @param [Node] node the node to watch
  # @param [Integer] max_failures the max failures before reporting node as down
  def initialize(manager, node, max_failures)
    @manager = manager
    @node = node
    @max_failures = max_failures
    @monitor_thread = nil
    @done = false
  end

  # Starts the node watcher.
  #
  # @note this method returns immediately and causes monitoring to be
  #   performed in a new background thread; calling it again reuses the
  #   already created thread
  # @return [NodeWatcher] self
  def watch
    @monitor_thread ||= Thread.new { monitor_node }
    self
  end

  # Performs a graceful shutdown of this watcher.
  #
  # Flags the monitor loop to stop, tries to wake the node up, and waits
  # for the monitor thread to finish. If the watcher was never started
  # there is no thread to wait for, so the join is skipped.
  def shutdown
    @done = true
    begin
      @node.wakeup
    rescue
      # best effort
    end
    # watch may never have been called; nothing to join in that case
    @monitor_thread.join if @monitor_thread
  rescue => ex
    logger.warn("Failed to gracefully shutdown watcher for #{@node}: #{ex.inspect}")
  end

  private

  # Periodically monitors the redis node and reports state changes to
  # the {RedisFailover::NodeManager}.
  def monitor_node
    failures = 0

    loop do
      begin
        break if @done
        sleep(WATCHER_SLEEP_TIME)
        latency = Benchmark.realtime { @node.ping }
        # a successful ping clears the consecutive failure counter
        failures = 0
        notify(:available, latency)
        # NOTE(review): presumably blocks until the node changes state or
        # #wakeup is called - confirm against Node#wait
        @node.wait
      rescue NodeUnavailableError => ex
        logger.debug("Failed to communicate with node #{@node}: #{ex.inspect}")
        failures += 1
        if failures >= @max_failures
          notify(:unavailable)
          failures = 0
        end
      rescue Exception => ex
        # deliberately broad: log the error and keep the monitor loop running
        logger.error("Unexpected error while monitoring node #{@node}: #{ex.inspect}")
        logger.error(ex.backtrace.join("\n"))
      end
    end
  end

  # Notifies the manager of a node's state.
  #
  # @param [Symbol] state the node's state
  # @param [Numeric, nil] latency an optional latency
  def notify(state, latency = nil)
    @manager.notify_state(@node, state, latency)
  end
end
|
83
|
+
end
|
@@ -0,0 +1,27 @@
|
|
1
|
+
module RedisFailover
|
2
|
+
# Runner is responsible for bootstrapping the Node Manager.
|
3
|
+
class Runner
  class << self
    # Parses the command-line options, builds a Node Manager from them,
    # installs the shutdown signal handlers and starts the manager.
    #
    # @param [Array] options the command-line options
    # @note this method blocks and does not return until the
    #   Node Manager is gracefully stopped
    def run(options)
      parsed = CLI.parse(options)
      manager = NodeManager.new(parsed)
      trap_signals(manager)
      manager.start
    end

    private

    # Installs INT and TERM handlers that shut the node manager down.
    #
    # @param [NodeManager] node_manager the node manager
    def trap_signals(node_manager)
      [:INT, :TERM].each do |signal|
        trap(signal) { node_manager.shutdown }
      end
    end
  end
end
|
27
|
+
end
|
@@ -0,0 +1,137 @@
|
|
1
|
+
require 'redis_failover/errors'
|
2
|
+
|
3
|
+
module RedisFailover
|
4
|
+
# Common utility methods and constants.
|
5
|
+
module Util
  extend self

  # Redis read operations. These are automatically dispatched to slaves;
  # every operation missing from this set is dispatched to the master.
  REDIS_READ_OPS = Set[
    :echo, :exists, :get, :getbit, :getrange,
    :hexists, :hget, :hgetall, :hkeys, :hlen, :hmget, :hvals,
    :keys,
    :lindex, :llen, :lrange,
    :mapped_hmget, :mapped_mget, :mget,
    :scard, :sdiff, :sinter, :sismember, :smembers, :srandmember,
    :strlen, :sunion,
    :type,
    :zcard, :zcount, :zrange, :zrangebyscore, :zrank,
    :zrevrange, :zrevrangebyscore, :zrevrank, :zscore
  ].freeze

  # Redis operations that are not supported, since they don't make sense
  # in a client that abstracts the master/slave servers.
  UNSUPPORTED_OPS = Set[:select, :dbsize].freeze

  # Default root node in ZK used for redis_failover.
  DEFAULT_ROOT_ZNODE_PATH = '/redis_failover'.freeze

  # Connectivity errors raised by the redis client: every Errno class
  # (redis < 3.x) plus Redis::BaseError when the client defines it
  # (redis > 3.x).
  REDIS_ERRORS = begin
    errors = Errno.constants.map { |name| Errno.const_get(name) }
    errors << Redis::BaseError if Redis.const_defined?('BaseError')
    errors.freeze
  end

  # ZK Errors
  ZK_ERRORS = [
    ZK::Exceptions::LockAssertionFailedError,
    ZK::Exceptions::InterruptedSession,
    ZK::Exceptions::Retryable,
    Zookeeper::Exceptions::ContinuationTimeoutError
  ].freeze

  # Full set of errors related to connectivity.
  CONNECTIVITY_ERRORS = ([RedisFailover::Error] + REDIS_ERRORS + ZK_ERRORS).freeze

  # Builds a copy of the given hash whose keys are converted with to_sym.
  #
  # @param [Hash] hash a hash for which keys should be symbolized
  # @return [Hash] a new hash with symbolized keys
  def symbolize_keys(hash)
    hash.each_with_object({}) do |(key, value), symbolized|
      symbolized[key.to_sym] = value
    end
  end

  # Determines if two arrays are different, i.e. whether some element
  # is present in only one of them. Order and duplicates are ignored.
  #
  # @param [Array] ary_a the first array
  # @param [Array] ary_b the second array
  # @return [Boolean] true if arrays are different, false otherwise
  def different?(ary_a, ary_b)
    union = ary_a | ary_b
    common = ary_a & ary_b
    !(union - common).empty?
  end

  class << self
    # Sets a new logger to use.
    #
    # @param [Logger] logger a new logger to use
    attr_writer :logger

    # Lazily creates an INFO-level logger writing to STDOUT on first use.
    #
    # @return [Logger] the logger instance to use
    def logger
      @logger ||= Logger.new(STDOUT).tap do |log|
        log.level = Logger::INFO
        log.formatter = proc do |severity, datetime, progname, msg|
          "#{datetime.utc} RedisFailover #{Process.pid} #{severity}: #{msg}\n"
        end
      end
    end
  end

  # @return [Logger] the logger instance to use
  def logger
    Util.logger
  end

  # Encodes the specified data in JSON format.
  #
  # @param [Object] data the data to encode
  # @return [String] the JSON-encoded data
  def encode(data)
    MultiJson.encode(data)
  end

  # Decodes the specified JSON data.
  #
  # @param [String] data the JSON data to decode
  # @return [Object, nil] the decoded data, or nil when data is nil or false
  def decode(data)
    data ? MultiJson.decode(data) : nil
  end
end
|
137
|
+
end
|
Binary file
|
@@ -0,0 +1,26 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
require File.expand_path('../lib/redis_failover/version', __FILE__)
|
3
|
+
|
4
|
+
Gem::Specification.new do |spec|
  # Identity
  spec.name          = "spbtv_redis_failover"
  spec.version       = RedisFailover::VERSION
  spec.authors       = ["Ryan LeCompte"]
  spec.email         = ["lecompte@gmail.com"]
  spec.homepage      = "http://github.com/ryanlecompte/redis_failover"
  spec.summary       = %(redis_failover is a ZooKeeper-based automatic master/slave failover solution for Ruby)
  spec.description   = %(redis_failover is a ZooKeeper-based automatic master/slave failover solution for Ruby)

  # Packaged files, as tracked by git
  spec.files         = `git ls-files`.split("\n")
  spec.test_files    = `git ls-files -- {test,spec,features}/*`.split("\n")
  spec.executables   = `git ls-files -- bin/*`.split("\n").map { |path| File.basename(path) }
  spec.require_paths = ["lib"]

  # Runtime dependencies
  spec.add_dependency('redis', ['>= 2.2', '< 4'])
  spec.add_dependency('redis-namespace')
  spec.add_dependency('multi_json', '~> 1')
  spec.add_dependency('zk', ['>= 1.9', '< 1.10'])

  # Development dependencies
  spec.add_development_dependency('rake', '< 11.0')
  spec.add_development_dependency('rspec', '< 3.0')
  spec.add_development_dependency('yard')
end
|