spbtv_redis_failover 1.0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +19 -0
- data/.travis.yml +7 -0
- data/.yardopts +6 -0
- data/Changes.md +191 -0
- data/Gemfile +2 -0
- data/LICENSE +22 -0
- data/README.md +240 -0
- data/Rakefile +9 -0
- data/bin/redis_node_manager +7 -0
- data/examples/config.yml +17 -0
- data/examples/multiple_environments_config.yml +15 -0
- data/lib/redis_failover.rb +25 -0
- data/lib/redis_failover/cli.rb +142 -0
- data/lib/redis_failover/client.rb +517 -0
- data/lib/redis_failover/errors.rb +54 -0
- data/lib/redis_failover/failover_strategy.rb +25 -0
- data/lib/redis_failover/failover_strategy/latency.rb +21 -0
- data/lib/redis_failover/manual_failover.rb +52 -0
- data/lib/redis_failover/node.rb +190 -0
- data/lib/redis_failover/node_manager.rb +741 -0
- data/lib/redis_failover/node_snapshot.rb +81 -0
- data/lib/redis_failover/node_strategy.rb +34 -0
- data/lib/redis_failover/node_strategy/consensus.rb +18 -0
- data/lib/redis_failover/node_strategy/majority.rb +18 -0
- data/lib/redis_failover/node_strategy/single.rb +17 -0
- data/lib/redis_failover/node_watcher.rb +83 -0
- data/lib/redis_failover/runner.rb +27 -0
- data/lib/redis_failover/util.rb +137 -0
- data/lib/redis_failover/version.rb +3 -0
- data/misc/redis_failover.png +0 -0
- data/spbtv_redis_failover.gemspec +26 -0
- data/spec/cli_spec.rb +75 -0
- data/spec/client_spec.rb +153 -0
- data/spec/failover_strategy/latency_spec.rb +41 -0
- data/spec/failover_strategy_spec.rb +17 -0
- data/spec/node_manager_spec.rb +136 -0
- data/spec/node_snapshot_spec.rb +30 -0
- data/spec/node_spec.rb +84 -0
- data/spec/node_strategy/consensus_spec.rb +30 -0
- data/spec/node_strategy/majority_spec.rb +22 -0
- data/spec/node_strategy/single_spec.rb +22 -0
- data/spec/node_strategy_spec.rb +22 -0
- data/spec/node_watcher_spec.rb +58 -0
- data/spec/spec_helper.rb +21 -0
- data/spec/support/config/multiple_environments.yml +15 -0
- data/spec/support/config/multiple_environments_with_chroot.yml +17 -0
- data/spec/support/config/single_environment.yml +7 -0
- data/spec/support/config/single_environment_with_chroot.yml +8 -0
- data/spec/support/node_manager_stub.rb +87 -0
- data/spec/support/redis_stub.rb +105 -0
- data/spec/util_spec.rb +21 -0
- metadata +207 -0
@@ -0,0 +1,81 @@
|
|
1
|
+
module RedisFailover
  # Represents a snapshot of a particular redis node as seen by all currently running
  # redis node managers.
  #
  # Node managers that see the node are stored together with the latency they
  # reported; node managers that cannot see it are stored as a list of ids.
  class NodeSnapshot
    # @return [String] the redis node
    attr_reader :node

    # Creates a new instance with no availability reports recorded yet.
    #
    # @param [String] node the redis node
    # @see NodeManager#initialize
    def initialize(node)
      @node = node
      @available = {}
      @unavailable = []
    end

    # Declares this node available by the specified node manager.
    # A repeated report from the same node manager replaces its earlier latency.
    #
    # @param [String] node_manager the node manager id
    # @param [Numeric] latency the latency reported by that node manager
    # @return [Numeric] the recorded latency
    def viewable_by(node_manager, latency)
      @available[node_manager] = latency
    end

    # Determines if this node is viewable by a node manager.
    #
    # @param [String] node_manager the node manager id
    # @return [Boolean] true if that node manager declared the node available
    def viewable_by?(node_manager)
      @available.key?(node_manager)
    end

    # Declares this node unavailable by the specified node manager.
    #
    # The report is idempotent: a node manager is recorded at most once, so
    # {#unavailable_count} always agrees with the de-duplicated {#node_managers}.
    #
    # @param [String] node_manager the node manager id
    # @return [Array<String>] the node managers that reported this node as unavailable
    def unviewable_by(node_manager)
      @unavailable << node_manager unless @unavailable.include?(node_manager)
      @unavailable
    end

    # @return [Integer] the number of node managers saying
    #   this node is available
    def available_count
      @available.size
    end

    # @return [Integer] the number of node managers saying
    #   this node is unavailable
    def unavailable_count
      @unavailable.size
    end

    # Averages the latencies reported by the node managers that see this node.
    #
    # @return [Numeric, nil] the average available latency, or nil when no node
    #   manager has declared this node available. When every latency is an
    #   Integer the result is truncated by integer division.
    def avg_latency
      return if @available.empty?
      @available.values.inject(0) { |sum, n| sum + n } / @available.size
    end

    # @return [Array<String>] all node managers involved in this snapshot,
    #   available ones first, without duplicates
    def node_managers
      (@available.keys + @unavailable).uniq
    end

    # @return [Boolean] true if at least one node manager reported and all
    #   of them indicated that this node was viewable
    def all_available?
      available_count > 0 && unavailable_count == 0
    end

    # @return [Boolean] true if at least one node manager reported and all
    #   of them indicated that this node was unviewable
    def all_unavailable?
      unavailable_count > 0 && available_count == 0
    end

    # @return [String] a friendly representation of this node snapshot
    def to_s
      'Node %s available by %p, unavailable by %p (%d up, %d down)' %
        [node, @available, @unavailable, available_count, unavailable_count]
    end
  end
end
|
@@ -0,0 +1,34 @@
|
|
1
|
+
module RedisFailover
  # Abstract parent of the strategies that decide whether a node is available.
  class NodeStrategy
    include Util

    # Looks up and instantiates the strategy with the given name.
    #
    # The name is downcased to locate the file under
    # redis_failover/node_strategy/ and capitalized to locate the class.
    #
    # @param [String, Symbol] name the strategy name
    # @return [Object] a new strategy instance
    # @raise [RuntimeError] if the strategy file or class cannot be found
    def self.for(name)
      strategy_path = "redis_failover/node_strategy/#{name.downcase}"
      require strategy_path
      strategy_class = const_get(name.capitalize)
      strategy_class.new
    rescue LoadError, NameError
      raise "Failed to find node strategy: #{name}"
    end

    # Computes the state of a node according to this strategy.
    # Concrete strategies must override this method.
    #
    # @param [Node] node the node to handle
    # @param [Hash<Node, NodeSnapshot>] snapshots the current set of snapshots
    # @return [Symbol] the status
    # @raise [NotImplementedError] always, in this base class
    def determine_state(node, snapshots)
      raise NotImplementedError
    end

    # Writes an info-level log entry saying that this strategy marks the
    # node as unavailable.
    #
    # @param [Node] node the node
    # @param [NodeSnapshot] snapshot the node snapshot
    def log_unavailable(node, snapshot)
      message = "#{self.class} marking #{node} as unavailable. Snapshot: #{snapshot}"
      logger.info(message)
    end
  end
end
|
@@ -0,0 +1,18 @@
|
|
1
|
+
module RedisFailover
  class NodeStrategy
    # Consensus strategy: the node is reported as unavailable only when the
    # snapshot says that every reporting node manager sees it as down.
    class Consensus < NodeStrategy
      # @see RedisFailover::NodeStrategy#determine_state
      def determine_state(node, snapshots)
        node_snapshot = snapshots[node]
        return :available unless node_snapshot.all_unavailable?

        log_unavailable(node, node_snapshot)
        :unavailable
      end
    end
  end
end
|
@@ -0,0 +1,18 @@
|
|
1
|
+
module RedisFailover
  class NodeStrategy
    # Majority strategy: the node is reported as unavailable only when more
    # node managers see it as down than see it as up. A tie counts as available.
    class Majority < NodeStrategy
      # @see RedisFailover::NodeStrategy#determine_state
      def determine_state(node, snapshots)
        node_snapshot = snapshots[node]
        return :available unless node_snapshot.unavailable_count > node_snapshot.available_count

        log_unavailable(node, node_snapshot)
        :unavailable
      end
    end
  end
end
|
@@ -0,0 +1,17 @@
|
|
1
|
+
module RedisFailover
  class NodeStrategy
    # Single strategy: one node manager reporting the node as down is enough
    # to mark it as unavailable.
    class Single < NodeStrategy
      # @see RedisFailover::NodeStrategy#determine_state
      def determine_state(node, snapshots)
        node_snapshot = snapshots[node]
        return :available unless node_snapshot.unavailable_count > 0

        log_unavailable(node, node_snapshot)
        :unavailable
      end
    end
  end
end
|
@@ -0,0 +1,83 @@
|
|
1
|
+
module RedisFailover
  # NodeWatcher periodically monitors a specific redis node for its availability.
  # NodeWatcher instances periodically report a redis node's current state
  # to the NodeManager for proper handling.
  class NodeWatcher
    include Util

    # Time (in seconds) to sleep before checking on the monitored node's status.
    WATCHER_SLEEP_TIME = 2

    # Creates a new instance.
    #
    # @param [NodeManager] manager the node manager
    # @param [Node] node the node to watch
    # @param [Integer] max_failures the max failures before reporting node as down
    def initialize(manager, node, max_failures)
      @manager = manager
      @node = node
      @max_failures = max_failures
      @monitor_thread = nil
      @done = false
    end

    # Starts the node watcher.
    #
    # @note this method returns immediately and causes monitoring to be
    #   performed in a new background thread. Calling it again reuses the
    #   thread that was already created.
    # @return [NodeWatcher] self
    def watch
      @monitor_thread ||= Thread.new { monitor_node }
      self
    end

    # Performs a graceful shutdown of this watcher: flags the monitor loop as
    # done, wakes the node up on a best-effort basis and waits for the monitor
    # thread to finish. Any failure is logged as a warning instead of raised.
    def shutdown
      @done = true
      begin
        @node.wakeup
      rescue
        # best effort
      end
      # The thread only exists once #watch has been called; without it there
      # is nothing to wait for.
      @monitor_thread.join if @monitor_thread
    rescue => ex
      logger.warn("Failed to gracefully shutdown watcher for #{@node}: #{ex.inspect}")
    end

    private

    # Periodically monitors the redis node and reports state changes to
    # the {RedisFailover::NodeManager}.
    #
    # Each iteration sleeps, pings the node and reports it as available with
    # the measured ping time. Consecutive NodeUnavailableError failures are
    # counted; once the count reaches the configured maximum the node is
    # reported as unavailable and the counter starts over.
    def monitor_node
      failures = 0

      loop do
        begin
          break if @done
          sleep(WATCHER_SLEEP_TIME)
          latency = Benchmark.realtime { @node.ping }
          failures = 0
          notify(:available, latency)
          # NOTE(review): presumably blocks until the node signals a change or
          # #shutdown calls @node.wakeup -- confirm against Node#wait.
          @node.wait
        rescue NodeUnavailableError => ex
          logger.debug("Failed to communicate with node #{@node}: #{ex.inspect}")
          failures += 1
          if failures >= @max_failures
            notify(:unavailable)
            failures = 0
          end
        rescue Exception => ex
          # NOTE(review): deliberately broad so that an unexpected error does
          # not end the monitor loop; this also swallows non-StandardError
          # exceptions raised in this thread -- confirm that is intended.
          logger.error("Unexpected error while monitoring node #{@node}: #{ex.inspect}")
          logger.error(ex.backtrace.join("\n"))
        end
      end
    end

    # Notifies the manager of a node's state.
    #
    # @param [Symbol] state the node's state
    # @param [Float, nil] latency an optional latency, the ping duration
    #   measured with Benchmark.realtime
    def notify(state, latency = nil)
      @manager.notify_state(@node, state, latency)
    end
  end
end
|
@@ -0,0 +1,27 @@
|
|
1
|
+
module RedisFailover
  # Runner bootstraps the Node Manager from command-line options.
  class Runner
    # Parses the options, builds a Node Manager, installs the shutdown
    # signal handlers and starts the manager.
    #
    # @param [Array] options the command-line options
    # @note the original documentation states that this method blocks and
    #   does not return until the Node Manager is gracefully stopped;
    #   NOTE(review): that depends on NodeManager#start -- confirm there.
    def self.run(options)
      manager = NodeManager.new(CLI.parse(options))
      trap_signals(manager)
      manager.start
    end

    # Installs handlers for INT and TERM that shut the node manager down.
    #
    # @param [NodeManager] node_manager the node manager
    def self.trap_signals(node_manager)
      [:INT, :TERM].each { |sig| trap(sig) { node_manager.shutdown } }
    end
    private_class_method :trap_signals
  end
end
|
@@ -0,0 +1,137 @@
|
|
1
|
+
require 'redis_failover/errors'
|
2
|
+
|
3
|
+
module RedisFailover
  # Common utility methods and constants.
  module Util
    extend self

    # Redis read operations that are automatically dispatched to slaves. Any
    # operation not listed here will be dispatched to the master.
    REDIS_READ_OPS = Set[
      :echo, :exists, :get, :getbit, :getrange,
      :hexists, :hget, :hgetall, :hkeys, :hlen, :hmget, :hvals,
      :keys,
      :lindex, :llen, :lrange,
      :mapped_hmget, :mapped_mget, :mget,
      :scard, :sdiff, :sinter, :sismember, :smembers, :srandmember,
      :strlen, :sunion,
      :type,
      :zcard, :zcount, :zrange, :zrangebyscore, :zrank,
      :zrevrange, :zrevrangebyscore, :zrevrank, :zscore
    ].freeze

    # Unsupported Redis operations. These don't make sense in a client
    # that abstracts the master/slave servers.
    UNSUPPORTED_OPS = Set[:select, :dbsize].freeze

    # Default root node in ZK used for redis_failover.
    DEFAULT_ROOT_ZNODE_PATH = '/redis_failover'.freeze

    # Connectivity errors raised by the redis client: every constant defined
    # under Errno, plus Redis::BaseError when the loaded redis client has it.
    REDIS_ERRORS = Errno.constants.map { |error_name| Errno.const_get(error_name) }
    if Redis.const_defined?('BaseError')
      REDIS_ERRORS << Redis::BaseError
    end
    REDIS_ERRORS.freeze

    # ZK Errors
    ZK_ERRORS = [
      ZK::Exceptions::LockAssertionFailedError,
      ZK::Exceptions::InterruptedSession,
      ZK::Exceptions::Retryable,
      Zookeeper::Exceptions::ContinuationTimeoutError
    ].freeze

    # Full set of errors related to connectivity.
    CONNECTIVITY_ERRORS = ([RedisFailover::Error] + REDIS_ERRORS + ZK_ERRORS).freeze

    # Builds a copy of the hash whose keys have been converted with to_sym.
    #
    # @param [Hash] hash a hash for which keys should be symbolized
    # @return [Hash] a new hash with symbolized keys
    def symbolize_keys(hash)
      hash.each_with_object({}) do |(key, value), symbolized|
        symbolized[key.to_sym] = value
      end
    end

    # Determines if two arrays are different, i.e. whether some element
    # appears in only one of them (order and duplicates are ignored).
    #
    # @param [Array] ary_a the first array
    # @param [Array] ary_b the second array
    # @return [Boolean] true if arrays are different, false otherwise
    def different?(ary_a, ary_b)
      in_either = ary_a | ary_b
      in_both = ary_a & ary_b
      !(in_either - in_both).empty?
    end

    # Returns the shared logger, lazily creating a default one that writes
    # INFO and above to STDOUT.
    #
    # @return [Logger] the logger instance to use
    def self.logger
      return @logger if @logger

      default_logger = Logger.new(STDOUT)
      default_logger.level = Logger::INFO
      default_logger.formatter = proc do |severity, datetime, _progname, msg|
        "#{datetime.utc} RedisFailover #{Process.pid} #{severity}: #{msg}\n"
      end
      @logger = default_logger
    end

    # Sets a new logger to use.
    #
    # @param [Logger] logger a new logger to use
    def self.logger=(logger)
      @logger = logger
    end

    # Instance-level accessor that delegates to the shared module logger.
    #
    # @return [Logger] the logger instance to use
    def logger
      Util.logger
    end

    # Encodes the specified data in JSON format.
    #
    # @param [Object] data the data to encode
    # @return [String] the JSON-encoded data
    def encode(data)
      MultiJson.encode(data)
    end

    # Decodes the specified JSON data.
    #
    # @param [String] data the JSON data to decode
    # @return [Object, nil] the decoded data, or nil when no data was given
    def decode(data)
      MultiJson.decode(data) if data
    end
  end
end
|
Binary file
|
@@ -0,0 +1,26 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
require File.expand_path('../lib/redis_failover/version', __FILE__)
|
3
|
+
|
4
|
+
# Gem specification for the spbtv_redis_failover package.
Gem::Specification.new do |spec|
  # The same text is used for both the description and the summary.
  blurb = %(redis_failover is a ZooKeeper-based automatic master/slave failover solution for Ruby)

  spec.name          = "spbtv_redis_failover"
  spec.version       = RedisFailover::VERSION
  spec.authors       = ["Ryan LeCompte"]
  spec.email         = ["lecompte@gmail.com"]
  spec.description   = blurb
  spec.summary       = blurb
  spec.homepage      = "http://github.com/ryanlecompte/redis_failover"

  # File lists are taken from what git tracks.
  spec.files         = `git ls-files`.split("\n")
  spec.test_files    = `git ls-files -- {test,spec,features}/*`.split("\n")
  spec.executables   = `git ls-files -- bin/*`.split("\n").map { |path| File.basename(path) }
  spec.require_paths = ["lib"]

  # Runtime dependencies.
  spec.add_dependency('redis', ['>= 2.2', '< 4'])
  spec.add_dependency('redis-namespace')
  spec.add_dependency('multi_json', '~> 1')
  spec.add_dependency('zk', ['>= 1.9', '< 1.10'])

  # Development-only dependencies.
  spec.add_development_dependency('rake', '< 11.0')
  spec.add_development_dependency('rspec', '< 3.0')
  spec.add_development_dependency('yard')
end
|