nogara-redis_failover 0.8.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +19 -0
- data/.travis.yml +5 -0
- data/.yardopts +6 -0
- data/Changes.md +116 -0
- data/Gemfile +2 -0
- data/LICENSE +22 -0
- data/README.md +190 -0
- data/Rakefile +9 -0
- data/bin/redis_node_manager +7 -0
- data/examples/config.yml +14 -0
- data/examples/multiple_environments_config.yml +15 -0
- data/lib/redis_failover.rb +22 -0
- data/lib/redis_failover/cli.rb +119 -0
- data/lib/redis_failover/client.rb +441 -0
- data/lib/redis_failover/errors.rb +47 -0
- data/lib/redis_failover/manual_failover.rb +40 -0
- data/lib/redis_failover/node.rb +190 -0
- data/lib/redis_failover/node_manager.rb +352 -0
- data/lib/redis_failover/node_watcher.rb +79 -0
- data/lib/redis_failover/runner.rb +28 -0
- data/lib/redis_failover/util.rb +83 -0
- data/lib/redis_failover/version.rb +3 -0
- data/misc/redis_failover.png +0 -0
- data/redis_failover.gemspec +26 -0
- data/spec/cli_spec.rb +75 -0
- data/spec/client_spec.rb +100 -0
- data/spec/node_manager_spec.rb +112 -0
- data/spec/node_spec.rb +84 -0
- data/spec/node_watcher_spec.rb +58 -0
- data/spec/spec_helper.rb +20 -0
- data/spec/support/config/multiple_environments.yml +15 -0
- data/spec/support/config/multiple_environments_with_chroot.yml +17 -0
- data/spec/support/config/single_environment.yml +7 -0
- data/spec/support/config/single_environment_with_chroot.yml +8 -0
- data/spec/support/node_manager_stub.rb +65 -0
- data/spec/support/redis_stub.rb +105 -0
- data/spec/util_spec.rb +21 -0
- metadata +210 -0
@@ -0,0 +1,47 @@
|
|
1
|
+
module RedisFailover
  # Base error type for everything raised by RedisFailover.
  Error = Class.new(StandardError)

  # Raised when a node is specified incorrectly.
  InvalidNodeError = Class.new(Error)

  # Raised when a node changes to an invalid/unknown state.
  class InvalidNodeStateError < Error
    # @param node [Node] the node whose state changed
    # @param state [Symbol] the unrecognized state that was reported
    def initialize(node, state)
      super("Invalid state change `#{state}` for node #{node}")
    end
  end

  # Raised when a node is unavailable (i.e., unreachable via network).
  class NodeUnavailableError < Error
    # @param node [Node] the unreachable node
    def initialize(node)
      super("Node: #{node}")
    end
  end

  # Raised when no master is currently available.
  NoMasterError = Class.new(Error)

  # Raised when no slave is currently available.
  NoSlaveError = Class.new(Error)

  # Raised when a redis server is no longer using the same role
  # as previously assumed.
  class InvalidNodeRoleError < Error
    # @param node [Node] the node whose role changed
    # @param assumed [Symbol, String] the role the client assumed
    # @param actual [Symbol, String] the role the server now reports
    def initialize(node, assumed, actual)
      super("Invalid role detected for node #{node}, client thought " \
        "it was a #{assumed}, but it's now a #{actual}")
    end
  end

  # Raised when an unsupported redis operation is performed.
  class UnsupportedOperationError < Error
    # @param operation [Symbol, String] the redis operation that was attempted
    def initialize(operation)
      super("Operation `#{operation}` is currently unsupported")
    end
  end
end
|
@@ -0,0 +1,40 @@
|
|
1
|
+
module RedisFailover
  # Provides manual failover support to a new master.
  class ManualFailover
    # Path for manual failover communication.
    ZNODE_PATH = '/redis_failover_manual'.freeze

    # Denotes that any slave can be used as a candidate for promotion.
    ANY_SLAVE = "ANY_SLAVE".freeze

    # Creates a new instance.
    #
    # @param zk [ZK] the ZooKeeper client
    # @param options [Hash] the options used for manual failover
    # @option options [String] :host the host of the failover candidate
    # @option options [String] :port the port of the failover candidate
    # @note If options is empty, a random slave will be used
    #   as a failover candidate.
    def initialize(zk, options = {})
      @zk = zk
      @options = options
    end

    # Performs a manual failover by publishing the candidate node
    # (or the ANY_SLAVE sentinel) to the coordination znode.
    def perform
      create_path
      candidate =
        if @options.empty?
          ANY_SLAVE
        else
          "#{@options[:host]}:#{@options[:port]}"
        end
      @zk.set(ZNODE_PATH, candidate)
    end

    private

    # Creates the znode path used for coordinating manual failovers.
    # Ignores the case where the path already exists.
    def create_path
      @zk.create(ZNODE_PATH)
    rescue ZK::Exceptions::NodeExists
      # best effort
    end
  end
end
|
@@ -0,0 +1,190 @@
|
|
1
|
+
module RedisFailover
  # Represents a redis node (master or slave). Instances of this class
  # are used by the NodeManager and NodeWatcher to manipulate real redis
  # servers.
  class Node
    include Util

    # Maximum amount of time given for any redis operation to complete.
    # If a redis operation doesn't complete in the allotted time, a
    # NodeUnavailableError will be raised.
    MAX_OP_WAIT_TIME = 5

    # @return [String] the redis server host
    attr_reader :host

    # @return [Integer] the redis server port
    attr_reader :port

    # Creates a new instance.
    #
    # @param [Hash] options the options used to create the node
    # @option options [String] :host the host of the redis server (required)
    # @option options [String] :port the port of the redis server (defaults to 6379)
    # @option options [String] :password optional password used when connecting
    # @raise [InvalidNodeError] if no host is provided
    def initialize(options = {})
      @host = options.fetch(:host) { raise InvalidNodeError, 'missing host'}
      @port = Integer(options[:port] || 6379)
      @password = options[:password]
    end

    # @return [Boolean] true if this node is a master, false otherwise
    def master?
      role == 'master'
    end

    # @return [Boolean] true if this node is a slave, false otherwise
    def slave?
      !master?
    end

    # Determines if this node is a slave of the given master.
    #
    # @param [Node] master the master to check
    # @return [Boolean] true if slave of master, false otherwise
    def slave_of?(master)
      current_master == master
    end

    # Determines current master of this slave.
    #
    # @return [Node, nil] the node representing the master of this slave,
    #   or nil if this node does not currently report the slave role
    def current_master
      info = fetch_info
      return unless info[:role] == 'slave'
      Node.new(:host => info[:master_host], :port => info[:master_port].to_i)
    end

    # Waits until something interesting happens. If the connection
    # with this node dies, the blpop call will raise an error. If
    # the blpop call returns without error, then this will be due to
    # a graceful shutdown signaled by #wakeup or a timeout.
    def wait
      perform_operation do |redis|
        # NOTE(review): the blpop timeout (MAX_OP_WAIT_TIME - 3 == 2s) is kept
        # below the outer operation timeout so that blpop normally returns
        # before Timeout fires -- confirm this margin is intentional.
        redis.blpop(wait_key, MAX_OP_WAIT_TIME - 3)
        redis.del(wait_key)
      end
    end

    # Wakes up this node by pushing a value to its internal
    # queue used by #wait.
    def wakeup
      perform_operation do |redis|
        redis.lpush(wait_key, '1')
      end
    end

    # Makes this node a slave of the given node.
    #
    # @param [Node] node the node of which to become a slave
    def make_slave!(node)
      perform_operation do |redis|
        # no-op if already replicating from the given node
        unless slave_of?(node)
          redis.slaveof(node.host, node.port)
          logger.info("#{self} is now a slave of #{node}")
          # interrupt any in-progress #wait so watchers observe the change
          wakeup
        end
      end
    end

    # Makes this node a master node.
    def make_master!
      perform_operation do |redis|
        # no-op if this node already reports the master role
        unless master?
          # SLAVEOF NO ONE promotes the server to master
          redis.slaveof('no', 'one')
          logger.info("#{self} is now master")
          # interrupt any in-progress #wait so watchers observe the change
          wakeup
        end
      end
    end

    # @return [String] an inspect string for this node
    def inspect
      "<RedisFailover::Node #{to_s}>"
    end

    # @return [String] a friendly string for this node (host:port)
    def to_s
      "#{@host}:#{@port}"
    end

    # Determines if this node is equal to another node.
    # Two nodes are equal when their host and port match.
    #
    # @param [Node] other the other node to compare
    # @return [Boolean] true if equal, false otherwise
    def ==(other)
      return false unless Node === other
      return true if self.equal?(other)
      [host, port] == [other.host, other.port]
    end
    alias_method :eql?, :==


    # @return [Integer] a hash value for this node
    # (derived from host:port, consistent with #== / #eql?)
    def hash
      to_s.hash
    end

    # Fetches information/stats for this node.
    #
    # @return [Hash] the INFO output for this node with symbolized keys
    def fetch_info
      perform_operation do |redis|
        symbolize_keys(redis.info)
      end
    end
    alias_method :ping, :fetch_info

    # @return [Boolean] determines if this node prohibits stale reads
    #   (i.e., slave-serve-stale-data is configured to 'no')
    def prohibits_stale_reads?
      perform_operation do |redis|
        redis.config('get', 'slave-serve-stale-data').last == 'no'
      end
    end

    # @return [Boolean] determines if this node is syncing with its master
    def syncing_with_master?
      perform_operation do |redis|
        # NOTE(review): fetch_info opens its own perform_operation (and a
        # second client/timeout) inside this one; the yielded redis client
        # is unused here -- confirm the nesting is intentional.
        fetch_info[:master_sync_in_progress] == '1'
      end
    end

    private

    # @return [String] the current role for this node (per INFO)
    def role
      fetch_info[:role]
    end

    # @return [String] the name of the wait queue for this node;
    #   randomized per instance to avoid key collisions between watchers
    def wait_key
      @wait_key ||= "_redis_failover_#{SecureRandom.hex(32)}"
    end

    # @return [Redis] a new redis client instance for this node
    def new_client
      Redis.new(:host => @host, :password => @password, :port => @port)
    end

    # Safely performs a redis operation within a given timeout window.
    # A fresh client is created per operation and disconnected afterwards.
    #
    # @yield [Redis] the redis client to use for the operation
    # @raise [NodeUnavailableError] if node is currently unreachable
    def perform_operation
      redis = nil
      Timeout.timeout(MAX_OP_WAIT_TIME) do
        redis = new_client
        yield redis
      end
    rescue
      # any failure (timeout, connection error, etc.) is surfaced uniformly;
      # `caller` preserves the original call site in the backtrace
      raise NodeUnavailableError, self, caller
    ensure
      if redis
        begin
          redis.client.disconnect
        rescue
          raise NodeUnavailableError, self, caller
        end
      end
    end
  end
end
|
@@ -0,0 +1,352 @@
|
|
1
|
+
module RedisFailover
  # NodeManager manages a list of redis nodes. Upon startup, the NodeManager
  # will discover the current redis master and slaves. Each redis node is
  # monitored by a NodeWatcher instance. The NodeWatchers periodically
  # report the current state of the redis node it's watching to the
  # NodeManager via an asynchronous queue. The NodeManager processes the
  # state reports and reacts appropriately by handling stale/dead nodes,
  # and promoting a new redis master if it sees fit to do so.
  class NodeManager
    include Util

    # Name for the znode that handles exclusive locking between multiple
    # Node Manager processes. Whoever holds the lock will be considered
    # the "master" Node Manager, and will be responsible for monitoring
    # the redis nodes. When a Node Manager that holds the lock disappears
    # or fails, another Node Manager process will grab the lock and
    # become the master.
    LOCK_PATH = 'master_node_manager'

    # Number of seconds to wait before retrying bootstrap process.
    TIMEOUT = 5

    # Creates a new instance.
    #
    # @param [Hash] options the options used to initialize the manager
    # @option options [String] :zkservers comma-separated ZK host:port pairs
    # @option options [String] :znode_path znode path override for redis nodes
    # @option options [String] :password password for redis nodes
    # @option options [Array<String>] :nodes the nodes to manage
    # @option options [String] :max_failures the max failures for a node
    def initialize(options)
      logger.info("Redis Node Manager v#{VERSION} starting (#{RUBY_DESCRIPTION})")
      @options = options
      @znode = @options[:znode_path] || Util::DEFAULT_ZNODE_PATH
      @manual_znode = ManualFailover::ZNODE_PATH
      # guards the manual-failover ZK callback (fired from ZK's event thread)
      @mutex = Mutex.new
    end

    # Starts the node manager.
    #
    # @note This method does not return until the manager terminates.
    def start
      @queue = Queue.new
      @leader = false
      setup_zk
      logger.info('Waiting to become master Node Manager ...')
      # blocks until this process acquires the exclusive leader lock
      @zk.with_lock(LOCK_PATH) do
        @leader = true
        logger.info('Acquired master Node Manager lock')
        discover_nodes
        initialize_path
        spawn_watchers
        # loops processing state reports until shutdown
        handle_state_reports
      end
    rescue ZK::Exceptions::InterruptedSession => ex
      logger.error("ZK error while attempting to manage nodes: #{ex.inspect}")
      logger.error(ex.backtrace.join("\n"))
      # tear everything down and re-bootstrap after a short pause
      shutdown
      sleep(TIMEOUT)
      retry
    end

    # Notifies the manager of a state change. Used primarily by
    # {RedisFailover::NodeWatcher} to inform the manager of watched node states.
    #
    # @param [Node] node the node
    # @param [Symbol] state the state
    def notify_state(node, state)
      @queue << [node, state]
    end

    # Performs a graceful shutdown of the manager.
    def shutdown
      @queue.clear
      # nil sentinel terminates the handle_state_reports loop
      @queue << nil
      @watchers.each(&:shutdown) if @watchers
      @zk.close! if @zk
    end

    private

    # Configures the ZooKeeper client and the manual-failover watch.
    def setup_zk
      # close any previous session (e.g. when retrying after a ZK error)
      @zk.close! if @zk
      @zk = ZK.new("#{@options[:zkservers]}#{@options[:chroot] || ''}")

      @zk.register(@manual_znode) do |event|
        @mutex.synchronize do
          if event.node_changed?
            schedule_manual_failover
          end
        end
      end

      # re-arm the watch after reconnects, and arm it once now
      @zk.on_connected { @zk.stat(@manual_znode, :watch => true) }
      @zk.stat(@manual_znode, :watch => true)
    end

    # Handles periodic state reports from {RedisFailover::NodeWatcher} instances.
    # Loops until a nil sentinel is queued by #shutdown.
    def handle_state_reports
      while state_report = @queue.pop
        begin
          node, state = state_report
          case state
          when :unavailable then handle_unavailable(node)
          when :available then handle_available(node)
          when :syncing then handle_syncing(node)
          when :manual_failover then handle_manual_failover(node)
          else raise InvalidNodeStateError.new(node, state)
          end

          # flush current state
          write_state
        rescue ZK::Exceptions::InterruptedSession
          # fail hard if this is a ZK connection-related error
          raise
        rescue => ex
          logger.error("Error handling #{state_report.inspect}: #{ex.inspect}")
          logger.error(ex.backtrace.join("\n"))
        end
      end
    end

    # Handles an unavailable node.
    #
    # @param [Node] node the unavailable node
    def handle_unavailable(node)
      # no-op if we already know about this node
      return if @unavailable.include?(node)
      logger.info("Handling unavailable node: #{node}")

      @unavailable << node
      # find a new master if this node was a master
      if node == @master
        logger.info("Demoting currently unavailable master #{node}.")
        promote_new_master
      else
        @slaves.delete(node)
      end
    end

    # Handles an available node.
    #
    # @param [Node] node the available node
    def handle_available(node)
      reconcile(node)

      # no-op if we already know about this node
      return if @master == node || @slaves.include?(node)
      logger.info("Handling available node: #{node}")

      if @master
        # master already exists, make a slave
        node.make_slave!(@master)
        @slaves << node
      else
        # no master exists, make this the new master
        promote_new_master(node)
      end

      @unavailable.delete(node)
    end

    # Handles a node that is currently syncing.
    #
    # @param [Node] node the syncing node
    def handle_syncing(node)
      reconcile(node)

      if node.syncing_with_master? && node.prohibits_stale_reads?
        logger.info("Node #{node} not ready yet, still syncing with master.")
        force_unavailable_slave(node)
        return
      end

      # otherwise, we can use this node
      handle_available(node)
    end

    # Handles a manual failover request to the given node.
    #
    # @param [Node] node the candidate node for failover
    def handle_manual_failover(node)
      # no-op if node to be failed over is already master
      return if @master == node
      logger.info("Handling manual failover")

      # make current master a slave, and promote new master
      # NOTE(review): if @master is nil here, nil is pushed into @slaves --
      # confirm a manual failover can't be requested before a master exists.
      @slaves << @master
      @slaves.delete(node)
      promote_new_master(node)
    end

    # Promotes a new master.
    #
    # @param [Node] node the optional node to promote
    # @note if no node is specified, a random slave will be used
    def promote_new_master(node = nil)
      # drop the published state while there is no master
      delete_path
      @master = nil

      # make a specific node or slave the new master
      candidate = node || @slaves.pop
      unless candidate
        logger.error('Failed to promote a new master, no candidate available.')
        return
      end

      # repoint remaining slaves first, then promote the candidate
      redirect_slaves_to(candidate)
      candidate.make_master!
      @master = candidate

      create_path
      write_state
      logger.info("Successfully promoted #{candidate} to master.")
    end

    # Discovers the current master and slave nodes.
    #
    # @raise [NoMasterError] if no reachable node reports the master role
    def discover_nodes
      @unavailable = []
      nodes = @options[:nodes].map { |opts| Node.new(opts) }.uniq
      raise NoMasterError unless @master = find_master(nodes)
      @slaves = nodes - [@master]
      logger.info("Managing master (#{@master}) and slaves" +
        " (#{@slaves.map(&:to_s).join(', ')})")

      # ensure that slaves are correctly pointing to this master
      redirect_slaves_to(@master)
    end

    # Spawns the {RedisFailover::NodeWatcher} instances for each managed node.
    def spawn_watchers
      @watchers = [@master, @slaves, @unavailable].flatten.map do |node|
        NodeWatcher.new(self, node, @options[:max_failures] || 3)
      end
      @watchers.each(&:watch)
    end

    # Searches for the master node.
    #
    # @param [Array<Node>] nodes the nodes to search
    # @return [Node] the found master node, nil if not found
    def find_master(nodes)
      nodes.find do |node|
        begin
          node.master?
        rescue NodeUnavailableError
          # skip nodes that can't be reached during discovery
          false
        end
      end
    end

    # Redirects all slaves to the specified node.
    #
    # @param [Node] node the node to which slaves are redirected
    def redirect_slaves_to(node)
      # iterate over a copy since force_unavailable_slave mutates @slaves
      @slaves.dup.each do |slave|
        begin
          slave.make_slave!(node)
        rescue NodeUnavailableError
          logger.info("Failed to redirect unreachable slave #{slave} to #{node}")
          force_unavailable_slave(slave)
        end
      end
    end

    # Forces a slave to be marked as unavailable.
    #
    # @param [Node] node the node to force as unavailable
    def force_unavailable_slave(node)
      @slaves.delete(node)
      @unavailable << node unless @unavailable.include?(node)
    end

    # It's possible that a newly available node may have been restarted
    # and completely lost its dynamically set run-time role by the node
    # manager. This method ensures that the node resumes its role as
    # determined by the manager.
    #
    # @param [Node] node the node to reconcile
    def reconcile(node)
      # nothing to do when the node's actual role matches our view
      return if @master == node && node.master?
      return if @master && node.slave_of?(@master)

      logger.info("Reconciling node #{node}")
      if @master == node && !node.master?
        # we think the node is a master, but the node doesn't
        node.make_master!
        return
      end

      # verify that node is a slave for the current master
      if @master && !node.slave_of?(@master)
        node.make_slave!(@master)
      end
    end

    # @return [Hash] the set of current nodes grouped by category
    def current_nodes
      {
        :master => @master ? @master.to_s : nil,
        :slaves => @slaves.map(&:to_s),
        :unavailable => @unavailable.map(&:to_s)
      }
    end

    # Deletes the znode path containing the redis nodes.
    def delete_path
      @zk.delete(@znode)
      logger.info("Deleted ZooKeeper node #{@znode}")
    rescue ZK::Exceptions::NoNode => ex
      # already gone -- nothing to do
      logger.info("Tried to delete missing znode: #{ex.inspect}")
    end

    # Creates the znode path containing the redis nodes.
    def create_path
      unless @zk.exists?(@znode)
        # ephemeral: the state disappears if this manager's session dies
        @zk.create(@znode, encode(current_nodes), :ephemeral => true)
        logger.info("Created ZooKeeper node #{@znode}")
      end
    rescue ZK::Exceptions::NodeExists
      # best effort
    end

    # Initializes the znode path containing the redis nodes.
    def initialize_path
      create_path
      write_state
    end

    # Writes the current redis nodes state to the znode path.
    def write_state
      create_path
      @zk.set(@znode, encode(current_nodes))
    end

    # Schedules a manual failover to a redis node, triggered by a change
    # to the manual-failover znode. Only the leader acts on the request.
    def schedule_manual_failover
      return unless @leader
      # re-arm the watch while reading the requested candidate
      new_master = @zk.get(@manual_znode, :watch => true).first
      logger.info("Received manual failover request for: #{new_master}")

      node = if new_master == ManualFailover::ANY_SLAVE
        @slaves.sample
      else
        host, port = new_master.split(':', 2)
        Node.new(:host => host, :port => port, :password => @options[:password])
      end
      notify_state(node, :manual_failover) if node
    end
  end
end
|