nogara-redis_failover 0.8.9
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +19 -0
- data/.travis.yml +5 -0
- data/.yardopts +6 -0
- data/Changes.md +116 -0
- data/Gemfile +2 -0
- data/LICENSE +22 -0
- data/README.md +190 -0
- data/Rakefile +9 -0
- data/bin/redis_node_manager +7 -0
- data/examples/config.yml +14 -0
- data/examples/multiple_environments_config.yml +15 -0
- data/lib/redis_failover.rb +22 -0
- data/lib/redis_failover/cli.rb +119 -0
- data/lib/redis_failover/client.rb +441 -0
- data/lib/redis_failover/errors.rb +47 -0
- data/lib/redis_failover/manual_failover.rb +40 -0
- data/lib/redis_failover/node.rb +190 -0
- data/lib/redis_failover/node_manager.rb +352 -0
- data/lib/redis_failover/node_watcher.rb +79 -0
- data/lib/redis_failover/runner.rb +28 -0
- data/lib/redis_failover/util.rb +83 -0
- data/lib/redis_failover/version.rb +3 -0
- data/misc/redis_failover.png +0 -0
- data/redis_failover.gemspec +26 -0
- data/spec/cli_spec.rb +75 -0
- data/spec/client_spec.rb +100 -0
- data/spec/node_manager_spec.rb +112 -0
- data/spec/node_spec.rb +84 -0
- data/spec/node_watcher_spec.rb +58 -0
- data/spec/spec_helper.rb +20 -0
- data/spec/support/config/multiple_environments.yml +15 -0
- data/spec/support/config/multiple_environments_with_chroot.yml +17 -0
- data/spec/support/config/single_environment.yml +7 -0
- data/spec/support/config/single_environment_with_chroot.yml +8 -0
- data/spec/support/node_manager_stub.rb +65 -0
- data/spec/support/redis_stub.rb +105 -0
- data/spec/util_spec.rb +21 -0
- metadata +210 -0
@@ -0,0 +1,47 @@
|
|
1
|
+
module RedisFailover
  # Base class for all RedisFailover errors.
  class Error < StandardError; end

  # Raised when a node is specified incorrectly.
  class InvalidNodeError < Error; end

  # Raised when a node changes to an invalid/unknown state.
  class InvalidNodeStateError < Error
    # @param [Object] node the node whose state changed
    # @param [Symbol] state the unrecognized state
    def initialize(node, state)
      super("Invalid state change `#{state}` for node #{node}")
    end
  end

  # Raised when a node is unavailable (i.e., unreachable via network).
  class NodeUnavailableError < Error
    # @param [Object] node the unreachable node
    def initialize(node)
      super("Node: #{node}")
    end
  end

  # Raised when no master is currently available.
  class NoMasterError < Error; end

  # Raised when no slave is currently available.
  class NoSlaveError < Error; end

  # Raised when a redis server is no longer using the same role
  # as previously assumed.
  class InvalidNodeRoleError < Error
    # @param [Object] node the node with the mismatched role
    # @param [Symbol] assumed the role the client assumed
    # @param [Symbol] actual the role the node actually has
    def initialize(node, assumed, actual)
      super("Invalid role detected for node #{node}, client thought it was a #{assumed}, but it's now a #{actual}")
    end
  end

  # Raised when an unsupported redis operation is performed.
  class UnsupportedOperationError < Error
    # @param [Symbol] operation the unsupported operation name
    def initialize(operation)
      super("Operation `#{operation}` is currently unsupported")
    end
  end
end
|
@@ -0,0 +1,40 @@
|
|
1
|
+
module RedisFailover
  # Provides manual failover support to a new master.
  class ManualFailover
    # Path for manual failover communication.
    ZNODE_PATH = '/redis_failover_manual'.freeze

    # Denotes that any slave can be used as a candidate for promotion.
    ANY_SLAVE = "ANY_SLAVE".freeze

    # Creates a new instance.
    #
    # @param [ZK] zk the ZooKeeper client
    # @param [Hash] options the options used for manual failover
    # @option options [String] :host the host of the failover candidate
    # @option options [String] :port the port of the failover candidate
    # @note
    #   If options is empty, a random slave will be used
    #   as a failover candidate.
    def initialize(zk, options = {})
      @zk = zk
      @options = options
    end

    # Performs a manual failover by writing the candidate to the
    # coordination znode.
    def perform
      create_path
      @zk.set(ZNODE_PATH, failover_candidate)
    end

    private

    # @return [String] the host:port pair for the requested candidate,
    #   or ANY_SLAVE when no specific candidate was given
    def failover_candidate
      return ANY_SLAVE if @options.empty?
      "#{@options[:host]}:#{@options[:port]}"
    end

    # Creates the znode path used for coordinating manual failovers.
    def create_path
      @zk.create(ZNODE_PATH)
    rescue ZK::Exceptions::NodeExists
      # best effort
    end
  end
end
|
@@ -0,0 +1,190 @@
|
|
1
|
+
module RedisFailover
  # Represents a redis node (master or slave). Instances of this class
  # are used by the NodeManager and NodeWatcher to manipulate real redis
  # servers.
  class Node
    include Util

    # Maximum amount of time given for any redis operation to complete.
    # If a redis operation doesn't complete in the allotted time, a
    # NodeUnavailableError will be raised.
    MAX_OP_WAIT_TIME = 5

    # @return [String] the redis server host
    attr_reader :host

    # @return [Integer] the redis server port
    attr_reader :port

    # Creates a new instance.
    #
    # @param [Hash] options the options used to create the node
    # @option options [String] :host the host of the redis server
    # @option options [String] :port the port of the redis server
    # @option options [String] :password the password for the redis server
    # @raise [InvalidNodeError] if no host was specified
    def initialize(options = {})
      @host = options.fetch(:host) { raise InvalidNodeError, 'missing host' }
      @port = Integer(options[:port] || 6379)
      @password = options[:password]
    end

    # @return [Boolean] true if this node is a master, false otherwise
    def master?
      role == 'master'
    end

    # @return [Boolean] true if this node is a slave, false otherwise
    def slave?
      !master?
    end

    # Determines if this node is a slave of the given master.
    #
    # @param [Node] master the master to check
    # @return [Boolean] true if slave of master, false otherwise
    def slave_of?(master)
      current_master == master
    end

    # Determines current master of this slave.
    #
    # @return [Node] the node representing the master of this slave,
    #   nil if this node is not currently a slave
    def current_master
      info = fetch_info
      return unless info[:role] == 'slave'
      Node.new(:host => info[:master_host], :port => info[:master_port].to_i)
    end

    # Waits until something interesting happens. If the connection
    # with this node dies, the blpop call will raise an error. If
    # the blpop call returns without error, then this will be due to
    # a graceful shutdown signaled by #wakeup or a timeout.
    def wait
      perform_operation do |redis|
        # blpop must time out before the surrounding operation window
        # (MAX_OP_WAIT_TIME) closes, hence the shorter timeout here
        redis.blpop(wait_key, MAX_OP_WAIT_TIME - 3)
        redis.del(wait_key)
      end
    end

    # Wakes up this node by pushing a value to its internal
    # queue used by #wait.
    def wakeup
      perform_operation do |redis|
        redis.lpush(wait_key, '1')
      end
    end

    # Makes this node a slave of the given node.
    #
    # @param [Node] node the node of which to become a slave
    def make_slave!(node)
      perform_operation do |redis|
        unless slave_of?(node)
          redis.slaveof(node.host, node.port)
          logger.info("#{self} is now a slave of #{node}")
          wakeup
        end
      end
    end

    # Makes this node a master node.
    def make_master!
      perform_operation do |redis|
        unless master?
          # SLAVEOF NO ONE promotes this server to master
          redis.slaveof('no', 'one')
          logger.info("#{self} is now master")
          wakeup
        end
      end
    end

    # @return [String] an inspect string for this node
    def inspect
      "<RedisFailover::Node #{to_s}>"
    end

    # @return [String] a friendly string for this node
    def to_s
      "#{@host}:#{@port}"
    end

    # Determines if this node is equal to another node.
    #
    # @param [Node] other the other node to compare
    # @return [Boolean] true if equal, false otherwise
    def ==(other)
      return false unless Node === other
      return true if self.equal?(other)
      [host, port] == [other.host, other.port]
    end
    alias_method :eql?, :==

    # @return [Integer] a hash value for this node
    def hash
      to_s.hash
    end

    # Fetches information/stats for this node.
    #
    # @return [Hash] the info for this node, with symbolized keys
    def fetch_info
      perform_operation do |redis|
        symbolize_keys(redis.info)
      end
    end
    alias_method :ping, :fetch_info

    # @return [Boolean] determines if this node prohibits stale reads
    def prohibits_stale_reads?
      perform_operation do |redis|
        redis.config('get', 'slave-serve-stale-data').last == 'no'
      end
    end

    # @return [Boolean] determines if this node is syncing with its master
    def syncing_with_master?
      perform_operation do |redis|
        # use the client yielded by this operation directly; calling
        # #fetch_info here would open a second connection and nest
        # another timeout window inside this one
        symbolize_keys(redis.info)[:master_sync_in_progress] == '1'
      end
    end

    private

    # @return [String] the current role for this node
    def role
      fetch_info[:role]
    end

    # @return [String] the name of the wait queue for this node
    def wait_key
      @wait_key ||= "_redis_failover_#{SecureRandom.hex(32)}"
    end

    # @return [Redis] a new redis client instance for this node
    def new_client
      Redis.new(:host => @host, :password => @password, :port => @port)
    end

    # Safely performs a redis operation within a given timeout window.
    # A fresh client is created for each operation and disconnected
    # afterwards.
    #
    # @yield [Redis] the redis client to use for the operation
    # @raise [NodeUnavailableError] if node is currently unreachable
    def perform_operation
      redis = nil
      Timeout.timeout(MAX_OP_WAIT_TIME) do
        redis = new_client
        yield redis
      end
    rescue
      raise NodeUnavailableError, self, caller
    ensure
      if redis
        begin
          redis.client.disconnect
        rescue
          raise NodeUnavailableError, self, caller
        end
      end
    end
  end
end
|
@@ -0,0 +1,352 @@
|
|
1
|
+
module RedisFailover
  # NodeManager manages a list of redis nodes. Upon startup, the NodeManager
  # will discover the current redis master and slaves. Each redis node is
  # monitored by a NodeWatcher instance. The NodeWatchers periodically
  # report the current state of the redis node it's watching to the
  # NodeManager via an asynchronous queue. The NodeManager processes the
  # state reports and reacts appropriately by handling stale/dead nodes,
  # and promoting a new redis master if it sees fit to do so.
  class NodeManager
    include Util

    # Name for the znode that handles exclusive locking between multiple
    # Node Manager processes. Whoever holds the lock will be considered
    # the "master" Node Manager, and will be responsible for monitoring
    # the redis nodes. When a Node Manager that holds the lock disappears
    # or fails, another Node Manager process will grab the lock and
    # become the master.
    LOCK_PATH = 'master_node_manager'

    # Number of seconds to wait before retrying bootstrap process.
    TIMEOUT = 5

    # Creates a new instance.
    #
    # @param [Hash] options the options used to initialize the manager
    # @option options [String] :zkservers comma-separated ZK host:port pairs
    # @option options [String] :znode_path znode path override for redis nodes
    # @option options [String] :password password for redis nodes
    # @option options [Array<String>] :nodes the nodes to manage
    # @option options [String] :max_failures the max failures for a node
    def initialize(options)
      logger.info("Redis Node Manager v#{VERSION} starting (#{RUBY_DESCRIPTION})")
      @options = options
      @znode = @options[:znode_path] || Util::DEFAULT_ZNODE_PATH
      @manual_znode = ManualFailover::ZNODE_PATH
      @mutex = Mutex.new
    end

    # Starts the node manager.
    #
    # @note This method does not return until the manager terminates.
    def start
      @queue = Queue.new
      @leader = false
      setup_zk
      logger.info('Waiting to become master Node Manager ...')
      @zk.with_lock(LOCK_PATH) do
        @leader = true
        logger.info('Acquired master Node Manager lock')
        discover_nodes
        initialize_path
        spawn_watchers
        handle_state_reports
      end
    rescue ZK::Exceptions::InterruptedSession => ex
      logger.error("ZK error while attempting to manage nodes: #{ex.inspect}")
      logger.error(ex.backtrace.join("\n"))
      shutdown
      sleep(TIMEOUT)
      retry
    end

    # Notifies the manager of a state change. Used primarily by
    # {RedisFailover::NodeWatcher} to inform the manager of watched node states.
    #
    # @param [Node] node the node
    # @param [Symbol] state the state
    def notify_state(node, state)
      @queue << [node, state]
    end

    # Performs a graceful shutdown of the manager.
    def shutdown
      @queue.clear
      # nil sentinel unblocks the handle_state_reports loop
      @queue << nil
      @watchers.each(&:shutdown) if @watchers
      @zk.close! if @zk
    end

    private

    # Configures the ZooKeeper client.
    def setup_zk
      @zk.close! if @zk
      @zk = ZK.new("#{@options[:zkservers]}#{@options[:chroot] || ''}")

      @zk.register(@manual_znode) do |event|
        @mutex.synchronize do
          if event.node_changed?
            schedule_manual_failover
          end
        end
      end

      # re-arm the manual failover watch on (re)connect
      @zk.on_connected { @zk.stat(@manual_znode, :watch => true) }
      @zk.stat(@manual_znode, :watch => true)
    end

    # Handles periodic state reports from {RedisFailover::NodeWatcher} instances.
    # Loops until a nil sentinel is popped from the queue (see #shutdown).
    def handle_state_reports
      while state_report = @queue.pop
        begin
          node, state = state_report
          case state
          when :unavailable     then handle_unavailable(node)
          when :available       then handle_available(node)
          when :syncing         then handle_syncing(node)
          when :manual_failover then handle_manual_failover(node)
          else raise InvalidNodeStateError.new(node, state)
          end

          # flush current state
          write_state
        rescue ZK::Exceptions::InterruptedSession
          # fail hard if this is a ZK connection-related error
          raise
        rescue => ex
          logger.error("Error handling #{state_report.inspect}: #{ex.inspect}")
          logger.error(ex.backtrace.join("\n"))
        end
      end
    end

    # Handles an unavailable node.
    #
    # @param [Node] node the unavailable node
    def handle_unavailable(node)
      # no-op if we already know about this node
      return if @unavailable.include?(node)
      logger.info("Handling unavailable node: #{node}")

      @unavailable << node
      # find a new master if this node was a master
      if node == @master
        logger.info("Demoting currently unavailable master #{node}.")
        promote_new_master
      else
        @slaves.delete(node)
      end
    end

    # Handles an available node.
    #
    # @param [Node] node the available node
    def handle_available(node)
      reconcile(node)

      # no-op if we already know about this node
      return if @master == node || @slaves.include?(node)
      logger.info("Handling available node: #{node}")

      if @master
        # master already exists, make a slave
        node.make_slave!(@master)
        @slaves << node
      else
        # no master exists, make this the new master
        promote_new_master(node)
      end

      @unavailable.delete(node)
    end

    # Handles a node that is currently syncing.
    #
    # @param [Node] node the syncing node
    def handle_syncing(node)
      reconcile(node)

      if node.syncing_with_master? && node.prohibits_stale_reads?
        logger.info("Node #{node} not ready yet, still syncing with master.")
        force_unavailable_slave(node)
        return
      end

      # otherwise, we can use this node
      handle_available(node)
    end

    # Handles a manual failover request to the given node.
    #
    # @param [Node] node the candidate node for failover
    def handle_manual_failover(node)
      # no-op if node to be failed over is already master
      return if @master == node
      logger.info("Handling manual failover")

      # make current master a slave, and promote new master;
      # guard against a nil master so we never track a nil slave
      @slaves << @master if @master
      @slaves.delete(node)
      promote_new_master(node)
    end

    # Promotes a new master.
    #
    # @param [Node] node the optional node to promote
    # @note if no node is specified, a random slave will be used
    def promote_new_master(node = nil)
      delete_path
      @master = nil

      # make a specific node or slave the new master
      candidate = node || @slaves.pop
      unless candidate
        logger.error('Failed to promote a new master, no candidate available.')
        return
      end

      redirect_slaves_to(candidate)
      candidate.make_master!
      @master = candidate

      create_path
      write_state
      logger.info("Successfully promoted #{candidate} to master.")
    end

    # Discovers the current master and slave nodes.
    #
    # @raise [NoMasterError] if no master can be found among the nodes
    def discover_nodes
      @unavailable = []
      nodes = @options[:nodes].map { |opts| Node.new(opts) }.uniq
      raise NoMasterError unless @master = find_master(nodes)
      @slaves = nodes - [@master]
      logger.info("Managing master (#{@master}) and slaves" +
        " (#{@slaves.map(&:to_s).join(', ')})")

      # ensure that slaves are correctly pointing to this master
      redirect_slaves_to(@master)
    end

    # Spawns the {RedisFailover::NodeWatcher} instances for each managed node.
    def spawn_watchers
      @watchers = [@master, @slaves, @unavailable].flatten.map do |node|
        NodeWatcher.new(self, node, @options[:max_failures] || 3)
      end
      @watchers.each(&:watch)
    end

    # Searches for the master node.
    #
    # @param [Array<Node>] nodes the nodes to search
    # @return [Node] the found master node, nil if not found
    def find_master(nodes)
      nodes.find do |node|
        begin
          node.master?
        rescue NodeUnavailableError
          false
        end
      end
    end

    # Redirects all slaves to the specified node.
    #
    # @param [Node] node the node to which slaves are redirected
    def redirect_slaves_to(node)
      # iterate over a copy since force_unavailable_slave mutates @slaves
      @slaves.dup.each do |slave|
        begin
          slave.make_slave!(node)
        rescue NodeUnavailableError
          logger.info("Failed to redirect unreachable slave #{slave} to #{node}")
          force_unavailable_slave(slave)
        end
      end
    end

    # Forces a slave to be marked as unavailable.
    #
    # @param [Node] node the node to force as unavailable
    def force_unavailable_slave(node)
      @slaves.delete(node)
      @unavailable << node unless @unavailable.include?(node)
    end

    # It's possible that a newly available node may have been restarted
    # and completely lost its dynamically set run-time role by the node
    # manager. This method ensures that the node resumes its role as
    # determined by the manager.
    #
    # @param [Node] node the node to reconcile
    def reconcile(node)
      return if @master == node && node.master?
      return if @master && node.slave_of?(@master)

      logger.info("Reconciling node #{node}")
      if @master == node && !node.master?
        # we think the node is a master, but the node doesn't
        node.make_master!
        return
      end

      # verify that node is a slave for the current master
      if @master && !node.slave_of?(@master)
        node.make_slave!(@master)
      end
    end

    # @return [Hash] the set of current nodes grouped by category
    def current_nodes
      {
        :master => @master ? @master.to_s : nil,
        :slaves => @slaves.map(&:to_s),
        :unavailable => @unavailable.map(&:to_s)
      }
    end

    # Deletes the znode path containing the redis nodes.
    def delete_path
      @zk.delete(@znode)
      logger.info("Deleted ZooKeeper node #{@znode}")
    rescue ZK::Exceptions::NoNode => ex
      logger.info("Tried to delete missing znode: #{ex.inspect}")
    end

    # Creates the znode path containing the redis nodes.
    def create_path
      unless @zk.exists?(@znode)
        @zk.create(@znode, encode(current_nodes), :ephemeral => true)
        logger.info("Created ZooKeeper node #{@znode}")
      end
    rescue ZK::Exceptions::NodeExists
      # best effort
    end

    # Initializes the znode path containing the redis nodes.
    def initialize_path
      create_path
      write_state
    end

    # Writes the current redis nodes state to the znode path.
    def write_state
      create_path
      @zk.set(@znode, encode(current_nodes))
    end

    # Schedules a manual failover to a redis node.
    def schedule_manual_failover
      return unless @leader
      new_master = @zk.get(@manual_znode, :watch => true).first
      logger.info("Received manual failover request for: #{new_master}")

      node = if new_master == ManualFailover::ANY_SLAVE
        @slaves.sample
      else
        host, port = new_master.split(':', 2)
        Node.new(:host => host, :port => port, :password => @options[:password])
      end
      notify_state(node, :manual_failover) if node
    end
  end
end
|