spbtv_redis_failover 1.0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +19 -0
  3. data/.travis.yml +7 -0
  4. data/.yardopts +6 -0
  5. data/Changes.md +191 -0
  6. data/Gemfile +2 -0
  7. data/LICENSE +22 -0
  8. data/README.md +240 -0
  9. data/Rakefile +9 -0
  10. data/bin/redis_node_manager +7 -0
  11. data/examples/config.yml +17 -0
  12. data/examples/multiple_environments_config.yml +15 -0
  13. data/lib/redis_failover.rb +25 -0
  14. data/lib/redis_failover/cli.rb +142 -0
  15. data/lib/redis_failover/client.rb +517 -0
  16. data/lib/redis_failover/errors.rb +54 -0
  17. data/lib/redis_failover/failover_strategy.rb +25 -0
  18. data/lib/redis_failover/failover_strategy/latency.rb +21 -0
  19. data/lib/redis_failover/manual_failover.rb +52 -0
  20. data/lib/redis_failover/node.rb +190 -0
  21. data/lib/redis_failover/node_manager.rb +741 -0
  22. data/lib/redis_failover/node_snapshot.rb +81 -0
  23. data/lib/redis_failover/node_strategy.rb +34 -0
  24. data/lib/redis_failover/node_strategy/consensus.rb +18 -0
  25. data/lib/redis_failover/node_strategy/majority.rb +18 -0
  26. data/lib/redis_failover/node_strategy/single.rb +17 -0
  27. data/lib/redis_failover/node_watcher.rb +83 -0
  28. data/lib/redis_failover/runner.rb +27 -0
  29. data/lib/redis_failover/util.rb +137 -0
  30. data/lib/redis_failover/version.rb +3 -0
  31. data/misc/redis_failover.png +0 -0
  32. data/spbtv_redis_failover.gemspec +26 -0
  33. data/spec/cli_spec.rb +75 -0
  34. data/spec/client_spec.rb +153 -0
  35. data/spec/failover_strategy/latency_spec.rb +41 -0
  36. data/spec/failover_strategy_spec.rb +17 -0
  37. data/spec/node_manager_spec.rb +136 -0
  38. data/spec/node_snapshot_spec.rb +30 -0
  39. data/spec/node_spec.rb +84 -0
  40. data/spec/node_strategy/consensus_spec.rb +30 -0
  41. data/spec/node_strategy/majority_spec.rb +22 -0
  42. data/spec/node_strategy/single_spec.rb +22 -0
  43. data/spec/node_strategy_spec.rb +22 -0
  44. data/spec/node_watcher_spec.rb +58 -0
  45. data/spec/spec_helper.rb +21 -0
  46. data/spec/support/config/multiple_environments.yml +15 -0
  47. data/spec/support/config/multiple_environments_with_chroot.yml +17 -0
  48. data/spec/support/config/single_environment.yml +7 -0
  49. data/spec/support/config/single_environment_with_chroot.yml +8 -0
  50. data/spec/support/node_manager_stub.rb +87 -0
  51. data/spec/support/redis_stub.rb +105 -0
  52. data/spec/util_spec.rb +21 -0
  53. metadata +207 -0
module RedisFailover
  # Base class for all RedisFailover errors.
  class Error < StandardError; end

  # Raised when a node is specified incorrectly.
  class InvalidNodeError < Error; end

  # Raised when a node changes to an invalid/unknown state.
  class InvalidNodeStateError < Error
    # @param [Node] node the node that reported the state
    # @param [Symbol] state the unrecognized state
    def initialize(node, state)
      super("Invalid state change `#{state}` for node #{node}")
    end
  end

  # Raised when a node is unavailable (i.e., unreachable via network).
  class NodeUnavailableError < Error
    # @param [Node] node the unreachable node
    def initialize(node)
      super("Node: #{node}")
    end
  end

  # Raised when no master is currently available.
  class NoMasterError < Error; end

  # Raised when more than one master is found on startup.
  class MultipleMastersError < Error
    # @param [Array<Node>] nodes the nodes that all claim the master role
    def initialize(nodes)
      super("Multiple nodes with master role: #{nodes.map(&:to_s)}")
    end
  end

  # Raised when no slave is currently available.
  class NoSlaveError < Error; end

  # Raised when a redis server is no longer using the same role
  # as previously assumed.
  class InvalidNodeRoleError < Error
    # @param [Node] node the node whose role changed
    # @param [Symbol] assumed the role the client assumed
    # @param [Symbol] actual the role the server actually reported
    def initialize(node, assumed, actual)
      super("Invalid role detected for node #{node}, client thought " \
            "it was a #{assumed}, but it's now a #{actual}")
    end
  end

  # Raised when an unsupported redis operation is performed.
  class UnsupportedOperationError < Error
    # @param [Symbol] operation the name of the unsupported operation
    def initialize(operation)
      super("Operation `#{operation}` is currently unsupported")
    end
  end
end
module RedisFailover
  # Base class for strategies that determine which node is used during failover.
  class FailoverStrategy
    include Util

    # Loads a strategy based on the given name.
    #
    # @param [String, Symbol] name the strategy name
    # @return [Object] a new strategy instance
    # @raise [RuntimeError] if no strategy file/constant exists for the name
    def self.for(name)
      strategy = name.to_s.downcase
      require "redis_failover/failover_strategy/#{strategy}"
      const_get(strategy.capitalize).new
    rescue LoadError, NameError
      raise "Failed to find failover strategy: #{name}"
    end

    # Returns a candidate node as determined by this strategy.
    #
    # @param [Hash<Node, NodeSnapshot>] snapshots the node snapshots
    # @return [Node] the candidate node or nil if one couldn't be found
    def find_candidate(snapshots)
      raise NotImplementedError
    end
  end
end
module RedisFailover
  class FailoverStrategy
    # Failover strategy that selects an available node that is both seen by all
    # node managers and has the lowest reported health check latency.
    class Latency < FailoverStrategy
      # @see RedisFailover::FailoverStrategy#find_candidate
      def find_candidate(snapshots)
        # Only nodes visible to every node manager are eligible.
        eligible = snapshots.each_with_object({}) do |(node, snapshot), acc|
          acc[node] = snapshot.avg_latency if snapshot.all_available?
        end

        best = eligible.min_by { |_node, latency| latency }
        best && best.first
      end
    end
  end
end
module RedisFailover
  # Provides manual failover support to a new master.
  class ManualFailover
    # Path for manual failover communication.
    ZNODE_PATH = 'manual_failover'.freeze

    # Denotes that any slave can be used as a candidate for promotion.
    ANY_SLAVE = 'ANY_SLAVE'.freeze

    # @param [String] root_znode the root ZK node
    # @return [String] the full znode path used for manual failovers
    def self.path(root_znode)
      "#{root_znode}/#{ZNODE_PATH}"
    end

    # Creates a new instance.
    #
    # @param [ZK] zk the ZooKeeper client
    # @param [ZNode] root_znode the root ZK node
    # @param [Hash] options the options used for manual failover
    # @option options [String] :host the host of the failover candidate
    # @option options [String] :port the port of the failover candidate
    # @raise [ArgumentError] if options are present but host/port are invalid
    # @note
    #   If options is empty, a random slave will be used
    #   as a failover candidate.
    def initialize(zk, root_znode, options = {})
      @zk = zk
      @root_znode = root_znode
      @options = options

      unless @options.empty?
        # Rescue only coercion failures here; the previous inline
        # `Integer(...) rescue nil` swallowed every StandardError and could
        # mask unrelated programming errors.
        port = begin
          Integer(@options[:port])
        rescue ArgumentError, TypeError
          nil
        end
        raise ArgumentError, ':host not properly specified' if @options[:host].to_s.empty?
        raise ArgumentError, ':port not properly specified' if port.nil?
      end
    end

    # Performs a manual failover by writing the candidate ("host:port" or
    # ANY_SLAVE) to the manual failover znode.
    def perform
      create_path
      node = @options.empty? ? ANY_SLAVE : "#{@options[:host]}:#{@options[:port]}"
      @zk.set(self.class.path(@root_znode), node)
    end

    private

    # Creates the znode path used for coordinating manual failovers.
    def create_path
      @zk.create(self.class.path(@root_znode))
    rescue ZK::Exceptions::NodeExists
      # best effort - the path may already exist
    end
  end
end
module RedisFailover
  # Represents a redis node (master or slave). Instances of this class
  # are used by the NodeManager and NodeWatcher to manipulate real redis
  # servers.
  class Node
    include Util

    # Maximum amount of time given for any redis operation to complete.
    # If a redis operation doesn't complete in the allotted time, a
    # NodeUnavailableError will be raised.
    MAX_OP_WAIT_TIME = 5

    # @return [String] the redis server host
    attr_reader :host

    # @return [Integer] the redis server port
    attr_reader :port

    # Creates a new instance.
    #
    # @param [Hash] options the options used to create the node
    # @option options [String] :host the host of the redis server
    # @option options [String] :port the port of the redis server (default 6379)
    # @option options [String] :password optional password for the server
    # @raise [InvalidNodeError] if the host is missing
    def initialize(options = {})
      @host = options[:host]
      raise InvalidNodeError, 'missing host' if @host.to_s.empty?
      @port = Integer(options[:port] || 6379)
      @password = options[:password]
    end

    # @return [Boolean] true if this node is a master, false otherwise
    def master?
      role == 'master'
    end

    # @return [Boolean] true if this node is a slave, false otherwise
    def slave?
      !master?
    end

    # Determines if this node is a slave of the given master.
    #
    # @param [Node] master the master to check
    # @return [Boolean] true if slave of master, false otherwise
    def slave_of?(master)
      current_master == master
    end

    # Determines current master of this slave.
    #
    # @return [Node] the node representing the master of this slave,
    #   or nil if this node is not currently acting as a slave
    def current_master
      info = fetch_info
      return unless info[:role] == 'slave'
      Node.new(:host => info[:master_host], :port => info[:master_port].to_i)
    end

    # Waits until something interesting happens. If the connection
    # with this node dies, the blpop call will raise an error. If
    # the blpop call returns without error, then this will be due to
    # a graceful shutdown signaled by #wakeup or a timeout.
    def wait
      perform_operation do |conn|
        conn.blpop(wait_key, MAX_OP_WAIT_TIME - 3)
        conn.del(wait_key)
      end
    end

    # Wakes up this node by pushing a value to its internal
    # queue used by #wait.
    def wakeup
      perform_operation do |conn|
        conn.lpush(wait_key, '1')
      end
    end

    # Makes this node a slave of the given node.
    #
    # @param [Node] node the node of which to become a slave
    def make_slave!(node)
      perform_operation do |conn|
        unless slave_of?(node)
          conn.slaveof(node.host, node.port)
          logger.info("#{self} is now a slave of #{node}")
          wakeup
        end
      end
    end

    # Makes this node a master node.
    def make_master!
      perform_operation do |conn|
        unless master?
          conn.slaveof('no', 'one')
          logger.info("#{self} is now master")
          wakeup
        end
      end
    end

    # @return [String] an inspect string for this node
    def inspect
      "<RedisFailover::Node #{to_s}>"
    end

    # @return [String] a friendly string for this node
    def to_s
      "#{@host}:#{@port}"
    end

    # Determines if this node is equal to another node.
    # Two nodes are equal when their host and port match.
    #
    # @param [Node] other the other node to compare
    # @return [Boolean] true if equal, false otherwise
    def ==(other)
      return false unless other.is_a?(Node)
      return true if equal?(other)
      host == other.host && port == other.port
    end
    alias_method :eql?, :==

    # @return [Integer] a hash value for this node
    def hash
      to_s.hash
    end

    # Fetches information/stats for this node.
    #
    # @return [Hash] the info for this node
    def fetch_info
      perform_operation do |conn|
        symbolize_keys(conn.info)
      end
    end
    alias_method :ping, :fetch_info

    # @return [Boolean] determines if this node prohibits stale reads
    def prohibits_stale_reads?
      perform_operation do |conn|
        conn.config('get', 'slave-serve-stale-data').last == 'no'
      end
    end

    # @return [Boolean] determines if this node is syncing with its master
    def syncing_with_master?
      perform_operation do |_conn|
        fetch_info[:master_sync_in_progress] == '1'
      end
    end

    private

    # @return [String] the current role for this node
    def role
      fetch_info[:role]
    end

    # @return [String] the name of the wait queue for this node
    def wait_key
      @wait_key ||= "_redis_failover_#{SecureRandom.hex(32)}"
    end

    # @return [Redis] a new redis client instance for this node
    def new_client
      Redis.new(:host => @host, :password => @password, :port => @port)
    end

    # Safely performs a redis operation within a given timeout window.
    # A fresh client is created per operation and always disconnected.
    #
    # @yield [Redis] the redis client to use for the operation
    # @raise [NodeUnavailableError] if node is currently unreachable
    def perform_operation
      conn = nil
      Timeout.timeout(MAX_OP_WAIT_TIME) do
        conn = new_client
        yield conn
      end
    rescue Exception => ex
      # NOTE: rescuing Exception (not StandardError) is deliberate here; on
      # older rubies Timeout raised errors that did not descend from
      # StandardError, and any failure must map to NodeUnavailableError.
      raise NodeUnavailableError, "#{ex.class}: #{ex.message}", ex.backtrace
    ensure
      if conn
        begin
          conn.client.disconnect
        rescue Exception => ex
          raise NodeUnavailableError, "#{ex.class}: #{ex.message}", ex.backtrace
        end
      end
    end
  end
end
@@ -0,0 +1,741 @@
1
+ module RedisFailover
2
+ # NodeManager manages a list of redis nodes. Upon startup, the NodeManager
3
+ # will discover the current redis master and slaves. Each redis node is
4
+ # monitored by a NodeWatcher instance. The NodeWatchers periodically
5
+ # report the current state of the redis node it's watching to the
6
+ # NodeManager. The NodeManager processes the state reports and reacts
7
+ # appropriately by handling stale/dead nodes, and promoting a new redis master
8
+ # if it sees fit to do so.
9
+ class NodeManager
10
+ include Util
11
+
12
+ # Number of seconds to wait before retrying bootstrap process.
13
+ TIMEOUT = 5
14
+ # Number of seconds for checking node snapshots.
15
+ CHECK_INTERVAL = 5
16
+ # Number of max attempts to promote a master before releasing master lock.
17
+ MAX_PROMOTION_ATTEMPTS = 3
18
+ # Latency threshold for recording node state.
19
+ LATENCY_THRESHOLD = 0.5
20
+
21
+ # Errors that can happen during the node discovery process.
22
+ NODE_DISCOVERY_ERRORS = [
23
+ InvalidNodeRoleError,
24
+ NodeUnavailableError,
25
+ NoMasterError,
26
+ MultipleMastersError
27
+ ].freeze
28
+
29
    # Creates a new instance.
    #
    # @param [Hash] options the options used to initialize the manager
    # @option options [String] :zkservers comma-separated ZK host:port pairs
    # @option options [String] :znode_path znode path override for redis nodes
    # @option options [String] :password password for redis nodes
    # @option options [Array<String>] :nodes the nodes to manage
    # @option options [String] :max_failures the max failures for a node
    def initialize(options)
      logger.info("Redis Node Manager v#{VERSION} starting (#{RUBY_DESCRIPTION})")
      @options = options
      # How many node managers must be reporting before this manager acts.
      @required_node_managers = options.fetch(:required_node_managers, 1)
      @root_znode = options.fetch(:znode_path, Util::DEFAULT_ROOT_ZNODE_PATH)
      @node_strategy = NodeStrategy.for(options.fetch(:node_strategy, :majority))
      @failover_strategy = FailoverStrategy.for(options.fetch(:failover_strategy, :latency))
      # uniq relies on Node#== / Node#hash (host:port identity).
      @nodes = Array(@options[:nodes]).map { |opts| Node.new(opts) }.uniq
      @master_manager = false
      @master_promotion_attempts = 0
      @sufficient_node_managers = false
      @lock = Monitor.new
      @shutdown = false
    end
51
+
52
    # Starts the node manager.
    #
    # @note This method does not return until the manager terminates.
    def start
      return unless running?
      setup_zk
      spawn_watchers
      wait_until_master
    rescue *ZK_ERRORS => ex
      # ZK connectivity problem: tear down and retry the full bootstrap.
      logger.error("ZK error while attempting to manage nodes: #{ex.inspect}")
      reset
      sleep(TIMEOUT)
      retry
    rescue NoMasterError
      # Promotion repeatedly failed; reset and start the bootstrap over.
      logger.error("Failed to promote a new master after #{MAX_PROMOTION_ATTEMPTS} attempts.")
      reset
      sleep(TIMEOUT)
      retry
    end
71
+
72
    # Notifies the manager of a state change. Used primarily by
    # {RedisFailover::NodeWatcher} to inform the manager of watched node states.
    #
    # @param [Node] node the node
    # @param [Symbol] state the state
    # @param [Integer] latency an optional latency
    def notify_state(node, state, latency = nil)
      @lock.synchronize do
        if running?
          update_current_state(node, state, latency)
        end
      end
    rescue => ex
      # Swallow after logging so a bad report can't kill the caller's thread.
      logger.error("Error handling state report #{[node, state].inspect}: #{ex.inspect}")
      logger.error(ex.backtrace.join("\n"))
    end
88
+
89
    # Performs a reset of the manager: drops master-manager status, clears
    # promotion attempts, and shuts down any spawned node watchers.
    def reset
      @master_manager = false
      @master_promotion_attempts = 0
      @watchers.each(&:shutdown) if @watchers
    end
95
+
96
    # Initiates a graceful shutdown.
    # @note This terminates the process via Kernel#exit after resetting.
    def shutdown
      logger.info('Shutting down ...')
      @lock.synchronize do
        @shutdown = true
      end

      reset
      exit
    end
106
+
107
+ private
108
+
109
    # Configures the ZooKeeper client: connects (once), registers the manual
    # failover watch, and creates the base znode paths.
    def setup_zk
      unless @zk
        @zk = ZK.new("#{@options[:zkservers]}#{@options[:chroot] || ''}")
        @zk.register(manual_failover_path) do |event|
          handle_manual_failover_update(event)
        end
        # Re-arm the watch whenever the ZK session (re)connects.
        @zk.on_connected { @zk.stat(manual_failover_path, :watch => true) }
      end

      create_path(@root_znode)
      create_path(current_state_root)
      @zk.stat(manual_failover_path, :watch => true)
    end
123
+
124
    # Handles an unavailable node.
    #
    # @param [Node] node the unavailable node
    # @param [Hash<Node, NodeSnapshot>] snapshots the current set of snapshots
    def handle_unavailable(node, snapshots)
      # no-op if we already know about this node
      return if @unavailable.include?(node)
      logger.info("Handling unavailable node: #{node}")

      @unavailable << node
      # find a new master if this node was a master
      if node == @master
        logger.info("Demoting currently unavailable master #{node}.")
        promote_new_master(snapshots)
      else
        @slaves.delete(node)
      end
    end
142
+
143
    # Handles an available node.
    #
    # @param [Node] node the available node
    # @param [Hash<Node, NodeSnapshot>] snapshots the current set of snapshots
    def handle_available(node, snapshots)
      reconcile(node)

      # no-op if we already know about this node
      return if @master == node || (@master && @slaves.include?(node))
      logger.info("Handling available node: #{node}")

      if @master
        # master already exists, make a slave
        node.make_slave!(@master)
        @slaves << node
      else
        # no master exists, make this the new master
        promote_new_master(snapshots, node)
      end

      @unavailable.delete(node)
    end
165
+
166
    # Handles a node that is currently syncing.
    #
    # @param [Node] node the syncing node
    # @param [Hash<Node, NodeSnapshot>] snapshots the current set of snapshots
    def handle_syncing(node, snapshots)
      reconcile(node)

      # A syncing slave that refuses stale reads can't serve clients yet.
      if node.syncing_with_master? && node.prohibits_stale_reads?
        logger.info("Node #{node} not ready yet, still syncing with master.")
        force_unavailable_slave(node)
      else
        # otherwise, we can use this node
        handle_available(node, snapshots)
      end
    end
181
+
182
    # Handles a manual failover request to the given node.
    #
    # @param [Node] node the candidate node for failover
    # @param [Hash<Node, NodeSnapshot>] snapshots the current set of snapshots
    def handle_manual_failover(node, snapshots)
      # no-op if node to be failed over is already master
      return if @master == node
      logger.info("Handling manual failover")

      # ensure we can talk to the node (raises NodeUnavailableError if not)
      node.ping

      # make current master a slave, and promote new master
      @slaves << @master if @master
      @slaves.delete(node)
      promote_new_master(snapshots, node)
    end
199
+
200
    # Promotes a new master.
    #
    # @param [Hash<Node, NodeSnapshot>] snapshots the current set of snapshots
    # @param [Node] node the optional node to promote; when nil, the
    #   configured failover strategy picks the candidate
    def promote_new_master(snapshots, node = nil)
      # Drop the published node config first so clients don't use stale state.
      delete_path(redis_nodes_path)
      @master = nil

      # make a specific node or selected candidate the new master
      candidate = node || failover_strategy_candidate(snapshots)

      if candidate.nil?
        logger.error('Failed to promote a new master, no candidate available.')
      else
        @slaves.delete(candidate)
        @unavailable.delete(candidate)
        redirect_slaves_to(candidate)
        candidate.make_master!
        @master = candidate
        write_current_redis_nodes
        @master_promotion_attempts = 0
        logger.info("Successfully promoted #{candidate} to master.")
      end
    end
224
+
225
    # Discovers the current master and slave nodes.
    # @return [Boolean] true if nodes successfully discovered, false otherwise
    def discover_nodes
      @lock.synchronize do
        return unless running?
        @slaves, @unavailable = [], []
        # Prefer the last known master from ZK, falling back to probing nodes.
        if @master = find_existing_master
          logger.info("Using master #{@master} from existing znode config.")
        elsif @master = guess_master(@nodes)
          logger.info("Guessed master #{@master} from known redis nodes.")
        end
        @slaves = @nodes - [@master]
        logger.info("Managing master (#{@master}) and slaves #{stringify_nodes(@slaves)}")
      end
    rescue *NODE_DISCOVERY_ERRORS => ex
      msg = <<-MSG.gsub(/\s+/, ' ')
        Failed to discover master node: #{ex.inspect}
        In order to ensure a safe startup, redis_failover requires that all redis
        nodes be accessible, and only a single node indicating that it's the master.
        In order to fix this, you can perform a manual failover via redis_failover,
        or manually fix the individual redis servers. This discovery process will
        retry in #{TIMEOUT}s.
      MSG
      logger.warn(msg)
      sleep(TIMEOUT)
      retry
    end
252
+
253
    # Seeds the initial node master from an existing znode config.
    # @return [Node, nil] the master recorded in ZK, or nil when absent
    def find_existing_master
      if data = @zk.get(redis_nodes_path).first
        nodes = symbolize_keys(decode(data))
        master = node_from(nodes[:master])
        logger.info("Master from existing znode config: #{master || 'none'}")
        # Check for case where a node previously thought to be the master was
        # somehow manually reconfigured to be a slave outside of the node manager's
        # control.
        begin
          if master && master.slave?
            raise InvalidNodeRoleError.new(master, :master, :slave)
          end
        rescue RedisFailover::NodeUnavailableError => ex
          # Can't reach the node to verify; keep the recorded master anyway.
          logger.warn("Failed to check whether existing master has invalid role: #{ex.inspect}")
        end

        master
      end
    rescue ZK::Exceptions::NoNode
      # blank slate, no last known master
      nil
    end
276
+
277
    # Creates a Node instance from a string.
    #
    # @param [String] node_string a string representation of a node (e.g., host:port)
    # @return [Node] the Node representation, or nil for a nil input
    def node_from(node_string)
      return if node_string.nil?
      # limit 2 keeps any further colons inside the port portion untouched
      host, port = node_string.split(':', 2)
      Node.new(:host => host, :port => port, :password => @options[:password])
    end
286
+
287
    # Spawns the {RedisFailover::NodeWatcher} instances for each managed node.
    def spawn_watchers
      # Clear any stale state left by a previous run of this manager.
      @zk.delete(current_state_path, :ignore => :no_node)
      @monitored_available, @monitored_unavailable = {}, []
      @watchers = @nodes.map do |node|
        NodeWatcher.new(self, node, @options.fetch(:max_failures, 3))
      end
      @watchers.each(&:watch)
      logger.info("Monitoring redis nodes at #{stringify_nodes(@nodes)}")
    end
297
+
298
    # Searches for the master node.
    #
    # @param [Array<Node>] nodes the nodes to search
    # @return [Node] the found master node
    # @raise [NoMasterError] if no node reports the master role
    # @raise [MultipleMastersError] if more than one node claims master
    def guess_master(nodes)
      master_nodes = nodes.select { |node| node.master? }
      raise NoMasterError if master_nodes.empty?
      raise MultipleMastersError.new(master_nodes) if master_nodes.size > 1
      master_nodes.first
    end
308
+
309
    # Redirects all slaves to the specified node.
    #
    # @param [Node] node the node to which slaves are redirected
    def redirect_slaves_to(node)
      # dup because force_unavailable_slave mutates @slaves mid-iteration
      @slaves.dup.each do |slave|
        begin
          slave.make_slave!(node)
        rescue NodeUnavailableError
          logger.info("Failed to redirect unreachable slave #{slave} to #{node}")
          force_unavailable_slave(slave)
        end
      end
    end
322
+
323
+ # Forces a slave to be marked as unavailable.
324
+ #
325
+ # @param [Node] node the node to force as unavailable
326
+ def force_unavailable_slave(node)
327
+ @slaves.delete(node)
328
+ @unavailable << node unless @unavailable.include?(node)
329
+ end
330
+
331
    # It's possible that a newly available node may have been restarted
    # and completely lost its dynamically set run-time role by the node
    # manager. This method ensures that the node resumes its role as
    # determined by the manager.
    #
    # @param [Node] node the node to reconcile
    def reconcile(node)
      # Fast paths: node already has the role we expect.
      return if @master == node && node.master?
      return if @master && node.slave_of?(@master)

      logger.info("Reconciling node #{node}")
      if @master == node && !node.master?
        # we think the node is a master, but the node doesn't
        node.make_master!
        return
      end

      # verify that node is a slave for the current master
      if @master && !node.slave_of?(@master)
        node.make_slave!(@master)
      end
    end
353
+
354
+ # @return [Hash] the set of current nodes grouped by category
355
+ def current_nodes
356
+ {
357
+ :master => @master ? @master.to_s : nil,
358
+ :slaves => @slaves.map(&:to_s),
359
+ :unavailable => @unavailable.map(&:to_s)
360
+ }
361
+ end
362
+
363
+ # @return [Hash] the set of currently available/unavailable nodes as
364
+ # seen by this node manager instance
365
+ def node_availability_state
366
+ {
367
+ :available => Hash[@monitored_available.map { |k, v| [k.to_s, v] }],
368
+ :unavailable => @monitored_unavailable.map(&:to_s)
369
+ }
370
+ end
371
+
372
    # Deletes the znode path containing the redis nodes.
    #
    # @param [String] path the znode path to delete
    def delete_path(path)
      @zk.delete(path)
      logger.info("Deleted ZK node #{path}")
    rescue ZK::Exceptions::NoNode => ex
      # Already gone; nothing to do.
      logger.info("Tried to delete missing znode: #{ex.inspect}")
    end
381
+
382
    # Creates a znode path.
    #
    # @param [String] path the znode path to create
    # @param [Hash] options the options used to create the path
    # @option options [String] :initial_value an initial value for the znode
    # @option options [Boolean] :ephemeral true if node is ephemeral, false otherwise
    def create_path(path, options = {})
      unless @zk.exists?(path)
        @zk.create(path,
          options[:initial_value],
          :ephemeral => options.fetch(:ephemeral, false))
        logger.info("Created ZK node #{path}")
      end
    rescue ZK::Exceptions::NodeExists
      # best effort - another manager may have created it concurrently
    end
398
+
399
    # Writes state to a particular znode path.
    #
    # @param [String] path the znode path that should be written to
    # @param [String] value the value to write to the znode
    # @param [Hash] options the default options to be used when creating the node
    # @note the path will be created if it doesn't exist
    def write_state(path, value, options = {})
      create_path(path, options.merge(:initial_value => value))
      @zk.set(path, value)
    end
409
+
410
    # Handles a manual failover znode update.
    #
    # @param [ZK::Event] event the ZK event to handle
    def handle_manual_failover_update(event)
      if event.node_created? || event.node_changed?
        perform_manual_failover
      end
    rescue => ex
      logger.error("Error scheduling a manual failover: #{ex.inspect}")
      logger.error(ex.backtrace.join("\n"))
    ensure
      # Always re-arm the one-shot ZK watch, even after an error.
      @zk.stat(manual_failover_path, :watch => true)
    end
423
+
424
    # Produces a FQDN id for this Node Manager.
    #
    # @return [String] the FQDN-plus-pid identifier for this Node Manager
    def manager_id
      # NOTE(review): Socket.gethostbyname is deprecated in newer rubies
      # (Addrinfo is the suggested replacement) — presumably kept here for
      # compatibility with the rubies this gem targets; verify before changing.
      @manager_id ||= [
        Socket.gethostbyname(Socket.gethostname)[0],
        Process.pid
      ].join('-')
    end
433
+
434
    # Writes the current master list of redis nodes. This method is only invoked
    # if this node manager instance is the master/primary manager.
    def write_current_redis_nodes
      write_state(redis_nodes_path, encode(current_nodes))
    end
439
+
440
    # Writes the current monitored list of redis nodes. This method is always
    # invoked by all running node managers. The znode is ephemeral so the
    # state disappears with this manager's ZK session.
    def write_current_monitored_state
      write_state(current_state_path, encode(node_availability_state), :ephemeral => true)
    end
445
+
446
+ # @return [String] root path for current node manager state
447
+ def current_state_root
448
+ "#{@root_znode}/manager_node_state"
449
+ end
450
+
451
    # @return [String] the znode path for this node manager's view
    #   of available nodes (unique per manager via #manager_id)
    def current_state_path
      "#{current_state_root}/#{manager_id}"
    end
456
+
457
+ # @return [String] the znode path for the master redis nodes config
458
+ def redis_nodes_path
459
+ "#{@root_znode}/nodes"
460
+ end
461
+
462
+ # @return [String] root path for current node manager lock
463
+ def current_lock_path
464
+ "#{@root_znode}/master_redis_node_manager_lock"
465
+ end
466
+
467
    # @return [String] the znode path used for performing manual failovers
    def manual_failover_path
      ManualFailover.path(@root_znode)
    end
471
+
472
    # @return [Boolean] true if this node manager is the master, false otherwise
    def master_manager?
      @master_manager
    end
476
+
477
    # Used to update the master node manager state. These states are only handled if
    # this node manager instance is serving as the master manager.
    #
    # @param [Node] node the node to handle
    # @param [Hash<Node, NodeSnapshot>] snapshots the current set of snapshots
    def update_master_state(node, snapshots)
      state = @node_strategy.determine_state(node, snapshots)
      case state
      when :unavailable
        handle_unavailable(node, snapshots)
      when :available
        if node.syncing_with_master?
          handle_syncing(node, snapshots)
        else
          handle_available(node, snapshots)
        end
      else
        raise InvalidNodeStateError.new(node, state)
      end
    rescue *ZK_ERRORS
      # fail hard if this is a ZK connection-related error
      raise
    rescue => ex
      # Anything else is logged and swallowed so one bad node can't stop
      # processing of the others.
      logger.error("Error handling state report for #{[node, state].inspect}: #{ex.inspect}")
    end
502
+
503
    # Updates the current view of the world for this particular node
    # manager instance. All node managers write this state regardless
    # of whether they are the master manager or not.
    #
    # @param [Node] node the node to handle
    # @param [Symbol] state the node state
    # @param [Integer] latency an optional latency
    def update_current_state(node, state, latency = nil)
      # Snapshot current view so we can roll back on a failed ZK write.
      old_unavailable = @monitored_unavailable.dup
      old_available = @monitored_available.dup

      case state
      when :unavailable
        unless @monitored_unavailable.include?(node)
          @monitored_unavailable << node
          @monitored_available.delete(node)
          write_current_monitored_state
        end
      when :available
        last_latency = @monitored_available[node]
        # NOTE(review): only latency *increases* beyond the threshold trigger
        # a rewrite; improvements are not republished — presumably intentional
        # to limit ZK churn, but confirm before relying on recorded latencies.
        if last_latency.nil? || (latency - last_latency) > LATENCY_THRESHOLD
          @monitored_available[node] = latency
          @monitored_unavailable.delete(node)
          write_current_monitored_state
        end
      else
        raise InvalidNodeStateError.new(node, state)
      end
    rescue => ex
      # if an error occurs, make sure that we rollback to the old state
      @monitored_unavailable = old_unavailable
      @monitored_available = old_available
      raise
    end
537
+
538
# Fetches each currently running node manager's view of the
# world in terms of which nodes they think are available/unavailable.
#
# Reads every child znode under the current-state root and decodes its
# payload. A vanished znode is skipped silently; any other per-znode
# failure is logged and that manager's state is simply omitted.
#
# @return [Hash<String, Array>] a hash of node manager to host states
def fetch_node_manager_states
  @zk.children(current_state_root).each_with_object({}) do |manager_name, states|
    full_path = "#{current_state_root}/#{manager_name}"
    begin
      raw_data = @zk.get(full_path).first
      states[manager_name] = symbolize_keys(decode(raw_data))
    rescue ZK::Exceptions::NoNode
      # ignore, this is an edge case that can happen when a node manager
      # process dies while fetching its state
    rescue => ex
      logger.error("Failed to fetch states for #{full_path}: #{ex.inspect}")
    end
  end
end
557
+
558
# Builds current snapshots of nodes across all running node managers.
#
# Aggregates each node manager's reported view into a single NodeSnapshot
# per node, recording which managers can see the node (and at what
# latency) and which cannot. A state hash missing its :available or
# :unavailable entry (e.g. from a partially-written or corrupt state
# znode) is tolerated and treated as empty instead of raising
# NoMethodError on nil.
#
# @return [Hash<Node, NodeSnapshot>] the snapshots for all nodes
def current_node_snapshots
  nodes = {}
  snapshots = Hash.new { |h, k| h[k] = NodeSnapshot.new(k) }
  fetch_node_manager_states.each do |node_manager, states|
    available, unavailable = states.values_at(:available, :unavailable)
    # Array() maps nil to []; for the available Hash it yields
    # [node_string, latency] pairs, which destructure identically.
    Array(available).each do |node_string, latency|
      node = nodes[node_string] ||= node_from(node_string)
      snapshots[node].viewable_by(node_manager, latency)
    end
    Array(unavailable).each do |node_string|
      node = nodes[node_string] ||= node_from(node_string)
      snapshots[node].unviewable_by(node_manager)
    end
  end

  snapshots
end
578
+
579
# Waits until this node manager becomes the master.
#
# Blocks on the ZK exclusive lock; whichever node manager instance holds
# the lock is the master. Once acquired, the master flag is flipped and
# the node-management loop runs for as long as the lock is held.
def wait_until_master
  logger.info('Waiting to become master Node Manager ...')

  with_lock do
    @master_manager = true
    logger.info('Acquired master Node Manager lock.')
    logger.info("Configured node strategy #{@node_strategy.class}")
    logger.info("Configured failover strategy #{@failover_strategy.class}")
    logger.info("Required Node Managers to make a decision: #{@required_node_managers}")
    manage_nodes
  end
end
592
+
593
# Manages the redis nodes by periodically processing snapshots.
#
# Runs until shutdown or until this instance loses the master-manager
# role. Each iteration asserts the ZK lock is still held, sleeps for
# CHECK_INTERVAL, then (under @lock) evaluates cluster snapshots and
# flushes the resulting master/slave configuration to ZK. Snapshot
# processing is skipped entirely while too few node managers report.
#
# @raise [NoMasterError] if no master could be promoted within
#   MAX_PROMOTION_ATTEMPTS iterations
def manage_nodes
  # Re-discover nodes, since the state of the world may have been changed
  # by the time we've become the primary node manager.
  discover_nodes

  # ensure that slaves are correctly pointing to this master
  redirect_slaves_to(@master)

  # Periodically update master config state.
  while running? && master_manager?
    @zk_lock.assert!
    sleep(CHECK_INTERVAL)

    @lock.synchronize do
      snapshots = current_node_snapshots
      if ensure_sufficient_node_managers(snapshots)
        snapshots.each_key do |node|
          update_master_state(node, snapshots)
        end

        # flush current master state
        write_current_redis_nodes

        # check if we've exhausted our attempts to promote a master
        unless @master
          @master_promotion_attempts += 1
          raise NoMasterError if @master_promotion_attempts > MAX_PROMOTION_ATTEMPTS
        end
      end
    end
  end
end
626
+
627
# Creates a Node instance from a string.
#
# @param [String] node_string a string representation of a node (e.g., host:port)
# @return [Node] the Node representation, or nil if node_string is nil
def node_from(node_string)
  unless node_string.nil?
    parts = node_string.split(':', 2)
    Node.new(:host => parts[0], :port => parts[1], :password => @options[:password])
  end
end
636
+
637
# Executes a block wrapped in a ZK exclusive lock.
#
# Blocks until the lock is acquired. Rescues Exception (not just
# StandardError) deliberately: a shutdown can interrupt the blocking
# lock! call with non-standard exceptions, in which case we return
# quietly instead of raising. The lock is always released in the ensure
# clause; release failures are logged, never raised.
def with_lock
  @zk_lock ||= @zk.locker(current_lock_path)

  begin
    @zk_lock.lock!(true)
  rescue Exception
    # handle shutdown case
    running? ? raise : return
  end

  if running?
    # confirm we still own the lock before doing any work under it
    @zk_lock.assert!
    yield
  end
ensure
  if @zk_lock
    begin
      @zk_lock.unlock!
    rescue => ex
      logger.warn("Failed to release lock: #{ex.inspect}")
    end
  end
end
661
+
662
# Perform a manual failover to a redis node.
#
# Reads the requested target from the manual failover znode. The target
# is either a specific node string or ManualFailover::ANY_SLAVE, in which
# case the configured failover strategy picks a candidate from the
# current snapshots. Only a running master manager that still holds the
# ZK lock acts on a request. Errors are logged, never raised. The ensure
# clause re-arms the watch on the manual failover path so subsequent
# requests are observed.
def perform_manual_failover
  @lock.synchronize do
    return unless running? && @master_manager && @zk_lock
    @zk_lock.assert!
    # :watch => true re-subscribes this manager to future failover requests
    new_master = @zk.get(manual_failover_path, :watch => true).first
    return unless new_master && new_master.size > 0
    logger.info("Received manual failover request for: #{new_master}")
    logger.info("Current nodes: #{current_nodes.inspect}")
    snapshots = current_node_snapshots

    node = if new_master == ManualFailover::ANY_SLAVE
      failover_strategy_candidate(snapshots)
    else
      node_from(new_master)
    end

    if node
      handle_manual_failover(node, snapshots)
    else
      logger.error('Failed to perform manual failover, no candidate found.')
    end
  end
rescue => ex
  logger.error("Error handling manual failover: #{ex.inspect}")
  logger.error(ex.backtrace.join("\n"))
ensure
  @zk.stat(manual_failover_path, :watch => true)
end
691
+
692
# Thread-safe check of the shutdown flag.
#
# @return [Boolean] true if running, false otherwise
def running?
  is_shutdown = @lock.synchronize { @shutdown }
  !is_shutdown
end
696
+
697
# Renders a collection of redis nodes as a single parenthesized,
# comma-separated string for log output.
#
# @param [Array<Node>] nodes the nodes to render
# @return [String] a stringified version of redis nodes
def stringify_nodes(nodes)
  names = nodes.map { |node| node.to_s }
  "(#{names.join(', ')})"
end
701
+
702
# Determines if each snapshot has a sufficient number of node managers.
#
# Logs an error for every node whose snapshot is short of the required
# quorum, and logs an info message once on the transition from
# insufficient to sufficient. Updates and returns the cached flag.
#
# @param [Hash<Node, Snapshot>] snapshots the current snapshots
# @return [Boolean] true if sufficient, false otherwise
def ensure_sufficient_node_managers(snapshots)
  sufficient = true
  snapshots.each do |node, snapshot|
    managers = snapshot.node_managers
    next unless managers.size < @required_node_managers
    logger.error("Not enough Node Managers in snapshot for node #{node}. " +
                 "Required: #{@required_node_managers}, " +
                 "Available: #{managers.size} #{managers}")
    sufficient = false
  end

  # announce quorum only on the insufficient -> sufficient transition
  if sufficient && !@sufficient_node_managers
    logger.info("Required Node Managers are visible: #{@required_node_managers}")
  end

  @sufficient_node_managers = sufficient
end
725
+
726
# Invokes the configured failover strategy.
#
# Restricts the candidate pool to nodes this master Node Manager can
# itself see, then asks the failover strategy to choose among them.
#
# @param [Hash<Node, NodeSnapshot>] snapshots the node snapshots
# @return [Node] a failover candidate
def failover_strategy_candidate(snapshots)
  # only include nodes that this master Node Manager can see
  visible = snapshots.select { |_node, snapshot| snapshot.viewable_by?(manager_id) }

  logger.info('Attempting to find candidate from snapshots:')
  logger.info("\n" + visible.values.join("\n"))
  @failover_strategy.find_candidate(visible)
end
740
+ end
741
+ end