spbtv_redis_failover 1.0.2.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (53) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +19 -0
  3. data/.travis.yml +7 -0
  4. data/.yardopts +6 -0
  5. data/Changes.md +191 -0
  6. data/Gemfile +2 -0
  7. data/LICENSE +22 -0
  8. data/README.md +240 -0
  9. data/Rakefile +9 -0
  10. data/bin/redis_node_manager +7 -0
  11. data/examples/config.yml +17 -0
  12. data/examples/multiple_environments_config.yml +15 -0
  13. data/lib/redis_failover.rb +25 -0
  14. data/lib/redis_failover/cli.rb +142 -0
  15. data/lib/redis_failover/client.rb +517 -0
  16. data/lib/redis_failover/errors.rb +54 -0
  17. data/lib/redis_failover/failover_strategy.rb +25 -0
  18. data/lib/redis_failover/failover_strategy/latency.rb +21 -0
  19. data/lib/redis_failover/manual_failover.rb +52 -0
  20. data/lib/redis_failover/node.rb +190 -0
  21. data/lib/redis_failover/node_manager.rb +741 -0
  22. data/lib/redis_failover/node_snapshot.rb +81 -0
  23. data/lib/redis_failover/node_strategy.rb +34 -0
  24. data/lib/redis_failover/node_strategy/consensus.rb +18 -0
  25. data/lib/redis_failover/node_strategy/majority.rb +18 -0
  26. data/lib/redis_failover/node_strategy/single.rb +17 -0
  27. data/lib/redis_failover/node_watcher.rb +83 -0
  28. data/lib/redis_failover/runner.rb +27 -0
  29. data/lib/redis_failover/util.rb +137 -0
  30. data/lib/redis_failover/version.rb +3 -0
  31. data/misc/redis_failover.png +0 -0
  32. data/spbtv_redis_failover.gemspec +26 -0
  33. data/spec/cli_spec.rb +75 -0
  34. data/spec/client_spec.rb +153 -0
  35. data/spec/failover_strategy/latency_spec.rb +41 -0
  36. data/spec/failover_strategy_spec.rb +17 -0
  37. data/spec/node_manager_spec.rb +136 -0
  38. data/spec/node_snapshot_spec.rb +30 -0
  39. data/spec/node_spec.rb +84 -0
  40. data/spec/node_strategy/consensus_spec.rb +30 -0
  41. data/spec/node_strategy/majority_spec.rb +22 -0
  42. data/spec/node_strategy/single_spec.rb +22 -0
  43. data/spec/node_strategy_spec.rb +22 -0
  44. data/spec/node_watcher_spec.rb +58 -0
  45. data/spec/spec_helper.rb +21 -0
  46. data/spec/support/config/multiple_environments.yml +15 -0
  47. data/spec/support/config/multiple_environments_with_chroot.yml +17 -0
  48. data/spec/support/config/single_environment.yml +7 -0
  49. data/spec/support/config/single_environment_with_chroot.yml +8 -0
  50. data/spec/support/node_manager_stub.rb +87 -0
  51. data/spec/support/redis_stub.rb +105 -0
  52. data/spec/util_spec.rb +21 -0
  53. metadata +207 -0
@@ -0,0 +1,54 @@
1
+ module RedisFailover
2
+ # Base class for all RedisFailover errors.
3
+ class Error < StandardError
4
+ end
5
+
6
+ # Raised when a node is specified incorrectly.
7
+ class InvalidNodeError < Error
8
+ end
9
+
10
+ # Raised when a node changes to an invalid/unknown state.
11
+ class InvalidNodeStateError < Error
12
+ def initialize(node, state)
13
+ super("Invalid state change `#{state}` for node #{node}")
14
+ end
15
+ end
16
+
17
+ # Raised when a node is unavailable (i.e., unreachable via network).
18
+ class NodeUnavailableError < Error
19
+ def initialize(node)
20
+ super("Node: #{node}")
21
+ end
22
+ end
23
+
24
+ # Raised when no master is currently available.
25
+ class NoMasterError < Error
26
+ end
27
+
28
+ # Raised when more than one master is found on startup.
29
+ class MultipleMastersError < Error
30
+ def initialize(nodes)
31
+ super("Multiple nodes with master role: #{nodes.map(&:to_s)}")
32
+ end
33
+ end
34
+
35
+ # Raised when no slave is currently available.
36
+ class NoSlaveError < Error
37
+ end
38
+
39
+ # Raised when a redis server is no longer using the same role
40
+ # as previously assumed.
41
+ class InvalidNodeRoleError < Error
42
+ def initialize(node, assumed, actual)
43
+ super("Invalid role detected for node #{node}, client thought " +
44
+ "it was a #{assumed}, but it's now a #{actual}")
45
+ end
46
+ end
47
+
48
+ # Raised when an unsupported redis operation is performed.
49
+ class UnsupportedOperationError < Error
50
+ def initialize(operation)
51
+ super("Operation `#{operation}` is currently unsupported")
52
+ end
53
+ end
54
+ end
@@ -0,0 +1,25 @@
1
+ module RedisFailover
2
+ # Base class for strategies that determine which node is used during failover.
3
+ class FailoverStrategy
4
+ include Util
5
+
6
+ # Loads a strategy based on the given name.
7
+ #
8
+ # @param [String, Symbol] name the strategy name
9
+ # @return [Object] a new strategy instance
10
+ def self.for(name)
11
+ require "redis_failover/failover_strategy/#{name.downcase}"
12
+ const_get(name.capitalize).new
13
+ rescue LoadError, NameError
14
+ raise "Failed to find failover strategy: #{name}"
15
+ end
16
+
17
+ # Returns a candidate node as determined by this strategy.
18
+ #
19
+ # @param [Hash<Node, NodeSnapshot>] snapshots the node snapshots
20
+ # @return [Node] the candidate node or nil if one couldn't be found
21
+ def find_candidate(snapshots)
22
+ raise NotImplementedError
23
+ end
24
+ end
25
+ end
@@ -0,0 +1,21 @@
1
+ module RedisFailover
2
+ class FailoverStrategy
3
+ # Failover strategy that selects an available node that is both seen by all
4
+ # node managers and has the lowest reported health check latency.
5
+ class Latency < FailoverStrategy
6
+ # @see RedisFailover::FailoverStrategy#find_candidate
7
+ def find_candidate(snapshots)
8
+ candidates = {}
9
+ snapshots.each do |node, snapshot|
10
+ if snapshot.all_available?
11
+ candidates[node] = snapshot.avg_latency
12
+ end
13
+ end
14
+
15
+ if candidate = candidates.min_by(&:last)
16
+ candidate.first
17
+ end
18
+ end
19
+ end
20
+ end
21
+ end
@@ -0,0 +1,52 @@
1
+ module RedisFailover
2
+ # Provides manual failover support to a new master.
3
+ class ManualFailover
4
+ # Path for manual failover communication.
5
+ ZNODE_PATH = 'manual_failover'.freeze
6
+
7
+ # Denotes that any slave can be used as a candidate for promotion.
8
+ ANY_SLAVE = "ANY_SLAVE".freeze
9
+
10
+ def self.path(root_znode)
11
+ "#{root_znode}/#{ZNODE_PATH}"
12
+ end
13
+
14
+ # Creates a new instance.
15
+ #
16
+ # @param [ZK] zk the ZooKeeper client
17
+ # @param [ZNode] root_znode the root ZK node
18
+ # @param [Hash] options the options used for manual failover
19
+ # @option options [String] :host the host of the failover candidate
20
+ # @option options [String] :port the port of the failover candidate
21
+ # @note
22
+ # If options is empty, a random slave will be used
23
+ # as a failover candidate.
24
+ def initialize(zk, root_znode, options = {})
25
+ @zk = zk
26
+ @root_znode = root_znode
27
+ @options = options
28
+
29
+ unless @options.empty?
30
+ port = Integer(@options[:port]) rescue nil
31
+ raise ArgumentError, ':host not properly specified' if @options[:host].to_s.empty?
32
+ raise ArgumentError, ':port not properly specified' if port.nil?
33
+ end
34
+ end
35
+
36
+ # Performs a manual failover.
37
+ def perform
38
+ create_path
39
+ node = @options.empty? ? ANY_SLAVE : "#{@options[:host]}:#{@options[:port]}"
40
+ @zk.set(self.class.path(@root_znode), node)
41
+ end
42
+
43
+ private
44
+
45
+ # Creates the znode path used for coordinating manual failovers.
46
+ def create_path
47
+ @zk.create(self.class.path(@root_znode))
48
+ rescue ZK::Exceptions::NodeExists
49
+ # best effort
50
+ end
51
+ end
52
+ end
@@ -0,0 +1,190 @@
1
+ module RedisFailover
2
+ # Represents a redis node (master or slave). Instances of this class
3
+ # are used by the NodeManager and NodeWatcher to manipulate real redis
4
+ # servers.
5
+ class Node
6
+ include Util
7
+
8
+ # Maximum amount of time given for any redis operation to complete.
9
+ # If a redis operation doesn't complete in the alotted time, a
10
+ # NodeUnavailableError will be raised.
11
+ MAX_OP_WAIT_TIME = 5
12
+
13
+ # @return [String] the redis server host
14
+ attr_reader :host
15
+
16
+ # @return [Integer] the redis server port
17
+ attr_reader :port
18
+
19
+ # Creates a new instance.
20
+ #
21
+ # @param [Hash] options the options used to create the node
22
+ # @option options [String] :host the host of the redis server
23
+ # @option options [String] :port the port of the redis server
24
+ def initialize(options = {})
25
+ @host = options[:host]
26
+ raise InvalidNodeError, 'missing host' if @host.to_s.empty?
27
+ @port = Integer(options[:port] || 6379)
28
+ @password = options[:password]
29
+ end
30
+
31
+ # @return [Boolean] true if this node is a master, false otherwise
32
+ def master?
33
+ role == 'master'
34
+ end
35
+
36
+ # @return [Boolean] true if this node is a slave, false otherwise
37
+ def slave?
38
+ !master?
39
+ end
40
+
41
+ # Determines if this node is a slave of the given master.
42
+ #
43
+ # @param [Node] master the master to check
44
+ # @return [Boolean] true if slave of master, false otherwise
45
+ def slave_of?(master)
46
+ current_master == master
47
+ end
48
+
49
+ # Determines current master of this slave.
50
+ #
51
+ # @return [Node] the node representing the master of this slave
52
+ def current_master
53
+ info = fetch_info
54
+ return unless info[:role] == 'slave'
55
+ Node.new(:host => info[:master_host], :port => info[:master_port].to_i)
56
+ end
57
+
58
+ # Waits until something interesting happens. If the connection
59
+ # with this node dies, the blpop call will raise an error. If
60
+ # the blpop call returns without error, then this will be due to
61
+ # a graceful shutdown signaled by #wakeup or a timeout.
62
+ def wait
63
+ perform_operation do |redis|
64
+ redis.blpop(wait_key, MAX_OP_WAIT_TIME - 3)
65
+ redis.del(wait_key)
66
+ end
67
+ end
68
+
69
+ # Wakes up this node by pushing a value to its internal
70
+ # queue used by #wait.
71
+ def wakeup
72
+ perform_operation do |redis|
73
+ redis.lpush(wait_key, '1')
74
+ end
75
+ end
76
+
77
+ # Makes this node a slave of the given node.
78
+ #
79
+ # @param [Node] node the node of which to become a slave
80
+ def make_slave!(node)
81
+ perform_operation do |redis|
82
+ unless slave_of?(node)
83
+ redis.slaveof(node.host, node.port)
84
+ logger.info("#{self} is now a slave of #{node}")
85
+ wakeup
86
+ end
87
+ end
88
+ end
89
+
90
+ # Makes this node a master node.
91
+ def make_master!
92
+ perform_operation do |redis|
93
+ unless master?
94
+ redis.slaveof('no', 'one')
95
+ logger.info("#{self} is now master")
96
+ wakeup
97
+ end
98
+ end
99
+ end
100
+
101
+ # @return [String] an inspect string for this node
102
+ def inspect
103
+ "<RedisFailover::Node #{to_s}>"
104
+ end
105
+
106
+ # @return [String] a friendly string for this node
107
+ def to_s
108
+ "#{@host}:#{@port}"
109
+ end
110
+
111
+ # Determines if this node is equal to another node.
112
+ #
113
+ # @param [Node] other the other node to compare
114
+ # @return [Boolean] true if equal, false otherwise
115
+ def ==(other)
116
+ return false unless Node === other
117
+ return true if self.equal?(other)
118
+ [host, port] == [other.host, other.port]
119
+ end
120
+ alias_method :eql?, :==
121
+
122
+ # @return [Integer] a hash value for this node
123
+ def hash
124
+ to_s.hash
125
+ end
126
+
127
+ # Fetches information/stats for this node.
128
+ #
129
+ # @return [Hash] the info for this node
130
+ def fetch_info
131
+ perform_operation do |redis|
132
+ symbolize_keys(redis.info)
133
+ end
134
+ end
135
+ alias_method :ping, :fetch_info
136
+
137
+ # @return [Boolean] determines if this node prohibits stale reads
138
+ def prohibits_stale_reads?
139
+ perform_operation do |redis|
140
+ redis.config('get', 'slave-serve-stale-data').last == 'no'
141
+ end
142
+ end
143
+
144
+ # @return [Boolean] determines if this node is syncing with its master
145
+ def syncing_with_master?
146
+ perform_operation do |redis|
147
+ fetch_info[:master_sync_in_progress] == '1'
148
+ end
149
+ end
150
+
151
+ private
152
+
153
+ # @return [String] the current role for this node
154
+ def role
155
+ fetch_info[:role]
156
+ end
157
+
158
+ # @return [String] the name of the wait queue for this node
159
+ def wait_key
160
+ @wait_key ||= "_redis_failover_#{SecureRandom.hex(32)}"
161
+ end
162
+
163
+ # @return [Redis] a new redis client instance for this node
164
+ def new_client
165
+ Redis.new(:host => @host, :password => @password, :port => @port)
166
+ end
167
+
168
+ # Safely performs a redis operation within a given timeout window.
169
+ #
170
+ # @yield [Redis] the redis client to use for the operation
171
+ # @raise [NodeUnavailableError] if node is currently unreachable
172
+ def perform_operation
173
+ redis = nil
174
+ Timeout.timeout(MAX_OP_WAIT_TIME) do
175
+ redis = new_client
176
+ yield redis
177
+ end
178
+ rescue Exception => ex
179
+ raise NodeUnavailableError, "#{ex.class}: #{ex.message}", ex.backtrace
180
+ ensure
181
+ if redis
182
+ begin
183
+ redis.client.disconnect
184
+ rescue Exception => ex
185
+ raise NodeUnavailableError, "#{ex.class}: #{ex.message}", ex.backtrace
186
+ end
187
+ end
188
+ end
189
+ end
190
+ end
@@ -0,0 +1,741 @@
1
+ module RedisFailover
2
+ # NodeManager manages a list of redis nodes. Upon startup, the NodeManager
3
+ # will discover the current redis master and slaves. Each redis node is
4
+ # monitored by a NodeWatcher instance. The NodeWatchers periodically
5
+ # report the current state of the redis node it's watching to the
6
+ # NodeManager. The NodeManager processes the state reports and reacts
7
+ # appropriately by handling stale/dead nodes, and promoting a new redis master
8
+ # if it sees fit to do so.
9
+ class NodeManager
10
+ include Util
11
+
12
+ # Number of seconds to wait before retrying bootstrap process.
13
+ TIMEOUT = 5
14
+ # Number of seconds for checking node snapshots.
15
+ CHECK_INTERVAL = 5
16
+ # Number of max attempts to promote a master before releasing master lock.
17
+ MAX_PROMOTION_ATTEMPTS = 3
18
+ # Latency threshold for recording node state.
19
+ LATENCY_THRESHOLD = 0.5
20
+
21
+ # Errors that can happen during the node discovery process.
22
+ NODE_DISCOVERY_ERRORS = [
23
+ InvalidNodeRoleError,
24
+ NodeUnavailableError,
25
+ NoMasterError,
26
+ MultipleMastersError
27
+ ].freeze
28
+
29
+ # Creates a new instance.
30
+ #
31
+ # @param [Hash] options the options used to initialize the manager
32
+ # @option options [String] :zkservers comma-separated ZK host:port pairs
33
+ # @option options [String] :znode_path znode path override for redis nodes
34
+ # @option options [String] :password password for redis nodes
35
+ # @option options [Array<String>] :nodes the nodes to manage
36
+ # @option options [String] :max_failures the max failures for a node
37
+ def initialize(options)
38
+ logger.info("Redis Node Manager v#{VERSION} starting (#{RUBY_DESCRIPTION})")
39
+ @options = options
40
+ @required_node_managers = options.fetch(:required_node_managers, 1)
41
+ @root_znode = options.fetch(:znode_path, Util::DEFAULT_ROOT_ZNODE_PATH)
42
+ @node_strategy = NodeStrategy.for(options.fetch(:node_strategy, :majority))
43
+ @failover_strategy = FailoverStrategy.for(options.fetch(:failover_strategy, :latency))
44
+ @nodes = Array(@options[:nodes]).map { |opts| Node.new(opts) }.uniq
45
+ @master_manager = false
46
+ @master_promotion_attempts = 0
47
+ @sufficient_node_managers = false
48
+ @lock = Monitor.new
49
+ @shutdown = false
50
+ end
51
+
52
+ # Starts the node manager.
53
+ #
54
+ # @note This method does not return until the manager terminates.
55
+ def start
56
+ return unless running?
57
+ setup_zk
58
+ spawn_watchers
59
+ wait_until_master
60
+ rescue *ZK_ERRORS => ex
61
+ logger.error("ZK error while attempting to manage nodes: #{ex.inspect}")
62
+ reset
63
+ sleep(TIMEOUT)
64
+ retry
65
+ rescue NoMasterError
66
+ logger.error("Failed to promote a new master after #{MAX_PROMOTION_ATTEMPTS} attempts.")
67
+ reset
68
+ sleep(TIMEOUT)
69
+ retry
70
+ end
71
+
72
+ # Notifies the manager of a state change. Used primarily by
73
+ # {RedisFailover::NodeWatcher} to inform the manager of watched node states.
74
+ #
75
+ # @param [Node] node the node
76
+ # @param [Symbol] state the state
77
+ # @param [Integer] latency an optional latency
78
+ def notify_state(node, state, latency = nil)
79
+ @lock.synchronize do
80
+ if running?
81
+ update_current_state(node, state, latency)
82
+ end
83
+ end
84
+ rescue => ex
85
+ logger.error("Error handling state report #{[node, state].inspect}: #{ex.inspect}")
86
+ logger.error(ex.backtrace.join("\n"))
87
+ end
88
+
89
+ # Performs a reset of the manager.
90
+ def reset
91
+ @master_manager = false
92
+ @master_promotion_attempts = 0
93
+ @watchers.each(&:shutdown) if @watchers
94
+ end
95
+
96
+ # Initiates a graceful shutdown.
97
+ def shutdown
98
+ logger.info('Shutting down ...')
99
+ @lock.synchronize do
100
+ @shutdown = true
101
+ end
102
+
103
+ reset
104
+ exit
105
+ end
106
+
107
+ private
108
+
109
+ # Configures the ZooKeeper client.
110
+ def setup_zk
111
+ unless @zk
112
+ @zk = ZK.new("#{@options[:zkservers]}#{@options[:chroot] || ''}")
113
+ @zk.register(manual_failover_path) do |event|
114
+ handle_manual_failover_update(event)
115
+ end
116
+ @zk.on_connected { @zk.stat(manual_failover_path, :watch => true) }
117
+ end
118
+
119
+ create_path(@root_znode)
120
+ create_path(current_state_root)
121
+ @zk.stat(manual_failover_path, :watch => true)
122
+ end
123
+
124
+ # Handles an unavailable node.
125
+ #
126
+ # @param [Node] node the unavailable node
127
+ # @param [Hash<Node, NodeSnapshot>] snapshots the current set of snapshots
128
+ def handle_unavailable(node, snapshots)
129
+ # no-op if we already know about this node
130
+ return if @unavailable.include?(node)
131
+ logger.info("Handling unavailable node: #{node}")
132
+
133
+ @unavailable << node
134
+ # find a new master if this node was a master
135
+ if node == @master
136
+ logger.info("Demoting currently unavailable master #{node}.")
137
+ promote_new_master(snapshots)
138
+ else
139
+ @slaves.delete(node)
140
+ end
141
+ end
142
+
143
+ # Handles an available node.
144
+ #
145
+ # @param [Node] node the available node
146
+ # @param [Hash<Node, NodeSnapshot>] snapshots the current set of snapshots
147
+ def handle_available(node, snapshots)
148
+ reconcile(node)
149
+
150
+ # no-op if we already know about this node
151
+ return if @master == node || (@master && @slaves.include?(node))
152
+ logger.info("Handling available node: #{node}")
153
+
154
+ if @master
155
+ # master already exists, make a slave
156
+ node.make_slave!(@master)
157
+ @slaves << node
158
+ else
159
+ # no master exists, make this the new master
160
+ promote_new_master(snapshots, node)
161
+ end
162
+
163
+ @unavailable.delete(node)
164
+ end
165
+
166
+ # Handles a node that is currently syncing.
167
+ #
168
+ # @param [Node] node the syncing node
169
+ # @param [Hash<Node, NodeSnapshot>] snapshots the current set of snapshots
170
+ def handle_syncing(node, snapshots)
171
+ reconcile(node)
172
+
173
+ if node.syncing_with_master? && node.prohibits_stale_reads?
174
+ logger.info("Node #{node} not ready yet, still syncing with master.")
175
+ force_unavailable_slave(node)
176
+ else
177
+ # otherwise, we can use this node
178
+ handle_available(node, snapshots)
179
+ end
180
+ end
181
+
182
+ # Handles a manual failover request to the given node.
183
+ #
184
+ # @param [Node] node the candidate node for failover
185
+ # @param [Hash<Node, NodeSnapshot>] snapshots the current set of snapshots
186
+ def handle_manual_failover(node, snapshots)
187
+ # no-op if node to be failed over is already master
188
+ return if @master == node
189
+ logger.info("Handling manual failover")
190
+
191
+ # ensure we can talk to the node
192
+ node.ping
193
+
194
+ # make current master a slave, and promote new master
195
+ @slaves << @master if @master
196
+ @slaves.delete(node)
197
+ promote_new_master(snapshots, node)
198
+ end
199
+
200
+ # Promotes a new master.
201
+ #
202
+ # @param [Hash<Node, NodeSnapshot>] snapshots the current set of snapshots
203
+ # @param [Node] node the optional node to promote
204
+ def promote_new_master(snapshots, node = nil)
205
+ delete_path(redis_nodes_path)
206
+ @master = nil
207
+
208
+ # make a specific node or selected candidate the new master
209
+ candidate = node || failover_strategy_candidate(snapshots)
210
+
211
+ if candidate.nil?
212
+ logger.error('Failed to promote a new master, no candidate available.')
213
+ else
214
+ @slaves.delete(candidate)
215
+ @unavailable.delete(candidate)
216
+ redirect_slaves_to(candidate)
217
+ candidate.make_master!
218
+ @master = candidate
219
+ write_current_redis_nodes
220
+ @master_promotion_attempts = 0
221
+ logger.info("Successfully promoted #{candidate} to master.")
222
+ end
223
+ end
224
+
225
+ # Discovers the current master and slave nodes.
226
+ # @return [Boolean] true if nodes successfully discovered, false otherwise
227
+ def discover_nodes
228
+ @lock.synchronize do
229
+ return unless running?
230
+ @slaves, @unavailable = [], []
231
+ if @master = find_existing_master
232
+ logger.info("Using master #{@master} from existing znode config.")
233
+ elsif @master = guess_master(@nodes)
234
+ logger.info("Guessed master #{@master} from known redis nodes.")
235
+ end
236
+ @slaves = @nodes - [@master]
237
+ logger.info("Managing master (#{@master}) and slaves #{stringify_nodes(@slaves)}")
238
+ end
239
+ rescue *NODE_DISCOVERY_ERRORS => ex
240
+ msg = <<-MSG.gsub(/\s+/, ' ')
241
+ Failed to discover master node: #{ex.inspect}
242
+ In order to ensure a safe startup, redis_failover requires that all redis
243
+ nodes be accessible, and only a single node indicating that it's the master.
244
+ In order to fix this, you can perform a manual failover via redis_failover,
245
+ or manually fix the individual redis servers. This discovery process will
246
+ retry in #{TIMEOUT}s.
247
+ MSG
248
+ logger.warn(msg)
249
+ sleep(TIMEOUT)
250
+ retry
251
+ end
252
+
253
+ # Seeds the initial node master from an existing znode config.
254
+ def find_existing_master
255
+ if data = @zk.get(redis_nodes_path).first
256
+ nodes = symbolize_keys(decode(data))
257
+ master = node_from(nodes[:master])
258
+ logger.info("Master from existing znode config: #{master || 'none'}")
259
+ # Check for case where a node previously thought to be the master was
260
+ # somehow manually reconfigured to be a slave outside of the node manager's
261
+ # control.
262
+ begin
263
+ if master && master.slave?
264
+ raise InvalidNodeRoleError.new(master, :master, :slave)
265
+ end
266
+ rescue RedisFailover::NodeUnavailableError => ex
267
+ logger.warn("Failed to check whether existing master has invalid role: #{ex.inspect}")
268
+ end
269
+
270
+ master
271
+ end
272
+ rescue ZK::Exceptions::NoNode
273
+ # blank slate, no last known master
274
+ nil
275
+ end
276
+
277
+ # Creates a Node instance from a string.
278
+ #
279
+ # @param [String] node_string a string representation of a node (e.g., host:port)
280
+ # @return [Node] the Node representation
281
+ def node_from(node_string)
282
+ return if node_string.nil?
283
+ host, port = node_string.split(':', 2)
284
+ Node.new(:host => host, :port => port, :password => @options[:password])
285
+ end
286
+
287
+ # Spawns the {RedisFailover::NodeWatcher} instances for each managed node.
288
+ def spawn_watchers
289
+ @zk.delete(current_state_path, :ignore => :no_node)
290
+ @monitored_available, @monitored_unavailable = {}, []
291
+ @watchers = @nodes.map do |node|
292
+ NodeWatcher.new(self, node, @options.fetch(:max_failures, 3))
293
+ end
294
+ @watchers.each(&:watch)
295
+ logger.info("Monitoring redis nodes at #{stringify_nodes(@nodes)}")
296
+ end
297
+
298
+ # Searches for the master node.
299
+ #
300
+ # @param [Array<Node>] nodes the nodes to search
301
+ # @return [Node] the found master node, nil if not found
302
+ def guess_master(nodes)
303
+ master_nodes = nodes.select { |node| node.master? }
304
+ raise NoMasterError if master_nodes.empty?
305
+ raise MultipleMastersError.new(master_nodes) if master_nodes.size > 1
306
+ master_nodes.first
307
+ end
308
+
309
+ # Redirects all slaves to the specified node.
310
+ #
311
+ # @param [Node] node the node to which slaves are redirected
312
+ def redirect_slaves_to(node)
313
+ @slaves.dup.each do |slave|
314
+ begin
315
+ slave.make_slave!(node)
316
+ rescue NodeUnavailableError
317
+ logger.info("Failed to redirect unreachable slave #{slave} to #{node}")
318
+ force_unavailable_slave(slave)
319
+ end
320
+ end
321
+ end
322
+
323
+ # Forces a slave to be marked as unavailable.
324
+ #
325
+ # @param [Node] node the node to force as unavailable
326
+ def force_unavailable_slave(node)
327
+ @slaves.delete(node)
328
+ @unavailable << node unless @unavailable.include?(node)
329
+ end
330
+
331
+ # It's possible that a newly available node may have been restarted
332
+ # and completely lost its dynamically set run-time role by the node
333
+ # manager. This method ensures that the node resumes its role as
334
+ # determined by the manager.
335
+ #
336
+ # @param [Node] node the node to reconcile
337
+ def reconcile(node)
338
+ return if @master == node && node.master?
339
+ return if @master && node.slave_of?(@master)
340
+
341
+ logger.info("Reconciling node #{node}")
342
+ if @master == node && !node.master?
343
+ # we think the node is a master, but the node doesn't
344
+ node.make_master!
345
+ return
346
+ end
347
+
348
+ # verify that node is a slave for the current master
349
+ if @master && !node.slave_of?(@master)
350
+ node.make_slave!(@master)
351
+ end
352
+ end
353
+
354
+ # @return [Hash] the set of current nodes grouped by category
355
+ def current_nodes
356
+ {
357
+ :master => @master ? @master.to_s : nil,
358
+ :slaves => @slaves.map(&:to_s),
359
+ :unavailable => @unavailable.map(&:to_s)
360
+ }
361
+ end
362
+
363
+ # @return [Hash] the set of currently available/unavailable nodes as
364
+ # seen by this node manager instance
365
+ def node_availability_state
366
+ {
367
+ :available => Hash[@monitored_available.map { |k, v| [k.to_s, v] }],
368
+ :unavailable => @monitored_unavailable.map(&:to_s)
369
+ }
370
+ end
371
+
372
+ # Deletes the znode path containing the redis nodes.
373
+ #
374
+ # @param [String] path the znode path to delete
375
+ def delete_path(path)
376
+ @zk.delete(path)
377
+ logger.info("Deleted ZK node #{path}")
378
+ rescue ZK::Exceptions::NoNode => ex
379
+ logger.info("Tried to delete missing znode: #{ex.inspect}")
380
+ end
381
+
382
+ # Creates a znode path.
383
+ #
384
+ # @param [String] path the znode path to create
385
+ # @param [Hash] options the options used to create the path
386
+ # @option options [String] :initial_value an initial value for the znode
387
+ # @option options [Boolean] :ephemeral true if node is ephemeral, false otherwise
388
+ def create_path(path, options = {})
389
+ unless @zk.exists?(path)
390
+ @zk.create(path,
391
+ options[:initial_value],
392
+ :ephemeral => options.fetch(:ephemeral, false))
393
+ logger.info("Created ZK node #{path}")
394
+ end
395
+ rescue ZK::Exceptions::NodeExists
396
+ # best effort
397
+ end
398
+
399
+ # Writes state to a particular znode path.
400
+ #
401
+ # @param [String] path the znode path that should be written to
402
+ # @param [String] value the value to write to the znode
403
+ # @param [Hash] options the default options to be used when creating the node
404
+ # @note the path will be created if it doesn't exist
405
+ def write_state(path, value, options = {})
406
+ create_path(path, options.merge(:initial_value => value))
407
+ @zk.set(path, value)
408
+ end
409
+
410
+ # Handles a manual failover znode update.
411
+ #
412
+ # @param [ZK::Event] event the ZK event to handle
413
+ def handle_manual_failover_update(event)
414
+ if event.node_created? || event.node_changed?
415
+ perform_manual_failover
416
+ end
417
+ rescue => ex
418
+ logger.error("Error scheduling a manual failover: #{ex.inspect}")
419
+ logger.error(ex.backtrace.join("\n"))
420
+ ensure
421
+ @zk.stat(manual_failover_path, :watch => true)
422
+ end
423
+
424
+ # Produces a FQDN id for this Node Manager.
425
+ #
426
+ # @return [String] the FQDN for this Node Manager
427
+ def manager_id
428
+ @manager_id ||= [
429
+ Socket.gethostbyname(Socket.gethostname)[0],
430
+ Process.pid
431
+ ].join('-')
432
+ end
433
+
434
+ # Writes the current master list of redis nodes. This method is only invoked
435
+ # if this node manager instance is the master/primary manager.
436
+ def write_current_redis_nodes
437
+ write_state(redis_nodes_path, encode(current_nodes))
438
+ end
439
+
440
+ # Writes the current monitored list of redis nodes. This method is always
441
+ # invoked by all running node managers.
442
+ def write_current_monitored_state
443
+ write_state(current_state_path, encode(node_availability_state), :ephemeral => true)
444
+ end
445
+
446
+ # @return [String] root path for current node manager state
447
+ def current_state_root
448
+ "#{@root_znode}/manager_node_state"
449
+ end
450
+
451
+ # @return [String] the znode path for this node manager's view
452
+ # of available nodes
453
+ def current_state_path
454
+ "#{current_state_root}/#{manager_id}"
455
+ end
456
+
457
+ # @return [String] the znode path for the master redis nodes config
458
+ def redis_nodes_path
459
+ "#{@root_znode}/nodes"
460
+ end
461
+
462
+ # @return [String] root path for current node manager lock
463
+ def current_lock_path
464
+ "#{@root_znode}/master_redis_node_manager_lock"
465
+ end
466
+
467
+ # @return [String] the znode path used for performing manual failovers
468
+ def manual_failover_path
469
+ ManualFailover.path(@root_znode)
470
+ end
471
+
472
+ # @return [Boolean] true if this node manager is the master, false otherwise
473
+ def master_manager?
474
+ @master_manager
475
+ end
476
+
477
+ # Used to update the master node manager state. These states are only handled if
478
+ # this node manager instance is serving as the master manager.
479
+ #
480
+ # @param [Node] node the node to handle
481
+ # @param [Hash<Node, NodeSnapshot>] snapshots the current set of snapshots
482
+ def update_master_state(node, snapshots)
483
+ state = @node_strategy.determine_state(node, snapshots)
484
+ case state
485
+ when :unavailable
486
+ handle_unavailable(node, snapshots)
487
+ when :available
488
+ if node.syncing_with_master?
489
+ handle_syncing(node, snapshots)
490
+ else
491
+ handle_available(node, snapshots)
492
+ end
493
+ else
494
+ raise InvalidNodeStateError.new(node, state)
495
+ end
496
+ rescue *ZK_ERRORS
497
+ # fail hard if this is a ZK connection-related error
498
+ raise
499
+ rescue => ex
500
+ logger.error("Error handling state report for #{[node, state].inspect}: #{ex.inspect}")
501
+ end
502
+
503
# Updates the current view of the world for this particular node
# manager instance. All node managers write this state regardless
# of whether they are the master manager or not.
#
# @param [Node] node the node to handle
# @param [Symbol] state the node state (:available or :unavailable)
# @param [Integer] latency an optional latency
def update_current_state(node, state, latency = nil)
  # Remember the previous view so we can roll back if a write fails.
  prev_unavailable = @monitored_unavailable.dup
  prev_available = @monitored_available.dup

  if state == :unavailable
    # Only persist state on an actual transition to unavailable.
    unless @monitored_unavailable.include?(node)
      @monitored_unavailable << node
      @monitored_available.delete(node)
      write_current_monitored_state
    end
  elsif state == :available
    previous_latency = @monitored_available[node]
    # Skip the write unless the node is newly available or its latency
    # grew past the configured threshold.
    if previous_latency.nil? || (latency - previous_latency) > LATENCY_THRESHOLD
      @monitored_available[node] = latency
      @monitored_unavailable.delete(node)
      write_current_monitored_state
    end
  else
    raise InvalidNodeStateError.new(node, state)
  end
rescue
  # if an error occurs, make sure that we rollback to the old state
  @monitored_unavailable = prev_unavailable
  @monitored_available = prev_available
  raise
end
537
+
538
# Fetches each currently running node manager's view of the
# world in terms of which nodes they think are available/unavailable.
#
# @return [Hash<String, Array>] a hash of node manager to host states
def fetch_node_manager_states
  @zk.children(current_state_root).each_with_object({}) do |child, states|
    full_path = "#{current_state_root}/#{child}"
    begin
      states[child] = symbolize_keys(decode(@zk.get(full_path).first))
    rescue ZK::Exceptions::NoNode
      # ignore, this is an edge case that can happen when a node manager
      # process dies while fetching its state
    rescue => ex
      logger.error("Failed to fetch states for #{full_path}: #{ex.inspect}")
    end
  end
end
557
+
558
# Builds current snapshots of nodes across all running node managers.
#
# A node manager's persisted state may lack the :available or :unavailable
# key (e.g., a partially written or malformed state znode). Guard against
# nil so that one bad entry cannot raise NoMethodError and abort snapshot
# building for every node.
#
# @return [Hash<Node, NodeSnapshot>] the snapshots for all nodes
def current_node_snapshots
  nodes = {}
  # Lazily create a snapshot per node on first access.
  snapshots = Hash.new { |h, k| h[k] = NodeSnapshot.new(k) }
  fetch_node_manager_states.each do |node_manager, states|
    available, unavailable = states.values_at(:available, :unavailable)
    (available || {}).each do |node_string, latency|
      node = nodes[node_string] ||= node_from(node_string)
      snapshots[node].viewable_by(node_manager, latency)
    end
    (unavailable || []).each do |node_string|
      node = nodes[node_string] ||= node_from(node_string)
      snapshots[node].unviewable_by(node_manager)
    end
  end

  snapshots
end
578
+
579
# Waits until this node manager becomes the master.
def wait_until_master
  logger.info('Waiting to become master Node Manager ...')

  with_lock do
    @master_manager = true
    [
      'Acquired master Node Manager lock.',
      "Configured node strategy #{@node_strategy.class}",
      "Configured failover strategy #{@failover_strategy.class}",
      "Required Node Managers to make a decision: #{@required_node_managers}"
    ].each { |message| logger.info(message) }
    manage_nodes
  end
end
592
+
593
# Manages the redis nodes by periodically processing snapshots.
#
# Runs until shutdown or until this instance loses its master-manager
# role. Raises NoMasterError once master promotion has been attempted
# more than MAX_PROMOTION_ATTEMPTS times without success.
def manage_nodes
  # Re-discover nodes, since the state of the world may have been changed
  # by the time we've become the primary node manager.
  discover_nodes

  # ensure that slaves are correctly pointing to this master
  redirect_slaves_to(@master)

  # Periodically update master config state.
  while running? && master_manager?
    # Verify we still hold the ZK master lock before acting on snapshots.
    @zk_lock.assert!
    sleep(CHECK_INTERVAL)

    @lock.synchronize do
      snapshots = current_node_snapshots
      # Only act when enough node managers are reporting, so a network
      # partition of managers cannot trigger a bogus failover.
      if ensure_sufficient_node_managers(snapshots)
        snapshots.each_key do |node|
          update_master_state(node, snapshots)
        end

        # flush current master state
        write_current_redis_nodes

        # check if we've exhausted our attempts to promote a master
        unless @master
          @master_promotion_attempts += 1
          raise NoMasterError if @master_promotion_attempts > MAX_PROMOTION_ATTEMPTS
        end
      end
    end
  end
end
626
+
627
# Creates a Node instance from a string.
#
# @param [String] node_string a string representation of a node (e.g., host:port)
# @return [Node] the Node representation, or nil when node_string is nil
def node_from(node_string)
  return nil if node_string.nil?
  parts = node_string.split(':', 2)
  Node.new(:host => parts[0], :port => parts[1], :password => @options[:password])
end
636
+
637
# Executes a block wrapped in a ZK exclusive lock.
#
# Blocks until the exclusive lock is acquired. Errors raised while
# acquiring the lock are re-raised only when the manager is still
# running; during shutdown the method simply returns. The lock is
# always released in the ensure clause, even if the block raises.
def with_lock
  # Reuse one locker instance across invocations.
  @zk_lock ||= @zk.locker(current_lock_path)

  begin
    @zk_lock.lock!(true)
  rescue Exception
    # handle shutdown case
    # NOTE(review): rescuing Exception (not StandardError) looks deliberate
    # here so that interrupt-style errors raised during shutdown are
    # swallowed; anything raised while still running is propagated.
    running? ? raise : return
  end

  if running?
    # Double-check we actually hold the lock before yielding.
    @zk_lock.assert!
    yield
  end
ensure
  if @zk_lock
    begin
      @zk_lock.unlock!
    rescue => ex
      # Best-effort release; log rather than mask the original error.
      logger.warn("Failed to release lock: #{ex.inspect}")
    end
  end
end
661
+
662
# Perform a manual failover to a redis node.
def perform_manual_failover
  @lock.synchronize do
    return unless running? && @master_manager && @zk_lock
    @zk_lock.assert!

    new_master = @zk.get(manual_failover_path, :watch => true).first
    return if new_master.nil? || new_master.empty?

    logger.info("Received manual failover request for: #{new_master}")
    logger.info("Current nodes: #{current_nodes.inspect}")

    snapshots = current_node_snapshots
    candidate = (new_master == ManualFailover::ANY_SLAVE) ? failover_strategy_candidate(snapshots) : node_from(new_master)

    if candidate.nil?
      logger.error('Failed to perform manual failover, no candidate found.')
    else
      handle_manual_failover(candidate, snapshots)
    end
  end
rescue => ex
  logger.error("Error handling manual failover: #{ex.inspect}")
  logger.error(ex.backtrace.join("\n"))
ensure
  # re-arm the watch on the manual failover path
  @zk.stat(manual_failover_path, :watch => true)
end
691
+
692
# @return [Boolean] true if running, false otherwise
def running?
  @lock.synchronize { @shutdown ? false : true }
end
696
+
697
# @return [String] a stringified version of redis nodes
def stringify_nodes(nodes)
  format('(%s)', nodes.join(', '))
end
701
+
702
# Determines if each snapshot has a sufficient number of node managers.
#
# @param [Hash<Node, Snapshot>] snapshots the current snapshots
# @return [Boolean] true if sufficient, false otherwise
def ensure_sufficient_node_managers(snapshots)
  sufficient = snapshots.reduce(true) do |all_ok, (node, snapshot)|
    visible_managers = snapshot.node_managers
    if visible_managers.size >= @required_node_managers
      all_ok
    else
      logger.error("Not enough Node Managers in snapshot for node #{node}. " +
        "Required: #{@required_node_managers}, " +
        "Available: #{visible_managers.size} #{visible_managers}")
      false
    end
  end

  # Log only on the transition from insufficient to sufficient.
  if sufficient && !@sufficient_node_managers
    logger.info("Required Node Managers are visible: #{@required_node_managers}")
  end

  @sufficient_node_managers = sufficient
end
725
+
726
# Invokes the configured failover strategy.
#
# @param [Hash<Node, NodeSnapshot>] snapshots the node snapshots
# @return [Node] a failover candidate
def failover_strategy_candidate(snapshots)
  # only include nodes that this master Node Manager can see
  visible_snapshots = snapshots.select { |_node, snapshot| snapshot.viewable_by?(manager_id) }

  logger.info('Attempting to find candidate from snapshots:')
  logger.info("\n" + visible_snapshots.values.join("\n"))
  @failover_strategy.find_candidate(visible_snapshots)
end
740
+ end
741
+ end