floss 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,366 @@
1
+ # encoding: utf-8
2
+
3
+ require 'floss/rpc/zmq'
4
+ require 'floss/log/simple'
5
+ require 'floss/log'
6
+ require 'floss/peer'
7
+ require 'floss/one_off_latch'
8
+ require 'floss/count_down_latch'
9
+ require 'floss/log_replicator'
10
+
11
# A single node in a Raft cluster, implemented as a Celluloid actor whose
# follower/candidate/leader roles are modelled with Celluloid::FSM.
#
# Section references in the comments below point to the Raft paper:
# Ongaro & Ousterhout, "In Search of an Understandable Consensus Algorithm".
class Floss::Node
  include Celluloid
  include Celluloid::FSM
  include Celluloid::Logger

  # Run the initializer on the actor's own thread so `async.run` is dispatched safely.
  execute_block_on_receiver :initialize
  finalizer :finalize

  # Followers only ever time out into candidacy.
  state(:follower, default: true, to: :candidate)

  # Becoming a candidate starts a new term and an election (Section 5.2).
  state(:candidate, to: [:leader, :follower]) do
    enter_new_term
    start_election
  end

  # A freshly elected leader immediately begins replicating its log.
  state(:leader, to: [:follower]) do
    start_log_replication
  end

  # Default broadcast time.
  # @see #broadcast_time
  BROADCAST_TIME = 0.020

  # Default election timeout.
  # @see #election_timeout
  ELECTION_TIMEOUT = (0.150..0.300)

  # @return [Floss::Log] The replicated log.
  attr_reader :log

  # @return [Fixnum] The latest term this node has seen (Section 5.1).
  attr_reader :current_term

  # @return [Floss::RPC::Server]
  attr_accessor :server

  DEFAULT_OPTIONS = {
    rpc: Floss::RPC::ZMQ,
    log: Floss::Log::Simple,
    run: true
  }.freeze

  # @param [Hash] options
  # @option options [String] :id A string identifying this node, often its RPC address.
  # @option options [Array<String>] :peers Identifiers of all peers in the cluster.
  # @option options [Module,Class] :rpc Namespace containing `Server` and `Client` classes.
  # @yield [command] Invoked whenever a committed log entry should be applied.
  def initialize(options = {}, &handler)
    # NOTE(review): bare `super` forwards options/handler up the ancestor
    # chain (Celluloid::FSM) — confirm that is intended.
    super

    @handler = handler
    @options = DEFAULT_OPTIONS.merge(options)
    @current_term = 0
    @ready_latch = Floss::OneOffLatch.new
    @running = false

    async.run if @options[:run]
  end

  # Starts the RPC server and arms the election timeout.
  # @raise [RuntimeError] if the node is already running.
  def run
    raise 'Already running' if @running

    @running = true
    @log = @options[:log].new

    # Link the server so a server crash also takes down this actor.
    self.server = link(rpc_server_class.new(id, &method(:handle_rpc)))
    @election_timeout = after(random_timeout) { on_election_timeout }
  end

  # Blocks until the node is ready for executing commands.
  def wait_until_ready
    @ready_latch.wait
  end

  # @return [Class] The server class of the configured RPC implementation.
  def rpc_server_class
    @options[:rpc].const_get('Server')
  end

  # @return [Class] The client class of the configured RPC implementation.
  def rpc_client_class
    @options[:rpc].const_get('Client')
  end

  # Returns this node's id.
  # @return [String]
  def id
    @options[:id]
  end

  # Returns peers in the cluster.
  # @return [Array<Floss::Peer>]
  def peers
    @peers ||= @options[:peers].map { |peer| Floss::Peer.new(peer, rpc_client_class: rpc_client_class) }
  end

  # Returns the cluster's quorum (a strict majority of all nodes).
  # @return [Fixnum]
  def cluster_quorum
    (cluster_size / 2) + 1
  end

  # Returns the number of nodes in the cluster (all peers plus this node).
  # @return [Fixnum]
  def cluster_size
    peers.size + 1
  end

  # The interval between heartbeats (in seconds). See Section 5.7.
  #
  # > The broadcast time must be an order of magnitude less than the election timeout so that leaders can reliably send
  # > the heartbeat messages required to keep followers from starting elections.
  #
  # @return [Float]
  def broadcast_time
    @options[:broadcast_time] || BROADCAST_TIME
  end

  # Randomized election timeout as defined in Section 5.2.
  #
  # This timeout is used in multiple ways:
  #
  # * If a follower does not receive any activity, it starts a new election.
  # * As a candidate, if the election does not resolve within this time, it is restarted.
  #
  # @return [Float]
  def random_timeout
    range = @options[:election_timeout] || ELECTION_TIMEOUT
    min, max = range.first, range.last
    # Kernel#rand with a Float bound yields a Float in [0, max - min).
    min + rand(max - min)
  end

  # Advances to the next term (or adopts the given one) and clears the
  # recorded vote, since a vote is only valid within a single term.
  def enter_new_term(new_term = nil)
    @current_term = (new_term || @current_term + 1)
    @voted_for = nil
  end

  # Prefix every line emitted through Celluloid::Logger with this node's id.
  %w(info debug warn error).each do |m|
    define_method(m) do |str|
      super("[#{id}] #{str}")
    end
  end

  # Define predicate methods (#follower?, #candidate?, #leader?) for each FSM state.
  states.each do |name, _|
    define_method(:"#{name}?") do
      self.state == name
    end
  end

  # Executes a command against the replicated state machine. The leader
  # replicates the entry and applies it locally; any other node forwards
  # the command to the current leader.
  # @raise [RuntimeError] if this node is not the leader and no leader is known yet.
  def execute(entry)
    if leader?
      entry = Floss::Log::Entry.new(entry, @current_term)

      # Replicate entry to all peers, then apply it.
      # TODO: Failure handling.
      @log_replicator.append(entry)
      @handler.call(entry.command) if @handler
    else
      raise "Cannot redirect command because leader is unknown." unless @leader_id
      leader = peers.find { |peer| peer.id == @leader_id }
      leader.execute(entry)
    end
  end

  # Blocks until a quorum of the cluster has committed the given log index.
  # NOTE(review): Floss::Peer does not define #signal_on_commit in this file —
  # confirm it is implemented elsewhere.
  def wait_for_quorum_commit(index)
    latch = Floss::CountDownLatch.new(cluster_quorum)
    peers.each { |peer| peer.signal_on_commit(index, latch) }
    latch.wait
  end

  # Dispatches an incoming RPC to the matching protected handle_* method,
  # aborting the caller (not this actor) on unknown commands.
  def handle_rpc(command, payload)
    handler = :"handle_#{command}"

    if respond_to?(handler, true)
      send(handler, payload)
    else
      abort ArgumentError.new('Unknown command.')
    end
  end

  protected

  # A command forwarded by a follower; only valid on the leader.
  def handle_execute(entry)
    raise 'Only the leader can accept commands.' unless leader?
    execute(entry)
  end

  # @param [Hash] request
  # @option message [Fixnum] :term The candidate's term.
  # @option message [String] :candidate_id The candidate requesting the vote.
  # @option message [Fixnum] :last_log_index Index of the candidate's last log entry.
  # @option message [Fixnum] :last_log_term Term of the candidate's last log entry.
  #
  # @return [Hash] response
  # @option response [Boolean] :vote_granted Whether the candidate's receives the vote.
  # @option response [Fixnum] :term This node's current term.
  def handle_vote_request(request)
    info("[RPC] Received VoteRequest: #{request}")

    term = request[:term]
    candidate_id = request[:candidate_id]

    # Reject candidates from stale terms (Section 5.1).
    if term < @current_term
      return {term: @current_term, vote_granted: false}
    end

    # A newer term forces this node to step down to follower.
    if term > @current_term
      enter_new_term(term)
      stop_log_replication if leader?
      transition(:follower) if candidate? || leader?
    end

    # Grant at most one vote per term, and only to candidates whose log is
    # at least as up-to-date as ours (Section 5.4.1).
    valid_candidate = @voted_for.nil? || @voted_for == candidate_id
    log_complete = log.complete?(request[:last_log_term], request[:last_log_index])

    vote_granted = (valid_candidate && log_complete)

    if vote_granted
      @voted_for = candidate_id
      # Granting a vote counts as activity: restart the election timer.
      @election_timeout.reset
    end

    return {term: @current_term, vote_granted: vote_granted}
  end

  # Handles an AppendEntries RPC: heartbeat and/or log replication from the leader.
  def handle_append_entries(payload)
    info("[RPC] Received AppendEntries: #{payload}")

    # Marks the node as ready for accepting commands.
    @ready_latch.signal

    term = payload[:term]

    # Reject RPCs with a lesser term.
    if term < @current_term
      return {term: @current_term, success: false}
    end

    # Accept terms greater than the local one.
    if term > @current_term
      enter_new_term(term)
    end

    # Step down if another node sends a valid AppendEntries RPC.
    stop_log_replication if leader?
    transition(:follower) if candidate? || leader?

    # Remember the leader.
    @leader_id = payload[:leader_id]

    # A valid AppendEntries RPC resets the election timeout.
    @election_timeout.reset

    # Append new entries only if our log matches the leader's at
    # prev_log_index/prev_log_term (consistency check, Section 5.3).
    # An empty entry list is a pure heartbeat and always succeeds.
    success = if payload[:entries].any?
      if log.validate(payload[:prev_log_index], payload[:prev_log_term])
        log.append(payload[:entries])
        true
      else
        false
      end
    else
      true
    end

    # Apply every newly committed entry to the state machine, in order.
    if payload[:commit_index] && @handler
      (@commit_index ? @commit_index + 1 : 0).upto(payload[:commit_index]) do |index|
        @handler.call(log[index].command) if @handler
      end
    end

    # NOTE(review): this overwrites @commit_index with nil when the payload
    # carries no :commit_index — confirm the leader always sends one.
    @commit_index = payload[:commit_index]

    unless success
      debug("[RPC] I did not accept AppendEntries: #{payload}")
    end

    return {term: @current_term, success: success}
  end

  # Fired when no leader activity was observed within the election timeout.
  def on_election_timeout
    # A follower that hears nothing starts an election (Section 5.2).
    if follower?
      transition(:candidate)
    end

    # A candidate whose election did not resolve restarts it in a new term.
    if candidate?
      enter_new_term
      transition(:candidate)
    end
  end

  # @group Candidate methods

  # Requests votes from all peers and becomes leader once a quorum granted
  # theirs. Blocks until enough votes arrived.
  def start_election
    # NOTE(review): the latch waits for `cluster_quorum` granted peer votes and
    # does not count the candidate's own implicit vote — confirm intended
    # (Raft only needs quorum - 1 additional votes).
    @votes = Floss::CountDownLatch.new(cluster_quorum)
    collect_votes

    @votes.wait

    transition(:leader)

    # Marks the node as ready for accepting commands.
    @ready_latch.signal
  end

  # Sends a VoteRequest to every peer in parallel (via async tasks).
  def collect_votes
    payload = {
      term: @current_term,
      last_log_term: log.last_term,
      last_log_index: log.last_index,
      candidate_id: id
    }

    peers.each do |peer|
      async.request_vote(peer, payload)
    end
  end

  # Requests a vote from a single peer and tallies the response.
  # TODO: The candidate should retry the RPC if a peer doesn't answer.
  def request_vote(peer, payload)
    response = begin
      peer.request_vote(payload)
    rescue Floss::TimeoutError
      # NOTE(review): unbounded retry; a permanently dead peer keeps this task alive.
      debug("A vote request to #{peer.id} timed out. Retrying.")
      retry
    end

    term = response[:term]

    # Ignore old responses.
    return if @current_term > term

    # Step down when a higher term is detected.
    # Accept votes from peers in the same term.
    # Ignore votes from peers with an older term.
    if @current_term < term
      enter_new_term(term)
      transition(:follower)

      return
    end

    @votes.signal if response[:vote_granted]
  end

  # @group Leader methods

  # Starts the background actor that replicates the log to followers.
  def start_log_replication
    raise "A log replicator is already running." if @log_replicator
    @log_replicator = link Floss::LogReplicator.new(current_actor)
  end

  # Stops and discards the log replicator (used when stepping down from leader).
  def stop_log_replication
    @log_replicator.terminate
    @log_replicator = nil
  end

  # Celluloid finalizer: ensure the replicator dies with this actor.
  def finalize
    @log_replicator.terminate if @log_replicator
  end
end
@@ -0,0 +1,23 @@
1
+ require 'floss'
2
+
3
# A latch that can be released exactly once: the first call to #signal wakes
# every waiter, and any #wait after that returns immediately.
class Floss::OneOffLatch
  attr_accessor :ready
  attr_accessor :condition

  def initialize
    @ready = false
    @condition = Celluloid::Condition.new
  end

  # Releases the latch and wakes all current waiters.
  # Subsequent calls are no-ops.
  def signal
    return if @ready

    @ready = true
    @condition.broadcast
  end

  # Blocks the calling task until the latch has been released; returns
  # immediately if it already has been.
  def wait
    @condition.wait unless @ready
  end
end
@@ -0,0 +1,32 @@
1
+ require 'floss'
2
+ require 'floss/rpc/zmq'
3
+
4
+ # A peer is a remote node within the same cluster.
5
# A peer is a remote node within the same cluster.
class Floss::Peer
  include Celluloid::Logger

  # @return [String] Remote address of the peer.
  attr_accessor :id

  # @return [Floss::RPC::Client]
  attr_accessor :client

  # @param id [String] the peer's remote address
  # @param opts [Hash]
  # @option opts [Class] :rpc_client_class RPC client implementation
  #   (defaults to the ZMQ client)
  def initialize(id, opts = {})
    @id = id

    klass = opts[:rpc_client_class] || Floss::RPC::ZMQ::Client
    @client = klass.new(id)
  end

  # Forwards a command to the peer for execution.
  def execute(payload)
    @client.call(:execute, payload)
  end

  # Sends an AppendEntries RPC (heartbeat/replication) to the peer.
  def append_entries(payload)
    @client.call(:append_entries, payload)
  end

  # Sends a VoteRequest RPC to the peer.
  def request_vote(payload)
    @client.call(:vote_request, payload)
  end
end
@@ -0,0 +1,25 @@
1
+ require 'celluloid/proxies/abstract_proxy'
2
+ require 'floss/node'
3
+
4
+ # A {Floss::Proxy} wraps a FSM and runs it on a cluster.
5
# A {Floss::Proxy} wraps a FSM and runs it on a cluster.
class Floss::Proxy < Celluloid::AbstractProxy
  # @param [Object] fsm The fsm to expose.
  # @param [Hash] options Options as used by {Floss::Node}.
  def initialize(fsm, options)
    @fsm = fsm
    @node = ::Floss::Node.new(options) { |cmd| fsm.send(*cmd) }
  end

  # Forwards any method the FSM exposes through the cluster, blocking until
  # the node is able to accept commands.
  def method_missing(method, *args, &block)
    raise ArgumentError, "Can not accept blocks." if block_given?

    if respond_to?(method)
      @node.wait_until_ready
      @node.execute([method, *args])
    else
      super
    end
  end

  # The proxy answers for exactly the methods the wrapped FSM answers for.
  def respond_to?(method, include_private = false)
    @fsm.respond_to?(method, include_private)
  end
end
25
+
@@ -0,0 +1,22 @@
1
+ require 'floss'
2
+
3
# Transport-agnostic RPC abstractions; concrete transports (e.g. ZMQ)
# provide their own Client and Server implementations.
module Floss::RPC
  # Timeout (in seconds) applied to RPC calls.
  TIMEOUT = 0.3

  # Abstract client interface: issues a command with a payload to a remote node.
  class Client
    # @param command [Symbol] name of the remote command
    # @param payload [Object] arguments for the command
    # @raise [NotImplementedError] subclasses must implement this
    def call(command, payload)
      raise NotImplementedError
    end
  end

  # Listens to a ZMQ Socket and handles commands from peers.
  class Server
    attr_accessor :address
    attr_accessor :handler

    # @param address [String] address to listen on
    # @param handler [Proc] invoked for each incoming command
    def initialize(address, &handler)
      @address = address
      @handler = handler
    end
  end
end