rloss 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,366 @@
1
+ # encoding: utf-8
2
+
3
+ require 'floss/rpc/zmq'
4
+ require 'floss/log/simple'
5
+ require 'floss/log'
6
+ require 'floss/peer'
7
+ require 'floss/one_off_latch'
8
+ require 'floss/count_down_latch'
9
+ require 'floss/log_replicator'
10
+
11
+ class Floss::Node
12
+ include Celluloid
13
+ include Celluloid::FSM
14
+ include Celluloid::Logger
15
+
16
+ execute_block_on_receiver :initialize
17
+ finalizer :finalize
18
+
19
# Nodes start out as followers; a follower may only become a candidate.
state :follower, default: true, to: :candidate

# Entering the candidate state opens a new term and starts an election.
state :candidate, to: %i[leader follower] do
  enter_new_term
  start_election
end

# Entering the leader state starts replicating the log.
state :leader, to: %i[follower] do
  start_log_replication
end
29
+
30
# Heartbeat interval in seconds, used when no :broadcast_time option is given.
# @see #broadcast_time
BROADCAST_TIME = 0.020

# Bounds in seconds of the election timeout, used when no :election_timeout
# option is given.
# @see #random_timeout
ELECTION_TIMEOUT = 0.150..0.300

# @return [Floss::Log] The replicated log (created by #run).
attr_reader :log

# @return [Integer] The term this node is currently in (starts at 0).
attr_reader :current_term

# @return [Floss::RPC::Server]
attr_accessor :server

# Option values that apply unless overridden by the hash given to #initialize.
DEFAULT_OPTIONS = {
  rpc: Floss::RPC::ZMQ,
  log: Floss::Log::Simple,
  run: true
}.freeze
51
+
52
# Sets up the node's initial state and, unless disabled, schedules #run.
#
# @param [Hash] options Merged on top of DEFAULT_OPTIONS.
# @option options [String] :id A string identifying this node, often its RPC address.
# @option options [Array<String>] :peers Identifiers of all peers in the cluster.
# @option options [Module,Class] :rpc Namespace containing `Server` and `Client` classes.
# @option options [Class] :log Class instantiated (without arguments) as the replicated log.
# @option options [Boolean] :run When truthy, #run is invoked asynchronously.
# @option options [Float] :broadcast_time Overrides BROADCAST_TIME.
# @option options [Range] :election_timeout Overrides ELECTION_TIMEOUT.
# @yield [command] Stored as the handler that is called with each command to apply.
def initialize(options = {}, &handler)
  super

  @options = DEFAULT_OPTIONS.merge(options)
  @handler = handler
  @running = false
  @current_term = 0
  @ready_latch = Floss::OneOffLatch.new

  async.run if @options[:run]
end
67
+
68
# Starts the node: creates the log, starts the linked RPC server and arms the
# election timer.
#
# @raise [RuntimeError] if the node has already been started.
# @return [Object] The timer returned by `after`.
def run
  if @running
    raise 'Already running'
  end

  @running = true
  @log = @options[:log].new

  # Incoming RPCs are dispatched through #handle_rpc.
  rpc_server = rpc_server_class.new(id, &method(:handle_rpc))
  self.server = link(rpc_server)

  @election_timeout = after(random_timeout) { on_election_timeout }
end
77
+
78
# Waits on the ready latch, which is signalled when this node wins an election
# or receives an AppendEntries RPC.
def wait_until_ready
  @ready_latch.wait
end
82
+
83
# Looks up the `Server` constant inside the configured :rpc namespace.
# @return [Class]
def rpc_server_class
  rpc_namespace = @options[:rpc]
  rpc_namespace.const_get(:Server)
end
86
+
87
# Looks up the `Client` constant inside the configured :rpc namespace.
# @return [Class]
def rpc_client_class
  rpc_namespace = @options[:rpc]
  rpc_namespace.const_get(:Client)
end
90
+
91
# The identifier this node was configured with (the :id option).
# @return [String, nil] nil when no :id option was given.
def id
  @options.fetch(:id, nil)
end
96
+
97
# Peers of this node, built once from the :peers option and then memoized.
# @return [Array<Floss::Peer>]
def peers
  return @peers if @peers

  @peers = @options[:peers].map do |peer_id|
    Floss::Peer.new(peer_id, rpc_client_class: rpc_client_class)
  end
end
102
+
103
# Smallest number of nodes that forms a majority of the cluster.
# @return [Integer]
def cluster_quorum
  half = cluster_size / 2
  half + 1
end
108
+
109
# Total number of nodes in the cluster: every peer plus this node.
# @return [Integer]
def cluster_size
  peers.length + 1
end
114
+
115
# Interval between heartbeats, in seconds (see Section 5.7).
#
# > The broadcast time must be an order of magnitude less than the election timeout so that leaders can reliably send
# > the heartbeat messages required to keep followers from starting elections.
#
# @return [Float] The :broadcast_time option when set, BROADCAST_TIME otherwise.
def broadcast_time
  configured = @options[:broadcast_time]
  configured || BROADCAST_TIME
end
124
+
125
# Randomized election timeout as defined in Section 5.2.
#
# This timeout is used in multiple ways:
#
# * If a follower does not receive any activity, it starts a new election.
# * As a candidate, if the election does not resolve within this time, it is restarted.
#
# The bounds come from the :election_timeout option, falling back to
# ELECTION_TIMEOUT; only the first and last element of that value are used.
#
# @return [Float] A value between the lower and the upper bound (in seconds).
def random_timeout
  range = @options[:election_timeout] || ELECTION_TIMEOUT
  min = range.first.to_f
  max = range.last.to_f

  # A degenerate (or inverted) interval leaves nothing to randomize.
  return min unless max > min

  # Kernel#rand truncates a Float argument to an Integer, so `rand(max - min)`
  # with a sub-second span is `rand(0)` and yields a value in [0, 1) instead
  # of [0, max - min). Drawing from a Float range stays within the bounds.
  rand(min..max)
end
138
+
139
# Moves this node to another term and forgets the vote cast in the old one.
#
# @param [Integer, nil] new_term The term to adopt; when omitted the current
#   term is incremented by one.
# @return [nil]
def enter_new_term(new_term = nil)
  @current_term = new_term ? new_term : @current_term + 1
  @voted_for = nil
end
143
+
144
# Overrides the inherited logging helpers so that every message is prefixed
# with this node's id.
%w[info debug warn error].each do |level|
  define_method(level) { |message| super("[#{id}] #{message}") }
end
149
+
150
# Defines one predicate per declared FSM state (e.g. `leader?`) that tells
# whether the node is currently in that state.
states.each do |state_name, _definition|
  define_method(:"#{state_name}?") { self.state == state_name }
end
155
+
156
# Executes a command on the cluster.
#
# On the leader the command is wrapped in a log entry for the current term,
# handed to the log replicator and then passed to the handler (if any). On any
# other node the command is forwarded to the peer remembered as leader.
#
# @param [Object] entry The command to execute.
# @raise [RuntimeError] if this node is not the leader and either knows no
#   leader or the remembered leader is not among its peers.
def execute(entry)
  if leader?
    entry = Floss::Log::Entry.new(entry, @current_term)

    # Replicate entry to all peers, then apply it.
    # TODO: Failure handling.
    @log_replicator.append(entry)
    @handler.call(entry.command) if @handler
  else
    raise "Cannot redirect command because leader is unknown." unless @leader_id

    leader = peers.find { |peer| peer.id == @leader_id }
    # Without this check an unmatched leader id surfaces as a NoMethodError on nil.
    raise "Cannot redirect command because leader #{@leader_id} is not a known peer." unless leader

    leader.execute(entry)
  end
end
170
+
171
# Registers a latch sized to the cluster quorum with every peer for the given
# log index, then waits on that latch.
#
# NOTE(review): the Floss::Peer class shipped in floss/peer.rb does not define
# `signal_on_commit` — confirm whether this method is still in use.
#
# @param [Integer] index The log index to wait for.
def wait_for_quorum_commit(index)
  quorum_latch = Floss::CountDownLatch.new(cluster_quorum)

  peers.each do |peer|
    peer.signal_on_commit(index, quorum_latch)
  end

  quorum_latch.wait
end
176
+
177
# Dispatches an incoming RPC to the matching `handle_<command>` method
# (private and protected methods included).
#
# @param [String, Symbol] command Name of the RPC.
# @param [Object] payload Passed to the handler unchanged.
# @return [Object] Whatever the handler returns.
def handle_rpc(command, payload)
  handler = :"handle_#{command}"

  return send(handler, payload) if respond_to?(handler, true)

  abort ArgumentError.new('Unknown command.')
end
186
+
187
+ protected
188
+
189
# RPC handler for commands forwarded by other nodes.
#
# @param [Object] entry The command to execute.
# @raise [RuntimeError] if this node is not the leader.
def handle_execute(entry)
  return execute(entry) if leader?

  raise 'Only the leader can accept commands.'
end
193
+
194
# RPC handler for a candidate asking for this node's vote.
#
# @param [Hash] request
# @option request [Integer] :term The candidate's term.
# @option request [String] :candidate_id The candidate requesting the vote.
# @option request [Integer] :last_log_index Index of the candidate's last log entry.
# @option request [Integer] :last_log_term Term of the candidate's last log entry.
#
# @return [Hash] response
# @option response [Boolean] :vote_granted Whether the candidate receives the vote.
# @option response [Integer] :term This node's current term.
def handle_vote_request(request)
  info("[RPC] Received VoteRequest: #{request}")

  candidate_term = request[:term]
  candidate = request[:candidate_id]

  # Candidates from an older term never get a vote.
  return {term: @current_term, vote_granted: false} if candidate_term < @current_term

  # A newer term is adopted; a leader or candidate steps down.
  if candidate_term > @current_term
    enter_new_term(candidate_term)
    stop_log_replication if leader?
    transition(:follower) if candidate? || leader?
  end

  # One vote per term: only grant if no vote was cast yet or it went to this
  # very candidate, and the log accepts the candidate's last term and index.
  valid_candidate = @voted_for.nil? || @voted_for == candidate
  log_complete = log.complete?(request[:last_log_term], request[:last_log_index])
  vote_granted = valid_candidate && log_complete

  if vote_granted
    @voted_for = candidate
    @election_timeout.reset
  end

  {term: @current_term, vote_granted: vote_granted}
end
231
+
232
# RPC handler for AppendEntries (log replication and heartbeats).
#
# Committed entries are only applied when the RPC was accepted, and the local
# commit index only ever moves forward: a rejected RPC or a payload without a
# (newer) commit index leaves it untouched, so no command is applied twice.
#
# @param [Hash] payload
# @option payload [Integer] :term The sender's term.
# @option payload [String] :leader_id Id of the sending leader.
# @option payload [Array] :entries Entries to append (empty for heartbeats).
# @option payload [Object] :prev_log_index Passed to `log.validate`.
# @option payload [Object] :prev_log_term Passed to `log.validate`.
# @option payload [Integer, nil] :commit_index Highest index the leader considers committed.
#
# @return [Hash] response
# @option response [Integer] :term This node's current term.
# @option response [Boolean] :success Whether the RPC was accepted.
def handle_append_entries(payload)
  info("[RPC] Received AppendEntries: #{payload}")

  # Marks the node as ready for accepting commands.
  @ready_latch.signal

  term = payload[:term]

  # Reject RPCs with a lesser term.
  return {term: @current_term, success: false} if term < @current_term

  # Accept terms greater than the local one.
  enter_new_term(term) if term > @current_term

  # Step down if another node sends a valid AppendEntries RPC.
  stop_log_replication if leader?
  transition(:follower) if candidate? || leader?

  # Remember the leader.
  @leader_id = payload[:leader_id]

  # A valid AppendEntries RPC resets the election timeout.
  @election_timeout.reset

  success = append_received_entries(payload)

  if success
    apply_committed_entries(payload[:commit_index])
  else
    debug("[RPC] I did not accept AppendEntries: #{payload}")
  end

  {term: @current_term, success: success}
end

# Appends the entries of an AppendEntries payload if the log validates the
# preceding index and term. A payload without entries is always accepted.
#
# @return [Boolean] Whether the payload was accepted.
def append_received_entries(payload)
  entries = payload[:entries]
  return true unless entries.any?
  return false unless log.validate(payload[:prev_log_index], payload[:prev_log_term])

  log.append(entries)
  true
end

# Passes every entry between the local commit index and the leader's commit
# index to the handler (if any) and advances the local commit index.
def apply_committed_entries(leader_commit)
  return unless leader_commit
  return if @commit_index && leader_commit <= @commit_index

  if @handler
    first_unapplied = @commit_index ? @commit_index + 1 : 0
    first_unapplied.upto(leader_commit) do |index|
      @handler.call(log[index].command)
    end
  end

  @commit_index = leader_commit
end
285
+
286
# Called when the election timer fires.
#
# * A follower becomes a candidate; entering that state opens the new term.
# * A node that is already a candidate opens another term and requests the
#   candidate state again.
#
# The two cases are mutually exclusive: a follower that has just become a
# candidate must not also run the candidate branch, which would bump the term
# a second time.
def on_election_timeout
  if follower?
    transition(:candidate)
  elsif candidate?
    enter_new_term
    # NOTE(review): this requests the state the node is already in — confirm
    # that the FSM actually restarts the election here.
    transition(:candidate)
  end
end
296
+
297
# @group Candidate methods

# Runs an election: requests votes from all peers, waits until a quorum of
# votes has been signalled, then becomes leader and marks the node as ready
# for accepting commands.
#
# NOTE(review): the latch is sized to the cluster quorum but is only signalled
# for votes granted by peers; the candidate does not appear to count a vote
# for itself — confirm this is intended.
def start_election
  ballot_box = Floss::CountDownLatch.new(cluster_quorum)
  @votes = ballot_box
  collect_votes

  ballot_box.wait

  transition(:leader)

  # Marks the node as ready for accepting commands.
  @ready_latch.signal
end
310
+
311
# Asynchronously sends a vote request for the current term to every peer.
#
# @return [Array<Floss::Peer>] The peers that were asked.
def collect_votes
  ballot = {
    term: @current_term,
    last_log_term: log.last_term,
    last_log_index: log.last_index,
    candidate_id: id
  }

  peers.each { |peer| async.request_vote(peer, ballot) }
end
323
+
324
# Asks a single peer for its vote and processes the answer.
#
# A request that times out is retried, but only while this node is still a
# candidate in the term the request was issued for; once the election is over
# retrying would go on forever without any effect.
#
# @param [Floss::Peer] peer The peer to ask.
# @param [Hash] payload The vote request (see #collect_votes).
def request_vote(peer, payload)
  response = begin
    peer.request_vote(payload)
  rescue Floss::TimeoutError
    unless candidate? && @current_term == payload[:term]
      debug("A vote request to #{peer.id} timed out. The election is over, giving up.")
      return
    end

    debug("A vote request to #{peer.id} timed out. Retrying.")
    retry
  end

  term = response[:term]

  # Ignore old responses.
  return if @current_term > term

  # Step down when a higher term is detected.
  if @current_term < term
    enter_new_term(term)
    transition(:follower)

    return
  end

  # Accept votes from peers in the same term.
  @votes.signal if response[:vote_granted]
end
350
+
351
# @group Leader methods

# Creates a log replicator for this actor and links it to the node.
#
# @raise [RuntimeError] if a replicator already exists.
def start_log_replication
  if @log_replicator
    raise "A log replicator is already running."
  end

  replicator = Floss::LogReplicator.new(current_actor)
  @log_replicator = link(replicator)
end
357
+
358
# Terminates the running log replicator and forgets it.
# Expects a replicator to be present.
#
# @return [nil]
def stop_log_replication
  replicator = @log_replicator
  replicator.terminate
  @log_replicator = nil
end
362
+
363
# Registered as the actor's finalizer: terminates the log replicator if one
# is running.
def finalize
  replicator = @log_replicator
  replicator.terminate if replicator
end
366
+ end
@@ -0,0 +1,23 @@
1
+ require 'floss'
2
+
3
# A latch that can be released exactly once: waiters wait on a condition until
# the first call to #signal; once released, #wait returns immediately.
class Floss::OneOffLatch
  # `ready` tells whether the latch has been released; `condition` is the
  # condition waiters wait on until then.
  attr_accessor :ready, :condition

  def initialize
    @ready = false
    @condition = Celluloid::Condition.new
  end

  # Releases the latch and wakes up all waiters. Further calls do nothing.
  def signal
    unless ready
      self.ready = true
      condition.broadcast
    end
  end

  # Waits until the latch has been released; returns right away if it already was.
  def wait
    condition.wait unless ready
  end
end
@@ -0,0 +1,32 @@
1
+ require 'floss'
2
+ require 'floss/rpc/zmq'
3
+
4
# A peer is a remote node within the same cluster. All communication goes
# through an RPC client created for the peer's id.
class Floss::Peer
  include Celluloid::Logger

  # @return [String] Remote address of the peer.
  attr_accessor :id

  # @return [Floss::RPC::Client]
  attr_accessor :client

  # @param [String] id Identifier of the peer; also handed to the RPC client.
  # @param [Hash] opts
  # @option opts [Class] :rpc_client_class Client class to instantiate
  #   (defaults to Floss::RPC::ZMQ::Client).
  def initialize(id, opts = {})
    self.id = id
    self.client = (opts[:rpc_client_class] || Floss::RPC::ZMQ::Client).new(id)
  end

  # Sends an `execute` RPC carrying the given payload.
  def execute(payload)
    client.call(:execute, payload)
  end

  # Sends an `append_entries` RPC carrying the given payload.
  def append_entries(payload)
    client.call(:append_entries, payload)
  end

  # Sends a `vote_request` RPC carrying the given payload.
  def request_vote(payload)
    client.call(:vote_request, payload)
  end
end
@@ -0,0 +1,25 @@
1
+ require 'celluloid/proxies/abstract_proxy'
2
+ require 'floss/node'
3
+
4
# A {Floss::Proxy} wraps a FSM and runs it on a cluster: calls to methods the
# FSM responds to are executed through a {Floss::Node}, whose handler applies
# each command to the FSM.
class Floss::Proxy < Celluloid::AbstractProxy
  # @param [Object] fsm The fsm to expose.
  # @param [Hash] options Options as used by {Floss::Node}.
  def initialize(fsm, options)
    @fsm = fsm
    # Commands are arrays of a method name followed by its arguments.
    @node = ::Floss::Node.new(options) do |command|
      fsm.send(*command)
    end
  end

  # Executes all methods exposed by the FSM in the cluster.
  #
  # @raise [ArgumentError] if a block is given.
  def method_missing(method, *args, &block)
    raise ArgumentError, "Can not accept blocks." if block

    if respond_to?(method)
      @node.wait_until_ready
      @node.execute([method, *args])
    else
      super
    end
  end

  # The proxy responds to exactly what the wrapped FSM responds to.
  def respond_to?(method, include_private = false)
    @fsm.respond_to?(method, include_private)
  end
end
25
+
@@ -0,0 +1,22 @@
1
+ require 'floss'
2
+
3
# Base classes shared by the RPC transports.
module Floss::RPC
  # NOTE(review): presumably the RPC timeout in seconds — it is not used in
  # this file; confirm against the transports.
  TIMEOUT = 0.3

  # Interface of an RPC client; transports are expected to override #call.
  class Client
    # @param [Symbol] command Name of the remote command.
    # @param [Object] payload Data sent along with the command.
    # @raise [NotImplementedError] always, in this base class.
    def call(command, payload)
      raise NotImplementedError
    end
  end

  # Base class of an RPC server: it only stores the address and the handler
  # block it was created with.
  class Server
    attr_accessor :address, :handler

    def initialize(address, &handler)
      @address = address
      @handler = handler
    end
  end
end