rloss 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +19 -0
- data/.rspec +1 -0
- data/.travis.yml +3 -0
- data/.yardopts +3 -0
- data/Gemfile +13 -0
- data/LICENSE +22 -0
- data/README.md +102 -0
- data/Rakefile +8 -0
- data/examples/distributed_hash.rb +36 -0
- data/lib/floss.rb +7 -0
- data/lib/floss/count_down_latch.rb +23 -0
- data/lib/floss/latch.rb +53 -0
- data/lib/floss/log.rb +69 -0
- data/lib/floss/log/simple.rb +55 -0
- data/lib/floss/log_replicator.rb +148 -0
- data/lib/floss/node.rb +366 -0
- data/lib/floss/one_off_latch.rb +23 -0
- data/lib/floss/peer.rb +32 -0
- data/lib/floss/proxy.rb +25 -0
- data/lib/floss/rpc.rb +22 -0
- data/lib/floss/rpc/in_memory.rb +39 -0
- data/lib/floss/rpc/zmq.rb +120 -0
- data/lib/floss/test_helper.rb +15 -0
- data/lib/floss/version.rb +3 -0
- data/rloss.gemspec +23 -0
- data/spec/functional/log_spec.rb +59 -0
- data/spec/functional/node_spec.rb +10 -0
- data/spec/functional/rpc_spec.rb +76 -0
- data/spec/spec_helper.rb +9 -0
- data/test.rb +51 -0
- metadata +136 -0
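
Note the naming split in the listing: the package is published as rloss (see data/rloss.gemspec), but all of its code lives under the floss namespace (data/lib/floss.rb and below). A minimal Gemfile sketch, on the assumption that the require path therefore differs from the gem name:

    # Gemfile (sketch): gem name and require path differ, per the listing above.
    gem 'rloss', require: 'floss'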
data/lib/floss/node.rb
ADDED
@@ -0,0 +1,366 @@
# encoding: utf-8

require 'floss/rpc/zmq'
require 'floss/log/simple'
require 'floss/log'
require 'floss/peer'
require 'floss/one_off_latch'
require 'floss/count_down_latch'
require 'floss/log_replicator'

class Floss::Node
  include Celluloid
  include Celluloid::FSM
  include Celluloid::Logger

  execute_block_on_receiver :initialize
  finalizer :finalize

  state(:follower, default: true, to: :candidate)

  state(:candidate, to: [:leader, :follower]) do
    enter_new_term
    start_election
  end

  state(:leader, to: [:follower]) do
    start_log_replication
  end

  # Default broadcast time.
  # @see #broadcast_time
  BROADCAST_TIME = 0.020

  # Default election timeout.
  # @see #election_timeout
  ELECTION_TIMEOUT = (0.150..0.300)

  # @return [Floss::Log] The replicated log.
  attr_reader :log

  attr_reader :current_term

  # @return [Floss::RPC::Server]
  attr_accessor :server

  DEFAULT_OPTIONS = {
    rpc: Floss::RPC::ZMQ,
    log: Floss::Log::Simple,
    run: true
  }.freeze

  # @param [Hash] options
  # @option options [String] :id A string identifying this node, often its RPC address.
  # @option options [Array<String>] :peers Identifiers of all peers in the cluster.
  # @option options [Module,Class] :rpc Namespace containing `Server` and `Client` classes.
  def initialize(options = {}, &handler)
    super

    @handler = handler
    @options = DEFAULT_OPTIONS.merge(options)
    @current_term = 0
    @ready_latch = Floss::OneOffLatch.new
    @running = false

    async.run if @options[:run]
  end

  def run
    raise 'Already running' if @running

    @running = true
    @log = @options[:log].new

    self.server = link(rpc_server_class.new(id, &method(:handle_rpc)))
    @election_timeout = after(random_timeout) { on_election_timeout }
  end

  # Blocks until the node is ready for executing commands.
  def wait_until_ready
    @ready_latch.wait
  end

  def rpc_server_class
    @options[:rpc].const_get('Server')
  end

  def rpc_client_class
    @options[:rpc].const_get('Client')
  end

  # Returns this node's id.
  # @return [String]
  def id
    @options[:id]
  end

  # Returns peers in the cluster.
  # @return [Array<Floss::Peer>]
  def peers
    @peers ||= @options[:peers].map { |peer| Floss::Peer.new(peer, rpc_client_class: rpc_client_class) }
  end

  # Returns the cluster's quorum.
  # @return [Fixnum]
  def cluster_quorum
    (cluster_size / 2) + 1
  end

  # Returns the number of nodes in the cluster.
  # @return [Fixnum]
  def cluster_size
    peers.size + 1
  end

  # The interval between heartbeats (in seconds). See Section 5.7.
  #
  # > The broadcast time must be an order of magnitude less than the election timeout so that leaders can reliably send
  # > the heartbeat messages required to keep followers from starting elections.
  #
  # @return [Float]
  def broadcast_time
    @options[:broadcast_time] || BROADCAST_TIME
  end

  # Randomized election timeout as defined in Section 5.2.
  #
  # This timeout is used in multiple ways:
  #
  # * If a follower does not receive any activity, it starts a new election.
  # * As a candidate, if the election does not resolve within this time, it is restarted.
  #
  # @return [Float]
  def random_timeout
    range = @options[:election_timeout] || ELECTION_TIMEOUT
    min, max = range.first, range.last
    min + rand(max - min)
  end

  def enter_new_term(new_term = nil)
    @current_term = (new_term || @current_term + 1)
    @voted_for = nil
  end

  %w(info debug warn error).each do |m|
    define_method(m) do |str|
      super("[#{id}] #{str}")
    end
  end

  states.each do |name, _|
    define_method(:"#{name}?") do
      self.state == name
    end
  end

  def execute(entry)
    if leader?
      entry = Floss::Log::Entry.new(entry, @current_term)

      # Replicate entry to all peers, then apply it.
      # TODO: Failure handling.
      @log_replicator.append(entry)
      @handler.call(entry.command) if @handler
    else
      raise "Cannot redirect command because leader is unknown." unless @leader_id
      leader = peers.find { |peer| peer.id == @leader_id }
      leader.execute(entry)
    end
  end

  def wait_for_quorum_commit(index)
    latch = Floss::CountDownLatch.new(cluster_quorum)
    peers.each { |peer| peer.signal_on_commit(index, latch) }
    latch.wait
  end

  def handle_rpc(command, payload)
    handler = :"handle_#{command}"

    if respond_to?(handler, true)
      send(handler, payload)
    else
      abort ArgumentError.new('Unknown command.')
    end
  end

  protected

  def handle_execute(entry)
    raise 'Only the leader can accept commands.' unless leader?
    execute(entry)
  end

  # @param [Hash] request
  # @option request [Fixnum] :term The candidate's term.
  # @option request [String] :candidate_id The candidate requesting the vote.
  # @option request [Fixnum] :last_log_index Index of the candidate's last log entry.
  # @option request [Fixnum] :last_log_term Term of the candidate's last log entry.
  #
  # @return [Hash] response
  # @option response [Boolean] :vote_granted Whether the candidate receives the vote.
  # @option response [Fixnum] :term This node's current term.
  def handle_vote_request(request)
    info("[RPC] Received VoteRequest: #{request}")

    term = request[:term]
    candidate_id = request[:candidate_id]

    if term < @current_term
      return {term: @current_term, vote_granted: false}
    end

    if term > @current_term
      enter_new_term(term)
      stop_log_replication if leader?
      transition(:follower) if candidate? || leader?
    end

    valid_candidate = @voted_for.nil? || @voted_for == candidate_id
    log_complete = log.complete?(request[:last_log_term], request[:last_log_index])

    vote_granted = (valid_candidate && log_complete)

    if vote_granted
      @voted_for = candidate_id
      @election_timeout.reset
    end

    return {term: @current_term, vote_granted: vote_granted}
  end

  def handle_append_entries(payload)
    info("[RPC] Received AppendEntries: #{payload}")

    # Marks the node as ready for accepting commands.
    @ready_latch.signal

    term = payload[:term]

    # Reject RPCs with a lesser term.
    if term < @current_term
      return {term: @current_term, success: false}
    end

    # Accept terms greater than the local one.
    if term > @current_term
      enter_new_term(term)
    end

    # Step down if another node sends a valid AppendEntries RPC.
    stop_log_replication if leader?
    transition(:follower) if candidate? || leader?

    # Remember the leader.
    @leader_id = payload[:leader_id]

    # A valid AppendEntries RPC resets the election timeout.
    @election_timeout.reset

    success = if payload[:entries].any?
      if log.validate(payload[:prev_log_index], payload[:prev_log_term])
        log.append(payload[:entries])
        true
      else
        false
      end
    else
      true
    end

    if payload[:commit_index] && @handler
      (@commit_index ? @commit_index + 1 : 0).upto(payload[:commit_index]) do |index|
        @handler.call(log[index].command) if @handler
      end
    end

    @commit_index = payload[:commit_index]

    unless success
      debug("[RPC] I did not accept AppendEntries: #{payload}")
    end

    return {term: @current_term, success: success}
  end

  def on_election_timeout
    if follower?
      transition(:candidate)
    end

    if candidate?
      enter_new_term
      transition(:candidate)
    end
  end

  # @group Candidate methods

  def start_election
    @votes = Floss::CountDownLatch.new(cluster_quorum)
    collect_votes

    @votes.wait

    transition(:leader)

    # Marks the node as ready for accepting commands.
    @ready_latch.signal
  end

  def collect_votes
    payload = {
      term: @current_term,
      last_log_term: log.last_term,
      last_log_index: log.last_index,
      candidate_id: id
    }

    peers.each do |peer|
      async.request_vote(peer, payload)
    end
  end

  # TODO: The candidate should retry the RPC if a peer doesn't answer.
  def request_vote(peer, payload)
    response = begin
      peer.request_vote(payload)
    rescue Floss::TimeoutError
      debug("A vote request to #{peer.id} timed out. Retrying.")
      retry
    end

    term = response[:term]

    # Ignore old responses.
    return if @current_term > term

    # Step down when a higher term is detected.
    # Accept votes from peers in the same term.
    # Ignore votes from peers with an older term.
    if @current_term < term
      enter_new_term(term)
      transition(:follower)

      return
    end

    @votes.signal if response[:vote_granted]
  end

  # @group Leader methods

  def start_log_replication
    raise "A log replicator is already running." if @log_replicator
    @log_replicator = link Floss::LogReplicator.new(current_actor)
  end

  def stop_log_replication
    @log_replicator.terminate
    @log_replicator = nil
  end

  def finalize
    @log_replicator.terminate if @log_replicator
  end
end
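
Floss::Node is the Raft participant: a Celluloid actor and FSM that starts as a follower, holds an election when its randomized timeout fires, and, as leader, replicates commands to its peers before applying them through the block passed to the constructor. A minimal usage sketch, assuming ZMQ endpoints as node ids (the addresses and the command format below are hypothetical, not from this gem):

    require 'floss/node'

    # One member of a three-node cluster; the other two would be started the same way.
    node = Floss::Node.new(
      id:    'tcp://127.0.0.1:7001',
      peers: ['tcp://127.0.0.1:7002', 'tcp://127.0.0.1:7003']
    ) { |command| puts "applying #{command.inspect}" }

    node.wait_until_ready                      # blocks until a leader has been established
    node.execute([:set, :greeting, 'hello'])   # replicated via the leader, then applied by the block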
data/lib/floss/one_off_latch.rb
ADDED
@@ -0,0 +1,23 @@
require 'floss'

class Floss::OneOffLatch
  attr_accessor :ready
  attr_accessor :condition

  def initialize
    self.ready = false
    self.condition = Celluloid::Condition.new
  end

  def signal
    return if ready

    self.ready = true
    condition.broadcast
  end

  def wait
    return if ready
    condition.wait
  end
end
data/lib/floss/peer.rb
ADDED
@@ -0,0 +1,32 @@
require 'floss'
require 'floss/rpc/zmq'

# A peer is a remote node within the same cluster.
class Floss::Peer
  include Celluloid::Logger

  # @return [String] Remote address of the peer.
  attr_accessor :id

  # @return [Floss::RPC::Client]
  attr_accessor :client

  def initialize(id, opts = {})
    self.id = id

    client_class = opts[:rpc_client_class] || Floss::RPC::ZMQ::Client
    self.client = client_class.new(id)
  end

  def execute(payload)
    client.call(:execute, payload)
  end

  def append_entries(payload)
    client.call(:append_entries, payload)
  end

  def request_vote(payload)
    client.call(:vote_request, payload)
  end
end
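
Each helper corresponds to one command name in the node's handle_#{command} dispatch above: :vote_request maps to handle_vote_request, :append_entries to handle_append_entries, and :execute to handle_execute. A hedged sketch of one round trip, with request fields mirroring Floss::Node#collect_votes and the response shape coming from handle_vote_request (the endpoint and log positions are illustrative only):

    peer = Floss::Peer.new('tcp://127.0.0.1:7002')   # hypothetical ZMQ endpoint

    response = peer.request_vote(
      term:           1,
      candidate_id:   'tcp://127.0.0.1:7001',
      last_log_index: 0,   # in practice taken from the candidate's Floss::Log
      last_log_term:  0
    )

    response[:term]           # => the remote node's current term
    response[:vote_granted]   # => true or false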
data/lib/floss/proxy.rb
ADDED
@@ -0,0 +1,25 @@
require 'celluloid/proxies/abstract_proxy'
require 'floss/node'

# A {Floss::Proxy} wraps a FSM and runs it on a cluster.
class Floss::Proxy < Celluloid::AbstractProxy
  # @param [Object] fsm The fsm to expose.
  # @param [Hash] options Options as used by {Floss::Node}.
  def initialize(fsm, options)
    @fsm = fsm
    @node = ::Floss::Node.new(options) { |command| fsm.send(*command) }
  end

  # Executes all methods exposed by the FSM in the cluster.
  def method_missing(method, *args, &block)
    raise ArgumentError, "Can not accept blocks." if block_given?
    return super unless respond_to?(method)
    @node.wait_until_ready
    @node.execute([method, *args])
  end

  def respond_to?(method, include_private = false)
    @fsm.respond_to?(method, include_private)
  end
end
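
Floss::Proxy is the high-level entry point: it owns a Floss::Node whose handler replays each committed command against the wrapped FSM via fsm.send(*command), and method_missing turns every public method call into such a command. A sketch of wrapping a plain Ruby object, in the spirit of data/examples/distributed_hash.rb (whose contents are not shown in this diff); the endpoints are hypothetical:

    require 'floss/proxy'

    # Any object whose state changes only through its public methods will do.
    class KVStore
      def initialize
        @data = {}
      end

      def put(key, value)
        @data[key] = value
      end

      def get(key)
        @data[key]
      end
    end

    options = {
      id:    'tcp://127.0.0.1:7001',
      peers: ['tcp://127.0.0.1:7002', 'tcp://127.0.0.1:7003']
    }

    store = Floss::Proxy.new(KVStore.new, options)
    store.put(:greeting, 'hello')   # replicated through the cluster, then applied to the KVStore

Note that reads take the same path: per method_missing above, store.get(:greeting) is also submitted as a replicated command rather than answered locally.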
data/lib/floss/rpc.rb
ADDED
@@ -0,0 +1,22 @@
require 'floss'

module Floss::RPC
  TIMEOUT = 0.3

  class Client
    def call(command, payload)
      raise NotImplementedError
    end
  end

  # Listens to a ZMQ Socket and handles commands from peers.
  class Server
    attr_accessor :address
    attr_accessor :handler

    def initialize(address, &handler)
      self.address = address
      self.handler = handler
    end
  end
end
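
Floss::RPC defines the transport contract the node relies on: a Server is constructed with an address and a handler block that receives (command, payload) and returns the response hash, and a Client must implement call(command, payload) and hand that hash back (Floss::Node#run wires its handle_rpc method in as the handler; Floss::Peer builds one Client per remote address). The gem ships ZMQ and in-memory transports (rpc/zmq.rb, rpc/in_memory.rb), neither of which appears in this excerpt, so the following is only a rough single-process sketch of a custom transport under that contract; the LocalRPC name and registry are invented for illustration:

    require 'floss/rpc'

    module LocalRPC
      REGISTRY = {}   # address => Server, shared within one process

      class Server < Floss::RPC::Server
        def initialize(address, &handler)
          super
          REGISTRY[address] = self
        end
      end

      class Client < Floss::RPC::Client
        def initialize(address)
          @address = address
        end

        # Invoke the remote handler directly and return its response hash.
        def call(command, payload)
          server = REGISTRY.fetch(@address) { raise Floss::TimeoutError }
          server.handler.call(command, payload)
        end
      end
    end

    # node = Floss::Node.new(id: 'a', peers: ['b', 'c'], rpc: LocalRPC)

This ignores concurrency and failure handling (a real transport would raise Floss::TimeoutError when a peer stays unreachable, which node.rb rescues and retries), so treat it as an illustration of the interface rather than a drop-in replacement for the ZMQ transport.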