evinrude 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (62) hide show
  1. checksums.yaml +7 -0
  2. data/.editorconfig +23 -0
  3. data/.gitignore +6 -0
  4. data/.yardopts +1 -0
  5. data/CODE_OF_CONDUCT.md +49 -0
  6. data/CONTRIBUTING.md +10 -0
  7. data/LICENCE +674 -0
  8. data/README.md +410 -0
  9. data/evinrude.gemspec +42 -0
  10. data/lib/evinrude.rb +1233 -0
  11. data/lib/evinrude/backoff.rb +19 -0
  12. data/lib/evinrude/cluster_configuration.rb +162 -0
  13. data/lib/evinrude/config_change_queue_entry.rb +19 -0
  14. data/lib/evinrude/config_change_queue_entry/add_node.rb +13 -0
  15. data/lib/evinrude/config_change_queue_entry/remove_node.rb +14 -0
  16. data/lib/evinrude/freedom_patches/range.rb +5 -0
  17. data/lib/evinrude/log.rb +102 -0
  18. data/lib/evinrude/log_entries.rb +3 -0
  19. data/lib/evinrude/log_entry.rb +13 -0
  20. data/lib/evinrude/log_entry/cluster_configuration.rb +15 -0
  21. data/lib/evinrude/log_entry/null.rb +6 -0
  22. data/lib/evinrude/log_entry/state_machine_command.rb +13 -0
  23. data/lib/evinrude/logging_helpers.rb +40 -0
  24. data/lib/evinrude/message.rb +19 -0
  25. data/lib/evinrude/message/append_entries_reply.rb +13 -0
  26. data/lib/evinrude/message/append_entries_request.rb +18 -0
  27. data/lib/evinrude/message/command_reply.rb +13 -0
  28. data/lib/evinrude/message/command_request.rb +18 -0
  29. data/lib/evinrude/message/install_snapshot_reply.rb +13 -0
  30. data/lib/evinrude/message/install_snapshot_request.rb +18 -0
  31. data/lib/evinrude/message/join_reply.rb +13 -0
  32. data/lib/evinrude/message/join_request.rb +18 -0
  33. data/lib/evinrude/message/node_removal_reply.rb +13 -0
  34. data/lib/evinrude/message/node_removal_request.rb +18 -0
  35. data/lib/evinrude/message/read_reply.rb +13 -0
  36. data/lib/evinrude/message/read_request.rb +18 -0
  37. data/lib/evinrude/message/vote_reply.rb +13 -0
  38. data/lib/evinrude/message/vote_request.rb +18 -0
  39. data/lib/evinrude/messages.rb +14 -0
  40. data/lib/evinrude/metrics.rb +50 -0
  41. data/lib/evinrude/network.rb +69 -0
  42. data/lib/evinrude/network/connection.rb +144 -0
  43. data/lib/evinrude/network/protocol.rb +69 -0
  44. data/lib/evinrude/node_info.rb +35 -0
  45. data/lib/evinrude/peer.rb +50 -0
  46. data/lib/evinrude/resolver.rb +96 -0
  47. data/lib/evinrude/snapshot.rb +9 -0
  48. data/lib/evinrude/state_machine.rb +15 -0
  49. data/lib/evinrude/state_machine/register.rb +25 -0
  50. data/smoke_tests/001_single_node_cluster.rb +20 -0
  51. data/smoke_tests/002_three_node_cluster.rb +43 -0
  52. data/smoke_tests/003_spill.rb +25 -0
  53. data/smoke_tests/004_stale_read.rb +67 -0
  54. data/smoke_tests/005_sleepy_master.rb +28 -0
  55. data/smoke_tests/006_join_via_follower.rb +26 -0
  56. data/smoke_tests/007_snapshot_madness.rb +97 -0
  57. data/smoke_tests/008_downsizing.rb +43 -0
  58. data/smoke_tests/009_disaster_recovery.rb +46 -0
  59. data/smoke_tests/999_final_smoke_test.rb +279 -0
  60. data/smoke_tests/run +22 -0
  61. data/smoke_tests/smoke_test_helper.rb +199 -0
  62. metadata +318 -0
@@ -0,0 +1,1233 @@
1
+ require "async"
2
+ require "async/dns"
3
+ require "fiber"
4
+ require "logger"
5
+ require "pathname"
6
+ require "securerandom"
7
+ require "tempfile"
8
+
9
+ require_relative "./evinrude/logging_helpers"
10
+
11
+ class Evinrude
12
+ include Evinrude::LoggingHelpers
13
+
14
+ class Error < StandardError; end
15
+
16
+ class NoLeaderError < Error; end
17
+
18
+ class NodeExpiredError < Error; end
19
+
20
+ attr_reader :node_name
21
+
22
# Construct a new Evinrude consensus node.
#
# @param join_hints [Array<String, Hash>, nil] addresses (or DNS names with
#   SRV records) of existing cluster members to join via; nil means "no
#   hints" and, with an empty config, standalone mode.
# @param shared_keys [Object] symmetric key material handed to the Network
#   layer to authenticate intra-cluster traffic (required).
# @param state_machine [Class] replicated state machine implementation.
# @param logger [Logger] destination for diagnostics.
# @param node_name [String, nil] stable node identity; a snapshot's name
#   wins, otherwise a random UUID is generated when nil.
# @param storage_dir [String, Pathname, nil] directory for snapshot/log
#   persistence; nil disables persistence entirely.
# @param heartbeat_interval [Numeric] seconds between leader heartbeats.
# @param heartbeat_timeout [Range] range the follower election timeout is
#   randomly drawn from.
# @param listen [Hash] :address/:port to bind (default "::", ephemeral port).
# @param advertise [Hash] :address/:port to advertise to peers.
# @param metrics_registry [Prometheus::Client::Registry] metrics registry.
def initialize(join_hints: [], shared_keys:, state_machine: Evinrude::StateMachine::Register,
               logger: Logger.new("/dev/null"), node_name: nil, storage_dir: nil,
               heartbeat_interval: 0.25, heartbeat_timeout: 1..2,
               listen: {}, advertise: {}, metrics_registry: Prometheus::Client::Registry.new)
  @join_hints, @keys, @logger, @heartbeat_interval, @heartbeat_timeout = join_hints, shared_keys, logger, heartbeat_interval, heartbeat_timeout

  @metrics = Evinrude::Metrics.new(metrics_registry)

  @listen, @advertise = listen, advertise
  @listen[:address] ||= "::"
  @listen[:port] ||= 0

  if storage_dir
    @storage_dir = Pathname.new(storage_dir)
  end

  # Load a previously-written snapshot, if persistence is enabled and one
  # exists; `snapshot` stays nil otherwise.
  snapshot = if @storage_dir
    if !@storage_dir.exist?
      @storage_dir.mkdir
    end

    if !@storage_dir.directory?
      raise ArgumentError, "Storage directory #{@storage_dir} isn't *actually* a directory"
    end

    snapshot_file = @storage_dir.join("snapshot.yaml")

    if snapshot_file.exist?
      @metrics.snapshot_file_size.set(snapshot_file.stat.size)
      # NOTE(review): YAML.load_file instantiates arbitrary objects; the
      # snapshot is local trusted state, but worth confirming that is the
      # intended trust boundary.
      YAML.load_file(snapshot_file)
    end
  end

  @state_machine_class = state_machine

  # Snapshot (if any) supplies node identity, state machine contents and
  # command dedup table; otherwise start fresh.
  if snapshot
    @node_name = snapshot.node_name
    @state_machine = @state_machine_class.new(snapshot: snapshot.state)
    @last_command_ids = snapshot.last_command_ids
  else
    @node_name = node_name || SecureRandom.uuid
    @state_machine = @state_machine_class.new
    @last_command_ids = {}
  end

  @sm_mutex = Mutex.new

  if snapshot
    @config = snapshot.cluster_config
    @config_index = snapshot.cluster_config_index
    # Re-attach runtime collaborators that aren't serialized with the config.
    @config.metrics = @metrics
    @config.logger = logger
  else
    @config = Evinrude::ClusterConfiguration.new(logger: logger, metrics: @metrics)
    @config_index = 0
  end

  @last_append = Time.at(0)
  @current_term = 0
  @voted_for = nil
  @mode = :init

  @metrics.term.set(0)

  if snapshot
    logger.debug(logloc) { "Configuring log from snapshot; snapshot_last_term=#{snapshot.last_term} snapshot_last_index=#{snapshot.last_index}" }
    @log = Evinrude::Log.new(snapshot_last_term: snapshot.last_term, snapshot_last_index: snapshot.last_index, logger: logger)
  else
    @log = Evinrude::Log.new(logger: logger)
  end

  if snapshot
    logger.debug(logloc) { "Setting commit_index to #{snapshot.last_index} from snapshot" }
    @commit_index = snapshot.last_index
  else
    @commit_index = 0
  end

  @metrics.commit_index.set(@commit_index)

  # Lazily open a connection to a peer the first time it is referenced.
  # (An unused `backoff` local was previously allocated here per-miss;
  # removed.)
  @peers = Hash.new do |h, k|
    peer_conn = @network.connect(address: k.address, port: k.port)

    h[k] = Peer.new(metrics: @metrics, conn: peer_conn, node_info: k, next_index: @log.last_index + 1)
  end

  @config_change_queue = []
  @config_change_request_in_progress = nil
  @cc_sem = Async::Semaphore.new
end
114
+
115
# Submit a command to the replicated state machine, blocking until the
# cluster leader acknowledges that it has been applied.  Retries forever on
# unsuccessful replies.
#
# @param s [String] the command to replicate.
# @return [true] once the leader reports success.
def command(s)
  @metrics.command_execution.measure do
    Async(logger: logger) do |task|
      # A stable id lets the leader deduplicate retried submissions.
      id = SecureRandom.uuid

      done = false
      until done
        reply = rpc_to_leader(Message::CommandRequest.new(command: s, id: id, node_name: @node_name), task)
        done = true if reply.success
      end
      done
    end.result
  end
end
130
+
131
# Return a linearizable read of the state machine's current state.
#
# A deep copy of the local state (and the commit index it was taken at) is
# captured under @sm_mutex, then the leader is asked (ReadRequest) to
# confirm that commit index is still current; if not, the whole capture is
# retried.  Blocks until a read is confirmed.
def state
  @metrics.read_state.measure do
    Async(logger: logger) do |task|
      loop do
        state_object = nil
        commit_index = nil

        @sm_mutex.synchronize do
          # Disturbingly, this appears to be one of the best available ways
          # to make a guaranteed deep copy of an arbitrary object
          state_object = YAML.load(@state_machine.current_state.to_yaml)
          commit_index = @commit_index
        end

        logger.debug(logloc) { "(in #{@node_name}) Checking if #{state_object.inspect} at commit_index=#{commit_index} is the most up-to-date state" }

        reply = rpc_to_leader(Evinrude::Message::ReadRequest.new(commit_index: commit_index), task)

        if reply.success
          # Leader confirmed our commit index is current; the copy is fresh.
          break state_object
        end
      end
    end.result
  end
end
156
+
157
# Start the node: replay any persisted log, bring up the network listener,
# begin servicing inbound RPCs, and join (or create) the cluster.  Runs
# inside an Async reactor and blocks for the node's lifetime; fatal errors
# are logged and re-raised.
def run
  logger.info(logloc) { "Evinrude node #{@node_name} starting up" }

  @metrics.start_time.set(Time.now.to_f)

  if @storage_dir
    @metrics.log_loaded_from_disk.set(1)
    load_log_from_disk
  else
    @metrics.log_loaded_from_disk.set(0)
  end

  Async do |task| #(logger: logger) do |task|
    # Retained so other methods (become_leader etc.) can spawn sibling tasks.
    @async_task = task
    @network = Network.new(keys: @keys, logger: logger, metrics: @metrics, listen: @listen, advertise: @advertise).start

    logger.info(logloc) { "Node #{@node_name} listening on #{address}:#{port}" }

    @metrics.info.set(1, labels: { node_name: @node_name, listen_address: @network.listen_address, listen_port: @network.listen_port, advertise_address: address, advertise_port: port })

    # RPC servicing runs concurrently so join_or_create_cluster can converse
    # with the cluster while inbound requests are handled.
    task.async { process_rpc_requests }

    join_or_create_cluster
  end.return
rescue => ex
  log_exception(ex) { "Fatal error" }
  raise
end
185
+
186
# Remove a node from the cluster configuration.
#
# @param node_info [Evinrude::NodeInfo] the node to remove.
# @param unsafe [Boolean] when true, force-remove the node from the *local*
#   configuration only, bypassing the leader — a disaster-recovery manoeuvre.
#   When false (default), the removal is requested of the leader and this
#   call blocks, retrying until the leader reports success.
def remove_node(node_info, unsafe: false)
  if unsafe
    logger.warn(logloc) { "Unsafely removing node #{node_info.inspect} from the local configuration" }

    @config.remove_node(node_info, force: true)
  else
    @metrics.remove_node.measure do
      Async(logger: logger) do |task|
        loop do
          logger.debug(logloc) { "(in #{@node_name}) Requesting removal of #{node_info.inspect}" }

          reply = rpc_to_leader(Evinrude::Message::NodeRemovalRequest.new(node_info: node_info, unsafe: unsafe), task)

          if reply.success
            break true
          end
        end
      end.result
    end
  end
end
207
+
208
# The address this node advertises to peers; nil until the network is up.
def address
  @network && @network.advertised_address
end

# The port this node advertises to peers; nil until the network is up.
def port
  @network && @network.advertised_port
end

# All nodes in the current cluster configuration.
def nodes
  @config.nodes
end
219
+
220
# Role predicates.  @mode holds the node's current role as a Symbol:
# :init until startup completes, then :leader / :follower / :candidate.

# True while this node believes it is the cluster leader.
def leader?
  :leader == @mode
end

# True while this node is a follower.
def follower?
  :follower == @mode
end

# True while this node is standing for election.
def candidate?
  :candidate == @mode
end

# True until the node has joined (or created) a cluster.
def init?
  :init == @mode
end
235
+
236
# A non-leader whose heartbeat deadline has passed is "expired": it is no
# longer an active participant in the cluster.  Leaders, and nodes with no
# deadline set yet, are never expired.
def expired?
  return false if leader?
  return false unless @heartbeat_timeout_time

  @heartbeat_timeout_time < Time.now
end
239
+
240
# This node's identity (advertised address/port plus name), memoized after
# first use.  The network must be up first, since the advertised
# address/port come from it.
#
# @raise [RuntimeError] if called before the network has started.
def node_info
  raise RuntimeError, "Cannot determine node info until the network is up" if @network.nil?

  @node_info ||= Evinrude::NodeInfo.new(address: address, port: port, name: @node_name)
end
247
+
248
+ private
249
+
250
# Replay the on-disk operation log (log.yaml) into in-memory state.
#
# Each YAML document is a one-entry hash of method-name => arguments.  Only
# process_log_entry and commit_entries_to are permitted; anything else in
# the file amounts to arbitrary method invocation from disk, so the process
# aborts (exit 42) rather than replay it.
def load_log_from_disk
  log_file = @storage_dir.join("log.yaml")

  if log_file.exist?
    logger.debug(logloc) { "Loading log entries from #{log_file}" }
    @metrics.log_file_size.set(log_file.stat.size)

    # Temporarily unsetting @storage_dir prevents the calls we make from
    # writing all the log entries straight back to disk again
    tmp_storage_dir, @storage_dir = @storage_dir, nil

    begin
      log_file.open do |fd|
        YAML.load_stream(fd) do |entry|
          unless entry.is_a?(Hash)
            logger.fatal(logloc) { "SHENANIGAN ALERT: persisted log entry #{entry} is not a hash!" }
            exit 42
          end

          m, args = entry.to_a.first

          # Whitelist of replayable operations; see persist_to_disk callers.
          unless %i{process_log_entry commit_entries_to}.include?(m)
            logger.fatal(logloc) { "SHENANIGAN ALERT: log includes unexpected operation #{m.inspect}(*#{args.inspect})!!!" }
            exit 42
          end

          logger.debug(logloc) { "Running #{m}(#{args.inspect}) from disk log" }

          self.__send__(m, *args)
        end
      end
    ensure
      # Always restore persistence, even if replay blows up part-way.
      @storage_dir = tmp_storage_dir
    end

    logger.debug(logloc) { "Completed log read" }
  end
end
288
+
289
# Deliver a message to the current cluster leader, following redirects and
# retrying with backoff until a definitive reply arrives.
#
# @param msg [Evinrude::Message] the request to send.
# @param task [Async::Task] the task to sleep/timeout within.
# @return [Evinrude::Message] the leader's reply.
# @raise [NodeExpiredError] if this node drops out of the cluster mid-retry.
#
# NOTE: NoLeaderError raised below is an Evinrude::Error and so is caught by
# the rescue clause and retried, not propagated to the caller.
def rpc_to_leader(msg, task)
  backoff = Evinrude::Backoff.new
  reply = nil
  # (An unused `command_id = SecureRandom.uuid` was previously generated
  # here — the message already carries its own id — and has been removed.)

  logger.debug(logloc) { "(in #{@node_name}) Sending message #{msg.inspect} to cluster leader" }

  loop do
    # Wait until we're an active cluster member (or definitively expired)
    # before bothering the leader.
    until leader? || follower? || expired?
      logger.debug(logloc) { "(in #{@node_name}) Waiting until we're in the cluster before sending RPC to leader" }
      task.sleep 0.5
    end

    begin
      # Prefer a redirect target from the previous reply over our own
      # (possibly stale) notion of who the leader is.
      remote = reply&.leader_info || @leader_info

      if remote.nil?
        raise NoLeaderError, "No leader could be discerned for the cluster at present"
      end

      conn = @network.connect(address: remote.address, port: remote.port)

      reply = task.with_timeout(5) do
        conn.rpc(msg)
      end

      if reply.nil?
        logger.debug(logloc) { "(in #{@node_name}) RPC to leader #{remote.inspect} timed out" }
      elsif reply.leader_info
        logger.debug(logloc) { "(in #{@node_name}) Redirected to #{reply.leader_info.inspect}" }
        # No need to wait for the backoff time here
        next
      else
        logger.debug(logloc) { "(in #{@node_name}) RPC to leader returned #{reply.inspect}" }
        return reply
      end

      task.sleep backoff.wait_time
    rescue Evinrude::Error, Async::TimeoutError, Async::Wrapper::Cancelled, SystemCallError, IOError => ex
      # BUG FIX: `remote` is nil when NoLeaderError was raised above, and the
      # previous code crashed here with NoMethodError building the labels;
      # use safe navigation so the retry loop survives.
      @metrics.rpc_exception.increment(labels: { target: "#{remote&.address}:#{remote&.port}", node_name: remote&.name, class: ex.class.to_s })
      log_exception(ex) { "(in #{@node_name}) RPC to leader raised exception" }
      conn&.close
      reply = nil

      if expired?
        raise NodeExpiredError, "This node instance is not active in the cluster (mode=#{@mode})"
      end

      task.sleep backoff.wait_time
    end
  end
end
341
+
342
# Transition into the leader role: reset per-peer replication state, clear
# any config-change saga left over from a previous term, start the periodic
# AppendEntries heartbeat task, and replicate a null entry to mark the
# start of the term.
def become_leader
  reset_peers

  logger.info(logloc) { "Assuming leadership of the cluster" }

  @mode = :leader

  @leader_info = node_info
  @commands_in_progress = {}

  # Any in-flight config change belonged to the previous leadership.
  @cc_sem.acquire do
    @config_change_queue = []
    @config_change_request_in_progress = nil
  end

  # Heartbeat loop: keeps followers' election timers from expiring for as
  # long as we remain leader.
  @async_task.async do |subtask|
    while leader?
      subtask.sleep @heartbeat_interval

      if leader?
        logger.debug(logloc) { "Triggering periodic AE heartbeat" }
        issue_append_entries_to_cluster
      end
    end
  end

  # Replicating a no-op entry in the new term lets earlier-term entries be
  # committed as a side effect (standard Raft start-of-term technique).
  propose_log_entry(
    LogEntry::Null.new(term: @current_term) do
      logger.debug(logloc) { "Null log entry to mark start-of-term replicated" }
    end
  )

  # State gauge encoding: 3=leader, 2=follower, 1=candidate (see the other
  # become_* methods).
  @metrics.state.set(3)
end
376
+
377
# Transition into the follower role and start the election watchdog: when
# the heartbeat deadline passes without renewal, trigger an election.
def become_follower
  reset_peers

  logger.info(logloc) { "Becoming follower" }

  @mode = :follower

  # Randomised deadline (drawn from the configured range) de-synchronises
  # followers so they don't all stand for election simultaneously.
  @heartbeat_timeout_time = Time.now + @heartbeat_timeout.rand

  @async_task.async do |subtask|
    while follower?
      logger.debug(logloc) { "#{@heartbeat_timeout_time - Time.now}s until heartbeat timer expires" }

      # Re-check after each sleep rather than sleeping once for the full
      # interval — @heartbeat_timeout_time is presumably pushed forward by
      # incoming heartbeats (handler not visible here — confirm).
      subtask.sleep [0.01, @heartbeat_timeout_time - Time.now].max

      if follower? && @heartbeat_timeout_time < Time.now
        logger.info(logloc) { "Heartbeat timeout expired; triggering election" }
        trigger_election
      end
    end
  end

  # State gauge: 2 == follower.
  @metrics.state.set(2)
end
401
+
402
# Transition into the candidate role.  If no leader emerges within a
# randomised election timeout, trigger a fresh election.
def become_candidate
  reset_peers

  logger.info(logloc) { "Becoming a candidate" }

  @mode = :candidate

  @async_task.async do |subtask|
    # Randomised to avoid repeated split elections.
    election_timeout = @heartbeat_timeout.rand
    logger.debug(logloc) { "Waiting #{election_timeout}s for election to complete" }
    subtask.sleep election_timeout

    # Still a candidate after the timeout means the election went nowhere.
    if candidate?
      logger.info(logloc) { "Election timeout expired without a leader being elected; triggering a new election" }
      trigger_election
    end
  end

  # State gauge: 1 == candidate.
  @metrics.state.set(1)
end
422
+
423
# Close every open peer connection and forget all per-peer replication
# state (and the metrics derived from it).  Called on every role change.
def reset_peers
  @peers.each_value do |peer|
    peer.conn.close
  end
  @peers.clear
  @metrics.clear_peer_metrics
end
428
+
429
# Enter term n: record the new term number, forget any vote cast in the
# previous term, and update the term gauge.
def new_term(n)
  logger.debug(logloc) { "Setting up for term #{n}" }
  @voted_for = nil
  @current_term = n

  @metrics.term.set(n)
end
436
+
437
# Append a replay operation (a one-entry hash of method => args, as read
# back by load_log_from_disk) to the on-disk log, rolling over to a fresh
# snapshot first when the log exceeds 1MiB.  No-op when persistence is
# disabled (@storage_dir nil).
def persist_to_disk(e)
  if @storage_dir
    file = @storage_dir.join("log.yaml")

    if file.exist? && file.stat.size > 1024 * 1024
      logger.debug(logloc) { "Log is getting a bit big; time for a new snapshot, methinks" }
      take_snapshot
    end

    logger.debug(logloc) { "Persisting #{e.inspect} to #{file}" }
    file.open("a") do |fd|
      logger.debug(logloc) { "Doin' the write thing" }
      fd.puts e.to_yaml
      # Flush data to stable storage before acknowledging the entry.
      fd.fdatasync
    end

    @metrics.log_entries_persisted.increment
    @metrics.log_file_size.set(file.stat.size)
  end
end
457
+
458
# (Leader only) Append an entry to the local log, persist it, and begin
# replicating it to the cluster.  In a single-node cluster the entry is
# already "replicated everywhere", so the majority check runs immediately.
def propose_log_entry(entry)
  unless leader?
    logger.error(logloc) { with_backtrace("propose_log_entry called while not leader!") }
    return
  end

  @log.append(entry)
  persist_to_disk(process_log_entry: [entry, @log.last_index])

  logger.debug(logloc) { "Proposing #{entry.inspect} as ##{@log.last_index}" }

  if @config.nodes.length == 1
    # Flyin' solo! Means we can skip all that inconvenient AppendEntries stuff,
    # but we still need to do what needs to be done after the entry has been
    # "replicated everywhere" (ie "here")
    check_for_new_replication_majority
  else
    issue_append_entries_to_cluster
  end
end
478
+
479
# Send AppendEntries to every other node in the cluster, each in its own
# async task so one slow peer doesn't delay the rest.  An optional block is
# forwarded to issue_append_entries to take over reply handling.
def issue_append_entries_to_cluster(&blk)
  nodes.each do |n|
    next if n == node_info

    @async_task.async do
      begin
        issue_append_entries(@peers[n], &blk)
      rescue Evinrude::Log::SnapshottedEntryError
        # The entries this peer needs have been compacted away; ship a full
        # snapshot instead.
        issue_snapshot(@peers[n])
      rescue => ex
        log_exception(ex) { "Failed to issue AppendEntries to #{n.inspect}" }
      end
    end
  end
end
494
+
495
# Send one AppendEntries RPC to a single follower and process the reply:
# success advances the follower's match/next indexes and may advance the
# cluster commit index; failure rewinds next_index (jumping straight to the
# follower's reported last_index when available) and retries, falling back
# to a snapshot when the needed entries have been compacted.
#
# When a block is given, reply handling is delegated to it entirely.
#
# @raise [Evinrude::Log::SnapshottedEntryError] from entries_from when the
#   requested entries predate the log's snapshot (caller handles this).
def issue_append_entries(follower)
  logger.debug(logloc) { "Issuing AppendEntries to #{follower.node_info.inspect}" }
  entries = @log.entries_from(follower.next_index)
  prev_index = [follower.next_index - 1, @log.last_index].min
  prev_entry = @log[prev_index]

  logger.debug(logloc) { "Previous log entry (##{prev_index}) is #{prev_entry.inspect}" }

  reply = follower.rpc(
    Message::AppendEntriesRequest.new(
      term: @current_term,
      leader_info: node_info,
      leader_commit: @commit_index,
      prev_log_index: prev_index,
      prev_log_term: prev_entry.term,
      entries: entries,
    )
  )

  # Leadership may have been lost while the RPC was in flight; if so the
  # reply is ignored.
  if leader?
    if reply.nil?
      logger.debug(logloc) { "AppendEntriesRequest to #{follower.node_info.inspect} was not answered. C'est la vie." }
      follower.conn.close
      @peers.delete(follower.node_info)
    elsif block_given?
      yield reply, follower.node_info
    elsif reply.term > @current_term
      logger.debug(logloc) { "Received term from #{follower.node_info.inspect} greater than our own. Demotion required!" }
      new_term(reply.term)
      become_follower
    elsif reply.success
      logger.debug(logloc) { "Successful AppendEntriesReply received from #{follower.node_info.inspect}" }
      follower.successful_append(prev_index + entries.length)
      check_for_new_replication_majority
    else
      logger.debug(logloc) { "AppendEntries to #{follower.node_info.inspect} failed; retrying after next_index decrement" }
      # Jump straight to the follower's own last index when it is behind,
      # rather than decrementing one step per round-trip.
      if reply.last_index && reply.last_index < follower.next_index - 1
        follower.failed_append(reply.last_index)
      else
        follower.failed_append
      end
      if follower.next_index <= @log.snapshot_last_index
        issue_snapshot(follower)
      else
        issue_append_entries(follower)
      end
    end
  else
    logger.debug(logloc) { "Ignoring AppendEntriesReply received when we're not leader" }
  end
end
546
+
547
# (Leader only) Walk every log index above the current commit index and,
# for each one now replicated on a quorum of nodes, apply it: config
# entries drive the joint-consensus config-change saga, state-machine
# commands are applied (and their waiting clients notified), and the commit
# index advances and is persisted.  If anything new was committed, push an
# extra AppendEntries round so followers learn the new commit index quickly.
def check_for_new_replication_majority
  new_commits = false

  ((@commit_index + 1)..@log.last_index).each do |idx|
    # Nodes whose match_index has reached idx, plus ourselves.
    present_nodes = @peers.values.select { |f| f.match_index >= idx }.map(&:node_info) + [node_info]

    logger.debug(logloc) { "Checking for replication majority on ##{idx} (present: #{present_nodes.inspect})" }
    if @config.quorum_met?(present_nodes)
      logger.debug(logloc) { "Log index #{idx} has met majority" }
      @metrics.replication_majority.set(idx)

      entry = @log[idx]

      case entry
      when LogEntry::ClusterConfiguration
        logger.debug(logloc) { "Newly majoritied (majoritised?) log entry is a ClusterConfig; @config_index=#{@config_index}" }

        # Dealing with potentially out-of-date cluster configurations is
        # absofuckinglutely mind-bending. As near as I can tell, however,
        # since the leader by definition has all of the log entries, it
        # also has the latest and greatest config live and in concert,
        # so we can make some assumptions about future log entries on
        # that basis.
        if idx == @config_index
          logger.debug(logloc) { "Replication of current config #{@config.inspect} complete" }
          if @config.transitioning?
            # Joint (old+new) config replicated; now replicate the final
            # post-joint config.
            logger.debug(logloc) { "Proposing post-joint config" }
            @config.joint_configuration_replicated
            propose_log_entry(LogEntry::ClusterConfiguration.new(term: @current_term, config: @config))
            @config_index = @log.last_index
          else
            # Transition complete; time to let the requestor know they're good
            # to go
            logger.debug(logloc) { "Post-joint config replicated; config change saga completed" }
            @config_index = @log.last_index

            @cc_sem.acquire do
              if @config_change_request_in_progress
                logger.debug(logloc) { "Letting #{@config_change_request_in_progress.node_info.inspect} know their config change request was successful" }

                # This is technically only necessary for certain config changes
                # (like when a node changes address/port but keeps the same
                # name) but there's no harm in doing it all the time.
                if @peers.key?(@config_change_request_in_progress.node_info)
                  @peers[@config_change_request_in_progress.node_info].conn.close
                  @peers.delete(@config_change_request_in_progress.node_info)
                end

                @config_change_request_in_progress.send_successful_reply
                @config_change_request_in_progress = nil
              else
                logger.debug(logloc) { "Nobody to send a successful config change reply to; oh well" }
              end
            end

            # Saga finished; start the next queued config change, if any.
            process_config_change_queue
          end
        else
          logger.debug(logloc) { "Quorum met on out-of-date config #{entry.config.inspect}; ignoring" }
        end
      when LogEntry::StateMachineCommand
        @sm_mutex.synchronize do
          logger.debug(logloc) { "Applying state machine command #{entry.command} (id #{entry.id})" }
          @state_machine.process_command(entry.command)
          # Intentional assignment: a waiting client connection, if any.
          if conn = @commands_in_progress.delete(entry.id)
            logger.debug(logloc) { "Letting the client know their command is cooked" }
            conn.send_reply(Message::CommandReply.new(success: true))
          else
            logger.debug(logloc) { "No client around to notify of command application; they'll figure it out eventually" }
          end
        end
      end

      @commit_index = idx
      @metrics.commit_index.set(@commit_index)
      persist_to_disk(commit_entries_to: [idx])
      new_commits = true
    else
      logger.debug(logloc) { "Replication majority not yet met on ##{idx}. Better luck next time." }
    end
  end

  if new_commits
    # We want to get the good word out to everyone as soon as possible that
    # there's new log entries that can be committed.
    issue_append_entries_to_cluster
  end
end
635
+
636
# Write a point-in-time snapshot of node state (state machine contents,
# cluster config, log position, command dedup table) to snapshot.yaml, then
# delete the now-redundant operation log.  Written crash-safely: tempfile +
# fdatasync + rename + directory fsync.  No-op without a @storage_dir.
def take_snapshot
  return unless @storage_dir

  # Capture under @sm_mutex so state and log position are consistent.
  snapshot = @sm_mutex.synchronize do
    Evinrude::Snapshot.new(node_name: @node_name, state: @state_machine.snapshot, cluster_config: @config, cluster_config_index: @config_index, last_term: @log.last_entry_term, last_index: @log.last_index, last_command_ids: @last_command_ids)
  end

  Tempfile.open("snapshot", @storage_dir) do |f|
    logger.debug(logloc) { "Writing snapshot data to #{f.path}" }
    f.write(snapshot.to_yaml)
    f.fdatasync
    f.close
    # Atomic replace of any previous snapshot.
    File.rename(f.path, @storage_dir.join("snapshot.yaml"))
    # fsync the directory so the rename itself is durable.
    File.open(@storage_dir) { |d| d.fsync }
  end

  @metrics.snapshot_file_size.set(@storage_dir.join("snapshot.yaml").stat.size)

  begin
    logger.debug(logloc) { "Deleting now-stale log.yaml" }
    File.unlink(File.join(@storage_dir, "log.yaml"))
  rescue Errno::ENOENT
    # Yes, this is in fact exactly what we're trying to achieve
  end

  @metrics.log_file_size.set(0)
end
663
+
664
# Ship a full state-machine snapshot to a follower whose needed log entries
# have been compacted away.  On success the follower is considered caught
# up to the current commit index; a reply carrying a higher term updates
# our own term instead.
def issue_snapshot(follower)
  msg = @sm_mutex.synchronize do
    Message::InstallSnapshotRequest.new(term: @current_term, leader_info: @leader_info, last_included_index: @commit_index, last_included_term: @log[@commit_index].term, data: @state_machine.snapshot)
  end

  reply = follower.rpc(msg)

  # BUG FIX: follower.rpc can return nil when the RPC goes unanswered (the
  # AppendEntries path already handles this); previously that crashed here
  # with NoMethodError on nil.term.
  if reply.nil?
    logger.debug(logloc) { "InstallSnapshotRequest to #{follower.node_info.inspect} was not answered" }
  elsif reply.term > @current_term
    new_term(reply.term)
  else
    follower.successful_append(@commit_index)
  end
end
677
+
678
# Shared DNS resolver, constructed lazily on first use.
def async_resolver
  @async_resolver = Evinrude::Resolver.new if @async_resolver.nil?
  @async_resolver
end
681
+
682
# Resolve the configured join hints into concrete { address:, port: }
# entries.  String hints are treated as DNS names with SRV records; Hash /
# NodeInfo hints are used as-is when :address is already an IP, otherwise
# the hostname is resolved (keeping the hint's own port).  Returns [] when
# there are no hints.
#
# @return [Array<Hash>] resolved address/port pairs.
# @raise [ArgumentError] on a hint that is neither String, Hash nor NodeInfo.
def expand_join_hints
  return [] if @join_hints.nil?

  # Where's Enumerable.amap when you need it?
  sem = Async::Semaphore.new

  [].tap do |r|
    @join_hints.each do |jh|
      Async(logger: logger) do |t|
        if jh.is_a?(String)
          async_resolver.getresources(jh).each do |srv|
            t.async do
              async_resolver.getaddresses(srv.target.to_s).each do |addr|
                sem.acquire { r << { address: addr, port: srv.port } }
              end
            end
          end
        elsif jh.is_a?(Hash) || jh.is_a?(NodeInfo)
          begin
            IPAddr.new(jh[:address])
            # It's an IP address already; excellent
            sem.acquire { r << jh }
          rescue ArgumentError
            # It's a hostname(ish); resolve it, keeping the hint's own port.
            # BUG FIX: this branch previously used `srv.port`, but `srv` is
            # only bound in the SRV (String) branch above, so any hostname
            # hint raised NameError here.
            async_resolver.getaddresses(jh[:address]).each do |addr|
              sem.acquire { r << { address: addr, port: jh[:port] } }
            end
          end
        else
          raise ArgumentError, "Invalid join hint entry: #{jh.inspect}"
        end
      end.result
    end
  end
end
717
+
718
# Everywhere we could plausibly join the cluster via: the resolved join
# hints plus every already-known cluster node other than ourselves.
def join_targets
  known_others = @config.nodes.reject { |n| n.name == node_info.name }
  expand_join_hints + known_others
end
721
+
722
# Either bootstrap a brand-new single-node cluster (no hints, no known
# nodes) or join an existing one via the available targets.  Either way, a
# snapshot is taken afterwards so identity and config survive a restart.
def join_or_create_cluster
  if @join_hints.nil? && join_targets.empty?
    logger.info(logloc) { "No hints of an existing cluster found; configuring for standalone mode" }
    new_term(1)

    @config.add_node(node_info)
    @config.joint_configuration_replicated

    become_leader

    # Replicate (to ourselves) the initial single-node configuration.
    propose_log_entry(LogEntry::ClusterConfiguration.new(term: @current_term, config: @config))

    take_snapshot
  else
    logger.info(logloc) { "Joining existing cluster" }
    join_cluster_via(join_targets)

    # Taking a snapshot immediately after joining allows us to capture an
    # up-to-date config, as well as our node name, in case of accidents.
    take_snapshot
  end
end
744
+
745
# Race connections to every join target in parallel; the first connection
# to succeed cancels the others and sends the JoinRequest.  Redirects are
# followed; dead targets are dropped and the join retried.
#
# NOTE(review): the local `connected` is never read, and the `|t|` block
# parameter of with_timeout shadows the enclosing target `t` — both are
# candidates for cleanup.
def join_cluster_via(targets)
  connected = false

  logger.debug(logloc) { "Attempting to join cluster via targets #{targets.inspect}" }

  # I call this algorithm "happy joinballs".
  #
  # I will not be taking questions at this time.
  conn_tasks = targets.map do |t|
    @async_task.async do |subtask|
      logger.debug(logloc) { "Initiating happy joinballs connection to #{t[:address]}:#{t[:port]}" }

      begin
        conn = @network.connect(address: t[:address], port: t[:port])
      rescue StandardError => ex
        logger.warn(logloc) { "Failed to connect to #{t[:address]}:#{t[:port]}: #{ex.class} (#{ex.message})" }
        if targets.length == 1
          logger.warn(logloc) { "Cluster leader not responsive; restarting join attempt" }
          join_or_create_cluster
        end

        next
      end

      # If we get here, we have won the happy joinballs race
      conn_tasks.each do |ct|
        next if ct == Async::Task.current

        ct.stop
      end

      logger.debug(logloc) { "Sending a join request to #{conn.peer_info}" }
      reply = subtask.with_timeout(5) do |t|
        conn.rpc(Message::JoinRequest.new(node_info: node_info))
      rescue Async::TimeoutError
        nil
      end

      if reply&.success
        logger.info(logloc) { "Joined cluster; #{reply.inspect}" }
        become_follower
      elsif reply&.leader_info
        logger.debug(logloc) { "Redirected to leader #{reply.leader_info.inspect}" }
        join_cluster_via([reply.leader_info])
      else
        logger.error(logloc) { "Cluster join via #{t.inspect} failed: #{reply.nil? ? "RPC timeout" : reply.inspect}" }
        # Obviously that target is busticated, so we'll retry without it.
        # The problem is that the busticated target might have been a
        # leader we were erroneously redirected to; in that case, the
        # targets list will have only one node, and we'll need to go
        # back to joinballing everyone. Hopefully by now the cluster
        # will have agreed on a *live* leader for us to join via.
        if targets.length == 1
          join_cluster_via(join_targets - [t])
        else
          join_cluster_via(targets - [t])
        end
      end
    end
  end

  conn_tasks.each(&:wait)
end
808
+
809
# Main inbound-RPC loop: for every message received on the network, count
# it, dispatch it to the matching process_* handler, and send whatever
# reply the handler returned.  A nil reply means the handler is deferring
# its answer (config changes) or the message was unrecognised.
def process_rpc_requests
  logger.debug(logloc) { "Commencing to process RPC requests" }
  @network.each_message do |msg, conn|
    @metrics.messages_received.increment(labels: { type: msg.class.to_s.split("::").last })

    logger.debug(logloc) { "Received #{msg} from #{conn.peer_info}" }
    reply = case msg
    when Message::AppendEntriesRequest
      process_append_entries_request(msg, conn)
    when Message::CommandRequest
      process_command_request(msg, conn)
    when Message::JoinRequest
      process_join_request(msg, conn)
    when Message::NodeRemovalRequest
      process_node_removal_request(msg, conn)
    when Message::ReadRequest
      process_read_request(msg, conn)
    when Message::VoteRequest
      process_vote_request(msg, conn)
    when Message::InstallSnapshotRequest
      process_install_snapshot_request(msg, conn)
    else
      logger.warn(logloc) { "Unexpected #{msg.class.to_s.split("::").last} received from #{conn.peer_info}" }
      nil
    end

    if reply
      logger.debug(logloc) { "Sending reply #{reply.inspect} to #{conn.peer_info}" }
      conn.send_reply(reply)
    else
      logger.warn(logloc) { "No immediate reply to #{msg.inspect} from #{conn.peer_info}" }
    end
  end
end
843
+
844
# Handle an inbound JoinRequest.  Followers redirect to the leader; the
# leader queues the join (config changes must run one at a time through the
# joint-consensus saga) and defers its reply until the change replicates.
# Returns nil when no immediate reply should be sent.
def process_join_request(msg, conn)
  logger.debug(logloc) { "Join request #{msg.inspect} received from #{conn.peer_info}" }

  if follower?
    logger.debug(logloc) { "Not leader; redirecting" }
    Message::JoinReply.new(success: false, leader_info: @leader_info)
  elsif leader?
    logger.debug(logloc) { "Queueing join request" }
    @config_change_queue << ConfigChangeQueueEntry::AddNode.new(msg, conn)

    # Only kick off processing when nothing else is queued or in flight;
    # otherwise the in-progress saga will drain the queue as it completes.
    if @config_change_queue.length == 1 && @config_change_request_in_progress.nil?
      logger.debug(logloc) { "Triggering new config change queue cascade" }
      process_config_change_queue
    end

    # No immediate reply; will be sent once the join is completed
    nil
  else
    logger.debug(logloc) { "Ignoring join request from #{msg.node_info} because not leader or follower" }
    nil
  end
end
866
+
867
# Handle an inbound NodeRemovalRequest.  Followers redirect to the leader;
# the leader queues the removal and defers its reply until the config
# change replicates.  Returns nil when no immediate reply should be sent.
def process_node_removal_request(msg, conn)
  logger.debug(logloc) { "Node removal request #{msg.inspect} received from #{conn.peer_info}" }

  if follower?
    logger.debug(logloc) { "Not leader; redirecting" }
    Message::NodeRemovalReply.new(success: false, leader_info: @leader_info)
  elsif leader?
    logger.debug(logloc) { "Queueing node removal request" }
    @config_change_queue << ConfigChangeQueueEntry::RemoveNode.new(msg, conn)

    # CONSISTENCY FIX: match process_join_request by also requiring that no
    # change is currently in progress; previously this checked queue length
    # alone and could invoke process_config_change_queue mid-saga (which
    # logs an error and bails).
    if @config_change_queue.length == 1 && @config_change_request_in_progress.nil?
      logger.debug(logloc) { "Triggering new config change queue cascade" }
      process_config_change_queue
    end

    # No immediate reply; will be sent once the removal is completed
    nil
  else
    logger.debug(logloc) { "Ignoring node removal request from #{msg.node_info} because not leader or follower" }
    nil
  end
end
889
+
890
# Pop and action the next entry from @config_change_queue.
#
# Raft permits only one cluster-membership change to be in flight at a
# time, so queued changes are handled one-by-one: the head of the queue
# becomes @config_change_request_in_progress, and a new cluster
# configuration is proposed as a log entry.  The method recurses to drain
# the queue whenever an entry can be disposed of immediately (redirect,
# no-op add/remove, or unknown entry type); otherwise the in-progress
# request is completed elsewhere once the config entry is dealt with.
def process_config_change_queue
  if @config_change_queue.empty?
    logger.debug(logloc) { "No more entries in the config change queue" }
    return
  end

  # Re-entrancy guard: never start a second change while one is in flight.
  if @config_change_request_in_progress
    logger.error(logloc) { "Change queue processing requested while change request in progress!" }
    return
  end

  @config_change_request_in_progress = @config_change_queue.shift
  logger.debug(logloc) { "Processing config change queue entry #{@config_change_request_in_progress.inspect}" }

  # If we have lost leadership since the request was queued, redirect the
  # requester to the current leader and move on to the next entry.
  # NOTE(review): @cc_sem appears to serialise reply-sending and clearing
  # of the in-progress slot — confirm its semantics at its definition.
  unless leader?
    @cc_sem.acquire do
      @config_change_request_in_progress.send_redirect_reply(@leader_info)
      @config_change_request_in_progress = nil
    end
    process_config_change_queue
    return
  end

  case @config_change_request_in_progress
  when ConfigChangeQueueEntry::AddNode
    if @config.nodes.include?(@config_change_request_in_progress.node_info)
      # "Dude, you're *already* part of the cluster! Duuuuuuuuuuuuuuude!"
      @cc_sem.acquire do
        @config_change_request_in_progress.send_successful_reply
        @config_change_request_in_progress = nil
      end
      process_config_change_queue
    else
      logger.debug(logloc) { "Transitioning configuration to add #{@config_change_request_in_progress.node_info.inspect}" }

      # Propose the new membership; the request stays "in progress" (and
      # the queue paused) until the config log entry is processed.
      @config.add_node(@config_change_request_in_progress.node_info)
      propose_log_entry(LogEntry::ClusterConfiguration.new(term: @current_term, config: @config))
      @config_index = @log.last_index
    end
  when ConfigChangeQueueEntry::RemoveNode
    if !@config.nodes.include?(@config_change_request_in_progress.node_info)
      # Removing a node that isn't a member is trivially "successful".
      @cc_sem.acquire do
        @config_change_request_in_progress.send_successful_reply
        @config_change_request_in_progress = nil
      end
      process_config_change_queue
    else
      logger.debug(logloc) { "Transitioning configuration to remove #{@config_change_request_in_progress.node_info.inspect}" }

      @config.remove_node(@config_change_request_in_progress.node_info)
      propose_log_entry(LogEntry::ClusterConfiguration.new(term: @current_term, config: @config))
      @config_index = @log.last_index
    end
  else
    logger.error(logloc) { "Unsupported change request type #{@config_change_request_in_progress.class}; this really shouldn't ever happen, bug report welcome" }
    logger.debug(logloc) { "Unsupported change request was #{@config_change_request_in_progress.inspect}" }
    @config_change_request_in_progress = nil
    process_config_change_queue
  end
end
950
+
951
# Handle an AppendEntries RPC from a leader (the Raft receiver rules,
# raft.pdf figure 2).
#
# Returns a Message::AppendEntriesReply to send back, or nil when the
# connection has been closed instead of replying.
def process_append_entries_request(msg, conn)
  logger.debug(logloc) { "Processing append_entries request #{msg.inspect} from #{conn.peer_info}" }

  if msg.term < @current_term
    # Stale leader; report our term so it can step down.
    logger.debug(logloc) { "AppendEntries request term less than our current term #{@current_term}" }
    Message::AppendEntriesReply.new(success: false, term: @current_term)
  else
    # Record leader liveness; consulted by process_vote_request to ignore
    # disruptive vote requests while we have a live leader.
    @last_append = Time.now

    if !@log.has_entry?(msg.prev_log_index)
      # We're missing entries; report our last index so the leader backs up.
      logger.debug(logloc) { "We don't have log entry prev_log_index=#{msg.prev_log_index}; asking for more entries" }
      Message::AppendEntriesReply.new(success: false, term: @current_term, last_index: @log.last_index)
    elsif @log.snapshotted_entry?(msg.prev_log_index + 1)
      logger.error(logloc) { "Got AppendEntriesRequest with a prev_log_index=#{msg.prev_log_index} that's buried in the snapshot" }
      # Closing the connection to the leader will cause it to recycle the
      # follower state, which will reset it to start sending us AppendEntries
      # from the most recent entry.
      conn.close
    elsif msg.prev_log_term != @log.entry_term(msg.prev_log_index)
      # Log fork: our entry at prev_log_index disagrees with the leader's
      # term for it, so discard from that point and report failure.
      logger.debug(logloc) { "AppendEntries log fork; msg.prev_log_index=#{msg.prev_log_index} msg.prev_log_term=#{msg.prev_log_term} @log.entry_term(msg.prev_log_index=#{@log.entry_term(msg.prev_log_index)} @log.last_index=#{@log.last_index}" }
      @log.truncate_to(msg.prev_log_index - 1)
      Message::AppendEntriesReply.new(success: false, term: @current_term)
    else
      @leader_info = msg.leader_info

      # A higher term — or an equal term while we're a candidate — means a
      # legitimate leader exists; fall in line behind it.
      if msg.term > @current_term || (candidate? && msg.term == @current_term)
        logger.debug(logloc) { "Received term-updating AppendEntries; msg.term=#{msg.term} @current_term=#{@current_term} node_info.mode=#{node_info.instance_variable_get(:@mode).inspect}" }
        new_term(msg.term)
        become_follower
      end

      # Push out the election timer; @heartbeat_timeout looks to be a
      # range, with #rand picking a value within it (freedom_patches/range).
      @heartbeat_timeout_time = Time.now + @heartbeat_timeout.rand

      msg.entries.each.with_index do |new_entry, i|
        idx = msg.prev_log_index + i + 1 # Dratted 1-index addressing
        process_log_entry(new_entry, idx)
      end

      # Advance our commit point to the leader's, capped at what we
      # actually hold in the log.
      new_commit_point = [@log.last_index, msg.leader_commit].min

      if new_commit_point > @commit_index
        commit_entries_to(new_commit_point)
      end

      Message::AppendEntriesReply.new(success: true, term: @current_term)
    end
  end
end
999
+
1000
# Apply a single log entry received from the leader at the given (1-based)
# log index, following the Raft receiver rules (raft.pdf, figure 2):
#
# * conflicting entry (same index, different term): truncate our log from
#   that point, then append the leader's entry in its place;
# * missing entry: append it;
# * already present with a matching term: skip it.
#
# Previously the conflict branch truncated but dropped the leader's entry,
# which would cause any later entries in the same AppendEntries batch to
# be appended one index too early; we now fall through to the append path
# after truncating.
def process_log_entry(entry, log_index)
  logger.debug(logloc) { "Processing #{entry.inspect} at log index #{log_index}" }

  existing_entry = @log[log_index]

  if existing_entry && existing_entry.term != entry.term
    logger.debug(logloc) { "Discovered fork at #{log_index} (existing_entry=#{existing_entry.inspect} new_entry=#{entry.inspect}); discarding our remaining log entries" }
    @log.truncate_to(log_index - 1)
    # The leader's entry replaces the truncated one; treat the slot as
    # empty so the append path below runs.
    existing_entry = nil
  end

  if existing_entry.nil?
    @log.append(entry)

    persist_to_disk(process_log_entry: [entry, log_index])

    # Configuration changes take place immediately, not after consensus;
    # raft.pdf p11, "a server always uses the latest configuration in its
    # log, regardless of whether the entry is committed".
    if LogEntry::ClusterConfiguration === entry
      logger.debug(logloc) { "Using new configuration from log entry ##{log_index}" }
      @config = entry.config
      @config_index = log_index
    end
  else
    logger.debug(logloc) { "Already got log entry ##{log_index}; skipping" }
  end
end
1026
+
1027
# Commit every not-yet-committed log entry up to and including +idx+:
# state machine commands are applied (and their IDs recorded for
# deduplication), @commit_index is advanced entry-by-entry under the
# state machine mutex, and the new commit point is persisted at the end.
def commit_entries_to(idx)
  (@commit_index + 1).upto(idx) do |i|
    @sm_mutex.synchronize do
      logger.debug(logloc) { "Committing log entry ##{i}" }

      entry = @log[i]

      case entry
      when LogEntry::StateMachineCommand
        logger.debug(logloc) { "Applying state machine command #{entry.command}" }
        @state_machine.process_command(entry.command)
        @last_command_ids[entry.node_name] = entry.id
      else
        # Non-command entries (configuration, null) need no apply step.
        logger.debug(logloc) { "Entry ##{i} is a #{entry.class}; no commit action necessary" }
      end

      @commit_index = i
      @metrics.commit_index.set(i)
    end
  end

  persist_to_disk(commit_entries_to: [idx])
end
1047
+
1048
# Handle a client state-machine command.
#
# Followers redirect to the leader; the leader deduplicates by command ID
# (replying success immediately for an already-applied command), otherwise
# records the connection and proposes the command as a log entry, deferring
# the reply until replication completes.  Any other mode replies failure.
#
# Returns a Message::CommandReply, or nil when the reply is deferred.
def process_command_request(msg, conn)
  logger.debug(logloc) { "Command request #{msg.inspect} received from #{conn.peer_info}" }

  return Message::CommandReply.new(success: false, leader_info: @leader_info) if follower?
  return Message::CommandReply.new(success: false) unless leader?

  # Idempotency: a command we have already applied is simply re-acked.
  if @last_command_ids[msg.node_name] == msg.id
    Message::CommandReply.new(success: true)
  else
    logger.debug(logloc) { "Noting that #{msg.id} is a command in progress" }
    @commands_in_progress[msg.id] = conn
    propose_log_entry(LogEntry::StateMachineCommand.new(term: @current_term, command: msg.command, id: msg.id, node_name: msg.node_name))

    # Deferred reply to log entry commit will occur after replication is complete
    nil
  end
end
1068
+
1069
# Handle a RequestVote RPC from a candidate.
#
# A vote is granted only when the candidate's term matches ours, we have
# not already voted for someone else this term, and the candidate's log is
# at least as up-to-date as our own (raft.pdf section 5.4.1).  Requests
# arriving while we still have a live leader are ignored outright, to stop
# rogue nodes from disrupting a healthy cluster.
#
# Returns a Message::VoteReply, or nil when the request is ignored.
def process_vote_request(msg, conn)
  # Avoid rogue servers disrupting the cluster by calling votes
  # just because they can.
  if Time.now - @last_append < @heartbeat_timeout.first
    logger.debug(logloc) { "Ignoring vote request from scurvy rogue #{msg.candidate_info}" }
    return nil
  end

  new_term(msg.term) if msg.term > @current_term

  current_term = msg.term == @current_term
  ballot_available = @voted_for.nil? || @voted_for == msg.candidate_info
  log_up_to_date = (msg.last_log_index >= @log.last_index && msg.last_log_term == @log.last_entry_term) ||
                   msg.last_log_term > @log.last_entry_term

  if current_term && ballot_available && log_up_to_date
    @voted_for = msg.candidate_info
    become_follower
    logger.debug(logloc) { "Voted for #{msg.candidate_info.inspect} for term #{msg.term} leader" }
    Message::VoteReply.new(term: @current_term, vote_granted: true)
  else
    logger.debug(logloc) { "Rejected #{msg.candidate_info.inspect} for term #{msg.term} leader; @current_term=#{@current_term} @voted_for=#{@voted_for.inspect} msg.last_log_index=#{msg.last_log_index} @log.last_index=#{@log.last_index} msg.last_log_term=#{msg.last_log_term} @log.last_entry_term=#{@log.last_entry_term}" }
    Message::VoteReply.new(term: @current_term, vote_granted: false)
  end
end
1093
+
1094
# Handle a read-barrier request: confirm we are still the legitimate
# leader (by gathering a quorum of AppendEntries acks) and that our commit
# index matches the one the reader captured.
#
# Returns a Message::ReadReply for the immediate cases, or nil when the
# reply is deferred until quorum responses arrive.
def process_read_request(msg, conn)
  if !leader?
    Message::ReadReply.new(success: false, leader_info: @leader_info)
  elsif @commit_index > msg.commit_index
    # We already *know* this is never going to succeed, may as well save ourselves
    # the hassle
    logger.debug(logloc) { "ReadRequest is for an out-of-date commit_index; nopeing out" }
    Message::ReadReply.new(success: false)
  elsif @config.nodes.length == 1
    # Flyin' solo!
    if @commit_index == msg.commit_index
      Message::ReadReply.new(success: true)
    else
      Message::ReadReply.new(success: false)
    end
  else
    # We count as our own first responder.
    responders = [node_info]

    # NOTE(review): the block parameter node_info shadows the node_info
    # method used to seed responders above — intentional here, but easy to
    # misread.
    issue_append_entries_to_cluster do |reply, node_info|
      # responders will be set to nil when quorum has been met, so all remaining
      # AE replies can be quietly ignored
      next if responders.nil?

      if reply.success
        responders << node_info
        logger.debug(logloc) { "Checking if #{responders.inspect} meets read request quorum" }
        if @config.quorum_met?(responders)
          logger.debug(logloc) { "Have met read request quorum; reply sent" }
          # Quorum proves leadership; success still requires the commit
          # index to be unchanged from what the reader saw.
          if @commit_index == msg.commit_index
            conn.send_reply(Message::ReadReply.new(success: true))
          else
            conn.send_reply(Message::ReadReply.new(success: false))
          end
          responders = nil
        else
          logger.debug(logloc) { "Not yet met read request quorum" }
        end
      end
    end

    # Deferred reply
    nil
  end
end
1138
+
1139
# Handle an InstallSnapshot RPC from the leader: replace our state machine
# and log with the supplied snapshot and acknowledge with our term.
# A stale-term request is acknowledged (so the sender learns our term) but
# is otherwise ignored.
def process_install_snapshot_request(msg, conn)
  if msg.term < @current_term
    conn.send_reply(Message::InstallSnapshotReply.new(term: @current_term))
    nil
  else
    @sm_mutex.synchronize do
      # Rebuild the state machine from the snapshot data and reset the log
      # and commit point to the snapshot's last included entry.
      @state_machine = @state_machine_class.new(snapshot: msg.data)
      @log.new_snapshot(msg.last_included_term, msg.last_included_index)
      @commit_index = msg.last_included_index
    end

    conn.send_reply(Message::InstallSnapshotReply.new(term: @current_term))
  end
end
1153
+
1154
# Start a new leader election: bump the term, become a candidate, vote for
# ourselves, and asynchronously canvas every other node for its vote.
# We become leader as soon as a quorum of votes (including our own) is
# gathered, or revert to follower if any reply reveals a higher term.
# In a single-node cluster we skip straight to leadership.
def trigger_election
  new_term(@current_term + 1)
  logger.debug(logloc) { "Initiating election for term #{@current_term}" }
  become_candidate

  if @config.nodes.length == 1
    # Flyin' solo!
    logger.debug(logloc) { "No need for an election, as we're in single-node mode" }
    become_leader
  else
    # Capture the term for the VoteRequests, in case @current_term moves
    # on while the async canvassing below is still running.
    election_term = @current_term
    electors = [node_info]
    @voted_for = node_info

    logger.debug(logloc) { "Canvassing the electorate" }
    @config.nodes.each do |n|
      next if n == node_info

      # One async task per peer; all tasks share (and mutate) electors.
      @async_task.async do
        logger.debug(logloc) { "Sending vote request to #{n.inspect}" }
        begin
          reply = @peers[n].rpc(Message::VoteRequest.new(term: election_term, candidate_info: node_info, last_log_index: @log.last_index, last_log_term: @log.last_entry_term))
        rescue => ex
          # Unreachable peer: drop the cached connection so it is re-dialed
          # next time, and count no vote from it.
          log_exception(ex) { "Failed to send vote to #{n.inspect}" }
          if @peers.key?(n)
            @peers[n].conn.close
            @peers.delete(n)
          end
          next
        end

        # electors is nilled once the election is decided (won or canceled).
        if electors.nil?
          # No need to process a vote if we're not running an election at the moment
          next
        end

        unless candidate?
          logger.debug(logloc) { "Received ballot from #{n.inspect}: #{reply.inspect} while in #{@mode} mode" }
          next
        end

        logger.debug(logloc) { "Processing vote #{reply.inspect} from #{n.inspect}" }
        if reply.nil?
          logger.debug(logloc) { "Received no reply to vote from #{n.inspect}" }
        elsif reply.term > @current_term
          # Someone out there has a newer term; abandon the election.
          logger.debug(logloc) { "Received higher term from #{n.inspect}; canceling election" }
          new_term(reply.term)
          become_follower
          electors = nil
        elsif reply.vote_granted
          logger.debug(logloc) { "Received the vote of #{n.inspect}" }
          electors << n

          logger.debug(logloc) { "Got #{electors.length} votes so far" }

          if @config.quorum_met?(electors)
            become_leader
            electors = nil
          end
        end
      end
    end
  end
end
1218
+ end
1219
+
1220
+ require_relative "./evinrude/backoff"
1221
+ require_relative "./evinrude/config_change_queue_entry/add_node"
1222
+ require_relative "./evinrude/config_change_queue_entry/remove_node"
1223
+ require_relative "./evinrude/cluster_configuration"
1224
+ require_relative "./evinrude/freedom_patches/range"
1225
+ require_relative "./evinrude/log"
1226
+ require_relative "./evinrude/log_entries"
1227
+ require_relative "./evinrude/messages"
1228
+ require_relative "./evinrude/metrics"
1229
+ require_relative "./evinrude/network"
1230
+ require_relative "./evinrude/node_info"
1231
+ require_relative "./evinrude/peer"
1232
+ require_relative "./evinrude/snapshot"
1233
+ require_relative "./evinrude/state_machine/register"