evinrude 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62)
  1. checksums.yaml +7 -0
  2. data/.editorconfig +23 -0
  3. data/.gitignore +6 -0
  4. data/.yardopts +1 -0
  5. data/CODE_OF_CONDUCT.md +49 -0
  6. data/CONTRIBUTING.md +10 -0
  7. data/LICENCE +674 -0
  8. data/README.md +410 -0
  9. data/evinrude.gemspec +42 -0
  10. data/lib/evinrude.rb +1233 -0
  11. data/lib/evinrude/backoff.rb +19 -0
  12. data/lib/evinrude/cluster_configuration.rb +162 -0
  13. data/lib/evinrude/config_change_queue_entry.rb +19 -0
  14. data/lib/evinrude/config_change_queue_entry/add_node.rb +13 -0
  15. data/lib/evinrude/config_change_queue_entry/remove_node.rb +14 -0
  16. data/lib/evinrude/freedom_patches/range.rb +5 -0
  17. data/lib/evinrude/log.rb +102 -0
  18. data/lib/evinrude/log_entries.rb +3 -0
  19. data/lib/evinrude/log_entry.rb +13 -0
  20. data/lib/evinrude/log_entry/cluster_configuration.rb +15 -0
  21. data/lib/evinrude/log_entry/null.rb +6 -0
  22. data/lib/evinrude/log_entry/state_machine_command.rb +13 -0
  23. data/lib/evinrude/logging_helpers.rb +40 -0
  24. data/lib/evinrude/message.rb +19 -0
  25. data/lib/evinrude/message/append_entries_reply.rb +13 -0
  26. data/lib/evinrude/message/append_entries_request.rb +18 -0
  27. data/lib/evinrude/message/command_reply.rb +13 -0
  28. data/lib/evinrude/message/command_request.rb +18 -0
  29. data/lib/evinrude/message/install_snapshot_reply.rb +13 -0
  30. data/lib/evinrude/message/install_snapshot_request.rb +18 -0
  31. data/lib/evinrude/message/join_reply.rb +13 -0
  32. data/lib/evinrude/message/join_request.rb +18 -0
  33. data/lib/evinrude/message/node_removal_reply.rb +13 -0
  34. data/lib/evinrude/message/node_removal_request.rb +18 -0
  35. data/lib/evinrude/message/read_reply.rb +13 -0
  36. data/lib/evinrude/message/read_request.rb +18 -0
  37. data/lib/evinrude/message/vote_reply.rb +13 -0
  38. data/lib/evinrude/message/vote_request.rb +18 -0
  39. data/lib/evinrude/messages.rb +14 -0
  40. data/lib/evinrude/metrics.rb +50 -0
  41. data/lib/evinrude/network.rb +69 -0
  42. data/lib/evinrude/network/connection.rb +144 -0
  43. data/lib/evinrude/network/protocol.rb +69 -0
  44. data/lib/evinrude/node_info.rb +35 -0
  45. data/lib/evinrude/peer.rb +50 -0
  46. data/lib/evinrude/resolver.rb +96 -0
  47. data/lib/evinrude/snapshot.rb +9 -0
  48. data/lib/evinrude/state_machine.rb +15 -0
  49. data/lib/evinrude/state_machine/register.rb +25 -0
  50. data/smoke_tests/001_single_node_cluster.rb +20 -0
  51. data/smoke_tests/002_three_node_cluster.rb +43 -0
  52. data/smoke_tests/003_spill.rb +25 -0
  53. data/smoke_tests/004_stale_read.rb +67 -0
  54. data/smoke_tests/005_sleepy_master.rb +28 -0
  55. data/smoke_tests/006_join_via_follower.rb +26 -0
  56. data/smoke_tests/007_snapshot_madness.rb +97 -0
  57. data/smoke_tests/008_downsizing.rb +43 -0
  58. data/smoke_tests/009_disaster_recovery.rb +46 -0
  59. data/smoke_tests/999_final_smoke_test.rb +279 -0
  60. data/smoke_tests/run +22 -0
  61. data/smoke_tests/smoke_test_helper.rb +199 -0
  62. metadata +318 -0
@@ -0,0 +1,1233 @@
1
+ require "async"
2
+ require "async/dns"
3
+ require "fiber"
4
+ require "logger"
5
+ require "pathname"
6
+ require "securerandom"
7
+ require "tempfile"
8
+
9
+ require_relative "./evinrude/logging_helpers"
10
+
11
+ class Evinrude
12
+ include Evinrude::LoggingHelpers
13
+
14
+ class Error < StandardError; end
15
+
16
+ class NoLeaderError < Error; end
17
+
18
+ class NodeExpiredError < Error; end
19
+
20
+ attr_reader :node_name
21
+
22
+ def initialize(join_hints: [], shared_keys:, state_machine: Evinrude::StateMachine::Register,
23
+ logger: Logger.new("/dev/null"), node_name: nil, storage_dir: nil,
24
+ heartbeat_interval: 0.25, heartbeat_timeout: 1..2,
25
+ listen: {}, advertise: {}, metrics_registry: Prometheus::Client::Registry.new)
26
+ @join_hints, @keys, @logger, @heartbeat_interval, @heartbeat_timeout = join_hints, shared_keys, logger, heartbeat_interval, heartbeat_timeout
27
+
28
+ @metrics = Evinrude::Metrics.new(metrics_registry)
29
+
30
+ @listen, @advertise = listen, advertise
31
+ @listen[:address] ||= "::"
32
+ @listen[:port] ||= 0
33
+
34
+ if storage_dir
35
+ @storage_dir = Pathname.new(storage_dir)
36
+ end
37
+
38
+ snapshot = if @storage_dir
39
+ if !@storage_dir.exist?
40
+ @storage_dir.mkdir
41
+ end
42
+
43
+ if !@storage_dir.directory?
44
+ raise ArgumentError, "Storage directory #{@storage_dir} isn't *actually* a directory"
45
+ end
46
+
47
+ snapshot_file = @storage_dir.join("snapshot.yaml")
48
+
49
+ if snapshot_file.exist?
50
+ @metrics.snapshot_file_size.set(snapshot_file.stat.size)
51
+ YAML.load_file(snapshot_file)
52
+ end
53
+ end
54
+
55
+ @state_machine_class = state_machine
56
+
57
+ if snapshot
58
+ @node_name = snapshot.node_name
59
+ @state_machine = @state_machine_class.new(snapshot: snapshot.state)
60
+ @last_command_ids = snapshot.last_command_ids
61
+ else
62
+ @node_name = node_name || SecureRandom.uuid
63
+ @state_machine = @state_machine_class.new
64
+ @last_command_ids = {}
65
+ end
66
+
67
+ @sm_mutex = Mutex.new
68
+
69
+ if snapshot
70
+ @config = snapshot.cluster_config
71
+ @config_index = snapshot.cluster_config_index
72
+ @config.metrics = @metrics
73
+ @config.logger = logger
74
+ else
75
+ @config = Evinrude::ClusterConfiguration.new(logger: logger, metrics: @metrics)
76
+ @config_index = 0
77
+ end
78
+
79
+ @last_append = Time.at(0)
80
+ @current_term = 0
81
+ @voted_for = nil
82
+ @mode = :init
83
+
84
+ @metrics.term.set(0)
85
+
86
+ if snapshot
87
+ logger.debug(logloc) { "Configuring log from snapshot; snapshot_last_term=#{snapshot.last_term} snapshot_last_index=#{snapshot.last_index}" }
88
+ @log = Evinrude::Log.new(snapshot_last_term: snapshot.last_term, snapshot_last_index: snapshot.last_index, logger: logger)
89
+ else
90
+ @log = Evinrude::Log.new(logger: logger)
91
+ end
92
+
93
+ if snapshot
94
+ logger.debug(logloc) { "Setting commit_index to #{snapshot.last_index} from snapshot" }
95
+ @commit_index = snapshot.last_index
96
+ else
97
+ @commit_index = 0
98
+ end
99
+
100
+ @metrics.commit_index.set(@commit_index)
101
+
102
+ @peers = Hash.new do |h, k|
103
+ backoff = Evinrude::Backoff.new
104
+
105
+ peer_conn = @network.connect(address: k.address, port: k.port)
106
+
107
+ h[k] = Peer.new(metrics: @metrics, conn: peer_conn, node_info: k, next_index: @log.last_index + 1)
108
+ end
109
+
110
+ @config_change_queue = []
111
+ @config_change_request_in_progress = nil
112
+ @cc_sem = Async::Semaphore.new
113
+ end
114
+
115
# Submit a state machine command to the cluster, blocking until the leader
# reports it committed.  The randomly-generated command ID stays constant
# across retries so the cluster can deduplicate resubmissions.
#
# Returns true once the command has been applied.
def command(s)
  @metrics.command_execution.measure do
    Async(logger: logger) do |task|
      request_id = SecureRandom.uuid

      loop do
        reply = rpc_to_leader(Message::CommandRequest.new(command: s, id: request_id, node_name: @node_name), task)

        break true if reply.success
      end
    end.result
  end
end
130
+
131
# Return a deep copy of the state machine's current state, verified against
# the leader to be no staler than our local commit index (a linearizable
# read rather than a purely local one).  Retries until the leader confirms.
def state
  @metrics.read_state.measure do
    Async(logger: logger) do |task|
      loop do
        # Disturbingly, a YAML round-trip appears to be one of the best
        # available ways to make a guaranteed deep copy of an arbitrary
        # object; capture state and commit index under the same lock.
        state_object, commit_index = @sm_mutex.synchronize do
          [YAML.load(@state_machine.current_state.to_yaml), @commit_index]
        end

        logger.debug(logloc) { "(in #{@node_name}) Checking if #{state_object.inspect} at commit_index=#{commit_index} is the most up-to-date state" }

        reply = rpc_to_leader(Evinrude::Message::ReadRequest.new(commit_index: commit_index), task)

        break state_object if reply.success
      end
    end.result
  end
end
156
+
157
# Start the node: replay any persisted log, bring up the network listener,
# begin servicing RPCs, then join (or found) the cluster.  Blocks for the
# lifetime of the node; any fatal error is logged and re-raised.
def run
  logger.info(logloc) { "Evinrude node #{@node_name} starting up" }

  @metrics.start_time.set(Time.now.to_f)

  @metrics.log_loaded_from_disk.set(@storage_dir ? 1 : 0)
  load_log_from_disk if @storage_dir

  Async do |task|
    @async_task = task
    @network = Network.new(keys: @keys, logger: logger, metrics: @metrics, listen: @listen, advertise: @advertise).start

    logger.info(logloc) { "Node #{@node_name} listening on #{address}:#{port}" }

    @metrics.info.set(1, labels: { node_name: @node_name, listen_address: @network.listen_address, listen_port: @network.listen_port, advertise_address: address, advertise_port: port })

    # RPC servicing runs concurrently with the join/bootstrap saga.
    task.async { process_rpc_requests }

    join_or_create_cluster
  end.return
rescue => ex
  log_exception(ex) { "Fatal error" }
  raise
end
185
+
186
# Remove a node from the cluster configuration.
#
# With unsafe: true the node is stripped from the *local* config only
# (disaster-recovery hammer); otherwise the removal is requested from the
# leader and retried until it reports success.
def remove_node(node_info, unsafe: false)
  unless unsafe
    @metrics.remove_node.measure do
      Async(logger: logger) do |task|
        loop do
          logger.debug(logloc) { "(in #{@node_name}) Requesting removal of #{node_info.inspect}" }

          reply = rpc_to_leader(Evinrude::Message::NodeRemovalRequest.new(node_info: node_info, unsafe: unsafe), task)

          break true if reply.success
        end
      end.result
    end
  else
    logger.warn(logloc) { "Unsafely removing node #{node_info.inspect} from the local configuration" }

    @config.remove_node(node_info, force: true)
  end
end
207
+
208
# Address this node advertises to peers; nil until the network is started.
def address
  @network && @network.advertised_address
end
211
+
212
# Port this node advertises to peers; nil until the network is started.
def port
  @network && @network.advertised_port
end
215
+
216
# All nodes in the current cluster configuration.
def nodes
  @config.nodes
end
219
+
220
# True when this node currently believes it is the cluster leader.
def leader?
  :leader == @mode
end
223
+
224
# True when this node is currently acting as a follower.
def follower?
  :follower == @mode
end
227
+
228
# True when this node is mid-election, standing as a candidate.
def candidate?
  :candidate == @mode
end
231
+
232
# True while the node is still initializing (not yet part of a cluster).
def init?
  :init == @mode
end
235
+
236
# A non-leader node "expires" when its heartbeat timer lapses without
# hearing from a leader -- it can no longer assume it is a functioning
# member of the cluster.  Leaders never expire.
def expired?
  return false if leader?
  return false unless @heartbeat_timeout_time

  @heartbeat_timeout_time < Time.now
end
239
+
240
# This node's identity (advertised address, port, and name), memoized on
# first use.  Unavailable before the network starts, because the advertised
# address/port may only be allocated at bind time.
def node_info
  raise RuntimeError, "Cannot determine node info until the network is up" if @network.nil?

  @node_info ||= Evinrude::NodeInfo.new(address: address, port: port, name: @node_name)
end
247
+
248
+ private
249
+
250
# Replay persisted log operations from <storage_dir>/log.yaml, if present.
#
# Each YAML document in the file is a one-entry hash mapping an operation
# (:process_log_entry or :commit_entries_to) to its argument list; anything
# else indicates corruption/tampering and aborts the process.
def load_log_from_disk
  log_file = @storage_dir.join("log.yaml")

  return unless log_file.exist?

  logger.debug(logloc) { "Loading log entries from #{log_file}" }
  @metrics.log_file_size.set(log_file.stat.size)

  # Temporarily unsetting @storage_dir prevents the calls we make from
  # writing all the log entries straight back to disk again
  tmp_storage_dir, @storage_dir = @storage_dir, nil

  begin
    log_file.open do |fd|
      YAML.load_stream(fd) do |entry|
        unless entry.is_a?(Hash)
          logger.fatal(logloc) { "SHENANIGAN ALERT: persisted log entry #{entry} is not a hash!" }
          exit 42
        end

        op, args = entry.to_a.first

        unless %i{process_log_entry commit_entries_to}.include?(op)
          logger.fatal(logloc) { "SHENANIGAN ALERT: log includes unexpected operation #{op.inspect}(*#{args.inspect})!!!" }
          exit 42
        end

        logger.debug(logloc) { "Running #{op}(#{args.inspect}) from disk log" }

        self.__send__(op, *args)
      end
    end
  ensure
    @storage_dir = tmp_storage_dir
  end

  logger.debug(logloc) { "Completed log read" }
end
288
+
289
# Deliver msg to the cluster leader, retrying with backoff and following
# leader redirects until a definitive reply arrives.
#
# @param msg [Evinrude::Message] request to send.
# @param task [Async::Task] task used for sleeps and timeouts.
# @return [Evinrude::Message] the leader's definitive reply.
# @raise [NodeExpiredError] if this node drops out of the cluster while
#   retrying.
#
# Fixes relative to the original: the unused `command_id` local is gone,
# and the rescue path no longer dereferences `remote` when it is nil
# (previously a NoLeaderError would immediately crash with NoMethodError
# in the metrics call instead of being retried).
def rpc_to_leader(msg, task)
  backoff = Evinrude::Backoff.new
  reply = nil

  logger.debug(logloc) { "(in #{@node_name}) Sending message #{msg.inspect} to cluster leader" }

  loop do
    # No point talking to a leader until we're a functioning cluster member
    # (or provably expired, which aborts below).
    until leader? || follower? || expired?
      logger.debug(logloc) { "(in #{@node_name}) Waiting until we're in the cluster before sending RPC to leader" }
      task.sleep 0.5
    end

    begin
      # Prefer a redirect target from the previous reply over our own
      # notion of who the leader is.
      remote = reply&.leader_info || @leader_info

      if remote.nil?
        raise NoLeaderError, "No leader could be discerned for the cluster at present"
      end

      conn = @network.connect(address: remote.address, port: remote.port)

      reply = task.with_timeout(5) do
        conn.rpc(msg)
      end

      if reply.nil?
        logger.debug(logloc) { "(in #{@node_name}) RPC to leader #{remote.inspect} timed out" }
      elsif reply.leader_info
        logger.debug(logloc) { "(in #{@node_name}) Redirected to #{reply.leader_info.inspect}" }
        # No need to wait for the backoff time here
        next
      else
        logger.debug(logloc) { "(in #{@node_name}) RPC to leader returned #{reply.inspect}" }
        return reply
      end

      task.sleep backoff.wait_time
    rescue Evinrude::Error, Async::TimeoutError, Async::Wrapper::Cancelled, SystemCallError, IOError => ex
      # `remote` is nil when NoLeaderError fired before target selection.
      if remote
        @metrics.rpc_exception.increment(labels: { target: "#{remote.address}:#{remote.port}", node_name: remote.name, class: ex.class.to_s })
      end
      log_exception(ex) { "(in #{@node_name}) RPC to leader raised exception" }
      conn&.close
      reply = nil

      if expired?
        raise NodeExpiredError, "This node instance is not active in the cluster (mode=#{@mode})"
      end

      task.sleep backoff.wait_time
    end
  end
end
341
+
342
# Transition into leader mode: reset per-peer replication state, discard
# any config-change saga inherited from the previous leader, start the
# periodic AppendEntries heartbeat, and replicate a null entry to mark the
# start of our term.
def become_leader
  reset_peers

  logger.info(logloc) { "Assuming leadership of the cluster" }

  @mode = :leader
  @leader_info = node_info
  @commands_in_progress = {}

  @cc_sem.acquire do
    @config_change_queue = []
    @config_change_request_in_progress = nil
  end

  # Heartbeat loop: keeps follower election timers at bay while we reign.
  @async_task.async do |subtask|
    while leader?
      subtask.sleep @heartbeat_interval

      # Re-check after sleeping -- we may have been deposed in the interim.
      if leader?
        logger.debug(logloc) { "Triggering periodic AE heartbeat" }
        issue_append_entries_to_cluster
      end
    end
  end

  propose_log_entry(
    LogEntry::Null.new(term: @current_term) do
      logger.debug(logloc) { "Null log entry to mark start-of-term replicated" }
    end
  )

  @metrics.state.set(3)
end
376
+
377
# Transition into follower mode and arm the election watchdog: when the
# heartbeat timer lapses without word from a leader, trigger an election.
def become_follower
  reset_peers

  logger.info(logloc) { "Becoming follower" }

  @mode = :follower
  @heartbeat_timeout_time = Time.now + @heartbeat_timeout.rand

  @async_task.async do |subtask|
    while follower?
      logger.debug(logloc) { "#{@heartbeat_timeout_time - Time.now}s until heartbeat timer expires" }

      # Sleep until roughly when the timer is due; incoming heartbeats may
      # push @heartbeat_timeout_time further into the future meanwhile.
      subtask.sleep [0.01, @heartbeat_timeout_time - Time.now].max

      if follower? && @heartbeat_timeout_time < Time.now
        logger.info(logloc) { "Heartbeat timeout expired; triggering election" }
        trigger_election
      end
    end
  end

  @metrics.state.set(2)
end
401
+
402
# Transition into candidate mode.  If the election hasn't resolved (either
# way) before a randomized timeout, kick off a fresh election.
def become_candidate
  reset_peers

  logger.info(logloc) { "Becoming a candidate" }

  @mode = :candidate

  @async_task.async do |subtask|
    election_timeout = @heartbeat_timeout.rand
    logger.debug(logloc) { "Waiting #{election_timeout}s for election to complete" }
    subtask.sleep election_timeout

    # Still a candidate after the timeout means nobody won; go again.
    if candidate?
      logger.info(logloc) { "Election timeout expired without a leader being elected; triggering a new election" }
      trigger_election
    end
  end

  @metrics.state.set(1)
end
422
+
423
# Close every peer connection and discard all cached per-peer replication
# state (fresh state is rebuilt lazily via the @peers default proc).
def reset_peers
  @peers.each_value { |peer| peer.conn.close }
  @peers.clear
  @metrics.clear_peer_metrics
end
428
+
429
# Enter election term n: record it, clear any vote we cast in the previous
# term, and publish the new term to metrics.
def new_term(n)
  logger.debug(logloc) { "Setting up for term #{n}" }

  @current_term = n
  @voted_for = nil

  @metrics.term.set(@current_term)
end
436
+
437
# Append one log operation to <storage_dir>/log.yaml, fdatasync'd before
# returning so the entry is durable.  When the log file exceeds 1MiB a
# fresh snapshot is taken first (which truncates the log).  No-op without
# durable storage.
def persist_to_disk(e)
  return unless @storage_dir

  file = @storage_dir.join("log.yaml")

  if file.exist? && file.stat.size > 1024 * 1024
    logger.debug(logloc) { "Log is getting a bit big; time for a new snapshot, methinks" }
    take_snapshot
  end

  logger.debug(logloc) { "Persisting #{e.inspect} to #{file}" }
  file.open("a") do |fd|
    logger.debug(logloc) { "Doin' the write thing" }
    fd.puts e.to_yaml
    fd.fdatasync
  end

  @metrics.log_entries_persisted.increment
  @metrics.log_file_size.set(file.stat.size)
end
457
+
458
# (Leader only) Append entry to the local log, persist it, and begin
# replicating it to the cluster.  Logs an error and does nothing if called
# while not leader.
def propose_log_entry(entry)
  unless leader?
    logger.error(logloc) { with_backtrace("propose_log_entry called while not leader!") }
    return
  end

  @log.append(entry)
  persist_to_disk(process_log_entry: [entry, @log.last_index])

  logger.debug(logloc) { "Proposing #{entry.inspect} as ##{@log.last_index}" }

  if @config.nodes.length == 1
    # Flyin' solo!  We can skip all that inconvenient AppendEntries stuff,
    # but still need to do everything that follows an entry having been
    # "replicated everywhere" (ie "here").
    check_for_new_replication_majority
  else
    issue_append_entries_to_cluster
  end
end
478
+
479
# Fire AppendEntries RPCs at every peer concurrently, each in its own async
# task.  An optional block is forwarded to issue_append_entries to override
# its default reply handling.  A peer too far behind our retained log gets
# a snapshot instead.
def issue_append_entries_to_cluster(&blk)
  nodes.each do |n|
    next if n == node_info

    @async_task.async do
      issue_append_entries(@peers[n], &blk)
    rescue Evinrude::Log::SnapshottedEntryError
      issue_snapshot(@peers[n])
    rescue => ex
      log_exception(ex) { "Failed to issue AppendEntries to #{n.inspect}" }
    end
  end
end
494
+
495
# Send one AppendEntries RPC to the given peer and act on the reply:
# demote ourselves if a later term surfaces, record replication progress on
# success, or walk next_index backwards (possibly all the way to shipping a
# snapshot) on failure.  A given block takes over reply handling entirely.
def issue_append_entries(follower)
  logger.debug(logloc) { "Issuing AppendEntries to #{follower.node_info.inspect}" }
  entries = @log.entries_from(follower.next_index)
  prev_index = [follower.next_index - 1, @log.last_index].min
  prev_entry = @log[prev_index]

  logger.debug(logloc) { "Previous log entry (##{prev_index}) is #{prev_entry.inspect}" }

  reply = follower.rpc(
    Message::AppendEntriesRequest.new(
      term: @current_term,
      leader_info: node_info,
      leader_commit: @commit_index,
      prev_log_index: prev_index,
      prev_log_term: prev_entry.term,
      entries: entries,
    )
  )

  unless leader?
    logger.debug(logloc) { "Ignoring AppendEntriesReply received when we're not leader" }
    return
  end

  if reply.nil?
    logger.debug(logloc) { "AppendEntriesRequest to #{follower.node_info.inspect} was not answered. C'est la vie." }
    follower.conn.close
    @peers.delete(follower.node_info)
  elsif block_given?
    yield reply, follower.node_info
  elsif reply.term > @current_term
    logger.debug(logloc) { "Received term from #{follower.node_info.inspect} greater than our own. Demotion required!" }
    new_term(reply.term)
    become_follower
  elsif reply.success
    logger.debug(logloc) { "Successful AppendEntriesReply received from #{follower.node_info.inspect}" }
    follower.successful_append(prev_index + entries.length)
    check_for_new_replication_majority
  else
    logger.debug(logloc) { "AppendEntries to #{follower.node_info.inspect} failed; retrying after next_index decrement" }

    # If the peer told us how far its log actually extends, jump straight
    # there instead of backing off one entry at a time.
    if reply.last_index && reply.last_index < follower.next_index - 1
      follower.failed_append(reply.last_index)
    else
      follower.failed_append
    end

    if follower.next_index <= @log.snapshot_last_index
      # We no longer retain the entries this peer needs; snapshot time.
      issue_snapshot(follower)
    else
      issue_append_entries(follower)
    end
  end
end
546
+
547
# (Leader only) Scan log indices above the current commit index; each one
# now replicated to a quorum is committed: cluster-config entries advance
# the joint-consensus saga, state machine commands are applied (and their
# waiting clients notified), and the commit index is persisted.  If anything
# newly committed, an immediate AppendEntries round spreads the news.
def check_for_new_replication_majority
  new_commits = false

  ((@commit_index + 1)..@log.last_index).each do |idx|
    present_nodes = @peers.values.select { |f| f.match_index >= idx }.map(&:node_info) + [node_info]

    logger.debug(logloc) { "Checking for replication majority on ##{idx} (present: #{present_nodes.inspect})" }

    unless @config.quorum_met?(present_nodes)
      logger.debug(logloc) { "Replication majority not yet met on ##{idx}. Better luck next time." }
      next
    end

    logger.debug(logloc) { "Log index #{idx} has met majority" }
    @metrics.replication_majority.set(idx)

    entry = @log[idx]

    case entry
    when LogEntry::ClusterConfiguration
      logger.debug(logloc) { "Newly majoritied (majoritised?) log entry is a ClusterConfig; @config_index=#{@config_index}" }

      # Out-of-date cluster configurations are mind-bending, but since the
      # leader by definition holds all log entries, it also holds the
      # latest config, so only the entry matching @config_index matters.
      if idx == @config_index
        logger.debug(logloc) { "Replication of current config #{@config.inspect} complete" }
        if @config.transitioning?
          # Joint config replicated; propose the post-joint config next.
          logger.debug(logloc) { "Proposing post-joint config" }
          @config.joint_configuration_replicated
          propose_log_entry(LogEntry::ClusterConfiguration.new(term: @current_term, config: @config))
          @config_index = @log.last_index
        else
          # Transition complete; time to let the requestor know they're
          # good to go.
          logger.debug(logloc) { "Post-joint config replicated; config change saga completed" }
          @config_index = @log.last_index

          @cc_sem.acquire do
            if @config_change_request_in_progress
              logger.debug(logloc) { "Letting #{@config_change_request_in_progress.node_info.inspect} know their config change request was successful" }

              # Only strictly necessary for certain config changes (eg a
              # node keeping its name but changing address/port), but
              # there's no harm in doing it every time.
              if @peers.key?(@config_change_request_in_progress.node_info)
                @peers[@config_change_request_in_progress.node_info].conn.close
                @peers.delete(@config_change_request_in_progress.node_info)
              end

              @config_change_request_in_progress.send_successful_reply
              @config_change_request_in_progress = nil
            else
              logger.debug(logloc) { "Nobody to send a successful config change reply to; oh well" }
            end
          end

          process_config_change_queue
        end
      else
        logger.debug(logloc) { "Quorum met on out-of-date config #{entry.config.inspect}; ignoring" }
      end
    when LogEntry::StateMachineCommand
      @sm_mutex.synchronize do
        logger.debug(logloc) { "Applying state machine command #{entry.command} (id #{entry.id})" }
        @state_machine.process_command(entry.command)

        if conn = @commands_in_progress.delete(entry.id)
          logger.debug(logloc) { "Letting the client know their command is cooked" }
          conn.send_reply(Message::CommandReply.new(success: true))
        else
          logger.debug(logloc) { "No client around to notify of command application; they'll figure it out eventually" }
        end
      end
    end

    @commit_index = idx
    @metrics.commit_index.set(@commit_index)
    persist_to_disk(commit_entries_to: [idx])
    new_commits = true
  end

  if new_commits
    # We want to get the good word out to everyone as soon as possible that
    # there's new log entries that can be committed.
    issue_append_entries_to_cluster
  end
end
635
+
636
# Capture the current state machine (plus cluster config and log position)
# to <storage_dir>/snapshot.yaml via write-to-temp-then-rename, so a crash
# mid-write can never leave a truncated snapshot, then retire the
# now-redundant log file.  No-op without durable storage.
def take_snapshot
  return unless @storage_dir

  snapshot = @sm_mutex.synchronize do
    Evinrude::Snapshot.new(node_name: @node_name, state: @state_machine.snapshot, cluster_config: @config, cluster_config_index: @config_index, last_term: @log.last_entry_term, last_index: @log.last_index, last_command_ids: @last_command_ids)
  end

  Tempfile.open("snapshot", @storage_dir) do |f|
    logger.debug(logloc) { "Writing snapshot data to #{f.path}" }
    f.write(snapshot.to_yaml)
    f.fdatasync
    f.close
    File.rename(f.path, @storage_dir.join("snapshot.yaml"))
    # fsync the directory too, so the rename itself is durable.
    File.open(@storage_dir) { |d| d.fsync }
  end

  @metrics.snapshot_file_size.set(@storage_dir.join("snapshot.yaml").stat.size)

  begin
    logger.debug(logloc) { "Deleting now-stale log.yaml" }
    File.unlink(File.join(@storage_dir, "log.yaml"))
  rescue Errno::ENOENT
    # Yes, this is in fact exactly what we're trying to achieve
  end

  @metrics.log_file_size.set(0)
end
663
+
664
# Ship a full state machine snapshot to a peer whose log position is too
# far behind to be caught up via AppendEntries.  On success, record the
# peer as replicated up to our commit index; if the peer reports a later
# term, adopt it instead.
def issue_snapshot(follower)
  msg = @sm_mutex.synchronize do
    Message::InstallSnapshotRequest.new(term: @current_term, leader_info: @leader_info, last_included_index: @commit_index, last_included_term: @log[@commit_index].term, data: @state_machine.snapshot)
  end

  reply = follower.rpc(msg)

  if reply.term > @current_term
    new_term(reply.term)
  else
    follower.successful_append(@commit_index)
  end
end
677
+
678
# Lazily-constructed DNS resolver used for join-hint expansion.
def async_resolver
  @async_resolver ||= Evinrude::Resolver.new
end
681
+
682
# Resolve the configured join hints into a flat list of {address:, port:}
# candidate endpoints.  String hints are looked up as SRV records (each SRV
# target resolved to addresses, carrying the SRV port); Hash/NodeInfo hints
# are used verbatim when :address is an IP literal, or have :address
# resolved while keeping the hint's own :port.
#
# Bug fix: the hostname branch previously built entries with `srv.port`,
# but `srv` is only in scope in the SRV branch -- it raised NameError for
# any hash hint with a hostname.  It now uses the hint's own jh[:port].
#
# @return [Array<Hash>] resolved candidates (empty when @join_hints is nil).
# @raise [ArgumentError] on an unrecognized hint type.
def expand_join_hints
  return [] if @join_hints.nil?

  # Where's Enumerable.amap when you need it?
  sem = Async::Semaphore.new

  [].tap do |r|
    @join_hints.each do |jh|
      Async(logger: logger) do |t|
        if jh.is_a?(String)
          async_resolver.getresources(jh).each do |srv|
            t.async do
              async_resolver.getaddresses(srv.target.to_s).each do |addr|
                sem.acquire { r << { address: addr, port: srv.port } }
              end
            end
          end
        elsif jh.is_a?(Hash) || jh.is_a?(NodeInfo)
          begin
            IPAddr.new(jh[:address])
            # It's an IP address already; excellent
            sem.acquire { r << jh }
          rescue ArgumentError
            # It's a hostname(ish); resolve it, keeping the hint's port
            async_resolver.getaddresses(jh[:address]).each do |addr|
              sem.acquire { r << { address: addr, port: jh[:port] } }
            end
          end
        else
          raise ArgumentError, "Invalid join hint entry: #{jh.inspect}"
        end
      end.result
    end
  end
end
717
+
718
# Candidate endpoints for joining the cluster: resolved join hints plus
# every configured node other than ourselves.
def join_targets
  known_peers = @config.nodes.reject { |n| n.name == node_info.name }
  expand_join_hints + known_peers
end
721
+
722
# Either bootstrap a brand-new standalone cluster (when there is no hint of
# an existing one) or join an existing cluster via the known targets.
# Either way, a snapshot is taken immediately afterwards to capture an
# up-to-date config, as well as our node name, in case of accidents.
def join_or_create_cluster
  if @join_hints.nil? && join_targets.empty?
    logger.info(logloc) { "No hints of an existing cluster found; configuring for standalone mode" }
    new_term(1)

    @config.add_node(node_info)
    @config.joint_configuration_replicated

    become_leader

    propose_log_entry(LogEntry::ClusterConfiguration.new(term: @current_term, config: @config))
  else
    logger.info(logloc) { "Joining existing cluster" }
    join_cluster_via(join_targets)
  end

  take_snapshot
end
744
+
745
# Attempt to join the cluster by racing connections to every target
# simultaneously; the first successful connection wins, the rest are
# cancelled, and a JoinRequest goes over the winner (following leader
# redirects, and retrying with a pruned target list on failure).
#
# Fixes relative to the original: the with_timeout block parameter no
# longer shadows the enclosing target variable `t` (which the error path
# below the block interpolates), and the never-read `connected` local has
# been removed.
#
# @param targets [Array] candidate endpoints responding to [:address]/[:port].
def join_cluster_via(targets)
  logger.debug(logloc) { "Attempting to join cluster via targets #{targets.inspect}" }

  # I call this algorithm "happy joinballs".
  #
  # I will not be taking questions at this time.
  conn_tasks = targets.map do |t|
    @async_task.async do |subtask|
      logger.debug(logloc) { "Initiating happy joinballs connection to #{t[:address]}:#{t[:port]}" }

      begin
        conn = @network.connect(address: t[:address], port: t[:port])
      rescue StandardError => ex
        logger.warn(logloc) { "Failed to connect to #{t[:address]}:#{t[:port]}: #{ex.class} (#{ex.message})" }
        if targets.length == 1
          logger.warn(logloc) { "Cluster leader not responsive; restarting join attempt" }
          join_or_create_cluster
        end

        next
      end

      # If we get here, we have won the happy joinballs race
      conn_tasks.each do |ct|
        next if ct == Async::Task.current

        ct.stop
      end

      logger.debug(logloc) { "Sending a join request to #{conn.peer_info}" }
      reply = subtask.with_timeout(5) do |_timer|
        conn.rpc(Message::JoinRequest.new(node_info: node_info))
      rescue Async::TimeoutError
        nil
      end

      if reply&.success
        logger.info(logloc) { "Joined cluster; #{reply.inspect}" }
        become_follower
      elsif reply&.leader_info
        logger.debug(logloc) { "Redirected to leader #{reply.leader_info.inspect}" }
        join_cluster_via([reply.leader_info])
      else
        logger.error(logloc) { "Cluster join via #{t.inspect} failed: #{reply.nil? ? "RPC timeout" : reply.inspect}" }
        # That target is busticated, so we'll retry without it.  If it was
        # a (single) purported leader we were erroneously redirected to, go
        # back to joinballing everyone, and hope the cluster has agreed on
        # a *live* leader by now.
        if targets.length == 1
          join_cluster_via(join_targets - [t])
        else
          join_cluster_via(targets - [t])
        end
      end
    end
  end

  conn_tasks.each(&:wait)
end
808
+
809
# Main RPC service loop: dispatch each incoming message to its handler and
# send back whatever reply (if any) the handler produced.  Handlers that
# reply asynchronously (eg joins) return nil here.
def process_rpc_requests
  logger.debug(logloc) { "Commencing to process RPC requests" }

  @network.each_message do |msg, conn|
    @metrics.messages_received.increment(labels: { type: msg.class.to_s.split("::").last })

    logger.debug(logloc) { "Received #{msg} from #{conn.peer_info}" }

    reply = case msg
            when Message::AppendEntriesRequest   then process_append_entries_request(msg, conn)
            when Message::CommandRequest         then process_command_request(msg, conn)
            when Message::JoinRequest            then process_join_request(msg, conn)
            when Message::NodeRemovalRequest     then process_node_removal_request(msg, conn)
            when Message::ReadRequest            then process_read_request(msg, conn)
            when Message::VoteRequest            then process_vote_request(msg, conn)
            when Message::InstallSnapshotRequest then process_install_snapshot_request(msg, conn)
            else
              logger.warn(logloc) { "Unexpected #{msg.class.to_s.split("::").last} received from #{conn.peer_info}" }
              nil
            end

    if reply
      logger.debug(logloc) { "Sending reply #{reply.inspect} to #{conn.peer_info}" }
      conn.send_reply(reply)
    else
      logger.warn(logloc) { "No immediate reply to #{msg.inspect} from #{conn.peer_info}" }
    end
  end
end
843
+
844
# Handle an incoming JoinRequest: queue the config change when we're the
# leader (replying later, once the join saga completes), redirect to the
# leader when we're a follower, and drop it otherwise.
def process_join_request(msg, conn)
  logger.debug(logloc) { "Join request #{msg.inspect} received from #{conn.peer_info}" }

  if leader?
    logger.debug(logloc) { "Queueing join request" }
    @config_change_queue << ConfigChangeQueueEntry::AddNode.new(msg, conn)

    # Only kick off processing if nothing else is already draining the
    # queue.
    if @config_change_queue.length == 1 && @config_change_request_in_progress.nil?
      logger.debug(logloc) { "Triggering new config change queue cascade" }
      process_config_change_queue
    end

    # No immediate reply; will be sent once the join is completed
    nil
  elsif follower?
    logger.debug(logloc) { "Not leader; redirecting" }
    Message::JoinReply.new(success: false, leader_info: @leader_info)
  else
    logger.debug(logloc) { "Ignoring join request from #{msg.node_info} because not leader or follower" }
    nil
  end
end
866
+
867
# Handle an incoming NodeRemovalRequest: queue the config change when we're
# the leader (replying later, once the removal saga completes), redirect to
# the leader when we're a follower, and drop it otherwise.
#
# Consistency fix: like process_join_request, the queue cascade is now only
# triggered when no change request is already in progress --
# process_config_change_queue treats being called mid-request as an error.
def process_node_removal_request(msg, conn)
  logger.debug(logloc) { "Node removal request #{msg.inspect} received from #{conn.peer_info}" }

  if follower?
    logger.debug(logloc) { "Not leader; redirecting" }
    Message::NodeRemovalReply.new(success: false, leader_info: @leader_info)
  elsif leader?
    logger.debug(logloc) { "Queueing node removal request" }
    @config_change_queue << ConfigChangeQueueEntry::RemoveNode.new(msg, conn)

    if @config_change_queue.length == 1 && @config_change_request_in_progress.nil?
      logger.debug(logloc) { "Triggering new config change queue cascade" }
      process_config_change_queue
    end

    # No immediate reply; will be sent once the removal is completed
    nil
  else
    logger.debug(logloc) { "Ignoring node removal request from #{msg.node_info} because not leader or follower" }
    nil
  end
end
889
+
890
  # Pop and action the next entry from @config_change_queue.
  #
  # Only one configuration change may be in flight at a time, tracked in
  # @config_change_request_in_progress.  Entries that require no log
  # entry (adding a node that's already a member, removing one that
  # isn't, or redirecting because we're no longer leader) are replied to
  # immediately under @cc_sem and the queue is re-processed recursively;
  # otherwise a new ClusterConfiguration entry is proposed and the change
  # stays "in progress" until that entry is dealt with elsewhere.
  def process_config_change_queue
    if @config_change_queue.empty?
      logger.debug(logloc) { "No more entries in the config change queue" }
      return
    end

    if @config_change_request_in_progress
      # Whoever completes the in-flight change is responsible for
      # re-triggering queue processing; bail out rather than double up.
      logger.error(logloc) { "Change queue processing requested while change request in progress!" }
      return
    end

    @config_change_request_in_progress = @config_change_queue.shift
    logger.debug(logloc) { "Processing config change queue entry #{@config_change_request_in_progress.inspect}" }

    unless leader?
      # We lost leadership after this entry was queued; redirect the
      # requester to whoever we currently believe is leader.
      @cc_sem.acquire do
        @config_change_request_in_progress.send_redirect_reply(@leader_info)
        @config_change_request_in_progress = nil
      end
      process_config_change_queue
      return
    end

    case @config_change_request_in_progress
    when ConfigChangeQueueEntry::AddNode
      if @config.nodes.include?(@config_change_request_in_progress.node_info)
        # "Dude, you're *already* part of the cluster! Duuuuuuuuuuuuuuude!"
        @cc_sem.acquire do
          @config_change_request_in_progress.send_successful_reply
          @config_change_request_in_progress = nil
        end
        process_config_change_queue
      else
        logger.debug(logloc) { "Transitioning configuration to add #{@config_change_request_in_progress.node_info.inspect}" }

        # Mutate the live config and propose the resulting configuration
        # as a log entry; @config_index records where it landed.
        @config.add_node(@config_change_request_in_progress.node_info)
        propose_log_entry(LogEntry::ClusterConfiguration.new(term: @current_term, config: @config))
        @config_index = @log.last_index
      end
    when ConfigChangeQueueEntry::RemoveNode
      if !@config.nodes.include?(@config_change_request_in_progress.node_info)
        # Node isn't in the config, so the removal is trivially complete.
        @cc_sem.acquire do
          @config_change_request_in_progress.send_successful_reply
          @config_change_request_in_progress = nil
        end
        process_config_change_queue
      else
        logger.debug(logloc) { "Transitioning configuration to remove #{@config_change_request_in_progress.node_info.inspect}" }

        @config.remove_node(@config_change_request_in_progress.node_info)
        propose_log_entry(LogEntry::ClusterConfiguration.new(term: @current_term, config: @config))
        @config_index = @log.last_index
      end
    else
      # Defensive: the queue should only ever contain the two types above.
      logger.error(logloc) { "Unsupported change request type #{@config_change_request_in_progress.class}; this really shouldn't ever happen, bug report welcome" }
      logger.debug(logloc) { "Unsupported change request was #{@config_change_request_in_progress.inspect}" }
      @config_change_request_in_progress = nil
      process_config_change_queue
    end
  end
950
+
951
  # Handle an incoming AppendEntries request (the Raft log replication /
  # heartbeat RPC).
  #
  # Returns the AppendEntriesReply to send, or a falsy value in the
  # snapshot-buried case (where the connection is closed instead); the
  # dispatcher treats a falsy result as "no immediate reply".
  def process_append_entries_request(msg, conn)
    logger.debug(logloc) { "Processing append_entries request #{msg.inspect} from #{conn.peer_info}" }

    if msg.term < @current_term
      # Stale leader; refuse, and tell them the current term.
      logger.debug(logloc) { "AppendEntries request term less than our current term #{@current_term}" }
      Message::AppendEntriesReply.new(success: false, term: @current_term)
    else
      # Remember when we last heard from a plausible leader; read by
      # process_vote_request to ignore disruptive vote requests.
      @last_append = Time.now

      if !@log.has_entry?(msg.prev_log_index)
        # We're missing entries; report our last index so the leader can
        # back up and re-send from there.
        logger.debug(logloc) { "We don't have log entry prev_log_index=#{msg.prev_log_index}; asking for more entries" }
        Message::AppendEntriesReply.new(success: false, term: @current_term, last_index: @log.last_index)
      elsif @log.snapshotted_entry?(msg.prev_log_index + 1)
        logger.error(logloc) { "Got AppendEntriesRequest with a prev_log_index=#{msg.prev_log_index} that's buried in the snapshot" }
        # Closing the connection to the leader will cause it to recycle the
        # follower state, which will reset it to start sending us AppendEntries
        # from the most recent entry.
        conn.close
      elsif msg.prev_log_term != @log.entry_term(msg.prev_log_index)
        # Fork: our entry at prev_log_index disagrees with the leader's.
        # Drop our conflicting suffix and report failure so the leader
        # retries from earlier in its log.
        logger.debug(logloc) { "AppendEntries log fork; msg.prev_log_index=#{msg.prev_log_index} msg.prev_log_term=#{msg.prev_log_term} @log.entry_term(msg.prev_log_index=#{@log.entry_term(msg.prev_log_index)} @log.last_index=#{@log.last_index}" }
        @log.truncate_to(msg.prev_log_index - 1)
        Message::AppendEntriesReply.new(success: false, term: @current_term)
      else
        @leader_info = msg.leader_info

        if msg.term > @current_term || (candidate? && msg.term == @current_term)
          # A valid AppendEntries from a newer term (or an equal one while
          # we're mid-election) means there's a legitimate leader; defer.
          logger.debug(logloc) { "Received term-updating AppendEntries; msg.term=#{msg.term} @current_term=#{@current_term} node_info.mode=#{node_info.instance_variable_get(:@mode).inspect}" }
          new_term(msg.term)
          become_follower
        end

        # Push back our election deadline -- we've just heard from the
        # leader.  (@heartbeat_timeout appears to be a Range; #rand comes
        # from freedom_patches/range -- confirm.)
        @heartbeat_timeout_time = Time.now + @heartbeat_timeout.rand

        msg.entries.each.with_index do |new_entry, i|
          idx = msg.prev_log_index + i + 1 # Dratted 1-index addressing
          process_log_entry(new_entry, idx)
        end

        # Only commit as far as we actually hold entries, even if the
        # leader's commit point is further along.
        new_commit_point = [@log.last_index, msg.leader_commit].min

        if new_commit_point > @commit_index
          commit_entries_to(new_commit_point)
        end

        Message::AppendEntriesReply.new(success: true, term: @current_term)
      end
    end
  end
999
+
1000
+ def process_log_entry(entry, log_index)
1001
+ logger.debug(logloc) { "Processing #{entry.inspect} at log index #{log_index}" }
1002
+
1003
+ existing_entry = @log[log_index]
1004
+
1005
+ if existing_entry.nil?
1006
+ @log.append(entry)
1007
+
1008
+ persist_to_disk(process_log_entry: [entry, log_index])
1009
+
1010
+ # Configuration changes take place immediately, not after consensus;
1011
+ # raft.pdf p11, "a server always uses the latest configuration in its
1012
+ # log, regardless of whether the entry is committed".
1013
+ if LogEntry::ClusterConfiguration === entry
1014
+ logger.debug(logloc) { "Using new configuration from log entry ##{log_index}" }
1015
+ @config = entry.config
1016
+ @config_index = log_index
1017
+ end
1018
+ elsif existing_entry.term != entry.term
1019
+ logger.debug(logloc) { "Discovered fork at #{log_index} (existing_entry=#{existing_entry.inspect} new_entry=#{entry.inspect}); discarding our remaining log entries" }
1020
+ @log.truncate_to(log_index - 1)
1021
+ else
1022
+ logger.debug(logloc) { "Already got log entry ##{log_index}; skipping" }
1023
+ end
1024
+
1025
+ end
1026
+
1027
+ def commit_entries_to(idx)
1028
+ ((@commit_index + 1)..idx).each do |i|
1029
+ @sm_mutex.synchronize do
1030
+ logger.debug(logloc) { "Committing log entry ##{i}" }
1031
+
1032
+ if LogEntry::StateMachineCommand === @log[i]
1033
+ logger.debug(logloc) { "Applying state machine command #{@log[i].command}" }
1034
+ @state_machine.process_command(@log[i].command)
1035
+ @last_command_ids[@log[i].node_name] = @log[i].id
1036
+ else
1037
+ logger.debug(logloc) { "Entry ##{i} is a #{@log[i].class}; no commit action necessary" }
1038
+ end
1039
+
1040
+ @commit_index = i
1041
+ @metrics.commit_index.set(i)
1042
+ end
1043
+ end
1044
+
1045
+ persist_to_disk(commit_entries_to: [idx])
1046
+ end
1047
+
1048
+ def process_command_request(msg, conn)
1049
+ logger.debug(logloc) { "Command request #{msg.inspect} received from #{conn.peer_info}" }
1050
+
1051
+ if follower?
1052
+ Message::CommandReply.new(success: false, leader_info: @leader_info)
1053
+ elsif leader?
1054
+ if @last_command_ids[msg.node_name] == msg.id
1055
+ Message::CommandReply.new(success: true)
1056
+ else
1057
+ logger.debug(logloc) { "Noting that #{msg.id} is a command in progress" }
1058
+ @commands_in_progress[msg.id] = conn
1059
+ propose_log_entry(LogEntry::StateMachineCommand.new(term: @current_term, command: msg.command, id: msg.id, node_name: msg.node_name))
1060
+
1061
+ # Deferred reply to log entry commit will occur after replication is complete
1062
+ nil
1063
+ end
1064
+ else
1065
+ Message::CommandReply.new(success: false)
1066
+ end
1067
+ end
1068
+
1069
  # Decide whether to grant our vote to the candidate described in msg.
  #
  # Returns a Message::VoteReply, or nil when the request is ignored
  # because we've heard from a live leader within the minimum heartbeat
  # interval (so the candidate is probably just being disruptive).
  def process_vote_request(msg, conn)
    if Time.now - @last_append < @heartbeat_timeout.first
      # Avoid rogue servers disrupting the cluster by calling votes
      # just because they can.
      logger.debug(logloc) { "Ignoring vote request from scurvy rogue #{msg.candidate_info}" }
      return nil
    end

    # A higher term always bumps our own, whether or not we end up
    # granting the vote.
    if msg.term > @current_term
      new_term(msg.term)
    end

    # Grant the vote only if (a) the candidate's term is current, (b) we
    # haven't already voted for someone else this term, and (c) the
    # candidate's log is at least as up-to-date as ours (same last term
    # with an index no shorter, or a strictly newer last term).
    if msg.term == @current_term &&
      (@voted_for.nil? || @voted_for == msg.candidate_info) &&
      ((msg.last_log_index >= @log.last_index && msg.last_log_term == @log.last_entry_term) || msg.last_log_term > @log.last_entry_term)
      @voted_for = msg.candidate_info
      become_follower
      logger.debug(logloc) { "Voted for #{msg.candidate_info.inspect} for term #{msg.term} leader" }
      Message::VoteReply.new(term: @current_term, vote_granted: true)
    else
      logger.debug(logloc) { "Rejected #{msg.candidate_info.inspect} for term #{msg.term} leader; @current_term=#{@current_term} @voted_for=#{@voted_for.inspect} msg.last_log_index=#{msg.last_log_index} @log.last_index=#{@log.last_index} msg.last_log_term=#{msg.last_log_term} @log.last_entry_term=#{@log.last_entry_term}" }
      Message::VoteReply.new(term: @current_term, vote_granted: false)
    end
  end
1093
+
1094
  # Handle a client read barrier (Message::ReadRequest).
  #
  # The request succeeds only if our commit index exactly matches the
  # client's.  In multi-node clusters we must first confirm we are still
  # leader by collecting a quorum of successful AppendEntries replies; in
  # that case the reply is sent from the callback and nil is returned
  # (deferred reply).  Single-node clusters can answer immediately.
  def process_read_request(msg, conn)
    if !leader?
      Message::ReadReply.new(success: false, leader_info: @leader_info)
    elsif @commit_index > msg.commit_index
      # We already *know* this is never going to succeed, may as well save ourselves
      # the hassle
      logger.debug(logloc) { "ReadRequest is for an out-of-date commit_index; nopeing out" }
      Message::ReadReply.new(success: false)
    elsif @config.nodes.length == 1
      # Flyin' solo!
      if @commit_index == msg.commit_index
        Message::ReadReply.new(success: true)
      else
        Message::ReadReply.new(success: false)
      end
    else
      # We count as our own first responder.
      responders = [node_info]

      # NOTE(review): the block parameter node_info shadows the node_info
      # method used just above; inside the block it is the replying node.
      issue_append_entries_to_cluster do |reply, node_info|
        # responders will be set to nil when quorum has been met, so all remaining
        # AE replies can be quietly ignored
        next if responders.nil?

        if reply.success
          responders << node_info
          logger.debug(logloc) { "Checking if #{responders.inspect} meets read request quorum" }
          if @config.quorum_met?(responders)
            logger.debug(logloc) { "Have met read request quorum; reply sent" }
            if @commit_index == msg.commit_index
              conn.send_reply(Message::ReadReply.new(success: true))
            else
              conn.send_reply(Message::ReadReply.new(success: false))
            end
            responders = nil
          else
            logger.debug(logloc) { "Not yet met read request quorum" }
          end
        end
      end

      # Deferred reply
      nil
    end
  end
1138
+
1139
+ def process_install_snapshot_request(msg, conn)
1140
+ if msg.term < @current_term
1141
+ conn.send_reply(Message::InstallSnapshotReply.new(term: @current_term))
1142
+ return
1143
+ end
1144
+
1145
+ @sm_mutex.synchronize do
1146
+ @state_machine = @state_machine_class.new(snapshot: msg.data)
1147
+ @log.new_snapshot(msg.last_included_term, msg.last_included_index)
1148
+ @commit_index = msg.last_included_index
1149
+ end
1150
+
1151
+ conn.send_reply(Message::InstallSnapshotReply.new(term: @current_term))
1152
+ end
1153
+
1154
  # Start a leader election for the next term.
  #
  # Bumps the term, becomes candidate, and -- unless this is a
  # single-node cluster, which becomes leader immediately -- votes for
  # itself and asynchronously canvasses every other configured node.
  # `electors` accumulates granted votes and is set to nil once the
  # election is decided (quorum reached, or a higher term seen), so any
  # straggling ballots are ignored.
  def trigger_election
    new_term(@current_term + 1)
    logger.debug(logloc) { "Initiating election for term #{@current_term}" }
    become_candidate

    if @config.nodes.length == 1
      # Flyin' solo!
      logger.debug(logloc) { "No need for an election, as we're in single-node mode" }
      become_leader
    else
      # Capture the term this election is for, in case @current_term
      # moves on while the async vote requests are in flight.
      election_term = @current_term
      electors = [node_info]
      @voted_for = node_info

      logger.debug(logloc) { "Canvassing the electorate" }
      @config.nodes.each do |n|
        next if n == node_info

        # One async task per peer, so a slow peer can't stall the rest.
        @async_task.async do
          logger.debug(logloc) { "Sending vote request to #{n.inspect}" }
          begin
            reply = @peers[n].rpc(Message::VoteRequest.new(term: election_term, candidate_info: node_info, last_log_index: @log.last_index, last_log_term: @log.last_entry_term))
          rescue => ex
            # Couldn't reach the peer; drop the cached connection so a
            # fresh one is established next time it's needed.
            log_exception(ex) { "Failed to send vote to #{n.inspect}" }
            if @peers.key?(n)
              @peers[n].conn.close
              @peers.delete(n)
            end
            next
          end

          if electors.nil?
            # No need to process a vote if we're not running an election at the moment
            next
          end

          unless candidate?
            # We've already become leader or follower; ballot is moot.
            logger.debug(logloc) { "Received ballot from #{n.inspect}: #{reply.inspect} while in #{@mode} mode" }
            next
          end

          logger.debug(logloc) { "Processing vote #{reply.inspect} from #{n.inspect}" }
          if reply.nil?
            logger.debug(logloc) { "Received no reply to vote from #{n.inspect}" }
          elsif reply.term > @current_term
            # Someone is ahead of us; concede and catch our term up.
            logger.debug(logloc) { "Received higher term from #{n.inspect}; canceling election" }
            new_term(reply.term)
            become_follower
            electors = nil
          elsif reply.vote_granted
            logger.debug(logloc) { "Received the vote of #{n.inspect}" }
            electors << n

            logger.debug(logloc) { "Got #{electors.length} votes so far" }

            if @config.quorum_met?(electors)
              become_leader
              # Election decided; ignore any remaining ballots.
              electors = nil
            end
          end
        end
      end
    end
  end
1218
+ end
1219
+
1220
+ require_relative "./evinrude/backoff"
1221
+ require_relative "./evinrude/config_change_queue_entry/add_node"
1222
+ require_relative "./evinrude/config_change_queue_entry/remove_node"
1223
+ require_relative "./evinrude/cluster_configuration"
1224
+ require_relative "./evinrude/freedom_patches/range"
1225
+ require_relative "./evinrude/log"
1226
+ require_relative "./evinrude/log_entries"
1227
+ require_relative "./evinrude/messages"
1228
+ require_relative "./evinrude/metrics"
1229
+ require_relative "./evinrude/network"
1230
+ require_relative "./evinrude/node_info"
1231
+ require_relative "./evinrude/peer"
1232
+ require_relative "./evinrude/snapshot"
1233
+ require_relative "./evinrude/state_machine/register"