diskeyval 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.github/workflows/test.yml +31 -0
- package/LICENSE +7 -0
- package/PLAN.md +169 -0
- package/README.md +119 -0
- package/TESTING.md +94 -0
- package/example.ts +672 -0
- package/lib/index.ts +448 -0
- package/lib/raft/in-memory-network.ts +96 -0
- package/lib/raft/index.ts +24 -0
- package/lib/raft/node.ts +883 -0
- package/lib/raft/tls-transport.ts +451 -0
- package/lib/raft/types.ts +150 -0
- package/lib/storage/file-raft-storage.ts +84 -0
- package/package.json +23 -0
- package/test/e2e/cluster.e2e.test.ts +280 -0
- package/test/sim/cluster.sim.test.ts +105 -0
- package/test/soak/cluster.soak.test.ts +97 -0
- package/test/support/certs.ts +126 -0
- package/test/support/helpers.ts +51 -0
- package/test/support/ports.ts +33 -0
- package/test/support/tls-cluster.ts +84 -0
- package/test/unit/diskeyval.coverage.test.ts +193 -0
- package/test/unit/in-memory-network.coverage.test.ts +111 -0
- package/test/unit/raft.coverage.test.ts +534 -0
- package/test/unit/raft.election.test.ts +101 -0
- package/test/unit/raft.phase5.test.ts +90 -0
- package/test/unit/raft.reads.test.ts +96 -0
- package/test/unit/raft.replication.test.ts +106 -0
- package/test/unit/tls-transport.coverage.test.ts +332 -0
- package/test/unit/tls-transport.internals.coverage.test.ts +209 -0
package/lib/raft/node.ts
ADDED
|
@@ -0,0 +1,883 @@
|
|
|
1
|
+
import {
|
|
2
|
+
createNotLeaderError,
|
|
3
|
+
createQuorumUnavailableError,
|
|
4
|
+
type AppendEntriesRequest,
|
|
5
|
+
type AppendEntriesResponse,
|
|
6
|
+
type RaftLogEntry,
|
|
7
|
+
type RaftMetrics,
|
|
8
|
+
type RaftNodeOptions,
|
|
9
|
+
type RaftPersistentState,
|
|
10
|
+
type RaftRole,
|
|
11
|
+
type RequestVoteRequest,
|
|
12
|
+
type RequestVoteResponse
|
|
13
|
+
} from './types.ts';
|
|
14
|
+
|
|
15
|
+
/**
 * Sentinel entry stored at log position 0.
 *
 * Every node's log starts with this entry (see the initial `log` in
 * `createRaftNode` and `ensureValidLoadedLog`), so real commands start at
 * index 1 and "previous entry" lookups at index 0 resolve to term 0.
 */
const INITIAL_LOG_ENTRY: RaftLogEntry<unknown> = {
  index: 0,
  term: 0,
  command: null
};
|
|
20
|
+
|
|
21
|
+
/**
 * Returns a promise that settles (with no value) once a `setTimeout` of `ms`
 * milliseconds has fired.
 *
 * @param ms - Timer duration handed to `setTimeout`.
 */
function delay(ms: number): Promise<void> {
  return new Promise<void>((wake) => {
    setTimeout(wake, ms);
  });
}
|
|
24
|
+
|
|
25
|
+
/**
 * Settlement callbacks for a proposal that is waiting for its log entry to
 * commit. Instances are stored in `pendingProposals`, keyed by log index.
 */
type PendingProposal = {
  /** Called when the entry's index is at or below the commit index. */
  resolve: () => void;
  /** Called when the proposal fails (step-down, timeout, node stopped). */
  reject: (error: unknown) => void;
};
|
|
29
|
+
|
|
30
|
+
/** Maps each event name a node can emit to the payload its handlers receive. */
type RaftNodeEventMap<T> = {
  /** Emitted with the new role whenever the node transitions role. */
  'role-change': RaftRole;
  /** Emitted with the known leader id (or `null`) alongside role transitions. */
  leader: string | null;
  /** Emitted with each log entry as it is applied (when event emission is enabled). */
  commit: RaftLogEntry<T>;
};
|
|
35
|
+
|
|
36
|
+
/** Union of the event names defined by `RaftNodeEventMap`. */
type EventKey<T> = keyof RaftNodeEventMap<T>;
/** Handler signature for event `K`: receives that event's payload, returns nothing. */
type EventHandler<T, K extends EventKey<T>> = (payload: RaftNodeEventMap<T>[K]) => void;
|
|
38
|
+
|
|
39
|
+
/**
 * Builds a minimal synchronous publish/subscribe hub for node events.
 *
 * Handlers are kept in one `Set` per event name, so registering the same
 * function twice for an event has no additional effect. `emit` invokes the
 * handlers synchronously, in the set's iteration order; an exception thrown
 * by a handler propagates to the emitter.
 *
 * @returns An object exposing `on`, `off` and `emit`.
 */
function createEventBus<T>() {
  type Listener = (payload: unknown) => void;
  const registry = new Map<keyof RaftNodeEventMap<T>, Set<Listener>>();

  /** Subscribes `handler` to `event`. */
  function on<K extends EventKey<T>>(event: K, handler: EventHandler<T, K>): void {
    const listener = handler as Listener;
    const bucket = registry.get(event);
    if (!bucket) {
      registry.set(event, new Set([listener]));
      return;
    }

    bucket.add(listener);
  }

  /** Unsubscribes `handler` from `event`; drops the bucket once it is empty. */
  function off<K extends EventKey<T>>(event: K, handler: EventHandler<T, K>): void {
    const bucket = registry.get(event);
    if (bucket) {
      bucket.delete(handler as Listener);
      if (bucket.size === 0) {
        registry.delete(event);
      }
    }
  }

  /** Delivers `payload` to every handler currently subscribed to `event`. */
  function emit<K extends EventKey<T>>(event: K, payload: RaftNodeEventMap<T>[K]): void {
    const bucket = registry.get(event);
    if (bucket) {
      bucket.forEach((listener) => {
        listener(payload);
      });
    }
  }

  return { on, off, emit };
}
|
|
81
|
+
|
|
82
|
+
/**
 * Creates a fresh metrics record with every counter set to zero.
 *
 * @returns A new object on every call, so callers never share counters.
 */
function createRaftMetrics(): RaftMetrics {
  const zeroed: RaftMetrics = {
    // Elections
    electionsStarted: 0,
    leadershipsWon: 0,
    requestVoteSent: 0,
    requestVoteFailed: 0,
    // Replication
    appendEntriesSent: 0,
    appendEntriesFailed: 0,
    appendEntriesRetries: 0,
    // Client proposals
    proposalsReceived: 0,
    proposalTimeouts: 0,
    proposalsCommitted: 0,
    // Read barriers
    readBarriersRequested: 0,
    readBarriersFailed: 0,
    // State machine
    commitsApplied: 0,
    // Persistence
    persistenceSaves: 0,
    persistenceSaveFailures: 0,
    persistenceLoads: 0,
    persistenceLoadFailures: 0
  };

  return zeroed;
}
|
|
103
|
+
|
|
104
|
+
/** Point-in-time view of a node's state, as returned by `RaftNode.getState()`. */
export type RaftNodeStateSnapshot = {
  /** Current role of the node. */
  role: RaftRole;
  /** Latest term the node has seen. */
  currentTerm: number;
  /** Highest log index known to be committed. */
  commitIndex: number;
  /** Highest log index handed to `applyCommand`. */
  lastApplied: number;
  /** Index of the last entry in the node's log (0 when only the sentinel exists). */
  lastLogIndex: number;
  /** Id of the leader this node currently recognises, or `null` if unknown. */
  leaderNodeId: string | null;
};
|
|
112
|
+
|
|
113
|
+
/**
 * Public handle for a single Raft participant, as produced by
 * `createRaftNode`. `T` is the type of the commands carried in log entries.
 */
export type RaftNode<T = unknown> = {
  /** Identifier of this node. */
  nodeId: string;
  /** Identifiers of the other cluster members (updated in place by `replacePeers`). */
  peerIds: string[];
  /** Loads persisted state (if storage is configured) and arms the election timer. */
  start: () => Promise<void>;
  /** Stops timers, rejects pending proposals and waits for queued persistence. */
  stop: () => Promise<void>;
  /** Whether the node currently holds the leader role. */
  isLeader: () => boolean;
  /** Id of the leader this node currently recognises, or `null`. */
  leaderId: () => string | null;
  /** Returns a snapshot of role, term and log positions. */
  getState: () => RaftNodeStateSnapshot;
  /** Returns a copy of the metric counters. */
  getMetrics: () => RaftMetrics;
  /** Replaces the peer set and resets replication bookkeeping for added/removed peers. */
  replacePeers: (peerIds: string[]) => void;
  /** Becomes a candidate for a new term and requests votes from all peers. */
  startElection: () => Promise<void>;
  /** Appends `command` to the log; resolves once committed, rejects otherwise. */
  propose: (command: T) => Promise<void>;
  /** Confirms leadership with a quorum of peers; rejects if that fails. */
  readBarrier: () => Promise<void>;
  /** Handler for an incoming RequestVote RPC. */
  onRequestVote: (fromNodeId: string, request: RequestVoteRequest) => Promise<RequestVoteResponse>;
  /** Handler for an incoming AppendEntries RPC (heartbeat or replication). */
  onAppendEntries: (
    fromNodeId: string,
    request: AppendEntriesRequest<T>
  ) => Promise<AppendEntriesResponse>;
  /** Subscribes a handler to a node event. */
  on: <K extends EventKey<T>>(event: K, handler: EventHandler<T, K>) => void;
  /** Unsubscribes a handler from a node event. */
  off: <K extends EventKey<T>>(event: K, handler: EventHandler<T, K>) => void;
};
|
|
134
|
+
|
|
135
|
+
/**
 * Mutable state shared by all helper functions of a single node: static
 * configuration, timers, Raft persistent/volatile state and bookkeeping.
 */
type RaftRuntimeContext<T> = {
  /** Identifier of this node. */
  nodeId: string;
  /** Identifiers of the other cluster members. */
  peerIds: string[];
  /** Transport used to send RequestVote / AppendEntries to peers. */
  transport: RaftNodeOptions<T>['transport'];
  /** Callback invoked with each committed entry, in log order. */
  applyCommand: RaftNodeOptions<T>['applyCommand'];
  /** Base election timeout; a random jitter of up to the same amount is added. */
  electionTimeoutMs: number;
  /** Interval between replication rounds while leader. */
  heartbeatMs: number;
  /** How long `propose` keeps replicating before giving up. */
  proposalTimeoutMs: number;
  /** Pause after a rejected or failed AppendEntries before the next pass. */
  retryBackoffMs: number;
  /** Upper bound on AppendEntries attempts per peer within one replication round. */
  maxReplicationPassesPerRound: number;
  /** Optional durable storage for term, vote, log and commit index. */
  storage?: RaftNodeOptions<T>['storage'];
  /** Random source used for election jitter. */
  random: () => number;

  /** Pending election timeout, or `null` when none is armed. */
  electionTimer: NodeJS.Timeout | null;
  /** Heartbeat interval handle while leader, otherwise `null`. */
  heartbeatTimer: NodeJS.Timeout | null;

  /** True between `start()` and `stop()`. */
  running: boolean;

  /** Current role of the node. */
  role: RaftRole;
  /** Latest term the node has seen. */
  currentTerm: number;
  /** Candidate voted for in `currentTerm`, or `null`. */
  votedFor: string | null;
  /** Leader this node currently recognises, or `null`. */
  leaderNodeId: string | null;

  /** Replicated log; position 0 holds the sentinel entry. */
  log: RaftLogEntry<T>[];
  /** Highest log index known to be committed. */
  commitIndex: number;
  /** Highest log index handed to `applyCommand`. */
  lastApplied: number;

  /** Per peer: index of the next log entry to send (leader bookkeeping). */
  nextIndex: Map<string, number>;
  /** Per peer: highest log index recorded as replicated (leader bookkeeping). */
  matchIndex: Map<string, number>;

  /** Proposals awaiting commit, keyed by the log index of their entry. */
  pendingProposals: Map<number, PendingProposal>;

  /** Tail of the serialized chain of storage saves. */
  persistQueue: Promise<void>;
  /** Event bus backing the public `on` / `off` methods. */
  events: ReturnType<typeof createEventBus<T>>;
  /** Counters exposed through `getMetrics()`. */
  metrics: RaftMetrics;
};
|
|
171
|
+
|
|
172
|
+
/**
 * Adds `amount` (default 1) to the named counter in `context.metrics`.
 *
 * @param context - Node context whose metrics are updated.
 * @param key - Name of the counter to bump.
 * @param amount - Increment to apply.
 */
function incrementMetric(context: RaftRuntimeContext<unknown>, key: keyof RaftMetrics, amount = 1): void {
  const counters = context.metrics;
  counters[key] = counters[key] + amount;
}
|
|
175
|
+
|
|
176
|
+
/**
 * Index of the final entry in the node's log (log length minus one).
 */
function lastLogIndex<T>(context: RaftRuntimeContext<T>): number {
  const { log } = context;
  return log.length - 1;
}
|
|
179
|
+
|
|
180
|
+
/**
 * Term of the final entry in the node's log, or 0 when there is no such
 * entry (or it carries no term).
 */
function lastLogTerm<T>(context: RaftRuntimeContext<T>): number {
  const tail = context.log[lastLogIndex(context)];
  return tail?.term ?? 0;
}
|
|
183
|
+
|
|
184
|
+
/**
 * Smallest number of nodes that forms a strict majority of the cluster
 * (this node plus all configured peers).
 */
function majorityCount<T>(context: RaftRuntimeContext<T>): number {
  const clusterSize = 1 + context.peerIds.length;
  return 1 + Math.floor(clusterSize / 2);
}
|
|
188
|
+
|
|
189
|
+
/**
 * Cancels the pending election timeout, if one is armed, and forgets its handle.
 */
function clearElectionTimer<T>(context: RaftRuntimeContext<T>): void {
  const armed = context.electionTimer;
  if (armed) {
    clearTimeout(armed);
    context.electionTimer = null;
  }
}
|
|
197
|
+
|
|
198
|
+
/**
 * Stops the leader heartbeat interval, if one is running, and forgets its handle.
 */
function clearHeartbeatTimer<T>(context: RaftRuntimeContext<T>): void {
  const ticking = context.heartbeatTimer;
  if (ticking) {
    clearInterval(ticking);
    context.heartbeatTimer = null;
  }
}
|
|
206
|
+
|
|
207
|
+
/**
 * Re-arms the election timeout with fresh jitter. Does nothing when the node
 * is not running.
 *
 * The timeout is `electionTimeoutMs` plus a random extra in
 * `[0, electionTimeoutMs)` (given `random()` in `[0, 1)`), after which
 * `startElectionFn` is invoked without awaiting it.
 *
 * @param context - Node context owning the timer.
 * @param startElectionFn - Callback fired when the timeout elapses.
 */
function resetElectionTimer<T>(context: RaftRuntimeContext<T>, startElectionFn: () => Promise<void>): void {
  if (context.running) {
    clearElectionTimer(context);

    const jitterMs = Math.floor(context.random() * context.electionTimeoutMs);
    const onTimeout = (): void => {
      void startElectionFn();
    };

    context.electionTimer = setTimeout(onTimeout, context.electionTimeoutMs + jitterMs);
  }
}
|
|
218
|
+
|
|
219
|
+
/**
 * Reports whether the local log holds an entry at `index` whose term equals `term`.
 */
function logMatches<T>(context: RaftRuntimeContext<T>, index: number, term: number): boolean {
  const candidate = context.log[index];
  if (!candidate) {
    return false;
  }

  return candidate.term === term;
}
|
|
223
|
+
|
|
224
|
+
/**
 * Decides whether a candidate's log is at least as up to date as the local one.
 *
 * Last-entry terms are compared first; only when they are equal does the
 * longer (or equally long) log win.
 *
 * @param context - Local node context.
 * @param candidateLastLogIndex - Index of the candidate's last log entry.
 * @param candidateLastLogTerm - Term of the candidate's last log entry.
 */
function isCandidateLogUpToDate<T>(
  context: RaftRuntimeContext<T>,
  candidateLastLogIndex: number,
  candidateLastLogTerm: number
): boolean {
  const ownLastTerm = lastLogTerm(context);

  if (candidateLastLogTerm === ownLastTerm) {
    return candidateLastLogIndex >= lastLogIndex(context);
  }

  return candidateLastLogTerm > ownLastTerm;
}
|
|
235
|
+
|
|
236
|
+
/**
 * Collects the fields that are handed to storage into one record.
 *
 * The `log` array is passed by reference, not copied.
 */
function persistentStateFromContext<T>(context: RaftRuntimeContext<T>): RaftPersistentState<T> {
  const { currentTerm, votedFor, log, commitIndex } = context;
  return { currentTerm, votedFor, log, commitIndex };
}
|
|
244
|
+
|
|
245
|
+
/**
 * Queues a save of the node's persistent state, if storage is configured.
 *
 * The state record is captured when this function is called and written
 * after all previously queued saves have finished. A successful save bumps
 * `persistenceSaves`; any failure in the chain bumps
 * `persistenceSaveFailures` and is otherwise swallowed so the queue keeps
 * flowing.
 */
function schedulePersist<T>(context: RaftRuntimeContext<T>): void {
  if (context.storage) {
    const snapshot = persistentStateFromContext(context);
    const metricsView = context as unknown as RaftRuntimeContext<unknown>;

    const writeSnapshot = async (): Promise<void> => {
      await context.storage?.save(snapshot);
      incrementMetric(metricsView, 'persistenceSaves');
    };
    const recordFailure = (): void => {
      incrementMetric(metricsView, 'persistenceSaveFailures');
    };

    context.persistQueue = context.persistQueue.then(writeSnapshot).catch(recordFailure);
  }
}
|
|
260
|
+
|
|
261
|
+
/**
 * Merges entries received from a leader into the local log, starting right
 * after `prevLogIndex`.
 *
 * For each incoming entry: if the local log already has an entry in that
 * slot with a different term, the local log is truncated from that slot
 * onwards; if the slot is (then) empty, the incoming entry is stored there.
 * Slots whose existing entry has the same term are left untouched. The
 * stored `index` is the slot position, not the incoming entry's own `index`.
 *
 * @param context - Node context whose log is modified in place.
 * @param prevLogIndex - Index of the entry preceding the incoming batch.
 * @param incomingEntries - Entries to merge, in log order.
 */
function mergeEntries<T>(
  context: RaftRuntimeContext<T>,
  prevLogIndex: number,
  incomingEntries: RaftLogEntry<T>[]
): void {
  const firstSlot = prevLogIndex + 1;

  for (let offset = 0; offset < incomingEntries.length; offset += 1) {
    const incoming = incomingEntries[offset];
    const slot = firstSlot + offset;
    const existing = context.log[slot];

    // Conflicting term: drop this entry and everything after it.
    if (existing && existing.term !== incoming.term) {
      context.log.length = slot;
    }

    if (!context.log[slot]) {
      context.log[slot] = { index: slot, term: incoming.term, command: incoming.command };
    }
  }
}
|
|
285
|
+
|
|
286
|
+
/**
 * Applies every committed-but-unapplied entry to the state machine, in order.
 *
 * `lastApplied` is advanced before `applyCommand` is called for an entry.
 * Each application bumps `commitsApplied`; a `commit` event is emitted per
 * entry only when `emitEvents` is true.
 *
 * @param context - Node context holding the log and apply callback.
 * @param emitEvents - Whether to emit `commit` events for applied entries.
 */
function applyCommittedEntries<T>(context: RaftRuntimeContext<T>, emitEvents: boolean): void {
  const metricsView = context as unknown as RaftRuntimeContext<unknown>;

  while (context.commitIndex > context.lastApplied) {
    const applyIndex = context.lastApplied + 1;
    context.lastApplied = applyIndex;

    const committed = context.log[applyIndex];
    context.applyCommand(committed);
    incrementMetric(metricsView, 'commitsApplied');

    if (!emitEvents) {
      continue;
    }

    context.events.emit('commit', committed);
  }
}
|
|
297
|
+
|
|
298
|
+
/**
 * Resolves and removes every pending proposal whose log index is at or
 * below the commit index, bumping `proposalsCommitted` for each.
 */
function resolveCommittedProposals<T>(context: RaftRuntimeContext<T>): void {
  const metricsView = context as unknown as RaftRuntimeContext<unknown>;

  context.pendingProposals.forEach((proposal, proposalIndex) => {
    if (proposalIndex > context.commitIndex) {
      return;
    }

    proposal.resolve();
    context.pendingProposals.delete(proposalIndex);
    incrementMetric(metricsView, 'proposalsCommitted');
  });
}
|
|
309
|
+
|
|
310
|
+
/**
 * Switches the node to the follower role.
 *
 * When `term` is newer than the current term, the term is adopted and the
 * vote for it is cleared. The heartbeat interval is stopped, the election
 * timer is re-armed, `role-change` and `leader` events are emitted, every
 * pending proposal above the commit index is rejected with a not-leader
 * error, and a persist is scheduled.
 *
 * @param context - Node context to mutate.
 * @param term - Term observed from the peer that triggered the transition.
 * @param leaderNodeId - Leader to record, or `null` when unknown.
 * @param startElectionFn - Callback for the re-armed election timer.
 */
function transitionToFollower<T>(
  context: RaftRuntimeContext<T>,
  term: number,
  leaderNodeId: string | null,
  startElectionFn: () => Promise<void>
): void {
  const sawNewerTerm = term > context.currentTerm;
  if (sawNewerTerm) {
    context.currentTerm = term;
    context.votedFor = null;
  }

  context.leaderNodeId = leaderNodeId;
  context.role = 'follower';

  clearHeartbeatTimer(context);
  resetElectionTimer(context, startElectionFn);

  context.events.emit('role-change', context.role);
  context.events.emit('leader', context.leaderNodeId);

  // Proposals that never reached the commit index can no longer be driven
  // by this node, so fail them and point callers at the known leader.
  context.pendingProposals.forEach((proposal, proposalIndex) => {
    if (proposalIndex <= context.commitIndex) {
      return;
    }

    proposal.reject(createNotLeaderError('Leader stepped down before commit', context.leaderNodeId));
    context.pendingProposals.delete(proposalIndex);
  });

  schedulePersist(context);
}
|
|
341
|
+
|
|
342
|
+
/**
 * Switches the node to the candidate role for a new term.
 *
 * The term is incremented, the node votes for itself and forgets the known
 * leader. The heartbeat interval is stopped, the election timer is re-armed,
 * `role-change` and `leader` events are emitted, and a persist is scheduled.
 *
 * @param context - Node context to mutate.
 * @param startElectionFn - Callback for the re-armed election timer.
 */
function transitionToCandidate<T>(
  context: RaftRuntimeContext<T>,
  startElectionFn: () => Promise<void>
): void {
  context.currentTerm += 1;
  context.votedFor = context.nodeId;
  context.leaderNodeId = null;
  context.role = 'candidate';

  clearHeartbeatTimer(context);
  resetElectionTimer(context, startElectionFn);

  const { events } = context;
  events.emit('role-change', context.role);
  events.emit('leader', context.leaderNodeId);

  schedulePersist(context);
}
|
|
359
|
+
|
|
360
|
+
/**
 * Builds an AppendEntries request from the node's current state.
 *
 * `prevLogTerm` is the term of the local entry at `prevLogIndex`, or 0 when
 * no such entry exists.
 *
 * @param context - Node context (supplies term, node id, log, commit index).
 * @param prevLogIndex - Index of the entry immediately before `entries`.
 * @param entries - Entries to ship; empty for a heartbeat.
 */
function createAppendEntriesRequest<T>(
  context: RaftRuntimeContext<T>,
  prevLogIndex: number,
  entries: RaftLogEntry<T>[]
): AppendEntriesRequest<T> {
  const precedingEntry = context.log[prevLogIndex];

  const request: AppendEntriesRequest<T> = {
    term: context.currentTerm,
    leaderId: context.nodeId,
    prevLogIndex,
    prevLogTerm: precedingEntry?.term ?? 0,
    entries,
    leaderCommit: context.commitIndex
  };

  return request;
}
|
|
374
|
+
|
|
375
|
+
/**
 * Brings one follower's log up to date with the leader's, making at most
 * `maxReplicationPassesPerRound` AppendEntries attempts.
 *
 * Each pass sends every entry from the peer's `nextIndex` to the end of the
 * log (an empty batch acts as a heartbeat):
 * - a response carrying a newer term makes this node step down;
 * - a success records the peer's progress and either finishes (peer caught
 *   up) or immediately sends the entries appended in the meantime;
 * - a rejection backs `nextIndex` off by one (never below 1) and retries
 *   after `retryBackoffMs`;
 * - a transport error is counted and retried after `retryBackoffMs`.
 *
 * The function never throws; failures are only visible through metrics.
 *
 * @param context - Leader's node context.
 * @param peerId - Follower to replicate to.
 * @param startElectionFn - Election callback handed to `transitionToFollower`.
 */
async function replicateToPeer<T>(
  context: RaftRuntimeContext<T>,
  peerId: string,
  startElectionFn: () => Promise<void>
): Promise<void> {
  if (context.role !== 'leader') {
    return;
  }

  const metricsContext = context as unknown as RaftRuntimeContext<unknown>;

  for (let pass = 0; pass < context.maxReplicationPassesPerRound; pass += 1) {
    if (context.role !== 'leader') {
      return;
    }

    const nextPeerIndex = context.nextIndex.get(peerId) ?? lastLogIndex(context) + 1;
    const entries = context.log.slice(nextPeerIndex);
    const request = createAppendEntriesRequest(context, nextPeerIndex - 1, entries);

    incrementMetric(metricsContext, 'appendEntriesSent');

    try {
      const response = await context.transport.appendEntries(context.nodeId, peerId, request);

      if (response.term > context.currentTerm) {
        transitionToFollower(context, response.term, null, startElectionFn);
        return;
      }

      // Leadership may have been lost, or lost and regained in a later term,
      // while the RPC was in flight. A response to a request from another
      // term says nothing about the current term's bookkeeping.
      if (context.role !== 'leader' || context.currentTerm !== request.term) {
        return;
      }

      if (response.success) {
        // This request only proves that the follower matches the leader up
        // to the last entry it carried. A follower may hold extra, stale
        // entries beyond that point, so never credit it with more than what
        // was actually sent.
        const verifiedIndex = Math.min(response.matchIndex, request.prevLogIndex + entries.length);

        context.matchIndex.set(peerId, verifiedIndex);
        context.nextIndex.set(peerId, verifiedIndex + 1);

        if (verifiedIndex >= lastLogIndex(context)) {
          return;
        }

        // New entries were appended while the request was in flight.
        incrementMetric(metricsContext, 'appendEntriesRetries');
        continue;
      }

      // Log mismatch at prevLogIndex: probe one entry earlier next time.
      context.nextIndex.set(peerId, Math.max(1, nextPeerIndex - 1));
      incrementMetric(metricsContext, 'appendEntriesRetries');
      await delay(context.retryBackoffMs);
    } catch {
      // Best effort: transport failures are recorded and retried, not raised.
      incrementMetric(metricsContext, 'appendEntriesFailed');
      await delay(context.retryBackoffMs);
    }
  }
}
|
|
428
|
+
|
|
429
|
+
/**
 * Leader-only: moves the commit index forward to the highest entry of the
 * current term that is stored on a majority of the cluster.
 *
 * Entries are scanned from the end of the log down to (but excluding) the
 * current commit index. Entries from earlier terms are skipped, so they are
 * only ever committed indirectly. On success the newly committed entries are
 * applied, matching proposals are resolved and a persist is scheduled.
 */
function advanceCommitIndex<T>(context: RaftRuntimeContext<T>): void {
  if (context.role !== 'leader') {
    return;
  }

  const quorum = majorityCount(context);

  for (let candidateIndex = lastLogIndex(context); candidateIndex > context.commitIndex; candidateIndex -= 1) {
    const entry = context.log[candidateIndex];
    const isCurrentTermEntry = !!entry && entry.term === context.currentTerm;
    if (!isCurrentTermEntry) {
      continue;
    }

    // The leader itself always holds the entry, hence the "+ 1" below.
    const followerCopies = context.peerIds.filter(
      (peerId) => (context.matchIndex.get(peerId) ?? 0) >= candidateIndex
    ).length;

    if (followerCopies + 1 < quorum) {
      continue;
    }

    context.commitIndex = candidateIndex;
    applyCommittedEntries(context, true);
    resolveCommittedProposals(context);
    schedulePersist(context);
    return;
  }
}
|
|
459
|
+
|
|
460
|
+
/**
 * Normalises a log loaded from storage so that it starts with the sentinel
 * entry (index 0, term 0).
 *
 * @param loadedLog - Log as returned by storage.
 * @returns `loadedLog` itself when it already starts with a sentinel-shaped
 *   entry; otherwise a new array with the sentinel prepended (or holding
 *   only the sentinel when `loadedLog` is empty).
 */
function ensureValidLoadedLog<T>(loadedLog: RaftLogEntry<T>[]): RaftLogEntry<T>[] {
  const sentinel = INITIAL_LOG_ENTRY as RaftLogEntry<T>;

  if (loadedLog.length === 0) {
    return [sentinel];
  }

  const head = loadedLog[0];
  const startsWithSentinel = head.index === 0 && head.term === 0;

  return startsWithSentinel ? loadedLog : [sentinel, ...loadedLog];
}
|
|
472
|
+
|
|
473
|
+
/**
 * Creates a Raft node.
 *
 * The node is inert until `start()` is called. It talks to peers only
 * through `options.transport` and expects the embedding code to route
 * incoming RPCs to `onRequestVote` / `onAppendEntries`.
 *
 * Defaults when not supplied in `options`: `electionTimeoutMs` 200,
 * `heartbeatMs` 50, `proposalTimeoutMs` 1000, `retryBackoffMs` 5,
 * `maxReplicationPassesPerRound` 3, `random` = `Math.random`.
 *
 * @param options - Node identity, peers, transport, state-machine callback
 *   and optional tuning/storage.
 * @returns The public node handle.
 */
export function createRaftNode<T>(options: RaftNodeOptions<T>): RaftNode<T> {
  const context: RaftRuntimeContext<T> = {
    nodeId: options.nodeId,
    peerIds: [...options.peerIds],
    transport: options.transport,
    applyCommand: options.applyCommand,
    electionTimeoutMs: options.electionTimeoutMs ?? 200,
    heartbeatMs: options.heartbeatMs ?? 50,
    proposalTimeoutMs: options.proposalTimeoutMs ?? 1_000,
    retryBackoffMs: options.retryBackoffMs ?? 5,
    maxReplicationPassesPerRound: options.maxReplicationPassesPerRound ?? 3,
    storage: options.storage,
    random: options.random ?? Math.random,

    electionTimer: null,
    heartbeatTimer: null,

    running: false,

    role: 'follower',
    currentTerm: 0,
    votedFor: null,
    leaderNodeId: null,

    log: [INITIAL_LOG_ENTRY as RaftLogEntry<T>],
    commitIndex: 0,
    lastApplied: 0,

    nextIndex: new Map<string, number>(),
    matchIndex: new Map<string, number>(),

    pendingProposals: new Map<number, PendingProposal>(),

    persistQueue: Promise.resolve(),
    events: createEventBus<T>(),
    metrics: createRaftMetrics()
  };

  // The metric helper is not generic over the command type; this is the same
  // object viewed with an `unknown` command type.
  const metricsContext = context as unknown as RaftRuntimeContext<unknown>;

  /** Leader-only: replicates to every peer in parallel, then tries to advance the commit index. */
  const replicateRound = async (): Promise<void> => {
    if (context.role !== 'leader') {
      return;
    }

    await Promise.all(
      context.peerIds.map(async (peerId) => replicateToPeer(context, peerId, startElection))
    );

    advanceCommitIndex(context);
  };

  /**
   * Replaces the peer set in place (the array exposed as `peerIds` keeps its
   * identity). Own id and duplicates are dropped. Replication bookkeeping is
   * removed for departed peers and initialised for new ones.
   */
  const replacePeers = (incomingPeerIds: string[]): void => {
    const unique = Array.from(new Set(incomingPeerIds.filter((peerId) => peerId !== context.nodeId)));
    const previous = new Set(context.peerIds);
    const next = new Set(unique);

    for (const peerId of context.peerIds) {
      if (next.has(peerId)) {
        continue;
      }
      context.nextIndex.delete(peerId);
      context.matchIndex.delete(peerId);
    }

    context.peerIds.splice(0, context.peerIds.length, ...unique);

    for (const peerId of context.peerIds) {
      if (previous.has(peerId)) {
        continue;
      }

      if (context.role === 'leader') {
        // A leader sends a brand-new peer the whole log from the first real entry.
        context.nextIndex.set(peerId, 1);
        context.matchIndex.set(peerId, 0);
      } else {
        context.nextIndex.set(peerId, lastLogIndex(context) + 1);
        context.matchIndex.set(peerId, 0);
      }
    }
  };

  /**
   * Starts an election for a new term: becomes candidate, votes for itself
   * and requests votes from all peers in parallel. Becomes leader as soon as
   * a majority is reached. Does nothing when the node is not running.
   */
  const startElection = async (): Promise<void> => {
    if (!context.running) {
      return;
    }

    incrementMetric(metricsContext, 'electionsStarted');
    transitionToCandidate(context, startElection);

    const request: RequestVoteRequest = {
      term: context.currentTerm,
      candidateId: context.nodeId,
      lastLogIndex: lastLogIndex(context),
      lastLogTerm: lastLogTerm(context)
    };

    let votes = 1;
    const majority = majorityCount(context);

    const becomeLeader = (): void => {
      context.role = 'leader';
      context.leaderNodeId = context.nodeId;
      clearElectionTimer(context);

      const nextLogIndex = lastLogIndex(context) + 1;
      context.matchIndex.set(context.nodeId, lastLogIndex(context));
      for (const followerId of context.peerIds) {
        context.nextIndex.set(followerId, nextLogIndex);
        context.matchIndex.set(followerId, 0);
      }

      clearHeartbeatTimer(context);
      context.heartbeatTimer = setInterval(() => {
        void replicateRound();
      }, context.heartbeatMs);

      incrementMetric(metricsContext, 'leadershipsWon');
      context.events.emit('role-change', context.role);
      context.events.emit('leader', context.leaderNodeId);

      // Announce leadership immediately instead of waiting for the first
      // heartbeat tick. One round is enough: a second concurrent round would
      // only send duplicate AppendEntries to every peer.
      void replicateRound();
    };

    // Single-node cluster: the self-vote is already a majority.
    if (votes >= majority && context.role === 'candidate') {
      becomeLeader();
      return;
    }

    await Promise.all(
      context.peerIds.map(async (peerId) => {
        incrementMetric(metricsContext, 'requestVoteSent');

        try {
          const response = await context.transport.requestVote(context.nodeId, peerId, request);

          if (response.term > context.currentTerm) {
            transitionToFollower(context, response.term, null, startElection);
            return;
          }

          // Ignore replies that belong to an election we are no longer running.
          if (context.role !== 'candidate' || context.currentTerm !== request.term) {
            return;
          }

          if (!response.voteGranted) {
            return;
          }

          votes += 1;
          if (votes < majority || context.role !== 'candidate') {
            return;
          }

          becomeLeader();
        } catch {
          // Best effort: an unreachable peer simply does not vote.
          incrementMetric(metricsContext, 'requestVoteFailed');
        }
      })
    );
  };

  /**
   * Starts the node: restores persisted state when storage is configured,
   * re-applies committed entries (without emitting `commit` events) and arms
   * the election timer. Calling it on a running node is a no-op.
   */
  const start = async (): Promise<void> => {
    if (context.running) {
      return;
    }

    if (context.storage) {
      try {
        const loaded = await context.storage.load();
        incrementMetric(metricsContext, 'persistenceLoads');

        if (loaded) {
          context.currentTerm = loaded.currentTerm;
          context.votedFor = loaded.votedFor;
          context.log = ensureValidLoadedLog(loaded.log);
          context.commitIndex = Math.min(loaded.commitIndex, lastLogIndex(context));
          context.lastApplied = 0;
          applyCommittedEntries(context, false);
        }
      } catch {
        // NOTE(review): a failed load is only counted and the node starts
        // with whatever state was assigned before the failure. Kept as-is
        // because callers may rely on start() not rejecting — confirm whether
        // starting a node that lost its term/vote is acceptable.
        incrementMetric(metricsContext, 'persistenceLoadFailures');
      }
    }

    context.running = true;
    resetElectionTimer(context, startElection);
  };

  /**
   * Stops the node: cancels timers, rejects all pending proposals and waits
   * for already queued persistence work to finish.
   */
  const stop = async (): Promise<void> => {
    context.running = false;
    clearElectionTimer(context);
    clearHeartbeatTimer(context);

    for (const pendingProposal of context.pendingProposals.values()) {
      pendingProposal.reject(new Error('Node stopped before proposal committed'));
    }
    context.pendingProposals.clear();

    await context.persistQueue;
  };

  /**
   * Appends `command` to the leader's log and drives replication until the
   * entry commits.
   *
   * @throws A not-leader error when this node is not the leader or loses
   *   leadership before commit; a quorum-unavailable error when the entry
   *   does not commit within `proposalTimeoutMs`; an `Error` when the node is
   *   stopped first.
   */
  const propose = async (command: T): Promise<void> => {
    incrementMetric(metricsContext, 'proposalsReceived');

    if (context.role !== 'leader') {
      throw createNotLeaderError('Writes must be handled by the current leader', context.leaderNodeId);
    }

    const entryIndex = context.log.length;
    const entry: RaftLogEntry<T> = {
      index: entryIndex,
      term: context.currentTerm,
      command
    };

    context.log.push(entry);
    context.matchIndex.set(context.nodeId, lastLogIndex(context));
    schedulePersist(context);

    // The promise is settled from outside: by resolveCommittedProposals on
    // commit, by transitionToFollower / stop() on failure, or by the driver
    // below on timeout or leadership loss.
    const committed = new Promise<void>((resolve, reject) => {
      context.pendingProposals.set(entryIndex, { resolve, reject });
    });

    const driveReplication = async (): Promise<void> => {
      const deadline = Date.now() + context.proposalTimeoutMs;

      try {
        while (context.commitIndex < entryIndex) {
          // Already settled elsewhere (e.g. the node was stopped): nothing
          // left to drive on behalf of this proposal.
          if (!context.pendingProposals.has(entryIndex)) {
            return;
          }

          if (context.role !== 'leader') {
            throw createNotLeaderError('Leadership changed before commit', context.leaderNodeId);
          }

          if (Date.now() > deadline) {
            incrementMetric(metricsContext, 'proposalTimeouts');
            throw createQuorumUnavailableError('Timed out waiting for majority replication');
          }

          await replicateRound();
          await delay(5);
        }

        // One more round so followers learn the new commit index promptly.
        await replicateRound();
      } catch (error) {
        const pending = context.pendingProposals.get(entryIndex);
        if (!pending) {
          return;
        }

        context.pendingProposals.delete(entryIndex);
        pending.reject(error);
      }
    };

    // The driver handles its own errors and never rejects.
    void driveReplication();
    await committed;
  };

  /**
   * Confirms that this node is still the leader by collecting successful
   * heartbeat acknowledgements from a majority of the cluster.
   *
   * @throws A not-leader error when this node is not (or stops being) the
   *   leader; a quorum-unavailable error when too few peers acknowledge.
   */
  const readBarrier = async (): Promise<void> => {
    incrementMetric(metricsContext, 'readBarriersRequested');

    if (context.role !== 'leader') {
      incrementMetric(metricsContext, 'readBarriersFailed');
      throw createNotLeaderError('Linearizable read requires leader', context.leaderNodeId);
    }

    const readTerm = context.currentTerm;
    let acknowledgements = 1;
    const request = createAppendEntriesRequest(context, lastLogIndex(context), []);

    await Promise.all(
      context.peerIds.map(async (peerId) => {
        incrementMetric(metricsContext, 'appendEntriesSent');

        try {
          const response = await context.transport.appendEntries(context.nodeId, peerId, request);

          if (response.term > context.currentTerm) {
            transitionToFollower(context, response.term, null, startElection);
            return;
          }

          if (context.currentTerm !== readTerm || context.role !== 'leader') {
            return;
          }

          if (response.success) {
            acknowledgements += 1;
          }
        } catch {
          // Best effort: an unreachable peer simply does not acknowledge.
          incrementMetric(metricsContext, 'appendEntriesFailed');
        }
      })
    );

    if (context.currentTerm !== readTerm || context.role !== 'leader') {
      incrementMetric(metricsContext, 'readBarriersFailed');
      throw createNotLeaderError('Leadership changed during read barrier', context.leaderNodeId);
    }

    if (acknowledgements < majorityCount(context)) {
      incrementMetric(metricsContext, 'readBarriersFailed');
      throw createQuorumUnavailableError('Quorum unavailable for linearizable read');
    }
  };

  /**
   * Handles an incoming RequestVote RPC.
   *
   * The vote is granted when the request's term is not stale, this node has
   * not voted for a different candidate in that term, and the candidate's
   * log is at least as up to date as the local one.
   *
   * @throws An `Error` when the node is stopped.
   */
  const onRequestVote = async (
    _fromNodeId: string,
    request: RequestVoteRequest
  ): Promise<RequestVoteResponse> => {
    if (!context.running) {
      throw new Error(`Node ${context.nodeId} is stopped`);
    }

    if (request.term < context.currentTerm) {
      return {
        term: context.currentTerm,
        voteGranted: false
      };
    }

    if (request.term > context.currentTerm) {
      transitionToFollower(context, request.term, null, startElection);
    }

    const candidateUpToDate = isCandidateLogUpToDate(
      context,
      request.lastLogIndex,
      request.lastLogTerm
    );
    const canVote = context.votedFor === null || context.votedFor === request.candidateId;
    const voteGranted = canVote && candidateUpToDate;

    if (voteGranted) {
      context.votedFor = request.candidateId;
      resetElectionTimer(context, startElection);
      schedulePersist(context);
    }

    return {
      term: context.currentTerm,
      voteGranted
    };
  };

  /**
   * Handles an incoming AppendEntries RPC (heartbeat or replication).
   *
   * On success the returned `matchIndex` is the index of the last entry
   * covered by this request (`prevLogIndex + entries.length`); on failure it
   * is the local last log index.
   *
   * @throws An `Error` when the node is stopped.
   */
  const onAppendEntries = async (
    _fromNodeId: string,
    request: AppendEntriesRequest<T>
  ): Promise<AppendEntriesResponse> => {
    if (!context.running) {
      throw new Error(`Node ${context.nodeId} is stopped`);
    }

    if (request.term < context.currentTerm) {
      return {
        term: context.currentTerm,
        success: false,
        matchIndex: lastLogIndex(context)
      };
    }

    if (request.term > context.currentTerm || context.role !== 'follower') {
      transitionToFollower(context, request.term, request.leaderId, startElection);
    }

    context.leaderNodeId = request.leaderId;
    resetElectionTimer(context, startElection);

    if (!logMatches(context, request.prevLogIndex, request.prevLogTerm)) {
      return {
        term: context.currentTerm,
        success: false,
        matchIndex: lastLogIndex(context)
      };
    }

    mergeEntries(context, request.prevLogIndex, request.entries);

    // Only the prefix up to the last entry carried by this request is known
    // to agree with the leader. The local log may extend beyond it with
    // stale entries from an earlier leader, and those must be neither
    // acknowledged nor committed.
    const lastVerifiedIndex = request.prevLogIndex + request.entries.length;

    const commitTarget = Math.min(request.leaderCommit, lastVerifiedIndex);
    if (commitTarget > context.commitIndex) {
      context.commitIndex = commitTarget;
      applyCommittedEntries(context, true);
    }

    schedulePersist(context);

    return {
      term: context.currentTerm,
      success: true,
      matchIndex: lastVerifiedIndex
    };
  };

  return {
    nodeId: context.nodeId,
    peerIds: context.peerIds,
    start,
    stop,
    isLeader: () => context.role === 'leader',
    leaderId: () => context.leaderNodeId,
    getState: () => ({
      role: context.role,
      currentTerm: context.currentTerm,
      commitIndex: context.commitIndex,
      lastApplied: context.lastApplied,
      lastLogIndex: lastLogIndex(context),
      leaderNodeId: context.leaderNodeId
    }),
    getMetrics: () => ({ ...context.metrics }),
    replacePeers,
    startElection,
    propose,
    readBarrier,
    onRequestVote,
    onAppendEntries,
    on: context.events.on,
    off: context.events.off
  };
}
|