@peerbit/shared-log 13.1.0 → 13.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/benchmark/join-backfill-repair.d.ts +2 -0
- package/dist/benchmark/join-backfill-repair.d.ts.map +1 -0
- package/dist/benchmark/join-backfill-repair.js +288 -0
- package/dist/benchmark/join-backfill-repair.js.map +1 -0
- package/dist/src/index.d.ts +38 -2
- package/dist/src/index.d.ts.map +1 -1
- package/dist/src/index.js +989 -119
- package/dist/src/index.js.map +1 -1
- package/dist/src/pid.d.ts.map +1 -1
- package/dist/src/pid.js +16 -2
- package/dist/src/pid.js.map +1 -1
- package/dist/src/ranges.d.ts.map +1 -1
- package/dist/src/ranges.js +8 -1
- package/dist/src/ranges.js.map +1 -1
- package/dist/src/sync/simple.d.ts +7 -0
- package/dist/src/sync/simple.d.ts.map +1 -1
- package/dist/src/sync/simple.js +71 -22
- package/dist/src/sync/simple.js.map +1 -1
- package/package.json +8 -7
- package/src/index.ts +1347 -230
- package/src/pid.ts +22 -2
- package/src/ranges.ts +9 -1
- package/src/sync/simple.ts +56 -23
package/src/index.ts
CHANGED
@@ -168,7 +168,7 @@ import type {
 	Syncronizer,
 } from "./sync/index.js";
 import { RatelessIBLTSynchronizer } from "./sync/rateless-iblt.js";
-import { SimpleSyncronizer } from "./sync/simple.js";
+import { ConfirmEntriesMessage, SimpleSyncronizer } from "./sync/simple.js";
 import { groupByGid } from "./utils.js";

 const toLocalPublicSignKey = (
@@ -468,6 +468,7 @@ export type SharedLogOptions<
 	waitForReplicatorRequestMaxAttempts?: number;
 	waitForPruneDelay?: number;
 	distributionDebounceTime?: number;
+	strictFullReplicaFallback?: boolean;
 	compatibility?: number;
 	domain?: ReplicationDomainConstructor<D>;
 	eagerBlocks?: boolean | { cacheSize?: number };
@@ -513,10 +514,143 @@ const REPLICATOR_LIVENESS_PROBE_FAILURES_TO_EVICT = 2;
 // Churn/join repair can race with pruning and transient missed sync requests under
 // heavy event-loop load. Keep retries alive with a longer tail so reassigned
 // entries are retried after short bursts and slower recovery windows.
-const
+const CHURN_REPAIR_RETRY_SCHEDULE_MS = [
 	0, 1_000, 3_000, 7_000, 15_000, 30_000, 45_000,
 ];
-const JOIN_WARMUP_RETRY_SCHEDULE_MS = [
+const JOIN_WARMUP_RETRY_SCHEDULE_MS = [
+	0,
+	1_000,
+	3_000,
+	7_000,
+	15_000,
+	30_000,
+	60_000,
+];
+const JOIN_AUTHORITATIVE_RETRY_SCHEDULE_MS = [
+	0,
+	1_000,
+	3_000,
+	7_000,
+	15_000,
+	30_000,
+	60_000,
+];
+const APPEND_BACKFILL_RETRY_SCHEDULE_MS = [0, 1_000, 3_000, 7_000];
+const JOIN_AUTHORITATIVE_REPAIR_DELAY_MS = 2_000;
+const JOIN_AUTHORITATIVE_REPAIR_SWEEP_DELAYS_MS = [
+	JOIN_AUTHORITATIVE_REPAIR_DELAY_MS,
+	7_000,
+	15_000,
+	30_000,
+];
+const APPEND_BACKFILL_DELAY_MS = 500;
+const ASSUME_SYNCED_REPAIR_SUPPRESSION_MS = 5_000;
+const REPAIR_CONFIRMATION_HASH_BATCH_SIZE = 1_024;
+
+type RepairDispatchMode =
+	| "join-warmup"
+	| "join-authoritative"
+	| "append-backfill"
+	| "churn";
+type RepairTransportMode = "rateless" | "simple";
+type RepairMetricBucket = {
+	dispatches: number;
+	entries: number;
+	ratelessFirstPasses: number;
+	simpleFallbackPasses: number;
+};
+type RepairMetrics = Record<RepairDispatchMode, RepairMetricBucket>;
+
+const REPAIR_DISPATCH_MODES: RepairDispatchMode[] = [
+	"join-warmup",
+	"join-authoritative",
+	"append-backfill",
+	"churn",
+];
+
+const createRepairMetricBucket = (): RepairMetricBucket => ({
+	dispatches: 0,
+	entries: 0,
+	ratelessFirstPasses: 0,
+	simpleFallbackPasses: 0,
+});
+
+const createRepairMetrics = (): RepairMetrics => ({
+	"join-warmup": createRepairMetricBucket(),
+	"join-authoritative": createRepairMetricBucket(),
+	"append-backfill": createRepairMetricBucket(),
+	churn: createRepairMetricBucket(),
+});
+
+const createRepairPendingPeersByMode = () =>
+	new Map<RepairDispatchMode, Set<string>>(
+		REPAIR_DISPATCH_MODES.map((mode) => [mode, new Set<string>()]),
+	);
+
+const cloneRepairPendingPeersByMode = (
+	pending: Map<RepairDispatchMode, Set<string>>,
+) =>
+	new Map<RepairDispatchMode, Set<string>>(
+		REPAIR_DISPATCH_MODES.map((mode) => [mode, new Set(pending.get(mode) ?? [])]),
+	);
+
+const createRepairFrontierByMode = () =>
+	new Map<
+		RepairDispatchMode,
+		Map<string, Map<string, EntryReplicated<any>>>
+	>(REPAIR_DISPATCH_MODES.map((mode) => [mode, new Map()]));
+
+const createRepairActiveTargetsByMode = () =>
+	new Map<RepairDispatchMode, Set<string>>(
+		REPAIR_DISPATCH_MODES.map((mode) => [mode, new Set()]),
+	);
+
+const getRepairRetrySchedule = (mode: RepairDispatchMode) => {
+	switch (mode) {
+		case "join-warmup":
+			return JOIN_WARMUP_RETRY_SCHEDULE_MS;
+		case "join-authoritative":
+			return JOIN_AUTHORITATIVE_RETRY_SCHEDULE_MS;
+		case "append-backfill":
+			return APPEND_BACKFILL_RETRY_SCHEDULE_MS;
+		case "churn":
+			return CHURN_REPAIR_RETRY_SCHEDULE_MS;
+	}
+};
+
+const resolveRepairRetrySchedule = (
+	mode: RepairDispatchMode,
+	override?: number[],
+	trackedFrontier = false,
+) => {
+	const fallback = getRepairRetrySchedule(mode);
+	if (!override || override.length === 0) {
+		return fallback;
+	}
+	if (
+		trackedFrontier &&
+		override.length === 1 &&
+		override[0] === 0 &&
+		fallback.length > 1
+	) {
+		// A tracked frontier with only an immediate retry would otherwise stay on
+		// attempt 0 forever, which means rateless-only retries and no sparse-tail
+		// simple fallback. Keep the immediate seed, then continue with the normal
+		// tracked repair schedule.
+		return [0, ...fallback.slice(1)];
+	}
+	return override;
+};
+
+const getRepairTransportForAttempt = (
+	mode: RepairDispatchMode,
+	attemptIndex: number,
+): RepairTransportMode => {
+	if (mode === "churn") {
+		return "simple";
+	}
+	return attemptIndex === 0 ? "rateless" : "simple";
+};

 const toPositiveInteger = (
 	value: number | undefined,
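The schedules above are absolute offsets (in milliseconds) from the first dispatch, and `getRepairTransportForAttempt` sends attempt 0 over the rateless-IBLT synchronizer and every later attempt over the simple push path (churn repair always uses the simple path). A minimal standalone sketch of how a schedule expands into per-attempt waits and transports; `planAttempts` is illustrative only, not a package export:

```ts
// Sketch only: expands an absolute-offset schedule into (wait, transport) pairs,
// mirroring the constants and getRepairTransportForAttempt logic shown above.
type Transport = "rateless" | "simple";
const JOIN_WARMUP_RETRY_SCHEDULE_MS = [0, 1_000, 3_000, 7_000, 15_000, 30_000, 60_000];

const planAttempts = (scheduleMs: number[], churn = false) =>
	scheduleMs.map((offset, index) => ({
		waitMs: index === 0 ? offset : offset - scheduleMs[index - 1],
		transport: (churn || index > 0 ? "simple" : "rateless") as Transport,
	}));

// -> waits of 0s, 1s, 2s, 4s, 8s, 15s, 30s; rateless first, then simple fallbacks
console.log(planAttempts(JOIN_WARMUP_RETRY_SCHEDULE_MS));
```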
@@ -751,8 +885,24 @@ export class SharedLog<
 	private _repairRetryTimers!: Set<ReturnType<typeof setTimeout>>;
 	private _recentRepairDispatch!: Map<string, Map<string, number>>;
 	private _repairSweepRunning!: boolean;
-	private
-	private
+	private _repairSweepPendingModes!: Set<RepairDispatchMode>;
+	private _repairSweepPendingPeersByMode!: Map<RepairDispatchMode, Set<string>>;
+	private _repairFrontierByMode!: Map<
+		RepairDispatchMode,
+		Map<string, Map<string, EntryReplicated<R>>>
+	>;
+	private _repairFrontierActiveTargetsByMode!: Map<RepairDispatchMode, Set<string>>;
+	private _repairSweepOptimisticGidPeersPending!: Map<string, Map<string, number>>;
+	private _entryKnownPeers!: Map<string, Set<string>>;
+	private _joinAuthoritativeRepairTimersByDelay!: Map<
+		number,
+		ReturnType<typeof setTimeout>
+	>;
+	private _joinAuthoritativeRepairPeersByDelay!: Map<number, Set<string>>;
+	private _assumeSyncedRepairSuppressedUntil!: number;
+	private _appendBackfillTimer?: ReturnType<typeof setTimeout>;
+	private _appendBackfillPendingByTarget!: Map<string, Map<string, EntryReplicated<R>>>;
+	private _repairMetrics!: RepairMetrics;
 	private _topicSubscribersCache!: Map<
 		string,
 		{ expiresAt: number; keys: PublicSignKey[] }
@@ -1187,6 +1337,7 @@ export class SharedLog<

 	private async _appendDeliverToReplicators(
 		entry: Entry<T>,
+		coordinates: NumberFromType<R>[],
 		minReplicasValue: number,
 		leaders: Map<string, any>,
 		selfHash: string,
@@ -1204,11 +1355,35 @@ export class SharedLog<
 			? { timeoutMs: delivery.timeout, signal: delivery.signal }
 			: undefined;

+		const fullReplicaDeliveryCandidates =
+			await this.getFullReplicaRepairCandidates(undefined, {
+				includeSubscribers: false,
+			});
+		if (minReplicasValue >= Math.max(1, fullReplicaDeliveryCandidates.size)) {
+			for (const peer of fullReplicaDeliveryCandidates) {
+				if (!leaders.has(peer)) {
+					leaders.set(peer, { intersecting: true });
+				}
+			}
+		}
+
+		const entryReplicatedForRepair = this.createEntryReplicatedForRepair({
+			entry,
+			coordinates,
+			leaders: leaders as Map<string, { intersecting: boolean }>,
+			replicas: minReplicasValue,
+		});
 		for await (const message of createExchangeHeadsMessages(this.log, [entry])) {
 			await this._mergeLeadersFromGidReferences(message, minReplicasValue, leaders);
-			const
+			const authoritativeRecipients = new Set(leaders.keys());
+			const leadersForDelivery = delivery
+				? new Set(authoritativeRecipients)
+				: undefined;

-
+			// Outbound append delivery only tells us who we intend to send to, not who has
+			// actually stored the entry. Keep this recipient set local so later repair
+			// sweeps can still backfill peers that missed the initial delivery.
+			const set = new Set(leaders.keys());
 			let hasRemotePeers = set.has(selfHash) ? set.size > 1 : set.size > 0;
 			const allowSubscriberFallback =
 				this.syncronizer instanceof SimpleSyncronizer ||
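The first added block above widens the delivery leader set: when the requested replication degree is at least as large as the set of known full-replica candidates, every candidate is treated as an intended recipient. A small self-contained sketch of that promotion rule, with names local to this example:

```ts
// Sketch only: promote all full-replica candidates into the leader map when the
// requested replica count covers (or exceeds) the candidate set size.
const promoteFullReplicaCandidates = (
	leaders: Map<string, { intersecting: boolean }>,
	candidates: Set<string>,
	minReplicas: number,
) => {
	if (minReplicas >= Math.max(1, candidates.size)) {
		for (const peer of candidates) {
			if (!leaders.has(peer)) {
				leaders.set(peer, { intersecting: true });
			}
		}
	}
	return leaders;
};
```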
@@ -1239,6 +1414,17 @@ export class SharedLog<
 			}

 			if (!delivery) {
+				for (const peer of authoritativeRecipients) {
+					if (peer === selfHash) {
+						continue;
+					}
+					// Default live append delivery is still optimistic. If one remote misses
+					// the initial heads exchange and the caller did not opt into explicit
+					// delivery acks, we still need a targeted backfill source of truth for the
+					// authoritative recipients or one entry can get stuck at 2/3 replicas
+					// forever. Best-effort fallback subscribers are not repair-worthy.
+					this.queueAppendBackfill(peer, entryReplicatedForRepair);
+				}
 				this.rpc
 					.send(message, {
 						mode: isLeader
@@ -1268,6 +1454,7 @@ export class SharedLog<

 				const ackTo: string[] = [];
 				let silentTo: string[] | undefined;
+				const repairTargets = new Set<string>();
 				// Default delivery semantics: require enough remote ACKs to reach the requested
 				// replication degree (local append counts as 1).
 				const defaultMinAcks = Math.max(0, minReplicasValue - 1);
@@ -1279,6 +1466,9 @@ export class SharedLog<
 				);

 				for (const peer of orderedRemoteRecipients) {
+					if (authoritativeRecipients.has(peer)) {
+						repairTargets.add(peer);
+					}
 					if (ackTo.length < ackLimit) {
 						ackTo.push(peer);
 					} else {
@@ -1317,6 +1507,12 @@ export class SharedLog<
 					})
 					.catch((error) => logger.error(error));
 				}
+				for (const peer of repairTargets) {
+					// Direct append delivery is intentionally optimistic. Queue one delayed,
+					// batched maybe-sync pass for the intended recipients so stable 3-peer
+					// append workloads do not depend on perfect first-try delivery ordering.
+					this.queueAppendBackfill(peer, entryReplicatedForRepair);
+				}
 			}

 			if (pending.length > 0) {
@@ -2016,6 +2212,7 @@ export class SharedLog<
 			// Keep local sync/prune state consistent even when a peer disappears
 			// through replication-info updates without a topic unsubscribe event.
 			this.removePeerFromGidPeerHistory(keyHash);
+			this.removeRepairFrontierTarget(keyHash);
 			this._recentRepairDispatch.delete(keyHash);
 			if (!isMe) {
 				this.syncronizer.onPeerDisconnected(keyHash);
@@ -2483,6 +2680,7 @@ export class SharedLog<
 			for (const key of this._gidPeersHistory.keys()) {
 				this.removePeerFromGidPeerHistory(publicKeyHash, key);
 			}
+			this.removePeerFromEntryKnownPeers(publicKeyHash);
 		}
 	}

@@ -2507,19 +2705,448 @@ export class SharedLog<
 		return set;
 	}

+	private markEntriesKnownByPeer(hashes: Iterable<string>, peer: string) {
+		for (const hash of hashes) {
+			let peers = this._entryKnownPeers.get(hash);
+			if (!peers) {
+				peers = new Set();
+				this._entryKnownPeers.set(hash, peers);
+			}
+			peers.add(peer);
+		}
+	}
+
+	private removeEntriesKnownByPeer(hashes: Iterable<string>, peer: string) {
+		for (const hash of hashes) {
+			const peers = this._entryKnownPeers.get(hash);
+			if (!peers) {
+				continue;
+			}
+			peers.delete(peer);
+			if (peers.size === 0) {
+				this._entryKnownPeers.delete(hash);
+			}
+		}
+	}
+
+	private removePeerFromEntryKnownPeers(peer: string) {
+		for (const [hash, peers] of this._entryKnownPeers) {
+			peers.delete(peer);
+			if (peers.size === 0) {
+				this._entryKnownPeers.delete(hash);
+			}
+		}
+	}
+
+	private isEntryKnownByPeer(hash: string, peer: string) {
+		return this._entryKnownPeers.get(hash)?.has(peer) === true;
+	}
+
+	private markRepairSweepOptimisticPeer(gid: string, peer: string) {
+		let peers = this._repairSweepOptimisticGidPeersPending.get(gid);
+		if (!peers) {
+			peers = new Map();
+			this._repairSweepOptimisticGidPeersPending.set(gid, peers);
+		}
+		peers.set(peer, (peers.get(peer) || 0) + 1);
+	}
+
+	private hasPendingRepairSweepOptimisticPeer(gid: string, peer: string) {
+		return (this._repairSweepOptimisticGidPeersPending.get(gid)?.get(peer) || 0) > 0;
+	}
+
+	private createEntryReplicatedForRepair(properties: {
+		entry: Entry<T>;
+		coordinates: NumberFromType<R>[];
+		leaders: Map<string, { intersecting: boolean }>;
+		replicas: number;
+	}) {
+		const assignedToRangeBoundary = shouldAssignToRangeBoundary(
+			properties.leaders,
+			properties.replicas,
+		);
+		const cidObject = cidifyString(properties.entry.hash);
+		const hashNumber = this.indexableDomain.numbers.bytesToNumber(
+			cidObject.multihash.digest,
+		);
+		return new this.indexableDomain.constructorEntry({
+			assignedToRangeBoundary,
+			coordinates: properties.coordinates,
+			meta: properties.entry.meta,
+			hash: properties.entry.hash,
+			hashNumber,
+		});
+	}
+
+	private isAssumeSyncedRepairSuppressed() {
+		return this._assumeSyncedRepairSuppressedUntil > Date.now();
+	}
+
+	private isFrontierTrackedRepairMode(mode: RepairDispatchMode) {
+		return mode !== "join-warmup";
+	}
+
+	private async sleepTracked(delayMs: number) {
+		if (delayMs <= 0) {
+			return;
+		}
+		await new Promise<void>((resolve) => {
+			const timer = setTimeout(() => {
+				this._repairRetryTimers.delete(timer);
+				resolve();
+			}, delayMs);
+			timer.unref?.();
+			this._repairRetryTimers.add(timer);
+		});
+	}
+
+	private queueRepairFrontierEntries(
+		mode: RepairDispatchMode,
+		target: string,
+		entries: Map<string, EntryReplicated<R>>,
+	) {
+		let targets = this._repairFrontierByMode.get(mode);
+		if (!targets) {
+			targets = new Map();
+			this._repairFrontierByMode.set(mode, targets);
+		}
+		let pending = targets.get(target);
+		if (!pending) {
+			pending = new Map();
+			targets.set(target, pending);
+		}
+		for (const [hash, entry] of entries) {
+			pending.set(hash, entry);
+		}
+	}
+
+	private clearRepairFrontierHashes(target: string, hashes: Iterable<string>) {
+		const hashList = [...hashes];
+		if (hashList.length === 0) {
+			return;
+		}
+		for (const mode of REPAIR_DISPATCH_MODES) {
+			const pending = this._repairFrontierByMode.get(mode)?.get(target);
+			if (!pending) {
+				continue;
+			}
+			for (const hash of hashList) {
+				pending.delete(hash);
+			}
+			if (pending.size === 0) {
+				this._repairFrontierByMode.get(mode)?.delete(target);
+			}
+		}
+	}
+
+	private async getFullReplicaRepairCandidates(
+		extraPeers?: Iterable<string>,
+		options?: { includeSubscribers?: boolean },
+	) {
+		const candidates = new Set<string>([
+			this.node.identity.publicKey.hashcode(),
+		]);
+		try {
+			for (const peer of await this.getReplicators()) {
+				candidates.add(peer);
+			}
+		} catch {
+			for (const peer of this.uniqueReplicators) {
+				candidates.add(peer);
+			}
+		}
+		for (const peer of extraPeers ?? []) {
+			candidates.add(peer);
+		}
+		if (options?.includeSubscribers !== false) {
+			try {
+				for (const subscriber of (await this._getTopicSubscribers(this.topic)) ?? []) {
+					candidates.add(subscriber.hashcode());
+				}
+			} catch {
+				// Best-effort only; explicit repair peers still keep the path safe.
+			}
+		}
+		return candidates;
+	}
+
+	private removeRepairFrontierTarget(target: string) {
+		for (const mode of REPAIR_DISPATCH_MODES) {
+			this._repairFrontierByMode.get(mode)?.delete(target);
+			this._repairFrontierActiveTargetsByMode.get(mode)?.delete(target);
+		}
+	}
+
+	private async sendRepairConfirmation(
+		target: PublicSignKey,
+		hashes: Iterable<string>,
+	) {
+		const uniqueHashes = [...new Set(hashes)];
+		for (let i = 0; i < uniqueHashes.length; i += REPAIR_CONFIRMATION_HASH_BATCH_SIZE) {
+			const chunk = uniqueHashes.slice(
+				i,
+				i + REPAIR_CONFIRMATION_HASH_BATCH_SIZE,
+			);
+			await this.rpc.send(new ConfirmEntriesMessage({ hashes: chunk }), {
+				priority: 1,
+				mode: new SilentDelivery({ to: [target], redundancy: 1 }),
+			});
+		}
+	}
+
+	private async pushRepairEntries(
+		target: string,
+		entries: Map<string, EntryReplicated<R>>,
+	) {
+		for await (const message of createExchangeHeadsMessages(
+			this.log,
+			[...entries.keys()],
+		)) {
+			await this.rpc.send(message, {
+				priority: 1,
+				mode: new SilentDelivery({ to: [target], redundancy: 1 }),
+			});
+		}
+	}
+
+	private async sendRepairEntriesWithTransport(
+		target: string,
+		entries: Map<string, EntryReplicated<R>>,
+		transport: RepairTransportMode,
+		options?: { bypassKnownPeers?: boolean },
+	) {
+		const unknownEntries = new Map<string, EntryReplicated<R>>();
+		const knownHashes: string[] = [];
+		for (const [hash, entry] of entries) {
+			if (options?.bypassKnownPeers || !this.isEntryKnownByPeer(hash, target)) {
+				unknownEntries.set(hash, entry);
+			} else {
+				knownHashes.push(hash);
+			}
+		}
+		this.clearRepairFrontierHashes(target, knownHashes);
+		if (unknownEntries.size === 0) {
+			return;
+		}
+		if (transport === "simple") {
+			// Fallback repair should not depend on the target completing the
+			// RequestMaybeSync -> ResponseMaybeSync round trip.
+			await this.pushRepairEntries(target, unknownEntries);
+			return;
+		}
+
+		await this.syncronizer.onMaybeMissingEntries({
+			entries: unknownEntries,
+			targets: [target],
+		});
+	}
+
+	private async sendMaybeMissingEntriesNow(
+		target: string,
+		entries: Map<string, EntryReplicated<R>>,
+		options: {
+			mode: RepairDispatchMode;
+			transport: RepairTransportMode;
+			bypassRecentDedupe?: boolean;
+		},
+	) {
+		if (entries.size === 0) {
+			return;
+		}
+
+		const now = Date.now();
+		let recentlyDispatchedByHash = this._recentRepairDispatch.get(target);
+		if (!recentlyDispatchedByHash) {
+			recentlyDispatchedByHash = new Map();
+			this._recentRepairDispatch.set(target, recentlyDispatchedByHash);
+		}
+		for (const [hash, ts] of recentlyDispatchedByHash) {
+			if (now - ts > RECENT_REPAIR_DISPATCH_TTL_MS) {
+				recentlyDispatchedByHash.delete(hash);
+			}
+		}
+
+		const filteredEntries =
+			options.bypassRecentDedupe === true
+				? new Map(entries)
+				: new Map<string, EntryReplicated<any>>();
+		if (options.bypassRecentDedupe !== true) {
+			for (const [hash, entry] of entries) {
+				const prev = recentlyDispatchedByHash.get(hash);
+				if (prev != null && now - prev <= RECENT_REPAIR_DISPATCH_TTL_MS) {
+					continue;
+				}
+				recentlyDispatchedByHash.set(hash, now);
+				filteredEntries.set(hash, entry);
+			}
+		} else {
+			for (const hash of entries.keys()) {
+				recentlyDispatchedByHash.set(hash, now);
+			}
+		}
+		if (filteredEntries.size === 0) {
+			return;
+		}
+
+		const bucket = this._repairMetrics[options.mode];
+		bucket.dispatches += 1;
+		bucket.entries += filteredEntries.size;
+		if (options.transport === "simple") {
+			bucket.simpleFallbackPasses += 1;
+		} else {
+			bucket.ratelessFirstPasses += 1;
+		}
+
+		await Promise.resolve(
+			this.sendRepairEntriesWithTransport(
+				target,
+				filteredEntries,
+				options.transport,
+				{ bypassKnownPeers: options.mode === "churn" },
+			),
+		).catch((error: any) => logger.error(error));
+	}
+
+	private ensureRepairFrontierRunner(
+		mode: RepairDispatchMode,
+		target: string,
+		retryScheduleMs?: number[],
+	) {
+		const activeTargets = this._repairFrontierActiveTargetsByMode.get(mode);
+		if (!activeTargets || activeTargets.has(target) || this.closed) {
+			return;
+		}
+		activeTargets.add(target);
+		const retrySchedule = resolveRepairRetrySchedule(
+			mode,
+			retryScheduleMs,
+			this.isFrontierTrackedRepairMode(mode),
+		);
+		const steadyStateDelay =
+			retrySchedule.length > 1
+				? Math.max(1, retrySchedule[retrySchedule.length - 1] - retrySchedule[retrySchedule.length - 2])
+				: Math.max(retrySchedule[0] || 1_000, 1_000);
+
+		void (async () => {
+			let attemptIndex = 0;
+			try {
+				for (;;) {
+					if (this.closed) {
+						return;
+					}
+					const pending = this._repairFrontierByMode.get(mode)?.get(target);
+					if (!pending || pending.size === 0) {
+						return;
+					}
+
+					if (
+						(mode === "join-warmup" || mode === "join-authoritative") &&
+						this.isAssumeSyncedRepairSuppressed()
+					) {
+						await this.sleepTracked(
+							Math.max(250, this._assumeSyncedRepairSuppressedUntil - Date.now()),
+						);
+						continue;
+					}
+
+					await this.sendMaybeMissingEntriesNow(target, pending, {
+						mode,
+						transport: getRepairTransportForAttempt(mode, attemptIndex),
+						bypassRecentDedupe: true,
+					});
+
+					const remaining = this._repairFrontierByMode.get(mode)?.get(target);
+					if (!remaining || remaining.size === 0) {
+						return;
+					}
+
+					const waitMs =
+						attemptIndex + 1 < retrySchedule.length
+							? Math.max(0, retrySchedule[attemptIndex + 1] - retrySchedule[attemptIndex])
+							: steadyStateDelay;
+					attemptIndex = Math.min(attemptIndex + 1, retrySchedule.length - 1);
+					await this.sleepTracked(waitMs);
+				}
+			} finally {
+				activeTargets.delete(target);
+				if (
+					!this.closed &&
+					(this._repairFrontierByMode.get(mode)?.get(target)?.size || 0) > 0
+				) {
+					this.ensureRepairFrontierRunner(mode, target, retryScheduleMs);
+				}
+			}
+		})().catch((error: any) => {
+			activeTargets.delete(target);
+			logger.error(error);
+		});
+	}
+
+	private flushAppendBackfill() {
+		if (this._appendBackfillPendingByTarget.size === 0) {
+			return;
+		}
+		const pending = this._appendBackfillPendingByTarget;
+		this._appendBackfillPendingByTarget = new Map();
+		for (const [target, entries] of pending) {
+			this.dispatchMaybeMissingEntries(target, entries, {
+				mode: "append-backfill",
+			});
+		}
+	}
+
+	private queueAppendBackfill(target: string, entry: EntryReplicated<R>) {
+		let entries = this._appendBackfillPendingByTarget.get(target);
+		if (!entries) {
+			entries = new Map();
+			this._appendBackfillPendingByTarget.set(target, entries);
+		}
+		entries.set(entry.hash, entry);
+		if (entries.size >= this.repairSweepTargetBufferSize) {
+			this.flushAppendBackfill();
+			return;
+		}
+		if (this._appendBackfillTimer || this.closed) {
+			return;
+		}
+		const timer = setTimeout(() => {
+			this._repairRetryTimers.delete(timer);
+			if (this._appendBackfillTimer === timer) {
+				this._appendBackfillTimer = undefined;
+			}
+			if (this.closed) {
+				return;
+			}
+			this.flushAppendBackfill();
+		}, APPEND_BACKFILL_DELAY_MS);
+		timer.unref?.();
+		this._repairRetryTimers.add(timer);
+		this._appendBackfillTimer = timer;
+	}
+
 	private dispatchMaybeMissingEntries(
 		target: string,
 		entries: Map<string, EntryReplicated<R>>,
-		options
+		options: {
+			mode: RepairDispatchMode;
 			bypassRecentDedupe?: boolean;
 			retryScheduleMs?: number[];
-			forceFreshDelivery?: boolean;
 		},
 	) {
 		if (entries.size === 0) {
 			return;
 		}

+		if (this.isFrontierTrackedRepairMode(options.mode)) {
+			this.queueRepairFrontierEntries(options.mode, target, entries);
+			this.ensureRepairFrontierRunner(
+				options.mode,
+				target,
+				options.retryScheduleMs,
+			);
+			return;
+		}
+
 		const now = Date.now();
 		let recentlyDispatchedByHash = this._recentRepairDispatch.get(target);
 		if (!recentlyDispatchedByHash) {
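`queueAppendBackfill` above batches per-target repair candidates and flushes them either when a target's buffer reaches `repairSweepTargetBufferSize` or after `APPEND_BACKFILL_DELAY_MS`. A stripped-down sketch of that size-or-time flush pattern; the buffer size below is an assumed stand-in, since the real value is configured elsewhere in the class:

```ts
// Sketch only: size- or time-triggered flush of per-target entry buffers.
const DELAY_MS = 500; // APPEND_BACKFILL_DELAY_MS above
const BUFFER_SIZE = 64; // assumed stand-in for repairSweepTargetBufferSize

class BackfillQueue<E extends { hash: string }> {
	private pending = new Map<string, Map<string, E>>();
	private timer?: ReturnType<typeof setTimeout>;
	constructor(private dispatch: (target: string, entries: Map<string, E>) => void) {}

	queue(target: string, entry: E) {
		const entries = this.pending.get(target) ?? new Map<string, E>();
		this.pending.set(target, entries);
		entries.set(entry.hash, entry);
		if (entries.size >= BUFFER_SIZE) {
			return this.flush(); // full buffer: flush immediately
		}
		// Otherwise coalesce further appends and flush once after a short delay.
		this.timer ??= setTimeout(() => {
			this.timer = undefined;
			this.flush();
		}, DELAY_MS);
	}

	flush() {
		const batch = this.pending;
		this.pending = new Map();
		for (const [target, entries] of batch) this.dispatch(target, entries);
	}
}
```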
@@ -2533,10 +3160,10 @@ export class SharedLog<
 		}

 		const filteredEntries =
-			options
+			options.bypassRecentDedupe === true
 				? new Map(entries)
 				: new Map<string, EntryReplicated<any>>();
-		if (options
+		if (options.bypassRecentDedupe !== true) {
 			for (const [hash, entry] of entries) {
 				const prev = recentlyDispatchedByHash.get(hash);
 				if (prev != null && now - prev <= RECENT_REPAIR_DISPATCH_TTL_MS) {
@@ -2553,64 +3180,69 @@ export class SharedLog<
 		if (filteredEntries.size === 0) {
 			return;
 		}
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+		if (
+			(options.mode === "join-warmup" ||
+				options.mode === "join-authoritative") &&
+			this.isAssumeSyncedRepairSuppressed()
+		) {
+			return;
+		}
+
+		const retrySchedule = resolveRepairRetrySchedule(
+			options.mode,
+			options.retryScheduleMs,
+			this.isFrontierTrackedRepairMode(options.mode),
+		);
+		const bucket = this._repairMetrics[options.mode];
+		bucket.dispatches += 1;
+		bucket.entries += filteredEntries.size;
+
+		const run = (transport: RepairTransportMode) => {
+			if (transport === "simple") {
+				bucket.simpleFallbackPasses += 1;
+			} else {
+				bucket.ratelessFirstPasses += 1;
 			}

 			return Promise.resolve(
-				this.
-
-
-
+				this.sendRepairEntriesWithTransport(
+					target,
+					filteredEntries,
+					transport,
+					{ bypassKnownPeers: options.mode === "churn" },
+				),
 			).catch((error: any) => logger.error(error));
 		};

-
+		retrySchedule.forEach((delayMs, index) => {
+			const transport = getRepairTransportForAttempt(options.mode, index);
 			if (delayMs === 0) {
-				void run();
-
+				void run(transport);
+				return;
 			}
 			const timer = setTimeout(() => {
 				this._repairRetryTimers.delete(timer);
 				if (this.closed) {
 					return;
 				}
-				void run();
+				void run(transport);
 			}, delayMs);
 			timer.unref?.();
 			this._repairRetryTimers.add(timer);
-		}
+		});
 	}

 	private scheduleRepairSweep(options: {
-
-
+		mode: RepairDispatchMode;
+		peers?: Iterable<string>;
 	}) {
-
-
-
-
-
+		this._repairSweepPendingModes.add(options.mode);
+		const pendingPeers = this._repairSweepPendingPeersByMode.get(options.mode);
+		if (pendingPeers) {
+			for (const peer of options.peers ?? []) {
+				pendingPeers.add(peer);
+			}
 		}
 		if (!this._repairSweepRunning && !this.closed) {
 			this._repairSweepRunning = true;
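For the untracked (join-warmup) path above, each dispatch still passes through the recent-dispatch dedupe keyed by `RECENT_REPAIR_DISPATCH_TTL_MS`, a constant defined elsewhere in this file and not shown in this diff. A small sketch of that filter with an assumed TTL value:

```ts
// Sketch only: skip entries dispatched to the same target within the TTL window.
const RECENT_TTL_MS = 10_000; // assumed value; the real constant is RECENT_REPAIR_DISPATCH_TTL_MS

const filterRecentlyDispatched = <E>(
	entries: Map<string, E>,
	recentByHash: Map<string, number>,
	now = Date.now(),
) => {
	const out = new Map<string, E>();
	for (const [hash, entry] of entries) {
		const prev = recentByHash.get(hash);
		if (prev != null && now - prev <= RECENT_TTL_MS) continue; // recently sent, skip
		recentByHash.set(hash, now);
		out.set(hash, entry);
	}
	return out;
};
```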
@@ -2618,88 +3250,293 @@ export class SharedLog<
 		}
 	}

+	private scheduleJoinAuthoritativeRepair(peers: Set<string>) {
+		if (this.closed || peers.size === 0) {
+			return;
+		}
+
+		for (const delayMs of JOIN_AUTHORITATIVE_REPAIR_SWEEP_DELAYS_MS) {
+			let pendingPeers = this._joinAuthoritativeRepairPeersByDelay.get(delayMs);
+			if (!pendingPeers) {
+				pendingPeers = new Set();
+				this._joinAuthoritativeRepairPeersByDelay.set(delayMs, pendingPeers);
+			}
+			for (const peer of peers) {
+				pendingPeers.add(peer);
+			}
+
+			if (this._joinAuthoritativeRepairTimersByDelay.has(delayMs)) {
+				continue;
+			}
+
+			const timer = setTimeout(() => {
+				this._repairRetryTimers.delete(timer);
+				this._joinAuthoritativeRepairTimersByDelay.delete(delayMs);
+				if (this.closed) {
+					return;
+				}
+
+				const peersForSweep = new Set(
+					this._joinAuthoritativeRepairPeersByDelay.get(delayMs) ?? [],
+				);
+				this._joinAuthoritativeRepairPeersByDelay.delete(delayMs);
+				if (peersForSweep.size === 0) {
+					return;
+				}
+
+				// A joiner's leader view can still be partial on the first delayed pass
+				// under pubsub jitter. Bounded per-peer rescans widen the authoritative
+				// frontier without adding per-append sweeps.
+				this.scheduleRepairSweep({
+					mode: "join-authoritative",
+					peers: peersForSweep,
+				});
+			}, delayMs);
+			timer.unref?.();
+			this._repairRetryTimers.add(timer);
+			this._joinAuthoritativeRepairTimersByDelay.set(delayMs, timer);
+		}
+	}
+
 	private async runRepairSweep() {
 		try {
 			while (!this.closed) {
-				const
-				const
-
-
+				const pendingModes = new Set(this._repairSweepPendingModes);
+				const pendingPeersByMode = cloneRepairPendingPeersByMode(
+					this._repairSweepPendingPeersByMode,
+				);
+				this._repairSweepPendingModes.clear();
+				for (const peers of this._repairSweepPendingPeersByMode.values()) {
+					peers.clear();
+				}

-				if (
+				if (pendingModes.size === 0) {
 					return;
 				}

-				const
-
-
+				const optimisticGidPeersByMode = new Map<
+					RepairDispatchMode,
+					Map<string, Set<string>>
+				>();
+				const optimisticGidPeersConsumedByMode = new Map<
+					RepairDispatchMode,
+					Map<string, Map<string, number>>
+				>();
+				for (const mode of pendingModes) {
+					const modePeers = pendingPeersByMode.get(mode);
+					if (!modePeers || modePeers.size === 0) {
+						continue;
+					}
+					const optimisticGidPeers = new Map<string, Set<string>>();
+					const optimisticGidPeersConsumed = new Map<string, Map<string, number>>();
+					for (const [gid, peerCounts] of this._repairSweepOptimisticGidPeersPending) {
+						let matchedPeers: Set<string> | undefined;
+						let matchedCounts: Map<string, number> | undefined;
+						for (const [peer, count] of peerCounts) {
+							if (!modePeers.has(peer)) {
+								continue;
+							}
+							matchedPeers ||= new Set();
+							matchedCounts ||= new Map();
+							matchedPeers.add(peer);
+							matchedCounts.set(peer, count);
+						}
+						if (matchedPeers && matchedCounts) {
+							optimisticGidPeers.set(gid, matchedPeers);
+							optimisticGidPeersConsumed.set(gid, matchedCounts);
+						}
+					}
+					if (optimisticGidPeers.size > 0) {
+						optimisticGidPeersByMode.set(mode, optimisticGidPeers);
+						optimisticGidPeersConsumedByMode.set(mode, optimisticGidPeersConsumed);
+					}
+				}
+
+				const pendingByMode = new Map<
+					RepairDispatchMode,
+					Map<string, Map<string, EntryReplicated<any>>>
+				>(REPAIR_DISPATCH_MODES.map((mode) => [mode, new Map()]));
+				const pendingRepairPeers = new Set<string>();
+				for (const peers of pendingPeersByMode.values()) {
+					for (const peer of peers) {
+						pendingRepairPeers.add(peer);
+					}
+				}
+				const fullReplicaRepairCandidates =
+					await this.getFullReplicaRepairCandidates(pendingRepairPeers, {
+						includeSubscribers: false,
+					});
+				const fullReplicaRepairCandidateCount = Math.max(
+					1,
+					fullReplicaRepairCandidates.size,
+				);
+				const nextFrontierByMode = new Map<
+					RepairDispatchMode,
+					Map<string, Map<string, EntryReplicated<any>>>
+				>([
+					["join-authoritative", new Map()],
+					["churn", new Map()],
+				]);
+				const flushTarget = (mode: RepairDispatchMode, target: string) => {
+					const targets = pendingByMode.get(mode);
+					const entries = targets?.get(target);
 					if (!entries || entries.size === 0) {
 						return;
 					}
-					const isJoinWarmupTarget = addedPeers.has(target);
-					const bypassRecentDedupe = isJoinWarmupTarget || forceFreshDelivery;
 					this.dispatchMaybeMissingEntries(target, entries, {
-						bypassRecentDedupe,
-
-							? JOIN_WARMUP_RETRY_SCHEDULE_MS
-							: undefined,
-						forceFreshDelivery,
+						bypassRecentDedupe: true,
+						mode,
 					});
-
+					targets?.delete(target);
 				};
 				const queueEntryForTarget = (
+					mode: RepairDispatchMode,
 					target: string,
 					entry: EntryReplicated<any>,
 				) => {
-
+					const sweepTargets = nextFrontierByMode.get(mode);
+					if (sweepTargets) {
+						let sweepSet = sweepTargets.get(target);
+						if (!sweepSet) {
+							sweepSet = new Map();
+							sweepTargets.set(target, sweepSet);
+						}
+						sweepSet.set(entry.hash, entry);
+					}
+					const targets = pendingByMode.get(mode)!;
+					let set = targets.get(target);
 					if (!set) {
 						set = new Map();
-
+						targets.set(target, set);
 					}
 					if (set.has(entry.hash)) {
 						return;
 					}
 					set.set(entry.hash, entry);
 					if (set.size >= this.repairSweepTargetBufferSize) {
-						flushTarget(target);
+						flushTarget(mode, target);
 					}
 				};

 				const iterator = this.entryCoordinatesIndex.iterate({});
 				try {
-
-
-
-
-
-
-
-
-
-
-
+					while (!this.closed && !iterator.done()) {
+						const entries = await iterator.next(REPAIR_SWEEP_ENTRY_BATCH_SIZE);
+						for (const entry of entries) {
+							const entryReplicated = entry.value;
+							const gid = entryReplicated.gid;
+							const knownPeers = this._gidPeersHistory.get(gid);
+							const requestedReplicas =
+								decodeReplicas(entryReplicated).getValue(this);
+							const currentPeers = await this.findLeaders(
+								entryReplicated.coordinates,
+								entryReplicated,
+								{ roleAge: 0 },
+							);
+
+							if (pendingModes.has("churn")) {
 								for (const [currentPeer] of currentPeers) {
 									if (currentPeer === this.node.identity.publicKey.hashcode()) {
 										continue;
 									}
-									queueEntryForTarget(currentPeer, entryReplicated);
+									queueEntryForTarget("churn", currentPeer, entryReplicated);
 								}
 							}
-
-
-
-
-
+
+							for (const mode of pendingModes) {
+								const modePeers = pendingPeersByMode.get(mode);
+								if (!modePeers || modePeers.size === 0) {
+									continue;
+								}
+								const optimisticPeers = optimisticGidPeersByMode.get(mode)?.get(gid);
+								for (const peer of modePeers) {
+									if (this.isEntryKnownByPeer(entryReplicated.hash, peer)) {
+										continue;
+									}
+									const wasOptimisticallyAssigned =
+										optimisticPeers?.has(peer) === true;
+									const isCoveredByFullReplicaRepair =
+										mode === "join-authoritative" &&
+										fullReplicaRepairCandidates.has(peer) &&
+										requestedReplicas >= fullReplicaRepairCandidateCount;
+									const shouldQueue =
+										mode === "join-authoritative"
+											? currentPeers.has(peer) || isCoveredByFullReplicaRepair
+											: wasOptimisticallyAssigned ||
+												(currentPeers.has(peer) && !knownPeers?.has(peer));
+									if (shouldQueue) {
+										// Authoritative join repair must not trust partial gid peer history,
+										// otherwise a late joiner can get stuck with a partial historical
+										// backfill forever. Once we enter the authoritative pass, queue every
+										// entry whose current leader set still includes the added peer.
+										queueEntryForTarget(mode, peer, entryReplicated);
 									}
 								}
 							}
+						}
 					}
 				} finally {
 					await iterator.close();
 				}

-				for (const
-
+				for (const [, optimisticGidPeersConsumed] of optimisticGidPeersConsumedByMode) {
+					for (const [gid, peerCounts] of optimisticGidPeersConsumed) {
+						const pendingPeerCounts =
+							this._repairSweepOptimisticGidPeersPending.get(gid);
+						if (!pendingPeerCounts) {
+							continue;
+						}
+						for (const [peer, count] of peerCounts) {
+							const current = pendingPeerCounts.get(peer) || 0;
+							const next = current - count;
+							if (next > 0) {
+								pendingPeerCounts.set(peer, next);
+							} else {
+								pendingPeerCounts.delete(peer);
+							}
+						}
+						if (pendingPeerCounts.size === 0) {
+							this._repairSweepOptimisticGidPeersPending.delete(gid);
+						}
+					}
+				}
+
+				for (const mode of pendingModes) {
+					if (mode !== "join-authoritative" && mode !== "churn") {
+						continue;
+					}
+					const nextTargets = nextFrontierByMode.get(mode) ?? new Map();
+					const frontierTargets = this._repairFrontierByMode.get(mode);
+					for (const target of pendingPeersByMode.get(mode) ?? []) {
+						const replacement = nextTargets.get(target);
+						if (mode === "join-authoritative") {
+							// Authoritative join repair is receipt-driven: a later sweep can have a
+							// narrower transient leader view, but it must not forget unconfirmed
+							// hashes that were already queued for this joiner.
+							if (replacement && replacement.size > 0) {
+								const existing = frontierTargets?.get(target);
+								if (existing && existing.size > 0) {
+									for (const [hash, entry] of replacement) {
+										existing.set(hash, entry);
+									}
+								} else {
+									frontierTargets?.set(target, replacement);
+								}
+							}
+							continue;
+						}
+						if (replacement && replacement.size > 0) {
+							frontierTargets?.set(target, replacement);
+						} else {
+							frontierTargets?.delete(target);
+						}
+					}
+				}
+
+				for (const [mode, targets] of pendingByMode) {
+					for (const target of [...targets.keys()]) {
+						flushTarget(mode, target);
+					}
 				}
 			}
 		} catch (error: any) {
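The sweep above queues an entry for a peer using a mode-dependent predicate: the authoritative join pass trusts only the freshly computed leader set (plus the full-replica shortcut), while the other passes also honor optimistic assignments and skip peers already recorded in gid history. A condensed sketch of that predicate; `shouldQueueForPeer` is a local illustration, not a package export:

```ts
// Sketch only: the queueing decision distilled from the sweep loop above.
type Mode = "join-warmup" | "join-authoritative" | "append-backfill" | "churn";

const shouldQueueForPeer = (args: {
	mode: Mode;
	peer: string;
	currentLeaders: Set<string>;
	historicalGidPeers?: Set<string>;
	optimisticallyAssigned: boolean;
	coveredByFullReplicaRepair: boolean;
}) =>
	args.mode === "join-authoritative"
		? args.currentLeaders.has(args.peer) || args.coveredByFullReplicaRepair
		: args.optimisticallyAssigned ||
			(args.currentLeaders.has(args.peer) && !args.historicalGidPeers?.has(args.peer));
```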
@@ -2708,11 +3545,7 @@ export class SharedLog<
 			}
 		} finally {
 			this._repairSweepRunning = false;
-			if (
-				!this.closed &&
-				(this._repairSweepForceFreshPending ||
-					this._repairSweepAddedPeersPending.size > 0)
-			) {
+			if (!this.closed && this._repairSweepPendingModes.size > 0) {
 				this._repairSweepRunning = true;
 				void this.runRepairSweep();
 			}
@@ -2725,9 +3558,89 @@ export class SharedLog<
 		entry: Entry<T> | ShallowEntry | EntryReplicated<R>;
 		leaders: Map<string, any>;
 	};
-	}) {
-		if (
-		return
+	}): Promise<boolean> {
+		if (this.keep && (await this.keep(args.value.entry))) {
+			return false;
+		}
+		void this.pruneDebouncedFn.add(args);
+		return true;
+	}
+
+	private async pruneJoinedEntriesNoLongerLed(entries: Entry<T>[]) {
+		const selfHash = this.node.identity.publicKey.hashcode();
+		for (const entry of entries) {
+			if (this.closed || this._pendingDeletes.has(entry.hash)) {
+				continue;
+			}
+
+			const leaders = await this.findLeadersFromEntry(
+				entry,
+				decodeReplicas(entry).getValue(this),
+				{ roleAge: 0 },
+			);
+
+			if (leaders.has(selfHash)) {
+				this.pruneDebouncedFn.delete(entry.hash);
+				continue;
+			}
+
+			if (leaders.size === 0) {
+				continue;
+			}
+
+			await this.pruneDebouncedFnAddIfNotKeeping({
+				key: entry.hash,
+				value: { entry, leaders },
+			});
+			this.responseToPruneDebouncedFn.delete(entry.hash);
+		}
+	}
+
+	private async pruneIndexedEntriesNoLongerLed() {
+		const selfHash = this.node.identity.publicKey.hashcode();
+		const iterator = this.entryCoordinatesIndex.iterate({});
+		let enqueuedPrune = false;
+		try {
+			while (!this.closed && !iterator.done()) {
+				const entries = await iterator.next(REPAIR_SWEEP_ENTRY_BATCH_SIZE);
+				for (const entry of entries) {
+					const entryReplicated = entry.value;
+					if (this.closed || this._pendingDeletes.has(entryReplicated.hash)) {
+						continue;
+					}
+
+					const leaders = await this.findLeaders(
+						entryReplicated.coordinates,
+						entryReplicated,
+						{ roleAge: 0 },
+					);
+
+					if (leaders.has(selfHash)) {
+						this.pruneDebouncedFn.delete(entryReplicated.hash);
+						await this._pendingDeletes
+							.get(entryReplicated.hash)
+							?.reject(new Error("Failed to delete, is leader again"));
+						this.removePruneRequestSent(entryReplicated.hash);
+						continue;
+					}
+
+					if (leaders.size === 0) {
+						continue;
+					}
+
+					enqueuedPrune =
+						(await this.pruneDebouncedFnAddIfNotKeeping({
+							key: entryReplicated.hash,
+							value: { entry: entryReplicated, leaders },
+						})) || enqueuedPrune;
+					this.responseToPruneDebouncedFn.delete(entryReplicated.hash);
+				}
+			}
+		} finally {
+			await iterator.close();
+		}
+		if (enqueuedPrune && !this.closed) {
+			await this.pruneDebouncedFn.flush();
 		}
 	}

@@ -2904,6 +3817,7 @@ export class SharedLog<
 			} else {
 				await this._appendDeliverToReplicators(
 					result.entry,
+					coordinates,
 					minReplicasValue,
 					leaders,
 					selfHash,
@@ -2913,13 +3827,14 @@ export class SharedLog<
 			}
 		}

-
+		const delayAdaptiveRebalance = this.shouldDelayAdaptiveRebalance();
+		if (!isLeader && !delayAdaptiveRebalance) {
 			this.pruneDebouncedFnAddIfNotKeeping({
 				key: result.entry.hash,
 				value: { entry: result.entry, leaders },
 			});
 		}
-		if (!
+		if (!delayAdaptiveRebalance) {
 			this.rebalanceParticipationDebounced?.call();
 		}

@@ -2961,8 +3876,21 @@ export class SharedLog<
 		this._repairRetryTimers = new Set();
 		this._recentRepairDispatch = new Map();
 		this._repairSweepRunning = false;
-		this.
-		this.
+		this._repairSweepPendingModes = new Set();
+		this._repairSweepPendingPeersByMode = createRepairPendingPeersByMode();
+		this._repairFrontierByMode = createRepairFrontierByMode() as Map<
+			RepairDispatchMode,
+			Map<string, Map<string, EntryReplicated<R>>>
+		>;
+		this._repairFrontierActiveTargetsByMode = createRepairActiveTargetsByMode();
+		this._repairSweepOptimisticGidPeersPending = new Map();
+		this._entryKnownPeers = new Map();
+		this._joinAuthoritativeRepairTimersByDelay = new Map();
+		this._joinAuthoritativeRepairPeersByDelay = new Map();
+		this._assumeSyncedRepairSuppressedUntil = 0;
+		this._appendBackfillTimer = undefined;
+		this._appendBackfillPendingByTarget = new Map();
+		this._repairMetrics = createRepairMetrics();
 		this._topicSubscribersCache = new Map();
 		this.coordinateToHash = new Cache<string>({ max: 1e6, ttl: 1e4 });
 		this.recentlyRebalanced = new Cache<string>({ max: 1e4, ttl: 1e5 });
@@ -3041,7 +3969,10 @@ export class SharedLog<
 		this.pendingMaturity = new Map();

 		const id = sha256Base64Sync(this.log.id);
-		const storage = await
+		const [storage, logScope] = await Promise.all([
+			this.node.storage.sublevel(id),
+			this.node.indexer.scope(id),
+		]);

 		const localBlocks = await new AnyBlockStore(await storage.sublevel("blocks"));
 		const fanoutService = getSharedLogFanoutService(this.node.services);
@@ -3104,20 +4035,19 @@ export class SharedLog<
 			},
 		});

-
-
-
-
+		const remoteBlocksStartPromise = this.remoteBlocks.start();
+		const [replicationIndex, logIndex] = await Promise.all([
+			logScope.scope("replication"),
+			logScope.scope("log"),
+		]);
 		this._replicationRangeIndex = await replicationIndex.init({
 			schema: this.indexableDomain.constructorRange,
 		});
-
 		this._entryCoordinatesIndex = await replicationIndex.init({
 			schema: this.indexableDomain.constructorEntry,
 		});

-
-
+		await remoteBlocksStartPromise;
 		const hasIndexedReplicationInfo =
 			(await this.replicationIndex.count({
 				query: [
@@ -3279,47 +4209,50 @@ export class SharedLog<
 		}

 		// Open for communcation
-		await this.rpc.open({
-			queryType: TransportMessage,
-			responseType: TransportMessage,
-			responseHandler: (query, context) => this.onMessage(query, context),
-			topic: this.topic,
-		});
-
 		this._onSubscriptionFn =
 			this._onSubscriptionFn || this._onSubscription.bind(this);
-		await this.node.services.pubsub.addEventListener(
-			"subscribe",
-			this._onSubscriptionFn,
-		);
-
 		this._onUnsubscriptionFn =
 			this._onUnsubscriptionFn || this._onUnsubscription.bind(this);
-		await
-
-
-
-
-
-
+		await Promise.all([
+			this.rpc.open({
+				queryType: TransportMessage,
+				responseType: TransportMessage,
+				responseHandler: (query, context) => this.onMessage(query, context),
+				topic: this.topic,
+			}),
+			this.node.services.pubsub.addEventListener(
+				"subscribe",
+				this._onSubscriptionFn,
+			),
+			this.node.services.pubsub.addEventListener(
+				"unsubscribe",
+				this._onUnsubscriptionFn,
+			),
+		]);

-
-
+		const fanoutOpenPromise = this._openFanoutChannel(options?.fanout);
+		// Mark previously-owned replication ranges as "new" only when they already exist.
+		// Fresh opens have nothing to touch here, so skip the extra scan/write entirely.
+		const updateOwnedReplicationPromise = hasIndexedReplicationInfo
+			? this.updateTimestampOfOwnedReplicationRanges()
+			: Promise.resolve();
+		await Promise.all([fanoutOpenPromise, updateOwnedReplicationPromise]);

 		// if we had a previous session with replication info, and new replication info dictates that we unreplicate
 		// we should do that. Otherwise if options is a unreplication we dont need to do anything because
 		// we are already unreplicated (as we are just opening)

-
+		const isUnreplicationOptionsDefined = isUnreplicationOptions(
 			options?.replicate,
 		);

 		const canResumeReplication =
+			hasIndexedReplicationInfo &&
 			(await isReplicationOptionsDependentOnPreviousState(
 				options?.replicate,
 				this.replicationIndex,
 				this.node.identity.publicKey,
-			))
+			));

 		if (hasIndexedReplicationInfo && isUnreplicationOptionsDefined) {
 			await this.replicate(options?.replicate, { checkDuplicates: true });
@@ -3372,25 +4305,26 @@ export class SharedLog<

 async afterOpen(): Promise<void> {
 await super.afterOpen();
+const existingSubscribersPromise = this._getTopicSubscribers(this.topic);

 // We do this here, because these calls requires this.closed == false
-
-
-
-
+void this.pruneOfflineReplicators()
+.then(() => {
+this._replicatorsReconciled = true;
+})
 .catch((error) => {
 if (isNotStartedError(error as Error)) {
 return;
 }
-
-
+logger.error(error);
+});

-
+this.startReplicatorLivenessSweep();

-
+await this.rebalanceParticipation();

 // Take into account existing subscription
-(await
+(await existingSubscribersPromise)?.forEach((v) => {
 if (v.equals(this.node.identity.publicKey)) {
 return;
 }
@@ -4021,8 +4955,28 @@ export class SharedLog<
 this._repairRetryTimers.clear();
 this._recentRepairDispatch.clear();
 this._repairSweepRunning = false;
-this.
-this.
+this._repairSweepPendingModes.clear();
+for (const peers of this._repairSweepPendingPeersByMode.values()) {
+peers.clear();
+}
+this._repairSweepOptimisticGidPeersPending.clear();
+this._entryKnownPeers.clear();
+for (const timer of this._joinAuthoritativeRepairTimersByDelay.values()) {
+clearTimeout(timer);
+}
+this._joinAuthoritativeRepairTimersByDelay.clear();
+this._joinAuthoritativeRepairPeersByDelay.clear();
+for (const targets of this._repairFrontierByMode.values()) {
+targets.clear();
+}
+for (const targets of this._repairFrontierActiveTargetsByMode.values()) {
+targets.clear();
+}
+if (this._appendBackfillTimer) {
+clearTimeout(this._appendBackfillTimer);
+this._appendBackfillTimer = undefined;
+}
+this._appendBackfillPendingByTarget.clear();

 for (const [_k, v] of this._pendingDeletes) {
 v.clear();
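The close path above now tears down per-delay timers as well as the per-mode bookkeeping maps. A small sketch of the timer-map cleanup it performs (the helper name is illustrative, not from the package):

// Clear every pending timer keyed by delay and empty the map, so a later open()
// starts from a clean slate. Assumes Node.js timers.
function clearTimersByDelay(timers: Map<number, NodeJS.Timeout>): void {
	for (const timer of timers.values()) {
		clearTimeout(timer);
	}
	timers.clear();
}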
@@ -4205,6 +5159,7 @@ export class SharedLog<

 if (heads) {
 const filteredHeads: EntryWithRefs<any>[] = [];
+const confirmedHashes = new Set<string>();
 for (const head of heads) {
 if (!(await this.log.has(head.entry.hash))) {
 head.entry.init({
@@ -4213,10 +5168,22 @@ export class SharedLog<
 encoding: this.log.encoding,
 });
 filteredHeads.push(head);
+} else {
+confirmedHashes.add(head.entry.hash);
 }
 }
+const fromIsSelf = context.from.equals(this.node.identity.publicKey);
+if (!fromIsSelf) {
+this.markEntriesKnownByPeer(
+heads.map((head) => head.entry.hash),
+context.from.hashcode(),
+);
+}

 if (filteredHeads.length === 0) {
+if (confirmedHashes.size > 0 && !fromIsSelf) {
+await this.sendRepairConfirmation(context.from!, confirmedHashes);
+}
 return;
 }
 const groupedByGid = await groupByGid(filteredHeads);
@@ -4350,7 +5317,15 @@ export class SharedLog<
 }

 if (toMerge.length > 0) {
+this.markEntriesKnownByPeer(
+toMerge.map((entry) => entry.hash),
+context.from!.hashcode(),
+);
 await this.log.join(toMerge);
+for (const merged of toMerge) {
+confirmedHashes.add(merged.hash);
+}
+await this.pruneJoinedEntriesNoLongerLed(toMerge);

 toDelete?.map((x) =>
 // TODO types
@@ -4397,6 +5372,10 @@ export class SharedLog<
 promises.push(fn()); // we do this concurrently since waitForIsLeader might be a blocking operation for some entries
 }
 await Promise.all(promises);
+if (confirmedHashes.size > 0 && !context.from.equals(this.node.identity.publicKey)) {
+this.markEntriesKnownByPeer(confirmedHashes, context.from.hashcode());
+await this.sendRepairConfirmation(context.from!, confirmedHashes);
+}
 }
 } else if (msg instanceof RequestIPrune) {
 const hasAndIsLeader: string[] = [];
@@ -4404,6 +5383,7 @@ export class SharedLog<

 for (const hash of msg.hashes) {
 this.removePruneRequestSent(hash, from);
+this.removeEntriesKnownByPeer([hash], from);

 // if we expect the remote to be owner of this entry because we are to prune ourselves, then we need to remove the remote
 // this is due to that the remote has previously indicated to be a replicator to help us prune but now has changed their mind
@@ -4518,6 +5498,10 @@ export class SharedLog<
 for (const hash of msg.hashes) {
 this._pendingDeletes.get(hash)?.resolve(context.from.hashcode());
 }
+} else if (msg instanceof ConfirmEntriesMessage) {
+this.markEntriesKnownByPeer(msg.hashes, context.from.hashcode());
+this.clearRepairFrontierHashes(context.from.hashcode(), msg.hashes);
+return;
 } else if (await this.syncronizer.onMessage(msg, context)) {
 return; // the syncronizer has handled the message
 } else if (msg instanceof BlocksMessage) {
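The new ConfirmEntriesMessage branch records which entries a remote peer has confirmed holding, so later repair passes can skip re-sending them. A standalone sketch of that bookkeeping using a plain map (the real class keeps this in its own private state and message types):

// Illustrative only: track, per entry hash, which peers are known to hold it.
class KnownEntriesIndex {
	private byEntry = new Map<string, Set<string>>();

	// Called when a peer confirms (or is observed to hold) a batch of entries.
	markKnown(hashes: Iterable<string>, peer: string): void {
		for (const hash of hashes) {
			let peers = this.byEntry.get(hash);
			if (!peers) {
				peers = new Set();
				this.byEntry.set(hash, peers);
			}
			peers.add(peer);
		}
	}

	// Repair dispatch can consult this to avoid re-sending confirmed entries.
	isKnownBy(hash: string, peer: string): boolean {
		return this.byEntry.get(hash)?.has(peer) ?? false;
	}
}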
@@ -4948,6 +5932,11 @@ export class SharedLog<
 let messageToSend: AddedReplicationSegmentMessage | undefined = undefined;

 if (assumeSynced) {
+// `assumeSynced` is an explicit contract that this join should trust the
+// supplied history and avoid initiating outbound repair while the local
+// replication ranges settle.
+this._assumeSyncedRepairSuppressedUntil =
+Date.now() + ASSUME_SYNCED_REPAIR_SUPPRESSION_MS;
 for (const entry of entriesToReplicate) {
 await seedAssumeSyncedPeerHistory(entry);
 }
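Setting _assumeSyncedRepairSuppressedUntil gives outbound repair a wall-clock window in which to stay quiet. A hedged sketch of how such a window is typically consumed by a guard check (the helper below is illustrative, not the package API):

// Returns true while the suppression window set during an assumeSynced join is
// still open; a caller would skip initiating outbound repair in that case.
function withinSuppressionWindow(
	suppressedUntil: number,
	now: number = Date.now(),
): boolean {
	return now < suppressedUntil;
}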
@@ -5033,9 +6022,14 @@ export class SharedLog<
 clear();
 // `waitForReplicator()` is typically used as a precondition before join/replicate
 // flows. A replicator can become mature and enqueue a debounced rebalance
-// (`replicationChangeDebounceFn`) slightly later.
-//
-
+// (`replicationChangeDebounceFn`) slightly later. Kick the flush, but do not
+// make membership waits depend on all rebalance work finishing; callers that
+// need settled distribution already wait for that explicitly.
+this.replicationChangeDebounceFn?.flush?.().catch((error: any) => {
+if (!isNotStartedError(error)) {
+logger.error(error?.toString?.() ?? String(error));
+}
+});
 deferred.resolve();
 };

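The change above kicks the debounced rebalance flush without making the membership wait depend on it, and only logs unexpected errors. A minimal sketch of that fire-and-forget pattern, assuming a debounced function that exposes an optional flush():

// Sketch only: trigger the flush, ignore errors the caller expects (e.g. the
// node already stopped), and log anything else without awaiting the result.
type Debounced = { flush?: () => Promise<void> };

function kickFlush(
	fn: Debounced | undefined,
	isExpected: (error: unknown) => boolean,
	log: (message: string) => void,
): void {
	fn?.flush?.().catch((error) => {
		if (!isExpected(error)) {
			log(String(error));
		}
	});
}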
@@ -5580,6 +6574,18 @@ export class SharedLog<
 }
 }
 }
+
+if (!options?.candidates) {
+const fullReplicaLeaders = await this.findFullReplicaLeaders(
+cursors.length,
+roleAge,
+peerFilter,
+);
+if (fullReplicaLeaders) {
+return fullReplicaLeaders;
+}
+}
+
 return getSamples<R>(
 cursors,
 this.replicationIndex,
@@ -5592,6 +6598,50 @@ export class SharedLog<
 );
 }

+private async findFullReplicaLeaders(
+replicas: number,
+roleAge: number,
+peerFilter?: Set<string>,
+): Promise<Map<string, { intersecting: boolean }> | undefined> {
+const now = Date.now();
+const leaders = new Map<string, { intersecting: boolean }>();
+const includeStrict =
+this._logProperties?.strictFullReplicaFallback !== false;
+const iterator = this.replicationIndex.iterate(
+{},
+{ shape: { hash: true, timestamp: true, mode: true } },
+);
+
+try {
+for (;;) {
+const batch = await iterator.next(64);
+if (batch.length === 0) {
+break;
+}
+for (const result of batch) {
+const range = result.value;
+if (peerFilter && !peerFilter.has(range.hash)) {
+continue;
+}
+if (!isMatured(range, now, roleAge)) {
+continue;
+}
+if (range.mode === ReplicationIntent.Strict && !includeStrict) {
+continue;
+}
+leaders.set(range.hash, { intersecting: true });
+if (leaders.size > replicas) {
+return undefined;
+}
+}
+}
+} finally {
+await iterator.close();
+}
+
+return leaders.size > 0 ? leaders : undefined;
+}
+
 async findLeadersFromEntry(
 entry: ShallowOrFullEntry<any> | EntryReplicated<R>,
 replicas: number,
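findFullReplicaLeaders gives up (returns undefined) as soon as more than the requested number of matured ranges qualify, so the caller falls back to coordinate sampling; the fallback only short-circuits for small replicator sets. A behavioral sketch of that early-exit rule over a plain candidate list (not the package implementation, which iterates the replication index):

// Sketch only: collect candidates, but bail to undefined once more than
// `replicas` qualify, signalling the caller to use the sampling path instead.
function pickFullReplicaSet(
	candidates: string[],
	replicas: number,
): Map<string, { intersecting: boolean }> | undefined {
	const leaders = new Map<string, { intersecting: boolean }>();
	for (const hash of candidates) {
		leaders.set(hash, { intersecting: true });
		if (leaders.size > replicas) {
			return undefined; // too many candidates; sampling stays cheaper
		}
	}
	return leaders.size > 0 ? leaders : undefined;
}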
@@ -6231,16 +7281,33 @@ export class SharedLog<

 const changed = false;
 const addedPeers = new Set<string>();
+const authoritativeRepairPeers = new Set<string>();
 const warmupPeers = new Set<string>();
+const churnRepairPeers = new Set<string>();
 const hasSelfWarmupChange = changes.some(
 (change) =>
 change.range.hash === selfHash &&
 (change.type === "added" || change.type === "replaced"),
 );
+const hasSelfRangeRemoval = changes.some(
+(change) =>
+change.range.hash === selfHash &&
+(change.type === "removed" || change.type === "replaced"),
+);
 for (const change of changes) {
+if (
+change.range.hash !== selfHash &&
+(change.type === "removed" || change.type === "replaced")
+) {
+this.removePeerFromEntryKnownPeers(change.range.hash);
+}
 if (change.type === "added" || change.type === "replaced") {
 const hash = change.range.hash;
 if (hash !== selfHash) {
+// Existing peers can widen/shift ranges after the initial join. If we
+// only rescan on first-seen "added", late authoritative range updates can
+// leave historical backfill permanently partial under load.
+authoritativeRepairPeers.add(hash);
 // Range updates can reassign entries to an existing peer shortly after it
 // already received a subset. Avoid suppressing legitimate follow-up repair.
 this._recentRepairDispatch.delete(hash);
@@ -6277,26 +7344,34 @@ export class SharedLog<
 string,
 Map<string, EntryReplicated<any>>
 > = new Map();
-
-
-
-
-
+const flushUncheckedDeliverTarget = (target: string) => {
+const entries = uncheckedDeliver.get(target);
+if (!entries || entries.size === 0) {
+return;
+}
 const isWarmupTarget = warmupPeers.has(target);
-const
+const mode: RepairDispatchMode = forceFreshDelivery
+? "churn"
+: isWarmupTarget
+? "join-warmup"
+: "join-authoritative";
 this.dispatchMaybeMissingEntries(target, entries, {
-bypassRecentDedupe,
-
-
-
-
+bypassRecentDedupe: isWarmupTarget || forceFreshDelivery,
+mode,
+retryScheduleMs:
+mode === "join-warmup"
+? JOIN_WARMUP_RETRY_SCHEDULE_MS
+: mode === "join-authoritative"
+? [0]
+: undefined,
 });
-
-
+uncheckedDeliver.delete(target);
+};
 const queueUncheckedDeliver = (
 target: string,
 entry: EntryReplicated<any>,
 ) => {
+churnRepairPeers.add(target);
 let set = uncheckedDeliver.get(target);
 if (!set) {
 set = new Map();
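flushUncheckedDeliverTarget above picks one of three dispatch modes per target. The same decision, extracted as a pure function for clarity (RepairDispatchMode is assumed to be the string union shown; this is a sketch, not package code):

// Churn deliveries always force fresh dispatch; otherwise warm-up targets get
// the cheap join-warmup path and everyone else the authoritative path.
type RepairDispatchMode = "churn" | "join-warmup" | "join-authoritative";

function chooseDispatchMode(
	forceFreshDelivery: boolean,
	isWarmupTarget: boolean,
): RepairDispatchMode {
	if (forceFreshDelivery) return "churn";
	return isWarmupTarget ? "join-warmup" : "join-authoritative";
}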
@@ -6320,74 +7395,85 @@ export class SharedLog<
 forceFresh: forceFreshDelivery || useJoinWarmupFastPath,
 },
 )) {
-
-
-}
-
-if (useJoinWarmupFastPath) {
-let oldPeersSet: Set<string> | undefined;
-const gid = entryReplicated.gid;
-oldPeersSet = gidPeersHistorySnapshot.get(gid);
-if (!gidPeersHistorySnapshot.has(gid)) {
-const existing = this._gidPeersHistory.get(gid);
-oldPeersSet = existing ? new Set(existing) : undefined;
-gidPeersHistorySnapshot.set(gid, oldPeersSet);
+if (this.closed) {
+break;
 }

-
-
-
+if (useJoinWarmupFastPath) {
+let oldPeersSet: Set<string> | undefined;
+const gid = entryReplicated.gid;
+oldPeersSet = gidPeersHistorySnapshot.get(gid);
+if (!gidPeersHistorySnapshot.has(gid)) {
+const existing = this._gidPeersHistory.get(gid);
+oldPeersSet = existing ? new Set(existing) : undefined;
+gidPeersHistorySnapshot.set(gid, oldPeersSet);
+}

-
-
-candidatePeers.add(target);
-}
-if (oldPeersSet) {
-for (const oldPeer of oldPeersSet) {
-candidatePeers.add(oldPeer);
+for (const target of warmupPeers) {
+queueUncheckedDeliver(target, entryReplicated);
 }
-}

-
-
-
-
-
-
-
-
-
+const candidatePeers = new Set<string>([selfHash]);
+for (const target of warmupPeers) {
+candidatePeers.add(target);
+}
+if (oldPeersSet) {
+for (const oldPeer of oldPeersSet) {
+candidatePeers.add(oldPeer);
+}
+}

-
-
-
-
+const currentPeers = await this.findLeaders(
+entryReplicated.coordinates,
+entryReplicated,
+{
+roleAge: 0,
+candidates: candidatePeers,
+persist: false,
+},
+);
+
+if (oldPeersSet) {
+for (const oldPeer of oldPeersSet) {
+if (!currentPeers.has(oldPeer)) {
+this.removePruneRequestSent(entryReplicated.hash);
+}
 }
 }
-}

-
-
-
-
-
+for (const [peer] of currentPeers) {
+if (warmupPeers.has(peer)) {
+this.markRepairSweepOptimisticPeer(entryReplicated.gid, peer);
+}
+}

-
-
-
-
-
+const authoritativePeers = [...currentPeers.keys()].filter(
+(peer) =>
+!warmupPeers.has(peer) &&
+!this.hasPendingRepairSweepOptimisticPeer(entryReplicated.gid, peer),
+);
+this.addPeersToGidPeerHistory(
+entryReplicated.gid,
+authoritativePeers,
+true,
+);

-
-
-
-
-
-
-
+if (!currentPeers.has(selfHash)) {
+this.pruneDebouncedFnAddIfNotKeeping({
+key: entryReplicated.hash,
+value: { entry: entryReplicated, leaders: currentPeers },
+});
+
+this.responseToPruneDebouncedFn.delete(entryReplicated.hash);
+} else {
+this.pruneDebouncedFn.delete(entryReplicated.hash);
+await this._pendingDeletes
+.get(entryReplicated.hash)
+?.reject(new Error("Failed to delete, is leader again"));
+this.removePruneRequestSent(entryReplicated.hash);
+}
+continue;
 }
-continue;
-}

 let oldPeersSet: Set<string> | undefined;
 const gid = entryReplicated.gid;
@@ -6421,19 +7507,30 @@ export class SharedLog<
 }
 }

-
-
-
-
+if (oldPeersSet) {
+for (const oldPeer of oldPeersSet) {
+if (!currentPeers.has(oldPeer)) {
+this.removePruneRequestSent(entryReplicated.hash);
+}
 }
 }
-}

-
-
-
-
-
+for (const [peer] of currentPeers) {
+if (addedPeers.has(peer)) {
+this.markRepairSweepOptimisticPeer(entryReplicated.gid, peer);
+}
+}
+
+const authoritativePeers = [...currentPeers.keys()].filter(
+(peer) =>
+!addedPeers.has(peer) &&
+!this.hasPendingRepairSweepOptimisticPeer(entryReplicated.gid, peer),
+);
+this.addPeersToGidPeerHistory(
+entryReplicated.gid,
+authoritativePeers,
+true,
+);

 if (!isLeader) {
 this.pruneDebouncedFnAddIfNotKeeping({
@@ -6452,9 +7549,18 @@ export class SharedLog<
 }
 }

+if (this._isAdaptiveReplicating && hasSelfRangeRemoval) {
+await this.pruneIndexedEntriesNoLongerLed();
+}
+
 if (forceFreshDelivery) {
-//
-
+// Pure leave/shrink churn can have zero `addedPeers`, but the peers that
+// received redistributed entries still need a follow-up repair pass if the
+// immediate maybe-sync misses one entry.
+this.scheduleRepairSweep({
+mode: "churn",
+peers: churnRepairPeers,
+});
 } else if (useJoinWarmupFastPath) {
 // Pure join warmup uses the cheap immediate maybe-missing dispatch above,
 // then defers the authoritative sweep so it does not compete with the
@@ -6466,19 +7572,23 @@ export class SharedLog<
 return;
 }
 this.scheduleRepairSweep({
-
-
+mode: "join-warmup",
+peers,
 });
 }, 250);
 timer.unref?.();
 this._repairRetryTimers.add(timer);
-} else if (
+} else if (authoritativeRepairPeers.size > 0) {
 this.scheduleRepairSweep({
-
-
+mode: "join-authoritative",
+peers: authoritativeRepairPeers,
 });
 }

+if (!forceFreshDelivery && authoritativeRepairPeers.size > 0) {
+this.scheduleJoinAuthoritativeRepair(authoritativeRepairPeers);
+}
+
 for (const target of [...uncheckedDeliver.keys()]) {
 flushUncheckedDeliverTarget(target);
 }
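Both sweep branches above hand a peer set to scheduleRepairSweep, and retries elsewhere are driven by delay tables such as JOIN_WARMUP_RETRY_SCHEDULE_MS. A hedged sketch of how a delay table can drive repeated attempts; the package's actual scheduling and cancellation logic is more involved:

// Sketch only: fire one attempt per listed delay, unref'ing timers so they do
// not keep the process alive. Callers would clear these timers on close.
function scheduleAttempts(
	delaysMs: readonly number[],
	attempt: (attemptIndex: number) => void,
): NodeJS.Timeout[] {
	return delaysMs.map((delay, index) => {
		const timer = setTimeout(() => attempt(index), delay);
		timer.unref?.();
		return timer;
	});
}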
@@ -6585,6 +7695,13 @@ export class SharedLog<
 return; // not allowed to replicate
 }

+if (
+this.replicationController.maxMemoryLimit != null &&
+usedMemory > this.replicationController.maxMemoryLimit
+) {
+await this.pruneIndexedEntriesNoLongerLed();
+}
+
 const peersSize = (await peers.getSize()) || 1;
 const totalParticipation = await this.calculateTotalParticipation();

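The guard added above only runs the extra prune pass when a memory limit is configured and currently exceeded. As a standalone sketch of that condition (names illustrative, not the package API):

// True only when a limit exists (non-null) and the current usage is above it.
function shouldPruneForMemory(
	usedMemory: number,
	maxMemoryLimit: number | undefined | null,
): boolean {
	return maxMemoryLimit != null && usedMemory > maxMemoryLimit;
}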