@peerbit/shared-log 13.1.0 → 13.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/benchmark/join-backfill-repair.d.ts +2 -0
- package/dist/benchmark/join-backfill-repair.d.ts.map +1 -0
- package/dist/benchmark/join-backfill-repair.js +288 -0
- package/dist/benchmark/join-backfill-repair.js.map +1 -0
- package/dist/src/exchange-heads.d.ts +1 -0
- package/dist/src/exchange-heads.d.ts.map +1 -1
- package/dist/src/exchange-heads.js +2 -0
- package/dist/src/exchange-heads.js.map +1 -1
- package/dist/src/index.d.ts +38 -2
- package/dist/src/index.d.ts.map +1 -1
- package/dist/src/index.js +1011 -123
- package/dist/src/index.js.map +1 -1
- package/dist/src/pid.d.ts.map +1 -1
- package/dist/src/pid.js +40 -9
- package/dist/src/pid.js.map +1 -1
- package/dist/src/ranges.d.ts.map +1 -1
- package/dist/src/ranges.js +8 -1
- package/dist/src/ranges.js.map +1 -1
- package/dist/src/sync/simple.d.ts +7 -0
- package/dist/src/sync/simple.d.ts.map +1 -1
- package/dist/src/sync/simple.js +71 -22
- package/dist/src/sync/simple.js.map +1 -1
- package/package.json +7 -6
- package/src/exchange-heads.ts +3 -0
- package/src/index.ts +1376 -233
- package/src/pid.ts +56 -9
- package/src/ranges.ts +9 -1
- package/src/sync/simple.ts +56 -23
package/src/index.ts
CHANGED
|
@@ -88,6 +88,7 @@ const getSharedLogFanoutService = (
|
|
|
88
88
|
): FanoutTree | undefined =>
|
|
89
89
|
(services as SharedLogServicesWithFanout).fanout;
|
|
90
90
|
import {
|
|
91
|
+
EXCHANGE_HEADS_REPAIR_HINT,
|
|
91
92
|
EntryWithRefs,
|
|
92
93
|
ExchangeHeadsMessage,
|
|
93
94
|
RequestIPrune,
|
|
@@ -168,7 +169,7 @@ import type {
|
|
|
168
169
|
Syncronizer,
|
|
169
170
|
} from "./sync/index.js";
|
|
170
171
|
import { RatelessIBLTSynchronizer } from "./sync/rateless-iblt.js";
|
|
171
|
-
import { SimpleSyncronizer } from "./sync/simple.js";
|
|
172
|
+
import { ConfirmEntriesMessage, SimpleSyncronizer } from "./sync/simple.js";
|
|
172
173
|
import { groupByGid } from "./utils.js";
|
|
173
174
|
|
|
174
175
|
const toLocalPublicSignKey = (
|
|
@@ -468,6 +469,7 @@ export type SharedLogOptions<
|
|
|
468
469
|
waitForReplicatorRequestMaxAttempts?: number;
|
|
469
470
|
waitForPruneDelay?: number;
|
|
470
471
|
distributionDebounceTime?: number;
|
|
472
|
+
strictFullReplicaFallback?: boolean;
|
|
471
473
|
compatibility?: number;
|
|
472
474
|
domain?: ReplicationDomainConstructor<D>;
|
|
473
475
|
eagerBlocks?: boolean | { cacheSize?: number };
|
|
@@ -513,10 +515,143 @@ const REPLICATOR_LIVENESS_PROBE_FAILURES_TO_EVICT = 2;
|
|
|
513
515
|
// Churn/join repair can race with pruning and transient missed sync requests under
|
|
514
516
|
// heavy event-loop load. Keep retries alive with a longer tail so reassigned
|
|
515
517
|
// entries are retried after short bursts and slower recovery windows.
|
|
516
|
-
const
|
|
518
|
+
const CHURN_REPAIR_RETRY_SCHEDULE_MS = [0, 1_000, 3_000, 7_000, 15_000, 30_000, 45_000];

/**
 * Retry schedules are cumulative offsets in milliseconds: the frontier runner
 * sleeps for the difference between two consecutive values.
 */
const JOIN_WARMUP_RETRY_SCHEDULE_MS = [0, 1_000, 3_000, 7_000, 15_000, 30_000, 60_000];
const JOIN_AUTHORITATIVE_RETRY_SCHEDULE_MS = [0, 1_000, 3_000, 7_000, 15_000, 30_000, 60_000];
const APPEND_BACKFILL_RETRY_SCHEDULE_MS = [0, 1_000, 3_000, 7_000];

// NOTE(review): the consumers of the two join-authoritative delay constants are
// outside this view; presumably they time the delayed authoritative sweeps.
const JOIN_AUTHORITATIVE_REPAIR_DELAY_MS = 2_000;
const JOIN_AUTHORITATIVE_REPAIR_SWEEP_DELAYS_MS = [JOIN_AUTHORITATIVE_REPAIR_DELAY_MS, 7_000, 15_000, 30_000];

/** Delay before queued append-backfill entries are flushed as one batch. */
const APPEND_BACKFILL_DELAY_MS = 500;

// NOTE(review): presumably the length of the window during which join repair is
// suppressed after an assume-synced event; the code that sets it is not visible here.
const ASSUME_SYNCED_REPAIR_SUPPRESSION_MS = 5_000;

/** Maximum number of hashes carried by a single ConfirmEntriesMessage. */
const REPAIR_CONFIRMATION_HASH_BATCH_SIZE = 1_024;
|
|
550
|
+
|
|
551
|
+
/** Reason a repair dispatch was issued; selects retry schedule, transport and metric bucket. */
type RepairDispatchMode = "join-warmup" | "join-authoritative" | "append-backfill" | "churn";

/** How repair entries are delivered: through the synchronizer ("rateless") or pushed directly ("simple"). */
type RepairTransportMode = "rateless" | "simple";

/** Counters kept per repair mode. */
type RepairMetricBucket = {
  dispatches: number;
  entries: number;
  ratelessFirstPasses: number;
  simpleFallbackPasses: number;
};

/** One metric bucket for every repair mode. */
type RepairMetrics = { [Mode in RepairDispatchMode]: RepairMetricBucket };

/** Every repair mode, in the order used when per-mode containers are built. */
const REPAIR_DISPATCH_MODES: RepairDispatchMode[] = ["join-warmup", "join-authoritative", "append-backfill", "churn"];
|
|
571
|
+
|
|
572
|
+
/** Builds a fresh metric bucket with every counter at zero. */
const createRepairMetricBucket = (): RepairMetricBucket => {
  const zeroed: RepairMetricBucket = {
    dispatches: 0,
    entries: 0,
    ratelessFirstPasses: 0,
    simpleFallbackPasses: 0,
  };
  return zeroed;
};
|
|
578
|
+
|
|
579
|
+
/** Builds the per-mode metrics record, giving each repair mode its own zeroed bucket. */
const createRepairMetrics = (): RepairMetrics => {
  const metrics: RepairMetrics = {
    "join-warmup": createRepairMetricBucket(),
    "join-authoritative": createRepairMetricBucket(),
    "append-backfill": createRepairMetricBucket(),
    churn: createRepairMetricBucket(),
  };
  return metrics;
};
|
|
585
|
+
|
|
586
|
+
/** Builds a map holding one empty peer set for every repair mode. */
const createRepairPendingPeersByMode = () => {
  const pendingByMode = new Map<RepairDispatchMode, Set<string>>();
  for (const mode of REPAIR_DISPATCH_MODES) {
    pendingByMode.set(mode, new Set<string>());
  }
  return pendingByMode;
};
|
|
590
|
+
|
|
591
|
+
/**
 * Copies a per-mode pending-peer map. Each peer set is copied rather than shared,
 * and a mode missing from `pending` yields an empty set.
 */
const cloneRepairPendingPeersByMode = (
  pending: Map<RepairDispatchMode, Set<string>>,
) => {
  const copy = new Map<RepairDispatchMode, Set<string>>();
  for (const mode of REPAIR_DISPATCH_MODES) {
    copy.set(mode, new Set(pending.get(mode) ?? []));
  }
  return copy;
};
|
|
597
|
+
|
|
598
|
+
/**
 * Builds the empty repair frontier: for every repair mode an empty map that is
 * filled as target -> (entry hash -> entry).
 */
const createRepairFrontierByMode = () => {
  const frontier = new Map<
    RepairDispatchMode,
    Map<string, Map<string, EntryReplicated<any>>>
  >();
  for (const mode of REPAIR_DISPATCH_MODES) {
    frontier.set(mode, new Map());
  }
  return frontier;
};
|
|
603
|
+
|
|
604
|
+
/** Builds a map holding one empty active-target set for every repair mode. */
const createRepairActiveTargetsByMode = () => {
  const activeByMode = new Map<RepairDispatchMode, Set<string>>();
  for (const mode of REPAIR_DISPATCH_MODES) {
    activeByMode.set(mode, new Set());
  }
  return activeByMode;
};
|
|
608
|
+
|
|
609
|
+
/** Returns the built-in retry schedule (cumulative offsets in ms) for a repair mode. */
const getRepairRetrySchedule = (mode: RepairDispatchMode) => {
  const scheduleByMode: Record<RepairDispatchMode, number[]> = {
    "join-warmup": JOIN_WARMUP_RETRY_SCHEDULE_MS,
    "join-authoritative": JOIN_AUTHORITATIVE_RETRY_SCHEDULE_MS,
    "append-backfill": APPEND_BACKFILL_RETRY_SCHEDULE_MS,
    churn: CHURN_REPAIR_RETRY_SCHEDULE_MS,
  };
  return scheduleByMode[mode];
};
|
|
621
|
+
|
|
622
|
+
/**
 * Picks the retry schedule for a repair dispatch.
 *
 * @param mode repair mode whose built-in schedule is the default
 * @param override caller-supplied schedule; ignored when missing or empty
 * @param trackedFrontier whether the dispatch runs through the tracked frontier
 * @returns the built-in schedule, the override, or for a tracked frontier given
 *   the override `[0]` an immediate attempt followed by the built-in tail
 */
const resolveRepairRetrySchedule = (
  mode: RepairDispatchMode,
  override?: number[],
  trackedFrontier = false,
) => {
  const defaults = getRepairRetrySchedule(mode);
  if (override == null || override.length === 0) {
    return defaults;
  }

  const immediateOnly = override.length === 1 && override[0] === 0;
  if (trackedFrontier && immediateOnly && defaults.length > 1) {
    // A tracked frontier that only ever retries immediately would stay on
    // attempt 0, i.e. rateless-only passes with no simple fallback. Keep the
    // immediate first attempt and continue with the rest of the default schedule.
    const [, ...defaultTail] = defaults;
    return [0, ...defaultTail];
  }

  return override;
};
|
|
645
|
+
|
|
646
|
+
/**
 * Chooses the transport for one repair attempt: "rateless" only for the first
 * attempt of a non-churn mode, "simple" for churn and for every later attempt.
 */
const getRepairTransportForAttempt = (
  mode: RepairDispatchMode,
  attemptIndex: number,
): RepairTransportMode => {
  const isFirstNonChurnAttempt = mode !== "churn" && attemptIndex === 0;
  return isFirstNonChurnAttempt ? "rateless" : "simple";
};
|
|
520
655
|
|
|
521
656
|
const toPositiveInteger = (
|
|
522
657
|
value: number | undefined,
|
|
@@ -751,8 +886,24 @@ export class SharedLog<
|
|
|
751
886
|
private _repairRetryTimers!: Set<ReturnType<typeof setTimeout>>;
|
|
752
887
|
private _recentRepairDispatch!: Map<string, Map<string, number>>;
|
|
753
888
|
private _repairSweepRunning!: boolean;
|
|
754
|
-
private
|
|
755
|
-
private
|
|
889
|
+
// NOTE(review): the sweep code using the next two fields is outside this view;
// presumably they record which modes / peers still need a repair sweep.
private _repairSweepPendingModes!: Set<RepairDispatchMode>;
private _repairSweepPendingPeersByMode!: Map<RepairDispatchMode, Set<string>>;
/** Entries still awaiting repair: mode -> target peer -> entry hash -> entry. */
private _repairFrontierByMode!: Map<RepairDispatchMode, Map<string, Map<string, EntryReplicated<R>>>>;
/** Targets that currently have a frontier runner started, per mode. */
private _repairFrontierActiveTargetsByMode!: Map<RepairDispatchMode, Set<string>>;
/** gid -> peer -> counter incremented by markRepairSweepOptimisticPeer. */
private _repairSweepOptimisticGidPeersPending!: Map<string, Map<string, number>>;
/** Entry hash -> peers recorded as knowing that entry. */
private _entryKnownPeers!: Map<string, Set<string>>;
// NOTE(review): presumably one timer and one peer set per authoritative sweep
// delay; the scheduling code is outside this view.
private _joinAuthoritativeRepairTimersByDelay!: Map<number, ReturnType<typeof setTimeout>>;
private _joinAuthoritativeRepairPeersByDelay!: Map<number, Set<string>>;
/** Timestamp (compared with Date.now()) until which join repair is suppressed. */
private _assumeSyncedRepairSuppressedUntil!: number;
/** Timer of the pending append-backfill flush, if one is scheduled. */
private _appendBackfillTimer?: ReturnType<typeof setTimeout>;
/** Entries queued for the next append-backfill flush: target -> entry hash -> entry. */
private _appendBackfillPendingByTarget!: Map<string, Map<string, EntryReplicated<R>>>;
/** Dispatch counters per repair mode. */
private _repairMetrics!: RepairMetrics;
|
|
756
907
|
private _topicSubscribersCache!: Map<
|
|
757
908
|
string,
|
|
758
909
|
{ expiresAt: number; keys: PublicSignKey[] }
|
|
@@ -1187,6 +1338,7 @@ export class SharedLog<
|
|
|
1187
1338
|
|
|
1188
1339
|
private async _appendDeliverToReplicators(
|
|
1189
1340
|
entry: Entry<T>,
|
|
1341
|
+
coordinates: NumberFromType<R>[],
|
|
1190
1342
|
minReplicasValue: number,
|
|
1191
1343
|
leaders: Map<string, any>,
|
|
1192
1344
|
selfHash: string,
|
|
@@ -1204,11 +1356,35 @@ export class SharedLog<
|
|
|
1204
1356
|
? { timeoutMs: delivery.timeout, signal: delivery.signal }
|
|
1205
1357
|
: undefined;
|
|
1206
1358
|
|
|
1359
|
+
const fullReplicaDeliveryCandidates =
|
|
1360
|
+
await this.getFullReplicaRepairCandidates(undefined, {
|
|
1361
|
+
includeSubscribers: false,
|
|
1362
|
+
});
|
|
1363
|
+
if (minReplicasValue >= Math.max(1, fullReplicaDeliveryCandidates.size)) {
|
|
1364
|
+
for (const peer of fullReplicaDeliveryCandidates) {
|
|
1365
|
+
if (!leaders.has(peer)) {
|
|
1366
|
+
leaders.set(peer, { intersecting: true });
|
|
1367
|
+
}
|
|
1368
|
+
}
|
|
1369
|
+
}
|
|
1370
|
+
|
|
1371
|
+
const entryReplicatedForRepair = this.createEntryReplicatedForRepair({
|
|
1372
|
+
entry,
|
|
1373
|
+
coordinates,
|
|
1374
|
+
leaders: leaders as Map<string, { intersecting: boolean }>,
|
|
1375
|
+
replicas: minReplicasValue,
|
|
1376
|
+
});
|
|
1207
1377
|
for await (const message of createExchangeHeadsMessages(this.log, [entry])) {
|
|
1208
1378
|
await this._mergeLeadersFromGidReferences(message, minReplicasValue, leaders);
|
|
1209
|
-
const
|
|
1379
|
+
const authoritativeRecipients = new Set(leaders.keys());
|
|
1380
|
+
const leadersForDelivery = delivery
|
|
1381
|
+
? new Set(authoritativeRecipients)
|
|
1382
|
+
: undefined;
|
|
1210
1383
|
|
|
1211
|
-
|
|
1384
|
+
// Outbound append delivery only tells us who we intend to send to, not who has
|
|
1385
|
+
// actually stored the entry. Keep this recipient set local so later repair
|
|
1386
|
+
// sweeps can still backfill peers that missed the initial delivery.
|
|
1387
|
+
const set = new Set(leaders.keys());
|
|
1212
1388
|
let hasRemotePeers = set.has(selfHash) ? set.size > 1 : set.size > 0;
|
|
1213
1389
|
const allowSubscriberFallback =
|
|
1214
1390
|
this.syncronizer instanceof SimpleSyncronizer ||
|
|
@@ -1239,6 +1415,17 @@ export class SharedLog<
|
|
|
1239
1415
|
}
|
|
1240
1416
|
|
|
1241
1417
|
if (!delivery) {
|
|
1418
|
+
for (const peer of authoritativeRecipients) {
|
|
1419
|
+
if (peer === selfHash) {
|
|
1420
|
+
continue;
|
|
1421
|
+
}
|
|
1422
|
+
// Default live append delivery is still optimistic. If one remote misses
|
|
1423
|
+
// the initial heads exchange and the caller did not opt into explicit
|
|
1424
|
+
// delivery acks, we still need a targeted backfill source of truth for the
|
|
1425
|
+
// authoritative recipients or one entry can get stuck at 2/3 replicas
|
|
1426
|
+
// forever. Best-effort fallback subscribers are not repair-worthy.
|
|
1427
|
+
this.queueAppendBackfill(peer, entryReplicatedForRepair);
|
|
1428
|
+
}
|
|
1242
1429
|
this.rpc
|
|
1243
1430
|
.send(message, {
|
|
1244
1431
|
mode: isLeader
|
|
@@ -1268,6 +1455,7 @@ export class SharedLog<
|
|
|
1268
1455
|
|
|
1269
1456
|
const ackTo: string[] = [];
|
|
1270
1457
|
let silentTo: string[] | undefined;
|
|
1458
|
+
const repairTargets = new Set<string>();
|
|
1271
1459
|
// Default delivery semantics: require enough remote ACKs to reach the requested
|
|
1272
1460
|
// replication degree (local append counts as 1).
|
|
1273
1461
|
const defaultMinAcks = Math.max(0, minReplicasValue - 1);
|
|
@@ -1279,6 +1467,9 @@ export class SharedLog<
|
|
|
1279
1467
|
);
|
|
1280
1468
|
|
|
1281
1469
|
for (const peer of orderedRemoteRecipients) {
|
|
1470
|
+
if (authoritativeRecipients.has(peer)) {
|
|
1471
|
+
repairTargets.add(peer);
|
|
1472
|
+
}
|
|
1282
1473
|
if (ackTo.length < ackLimit) {
|
|
1283
1474
|
ackTo.push(peer);
|
|
1284
1475
|
} else {
|
|
@@ -1317,6 +1508,12 @@ export class SharedLog<
|
|
|
1317
1508
|
})
|
|
1318
1509
|
.catch((error) => logger.error(error));
|
|
1319
1510
|
}
|
|
1511
|
+
for (const peer of repairTargets) {
|
|
1512
|
+
// Direct append delivery is intentionally optimistic. Queue one delayed,
|
|
1513
|
+
// batched maybe-sync pass for the intended recipients so stable 3-peer
|
|
1514
|
+
// append workloads do not depend on perfect first-try delivery ordering.
|
|
1515
|
+
this.queueAppendBackfill(peer, entryReplicatedForRepair);
|
|
1516
|
+
}
|
|
1320
1517
|
}
|
|
1321
1518
|
|
|
1322
1519
|
if (pending.length > 0) {
|
|
@@ -2016,6 +2213,7 @@ export class SharedLog<
|
|
|
2016
2213
|
// Keep local sync/prune state consistent even when a peer disappears
|
|
2017
2214
|
// through replication-info updates without a topic unsubscribe event.
|
|
2018
2215
|
this.removePeerFromGidPeerHistory(keyHash);
|
|
2216
|
+
this.removeRepairFrontierTarget(keyHash);
|
|
2019
2217
|
this._recentRepairDispatch.delete(keyHash);
|
|
2020
2218
|
if (!isMe) {
|
|
2021
2219
|
this.syncronizer.onPeerDisconnected(keyHash);
|
|
@@ -2483,6 +2681,7 @@ export class SharedLog<
|
|
|
2483
2681
|
for (const key of this._gidPeersHistory.keys()) {
|
|
2484
2682
|
this.removePeerFromGidPeerHistory(publicKeyHash, key);
|
|
2485
2683
|
}
|
|
2684
|
+
this.removePeerFromEntryKnownPeers(publicKeyHash);
|
|
2486
2685
|
}
|
|
2487
2686
|
}
|
|
2488
2687
|
|
|
@@ -2507,19 +2706,449 @@ export class SharedLog<
|
|
|
2507
2706
|
return set;
|
|
2508
2707
|
}
|
|
2509
2708
|
|
|
2709
|
+
/** Records that `peer` knows every entry in `hashes`. */
private markEntriesKnownByPeer(hashes: Iterable<string>, peer: string) {
  for (const hash of hashes) {
    const holders = this._entryKnownPeers.get(hash) ?? new Set<string>();
    // Re-setting an existing key keeps the same set and the same map order.
    this._entryKnownPeers.set(hash, holders);
    holders.add(peer);
  }
}
|
|
2719
|
+
|
|
2720
|
+
/** Forgets that `peer` knows the given entries, dropping records left without peers. */
private removeEntriesKnownByPeer(hashes: Iterable<string>, peer: string) {
  for (const hash of hashes) {
    const holders = this._entryKnownPeers.get(hash);
    if (holders) {
      holders.delete(peer);
      if (holders.size === 0) {
        this._entryKnownPeers.delete(hash);
      }
    }
  }
}
|
|
2732
|
+
|
|
2733
|
+
/** Removes `peer` from every known-peer record, dropping records left empty. */
private removePeerFromEntryKnownPeers(peer: string) {
  // Deleting the visited key while iterating a Map is safe in JavaScript.
  this._entryKnownPeers.forEach((holders, hash) => {
    holders.delete(peer);
    if (holders.size === 0) {
      this._entryKnownPeers.delete(hash);
    }
  });
}
|
|
2741
|
+
|
|
2742
|
+
/** Whether `peer` has been recorded as knowing the entry with this hash. */
private isEntryKnownByPeer(hash: string, peer: string) {
  const holders = this._entryKnownPeers.get(hash);
  return holders !== undefined && holders.has(peer);
}
|
|
2745
|
+
|
|
2746
|
+
/** Increments the optimistic pending counter kept for `peer` under `gid`. */
private markRepairSweepOptimisticPeer(gid: string, peer: string) {
  const counters =
    this._repairSweepOptimisticGidPeersPending.get(gid) ?? new Map<string, number>();
  // Re-setting an existing key keeps the same counter map and the same order.
  this._repairSweepOptimisticGidPeersPending.set(gid, counters);
  const previous = counters.get(peer) || 0;
  counters.set(peer, previous + 1);
}
|
|
2754
|
+
|
|
2755
|
+
/** Whether `peer` has a positive optimistic pending counter under `gid`. */
private hasPendingRepairSweepOptimisticPeer(gid: string, peer: string) {
  const count = this._repairSweepOptimisticGidPeersPending.get(gid)?.get(peer);
  return (count || 0) > 0;
}
|
|
2758
|
+
|
|
2759
|
+
/**
 * Builds the index representation of `entry` that the repair paths queue.
 *
 * The range-boundary flag comes from the leaders and replica count; the hash
 * number is derived from the multihash digest of the entry's CID.
 */
private createEntryReplicatedForRepair(properties: {
  entry: Entry<T>;
  coordinates: NumberFromType<R>[];
  leaders: Map<string, { intersecting: boolean }>;
  replicas: number;
}) {
  const { entry, coordinates, leaders, replicas } = properties;
  const assignedToRangeBoundary = shouldAssignToRangeBoundary(leaders, replicas);
  const digest = cidifyString(entry.hash).multihash.digest;
  const hashNumber = this.indexableDomain.numbers.bytesToNumber(digest);
  return new this.indexableDomain.constructorEntry({
    assignedToRangeBoundary,
    coordinates,
    meta: entry.meta,
    hash: entry.hash,
    hashNumber,
  });
}
|
|
2781
|
+
|
|
2782
|
+
/** True while the current time is still before the suppression deadline. */
private isAssumeSyncedRepairSuppressed() {
  const suppressedUntil = this._assumeSyncedRepairSuppressedUntil;
  return suppressedUntil > Date.now();
}
|
|
2785
|
+
|
|
2786
|
+
/** Every repair mode except "join-warmup" is tracked through the repair frontier. */
private isFrontierTrackedRepairMode(mode: RepairDispatchMode) {
  if (mode === "join-warmup") {
    return false;
  }
  return true;
}
|
|
2789
|
+
|
|
2790
|
+
/**
 * Resolves after `delayMs` milliseconds, or immediately when `delayMs <= 0`.
 * The timer is registered in `_repairRetryTimers` while pending and is
 * unref'ed where the runtime supports it.
 */
private async sleepTracked(delayMs: number) {
  if (delayMs <= 0) {
    return;
  }
  await new Promise<void>((wake) => {
    const handle = setTimeout(() => {
      this._repairRetryTimers.delete(handle);
      wake();
    }, delayMs);
    handle.unref?.();
    this._repairRetryTimers.add(handle);
  });
}
|
|
2803
|
+
|
|
2804
|
+
/**
 * Adds `entries` to the repair frontier of `target` for `mode`, creating the
 * per-mode and per-target maps on demand. An already queued hash is overwritten.
 */
private queueRepairFrontierEntries(
  mode: RepairDispatchMode,
  target: string,
  entries: Map<string, EntryReplicated<R>>,
) {
  const byTarget =
    this._repairFrontierByMode.get(mode) ??
    new Map<string, Map<string, EntryReplicated<R>>>();
  // Re-setting an existing key keeps the same map instance and the same order.
  this._repairFrontierByMode.set(mode, byTarget);

  const queued = byTarget.get(target) ?? new Map<string, EntryReplicated<R>>();
  byTarget.set(target, queued);

  entries.forEach((entry, hash) => {
    queued.set(hash, entry);
  });
}
|
|
2823
|
+
|
|
2824
|
+
/**
 * Removes the given hashes from `target`'s frontier in every repair mode and
 * drops the target from a mode once nothing is left pending for it.
 */
private clearRepairFrontierHashes(target: string, hashes: Iterable<string>) {
  const toClear = Array.from(hashes);
  if (toClear.length === 0) {
    return;
  }
  for (const mode of REPAIR_DISPATCH_MODES) {
    const byTarget = this._repairFrontierByMode.get(mode);
    const pending = byTarget?.get(target);
    if (!pending) {
      continue;
    }
    toClear.forEach((hash) => {
      pending.delete(hash);
    });
    if (pending.size === 0) {
      byTarget?.delete(target);
    }
  }
}
|
|
2842
|
+
|
|
2843
|
+
/**
 * Collects peer hashes that are candidates for full-replica repair.
 *
 * The result always contains this node's own key hash, then the peers from
 * `getReplicators()` (or `uniqueReplicators` if that call throws), then
 * `extraPeers`, and, unless `options.includeSubscribers` is `false`, the
 * subscribers of this log's topic.
 */
private async getFullReplicaRepairCandidates(
  extraPeers?: Iterable<string>,
  options?: { includeSubscribers?: boolean },
) {
  const candidates = new Set<string>([this.node.identity.publicKey.hashcode()]);

  try {
    const replicators = await this.getReplicators();
    for (const peer of replicators) {
      candidates.add(peer);
    }
  } catch {
    for (const peer of this.uniqueReplicators) {
      candidates.add(peer);
    }
  }

  for (const peer of extraPeers ?? []) {
    candidates.add(peer);
  }

  if (options?.includeSubscribers === false) {
    return candidates;
  }

  try {
    const subscribers = (await this._getTopicSubscribers(this.topic)) ?? [];
    for (const subscriber of subscribers) {
      candidates.add(subscriber.hashcode());
    }
  } catch {
    // Best-effort only; explicit repair peers still keep the path safe.
  }
  return candidates;
}
|
|
2873
|
+
|
|
2874
|
+
/** Drops `target`'s pending frontier entries and its active-runner marker in every repair mode. */
private removeRepairFrontierTarget(target: string) {
  REPAIR_DISPATCH_MODES.forEach((mode) => {
    this._repairFrontierByMode.get(mode)?.delete(target);
    this._repairFrontierActiveTargetsByMode.get(mode)?.delete(target);
  });
}
|
|
2880
|
+
|
|
2881
|
+
/**
 * Sends the de-duplicated hashes to `target` as ConfirmEntriesMessage batches of
 * at most REPAIR_CONFIRMATION_HASH_BATCH_SIZE hashes each, one batch at a time.
 */
private async sendRepairConfirmation(
  target: PublicSignKey,
  hashes: Iterable<string>,
) {
  const distinctHashes = Array.from(new Set(hashes));
  let offset = 0;
  while (offset < distinctHashes.length) {
    const end = offset + REPAIR_CONFIRMATION_HASH_BATCH_SIZE;
    const batch = distinctHashes.slice(offset, end);
    await this.rpc.send(new ConfirmEntriesMessage({ hashes: batch }), {
      priority: 1,
      mode: new SilentDelivery({ to: [target], redundancy: 1 }),
    });
    offset = end;
  }
}
|
|
2897
|
+
|
|
2898
|
+
/**
 * Pushes the given entries straight to `target` as exchange-heads messages
 * flagged with the repair hint, without waiting for any reply.
 */
private async pushRepairEntries(
  target: string,
  entries: Map<string, EntryReplicated<R>>,
) {
  const hashes = [...entries.keys()];
  const messages = createExchangeHeadsMessages(this.log, hashes);
  for await (const message of messages) {
    message.reserved[0] |= EXCHANGE_HEADS_REPAIR_HINT;
    const delivery = new SilentDelivery({ to: [target], redundancy: 1 });
    await this.rpc.send(message, { priority: 1, mode: delivery });
  }
}
|
|
2913
|
+
|
|
2914
|
+
/**
 * Sends repair entries to `target` over the chosen transport.
 *
 * Unless `options.bypassKnownPeers` is set, entries the target is already
 * recorded as knowing are not sent and are cleared from its repair frontier.
 * "simple" pushes the remaining entries directly; any other transport hands
 * them to the synchronizer.
 */
private async sendRepairEntriesWithTransport(
  target: string,
  entries: Map<string, EntryReplicated<R>>,
  transport: RepairTransportMode,
  options?: { bypassKnownPeers?: boolean },
) {
  const outstanding = new Map<string, EntryReplicated<R>>();
  const alreadyKnown: string[] = [];
  for (const [hash, entry] of entries) {
    const targetHasIt =
      !options?.bypassKnownPeers && this.isEntryKnownByPeer(hash, target);
    if (targetHasIt) {
      alreadyKnown.push(hash);
    } else {
      outstanding.set(hash, entry);
    }
  }

  this.clearRepairFrontierHashes(target, alreadyKnown);
  if (outstanding.size === 0) {
    return;
  }

  if (transport !== "simple") {
    await this.syncronizer.onMaybeMissingEntries({
      entries: outstanding,
      targets: [target],
    });
    return;
  }

  // Fallback repair should not depend on the target completing the
  // RequestMaybeSync -> ResponseMaybeSync round trip.
  await this.pushRepairEntries(target, outstanding);
}
|
|
2945
|
+
|
|
2946
|
+
/**
 * Performs one immediate repair dispatch of `entries` to `target`.
 *
 * Expired per-target dispatch timestamps are purged first. Unless
 * `bypassRecentDedupe` is `true`, hashes dispatched within
 * RECENT_REPAIR_DISPATCH_TTL_MS are skipped. Every hash that is sent gets its
 * timestamp refreshed, the mode's metrics are updated, and transport errors
 * are logged rather than thrown.
 */
private async sendMaybeMissingEntriesNow(
  target: string,
  entries: Map<string, EntryReplicated<R>>,
  options: {
    mode: RepairDispatchMode;
    transport: RepairTransportMode;
    bypassRecentDedupe?: boolean;
  },
) {
  if (entries.size === 0) {
    return;
  }

  const now = Date.now();
  let lastDispatchByHash = this._recentRepairDispatch.get(target);
  if (!lastDispatchByHash) {
    lastDispatchByHash = new Map();
    this._recentRepairDispatch.set(target, lastDispatchByHash);
  }
  // Drop timestamps that have aged out of the dedupe window.
  for (const [hash, dispatchedAt] of lastDispatchByHash) {
    if (now - dispatchedAt > RECENT_REPAIR_DISPATCH_TTL_MS) {
      lastDispatchByHash.delete(hash);
    }
  }

  const skipDedupe = options.bypassRecentDedupe === true;
  const toSend = new Map<string, EntryReplicated<any>>();
  for (const [hash, entry] of entries) {
    if (!skipDedupe) {
      const previous = lastDispatchByHash.get(hash);
      if (previous != null && now - previous <= RECENT_REPAIR_DISPATCH_TTL_MS) {
        continue;
      }
    }
    lastDispatchByHash.set(hash, now);
    toSend.set(hash, entry);
  }
  if (toSend.size === 0) {
    return;
  }

  const bucket = this._repairMetrics[options.mode];
  bucket.dispatches += 1;
  bucket.entries += toSend.size;
  if (options.transport === "simple") {
    bucket.simpleFallbackPasses += 1;
  } else {
    bucket.ratelessFirstPasses += 1;
  }

  try {
    await this.sendRepairEntriesWithTransport(target, toSend, options.transport, {
      bypassKnownPeers: options.mode === "churn",
    });
  } catch (error: any) {
    logger.error(error);
  }
}
|
|
3011
|
+
|
|
3012
|
+
/**
 * Starts the background retry loop for `target` in `mode`, unless one is
 * already registered or the log is closed.
 *
 * The loop re-sends the target's pending frontier entries until the frontier
 * is empty. Waits follow the resolved retry schedule (the difference between
 * consecutive offsets) and then repeat at a steady-state delay once the
 * schedule is exhausted. Join modes pause while assume-synced suppression is
 * active.
 *
 * @param mode repair mode whose frontier is drained
 * @param target hash of the peer to repair
 * @param retryScheduleMs optional schedule override, see resolveRepairRetrySchedule
 */
private ensureRepairFrontierRunner(
  mode: RepairDispatchMode,
  target: string,
  retryScheduleMs?: number[],
) {
  const activeTargets = this._repairFrontierActiveTargetsByMode.get(mode);
  if (!activeTargets || activeTargets.has(target) || this.closed) {
    return;
  }
  activeTargets.add(target);
  const retrySchedule = resolveRepairRetrySchedule(
    mode,
    retryScheduleMs,
    this.isFrontierTrackedRepairMode(mode),
  );
  // After the schedule runs out, keep retrying at the spacing of its last step
  // (or at least once per second for a single-entry schedule).
  const steadyStateDelay =
    retrySchedule.length > 1
      ? Math.max(1, retrySchedule[retrySchedule.length - 1] - retrySchedule[retrySchedule.length - 2])
      : Math.max(retrySchedule[0] || 1_000, 1_000);

  void (async () => {
    let attemptIndex = 0;
    try {
      for (;;) {
        if (this.closed) {
          return;
        }
        const pending = this._repairFrontierByMode.get(mode)?.get(target);
        if (!pending || pending.size === 0) {
          return;
        }

        if (
          (mode === "join-warmup" || mode === "join-authoritative") &&
          this.isAssumeSyncedRepairSuppressed()
        ) {
          // Wait out the suppression window (at least 250 ms) before re-checking.
          await this.sleepTracked(
            Math.max(250, this._assumeSyncedRepairSuppressedUntil - Date.now()),
          );
          continue;
        }

        await this.sendMaybeMissingEntriesNow(target, pending, {
          mode,
          transport: getRepairTransportForAttempt(mode, attemptIndex),
          bypassRecentDedupe: true,
        });

        // The send may have cleared hashes the target is already known to hold.
        const remaining = this._repairFrontierByMode.get(mode)?.get(target);
        if (!remaining || remaining.size === 0) {
          return;
        }

        const waitMs =
          attemptIndex + 1 < retrySchedule.length
            ? Math.max(0, retrySchedule[attemptIndex + 1] - retrySchedule[attemptIndex])
            : steadyStateDelay;
        // Clamp so later attempts keep using the last schedule slot.
        attemptIndex = Math.min(attemptIndex + 1, retrySchedule.length - 1);
        await this.sleepTracked(waitMs);
      }
    } finally {
      activeTargets.delete(target);
      // Entries can only still be pending here if the loop ended abnormally;
      // restart so they are not stranded.
      if (
        !this.closed &&
        (this._repairFrontierByMode.get(mode)?.get(target)?.size || 0) > 0
      ) {
        this.ensureRepairFrontierRunner(mode, target, retryScheduleMs);
      }
    }
  })().catch((error: any) => {
    // Do not touch `activeTargets` here. The `finally` block above has already
    // released this runner's marker and may have started a replacement runner,
    // which re-adds the marker. This handler runs after that, so deleting the
    // marker again would unregister the live replacement and allow a second
    // concurrent runner for the same mode and target.
    logger.error(error);
  });
}
|
|
3086
|
+
|
|
3087
|
+
/**
 * Dispatches everything queued for append backfill, one "append-backfill"
 * dispatch per target. The queue is swapped for a fresh map first, so entries
 * queued during the flush go into the next batch.
 */
private flushAppendBackfill() {
  const batch = this._appendBackfillPendingByTarget;
  if (batch.size === 0) {
    return;
  }
  this._appendBackfillPendingByTarget = new Map();
  batch.forEach((entries, target) => {
    this.dispatchMaybeMissingEntries(target, entries, {
      mode: "append-backfill",
    });
  });
}
|
|
3099
|
+
|
|
3100
|
+
/**
 * Queues `entry` for a batched append backfill to `target`.
 *
 * The whole queue is flushed at once when this target's batch reaches
 * `repairSweepTargetBufferSize`. Otherwise a single flush timer of
 * APPEND_BACKFILL_DELAY_MS is armed, unless one is already pending or the log
 * is closed.
 */
private queueAppendBackfill(target: string, entry: EntryReplicated<R>) {
  const queued =
    this._appendBackfillPendingByTarget.get(target) ??
    new Map<string, EntryReplicated<R>>();
  // Re-setting an existing key keeps the same batch map and the same order.
  this._appendBackfillPendingByTarget.set(target, queued);
  queued.set(entry.hash, entry);

  if (queued.size >= this.repairSweepTargetBufferSize) {
    this.flushAppendBackfill();
    return;
  }
  if (this._appendBackfillTimer || this.closed) {
    return;
  }

  const flushTimer = setTimeout(() => {
    this._repairRetryTimers.delete(flushTimer);
    if (this._appendBackfillTimer === flushTimer) {
      this._appendBackfillTimer = undefined;
    }
    if (!this.closed) {
      this.flushAppendBackfill();
    }
  }, APPEND_BACKFILL_DELAY_MS);
  flushTimer.unref?.();
  this._repairRetryTimers.add(flushTimer);
  this._appendBackfillTimer = flushTimer;
}
|
|
3128
|
+
|
|
2510
3129
|
private dispatchMaybeMissingEntries(
|
|
2511
3130
|
target: string,
|
|
2512
3131
|
entries: Map<string, EntryReplicated<R>>,
|
|
2513
|
-
options
|
|
3132
|
+
options: {
|
|
3133
|
+
mode: RepairDispatchMode;
|
|
2514
3134
|
bypassRecentDedupe?: boolean;
|
|
2515
3135
|
retryScheduleMs?: number[];
|
|
2516
|
-
forceFreshDelivery?: boolean;
|
|
2517
3136
|
},
|
|
2518
3137
|
) {
|
|
2519
3138
|
if (entries.size === 0) {
|
|
2520
3139
|
return;
|
|
2521
3140
|
}
|
|
2522
3141
|
|
|
3142
|
+
if (this.isFrontierTrackedRepairMode(options.mode)) {
|
|
3143
|
+
this.queueRepairFrontierEntries(options.mode, target, entries);
|
|
3144
|
+
this.ensureRepairFrontierRunner(
|
|
3145
|
+
options.mode,
|
|
3146
|
+
target,
|
|
3147
|
+
options.retryScheduleMs,
|
|
3148
|
+
);
|
|
3149
|
+
return;
|
|
3150
|
+
}
|
|
3151
|
+
|
|
2523
3152
|
const now = Date.now();
|
|
2524
3153
|
let recentlyDispatchedByHash = this._recentRepairDispatch.get(target);
|
|
2525
3154
|
if (!recentlyDispatchedByHash) {
|
|
@@ -2533,10 +3162,10 @@ export class SharedLog<
|
|
|
2533
3162
|
}
|
|
2534
3163
|
|
|
2535
3164
|
const filteredEntries =
|
|
2536
|
-
options
|
|
3165
|
+
options.bypassRecentDedupe === true
|
|
2537
3166
|
? new Map(entries)
|
|
2538
3167
|
: new Map<string, EntryReplicated<any>>();
|
|
2539
|
-
if (options
|
|
3168
|
+
if (options.bypassRecentDedupe !== true) {
|
|
2540
3169
|
for (const [hash, entry] of entries) {
|
|
2541
3170
|
const prev = recentlyDispatchedByHash.get(hash);
|
|
2542
3171
|
if (prev != null && now - prev <= RECENT_REPAIR_DISPATCH_TTL_MS) {
|
|
@@ -2553,64 +3182,69 @@ export class SharedLog<
|
|
|
2553
3182
|
if (filteredEntries.size === 0) {
|
|
2554
3183
|
return;
|
|
2555
3184
|
}
|
|
2556
|
-
|
|
2557
|
-
|
|
2558
|
-
|
|
2559
|
-
|
|
2560
|
-
|
|
2561
|
-
|
|
2562
|
-
|
|
2563
|
-
|
|
2564
|
-
|
|
2565
|
-
|
|
2566
|
-
|
|
2567
|
-
|
|
2568
|
-
|
|
2569
|
-
|
|
2570
|
-
|
|
2571
|
-
|
|
2572
|
-
|
|
2573
|
-
|
|
2574
|
-
|
|
2575
|
-
|
|
2576
|
-
|
|
2577
|
-
|
|
3185
|
+
|
|
3186
|
+
if (
|
|
3187
|
+
(options.mode === "join-warmup" ||
|
|
3188
|
+
options.mode === "join-authoritative") &&
|
|
3189
|
+
this.isAssumeSyncedRepairSuppressed()
|
|
3190
|
+
) {
|
|
3191
|
+
return;
|
|
3192
|
+
}
|
|
3193
|
+
|
|
3194
|
+
const retrySchedule = resolveRepairRetrySchedule(
|
|
3195
|
+
options.mode,
|
|
3196
|
+
options.retryScheduleMs,
|
|
3197
|
+
this.isFrontierTrackedRepairMode(options.mode),
|
|
3198
|
+
);
|
|
3199
|
+
const bucket = this._repairMetrics[options.mode];
|
|
3200
|
+
bucket.dispatches += 1;
|
|
3201
|
+
bucket.entries += filteredEntries.size;
|
|
3202
|
+
|
|
3203
|
+
const run = (transport: RepairTransportMode) => {
|
|
3204
|
+
if (transport === "simple") {
|
|
3205
|
+
bucket.simpleFallbackPasses += 1;
|
|
3206
|
+
} else {
|
|
3207
|
+
bucket.ratelessFirstPasses += 1;
|
|
2578
3208
|
}
|
|
2579
3209
|
|
|
2580
3210
|
return Promise.resolve(
|
|
2581
|
-
this.
|
|
2582
|
-
|
|
2583
|
-
|
|
2584
|
-
|
|
3211
|
+
this.sendRepairEntriesWithTransport(
|
|
3212
|
+
target,
|
|
3213
|
+
filteredEntries,
|
|
3214
|
+
transport,
|
|
3215
|
+
{ bypassKnownPeers: options.mode === "churn" },
|
|
3216
|
+
),
|
|
2585
3217
|
).catch((error: any) => logger.error(error));
|
|
2586
3218
|
};
|
|
2587
3219
|
|
|
2588
|
-
|
|
3220
|
+
retrySchedule.forEach((delayMs, index) => {
|
|
3221
|
+
const transport = getRepairTransportForAttempt(options.mode, index);
|
|
2589
3222
|
if (delayMs === 0) {
|
|
2590
|
-
void run();
|
|
2591
|
-
|
|
3223
|
+
void run(transport);
|
|
3224
|
+
return;
|
|
2592
3225
|
}
|
|
2593
3226
|
const timer = setTimeout(() => {
|
|
2594
3227
|
this._repairRetryTimers.delete(timer);
|
|
2595
3228
|
if (this.closed) {
|
|
2596
3229
|
return;
|
|
2597
3230
|
}
|
|
2598
|
-
void run();
|
|
3231
|
+
void run(transport);
|
|
2599
3232
|
}, delayMs);
|
|
2600
3233
|
timer.unref?.();
|
|
2601
3234
|
this._repairRetryTimers.add(timer);
|
|
2602
|
-
}
|
|
3235
|
+
});
|
|
2603
3236
|
}
|
|
2604
3237
|
|
|
2605
3238
|
private scheduleRepairSweep(options: {
|
|
2606
|
-
|
|
2607
|
-
|
|
3239
|
+
mode: RepairDispatchMode;
|
|
3240
|
+
peers?: Iterable<string>;
|
|
2608
3241
|
}) {
|
|
2609
|
-
|
|
2610
|
-
|
|
2611
|
-
|
|
2612
|
-
|
|
2613
|
-
|
|
3242
|
+
this._repairSweepPendingModes.add(options.mode);
|
|
3243
|
+
const pendingPeers = this._repairSweepPendingPeersByMode.get(options.mode);
|
|
3244
|
+
if (pendingPeers) {
|
|
3245
|
+
for (const peer of options.peers ?? []) {
|
|
3246
|
+
pendingPeers.add(peer);
|
|
3247
|
+
}
|
|
2614
3248
|
}
|
|
2615
3249
|
if (!this._repairSweepRunning && !this.closed) {
|
|
2616
3250
|
this._repairSweepRunning = true;
|
|
@@ -2618,88 +3252,293 @@ export class SharedLog<
|
|
|
2618
3252
|
}
|
|
2619
3253
|
}
|
|
2620
3254
|
|
|
3255
|
+
/**
 * Queues `peers` for the delayed "join-authoritative" repair sweeps.
 *
 * For each delay in JOIN_AUTHORITATIVE_REPAIR_SWEEP_DELAYS_MS the peers are
 * merged into that delay's pending set. At most one timer is armed per delay:
 * when one already exists, the new peers simply wait for it. When a timer
 * fires, it drains the pending set for its delay and passes the drained peers
 * to scheduleRepairSweep. Nothing happens if the log is closed or `peers` is
 * empty.
 *
 * @param peers peers to include in the upcoming sweeps
 *   (presumably public-key hashes; confirm against callers)
 */
private scheduleJoinAuthoritativeRepair(peers: Set<string>) {
	const nothingToSchedule = this.closed || peers.size === 0;
	if (nothingToSchedule) return;

	for (const delayMs of JOIN_AUTHORITATIVE_REPAIR_SWEEP_DELAYS_MS) {
		// Merge the incoming peers into whatever is already waiting on this delay.
		const alreadyQueued = this._joinAuthoritativeRepairPeersByDelay.get(delayMs);
		const queued = alreadyQueued ?? new Set<string>();
		if (!alreadyQueued) {
			this._joinAuthoritativeRepairPeersByDelay.set(delayMs, queued);
		}
		for (const peer of peers) {
			queued.add(peer);
		}

		// A timer for this delay is already armed; it will pick up the peers above.
		const timerAlreadyArmed =
			this._joinAuthoritativeRepairTimersByDelay.has(delayMs);
		if (timerAlreadyArmed) continue;

		const handle = setTimeout(() => {
			// Unregister first so close() and later calls see a clean slate.
			this._repairRetryTimers.delete(handle);
			this._joinAuthoritativeRepairTimersByDelay.delete(delayMs);
			if (this.closed) return;

			// Snapshot and drain the peers accumulated for this delay.
			const drained = new Set(
				this._joinAuthoritativeRepairPeersByDelay.get(delayMs) ?? [],
			);
			this._joinAuthoritativeRepairPeersByDelay.delete(delayMs);
			if (drained.size === 0) return;

			// Under pubsub jitter a joiner's leader view may still be partial on
			// the first delayed pass. A bounded number of per-peer rescans widens
			// the authoritative frontier without sweeping on every append.
			this.scheduleRepairSweep({
				mode: "join-authoritative",
				peers: drained,
			});
		}, delayMs);

		// Do not let this timer keep the process alive (when unref is available).
		handle.unref?.();
		this._repairRetryTimers.add(handle);
		this._joinAuthoritativeRepairTimersByDelay.set(delayMs, handle);
	}
}
|
|
3302
|
+
|
|
2621
3303
|
private async runRepairSweep() {
|
|
2622
3304
|
try {
|
|
2623
3305
|
while (!this.closed) {
|
|
2624
|
-
const
|
|
2625
|
-
const
|
|
2626
|
-
|
|
2627
|
-
|
|
3306
|
+
const pendingModes = new Set(this._repairSweepPendingModes);
|
|
3307
|
+
const pendingPeersByMode = cloneRepairPendingPeersByMode(
|
|
3308
|
+
this._repairSweepPendingPeersByMode,
|
|
3309
|
+
);
|
|
3310
|
+
this._repairSweepPendingModes.clear();
|
|
3311
|
+
for (const peers of this._repairSweepPendingPeersByMode.values()) {
|
|
3312
|
+
peers.clear();
|
|
3313
|
+
}
|
|
2628
3314
|
|
|
2629
|
-
if (
|
|
3315
|
+
if (pendingModes.size === 0) {
|
|
2630
3316
|
return;
|
|
2631
3317
|
}
|
|
2632
3318
|
|
|
2633
|
-
const
|
|
2634
|
-
|
|
2635
|
-
|
|
3319
|
+
const optimisticGidPeersByMode = new Map<
|
|
3320
|
+
RepairDispatchMode,
|
|
3321
|
+
Map<string, Set<string>>
|
|
3322
|
+
>();
|
|
3323
|
+
const optimisticGidPeersConsumedByMode = new Map<
|
|
3324
|
+
RepairDispatchMode,
|
|
3325
|
+
Map<string, Map<string, number>>
|
|
3326
|
+
>();
|
|
3327
|
+
for (const mode of pendingModes) {
|
|
3328
|
+
const modePeers = pendingPeersByMode.get(mode);
|
|
3329
|
+
if (!modePeers || modePeers.size === 0) {
|
|
3330
|
+
continue;
|
|
3331
|
+
}
|
|
3332
|
+
const optimisticGidPeers = new Map<string, Set<string>>();
|
|
3333
|
+
const optimisticGidPeersConsumed = new Map<string, Map<string, number>>();
|
|
3334
|
+
for (const [gid, peerCounts] of this._repairSweepOptimisticGidPeersPending) {
|
|
3335
|
+
let matchedPeers: Set<string> | undefined;
|
|
3336
|
+
let matchedCounts: Map<string, number> | undefined;
|
|
3337
|
+
for (const [peer, count] of peerCounts) {
|
|
3338
|
+
if (!modePeers.has(peer)) {
|
|
3339
|
+
continue;
|
|
3340
|
+
}
|
|
3341
|
+
matchedPeers ||= new Set();
|
|
3342
|
+
matchedCounts ||= new Map();
|
|
3343
|
+
matchedPeers.add(peer);
|
|
3344
|
+
matchedCounts.set(peer, count);
|
|
3345
|
+
}
|
|
3346
|
+
if (matchedPeers && matchedCounts) {
|
|
3347
|
+
optimisticGidPeers.set(gid, matchedPeers);
|
|
3348
|
+
optimisticGidPeersConsumed.set(gid, matchedCounts);
|
|
3349
|
+
}
|
|
3350
|
+
}
|
|
3351
|
+
if (optimisticGidPeers.size > 0) {
|
|
3352
|
+
optimisticGidPeersByMode.set(mode, optimisticGidPeers);
|
|
3353
|
+
optimisticGidPeersConsumedByMode.set(mode, optimisticGidPeersConsumed);
|
|
3354
|
+
}
|
|
3355
|
+
}
|
|
3356
|
+
|
|
3357
|
+
const pendingByMode = new Map<
|
|
3358
|
+
RepairDispatchMode,
|
|
3359
|
+
Map<string, Map<string, EntryReplicated<any>>>
|
|
3360
|
+
>(REPAIR_DISPATCH_MODES.map((mode) => [mode, new Map()]));
|
|
3361
|
+
const pendingRepairPeers = new Set<string>();
|
|
3362
|
+
for (const peers of pendingPeersByMode.values()) {
|
|
3363
|
+
for (const peer of peers) {
|
|
3364
|
+
pendingRepairPeers.add(peer);
|
|
3365
|
+
}
|
|
3366
|
+
}
|
|
3367
|
+
const fullReplicaRepairCandidates =
|
|
3368
|
+
await this.getFullReplicaRepairCandidates(pendingRepairPeers, {
|
|
3369
|
+
includeSubscribers: false,
|
|
3370
|
+
});
|
|
3371
|
+
const fullReplicaRepairCandidateCount = Math.max(
|
|
3372
|
+
1,
|
|
3373
|
+
fullReplicaRepairCandidates.size,
|
|
3374
|
+
);
|
|
3375
|
+
const nextFrontierByMode = new Map<
|
|
3376
|
+
RepairDispatchMode,
|
|
3377
|
+
Map<string, Map<string, EntryReplicated<any>>>
|
|
3378
|
+
>([
|
|
3379
|
+
["join-authoritative", new Map()],
|
|
3380
|
+
["churn", new Map()],
|
|
3381
|
+
]);
|
|
3382
|
+
const flushTarget = (mode: RepairDispatchMode, target: string) => {
|
|
3383
|
+
const targets = pendingByMode.get(mode);
|
|
3384
|
+
const entries = targets?.get(target);
|
|
2636
3385
|
if (!entries || entries.size === 0) {
|
|
2637
3386
|
return;
|
|
2638
3387
|
}
|
|
2639
|
-
const isJoinWarmupTarget = addedPeers.has(target);
|
|
2640
|
-
const bypassRecentDedupe = isJoinWarmupTarget || forceFreshDelivery;
|
|
2641
3388
|
this.dispatchMaybeMissingEntries(target, entries, {
|
|
2642
|
-
bypassRecentDedupe,
|
|
2643
|
-
|
|
2644
|
-
? JOIN_WARMUP_RETRY_SCHEDULE_MS
|
|
2645
|
-
: undefined,
|
|
2646
|
-
forceFreshDelivery,
|
|
3389
|
+
bypassRecentDedupe: true,
|
|
3390
|
+
mode,
|
|
2647
3391
|
});
|
|
2648
|
-
|
|
3392
|
+
targets?.delete(target);
|
|
2649
3393
|
};
|
|
2650
3394
|
const queueEntryForTarget = (
|
|
3395
|
+
mode: RepairDispatchMode,
|
|
2651
3396
|
target: string,
|
|
2652
3397
|
entry: EntryReplicated<any>,
|
|
2653
3398
|
) => {
|
|
2654
|
-
|
|
3399
|
+
const sweepTargets = nextFrontierByMode.get(mode);
|
|
3400
|
+
if (sweepTargets) {
|
|
3401
|
+
let sweepSet = sweepTargets.get(target);
|
|
3402
|
+
if (!sweepSet) {
|
|
3403
|
+
sweepSet = new Map();
|
|
3404
|
+
sweepTargets.set(target, sweepSet);
|
|
3405
|
+
}
|
|
3406
|
+
sweepSet.set(entry.hash, entry);
|
|
3407
|
+
}
|
|
3408
|
+
const targets = pendingByMode.get(mode)!;
|
|
3409
|
+
let set = targets.get(target);
|
|
2655
3410
|
if (!set) {
|
|
2656
3411
|
set = new Map();
|
|
2657
|
-
|
|
3412
|
+
targets.set(target, set);
|
|
2658
3413
|
}
|
|
2659
3414
|
if (set.has(entry.hash)) {
|
|
2660
3415
|
return;
|
|
2661
3416
|
}
|
|
2662
3417
|
set.set(entry.hash, entry);
|
|
2663
3418
|
if (set.size >= this.repairSweepTargetBufferSize) {
|
|
2664
|
-
flushTarget(target);
|
|
3419
|
+
flushTarget(mode, target);
|
|
2665
3420
|
}
|
|
2666
3421
|
};
|
|
2667
3422
|
|
|
2668
3423
|
const iterator = this.entryCoordinatesIndex.iterate({});
|
|
2669
3424
|
try {
|
|
2670
|
-
|
|
2671
|
-
|
|
2672
|
-
|
|
2673
|
-
|
|
2674
|
-
|
|
2675
|
-
|
|
2676
|
-
|
|
2677
|
-
|
|
2678
|
-
|
|
2679
|
-
|
|
2680
|
-
|
|
3425
|
+
while (!this.closed && !iterator.done()) {
|
|
3426
|
+
const entries = await iterator.next(REPAIR_SWEEP_ENTRY_BATCH_SIZE);
|
|
3427
|
+
for (const entry of entries) {
|
|
3428
|
+
const entryReplicated = entry.value;
|
|
3429
|
+
const gid = entryReplicated.gid;
|
|
3430
|
+
const knownPeers = this._gidPeersHistory.get(gid);
|
|
3431
|
+
const requestedReplicas =
|
|
3432
|
+
decodeReplicas(entryReplicated).getValue(this);
|
|
3433
|
+
const currentPeers = await this.findLeaders(
|
|
3434
|
+
entryReplicated.coordinates,
|
|
3435
|
+
entryReplicated,
|
|
3436
|
+
{ roleAge: 0 },
|
|
3437
|
+
);
|
|
3438
|
+
|
|
3439
|
+
if (pendingModes.has("churn")) {
|
|
2681
3440
|
for (const [currentPeer] of currentPeers) {
|
|
2682
3441
|
if (currentPeer === this.node.identity.publicKey.hashcode()) {
|
|
2683
3442
|
continue;
|
|
2684
3443
|
}
|
|
2685
|
-
queueEntryForTarget(currentPeer, entryReplicated);
|
|
3444
|
+
queueEntryForTarget("churn", currentPeer, entryReplicated);
|
|
2686
3445
|
}
|
|
2687
3446
|
}
|
|
2688
|
-
|
|
2689
|
-
|
|
2690
|
-
|
|
2691
|
-
|
|
2692
|
-
|
|
3447
|
+
|
|
3448
|
+
for (const mode of pendingModes) {
|
|
3449
|
+
const modePeers = pendingPeersByMode.get(mode);
|
|
3450
|
+
if (!modePeers || modePeers.size === 0) {
|
|
3451
|
+
continue;
|
|
3452
|
+
}
|
|
3453
|
+
const optimisticPeers = optimisticGidPeersByMode.get(mode)?.get(gid);
|
|
3454
|
+
for (const peer of modePeers) {
|
|
3455
|
+
if (this.isEntryKnownByPeer(entryReplicated.hash, peer)) {
|
|
3456
|
+
continue;
|
|
3457
|
+
}
|
|
3458
|
+
const wasOptimisticallyAssigned =
|
|
3459
|
+
optimisticPeers?.has(peer) === true;
|
|
3460
|
+
const isCoveredByFullReplicaRepair =
|
|
3461
|
+
mode === "join-authoritative" &&
|
|
3462
|
+
fullReplicaRepairCandidates.has(peer) &&
|
|
3463
|
+
requestedReplicas >= fullReplicaRepairCandidateCount;
|
|
3464
|
+
const shouldQueue =
|
|
3465
|
+
mode === "join-authoritative"
|
|
3466
|
+
? currentPeers.has(peer) || isCoveredByFullReplicaRepair
|
|
3467
|
+
: wasOptimisticallyAssigned ||
|
|
3468
|
+
(currentPeers.has(peer) && !knownPeers?.has(peer));
|
|
3469
|
+
if (shouldQueue) {
|
|
3470
|
+
// Authoritative join repair must not trust partial gid peer history,
|
|
3471
|
+
// otherwise a late joiner can get stuck with a partial historical
|
|
3472
|
+
// backfill forever. Once we enter the authoritative pass, queue every
|
|
3473
|
+
// entry whose current leader set still includes the added peer.
|
|
3474
|
+
queueEntryForTarget(mode, peer, entryReplicated);
|
|
2693
3475
|
}
|
|
2694
3476
|
}
|
|
2695
3477
|
}
|
|
3478
|
+
}
|
|
2696
3479
|
}
|
|
2697
3480
|
} finally {
|
|
2698
3481
|
await iterator.close();
|
|
2699
3482
|
}
|
|
2700
3483
|
|
|
2701
|
-
for (const
|
|
2702
|
-
|
|
3484
|
+
for (const [, optimisticGidPeersConsumed] of optimisticGidPeersConsumedByMode) {
|
|
3485
|
+
for (const [gid, peerCounts] of optimisticGidPeersConsumed) {
|
|
3486
|
+
const pendingPeerCounts =
|
|
3487
|
+
this._repairSweepOptimisticGidPeersPending.get(gid);
|
|
3488
|
+
if (!pendingPeerCounts) {
|
|
3489
|
+
continue;
|
|
3490
|
+
}
|
|
3491
|
+
for (const [peer, count] of peerCounts) {
|
|
3492
|
+
const current = pendingPeerCounts.get(peer) || 0;
|
|
3493
|
+
const next = current - count;
|
|
3494
|
+
if (next > 0) {
|
|
3495
|
+
pendingPeerCounts.set(peer, next);
|
|
3496
|
+
} else {
|
|
3497
|
+
pendingPeerCounts.delete(peer);
|
|
3498
|
+
}
|
|
3499
|
+
}
|
|
3500
|
+
if (pendingPeerCounts.size === 0) {
|
|
3501
|
+
this._repairSweepOptimisticGidPeersPending.delete(gid);
|
|
3502
|
+
}
|
|
3503
|
+
}
|
|
3504
|
+
}
|
|
3505
|
+
|
|
3506
|
+
for (const mode of pendingModes) {
|
|
3507
|
+
if (mode !== "join-authoritative" && mode !== "churn") {
|
|
3508
|
+
continue;
|
|
3509
|
+
}
|
|
3510
|
+
const nextTargets = nextFrontierByMode.get(mode) ?? new Map();
|
|
3511
|
+
const frontierTargets = this._repairFrontierByMode.get(mode);
|
|
3512
|
+
for (const target of pendingPeersByMode.get(mode) ?? []) {
|
|
3513
|
+
const replacement = nextTargets.get(target);
|
|
3514
|
+
if (mode === "join-authoritative") {
|
|
3515
|
+
// Authoritative join repair is receipt-driven: a later sweep can have a
|
|
3516
|
+
// narrower transient leader view, but it must not forget unconfirmed
|
|
3517
|
+
// hashes that were already queued for this joiner.
|
|
3518
|
+
if (replacement && replacement.size > 0) {
|
|
3519
|
+
const existing = frontierTargets?.get(target);
|
|
3520
|
+
if (existing && existing.size > 0) {
|
|
3521
|
+
for (const [hash, entry] of replacement) {
|
|
3522
|
+
existing.set(hash, entry);
|
|
3523
|
+
}
|
|
3524
|
+
} else {
|
|
3525
|
+
frontierTargets?.set(target, replacement);
|
|
3526
|
+
}
|
|
3527
|
+
}
|
|
3528
|
+
continue;
|
|
3529
|
+
}
|
|
3530
|
+
if (replacement && replacement.size > 0) {
|
|
3531
|
+
frontierTargets?.set(target, replacement);
|
|
3532
|
+
} else {
|
|
3533
|
+
frontierTargets?.delete(target);
|
|
3534
|
+
}
|
|
3535
|
+
}
|
|
3536
|
+
}
|
|
3537
|
+
|
|
3538
|
+
for (const [mode, targets] of pendingByMode) {
|
|
3539
|
+
for (const target of [...targets.keys()]) {
|
|
3540
|
+
flushTarget(mode, target);
|
|
3541
|
+
}
|
|
2703
3542
|
}
|
|
2704
3543
|
}
|
|
2705
3544
|
} catch (error: any) {
|
|
@@ -2708,11 +3547,7 @@ export class SharedLog<
|
|
|
2708
3547
|
}
|
|
2709
3548
|
} finally {
|
|
2710
3549
|
this._repairSweepRunning = false;
|
|
2711
|
-
if (
|
|
2712
|
-
!this.closed &&
|
|
2713
|
-
(this._repairSweepForceFreshPending ||
|
|
2714
|
-
this._repairSweepAddedPeersPending.size > 0)
|
|
2715
|
-
) {
|
|
3550
|
+
if (!this.closed && this._repairSweepPendingModes.size > 0) {
|
|
2716
3551
|
this._repairSweepRunning = true;
|
|
2717
3552
|
void this.runRepairSweep();
|
|
2718
3553
|
}
|
|
@@ -2725,9 +3560,89 @@ export class SharedLog<
|
|
|
2725
3560
|
entry: Entry<T> | ShallowEntry | EntryReplicated<R>;
|
|
2726
3561
|
leaders: Map<string, any>;
|
|
2727
3562
|
};
|
|
2728
|
-
}) {
|
|
2729
|
-
if (
|
|
2730
|
-
return
|
|
3563
|
+
}): Promise<boolean> {
|
|
3564
|
+
if (this.keep && (await this.keep(args.value.entry))) {
|
|
3565
|
+
return false;
|
|
3566
|
+
}
|
|
3567
|
+
void this.pruneDebouncedFn.add(args);
|
|
3568
|
+
return true;
|
|
3569
|
+
}
|
|
3570
|
+
|
|
3571
|
+
/**
 * Re-checks leadership for a batch of joined entries and hands the ones this
 * node does not lead to the debounced prune queue.
 *
 * Per entry:
 * - skipped while the log is closed or a delete is already pending for it;
 * - if this node is among the leaders, any queued prune for it is dropped;
 * - if no leaders were found at all, nothing is done;
 * - otherwise it is offered to pruneDebouncedFnAddIfNotKeeping and its hash is
 *   removed from responseToPruneDebouncedFn.
 *
 * @param entries entries to re-evaluate
 */
private async pruneJoinedEntriesNoLongerLed(entries: Entry<T>[]) {
	const ownHash = this.node.identity.publicKey.hashcode();

	for (const joined of entries) {
		const shouldSkip = this.closed || this._pendingDeletes.has(joined.hash);
		if (shouldSkip) continue;

		const replicas = decodeReplicas(joined).getValue(this);
		const leaders = await this.findLeadersFromEntry(joined, replicas, {
			roleAge: 0,
		});

		if (leaders.has(ownHash)) {
			// Still a leader: make sure no prune stays queued for this entry.
			this.pruneDebouncedFn.delete(joined.hash);
		} else if (leaders.size !== 0) {
			// Someone else leads this entry: queue it unless `keep` retains it.
			await this.pruneDebouncedFnAddIfNotKeeping({
				key: joined.hash,
				value: { entry: joined, leaders },
			});
			this.responseToPruneDebouncedFn.delete(joined.hash);
		}
	}
}
|
|
3600
|
+
|
|
3601
|
+
/**
 * Walks the entry-coordinates index in batches and queues for pruning every
 * indexed entry this node does not lead, then flushes the prune queue once.
 *
 * Per indexed entry:
 * - skipped while the log is closed or a delete is already pending for it;
 * - if this node is among the leaders, its queued prune is dropped, a pending
 *   delete that appeared in the meantime is rejected, and its sent prune
 *   request is removed;
 * - if no leaders were found at all, nothing is done;
 * - otherwise it is offered to pruneDebouncedFnAddIfNotKeeping and its hash is
 *   removed from responseToPruneDebouncedFn.
 *
 * The iterator is always closed, including when iteration throws. The prune
 * queue is flushed only if at least one entry was queued and the log is still
 * open.
 */
private async pruneIndexedEntriesNoLongerLed() {
	const ownHash = this.node.identity.publicKey.hashcode();
	const cursor = this.entryCoordinatesIndex.iterate({});
	let anyQueued = false;

	try {
		while (!this.closed && !cursor.done()) {
			const batch = await cursor.next(REPAIR_SWEEP_ENTRY_BATCH_SIZE);
			for (const row of batch) {
				const indexed = row.value;
				const shouldSkip =
					this.closed || this._pendingDeletes.has(indexed.hash);
				if (shouldSkip) continue;

				const leaders = await this.findLeaders(
					indexed.coordinates,
					indexed,
					{ roleAge: 0 },
				);

				if (leaders.has(ownHash)) {
					// Leader again: cancel anything working towards deleting it.
					this.pruneDebouncedFn.delete(indexed.hash);
					await this._pendingDeletes
						.get(indexed.hash)
						?.reject(new Error("Failed to delete, is leader again"));
					this.removePruneRequestSent(indexed.hash);
					continue;
				}

				if (leaders.size === 0) continue;

				const wasQueued = await this.pruneDebouncedFnAddIfNotKeeping({
					key: indexed.hash,
					value: { entry: indexed, leaders },
				});
				if (wasQueued) {
					anyQueued = true;
				}
				this.responseToPruneDebouncedFn.delete(indexed.hash);
			}
		}
	} finally {
		await cursor.close();
	}

	const shouldFlush = anyQueued && !this.closed;
	if (shouldFlush) {
		await this.pruneDebouncedFn.flush();
	}
}
|
|
2733
3648
|
|
|
@@ -2904,6 +3819,7 @@ export class SharedLog<
|
|
|
2904
3819
|
} else {
|
|
2905
3820
|
await this._appendDeliverToReplicators(
|
|
2906
3821
|
result.entry,
|
|
3822
|
+
coordinates,
|
|
2907
3823
|
minReplicasValue,
|
|
2908
3824
|
leaders,
|
|
2909
3825
|
selfHash,
|
|
@@ -2913,13 +3829,14 @@ export class SharedLog<
|
|
|
2913
3829
|
}
|
|
2914
3830
|
}
|
|
2915
3831
|
|
|
2916
|
-
|
|
3832
|
+
const delayAdaptiveRebalance = this.shouldDelayAdaptiveRebalance();
|
|
3833
|
+
if (!isLeader && !delayAdaptiveRebalance) {
|
|
2917
3834
|
this.pruneDebouncedFnAddIfNotKeeping({
|
|
2918
3835
|
key: result.entry.hash,
|
|
2919
3836
|
value: { entry: result.entry, leaders },
|
|
2920
3837
|
});
|
|
2921
3838
|
}
|
|
2922
|
-
if (!
|
|
3839
|
+
if (!delayAdaptiveRebalance) {
|
|
2923
3840
|
this.rebalanceParticipationDebounced?.call();
|
|
2924
3841
|
}
|
|
2925
3842
|
|
|
@@ -2961,8 +3878,21 @@ export class SharedLog<
|
|
|
2961
3878
|
this._repairRetryTimers = new Set();
|
|
2962
3879
|
this._recentRepairDispatch = new Map();
|
|
2963
3880
|
this._repairSweepRunning = false;
|
|
2964
|
-
this.
|
|
2965
|
-
this.
|
|
3881
|
+
this._repairSweepPendingModes = new Set();
|
|
3882
|
+
this._repairSweepPendingPeersByMode = createRepairPendingPeersByMode();
|
|
3883
|
+
this._repairFrontierByMode = createRepairFrontierByMode() as Map<
|
|
3884
|
+
RepairDispatchMode,
|
|
3885
|
+
Map<string, Map<string, EntryReplicated<R>>>
|
|
3886
|
+
>;
|
|
3887
|
+
this._repairFrontierActiveTargetsByMode = createRepairActiveTargetsByMode();
|
|
3888
|
+
this._repairSweepOptimisticGidPeersPending = new Map();
|
|
3889
|
+
this._entryKnownPeers = new Map();
|
|
3890
|
+
this._joinAuthoritativeRepairTimersByDelay = new Map();
|
|
3891
|
+
this._joinAuthoritativeRepairPeersByDelay = new Map();
|
|
3892
|
+
this._assumeSyncedRepairSuppressedUntil = 0;
|
|
3893
|
+
this._appendBackfillTimer = undefined;
|
|
3894
|
+
this._appendBackfillPendingByTarget = new Map();
|
|
3895
|
+
this._repairMetrics = createRepairMetrics();
|
|
2966
3896
|
this._topicSubscribersCache = new Map();
|
|
2967
3897
|
this.coordinateToHash = new Cache<string>({ max: 1e6, ttl: 1e4 });
|
|
2968
3898
|
this.recentlyRebalanced = new Cache<string>({ max: 1e4, ttl: 1e5 });
|
|
@@ -3041,7 +3971,10 @@ export class SharedLog<
|
|
|
3041
3971
|
this.pendingMaturity = new Map();
|
|
3042
3972
|
|
|
3043
3973
|
const id = sha256Base64Sync(this.log.id);
|
|
3044
|
-
const storage = await
|
|
3974
|
+
const [storage, logScope] = await Promise.all([
|
|
3975
|
+
this.node.storage.sublevel(id),
|
|
3976
|
+
this.node.indexer.scope(id),
|
|
3977
|
+
]);
|
|
3045
3978
|
|
|
3046
3979
|
const localBlocks = await new AnyBlockStore(await storage.sublevel("blocks"));
|
|
3047
3980
|
const fanoutService = getSharedLogFanoutService(this.node.services);
|
|
@@ -3104,20 +4037,19 @@ export class SharedLog<
|
|
|
3104
4037
|
},
|
|
3105
4038
|
});
|
|
3106
4039
|
|
|
3107
|
-
|
|
3108
|
-
|
|
3109
|
-
|
|
3110
|
-
|
|
4040
|
+
const remoteBlocksStartPromise = this.remoteBlocks.start();
|
|
4041
|
+
const [replicationIndex, logIndex] = await Promise.all([
|
|
4042
|
+
logScope.scope("replication"),
|
|
4043
|
+
logScope.scope("log"),
|
|
4044
|
+
]);
|
|
3111
4045
|
this._replicationRangeIndex = await replicationIndex.init({
|
|
3112
4046
|
schema: this.indexableDomain.constructorRange,
|
|
3113
4047
|
});
|
|
3114
|
-
|
|
3115
4048
|
this._entryCoordinatesIndex = await replicationIndex.init({
|
|
3116
4049
|
schema: this.indexableDomain.constructorEntry,
|
|
3117
4050
|
});
|
|
3118
4051
|
|
|
3119
|
-
|
|
3120
|
-
|
|
4052
|
+
await remoteBlocksStartPromise;
|
|
3121
4053
|
const hasIndexedReplicationInfo =
|
|
3122
4054
|
(await this.replicationIndex.count({
|
|
3123
4055
|
query: [
|
|
@@ -3279,47 +4211,50 @@ export class SharedLog<
|
|
|
3279
4211
|
}
|
|
3280
4212
|
|
|
3281
4213
|
// Open for communication
|
|
3282
|
-
await this.rpc.open({
|
|
3283
|
-
queryType: TransportMessage,
|
|
3284
|
-
responseType: TransportMessage,
|
|
3285
|
-
responseHandler: (query, context) => this.onMessage(query, context),
|
|
3286
|
-
topic: this.topic,
|
|
3287
|
-
});
|
|
3288
|
-
|
|
3289
4214
|
this._onSubscriptionFn =
|
|
3290
4215
|
this._onSubscriptionFn || this._onSubscription.bind(this);
|
|
3291
|
-
await this.node.services.pubsub.addEventListener(
|
|
3292
|
-
"subscribe",
|
|
3293
|
-
this._onSubscriptionFn,
|
|
3294
|
-
);
|
|
3295
|
-
|
|
3296
4216
|
this._onUnsubscriptionFn =
|
|
3297
4217
|
this._onUnsubscriptionFn || this._onUnsubscription.bind(this);
|
|
3298
|
-
await
|
|
3299
|
-
|
|
3300
|
-
|
|
3301
|
-
|
|
3302
|
-
|
|
3303
|
-
|
|
3304
|
-
|
|
4218
|
+
await Promise.all([
|
|
4219
|
+
this.rpc.open({
|
|
4220
|
+
queryType: TransportMessage,
|
|
4221
|
+
responseType: TransportMessage,
|
|
4222
|
+
responseHandler: (query, context) => this.onMessage(query, context),
|
|
4223
|
+
topic: this.topic,
|
|
4224
|
+
}),
|
|
4225
|
+
this.node.services.pubsub.addEventListener(
|
|
4226
|
+
"subscribe",
|
|
4227
|
+
this._onSubscriptionFn,
|
|
4228
|
+
),
|
|
4229
|
+
this.node.services.pubsub.addEventListener(
|
|
4230
|
+
"unsubscribe",
|
|
4231
|
+
this._onUnsubscriptionFn,
|
|
4232
|
+
),
|
|
4233
|
+
]);
|
|
3305
4234
|
|
|
3306
|
-
|
|
3307
|
-
|
|
4235
|
+
const fanoutOpenPromise = this._openFanoutChannel(options?.fanout);
|
|
4236
|
+
// Mark previously-owned replication ranges as "new" only when they already exist.
|
|
4237
|
+
// Fresh opens have nothing to touch here, so skip the extra scan/write entirely.
|
|
4238
|
+
const updateOwnedReplicationPromise = hasIndexedReplicationInfo
|
|
4239
|
+
? this.updateTimestampOfOwnedReplicationRanges()
|
|
4240
|
+
: Promise.resolve();
|
|
4241
|
+
await Promise.all([fanoutOpenPromise, updateOwnedReplicationPromise]);
|
|
3308
4242
|
|
|
3309
4243
|
// if we had a previous session with replication info, and new replication info dictates that we unreplicate
|
|
3310
4244
|
// we should do that. Otherwise, if options is an unreplication, we don't need to do anything because
|
|
3311
4245
|
// we are already unreplicated (as we are just opening)
|
|
3312
4246
|
|
|
3313
|
-
|
|
4247
|
+
const isUnreplicationOptionsDefined = isUnreplicationOptions(
|
|
3314
4248
|
options?.replicate,
|
|
3315
4249
|
);
|
|
3316
4250
|
|
|
3317
4251
|
const canResumeReplication =
|
|
4252
|
+
hasIndexedReplicationInfo &&
|
|
3318
4253
|
(await isReplicationOptionsDependentOnPreviousState(
|
|
3319
4254
|
options?.replicate,
|
|
3320
4255
|
this.replicationIndex,
|
|
3321
4256
|
this.node.identity.publicKey,
|
|
3322
|
-
))
|
|
4257
|
+
));
|
|
3323
4258
|
|
|
3324
4259
|
if (hasIndexedReplicationInfo && isUnreplicationOptionsDefined) {
|
|
3325
4260
|
await this.replicate(options?.replicate, { checkDuplicates: true });
|
|
@@ -3372,25 +4307,26 @@ export class SharedLog<
|
|
|
3372
4307
|
|
|
3373
4308
|
async afterOpen(): Promise<void> {
|
|
3374
4309
|
await super.afterOpen();
|
|
4310
|
+
const existingSubscribersPromise = this._getTopicSubscribers(this.topic);
|
|
3375
4311
|
|
|
3376
4312
|
// We do this here, because these calls requires this.closed == false
|
|
3377
|
-
|
|
3378
|
-
|
|
3379
|
-
|
|
3380
|
-
|
|
4313
|
+
void this.pruneOfflineReplicators()
|
|
4314
|
+
.then(() => {
|
|
4315
|
+
this._replicatorsReconciled = true;
|
|
4316
|
+
})
|
|
3381
4317
|
.catch((error) => {
|
|
3382
4318
|
if (isNotStartedError(error as Error)) {
|
|
3383
4319
|
return;
|
|
3384
4320
|
}
|
|
3385
|
-
|
|
3386
|
-
|
|
4321
|
+
logger.error(error);
|
|
4322
|
+
});
|
|
3387
4323
|
|
|
3388
|
-
|
|
4324
|
+
this.startReplicatorLivenessSweep();
|
|
3389
4325
|
|
|
3390
|
-
|
|
4326
|
+
await this.rebalanceParticipation();
|
|
3391
4327
|
|
|
3392
4328
|
// Take into account existing subscription
|
|
3393
|
-
(await
|
|
4329
|
+
(await existingSubscribersPromise)?.forEach((v) => {
|
|
3394
4330
|
if (v.equals(this.node.identity.publicKey)) {
|
|
3395
4331
|
return;
|
|
3396
4332
|
}
|
|
@@ -4021,8 +4957,28 @@ export class SharedLog<
|
|
|
4021
4957
|
this._repairRetryTimers.clear();
|
|
4022
4958
|
this._recentRepairDispatch.clear();
|
|
4023
4959
|
this._repairSweepRunning = false;
|
|
4024
|
-
this.
|
|
4025
|
-
this.
|
|
4960
|
+
this._repairSweepPendingModes.clear();
|
|
4961
|
+
for (const peers of this._repairSweepPendingPeersByMode.values()) {
|
|
4962
|
+
peers.clear();
|
|
4963
|
+
}
|
|
4964
|
+
this._repairSweepOptimisticGidPeersPending.clear();
|
|
4965
|
+
this._entryKnownPeers.clear();
|
|
4966
|
+
for (const timer of this._joinAuthoritativeRepairTimersByDelay.values()) {
|
|
4967
|
+
clearTimeout(timer);
|
|
4968
|
+
}
|
|
4969
|
+
this._joinAuthoritativeRepairTimersByDelay.clear();
|
|
4970
|
+
this._joinAuthoritativeRepairPeersByDelay.clear();
|
|
4971
|
+
for (const targets of this._repairFrontierByMode.values()) {
|
|
4972
|
+
targets.clear();
|
|
4973
|
+
}
|
|
4974
|
+
for (const targets of this._repairFrontierActiveTargetsByMode.values()) {
|
|
4975
|
+
targets.clear();
|
|
4976
|
+
}
|
|
4977
|
+
if (this._appendBackfillTimer) {
|
|
4978
|
+
clearTimeout(this._appendBackfillTimer);
|
|
4979
|
+
this._appendBackfillTimer = undefined;
|
|
4980
|
+
}
|
|
4981
|
+
this._appendBackfillPendingByTarget.clear();
|
|
4026
4982
|
|
|
4027
4983
|
for (const [_k, v] of this._pendingDeletes) {
|
|
4028
4984
|
v.clear();
|
|
@@ -4196,6 +5152,8 @@ export class SharedLog<
|
|
|
4196
5152
|
*/
|
|
4197
5153
|
|
|
4198
5154
|
const { heads } = msg;
|
|
5155
|
+
const isRepairHint =
|
|
5156
|
+
(msg.reserved[0] & EXCHANGE_HEADS_REPAIR_HINT) !== 0;
|
|
4199
5157
|
|
|
4200
5158
|
logger.trace(
|
|
4201
5159
|
`${this.node.identity.publicKey.hashcode()}: Recieved heads: ${
|
|
@@ -4205,6 +5163,7 @@ export class SharedLog<
|
|
|
4205
5163
|
|
|
4206
5164
|
if (heads) {
|
|
4207
5165
|
const filteredHeads: EntryWithRefs<any>[] = [];
|
|
5166
|
+
const confirmedHashes = new Set<string>();
|
|
4208
5167
|
for (const head of heads) {
|
|
4209
5168
|
if (!(await this.log.has(head.entry.hash))) {
|
|
4210
5169
|
head.entry.init({
|
|
@@ -4213,10 +5172,22 @@ export class SharedLog<
|
|
|
4213
5172
|
encoding: this.log.encoding,
|
|
4214
5173
|
});
|
|
4215
5174
|
filteredHeads.push(head);
|
|
5175
|
+
} else {
|
|
5176
|
+
confirmedHashes.add(head.entry.hash);
|
|
4216
5177
|
}
|
|
4217
5178
|
}
|
|
5179
|
+
const fromIsSelf = context.from.equals(this.node.identity.publicKey);
|
|
5180
|
+
if (!fromIsSelf) {
|
|
5181
|
+
this.markEntriesKnownByPeer(
|
|
5182
|
+
heads.map((head) => head.entry.hash),
|
|
5183
|
+
context.from.hashcode(),
|
|
5184
|
+
);
|
|
5185
|
+
}
|
|
4218
5186
|
|
|
4219
5187
|
if (filteredHeads.length === 0) {
|
|
5188
|
+
if (confirmedHashes.size > 0 && !fromIsSelf) {
|
|
5189
|
+
await this.sendRepairConfirmation(context.from!, confirmedHashes);
|
|
5190
|
+
}
|
|
4220
5191
|
return;
|
|
4221
5192
|
}
|
|
4222
5193
|
const groupedByGid = await groupByGid(filteredHeads);
|
|
@@ -4303,8 +5274,15 @@ export class SharedLog<
|
|
|
4303
5274
|
|
|
4304
5275
|
let maybeDelete: EntryWithRefs<any>[][] | undefined;
|
|
4305
5276
|
let toMerge: Entry<any>[] = [];
|
|
5277
|
+
let toPersist: Entry<any>[] = [];
|
|
4306
5278
|
let toDelete: Entry<any>[] | undefined;
|
|
4307
|
-
|
|
5279
|
+
// Targeted repair is sent only to peers the sender currently believes
|
|
5280
|
+
// should store the entry. Accept it while local membership catches up;
|
|
5281
|
+
// the normal checked-prune path below can still remove it if this peer
|
|
5282
|
+
// truly no longer owns the entry.
|
|
5283
|
+
const acceptsTargetedRepair = isRepairHint && fromIsLeader;
|
|
5284
|
+
const keepAsLeader = isLeader || acceptsTargetedRepair;
|
|
5285
|
+
if (keepAsLeader) {
|
|
4308
5286
|
for (const entry of entries) {
|
|
4309
5287
|
this.pruneDebouncedFn.delete(entry.entry.hash);
|
|
4310
5288
|
this.removePruneRequestSent(entry.entry.hash);
|
|
@@ -4325,8 +5303,9 @@ export class SharedLog<
|
|
|
4325
5303
|
}
|
|
4326
5304
|
|
|
4327
5305
|
outer: for (const entry of entries) {
|
|
4328
|
-
if (
|
|
5306
|
+
if (keepAsLeader || (await this.keep?.(entry.entry))) {
|
|
4329
5307
|
toMerge.push(entry.entry);
|
|
5308
|
+
toPersist.push(entry.entry);
|
|
4330
5309
|
} else {
|
|
4331
5310
|
for (const ref of entry.gidRefrences) {
|
|
4332
5311
|
const map = await this.log.entryIndex.getHeads(ref).all();
|
|
@@ -4350,7 +5329,25 @@ export class SharedLog<
|
|
|
4350
5329
|
}
|
|
4351
5330
|
|
|
4352
5331
|
if (toMerge.length > 0) {
|
|
5332
|
+
this.markEntriesKnownByPeer(
|
|
5333
|
+
toMerge.map((entry) => entry.hash),
|
|
5334
|
+
context.from!.hashcode(),
|
|
5335
|
+
);
|
|
4353
5336
|
await this.log.join(toMerge);
|
|
5337
|
+
// Network joins bypass SharedLog.join(), but churn repair scans
|
|
5338
|
+
// the coordinate index to redistribute entries after membership changes.
|
|
5339
|
+
for (const entry of toPersist) {
|
|
5340
|
+
const replicas = decodeReplicas(entry).getValue(this);
|
|
5341
|
+
await this.findLeaders(
|
|
5342
|
+
await this.createCoordinates(entry, replicas),
|
|
5343
|
+
entry,
|
|
5344
|
+
{ roleAge: 0, persist: {} },
|
|
5345
|
+
);
|
|
5346
|
+
}
|
|
5347
|
+
for (const merged of toMerge) {
|
|
5348
|
+
confirmedHashes.add(merged.hash);
|
|
5349
|
+
}
|
|
5350
|
+
await this.pruneJoinedEntriesNoLongerLed(toMerge);
|
|
4354
5351
|
|
|
4355
5352
|
toDelete?.map((x) =>
|
|
4356
5353
|
// TODO types
|
|
@@ -4397,6 +5394,10 @@ export class SharedLog<
|
|
|
4397
5394
|
promises.push(fn()); // we do this concurrently since waitForIsLeader might be a blocking operation for some entries
|
|
4398
5395
|
}
|
|
4399
5396
|
await Promise.all(promises);
|
|
5397
|
+
if (confirmedHashes.size > 0 && !context.from.equals(this.node.identity.publicKey)) {
|
|
5398
|
+
this.markEntriesKnownByPeer(confirmedHashes, context.from.hashcode());
|
|
5399
|
+
await this.sendRepairConfirmation(context.from!, confirmedHashes);
|
|
5400
|
+
}
|
|
4400
5401
|
}
|
|
4401
5402
|
} else if (msg instanceof RequestIPrune) {
|
|
4402
5403
|
const hasAndIsLeader: string[] = [];
|
|
@@ -4404,6 +5405,7 @@ export class SharedLog<
|
|
|
4404
5405
|
|
|
4405
5406
|
for (const hash of msg.hashes) {
|
|
4406
5407
|
this.removePruneRequestSent(hash, from);
|
|
5408
|
+
this.removeEntriesKnownByPeer([hash], from);
|
|
4407
5409
|
|
|
4408
5410
|
// if we expect the remote to be owner of this entry because we are to prune ourselves, then we need to remove the remote
|
|
4409
5411
|
// this is due to that the remote has previously indicated to be a replicator to help us prune but now has changed their mind
|
|
@@ -4416,7 +5418,11 @@ export class SharedLog<
|
|
|
4416
5418
|
const indexedEntry = await this.log.entryIndex.getShallow(hash);
|
|
4417
5419
|
let isLeader = false;
|
|
4418
5420
|
|
|
4419
|
-
if (
|
|
5421
|
+
if (
|
|
5422
|
+
indexedEntry &&
|
|
5423
|
+
!this._pendingDeletes.has(hash) &&
|
|
5424
|
+
(await this.log.blocks.has(hash))
|
|
5425
|
+
) {
|
|
4420
5426
|
this.removePeerFromGidPeerHistory(
|
|
4421
5427
|
context.from!.hashcode(),
|
|
4422
5428
|
indexedEntry!.value.meta.gid,
|
|
@@ -4518,6 +5524,10 @@ export class SharedLog<
|
|
|
4518
5524
|
for (const hash of msg.hashes) {
|
|
4519
5525
|
this._pendingDeletes.get(hash)?.resolve(context.from.hashcode());
|
|
4520
5526
|
}
|
|
5527
|
+
} else if (msg instanceof ConfirmEntriesMessage) {
|
|
5528
|
+
this.markEntriesKnownByPeer(msg.hashes, context.from.hashcode());
|
|
5529
|
+
this.clearRepairFrontierHashes(context.from.hashcode(), msg.hashes);
|
|
5530
|
+
return;
|
|
4521
5531
|
} else if (await this.syncronizer.onMessage(msg, context)) {
|
|
4522
5532
|
return; // the syncronizer has handled the message
|
|
4523
5533
|
} else if (msg instanceof BlocksMessage) {
|
|
@@ -4948,6 +5958,11 @@ export class SharedLog<
|
|
|
4948
5958
|
let messageToSend: AddedReplicationSegmentMessage | undefined = undefined;
|
|
4949
5959
|
|
|
4950
5960
|
if (assumeSynced) {
|
|
5961
|
+
// `assumeSynced` is an explicit contract that this join should trust the
|
|
5962
|
+
// supplied history and avoid initiating outbound repair while the local
|
|
5963
|
+
// replication ranges settle.
|
|
5964
|
+
this._assumeSyncedRepairSuppressedUntil =
|
|
5965
|
+
Date.now() + ASSUME_SYNCED_REPAIR_SUPPRESSION_MS;
|
|
4951
5966
|
for (const entry of entriesToReplicate) {
|
|
4952
5967
|
await seedAssumeSyncedPeerHistory(entry);
|
|
4953
5968
|
}
|
|
@@ -5033,9 +6048,14 @@ export class SharedLog<
|
|
|
5033
6048
|
clear();
|
|
5034
6049
|
// `waitForReplicator()` is typically used as a precondition before join/replicate
|
|
5035
6050
|
// flows. A replicator can become mature and enqueue a debounced rebalance
|
|
5036
|
-
// (`replicationChangeDebounceFn`) slightly later.
|
|
5037
|
-
//
|
|
5038
|
-
|
|
6051
|
+
// (`replicationChangeDebounceFn`) slightly later. Kick the flush, but do not
|
|
6052
|
+
// make membership waits depend on all rebalance work finishing; callers that
|
|
6053
|
+
// need settled distribution already wait for that explicitly.
|
|
6054
|
+
this.replicationChangeDebounceFn?.flush?.().catch((error: any) => {
|
|
6055
|
+
if (!isNotStartedError(error)) {
|
|
6056
|
+
logger.error(error?.toString?.() ?? String(error));
|
|
6057
|
+
}
|
|
6058
|
+
});
|
|
5039
6059
|
deferred.resolve();
|
|
5040
6060
|
};
|
|
5041
6061
|
|
|
@@ -5580,6 +6600,18 @@ export class SharedLog<
|
|
|
5580
6600
|
}
|
|
5581
6601
|
}
|
|
5582
6602
|
}
|
|
6603
|
+
|
|
6604
|
+
if (!options?.candidates) {
|
|
6605
|
+
const fullReplicaLeaders = await this.findFullReplicaLeaders(
|
|
6606
|
+
cursors.length,
|
|
6607
|
+
roleAge,
|
|
6608
|
+
peerFilter,
|
|
6609
|
+
);
|
|
6610
|
+
if (fullReplicaLeaders) {
|
|
6611
|
+
return fullReplicaLeaders;
|
|
6612
|
+
}
|
|
6613
|
+
}
|
|
6614
|
+
|
|
5583
6615
|
return getSamples<R>(
|
|
5584
6616
|
cursors,
|
|
5585
6617
|
this.replicationIndex,
|
|
@@ -5592,6 +6624,50 @@ export class SharedLog<
|
|
|
5592
6624
|
);
|
|
5593
6625
|
}
|
|
5594
6626
|
|
|
6627
|
+
/**
 * Scans the whole replication index and collects the owners of every
 * eligible range as leaders.
 *
 * A range is eligible when its owner hash passes `peerFilter` (if one is
 * given), `isMatured` accepts it for the supplied `roleAge`, and it is not
 * a strict-mode range while `strictFullReplicaFallback` is explicitly
 * disabled in the log properties.
 *
 * @param replicas - Maximum number of distinct owners that may be returned.
 * @param roleAge - Maturity threshold forwarded to `isMatured`.
 * @param peerFilter - Optional allow-list of owner hashes.
 * @returns A map keyed by owner hash (each marked `intersecting: true`), or
 * `undefined` when no owner is eligible or when more than `replicas`
 * distinct owners were found.
 */
private async findFullReplicaLeaders(
	replicas: number,
	roleAge: number,
	peerFilter?: Set<string>,
): Promise<Map<string, { intersecting: boolean }> | undefined> {
	const evaluatedAt = Date.now();
	const eligible = new Map<string, { intersecting: boolean }>();
	// Strict ranges are counted unless the option is set to exactly `false`.
	const strictAllowed =
		this._logProperties?.strictFullReplicaFallback !== false;
	const cursor = this.replicationIndex.iterate(
		{},
		{ shape: { hash: true, timestamp: true, mode: true } },
	);

	try {
		let page = await cursor.next(64);
		while (page.length > 0) {
			for (const { value: segment } of page) {
				const skipped =
					(peerFilter && !peerFilter.has(segment.hash)) ||
					!isMatured(segment, evaluatedAt, roleAge) ||
					(segment.mode === ReplicationIntent.Strict && !strictAllowed);
				if (skipped) {
					continue;
				}

				eligible.set(segment.hash, { intersecting: true });
				// Too many distinct owners: signal "no result" to the caller.
				if (eligible.size > replicas) {
					return undefined;
				}
			}
			page = await cursor.next(64);
		}
	} finally {
		// Runs on the early return above as well, so the iterator is always closed.
		await cursor.close();
	}

	if (eligible.size === 0) {
		return undefined;
	}
	return eligible;
}
|
|
6670
|
+
|
|
5595
6671
|
async findLeadersFromEntry(
|
|
5596
6672
|
entry: ShallowOrFullEntry<any> | EntryReplicated<R>,
|
|
5597
6673
|
replicas: number,
|
|
@@ -6231,16 +7307,33 @@ export class SharedLog<
|
|
|
6231
7307
|
|
|
6232
7308
|
const changed = false;
|
|
6233
7309
|
const addedPeers = new Set<string>();
|
|
7310
|
+
const authoritativeRepairPeers = new Set<string>();
|
|
6234
7311
|
const warmupPeers = new Set<string>();
|
|
7312
|
+
const churnRepairPeers = new Set<string>();
|
|
6235
7313
|
const hasSelfWarmupChange = changes.some(
|
|
6236
7314
|
(change) =>
|
|
6237
7315
|
change.range.hash === selfHash &&
|
|
6238
7316
|
(change.type === "added" || change.type === "replaced"),
|
|
6239
7317
|
);
|
|
7318
|
+
const hasSelfRangeRemoval = changes.some(
|
|
7319
|
+
(change) =>
|
|
7320
|
+
change.range.hash === selfHash &&
|
|
7321
|
+
(change.type === "removed" || change.type === "replaced"),
|
|
7322
|
+
);
|
|
6240
7323
|
for (const change of changes) {
|
|
7324
|
+
if (
|
|
7325
|
+
change.range.hash !== selfHash &&
|
|
7326
|
+
(change.type === "removed" || change.type === "replaced")
|
|
7327
|
+
) {
|
|
7328
|
+
this.removePeerFromEntryKnownPeers(change.range.hash);
|
|
7329
|
+
}
|
|
6241
7330
|
if (change.type === "added" || change.type === "replaced") {
|
|
6242
7331
|
const hash = change.range.hash;
|
|
6243
7332
|
if (hash !== selfHash) {
|
|
7333
|
+
// Existing peers can widen/shift ranges after the initial join. If we
|
|
7334
|
+
// only rescan on first-seen "added", late authoritative range updates can
|
|
7335
|
+
// leave historical backfill permanently partial under load.
|
|
7336
|
+
authoritativeRepairPeers.add(hash);
|
|
6244
7337
|
// Range updates can reassign entries to an existing peer shortly after it
|
|
6245
7338
|
// already received a subset. Avoid suppressing legitimate follow-up repair.
|
|
6246
7339
|
this._recentRepairDispatch.delete(hash);
|
|
@@ -6277,26 +7370,34 @@ export class SharedLog<
|
|
|
6277
7370
|
string,
|
|
6278
7371
|
Map<string, EntryReplicated<any>>
|
|
6279
7372
|
> = new Map();
|
|
6280
|
-
|
|
6281
|
-
|
|
6282
|
-
|
|
6283
|
-
|
|
6284
|
-
|
|
7373
|
+
const flushUncheckedDeliverTarget = (target: string) => {
|
|
7374
|
+
const entries = uncheckedDeliver.get(target);
|
|
7375
|
+
if (!entries || entries.size === 0) {
|
|
7376
|
+
return;
|
|
7377
|
+
}
|
|
6285
7378
|
const isWarmupTarget = warmupPeers.has(target);
|
|
6286
|
-
const
|
|
7379
|
+
const mode: RepairDispatchMode = forceFreshDelivery
|
|
7380
|
+
? "churn"
|
|
7381
|
+
: isWarmupTarget
|
|
7382
|
+
? "join-warmup"
|
|
7383
|
+
: "join-authoritative";
|
|
6287
7384
|
this.dispatchMaybeMissingEntries(target, entries, {
|
|
6288
|
-
bypassRecentDedupe,
|
|
6289
|
-
|
|
6290
|
-
|
|
6291
|
-
|
|
6292
|
-
|
|
7385
|
+
bypassRecentDedupe: isWarmupTarget || forceFreshDelivery,
|
|
7386
|
+
mode,
|
|
7387
|
+
retryScheduleMs:
|
|
7388
|
+
mode === "join-warmup"
|
|
7389
|
+
? JOIN_WARMUP_RETRY_SCHEDULE_MS
|
|
7390
|
+
: mode === "join-authoritative"
|
|
7391
|
+
? [0]
|
|
7392
|
+
: undefined,
|
|
6293
7393
|
});
|
|
6294
|
-
|
|
6295
|
-
|
|
7394
|
+
uncheckedDeliver.delete(target);
|
|
7395
|
+
};
|
|
6296
7396
|
const queueUncheckedDeliver = (
|
|
6297
7397
|
target: string,
|
|
6298
7398
|
entry: EntryReplicated<any>,
|
|
6299
7399
|
) => {
|
|
7400
|
+
churnRepairPeers.add(target);
|
|
6300
7401
|
let set = uncheckedDeliver.get(target);
|
|
6301
7402
|
if (!set) {
|
|
6302
7403
|
set = new Map();
|
|
@@ -6320,74 +7421,85 @@ export class SharedLog<
|
|
|
6320
7421
|
forceFresh: forceFreshDelivery || useJoinWarmupFastPath,
|
|
6321
7422
|
},
|
|
6322
7423
|
)) {
|
|
6323
|
-
|
|
6324
|
-
|
|
6325
|
-
}
|
|
6326
|
-
|
|
6327
|
-
if (useJoinWarmupFastPath) {
|
|
6328
|
-
let oldPeersSet: Set<string> | undefined;
|
|
6329
|
-
const gid = entryReplicated.gid;
|
|
6330
|
-
oldPeersSet = gidPeersHistorySnapshot.get(gid);
|
|
6331
|
-
if (!gidPeersHistorySnapshot.has(gid)) {
|
|
6332
|
-
const existing = this._gidPeersHistory.get(gid);
|
|
6333
|
-
oldPeersSet = existing ? new Set(existing) : undefined;
|
|
6334
|
-
gidPeersHistorySnapshot.set(gid, oldPeersSet);
|
|
7424
|
+
if (this.closed) {
|
|
7425
|
+
break;
|
|
6335
7426
|
}
|
|
6336
7427
|
|
|
6337
|
-
|
|
6338
|
-
|
|
6339
|
-
|
|
7428
|
+
if (useJoinWarmupFastPath) {
|
|
7429
|
+
let oldPeersSet: Set<string> | undefined;
|
|
7430
|
+
const gid = entryReplicated.gid;
|
|
7431
|
+
oldPeersSet = gidPeersHistorySnapshot.get(gid);
|
|
7432
|
+
if (!gidPeersHistorySnapshot.has(gid)) {
|
|
7433
|
+
const existing = this._gidPeersHistory.get(gid);
|
|
7434
|
+
oldPeersSet = existing ? new Set(existing) : undefined;
|
|
7435
|
+
gidPeersHistorySnapshot.set(gid, oldPeersSet);
|
|
7436
|
+
}
|
|
6340
7437
|
|
|
6341
|
-
|
|
6342
|
-
|
|
6343
|
-
candidatePeers.add(target);
|
|
6344
|
-
}
|
|
6345
|
-
if (oldPeersSet) {
|
|
6346
|
-
for (const oldPeer of oldPeersSet) {
|
|
6347
|
-
candidatePeers.add(oldPeer);
|
|
7438
|
+
for (const target of warmupPeers) {
|
|
7439
|
+
queueUncheckedDeliver(target, entryReplicated);
|
|
6348
7440
|
}
|
|
6349
|
-
}
|
|
6350
7441
|
|
|
6351
|
-
|
|
6352
|
-
|
|
6353
|
-
|
|
6354
|
-
|
|
6355
|
-
|
|
6356
|
-
|
|
6357
|
-
|
|
6358
|
-
|
|
6359
|
-
|
|
7442
|
+
const candidatePeers = new Set<string>([selfHash]);
|
|
7443
|
+
for (const target of warmupPeers) {
|
|
7444
|
+
candidatePeers.add(target);
|
|
7445
|
+
}
|
|
7446
|
+
if (oldPeersSet) {
|
|
7447
|
+
for (const oldPeer of oldPeersSet) {
|
|
7448
|
+
candidatePeers.add(oldPeer);
|
|
7449
|
+
}
|
|
7450
|
+
}
|
|
6360
7451
|
|
|
6361
|
-
|
|
6362
|
-
|
|
6363
|
-
|
|
6364
|
-
|
|
7452
|
+
const currentPeers = await this.findLeaders(
|
|
7453
|
+
entryReplicated.coordinates,
|
|
7454
|
+
entryReplicated,
|
|
7455
|
+
{
|
|
7456
|
+
roleAge: 0,
|
|
7457
|
+
candidates: candidatePeers,
|
|
7458
|
+
persist: false,
|
|
7459
|
+
},
|
|
7460
|
+
);
|
|
7461
|
+
|
|
7462
|
+
if (oldPeersSet) {
|
|
7463
|
+
for (const oldPeer of oldPeersSet) {
|
|
7464
|
+
if (!currentPeers.has(oldPeer)) {
|
|
7465
|
+
this.removePruneRequestSent(entryReplicated.hash);
|
|
7466
|
+
}
|
|
6365
7467
|
}
|
|
6366
7468
|
}
|
|
6367
|
-
}
|
|
6368
7469
|
|
|
6369
|
-
|
|
6370
|
-
|
|
6371
|
-
|
|
6372
|
-
|
|
6373
|
-
|
|
7470
|
+
for (const [peer] of currentPeers) {
|
|
7471
|
+
if (warmupPeers.has(peer)) {
|
|
7472
|
+
this.markRepairSweepOptimisticPeer(entryReplicated.gid, peer);
|
|
7473
|
+
}
|
|
7474
|
+
}
|
|
6374
7475
|
|
|
6375
|
-
|
|
6376
|
-
|
|
6377
|
-
|
|
6378
|
-
|
|
6379
|
-
|
|
7476
|
+
const authoritativePeers = [...currentPeers.keys()].filter(
|
|
7477
|
+
(peer) =>
|
|
7478
|
+
!warmupPeers.has(peer) &&
|
|
7479
|
+
!this.hasPendingRepairSweepOptimisticPeer(entryReplicated.gid, peer),
|
|
7480
|
+
);
|
|
7481
|
+
this.addPeersToGidPeerHistory(
|
|
7482
|
+
entryReplicated.gid,
|
|
7483
|
+
authoritativePeers,
|
|
7484
|
+
true,
|
|
7485
|
+
);
|
|
6380
7486
|
|
|
6381
|
-
|
|
6382
|
-
|
|
6383
|
-
|
|
6384
|
-
|
|
6385
|
-
|
|
6386
|
-
|
|
6387
|
-
|
|
7487
|
+
if (!currentPeers.has(selfHash)) {
|
|
7488
|
+
this.pruneDebouncedFnAddIfNotKeeping({
|
|
7489
|
+
key: entryReplicated.hash,
|
|
7490
|
+
value: { entry: entryReplicated, leaders: currentPeers },
|
|
7491
|
+
});
|
|
7492
|
+
|
|
7493
|
+
this.responseToPruneDebouncedFn.delete(entryReplicated.hash);
|
|
7494
|
+
} else {
|
|
7495
|
+
this.pruneDebouncedFn.delete(entryReplicated.hash);
|
|
7496
|
+
await this._pendingDeletes
|
|
7497
|
+
.get(entryReplicated.hash)
|
|
7498
|
+
?.reject(new Error("Failed to delete, is leader again"));
|
|
7499
|
+
this.removePruneRequestSent(entryReplicated.hash);
|
|
7500
|
+
}
|
|
7501
|
+
continue;
|
|
6388
7502
|
}
|
|
6389
|
-
continue;
|
|
6390
|
-
}
|
|
6391
7503
|
|
|
6392
7504
|
let oldPeersSet: Set<string> | undefined;
|
|
6393
7505
|
const gid = entryReplicated.gid;
|
|
@@ -6421,19 +7533,30 @@ export class SharedLog<
|
|
|
6421
7533
|
}
|
|
6422
7534
|
}
|
|
6423
7535
|
|
|
6424
|
-
|
|
6425
|
-
|
|
6426
|
-
|
|
6427
|
-
|
|
7536
|
+
if (oldPeersSet) {
|
|
7537
|
+
for (const oldPeer of oldPeersSet) {
|
|
7538
|
+
if (!currentPeers.has(oldPeer)) {
|
|
7539
|
+
this.removePruneRequestSent(entryReplicated.hash);
|
|
7540
|
+
}
|
|
6428
7541
|
}
|
|
6429
7542
|
}
|
|
6430
|
-
}
|
|
6431
7543
|
|
|
6432
|
-
|
|
6433
|
-
|
|
6434
|
-
|
|
6435
|
-
|
|
6436
|
-
|
|
7544
|
+
for (const [peer] of currentPeers) {
|
|
7545
|
+
if (addedPeers.has(peer)) {
|
|
7546
|
+
this.markRepairSweepOptimisticPeer(entryReplicated.gid, peer);
|
|
7547
|
+
}
|
|
7548
|
+
}
|
|
7549
|
+
|
|
7550
|
+
const authoritativePeers = [...currentPeers.keys()].filter(
|
|
7551
|
+
(peer) =>
|
|
7552
|
+
!addedPeers.has(peer) &&
|
|
7553
|
+
!this.hasPendingRepairSweepOptimisticPeer(entryReplicated.gid, peer),
|
|
7554
|
+
);
|
|
7555
|
+
this.addPeersToGidPeerHistory(
|
|
7556
|
+
entryReplicated.gid,
|
|
7557
|
+
authoritativePeers,
|
|
7558
|
+
true,
|
|
7559
|
+
);
|
|
6437
7560
|
|
|
6438
7561
|
if (!isLeader) {
|
|
6439
7562
|
this.pruneDebouncedFnAddIfNotKeeping({
|
|
@@ -6452,9 +7575,18 @@ export class SharedLog<
|
|
|
6452
7575
|
}
|
|
6453
7576
|
}
|
|
6454
7577
|
|
|
7578
|
+
if (this._isAdaptiveReplicating && hasSelfRangeRemoval) {
|
|
7579
|
+
await this.pruneIndexedEntriesNoLongerLed();
|
|
7580
|
+
}
|
|
7581
|
+
|
|
6455
7582
|
if (forceFreshDelivery) {
|
|
6456
|
-
//
|
|
6457
|
-
|
|
7583
|
+
// Pure leave/shrink churn can have zero `addedPeers`, but the peers that
|
|
7584
|
+
// received redistributed entries still need a follow-up repair pass if the
|
|
7585
|
+
// immediate maybe-sync misses one entry.
|
|
7586
|
+
this.scheduleRepairSweep({
|
|
7587
|
+
mode: "churn",
|
|
7588
|
+
peers: churnRepairPeers,
|
|
7589
|
+
});
|
|
6458
7590
|
} else if (useJoinWarmupFastPath) {
|
|
6459
7591
|
// Pure join warmup uses the cheap immediate maybe-missing dispatch above,
|
|
6460
7592
|
// then defers the authoritative sweep so it does not compete with the
|
|
@@ -6466,19 +7598,23 @@ export class SharedLog<
|
|
|
6466
7598
|
return;
|
|
6467
7599
|
}
|
|
6468
7600
|
this.scheduleRepairSweep({
|
|
6469
|
-
|
|
6470
|
-
|
|
7601
|
+
mode: "join-warmup",
|
|
7602
|
+
peers,
|
|
6471
7603
|
});
|
|
6472
7604
|
}, 250);
|
|
6473
7605
|
timer.unref?.();
|
|
6474
7606
|
this._repairRetryTimers.add(timer);
|
|
6475
|
-
} else if (
|
|
7607
|
+
} else if (authoritativeRepairPeers.size > 0) {
|
|
6476
7608
|
this.scheduleRepairSweep({
|
|
6477
|
-
|
|
6478
|
-
|
|
7609
|
+
mode: "join-authoritative",
|
|
7610
|
+
peers: authoritativeRepairPeers,
|
|
6479
7611
|
});
|
|
6480
7612
|
}
|
|
6481
7613
|
|
|
7614
|
+
if (!forceFreshDelivery && authoritativeRepairPeers.size > 0) {
|
|
7615
|
+
this.scheduleJoinAuthoritativeRepair(authoritativeRepairPeers);
|
|
7616
|
+
}
|
|
7617
|
+
|
|
6482
7618
|
for (const target of [...uncheckedDeliver.keys()]) {
|
|
6483
7619
|
flushUncheckedDeliverTarget(target);
|
|
6484
7620
|
}
|
|
@@ -6585,6 +7721,13 @@ export class SharedLog<
|
|
|
6585
7721
|
return; // not allowed to replicate
|
|
6586
7722
|
}
|
|
6587
7723
|
|
|
7724
|
+
if (
|
|
7725
|
+
this.replicationController.maxMemoryLimit != null &&
|
|
7726
|
+
usedMemory > this.replicationController.maxMemoryLimit
|
|
7727
|
+
) {
|
|
7728
|
+
await this.pruneIndexedEntriesNoLongerLed();
|
|
7729
|
+
}
|
|
7730
|
+
|
|
6588
7731
|
const peersSize = (await peers.getSize()) || 1;
|
|
6589
7732
|
const totalParticipation = await this.calculateTotalParticipation();
|
|
6590
7733
|
|