@peerbit/shared-log 13.1.0 → 13.1.1
package/src/index.ts CHANGED
@@ -168,7 +168,7 @@ import type {
168
168
  Syncronizer,
169
169
  } from "./sync/index.js";
170
170
  import { RatelessIBLTSynchronizer } from "./sync/rateless-iblt.js";
171
- import { SimpleSyncronizer } from "./sync/simple.js";
171
+ import { ConfirmEntriesMessage, SimpleSyncronizer } from "./sync/simple.js";
172
172
  import { groupByGid } from "./utils.js";
173
173
 
174
174
  const toLocalPublicSignKey = (
@@ -468,6 +468,7 @@ export type SharedLogOptions<
468
468
  waitForReplicatorRequestMaxAttempts?: number;
469
469
  waitForPruneDelay?: number;
470
470
  distributionDebounceTime?: number;
471
+ strictFullReplicaFallback?: boolean;
471
472
  compatibility?: number;
472
473
  domain?: ReplicationDomainConstructor<D>;
473
474
  eagerBlocks?: boolean | { cacheSize?: number };
@@ -513,10 +514,143 @@ const REPLICATOR_LIVENESS_PROBE_FAILURES_TO_EVICT = 2;
513
514
  // Churn/join repair can race with pruning and transient missed sync requests under
514
515
  // heavy event-loop load. Keep retries alive with a longer tail so reassigned
515
516
  // entries are retried after short bursts and slower recovery windows.
516
- const FORCE_FRESH_RETRY_SCHEDULE_MS = [
517
+ const CHURN_REPAIR_RETRY_SCHEDULE_MS = [
517
518
  0, 1_000, 3_000, 7_000, 15_000, 30_000, 45_000,
518
519
  ];
519
- const JOIN_WARMUP_RETRY_SCHEDULE_MS = [0, 1_000, 3_000, 7_000, 15_000];
520
+ const JOIN_WARMUP_RETRY_SCHEDULE_MS = [
521
+ 0,
522
+ 1_000,
523
+ 3_000,
524
+ 7_000,
525
+ 15_000,
526
+ 30_000,
527
+ 60_000,
528
+ ];
529
+ const JOIN_AUTHORITATIVE_RETRY_SCHEDULE_MS = [
530
+ 0,
531
+ 1_000,
532
+ 3_000,
533
+ 7_000,
534
+ 15_000,
535
+ 30_000,
536
+ 60_000,
537
+ ];
538
+ const APPEND_BACKFILL_RETRY_SCHEDULE_MS = [0, 1_000, 3_000, 7_000];
539
+ const JOIN_AUTHORITATIVE_REPAIR_DELAY_MS = 2_000;
540
+ const JOIN_AUTHORITATIVE_REPAIR_SWEEP_DELAYS_MS = [
541
+ JOIN_AUTHORITATIVE_REPAIR_DELAY_MS,
542
+ 7_000,
543
+ 15_000,
544
+ 30_000,
545
+ ];
546
+ const APPEND_BACKFILL_DELAY_MS = 500;
547
+ const ASSUME_SYNCED_REPAIR_SUPPRESSION_MS = 5_000;
548
+ const REPAIR_CONFIRMATION_HASH_BATCH_SIZE = 1_024;
549
+
550
+ type RepairDispatchMode =
551
+ | "join-warmup"
552
+ | "join-authoritative"
553
+ | "append-backfill"
554
+ | "churn";
555
+ type RepairTransportMode = "rateless" | "simple";
556
+ type RepairMetricBucket = {
557
+ dispatches: number;
558
+ entries: number;
559
+ ratelessFirstPasses: number;
560
+ simpleFallbackPasses: number;
561
+ };
562
+ type RepairMetrics = Record<RepairDispatchMode, RepairMetricBucket>;
563
+
564
+ const REPAIR_DISPATCH_MODES: RepairDispatchMode[] = [
565
+ "join-warmup",
566
+ "join-authoritative",
567
+ "append-backfill",
568
+ "churn",
569
+ ];
570
+
571
+ const createRepairMetricBucket = (): RepairMetricBucket => ({
572
+ dispatches: 0,
573
+ entries: 0,
574
+ ratelessFirstPasses: 0,
575
+ simpleFallbackPasses: 0,
576
+ });
577
+
578
+ const createRepairMetrics = (): RepairMetrics => ({
579
+ "join-warmup": createRepairMetricBucket(),
580
+ "join-authoritative": createRepairMetricBucket(),
581
+ "append-backfill": createRepairMetricBucket(),
582
+ churn: createRepairMetricBucket(),
583
+ });
584
+
585
+ const createRepairPendingPeersByMode = () =>
586
+ new Map<RepairDispatchMode, Set<string>>(
587
+ REPAIR_DISPATCH_MODES.map((mode) => [mode, new Set<string>()]),
588
+ );
589
+
590
+ const cloneRepairPendingPeersByMode = (
591
+ pending: Map<RepairDispatchMode, Set<string>>,
592
+ ) =>
593
+ new Map<RepairDispatchMode, Set<string>>(
594
+ REPAIR_DISPATCH_MODES.map((mode) => [mode, new Set(pending.get(mode) ?? [])]),
595
+ );
596
+
597
+ const createRepairFrontierByMode = () =>
598
+ new Map<
599
+ RepairDispatchMode,
600
+ Map<string, Map<string, EntryReplicated<any>>>
601
+ >(REPAIR_DISPATCH_MODES.map((mode) => [mode, new Map()]));
602
+
603
+ const createRepairActiveTargetsByMode = () =>
604
+ new Map<RepairDispatchMode, Set<string>>(
605
+ REPAIR_DISPATCH_MODES.map((mode) => [mode, new Set()]),
606
+ );
607
+
608
+ const getRepairRetrySchedule = (mode: RepairDispatchMode) => {
609
+ switch (mode) {
610
+ case "join-warmup":
611
+ return JOIN_WARMUP_RETRY_SCHEDULE_MS;
612
+ case "join-authoritative":
613
+ return JOIN_AUTHORITATIVE_RETRY_SCHEDULE_MS;
614
+ case "append-backfill":
615
+ return APPEND_BACKFILL_RETRY_SCHEDULE_MS;
616
+ case "churn":
617
+ return CHURN_REPAIR_RETRY_SCHEDULE_MS;
618
+ }
619
+ };
620
+
621
+ const resolveRepairRetrySchedule = (
622
+ mode: RepairDispatchMode,
623
+ override?: number[],
624
+ trackedFrontier = false,
625
+ ) => {
626
+ const fallback = getRepairRetrySchedule(mode);
627
+ if (!override || override.length === 0) {
628
+ return fallback;
629
+ }
630
+ if (
631
+ trackedFrontier &&
632
+ override.length === 1 &&
633
+ override[0] === 0 &&
634
+ fallback.length > 1
635
+ ) {
636
+ // A tracked frontier with only an immediate retry would otherwise stay on
637
+ // attempt 0 forever, which means rateless-only retries and no sparse-tail
638
+ // simple fallback. Keep the immediate seed, then continue with the normal
639
+ // tracked repair schedule.
640
+ return [0, ...fallback.slice(1)];
641
+ }
642
+ return override;
643
+ };
644
+
645
+ const getRepairTransportForAttempt = (
646
+ mode: RepairDispatchMode,
647
+ attemptIndex: number,
648
+ ): RepairTransportMode => {
649
+ if (mode === "churn") {
650
+ return "simple";
651
+ }
652
+ return attemptIndex === 0 ? "rateless" : "simple";
653
+ };
520
654
 
521
655
  const toPositiveInteger = (
522
656
  value: number | undefined,
@@ -751,8 +885,24 @@ export class SharedLog<
751
885
  private _repairRetryTimers!: Set<ReturnType<typeof setTimeout>>;
752
886
  private _recentRepairDispatch!: Map<string, Map<string, number>>;
753
887
  private _repairSweepRunning!: boolean;
754
- private _repairSweepForceFreshPending!: boolean;
755
- private _repairSweepAddedPeersPending!: Set<string>;
888
+ private _repairSweepPendingModes!: Set<RepairDispatchMode>;
889
+ private _repairSweepPendingPeersByMode!: Map<RepairDispatchMode, Set<string>>;
890
+ private _repairFrontierByMode!: Map<
891
+ RepairDispatchMode,
892
+ Map<string, Map<string, EntryReplicated<R>>>
893
+ >;
894
+ private _repairFrontierActiveTargetsByMode!: Map<RepairDispatchMode, Set<string>>;
895
+ private _repairSweepOptimisticGidPeersPending!: Map<string, Map<string, number>>;
896
+ private _entryKnownPeers!: Map<string, Set<string>>;
897
+ private _joinAuthoritativeRepairTimersByDelay!: Map<
898
+ number,
899
+ ReturnType<typeof setTimeout>
900
+ >;
901
+ private _joinAuthoritativeRepairPeersByDelay!: Map<number, Set<string>>;
902
+ private _assumeSyncedRepairSuppressedUntil!: number;
903
+ private _appendBackfillTimer?: ReturnType<typeof setTimeout>;
904
+ private _appendBackfillPendingByTarget!: Map<string, Map<string, EntryReplicated<R>>>;
905
+ private _repairMetrics!: RepairMetrics;
756
906
  private _topicSubscribersCache!: Map<
757
907
  string,
758
908
  { expiresAt: number; keys: PublicSignKey[] }
@@ -1187,6 +1337,7 @@ export class SharedLog<
1187
1337
 
1188
1338
  private async _appendDeliverToReplicators(
1189
1339
  entry: Entry<T>,
1340
+ coordinates: NumberFromType<R>[],
1190
1341
  minReplicasValue: number,
1191
1342
  leaders: Map<string, any>,
1192
1343
  selfHash: string,
@@ -1204,11 +1355,35 @@ export class SharedLog<
1204
1355
  ? { timeoutMs: delivery.timeout, signal: delivery.signal }
1205
1356
  : undefined;
1206
1357
 
1358
+ const fullReplicaDeliveryCandidates =
1359
+ await this.getFullReplicaRepairCandidates(undefined, {
1360
+ includeSubscribers: false,
1361
+ });
1362
+ if (minReplicasValue >= Math.max(1, fullReplicaDeliveryCandidates.size)) {
1363
+ for (const peer of fullReplicaDeliveryCandidates) {
1364
+ if (!leaders.has(peer)) {
1365
+ leaders.set(peer, { intersecting: true });
1366
+ }
1367
+ }
1368
+ }
1369
+
1370
+ const entryReplicatedForRepair = this.createEntryReplicatedForRepair({
1371
+ entry,
1372
+ coordinates,
1373
+ leaders: leaders as Map<string, { intersecting: boolean }>,
1374
+ replicas: minReplicasValue,
1375
+ });
1207
1376
  for await (const message of createExchangeHeadsMessages(this.log, [entry])) {
1208
1377
  await this._mergeLeadersFromGidReferences(message, minReplicasValue, leaders);
1209
- const leadersForDelivery = delivery ? new Set(leaders.keys()) : undefined;
1378
+ const authoritativeRecipients = new Set(leaders.keys());
1379
+ const leadersForDelivery = delivery
1380
+ ? new Set(authoritativeRecipients)
1381
+ : undefined;
1210
1382
 
1211
- const set = this.addPeersToGidPeerHistory(entry.meta.gid, leaders.keys());
1383
+ // Outbound append delivery only tells us who we intend to send to, not who has
1384
+ // actually stored the entry. Keep this recipient set local so later repair
1385
+ // sweeps can still backfill peers that missed the initial delivery.
1386
+ const set = new Set(leaders.keys());
1212
1387
  let hasRemotePeers = set.has(selfHash) ? set.size > 1 : set.size > 0;
1213
1388
  const allowSubscriberFallback =
1214
1389
  this.syncronizer instanceof SimpleSyncronizer ||
@@ -1239,6 +1414,17 @@ export class SharedLog<
1239
1414
  }
1240
1415
 
1241
1416
  if (!delivery) {
1417
+ for (const peer of authoritativeRecipients) {
1418
+ if (peer === selfHash) {
1419
+ continue;
1420
+ }
1421
+ // Default live append delivery is still optimistic. If one remote misses
1422
+ // the initial heads exchange and the caller did not opt into explicit
1423
+ // delivery acks, we still need a targeted backfill source of truth for the
1424
+ // authoritative recipients or one entry can get stuck at 2/3 replicas
1425
+ // forever. Best-effort fallback subscribers are not repair-worthy.
1426
+ this.queueAppendBackfill(peer, entryReplicatedForRepair);
1427
+ }
1242
1428
  this.rpc
1243
1429
  .send(message, {
1244
1430
  mode: isLeader
@@ -1268,6 +1454,7 @@ export class SharedLog<
1268
1454
 
1269
1455
  const ackTo: string[] = [];
1270
1456
  let silentTo: string[] | undefined;
1457
+ const repairTargets = new Set<string>();
1271
1458
  // Default delivery semantics: require enough remote ACKs to reach the requested
1272
1459
  // replication degree (local append counts as 1).
1273
1460
  const defaultMinAcks = Math.max(0, minReplicasValue - 1);
@@ -1279,6 +1466,9 @@ export class SharedLog<
1279
1466
  );
1280
1467
 
1281
1468
  for (const peer of orderedRemoteRecipients) {
1469
+ if (authoritativeRecipients.has(peer)) {
1470
+ repairTargets.add(peer);
1471
+ }
1282
1472
  if (ackTo.length < ackLimit) {
1283
1473
  ackTo.push(peer);
1284
1474
  } else {
@@ -1317,6 +1507,12 @@ export class SharedLog<
1317
1507
  })
1318
1508
  .catch((error) => logger.error(error));
1319
1509
  }
1510
+ for (const peer of repairTargets) {
1511
+ // Direct append delivery is intentionally optimistic. Queue one delayed,
1512
+ // batched maybe-sync pass for the intended recipients so stable 3-peer
1513
+ // append workloads do not depend on perfect first-try delivery ordering.
1514
+ this.queueAppendBackfill(peer, entryReplicatedForRepair);
1515
+ }
1320
1516
  }
1321
1517
 
1322
1518
  if (pending.length > 0) {
@@ -2016,6 +2212,7 @@ export class SharedLog<
2016
2212
  // Keep local sync/prune state consistent even when a peer disappears
2017
2213
  // through replication-info updates without a topic unsubscribe event.
2018
2214
  this.removePeerFromGidPeerHistory(keyHash);
2215
+ this.removeRepairFrontierTarget(keyHash);
2019
2216
  this._recentRepairDispatch.delete(keyHash);
2020
2217
  if (!isMe) {
2021
2218
  this.syncronizer.onPeerDisconnected(keyHash);
@@ -2483,6 +2680,7 @@ export class SharedLog<
2483
2680
  for (const key of this._gidPeersHistory.keys()) {
2484
2681
  this.removePeerFromGidPeerHistory(publicKeyHash, key);
2485
2682
  }
2683
+ this.removePeerFromEntryKnownPeers(publicKeyHash);
2486
2684
  }
2487
2685
  }
2488
2686
 
@@ -2507,19 +2705,448 @@ export class SharedLog<
2507
2705
  return set;
2508
2706
  }
2509
2707
 
2708
+ private markEntriesKnownByPeer(hashes: Iterable<string>, peer: string) {
2709
+ for (const hash of hashes) {
2710
+ let peers = this._entryKnownPeers.get(hash);
2711
+ if (!peers) {
2712
+ peers = new Set();
2713
+ this._entryKnownPeers.set(hash, peers);
2714
+ }
2715
+ peers.add(peer);
2716
+ }
2717
+ }
2718
+
2719
+ private removeEntriesKnownByPeer(hashes: Iterable<string>, peer: string) {
2720
+ for (const hash of hashes) {
2721
+ const peers = this._entryKnownPeers.get(hash);
2722
+ if (!peers) {
2723
+ continue;
2724
+ }
2725
+ peers.delete(peer);
2726
+ if (peers.size === 0) {
2727
+ this._entryKnownPeers.delete(hash);
2728
+ }
2729
+ }
2730
+ }
2731
+
2732
+ private removePeerFromEntryKnownPeers(peer: string) {
2733
+ for (const [hash, peers] of this._entryKnownPeers) {
2734
+ peers.delete(peer);
2735
+ if (peers.size === 0) {
2736
+ this._entryKnownPeers.delete(hash);
2737
+ }
2738
+ }
2739
+ }
2740
+
2741
+ private isEntryKnownByPeer(hash: string, peer: string) {
2742
+ return this._entryKnownPeers.get(hash)?.has(peer) === true;
2743
+ }
2744
+
2745
+ private markRepairSweepOptimisticPeer(gid: string, peer: string) {
2746
+ let peers = this._repairSweepOptimisticGidPeersPending.get(gid);
2747
+ if (!peers) {
2748
+ peers = new Map();
2749
+ this._repairSweepOptimisticGidPeersPending.set(gid, peers);
2750
+ }
2751
+ peers.set(peer, (peers.get(peer) || 0) + 1);
2752
+ }
2753
+
2754
+ private hasPendingRepairSweepOptimisticPeer(gid: string, peer: string) {
2755
+ return (this._repairSweepOptimisticGidPeersPending.get(gid)?.get(peer) || 0) > 0;
2756
+ }
2757
+
2758
+ private createEntryReplicatedForRepair(properties: {
2759
+ entry: Entry<T>;
2760
+ coordinates: NumberFromType<R>[];
2761
+ leaders: Map<string, { intersecting: boolean }>;
2762
+ replicas: number;
2763
+ }) {
2764
+ const assignedToRangeBoundary = shouldAssignToRangeBoundary(
2765
+ properties.leaders,
2766
+ properties.replicas,
2767
+ );
2768
+ const cidObject = cidifyString(properties.entry.hash);
2769
+ const hashNumber = this.indexableDomain.numbers.bytesToNumber(
2770
+ cidObject.multihash.digest,
2771
+ );
2772
+ return new this.indexableDomain.constructorEntry({
2773
+ assignedToRangeBoundary,
2774
+ coordinates: properties.coordinates,
2775
+ meta: properties.entry.meta,
2776
+ hash: properties.entry.hash,
2777
+ hashNumber,
2778
+ });
2779
+ }
2780
+
2781
+ private isAssumeSyncedRepairSuppressed() {
2782
+ return this._assumeSyncedRepairSuppressedUntil > Date.now();
2783
+ }
2784
+
2785
+ private isFrontierTrackedRepairMode(mode: RepairDispatchMode) {
2786
+ return mode !== "join-warmup";
2787
+ }
2788
+
2789
+ private async sleepTracked(delayMs: number) {
2790
+ if (delayMs <= 0) {
2791
+ return;
2792
+ }
2793
+ await new Promise<void>((resolve) => {
2794
+ const timer = setTimeout(() => {
2795
+ this._repairRetryTimers.delete(timer);
2796
+ resolve();
2797
+ }, delayMs);
2798
+ timer.unref?.();
2799
+ this._repairRetryTimers.add(timer);
2800
+ });
2801
+ }
2802
+
2803
+ private queueRepairFrontierEntries(
2804
+ mode: RepairDispatchMode,
2805
+ target: string,
2806
+ entries: Map<string, EntryReplicated<R>>,
2807
+ ) {
2808
+ let targets = this._repairFrontierByMode.get(mode);
2809
+ if (!targets) {
2810
+ targets = new Map();
2811
+ this._repairFrontierByMode.set(mode, targets);
2812
+ }
2813
+ let pending = targets.get(target);
2814
+ if (!pending) {
2815
+ pending = new Map();
2816
+ targets.set(target, pending);
2817
+ }
2818
+ for (const [hash, entry] of entries) {
2819
+ pending.set(hash, entry);
2820
+ }
2821
+ }
2822
+
2823
+ private clearRepairFrontierHashes(target: string, hashes: Iterable<string>) {
2824
+ const hashList = [...hashes];
2825
+ if (hashList.length === 0) {
2826
+ return;
2827
+ }
2828
+ for (const mode of REPAIR_DISPATCH_MODES) {
2829
+ const pending = this._repairFrontierByMode.get(mode)?.get(target);
2830
+ if (!pending) {
2831
+ continue;
2832
+ }
2833
+ for (const hash of hashList) {
2834
+ pending.delete(hash);
2835
+ }
2836
+ if (pending.size === 0) {
2837
+ this._repairFrontierByMode.get(mode)?.delete(target);
2838
+ }
2839
+ }
2840
+ }
2841
+
2842
+ private async getFullReplicaRepairCandidates(
2843
+ extraPeers?: Iterable<string>,
2844
+ options?: { includeSubscribers?: boolean },
2845
+ ) {
2846
+ const candidates = new Set<string>([
2847
+ this.node.identity.publicKey.hashcode(),
2848
+ ]);
2849
+ try {
2850
+ for (const peer of await this.getReplicators()) {
2851
+ candidates.add(peer);
2852
+ }
2853
+ } catch {
2854
+ for (const peer of this.uniqueReplicators) {
2855
+ candidates.add(peer);
2856
+ }
2857
+ }
2858
+ for (const peer of extraPeers ?? []) {
2859
+ candidates.add(peer);
2860
+ }
2861
+ if (options?.includeSubscribers !== false) {
2862
+ try {
2863
+ for (const subscriber of (await this._getTopicSubscribers(this.topic)) ?? []) {
2864
+ candidates.add(subscriber.hashcode());
2865
+ }
2866
+ } catch {
2867
+ // Best-effort only; explicit repair peers still keep the path safe.
2868
+ }
2869
+ }
2870
+ return candidates;
2871
+ }
2872
+
2873
+ private removeRepairFrontierTarget(target: string) {
2874
+ for (const mode of REPAIR_DISPATCH_MODES) {
2875
+ this._repairFrontierByMode.get(mode)?.delete(target);
2876
+ this._repairFrontierActiveTargetsByMode.get(mode)?.delete(target);
2877
+ }
2878
+ }
2879
+
2880
+ private async sendRepairConfirmation(
2881
+ target: PublicSignKey,
2882
+ hashes: Iterable<string>,
2883
+ ) {
2884
+ const uniqueHashes = [...new Set(hashes)];
2885
+ for (let i = 0; i < uniqueHashes.length; i += REPAIR_CONFIRMATION_HASH_BATCH_SIZE) {
2886
+ const chunk = uniqueHashes.slice(
2887
+ i,
2888
+ i + REPAIR_CONFIRMATION_HASH_BATCH_SIZE,
2889
+ );
2890
+ await this.rpc.send(new ConfirmEntriesMessage({ hashes: chunk }), {
2891
+ priority: 1,
2892
+ mode: new SilentDelivery({ to: [target], redundancy: 1 }),
2893
+ });
2894
+ }
2895
+ }
2896
+
2897
+ private async pushRepairEntries(
2898
+ target: string,
2899
+ entries: Map<string, EntryReplicated<R>>,
2900
+ ) {
2901
+ for await (const message of createExchangeHeadsMessages(
2902
+ this.log,
2903
+ [...entries.keys()],
2904
+ )) {
2905
+ await this.rpc.send(message, {
2906
+ priority: 1,
2907
+ mode: new SilentDelivery({ to: [target], redundancy: 1 }),
2908
+ });
2909
+ }
2910
+ }
2911
+
2912
+ private async sendRepairEntriesWithTransport(
2913
+ target: string,
2914
+ entries: Map<string, EntryReplicated<R>>,
2915
+ transport: RepairTransportMode,
2916
+ options?: { bypassKnownPeers?: boolean },
2917
+ ) {
2918
+ const unknownEntries = new Map<string, EntryReplicated<R>>();
2919
+ const knownHashes: string[] = [];
2920
+ for (const [hash, entry] of entries) {
2921
+ if (options?.bypassKnownPeers || !this.isEntryKnownByPeer(hash, target)) {
2922
+ unknownEntries.set(hash, entry);
2923
+ } else {
2924
+ knownHashes.push(hash);
2925
+ }
2926
+ }
2927
+ this.clearRepairFrontierHashes(target, knownHashes);
2928
+ if (unknownEntries.size === 0) {
2929
+ return;
2930
+ }
2931
+ if (transport === "simple") {
2932
+ // Fallback repair should not depend on the target completing the
2933
+ // RequestMaybeSync -> ResponseMaybeSync round trip.
2934
+ await this.pushRepairEntries(target, unknownEntries);
2935
+ return;
2936
+ }
2937
+
2938
+ await this.syncronizer.onMaybeMissingEntries({
2939
+ entries: unknownEntries,
2940
+ targets: [target],
2941
+ });
2942
+ }
2943
+
2944
+ private async sendMaybeMissingEntriesNow(
2945
+ target: string,
2946
+ entries: Map<string, EntryReplicated<R>>,
2947
+ options: {
2948
+ mode: RepairDispatchMode;
2949
+ transport: RepairTransportMode;
2950
+ bypassRecentDedupe?: boolean;
2951
+ },
2952
+ ) {
2953
+ if (entries.size === 0) {
2954
+ return;
2955
+ }
2956
+
2957
+ const now = Date.now();
2958
+ let recentlyDispatchedByHash = this._recentRepairDispatch.get(target);
2959
+ if (!recentlyDispatchedByHash) {
2960
+ recentlyDispatchedByHash = new Map();
2961
+ this._recentRepairDispatch.set(target, recentlyDispatchedByHash);
2962
+ }
2963
+ for (const [hash, ts] of recentlyDispatchedByHash) {
2964
+ if (now - ts > RECENT_REPAIR_DISPATCH_TTL_MS) {
2965
+ recentlyDispatchedByHash.delete(hash);
2966
+ }
2967
+ }
2968
+
2969
+ const filteredEntries =
2970
+ options.bypassRecentDedupe === true
2971
+ ? new Map(entries)
2972
+ : new Map<string, EntryReplicated<any>>();
2973
+ if (options.bypassRecentDedupe !== true) {
2974
+ for (const [hash, entry] of entries) {
2975
+ const prev = recentlyDispatchedByHash.get(hash);
2976
+ if (prev != null && now - prev <= RECENT_REPAIR_DISPATCH_TTL_MS) {
2977
+ continue;
2978
+ }
2979
+ recentlyDispatchedByHash.set(hash, now);
2980
+ filteredEntries.set(hash, entry);
2981
+ }
2982
+ } else {
2983
+ for (const hash of entries.keys()) {
2984
+ recentlyDispatchedByHash.set(hash, now);
2985
+ }
2986
+ }
2987
+ if (filteredEntries.size === 0) {
2988
+ return;
2989
+ }
2990
+
2991
+ const bucket = this._repairMetrics[options.mode];
2992
+ bucket.dispatches += 1;
2993
+ bucket.entries += filteredEntries.size;
2994
+ if (options.transport === "simple") {
2995
+ bucket.simpleFallbackPasses += 1;
2996
+ } else {
2997
+ bucket.ratelessFirstPasses += 1;
2998
+ }
2999
+
3000
+ await Promise.resolve(
3001
+ this.sendRepairEntriesWithTransport(
3002
+ target,
3003
+ filteredEntries,
3004
+ options.transport,
3005
+ { bypassKnownPeers: options.mode === "churn" },
3006
+ ),
3007
+ ).catch((error: any) => logger.error(error));
3008
+ }
3009
+
3010
+ private ensureRepairFrontierRunner(
3011
+ mode: RepairDispatchMode,
3012
+ target: string,
3013
+ retryScheduleMs?: number[],
3014
+ ) {
3015
+ const activeTargets = this._repairFrontierActiveTargetsByMode.get(mode);
3016
+ if (!activeTargets || activeTargets.has(target) || this.closed) {
3017
+ return;
3018
+ }
3019
+ activeTargets.add(target);
3020
+ const retrySchedule = resolveRepairRetrySchedule(
3021
+ mode,
3022
+ retryScheduleMs,
3023
+ this.isFrontierTrackedRepairMode(mode),
3024
+ );
3025
+ const steadyStateDelay =
3026
+ retrySchedule.length > 1
3027
+ ? Math.max(1, retrySchedule[retrySchedule.length - 1] - retrySchedule[retrySchedule.length - 2])
3028
+ : Math.max(retrySchedule[0] || 1_000, 1_000);
3029
+
3030
+ void (async () => {
3031
+ let attemptIndex = 0;
3032
+ try {
3033
+ for (;;) {
3034
+ if (this.closed) {
3035
+ return;
3036
+ }
3037
+ const pending = this._repairFrontierByMode.get(mode)?.get(target);
3038
+ if (!pending || pending.size === 0) {
3039
+ return;
3040
+ }
3041
+
3042
+ if (
3043
+ (mode === "join-warmup" || mode === "join-authoritative") &&
3044
+ this.isAssumeSyncedRepairSuppressed()
3045
+ ) {
3046
+ await this.sleepTracked(
3047
+ Math.max(250, this._assumeSyncedRepairSuppressedUntil - Date.now()),
3048
+ );
3049
+ continue;
3050
+ }
3051
+
3052
+ await this.sendMaybeMissingEntriesNow(target, pending, {
3053
+ mode,
3054
+ transport: getRepairTransportForAttempt(mode, attemptIndex),
3055
+ bypassRecentDedupe: true,
3056
+ });
3057
+
3058
+ const remaining = this._repairFrontierByMode.get(mode)?.get(target);
3059
+ if (!remaining || remaining.size === 0) {
3060
+ return;
3061
+ }
3062
+
3063
+ const waitMs =
3064
+ attemptIndex + 1 < retrySchedule.length
3065
+ ? Math.max(0, retrySchedule[attemptIndex + 1] - retrySchedule[attemptIndex])
3066
+ : steadyStateDelay;
3067
+ attemptIndex = Math.min(attemptIndex + 1, retrySchedule.length - 1);
3068
+ await this.sleepTracked(waitMs);
3069
+ }
3070
+ } finally {
3071
+ activeTargets.delete(target);
3072
+ if (
3073
+ !this.closed &&
3074
+ (this._repairFrontierByMode.get(mode)?.get(target)?.size || 0) > 0
3075
+ ) {
3076
+ this.ensureRepairFrontierRunner(mode, target, retryScheduleMs);
3077
+ }
3078
+ }
3079
+ })().catch((error: any) => {
3080
+ activeTargets.delete(target);
3081
+ logger.error(error);
3082
+ });
3083
+ }
3084
+
3085
+ private flushAppendBackfill() {
3086
+ if (this._appendBackfillPendingByTarget.size === 0) {
3087
+ return;
3088
+ }
3089
+ const pending = this._appendBackfillPendingByTarget;
3090
+ this._appendBackfillPendingByTarget = new Map();
3091
+ for (const [target, entries] of pending) {
3092
+ this.dispatchMaybeMissingEntries(target, entries, {
3093
+ mode: "append-backfill",
3094
+ });
3095
+ }
3096
+ }
3097
+
3098
+ private queueAppendBackfill(target: string, entry: EntryReplicated<R>) {
3099
+ let entries = this._appendBackfillPendingByTarget.get(target);
3100
+ if (!entries) {
3101
+ entries = new Map();
3102
+ this._appendBackfillPendingByTarget.set(target, entries);
3103
+ }
3104
+ entries.set(entry.hash, entry);
3105
+ if (entries.size >= this.repairSweepTargetBufferSize) {
3106
+ this.flushAppendBackfill();
3107
+ return;
3108
+ }
3109
+ if (this._appendBackfillTimer || this.closed) {
3110
+ return;
3111
+ }
3112
+ const timer = setTimeout(() => {
3113
+ this._repairRetryTimers.delete(timer);
3114
+ if (this._appendBackfillTimer === timer) {
3115
+ this._appendBackfillTimer = undefined;
3116
+ }
3117
+ if (this.closed) {
3118
+ return;
3119
+ }
3120
+ this.flushAppendBackfill();
3121
+ }, APPEND_BACKFILL_DELAY_MS);
3122
+ timer.unref?.();
3123
+ this._repairRetryTimers.add(timer);
3124
+ this._appendBackfillTimer = timer;
3125
+ }
3126
+
2510
3127
  private dispatchMaybeMissingEntries(
2511
3128
  target: string,
2512
3129
  entries: Map<string, EntryReplicated<R>>,
2513
- options?: {
3130
+ options: {
3131
+ mode: RepairDispatchMode;
2514
3132
  bypassRecentDedupe?: boolean;
2515
3133
  retryScheduleMs?: number[];
2516
- forceFreshDelivery?: boolean;
2517
3134
  },
2518
3135
  ) {
2519
3136
  if (entries.size === 0) {
2520
3137
  return;
2521
3138
  }
2522
3139
 
3140
+ if (this.isFrontierTrackedRepairMode(options.mode)) {
3141
+ this.queueRepairFrontierEntries(options.mode, target, entries);
3142
+ this.ensureRepairFrontierRunner(
3143
+ options.mode,
3144
+ target,
3145
+ options.retryScheduleMs,
3146
+ );
3147
+ return;
3148
+ }
3149
+
2523
3150
  const now = Date.now();
2524
3151
  let recentlyDispatchedByHash = this._recentRepairDispatch.get(target);
2525
3152
  if (!recentlyDispatchedByHash) {
@@ -2533,10 +3160,10 @@ export class SharedLog<
2533
3160
  }
2534
3161
 
2535
3162
  const filteredEntries =
2536
- options?.bypassRecentDedupe === true
3163
+ options.bypassRecentDedupe === true
2537
3164
  ? new Map(entries)
2538
3165
  : new Map<string, EntryReplicated<any>>();
2539
- if (options?.bypassRecentDedupe !== true) {
3166
+ if (options.bypassRecentDedupe !== true) {
2540
3167
  for (const [hash, entry] of entries) {
2541
3168
  const prev = recentlyDispatchedByHash.get(hash);
2542
3169
  if (prev != null && now - prev <= RECENT_REPAIR_DISPATCH_TTL_MS) {
@@ -2553,64 +3180,69 @@ export class SharedLog<
2553
3180
  if (filteredEntries.size === 0) {
2554
3181
  return;
2555
3182
  }
2556
- const retrySchedule =
2557
- options?.retryScheduleMs && options.retryScheduleMs.length > 0
2558
- ? options.retryScheduleMs
2559
- : options?.forceFreshDelivery
2560
- ? FORCE_FRESH_RETRY_SCHEDULE_MS
2561
- : [0];
2562
-
2563
- const run = () => {
2564
- // For force-fresh churn repair we intentionally bypass rateless IBLT and
2565
- // use simple hash-based sync. This path is a directed "push these hashes
2566
- // to that peer" recovery flow; using simple sync here avoids occasional
2567
- // single-hash gaps seen with IBLT-oriented maybe-sync batches under churn.
2568
- if (
2569
- options?.forceFreshDelivery &&
2570
- this.syncronizer instanceof RatelessIBLTSynchronizer
2571
- ) {
2572
- return Promise.resolve(
2573
- this.syncronizer.simple.onMaybeMissingEntries({
2574
- entries: filteredEntries,
2575
- targets: [target],
2576
- }),
2577
- ).catch((error: any) => logger.error(error));
3183
+
3184
+ if (
3185
+ (options.mode === "join-warmup" ||
3186
+ options.mode === "join-authoritative") &&
3187
+ this.isAssumeSyncedRepairSuppressed()
3188
+ ) {
3189
+ return;
3190
+ }
3191
+
3192
+ const retrySchedule = resolveRepairRetrySchedule(
3193
+ options.mode,
3194
+ options.retryScheduleMs,
3195
+ this.isFrontierTrackedRepairMode(options.mode),
3196
+ );
3197
+ const bucket = this._repairMetrics[options.mode];
3198
+ bucket.dispatches += 1;
3199
+ bucket.entries += filteredEntries.size;
3200
+
3201
+ const run = (transport: RepairTransportMode) => {
3202
+ if (transport === "simple") {
3203
+ bucket.simpleFallbackPasses += 1;
3204
+ } else {
3205
+ bucket.ratelessFirstPasses += 1;
2578
3206
  }
2579
3207
 
2580
3208
  return Promise.resolve(
2581
- this.syncronizer.onMaybeMissingEntries({
2582
- entries: filteredEntries,
2583
- targets: [target],
2584
- }),
3209
+ this.sendRepairEntriesWithTransport(
3210
+ target,
3211
+ filteredEntries,
3212
+ transport,
3213
+ { bypassKnownPeers: options.mode === "churn" },
3214
+ ),
2585
3215
  ).catch((error: any) => logger.error(error));
2586
3216
  };
2587
3217
 
2588
- for (const delayMs of retrySchedule) {
3218
+ retrySchedule.forEach((delayMs, index) => {
3219
+ const transport = getRepairTransportForAttempt(options.mode, index);
2589
3220
  if (delayMs === 0) {
2590
- void run();
2591
- continue;
3221
+ void run(transport);
3222
+ return;
2592
3223
  }
2593
3224
  const timer = setTimeout(() => {
2594
3225
  this._repairRetryTimers.delete(timer);
2595
3226
  if (this.closed) {
2596
3227
  return;
2597
3228
  }
2598
- void run();
3229
+ void run(transport);
2599
3230
  }, delayMs);
2600
3231
  timer.unref?.();
2601
3232
  this._repairRetryTimers.add(timer);
2602
- }
3233
+ });
2603
3234
  }
2604
3235
 
2605
3236
  private scheduleRepairSweep(options: {
2606
- forceFreshDelivery: boolean;
2607
- addedPeers: Set<string>;
3237
+ mode: RepairDispatchMode;
3238
+ peers?: Iterable<string>;
2608
3239
  }) {
2609
- if (options.forceFreshDelivery) {
2610
- this._repairSweepForceFreshPending = true;
2611
- }
2612
- for (const peer of options.addedPeers) {
2613
- this._repairSweepAddedPeersPending.add(peer);
3240
+ this._repairSweepPendingModes.add(options.mode);
3241
+ const pendingPeers = this._repairSweepPendingPeersByMode.get(options.mode);
3242
+ if (pendingPeers) {
3243
+ for (const peer of options.peers ?? []) {
3244
+ pendingPeers.add(peer);
3245
+ }
2614
3246
  }
2615
3247
  if (!this._repairSweepRunning && !this.closed) {
2616
3248
  this._repairSweepRunning = true;
@@ -2618,88 +3250,293 @@ export class SharedLog<
2618
3250
  }
2619
3251
  }
2620
3252
 
3253
+ private scheduleJoinAuthoritativeRepair(peers: Set<string>) {
3254
+ if (this.closed || peers.size === 0) {
3255
+ return;
3256
+ }
3257
+
3258
+ for (const delayMs of JOIN_AUTHORITATIVE_REPAIR_SWEEP_DELAYS_MS) {
3259
+ let pendingPeers = this._joinAuthoritativeRepairPeersByDelay.get(delayMs);
3260
+ if (!pendingPeers) {
3261
+ pendingPeers = new Set();
3262
+ this._joinAuthoritativeRepairPeersByDelay.set(delayMs, pendingPeers);
3263
+ }
3264
+ for (const peer of peers) {
3265
+ pendingPeers.add(peer);
3266
+ }
3267
+
3268
+ if (this._joinAuthoritativeRepairTimersByDelay.has(delayMs)) {
3269
+ continue;
3270
+ }
3271
+
3272
+ const timer = setTimeout(() => {
3273
+ this._repairRetryTimers.delete(timer);
3274
+ this._joinAuthoritativeRepairTimersByDelay.delete(delayMs);
3275
+ if (this.closed) {
3276
+ return;
3277
+ }
3278
+
3279
+ const peersForSweep = new Set(
3280
+ this._joinAuthoritativeRepairPeersByDelay.get(delayMs) ?? [],
3281
+ );
3282
+ this._joinAuthoritativeRepairPeersByDelay.delete(delayMs);
3283
+ if (peersForSweep.size === 0) {
3284
+ return;
3285
+ }
3286
+
3287
+ // A joiner's leader view can still be partial on the first delayed pass
3288
+ // under pubsub jitter. Bounded per-peer rescans widen the authoritative
3289
+ // frontier without adding per-append sweeps.
3290
+ this.scheduleRepairSweep({
3291
+ mode: "join-authoritative",
3292
+ peers: peersForSweep,
3293
+ });
3294
+ }, delayMs);
3295
+ timer.unref?.();
3296
+ this._repairRetryTimers.add(timer);
3297
+ this._joinAuthoritativeRepairTimersByDelay.set(delayMs, timer);
3298
+ }
3299
+ }
3300
+
2621
3301
  private async runRepairSweep() {
2622
3302
  try {
2623
3303
  while (!this.closed) {
2624
- const forceFreshDelivery = this._repairSweepForceFreshPending;
2625
- const addedPeers = new Set(this._repairSweepAddedPeersPending);
2626
- this._repairSweepForceFreshPending = false;
2627
- this._repairSweepAddedPeersPending.clear();
3304
+ const pendingModes = new Set(this._repairSweepPendingModes);
3305
+ const pendingPeersByMode = cloneRepairPendingPeersByMode(
3306
+ this._repairSweepPendingPeersByMode,
3307
+ );
3308
+ this._repairSweepPendingModes.clear();
3309
+ for (const peers of this._repairSweepPendingPeersByMode.values()) {
3310
+ peers.clear();
3311
+ }
2628
3312
 
2629
- if (!forceFreshDelivery && addedPeers.size === 0) {
3313
+ if (pendingModes.size === 0) {
2630
3314
  return;
2631
3315
  }
2632
3316
 
2633
- const pendingByTarget = new Map<string, Map<string, EntryReplicated<any>>>();
2634
- const flushTarget = (target: string) => {
2635
- const entries = pendingByTarget.get(target);
3317
+ const optimisticGidPeersByMode = new Map<
3318
+ RepairDispatchMode,
3319
+ Map<string, Set<string>>
3320
+ >();
3321
+ const optimisticGidPeersConsumedByMode = new Map<
3322
+ RepairDispatchMode,
3323
+ Map<string, Map<string, number>>
3324
+ >();
3325
+ for (const mode of pendingModes) {
3326
+ const modePeers = pendingPeersByMode.get(mode);
3327
+ if (!modePeers || modePeers.size === 0) {
3328
+ continue;
3329
+ }
3330
+ const optimisticGidPeers = new Map<string, Set<string>>();
3331
+ const optimisticGidPeersConsumed = new Map<string, Map<string, number>>();
3332
+ for (const [gid, peerCounts] of this._repairSweepOptimisticGidPeersPending) {
3333
+ let matchedPeers: Set<string> | undefined;
3334
+ let matchedCounts: Map<string, number> | undefined;
3335
+ for (const [peer, count] of peerCounts) {
3336
+ if (!modePeers.has(peer)) {
3337
+ continue;
3338
+ }
3339
+ matchedPeers ||= new Set();
3340
+ matchedCounts ||= new Map();
3341
+ matchedPeers.add(peer);
3342
+ matchedCounts.set(peer, count);
3343
+ }
3344
+ if (matchedPeers && matchedCounts) {
3345
+ optimisticGidPeers.set(gid, matchedPeers);
3346
+ optimisticGidPeersConsumed.set(gid, matchedCounts);
3347
+ }
3348
+ }
3349
+ if (optimisticGidPeers.size > 0) {
3350
+ optimisticGidPeersByMode.set(mode, optimisticGidPeers);
3351
+ optimisticGidPeersConsumedByMode.set(mode, optimisticGidPeersConsumed);
3352
+ }
3353
+ }
3354
+
3355
+ const pendingByMode = new Map<
3356
+ RepairDispatchMode,
3357
+ Map<string, Map<string, EntryReplicated<any>>>
3358
+ >(REPAIR_DISPATCH_MODES.map((mode) => [mode, new Map()]));
3359
+ const pendingRepairPeers = new Set<string>();
3360
+ for (const peers of pendingPeersByMode.values()) {
3361
+ for (const peer of peers) {
3362
+ pendingRepairPeers.add(peer);
3363
+ }
3364
+ }
3365
+ const fullReplicaRepairCandidates =
3366
+ await this.getFullReplicaRepairCandidates(pendingRepairPeers, {
3367
+ includeSubscribers: false,
3368
+ });
3369
+ const fullReplicaRepairCandidateCount = Math.max(
3370
+ 1,
3371
+ fullReplicaRepairCandidates.size,
3372
+ );
3373
+ const nextFrontierByMode = new Map<
3374
+ RepairDispatchMode,
3375
+ Map<string, Map<string, EntryReplicated<any>>>
3376
+ >([
3377
+ ["join-authoritative", new Map()],
3378
+ ["churn", new Map()],
3379
+ ]);
3380
+ const flushTarget = (mode: RepairDispatchMode, target: string) => {
3381
+ const targets = pendingByMode.get(mode);
3382
+ const entries = targets?.get(target);
2636
3383
  if (!entries || entries.size === 0) {
2637
3384
  return;
2638
3385
  }
2639
- const isJoinWarmupTarget = addedPeers.has(target);
2640
- const bypassRecentDedupe = isJoinWarmupTarget || forceFreshDelivery;
2641
3386
  this.dispatchMaybeMissingEntries(target, entries, {
2642
- bypassRecentDedupe,
2643
- retryScheduleMs: isJoinWarmupTarget
2644
- ? JOIN_WARMUP_RETRY_SCHEDULE_MS
2645
- : undefined,
2646
- forceFreshDelivery,
3387
+ bypassRecentDedupe: true,
3388
+ mode,
2647
3389
  });
2648
- pendingByTarget.delete(target);
3390
+ targets?.delete(target);
2649
3391
  };
2650
3392
  const queueEntryForTarget = (
3393
+ mode: RepairDispatchMode,
2651
3394
  target: string,
2652
3395
  entry: EntryReplicated<any>,
2653
3396
  ) => {
2654
- let set = pendingByTarget.get(target);
3397
+ const sweepTargets = nextFrontierByMode.get(mode);
3398
+ if (sweepTargets) {
3399
+ let sweepSet = sweepTargets.get(target);
3400
+ if (!sweepSet) {
3401
+ sweepSet = new Map();
3402
+ sweepTargets.set(target, sweepSet);
3403
+ }
3404
+ sweepSet.set(entry.hash, entry);
3405
+ }
3406
+ const targets = pendingByMode.get(mode)!;
3407
+ let set = targets.get(target);
2655
3408
  if (!set) {
2656
3409
  set = new Map();
2657
- pendingByTarget.set(target, set);
3410
+ targets.set(target, set);
2658
3411
  }
2659
3412
  if (set.has(entry.hash)) {
2660
3413
  return;
2661
3414
  }
2662
3415
  set.set(entry.hash, entry);
2663
3416
  if (set.size >= this.repairSweepTargetBufferSize) {
2664
- flushTarget(target);
3417
+ flushTarget(mode, target);
2665
3418
  }
2666
3419
  };
2667
3420
 
2668
3421
  const iterator = this.entryCoordinatesIndex.iterate({});
2669
3422
  try {
2670
- while (!this.closed && !iterator.done()) {
2671
- const entries = await iterator.next(REPAIR_SWEEP_ENTRY_BATCH_SIZE);
2672
- for (const entry of entries) {
2673
- const entryReplicated = entry.value;
2674
- const knownPeers = this._gidPeersHistory.get(entryReplicated.gid);
2675
- const currentPeers = await this.findLeaders(
2676
- entryReplicated.coordinates,
2677
- entryReplicated,
2678
- { roleAge: 0 },
2679
- );
2680
- if (forceFreshDelivery) {
3423
+ while (!this.closed && !iterator.done()) {
3424
+ const entries = await iterator.next(REPAIR_SWEEP_ENTRY_BATCH_SIZE);
3425
+ for (const entry of entries) {
3426
+ const entryReplicated = entry.value;
3427
+ const gid = entryReplicated.gid;
3428
+ const knownPeers = this._gidPeersHistory.get(gid);
3429
+ const requestedReplicas =
3430
+ decodeReplicas(entryReplicated).getValue(this);
3431
+ const currentPeers = await this.findLeaders(
3432
+ entryReplicated.coordinates,
3433
+ entryReplicated,
3434
+ { roleAge: 0 },
3435
+ );
3436
+
3437
+ if (pendingModes.has("churn")) {
2681
3438
  for (const [currentPeer] of currentPeers) {
2682
3439
  if (currentPeer === this.node.identity.publicKey.hashcode()) {
2683
3440
  continue;
2684
3441
  }
2685
- queueEntryForTarget(currentPeer, entryReplicated);
3442
+ queueEntryForTarget("churn", currentPeer, entryReplicated);
2686
3443
  }
2687
3444
  }
2688
- if (addedPeers.size > 0) {
2689
- for (const peer of addedPeers) {
2690
- if (currentPeers.has(peer) && !knownPeers?.has(peer)) {
2691
- queueEntryForTarget(peer, entryReplicated);
2692
- }
3445
+
3446
+ for (const mode of pendingModes) {
3447
+ const modePeers = pendingPeersByMode.get(mode);
3448
+ if (!modePeers || modePeers.size === 0) {
3449
+ continue;
3450
+ }
3451
+ const optimisticPeers = optimisticGidPeersByMode.get(mode)?.get(gid);
3452
+ for (const peer of modePeers) {
3453
+ if (this.isEntryKnownByPeer(entryReplicated.hash, peer)) {
3454
+ continue;
3455
+ }
3456
+ const wasOptimisticallyAssigned =
3457
+ optimisticPeers?.has(peer) === true;
3458
+ const isCoveredByFullReplicaRepair =
3459
+ mode === "join-authoritative" &&
3460
+ fullReplicaRepairCandidates.has(peer) &&
3461
+ requestedReplicas >= fullReplicaRepairCandidateCount;
3462
+ const shouldQueue =
3463
+ mode === "join-authoritative"
3464
+ ? currentPeers.has(peer) || isCoveredByFullReplicaRepair
3465
+ : wasOptimisticallyAssigned ||
3466
+ (currentPeers.has(peer) && !knownPeers?.has(peer));
3467
+ if (shouldQueue) {
3468
+ // Authoritative join repair must not trust partial gid peer history,
3469
+ // otherwise a late joiner can get stuck with a partial historical
3470
+ // backfill forever. Once we enter the authoritative pass, queue every
3471
+ // entry whose current leader set still includes the added peer.
3472
+ queueEntryForTarget(mode, peer, entryReplicated);
2693
3473
  }
2694
3474
  }
2695
3475
  }
3476
+ }
2696
3477
  }
2697
3478
  } finally {
2698
3479
  await iterator.close();
2699
3480
  }
2700
3481
 
2701
- for (const target of [...pendingByTarget.keys()]) {
2702
- flushTarget(target);
3482
+ for (const [, optimisticGidPeersConsumed] of optimisticGidPeersConsumedByMode) {
3483
+ for (const [gid, peerCounts] of optimisticGidPeersConsumed) {
3484
+ const pendingPeerCounts =
3485
+ this._repairSweepOptimisticGidPeersPending.get(gid);
3486
+ if (!pendingPeerCounts) {
3487
+ continue;
3488
+ }
3489
+ for (const [peer, count] of peerCounts) {
3490
+ const current = pendingPeerCounts.get(peer) || 0;
3491
+ const next = current - count;
3492
+ if (next > 0) {
3493
+ pendingPeerCounts.set(peer, next);
3494
+ } else {
3495
+ pendingPeerCounts.delete(peer);
3496
+ }
3497
+ }
3498
+ if (pendingPeerCounts.size === 0) {
3499
+ this._repairSweepOptimisticGidPeersPending.delete(gid);
3500
+ }
3501
+ }
3502
+ }
3503
+
3504
+ for (const mode of pendingModes) {
3505
+ if (mode !== "join-authoritative" && mode !== "churn") {
3506
+ continue;
3507
+ }
3508
+ const nextTargets = nextFrontierByMode.get(mode) ?? new Map();
3509
+ const frontierTargets = this._repairFrontierByMode.get(mode);
3510
+ for (const target of pendingPeersByMode.get(mode) ?? []) {
3511
+ const replacement = nextTargets.get(target);
3512
+ if (mode === "join-authoritative") {
3513
+ // Authoritative join repair is receipt-driven: a later sweep can have a
3514
+ // narrower transient leader view, but it must not forget unconfirmed
3515
+ // hashes that were already queued for this joiner.
3516
+ if (replacement && replacement.size > 0) {
3517
+ const existing = frontierTargets?.get(target);
3518
+ if (existing && existing.size > 0) {
3519
+ for (const [hash, entry] of replacement) {
3520
+ existing.set(hash, entry);
3521
+ }
3522
+ } else {
3523
+ frontierTargets?.set(target, replacement);
3524
+ }
3525
+ }
3526
+ continue;
3527
+ }
3528
+ if (replacement && replacement.size > 0) {
3529
+ frontierTargets?.set(target, replacement);
3530
+ } else {
3531
+ frontierTargets?.delete(target);
3532
+ }
3533
+ }
3534
+ }
3535
+
3536
+ for (const [mode, targets] of pendingByMode) {
3537
+ for (const target of [...targets.keys()]) {
3538
+ flushTarget(mode, target);
3539
+ }
2703
3540
  }
2704
3541
  }
2705
3542
  } catch (error: any) {
@@ -2708,11 +3545,7 @@ export class SharedLog<
2708
3545
  }
2709
3546
  } finally {
2710
3547
  this._repairSweepRunning = false;
2711
- if (
2712
- !this.closed &&
2713
- (this._repairSweepForceFreshPending ||
2714
- this._repairSweepAddedPeersPending.size > 0)
2715
- ) {
3548
+ if (!this.closed && this._repairSweepPendingModes.size > 0) {
2716
3549
  this._repairSweepRunning = true;
2717
3550
  void this.runRepairSweep();
2718
3551
  }
@@ -2725,9 +3558,89 @@ export class SharedLog<
2725
3558
  entry: Entry<T> | ShallowEntry | EntryReplicated<R>;
2726
3559
  leaders: Map<string, any>;
2727
3560
  };
2728
- }) {
2729
- if (!this.keep || !(await this.keep(args.value.entry))) {
2730
- return this.pruneDebouncedFn.add(args);
3561
+ }): Promise<boolean> {
3562
+ if (this.keep && (await this.keep(args.value.entry))) {
3563
+ return false;
3564
+ }
3565
+ void this.pruneDebouncedFn.add(args);
3566
+ return true;
3567
+ }
3568
+
3569
+ private async pruneJoinedEntriesNoLongerLed(entries: Entry<T>[]) {
3570
+ const selfHash = this.node.identity.publicKey.hashcode();
3571
+ for (const entry of entries) {
3572
+ if (this.closed || this._pendingDeletes.has(entry.hash)) {
3573
+ continue;
3574
+ }
3575
+
3576
+ const leaders = await this.findLeadersFromEntry(
3577
+ entry,
3578
+ decodeReplicas(entry).getValue(this),
3579
+ { roleAge: 0 },
3580
+ );
3581
+
3582
+ if (leaders.has(selfHash)) {
3583
+ this.pruneDebouncedFn.delete(entry.hash);
3584
+ continue;
3585
+ }
3586
+
3587
+ if (leaders.size === 0) {
3588
+ continue;
3589
+ }
3590
+
3591
+ await this.pruneDebouncedFnAddIfNotKeeping({
3592
+ key: entry.hash,
3593
+ value: { entry, leaders },
3594
+ });
3595
+ this.responseToPruneDebouncedFn.delete(entry.hash);
3596
+ }
3597
+ }
3598
+
3599
+ private async pruneIndexedEntriesNoLongerLed() {
3600
+ const selfHash = this.node.identity.publicKey.hashcode();
3601
+ const iterator = this.entryCoordinatesIndex.iterate({});
3602
+ let enqueuedPrune = false;
3603
+ try {
3604
+ while (!this.closed && !iterator.done()) {
3605
+ const entries = await iterator.next(REPAIR_SWEEP_ENTRY_BATCH_SIZE);
3606
+ for (const entry of entries) {
3607
+ const entryReplicated = entry.value;
3608
+ if (this.closed || this._pendingDeletes.has(entryReplicated.hash)) {
3609
+ continue;
3610
+ }
3611
+
3612
+ const leaders = await this.findLeaders(
3613
+ entryReplicated.coordinates,
3614
+ entryReplicated,
3615
+ { roleAge: 0 },
3616
+ );
3617
+
3618
+ if (leaders.has(selfHash)) {
3619
+ this.pruneDebouncedFn.delete(entryReplicated.hash);
3620
+ await this._pendingDeletes
3621
+ .get(entryReplicated.hash)
3622
+ ?.reject(new Error("Failed to delete, is leader again"));
3623
+ this.removePruneRequestSent(entryReplicated.hash);
3624
+ continue;
3625
+ }
3626
+
3627
+ if (leaders.size === 0) {
3628
+ continue;
3629
+ }
3630
+
3631
+ enqueuedPrune =
3632
+ (await this.pruneDebouncedFnAddIfNotKeeping({
3633
+ key: entryReplicated.hash,
3634
+ value: { entry: entryReplicated, leaders },
3635
+ })) || enqueuedPrune;
3636
+ this.responseToPruneDebouncedFn.delete(entryReplicated.hash);
3637
+ }
3638
+ }
3639
+ } finally {
3640
+ await iterator.close();
3641
+ }
3642
+ if (enqueuedPrune && !this.closed) {
3643
+ await this.pruneDebouncedFn.flush();
2731
3644
  }
2732
3645
  }
2733
3646
 
@@ -2904,6 +3817,7 @@ export class SharedLog<
2904
3817
  } else {
2905
3818
  await this._appendDeliverToReplicators(
2906
3819
  result.entry,
3820
+ coordinates,
2907
3821
  minReplicasValue,
2908
3822
  leaders,
2909
3823
  selfHash,
@@ -2913,13 +3827,14 @@ export class SharedLog<
2913
3827
  }
2914
3828
  }
2915
3829
 
2916
- if (!isLeader && !this.shouldDelayAdaptiveRebalance()) {
3830
+ const delayAdaptiveRebalance = this.shouldDelayAdaptiveRebalance();
3831
+ if (!isLeader && !delayAdaptiveRebalance) {
2917
3832
  this.pruneDebouncedFnAddIfNotKeeping({
2918
3833
  key: result.entry.hash,
2919
3834
  value: { entry: result.entry, leaders },
2920
3835
  });
2921
3836
  }
2922
- if (!this._isAdaptiveReplicating) {
3837
+ if (!delayAdaptiveRebalance) {
2923
3838
  this.rebalanceParticipationDebounced?.call();
2924
3839
  }
2925
3840
 
@@ -2961,8 +3876,21 @@ export class SharedLog<
2961
3876
  this._repairRetryTimers = new Set();
2962
3877
  this._recentRepairDispatch = new Map();
2963
3878
  this._repairSweepRunning = false;
2964
- this._repairSweepForceFreshPending = false;
2965
- this._repairSweepAddedPeersPending = new Set();
3879
+ this._repairSweepPendingModes = new Set();
3880
+ this._repairSweepPendingPeersByMode = createRepairPendingPeersByMode();
3881
+ this._repairFrontierByMode = createRepairFrontierByMode() as Map<
3882
+ RepairDispatchMode,
3883
+ Map<string, Map<string, EntryReplicated<R>>>
3884
+ >;
3885
+ this._repairFrontierActiveTargetsByMode = createRepairActiveTargetsByMode();
3886
+ this._repairSweepOptimisticGidPeersPending = new Map();
3887
+ this._entryKnownPeers = new Map();
3888
+ this._joinAuthoritativeRepairTimersByDelay = new Map();
3889
+ this._joinAuthoritativeRepairPeersByDelay = new Map();
3890
+ this._assumeSyncedRepairSuppressedUntil = 0;
3891
+ this._appendBackfillTimer = undefined;
3892
+ this._appendBackfillPendingByTarget = new Map();
3893
+ this._repairMetrics = createRepairMetrics();
2966
3894
  this._topicSubscribersCache = new Map();
2967
3895
  this.coordinateToHash = new Cache<string>({ max: 1e6, ttl: 1e4 });
2968
3896
  this.recentlyRebalanced = new Cache<string>({ max: 1e4, ttl: 1e5 });
@@ -3041,7 +3969,10 @@ export class SharedLog<
3041
3969
  this.pendingMaturity = new Map();
3042
3970
 
3043
3971
  const id = sha256Base64Sync(this.log.id);
3044
- const storage = await this.node.storage.sublevel(id);
3972
+ const [storage, logScope] = await Promise.all([
3973
+ this.node.storage.sublevel(id),
3974
+ this.node.indexer.scope(id),
3975
+ ]);
3045
3976
 
3046
3977
  const localBlocks = await new AnyBlockStore(await storage.sublevel("blocks"));
3047
3978
  const fanoutService = getSharedLogFanoutService(this.node.services);
@@ -3104,20 +4035,19 @@ export class SharedLog<
3104
4035
  },
3105
4036
  });
3106
4037
 
3107
- await this.remoteBlocks.start();
3108
-
3109
- const logScope = await this.node.indexer.scope(id);
3110
- const replicationIndex = await logScope.scope("replication");
4038
+ const remoteBlocksStartPromise = this.remoteBlocks.start();
4039
+ const [replicationIndex, logIndex] = await Promise.all([
4040
+ logScope.scope("replication"),
4041
+ logScope.scope("log"),
4042
+ ]);
3111
4043
  this._replicationRangeIndex = await replicationIndex.init({
3112
4044
  schema: this.indexableDomain.constructorRange,
3113
4045
  });
3114
-
3115
4046
  this._entryCoordinatesIndex = await replicationIndex.init({
3116
4047
  schema: this.indexableDomain.constructorEntry,
3117
4048
  });
3118
4049
 
3119
- const logIndex = await logScope.scope("log");
3120
-
4050
+ await remoteBlocksStartPromise;
3121
4051
  const hasIndexedReplicationInfo =
3122
4052
  (await this.replicationIndex.count({
3123
4053
  query: [
@@ -3279,47 +4209,50 @@ export class SharedLog<
3279
4209
  }
3280
4210
 
3281
4211
  // Open for communcation
3282
- await this.rpc.open({
3283
- queryType: TransportMessage,
3284
- responseType: TransportMessage,
3285
- responseHandler: (query, context) => this.onMessage(query, context),
3286
- topic: this.topic,
3287
- });
3288
-
3289
4212
  this._onSubscriptionFn =
3290
4213
  this._onSubscriptionFn || this._onSubscription.bind(this);
3291
- await this.node.services.pubsub.addEventListener(
3292
- "subscribe",
3293
- this._onSubscriptionFn,
3294
- );
3295
-
3296
4214
  this._onUnsubscriptionFn =
3297
4215
  this._onUnsubscriptionFn || this._onUnsubscription.bind(this);
3298
- await this.node.services.pubsub.addEventListener(
3299
- "unsubscribe",
3300
- this._onUnsubscriptionFn,
3301
- );
3302
-
3303
- await this.rpc.subscribe();
3304
- await this._openFanoutChannel(options?.fanout);
4216
+ await Promise.all([
4217
+ this.rpc.open({
4218
+ queryType: TransportMessage,
4219
+ responseType: TransportMessage,
4220
+ responseHandler: (query, context) => this.onMessage(query, context),
4221
+ topic: this.topic,
4222
+ }),
4223
+ this.node.services.pubsub.addEventListener(
4224
+ "subscribe",
4225
+ this._onSubscriptionFn,
4226
+ ),
4227
+ this.node.services.pubsub.addEventListener(
4228
+ "unsubscribe",
4229
+ this._onUnsubscriptionFn,
4230
+ ),
4231
+ ]);
3305
4232
 
3306
- // mark all our replicaiton ranges as "new", this would allow other peers to understand that we recently reopend our database and might need some sync and warmup
3307
- await this.updateTimestampOfOwnedReplicationRanges(); // TODO do we need to do this before subscribing?
4233
+ const fanoutOpenPromise = this._openFanoutChannel(options?.fanout);
4234
+ // Mark previously-owned replication ranges as "new" only when they already exist.
4235
+ // Fresh opens have nothing to touch here, so skip the extra scan/write entirely.
4236
+ const updateOwnedReplicationPromise = hasIndexedReplicationInfo
4237
+ ? this.updateTimestampOfOwnedReplicationRanges()
4238
+ : Promise.resolve();
4239
+ await Promise.all([fanoutOpenPromise, updateOwnedReplicationPromise]);
3308
4240
 
3309
4241
  // if we had a previous session with replication info, and new replication info dictates that we unreplicate
3310
4242
  // we should do that. Otherwise if options is a unreplication we dont need to do anything because
3311
4243
  // we are already unreplicated (as we are just opening)
3312
4244
 
3313
- let isUnreplicationOptionsDefined = isUnreplicationOptions(
4245
+ const isUnreplicationOptionsDefined = isUnreplicationOptions(
3314
4246
  options?.replicate,
3315
4247
  );
3316
4248
 
3317
4249
  const canResumeReplication =
4250
+ hasIndexedReplicationInfo &&
3318
4251
  (await isReplicationOptionsDependentOnPreviousState(
3319
4252
  options?.replicate,
3320
4253
  this.replicationIndex,
3321
4254
  this.node.identity.publicKey,
3322
- )) && hasIndexedReplicationInfo;
4255
+ ));
3323
4256
 
3324
4257
  if (hasIndexedReplicationInfo && isUnreplicationOptionsDefined) {
3325
4258
  await this.replicate(options?.replicate, { checkDuplicates: true });
@@ -3372,25 +4305,26 @@ export class SharedLog<
3372
4305
 
3373
4306
  async afterOpen(): Promise<void> {
3374
4307
  await super.afterOpen();
4308
+ const existingSubscribersPromise = this._getTopicSubscribers(this.topic);
3375
4309
 
3376
4310
  // We do this here, because these calls requires this.closed == false
3377
- void this.pruneOfflineReplicators()
3378
- .then(() => {
3379
- this._replicatorsReconciled = true;
3380
- })
4311
+ void this.pruneOfflineReplicators()
4312
+ .then(() => {
4313
+ this._replicatorsReconciled = true;
4314
+ })
3381
4315
  .catch((error) => {
3382
4316
  if (isNotStartedError(error as Error)) {
3383
4317
  return;
3384
4318
  }
3385
- logger.error(error);
3386
- });
4319
+ logger.error(error);
4320
+ });
3387
4321
 
3388
- this.startReplicatorLivenessSweep();
4322
+ this.startReplicatorLivenessSweep();
3389
4323
 
3390
- await this.rebalanceParticipation();
4324
+ await this.rebalanceParticipation();
3391
4325
 
3392
4326
  // Take into account existing subscription
3393
- (await this._getTopicSubscribers(this.topic))?.forEach((v) => {
4327
+ (await existingSubscribersPromise)?.forEach((v) => {
3394
4328
  if (v.equals(this.node.identity.publicKey)) {
3395
4329
  return;
3396
4330
  }
@@ -4021,8 +4955,28 @@ export class SharedLog<
4021
4955
  this._repairRetryTimers.clear();
4022
4956
  this._recentRepairDispatch.clear();
4023
4957
  this._repairSweepRunning = false;
4024
- this._repairSweepForceFreshPending = false;
4025
- this._repairSweepAddedPeersPending.clear();
4958
+ this._repairSweepPendingModes.clear();
4959
+ for (const peers of this._repairSweepPendingPeersByMode.values()) {
4960
+ peers.clear();
4961
+ }
4962
+ this._repairSweepOptimisticGidPeersPending.clear();
4963
+ this._entryKnownPeers.clear();
4964
+ for (const timer of this._joinAuthoritativeRepairTimersByDelay.values()) {
4965
+ clearTimeout(timer);
4966
+ }
4967
+ this._joinAuthoritativeRepairTimersByDelay.clear();
4968
+ this._joinAuthoritativeRepairPeersByDelay.clear();
4969
+ for (const targets of this._repairFrontierByMode.values()) {
4970
+ targets.clear();
4971
+ }
4972
+ for (const targets of this._repairFrontierActiveTargetsByMode.values()) {
4973
+ targets.clear();
4974
+ }
4975
+ if (this._appendBackfillTimer) {
4976
+ clearTimeout(this._appendBackfillTimer);
4977
+ this._appendBackfillTimer = undefined;
4978
+ }
4979
+ this._appendBackfillPendingByTarget.clear();
4026
4980
 
4027
4981
  for (const [_k, v] of this._pendingDeletes) {
4028
4982
  v.clear();
@@ -4205,6 +5159,7 @@ export class SharedLog<
4205
5159
 
4206
5160
  if (heads) {
4207
5161
  const filteredHeads: EntryWithRefs<any>[] = [];
5162
+ const confirmedHashes = new Set<string>();
4208
5163
  for (const head of heads) {
4209
5164
  if (!(await this.log.has(head.entry.hash))) {
4210
5165
  head.entry.init({
@@ -4213,10 +5168,22 @@ export class SharedLog<
4213
5168
  encoding: this.log.encoding,
4214
5169
  });
4215
5170
  filteredHeads.push(head);
5171
+ } else {
5172
+ confirmedHashes.add(head.entry.hash);
4216
5173
  }
4217
5174
  }
5175
+ const fromIsSelf = context.from.equals(this.node.identity.publicKey);
5176
+ if (!fromIsSelf) {
5177
+ this.markEntriesKnownByPeer(
5178
+ heads.map((head) => head.entry.hash),
5179
+ context.from.hashcode(),
5180
+ );
5181
+ }
4218
5182
 
4219
5183
  if (filteredHeads.length === 0) {
5184
+ if (confirmedHashes.size > 0 && !fromIsSelf) {
5185
+ await this.sendRepairConfirmation(context.from!, confirmedHashes);
5186
+ }
4220
5187
  return;
4221
5188
  }
4222
5189
  const groupedByGid = await groupByGid(filteredHeads);
@@ -4350,7 +5317,15 @@ export class SharedLog<
4350
5317
  }
4351
5318
 
4352
5319
  if (toMerge.length > 0) {
5320
+ this.markEntriesKnownByPeer(
5321
+ toMerge.map((entry) => entry.hash),
5322
+ context.from!.hashcode(),
5323
+ );
4353
5324
  await this.log.join(toMerge);
5325
+ for (const merged of toMerge) {
5326
+ confirmedHashes.add(merged.hash);
5327
+ }
5328
+ await this.pruneJoinedEntriesNoLongerLed(toMerge);
4354
5329
 
4355
5330
  toDelete?.map((x) =>
4356
5331
  // TODO types
@@ -4397,6 +5372,10 @@ export class SharedLog<
4397
5372
  promises.push(fn()); // we do this concurrently since waitForIsLeader might be a blocking operation for some entries
4398
5373
  }
4399
5374
  await Promise.all(promises);
5375
+ if (confirmedHashes.size > 0 && !context.from.equals(this.node.identity.publicKey)) {
5376
+ this.markEntriesKnownByPeer(confirmedHashes, context.from.hashcode());
5377
+ await this.sendRepairConfirmation(context.from!, confirmedHashes);
5378
+ }
4400
5379
  }
4401
5380
  } else if (msg instanceof RequestIPrune) {
4402
5381
  const hasAndIsLeader: string[] = [];
@@ -4404,6 +5383,7 @@ export class SharedLog<
4404
5383
 
4405
5384
  for (const hash of msg.hashes) {
4406
5385
  this.removePruneRequestSent(hash, from);
5386
+ this.removeEntriesKnownByPeer([hash], from);
4407
5387
 
4408
5388
  // if we expect the remote to be owner of this entry because we are to prune ourselves, then we need to remove the remote
4409
5389
  // this is due to that the remote has previously indicated to be a replicator to help us prune but now has changed their mind
@@ -4518,6 +5498,10 @@ export class SharedLog<
4518
5498
  for (const hash of msg.hashes) {
4519
5499
  this._pendingDeletes.get(hash)?.resolve(context.from.hashcode());
4520
5500
  }
5501
+ } else if (msg instanceof ConfirmEntriesMessage) {
5502
+ this.markEntriesKnownByPeer(msg.hashes, context.from.hashcode());
5503
+ this.clearRepairFrontierHashes(context.from.hashcode(), msg.hashes);
5504
+ return;
4521
5505
  } else if (await this.syncronizer.onMessage(msg, context)) {
4522
5506
  return; // the syncronizer has handled the message
4523
5507
  } else if (msg instanceof BlocksMessage) {
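
On the sending side, a `ConfirmEntriesMessage` lets the original dispatcher record which hashes the peer now holds and stop retrying them. A sketch of that bookkeeping with hypothetical map shapes (the real structures are internal to the class):

```ts
// Hypothetical shapes: hash -> peers known to hold it, and peer -> hashes that
// are still owed to that peer by the repair frontier.
function onConfirmEntries(
	fromPeer: string,
	hashes: string[],
	entryKnownPeers: Map<string, Set<string>>,
	repairFrontier: Map<string, Set<string>>,
): void {
	for (const hash of hashes) {
		let peers = entryKnownPeers.get(hash);
		if (!peers) {
			peers = new Set();
			entryKnownPeers.set(hash, peers);
		}
		peers.add(fromPeer);
		// Confirmed hashes no longer need to be re-sent on the retry schedule.
		repairFrontier.get(fromPeer)?.delete(hash);
	}
}
```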
@@ -4948,6 +5932,11 @@ export class SharedLog<
4948
5932
  let messageToSend: AddedReplicationSegmentMessage | undefined = undefined;
4949
5933
 
4950
5934
  if (assumeSynced) {
5935
+ // `assumeSynced` is an explicit contract that this join should trust the
5936
+ // supplied history and avoid initiating outbound repair while the local
5937
+ // replication ranges settle.
5938
+ this._assumeSyncedRepairSuppressedUntil =
5939
+ Date.now() + ASSUME_SYNCED_REPAIR_SUPPRESSION_MS;
4951
5940
  for (const entry of entriesToReplicate) {
4952
5941
  await seedAssumeSyncedPeerHistory(entry);
4953
5942
  }
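
A minimal sketch of the suppression window set above, assuming only a timestamp comparison; the 5 s value mirrors `ASSUME_SYNCED_REPAIR_SUPPRESSION_MS`, the class and method names are illustrative:

```ts
// Illustrative suppression window; only the 5 s constant mirrors the diff.
const ASSUME_SYNCED_SUPPRESSION_MS = 5_000;

class RepairSuppression {
	private suppressedUntil = 0;

	noteAssumeSyncedJoin(now = Date.now()): void {
		this.suppressedUntil = now + ASSUME_SYNCED_SUPPRESSION_MS;
	}

	canDispatchRepair(now = Date.now()): boolean {
		// While the window is open, the seeded history is trusted and outbound
		// repair is skipped.
		return now >= this.suppressedUntil;
	}
}
```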
@@ -5033,9 +6022,14 @@ export class SharedLog<
5033
6022
  clear();
5034
6023
  // `waitForReplicator()` is typically used as a precondition before join/replicate
5035
6024
  // flows. A replicator can become mature and enqueue a debounced rebalance
5036
- // (`replicationChangeDebounceFn`) slightly later. Flush here so callers don't
5037
- // observe a "late" rebalance after the wait resolves.
5038
- await this.replicationChangeDebounceFn?.flush?.();
6025
+ // (`replicationChangeDebounceFn`) slightly later. Kick the flush, but do not
6026
+ // make membership waits depend on all rebalance work finishing; callers that
6027
+ // need settled distribution already wait for that explicitly.
6028
+ this.replicationChangeDebounceFn?.flush?.().catch((error: any) => {
6029
+ if (!isNotStartedError(error)) {
6030
+ logger.error(error?.toString?.() ?? String(error));
6031
+ }
6032
+ });
5039
6033
  deferred.resolve();
5040
6034
  };
5041
6035
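
The flush above is intentionally fire-and-forget: membership waits resolve immediately, and only unexpected errors are logged. A sketch of that pattern, assuming a debounced function whose optional `flush()` returns a promise:

```ts
// Sketch of the fire-and-forget flush; `flush` returning a promise is assumed
// from the optional chaining used above.
type Debounced = { flush?: () => Promise<void> };

function kickFlush(
	debounced: Debounced | undefined,
	isNotStarted: (error: unknown) => boolean,
	logError: (message: string) => void,
): void {
	debounced?.flush?.().catch((error) => {
		// An already-stopped instance is expected during shutdown; anything else
		// is surfaced to the logger.
		if (!isNotStarted(error)) {
			logError(error instanceof Error ? error.message : String(error));
		}
	});
	// The caller resolves immediately; settled distribution is awaited elsewhere
	// by the callers that actually need it.
}
```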
 
@@ -5580,6 +6574,18 @@ export class SharedLog<
5580
6574
  }
5581
6575
  }
5582
6576
  }
6577
+
6578
+ if (!options?.candidates) {
6579
+ const fullReplicaLeaders = await this.findFullReplicaLeaders(
6580
+ cursors.length,
6581
+ roleAge,
6582
+ peerFilter,
6583
+ );
6584
+ if (fullReplicaLeaders) {
6585
+ return fullReplicaLeaders;
6586
+ }
6587
+ }
6588
+
5583
6589
  return getSamples<R>(
5584
6590
  cursors,
5585
6591
  this.replicationIndex,
@@ -5592,6 +6598,50 @@ export class SharedLog<
5592
6598
  );
5593
6599
  }
5594
6600
 
6601
+ private async findFullReplicaLeaders(
6602
+ replicas: number,
6603
+ roleAge: number,
6604
+ peerFilter?: Set<string>,
6605
+ ): Promise<Map<string, { intersecting: boolean }> | undefined> {
6606
+ const now = Date.now();
6607
+ const leaders = new Map<string, { intersecting: boolean }>();
6608
+ const includeStrict =
6609
+ this._logProperties?.strictFullReplicaFallback !== false;
6610
+ const iterator = this.replicationIndex.iterate(
6611
+ {},
6612
+ { shape: { hash: true, timestamp: true, mode: true } },
6613
+ );
6614
+
6615
+ try {
6616
+ for (;;) {
6617
+ const batch = await iterator.next(64);
6618
+ if (batch.length === 0) {
6619
+ break;
6620
+ }
6621
+ for (const result of batch) {
6622
+ const range = result.value;
6623
+ if (peerFilter && !peerFilter.has(range.hash)) {
6624
+ continue;
6625
+ }
6626
+ if (!isMatured(range, now, roleAge)) {
6627
+ continue;
6628
+ }
6629
+ if (range.mode === ReplicationIntent.Strict && !includeStrict) {
6630
+ continue;
6631
+ }
6632
+ leaders.set(range.hash, { intersecting: true });
6633
+ if (leaders.size > replicas) {
6634
+ return undefined;
6635
+ }
6636
+ }
6637
+ }
6638
+ } finally {
6639
+ await iterator.close();
6640
+ }
6641
+
6642
+ return leaders.size > 0 ? leaders : undefined;
6643
+ }
6644
+
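
Restated outside the class, the fallback above treats every matured replicator as a leader whenever the matured set is no larger than the requested replica count, and bails out to normal sampling otherwise. A simplified sketch under that assumption (not the package's API):

```ts
// Simplified restatement, not the package's API: maturity is modelled as a
// plain timestamp here.
interface RangeLike {
	hash: string;
	maturedAt: number;
}

function fullReplicaLeaders(
	ranges: RangeLike[],
	replicas: number,
	roleAge: number,
	now = Date.now(),
): Map<string, { intersecting: boolean }> | undefined {
	const leaders = new Map<string, { intersecting: boolean }>();
	for (const range of ranges) {
		if (now - range.maturedAt < roleAge) {
			continue; // not matured yet, skip like isMatured() does above
		}
		leaders.set(range.hash, { intersecting: true });
		if (leaders.size > replicas) {
			return undefined; // more candidates than replicas: fall back to sampling
		}
	}
	return leaders.size > 0 ? leaders : undefined;
}
```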
5595
6645
  async findLeadersFromEntry(
5596
6646
  entry: ShallowOrFullEntry<any> | EntryReplicated<R>,
5597
6647
  replicas: number,
@@ -6231,16 +7281,33 @@ export class SharedLog<
6231
7281
 
6232
7282
  const changed = false;
6233
7283
  const addedPeers = new Set<string>();
7284
+ const authoritativeRepairPeers = new Set<string>();
6234
7285
  const warmupPeers = new Set<string>();
7286
+ const churnRepairPeers = new Set<string>();
6235
7287
  const hasSelfWarmupChange = changes.some(
6236
7288
  (change) =>
6237
7289
  change.range.hash === selfHash &&
6238
7290
  (change.type === "added" || change.type === "replaced"),
6239
7291
  );
7292
+ const hasSelfRangeRemoval = changes.some(
7293
+ (change) =>
7294
+ change.range.hash === selfHash &&
7295
+ (change.type === "removed" || change.type === "replaced"),
7296
+ );
6240
7297
  for (const change of changes) {
7298
+ if (
7299
+ change.range.hash !== selfHash &&
7300
+ (change.type === "removed" || change.type === "replaced")
7301
+ ) {
7302
+ this.removePeerFromEntryKnownPeers(change.range.hash);
7303
+ }
6241
7304
  if (change.type === "added" || change.type === "replaced") {
6242
7305
  const hash = change.range.hash;
6243
7306
  if (hash !== selfHash) {
7307
+ // Existing peers can widen/shift ranges after the initial join. If we
7308
+ // only rescan on first-seen "added", late authoritative range updates can
7309
+ // leave historical backfill permanently partial under load.
7310
+ authoritativeRepairPeers.add(hash);
6244
7311
  // Range updates can reassign entries to an existing peer shortly after it
6245
7312
  // already received a subset. Avoid suppressing legitimate follow-up repair.
6246
7313
  this._recentRepairDispatch.delete(hash);
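
The change loop above effectively buckets each replication-range change: remote added/replaced ranges queue that peer for an authoritative repair pass, and removals of this node's own range flag a later prune. A compact sketch of that classification with local names:

```ts
// Local sketch of the classification; the real loop also clears per-peer
// dedupe state, which is omitted here.
type RangeChange = {
	type: "added" | "removed" | "replaced";
	peerHash: string;
};

function classifyChanges(changes: RangeChange[], selfHash: string) {
	const authoritativeRepairPeers = new Set<string>();
	let selfRangeRemoved = false;
	for (const change of changes) {
		if (change.peerHash === selfHash) {
			if (change.type === "removed" || change.type === "replaced") {
				selfRangeRemoved = true;
			}
			continue;
		}
		// Added or replaced remote ranges can reassign historical entries to that
		// peer, so it is queued for an authoritative repair pass even if it has
		// been seen before.
		if (change.type === "added" || change.type === "replaced") {
			authoritativeRepairPeers.add(change.peerHash);
		}
	}
	return { authoritativeRepairPeers, selfRangeRemoved };
}
```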
@@ -6277,26 +7344,34 @@ export class SharedLog<
6277
7344
  string,
6278
7345
  Map<string, EntryReplicated<any>>
6279
7346
  > = new Map();
6280
- const flushUncheckedDeliverTarget = (target: string) => {
6281
- const entries = uncheckedDeliver.get(target);
6282
- if (!entries || entries.size === 0) {
6283
- return;
6284
- }
7347
+ const flushUncheckedDeliverTarget = (target: string) => {
7348
+ const entries = uncheckedDeliver.get(target);
7349
+ if (!entries || entries.size === 0) {
7350
+ return;
7351
+ }
6285
7352
  const isWarmupTarget = warmupPeers.has(target);
6286
- const bypassRecentDedupe = isWarmupTarget || forceFreshDelivery;
7353
+ const mode: RepairDispatchMode = forceFreshDelivery
7354
+ ? "churn"
7355
+ : isWarmupTarget
7356
+ ? "join-warmup"
7357
+ : "join-authoritative";
6287
7358
  this.dispatchMaybeMissingEntries(target, entries, {
6288
- bypassRecentDedupe,
6289
- retryScheduleMs: isWarmupTarget
6290
- ? JOIN_WARMUP_RETRY_SCHEDULE_MS
6291
- : undefined,
6292
- forceFreshDelivery,
7359
+ bypassRecentDedupe: isWarmupTarget || forceFreshDelivery,
7360
+ mode,
7361
+ retryScheduleMs:
7362
+ mode === "join-warmup"
7363
+ ? JOIN_WARMUP_RETRY_SCHEDULE_MS
7364
+ : mode === "join-authoritative"
7365
+ ? [0]
7366
+ : undefined,
6293
7367
  });
6294
- uncheckedDeliver.delete(target);
6295
- };
7368
+ uncheckedDeliver.delete(target);
7369
+ };
6296
7370
  const queueUncheckedDeliver = (
6297
7371
  target: string,
6298
7372
  entry: EntryReplicated<any>,
6299
7373
  ) => {
7374
+ churnRepairPeers.add(target);
6300
7375
  let set = uncheckedDeliver.get(target);
6301
7376
  if (!set) {
6302
7377
  set = new Map();
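
The flush above picks a retry schedule per dispatch mode: warmup targets keep the long tail, the join-authoritative path dispatches once and lets the sweep own retries, and churn falls back to the dispatcher default. A sketch of that mapping; the schedule values are illustrative except where they mirror the constants in the diff:

```ts
// Schedule values are illustrative; [0] for the authoritative path and the
// long warmup tail follow the logic shown above.
type DispatchMode = "churn" | "join-warmup" | "join-authoritative";

const WARMUP_SCHEDULE_MS = [0, 1_000, 3_000, 7_000, 15_000, 30_000, 60_000];

// undefined means "use the dispatcher's default schedule".
const RETRY_SCHEDULE_BY_MODE: Record<DispatchMode, number[] | undefined> = {
	"join-warmup": WARMUP_SCHEDULE_MS,
	"join-authoritative": [0],
	churn: undefined,
};

function retryScheduleFor(mode: DispatchMode): number[] | undefined {
	return RETRY_SCHEDULE_BY_MODE[mode];
}
```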
@@ -6320,74 +7395,85 @@ export class SharedLog<
6320
7395
  forceFresh: forceFreshDelivery || useJoinWarmupFastPath,
6321
7396
  },
6322
7397
  )) {
6323
- if (this.closed) {
6324
- break;
6325
- }
6326
-
6327
- if (useJoinWarmupFastPath) {
6328
- let oldPeersSet: Set<string> | undefined;
6329
- const gid = entryReplicated.gid;
6330
- oldPeersSet = gidPeersHistorySnapshot.get(gid);
6331
- if (!gidPeersHistorySnapshot.has(gid)) {
6332
- const existing = this._gidPeersHistory.get(gid);
6333
- oldPeersSet = existing ? new Set(existing) : undefined;
6334
- gidPeersHistorySnapshot.set(gid, oldPeersSet);
7398
+ if (this.closed) {
7399
+ break;
6335
7400
  }
6336
7401
 
6337
- for (const target of warmupPeers) {
6338
- queueUncheckedDeliver(target, entryReplicated);
6339
- }
7402
+ if (useJoinWarmupFastPath) {
7403
+ let oldPeersSet: Set<string> | undefined;
7404
+ const gid = entryReplicated.gid;
7405
+ oldPeersSet = gidPeersHistorySnapshot.get(gid);
7406
+ if (!gidPeersHistorySnapshot.has(gid)) {
7407
+ const existing = this._gidPeersHistory.get(gid);
7408
+ oldPeersSet = existing ? new Set(existing) : undefined;
7409
+ gidPeersHistorySnapshot.set(gid, oldPeersSet);
7410
+ }
6340
7411
 
6341
- const candidatePeers = new Set<string>([selfHash]);
6342
- for (const target of warmupPeers) {
6343
- candidatePeers.add(target);
6344
- }
6345
- if (oldPeersSet) {
6346
- for (const oldPeer of oldPeersSet) {
6347
- candidatePeers.add(oldPeer);
7412
+ for (const target of warmupPeers) {
7413
+ queueUncheckedDeliver(target, entryReplicated);
6348
7414
  }
6349
- }
6350
7415
 
6351
- const currentPeers = await this.findLeaders(
6352
- entryReplicated.coordinates,
6353
- entryReplicated,
6354
- {
6355
- roleAge: 0,
6356
- candidates: candidatePeers,
6357
- persist: false,
6358
- },
6359
- );
7416
+ const candidatePeers = new Set<string>([selfHash]);
7417
+ for (const target of warmupPeers) {
7418
+ candidatePeers.add(target);
7419
+ }
7420
+ if (oldPeersSet) {
7421
+ for (const oldPeer of oldPeersSet) {
7422
+ candidatePeers.add(oldPeer);
7423
+ }
7424
+ }
6360
7425
 
6361
- if (oldPeersSet) {
6362
- for (const oldPeer of oldPeersSet) {
6363
- if (!currentPeers.has(oldPeer)) {
6364
- this.removePruneRequestSent(entryReplicated.hash);
7426
+ const currentPeers = await this.findLeaders(
7427
+ entryReplicated.coordinates,
7428
+ entryReplicated,
7429
+ {
7430
+ roleAge: 0,
7431
+ candidates: candidatePeers,
7432
+ persist: false,
7433
+ },
7434
+ );
7435
+
7436
+ if (oldPeersSet) {
7437
+ for (const oldPeer of oldPeersSet) {
7438
+ if (!currentPeers.has(oldPeer)) {
7439
+ this.removePruneRequestSent(entryReplicated.hash);
7440
+ }
6365
7441
  }
6366
7442
  }
6367
- }
6368
7443
 
6369
- this.addPeersToGidPeerHistory(
6370
- entryReplicated.gid,
6371
- currentPeers.keys(),
6372
- true,
6373
- );
7444
+ for (const [peer] of currentPeers) {
7445
+ if (warmupPeers.has(peer)) {
7446
+ this.markRepairSweepOptimisticPeer(entryReplicated.gid, peer);
7447
+ }
7448
+ }
6374
7449
 
6375
- if (!currentPeers.has(selfHash)) {
6376
- this.pruneDebouncedFnAddIfNotKeeping({
6377
- key: entryReplicated.hash,
6378
- value: { entry: entryReplicated, leaders: currentPeers },
6379
- });
7450
+ const authoritativePeers = [...currentPeers.keys()].filter(
7451
+ (peer) =>
7452
+ !warmupPeers.has(peer) &&
7453
+ !this.hasPendingRepairSweepOptimisticPeer(entryReplicated.gid, peer),
7454
+ );
7455
+ this.addPeersToGidPeerHistory(
7456
+ entryReplicated.gid,
7457
+ authoritativePeers,
7458
+ true,
7459
+ );
6380
7460
 
6381
- this.responseToPruneDebouncedFn.delete(entryReplicated.hash);
6382
- } else {
6383
- this.pruneDebouncedFn.delete(entryReplicated.hash);
6384
- await this._pendingDeletes
6385
- .get(entryReplicated.hash)
6386
- ?.reject(new Error("Failed to delete, is leader again"));
6387
- this.removePruneRequestSent(entryReplicated.hash);
7461
+ if (!currentPeers.has(selfHash)) {
7462
+ this.pruneDebouncedFnAddIfNotKeeping({
7463
+ key: entryReplicated.hash,
7464
+ value: { entry: entryReplicated, leaders: currentPeers },
7465
+ });
7466
+
7467
+ this.responseToPruneDebouncedFn.delete(entryReplicated.hash);
7468
+ } else {
7469
+ this.pruneDebouncedFn.delete(entryReplicated.hash);
7470
+ await this._pendingDeletes
7471
+ .get(entryReplicated.hash)
7472
+ ?.reject(new Error("Failed to delete, is leader again"));
7473
+ this.removePruneRequestSent(entryReplicated.hash);
7474
+ }
7475
+ continue;
6388
7476
  }
6389
- continue;
6390
- }
6391
7477
 
6392
7478
  let oldPeersSet: Set<string> | undefined;
6393
7479
  const gid = entryReplicated.gid;
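
In both leader-recomputation paths above, freshly joined peers are tracked optimistically and kept out of the authoritative gid-peer history until a confirmation arrives. A sketch of that split, with names local to the example:

```ts
// Names are local to this sketch; "freshly joined" corresponds to the warmup /
// added peer sets above.
function splitLeaderPeers(
	currentPeers: Iterable<string>,
	freshlyJoined: Set<string>,
): { optimistic: string[]; authoritative: string[] } {
	const optimistic: string[] = [];
	const authoritative: string[] = [];
	for (const peer of currentPeers) {
		// Optimistic peers wait for a delivery confirmation before they count as
		// authoritative holders in the gid-peer history.
		(freshlyJoined.has(peer) ? optimistic : authoritative).push(peer);
	}
	return { optimistic, authoritative };
}
```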
@@ -6421,19 +7507,30 @@ export class SharedLog<
6421
7507
  }
6422
7508
  }
6423
7509
 
6424
- if (oldPeersSet) {
6425
- for (const oldPeer of oldPeersSet) {
6426
- if (!currentPeers.has(oldPeer)) {
6427
- this.removePruneRequestSent(entryReplicated.hash);
7510
+ if (oldPeersSet) {
7511
+ for (const oldPeer of oldPeersSet) {
7512
+ if (!currentPeers.has(oldPeer)) {
7513
+ this.removePruneRequestSent(entryReplicated.hash);
7514
+ }
6428
7515
  }
6429
7516
  }
6430
- }
6431
7517
 
6432
- this.addPeersToGidPeerHistory(
6433
- entryReplicated.gid,
6434
- currentPeers.keys(),
6435
- true,
6436
- );
7518
+ for (const [peer] of currentPeers) {
7519
+ if (addedPeers.has(peer)) {
7520
+ this.markRepairSweepOptimisticPeer(entryReplicated.gid, peer);
7521
+ }
7522
+ }
7523
+
7524
+ const authoritativePeers = [...currentPeers.keys()].filter(
7525
+ (peer) =>
7526
+ !addedPeers.has(peer) &&
7527
+ !this.hasPendingRepairSweepOptimisticPeer(entryReplicated.gid, peer),
7528
+ );
7529
+ this.addPeersToGidPeerHistory(
7530
+ entryReplicated.gid,
7531
+ authoritativePeers,
7532
+ true,
7533
+ );
6437
7534
 
6438
7535
  if (!isLeader) {
6439
7536
  this.pruneDebouncedFnAddIfNotKeeping({
@@ -6452,9 +7549,18 @@ export class SharedLog<
6452
7549
  }
6453
7550
  }
6454
7551
 
7552
+ if (this._isAdaptiveReplicating && hasSelfRangeRemoval) {
7553
+ await this.pruneIndexedEntriesNoLongerLed();
7554
+ }
7555
+
6455
7556
  if (forceFreshDelivery) {
6456
- // Removed/shrunk ranges still need the authoritative background pass.
6457
- this.scheduleRepairSweep({ forceFreshDelivery, addedPeers });
7557
+ // Pure leave/shrink churn can have zero `addedPeers`, but the peers that
7558
+ // received redistributed entries still need a follow-up repair pass if the
7559
+ // immediate maybe-sync misses one entry.
7560
+ this.scheduleRepairSweep({
7561
+ mode: "churn",
7562
+ peers: churnRepairPeers,
7563
+ });
6458
7564
  } else if (useJoinWarmupFastPath) {
6459
7565
  // Pure join warmup uses the cheap immediate maybe-missing dispatch above,
6460
7566
  // then defers the authoritative sweep so it does not compete with the
@@ -6466,19 +7572,23 @@ export class SharedLog<
6466
7572
  return;
6467
7573
  }
6468
7574
  this.scheduleRepairSweep({
6469
- forceFreshDelivery: false,
6470
- addedPeers: peers,
7575
+ mode: "join-warmup",
7576
+ peers,
6471
7577
  });
6472
7578
  }, 250);
6473
7579
  timer.unref?.();
6474
7580
  this._repairRetryTimers.add(timer);
6475
- } else if (addedPeers.size > 0) {
7581
+ } else if (authoritativeRepairPeers.size > 0) {
6476
7582
  this.scheduleRepairSweep({
6477
- forceFreshDelivery: false,
6478
- addedPeers,
7583
+ mode: "join-authoritative",
7584
+ peers: authoritativeRepairPeers,
6479
7585
  });
6480
7586
  }
6481
7587
 
7588
+ if (!forceFreshDelivery && authoritativeRepairPeers.size > 0) {
7589
+ this.scheduleJoinAuthoritativeRepair(authoritativeRepairPeers);
7590
+ }
7591
+
6482
7592
  for (const target of [...uncheckedDeliver.keys()]) {
6483
7593
  flushUncheckedDeliverTarget(target);
6484
7594
  }
@@ -6585,6 +7695,13 @@ export class SharedLog<
6585
7695
  return; // not allowed to replicate
6586
7696
  }
6587
7697
 
7698
+ if (
7699
+ this.replicationController.maxMemoryLimit != null &&
7700
+ usedMemory > this.replicationController.maxMemoryLimit
7701
+ ) {
7702
+ await this.pruneIndexedEntriesNoLongerLed();
7703
+ }
7704
+
6588
7705
  const peersSize = (await peers.getSize()) || 1;
6589
7706
  const totalParticipation = await this.calculateTotalParticipation();
6590
7707
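
The guard added above prunes indexed entries this node no longer leads before recomputing participation, but only when a memory ceiling is configured and exceeded. An illustrative restatement with local names:

```ts
// Illustrative only; the controller and prune callback are stand-ins for the
// class's own members.
interface AdaptiveController {
	maxMemoryLimit?: number;
}

async function maybePruneForMemory(
	controller: AdaptiveController,
	usedMemory: number,
	pruneEntriesNoLongerLed: () => Promise<void>,
): Promise<void> {
	if (
		controller.maxMemoryLimit != null &&
		usedMemory > controller.maxMemoryLimit
	) {
		await pruneEntriesNoLongerLed();
	}
}
```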