@peerbit/shared-log 13.1.0 → 13.1.2

This diff shows the published contents of the two package versions as they appear in their public registries, and is provided for informational purposes only.
package/src/index.ts CHANGED
@@ -88,6 +88,7 @@ const getSharedLogFanoutService = (
88
88
  ): FanoutTree | undefined =>
89
89
  (services as SharedLogServicesWithFanout).fanout;
90
90
  import {
91
+ EXCHANGE_HEADS_REPAIR_HINT,
91
92
  EntryWithRefs,
92
93
  ExchangeHeadsMessage,
93
94
  RequestIPrune,
@@ -168,7 +169,7 @@ import type {
168
169
  Syncronizer,
169
170
  } from "./sync/index.js";
170
171
  import { RatelessIBLTSynchronizer } from "./sync/rateless-iblt.js";
171
- import { SimpleSyncronizer } from "./sync/simple.js";
172
+ import { ConfirmEntriesMessage, SimpleSyncronizer } from "./sync/simple.js";
172
173
  import { groupByGid } from "./utils.js";
173
174
 
174
175
  const toLocalPublicSignKey = (
@@ -468,6 +469,7 @@ export type SharedLogOptions<
468
469
  waitForReplicatorRequestMaxAttempts?: number;
469
470
  waitForPruneDelay?: number;
470
471
  distributionDebounceTime?: number;
472
+ strictFullReplicaFallback?: boolean;
471
473
  compatibility?: number;
472
474
  domain?: ReplicationDomainConstructor<D>;
473
475
  eagerBlocks?: boolean | { cacheSize?: number };
@@ -513,10 +515,143 @@ const REPLICATOR_LIVENESS_PROBE_FAILURES_TO_EVICT = 2;
513
515
  // Churn/join repair can race with pruning and transient missed sync requests under
514
516
  // heavy event-loop load. Keep retries alive with a longer tail so reassigned
515
517
  // entries are retried after short bursts and slower recovery windows.
516
- const FORCE_FRESH_RETRY_SCHEDULE_MS = [
518
+ const CHURN_REPAIR_RETRY_SCHEDULE_MS = [
517
519
  0, 1_000, 3_000, 7_000, 15_000, 30_000, 45_000,
518
520
  ];
519
- const JOIN_WARMUP_RETRY_SCHEDULE_MS = [0, 1_000, 3_000, 7_000, 15_000];
521
+ const JOIN_WARMUP_RETRY_SCHEDULE_MS = [
522
+ 0,
523
+ 1_000,
524
+ 3_000,
525
+ 7_000,
526
+ 15_000,
527
+ 30_000,
528
+ 60_000,
529
+ ];
530
+ const JOIN_AUTHORITATIVE_RETRY_SCHEDULE_MS = [
531
+ 0,
532
+ 1_000,
533
+ 3_000,
534
+ 7_000,
535
+ 15_000,
536
+ 30_000,
537
+ 60_000,
538
+ ];
539
+ const APPEND_BACKFILL_RETRY_SCHEDULE_MS = [0, 1_000, 3_000, 7_000];
540
+ const JOIN_AUTHORITATIVE_REPAIR_DELAY_MS = 2_000;
541
+ const JOIN_AUTHORITATIVE_REPAIR_SWEEP_DELAYS_MS = [
542
+ JOIN_AUTHORITATIVE_REPAIR_DELAY_MS,
543
+ 7_000,
544
+ 15_000,
545
+ 30_000,
546
+ ];
547
+ const APPEND_BACKFILL_DELAY_MS = 500;
548
+ const ASSUME_SYNCED_REPAIR_SUPPRESSION_MS = 5_000;
549
+ const REPAIR_CONFIRMATION_HASH_BATCH_SIZE = 1_024;
550
+
551
+ type RepairDispatchMode =
552
+ | "join-warmup"
553
+ | "join-authoritative"
554
+ | "append-backfill"
555
+ | "churn";
556
+ type RepairTransportMode = "rateless" | "simple";
557
+ type RepairMetricBucket = {
558
+ dispatches: number;
559
+ entries: number;
560
+ ratelessFirstPasses: number;
561
+ simpleFallbackPasses: number;
562
+ };
563
+ type RepairMetrics = Record<RepairDispatchMode, RepairMetricBucket>;
564
+
565
+ const REPAIR_DISPATCH_MODES: RepairDispatchMode[] = [
566
+ "join-warmup",
567
+ "join-authoritative",
568
+ "append-backfill",
569
+ "churn",
570
+ ];
571
+
572
+ const createRepairMetricBucket = (): RepairMetricBucket => ({
573
+ dispatches: 0,
574
+ entries: 0,
575
+ ratelessFirstPasses: 0,
576
+ simpleFallbackPasses: 0,
577
+ });
578
+
579
+ const createRepairMetrics = (): RepairMetrics => ({
580
+ "join-warmup": createRepairMetricBucket(),
581
+ "join-authoritative": createRepairMetricBucket(),
582
+ "append-backfill": createRepairMetricBucket(),
583
+ churn: createRepairMetricBucket(),
584
+ });
585
+
586
+ const createRepairPendingPeersByMode = () =>
587
+ new Map<RepairDispatchMode, Set<string>>(
588
+ REPAIR_DISPATCH_MODES.map((mode) => [mode, new Set<string>()]),
589
+ );
590
+
591
+ const cloneRepairPendingPeersByMode = (
592
+ pending: Map<RepairDispatchMode, Set<string>>,
593
+ ) =>
594
+ new Map<RepairDispatchMode, Set<string>>(
595
+ REPAIR_DISPATCH_MODES.map((mode) => [mode, new Set(pending.get(mode) ?? [])]),
596
+ );
597
+
598
+ const createRepairFrontierByMode = () =>
599
+ new Map<
600
+ RepairDispatchMode,
601
+ Map<string, Map<string, EntryReplicated<any>>>
602
+ >(REPAIR_DISPATCH_MODES.map((mode) => [mode, new Map()]));
603
+
604
+ const createRepairActiveTargetsByMode = () =>
605
+ new Map<RepairDispatchMode, Set<string>>(
606
+ REPAIR_DISPATCH_MODES.map((mode) => [mode, new Set()]),
607
+ );
608
+
609
+ const getRepairRetrySchedule = (mode: RepairDispatchMode) => {
610
+ switch (mode) {
611
+ case "join-warmup":
612
+ return JOIN_WARMUP_RETRY_SCHEDULE_MS;
613
+ case "join-authoritative":
614
+ return JOIN_AUTHORITATIVE_RETRY_SCHEDULE_MS;
615
+ case "append-backfill":
616
+ return APPEND_BACKFILL_RETRY_SCHEDULE_MS;
617
+ case "churn":
618
+ return CHURN_REPAIR_RETRY_SCHEDULE_MS;
619
+ }
620
+ };
621
+
622
+ const resolveRepairRetrySchedule = (
623
+ mode: RepairDispatchMode,
624
+ override?: number[],
625
+ trackedFrontier = false,
626
+ ) => {
627
+ const fallback = getRepairRetrySchedule(mode);
628
+ if (!override || override.length === 0) {
629
+ return fallback;
630
+ }
631
+ if (
632
+ trackedFrontier &&
633
+ override.length === 1 &&
634
+ override[0] === 0 &&
635
+ fallback.length > 1
636
+ ) {
637
+ // A tracked frontier with only an immediate retry would otherwise stay on
638
+ // attempt 0 forever, which means rateless-only retries and no sparse-tail
639
+ // simple fallback. Keep the immediate seed, then continue with the normal
640
+ // tracked repair schedule.
641
+ return [0, ...fallback.slice(1)];
642
+ }
643
+ return override;
644
+ };
645
+
646
+ const getRepairTransportForAttempt = (
647
+ mode: RepairDispatchMode,
648
+ attemptIndex: number,
649
+ ): RepairTransportMode => {
650
+ if (mode === "churn") {
651
+ return "simple";
652
+ }
653
+ return attemptIndex === 0 ? "rateless" : "simple";
654
+ };
520
655
 
521
656
  const toPositiveInteger = (
522
657
  value: number | undefined,
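The constants and helpers above define the repair retry policy: each dispatch mode gets its own backoff schedule, churn repair always uses simple hash-based sync, and the other modes try the rateless synchronizer once before falling back to simple sync on later attempts. A minimal standalone sketch of how the schedule and transport choice pair up (the values mirror the constants in this hunk; SCHEDULES, transportForAttempt, and plan are illustrative names, not package exports):

// Minimal standalone sketch of the schedule/transport pairing above.
// Mirrors the *_RETRY_SCHEDULE_MS constants and getRepairTransportForAttempt
// from the diff; the surrounding SharedLog state is omitted.
type RepairDispatchMode = "join-warmup" | "join-authoritative" | "append-backfill" | "churn";
type RepairTransportMode = "rateless" | "simple";

const SCHEDULES: Record<RepairDispatchMode, number[]> = {
	"join-warmup": [0, 1_000, 3_000, 7_000, 15_000, 30_000, 60_000],
	"join-authoritative": [0, 1_000, 3_000, 7_000, 15_000, 30_000, 60_000],
	"append-backfill": [0, 1_000, 3_000, 7_000],
	churn: [0, 1_000, 3_000, 7_000, 15_000, 30_000, 45_000],
};

const transportForAttempt = (
	mode: RepairDispatchMode,
	attemptIndex: number,
): RepairTransportMode =>
	mode === "churn" ? "simple" : attemptIndex === 0 ? "rateless" : "simple";

// Example: the (delay, transport) pairs a join-authoritative repair would use.
const plan = SCHEDULES["join-authoritative"].map((delayMs, attempt) => ({
	delayMs,
	transport: transportForAttempt("join-authoritative", attempt),
}));
// => [{ delayMs: 0, transport: "rateless" }, { delayMs: 1000, transport: "simple" }, ...]
console.log(plan);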
@@ -751,8 +886,24 @@ export class SharedLog<
751
886
  private _repairRetryTimers!: Set<ReturnType<typeof setTimeout>>;
752
887
  private _recentRepairDispatch!: Map<string, Map<string, number>>;
753
888
  private _repairSweepRunning!: boolean;
754
- private _repairSweepForceFreshPending!: boolean;
755
- private _repairSweepAddedPeersPending!: Set<string>;
889
+ private _repairSweepPendingModes!: Set<RepairDispatchMode>;
890
+ private _repairSweepPendingPeersByMode!: Map<RepairDispatchMode, Set<string>>;
891
+ private _repairFrontierByMode!: Map<
892
+ RepairDispatchMode,
893
+ Map<string, Map<string, EntryReplicated<R>>>
894
+ >;
895
+ private _repairFrontierActiveTargetsByMode!: Map<RepairDispatchMode, Set<string>>;
896
+ private _repairSweepOptimisticGidPeersPending!: Map<string, Map<string, number>>;
897
+ private _entryKnownPeers!: Map<string, Set<string>>;
898
+ private _joinAuthoritativeRepairTimersByDelay!: Map<
899
+ number,
900
+ ReturnType<typeof setTimeout>
901
+ >;
902
+ private _joinAuthoritativeRepairPeersByDelay!: Map<number, Set<string>>;
903
+ private _assumeSyncedRepairSuppressedUntil!: number;
904
+ private _appendBackfillTimer?: ReturnType<typeof setTimeout>;
905
+ private _appendBackfillPendingByTarget!: Map<string, Map<string, EntryReplicated<R>>>;
906
+ private _repairMetrics!: RepairMetrics;
756
907
  private _topicSubscribersCache!: Map<
757
908
  string,
758
909
  { expiresAt: number; keys: PublicSignKey[] }
@@ -1187,6 +1338,7 @@ export class SharedLog<
1187
1338
 
1188
1339
  private async _appendDeliverToReplicators(
1189
1340
  entry: Entry<T>,
1341
+ coordinates: NumberFromType<R>[],
1190
1342
  minReplicasValue: number,
1191
1343
  leaders: Map<string, any>,
1192
1344
  selfHash: string,
@@ -1204,11 +1356,35 @@ export class SharedLog<
1204
1356
  ? { timeoutMs: delivery.timeout, signal: delivery.signal }
1205
1357
  : undefined;
1206
1358
 
1359
+ const fullReplicaDeliveryCandidates =
1360
+ await this.getFullReplicaRepairCandidates(undefined, {
1361
+ includeSubscribers: false,
1362
+ });
1363
+ if (minReplicasValue >= Math.max(1, fullReplicaDeliveryCandidates.size)) {
1364
+ for (const peer of fullReplicaDeliveryCandidates) {
1365
+ if (!leaders.has(peer)) {
1366
+ leaders.set(peer, { intersecting: true });
1367
+ }
1368
+ }
1369
+ }
1370
+
1371
+ const entryReplicatedForRepair = this.createEntryReplicatedForRepair({
1372
+ entry,
1373
+ coordinates,
1374
+ leaders: leaders as Map<string, { intersecting: boolean }>,
1375
+ replicas: minReplicasValue,
1376
+ });
1207
1377
  for await (const message of createExchangeHeadsMessages(this.log, [entry])) {
1208
1378
  await this._mergeLeadersFromGidReferences(message, minReplicasValue, leaders);
1209
- const leadersForDelivery = delivery ? new Set(leaders.keys()) : undefined;
1379
+ const authoritativeRecipients = new Set(leaders.keys());
1380
+ const leadersForDelivery = delivery
1381
+ ? new Set(authoritativeRecipients)
1382
+ : undefined;
1210
1383
 
1211
- const set = this.addPeersToGidPeerHistory(entry.meta.gid, leaders.keys());
1384
+ // Outbound append delivery only tells us who we intend to send to, not who has
1385
+ // actually stored the entry. Keep this recipient set local so later repair
1386
+ // sweeps can still backfill peers that missed the initial delivery.
1387
+ const set = new Set(leaders.keys());
1212
1388
  let hasRemotePeers = set.has(selfHash) ? set.size > 1 : set.size > 0;
1213
1389
  const allowSubscriberFallback =
1214
1390
  this.syncronizer instanceof SimpleSyncronizer ||
@@ -1239,6 +1415,17 @@ export class SharedLog<
1239
1415
  }
1240
1416
 
1241
1417
  if (!delivery) {
1418
+ for (const peer of authoritativeRecipients) {
1419
+ if (peer === selfHash) {
1420
+ continue;
1421
+ }
1422
+ // Default live append delivery is still optimistic. If one remote misses
1423
+ // the initial heads exchange and the caller did not opt into explicit
1424
+ // delivery acks, we still need a targeted backfill source of truth for the
1425
+ // authoritative recipients or one entry can get stuck at 2/3 replicas
1426
+ // forever. Best-effort fallback subscribers are not repair-worthy.
1427
+ this.queueAppendBackfill(peer, entryReplicatedForRepair);
1428
+ }
1242
1429
  this.rpc
1243
1430
  .send(message, {
1244
1431
  mode: isLeader
@@ -1268,6 +1455,7 @@ export class SharedLog<
1268
1455
 
1269
1456
  const ackTo: string[] = [];
1270
1457
  let silentTo: string[] | undefined;
1458
+ const repairTargets = new Set<string>();
1271
1459
  // Default delivery semantics: require enough remote ACKs to reach the requested
1272
1460
  // replication degree (local append counts as 1).
1273
1461
  const defaultMinAcks = Math.max(0, minReplicasValue - 1);
@@ -1279,6 +1467,9 @@ export class SharedLog<
1279
1467
  );
1280
1468
 
1281
1469
  for (const peer of orderedRemoteRecipients) {
1470
+ if (authoritativeRecipients.has(peer)) {
1471
+ repairTargets.add(peer);
1472
+ }
1282
1473
  if (ackTo.length < ackLimit) {
1283
1474
  ackTo.push(peer);
1284
1475
  } else {
@@ -1317,6 +1508,12 @@ export class SharedLog<
1317
1508
  })
1318
1509
  .catch((error) => logger.error(error));
1319
1510
  }
1511
+ for (const peer of repairTargets) {
1512
+ // Direct append delivery is intentionally optimistic. Queue one delayed,
1513
+ // batched maybe-sync pass for the intended recipients so stable 3-peer
1514
+ // append workloads do not depend on perfect first-try delivery ordering.
1515
+ this.queueAppendBackfill(peer, entryReplicatedForRepair);
1516
+ }
1320
1517
  }
1321
1518
 
1322
1519
  if (pending.length > 0) {
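queueAppendBackfill in this hunk buffers intended recipients per target and flushes either when a target's buffer reaches repairSweepTargetBufferSize or after APPEND_BACKFILL_DELAY_MS (500 ms), so a burst of appends turns into one batched repair pass per peer instead of one per entry. A self-contained sketch of that size-or-delay batching shape; DelayedBatcher, flush, and maxBatch are illustrative stand-ins for the SharedLog internals (dispatchMaybeMissingEntries, repairSweepTargetBufferSize):

// Size-or-delay batching, as used by queueAppendBackfill in the diff.
class DelayedBatcher<T> {
	private pending = new Map<string, Map<string, T>>();
	private timer?: ReturnType<typeof setTimeout>;

	constructor(
		private readonly flush: (target: string, items: Map<string, T>) => void,
		private readonly maxBatch = 256, // placeholder for repairSweepTargetBufferSize
		private readonly delayMs = 500, // APPEND_BACKFILL_DELAY_MS in the diff
	) {}

	queue(target: string, key: string, item: T) {
		let items = this.pending.get(target);
		if (!items) {
			items = new Map();
			this.pending.set(target, items);
		}
		items.set(key, item);
		if (items.size >= this.maxBatch) {
			this.flushAll(); // large bursts flush immediately
			return;
		}
		if (this.timer) {
			return; // a delayed flush is already scheduled
		}
		const timer = setTimeout(() => {
			if (this.timer === timer) {
				this.timer = undefined;
			}
			this.flushAll();
		}, this.delayMs);
		timer.unref?.(); // do not keep the process alive for a pending backfill
		this.timer = timer;
	}

	flushAll() {
		const pending = this.pending;
		this.pending = new Map();
		for (const [target, items] of pending) {
			this.flush(target, items);
		}
	}
}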
@@ -2016,6 +2213,7 @@ export class SharedLog<
2016
2213
  // Keep local sync/prune state consistent even when a peer disappears
2017
2214
  // through replication-info updates without a topic unsubscribe event.
2018
2215
  this.removePeerFromGidPeerHistory(keyHash);
2216
+ this.removeRepairFrontierTarget(keyHash);
2019
2217
  this._recentRepairDispatch.delete(keyHash);
2020
2218
  if (!isMe) {
2021
2219
  this.syncronizer.onPeerDisconnected(keyHash);
@@ -2483,6 +2681,7 @@ export class SharedLog<
2483
2681
  for (const key of this._gidPeersHistory.keys()) {
2484
2682
  this.removePeerFromGidPeerHistory(publicKeyHash, key);
2485
2683
  }
2684
+ this.removePeerFromEntryKnownPeers(publicKeyHash);
2486
2685
  }
2487
2686
  }
2488
2687
 
@@ -2507,19 +2706,449 @@ export class SharedLog<
2507
2706
  return set;
2508
2707
  }
2509
2708
 
2709
+ private markEntriesKnownByPeer(hashes: Iterable<string>, peer: string) {
2710
+ for (const hash of hashes) {
2711
+ let peers = this._entryKnownPeers.get(hash);
2712
+ if (!peers) {
2713
+ peers = new Set();
2714
+ this._entryKnownPeers.set(hash, peers);
2715
+ }
2716
+ peers.add(peer);
2717
+ }
2718
+ }
2719
+
2720
+ private removeEntriesKnownByPeer(hashes: Iterable<string>, peer: string) {
2721
+ for (const hash of hashes) {
2722
+ const peers = this._entryKnownPeers.get(hash);
2723
+ if (!peers) {
2724
+ continue;
2725
+ }
2726
+ peers.delete(peer);
2727
+ if (peers.size === 0) {
2728
+ this._entryKnownPeers.delete(hash);
2729
+ }
2730
+ }
2731
+ }
2732
+
2733
+ private removePeerFromEntryKnownPeers(peer: string) {
2734
+ for (const [hash, peers] of this._entryKnownPeers) {
2735
+ peers.delete(peer);
2736
+ if (peers.size === 0) {
2737
+ this._entryKnownPeers.delete(hash);
2738
+ }
2739
+ }
2740
+ }
2741
+
2742
+ private isEntryKnownByPeer(hash: string, peer: string) {
2743
+ return this._entryKnownPeers.get(hash)?.has(peer) === true;
2744
+ }
2745
+
2746
+ private markRepairSweepOptimisticPeer(gid: string, peer: string) {
2747
+ let peers = this._repairSweepOptimisticGidPeersPending.get(gid);
2748
+ if (!peers) {
2749
+ peers = new Map();
2750
+ this._repairSweepOptimisticGidPeersPending.set(gid, peers);
2751
+ }
2752
+ peers.set(peer, (peers.get(peer) || 0) + 1);
2753
+ }
2754
+
2755
+ private hasPendingRepairSweepOptimisticPeer(gid: string, peer: string) {
2756
+ return (this._repairSweepOptimisticGidPeersPending.get(gid)?.get(peer) || 0) > 0;
2757
+ }
2758
+
2759
+ private createEntryReplicatedForRepair(properties: {
2760
+ entry: Entry<T>;
2761
+ coordinates: NumberFromType<R>[];
2762
+ leaders: Map<string, { intersecting: boolean }>;
2763
+ replicas: number;
2764
+ }) {
2765
+ const assignedToRangeBoundary = shouldAssignToRangeBoundary(
2766
+ properties.leaders,
2767
+ properties.replicas,
2768
+ );
2769
+ const cidObject = cidifyString(properties.entry.hash);
2770
+ const hashNumber = this.indexableDomain.numbers.bytesToNumber(
2771
+ cidObject.multihash.digest,
2772
+ );
2773
+ return new this.indexableDomain.constructorEntry({
2774
+ assignedToRangeBoundary,
2775
+ coordinates: properties.coordinates,
2776
+ meta: properties.entry.meta,
2777
+ hash: properties.entry.hash,
2778
+ hashNumber,
2779
+ });
2780
+ }
2781
+
2782
+ private isAssumeSyncedRepairSuppressed() {
2783
+ return this._assumeSyncedRepairSuppressedUntil > Date.now();
2784
+ }
2785
+
2786
+ private isFrontierTrackedRepairMode(mode: RepairDispatchMode) {
2787
+ return mode !== "join-warmup";
2788
+ }
2789
+
2790
+ private async sleepTracked(delayMs: number) {
2791
+ if (delayMs <= 0) {
2792
+ return;
2793
+ }
2794
+ await new Promise<void>((resolve) => {
2795
+ const timer = setTimeout(() => {
2796
+ this._repairRetryTimers.delete(timer);
2797
+ resolve();
2798
+ }, delayMs);
2799
+ timer.unref?.();
2800
+ this._repairRetryTimers.add(timer);
2801
+ });
2802
+ }
2803
+
2804
+ private queueRepairFrontierEntries(
2805
+ mode: RepairDispatchMode,
2806
+ target: string,
2807
+ entries: Map<string, EntryReplicated<R>>,
2808
+ ) {
2809
+ let targets = this._repairFrontierByMode.get(mode);
2810
+ if (!targets) {
2811
+ targets = new Map();
2812
+ this._repairFrontierByMode.set(mode, targets);
2813
+ }
2814
+ let pending = targets.get(target);
2815
+ if (!pending) {
2816
+ pending = new Map();
2817
+ targets.set(target, pending);
2818
+ }
2819
+ for (const [hash, entry] of entries) {
2820
+ pending.set(hash, entry);
2821
+ }
2822
+ }
2823
+
2824
+ private clearRepairFrontierHashes(target: string, hashes: Iterable<string>) {
2825
+ const hashList = [...hashes];
2826
+ if (hashList.length === 0) {
2827
+ return;
2828
+ }
2829
+ for (const mode of REPAIR_DISPATCH_MODES) {
2830
+ const pending = this._repairFrontierByMode.get(mode)?.get(target);
2831
+ if (!pending) {
2832
+ continue;
2833
+ }
2834
+ for (const hash of hashList) {
2835
+ pending.delete(hash);
2836
+ }
2837
+ if (pending.size === 0) {
2838
+ this._repairFrontierByMode.get(mode)?.delete(target);
2839
+ }
2840
+ }
2841
+ }
2842
+
2843
+ private async getFullReplicaRepairCandidates(
2844
+ extraPeers?: Iterable<string>,
2845
+ options?: { includeSubscribers?: boolean },
2846
+ ) {
2847
+ const candidates = new Set<string>([
2848
+ this.node.identity.publicKey.hashcode(),
2849
+ ]);
2850
+ try {
2851
+ for (const peer of await this.getReplicators()) {
2852
+ candidates.add(peer);
2853
+ }
2854
+ } catch {
2855
+ for (const peer of this.uniqueReplicators) {
2856
+ candidates.add(peer);
2857
+ }
2858
+ }
2859
+ for (const peer of extraPeers ?? []) {
2860
+ candidates.add(peer);
2861
+ }
2862
+ if (options?.includeSubscribers !== false) {
2863
+ try {
2864
+ for (const subscriber of (await this._getTopicSubscribers(this.topic)) ?? []) {
2865
+ candidates.add(subscriber.hashcode());
2866
+ }
2867
+ } catch {
2868
+ // Best-effort only; explicit repair peers still keep the path safe.
2869
+ }
2870
+ }
2871
+ return candidates;
2872
+ }
2873
+
2874
+ private removeRepairFrontierTarget(target: string) {
2875
+ for (const mode of REPAIR_DISPATCH_MODES) {
2876
+ this._repairFrontierByMode.get(mode)?.delete(target);
2877
+ this._repairFrontierActiveTargetsByMode.get(mode)?.delete(target);
2878
+ }
2879
+ }
2880
+
2881
+ private async sendRepairConfirmation(
2882
+ target: PublicSignKey,
2883
+ hashes: Iterable<string>,
2884
+ ) {
2885
+ const uniqueHashes = [...new Set(hashes)];
2886
+ for (let i = 0; i < uniqueHashes.length; i += REPAIR_CONFIRMATION_HASH_BATCH_SIZE) {
2887
+ const chunk = uniqueHashes.slice(
2888
+ i,
2889
+ i + REPAIR_CONFIRMATION_HASH_BATCH_SIZE,
2890
+ );
2891
+ await this.rpc.send(new ConfirmEntriesMessage({ hashes: chunk }), {
2892
+ priority: 1,
2893
+ mode: new SilentDelivery({ to: [target], redundancy: 1 }),
2894
+ });
2895
+ }
2896
+ }
2897
+
2898
+ private async pushRepairEntries(
2899
+ target: string,
2900
+ entries: Map<string, EntryReplicated<R>>,
2901
+ ) {
2902
+ for await (const message of createExchangeHeadsMessages(
2903
+ this.log,
2904
+ [...entries.keys()],
2905
+ )) {
2906
+ message.reserved[0] |= EXCHANGE_HEADS_REPAIR_HINT;
2907
+ await this.rpc.send(message, {
2908
+ priority: 1,
2909
+ mode: new SilentDelivery({ to: [target], redundancy: 1 }),
2910
+ });
2911
+ }
2912
+ }
2913
+
2914
+ private async sendRepairEntriesWithTransport(
2915
+ target: string,
2916
+ entries: Map<string, EntryReplicated<R>>,
2917
+ transport: RepairTransportMode,
2918
+ options?: { bypassKnownPeers?: boolean },
2919
+ ) {
2920
+ const unknownEntries = new Map<string, EntryReplicated<R>>();
2921
+ const knownHashes: string[] = [];
2922
+ for (const [hash, entry] of entries) {
2923
+ if (options?.bypassKnownPeers || !this.isEntryKnownByPeer(hash, target)) {
2924
+ unknownEntries.set(hash, entry);
2925
+ } else {
2926
+ knownHashes.push(hash);
2927
+ }
2928
+ }
2929
+ this.clearRepairFrontierHashes(target, knownHashes);
2930
+ if (unknownEntries.size === 0) {
2931
+ return;
2932
+ }
2933
+ if (transport === "simple") {
2934
+ // Fallback repair should not depend on the target completing the
2935
+ // RequestMaybeSync -> ResponseMaybeSync round trip.
2936
+ await this.pushRepairEntries(target, unknownEntries);
2937
+ return;
2938
+ }
2939
+
2940
+ await this.syncronizer.onMaybeMissingEntries({
2941
+ entries: unknownEntries,
2942
+ targets: [target],
2943
+ });
2944
+ }
2945
+
2946
+ private async sendMaybeMissingEntriesNow(
2947
+ target: string,
2948
+ entries: Map<string, EntryReplicated<R>>,
2949
+ options: {
2950
+ mode: RepairDispatchMode;
2951
+ transport: RepairTransportMode;
2952
+ bypassRecentDedupe?: boolean;
2953
+ },
2954
+ ) {
2955
+ if (entries.size === 0) {
2956
+ return;
2957
+ }
2958
+
2959
+ const now = Date.now();
2960
+ let recentlyDispatchedByHash = this._recentRepairDispatch.get(target);
2961
+ if (!recentlyDispatchedByHash) {
2962
+ recentlyDispatchedByHash = new Map();
2963
+ this._recentRepairDispatch.set(target, recentlyDispatchedByHash);
2964
+ }
2965
+ for (const [hash, ts] of recentlyDispatchedByHash) {
2966
+ if (now - ts > RECENT_REPAIR_DISPATCH_TTL_MS) {
2967
+ recentlyDispatchedByHash.delete(hash);
2968
+ }
2969
+ }
2970
+
2971
+ const filteredEntries =
2972
+ options.bypassRecentDedupe === true
2973
+ ? new Map(entries)
2974
+ : new Map<string, EntryReplicated<any>>();
2975
+ if (options.bypassRecentDedupe !== true) {
2976
+ for (const [hash, entry] of entries) {
2977
+ const prev = recentlyDispatchedByHash.get(hash);
2978
+ if (prev != null && now - prev <= RECENT_REPAIR_DISPATCH_TTL_MS) {
2979
+ continue;
2980
+ }
2981
+ recentlyDispatchedByHash.set(hash, now);
2982
+ filteredEntries.set(hash, entry);
2983
+ }
2984
+ } else {
2985
+ for (const hash of entries.keys()) {
2986
+ recentlyDispatchedByHash.set(hash, now);
2987
+ }
2988
+ }
2989
+ if (filteredEntries.size === 0) {
2990
+ return;
2991
+ }
2992
+
2993
+ const bucket = this._repairMetrics[options.mode];
2994
+ bucket.dispatches += 1;
2995
+ bucket.entries += filteredEntries.size;
2996
+ if (options.transport === "simple") {
2997
+ bucket.simpleFallbackPasses += 1;
2998
+ } else {
2999
+ bucket.ratelessFirstPasses += 1;
3000
+ }
3001
+
3002
+ await Promise.resolve(
3003
+ this.sendRepairEntriesWithTransport(
3004
+ target,
3005
+ filteredEntries,
3006
+ options.transport,
3007
+ { bypassKnownPeers: options.mode === "churn" },
3008
+ ),
3009
+ ).catch((error: any) => logger.error(error));
3010
+ }
3011
+
3012
+ private ensureRepairFrontierRunner(
3013
+ mode: RepairDispatchMode,
3014
+ target: string,
3015
+ retryScheduleMs?: number[],
3016
+ ) {
3017
+ const activeTargets = this._repairFrontierActiveTargetsByMode.get(mode);
3018
+ if (!activeTargets || activeTargets.has(target) || this.closed) {
3019
+ return;
3020
+ }
3021
+ activeTargets.add(target);
3022
+ const retrySchedule = resolveRepairRetrySchedule(
3023
+ mode,
3024
+ retryScheduleMs,
3025
+ this.isFrontierTrackedRepairMode(mode),
3026
+ );
3027
+ const steadyStateDelay =
3028
+ retrySchedule.length > 1
3029
+ ? Math.max(1, retrySchedule[retrySchedule.length - 1] - retrySchedule[retrySchedule.length - 2])
3030
+ : Math.max(retrySchedule[0] || 1_000, 1_000);
3031
+
3032
+ void (async () => {
3033
+ let attemptIndex = 0;
3034
+ try {
3035
+ for (;;) {
3036
+ if (this.closed) {
3037
+ return;
3038
+ }
3039
+ const pending = this._repairFrontierByMode.get(mode)?.get(target);
3040
+ if (!pending || pending.size === 0) {
3041
+ return;
3042
+ }
3043
+
3044
+ if (
3045
+ (mode === "join-warmup" || mode === "join-authoritative") &&
3046
+ this.isAssumeSyncedRepairSuppressed()
3047
+ ) {
3048
+ await this.sleepTracked(
3049
+ Math.max(250, this._assumeSyncedRepairSuppressedUntil - Date.now()),
3050
+ );
3051
+ continue;
3052
+ }
3053
+
3054
+ await this.sendMaybeMissingEntriesNow(target, pending, {
3055
+ mode,
3056
+ transport: getRepairTransportForAttempt(mode, attemptIndex),
3057
+ bypassRecentDedupe: true,
3058
+ });
3059
+
3060
+ const remaining = this._repairFrontierByMode.get(mode)?.get(target);
3061
+ if (!remaining || remaining.size === 0) {
3062
+ return;
3063
+ }
3064
+
3065
+ const waitMs =
3066
+ attemptIndex + 1 < retrySchedule.length
3067
+ ? Math.max(0, retrySchedule[attemptIndex + 1] - retrySchedule[attemptIndex])
3068
+ : steadyStateDelay;
3069
+ attemptIndex = Math.min(attemptIndex + 1, retrySchedule.length - 1);
3070
+ await this.sleepTracked(waitMs);
3071
+ }
3072
+ } finally {
3073
+ activeTargets.delete(target);
3074
+ if (
3075
+ !this.closed &&
3076
+ (this._repairFrontierByMode.get(mode)?.get(target)?.size || 0) > 0
3077
+ ) {
3078
+ this.ensureRepairFrontierRunner(mode, target, retryScheduleMs);
3079
+ }
3080
+ }
3081
+ })().catch((error: any) => {
3082
+ activeTargets.delete(target);
3083
+ logger.error(error);
3084
+ });
3085
+ }
3086
+
3087
+ private flushAppendBackfill() {
3088
+ if (this._appendBackfillPendingByTarget.size === 0) {
3089
+ return;
3090
+ }
3091
+ const pending = this._appendBackfillPendingByTarget;
3092
+ this._appendBackfillPendingByTarget = new Map();
3093
+ for (const [target, entries] of pending) {
3094
+ this.dispatchMaybeMissingEntries(target, entries, {
3095
+ mode: "append-backfill",
3096
+ });
3097
+ }
3098
+ }
3099
+
3100
+ private queueAppendBackfill(target: string, entry: EntryReplicated<R>) {
3101
+ let entries = this._appendBackfillPendingByTarget.get(target);
3102
+ if (!entries) {
3103
+ entries = new Map();
3104
+ this._appendBackfillPendingByTarget.set(target, entries);
3105
+ }
3106
+ entries.set(entry.hash, entry);
3107
+ if (entries.size >= this.repairSweepTargetBufferSize) {
3108
+ this.flushAppendBackfill();
3109
+ return;
3110
+ }
3111
+ if (this._appendBackfillTimer || this.closed) {
3112
+ return;
3113
+ }
3114
+ const timer = setTimeout(() => {
3115
+ this._repairRetryTimers.delete(timer);
3116
+ if (this._appendBackfillTimer === timer) {
3117
+ this._appendBackfillTimer = undefined;
3118
+ }
3119
+ if (this.closed) {
3120
+ return;
3121
+ }
3122
+ this.flushAppendBackfill();
3123
+ }, APPEND_BACKFILL_DELAY_MS);
3124
+ timer.unref?.();
3125
+ this._repairRetryTimers.add(timer);
3126
+ this._appendBackfillTimer = timer;
3127
+ }
3128
+
2510
3129
  private dispatchMaybeMissingEntries(
2511
3130
  target: string,
2512
3131
  entries: Map<string, EntryReplicated<R>>,
2513
- options?: {
3132
+ options: {
3133
+ mode: RepairDispatchMode;
2514
3134
  bypassRecentDedupe?: boolean;
2515
3135
  retryScheduleMs?: number[];
2516
- forceFreshDelivery?: boolean;
2517
3136
  },
2518
3137
  ) {
2519
3138
  if (entries.size === 0) {
2520
3139
  return;
2521
3140
  }
2522
3141
 
3142
+ if (this.isFrontierTrackedRepairMode(options.mode)) {
3143
+ this.queueRepairFrontierEntries(options.mode, target, entries);
3144
+ this.ensureRepairFrontierRunner(
3145
+ options.mode,
3146
+ target,
3147
+ options.retryScheduleMs,
3148
+ );
3149
+ return;
3150
+ }
3151
+
2523
3152
  const now = Date.now();
2524
3153
  let recentlyDispatchedByHash = this._recentRepairDispatch.get(target);
2525
3154
  if (!recentlyDispatchedByHash) {
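ensureRepairFrontierRunner (earlier in this hunk) keeps one async loop per (mode, target): each pass re-reads the still-unconfirmed frontier, sends it, then waits according to the retry schedule, settling into a steady-state interval once the schedule is exhausted; hashes leave the frontier only when the target confirms them. A reduced sketch of that loop, assuming a sendPass callback in place of sendMaybeMissingEntriesNow and ignoring the suppression and known-peer filtering of the real code:

// Reduced sketch of the per-target frontier runner loop in the diff.
// Confirmation handling (clearing hashes from `frontier`) happens elsewhere,
// e.g. when a ConfirmEntriesMessage arrives.
const sleep = (ms: number) => new Promise<void>((resolve) => setTimeout(resolve, ms));

async function runFrontier<T>(
	frontier: Map<string, T>, // hash -> entry still unconfirmed for this target
	schedule: number[], // e.g. [0, 1_000, 3_000, 7_000, 15_000, 30_000, 60_000]
	sendPass: (pending: Map<string, T>, attempt: number) => Promise<void>,
	isClosed: () => boolean,
) {
	// Once the schedule runs out, keep retrying at the last inter-attempt gap.
	const steadyStateDelay =
		schedule.length > 1
			? Math.max(1, schedule[schedule.length - 1] - schedule[schedule.length - 2])
			: Math.max(schedule[0] || 1_000, 1_000);

	let attempt = 0;
	while (!isClosed() && frontier.size > 0) {
		await sendPass(new Map(frontier), attempt);
		if (frontier.size === 0) {
			return; // everything was confirmed while the pass was in flight
		}
		const waitMs =
			attempt + 1 < schedule.length
				? Math.max(0, schedule[attempt + 1] - schedule[attempt])
				: steadyStateDelay;
		attempt = Math.min(attempt + 1, schedule.length - 1);
		await sleep(waitMs);
	}
}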
@@ -2533,10 +3162,10 @@ export class SharedLog<
2533
3162
  }
2534
3163
 
2535
3164
  const filteredEntries =
2536
- options?.bypassRecentDedupe === true
3165
+ options.bypassRecentDedupe === true
2537
3166
  ? new Map(entries)
2538
3167
  : new Map<string, EntryReplicated<any>>();
2539
- if (options?.bypassRecentDedupe !== true) {
3168
+ if (options.bypassRecentDedupe !== true) {
2540
3169
  for (const [hash, entry] of entries) {
2541
3170
  const prev = recentlyDispatchedByHash.get(hash);
2542
3171
  if (prev != null && now - prev <= RECENT_REPAIR_DISPATCH_TTL_MS) {
@@ -2553,64 +3182,69 @@ export class SharedLog<
2553
3182
  if (filteredEntries.size === 0) {
2554
3183
  return;
2555
3184
  }
2556
- const retrySchedule =
2557
- options?.retryScheduleMs && options.retryScheduleMs.length > 0
2558
- ? options.retryScheduleMs
2559
- : options?.forceFreshDelivery
2560
- ? FORCE_FRESH_RETRY_SCHEDULE_MS
2561
- : [0];
2562
-
2563
- const run = () => {
2564
- // For force-fresh churn repair we intentionally bypass rateless IBLT and
2565
- // use simple hash-based sync. This path is a directed "push these hashes
2566
- // to that peer" recovery flow; using simple sync here avoids occasional
2567
- // single-hash gaps seen with IBLT-oriented maybe-sync batches under churn.
2568
- if (
2569
- options?.forceFreshDelivery &&
2570
- this.syncronizer instanceof RatelessIBLTSynchronizer
2571
- ) {
2572
- return Promise.resolve(
2573
- this.syncronizer.simple.onMaybeMissingEntries({
2574
- entries: filteredEntries,
2575
- targets: [target],
2576
- }),
2577
- ).catch((error: any) => logger.error(error));
3185
+
3186
+ if (
3187
+ (options.mode === "join-warmup" ||
3188
+ options.mode === "join-authoritative") &&
3189
+ this.isAssumeSyncedRepairSuppressed()
3190
+ ) {
3191
+ return;
3192
+ }
3193
+
3194
+ const retrySchedule = resolveRepairRetrySchedule(
3195
+ options.mode,
3196
+ options.retryScheduleMs,
3197
+ this.isFrontierTrackedRepairMode(options.mode),
3198
+ );
3199
+ const bucket = this._repairMetrics[options.mode];
3200
+ bucket.dispatches += 1;
3201
+ bucket.entries += filteredEntries.size;
3202
+
3203
+ const run = (transport: RepairTransportMode) => {
3204
+ if (transport === "simple") {
3205
+ bucket.simpleFallbackPasses += 1;
3206
+ } else {
3207
+ bucket.ratelessFirstPasses += 1;
2578
3208
  }
2579
3209
 
2580
3210
  return Promise.resolve(
2581
- this.syncronizer.onMaybeMissingEntries({
2582
- entries: filteredEntries,
2583
- targets: [target],
2584
- }),
3211
+ this.sendRepairEntriesWithTransport(
3212
+ target,
3213
+ filteredEntries,
3214
+ transport,
3215
+ { bypassKnownPeers: options.mode === "churn" },
3216
+ ),
2585
3217
  ).catch((error: any) => logger.error(error));
2586
3218
  };
2587
3219
 
2588
- for (const delayMs of retrySchedule) {
3220
+ retrySchedule.forEach((delayMs, index) => {
3221
+ const transport = getRepairTransportForAttempt(options.mode, index);
2589
3222
  if (delayMs === 0) {
2590
- void run();
2591
- continue;
3223
+ void run(transport);
3224
+ return;
2592
3225
  }
2593
3226
  const timer = setTimeout(() => {
2594
3227
  this._repairRetryTimers.delete(timer);
2595
3228
  if (this.closed) {
2596
3229
  return;
2597
3230
  }
2598
- void run();
3231
+ void run(transport);
2599
3232
  }, delayMs);
2600
3233
  timer.unref?.();
2601
3234
  this._repairRetryTimers.add(timer);
2602
- }
3235
+ });
2603
3236
  }
2604
3237
 
2605
3238
  private scheduleRepairSweep(options: {
2606
- forceFreshDelivery: boolean;
2607
- addedPeers: Set<string>;
3239
+ mode: RepairDispatchMode;
3240
+ peers?: Iterable<string>;
2608
3241
  }) {
2609
- if (options.forceFreshDelivery) {
2610
- this._repairSweepForceFreshPending = true;
2611
- }
2612
- for (const peer of options.addedPeers) {
2613
- this._repairSweepAddedPeersPending.add(peer);
3242
+ this._repairSweepPendingModes.add(options.mode);
3243
+ const pendingPeers = this._repairSweepPendingPeersByMode.get(options.mode);
3244
+ if (pendingPeers) {
3245
+ for (const peer of options.peers ?? []) {
3246
+ pendingPeers.add(peer);
3247
+ }
2614
3248
  }
2615
3249
  if (!this._repairSweepRunning && !this.closed) {
2616
3250
  this._repairSweepRunning = true;
@@ -2618,88 +3252,293 @@ export class SharedLog<
2618
3252
  }
2619
3253
  }
2620
3254
 
3255
+ private scheduleJoinAuthoritativeRepair(peers: Set<string>) {
3256
+ if (this.closed || peers.size === 0) {
3257
+ return;
3258
+ }
3259
+
3260
+ for (const delayMs of JOIN_AUTHORITATIVE_REPAIR_SWEEP_DELAYS_MS) {
3261
+ let pendingPeers = this._joinAuthoritativeRepairPeersByDelay.get(delayMs);
3262
+ if (!pendingPeers) {
3263
+ pendingPeers = new Set();
3264
+ this._joinAuthoritativeRepairPeersByDelay.set(delayMs, pendingPeers);
3265
+ }
3266
+ for (const peer of peers) {
3267
+ pendingPeers.add(peer);
3268
+ }
3269
+
3270
+ if (this._joinAuthoritativeRepairTimersByDelay.has(delayMs)) {
3271
+ continue;
3272
+ }
3273
+
3274
+ const timer = setTimeout(() => {
3275
+ this._repairRetryTimers.delete(timer);
3276
+ this._joinAuthoritativeRepairTimersByDelay.delete(delayMs);
3277
+ if (this.closed) {
3278
+ return;
3279
+ }
3280
+
3281
+ const peersForSweep = new Set(
3282
+ this._joinAuthoritativeRepairPeersByDelay.get(delayMs) ?? [],
3283
+ );
3284
+ this._joinAuthoritativeRepairPeersByDelay.delete(delayMs);
3285
+ if (peersForSweep.size === 0) {
3286
+ return;
3287
+ }
3288
+
3289
+ // A joiner's leader view can still be partial on the first delayed pass
3290
+ // under pubsub jitter. Bounded per-peer rescans widen the authoritative
3291
+ // frontier without adding per-append sweeps.
3292
+ this.scheduleRepairSweep({
3293
+ mode: "join-authoritative",
3294
+ peers: peersForSweep,
3295
+ });
3296
+ }, delayMs);
3297
+ timer.unref?.();
3298
+ this._repairRetryTimers.add(timer);
3299
+ this._joinAuthoritativeRepairTimersByDelay.set(delayMs, timer);
3300
+ }
3301
+ }
3302
+
2621
3303
  private async runRepairSweep() {
2622
3304
  try {
2623
3305
  while (!this.closed) {
2624
- const forceFreshDelivery = this._repairSweepForceFreshPending;
2625
- const addedPeers = new Set(this._repairSweepAddedPeersPending);
2626
- this._repairSweepForceFreshPending = false;
2627
- this._repairSweepAddedPeersPending.clear();
3306
+ const pendingModes = new Set(this._repairSweepPendingModes);
3307
+ const pendingPeersByMode = cloneRepairPendingPeersByMode(
3308
+ this._repairSweepPendingPeersByMode,
3309
+ );
3310
+ this._repairSweepPendingModes.clear();
3311
+ for (const peers of this._repairSweepPendingPeersByMode.values()) {
3312
+ peers.clear();
3313
+ }
2628
3314
 
2629
- if (!forceFreshDelivery && addedPeers.size === 0) {
3315
+ if (pendingModes.size === 0) {
2630
3316
  return;
2631
3317
  }
2632
3318
 
2633
- const pendingByTarget = new Map<string, Map<string, EntryReplicated<any>>>();
2634
- const flushTarget = (target: string) => {
2635
- const entries = pendingByTarget.get(target);
3319
+ const optimisticGidPeersByMode = new Map<
3320
+ RepairDispatchMode,
3321
+ Map<string, Set<string>>
3322
+ >();
3323
+ const optimisticGidPeersConsumedByMode = new Map<
3324
+ RepairDispatchMode,
3325
+ Map<string, Map<string, number>>
3326
+ >();
3327
+ for (const mode of pendingModes) {
3328
+ const modePeers = pendingPeersByMode.get(mode);
3329
+ if (!modePeers || modePeers.size === 0) {
3330
+ continue;
3331
+ }
3332
+ const optimisticGidPeers = new Map<string, Set<string>>();
3333
+ const optimisticGidPeersConsumed = new Map<string, Map<string, number>>();
3334
+ for (const [gid, peerCounts] of this._repairSweepOptimisticGidPeersPending) {
3335
+ let matchedPeers: Set<string> | undefined;
3336
+ let matchedCounts: Map<string, number> | undefined;
3337
+ for (const [peer, count] of peerCounts) {
3338
+ if (!modePeers.has(peer)) {
3339
+ continue;
3340
+ }
3341
+ matchedPeers ||= new Set();
3342
+ matchedCounts ||= new Map();
3343
+ matchedPeers.add(peer);
3344
+ matchedCounts.set(peer, count);
3345
+ }
3346
+ if (matchedPeers && matchedCounts) {
3347
+ optimisticGidPeers.set(gid, matchedPeers);
3348
+ optimisticGidPeersConsumed.set(gid, matchedCounts);
3349
+ }
3350
+ }
3351
+ if (optimisticGidPeers.size > 0) {
3352
+ optimisticGidPeersByMode.set(mode, optimisticGidPeers);
3353
+ optimisticGidPeersConsumedByMode.set(mode, optimisticGidPeersConsumed);
3354
+ }
3355
+ }
3356
+
3357
+ const pendingByMode = new Map<
3358
+ RepairDispatchMode,
3359
+ Map<string, Map<string, EntryReplicated<any>>>
3360
+ >(REPAIR_DISPATCH_MODES.map((mode) => [mode, new Map()]));
3361
+ const pendingRepairPeers = new Set<string>();
3362
+ for (const peers of pendingPeersByMode.values()) {
3363
+ for (const peer of peers) {
3364
+ pendingRepairPeers.add(peer);
3365
+ }
3366
+ }
3367
+ const fullReplicaRepairCandidates =
3368
+ await this.getFullReplicaRepairCandidates(pendingRepairPeers, {
3369
+ includeSubscribers: false,
3370
+ });
3371
+ const fullReplicaRepairCandidateCount = Math.max(
3372
+ 1,
3373
+ fullReplicaRepairCandidates.size,
3374
+ );
3375
+ const nextFrontierByMode = new Map<
3376
+ RepairDispatchMode,
3377
+ Map<string, Map<string, EntryReplicated<any>>>
3378
+ >([
3379
+ ["join-authoritative", new Map()],
3380
+ ["churn", new Map()],
3381
+ ]);
3382
+ const flushTarget = (mode: RepairDispatchMode, target: string) => {
3383
+ const targets = pendingByMode.get(mode);
3384
+ const entries = targets?.get(target);
2636
3385
  if (!entries || entries.size === 0) {
2637
3386
  return;
2638
3387
  }
2639
- const isJoinWarmupTarget = addedPeers.has(target);
2640
- const bypassRecentDedupe = isJoinWarmupTarget || forceFreshDelivery;
2641
3388
  this.dispatchMaybeMissingEntries(target, entries, {
2642
- bypassRecentDedupe,
2643
- retryScheduleMs: isJoinWarmupTarget
2644
- ? JOIN_WARMUP_RETRY_SCHEDULE_MS
2645
- : undefined,
2646
- forceFreshDelivery,
3389
+ bypassRecentDedupe: true,
3390
+ mode,
2647
3391
  });
2648
- pendingByTarget.delete(target);
3392
+ targets?.delete(target);
2649
3393
  };
2650
3394
  const queueEntryForTarget = (
3395
+ mode: RepairDispatchMode,
2651
3396
  target: string,
2652
3397
  entry: EntryReplicated<any>,
2653
3398
  ) => {
2654
- let set = pendingByTarget.get(target);
3399
+ const sweepTargets = nextFrontierByMode.get(mode);
3400
+ if (sweepTargets) {
3401
+ let sweepSet = sweepTargets.get(target);
3402
+ if (!sweepSet) {
3403
+ sweepSet = new Map();
3404
+ sweepTargets.set(target, sweepSet);
3405
+ }
3406
+ sweepSet.set(entry.hash, entry);
3407
+ }
3408
+ const targets = pendingByMode.get(mode)!;
3409
+ let set = targets.get(target);
2655
3410
  if (!set) {
2656
3411
  set = new Map();
2657
- pendingByTarget.set(target, set);
3412
+ targets.set(target, set);
2658
3413
  }
2659
3414
  if (set.has(entry.hash)) {
2660
3415
  return;
2661
3416
  }
2662
3417
  set.set(entry.hash, entry);
2663
3418
  if (set.size >= this.repairSweepTargetBufferSize) {
2664
- flushTarget(target);
3419
+ flushTarget(mode, target);
2665
3420
  }
2666
3421
  };
2667
3422
 
2668
3423
  const iterator = this.entryCoordinatesIndex.iterate({});
2669
3424
  try {
2670
- while (!this.closed && !iterator.done()) {
2671
- const entries = await iterator.next(REPAIR_SWEEP_ENTRY_BATCH_SIZE);
2672
- for (const entry of entries) {
2673
- const entryReplicated = entry.value;
2674
- const knownPeers = this._gidPeersHistory.get(entryReplicated.gid);
2675
- const currentPeers = await this.findLeaders(
2676
- entryReplicated.coordinates,
2677
- entryReplicated,
2678
- { roleAge: 0 },
2679
- );
2680
- if (forceFreshDelivery) {
3425
+ while (!this.closed && !iterator.done()) {
3426
+ const entries = await iterator.next(REPAIR_SWEEP_ENTRY_BATCH_SIZE);
3427
+ for (const entry of entries) {
3428
+ const entryReplicated = entry.value;
3429
+ const gid = entryReplicated.gid;
3430
+ const knownPeers = this._gidPeersHistory.get(gid);
3431
+ const requestedReplicas =
3432
+ decodeReplicas(entryReplicated).getValue(this);
3433
+ const currentPeers = await this.findLeaders(
3434
+ entryReplicated.coordinates,
3435
+ entryReplicated,
3436
+ { roleAge: 0 },
3437
+ );
3438
+
3439
+ if (pendingModes.has("churn")) {
2681
3440
  for (const [currentPeer] of currentPeers) {
2682
3441
  if (currentPeer === this.node.identity.publicKey.hashcode()) {
2683
3442
  continue;
2684
3443
  }
2685
- queueEntryForTarget(currentPeer, entryReplicated);
3444
+ queueEntryForTarget("churn", currentPeer, entryReplicated);
2686
3445
  }
2687
3446
  }
2688
- if (addedPeers.size > 0) {
2689
- for (const peer of addedPeers) {
2690
- if (currentPeers.has(peer) && !knownPeers?.has(peer)) {
2691
- queueEntryForTarget(peer, entryReplicated);
2692
- }
3447
+
3448
+ for (const mode of pendingModes) {
3449
+ const modePeers = pendingPeersByMode.get(mode);
3450
+ if (!modePeers || modePeers.size === 0) {
3451
+ continue;
3452
+ }
3453
+ const optimisticPeers = optimisticGidPeersByMode.get(mode)?.get(gid);
3454
+ for (const peer of modePeers) {
3455
+ if (this.isEntryKnownByPeer(entryReplicated.hash, peer)) {
3456
+ continue;
3457
+ }
3458
+ const wasOptimisticallyAssigned =
3459
+ optimisticPeers?.has(peer) === true;
3460
+ const isCoveredByFullReplicaRepair =
3461
+ mode === "join-authoritative" &&
3462
+ fullReplicaRepairCandidates.has(peer) &&
3463
+ requestedReplicas >= fullReplicaRepairCandidateCount;
3464
+ const shouldQueue =
3465
+ mode === "join-authoritative"
3466
+ ? currentPeers.has(peer) || isCoveredByFullReplicaRepair
3467
+ : wasOptimisticallyAssigned ||
3468
+ (currentPeers.has(peer) && !knownPeers?.has(peer));
3469
+ if (shouldQueue) {
3470
+ // Authoritative join repair must not trust partial gid peer history,
3471
+ // otherwise a late joiner can get stuck with a partial historical
3472
+ // backfill forever. Once we enter the authoritative pass, queue every
3473
+ // entry whose current leader set still includes the added peer.
3474
+ queueEntryForTarget(mode, peer, entryReplicated);
2693
3475
  }
2694
3476
  }
2695
3477
  }
3478
+ }
2696
3479
  }
2697
3480
  } finally {
2698
3481
  await iterator.close();
2699
3482
  }
2700
3483
 
2701
- for (const target of [...pendingByTarget.keys()]) {
2702
- flushTarget(target);
3484
+ for (const [, optimisticGidPeersConsumed] of optimisticGidPeersConsumedByMode) {
3485
+ for (const [gid, peerCounts] of optimisticGidPeersConsumed) {
3486
+ const pendingPeerCounts =
3487
+ this._repairSweepOptimisticGidPeersPending.get(gid);
3488
+ if (!pendingPeerCounts) {
3489
+ continue;
3490
+ }
3491
+ for (const [peer, count] of peerCounts) {
3492
+ const current = pendingPeerCounts.get(peer) || 0;
3493
+ const next = current - count;
3494
+ if (next > 0) {
3495
+ pendingPeerCounts.set(peer, next);
3496
+ } else {
3497
+ pendingPeerCounts.delete(peer);
3498
+ }
3499
+ }
3500
+ if (pendingPeerCounts.size === 0) {
3501
+ this._repairSweepOptimisticGidPeersPending.delete(gid);
3502
+ }
3503
+ }
3504
+ }
3505
+
3506
+ for (const mode of pendingModes) {
3507
+ if (mode !== "join-authoritative" && mode !== "churn") {
3508
+ continue;
3509
+ }
3510
+ const nextTargets = nextFrontierByMode.get(mode) ?? new Map();
3511
+ const frontierTargets = this._repairFrontierByMode.get(mode);
3512
+ for (const target of pendingPeersByMode.get(mode) ?? []) {
3513
+ const replacement = nextTargets.get(target);
3514
+ if (mode === "join-authoritative") {
3515
+ // Authoritative join repair is receipt-driven: a later sweep can have a
3516
+ // narrower transient leader view, but it must not forget unconfirmed
3517
+ // hashes that were already queued for this joiner.
3518
+ if (replacement && replacement.size > 0) {
3519
+ const existing = frontierTargets?.get(target);
3520
+ if (existing && existing.size > 0) {
3521
+ for (const [hash, entry] of replacement) {
3522
+ existing.set(hash, entry);
3523
+ }
3524
+ } else {
3525
+ frontierTargets?.set(target, replacement);
3526
+ }
3527
+ }
3528
+ continue;
3529
+ }
3530
+ if (replacement && replacement.size > 0) {
3531
+ frontierTargets?.set(target, replacement);
3532
+ } else {
3533
+ frontierTargets?.delete(target);
3534
+ }
3535
+ }
3536
+ }
3537
+
3538
+ for (const [mode, targets] of pendingByMode) {
3539
+ for (const target of [...targets.keys()]) {
3540
+ flushTarget(mode, target);
3541
+ }
2703
3542
  }
2704
3543
  }
2705
3544
  } catch (error: any) {
@@ -2708,11 +3547,7 @@ export class SharedLog<
2708
3547
  }
2709
3548
  } finally {
2710
3549
  this._repairSweepRunning = false;
2711
- if (
2712
- !this.closed &&
2713
- (this._repairSweepForceFreshPending ||
2714
- this._repairSweepAddedPeersPending.size > 0)
2715
- ) {
3550
+ if (!this.closed && this._repairSweepPendingModes.size > 0) {
2716
3551
  this._repairSweepRunning = true;
2717
3552
  void this.runRepairSweep();
2718
3553
  }
@@ -2725,9 +3560,89 @@ export class SharedLog<
2725
3560
  entry: Entry<T> | ShallowEntry | EntryReplicated<R>;
2726
3561
  leaders: Map<string, any>;
2727
3562
  };
2728
- }) {
2729
- if (!this.keep || !(await this.keep(args.value.entry))) {
2730
- return this.pruneDebouncedFn.add(args);
3563
+ }): Promise<boolean> {
3564
+ if (this.keep && (await this.keep(args.value.entry))) {
3565
+ return false;
3566
+ }
3567
+ void this.pruneDebouncedFn.add(args);
3568
+ return true;
3569
+ }
3570
+
3571
+ private async pruneJoinedEntriesNoLongerLed(entries: Entry<T>[]) {
3572
+ const selfHash = this.node.identity.publicKey.hashcode();
3573
+ for (const entry of entries) {
3574
+ if (this.closed || this._pendingDeletes.has(entry.hash)) {
3575
+ continue;
3576
+ }
3577
+
3578
+ const leaders = await this.findLeadersFromEntry(
3579
+ entry,
3580
+ decodeReplicas(entry).getValue(this),
3581
+ { roleAge: 0 },
3582
+ );
3583
+
3584
+ if (leaders.has(selfHash)) {
3585
+ this.pruneDebouncedFn.delete(entry.hash);
3586
+ continue;
3587
+ }
3588
+
3589
+ if (leaders.size === 0) {
3590
+ continue;
3591
+ }
3592
+
3593
+ await this.pruneDebouncedFnAddIfNotKeeping({
3594
+ key: entry.hash,
3595
+ value: { entry, leaders },
3596
+ });
3597
+ this.responseToPruneDebouncedFn.delete(entry.hash);
3598
+ }
3599
+ }
3600
+
3601
+ private async pruneIndexedEntriesNoLongerLed() {
3602
+ const selfHash = this.node.identity.publicKey.hashcode();
3603
+ const iterator = this.entryCoordinatesIndex.iterate({});
3604
+ let enqueuedPrune = false;
3605
+ try {
3606
+ while (!this.closed && !iterator.done()) {
3607
+ const entries = await iterator.next(REPAIR_SWEEP_ENTRY_BATCH_SIZE);
3608
+ for (const entry of entries) {
3609
+ const entryReplicated = entry.value;
3610
+ if (this.closed || this._pendingDeletes.has(entryReplicated.hash)) {
3611
+ continue;
3612
+ }
3613
+
3614
+ const leaders = await this.findLeaders(
3615
+ entryReplicated.coordinates,
3616
+ entryReplicated,
3617
+ { roleAge: 0 },
3618
+ );
3619
+
3620
+ if (leaders.has(selfHash)) {
3621
+ this.pruneDebouncedFn.delete(entryReplicated.hash);
3622
+ await this._pendingDeletes
3623
+ .get(entryReplicated.hash)
3624
+ ?.reject(new Error("Failed to delete, is leader again"));
3625
+ this.removePruneRequestSent(entryReplicated.hash);
3626
+ continue;
3627
+ }
3628
+
3629
+ if (leaders.size === 0) {
3630
+ continue;
3631
+ }
3632
+
3633
+ enqueuedPrune =
3634
+ (await this.pruneDebouncedFnAddIfNotKeeping({
3635
+ key: entryReplicated.hash,
3636
+ value: { entry: entryReplicated, leaders },
3637
+ })) || enqueuedPrune;
3638
+ this.responseToPruneDebouncedFn.delete(entryReplicated.hash);
3639
+ }
3640
+ }
3641
+ } finally {
3642
+ await iterator.close();
3643
+ }
3644
+ if (enqueuedPrune && !this.closed) {
3645
+ await this.pruneDebouncedFn.flush();
2731
3646
  }
2732
3647
  }
2733
3648
 
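pruneDebouncedFnAddIfNotKeeping now returns whether a prune was actually enqueued, and pruneIndexedEntriesNoLongerLed uses that to flush the debounced prune queue only when its scan produced work. A minimal sketch of that keep-gate plus conditional-flush shape, with the debounced queue reduced to a plain array and keep/flush as placeholders:

// Keep-gate + conditional flush, as in pruneDebouncedFnAddIfNotKeeping /
// pruneIndexedEntriesNoLongerLed above. The debounced queue is reduced to an array.
type PruneArgs = { key: string };

async function sweepAndMaybeFlush(
	candidates: PruneArgs[],
	keep: ((args: PruneArgs) => Promise<boolean>) | undefined,
	queue: PruneArgs[],
	flush: () => Promise<void>,
) {
	let enqueued = false;
	for (const args of candidates) {
		if (keep && (await keep(args))) {
			continue; // entry is pinned locally; never enqueue a prune for it
		}
		queue.push(args);
		enqueued = true;
	}
	if (enqueued) {
		await flush(); // only pay for a flush when the sweep found something
	}
}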
@@ -2904,6 +3819,7 @@ export class SharedLog<
2904
3819
  } else {
2905
3820
  await this._appendDeliverToReplicators(
2906
3821
  result.entry,
3822
+ coordinates,
2907
3823
  minReplicasValue,
2908
3824
  leaders,
2909
3825
  selfHash,
@@ -2913,13 +3829,14 @@ export class SharedLog<
2913
3829
  }
2914
3830
  }
2915
3831
 
2916
- if (!isLeader && !this.shouldDelayAdaptiveRebalance()) {
3832
+ const delayAdaptiveRebalance = this.shouldDelayAdaptiveRebalance();
3833
+ if (!isLeader && !delayAdaptiveRebalance) {
2917
3834
  this.pruneDebouncedFnAddIfNotKeeping({
2918
3835
  key: result.entry.hash,
2919
3836
  value: { entry: result.entry, leaders },
2920
3837
  });
2921
3838
  }
2922
- if (!this._isAdaptiveReplicating) {
3839
+ if (!delayAdaptiveRebalance) {
2923
3840
  this.rebalanceParticipationDebounced?.call();
2924
3841
  }
2925
3842
 
@@ -2961,8 +3878,21 @@ export class SharedLog<
2961
3878
  this._repairRetryTimers = new Set();
2962
3879
  this._recentRepairDispatch = new Map();
2963
3880
  this._repairSweepRunning = false;
2964
- this._repairSweepForceFreshPending = false;
2965
- this._repairSweepAddedPeersPending = new Set();
3881
+ this._repairSweepPendingModes = new Set();
3882
+ this._repairSweepPendingPeersByMode = createRepairPendingPeersByMode();
3883
+ this._repairFrontierByMode = createRepairFrontierByMode() as Map<
3884
+ RepairDispatchMode,
3885
+ Map<string, Map<string, EntryReplicated<R>>>
3886
+ >;
3887
+ this._repairFrontierActiveTargetsByMode = createRepairActiveTargetsByMode();
3888
+ this._repairSweepOptimisticGidPeersPending = new Map();
3889
+ this._entryKnownPeers = new Map();
3890
+ this._joinAuthoritativeRepairTimersByDelay = new Map();
3891
+ this._joinAuthoritativeRepairPeersByDelay = new Map();
3892
+ this._assumeSyncedRepairSuppressedUntil = 0;
3893
+ this._appendBackfillTimer = undefined;
3894
+ this._appendBackfillPendingByTarget = new Map();
3895
+ this._repairMetrics = createRepairMetrics();
2966
3896
  this._topicSubscribersCache = new Map();
2967
3897
  this.coordinateToHash = new Cache<string>({ max: 1e6, ttl: 1e4 });
2968
3898
  this.recentlyRebalanced = new Cache<string>({ max: 1e4, ttl: 1e5 });
@@ -3041,7 +3971,10 @@ export class SharedLog<
3041
3971
  this.pendingMaturity = new Map();
3042
3972
 
3043
3973
  const id = sha256Base64Sync(this.log.id);
3044
- const storage = await this.node.storage.sublevel(id);
3974
+ const [storage, logScope] = await Promise.all([
3975
+ this.node.storage.sublevel(id),
3976
+ this.node.indexer.scope(id),
3977
+ ]);
3045
3978
 
3046
3979
  const localBlocks = await new AnyBlockStore(await storage.sublevel("blocks"));
3047
3980
  const fanoutService = getSharedLogFanoutService(this.node.services);
@@ -3104,20 +4037,19 @@ export class SharedLog<
3104
4037
  },
3105
4038
  });
3106
4039
 
3107
- await this.remoteBlocks.start();
3108
-
3109
- const logScope = await this.node.indexer.scope(id);
3110
- const replicationIndex = await logScope.scope("replication");
4040
+ const remoteBlocksStartPromise = this.remoteBlocks.start();
4041
+ const [replicationIndex, logIndex] = await Promise.all([
4042
+ logScope.scope("replication"),
4043
+ logScope.scope("log"),
4044
+ ]);
3111
4045
  this._replicationRangeIndex = await replicationIndex.init({
3112
4046
  schema: this.indexableDomain.constructorRange,
3113
4047
  });
3114
-
3115
4048
  this._entryCoordinatesIndex = await replicationIndex.init({
3116
4049
  schema: this.indexableDomain.constructorEntry,
3117
4050
  });
3118
4051
 
3119
- const logIndex = await logScope.scope("log");
3120
-
4052
+ await remoteBlocksStartPromise;
3121
4053
  const hasIndexedReplicationInfo =
3122
4054
  (await this.replicationIndex.count({
3123
4055
  query: [
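The reworked open() above overlaps independent setup: storage.sublevel and indexer.scope now run under one Promise.all, and remoteBlocks.start() is kicked off early and awaited only after the replication indices are initialized. A small sketch of those two latency-hiding moves, with placeholder callbacks standing in for the real calls:

// Overlapping independent async setup, as in the reworked open() above.
// openStorage / openIndexScope / startRemoteBlocks are placeholders for
// node.storage.sublevel, node.indexer.scope, and remoteBlocks.start.
async function openSketch(
	openStorage: () => Promise<unknown>,
	openIndexScope: () => Promise<unknown>,
	startRemoteBlocks: () => Promise<void>,
) {
	// Independent steps run concurrently instead of back to back.
	const [storage, scope] = await Promise.all([openStorage(), openIndexScope()]);

	// Long-running starts are kicked off early and joined only where needed.
	const remoteBlocksStarted = startRemoteBlocks();
	// ... index initialization can proceed here while remote blocks start ...
	await remoteBlocksStarted;

	return { storage, scope };
}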
@@ -3279,47 +4211,50 @@ export class SharedLog<
3279
4211
  }
3280
4212
 
3281
4213
  // Open for communcation
3282
- await this.rpc.open({
3283
- queryType: TransportMessage,
3284
- responseType: TransportMessage,
3285
- responseHandler: (query, context) => this.onMessage(query, context),
3286
- topic: this.topic,
3287
- });
3288
-
3289
4214
  this._onSubscriptionFn =
3290
4215
  this._onSubscriptionFn || this._onSubscription.bind(this);
3291
- await this.node.services.pubsub.addEventListener(
3292
- "subscribe",
3293
- this._onSubscriptionFn,
3294
- );
3295
-
3296
4216
  this._onUnsubscriptionFn =
3297
4217
  this._onUnsubscriptionFn || this._onUnsubscription.bind(this);
3298
- await this.node.services.pubsub.addEventListener(
3299
- "unsubscribe",
3300
- this._onUnsubscriptionFn,
3301
- );
3302
-
3303
- await this.rpc.subscribe();
3304
- await this._openFanoutChannel(options?.fanout);
4218
+ await Promise.all([
4219
+ this.rpc.open({
4220
+ queryType: TransportMessage,
4221
+ responseType: TransportMessage,
4222
+ responseHandler: (query, context) => this.onMessage(query, context),
4223
+ topic: this.topic,
4224
+ }),
4225
+ this.node.services.pubsub.addEventListener(
4226
+ "subscribe",
4227
+ this._onSubscriptionFn,
4228
+ ),
4229
+ this.node.services.pubsub.addEventListener(
4230
+ "unsubscribe",
4231
+ this._onUnsubscriptionFn,
4232
+ ),
4233
+ ]);
3305
4234
 
3306
- // mark all our replicaiton ranges as "new", this would allow other peers to understand that we recently reopend our database and might need some sync and warmup
3307
- await this.updateTimestampOfOwnedReplicationRanges(); // TODO do we need to do this before subscribing?
4235
+ const fanoutOpenPromise = this._openFanoutChannel(options?.fanout);
4236
+ // Mark previously-owned replication ranges as "new" only when they already exist.
4237
+ // Fresh opens have nothing to touch here, so skip the extra scan/write entirely.
4238
+ const updateOwnedReplicationPromise = hasIndexedReplicationInfo
4239
+ ? this.updateTimestampOfOwnedReplicationRanges()
4240
+ : Promise.resolve();
4241
+ await Promise.all([fanoutOpenPromise, updateOwnedReplicationPromise]);
3308
4242
 
3309
4243
  // if we had a previous session with replication info, and new replication info dictates that we unreplicate
3310
4244
  // we should do that. Otherwise if options is a unreplication we dont need to do anything because
3311
4245
  // we are already unreplicated (as we are just opening)
3312
4246
 
3313
- let isUnreplicationOptionsDefined = isUnreplicationOptions(
4247
+ const isUnreplicationOptionsDefined = isUnreplicationOptions(
3314
4248
  options?.replicate,
3315
4249
  );
3316
4250
 
3317
4251
  const canResumeReplication =
4252
+ hasIndexedReplicationInfo &&
3318
4253
  (await isReplicationOptionsDependentOnPreviousState(
3319
4254
  options?.replicate,
3320
4255
  this.replicationIndex,
3321
4256
  this.node.identity.publicKey,
3322
- )) && hasIndexedReplicationInfo;
4257
+ ));
3323
4258
 
3324
4259
  if (hasIndexedReplicationInfo && isUnreplicationOptionsDefined) {
3325
4260
  await this.replicate(options?.replicate, { checkDuplicates: true });
@@ -3372,25 +4307,26 @@ export class SharedLog<
3372
4307
 
3373
4308
  async afterOpen(): Promise<void> {
3374
4309
  await super.afterOpen();
4310
+ const existingSubscribersPromise = this._getTopicSubscribers(this.topic);
3375
4311
 
3376
4312
  // We do this here, because these calls requires this.closed == false
3377
- void this.pruneOfflineReplicators()
3378
- .then(() => {
3379
- this._replicatorsReconciled = true;
3380
- })
4313
+ void this.pruneOfflineReplicators()
4314
+ .then(() => {
4315
+ this._replicatorsReconciled = true;
4316
+ })
3381
4317
  .catch((error) => {
3382
4318
  if (isNotStartedError(error as Error)) {
3383
4319
  return;
3384
4320
  }
3385
- logger.error(error);
3386
- });
4321
+ logger.error(error);
4322
+ });
3387
4323
 
3388
- this.startReplicatorLivenessSweep();
4324
+ this.startReplicatorLivenessSweep();
3389
4325
 
3390
- await this.rebalanceParticipation();
4326
+ await this.rebalanceParticipation();
3391
4327
 
3392
4328
  // Take into account existing subscription
3393
- (await this._getTopicSubscribers(this.topic))?.forEach((v) => {
4329
+ (await existingSubscribersPromise)?.forEach((v) => {
3394
4330
  if (v.equals(this.node.identity.publicKey)) {
3395
4331
  return;
3396
4332
  }
@@ -4021,8 +4957,28 @@ export class SharedLog<
4021
4957
  this._repairRetryTimers.clear();
4022
4958
  this._recentRepairDispatch.clear();
4023
4959
  this._repairSweepRunning = false;
4024
- this._repairSweepForceFreshPending = false;
4025
- this._repairSweepAddedPeersPending.clear();
4960
+ this._repairSweepPendingModes.clear();
4961
+ for (const peers of this._repairSweepPendingPeersByMode.values()) {
4962
+ peers.clear();
4963
+ }
4964
+ this._repairSweepOptimisticGidPeersPending.clear();
4965
+ this._entryKnownPeers.clear();
4966
+ for (const timer of this._joinAuthoritativeRepairTimersByDelay.values()) {
4967
+ clearTimeout(timer);
4968
+ }
4969
+ this._joinAuthoritativeRepairTimersByDelay.clear();
4970
+ this._joinAuthoritativeRepairPeersByDelay.clear();
4971
+ for (const targets of this._repairFrontierByMode.values()) {
4972
+ targets.clear();
4973
+ }
4974
+ for (const targets of this._repairFrontierActiveTargetsByMode.values()) {
4975
+ targets.clear();
4976
+ }
4977
+ if (this._appendBackfillTimer) {
4978
+ clearTimeout(this._appendBackfillTimer);
4979
+ this._appendBackfillTimer = undefined;
4980
+ }
4981
+ this._appendBackfillPendingByTarget.clear();
4026
4982
 
4027
4983
  for (const [_k, v] of this._pendingDeletes) {
4028
4984
  v.clear();
@@ -4196,6 +5152,8 @@ export class SharedLog<
4196
5152
  */
4197
5153
 
4198
5154
  const { heads } = msg;
5155
+ const isRepairHint =
5156
+ (msg.reserved[0] & EXCHANGE_HEADS_REPAIR_HINT) !== 0;
4199
5157
 
4200
5158
  logger.trace(
4201
5159
  `${this.node.identity.publicKey.hashcode()}: Recieved heads: ${
@@ -4205,6 +5163,7 @@ export class SharedLog<
4205
5163
 
4206
5164
  if (heads) {
4207
5165
  const filteredHeads: EntryWithRefs<any>[] = [];
5166
+ const confirmedHashes = new Set<string>();
4208
5167
  for (const head of heads) {
4209
5168
  if (!(await this.log.has(head.entry.hash))) {
4210
5169
  head.entry.init({
@@ -4213,10 +5172,22 @@ export class SharedLog<
4213
5172
  encoding: this.log.encoding,
4214
5173
  });
4215
5174
  filteredHeads.push(head);
5175
+ } else {
5176
+ confirmedHashes.add(head.entry.hash);
4216
5177
  }
4217
5178
  }
5179
+ const fromIsSelf = context.from.equals(this.node.identity.publicKey);
5180
+ if (!fromIsSelf) {
5181
+ this.markEntriesKnownByPeer(
5182
+ heads.map((head) => head.entry.hash),
5183
+ context.from.hashcode(),
5184
+ );
5185
+ }
4218
5186
 
4219
5187
  if (filteredHeads.length === 0) {
5188
+ if (confirmedHashes.size > 0 && !fromIsSelf) {
5189
+ await this.sendRepairConfirmation(context.from!, confirmedHashes);
5190
+ }
4220
5191
  return;
4221
5192
  }
4222
5193
  const groupedByGid = await groupByGid(filteredHeads);
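
When every incoming head is already stored locally, the handler above short-circuits but still acknowledges the sender, because targeted repair keeps retrying until it sees a confirmation. A sketch of that partition-and-confirm step, with `hasLocally` and `sendConfirmation` as hypothetical stand-ins for the local log lookup and the RPC reply:

```ts
// Split incoming heads into "new" and "already stored", and acknowledge the
// latter when there is nothing left to merge.
async function partitionHeads(
  heads: { hash: string }[],
  hasLocally: (hash: string) => Promise<boolean>,
  sendConfirmation: (hashes: Set<string>) => Promise<void>,
): Promise<{ hash: string }[]> {
  const fresh: { hash: string }[] = [];
  const confirmed = new Set<string>();
  for (const head of heads) {
    if (await hasLocally(head.hash)) {
      confirmed.add(head.hash); // already stored: just acknowledge it
    } else {
      fresh.push(head);
    }
  }
  if (fresh.length === 0 && confirmed.size > 0) {
    // The sender retries targeted repair until it hears back, so the
    // acknowledgement matters even when there is nothing new to merge.
    await sendConfirmation(confirmed);
  }
  return fresh;
}
```
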
@@ -4303,8 +5274,15 @@ export class SharedLog<
4303
5274
 
4304
5275
  let maybeDelete: EntryWithRefs<any>[][] | undefined;
4305
5276
  let toMerge: Entry<any>[] = [];
5277
+ let toPersist: Entry<any>[] = [];
4306
5278
  let toDelete: Entry<any>[] | undefined;
4307
- if (isLeader) {
5279
+ // Targeted repair is sent only to peers the sender currently believes
5280
+ // should store the entry. Accept it while local membership catches up;
5281
+ // the normal checked-prune path below can still remove it if this peer
5282
+ // truly no longer owns the entry.
5283
+ const acceptsTargetedRepair = isRepairHint && fromIsLeader;
5284
+ const keepAsLeader = isLeader || acceptsTargetedRepair;
5285
+ if (keepAsLeader) {
4308
5286
  for (const entry of entries) {
4309
5287
  this.pruneDebouncedFn.delete(entry.entry.hash);
4310
5288
  this.removePruneRequestSent(entry.entry.hash);
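
The comment above spells out why a repair-hinted delivery from a leader is stored even when the local leader computation disagrees: membership may simply not have caught up yet, and the checked prune path corrects any over-acceptance. As a pure function, the rule is simply this (the input type and names are illustrative):

```ts
type HeadsAcceptanceInput = {
  isLocalLeader: boolean; // this node's own leader computation
  senderIsLeader: boolean; // the sender is (or is becoming) a leader for the entry
  isRepairHint: boolean; // the message carried the targeted-repair flag
};

const shouldKeepAsLeader = (input: HeadsAcceptanceInput): boolean =>
  input.isLocalLeader || (input.isRepairHint && input.senderIsLeader);

// Over-acceptance is tolerable: if local membership later disagrees, the
// checked prune path removes the entry again.
```
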
@@ -4325,8 +5303,9 @@ export class SharedLog<
4325
5303
  }
4326
5304
 
4327
5305
  outer: for (const entry of entries) {
4328
- if (isLeader || (await this.keep?.(entry.entry))) {
5306
+ if (keepAsLeader || (await this.keep?.(entry.entry))) {
4329
5307
  toMerge.push(entry.entry);
5308
+ toPersist.push(entry.entry);
4330
5309
  } else {
4331
5310
  for (const ref of entry.gidRefrences) {
4332
5311
  const map = await this.log.entryIndex.getHeads(ref).all();
@@ -4350,7 +5329,25 @@ export class SharedLog<
4350
5329
  }
4351
5330
 
4352
5331
  if (toMerge.length > 0) {
5332
+ this.markEntriesKnownByPeer(
5333
+ toMerge.map((entry) => entry.hash),
5334
+ context.from!.hashcode(),
5335
+ );
4353
5336
  await this.log.join(toMerge);
5337
+ // Network joins bypass SharedLog.join(), but churn repair scans
5338
+ // the coordinate index to redistribute entries after membership changes.
5339
+ for (const entry of toPersist) {
5340
+ const replicas = decodeReplicas(entry).getValue(this);
5341
+ await this.findLeaders(
5342
+ await this.createCoordinates(entry, replicas),
5343
+ entry,
5344
+ { roleAge: 0, persist: {} },
5345
+ );
5346
+ }
5347
+ for (const merged of toMerge) {
5348
+ confirmedHashes.add(merged.hash);
5349
+ }
5350
+ await this.pruneJoinedEntriesNoLongerLed(toMerge);
4354
5351
 
4355
5352
  toDelete?.map((x) =>
4356
5353
  // TODO types
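
Because entries arriving over the wire skip SharedLog.join(), the loop above re-runs leader selection with persistence so the coordinate index stays complete for later churn-repair scans. A minimal sketch of that post-merge indexing step, with the three callbacks standing in for the decodeReplicas/createCoordinates/findLeaders calls shown in the hunk:

```ts
// Index coordinates for entries merged from the wire, so a later churn-repair
// scan over the coordinate index can see them.
async function indexMergedEntries<E extends { hash: string }>(
  merged: E[],
  replicasOf: (entry: E) => number,
  coordinatesOf: (entry: E, replicas: number) => Promise<number[]>,
  persistLeaders: (coordinates: number[], entry: E) => Promise<void>,
): Promise<void> {
  for (const entry of merged) {
    const replicas = replicasOf(entry);
    const coordinates = await coordinatesOf(entry, replicas);
    // roleAge 0 + persist mirrors the call above: index immediately, even if
    // the surrounding membership view is still settling.
    await persistLeaders(coordinates, entry);
  }
}
```
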
@@ -4397,6 +5394,10 @@ export class SharedLog<
4397
5394
  promises.push(fn()); // we do this concurrently since waitForIsLeader might be a blocking operation for some entries
4398
5395
  }
4399
5396
  await Promise.all(promises);
5397
+ if (confirmedHashes.size > 0 && !context.from.equals(this.node.identity.publicKey)) {
5398
+ this.markEntriesKnownByPeer(confirmedHashes, context.from.hashcode());
5399
+ await this.sendRepairConfirmation(context.from!, confirmedHashes);
5400
+ }
4400
5401
  }
4401
5402
  } else if (msg instanceof RequestIPrune) {
4402
5403
  const hasAndIsLeader: string[] = [];
@@ -4404,6 +5405,7 @@ export class SharedLog<
4404
5405
 
4405
5406
  for (const hash of msg.hashes) {
4406
5407
  this.removePruneRequestSent(hash, from);
5408
+ this.removeEntriesKnownByPeer([hash], from);
4407
5409
 
4408
5410
  // if we expect the remote to own this entry because we are about to prune it ourselves, we need to remove the remote
4409
5411
  // this is because the remote previously indicated it would replicate the entry to help us prune, but has now changed its mind
@@ -4416,7 +5418,11 @@ export class SharedLog<
4416
5418
  const indexedEntry = await this.log.entryIndex.getShallow(hash);
4417
5419
  let isLeader = false;
4418
5420
 
4419
- if (indexedEntry) {
5421
+ if (
5422
+ indexedEntry &&
5423
+ !this._pendingDeletes.has(hash) &&
5424
+ (await this.log.blocks.has(hash))
5425
+ ) {
4420
5426
  this.removePeerFromGidPeerHistory(
4421
5427
  context.from!.hashcode(),
4422
5428
  indexedEntry!.value.meta.gid,
@@ -4518,6 +5524,10 @@ export class SharedLog<
4518
5524
  for (const hash of msg.hashes) {
4519
5525
  this._pendingDeletes.get(hash)?.resolve(context.from.hashcode());
4520
5526
  }
5527
+ } else if (msg instanceof ConfirmEntriesMessage) {
5528
+ this.markEntriesKnownByPeer(msg.hashes, context.from.hashcode());
5529
+ this.clearRepairFrontierHashes(context.from.hashcode(), msg.hashes);
5530
+ return;
4521
5531
  } else if (await this.syncronizer.onMessage(msg, context)) {
4522
5532
  return; // the syncronizer has handled the message
4523
5533
  } else if (msg instanceof BlocksMessage) {
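
ConfirmEntriesMessage closes the loop on targeted repair: the receiver reports which hashes it already holds, and the sender drops them from its per-peer repair frontier so retries stop. A sketch of that bookkeeping collapsed into a single map (the class above tracks separate frontiers per dispatch mode):

```ts
// Per-peer repair frontier drained by confirmations.
class RepairFrontier {
  private pending = new Map<string, Set<string>>(); // peer hash -> entry hashes

  enqueue(peer: string, hashes: Iterable<string>): void {
    let set = this.pending.get(peer);
    if (!set) {
      this.pending.set(peer, (set = new Set()));
    }
    for (const hash of hashes) set.add(hash);
  }

  // Called when a ConfirmEntriesMessage-like ack arrives from `peer`.
  confirm(peer: string, hashes: Iterable<string>): void {
    const set = this.pending.get(peer);
    if (!set) return;
    for (const hash of hashes) set.delete(hash);
    if (set.size === 0) this.pending.delete(peer);
  }

  // Anything still pending is a candidate for the next retry pass.
  unconfirmed(peer: string): string[] {
    return [...(this.pending.get(peer) ?? [])];
  }
}
```
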
@@ -4948,6 +5958,11 @@ export class SharedLog<
4948
5958
  let messageToSend: AddedReplicationSegmentMessage | undefined = undefined;
4949
5959
 
4950
5960
  if (assumeSynced) {
5961
+ // `assumeSynced` is an explicit contract that this join should trust the
5962
+ // supplied history and avoid initiating outbound repair while the local
5963
+ // replication ranges settle.
5964
+ this._assumeSyncedRepairSuppressedUntil =
5965
+ Date.now() + ASSUME_SYNCED_REPAIR_SUPPRESSION_MS;
4951
5966
  for (const entry of entriesToReplicate) {
4952
5967
  await seedAssumeSyncedPeerHistory(entry);
4953
5968
  }
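
The assumeSynced branch records a suppression deadline rather than a boolean, so normal repair resumes automatically once the window passes. A minimal sketch of that gate; the window length below is an illustrative placeholder, not necessarily the module's constant:

```ts
class RepairSuppression {
  private suppressedUntil = 0;

  // Called from an assumeSynced join; the default window is illustrative.
  noteAssumeSyncedJoin(windowMs = 5_000): void {
    this.suppressedUntil = Date.now() + windowMs;
  }

  // Checked before dispatching outbound repair work.
  shouldDispatchRepair(): boolean {
    return Date.now() >= this.suppressedUntil;
  }
}

const gate = new RepairSuppression();
gate.noteAssumeSyncedJoin();
console.log(gate.shouldDispatchRepair()); // false until the window elapses
```
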
@@ -5033,9 +6048,14 @@ export class SharedLog<
5033
6048
  clear();
5034
6049
  // `waitForReplicator()` is typically used as a precondition before join/replicate
5035
6050
  // flows. A replicator can become mature and enqueue a debounced rebalance
5036
- // (`replicationChangeDebounceFn`) slightly later. Flush here so callers don't
5037
- // observe a "late" rebalance after the wait resolves.
5038
- await this.replicationChangeDebounceFn?.flush?.();
6051
+ // (`replicationChangeDebounceFn`) slightly later. Kick the flush, but do not
6052
+ // make membership waits depend on all rebalance work finishing; callers that
6053
+ // need settled distribution already wait for that explicitly.
6054
+ this.replicationChangeDebounceFn?.flush?.().catch((error: any) => {
6055
+ if (!isNotStartedError(error)) {
6056
+ logger.error(error?.toString?.() ?? String(error));
6057
+ }
6058
+ });
5039
6059
  deferred.resolve();
5040
6060
  };
5041
6061
 
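
The flush is now fire-and-forget: waitForReplicator() resolves on membership, and only unexpected flush failures get logged. A sketch of that pattern, modelling the debounced function as an object with an optional flush():

```ts
type Flushable = { flush?: () => Promise<void> };

// Kick a debounced flush without making the waiter depend on it finishing.
function kickFlush(
  flushable: Flushable | undefined,
  isBenign: (error: unknown) => boolean,
  logError: (message: string) => void,
): void {
  flushable?.flush?.().catch((error) => {
    if (!isBenign(error)) {
      logError(error instanceof Error ? error.message : String(error));
    }
  });
  // The caller resolves immediately; rebalance work finishes in the background.
}
```
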
@@ -5580,6 +6600,18 @@ export class SharedLog<
5580
6600
  }
5581
6601
  }
5582
6602
  }
6603
+
6604
+ if (!options?.candidates) {
6605
+ const fullReplicaLeaders = await this.findFullReplicaLeaders(
6606
+ cursors.length,
6607
+ roleAge,
6608
+ peerFilter,
6609
+ );
6610
+ if (fullReplicaLeaders) {
6611
+ return fullReplicaLeaders;
6612
+ }
6613
+ }
6614
+
5583
6615
  return getSamples<R>(
5584
6616
  cursors,
5585
6617
  this.replicationIndex,
@@ -5592,6 +6624,50 @@ export class SharedLog<
5592
6624
  );
5593
6625
  }
5594
6626
 
6627
+ private async findFullReplicaLeaders(
6628
+ replicas: number,
6629
+ roleAge: number,
6630
+ peerFilter?: Set<string>,
6631
+ ): Promise<Map<string, { intersecting: boolean }> | undefined> {
6632
+ const now = Date.now();
6633
+ const leaders = new Map<string, { intersecting: boolean }>();
6634
+ const includeStrict =
6635
+ this._logProperties?.strictFullReplicaFallback !== false;
6636
+ const iterator = this.replicationIndex.iterate(
6637
+ {},
6638
+ { shape: { hash: true, timestamp: true, mode: true } },
6639
+ );
6640
+
6641
+ try {
6642
+ for (;;) {
6643
+ const batch = await iterator.next(64);
6644
+ if (batch.length === 0) {
6645
+ break;
6646
+ }
6647
+ for (const result of batch) {
6648
+ const range = result.value;
6649
+ if (peerFilter && !peerFilter.has(range.hash)) {
6650
+ continue;
6651
+ }
6652
+ if (!isMatured(range, now, roleAge)) {
6653
+ continue;
6654
+ }
6655
+ if (range.mode === ReplicationIntent.Strict && !includeStrict) {
6656
+ continue;
6657
+ }
6658
+ leaders.set(range.hash, { intersecting: true });
6659
+ if (leaders.size > replicas) {
6660
+ return undefined;
6661
+ }
6662
+ }
6663
+ }
6664
+ } finally {
6665
+ await iterator.close();
6666
+ }
6667
+
6668
+ return leaders.size > 0 ? leaders : undefined;
6669
+ }
6670
+
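
findFullReplicaLeaders bails out as soon as it has seen more matured candidates than the requested replication factor, so larger networks still go through coordinate sampling. A condensed sketch of that decision over an in-memory list; the method above streams the replication index in batches of 64 and closes the iterator in a finally block, and the range shape here is illustrative:

```ts
type RangeLike = { hash: string; timestamp: number };

function fullReplicaLeaders(
  ranges: RangeLike[],
  replicas: number,
  roleAge: number,
  now = Date.now(),
): Map<string, { intersecting: boolean }> | undefined {
  const leaders = new Map<string, { intersecting: boolean }>();
  for (const range of ranges) {
    if (now - range.timestamp < roleAge) {
      continue; // not matured yet
    }
    leaders.set(range.hash, { intersecting: true });
    if (leaders.size > replicas) {
      return undefined; // too many candidates: fall back to coordinate sampling
    }
  }
  return leaders.size > 0 ? leaders : undefined;
}
```
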
5595
6671
  async findLeadersFromEntry(
5596
6672
  entry: ShallowOrFullEntry<any> | EntryReplicated<R>,
5597
6673
  replicas: number,
@@ -6231,16 +7307,33 @@ export class SharedLog<
6231
7307
 
6232
7308
  const changed = false;
6233
7309
  const addedPeers = new Set<string>();
7310
+ const authoritativeRepairPeers = new Set<string>();
6234
7311
  const warmupPeers = new Set<string>();
7312
+ const churnRepairPeers = new Set<string>();
6235
7313
  const hasSelfWarmupChange = changes.some(
6236
7314
  (change) =>
6237
7315
  change.range.hash === selfHash &&
6238
7316
  (change.type === "added" || change.type === "replaced"),
6239
7317
  );
7318
+ const hasSelfRangeRemoval = changes.some(
7319
+ (change) =>
7320
+ change.range.hash === selfHash &&
7321
+ (change.type === "removed" || change.type === "replaced"),
7322
+ );
6240
7323
  for (const change of changes) {
7324
+ if (
7325
+ change.range.hash !== selfHash &&
7326
+ (change.type === "removed" || change.type === "replaced")
7327
+ ) {
7328
+ this.removePeerFromEntryKnownPeers(change.range.hash);
7329
+ }
6241
7330
  if (change.type === "added" || change.type === "replaced") {
6242
7331
  const hash = change.range.hash;
6243
7332
  if (hash !== selfHash) {
7333
+ // Existing peers can widen/shift ranges after the initial join. If we
7334
+ // only rescan on first-seen "added", late authoritative range updates can
7335
+ // leave historical backfill permanently partial under load.
7336
+ authoritativeRepairPeers.add(hash);
6244
7337
  // Range updates can reassign entries to an existing peer shortly after it
6245
7338
  // already received a subset. Avoid suppressing legitimate follow-up repair.
6246
7339
  this._recentRepairDispatch.delete(hash);
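
The classification above drives three different follow-ups: forget what departed or re-announced peers were known to hold, queue changed peers for an authoritative backfill pass, and note whether this node's own coverage shrank. A compact sketch of that classification; the shapes and callback names are illustrative:

```ts
type ReplicationChange = {
  type: "added" | "removed" | "replaced";
  range: { hash: string };
};

function classifyChanges(
  changes: ReplicationChange[],
  selfHash: string,
  forgetPeerKnowledge: (peer: string) => void,
): { authoritativeRepairPeers: Set<string>; selfRangeRemoved: boolean } {
  const authoritativeRepairPeers = new Set<string>();
  let selfRangeRemoved = false;
  for (const change of changes) {
    const isSelf = change.range.hash === selfHash;
    const removedOrReplaced =
      change.type === "removed" || change.type === "replaced";
    const addedOrReplaced =
      change.type === "added" || change.type === "replaced";
    if (isSelf) {
      if (removedOrReplaced) {
        selfRangeRemoved = true; // own coverage shrank or moved
      }
      continue;
    }
    if (removedOrReplaced) {
      forgetPeerKnowledge(change.range.hash); // its "already has" state is stale
    }
    if (addedOrReplaced) {
      authoritativeRepairPeers.add(change.range.hash); // needs a backfill pass
    }
  }
  return { authoritativeRepairPeers, selfRangeRemoved };
}
```
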
@@ -6277,26 +7370,34 @@ export class SharedLog<
6277
7370
  string,
6278
7371
  Map<string, EntryReplicated<any>>
6279
7372
  > = new Map();
6280
- const flushUncheckedDeliverTarget = (target: string) => {
6281
- const entries = uncheckedDeliver.get(target);
6282
- if (!entries || entries.size === 0) {
6283
- return;
6284
- }
7373
+ const flushUncheckedDeliverTarget = (target: string) => {
7374
+ const entries = uncheckedDeliver.get(target);
7375
+ if (!entries || entries.size === 0) {
7376
+ return;
7377
+ }
6285
7378
  const isWarmupTarget = warmupPeers.has(target);
6286
- const bypassRecentDedupe = isWarmupTarget || forceFreshDelivery;
7379
+ const mode: RepairDispatchMode = forceFreshDelivery
7380
+ ? "churn"
7381
+ : isWarmupTarget
7382
+ ? "join-warmup"
7383
+ : "join-authoritative";
6287
7384
  this.dispatchMaybeMissingEntries(target, entries, {
6288
- bypassRecentDedupe,
6289
- retryScheduleMs: isWarmupTarget
6290
- ? JOIN_WARMUP_RETRY_SCHEDULE_MS
6291
- : undefined,
6292
- forceFreshDelivery,
7385
+ bypassRecentDedupe: isWarmupTarget || forceFreshDelivery,
7386
+ mode,
7387
+ retryScheduleMs:
7388
+ mode === "join-warmup"
7389
+ ? JOIN_WARMUP_RETRY_SCHEDULE_MS
7390
+ : mode === "join-authoritative"
7391
+ ? [0]
7392
+ : undefined,
6293
7393
  });
6294
- uncheckedDeliver.delete(target);
6295
- };
7394
+ uncheckedDeliver.delete(target);
7395
+ };
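
Each flushed target now carries an explicit dispatch mode, which decides both the retry schedule and whether the recent-dispatch dedupe is bypassed. A sketch of that decision; the schedules shown are placeholders, not the module's constants:

```ts
type RepairDispatchMode = "join-warmup" | "join-authoritative" | "churn";

// Illustrative placeholders; the module defines its own schedules.
const RETRY_SCHEDULES: Record<RepairDispatchMode, number[] | undefined> = {
  "join-warmup": [0, 1_000, 3_000], // keep nudging while the joiner warms up
  "join-authoritative": [0], // one pass; the delayed sweep follows up
  churn: undefined, // let the dispatcher apply its churn default
};

function planDispatch(forceFreshDelivery: boolean, isWarmupTarget: boolean) {
  const mode: RepairDispatchMode = forceFreshDelivery
    ? "churn"
    : isWarmupTarget
      ? "join-warmup"
      : "join-authoritative";
  return {
    mode,
    bypassRecentDedupe: isWarmupTarget || forceFreshDelivery,
    retryScheduleMs: RETRY_SCHEDULES[mode],
  };
}
```
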
6296
7396
  const queueUncheckedDeliver = (
6297
7397
  target: string,
6298
7398
  entry: EntryReplicated<any>,
6299
7399
  ) => {
7400
+ churnRepairPeers.add(target);
6300
7401
  let set = uncheckedDeliver.get(target);
6301
7402
  if (!set) {
6302
7403
  set = new Map();
@@ -6320,74 +7421,85 @@ export class SharedLog<
6320
7421
  forceFresh: forceFreshDelivery || useJoinWarmupFastPath,
6321
7422
  },
6322
7423
  )) {
6323
- if (this.closed) {
6324
- break;
6325
- }
6326
-
6327
- if (useJoinWarmupFastPath) {
6328
- let oldPeersSet: Set<string> | undefined;
6329
- const gid = entryReplicated.gid;
6330
- oldPeersSet = gidPeersHistorySnapshot.get(gid);
6331
- if (!gidPeersHistorySnapshot.has(gid)) {
6332
- const existing = this._gidPeersHistory.get(gid);
6333
- oldPeersSet = existing ? new Set(existing) : undefined;
6334
- gidPeersHistorySnapshot.set(gid, oldPeersSet);
7424
+ if (this.closed) {
7425
+ break;
6335
7426
  }
6336
7427
 
6337
- for (const target of warmupPeers) {
6338
- queueUncheckedDeliver(target, entryReplicated);
6339
- }
7428
+ if (useJoinWarmupFastPath) {
7429
+ let oldPeersSet: Set<string> | undefined;
7430
+ const gid = entryReplicated.gid;
7431
+ oldPeersSet = gidPeersHistorySnapshot.get(gid);
7432
+ if (!gidPeersHistorySnapshot.has(gid)) {
7433
+ const existing = this._gidPeersHistory.get(gid);
7434
+ oldPeersSet = existing ? new Set(existing) : undefined;
7435
+ gidPeersHistorySnapshot.set(gid, oldPeersSet);
7436
+ }
6340
7437
 
6341
- const candidatePeers = new Set<string>([selfHash]);
6342
- for (const target of warmupPeers) {
6343
- candidatePeers.add(target);
6344
- }
6345
- if (oldPeersSet) {
6346
- for (const oldPeer of oldPeersSet) {
6347
- candidatePeers.add(oldPeer);
7438
+ for (const target of warmupPeers) {
7439
+ queueUncheckedDeliver(target, entryReplicated);
6348
7440
  }
6349
- }
6350
7441
 
6351
- const currentPeers = await this.findLeaders(
6352
- entryReplicated.coordinates,
6353
- entryReplicated,
6354
- {
6355
- roleAge: 0,
6356
- candidates: candidatePeers,
6357
- persist: false,
6358
- },
6359
- );
7442
+ const candidatePeers = new Set<string>([selfHash]);
7443
+ for (const target of warmupPeers) {
7444
+ candidatePeers.add(target);
7445
+ }
7446
+ if (oldPeersSet) {
7447
+ for (const oldPeer of oldPeersSet) {
7448
+ candidatePeers.add(oldPeer);
7449
+ }
7450
+ }
6360
7451
 
6361
- if (oldPeersSet) {
6362
- for (const oldPeer of oldPeersSet) {
6363
- if (!currentPeers.has(oldPeer)) {
6364
- this.removePruneRequestSent(entryReplicated.hash);
7452
+ const currentPeers = await this.findLeaders(
7453
+ entryReplicated.coordinates,
7454
+ entryReplicated,
7455
+ {
7456
+ roleAge: 0,
7457
+ candidates: candidatePeers,
7458
+ persist: false,
7459
+ },
7460
+ );
7461
+
7462
+ if (oldPeersSet) {
7463
+ for (const oldPeer of oldPeersSet) {
7464
+ if (!currentPeers.has(oldPeer)) {
7465
+ this.removePruneRequestSent(entryReplicated.hash);
7466
+ }
6365
7467
  }
6366
7468
  }
6367
- }
6368
7469
 
6369
- this.addPeersToGidPeerHistory(
6370
- entryReplicated.gid,
6371
- currentPeers.keys(),
6372
- true,
6373
- );
7470
+ for (const [peer] of currentPeers) {
7471
+ if (warmupPeers.has(peer)) {
7472
+ this.markRepairSweepOptimisticPeer(entryReplicated.gid, peer);
7473
+ }
7474
+ }
6374
7475
 
6375
- if (!currentPeers.has(selfHash)) {
6376
- this.pruneDebouncedFnAddIfNotKeeping({
6377
- key: entryReplicated.hash,
6378
- value: { entry: entryReplicated, leaders: currentPeers },
6379
- });
7476
+ const authoritativePeers = [...currentPeers.keys()].filter(
7477
+ (peer) =>
7478
+ !warmupPeers.has(peer) &&
7479
+ !this.hasPendingRepairSweepOptimisticPeer(entryReplicated.gid, peer),
7480
+ );
7481
+ this.addPeersToGidPeerHistory(
7482
+ entryReplicated.gid,
7483
+ authoritativePeers,
7484
+ true,
7485
+ );
6380
7486
 
6381
- this.responseToPruneDebouncedFn.delete(entryReplicated.hash);
6382
- } else {
6383
- this.pruneDebouncedFn.delete(entryReplicated.hash);
6384
- await this._pendingDeletes
6385
- .get(entryReplicated.hash)
6386
- ?.reject(new Error("Failed to delete, is leader again"));
6387
- this.removePruneRequestSent(entryReplicated.hash);
7487
+ if (!currentPeers.has(selfHash)) {
7488
+ this.pruneDebouncedFnAddIfNotKeeping({
7489
+ key: entryReplicated.hash,
7490
+ value: { entry: entryReplicated, leaders: currentPeers },
7491
+ });
7492
+
7493
+ this.responseToPruneDebouncedFn.delete(entryReplicated.hash);
7494
+ } else {
7495
+ this.pruneDebouncedFn.delete(entryReplicated.hash);
7496
+ await this._pendingDeletes
7497
+ .get(entryReplicated.hash)
7498
+ ?.reject(new Error("Failed to delete, is leader again"));
7499
+ this.removePruneRequestSent(entryReplicated.hash);
7500
+ }
7501
+ continue;
6388
7502
  }
6389
- continue;
6390
- }
6391
7503
 
6392
7504
  let oldPeersSet: Set<string> | undefined;
6393
7505
  const gid = entryReplicated.gid;
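
In the warmup fast path above, freshly joined peers are treated optimistically: they are queued for delivery and tracked for the later sweep, but kept out of the authoritative gid-peer history until they confirm. A small sketch of that split; the names are illustrative:

```ts
function splitLeaderSet(
  currentPeers: Iterable<string>,
  optimisticPeers: Set<string>, // warmup / just-added peers
  markOptimistic: (peer: string) => void,
  recordAuthoritative: (peers: string[]) => void,
): void {
  const authoritative: string[] = [];
  for (const peer of currentPeers) {
    if (optimisticPeers.has(peer)) {
      markOptimistic(peer); // revisit this peer in the deferred repair sweep
    } else {
      authoritative.push(peer);
    }
  }
  recordAuthoritative(authoritative); // only confirmed holders enter gid history
}
```
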
@@ -6421,19 +7533,30 @@ export class SharedLog<
6421
7533
  }
6422
7534
  }
6423
7535
 
6424
- if (oldPeersSet) {
6425
- for (const oldPeer of oldPeersSet) {
6426
- if (!currentPeers.has(oldPeer)) {
6427
- this.removePruneRequestSent(entryReplicated.hash);
7536
+ if (oldPeersSet) {
7537
+ for (const oldPeer of oldPeersSet) {
7538
+ if (!currentPeers.has(oldPeer)) {
7539
+ this.removePruneRequestSent(entryReplicated.hash);
7540
+ }
6428
7541
  }
6429
7542
  }
6430
- }
6431
7543
 
6432
- this.addPeersToGidPeerHistory(
6433
- entryReplicated.gid,
6434
- currentPeers.keys(),
6435
- true,
6436
- );
7544
+ for (const [peer] of currentPeers) {
7545
+ if (addedPeers.has(peer)) {
7546
+ this.markRepairSweepOptimisticPeer(entryReplicated.gid, peer);
7547
+ }
7548
+ }
7549
+
7550
+ const authoritativePeers = [...currentPeers.keys()].filter(
7551
+ (peer) =>
7552
+ !addedPeers.has(peer) &&
7553
+ !this.hasPendingRepairSweepOptimisticPeer(entryReplicated.gid, peer),
7554
+ );
7555
+ this.addPeersToGidPeerHistory(
7556
+ entryReplicated.gid,
7557
+ authoritativePeers,
7558
+ true,
7559
+ );
6437
7560
 
6438
7561
  if (!isLeader) {
6439
7562
  this.pruneDebouncedFnAddIfNotKeeping({
@@ -6452,9 +7575,18 @@ export class SharedLog<
6452
7575
  }
6453
7576
  }
6454
7577
 
7578
+ if (this._isAdaptiveReplicating && hasSelfRangeRemoval) {
7579
+ await this.pruneIndexedEntriesNoLongerLed();
7580
+ }
7581
+
6455
7582
  if (forceFreshDelivery) {
6456
- // Removed/shrunk ranges still need the authoritative background pass.
6457
- this.scheduleRepairSweep({ forceFreshDelivery, addedPeers });
7583
+ // Pure leave/shrink churn can have zero `addedPeers`, but the peers that
7584
+ // received redistributed entries still need a follow-up repair pass if the
7585
+ // immediate maybe-sync misses one entry.
7586
+ this.scheduleRepairSweep({
7587
+ mode: "churn",
7588
+ peers: churnRepairPeers,
7589
+ });
6458
7590
  } else if (useJoinWarmupFastPath) {
6459
7591
  // Pure join warmup uses the cheap immediate maybe-missing dispatch above,
6460
7592
  // then defers the authoritative sweep so it does not compete with the
@@ -6466,19 +7598,23 @@ export class SharedLog<
6466
7598
  return;
6467
7599
  }
6468
7600
  this.scheduleRepairSweep({
6469
- forceFreshDelivery: false,
6470
- addedPeers: peers,
7601
+ mode: "join-warmup",
7602
+ peers,
6471
7603
  });
6472
7604
  }, 250);
6473
7605
  timer.unref?.();
6474
7606
  this._repairRetryTimers.add(timer);
6475
- } else if (addedPeers.size > 0) {
7607
+ } else if (authoritativeRepairPeers.size > 0) {
6476
7608
  this.scheduleRepairSweep({
6477
- forceFreshDelivery: false,
6478
- addedPeers,
7609
+ mode: "join-authoritative",
7610
+ peers: authoritativeRepairPeers,
6479
7611
  });
6480
7612
  }
6481
7613
 
7614
+ if (!forceFreshDelivery && authoritativeRepairPeers.size > 0) {
7615
+ this.scheduleJoinAuthoritativeRepair(authoritativeRepairPeers);
7616
+ }
7617
+
6482
7618
  for (const target of [...uncheckedDeliver.keys()]) {
6483
7619
  flushUncheckedDeliverTarget(target);
6484
7620
  }
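
The timers and peer sets that close() clears are keyed by delay, which suggests scheduleJoinAuthoritativeRepair re-sweeps joining peers at more than one delay while sharing a single timer per delay so repeated joins coalesce. A sketch of such a scheduler; the delays are illustrative placeholders:

```ts
class JoinRepairScheduler {
  private peersByDelay = new Map<number, Set<string>>();
  private timersByDelay = new Map<number, ReturnType<typeof setTimeout>>();

  constructor(
    private sweep: (peers: Set<string>) => void,
    private delaysMs: number[] = [2_000, 7_000, 15_000], // illustrative
  ) {}

  schedule(peers: Iterable<string>): void {
    for (const delay of this.delaysMs) {
      let bucket = this.peersByDelay.get(delay);
      if (!bucket) this.peersByDelay.set(delay, (bucket = new Set()));
      for (const peer of peers) bucket.add(peer);
      if (!this.timersByDelay.has(delay)) {
        const timer = setTimeout(() => {
          this.timersByDelay.delete(delay);
          const due = this.peersByDelay.get(delay);
          this.peersByDelay.delete(delay);
          if (due && due.size > 0) this.sweep(due);
        }, delay);
        (timer as { unref?: () => void }).unref?.(); // don't hold the process open
        this.timersByDelay.set(delay, timer);
      }
    }
  }
}
```
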
@@ -6585,6 +7721,13 @@ export class SharedLog<
6585
7721
  return; // not allowed to replicate
6586
7722
  }
6587
7723
 
7724
+ if (
7725
+ this.replicationController.maxMemoryLimit != null &&
7726
+ usedMemory > this.replicationController.maxMemoryLimit
7727
+ ) {
7728
+ await this.pruneIndexedEntriesNoLongerLed();
7729
+ }
7730
+
6588
7731
  const peersSize = (await peers.getSize()) || 1;
6589
7732
  const totalParticipation = await this.calculateTotalParticipation();
6590
7733
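
The new guard above sheds entries this node no longer leads before participation is recalculated whenever usage exceeds the configured memory limit. A minimal sketch of that hook; the names are stand-ins for the controller fields and prune call shown in the hunk:

```ts
// Before recomputing participation, drop no-longer-led entries if memory
// usage exceeds the configured limit.
async function maybeShedBeforeRebalance(
  usedMemoryBytes: number,
  maxMemoryLimitBytes: number | undefined,
  pruneEntriesNoLongerLed: () => Promise<void>,
): Promise<void> {
  if (maxMemoryLimitBytes != null && usedMemoryBytes > maxMemoryLimitBytes) {
    // Shedding first makes the subsequent participation calculation reflect
    // reclaimable space instead of stale bytes.
    await pruneEntriesNoLongerLed();
  }
}
```
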