@peerbit/shared-log 13.0.24 → 13.1.1

This diff shows the changes between publicly released versions of this package, as published to one of the supported registries. The information in this diff is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
package/src/index.ts CHANGED
@@ -168,7 +168,7 @@ import type {
168
168
  Syncronizer,
169
169
  } from "./sync/index.js";
170
170
  import { RatelessIBLTSynchronizer } from "./sync/rateless-iblt.js";
171
- import { SimpleSyncronizer } from "./sync/simple.js";
171
+ import { ConfirmEntriesMessage, SimpleSyncronizer } from "./sync/simple.js";
172
172
  import { groupByGid } from "./utils.js";
173
173
 
174
174
  const toLocalPublicSignKey = (
@@ -468,6 +468,7 @@ export type SharedLogOptions<
468
468
  waitForReplicatorRequestMaxAttempts?: number;
469
469
  waitForPruneDelay?: number;
470
470
  distributionDebounceTime?: number;
471
+ strictFullReplicaFallback?: boolean;
471
472
  compatibility?: number;
472
473
  domain?: ReplicationDomainConstructor<D>;
473
474
  eagerBlocks?: boolean | { cacheSize?: number };
@@ -513,10 +514,143 @@ const REPLICATOR_LIVENESS_PROBE_FAILURES_TO_EVICT = 2;
513
514
  // Churn/join repair can race with pruning and transient missed sync requests under
514
515
  // heavy event-loop load. Keep retries alive with a longer tail so reassigned
515
516
  // entries are retried after short bursts and slower recovery windows.
516
- const FORCE_FRESH_RETRY_SCHEDULE_MS = [
517
+ const CHURN_REPAIR_RETRY_SCHEDULE_MS = [
517
518
  0, 1_000, 3_000, 7_000, 15_000, 30_000, 45_000,
518
519
  ];
519
- const JOIN_WARMUP_RETRY_SCHEDULE_MS = [0, 1_000, 3_000, 7_000, 15_000];
520
+ const JOIN_WARMUP_RETRY_SCHEDULE_MS = [
521
+ 0,
522
+ 1_000,
523
+ 3_000,
524
+ 7_000,
525
+ 15_000,
526
+ 30_000,
527
+ 60_000,
528
+ ];
529
+ const JOIN_AUTHORITATIVE_RETRY_SCHEDULE_MS = [
530
+ 0,
531
+ 1_000,
532
+ 3_000,
533
+ 7_000,
534
+ 15_000,
535
+ 30_000,
536
+ 60_000,
537
+ ];
538
+ const APPEND_BACKFILL_RETRY_SCHEDULE_MS = [0, 1_000, 3_000, 7_000];
539
+ const JOIN_AUTHORITATIVE_REPAIR_DELAY_MS = 2_000;
540
+ const JOIN_AUTHORITATIVE_REPAIR_SWEEP_DELAYS_MS = [
541
+ JOIN_AUTHORITATIVE_REPAIR_DELAY_MS,
542
+ 7_000,
543
+ 15_000,
544
+ 30_000,
545
+ ];
546
+ const APPEND_BACKFILL_DELAY_MS = 500;
547
+ const ASSUME_SYNCED_REPAIR_SUPPRESSION_MS = 5_000;
548
+ const REPAIR_CONFIRMATION_HASH_BATCH_SIZE = 1_024;
549
+
550
+ type RepairDispatchMode =
551
+ | "join-warmup"
552
+ | "join-authoritative"
553
+ | "append-backfill"
554
+ | "churn";
555
+ type RepairTransportMode = "rateless" | "simple";
556
+ type RepairMetricBucket = {
557
+ dispatches: number;
558
+ entries: number;
559
+ ratelessFirstPasses: number;
560
+ simpleFallbackPasses: number;
561
+ };
562
+ type RepairMetrics = Record<RepairDispatchMode, RepairMetricBucket>;
563
+
564
+ const REPAIR_DISPATCH_MODES: RepairDispatchMode[] = [
565
+ "join-warmup",
566
+ "join-authoritative",
567
+ "append-backfill",
568
+ "churn",
569
+ ];
570
+
571
+ const createRepairMetricBucket = (): RepairMetricBucket => ({
572
+ dispatches: 0,
573
+ entries: 0,
574
+ ratelessFirstPasses: 0,
575
+ simpleFallbackPasses: 0,
576
+ });
577
+
578
+ const createRepairMetrics = (): RepairMetrics => ({
579
+ "join-warmup": createRepairMetricBucket(),
580
+ "join-authoritative": createRepairMetricBucket(),
581
+ "append-backfill": createRepairMetricBucket(),
582
+ churn: createRepairMetricBucket(),
583
+ });
584
+
585
+ const createRepairPendingPeersByMode = () =>
586
+ new Map<RepairDispatchMode, Set<string>>(
587
+ REPAIR_DISPATCH_MODES.map((mode) => [mode, new Set<string>()]),
588
+ );
589
+
590
+ const cloneRepairPendingPeersByMode = (
591
+ pending: Map<RepairDispatchMode, Set<string>>,
592
+ ) =>
593
+ new Map<RepairDispatchMode, Set<string>>(
594
+ REPAIR_DISPATCH_MODES.map((mode) => [mode, new Set(pending.get(mode) ?? [])]),
595
+ );
596
+
597
+ const createRepairFrontierByMode = () =>
598
+ new Map<
599
+ RepairDispatchMode,
600
+ Map<string, Map<string, EntryReplicated<any>>>
601
+ >(REPAIR_DISPATCH_MODES.map((mode) => [mode, new Map()]));
602
+
603
+ const createRepairActiveTargetsByMode = () =>
604
+ new Map<RepairDispatchMode, Set<string>>(
605
+ REPAIR_DISPATCH_MODES.map((mode) => [mode, new Set()]),
606
+ );
607
+
608
+ const getRepairRetrySchedule = (mode: RepairDispatchMode) => {
609
+ switch (mode) {
610
+ case "join-warmup":
611
+ return JOIN_WARMUP_RETRY_SCHEDULE_MS;
612
+ case "join-authoritative":
613
+ return JOIN_AUTHORITATIVE_RETRY_SCHEDULE_MS;
614
+ case "append-backfill":
615
+ return APPEND_BACKFILL_RETRY_SCHEDULE_MS;
616
+ case "churn":
617
+ return CHURN_REPAIR_RETRY_SCHEDULE_MS;
618
+ }
619
+ };
620
+
621
+ const resolveRepairRetrySchedule = (
622
+ mode: RepairDispatchMode,
623
+ override?: number[],
624
+ trackedFrontier = false,
625
+ ) => {
626
+ const fallback = getRepairRetrySchedule(mode);
627
+ if (!override || override.length === 0) {
628
+ return fallback;
629
+ }
630
+ if (
631
+ trackedFrontier &&
632
+ override.length === 1 &&
633
+ override[0] === 0 &&
634
+ fallback.length > 1
635
+ ) {
636
+ // A tracked frontier with only an immediate retry would otherwise stay on
637
+ // attempt 0 forever, which means rateless-only retries and no sparse-tail
638
+ // simple fallback. Keep the immediate seed, then continue with the normal
639
+ // tracked repair schedule.
640
+ return [0, ...fallback.slice(1)];
641
+ }
642
+ return override;
643
+ };
644
+
645
+ const getRepairTransportForAttempt = (
646
+ mode: RepairDispatchMode,
647
+ attemptIndex: number,
648
+ ): RepairTransportMode => {
649
+ if (mode === "churn") {
650
+ return "simple";
651
+ }
652
+ return attemptIndex === 0 ? "rateless" : "simple";
653
+ };
520
654
 
521
655
  const toPositiveInteger = (
522
656
  value: number | undefined,
@@ -751,8 +885,24 @@ export class SharedLog<
751
885
  private _repairRetryTimers!: Set<ReturnType<typeof setTimeout>>;
752
886
  private _recentRepairDispatch!: Map<string, Map<string, number>>;
753
887
  private _repairSweepRunning!: boolean;
754
- private _repairSweepForceFreshPending!: boolean;
755
- private _repairSweepAddedPeersPending!: Set<string>;
888
+ private _repairSweepPendingModes!: Set<RepairDispatchMode>;
889
+ private _repairSweepPendingPeersByMode!: Map<RepairDispatchMode, Set<string>>;
890
+ private _repairFrontierByMode!: Map<
891
+ RepairDispatchMode,
892
+ Map<string, Map<string, EntryReplicated<R>>>
893
+ >;
894
+ private _repairFrontierActiveTargetsByMode!: Map<RepairDispatchMode, Set<string>>;
895
+ private _repairSweepOptimisticGidPeersPending!: Map<string, Map<string, number>>;
896
+ private _entryKnownPeers!: Map<string, Set<string>>;
897
+ private _joinAuthoritativeRepairTimersByDelay!: Map<
898
+ number,
899
+ ReturnType<typeof setTimeout>
900
+ >;
901
+ private _joinAuthoritativeRepairPeersByDelay!: Map<number, Set<string>>;
902
+ private _assumeSyncedRepairSuppressedUntil!: number;
903
+ private _appendBackfillTimer?: ReturnType<typeof setTimeout>;
904
+ private _appendBackfillPendingByTarget!: Map<string, Map<string, EntryReplicated<R>>>;
905
+ private _repairMetrics!: RepairMetrics;
756
906
  private _topicSubscribersCache!: Map<
757
907
  string,
758
908
  { expiresAt: number; keys: PublicSignKey[] }
@@ -1187,6 +1337,7 @@ export class SharedLog<
1187
1337
 
1188
1338
  private async _appendDeliverToReplicators(
1189
1339
  entry: Entry<T>,
1340
+ coordinates: NumberFromType<R>[],
1190
1341
  minReplicasValue: number,
1191
1342
  leaders: Map<string, any>,
1192
1343
  selfHash: string,
@@ -1204,11 +1355,35 @@ export class SharedLog<
1204
1355
  ? { timeoutMs: delivery.timeout, signal: delivery.signal }
1205
1356
  : undefined;
1206
1357
 
1358
+ const fullReplicaDeliveryCandidates =
1359
+ await this.getFullReplicaRepairCandidates(undefined, {
1360
+ includeSubscribers: false,
1361
+ });
1362
+ if (minReplicasValue >= Math.max(1, fullReplicaDeliveryCandidates.size)) {
1363
+ for (const peer of fullReplicaDeliveryCandidates) {
1364
+ if (!leaders.has(peer)) {
1365
+ leaders.set(peer, { intersecting: true });
1366
+ }
1367
+ }
1368
+ }
1369
+
1370
+ const entryReplicatedForRepair = this.createEntryReplicatedForRepair({
1371
+ entry,
1372
+ coordinates,
1373
+ leaders: leaders as Map<string, { intersecting: boolean }>,
1374
+ replicas: minReplicasValue,
1375
+ });
1207
1376
  for await (const message of createExchangeHeadsMessages(this.log, [entry])) {
1208
1377
  await this._mergeLeadersFromGidReferences(message, minReplicasValue, leaders);
1209
- const leadersForDelivery = delivery ? new Set(leaders.keys()) : undefined;
1378
+ const authoritativeRecipients = new Set(leaders.keys());
1379
+ const leadersForDelivery = delivery
1380
+ ? new Set(authoritativeRecipients)
1381
+ : undefined;
1210
1382
 
1211
- const set = this.addPeersToGidPeerHistory(entry.meta.gid, leaders.keys());
1383
+ // Outbound append delivery only tells us who we intend to send to, not who has
1384
+ // actually stored the entry. Keep this recipient set local so later repair
1385
+ // sweeps can still backfill peers that missed the initial delivery.
1386
+ const set = new Set(leaders.keys());
1212
1387
  let hasRemotePeers = set.has(selfHash) ? set.size > 1 : set.size > 0;
1213
1388
  const allowSubscriberFallback =
1214
1389
  this.syncronizer instanceof SimpleSyncronizer ||
@@ -1239,6 +1414,17 @@ export class SharedLog<
1239
1414
  }
1240
1415
 
1241
1416
  if (!delivery) {
1417
+ for (const peer of authoritativeRecipients) {
1418
+ if (peer === selfHash) {
1419
+ continue;
1420
+ }
1421
+ // Default live append delivery is still optimistic. If one remote misses
1422
+ // the initial heads exchange and the caller did not opt into explicit
1423
+ // delivery acks, we still need a targeted backfill source of truth for the
1424
+ // authoritative recipients or one entry can get stuck at 2/3 replicas
1425
+ // forever. Best-effort fallback subscribers are not repair-worthy.
1426
+ this.queueAppendBackfill(peer, entryReplicatedForRepair);
1427
+ }
1242
1428
  this.rpc
1243
1429
  .send(message, {
1244
1430
  mode: isLeader
@@ -1268,6 +1454,7 @@ export class SharedLog<
1268
1454
 
1269
1455
  const ackTo: string[] = [];
1270
1456
  let silentTo: string[] | undefined;
1457
+ const repairTargets = new Set<string>();
1271
1458
  // Default delivery semantics: require enough remote ACKs to reach the requested
1272
1459
  // replication degree (local append counts as 1).
1273
1460
  const defaultMinAcks = Math.max(0, minReplicasValue - 1);
@@ -1279,6 +1466,9 @@ export class SharedLog<
1279
1466
  );
1280
1467
 
1281
1468
  for (const peer of orderedRemoteRecipients) {
1469
+ if (authoritativeRecipients.has(peer)) {
1470
+ repairTargets.add(peer);
1471
+ }
1282
1472
  if (ackTo.length < ackLimit) {
1283
1473
  ackTo.push(peer);
1284
1474
  } else {
@@ -1317,6 +1507,12 @@ export class SharedLog<
1317
1507
  })
1318
1508
  .catch((error) => logger.error(error));
1319
1509
  }
1510
+ for (const peer of repairTargets) {
1511
+ // Direct append delivery is intentionally optimistic. Queue one delayed,
1512
+ // batched maybe-sync pass for the intended recipients so stable 3-peer
1513
+ // append workloads do not depend on perfect first-try delivery ordering.
1514
+ this.queueAppendBackfill(peer, entryReplicatedForRepair);
1515
+ }
1320
1516
  }
1321
1517
 
1322
1518
  if (pending.length > 0) {
@@ -2016,6 +2212,7 @@ export class SharedLog<
2016
2212
  // Keep local sync/prune state consistent even when a peer disappears
2017
2213
  // through replication-info updates without a topic unsubscribe event.
2018
2214
  this.removePeerFromGidPeerHistory(keyHash);
2215
+ this.removeRepairFrontierTarget(keyHash);
2019
2216
  this._recentRepairDispatch.delete(keyHash);
2020
2217
  if (!isMe) {
2021
2218
  this.syncronizer.onPeerDisconnected(keyHash);
@@ -2483,6 +2680,7 @@ export class SharedLog<
2483
2680
  for (const key of this._gidPeersHistory.keys()) {
2484
2681
  this.removePeerFromGidPeerHistory(publicKeyHash, key);
2485
2682
  }
2683
+ this.removePeerFromEntryKnownPeers(publicKeyHash);
2486
2684
  }
2487
2685
  }
2488
2686
 
@@ -2507,19 +2705,448 @@ export class SharedLog<
2507
2705
  return set;
2508
2706
  }
2509
2707
 
2708
+ private markEntriesKnownByPeer(hashes: Iterable<string>, peer: string) {
2709
+ for (const hash of hashes) {
2710
+ let peers = this._entryKnownPeers.get(hash);
2711
+ if (!peers) {
2712
+ peers = new Set();
2713
+ this._entryKnownPeers.set(hash, peers);
2714
+ }
2715
+ peers.add(peer);
2716
+ }
2717
+ }
2718
+
2719
+ private removeEntriesKnownByPeer(hashes: Iterable<string>, peer: string) {
2720
+ for (const hash of hashes) {
2721
+ const peers = this._entryKnownPeers.get(hash);
2722
+ if (!peers) {
2723
+ continue;
2724
+ }
2725
+ peers.delete(peer);
2726
+ if (peers.size === 0) {
2727
+ this._entryKnownPeers.delete(hash);
2728
+ }
2729
+ }
2730
+ }
2731
+
2732
+ private removePeerFromEntryKnownPeers(peer: string) {
2733
+ for (const [hash, peers] of this._entryKnownPeers) {
2734
+ peers.delete(peer);
2735
+ if (peers.size === 0) {
2736
+ this._entryKnownPeers.delete(hash);
2737
+ }
2738
+ }
2739
+ }
2740
+
2741
+ private isEntryKnownByPeer(hash: string, peer: string) {
2742
+ return this._entryKnownPeers.get(hash)?.has(peer) === true;
2743
+ }
2744
+
2745
+ private markRepairSweepOptimisticPeer(gid: string, peer: string) {
2746
+ let peers = this._repairSweepOptimisticGidPeersPending.get(gid);
2747
+ if (!peers) {
2748
+ peers = new Map();
2749
+ this._repairSweepOptimisticGidPeersPending.set(gid, peers);
2750
+ }
2751
+ peers.set(peer, (peers.get(peer) || 0) + 1);
2752
+ }
2753
+
2754
+ private hasPendingRepairSweepOptimisticPeer(gid: string, peer: string) {
2755
+ return (this._repairSweepOptimisticGidPeersPending.get(gid)?.get(peer) || 0) > 0;
2756
+ }
2757
+
2758
+ private createEntryReplicatedForRepair(properties: {
2759
+ entry: Entry<T>;
2760
+ coordinates: NumberFromType<R>[];
2761
+ leaders: Map<string, { intersecting: boolean }>;
2762
+ replicas: number;
2763
+ }) {
2764
+ const assignedToRangeBoundary = shouldAssignToRangeBoundary(
2765
+ properties.leaders,
2766
+ properties.replicas,
2767
+ );
2768
+ const cidObject = cidifyString(properties.entry.hash);
2769
+ const hashNumber = this.indexableDomain.numbers.bytesToNumber(
2770
+ cidObject.multihash.digest,
2771
+ );
2772
+ return new this.indexableDomain.constructorEntry({
2773
+ assignedToRangeBoundary,
2774
+ coordinates: properties.coordinates,
2775
+ meta: properties.entry.meta,
2776
+ hash: properties.entry.hash,
2777
+ hashNumber,
2778
+ });
2779
+ }
2780
+
2781
+ private isAssumeSyncedRepairSuppressed() {
2782
+ return this._assumeSyncedRepairSuppressedUntil > Date.now();
2783
+ }
2784
+
2785
+ private isFrontierTrackedRepairMode(mode: RepairDispatchMode) {
2786
+ return mode !== "join-warmup";
2787
+ }
2788
+
2789
+ private async sleepTracked(delayMs: number) {
2790
+ if (delayMs <= 0) {
2791
+ return;
2792
+ }
2793
+ await new Promise<void>((resolve) => {
2794
+ const timer = setTimeout(() => {
2795
+ this._repairRetryTimers.delete(timer);
2796
+ resolve();
2797
+ }, delayMs);
2798
+ timer.unref?.();
2799
+ this._repairRetryTimers.add(timer);
2800
+ });
2801
+ }
2802
+
2803
+ private queueRepairFrontierEntries(
2804
+ mode: RepairDispatchMode,
2805
+ target: string,
2806
+ entries: Map<string, EntryReplicated<R>>,
2807
+ ) {
2808
+ let targets = this._repairFrontierByMode.get(mode);
2809
+ if (!targets) {
2810
+ targets = new Map();
2811
+ this._repairFrontierByMode.set(mode, targets);
2812
+ }
2813
+ let pending = targets.get(target);
2814
+ if (!pending) {
2815
+ pending = new Map();
2816
+ targets.set(target, pending);
2817
+ }
2818
+ for (const [hash, entry] of entries) {
2819
+ pending.set(hash, entry);
2820
+ }
2821
+ }
2822
+
2823
+ private clearRepairFrontierHashes(target: string, hashes: Iterable<string>) {
2824
+ const hashList = [...hashes];
2825
+ if (hashList.length === 0) {
2826
+ return;
2827
+ }
2828
+ for (const mode of REPAIR_DISPATCH_MODES) {
2829
+ const pending = this._repairFrontierByMode.get(mode)?.get(target);
2830
+ if (!pending) {
2831
+ continue;
2832
+ }
2833
+ for (const hash of hashList) {
2834
+ pending.delete(hash);
2835
+ }
2836
+ if (pending.size === 0) {
2837
+ this._repairFrontierByMode.get(mode)?.delete(target);
2838
+ }
2839
+ }
2840
+ }
2841
+
2842
+ private async getFullReplicaRepairCandidates(
2843
+ extraPeers?: Iterable<string>,
2844
+ options?: { includeSubscribers?: boolean },
2845
+ ) {
2846
+ const candidates = new Set<string>([
2847
+ this.node.identity.publicKey.hashcode(),
2848
+ ]);
2849
+ try {
2850
+ for (const peer of await this.getReplicators()) {
2851
+ candidates.add(peer);
2852
+ }
2853
+ } catch {
2854
+ for (const peer of this.uniqueReplicators) {
2855
+ candidates.add(peer);
2856
+ }
2857
+ }
2858
+ for (const peer of extraPeers ?? []) {
2859
+ candidates.add(peer);
2860
+ }
2861
+ if (options?.includeSubscribers !== false) {
2862
+ try {
2863
+ for (const subscriber of (await this._getTopicSubscribers(this.topic)) ?? []) {
2864
+ candidates.add(subscriber.hashcode());
2865
+ }
2866
+ } catch {
2867
+ // Best-effort only; explicit repair peers still keep the path safe.
2868
+ }
2869
+ }
2870
+ return candidates;
2871
+ }
2872
+
2873
+ private removeRepairFrontierTarget(target: string) {
2874
+ for (const mode of REPAIR_DISPATCH_MODES) {
2875
+ this._repairFrontierByMode.get(mode)?.delete(target);
2876
+ this._repairFrontierActiveTargetsByMode.get(mode)?.delete(target);
2877
+ }
2878
+ }
2879
+
2880
+ private async sendRepairConfirmation(
2881
+ target: PublicSignKey,
2882
+ hashes: Iterable<string>,
2883
+ ) {
2884
+ const uniqueHashes = [...new Set(hashes)];
2885
+ for (let i = 0; i < uniqueHashes.length; i += REPAIR_CONFIRMATION_HASH_BATCH_SIZE) {
2886
+ const chunk = uniqueHashes.slice(
2887
+ i,
2888
+ i + REPAIR_CONFIRMATION_HASH_BATCH_SIZE,
2889
+ );
2890
+ await this.rpc.send(new ConfirmEntriesMessage({ hashes: chunk }), {
2891
+ priority: 1,
2892
+ mode: new SilentDelivery({ to: [target], redundancy: 1 }),
2893
+ });
2894
+ }
2895
+ }
2896
+
2897
+ private async pushRepairEntries(
2898
+ target: string,
2899
+ entries: Map<string, EntryReplicated<R>>,
2900
+ ) {
2901
+ for await (const message of createExchangeHeadsMessages(
2902
+ this.log,
2903
+ [...entries.keys()],
2904
+ )) {
2905
+ await this.rpc.send(message, {
2906
+ priority: 1,
2907
+ mode: new SilentDelivery({ to: [target], redundancy: 1 }),
2908
+ });
2909
+ }
2910
+ }
2911
+
2912
+ private async sendRepairEntriesWithTransport(
2913
+ target: string,
2914
+ entries: Map<string, EntryReplicated<R>>,
2915
+ transport: RepairTransportMode,
2916
+ options?: { bypassKnownPeers?: boolean },
2917
+ ) {
2918
+ const unknownEntries = new Map<string, EntryReplicated<R>>();
2919
+ const knownHashes: string[] = [];
2920
+ for (const [hash, entry] of entries) {
2921
+ if (options?.bypassKnownPeers || !this.isEntryKnownByPeer(hash, target)) {
2922
+ unknownEntries.set(hash, entry);
2923
+ } else {
2924
+ knownHashes.push(hash);
2925
+ }
2926
+ }
2927
+ this.clearRepairFrontierHashes(target, knownHashes);
2928
+ if (unknownEntries.size === 0) {
2929
+ return;
2930
+ }
2931
+ if (transport === "simple") {
2932
+ // Fallback repair should not depend on the target completing the
2933
+ // RequestMaybeSync -> ResponseMaybeSync round trip.
2934
+ await this.pushRepairEntries(target, unknownEntries);
2935
+ return;
2936
+ }
2937
+
2938
+ await this.syncronizer.onMaybeMissingEntries({
2939
+ entries: unknownEntries,
2940
+ targets: [target],
2941
+ });
2942
+ }
2943
+
2944
+ private async sendMaybeMissingEntriesNow(
2945
+ target: string,
2946
+ entries: Map<string, EntryReplicated<R>>,
2947
+ options: {
2948
+ mode: RepairDispatchMode;
2949
+ transport: RepairTransportMode;
2950
+ bypassRecentDedupe?: boolean;
2951
+ },
2952
+ ) {
2953
+ if (entries.size === 0) {
2954
+ return;
2955
+ }
2956
+
2957
+ const now = Date.now();
2958
+ let recentlyDispatchedByHash = this._recentRepairDispatch.get(target);
2959
+ if (!recentlyDispatchedByHash) {
2960
+ recentlyDispatchedByHash = new Map();
2961
+ this._recentRepairDispatch.set(target, recentlyDispatchedByHash);
2962
+ }
2963
+ for (const [hash, ts] of recentlyDispatchedByHash) {
2964
+ if (now - ts > RECENT_REPAIR_DISPATCH_TTL_MS) {
2965
+ recentlyDispatchedByHash.delete(hash);
2966
+ }
2967
+ }
2968
+
2969
+ const filteredEntries =
2970
+ options.bypassRecentDedupe === true
2971
+ ? new Map(entries)
2972
+ : new Map<string, EntryReplicated<any>>();
2973
+ if (options.bypassRecentDedupe !== true) {
2974
+ for (const [hash, entry] of entries) {
2975
+ const prev = recentlyDispatchedByHash.get(hash);
2976
+ if (prev != null && now - prev <= RECENT_REPAIR_DISPATCH_TTL_MS) {
2977
+ continue;
2978
+ }
2979
+ recentlyDispatchedByHash.set(hash, now);
2980
+ filteredEntries.set(hash, entry);
2981
+ }
2982
+ } else {
2983
+ for (const hash of entries.keys()) {
2984
+ recentlyDispatchedByHash.set(hash, now);
2985
+ }
2986
+ }
2987
+ if (filteredEntries.size === 0) {
2988
+ return;
2989
+ }
2990
+
2991
+ const bucket = this._repairMetrics[options.mode];
2992
+ bucket.dispatches += 1;
2993
+ bucket.entries += filteredEntries.size;
2994
+ if (options.transport === "simple") {
2995
+ bucket.simpleFallbackPasses += 1;
2996
+ } else {
2997
+ bucket.ratelessFirstPasses += 1;
2998
+ }
2999
+
3000
+ await Promise.resolve(
3001
+ this.sendRepairEntriesWithTransport(
3002
+ target,
3003
+ filteredEntries,
3004
+ options.transport,
3005
+ { bypassKnownPeers: options.mode === "churn" },
3006
+ ),
3007
+ ).catch((error: any) => logger.error(error));
3008
+ }
3009
+
3010
+ private ensureRepairFrontierRunner(
3011
+ mode: RepairDispatchMode,
3012
+ target: string,
3013
+ retryScheduleMs?: number[],
3014
+ ) {
3015
+ const activeTargets = this._repairFrontierActiveTargetsByMode.get(mode);
3016
+ if (!activeTargets || activeTargets.has(target) || this.closed) {
3017
+ return;
3018
+ }
3019
+ activeTargets.add(target);
3020
+ const retrySchedule = resolveRepairRetrySchedule(
3021
+ mode,
3022
+ retryScheduleMs,
3023
+ this.isFrontierTrackedRepairMode(mode),
3024
+ );
3025
+ const steadyStateDelay =
3026
+ retrySchedule.length > 1
3027
+ ? Math.max(1, retrySchedule[retrySchedule.length - 1] - retrySchedule[retrySchedule.length - 2])
3028
+ : Math.max(retrySchedule[0] || 1_000, 1_000);
3029
+
3030
+ void (async () => {
3031
+ let attemptIndex = 0;
3032
+ try {
3033
+ for (;;) {
3034
+ if (this.closed) {
3035
+ return;
3036
+ }
3037
+ const pending = this._repairFrontierByMode.get(mode)?.get(target);
3038
+ if (!pending || pending.size === 0) {
3039
+ return;
3040
+ }
3041
+
3042
+ if (
3043
+ (mode === "join-warmup" || mode === "join-authoritative") &&
3044
+ this.isAssumeSyncedRepairSuppressed()
3045
+ ) {
3046
+ await this.sleepTracked(
3047
+ Math.max(250, this._assumeSyncedRepairSuppressedUntil - Date.now()),
3048
+ );
3049
+ continue;
3050
+ }
3051
+
3052
+ await this.sendMaybeMissingEntriesNow(target, pending, {
3053
+ mode,
3054
+ transport: getRepairTransportForAttempt(mode, attemptIndex),
3055
+ bypassRecentDedupe: true,
3056
+ });
3057
+
3058
+ const remaining = this._repairFrontierByMode.get(mode)?.get(target);
3059
+ if (!remaining || remaining.size === 0) {
3060
+ return;
3061
+ }
3062
+
3063
+ const waitMs =
3064
+ attemptIndex + 1 < retrySchedule.length
3065
+ ? Math.max(0, retrySchedule[attemptIndex + 1] - retrySchedule[attemptIndex])
3066
+ : steadyStateDelay;
3067
+ attemptIndex = Math.min(attemptIndex + 1, retrySchedule.length - 1);
3068
+ await this.sleepTracked(waitMs);
3069
+ }
3070
+ } finally {
3071
+ activeTargets.delete(target);
3072
+ if (
3073
+ !this.closed &&
3074
+ (this._repairFrontierByMode.get(mode)?.get(target)?.size || 0) > 0
3075
+ ) {
3076
+ this.ensureRepairFrontierRunner(mode, target, retryScheduleMs);
3077
+ }
3078
+ }
3079
+ })().catch((error: any) => {
3080
+ activeTargets.delete(target);
3081
+ logger.error(error);
3082
+ });
3083
+ }
3084
+
3085
+ private flushAppendBackfill() {
3086
+ if (this._appendBackfillPendingByTarget.size === 0) {
3087
+ return;
3088
+ }
3089
+ const pending = this._appendBackfillPendingByTarget;
3090
+ this._appendBackfillPendingByTarget = new Map();
3091
+ for (const [target, entries] of pending) {
3092
+ this.dispatchMaybeMissingEntries(target, entries, {
3093
+ mode: "append-backfill",
3094
+ });
3095
+ }
3096
+ }
3097
+
3098
+ private queueAppendBackfill(target: string, entry: EntryReplicated<R>) {
3099
+ let entries = this._appendBackfillPendingByTarget.get(target);
3100
+ if (!entries) {
3101
+ entries = new Map();
3102
+ this._appendBackfillPendingByTarget.set(target, entries);
3103
+ }
3104
+ entries.set(entry.hash, entry);
3105
+ if (entries.size >= this.repairSweepTargetBufferSize) {
3106
+ this.flushAppendBackfill();
3107
+ return;
3108
+ }
3109
+ if (this._appendBackfillTimer || this.closed) {
3110
+ return;
3111
+ }
3112
+ const timer = setTimeout(() => {
3113
+ this._repairRetryTimers.delete(timer);
3114
+ if (this._appendBackfillTimer === timer) {
3115
+ this._appendBackfillTimer = undefined;
3116
+ }
3117
+ if (this.closed) {
3118
+ return;
3119
+ }
3120
+ this.flushAppendBackfill();
3121
+ }, APPEND_BACKFILL_DELAY_MS);
3122
+ timer.unref?.();
3123
+ this._repairRetryTimers.add(timer);
3124
+ this._appendBackfillTimer = timer;
3125
+ }
3126
+
2510
3127
  private dispatchMaybeMissingEntries(
2511
3128
  target: string,
2512
3129
  entries: Map<string, EntryReplicated<R>>,
2513
- options?: {
3130
+ options: {
3131
+ mode: RepairDispatchMode;
2514
3132
  bypassRecentDedupe?: boolean;
2515
3133
  retryScheduleMs?: number[];
2516
- forceFreshDelivery?: boolean;
2517
3134
  },
2518
3135
  ) {
2519
3136
  if (entries.size === 0) {
2520
3137
  return;
2521
3138
  }
2522
3139
 
3140
+ if (this.isFrontierTrackedRepairMode(options.mode)) {
3141
+ this.queueRepairFrontierEntries(options.mode, target, entries);
3142
+ this.ensureRepairFrontierRunner(
3143
+ options.mode,
3144
+ target,
3145
+ options.retryScheduleMs,
3146
+ );
3147
+ return;
3148
+ }
3149
+
2523
3150
  const now = Date.now();
2524
3151
  let recentlyDispatchedByHash = this._recentRepairDispatch.get(target);
2525
3152
  if (!recentlyDispatchedByHash) {
@@ -2533,10 +3160,10 @@ export class SharedLog<
2533
3160
  }
2534
3161
 
2535
3162
  const filteredEntries =
2536
- options?.bypassRecentDedupe === true
3163
+ options.bypassRecentDedupe === true
2537
3164
  ? new Map(entries)
2538
3165
  : new Map<string, EntryReplicated<any>>();
2539
- if (options?.bypassRecentDedupe !== true) {
3166
+ if (options.bypassRecentDedupe !== true) {
2540
3167
  for (const [hash, entry] of entries) {
2541
3168
  const prev = recentlyDispatchedByHash.get(hash);
2542
3169
  if (prev != null && now - prev <= RECENT_REPAIR_DISPATCH_TTL_MS) {
@@ -2553,64 +3180,69 @@ export class SharedLog<
2553
3180
  if (filteredEntries.size === 0) {
2554
3181
  return;
2555
3182
  }
2556
- const retrySchedule =
2557
- options?.retryScheduleMs && options.retryScheduleMs.length > 0
2558
- ? options.retryScheduleMs
2559
- : options?.forceFreshDelivery
2560
- ? FORCE_FRESH_RETRY_SCHEDULE_MS
2561
- : [0];
2562
-
2563
- const run = () => {
2564
- // For force-fresh churn repair we intentionally bypass rateless IBLT and
2565
- // use simple hash-based sync. This path is a directed "push these hashes
2566
- // to that peer" recovery flow; using simple sync here avoids occasional
2567
- // single-hash gaps seen with IBLT-oriented maybe-sync batches under churn.
2568
- if (
2569
- options?.forceFreshDelivery &&
2570
- this.syncronizer instanceof RatelessIBLTSynchronizer
2571
- ) {
2572
- return Promise.resolve(
2573
- this.syncronizer.simple.onMaybeMissingEntries({
2574
- entries: filteredEntries,
2575
- targets: [target],
2576
- }),
2577
- ).catch((error: any) => logger.error(error));
3183
+
3184
+ if (
3185
+ (options.mode === "join-warmup" ||
3186
+ options.mode === "join-authoritative") &&
3187
+ this.isAssumeSyncedRepairSuppressed()
3188
+ ) {
3189
+ return;
3190
+ }
3191
+
3192
+ const retrySchedule = resolveRepairRetrySchedule(
3193
+ options.mode,
3194
+ options.retryScheduleMs,
3195
+ this.isFrontierTrackedRepairMode(options.mode),
3196
+ );
3197
+ const bucket = this._repairMetrics[options.mode];
3198
+ bucket.dispatches += 1;
3199
+ bucket.entries += filteredEntries.size;
3200
+
3201
+ const run = (transport: RepairTransportMode) => {
3202
+ if (transport === "simple") {
3203
+ bucket.simpleFallbackPasses += 1;
3204
+ } else {
3205
+ bucket.ratelessFirstPasses += 1;
2578
3206
  }
2579
3207
 
2580
3208
  return Promise.resolve(
2581
- this.syncronizer.onMaybeMissingEntries({
2582
- entries: filteredEntries,
2583
- targets: [target],
2584
- }),
3209
+ this.sendRepairEntriesWithTransport(
3210
+ target,
3211
+ filteredEntries,
3212
+ transport,
3213
+ { bypassKnownPeers: options.mode === "churn" },
3214
+ ),
2585
3215
  ).catch((error: any) => logger.error(error));
2586
3216
  };
2587
3217
 
2588
- for (const delayMs of retrySchedule) {
3218
+ retrySchedule.forEach((delayMs, index) => {
3219
+ const transport = getRepairTransportForAttempt(options.mode, index);
2589
3220
  if (delayMs === 0) {
2590
- void run();
2591
- continue;
3221
+ void run(transport);
3222
+ return;
2592
3223
  }
2593
3224
  const timer = setTimeout(() => {
2594
3225
  this._repairRetryTimers.delete(timer);
2595
3226
  if (this.closed) {
2596
3227
  return;
2597
3228
  }
2598
- void run();
3229
+ void run(transport);
2599
3230
  }, delayMs);
2600
3231
  timer.unref?.();
2601
3232
  this._repairRetryTimers.add(timer);
2602
- }
3233
+ });
2603
3234
  }
2604
3235
 
2605
3236
  private scheduleRepairSweep(options: {
2606
- forceFreshDelivery: boolean;
2607
- addedPeers: Set<string>;
3237
+ mode: RepairDispatchMode;
3238
+ peers?: Iterable<string>;
2608
3239
  }) {
2609
- if (options.forceFreshDelivery) {
2610
- this._repairSweepForceFreshPending = true;
2611
- }
2612
- for (const peer of options.addedPeers) {
2613
- this._repairSweepAddedPeersPending.add(peer);
3240
+ this._repairSweepPendingModes.add(options.mode);
3241
+ const pendingPeers = this._repairSweepPendingPeersByMode.get(options.mode);
3242
+ if (pendingPeers) {
3243
+ for (const peer of options.peers ?? []) {
3244
+ pendingPeers.add(peer);
3245
+ }
2614
3246
  }
2615
3247
  if (!this._repairSweepRunning && !this.closed) {
2616
3248
  this._repairSweepRunning = true;
@@ -2618,50 +3250,171 @@ export class SharedLog<
2618
3250
  }
2619
3251
  }
2620
3252
 
3253
+ private scheduleJoinAuthoritativeRepair(peers: Set<string>) {
3254
+ if (this.closed || peers.size === 0) {
3255
+ return;
3256
+ }
3257
+
3258
+ for (const delayMs of JOIN_AUTHORITATIVE_REPAIR_SWEEP_DELAYS_MS) {
3259
+ let pendingPeers = this._joinAuthoritativeRepairPeersByDelay.get(delayMs);
3260
+ if (!pendingPeers) {
3261
+ pendingPeers = new Set();
3262
+ this._joinAuthoritativeRepairPeersByDelay.set(delayMs, pendingPeers);
3263
+ }
3264
+ for (const peer of peers) {
3265
+ pendingPeers.add(peer);
3266
+ }
3267
+
3268
+ if (this._joinAuthoritativeRepairTimersByDelay.has(delayMs)) {
3269
+ continue;
3270
+ }
3271
+
3272
+ const timer = setTimeout(() => {
3273
+ this._repairRetryTimers.delete(timer);
3274
+ this._joinAuthoritativeRepairTimersByDelay.delete(delayMs);
3275
+ if (this.closed) {
3276
+ return;
3277
+ }
3278
+
3279
+ const peersForSweep = new Set(
3280
+ this._joinAuthoritativeRepairPeersByDelay.get(delayMs) ?? [],
3281
+ );
3282
+ this._joinAuthoritativeRepairPeersByDelay.delete(delayMs);
3283
+ if (peersForSweep.size === 0) {
3284
+ return;
3285
+ }
3286
+
3287
+ // A joiner's leader view can still be partial on the first delayed pass
3288
+ // under pubsub jitter. Bounded per-peer rescans widen the authoritative
3289
+ // frontier without adding per-append sweeps.
3290
+ this.scheduleRepairSweep({
3291
+ mode: "join-authoritative",
3292
+ peers: peersForSweep,
3293
+ });
3294
+ }, delayMs);
3295
+ timer.unref?.();
3296
+ this._repairRetryTimers.add(timer);
3297
+ this._joinAuthoritativeRepairTimersByDelay.set(delayMs, timer);
3298
+ }
3299
+ }
3300
+
2621
3301
  private async runRepairSweep() {
2622
3302
  try {
2623
3303
  while (!this.closed) {
2624
- const forceFreshDelivery = this._repairSweepForceFreshPending;
2625
- const addedPeers = new Set(this._repairSweepAddedPeersPending);
2626
- this._repairSweepForceFreshPending = false;
2627
- this._repairSweepAddedPeersPending.clear();
3304
+ const pendingModes = new Set(this._repairSweepPendingModes);
3305
+ const pendingPeersByMode = cloneRepairPendingPeersByMode(
3306
+ this._repairSweepPendingPeersByMode,
3307
+ );
3308
+ this._repairSweepPendingModes.clear();
3309
+ for (const peers of this._repairSweepPendingPeersByMode.values()) {
3310
+ peers.clear();
3311
+ }
2628
3312
 
2629
- if (!forceFreshDelivery && addedPeers.size === 0) {
3313
+ if (pendingModes.size === 0) {
2630
3314
  return;
2631
3315
  }
2632
3316
 
2633
- const pendingByTarget = new Map<string, Map<string, EntryReplicated<any>>>();
2634
- const flushTarget = (target: string) => {
2635
- const entries = pendingByTarget.get(target);
3317
+ const optimisticGidPeersByMode = new Map<
3318
+ RepairDispatchMode,
3319
+ Map<string, Set<string>>
3320
+ >();
3321
+ const optimisticGidPeersConsumedByMode = new Map<
3322
+ RepairDispatchMode,
3323
+ Map<string, Map<string, number>>
3324
+ >();
3325
+ for (const mode of pendingModes) {
3326
+ const modePeers = pendingPeersByMode.get(mode);
3327
+ if (!modePeers || modePeers.size === 0) {
3328
+ continue;
3329
+ }
3330
+ const optimisticGidPeers = new Map<string, Set<string>>();
3331
+ const optimisticGidPeersConsumed = new Map<string, Map<string, number>>();
3332
+ for (const [gid, peerCounts] of this._repairSweepOptimisticGidPeersPending) {
3333
+ let matchedPeers: Set<string> | undefined;
3334
+ let matchedCounts: Map<string, number> | undefined;
3335
+ for (const [peer, count] of peerCounts) {
3336
+ if (!modePeers.has(peer)) {
3337
+ continue;
3338
+ }
3339
+ matchedPeers ||= new Set();
3340
+ matchedCounts ||= new Map();
3341
+ matchedPeers.add(peer);
3342
+ matchedCounts.set(peer, count);
3343
+ }
3344
+ if (matchedPeers && matchedCounts) {
3345
+ optimisticGidPeers.set(gid, matchedPeers);
3346
+ optimisticGidPeersConsumed.set(gid, matchedCounts);
3347
+ }
3348
+ }
3349
+ if (optimisticGidPeers.size > 0) {
3350
+ optimisticGidPeersByMode.set(mode, optimisticGidPeers);
3351
+ optimisticGidPeersConsumedByMode.set(mode, optimisticGidPeersConsumed);
3352
+ }
3353
+ }
3354
+
3355
+ const pendingByMode = new Map<
3356
+ RepairDispatchMode,
3357
+ Map<string, Map<string, EntryReplicated<any>>>
3358
+ >(REPAIR_DISPATCH_MODES.map((mode) => [mode, new Map()]));
3359
+ const pendingRepairPeers = new Set<string>();
3360
+ for (const peers of pendingPeersByMode.values()) {
3361
+ for (const peer of peers) {
3362
+ pendingRepairPeers.add(peer);
3363
+ }
3364
+ }
3365
+ const fullReplicaRepairCandidates =
3366
+ await this.getFullReplicaRepairCandidates(pendingRepairPeers, {
3367
+ includeSubscribers: false,
3368
+ });
3369
+ const fullReplicaRepairCandidateCount = Math.max(
3370
+ 1,
3371
+ fullReplicaRepairCandidates.size,
3372
+ );
3373
+ const nextFrontierByMode = new Map<
3374
+ RepairDispatchMode,
3375
+ Map<string, Map<string, EntryReplicated<any>>>
3376
+ >([
3377
+ ["join-authoritative", new Map()],
3378
+ ["churn", new Map()],
3379
+ ]);
3380
+ const flushTarget = (mode: RepairDispatchMode, target: string) => {
3381
+ const targets = pendingByMode.get(mode);
3382
+ const entries = targets?.get(target);
2636
3383
  if (!entries || entries.size === 0) {
2637
3384
  return;
2638
3385
  }
2639
- const isJoinWarmupTarget = addedPeers.has(target);
2640
- const bypassRecentDedupe = isJoinWarmupTarget || forceFreshDelivery;
2641
3386
  this.dispatchMaybeMissingEntries(target, entries, {
2642
- bypassRecentDedupe,
2643
- retryScheduleMs: isJoinWarmupTarget
2644
- ? JOIN_WARMUP_RETRY_SCHEDULE_MS
2645
- : undefined,
2646
- forceFreshDelivery,
3387
+ bypassRecentDedupe: true,
3388
+ mode,
2647
3389
  });
2648
- pendingByTarget.delete(target);
3390
+ targets?.delete(target);
2649
3391
  };
2650
3392
  const queueEntryForTarget = (
3393
+ mode: RepairDispatchMode,
2651
3394
  target: string,
2652
3395
  entry: EntryReplicated<any>,
2653
3396
  ) => {
2654
- let set = pendingByTarget.get(target);
3397
+ const sweepTargets = nextFrontierByMode.get(mode);
3398
+ if (sweepTargets) {
3399
+ let sweepSet = sweepTargets.get(target);
3400
+ if (!sweepSet) {
3401
+ sweepSet = new Map();
3402
+ sweepTargets.set(target, sweepSet);
3403
+ }
3404
+ sweepSet.set(entry.hash, entry);
3405
+ }
3406
+ const targets = pendingByMode.get(mode)!;
3407
+ let set = targets.get(target);
2655
3408
  if (!set) {
2656
3409
  set = new Map();
2657
- pendingByTarget.set(target, set);
3410
+ targets.set(target, set);
2658
3411
  }
2659
3412
  if (set.has(entry.hash)) {
2660
3413
  return;
2661
3414
  }
2662
3415
  set.set(entry.hash, entry);
2663
3416
  if (set.size >= this.repairSweepTargetBufferSize) {
2664
- flushTarget(target);
3417
+ flushTarget(mode, target);
2665
3418
  }
2666
3419
  };
2667
3420
 
@@ -2671,23 +3424,52 @@ export class SharedLog<
2671
3424
  const entries = await iterator.next(REPAIR_SWEEP_ENTRY_BATCH_SIZE);
2672
3425
  for (const entry of entries) {
2673
3426
  const entryReplicated = entry.value;
3427
+ const gid = entryReplicated.gid;
3428
+ const knownPeers = this._gidPeersHistory.get(gid);
3429
+ const requestedReplicas =
3430
+ decodeReplicas(entryReplicated).getValue(this);
2674
3431
  const currentPeers = await this.findLeaders(
2675
3432
  entryReplicated.coordinates,
2676
3433
  entryReplicated,
2677
3434
  { roleAge: 0 },
2678
3435
  );
2679
- if (forceFreshDelivery) {
3436
+
3437
+ if (pendingModes.has("churn")) {
2680
3438
  for (const [currentPeer] of currentPeers) {
2681
3439
  if (currentPeer === this.node.identity.publicKey.hashcode()) {
2682
3440
  continue;
2683
3441
  }
2684
- queueEntryForTarget(currentPeer, entryReplicated);
3442
+ queueEntryForTarget("churn", currentPeer, entryReplicated);
2685
3443
  }
2686
3444
  }
2687
- if (addedPeers.size > 0) {
2688
- for (const peer of addedPeers) {
2689
- if (currentPeers.has(peer)) {
2690
- queueEntryForTarget(peer, entryReplicated);
3445
+
3446
+ for (const mode of pendingModes) {
3447
+ const modePeers = pendingPeersByMode.get(mode);
3448
+ if (!modePeers || modePeers.size === 0) {
3449
+ continue;
3450
+ }
3451
+ const optimisticPeers = optimisticGidPeersByMode.get(mode)?.get(gid);
3452
+ for (const peer of modePeers) {
3453
+ if (this.isEntryKnownByPeer(entryReplicated.hash, peer)) {
3454
+ continue;
3455
+ }
3456
+ const wasOptimisticallyAssigned =
3457
+ optimisticPeers?.has(peer) === true;
3458
+ const isCoveredByFullReplicaRepair =
3459
+ mode === "join-authoritative" &&
3460
+ fullReplicaRepairCandidates.has(peer) &&
3461
+ requestedReplicas >= fullReplicaRepairCandidateCount;
3462
+ const shouldQueue =
3463
+ mode === "join-authoritative"
3464
+ ? currentPeers.has(peer) || isCoveredByFullReplicaRepair
3465
+ : wasOptimisticallyAssigned ||
3466
+ (currentPeers.has(peer) && !knownPeers?.has(peer));
3467
+ if (shouldQueue) {
3468
+ // Authoritative join repair must not trust partial gid peer history,
3469
+ // otherwise a late joiner can get stuck with a partial historical
3470
+ // backfill forever. Once we enter the authoritative pass, queue every
3471
+ // entry whose current leader set still includes the added peer.
3472
+ queueEntryForTarget(mode, peer, entryReplicated);
2691
3473
  }
2692
3474
  }
2693
3475
  }
@@ -2697,8 +3479,64 @@ export class SharedLog<
2697
3479
  await iterator.close();
2698
3480
  }
2699
3481
 
2700
- for (const target of [...pendingByTarget.keys()]) {
2701
- flushTarget(target);
3482
+ for (const [, optimisticGidPeersConsumed] of optimisticGidPeersConsumedByMode) {
3483
+ for (const [gid, peerCounts] of optimisticGidPeersConsumed) {
3484
+ const pendingPeerCounts =
3485
+ this._repairSweepOptimisticGidPeersPending.get(gid);
3486
+ if (!pendingPeerCounts) {
3487
+ continue;
3488
+ }
3489
+ for (const [peer, count] of peerCounts) {
3490
+ const current = pendingPeerCounts.get(peer) || 0;
3491
+ const next = current - count;
3492
+ if (next > 0) {
3493
+ pendingPeerCounts.set(peer, next);
3494
+ } else {
3495
+ pendingPeerCounts.delete(peer);
3496
+ }
3497
+ }
3498
+ if (pendingPeerCounts.size === 0) {
3499
+ this._repairSweepOptimisticGidPeersPending.delete(gid);
3500
+ }
3501
+ }
3502
+ }
3503
+
3504
+ for (const mode of pendingModes) {
3505
+ if (mode !== "join-authoritative" && mode !== "churn") {
3506
+ continue;
3507
+ }
3508
+ const nextTargets = nextFrontierByMode.get(mode) ?? new Map();
3509
+ const frontierTargets = this._repairFrontierByMode.get(mode);
3510
+ for (const target of pendingPeersByMode.get(mode) ?? []) {
3511
+ const replacement = nextTargets.get(target);
3512
+ if (mode === "join-authoritative") {
3513
+ // Authoritative join repair is receipt-driven: a later sweep can have a
3514
+ // narrower transient leader view, but it must not forget unconfirmed
3515
+ // hashes that were already queued for this joiner.
3516
+ if (replacement && replacement.size > 0) {
3517
+ const existing = frontierTargets?.get(target);
3518
+ if (existing && existing.size > 0) {
3519
+ for (const [hash, entry] of replacement) {
3520
+ existing.set(hash, entry);
3521
+ }
3522
+ } else {
3523
+ frontierTargets?.set(target, replacement);
3524
+ }
3525
+ }
3526
+ continue;
3527
+ }
3528
+ if (replacement && replacement.size > 0) {
3529
+ frontierTargets?.set(target, replacement);
3530
+ } else {
3531
+ frontierTargets?.delete(target);
3532
+ }
3533
+ }
3534
+ }
3535
+
3536
+ for (const [mode, targets] of pendingByMode) {
3537
+ for (const target of [...targets.keys()]) {
3538
+ flushTarget(mode, target);
3539
+ }
2702
3540
  }
2703
3541
  }
2704
3542
  } catch (error: any) {
@@ -2707,11 +3545,7 @@ export class SharedLog<
2707
3545
  }
2708
3546
  } finally {
2709
3547
  this._repairSweepRunning = false;
2710
- if (
2711
- !this.closed &&
2712
- (this._repairSweepForceFreshPending ||
2713
- this._repairSweepAddedPeersPending.size > 0)
2714
- ) {
3548
+ if (!this.closed && this._repairSweepPendingModes.size > 0) {
2715
3549
  this._repairSweepRunning = true;
2716
3550
  void this.runRepairSweep();
2717
3551
  }
@@ -2724,9 +3558,89 @@ export class SharedLog<
2724
3558
  entry: Entry<T> | ShallowEntry | EntryReplicated<R>;
2725
3559
  leaders: Map<string, any>;
2726
3560
  };
2727
- }) {
2728
- if (!this.keep || !(await this.keep(args.value.entry))) {
2729
- return this.pruneDebouncedFn.add(args);
3561
+ }): Promise<boolean> {
3562
+ if (this.keep && (await this.keep(args.value.entry))) {
3563
+ return false;
3564
+ }
3565
+ void this.pruneDebouncedFn.add(args);
3566
+ return true;
3567
+ }
3568
+
3569
+ private async pruneJoinedEntriesNoLongerLed(entries: Entry<T>[]) {
3570
+ const selfHash = this.node.identity.publicKey.hashcode();
3571
+ for (const entry of entries) {
3572
+ if (this.closed || this._pendingDeletes.has(entry.hash)) {
3573
+ continue;
3574
+ }
3575
+
3576
+ const leaders = await this.findLeadersFromEntry(
3577
+ entry,
3578
+ decodeReplicas(entry).getValue(this),
3579
+ { roleAge: 0 },
3580
+ );
3581
+
3582
+ if (leaders.has(selfHash)) {
3583
+ this.pruneDebouncedFn.delete(entry.hash);
3584
+ continue;
3585
+ }
3586
+
3587
+ if (leaders.size === 0) {
3588
+ continue;
3589
+ }
3590
+
3591
+ await this.pruneDebouncedFnAddIfNotKeeping({
3592
+ key: entry.hash,
3593
+ value: { entry, leaders },
3594
+ });
3595
+ this.responseToPruneDebouncedFn.delete(entry.hash);
3596
+ }
3597
+ }
3598
+
3599
+ private async pruneIndexedEntriesNoLongerLed() {
3600
+ const selfHash = this.node.identity.publicKey.hashcode();
3601
+ const iterator = this.entryCoordinatesIndex.iterate({});
3602
+ let enqueuedPrune = false;
3603
+ try {
3604
+ while (!this.closed && !iterator.done()) {
3605
+ const entries = await iterator.next(REPAIR_SWEEP_ENTRY_BATCH_SIZE);
3606
+ for (const entry of entries) {
3607
+ const entryReplicated = entry.value;
3608
+ if (this.closed || this._pendingDeletes.has(entryReplicated.hash)) {
3609
+ continue;
3610
+ }
3611
+
3612
+ const leaders = await this.findLeaders(
3613
+ entryReplicated.coordinates,
3614
+ entryReplicated,
3615
+ { roleAge: 0 },
3616
+ );
3617
+
3618
+ if (leaders.has(selfHash)) {
3619
+ this.pruneDebouncedFn.delete(entryReplicated.hash);
3620
+ await this._pendingDeletes
3621
+ .get(entryReplicated.hash)
3622
+ ?.reject(new Error("Failed to delete, is leader again"));
3623
+ this.removePruneRequestSent(entryReplicated.hash);
3624
+ continue;
3625
+ }
3626
+
3627
+ if (leaders.size === 0) {
3628
+ continue;
3629
+ }
3630
+
3631
+ enqueuedPrune =
3632
+ (await this.pruneDebouncedFnAddIfNotKeeping({
3633
+ key: entryReplicated.hash,
3634
+ value: { entry: entryReplicated, leaders },
3635
+ })) || enqueuedPrune;
3636
+ this.responseToPruneDebouncedFn.delete(entryReplicated.hash);
3637
+ }
3638
+ }
3639
+ } finally {
3640
+ await iterator.close();
3641
+ }
3642
+ if (enqueuedPrune && !this.closed) {
3643
+ await this.pruneDebouncedFn.flush();
2730
3644
  }
2731
3645
  }
2732
3646
 
@@ -2903,6 +3817,7 @@ export class SharedLog<
2903
3817
  } else {
2904
3818
  await this._appendDeliverToReplicators(
2905
3819
  result.entry,
3820
+ coordinates,
2906
3821
  minReplicasValue,
2907
3822
  leaders,
2908
3823
  selfHash,
@@ -2912,13 +3827,14 @@ export class SharedLog<
2912
3827
  }
2913
3828
  }
2914
3829
 
2915
- if (!isLeader && !this.shouldDelayAdaptiveRebalance()) {
3830
+ const delayAdaptiveRebalance = this.shouldDelayAdaptiveRebalance();
3831
+ if (!isLeader && !delayAdaptiveRebalance) {
2916
3832
  this.pruneDebouncedFnAddIfNotKeeping({
2917
3833
  key: result.entry.hash,
2918
3834
  value: { entry: result.entry, leaders },
2919
3835
  });
2920
3836
  }
2921
- if (!this._isAdaptiveReplicating) {
3837
+ if (!delayAdaptiveRebalance) {
2922
3838
  this.rebalanceParticipationDebounced?.call();
2923
3839
  }
2924
3840
 
@@ -2960,8 +3876,21 @@ export class SharedLog<
2960
3876
  this._repairRetryTimers = new Set();
2961
3877
  this._recentRepairDispatch = new Map();
2962
3878
  this._repairSweepRunning = false;
2963
- this._repairSweepForceFreshPending = false;
2964
- this._repairSweepAddedPeersPending = new Set();
3879
+ this._repairSweepPendingModes = new Set();
3880
+ this._repairSweepPendingPeersByMode = createRepairPendingPeersByMode();
3881
+ this._repairFrontierByMode = createRepairFrontierByMode() as Map<
3882
+ RepairDispatchMode,
3883
+ Map<string, Map<string, EntryReplicated<R>>>
3884
+ >;
3885
+ this._repairFrontierActiveTargetsByMode = createRepairActiveTargetsByMode();
3886
+ this._repairSweepOptimisticGidPeersPending = new Map();
3887
+ this._entryKnownPeers = new Map();
3888
+ this._joinAuthoritativeRepairTimersByDelay = new Map();
3889
+ this._joinAuthoritativeRepairPeersByDelay = new Map();
3890
+ this._assumeSyncedRepairSuppressedUntil = 0;
3891
+ this._appendBackfillTimer = undefined;
3892
+ this._appendBackfillPendingByTarget = new Map();
3893
+ this._repairMetrics = createRepairMetrics();
2965
3894
  this._topicSubscribersCache = new Map();
2966
3895
  this.coordinateToHash = new Cache<string>({ max: 1e6, ttl: 1e4 });
2967
3896
  this.recentlyRebalanced = new Cache<string>({ max: 1e4, ttl: 1e5 });
@@ -3040,7 +3969,10 @@ export class SharedLog<
3040
3969
  this.pendingMaturity = new Map();
3041
3970
 
3042
3971
  const id = sha256Base64Sync(this.log.id);
3043
- const storage = await this.node.storage.sublevel(id);
3972
+ const [storage, logScope] = await Promise.all([
3973
+ this.node.storage.sublevel(id),
3974
+ this.node.indexer.scope(id),
3975
+ ]);
3044
3976
 
3045
3977
  const localBlocks = await new AnyBlockStore(await storage.sublevel("blocks"));
3046
3978
  const fanoutService = getSharedLogFanoutService(this.node.services);
@@ -3078,6 +4010,18 @@ export class SharedLog<
3078
4010
  })) ?? []
3079
4011
  );
3080
4012
  },
4013
+ watchProviders: fanoutService
4014
+ ? (cid, opts) =>
4015
+ fanoutService.watchProviders(blockProviderNamespace(cid), {
4016
+ signal: opts.signal,
4017
+ want: 8,
4018
+ ttlMs: 10_000,
4019
+ renewIntervalMs: 5_000,
4020
+ bootstrapMaxPeers: 2,
4021
+ onProviders: (providers) =>
4022
+ opts.onProviders(providers.map((provider) => provider.hash)),
4023
+ })
4024
+ : undefined,
3081
4025
  onPut: async (cid) => {
3082
4026
  // Best-effort directory announce for "get without remote.from" workflows.
3083
4027
  try {
@@ -3091,20 +4035,19 @@ export class SharedLog<
3091
4035
  },
3092
4036
  });
3093
4037
 
3094
- await this.remoteBlocks.start();
3095
-
3096
- const logScope = await this.node.indexer.scope(id);
3097
- const replicationIndex = await logScope.scope("replication");
4038
+ const remoteBlocksStartPromise = this.remoteBlocks.start();
4039
+ const [replicationIndex, logIndex] = await Promise.all([
4040
+ logScope.scope("replication"),
4041
+ logScope.scope("log"),
4042
+ ]);
3098
4043
  this._replicationRangeIndex = await replicationIndex.init({
3099
4044
  schema: this.indexableDomain.constructorRange,
3100
4045
  });
3101
-
3102
4046
  this._entryCoordinatesIndex = await replicationIndex.init({
3103
4047
  schema: this.indexableDomain.constructorEntry,
3104
4048
  });
3105
4049
 
3106
- const logIndex = await logScope.scope("log");
3107
-
4050
+ await remoteBlocksStartPromise;
3108
4051
  const hasIndexedReplicationInfo =
3109
4052
  (await this.replicationIndex.count({
3110
4053
  query: [
@@ -3266,47 +4209,50 @@ export class SharedLog<
3266
4209
  }
3267
4210
 
3268
4211
  // Open for communcation
3269
- await this.rpc.open({
3270
- queryType: TransportMessage,
3271
- responseType: TransportMessage,
3272
- responseHandler: (query, context) => this.onMessage(query, context),
3273
- topic: this.topic,
3274
- });
3275
-
3276
4212
  this._onSubscriptionFn =
3277
4213
  this._onSubscriptionFn || this._onSubscription.bind(this);
3278
- await this.node.services.pubsub.addEventListener(
3279
- "subscribe",
3280
- this._onSubscriptionFn,
3281
- );
3282
-
3283
4214
  this._onUnsubscriptionFn =
3284
4215
  this._onUnsubscriptionFn || this._onUnsubscription.bind(this);
3285
- await this.node.services.pubsub.addEventListener(
3286
- "unsubscribe",
3287
- this._onUnsubscriptionFn,
3288
- );
3289
-
3290
- await this.rpc.subscribe();
3291
- await this._openFanoutChannel(options?.fanout);
4216
+ await Promise.all([
4217
+ this.rpc.open({
4218
+ queryType: TransportMessage,
4219
+ responseType: TransportMessage,
4220
+ responseHandler: (query, context) => this.onMessage(query, context),
4221
+ topic: this.topic,
4222
+ }),
4223
+ this.node.services.pubsub.addEventListener(
4224
+ "subscribe",
4225
+ this._onSubscriptionFn,
4226
+ ),
4227
+ this.node.services.pubsub.addEventListener(
4228
+ "unsubscribe",
4229
+ this._onUnsubscriptionFn,
4230
+ ),
4231
+ ]);
3292
4232
 
3293
- // mark all our replicaiton ranges as "new", this would allow other peers to understand that we recently reopend our database and might need some sync and warmup
3294
- await this.updateTimestampOfOwnedReplicationRanges(); // TODO do we need to do this before subscribing?
4233
+ const fanoutOpenPromise = this._openFanoutChannel(options?.fanout);
4234
+ // Mark previously-owned replication ranges as "new" only when they already exist.
4235
+ // Fresh opens have nothing to touch here, so skip the extra scan/write entirely.
4236
+ const updateOwnedReplicationPromise = hasIndexedReplicationInfo
4237
+ ? this.updateTimestampOfOwnedReplicationRanges()
4238
+ : Promise.resolve();
4239
+ await Promise.all([fanoutOpenPromise, updateOwnedReplicationPromise]);
3295
4240
 
3296
4241
  // if we had a previous session with replication info, and new replication info dictates that we unreplicate
3297
4242
  // we should do that. Otherwise if options is a unreplication we dont need to do anything because
3298
4243
  // we are already unreplicated (as we are just opening)
3299
4244
 
3300
- let isUnreplicationOptionsDefined = isUnreplicationOptions(
4245
+ const isUnreplicationOptionsDefined = isUnreplicationOptions(
3301
4246
  options?.replicate,
3302
4247
  );
3303
4248
 
3304
4249
  const canResumeReplication =
4250
+ hasIndexedReplicationInfo &&
3305
4251
  (await isReplicationOptionsDependentOnPreviousState(
3306
4252
  options?.replicate,
3307
4253
  this.replicationIndex,
3308
4254
  this.node.identity.publicKey,
3309
- )) && hasIndexedReplicationInfo;
4255
+ ));
3310
4256
 
3311
4257
  if (hasIndexedReplicationInfo && isUnreplicationOptionsDefined) {
3312
4258
  await this.replicate(options?.replicate, { checkDuplicates: true });
@@ -3359,25 +4305,26 @@ export class SharedLog<
3359
4305
 
3360
4306
  async afterOpen(): Promise<void> {
3361
4307
  await super.afterOpen();
4308
+ const existingSubscribersPromise = this._getTopicSubscribers(this.topic);
3362
4309
 
3363
4310
  // We do this here, because these calls requires this.closed == false
3364
- void this.pruneOfflineReplicators()
3365
- .then(() => {
3366
- this._replicatorsReconciled = true;
3367
- })
4311
+ void this.pruneOfflineReplicators()
4312
+ .then(() => {
4313
+ this._replicatorsReconciled = true;
4314
+ })
3368
4315
  .catch((error) => {
3369
4316
  if (isNotStartedError(error as Error)) {
3370
4317
  return;
3371
4318
  }
3372
- logger.error(error);
3373
- });
4319
+ logger.error(error);
4320
+ });
3374
4321
 
3375
- this.startReplicatorLivenessSweep();
4322
+ this.startReplicatorLivenessSweep();
3376
4323
 
3377
- await this.rebalanceParticipation();
4324
+ await this.rebalanceParticipation();
3378
4325
 
3379
4326
  // Take into account existing subscription
3380
- (await this._getTopicSubscribers(this.topic))?.forEach((v) => {
4327
+ (await existingSubscribersPromise)?.forEach((v) => {
3381
4328
  if (v.equals(this.node.identity.publicKey)) {
3382
4329
  return;
3383
4330
  }
@@ -4008,8 +4955,28 @@ export class SharedLog<
4008
4955
  this._repairRetryTimers.clear();
4009
4956
  this._recentRepairDispatch.clear();
4010
4957
  this._repairSweepRunning = false;
4011
- this._repairSweepForceFreshPending = false;
4012
- this._repairSweepAddedPeersPending.clear();
4958
+ this._repairSweepPendingModes.clear();
4959
+ for (const peers of this._repairSweepPendingPeersByMode.values()) {
4960
+ peers.clear();
4961
+ }
4962
+ this._repairSweepOptimisticGidPeersPending.clear();
4963
+ this._entryKnownPeers.clear();
4964
+ for (const timer of this._joinAuthoritativeRepairTimersByDelay.values()) {
4965
+ clearTimeout(timer);
4966
+ }
4967
+ this._joinAuthoritativeRepairTimersByDelay.clear();
4968
+ this._joinAuthoritativeRepairPeersByDelay.clear();
4969
+ for (const targets of this._repairFrontierByMode.values()) {
4970
+ targets.clear();
4971
+ }
4972
+ for (const targets of this._repairFrontierActiveTargetsByMode.values()) {
4973
+ targets.clear();
4974
+ }
4975
+ if (this._appendBackfillTimer) {
4976
+ clearTimeout(this._appendBackfillTimer);
4977
+ this._appendBackfillTimer = undefined;
4978
+ }
4979
+ this._appendBackfillPendingByTarget.clear();
4013
4980
 
4014
4981
  for (const [_k, v] of this._pendingDeletes) {
4015
4982
  v.clear();
@@ -4192,6 +5159,7 @@ export class SharedLog<
4192
5159
 
4193
5160
  if (heads) {
4194
5161
  const filteredHeads: EntryWithRefs<any>[] = [];
5162
+ const confirmedHashes = new Set<string>();
4195
5163
  for (const head of heads) {
4196
5164
  if (!(await this.log.has(head.entry.hash))) {
4197
5165
  head.entry.init({
@@ -4200,10 +5168,22 @@ export class SharedLog<
4200
5168
  encoding: this.log.encoding,
4201
5169
  });
4202
5170
  filteredHeads.push(head);
5171
+ } else {
5172
+ confirmedHashes.add(head.entry.hash);
4203
5173
  }
4204
5174
  }
5175
+ const fromIsSelf = context.from.equals(this.node.identity.publicKey);
5176
+ if (!fromIsSelf) {
5177
+ this.markEntriesKnownByPeer(
5178
+ heads.map((head) => head.entry.hash),
5179
+ context.from.hashcode(),
5180
+ );
5181
+ }
4205
5182
 
4206
5183
  if (filteredHeads.length === 0) {
5184
+ if (confirmedHashes.size > 0 && !fromIsSelf) {
5185
+ await this.sendRepairConfirmation(context.from!, confirmedHashes);
5186
+ }
4207
5187
  return;
4208
5188
  }
4209
5189
  const groupedByGid = await groupByGid(filteredHeads);
@@ -4337,7 +5317,15 @@ export class SharedLog<
4337
5317
  }
4338
5318
 
4339
5319
  if (toMerge.length > 0) {
5320
+ this.markEntriesKnownByPeer(
5321
+ toMerge.map((entry) => entry.hash),
5322
+ context.from!.hashcode(),
5323
+ );
4340
5324
  await this.log.join(toMerge);
5325
+ for (const merged of toMerge) {
5326
+ confirmedHashes.add(merged.hash);
5327
+ }
5328
+ await this.pruneJoinedEntriesNoLongerLed(toMerge);
4341
5329
 
4342
5330
  toDelete?.map((x) =>
4343
5331
  // TODO types
@@ -4384,6 +5372,10 @@ export class SharedLog<
4384
5372
  promises.push(fn()); // we do this concurrently since waitForIsLeader might be a blocking operation for some entries
4385
5373
  }
4386
5374
  await Promise.all(promises);
5375
+ if (confirmedHashes.size > 0 && !context.from.equals(this.node.identity.publicKey)) {
5376
+ this.markEntriesKnownByPeer(confirmedHashes, context.from.hashcode());
5377
+ await this.sendRepairConfirmation(context.from!, confirmedHashes);
5378
+ }
4387
5379
  }
4388
5380
  } else if (msg instanceof RequestIPrune) {
4389
5381
  const hasAndIsLeader: string[] = [];
@@ -4391,6 +5383,7 @@ export class SharedLog<
4391
5383
 
4392
5384
  for (const hash of msg.hashes) {
4393
5385
  this.removePruneRequestSent(hash, from);
5386
+ this.removeEntriesKnownByPeer([hash], from);
4394
5387
 
4395
5388
  // if we expect the remote to be owner of this entry because we are to prune ourselves, then we need to remove the remote
4396
5389
  // this is due to that the remote has previously indicated to be a replicator to help us prune but now has changed their mind
@@ -4505,6 +5498,10 @@ export class SharedLog<
4505
5498
  for (const hash of msg.hashes) {
4506
5499
  this._pendingDeletes.get(hash)?.resolve(context.from.hashcode());
4507
5500
  }
5501
+ } else if (msg instanceof ConfirmEntriesMessage) {
5502
+ this.markEntriesKnownByPeer(msg.hashes, context.from.hashcode());
5503
+ this.clearRepairFrontierHashes(context.from.hashcode(), msg.hashes);
5504
+ return;
4508
5505
  } else if (await this.syncronizer.onMessage(msg, context)) {
4509
5506
  return; // the syncronizer has handled the message
4510
5507
  } else if (msg instanceof BlocksMessage) {
@@ -4876,6 +5873,23 @@ export class SharedLog<
4876
5873
  options?.replicate &&
4877
5874
  typeof options.replicate !== "boolean" &&
4878
5875
  options.replicate.assumeSynced;
5876
+ const seedAssumeSyncedPeerHistory = async (entry: Entry<T>) => {
5877
+ if (!assumeSynced) {
5878
+ return;
5879
+ }
5880
+
5881
+ const minReplicas = decodeReplicas(entry).getValue(this);
5882
+ const leaders = await this.findLeaders(
5883
+ await this.createCoordinates(entry, minReplicas),
5884
+ entry,
5885
+ {
5886
+ roleAge: 0,
5887
+ persist: false,
5888
+ },
5889
+ );
5890
+
5891
+ this.addPeersToGidPeerHistory(entry.meta.gid, leaders.keys());
5892
+ };
4879
5893
  const persistCoordinate = async (entry: Entry<T>) => {
4880
5894
  const minReplicas = decodeReplicas(entry).getValue(this);
4881
5895
  const leaders = await this.findLeaders(
@@ -4917,6 +5931,17 @@ export class SharedLog<
4917
5931
  if (options?.replicate) {
4918
5932
  let messageToSend: AddedReplicationSegmentMessage | undefined = undefined;
4919
5933
 
5934
+ if (assumeSynced) {
5935
+ // `assumeSynced` is an explicit contract that this join should trust the
5936
+ // supplied history and avoid initiating outbound repair while the local
5937
+ // replication ranges settle.
5938
+ this._assumeSyncedRepairSuppressedUntil =
5939
+ Date.now() + ASSUME_SYNCED_REPAIR_SUPPRESSION_MS;
5940
+ for (const entry of entriesToReplicate) {
5941
+ await seedAssumeSyncedPeerHistory(entry);
5942
+ }
5943
+ }
5944
+
4920
5945
  await this.replicate(entriesToReplicate, {
4921
5946
  rebalance: assumeSynced ? false : true,
4922
5947
  checkDuplicates: true,
@@ -4997,9 +6022,14 @@ export class SharedLog<
4997
6022
  clear();
4998
6023
  // `waitForReplicator()` is typically used as a precondition before join/replicate
4999
6024
  // flows. A replicator can become mature and enqueue a debounced rebalance
5000
- // (`replicationChangeDebounceFn`) slightly later. Flush here so callers don't
5001
- // observe a "late" rebalance after the wait resolves.
5002
- await this.replicationChangeDebounceFn?.flush?.();
6025
+ // (`replicationChangeDebounceFn`) slightly later. Kick the flush, but do not
6026
+ // make membership waits depend on all rebalance work finishing; callers that
6027
+ // need settled distribution already wait for that explicitly.
6028
+ this.replicationChangeDebounceFn?.flush?.().catch((error: any) => {
6029
+ if (!isNotStartedError(error)) {
6030
+ logger.error(error?.toString?.() ?? String(error));
6031
+ }
6032
+ });
5003
6033
  deferred.resolve();
5004
6034
  };
5005
6035
 
@@ -5544,6 +6574,18 @@ export class SharedLog<
5544
6574
  }
5545
6575
  }
5546
6576
  }
6577
+
6578
+ if (!options?.candidates) {
6579
+ const fullReplicaLeaders = await this.findFullReplicaLeaders(
6580
+ cursors.length,
6581
+ roleAge,
6582
+ peerFilter,
6583
+ );
6584
+ if (fullReplicaLeaders) {
6585
+ return fullReplicaLeaders;
6586
+ }
6587
+ }
6588
+
5547
6589
  return getSamples<R>(
5548
6590
  cursors,
5549
6591
  this.replicationIndex,
@@ -5556,6 +6598,50 @@ export class SharedLog<
5556
6598
  );
5557
6599
  }
5558
6600
 
6601
+ private async findFullReplicaLeaders(
6602
+ replicas: number,
6603
+ roleAge: number,
6604
+ peerFilter?: Set<string>,
6605
+ ): Promise<Map<string, { intersecting: boolean }> | undefined> {
6606
+ const now = Date.now();
6607
+ const leaders = new Map<string, { intersecting: boolean }>();
6608
+ const includeStrict =
6609
+ this._logProperties?.strictFullReplicaFallback !== false;
6610
+ const iterator = this.replicationIndex.iterate(
6611
+ {},
6612
+ { shape: { hash: true, timestamp: true, mode: true } },
6613
+ );
6614
+
6615
+ try {
6616
+ for (;;) {
6617
+ const batch = await iterator.next(64);
6618
+ if (batch.length === 0) {
6619
+ break;
6620
+ }
6621
+ for (const result of batch) {
6622
+ const range = result.value;
6623
+ if (peerFilter && !peerFilter.has(range.hash)) {
6624
+ continue;
6625
+ }
6626
+ if (!isMatured(range, now, roleAge)) {
6627
+ continue;
6628
+ }
6629
+ if (range.mode === ReplicationIntent.Strict && !includeStrict) {
6630
+ continue;
6631
+ }
6632
+ leaders.set(range.hash, { intersecting: true });
6633
+ if (leaders.size > replicas) {
6634
+ return undefined;
6635
+ }
6636
+ }
6637
+ }
6638
+ } finally {
6639
+ await iterator.close();
6640
+ }
6641
+
6642
+ return leaders.size > 0 ? leaders : undefined;
6643
+ }
6644
+
5559
6645
  async findLeadersFromEntry(
5560
6646
  entry: ShallowOrFullEntry<any> | EntryReplicated<R>,
5561
6647
  replicas: number,
@@ -6195,16 +7281,33 @@ export class SharedLog<
6195
7281
 
6196
7282
  const changed = false;
6197
7283
  const addedPeers = new Set<string>();
7284
+ const authoritativeRepairPeers = new Set<string>();
6198
7285
  const warmupPeers = new Set<string>();
7286
+ const churnRepairPeers = new Set<string>();
6199
7287
  const hasSelfWarmupChange = changes.some(
6200
7288
  (change) =>
6201
7289
  change.range.hash === selfHash &&
6202
7290
  (change.type === "added" || change.type === "replaced"),
6203
7291
  );
7292
+ const hasSelfRangeRemoval = changes.some(
7293
+ (change) =>
7294
+ change.range.hash === selfHash &&
7295
+ (change.type === "removed" || change.type === "replaced"),
7296
+ );
6204
7297
  for (const change of changes) {
7298
+ if (
7299
+ change.range.hash !== selfHash &&
7300
+ (change.type === "removed" || change.type === "replaced")
7301
+ ) {
7302
+ this.removePeerFromEntryKnownPeers(change.range.hash);
7303
+ }
6205
7304
  if (change.type === "added" || change.type === "replaced") {
6206
7305
  const hash = change.range.hash;
6207
7306
  if (hash !== selfHash) {
7307
+ // Existing peers can widen/shift ranges after the initial join. If we
7308
+ // only rescan on first-seen "added", late authoritative range updates can
7309
+ // leave historical backfill permanently partial under load.
7310
+ authoritativeRepairPeers.add(hash);
6208
7311
  // Range updates can reassign entries to an existing peer shortly after it
6209
7312
  // already received a subset. Avoid suppressing legitimate follow-up repair.
6210
7313
  this._recentRepairDispatch.delete(hash);
@@ -6241,26 +7344,34 @@ export class SharedLog<
6241
7344
  string,
6242
7345
  Map<string, EntryReplicated<any>>
6243
7346
  > = new Map();
6244
- const flushUncheckedDeliverTarget = (target: string) => {
6245
- const entries = uncheckedDeliver.get(target);
6246
- if (!entries || entries.size === 0) {
6247
- return;
6248
- }
7347
+ const flushUncheckedDeliverTarget = (target: string) => {
7348
+ const entries = uncheckedDeliver.get(target);
7349
+ if (!entries || entries.size === 0) {
7350
+ return;
7351
+ }
6249
7352
  const isWarmupTarget = warmupPeers.has(target);
6250
- const bypassRecentDedupe = isWarmupTarget || forceFreshDelivery;
7353
+ const mode: RepairDispatchMode = forceFreshDelivery
7354
+ ? "churn"
7355
+ : isWarmupTarget
7356
+ ? "join-warmup"
7357
+ : "join-authoritative";
6251
7358
  this.dispatchMaybeMissingEntries(target, entries, {
6252
- bypassRecentDedupe,
6253
- retryScheduleMs: isWarmupTarget
6254
- ? JOIN_WARMUP_RETRY_SCHEDULE_MS
6255
- : undefined,
6256
- forceFreshDelivery,
7359
+ bypassRecentDedupe: isWarmupTarget || forceFreshDelivery,
7360
+ mode,
7361
+ retryScheduleMs:
7362
+ mode === "join-warmup"
7363
+ ? JOIN_WARMUP_RETRY_SCHEDULE_MS
7364
+ : mode === "join-authoritative"
7365
+ ? [0]
7366
+ : undefined,
6257
7367
  });
6258
- uncheckedDeliver.delete(target);
6259
- };
7368
+ uncheckedDeliver.delete(target);
7369
+ };
6260
7370
  const queueUncheckedDeliver = (
6261
7371
  target: string,
6262
7372
  entry: EntryReplicated<any>,
6263
7373
  ) => {
7374
+ churnRepairPeers.add(target);
6264
7375
  let set = uncheckedDeliver.get(target);
6265
7376
  if (!set) {
6266
7377
  set = new Map();
@@ -6284,74 +7395,85 @@ export class SharedLog<
6284
7395
  forceFresh: forceFreshDelivery || useJoinWarmupFastPath,
6285
7396
  },
6286
7397
  )) {
6287
- if (this.closed) {
6288
- break;
6289
- }
6290
-
6291
- if (useJoinWarmupFastPath) {
6292
- let oldPeersSet: Set<string> | undefined;
6293
- const gid = entryReplicated.gid;
6294
- oldPeersSet = gidPeersHistorySnapshot.get(gid);
6295
- if (!gidPeersHistorySnapshot.has(gid)) {
6296
- const existing = this._gidPeersHistory.get(gid);
6297
- oldPeersSet = existing ? new Set(existing) : undefined;
6298
- gidPeersHistorySnapshot.set(gid, oldPeersSet);
7398
+ if (this.closed) {
7399
+ break;
6299
7400
  }
6300
7401
 
6301
- for (const target of warmupPeers) {
6302
- queueUncheckedDeliver(target, entryReplicated);
6303
- }
7402
+ if (useJoinWarmupFastPath) {
7403
+ let oldPeersSet: Set<string> | undefined;
7404
+ const gid = entryReplicated.gid;
7405
+ oldPeersSet = gidPeersHistorySnapshot.get(gid);
7406
+ if (!gidPeersHistorySnapshot.has(gid)) {
7407
+ const existing = this._gidPeersHistory.get(gid);
7408
+ oldPeersSet = existing ? new Set(existing) : undefined;
7409
+ gidPeersHistorySnapshot.set(gid, oldPeersSet);
7410
+ }
6304
7411
 
6305
- const candidatePeers = new Set<string>([selfHash]);
6306
- for (const target of warmupPeers) {
6307
- candidatePeers.add(target);
6308
- }
6309
- if (oldPeersSet) {
6310
- for (const oldPeer of oldPeersSet) {
6311
- candidatePeers.add(oldPeer);
7412
+ for (const target of warmupPeers) {
7413
+ queueUncheckedDeliver(target, entryReplicated);
6312
7414
  }
6313
- }
6314
7415
 
6315
- const currentPeers = await this.findLeaders(
6316
- entryReplicated.coordinates,
6317
- entryReplicated,
6318
- {
6319
- roleAge: 0,
6320
- candidates: candidatePeers,
6321
- persist: false,
6322
- },
6323
- );
7416
+ const candidatePeers = new Set<string>([selfHash]);
7417
+ for (const target of warmupPeers) {
7418
+ candidatePeers.add(target);
7419
+ }
7420
+ if (oldPeersSet) {
7421
+ for (const oldPeer of oldPeersSet) {
7422
+ candidatePeers.add(oldPeer);
7423
+ }
7424
+ }
6324
7425
 
6325
- if (oldPeersSet) {
6326
- for (const oldPeer of oldPeersSet) {
6327
- if (!currentPeers.has(oldPeer)) {
6328
- this.removePruneRequestSent(entryReplicated.hash);
7426
+ const currentPeers = await this.findLeaders(
7427
+ entryReplicated.coordinates,
7428
+ entryReplicated,
7429
+ {
7430
+ roleAge: 0,
7431
+ candidates: candidatePeers,
7432
+ persist: false,
7433
+ },
7434
+ );
7435
+
7436
+ if (oldPeersSet) {
7437
+ for (const oldPeer of oldPeersSet) {
7438
+ if (!currentPeers.has(oldPeer)) {
7439
+ this.removePruneRequestSent(entryReplicated.hash);
7440
+ }
6329
7441
  }
6330
7442
  }
6331
- }
6332
7443
 
6333
- this.addPeersToGidPeerHistory(
6334
- entryReplicated.gid,
6335
- currentPeers.keys(),
6336
- true,
6337
- );
7444
+ for (const [peer] of currentPeers) {
7445
+ if (warmupPeers.has(peer)) {
7446
+ this.markRepairSweepOptimisticPeer(entryReplicated.gid, peer);
7447
+ }
7448
+ }
6338
7449
 
6339
- if (!currentPeers.has(selfHash)) {
6340
- this.pruneDebouncedFnAddIfNotKeeping({
6341
- key: entryReplicated.hash,
6342
- value: { entry: entryReplicated, leaders: currentPeers },
6343
- });
7450
+ const authoritativePeers = [...currentPeers.keys()].filter(
7451
+ (peer) =>
7452
+ !warmupPeers.has(peer) &&
7453
+ !this.hasPendingRepairSweepOptimisticPeer(entryReplicated.gid, peer),
7454
+ );
7455
+ this.addPeersToGidPeerHistory(
7456
+ entryReplicated.gid,
7457
+ authoritativePeers,
7458
+ true,
7459
+ );
6344
7460
 
6345
- this.responseToPruneDebouncedFn.delete(entryReplicated.hash);
6346
- } else {
6347
- this.pruneDebouncedFn.delete(entryReplicated.hash);
6348
- await this._pendingDeletes
6349
- .get(entryReplicated.hash)
6350
- ?.reject(new Error("Failed to delete, is leader again"));
6351
- this.removePruneRequestSent(entryReplicated.hash);
7461
+ if (!currentPeers.has(selfHash)) {
7462
+ this.pruneDebouncedFnAddIfNotKeeping({
7463
+ key: entryReplicated.hash,
7464
+ value: { entry: entryReplicated, leaders: currentPeers },
7465
+ });
7466
+
7467
+ this.responseToPruneDebouncedFn.delete(entryReplicated.hash);
7468
+ } else {
7469
+ this.pruneDebouncedFn.delete(entryReplicated.hash);
7470
+ await this._pendingDeletes
7471
+ .get(entryReplicated.hash)
7472
+ ?.reject(new Error("Failed to delete, is leader again"));
7473
+ this.removePruneRequestSent(entryReplicated.hash);
7474
+ }
7475
+ continue;
6352
7476
  }
6353
- continue;
6354
- }
6355
7477
 
6356
7478
  let oldPeersSet: Set<string> | undefined;
6357
7479
  const gid = entryReplicated.gid;
@@ -6385,19 +7507,30 @@ export class SharedLog<
6385
7507
  }
6386
7508
  }
6387
7509
 
6388
- if (oldPeersSet) {
6389
- for (const oldPeer of oldPeersSet) {
6390
- if (!currentPeers.has(oldPeer)) {
6391
- this.removePruneRequestSent(entryReplicated.hash);
7510
+ if (oldPeersSet) {
7511
+ for (const oldPeer of oldPeersSet) {
7512
+ if (!currentPeers.has(oldPeer)) {
7513
+ this.removePruneRequestSent(entryReplicated.hash);
7514
+ }
6392
7515
  }
6393
7516
  }
6394
- }
6395
7517
 
6396
- this.addPeersToGidPeerHistory(
6397
- entryReplicated.gid,
6398
- currentPeers.keys(),
6399
- true,
6400
- );
7518
+ for (const [peer] of currentPeers) {
7519
+ if (addedPeers.has(peer)) {
7520
+ this.markRepairSweepOptimisticPeer(entryReplicated.gid, peer);
7521
+ }
7522
+ }
7523
+
7524
+ const authoritativePeers = [...currentPeers.keys()].filter(
7525
+ (peer) =>
7526
+ !addedPeers.has(peer) &&
7527
+ !this.hasPendingRepairSweepOptimisticPeer(entryReplicated.gid, peer),
7528
+ );
7529
+ this.addPeersToGidPeerHistory(
7530
+ entryReplicated.gid,
7531
+ authoritativePeers,
7532
+ true,
7533
+ );
6401
7534
 
6402
7535
  if (!isLeader) {
6403
7536
  this.pruneDebouncedFnAddIfNotKeeping({
@@ -6416,9 +7549,18 @@ export class SharedLog<
6416
7549
  }
6417
7550
  }
6418
7551
 
7552
+ if (this._isAdaptiveReplicating && hasSelfRangeRemoval) {
7553
+ await this.pruneIndexedEntriesNoLongerLed();
7554
+ }
7555
+
6419
7556
  if (forceFreshDelivery) {
6420
- // Removed/shrunk ranges still need the authoritative background pass.
6421
- this.scheduleRepairSweep({ forceFreshDelivery, addedPeers });
7557
+ // Pure leave/shrink churn can have zero `addedPeers`, but the peers that
7558
+ // received redistributed entries still need a follow-up repair pass if the
7559
+ // immediate maybe-sync misses one entry.
7560
+ this.scheduleRepairSweep({
7561
+ mode: "churn",
7562
+ peers: churnRepairPeers,
7563
+ });
6422
7564
  } else if (useJoinWarmupFastPath) {
6423
7565
  // Pure join warmup uses the cheap immediate maybe-missing dispatch above,
6424
7566
  // then defers the authoritative sweep so it does not compete with the
@@ -6430,19 +7572,23 @@ export class SharedLog<
6430
7572
  return;
6431
7573
  }
6432
7574
  this.scheduleRepairSweep({
6433
- forceFreshDelivery: false,
6434
- addedPeers: peers,
7575
+ mode: "join-warmup",
7576
+ peers,
6435
7577
  });
6436
7578
  }, 250);
6437
7579
  timer.unref?.();
6438
7580
  this._repairRetryTimers.add(timer);
6439
- } else if (addedPeers.size > 0) {
7581
+ } else if (authoritativeRepairPeers.size > 0) {
6440
7582
  this.scheduleRepairSweep({
6441
- forceFreshDelivery: false,
6442
- addedPeers,
7583
+ mode: "join-authoritative",
7584
+ peers: authoritativeRepairPeers,
6443
7585
  });
6444
7586
  }
6445
7587
 
7588
+ if (!forceFreshDelivery && authoritativeRepairPeers.size > 0) {
7589
+ this.scheduleJoinAuthoritativeRepair(authoritativeRepairPeers);
7590
+ }
7591
+
6446
7592
  for (const target of [...uncheckedDeliver.keys()]) {
6447
7593
  flushUncheckedDeliverTarget(target);
6448
7594
  }
@@ -6549,6 +7695,13 @@ export class SharedLog<
6549
7695
  return; // not allowed to replicate
6550
7696
  }
6551
7697
 
7698
+ if (
7699
+ this.replicationController.maxMemoryLimit != null &&
7700
+ usedMemory > this.replicationController.maxMemoryLimit
7701
+ ) {
7702
+ await this.pruneIndexedEntriesNoLongerLed();
7703
+ }
7704
+
6552
7705
  const peersSize = (await peers.getSize()) || 1;
6553
7706
  const totalParticipation = await this.calculateTotalParticipation();
6554
7707