@peerbit/shared-log 12.3.5 → 13.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37)
  1. package/dist/benchmark/sync-batch-sweep.d.ts +2 -0
  2. package/dist/benchmark/sync-batch-sweep.d.ts.map +1 -0
  3. package/dist/benchmark/sync-batch-sweep.js +305 -0
  4. package/dist/benchmark/sync-batch-sweep.js.map +1 -0
  5. package/dist/src/fanout-envelope.d.ts +18 -0
  6. package/dist/src/fanout-envelope.d.ts.map +1 -0
  7. package/dist/src/fanout-envelope.js +85 -0
  8. package/dist/src/fanout-envelope.js.map +1 -0
  9. package/dist/src/index.d.ts +55 -6
  10. package/dist/src/index.d.ts.map +1 -1
  11. package/dist/src/index.js +1595 -339
  12. package/dist/src/index.js.map +1 -1
  13. package/dist/src/pid.d.ts.map +1 -1
  14. package/dist/src/pid.js +21 -5
  15. package/dist/src/pid.js.map +1 -1
  16. package/dist/src/ranges.d.ts +3 -1
  17. package/dist/src/ranges.d.ts.map +1 -1
  18. package/dist/src/ranges.js +14 -5
  19. package/dist/src/ranges.js.map +1 -1
  20. package/dist/src/sync/index.d.ts +45 -1
  21. package/dist/src/sync/index.d.ts.map +1 -1
  22. package/dist/src/sync/rateless-iblt.d.ts +13 -2
  23. package/dist/src/sync/rateless-iblt.d.ts.map +1 -1
  24. package/dist/src/sync/rateless-iblt.js +194 -3
  25. package/dist/src/sync/rateless-iblt.js.map +1 -1
  26. package/dist/src/sync/simple.d.ts +24 -3
  27. package/dist/src/sync/simple.d.ts.map +1 -1
  28. package/dist/src/sync/simple.js +330 -32
  29. package/dist/src/sync/simple.js.map +1 -1
  30. package/package.json +16 -16
  31. package/src/fanout-envelope.ts +27 -0
  32. package/src/index.ts +2162 -691
  33. package/src/pid.ts +22 -4
  34. package/src/ranges.ts +14 -4
  35. package/src/sync/index.ts +53 -1
  36. package/src/sync/rateless-iblt.ts +237 -4
  37. package/src/sync/simple.ts +427 -41
package/src/index.ts CHANGED
@@ -1,10 +1,11 @@
1
- import { BorshError, field, variant } from "@dao-xyz/borsh";
1
+ import { BorshError, deserialize, field, serialize, variant } from "@dao-xyz/borsh";
2
2
  import { AnyBlockStore, RemoteBlocks } from "@peerbit/blocks";
3
3
  import { cidifyString } from "@peerbit/blocks-interface";
4
4
  import { Cache } from "@peerbit/cache";
5
5
  import {
6
6
  AccessError,
7
7
  PublicSignKey,
8
+ getPublicKeyFromPeerId,
8
9
  sha256Base64Sync,
9
10
  sha256Sync,
10
11
  } from "@peerbit/crypto";
@@ -31,7 +32,16 @@ import {
31
32
  } from "@peerbit/log";
32
33
  import { logger as loggerFn } from "@peerbit/logger";
33
34
  import { ClosedError, Program, type ProgramEvents } from "@peerbit/program";
34
- import { waitForSubscribers } from "@peerbit/pubsub";
35
+ import {
36
+ FanoutChannel,
37
+ type FanoutProviderHandle,
38
+ type FanoutTree,
39
+ type FanoutTreeChannelOptions,
40
+ type FanoutTreeDataEvent,
41
+ type FanoutTreeUnicastEvent,
42
+ type FanoutTreeJoinOptions,
43
+ waitForSubscribers,
44
+ } from "@peerbit/pubsub";
35
45
  import {
36
46
  SubscriptionEvent,
37
47
  UnsubcriptionEvent,
@@ -40,10 +50,11 @@ import { RPC, type RequestContext } from "@peerbit/rpc";
40
50
  import {
41
51
  AcknowledgeDelivery,
42
52
  AnyWhere,
53
+ DataMessage,
54
+ MessageHeader,
43
55
  NotStartedError,
44
- SeekDelivery,
56
+ type RouteHint,
45
57
  SilentDelivery,
46
- type WithMode,
47
58
  } from "@peerbit/stream-interface";
48
59
  import {
49
60
  AbortError,
@@ -69,6 +80,7 @@ import {
69
80
  ResponseIPrune,
70
81
  createExchangeHeadsMessages,
71
82
  } from "./exchange-heads.js";
83
+ import { FanoutEnvelope } from "./fanout-envelope.js";
72
84
  import {
73
85
  MAX_U32,
74
86
  MAX_U64,
@@ -189,6 +201,36 @@ const getLatestEntry = (
189
201
  return latest;
190
202
  };
191
203
 
204
+ const hashToSeed32 = (str: string) => {
205
+ // FNV-1a 32-bit, fast and deterministic.
206
+ let hash = 0x811c9dc5;
207
+ for (let i = 0; i < str.length; i++) {
208
+ hash ^= str.charCodeAt(i);
209
+ hash = Math.imul(hash, 0x01000193);
210
+ }
211
+ return hash >>> 0;
212
+ };
213
+
214
+ const pickDeterministicSubset = (peers: string[], seed: number, max: number) => {
215
+ if (peers.length <= max) return peers;
216
+
217
+ const subset: string[] = [];
218
+ const used = new Set<string>();
219
+ let x = seed || 1;
220
+ while (subset.length < max) {
221
+ // xorshift32
222
+ x ^= x << 13;
223
+ x ^= x >>> 17;
224
+ x ^= x << 5;
225
+ const peer = peers[(x >>> 0) % peers.length];
226
+ if (!used.has(peer)) {
227
+ used.add(peer);
228
+ subset.push(peer);
229
+ }
230
+ }
231
+ return subset;
232
+ };
233
+
192
234
  export type ReplicationLimitsOptions =
193
235
  | Partial<ReplicationLimits>
194
236
  | { min?: number; max?: number };
@@ -373,6 +415,7 @@ export type SharedLogOptions<
373
415
  compatibility?: number;
374
416
  domain?: ReplicationDomainConstructor<D>;
375
417
  eagerBlocks?: boolean | { cacheSize?: number };
418
+ fanout?: SharedLogFanoutOptions;
376
419
  };
377
420
 
378
421
  export const DEFAULT_MIN_REPLICAS = 2;
@@ -385,6 +428,10 @@ export const WAIT_FOR_REPLICATOR_REQUEST_MIN_ATTEMPTS = 3;
385
428
  // Prefer making pruning robust without timing-based heuristics.
386
429
  export const WAIT_FOR_PRUNE_DELAY = 0;
387
430
  const PRUNE_DEBOUNCE_INTERVAL = 500;
431
+ const CHECKED_PRUNE_RESEND_INTERVAL_MIN_MS = 250;
432
+ const CHECKED_PRUNE_RESEND_INTERVAL_MAX_MS = 5_000;
433
+ const CHECKED_PRUNE_RETRY_MAX_ATTEMPTS = 3;
434
+ const CHECKED_PRUNE_RETRY_MAX_DELAY_MS = 30_000;
388
435
 
389
436
  // DONT SET THIS ANY LOWER, because it will make the pid controller unstable as the system responses are not fast enough to updates from the pid controller
390
437
  const RECALCULATE_PARTICIPATION_DEBOUNCE_INTERVAL = 1000;
@@ -394,6 +441,36 @@ const RECALCULATE_PARTICIPATION_MIN_RELATIVE_CHANGE_WITH_MEMORY_LIMIT = 0.001;
394
441
  const RECALCULATE_PARTICIPATION_RELATIVE_DENOMINATOR_FLOOR = 1e-3;
395
442
 
396
443
  const DEFAULT_DISTRIBUTION_DEBOUNCE_TIME = 500;
444
+ const RECENT_REPAIR_DISPATCH_TTL_MS = 5_000;
445
+ const REPAIR_SWEEP_ENTRY_BATCH_SIZE = 1_000;
446
+ const REPAIR_SWEEP_TARGET_BUFFER_SIZE = 1024;
447
+ const FORCE_FRESH_RETRY_SCHEDULE_MS = [0, 1_000, 3_000, 7_000];
448
+ const JOIN_WARMUP_RETRY_SCHEDULE_MS = [0, 1_000, 3_000];
449
+
450
+ const toPositiveInteger = (
451
+ value: number | undefined,
452
+ fallback: number,
453
+ label: string,
454
+ ) => {
455
+ if (value == null) {
456
+ return fallback;
457
+ }
458
+ if (!Number.isFinite(value) || value <= 0) {
459
+ throw new Error(`${label} must be a positive number`);
460
+ }
461
+ return Math.max(1, Math.floor(value));
462
+ };
463
+
464
+ const DEFAULT_SHARED_LOG_FANOUT_CHANNEL_OPTIONS: Omit<
465
+ FanoutTreeChannelOptions,
466
+ "role"
467
+ > = {
468
+ msgRate: 30,
469
+ msgSize: 1024,
470
+ uploadLimitBps: 5_000_000,
471
+ maxChildren: 24,
472
+ repair: true,
473
+ };
397
474
 
398
475
  const getIdForDynamicRange = (publicKey: PublicSignKey) => {
399
476
  return sha256Sync(
@@ -417,20 +494,39 @@ export type Args<
417
494
  : "u32",
418
495
  > = LogProperties<T> & LogEvents<T> & SharedLogOptions<T, D, R>;
419
496
 
497
+ export type DeliveryReliability = "ack" | "best-effort";
498
+
420
499
  export type DeliveryOptions = {
421
- settle?: true | { min: number };
500
+ reliability?: DeliveryReliability;
501
+ minAcks?: number;
422
502
  requireRecipients?: boolean;
423
503
  timeout?: number;
424
504
  signal?: AbortSignal;
425
505
  };
426
506
 
427
- export type SharedAppendOptions<T> = AppendOptions<T> & {
507
+ export type SharedLogFanoutOptions = {
508
+ root?: string;
509
+ channel?: Partial<Omit<FanoutTreeChannelOptions, "role">>;
510
+ join?: FanoutTreeJoinOptions;
511
+ };
512
+
513
+ type SharedAppendBaseOptions<T> = AppendOptions<T> & {
428
514
  replicas?: AbsoluteReplicas | number;
429
515
  replicate?: boolean;
430
- target?: "all" | "replicators" | "none";
431
- delivery?: false | true | DeliveryOptions;
432
516
  };
433
517
 
518
+ export type SharedAppendOptions<T> =
519
+ | (SharedAppendBaseOptions<T> & {
520
+ target?: "replicators" | "none";
521
+ delivery?: false | true | DeliveryOptions;
522
+ })
523
+ | (SharedAppendBaseOptions<T> & {
524
+ // target=all uses the fanout data plane and intentionally does not expose
525
+ // per-recipient settle semantics from RPC delivery options.
526
+ target: "all";
527
+ delivery?: false | undefined;
528
+ });
529
+
434
530
  export type ReplicatorJoinEvent = { publicKey: PublicSignKey };
435
531
  export type ReplicatorLeaveEvent = { publicKey: PublicSignKey };
436
532
  export type ReplicationChangeEvent = { publicKey: PublicSignKey };
@@ -463,11 +559,12 @@ export class SharedLog<
463
559
 
464
560
  private _replicationRangeIndex!: Index<ReplicationRangeIndexable<R>>;
465
561
  private _entryCoordinatesIndex!: Index<EntryReplicated<R>>;
466
- private coordinateToHash!: Cache<string>;
467
- private recentlyRebalanced!: Cache<string>;
562
+ private coordinateToHash!: Cache<string>;
563
+ private recentlyRebalanced!: Cache<string>;
468
564
 
469
- uniqueReplicators!: Set<string>;
470
- private _replicatorsReconciled!: boolean;
565
+ uniqueReplicators!: Set<string>;
566
+ private _replicatorJoinEmitted!: Set<string>;
567
+ private _replicatorsReconciled!: boolean;
471
568
 
472
569
  /* private _totalParticipation!: number; */
473
570
 
@@ -476,6 +573,10 @@ export class SharedLog<
476
573
 
477
574
  private _onSubscriptionFn!: (arg: any) => any;
478
575
  private _onUnsubscriptionFn!: (arg: any) => any;
576
+ private _onFanoutDataFn?: (arg: any) => void;
577
+ private _onFanoutUnicastFn?: (arg: any) => void;
578
+ private _fanoutChannel?: FanoutChannel;
579
+ private _providerHandle?: FanoutProviderHandle;
479
580
 
480
581
  private _isTrustedReplicator?: (
481
582
  publicKey: PublicSignKey,
@@ -519,6 +620,15 @@ export class SharedLog<
519
620
  >; // map of peerId to timeout
520
621
 
521
622
  private latestReplicationInfoMessage!: Map<string, bigint>;
623
+ // Peers that have unsubscribed from this log's topic. We ignore replication-info
624
+ // messages from them until we see a new subscription, to avoid re-introducing
625
+ // stale membership state during close/unsubscribe races.
626
+ private _replicationInfoBlockedPeers!: Set<string>;
627
+ private _replicationInfoRequestByPeer!: Map<
628
+ string,
629
+ { attempts: number; timer?: ReturnType<typeof setTimeout> }
630
+ >;
631
+ private _replicationInfoApplyQueueByPeer!: Map<string, Promise<void>>;
522
632
 
523
633
  private remoteBlocks!: RemoteBlocks;
524
634
 
@@ -552,10 +662,19 @@ export class SharedLog<
552
662
 
553
663
  private _requestIPruneSent!: Map<string, Set<string>>; // tracks entry hash to peer hash for requesting I prune messages
554
664
  private _requestIPruneResponseReplicatorSet!: Map<string, Set<string>>; // tracks entry hash to peer hash
665
+ private _checkedPruneRetries!: Map<
666
+ string,
667
+ { attempts: number; timer?: ReturnType<typeof setTimeout> }
668
+ >;
555
669
 
556
670
  private replicationChangeDebounceFn!: ReturnType<
557
671
  typeof debounceAggregationChanges<ReplicationRangeIndexable<R>>
558
672
  >;
673
+ private _repairRetryTimers!: Set<ReturnType<typeof setTimeout>>;
674
+ private _recentRepairDispatch!: Map<string, Map<string, number>>;
675
+ private _repairSweepRunning!: boolean;
676
+ private _repairSweepForceFreshPending!: boolean;
677
+ private _repairSweepAddedPeersPending!: Set<string>;
559
678
 
560
679
  // regular distribution checks
561
680
  private distributeQueue?: PQueue;
@@ -572,6 +691,7 @@ export class SharedLog<
572
691
  waitForReplicatorRequestMaxAttempts?: number;
573
692
  waitForPruneDelay!: number;
574
693
  distributionDebounceTime!: number;
694
+ repairSweepTargetBufferSize!: number;
575
695
 
576
696
  replicationController!: PIDReplicationController;
577
697
  history!: { usedMemory: number; factor: number }[];
@@ -597,6 +717,676 @@ export class SharedLog<
597
717
  return (this.compatibility ?? Number.MAX_VALUE) < 9;
598
718
  }
599
719
 
720
+ private getFanoutChannelOptions(
721
+ options?: SharedLogFanoutOptions,
722
+ ): Omit<FanoutTreeChannelOptions, "role"> {
723
+ return {
724
+ ...DEFAULT_SHARED_LOG_FANOUT_CHANNEL_OPTIONS,
725
+ ...(options?.channel ?? {}),
726
+ };
727
+ }
728
+
729
+ private async _openFanoutChannel(options?: SharedLogFanoutOptions) {
730
+ this._closeFanoutChannel();
731
+ if (!options) {
732
+ return;
733
+ }
734
+
735
+ const fanoutService = (this.node.services as any).fanout;
736
+ if (!fanoutService) {
737
+ throw new Error(
738
+ `Fanout is configured for shared-log topic ${this.topic}, but no fanout service is available on this client`,
739
+ );
740
+ }
741
+
742
+ const resolvedRoot =
743
+ options.root ??
744
+ (await (fanoutService as any)?.topicRootControlPlane?.resolveTopicRoot?.(
745
+ this.topic,
746
+ ));
747
+ if (!resolvedRoot) {
748
+ throw new Error(
749
+ `Fanout is configured for shared-log topic ${this.topic}, but no fanout root was provided and none could be resolved`,
750
+ );
751
+ }
752
+
753
+ const channel = new FanoutChannel(fanoutService, {
754
+ topic: this.topic,
755
+ root: resolvedRoot,
756
+ });
757
+ this._fanoutChannel = channel;
758
+
759
+ this._onFanoutDataFn =
760
+ this._onFanoutDataFn ||
761
+ ((evt: any) => {
762
+ const detail = (evt as CustomEvent<FanoutTreeDataEvent>)?.detail;
763
+ if (!detail) {
764
+ return;
765
+ }
766
+ void this._onFanoutData(detail).catch((error) => logger.error(error));
767
+ });
768
+ channel.addEventListener("data", this._onFanoutDataFn);
769
+
770
+ this._onFanoutUnicastFn =
771
+ this._onFanoutUnicastFn ||
772
+ ((evt: any) => {
773
+ const detail = (evt as CustomEvent<FanoutTreeUnicastEvent>)?.detail;
774
+ if (!detail) {
775
+ return;
776
+ }
777
+ void this._onFanoutUnicast(detail).catch((error) => logger.error(error));
778
+ });
779
+ channel.addEventListener("unicast", this._onFanoutUnicastFn);
780
+
781
+ try {
782
+ const channelOptions = this.getFanoutChannelOptions(options);
783
+ if (resolvedRoot === fanoutService.publicKeyHash) {
784
+ await channel.openAsRoot(channelOptions);
785
+ return;
786
+ }
787
+ await channel.join(channelOptions, options.join);
788
+ } catch (error) {
789
+ this._closeFanoutChannel();
790
+ throw error;
791
+ }
792
+ }
793
+
794
+ private _closeFanoutChannel() {
795
+ if (this._fanoutChannel) {
796
+ if (this._onFanoutDataFn) {
797
+ this._fanoutChannel.removeEventListener("data", this._onFanoutDataFn);
798
+ }
799
+ if (this._onFanoutUnicastFn) {
800
+ this._fanoutChannel.removeEventListener(
801
+ "unicast",
802
+ this._onFanoutUnicastFn,
803
+ );
804
+ }
805
+ this._fanoutChannel.close();
806
+ }
807
+ this._fanoutChannel = undefined;
808
+ }
809
+
810
+ private async _onFanoutData(detail: FanoutTreeDataEvent) {
811
+ let envelope: FanoutEnvelope;
812
+ try {
813
+ envelope = deserialize(detail.payload, FanoutEnvelope);
814
+ } catch (error) {
815
+ if (error instanceof BorshError) {
816
+ return;
817
+ }
818
+ throw error;
819
+ }
820
+
821
+ let message: TransportMessage;
822
+ try {
823
+ message = deserialize(envelope.payload, TransportMessage);
824
+ } catch (error) {
825
+ if (error instanceof BorshError) {
826
+ return;
827
+ }
828
+ throw error;
829
+ }
830
+
831
+ if (!(message instanceof ExchangeHeadsMessage)) {
832
+ return;
833
+ }
834
+
835
+ const from =
836
+ (await this._resolvePublicKeyFromHash(envelope.from)) ??
837
+ ({ hashcode: () => envelope.from } as PublicSignKey);
838
+
839
+ const contextMessage = new DataMessage({
840
+ header: new MessageHeader({
841
+ session: 0,
842
+ mode: new AnyWhere(),
843
+ priority: 0,
844
+ }),
845
+ });
846
+ contextMessage.header.timestamp = envelope.timestamp;
847
+
848
+ await this.onMessage(message, {
849
+ from,
850
+ message: contextMessage,
851
+ });
852
+ }
853
+
854
+ private async _onFanoutUnicast(detail: FanoutTreeUnicastEvent) {
855
+ let message: TransportMessage;
856
+ try {
857
+ message = deserialize(detail.payload, TransportMessage);
858
+ } catch (error) {
859
+ if (error instanceof BorshError) {
860
+ return;
861
+ }
862
+ throw error;
863
+ }
864
+
865
+ const fromHash = detail.origin || detail.from;
866
+ const from =
867
+ (await this._resolvePublicKeyFromHash(fromHash)) ??
868
+ ({ hashcode: () => fromHash } as PublicSignKey);
869
+
870
+ const contextMessage = new DataMessage({
871
+ header: new MessageHeader({
872
+ session: 0,
873
+ mode: new AnyWhere(),
874
+ priority: 0,
875
+ }),
876
+ });
877
+ contextMessage.header.timestamp = detail.timestamp;
878
+
879
+ await this.onMessage(message, {
880
+ from,
881
+ message: contextMessage,
882
+ });
883
+ }
884
+
885
+ private async _publishExchangeHeadsViaFanout(
886
+ message: ExchangeHeadsMessage<any>,
887
+ ): Promise<void> {
888
+ if (!this._fanoutChannel) {
889
+ throw new Error(
890
+ `No fanout channel configured for shared-log topic ${this.topic}`,
891
+ );
892
+ }
893
+ const envelope = new FanoutEnvelope({
894
+ from: this.node.identity.publicKey.hashcode(),
895
+ timestamp: BigInt(Date.now()),
896
+ payload: serialize(message),
897
+ });
898
+ await this._fanoutChannel.publish(serialize(envelope));
899
+ }
900
+
901
+ private _parseDeliveryOptions(
902
+ deliveryArg: false | true | DeliveryOptions | undefined,
903
+ ): {
904
+ delivery?: DeliveryOptions;
905
+ reliability: DeliveryReliability;
906
+ requireRecipients: boolean;
907
+ minAcks?: number;
908
+ wrap?: (promise: Promise<void>) => Promise<void>;
909
+ } {
910
+ const delivery: DeliveryOptions | undefined =
911
+ deliveryArg === undefined || deliveryArg === false
912
+ ? undefined
913
+ : deliveryArg === true
914
+ ? { reliability: "ack" }
915
+ : deliveryArg;
916
+ if (!delivery) {
917
+ return {
918
+ delivery: undefined,
919
+ reliability: "best-effort",
920
+ requireRecipients: false,
921
+ minAcks: undefined,
922
+ wrap: undefined,
923
+ };
924
+ }
925
+
926
+ const reliability: DeliveryReliability = delivery.reliability ?? "ack";
927
+ const deliveryTimeout = delivery.timeout;
928
+ const deliverySignal = delivery.signal;
929
+ const requireRecipients = delivery.requireRecipients === true;
930
+ const minAcks =
931
+ delivery.minAcks != null && Number.isFinite(delivery.minAcks)
932
+ ? Math.max(0, Math.floor(delivery.minAcks))
933
+ : undefined;
934
+
935
+ const wrap =
936
+ deliveryTimeout == null && deliverySignal == null
937
+ ? undefined
938
+ : (promise: Promise<void>) =>
939
+ new Promise<void>((resolve, reject) => {
940
+ let settled = false;
941
+ let timer: ReturnType<typeof setTimeout> | undefined = undefined;
942
+ const onAbort = () => {
943
+ if (settled) {
944
+ return;
945
+ }
946
+ settled = true;
947
+ promise.catch(() => {});
948
+ cleanup();
949
+ reject(new AbortError());
950
+ };
951
+
952
+ const cleanup = () => {
953
+ if (timer != null) {
954
+ clearTimeout(timer);
955
+ timer = undefined;
956
+ }
957
+ deliverySignal?.removeEventListener("abort", onAbort);
958
+ };
959
+
960
+ if (deliverySignal) {
961
+ if (deliverySignal.aborted) {
962
+ onAbort();
963
+ return;
964
+ }
965
+ deliverySignal.addEventListener("abort", onAbort);
966
+ }
967
+
968
+ if (deliveryTimeout != null) {
969
+ timer = setTimeout(() => {
970
+ if (settled) {
971
+ return;
972
+ }
973
+ settled = true;
974
+ promise.catch(() => {});
975
+ cleanup();
976
+ reject(new TimeoutError(`Timeout waiting for delivery`));
977
+ }, deliveryTimeout);
978
+ }
979
+
980
+ promise
981
+ .then(() => {
982
+ if (settled) {
983
+ return;
984
+ }
985
+ settled = true;
986
+ cleanup();
987
+ resolve();
988
+ })
989
+ .catch((error) => {
990
+ if (settled) {
991
+ return;
992
+ }
993
+ settled = true;
994
+ cleanup();
995
+ reject(error);
996
+ });
997
+ });
998
+
999
+ return {
1000
+ delivery,
1001
+ reliability,
1002
+ requireRecipients,
1003
+ minAcks,
1004
+ wrap,
1005
+ };
1006
+ }
1007
+
1008
+ private async _getSortedRouteHints(
1009
+ targetHash: string,
1010
+ ): Promise<RouteHint[]> {
1011
+ const pubsub: any = this.node.services.pubsub as any;
1012
+ const maybeHints = await pubsub?.getUnifiedRouteHints?.(this.topic, targetHash);
1013
+ const hints: RouteHint[] = Array.isArray(maybeHints) ? maybeHints : [];
1014
+ const now = Date.now();
1015
+ return hints
1016
+ .filter((hint) => hint.expiresAt == null || hint.expiresAt > now)
1017
+ .sort((a, b) => {
1018
+ const rankA = a.kind === "directstream-ack" ? 0 : 1;
1019
+ const rankB = b.kind === "directstream-ack" ? 0 : 1;
1020
+ if (rankA !== rankB) {
1021
+ return rankA - rankB;
1022
+ }
1023
+
1024
+ const costA =
1025
+ a.kind === "directstream-ack"
1026
+ ? a.distance
1027
+ : Math.max(0, (a.route?.length ?? 1) - 1);
1028
+ const costB =
1029
+ b.kind === "directstream-ack"
1030
+ ? b.distance
1031
+ : Math.max(0, (b.route?.length ?? 1) - 1);
1032
+ if (costA !== costB) {
1033
+ return costA - costB;
1034
+ }
1035
+
1036
+ return (b.updatedAt ?? 0) - (a.updatedAt ?? 0);
1037
+ });
1038
+ }
1039
+
1040
+ private async _sendAckWithUnifiedHints(properties: {
1041
+ peer: string;
1042
+ message: ExchangeHeadsMessage<any>;
1043
+ payload: Uint8Array;
1044
+ fanoutUnicastOptions?: { timeoutMs?: number; signal?: AbortSignal };
1045
+ }): Promise<void> {
1046
+ const { peer, message, payload, fanoutUnicastOptions } = properties;
1047
+ const hints = await this._getSortedRouteHints(peer);
1048
+ const hasDirectHint = hints.some((hint) => hint.kind === "directstream-ack");
1049
+ const fanoutHint = hints.find(
1050
+ (hint): hint is Extract<RouteHint, { kind: "fanout-token" }> =>
1051
+ hint.kind === "fanout-token",
1052
+ );
1053
+
1054
+ if (hasDirectHint) {
1055
+ try {
1056
+ await this.rpc.send(message, {
1057
+ mode: new AcknowledgeDelivery({
1058
+ redundancy: 1,
1059
+ to: [peer],
1060
+ }),
1061
+ });
1062
+ return;
1063
+ } catch {
1064
+ // Fall back to fanout token/direct fanout unicast below.
1065
+ }
1066
+ }
1067
+
1068
+ if (fanoutHint && this._fanoutChannel) {
1069
+ try {
1070
+ await this._fanoutChannel.unicastAck(
1071
+ fanoutHint.route,
1072
+ payload,
1073
+ fanoutUnicastOptions,
1074
+ );
1075
+ return;
1076
+ } catch {
1077
+ // Fall back below.
1078
+ }
1079
+ }
1080
+
1081
+ if (this._fanoutChannel) {
1082
+ try {
1083
+ await this._fanoutChannel.unicastToAck(
1084
+ peer,
1085
+ payload,
1086
+ fanoutUnicastOptions,
1087
+ );
1088
+ return;
1089
+ } catch {
1090
+ // Fall back below.
1091
+ }
1092
+ }
1093
+
1094
+ await this.rpc.send(message, {
1095
+ mode: new AcknowledgeDelivery({
1096
+ redundancy: 1,
1097
+ to: [peer],
1098
+ }),
1099
+ });
1100
+ }
1101
+
1102
+ private async _appendDeliverToReplicators(
1103
+ entry: Entry<T>,
1104
+ minReplicasValue: number,
1105
+ leaders: Map<string, any>,
1106
+ selfHash: string,
1107
+ isLeader: boolean,
1108
+ deliveryArg: false | true | DeliveryOptions | undefined,
1109
+ ) {
1110
+ const { delivery, reliability, requireRecipients, minAcks, wrap } =
1111
+ this._parseDeliveryOptions(deliveryArg);
1112
+ const pending: Promise<void>[] = [];
1113
+ const track = (promise: Promise<void>) => {
1114
+ pending.push(wrap ? wrap(promise) : promise);
1115
+ };
1116
+ const fanoutUnicastOptions =
1117
+ delivery?.timeout != null || delivery?.signal != null
1118
+ ? { timeoutMs: delivery.timeout, signal: delivery.signal }
1119
+ : undefined;
1120
+
1121
+ for await (const message of createExchangeHeadsMessages(this.log, [entry])) {
1122
+ await this._mergeLeadersFromGidReferences(message, minReplicasValue, leaders);
1123
+ const leadersForDelivery = delivery ? new Set(leaders.keys()) : undefined;
1124
+
1125
+ const set = this.addPeersToGidPeerHistory(entry.meta.gid, leaders.keys());
1126
+ let hasRemotePeers = set.has(selfHash) ? set.size > 1 : set.size > 0;
1127
+ const allowSubscriberFallback =
1128
+ this.syncronizer instanceof SimpleSyncronizer ||
1129
+ (this.compatibility ?? Number.MAX_VALUE) < 10;
1130
+ if (!hasRemotePeers && allowSubscriberFallback) {
1131
+ try {
1132
+ const subscribers = await this._getTopicSubscribers(this.topic);
1133
+ if (subscribers && subscribers.length > 0) {
1134
+ for (const subscriber of subscribers) {
1135
+ const hash = subscriber.hashcode();
1136
+ if (hash === selfHash) {
1137
+ continue;
1138
+ }
1139
+ set.add(hash);
1140
+ leadersForDelivery?.add(hash);
1141
+ }
1142
+ hasRemotePeers = set.has(selfHash) ? set.size > 1 : set.size > 0;
1143
+ }
1144
+ } catch {
1145
+ // Best-effort only; keep discovered recipients as-is.
1146
+ }
1147
+ }
1148
+ if (!hasRemotePeers) {
1149
+ if (requireRecipients) {
1150
+ throw new NoPeersError(this.rpc.topic);
1151
+ }
1152
+ continue;
1153
+ }
1154
+
1155
+ if (!delivery) {
1156
+ this.rpc
1157
+ .send(message, {
1158
+ mode: isLeader
1159
+ ? new SilentDelivery({ redundancy: 1, to: set })
1160
+ : new AcknowledgeDelivery({ redundancy: 1, to: set }),
1161
+ })
1162
+ .catch((error) => logger.error(error));
1163
+ continue;
1164
+ }
1165
+
1166
+ const orderedRemoteRecipients: string[] = [];
1167
+ for (const peer of leadersForDelivery!) {
1168
+ if (peer === selfHash) {
1169
+ continue;
1170
+ }
1171
+ orderedRemoteRecipients.push(peer);
1172
+ }
1173
+ for (const peer of set) {
1174
+ if (peer === selfHash) {
1175
+ continue;
1176
+ }
1177
+ if (leadersForDelivery!.has(peer)) {
1178
+ continue;
1179
+ }
1180
+ orderedRemoteRecipients.push(peer);
1181
+ }
1182
+
1183
+ const ackTo: string[] = [];
1184
+ let silentTo: string[] | undefined;
1185
+ // Default delivery semantics: require enough remote ACKs to reach the requested
1186
+ // replication degree (local append counts as 1).
1187
+ const defaultMinAcks = Math.max(0, minReplicasValue - 1);
1188
+ const ackLimitRaw =
1189
+ reliability === "ack" ? (minAcks ?? defaultMinAcks) : 0;
1190
+ const ackLimit = Math.max(
1191
+ 0,
1192
+ Math.min(Math.floor(ackLimitRaw), orderedRemoteRecipients.length),
1193
+ );
1194
+
1195
+ for (const peer of orderedRemoteRecipients) {
1196
+ if (ackTo.length < ackLimit) {
1197
+ ackTo.push(peer);
1198
+ } else {
1199
+ silentTo ||= [];
1200
+ silentTo.push(peer);
1201
+ }
1202
+ }
1203
+
1204
+ if (requireRecipients && orderedRemoteRecipients.length === 0) {
1205
+ throw new NoPeersError(this.rpc.topic);
1206
+ }
1207
+ if (requireRecipients && ackTo.length + (silentTo?.length || 0) === 0) {
1208
+ throw new NoPeersError(this.rpc.topic);
1209
+ }
1210
+
1211
+ if (ackTo.length > 0) {
1212
+ const payload = serialize(message);
1213
+ for (const peer of ackTo) {
1214
+ track(
1215
+ (async () => {
1216
+ await this._sendAckWithUnifiedHints({
1217
+ peer,
1218
+ message,
1219
+ payload,
1220
+ fanoutUnicastOptions,
1221
+ });
1222
+ })(),
1223
+ );
1224
+ }
1225
+ }
1226
+
1227
+ if (silentTo?.length) {
1228
+ this.rpc
1229
+ .send(message, {
1230
+ mode: new SilentDelivery({ redundancy: 1, to: silentTo }),
1231
+ })
1232
+ .catch((error) => logger.error(error));
1233
+ }
1234
+ }
1235
+
1236
+ if (pending.length > 0) {
1237
+ await Promise.all(pending);
1238
+ }
1239
+ }
1240
+
1241
+ private async _mergeLeadersFromGidReferences(
1242
+ message: ExchangeHeadsMessage<any>,
1243
+ minReplicasValue: number,
1244
+ leaders: Map<string, any>,
1245
+ ) {
1246
+ const gidReferences = message.heads[0]?.gidRefrences;
1247
+ if (!gidReferences || gidReferences.length === 0) {
1248
+ return;
1249
+ }
1250
+
1251
+ for (const gidReference of gidReferences) {
1252
+ const entryFromGid = this.log.entryIndex.getHeads(gidReference, false);
1253
+ for (const gidEntry of await entryFromGid.all()) {
1254
+ let coordinates = await this.getCoordinates(gidEntry);
1255
+ if (coordinates == null) {
1256
+ coordinates = await this.createCoordinates(gidEntry, minReplicasValue);
1257
+ }
1258
+
1259
+ const found = await this._findLeaders(coordinates);
1260
+ for (const [key, value] of found) {
1261
+ leaders.set(key, value);
1262
+ }
1263
+ }
1264
+ }
1265
+ }
1266
+
1267
+ private async _appendDeliverToAllFanout(entry: Entry<T>) {
1268
+ for await (const message of createExchangeHeadsMessages(this.log, [entry])) {
1269
+ await this._publishExchangeHeadsViaFanout(message);
1270
+ }
1271
+ }
1272
+
1273
+ private async _resolvePublicKeyFromHash(
1274
+ hash: string,
1275
+ ): Promise<PublicSignKey | undefined> {
1276
+ const fanoutService = (this.node.services as any).fanout;
1277
+ return (
1278
+ fanoutService?.getPublicKey?.(hash) ??
1279
+ this.node.services.pubsub.getPublicKey(hash)
1280
+ );
1281
+ }
1282
+
1283
+ private async _getTopicSubscribers(
1284
+ topic: string,
1285
+ ): Promise<PublicSignKey[] | undefined> {
1286
+ const maxPeers = 64;
1287
+
1288
+ // Prefer the bounded peer set we already know from the fanout overlay.
1289
+ if (this._fanoutChannel && (topic === this.topic || topic === this.rpc.topic)) {
1290
+ const hashes = this._fanoutChannel
1291
+ .getPeerHashes({ includeSelf: false })
1292
+ .slice(0, maxPeers);
1293
+ if (hashes.length === 0) return [];
1294
+
1295
+ const keys = await Promise.all(
1296
+ hashes.map((hash) => this._resolvePublicKeyFromHash(hash)),
1297
+ );
1298
+ const uniqueKeys: PublicSignKey[] = [];
1299
+ const seen = new Set<string>();
1300
+ const selfHash = this.node.identity.publicKey.hashcode();
1301
+ for (const key of keys) {
1302
+ if (!key) continue;
1303
+ const hash = key.hashcode();
1304
+ if (hash === selfHash) continue;
1305
+ if (seen.has(hash)) continue;
1306
+ seen.add(hash);
1307
+ uniqueKeys.push(key);
1308
+ }
1309
+ return uniqueKeys;
1310
+ }
1311
+
1312
+ const selfHash = this.node.identity.publicKey.hashcode();
1313
+ const hashes: string[] = [];
1314
+
1315
+ // Best-effort provider discovery (bounded). This requires bootstrap trackers.
1316
+ try {
1317
+ const fanoutService = (this.node.services as any).fanout;
1318
+ if (fanoutService?.queryProviders) {
1319
+ const ns = `shared-log|${this.topic}`;
1320
+ const seed = hashToSeed32(topic);
1321
+ const providers: string[] = await fanoutService.queryProviders(ns, {
1322
+ want: maxPeers,
1323
+ seed,
1324
+ });
1325
+ for (const h of providers ?? []) {
1326
+ if (!h || h === selfHash) continue;
1327
+ hashes.push(h);
1328
+ if (hashes.length >= maxPeers) break;
1329
+ }
1330
+ }
1331
+ } catch {
1332
+ // Best-effort only.
1333
+ }
1334
+
1335
+ // Next, use already-connected peer streams (bounded and cheap).
1336
+ const peerMap: Map<string, unknown> | undefined = (this.node.services.pubsub as any)
1337
+ ?.peers;
1338
+ if (peerMap?.keys) {
1339
+ for (const h of peerMap.keys()) {
1340
+ if (!h || h === selfHash) continue;
1341
+ hashes.push(h);
1342
+ if (hashes.length >= maxPeers) break;
1343
+ }
1344
+ }
1345
+
1346
+ // Finally, fall back to libp2p connections (e.g. bootstrap peers) without requiring
1347
+ // any global topic membership view.
1348
+ if (hashes.length < maxPeers) {
1349
+ const connectionManager = (this.node.services.pubsub as any)?.components
1350
+ ?.connectionManager;
1351
+ const connections = connectionManager?.getConnections?.() ?? [];
1352
+ for (const conn of connections) {
1353
+ const peerId = conn?.remotePeer;
1354
+ if (!peerId) continue;
1355
+ try {
1356
+ const h = getPublicKeyFromPeerId(peerId).hashcode();
1357
+ if (!h || h === selfHash) continue;
1358
+ hashes.push(h);
1359
+ if (hashes.length >= maxPeers) break;
1360
+ } catch {
1361
+ // Best-effort only.
1362
+ }
1363
+ }
1364
+ }
1365
+
1366
+ if (hashes.length === 0) return [];
1367
+
1368
+ const uniqueHashes: string[] = [];
1369
+ const seen = new Set<string>();
1370
+ for (const h of hashes) {
1371
+ if (seen.has(h)) continue;
1372
+ seen.add(h);
1373
+ uniqueHashes.push(h);
1374
+ if (uniqueHashes.length >= maxPeers) break;
1375
+ }
1376
+
1377
+ const keys = await Promise.all(
1378
+ uniqueHashes.map((hash) => this._resolvePublicKeyFromHash(hash)),
1379
+ );
1380
+ const uniqueKeys: PublicSignKey[] = [];
1381
+ for (const key of keys) {
1382
+ if (!key) continue;
1383
+ const hash = key.hashcode();
1384
+ if (hash === selfHash) continue;
1385
+ uniqueKeys.push(key);
1386
+ }
1387
+ return uniqueKeys;
1388
+ }
1389
+
600
1390
  // @deprecated
601
1391
  private async getRole() {
602
1392
  const segments = await this.getMyReplicationSegments();
@@ -1004,8 +1794,9 @@ export class SharedLog<
1004
1794
  })
1005
1795
  .all();
1006
1796
 
1007
- this.uniqueReplicators.delete(keyHash);
1008
- await this.replicationIndex.del({ query: { hash: keyHash } });
1797
+ this.uniqueReplicators.delete(keyHash);
1798
+ this._replicatorJoinEmitted.delete(keyHash);
1799
+ await this.replicationIndex.del({ query: { hash: keyHash } });
1009
1800
 
1010
1801
  await this.updateOldestTimestampFromIndex();
1011
1802
 
@@ -1030,14 +1821,14 @@ export class SharedLog<
1030
1821
  }
1031
1822
  }
1032
1823
 
1033
- const timestamp = BigInt(+new Date());
1034
- for (const x of deleted) {
1035
- this.replicationChangeDebounceFn.add({
1036
- range: x.value,
1037
- type: "removed",
1038
- timestamp,
1039
- });
1040
- }
1824
+ const timestamp = BigInt(+new Date());
1825
+ for (const x of deleted) {
1826
+ this.replicationChangeDebounceFn.add({
1827
+ range: x.value,
1828
+ type: "removed",
1829
+ timestamp,
1830
+ });
1831
+ }
1041
1832
 
1042
1833
  const pendingMaturity = this.pendingMaturity.get(keyHash);
1043
1834
  if (pendingMaturity) {
@@ -1047,6 +1838,14 @@ export class SharedLog<
1047
1838
  this.pendingMaturity.delete(keyHash);
1048
1839
  }
1049
1840
 
1841
+ // Keep local sync/prune state consistent even when a peer disappears
1842
+ // through replication-info updates without a topic unsubscribe event.
1843
+ this.removePeerFromGidPeerHistory(keyHash);
1844
+ this._recentRepairDispatch.delete(keyHash);
1845
+ if (!isMe) {
1846
+ this.syncronizer.onPeerDisconnected(keyHash);
1847
+ }
1848
+
1050
1849
  if (!isMe) {
1051
1850
  this.rebalanceParticipationDebounced?.call();
1052
1851
  }
@@ -1118,9 +1917,10 @@ export class SharedLog<
1118
1917
  { query: { hash: from.hashcode() } },
1119
1918
  { shape: { id: true } },
1120
1919
  );
1121
- if ((await otherSegmentsIterator.next(1)).length === 0) {
1122
- this.uniqueReplicators.delete(from.hashcode());
1123
- }
1920
+ if ((await otherSegmentsIterator.next(1)).length === 0) {
1921
+ this.uniqueReplicators.delete(from.hashcode());
1922
+ this._replicatorJoinEmitted.delete(from.hashcode());
1923
+ }
1124
1924
  await otherSegmentsIterator.close();
1125
1925
 
1126
1926
  await this.updateOldestTimestampFromIndex();
@@ -1160,6 +1960,7 @@ export class SharedLog<
1160
1960
 
1161
1961
  let diffs: ReplicationChanges<ReplicationRangeIndexable<R>>;
1162
1962
  let deleted: ReplicationRangeIndexable<R>[] | undefined = undefined;
1963
+ let isStoppedReplicating = false;
1163
1964
  if (reset) {
1164
1965
  deleted = (
1165
1966
  await this.replicationIndex
@@ -1198,6 +1999,7 @@ export class SharedLog<
1198
1999
  }
1199
2000
 
1200
2001
  isNewReplicator = prevCount === 0 && ranges.length > 0;
2002
+ isStoppedReplicating = prevCount > 0 && ranges.length === 0;
1201
2003
  } else {
1202
2004
  let batchSize = 100;
1203
2005
  let existing: ReplicationRangeIndexable<R>[] = [];
@@ -1281,7 +2083,16 @@ export class SharedLog<
1281
2083
  diffs = changes;
1282
2084
  }
1283
2085
 
1284
- this.uniqueReplicators.add(from.hashcode());
2086
+ const fromHash = from.hashcode();
2087
+ // Track replicator membership transitions synchronously so join/leave events are
2088
+ // idempotent even if we process concurrent reset messages/unsubscribes.
2089
+ const stoppedTransition =
2090
+ ranges.length === 0 ? this.uniqueReplicators.delete(fromHash) : false;
2091
+ if (ranges.length === 0) {
2092
+ this._replicatorJoinEmitted.delete(fromHash);
2093
+ } else {
2094
+ this.uniqueReplicators.add(fromHash);
2095
+ }
1285
2096
 
1286
2097
  let now = +new Date();
1287
2098
  let minRoleAge = await this.getDefaultMinRoleAge();
@@ -1327,13 +2138,13 @@ export class SharedLog<
1327
2138
  }),
1328
2139
  );
1329
2140
 
1330
- if (rebalance && diff.range.mode !== ReplicationIntent.Strict) {
1331
- // TODO this statement (might) cause issues with triggering pruning if the segment is strict and maturity timings will affect the outcome of rebalancing
1332
- this.replicationChangeDebounceFn.add({
1333
- ...diff,
1334
- matured: true,
1335
- }); // we need to call this here because the outcom of findLeaders will be different when some ranges become mature, i.e. some of data we own might be prunable!
1336
- }
2141
+ if (rebalance && diff.range.mode !== ReplicationIntent.Strict) {
2142
+ // TODO this statement (might) cause issues with triggering pruning if the segment is strict and maturity timings will affect the outcome of rebalancing
2143
+ this.replicationChangeDebounceFn.add({
2144
+ ...diff,
2145
+ matured: true,
2146
+ }); // we need to call this here because the outcome of findLeaders will be different when some ranges become mature, i.e. some of the data we own might be prunable!
2147
+ }
1337
2148
  pendingRanges.delete(diff.range.idString);
1338
2149
  if (pendingRanges.size === 0) {
1339
2150
  this.pendingMaturity.delete(diff.range.hash);
@@ -1379,28 +2190,39 @@ export class SharedLog<
1379
2190
  }),
1380
2191
  );
1381
2192
 
1382
- if (isNewReplicator) {
1383
- this.events.dispatchEvent(
1384
- new CustomEvent<ReplicatorJoinEvent>("replicator:join", {
1385
- detail: { publicKey: from },
1386
- }),
1387
- );
2193
+ if (isNewReplicator) {
2194
+ if (!this._replicatorJoinEmitted.has(fromHash)) {
2195
+ this._replicatorJoinEmitted.add(fromHash);
2196
+ this.events.dispatchEvent(
2197
+ new CustomEvent<ReplicatorJoinEvent>("replicator:join", {
2198
+ detail: { publicKey: from },
2199
+ }),
2200
+ );
2201
+ }
1388
2202
 
1389
- if (isAllMature) {
1390
- this.events.dispatchEvent(
1391
- new CustomEvent<ReplicatorMatureEvent>("replicator:mature", {
1392
- detail: { publicKey: from },
2203
+ if (isAllMature) {
2204
+ this.events.dispatchEvent(
2205
+ new CustomEvent<ReplicatorMatureEvent>("replicator:mature", {
2206
+ detail: { publicKey: from },
1393
2207
  }),
1394
2208
  );
1395
2209
  }
1396
2210
  }
1397
2211
 
1398
- if (rebalance) {
1399
- for (const diff of diffs) {
1400
- this.replicationChangeDebounceFn.add(diff);
1401
- }
2212
+ if (isStoppedReplicating && stoppedTransition) {
2213
+ this.events.dispatchEvent(
2214
+ new CustomEvent<ReplicatorLeaveEvent>("replicator:leave", {
2215
+ detail: { publicKey: from },
2216
+ }),
2217
+ );
1402
2218
  }
1403
2219
 
2220
+ if (rebalance) {
2221
+ for (const diff of diffs) {
2222
+ this.replicationChangeDebounceFn.add(diff);
2223
+ }
2224
+ }
2225
+
1404
2226
  if (!from.equals(this.node.identity.publicKey)) {
1405
2227
  this.rebalanceParticipationDebounced?.call();
1406
2228
  }
@@ -1432,6 +2254,20 @@ export class SharedLog<
1432
2254
  if (change) {
1433
2255
  let addedOrReplaced = change.filter((x) => x.type !== "removed");
1434
2256
  if (addedOrReplaced.length > 0) {
2257
+ // Provider discovery keep-alive (best-effort). This enables bounded targeted fetches
2258
+ // without relying on any global subscriber list.
2259
+ try {
2260
+ const fanoutService = (this.node.services as any).fanout;
2261
+ if (fanoutService?.provide && !this._providerHandle) {
2262
+ this._providerHandle = fanoutService.provide(`shared-log|${this.topic}`, {
2263
+ ttlMs: 120_000,
2264
+ announceIntervalMs: 60_000,
2265
+ });
2266
+ }
2267
+ } catch {
2268
+ // Best-effort only.
2269
+ }
2270
+
1435
2271
  let message:
1436
2272
  | AllReplicatingSegmentsMessage
1437
2273
  | AddedReplicationSegmentMessage
@@ -1494,6 +2330,218 @@ export class SharedLog<
1494
2330
  return set;
1495
2331
  }
1496
2332
 
2333
+ private dispatchMaybeMissingEntries(
2334
+ target: string,
2335
+ entries: Map<string, EntryReplicated<R>>,
2336
+ options?: {
2337
+ bypassRecentDedupe?: boolean;
2338
+ retryScheduleMs?: number[];
2339
+ forceFreshDelivery?: boolean;
2340
+ },
2341
+ ) {
2342
+ if (entries.size === 0) {
2343
+ return;
2344
+ }
2345
+
2346
+ const now = Date.now();
2347
+ let recentlyDispatchedByHash = this._recentRepairDispatch.get(target);
2348
+ if (!recentlyDispatchedByHash) {
2349
+ recentlyDispatchedByHash = new Map();
2350
+ this._recentRepairDispatch.set(target, recentlyDispatchedByHash);
2351
+ }
2352
+ for (const [hash, ts] of recentlyDispatchedByHash) {
2353
+ if (now - ts > RECENT_REPAIR_DISPATCH_TTL_MS) {
2354
+ recentlyDispatchedByHash.delete(hash);
2355
+ }
2356
+ }
2357
+
2358
+ const filteredEntries =
2359
+ options?.bypassRecentDedupe === true
2360
+ ? new Map(entries)
2361
+ : new Map<string, EntryReplicated<any>>();
2362
+ if (options?.bypassRecentDedupe !== true) {
2363
+ for (const [hash, entry] of entries) {
2364
+ const prev = recentlyDispatchedByHash.get(hash);
2365
+ if (prev != null && now - prev <= RECENT_REPAIR_DISPATCH_TTL_MS) {
2366
+ continue;
2367
+ }
2368
+ recentlyDispatchedByHash.set(hash, now);
2369
+ filteredEntries.set(hash, entry);
2370
+ }
2371
+ } else {
2372
+ for (const hash of entries.keys()) {
2373
+ recentlyDispatchedByHash.set(hash, now);
2374
+ }
2375
+ }
2376
+ if (filteredEntries.size === 0) {
2377
+ return;
2378
+ }
2379
+
2380
+ const run = () => {
2381
+ // For force-fresh churn repair we intentionally bypass rateless IBLT and
2382
+ // use simple hash-based sync. This path is a directed "push these hashes
2383
+ // to that peer" recovery flow; using simple sync here avoids occasional
2384
+ // single-hash gaps seen with IBLT-oriented maybe-sync batches under churn.
2385
+ if (
2386
+ options?.forceFreshDelivery &&
2387
+ this.syncronizer instanceof RatelessIBLTSynchronizer
2388
+ ) {
2389
+ return Promise.resolve(
2390
+ this.syncronizer.simple.onMaybeMissingEntries({
2391
+ entries: filteredEntries,
2392
+ targets: [target],
2393
+ }),
2394
+ ).catch((error: any) => logger.error(error));
2395
+ }
2396
+
2397
+ return Promise.resolve(
2398
+ this.syncronizer.onMaybeMissingEntries({
2399
+ entries: filteredEntries,
2400
+ targets: [target],
2401
+ }),
2402
+ ).catch((error: any) => logger.error(error));
2403
+ };
2404
+
2405
+ const retrySchedule =
2406
+ options?.retryScheduleMs && options.retryScheduleMs.length > 0
2407
+ ? options.retryScheduleMs
2408
+ : options?.forceFreshDelivery
2409
+ ? FORCE_FRESH_RETRY_SCHEDULE_MS
2410
+ : [0];
2411
+
2412
+ for (const delayMs of retrySchedule) {
2413
+ if (delayMs === 0) {
2414
+ void run();
2415
+ continue;
2416
+ }
2417
+ const timer = setTimeout(() => {
2418
+ this._repairRetryTimers.delete(timer);
2419
+ if (this.closed) {
2420
+ return;
2421
+ }
2422
+ void run();
2423
+ }, delayMs);
2424
+ timer.unref?.();
2425
+ this._repairRetryTimers.add(timer);
2426
+ }
2427
+ }
2428
+
2429
+ private scheduleRepairSweep(options: {
2430
+ forceFreshDelivery: boolean;
2431
+ addedPeers: Set<string>;
2432
+ }) {
2433
+ if (options.forceFreshDelivery) {
2434
+ this._repairSweepForceFreshPending = true;
2435
+ }
2436
+ for (const peer of options.addedPeers) {
2437
+ this._repairSweepAddedPeersPending.add(peer);
2438
+ }
2439
+ if (!this._repairSweepRunning && !this.closed) {
2440
+ this._repairSweepRunning = true;
2441
+ void this.runRepairSweep();
2442
+ }
2443
+ }
2444
+
2445
+ private async runRepairSweep() {
2446
+ try {
2447
+ while (!this.closed) {
2448
+ const forceFreshDelivery = this._repairSweepForceFreshPending;
2449
+ const addedPeers = new Set(this._repairSweepAddedPeersPending);
2450
+ this._repairSweepForceFreshPending = false;
2451
+ this._repairSweepAddedPeersPending.clear();
2452
+
2453
+ if (!forceFreshDelivery && addedPeers.size === 0) {
2454
+ return;
2455
+ }
2456
+
2457
+ const pendingByTarget = new Map<string, Map<string, EntryReplicated<any>>>();
2458
+ const flushTarget = (target: string) => {
2459
+ const entries = pendingByTarget.get(target);
2460
+ if (!entries || entries.size === 0) {
2461
+ return;
2462
+ }
2463
+ const isJoinWarmupTarget = addedPeers.has(target);
2464
+ const bypassRecentDedupe = isJoinWarmupTarget || forceFreshDelivery;
2465
+ this.dispatchMaybeMissingEntries(target, entries, {
2466
+ bypassRecentDedupe,
2467
+ retryScheduleMs: isJoinWarmupTarget
2468
+ ? JOIN_WARMUP_RETRY_SCHEDULE_MS
2469
+ : undefined,
2470
+ forceFreshDelivery,
2471
+ });
2472
+ pendingByTarget.delete(target);
2473
+ };
2474
+ const queueEntryForTarget = (
2475
+ target: string,
2476
+ entry: EntryReplicated<any>,
2477
+ ) => {
2478
+ let set = pendingByTarget.get(target);
2479
+ if (!set) {
2480
+ set = new Map();
2481
+ pendingByTarget.set(target, set);
2482
+ }
2483
+ if (set.has(entry.hash)) {
2484
+ return;
2485
+ }
2486
+ set.set(entry.hash, entry);
2487
+ if (set.size >= this.repairSweepTargetBufferSize) {
2488
+ flushTarget(target);
2489
+ }
2490
+ };
2491
+
2492
+ const iterator = this.entryCoordinatesIndex.iterate({});
2493
+ try {
2494
+ while (!this.closed && !iterator.done()) {
2495
+ const entries = await iterator.next(REPAIR_SWEEP_ENTRY_BATCH_SIZE);
2496
+ for (const entry of entries) {
2497
+ const entryReplicated = entry.value;
2498
+ const currentPeers = await this.findLeaders(
2499
+ entryReplicated.coordinates,
2500
+ entryReplicated,
2501
+ { roleAge: 0 },
2502
+ );
2503
+ if (forceFreshDelivery) {
2504
+ for (const [currentPeer] of currentPeers) {
2505
+ if (currentPeer === this.node.identity.publicKey.hashcode()) {
2506
+ continue;
2507
+ }
2508
+ queueEntryForTarget(currentPeer, entryReplicated);
2509
+ }
2510
+ }
2511
+ if (addedPeers.size > 0) {
2512
+ for (const peer of addedPeers) {
2513
+ if (currentPeers.has(peer)) {
2514
+ queueEntryForTarget(peer, entryReplicated);
2515
+ }
2516
+ }
2517
+ }
2518
+ }
2519
+ }
2520
+ } finally {
2521
+ await iterator.close();
2522
+ }
2523
+
2524
+ for (const target of [...pendingByTarget.keys()]) {
2525
+ flushTarget(target);
2526
+ }
2527
+ }
2528
+ } catch (error: any) {
2529
+ if (!isNotStartedError(error)) {
2530
+ logger.error(`Repair sweep failed: ${error?.message ?? error}`);
2531
+ }
2532
+ } finally {
2533
+ this._repairSweepRunning = false;
2534
+ if (
2535
+ !this.closed &&
2536
+ (this._repairSweepForceFreshPending ||
2537
+ this._repairSweepAddedPeersPending.size > 0)
2538
+ ) {
2539
+ this._repairSweepRunning = true;
2540
+ void this.runRepairSweep();
2541
+ }
2542
+ }
2543
+ }
2544
+
1497
2545
  private async pruneDebouncedFnAddIfNotKeeping(args: {
1498
2546
  key: string;
1499
2547
  value: {
@@ -1501,9 +2549,85 @@ export class SharedLog<
1501
2549
  leaders: Map<string, any>;
1502
2550
  };
1503
2551
  }) {
1504
- if (!this.keep || !(await this.keep(args.value.entry))) {
1505
- return this.pruneDebouncedFn.add(args);
2552
+ if (!this.keep || !(await this.keep(args.value.entry))) {
2553
+ return this.pruneDebouncedFn.add(args);
2554
+ }
2555
+ }
2556
+
2557
+ private clearCheckedPruneRetry(hash: string) {
2558
+ const state = this._checkedPruneRetries.get(hash);
2559
+ if (state?.timer) {
2560
+ clearTimeout(state.timer);
2561
+ }
2562
+ this._checkedPruneRetries.delete(hash);
2563
+ }
2564
+
2565
+ private scheduleCheckedPruneRetry(args: {
2566
+ entry: EntryReplicated<R> | ShallowOrFullEntry<any>;
2567
+ leaders: Map<string, unknown> | Set<string>;
2568
+ }) {
2569
+ if (this.closed) return;
2570
+ if (this._pendingDeletes.has(args.entry.hash)) return;
2571
+
2572
+ const hash = args.entry.hash;
2573
+ const state =
2574
+ this._checkedPruneRetries.get(hash) ?? { attempts: 0 };
2575
+
2576
+ if (state.timer) return;
2577
+ if (state.attempts >= CHECKED_PRUNE_RETRY_MAX_ATTEMPTS) {
2578
+ // Avoid unbounded background retries; a new replication-change event can
2579
+ // always re-enqueue pruning with fresh leader info.
2580
+ return;
1506
2581
  }
2582
+
2583
+ const attempt = state.attempts + 1;
2584
+ const jitterMs = Math.floor(Math.random() * 250);
2585
+ const delayMs = Math.min(
2586
+ CHECKED_PRUNE_RETRY_MAX_DELAY_MS,
2587
+ 1_000 * 2 ** (attempt - 1) + jitterMs,
2588
+ );
2589
+
2590
+ state.attempts = attempt;
2591
+ state.timer = setTimeout(async () => {
2592
+ const st = this._checkedPruneRetries.get(hash);
2593
+ if (st) st.timer = undefined;
2594
+ if (this.closed) return;
2595
+ if (this._pendingDeletes.has(hash)) return;
2596
+
2597
+ let leadersMap: Map<string, any> | undefined;
2598
+ try {
2599
+ const replicas = decodeReplicas(args.entry).getValue(this);
2600
+ leadersMap = await this.findLeadersFromEntry(args.entry, replicas, {
2601
+ roleAge: 0,
2602
+ });
2603
+ } catch {
2604
+ // Best-effort only.
2605
+ }
2606
+
2607
+ if (!leadersMap || leadersMap.size === 0) {
2608
+ if (args.leaders instanceof Map) {
2609
+ leadersMap = args.leaders as any;
2610
+ } else {
2611
+ leadersMap = new Map<string, any>();
2612
+ for (const k of args.leaders) {
2613
+ leadersMap.set(k, { intersecting: true });
2614
+ }
2615
+ }
2616
+ }
2617
+
2618
+ try {
2619
+ const leadersForRetry = leadersMap ?? new Map<string, any>();
2620
+ await this.pruneDebouncedFnAddIfNotKeeping({
2621
+ key: hash,
2622
+ // TODO types
2623
+ value: { entry: args.entry as any, leaders: leadersForRetry },
2624
+ });
2625
+ } catch {
2626
+ // Best-effort only; pruning will be re-attempted on future changes.
2627
+ }
2628
+ }, delayMs);
2629
+ state.timer.unref?.();
2630
+ this._checkedPruneRetries.set(hash, state);
1507
2631
  }
1508
2632
 
1509
2633
  async append(
@@ -1571,286 +2695,30 @@ export class SharedLog<
1571
2695
  if (options?.target !== "none") {
1572
2696
  const target = options?.target;
1573
2697
  const deliveryArg = options?.delivery;
1574
- const delivery: DeliveryOptions | undefined =
1575
- deliveryArg === undefined || deliveryArg === false
1576
- ? undefined
1577
- : deliveryArg === true
1578
- ? {}
1579
- : deliveryArg;
1580
-
1581
- let requireRecipients = false;
1582
- let settleMin: number | undefined;
1583
- let guardDelivery:
1584
- | ((promise: Promise<void>) => Promise<void>)
1585
- | undefined = undefined;
1586
-
1587
- let firstDeliveryPromise: Promise<void> | undefined;
1588
- let deliveryPromises: Promise<void>[] | undefined;
1589
- let addDeliveryPromise: ((promise: Promise<void>) => void) | undefined;
1590
-
1591
- const leadersForDelivery =
1592
- delivery && (target === "replicators" || !target)
1593
- ? new Set(leaders.keys())
1594
- : undefined;
1595
-
1596
- if (delivery) {
1597
- const deliverySettle = delivery.settle ?? true;
1598
- const deliveryTimeout = delivery.timeout;
1599
- const deliverySignal = delivery.signal;
1600
- requireRecipients = delivery.requireRecipients === true;
1601
- settleMin =
1602
- typeof deliverySettle === "object" &&
1603
- Number.isFinite(deliverySettle.min)
1604
- ? Math.max(0, Math.floor(deliverySettle.min))
1605
- : undefined;
1606
-
1607
- guardDelivery =
1608
- deliveryTimeout == null && deliverySignal == null
1609
- ? undefined
1610
- : (promise: Promise<void>) =>
1611
- new Promise<void>((resolve, reject) => {
1612
- let settled = false;
1613
- let timer: ReturnType<typeof setTimeout> | undefined =
1614
- undefined;
1615
- const onAbort = () => {
1616
- if (settled) {
1617
- return;
1618
- }
1619
- settled = true;
1620
- promise.catch(() => {});
1621
- cleanup();
1622
- reject(new AbortError());
1623
- };
1624
-
1625
- const cleanup = () => {
1626
- if (timer != null) {
1627
- clearTimeout(timer);
1628
- timer = undefined;
1629
- }
1630
- deliverySignal?.removeEventListener("abort", onAbort);
1631
- };
1632
-
1633
- if (deliverySignal) {
1634
- if (deliverySignal.aborted) {
1635
- onAbort();
1636
- return;
1637
- }
1638
- deliverySignal.addEventListener("abort", onAbort);
1639
- }
1640
-
1641
- if (deliveryTimeout != null) {
1642
- timer = setTimeout(() => {
1643
- if (settled) {
1644
- return;
1645
- }
1646
- settled = true;
1647
- promise.catch(() => {});
1648
- cleanup();
1649
- reject(new TimeoutError(`Timeout waiting for delivery`));
1650
- }, deliveryTimeout);
1651
- }
1652
-
1653
- promise
1654
- .then(() => {
1655
- if (settled) {
1656
- return;
1657
- }
1658
- settled = true;
1659
- cleanup();
1660
- resolve();
1661
- })
1662
- .catch((e) => {
1663
- if (settled) {
1664
- return;
1665
- }
1666
- settled = true;
1667
- cleanup();
1668
- reject(e);
1669
- });
1670
- });
2698
+ const hasDelivery = !(deliveryArg === undefined || deliveryArg === false);
1671
2699
 
1672
- addDeliveryPromise = (promise: Promise<void>) => {
1673
- if (!firstDeliveryPromise) {
1674
- firstDeliveryPromise = promise;
1675
- return;
1676
- }
1677
- if (!deliveryPromises) {
1678
- deliveryPromises = [firstDeliveryPromise, promise];
1679
- firstDeliveryPromise = undefined;
1680
- return;
1681
- }
1682
- deliveryPromises.push(promise);
1683
- };
2700
+ if (target === "all" && hasDelivery) {
2701
+ throw new Error(
2702
+ `delivery options are not supported with target="all"; fanout broadcast is fire-and-forward`,
2703
+ );
1684
2704
  }
1685
-
1686
- for await (const message of createExchangeHeadsMessages(this.log, [
1687
- result.entry,
1688
- ])) {
1689
- if (target === "replicators" || !target) {
1690
- if (message.heads[0].gidRefrences.length > 0) {
1691
- for (const ref of message.heads[0].gidRefrences) {
1692
- const entryFromGid = this.log.entryIndex.getHeads(ref, false);
1693
- for (const entry of await entryFromGid.all()) {
1694
- let coordinates = await this.getCoordinates(entry);
1695
- if (coordinates == null) {
1696
- coordinates = await this.createCoordinates(
1697
- entry,
1698
- minReplicasValue,
1699
- );
1700
- // TODO are we every to come here?
1701
- }
1702
-
1703
- const result = await this._findLeaders(coordinates);
1704
- for (const [k, v] of result) {
1705
- leaders.set(k, v);
1706
- }
1707
- }
1708
- }
1709
- }
1710
-
1711
- const set = this.addPeersToGidPeerHistory(
1712
- result.entry.meta.gid,
1713
- leaders.keys(),
1714
- );
1715
- let hasRemotePeers = set.has(selfHash) ? set.size > 1 : set.size > 0;
1716
- if (!hasRemotePeers) {
1717
- if (requireRecipients) {
1718
- throw new NoPeersError(this.rpc.topic);
1719
- }
1720
- continue;
1721
- }
1722
-
1723
- if (!delivery) {
1724
- this.rpc
1725
- .send(message, {
1726
- mode: isLeader
1727
- ? new SilentDelivery({ redundancy: 1, to: set })
1728
- : new AcknowledgeDelivery({ redundancy: 1, to: set }),
1729
- })
1730
- .catch((e) => logger.error(e));
1731
- continue;
1732
- }
1733
-
1734
- let expectedRemoteRecipientsCount = 0;
1735
- const ackTo: string[] = [];
1736
- let silentTo: string[] | undefined;
1737
- const ackLimit =
1738
- settleMin == null ? Number.POSITIVE_INFINITY : settleMin;
1739
-
1740
- // Always settle towards the current expected replicators for this entry,
1741
- // not the entire gid peer history.
1742
- for (const peer of leadersForDelivery!) {
1743
- if (peer === selfHash) {
1744
- continue;
1745
- }
1746
- expectedRemoteRecipientsCount++;
1747
- if (ackTo.length < ackLimit) {
1748
- ackTo.push(peer);
1749
- } else {
1750
- silentTo ||= [];
1751
- silentTo.push(peer);
1752
- }
1753
- }
1754
-
1755
- // Still deliver to known peers for the gid (best-effort), but don't let them
1756
- // satisfy the settle requirement.
1757
- for (const peer of set) {
1758
- if (peer === selfHash) {
1759
- continue;
1760
- }
1761
- if (leadersForDelivery!.has(peer)) {
1762
- continue;
1763
- }
1764
- silentTo ||= [];
1765
- silentTo.push(peer);
1766
- }
1767
-
1768
- if (requireRecipients && expectedRemoteRecipientsCount === 0) {
1769
- throw new NoPeersError(this.rpc.topic);
1770
- }
1771
-
1772
- if (
1773
- requireRecipients &&
1774
- ackTo.length + (silentTo?.length || 0) === 0
1775
- ) {
1776
- throw new NoPeersError(this.rpc.topic);
1777
- }
1778
-
1779
- if (ackTo.length > 0) {
1780
- const promise = this.rpc.send(message, {
1781
- mode: new AcknowledgeDelivery({
1782
- redundancy: 1,
1783
- to: ackTo,
1784
- }),
1785
- });
1786
- addDeliveryPromise!(
1787
- guardDelivery ? guardDelivery(promise) : promise,
1788
- );
1789
- }
1790
-
1791
- if (silentTo?.length) {
1792
- this.rpc
1793
- .send(message, {
1794
- mode: new SilentDelivery({ redundancy: 1, to: silentTo }),
1795
- })
1796
- .catch((e) => logger.error(e));
1797
- }
1798
- } else {
1799
- if (!delivery) {
1800
- this.rpc.send(message).catch((e) => logger.error(e));
1801
- continue;
1802
- }
1803
-
1804
- const subscribers = await this.node.services.pubsub.getSubscribers(
1805
- this.rpc.topic,
1806
- );
1807
-
1808
- const ackTo: PublicSignKey[] = [];
1809
- let silentTo: PublicSignKey[] | undefined;
1810
- const ackLimit =
1811
- settleMin == null ? Number.POSITIVE_INFINITY : settleMin;
1812
- for (const subscriber of subscribers || []) {
1813
- if (subscriber.hashcode() === selfHash) {
1814
- continue;
1815
- }
1816
- if (ackTo.length < ackLimit) {
1817
- ackTo.push(subscriber);
1818
- } else {
1819
- silentTo ||= [];
1820
- silentTo.push(subscriber);
1821
- }
1822
- }
1823
-
1824
- if (
1825
- requireRecipients &&
1826
- ackTo.length + (silentTo?.length || 0) === 0
1827
- ) {
1828
- throw new NoPeersError(this.rpc.topic);
1829
- }
1830
-
1831
- if (ackTo.length > 0) {
1832
- const promise = this.rpc.send(message, {
1833
- mode: new AcknowledgeDelivery({ redundancy: 1, to: ackTo }),
1834
- });
1835
- addDeliveryPromise!(
1836
- guardDelivery ? guardDelivery(promise) : promise,
1837
- );
1838
- }
1839
-
1840
- if (silentTo?.length) {
1841
- this.rpc
1842
- .send(message, {
1843
- mode: new SilentDelivery({ redundancy: 1, to: silentTo }),
1844
- })
1845
- .catch((e) => logger.error(e));
1846
- }
1847
- }
2705
+ if (target === "all" && !this._fanoutChannel) {
2706
+ throw new Error(
2707
+ `No fanout channel configured for shared-log topic ${this.topic}`,
2708
+ );
1848
2709
  }
1849
2710
 
1850
- if (deliveryPromises) {
1851
- await Promise.all(deliveryPromises);
1852
- } else if (firstDeliveryPromise) {
1853
- await firstDeliveryPromise;
2711
+ if (target === "all") {
2712
+ await this._appendDeliverToAllFanout(result.entry);
2713
+ } else {
2714
+ await this._appendDeliverToReplicators(
2715
+ result.entry,
2716
+ minReplicasValue,
2717
+ leaders,
2718
+ selfHash,
2719
+ isLeader,
2720
+ deliveryArg,
2721
+ );
1854
2722
  }
1855
2723
  }
1856
2724
 
@@ -1891,19 +2759,33 @@ export class SharedLog<
1891
2759
  this.domain.resolution,
1892
2760
  );
1893
2761
  this._respondToIHaveTimeout = options?.respondToIHaveTimeout ?? 2e4;
1894
- this._pendingDeletes = new Map();
1895
- this._pendingIHave = new Map();
1896
- this.latestReplicationInfoMessage = new Map();
1897
- this.coordinateToHash = new Cache<string>({ max: 1e6, ttl: 1e4 });
1898
- this.recentlyRebalanced = new Cache<string>({ max: 1e4, ttl: 1e5 });
1899
-
1900
- this.uniqueReplicators = new Set();
1901
- this._replicatorsReconciled = false;
2762
+ this._pendingDeletes = new Map();
2763
+ this._pendingIHave = new Map();
2764
+ this.latestReplicationInfoMessage = new Map();
2765
+ this._replicationInfoBlockedPeers = new Set();
2766
+ this._replicationInfoRequestByPeer = new Map();
2767
+ this._replicationInfoApplyQueueByPeer = new Map();
2768
+ this._repairRetryTimers = new Set();
2769
+ this._recentRepairDispatch = new Map();
2770
+ this._repairSweepRunning = false;
2771
+ this._repairSweepForceFreshPending = false;
2772
+ this._repairSweepAddedPeersPending = new Set();
2773
+ this.coordinateToHash = new Cache<string>({ max: 1e6, ttl: 1e4 });
2774
+ this.recentlyRebalanced = new Cache<string>({ max: 1e4, ttl: 1e5 });
2775
+
2776
+ this.uniqueReplicators = new Set();
2777
+ this._replicatorJoinEmitted = new Set();
2778
+ this._replicatorsReconciled = false;
1902
2779
 
1903
2780
  this.openTime = +new Date();
1904
2781
  this.oldestOpenTime = this.openTime;
1905
2782
  this.distributionDebounceTime =
1906
2783
  options?.distributionDebounceTime || DEFAULT_DISTRIBUTION_DEBOUNCE_TIME; // expect > 0
2784
+ this.repairSweepTargetBufferSize = toPositiveInteger(
2785
+ options?.sync?.repairSweepTargetBufferSize,
2786
+ REPAIR_SWEEP_TARGET_BUFFER_SIZE,
2787
+ "sync.repairSweepTargetBufferSize",
2788
+ );
1907
2789
 
1908
2790
  this.timeUntilRoleMaturity =
1909
2791
  options?.timeUntilRoleMaturity ?? WAIT_FOR_ROLE_MATURITY;
@@ -1935,6 +2817,13 @@ export class SharedLog<
1935
2817
  }
1936
2818
 
1937
2819
  this._closeController = new AbortController();
2820
+ this._closeController.signal.addEventListener("abort", () => {
2821
+ for (const [_peer, state] of this._replicationInfoRequestByPeer) {
2822
+ if (state.timer) clearTimeout(state.timer);
2823
+ }
2824
+ this._replicationInfoRequestByPeer.clear();
2825
+ });
2826
+
1938
2827
  this._isTrustedReplicator = options?.canReplicate;
1939
2828
  this.keep = options?.keep;
1940
2829
  this.pendingMaturity = new Map();
@@ -1942,19 +2831,56 @@ export class SharedLog<
1942
2831
  const id = sha256Base64Sync(this.log.id);
1943
2832
  const storage = await this.node.storage.sublevel(id);
1944
2833
 
1945
- const localBlocks = await new AnyBlockStore(
1946
- await storage.sublevel("blocks"),
1947
- );
2834
+ const localBlocks = await new AnyBlockStore(await storage.sublevel("blocks"));
2835
+ const fanoutService = (this.node.services as any).fanout as FanoutTree | undefined;
2836
+ const blockProviderNamespace = (cid: string) => `cid:${cid}`;
1948
2837
  this.remoteBlocks = new RemoteBlocks({
1949
2838
  local: localBlocks,
1950
- publish: (message, options) =>
1951
- this.rpc.send(
1952
- new BlocksMessage(message),
1953
- (options as WithMode).mode instanceof AnyWhere ? undefined : options,
1954
- ),
2839
+ publish: (message, options) => this.rpc.send(new BlocksMessage(message), options),
1955
2840
  waitFor: this.rpc.waitFor.bind(this.rpc),
1956
2841
  publicKey: this.node.identity.publicKey,
1957
2842
  eagerBlocks: options?.eagerBlocks ?? true,
2843
+ resolveProviders: async (cid, opts) => {
2844
+ // 1) tracker-backed provider directory (best-effort, bounded)
2845
+ try {
2846
+ const providers = await fanoutService?.queryProviders(
2847
+ blockProviderNamespace(cid),
2848
+ {
2849
+ want: 8,
2850
+ timeoutMs: 2_000,
2851
+ queryTimeoutMs: 500,
2852
+ bootstrapMaxPeers: 2,
2853
+ signal: opts?.signal,
2854
+ },
2855
+ );
2856
+ if (providers && providers.length > 0) return providers;
2857
+ } catch {
2858
+ // ignore discovery failures
2859
+ }
2860
+
2861
+ // 2) fallback to currently connected RPC peers
2862
+ const self = this.node.identity.publicKey.hashcode();
2863
+ const out: string[] = [];
2864
+ const peers = (this.rpc as any)?.peers;
2865
+ for (const h of peers?.keys?.() ?? []) {
2866
+ if (h === self) continue;
2867
+ if (out.includes(h)) continue;
2868
+ out.push(h);
2869
+ if (out.length >= 32) break;
2870
+ }
2871
+ return out;
2872
+ },
2873
+ onPut: async (cid) => {
2874
+ // Best-effort directory announce for "get without remote.from" workflows.
2875
+ try {
2876
+ await fanoutService?.announceProvider(blockProviderNamespace(cid), {
2877
+ ttlMs: 120_000,
2878
+ bootstrapMaxPeers: 2,
2879
+ });
2880
+ } catch {
2881
+ // ignore announce failures
2882
+ }
2883
+ },
1958
2884
  });
1959
2885
 
1960
2886
  await this.remoteBlocks.start();
@@ -1981,9 +2907,10 @@ export class SharedLog<
1981
2907
  ],
1982
2908
  })) > 0;
1983
2909
 
1984
- this._gidPeersHistory = new Map();
1985
- this._requestIPruneSent = new Map();
1986
- this._requestIPruneResponseReplicatorSet = new Map();
2910
+ this._gidPeersHistory = new Map();
2911
+ this._requestIPruneSent = new Map();
2912
+ this._requestIPruneResponseReplicatorSet = new Map();
2913
+ this._checkedPruneRetries = new Map();
1987
2914
 
1988
2915
  this.replicationChangeDebounceFn = debounceAggregationChanges<
1989
2916
  ReplicationRangeIndexable<R>
@@ -2068,6 +2995,87 @@ export class SharedLog<
2068
2995
 
2069
2996
  await this.log.open(this.remoteBlocks, this.node.identity, {
2070
2997
  keychain: this.node.services.keychain,
2998
+ resolveRemotePeers: async (hash, options) => {
2999
+ if (options?.signal?.aborted) return undefined;
3000
+
3001
+ const maxPeers = 8;
3002
+ const self = this.node.identity.publicKey.hashcode();
3003
+ const seed = hashToSeed32(hash);
3004
+
3005
+ // Best hint: peers that have recently confirmed having this entry hash.
3006
+ const hinted = this._requestIPruneResponseReplicatorSet.get(hash);
3007
+ if (hinted && hinted.size > 0) {
3008
+ const peers = [...hinted].filter((p) => p !== self);
3009
+ return peers.length > 0
3010
+ ? pickDeterministicSubset(peers, seed, maxPeers)
3011
+ : undefined;
3012
+ }
3013
+
3014
+ // Next: peers we already contacted about this hash (may still have it).
3015
+ const contacted = this._requestIPruneSent.get(hash);
3016
+ if (contacted && contacted.size > 0) {
3017
+ const peers = [...contacted].filter((p) => p !== self);
3018
+ return peers.length > 0
3019
+ ? pickDeterministicSubset(peers, seed, maxPeers)
3020
+ : undefined;
3021
+ }
3022
+
3023
+ let candidates: string[] | undefined;
3024
+
3025
+ // Prefer the replicator cache; fall back to subscribers if we have no other signal.
3026
+ const replicatorCandidates = [...this.uniqueReplicators].filter(
3027
+ (p) => p !== self,
3028
+ );
3029
+ if (replicatorCandidates.length > 0) {
3030
+ candidates = replicatorCandidates;
3031
+ } else {
3032
+ try {
3033
+ const subscribers = await this._getTopicSubscribers(this.topic);
3034
+ const subscriberCandidates =
3035
+ subscribers?.map((k) => k.hashcode()).filter((p) => p !== self) ??
3036
+ [];
3037
+ candidates =
3038
+ subscriberCandidates.length > 0 ? subscriberCandidates : undefined;
3039
+ } catch {
3040
+ // Best-effort only.
3041
+ }
3042
+
3043
+ if (!candidates || candidates.length === 0) {
3044
+ // Last resort: peers we are already directly connected to. This avoids
3045
+ // depending on global membership knowledge in early-join scenarios.
3046
+ const peerMap = (this.node.services.pubsub as any)?.peers;
3047
+ if (peerMap?.keys) {
3048
+ candidates = [...peerMap.keys()];
3049
+ }
3050
+ }
3051
+
3052
+ if (!candidates || candidates.length === 0) {
3053
+ // Even if the pubsub stream has no established peer streams yet, we may
3054
+ // still have a libp2p connection to one or more peers (e.g. bootstrap).
3055
+ const connectionManager = (this.node.services.pubsub as any)?.components
3056
+ ?.connectionManager;
3057
+ const connections = connectionManager?.getConnections?.() ?? [];
3058
+ const connectionHashes: string[] = [];
3059
+ for (const conn of connections) {
3060
+ const peerId = conn?.remotePeer;
3061
+ if (!peerId) continue;
3062
+ try {
3063
+ connectionHashes.push(getPublicKeyFromPeerId(peerId).hashcode());
3064
+ } catch {
3065
+ // Best-effort only.
3066
+ }
3067
+ }
3068
+ if (connectionHashes.length > 0) {
3069
+ candidates = connectionHashes;
3070
+ }
3071
+ }
3072
+ }
3073
+
3074
+ if (!candidates || candidates.length === 0) return undefined;
3075
+ const peers = candidates.filter((p) => p !== self);
3076
+ if (peers.length === 0) return undefined;
3077
+ return pickDeterministicSubset(peers, seed, maxPeers);
3078
+ },
2071
3079
  ...this._logProperties,
2072
3080
  onChange: async (change) => {
2073
3081
  await this.onChange(change);
@@ -2148,6 +3156,7 @@ export class SharedLog<
2148
3156
  );
2149
3157
 
2150
3158
  await this.rpc.subscribe();
3159
+ await this._openFanoutChannel(options?.fanout);
2151
3160
 
2152
3161
  // mark all our replicaiton ranges as "new", this would allow other peers to understand that we recently reopend our database and might need some sync and warmup
2153
3162
  await this.updateTimestampOfOwnedReplicationRanges(); // TODO do we need to do this before subscribing?
@@ -2234,17 +3243,15 @@ export class SharedLog<
2234
3243
  await this.rebalanceParticipation();
2235
3244
 
2236
3245
  // Take into account existing subscription
2237
- (await this.node.services.pubsub.getSubscribers(this.topic))?.forEach(
2238
- (v, k) => {
2239
- if (v.equals(this.node.identity.publicKey)) {
2240
- return;
2241
- }
2242
- if (this.closed) {
2243
- return;
2244
- }
2245
- this.handleSubscriptionChange(v, [this.topic], true);
2246
- },
2247
- );
3246
+ (await this._getTopicSubscribers(this.topic))?.forEach((v) => {
3247
+ if (v.equals(this.node.identity.publicKey)) {
3248
+ return;
3249
+ }
3250
+ if (this.closed) {
3251
+ return;
3252
+ }
3253
+ this.handleSubscriptionChange(v, [this.topic], true);
3254
+ });
2248
3255
  }
2249
3256
 
2250
3257
  async reset() {
@@ -2278,7 +3285,7 @@ export class SharedLog<
2278
3285
  })
2279
3286
  .then(async () => {
2280
3287
  // is reachable, announce change events
2281
- const key = await this.node.services.pubsub.getPublicKey(
3288
+ const key = await this._resolvePublicKeyFromHash(
2282
3289
  segment.value.hash,
2283
3290
  );
2284
3291
  if (!key) {
@@ -2288,22 +3295,26 @@ export class SharedLog<
2288
3295
  );
2289
3296
  }
2290
3297
 
2291
- this.uniqueReplicators.add(key.hashcode());
3298
+ const keyHash = key.hashcode();
3299
+ this.uniqueReplicators.add(keyHash);
2292
3300
 
2293
- this.events.dispatchEvent(
2294
- new CustomEvent<ReplicatorJoinEvent>("replicator:join", {
2295
- detail: { publicKey: key },
2296
- }),
2297
- );
2298
- this.events.dispatchEvent(
2299
- new CustomEvent<ReplicationChangeEvent>(
2300
- "replication:change",
2301
- {
2302
- detail: { publicKey: key },
2303
- },
2304
- ),
2305
- );
2306
- })
3301
+ if (!this._replicatorJoinEmitted.has(keyHash)) {
3302
+ this._replicatorJoinEmitted.add(keyHash);
3303
+ this.events.dispatchEvent(
3304
+ new CustomEvent<ReplicatorJoinEvent>("replicator:join", {
3305
+ detail: { publicKey: key },
3306
+ }),
3307
+ );
3308
+ this.events.dispatchEvent(
3309
+ new CustomEvent<ReplicationChangeEvent>(
3310
+ "replication:change",
3311
+ {
3312
+ detail: { publicKey: key },
3313
+ },
3314
+ ),
3315
+ );
3316
+ }
3317
+ })
2307
3318
  .catch(async (e) => {
2308
3319
  if (isNotStartedError(e)) {
2309
3320
  return; // TODO test this path
@@ -2435,48 +3446,59 @@ export class SharedLog<
2435
3446
  numbers: this.indexableDomain.numbers,
2436
3447
  });
2437
3448
 
2438
- // Check abort signal before building result
2439
- if (options?.signal?.aborted) {
2440
- return [];
2441
- }
3449
+ // Check abort signal before building result
3450
+ if (options?.signal?.aborted) {
3451
+ return [];
3452
+ }
2442
3453
 
2443
- // add all in flight
2444
- for (const [key, _] of this.syncronizer.syncInFlight) {
2445
- set.add(key);
2446
- }
3454
+ // add all in flight
3455
+ for (const [key, _] of this.syncronizer.syncInFlight) {
3456
+ set.add(key);
3457
+ }
2447
3458
 
2448
- if (options?.reachableOnly) {
2449
- // Prefer the live pubsub subscriber set when filtering reachability.
2450
- // `uniqueReplicators` is primarily driven by replication messages and can lag during
2451
- // joins/restarts; using subscribers prevents excluding peers that are reachable but
2452
- // whose replication ranges were loaded from disk or haven't been processed yet.
2453
- const subscribers =
2454
- (await this.node.services.pubsub.getSubscribers(this.topic)) ??
2455
- undefined;
2456
- const subscriberHashcodes = subscribers
2457
- ? new Set(subscribers.map((key) => key.hashcode()))
3459
+ const selfHash = this.node.identity.publicKey.hashcode();
3460
+
3461
+ if (options?.reachableOnly) {
3462
+ const directPeers: Map<string, unknown> | undefined = (this.node.services
3463
+ .pubsub as any)?.peers;
3464
+
3465
+ // Prefer the live pubsub subscriber set when filtering reachability. In some
3466
+ // flows peers can be reachable/active even before (or without) subscriber
3467
+ // state converging, so also consider direct pubsub peers.
3468
+ const subscribers =
3469
+ (await this._getTopicSubscribers(this.topic)) ?? undefined;
3470
+ const subscriberHashcodes = subscribers
3471
+ ? new Set(subscribers.map((key) => key.hashcode()))
2458
3472
  : undefined;
2459
3473
 
3474
+ // If reachability is requested but we have no basis for filtering yet
3475
+ // (subscriber snapshot hasn't converged), return the full cover set.
3476
+ // Otherwise, only keep peers we can currently reach.
3477
+ const canFilter =
3478
+ directPeers != null ||
3479
+ (subscriberHashcodes && subscriberHashcodes.size > 0);
3480
+ if (!canFilter) {
3481
+ return [...set];
3482
+ }
3483
+
2460
3484
  const reachable: string[] = [];
2461
- const selfHash = this.node.identity.publicKey.hashcode();
2462
3485
  for (const peer of set) {
2463
3486
  if (peer === selfHash) {
2464
3487
  reachable.push(peer);
2465
3488
  continue;
2466
3489
  }
2467
3490
  if (
2468
- subscriberHashcodes
2469
- ? subscriberHashcodes.has(peer)
2470
- : this.uniqueReplicators.has(peer)
3491
+ (subscriberHashcodes && subscriberHashcodes.has(peer)) ||
3492
+ (directPeers && directPeers.has(peer))
2471
3493
  ) {
2472
3494
  reachable.push(peer);
2473
3495
  }
2474
3496
  }
2475
3497
  return reachable;
2476
- }
3498
+ }
2477
3499
 
2478
- return [...set];
2479
- } catch (error) {
3500
+ return [...set];
3501
+ } catch (error) {
2480
3502
  // Handle race conditions where the index gets closed during the operation
2481
3503
  if (isNotStartedError(error as Error)) {
2482
3504
  return [];
@@ -2497,6 +3519,13 @@ export class SharedLog<
2497
3519
  this.pendingMaturity.clear();
2498
3520
 
2499
3521
  this.distributeQueue?.clear();
3522
+ this._closeFanoutChannel();
3523
+ try {
3524
+ this._providerHandle?.close();
3525
+ } catch {
3526
+ // ignore
3527
+ }
3528
+ this._providerHandle = undefined;
2500
3529
  this.coordinateToHash.clear();
2501
3530
  this.recentlyRebalanced.clear();
2502
3531
  this.uniqueReplicators.clear();
@@ -2513,33 +3542,100 @@ export class SharedLog<
2513
3542
  "unsubscribe",
2514
3543
  this._onUnsubscriptionFn,
2515
3544
  );
3545
+ for (const timer of this._repairRetryTimers) {
3546
+ clearTimeout(timer);
3547
+ }
3548
+ this._repairRetryTimers.clear();
3549
+ this._recentRepairDispatch.clear();
3550
+ this._repairSweepRunning = false;
3551
+ this._repairSweepForceFreshPending = false;
3552
+ this._repairSweepAddedPeersPending.clear();
2516
3553
 
2517
3554
  for (const [_k, v] of this._pendingDeletes) {
2518
3555
  v.clear();
2519
3556
  v.promise.resolve(); // TODO or reject?
2520
3557
  }
2521
- for (const [_k, v] of this._pendingIHave) {
2522
- v.clear();
2523
- }
3558
+ for (const [_k, v] of this._pendingIHave) {
3559
+ v.clear();
3560
+ }
3561
+ for (const [_k, v] of this._checkedPruneRetries) {
3562
+ if (v.timer) clearTimeout(v.timer);
3563
+ }
2524
3564
 
2525
3565
  await this.remoteBlocks.stop();
2526
- this._pendingDeletes.clear();
2527
- this._pendingIHave.clear();
2528
- this.latestReplicationInfoMessage.clear();
2529
- this._gidPeersHistory.clear();
2530
- this._requestIPruneSent.clear();
2531
- this._requestIPruneResponseReplicatorSet.clear();
2532
- this.pruneDebouncedFn = undefined as any;
2533
- this.rebalanceParticipationDebounced = undefined;
2534
- this._replicationRangeIndex.stop();
2535
- this._entryCoordinatesIndex.stop();
3566
+ this._pendingDeletes.clear();
3567
+ this._pendingIHave.clear();
3568
+ this._checkedPruneRetries.clear();
3569
+ this.latestReplicationInfoMessage.clear();
3570
+ this._gidPeersHistory.clear();
3571
+ this._requestIPruneSent.clear();
3572
+ this._requestIPruneResponseReplicatorSet.clear();
3573
+ // Cancel any pending debounced timers so they can't fire after we've torn down
3574
+ // indexes/RPC state.
3575
+ this.rebalanceParticipationDebounced?.close();
3576
+ this.replicationChangeDebounceFn?.close?.();
3577
+ this.pruneDebouncedFn?.close?.();
3578
+ this.responseToPruneDebouncedFn?.close?.();
3579
+ this.pruneDebouncedFn = undefined as any;
3580
+ this.rebalanceParticipationDebounced = undefined;
3581
+ this._replicationRangeIndex.stop();
3582
+ this._entryCoordinatesIndex.stop();
2536
3583
  this._replicationRangeIndex = undefined as any;
2537
3584
  this._entryCoordinatesIndex = undefined as any;
2538
3585
 
2539
3586
  this.cpuUsage?.stop?.();
2540
3587
  /* this._totalParticipation = 0; */
2541
3588
  }
2542
- async close(from?: Program): Promise<boolean> {
3589
+ async close(from?: Program): Promise<boolean> {
3590
+ // Best-effort: announce that we are going offline before tearing down
3591
+ // RPC/subscription state.
3592
+ //
3593
+ // Important: do not delete our local replication ranges here. Keeping them
3594
+ // allows `replicate: { type: "resume" }` to restore the previous role on
3595
+ // restart. Explicit `unreplicate()` still clears local state.
3596
+ try {
3597
+ if (!this.closed) {
3598
+ // Prevent any late debounced timers (rebalance/prune) from publishing
3599
+ // replication info after we announce "segments: []". These races can leave
3600
+ // stale segments on remotes after rapid open/close cycles.
3601
+ this._isReplicating = false;
3602
+ this._isAdaptiveReplicating = false;
3603
+ this.rebalanceParticipationDebounced?.close();
3604
+ this.replicationChangeDebounceFn?.close?.();
3605
+ this.pruneDebouncedFn?.close?.();
3606
+ this.responseToPruneDebouncedFn?.close?.();
3607
+
3608
+ // Ensure the "I'm leaving" replication reset is actually published before
3609
+ // the RPC child program closes and unsubscribes from its topic. If we fire
3610
+ // and forget here, the publish can race with `super.close()` and get dropped,
3611
+ // leaving stale replication segments on remotes (flaky join/leave tests).
3612
+ // Also ensure close is bounded even when shard overlays are mid-reconcile.
3613
+ const abort = new AbortController();
3614
+ const abortTimer = setTimeout(() => {
3615
+ try {
3616
+ abort.abort(
3617
+ new TimeoutError(
3618
+ "shared-log close replication reset timed out",
3619
+ ),
3620
+ );
3621
+ } catch {
3622
+ abort.abort();
3623
+ }
3624
+ }, 2_000);
3625
+ try {
3626
+ await this.rpc
3627
+ .send(new AllReplicatingSegmentsMessage({ segments: [] }), {
3628
+ priority: 1,
3629
+ signal: abort.signal,
3630
+ })
3631
+ .catch(() => {});
3632
+ } finally {
3633
+ clearTimeout(abortTimer);
3634
+ }
3635
+ }
3636
+ } catch {
3637
+ // ignore: close should be resilient even if we were never fully started
3638
+ }
2543
3639
  const superClosed = await super.close(from);
2544
3640
  if (!superClosed) {
2545
3641
  return superClosed;
@@ -2549,12 +3645,50 @@ export class SharedLog<
2549
3645
  return true;
2550
3646
  }
2551
3647
 
2552
- async drop(from?: Program): Promise<boolean> {
2553
- const superDropped = await super.drop(from);
2554
- if (!superDropped) {
2555
- return superDropped;
2556
- }
2557
- await this._entryCoordinatesIndex.drop();
3648
+ async drop(from?: Program): Promise<boolean> {
3649
+ // Best-effort: announce that we are going offline before tearing down
3650
+ // RPC/subscription state (same reasoning as in `close()`).
3651
+ try {
3652
+ if (!this.closed) {
3653
+ this._isReplicating = false;
3654
+ this._isAdaptiveReplicating = false;
3655
+ this.rebalanceParticipationDebounced?.close();
3656
+ this.replicationChangeDebounceFn?.close?.();
3657
+ this.pruneDebouncedFn?.close?.();
3658
+ this.responseToPruneDebouncedFn?.close?.();
3659
+
3660
+ const abort = new AbortController();
3661
+ const abortTimer = setTimeout(() => {
3662
+ try {
3663
+ abort.abort(
3664
+ new TimeoutError(
3665
+ "shared-log drop replication reset timed out",
3666
+ ),
3667
+ );
3668
+ } catch {
3669
+ abort.abort();
3670
+ }
3671
+ }, 2_000);
3672
+ try {
3673
+ await this.rpc
3674
+ .send(new AllReplicatingSegmentsMessage({ segments: [] }), {
3675
+ priority: 1,
3676
+ signal: abort.signal,
3677
+ })
3678
+ .catch(() => {});
3679
+ } finally {
3680
+ clearTimeout(abortTimer);
3681
+ }
3682
+ }
3683
+ } catch {
3684
+ // ignore: drop should be resilient even if we were never fully started
3685
+ }
3686
+
3687
+ const superDropped = await super.drop(from);
3688
+ if (!superDropped) {
3689
+ return superDropped;
3690
+ }
3691
+ await this._entryCoordinatesIndex.drop();
2558
3692
  await this._replicationRangeIndex.drop();
2559
3693
  await this.log.drop();
2560
3694
  await this._close();
@@ -2609,7 +3743,6 @@ export class SharedLog<
2609
3743
  if (filteredHeads.length === 0) {
2610
3744
  return;
2611
3745
  }
2612
-
2613
3746
  const groupedByGid = await groupByGid(filteredHeads);
2614
3747
  const promises: Promise<void>[] = [];
2615
3748
 
@@ -2921,20 +4054,20 @@ export class SharedLog<
2921
4054
  return;
2922
4055
  }
2923
4056
 
2924
- const segments = (await this.getMyReplicationSegments()).map((x) =>
2925
- x.toReplicationRange(),
2926
- );
4057
+ const segments = (await this.getMyReplicationSegments()).map((x) =>
4058
+ x.toReplicationRange(),
4059
+ );
2927
4060
 
2928
- this.rpc
2929
- .send(new AllReplicatingSegmentsMessage({ segments }), {
2930
- mode: new SeekDelivery({ to: [context.from], redundancy: 1 }),
2931
- })
2932
- .catch((e) => logger.error(e.toString()));
4061
+ this.rpc
4062
+ .send(new AllReplicatingSegmentsMessage({ segments }), {
4063
+ mode: new AcknowledgeDelivery({ to: [context.from], redundancy: 1 }),
4064
+ })
4065
+ .catch((e) => logger.error(e.toString()));
2933
4066
 
2934
- // for backwards compatibility (v8) remove this when we are sure that all nodes are v9+
2935
- if (this.v8Behaviour) {
2936
- const role = this.getRole();
2937
- if (role instanceof Replicator) {
4067
+ // for backwards compatibility (v8) remove this when we are sure that all nodes are v9+
4068
+ if (this.v8Behaviour) {
4069
+ const role = this.getRole();
4070
+ if (role instanceof Replicator) {
2938
4071
  const fixedSettings = !this._isAdaptiveReplicating;
2939
4072
  if (fixedSettings) {
2940
4073
  await this.rpc.send(
@@ -2959,71 +4092,91 @@ export class SharedLog<
2959
4092
  return;
2960
4093
  }
2961
4094
 
2962
- const replicationInfoMessage = msg as
2963
- | AllReplicatingSegmentsMessage
2964
- | AddedReplicationSegmentMessage;
2965
-
2966
- // Process replication updates even if the sender isn't yet considered "ready" by
2967
- // `Program.waitFor()`. Dropping these messages can lead to missing replicator info
2968
- // (and downstream `waitForReplicator()` timeouts) under timing-sensitive joins.
2969
- const from = context.from!;
2970
- const messageTimestamp = context.message.header.timestamp;
2971
- (async () => {
2972
- const prev = this.latestReplicationInfoMessage.get(from.hashcode());
2973
- if (prev && prev > messageTimestamp) {
4095
+ const replicationInfoMessage = msg as
4096
+ | AllReplicatingSegmentsMessage
4097
+ | AddedReplicationSegmentMessage;
4098
+
4099
+ // Process replication updates even if the sender isn't yet considered "ready" by
4100
+ // `Program.waitFor()`. Dropping these messages can lead to missing replicator info
4101
+ // (and downstream `waitForReplicator()` timeouts) under timing-sensitive joins.
4102
+ const from = context.from!;
4103
+ const fromHash = from.hashcode();
4104
+ if (this._replicationInfoBlockedPeers.has(fromHash)) {
2974
4105
  return;
2975
4106
  }
4107
+ const messageTimestamp = context.message.header.timestamp;
4108
+ await this.withReplicationInfoApplyQueue(fromHash, async () => {
4109
+ try {
4110
+ // The peer may have unsubscribed after this message was queued.
4111
+ if (this._replicationInfoBlockedPeers.has(fromHash)) {
4112
+ return;
4113
+ }
2976
4114
 
2977
- this.latestReplicationInfoMessage.set(
2978
- from.hashcode(),
2979
- messageTimestamp,
2980
- );
4115
+ // Process in-order to avoid races where repeated reset messages arrive
4116
+ // concurrently and trigger spurious "added" diffs / rebalancing.
4117
+ const prev = this.latestReplicationInfoMessage.get(fromHash);
4118
+ if (prev && prev > messageTimestamp) {
4119
+ return;
4120
+ }
2981
4121
 
2982
- if (this.closed) {
2983
- return;
2984
- }
4122
+ this.latestReplicationInfoMessage.set(fromHash, messageTimestamp);
2985
4123
 
2986
- const reset = msg instanceof AllReplicatingSegmentsMessage;
2987
- await this.addReplicationRange(
2988
- replicationInfoMessage.segments.map((x) =>
2989
- x.toReplicationRangeIndexable(from),
2990
- ),
2991
- from,
2992
- {
2993
- reset,
2994
- checkDuplicates: true,
2995
- timestamp: Number(messageTimestamp),
2996
- },
2997
- );
2998
- })().catch((e) => {
2999
- if (isNotStartedError(e)) {
3000
- return;
3001
- }
3002
- logger.error(
3003
- `Failed to apply replication settings from '${from.hashcode()}': ${
3004
- e?.message ?? e
3005
- }`,
3006
- );
3007
- });
3008
- } else if (msg instanceof StoppedReplicating) {
3009
- if (context.from.equals(this.node.identity.publicKey)) {
3010
- return;
3011
- }
4124
+ if (this.closed) {
4125
+ return;
4126
+ }
3012
4127
 
3013
- const rangesToRemove = await this.resolveReplicationRangesFromIdsAndKey(
3014
- msg.segmentIds,
3015
- context.from,
3016
- );
4128
+ const reset = msg instanceof AllReplicatingSegmentsMessage;
4129
+ await this.addReplicationRange(
4130
+ replicationInfoMessage.segments.map((x) =>
4131
+ x.toReplicationRangeIndexable(from),
4132
+ ),
4133
+ from,
4134
+ {
4135
+ reset,
4136
+ checkDuplicates: true,
4137
+ timestamp: Number(messageTimestamp),
4138
+ },
4139
+ );
3017
4140
 
3018
- await this.removeReplicationRanges(rangesToRemove, context.from);
3019
- const timestamp = BigInt(+new Date());
3020
- for (const range of rangesToRemove) {
3021
- this.replicationChangeDebounceFn.add({
3022
- range,
3023
- type: "removed",
3024
- timestamp,
4141
+ // If the peer reports any replication segments, stop re-requesting.
4142
+ // (Empty reports can be transient during startup.)
4143
+ if (replicationInfoMessage.segments.length > 0) {
4144
+ this.cancelReplicationInfoRequests(fromHash);
4145
+ }
4146
+ } catch (e) {
4147
+ if (isNotStartedError(e as Error)) {
4148
+ return;
4149
+ }
4150
+ logger.error(
4151
+ `Failed to apply replication settings from '${fromHash}': ${
4152
+ (e as any)?.message ?? e
4153
+ }`,
4154
+ );
4155
+ }
3025
4156
  });
3026
- }
4157
+ } else if (msg instanceof StoppedReplicating) {
4158
+ if (context.from.equals(this.node.identity.publicKey)) {
4159
+ return;
4160
+ }
4161
+ const fromHash = context.from.hashcode();
4162
+ if (this._replicationInfoBlockedPeers.has(fromHash)) {
4163
+ return;
4164
+ }
4165
+
4166
+ const rangesToRemove = await this.resolveReplicationRangesFromIdsAndKey(
4167
+ msg.segmentIds,
4168
+ context.from,
4169
+ );
4170
+
4171
+ await this.removeReplicationRanges(rangesToRemove, context.from);
4172
+ const timestamp = BigInt(+new Date());
4173
+ for (const range of rangesToRemove) {
4174
+ this.replicationChangeDebounceFn.add({
4175
+ range,
4176
+ type: "removed",
4177
+ timestamp,
4178
+ });
4179
+ }
3027
4180
  } else {
3028
4181
  throw new Error("Unexpected message");
3029
4182
  }
@@ -3325,10 +4478,10 @@ export class SharedLog<
3325
4478
  }
3326
4479
  }
3327
4480
 
3328
- async waitForReplicator(
3329
- key: PublicSignKey,
3330
- options?: {
3331
- signal?: AbortSignal;
4481
+ async waitForReplicator(
4482
+ key: PublicSignKey,
4483
+ options?: {
4484
+ signal?: AbortSignal;
3332
4485
  eager?: boolean;
3333
4486
  roleAge?: number;
3334
4487
  timeout?: number;
@@ -3340,9 +4493,9 @@ export class SharedLog<
3340
4493
  ? undefined
3341
4494
  : (options?.roleAge ?? (await this.getDefaultMinRoleAge()));
3342
4495
 
3343
- let settled = false;
3344
- let timer: ReturnType<typeof setTimeout> | undefined;
3345
- let requestTimer: ReturnType<typeof setTimeout> | undefined;
4496
+ let settled = false;
4497
+ let timer: ReturnType<typeof setTimeout> | undefined;
4498
+ let requestTimer: ReturnType<typeof setTimeout> | undefined;
3346
4499
 
3347
4500
  const clear = () => {
3348
4501
  this.events.removeEventListener("replicator:mature", check);
@@ -3358,14 +4511,19 @@ export class SharedLog<
3358
4511
  }
3359
4512
  };
3360
4513
 
3361
- const resolve = () => {
3362
- if (settled) {
3363
- return;
3364
- }
3365
- settled = true;
3366
- clear();
3367
- deferred.resolve();
3368
- };
4514
+ const resolve = async () => {
4515
+ if (settled) {
4516
+ return;
4517
+ }
4518
+ settled = true;
4519
+ clear();
4520
+ // `waitForReplicator()` is typically used as a precondition before join/replicate
4521
+ // flows. A replicator can become mature and enqueue a debounced rebalance
4522
+ // (`replicationChangeDebounceFn`) slightly later. Flush here so callers don't
4523
+ // observe a "late" rebalance after the wait resolves.
4524
+ await this.replicationChangeDebounceFn?.flush?.();
4525
+ deferred.resolve();
4526
+ };
3369
4527
 
3370
4528
  const reject = (error: Error) => {
3371
4529
  if (settled) {
@@ -3409,13 +4567,14 @@ export class SharedLog<
3409
4567
 
3410
4568
  this.rpc
3411
4569
  .send(new RequestReplicationInfoMessage(), {
3412
- mode: new SeekDelivery({ redundancy: 1, to: [key] }),
4570
+ mode: new AcknowledgeDelivery({ redundancy: 1, to: [key] }),
3413
4571
  })
3414
4572
  .catch((e) => {
3415
4573
  // Best-effort: missing peers / unopened RPC should not fail the wait logic.
3416
4574
  if (isNotStartedError(e as Error)) {
3417
4575
  return;
3418
4576
  }
4577
+ logger.error(e?.toString?.() ?? String(e));
3419
4578
  });
3420
4579
 
3421
4580
  if (requestAttempts < maxRequestAttempts) {
@@ -3423,29 +4582,29 @@ export class SharedLog<
3423
4582
  }
3424
4583
  };
3425
4584
 
3426
- const check = async () => {
3427
- const iterator = this.replicationIndex?.iterate(
3428
- { query: new StringMatch({ key: "hash", value: key.hashcode() }) },
3429
- { reference: true },
3430
- );
3431
- try {
3432
- const rects = await iterator?.next(1);
3433
- const rect = rects?.[0]?.value;
3434
- if (!rect) {
3435
- return;
3436
- }
3437
- if (!options?.eager && resolvedRoleAge != null) {
3438
- if (!isMatured(rect, +new Date(), resolvedRoleAge)) {
4585
+ const check = async () => {
4586
+ const iterator = this.replicationIndex?.iterate(
4587
+ { query: new StringMatch({ key: "hash", value: key.hashcode() }) },
4588
+ { reference: true },
4589
+ );
4590
+ try {
4591
+ const rects = await iterator?.next(1);
4592
+ const rect = rects?.[0]?.value;
4593
+ if (!rect) {
3439
4594
  return;
3440
4595
  }
4596
+ if (!options?.eager && resolvedRoleAge != null) {
4597
+ if (!isMatured(rect, +new Date(), resolvedRoleAge)) {
4598
+ return;
4599
+ }
4600
+ }
4601
+ await resolve();
4602
+ } catch (error) {
4603
+ reject(error instanceof Error ? error : new Error(String(error)));
4604
+ } finally {
4605
+ await iterator?.close();
3441
4606
  }
3442
- resolve();
3443
- } catch (error) {
3444
- reject(error instanceof Error ? error : new Error(String(error)));
3445
- } finally {
3446
- await iterator?.close();
3447
- }
3448
- };
4607
+ };
3449
4608
 
3450
4609
  requestReplicationInfo();
3451
4610
  check();
@@ -3462,15 +4621,6 @@ export class SharedLog<
3462
4621
  coverageThreshold?: number;
3463
4622
  waitForNewPeers?: boolean;
3464
4623
  }) {
3465
- // if no remotes, just return
3466
- const subscribers = await this.node.services.pubsub.getSubscribers(
3467
- this.rpc.topic,
3468
- );
3469
- let waitForNewPeers = options?.waitForNewPeers;
3470
- if (!waitForNewPeers && (subscribers?.length ?? 0) === 0) {
3471
- throw new NoPeersError(this.rpc.topic);
3472
- }
3473
-
3474
4624
  let coverageThreshold = options?.coverageThreshold ?? 1;
3475
4625
  let deferred = pDefer<void>();
3476
4626
  let settled = false;
@@ -3584,6 +4734,7 @@ export class SharedLog<
3584
4734
  const timeout = options.timeout ?? this.waitForReplicatorTimeout;
3585
4735
 
3586
4736
  return new Promise((resolve, reject) => {
4737
+ let settled = false;
3587
4738
  const removeListeners = () => {
3588
4739
  this.events.removeEventListener("replication:change", roleListener);
3589
4740
  this.events.removeEventListener("replicator:mature", roleListener); // TODO replication:change event ?
@@ -3592,15 +4743,26 @@ export class SharedLog<
3592
4743
  abortListener,
3593
4744
  );
3594
4745
  };
3595
- const abortListener = () => {
4746
+ const settleResolve = (value: Map<string, { intersecting: boolean }> | false) => {
4747
+ if (settled) return;
4748
+ settled = true;
4749
+ removeListeners();
4750
+ clearTimeout(timer);
4751
+ resolve(value);
4752
+ };
4753
+ const settleReject = (error: unknown) => {
4754
+ if (settled) return;
4755
+ settled = true;
3596
4756
  removeListeners();
3597
4757
  clearTimeout(timer);
3598
- resolve(false);
4758
+ reject(error);
4759
+ };
4760
+ const abortListener = () => {
4761
+ settleResolve(false);
3599
4762
  };
3600
4763
 
3601
4764
  const timer = setTimeout(async () => {
3602
- removeListeners();
3603
- resolve(false);
4765
+ settleResolve(false);
3604
4766
  }, timeout);
3605
4767
 
3606
4768
  const check = async () => {
@@ -3624,19 +4786,22 @@ export class SharedLog<
3624
4786
  }
3625
4787
  options?.onLeader && leaderKeys.forEach(options.onLeader);
3626
4788
 
3627
- removeListeners();
3628
- clearTimeout(timer);
3629
- resolve(leaders);
4789
+ settleResolve(leaders);
4790
+ };
4791
+ const runCheck = () => {
4792
+ void check().catch((error) => {
4793
+ settleReject(error);
4794
+ });
3630
4795
  };
3631
4796
 
3632
4797
  const roleListener = () => {
3633
- check();
4798
+ runCheck();
3634
4799
  };
3635
4800
 
3636
4801
  this.events.addEventListener("replication:change", roleListener); // TODO replication:change event ?
3637
4802
  this.events.addEventListener("replicator:mature", roleListener); // TODO replication:change event ?
3638
4803
  this._closeController.signal.addEventListener("abort", abortListener);
3639
- check();
4804
+ runCheck();
3640
4805
  });
3641
4806
  }
3642
4807
 
@@ -3735,9 +4900,7 @@ export class SharedLog<
3735
4900
  let subscribers = 1;
3736
4901
  if (!this.rpc.closed) {
3737
4902
  try {
3738
- subscribers =
3739
- (await this.node.services.pubsub.getSubscribers(this.rpc.topic))
3740
- ?.length ?? 1;
4903
+ subscribers = (await this._getTopicSubscribers(this.rpc.topic))?.length ?? 1;
3741
4904
  } catch {
3742
4905
  // Best-effort only; fall back to 1.
3743
4906
  }
@@ -3852,22 +5015,45 @@ export class SharedLog<
3852
5015
  const roleAge = options?.roleAge ?? (await this.getDefaultMinRoleAge()); // TODO -500 as is added so that i f someone else is just as new as us, then we treat them as mature as us. without -500 we might be slower syncing if two nodes starts almost at the same time
3853
5016
  const selfHash = this.node.identity.publicKey.hashcode();
3854
5017
 
3855
- // Use `uniqueReplicators` (replicator cache) once we've reconciled it against the
3856
- // persisted replication index. Until then, fall back to live pubsub subscribers
3857
- // and avoid relying on `uniqueReplicators` being complete.
5018
+ // Prefer `uniqueReplicators` (replicator cache) as soon as it has any data.
5019
+ // If it is still warming up (for example, only contains self), supplement with
5020
+ // current subscribers until we have enough candidates for this decision.
3858
5021
  let peerFilter: Set<string> | undefined = undefined;
3859
- if (this._replicatorsReconciled && this.uniqueReplicators.size > 0) {
3860
- peerFilter = this.uniqueReplicators.has(selfHash)
3861
- ? this.uniqueReplicators
3862
- : new Set([...this.uniqueReplicators, selfHash]);
5022
+ const selfReplicating = await this.isReplicating();
5023
+ if (this.uniqueReplicators.size > 0) {
5024
+ peerFilter = new Set(this.uniqueReplicators);
5025
+ if (selfReplicating) {
5026
+ peerFilter.add(selfHash);
5027
+ } else {
5028
+ peerFilter.delete(selfHash);
5029
+ }
5030
+
5031
+ try {
5032
+ const subscribers = await this._getTopicSubscribers(this.topic);
5033
+ if (subscribers && subscribers.length > 0) {
5034
+ for (const subscriber of subscribers) {
5035
+ peerFilter.add(subscriber.hashcode());
5036
+ }
5037
+ if (selfReplicating) {
5038
+ peerFilter.add(selfHash);
5039
+ } else {
5040
+ peerFilter.delete(selfHash);
5041
+ }
5042
+ }
5043
+ } catch {
5044
+ // Best-effort only; keep current peerFilter.
5045
+ }
3863
5046
  } else {
3864
5047
  try {
3865
5048
  const subscribers =
3866
- (await this.node.services.pubsub.getSubscribers(this.topic)) ??
3867
- undefined;
5049
+ (await this._getTopicSubscribers(this.topic)) ?? undefined;
3868
5050
  if (subscribers && subscribers.length > 0) {
3869
5051
  peerFilter = new Set(subscribers.map((key) => key.hashcode()));
3870
- peerFilter.add(selfHash);
5052
+ if (selfReplicating) {
5053
+ peerFilter.add(selfHash);
5054
+ } else {
5055
+ peerFilter.delete(selfHash);
5056
+ }
3871
5057
  }
3872
5058
  } catch {
3873
5059
  // Best-effort only; if pubsub isn't ready, do a full scan.
@@ -3913,76 +5099,171 @@ export class SharedLog<
3913
5099
  );
3914
5100
  }
3915
5101
 
3916
- async handleSubscriptionChange(
3917
- publicKey: PublicSignKey,
3918
- topics: string[],
3919
- subscribed: boolean,
3920
- ) {
3921
- if (!topics.includes(this.topic)) {
5102
+ private withReplicationInfoApplyQueue(
5103
+ peerHash: string,
5104
+ fn: () => Promise<void>,
5105
+ ): Promise<void> {
5106
+ const prev = this._replicationInfoApplyQueueByPeer.get(peerHash);
5107
+ const next = (prev ?? Promise.resolve())
5108
+ .catch(() => {
5109
+ // Avoid stuck queues if a previous apply failed.
5110
+ })
5111
+ .then(fn);
5112
+ this._replicationInfoApplyQueueByPeer.set(peerHash, next);
5113
+ return next.finally(() => {
5114
+ if (this._replicationInfoApplyQueueByPeer.get(peerHash) === next) {
5115
+ this._replicationInfoApplyQueueByPeer.delete(peerHash);
5116
+ }
5117
+ });
5118
+ }
5119
+
5120
+ private cancelReplicationInfoRequests(peerHash: string) {
5121
+ const state = this._replicationInfoRequestByPeer.get(peerHash);
5122
+ if (!state) return;
5123
+ if (state.timer) {
5124
+ clearTimeout(state.timer);
5125
+ }
5126
+ this._replicationInfoRequestByPeer.delete(peerHash);
5127
+ }
5128
+
5129
+ private scheduleReplicationInfoRequests(peer: PublicSignKey) {
5130
+ const peerHash = peer.hashcode();
5131
+ if (this._replicationInfoRequestByPeer.has(peerHash)) {
3922
5132
  return;
3923
5133
  }
3924
5134
 
3925
- if (!subscribed) {
3926
- this.removePeerFromGidPeerHistory(publicKey.hashcode());
5135
+ const state: { attempts: number; timer?: ReturnType<typeof setTimeout> } = {
5136
+ attempts: 0,
5137
+ };
5138
+ this._replicationInfoRequestByPeer.set(peerHash, state);
3927
5139
 
3928
- for (const [k, v] of this._requestIPruneSent) {
3929
- v.delete(publicKey.hashcode());
3930
- if (v.size === 0) {
3931
- this._requestIPruneSent.delete(k);
3932
- }
5140
+ const intervalMs = Math.max(50, this.waitForReplicatorRequestIntervalMs);
5141
+ const maxAttempts = Math.min(
5142
+ 5,
5143
+ this.waitForReplicatorRequestMaxAttempts ??
5144
+ WAIT_FOR_REPLICATOR_REQUEST_MIN_ATTEMPTS,
5145
+ );
5146
+
5147
+ const tick = () => {
5148
+ if (this.closed || this._closeController.signal.aborted) {
5149
+ this.cancelReplicationInfoRequests(peerHash);
5150
+ return;
3933
5151
  }
3934
5152
 
3935
- for (const [k, v] of this._requestIPruneResponseReplicatorSet) {
3936
- v.delete(publicKey.hashcode());
3937
- if (v.size === 0) {
3938
- this._requestIPruneResponseReplicatorSet.delete(k);
3939
- }
5153
+ state.attempts++;
5154
+
5155
+ this.rpc
5156
+ .send(new RequestReplicationInfoMessage(), {
5157
+ mode: new AcknowledgeDelivery({ redundancy: 1, to: [peer] }),
5158
+ })
5159
+ .catch((e) => {
5160
+ // Best-effort: missing peers / unopened RPC should not fail join flows.
5161
+ if (isNotStartedError(e as Error)) {
5162
+ return;
5163
+ }
5164
+ logger.error(e?.toString?.() ?? String(e));
5165
+ });
5166
+
5167
+ if (state.attempts >= maxAttempts) {
5168
+ this.cancelReplicationInfoRequests(peerHash);
5169
+ return;
3940
5170
  }
3941
5171
 
3942
- this.syncronizer.onPeerDisconnected(publicKey);
5172
+ state.timer = setTimeout(tick, intervalMs);
5173
+ state.timer.unref?.();
5174
+ };
3943
5175
 
3944
- (await this.replicationIndex.count({
3945
- query: { hash: publicKey.hashcode() },
3946
- })) > 0 &&
3947
- this.events.dispatchEvent(
3948
- new CustomEvent<ReplicatorLeaveEvent>("replicator:leave", {
3949
- detail: { publicKey },
3950
- }),
3951
- );
3952
- }
5176
+ tick();
5177
+ }
3953
5178
 
3954
- if (subscribed) {
3955
- const replicationSegments = await this.getMyReplicationSegments();
3956
- if (replicationSegments.length > 0) {
3957
- this.rpc
3958
- .send(
3959
- new AllReplicatingSegmentsMessage({
3960
- segments: replicationSegments.map((x) => x.toReplicationRange()),
5179
+ async handleSubscriptionChange(
5180
+ publicKey: PublicSignKey,
5181
+ topics: string[],
5182
+ subscribed: boolean,
5183
+ ) {
5184
+ if (!topics.includes(this.topic)) {
5185
+ return;
5186
+ }
5187
+
5188
+ const peerHash = publicKey.hashcode();
5189
+ if (subscribed) {
5190
+ this._replicationInfoBlockedPeers.delete(peerHash);
5191
+ } else {
5192
+ this._replicationInfoBlockedPeers.add(peerHash);
5193
+ }
5194
+
5195
+ if (!subscribed) {
5196
+ const wasReplicator = this.uniqueReplicators.has(peerHash);
5197
+ try {
5198
+ // Unsubscribe can race with the peer's final replication reset message.
5199
+ // Proactively evict its ranges so leader selection doesn't keep stale owners.
5200
+ await this.removeReplicator(publicKey, { noEvent: true });
5201
+ } catch (error) {
5202
+ if (!isNotStartedError(error as Error)) {
5203
+ throw error;
5204
+ }
5205
+ }
5206
+
5207
+ // Emit replicator:leave at most once per (join -> leave) transition, even if we
5208
+ // concurrently process unsubscribe + replication reset messages for the same peer.
5209
+ const stoppedTransition = wasReplicator;
5210
+ this._replicatorJoinEmitted.delete(peerHash);
5211
+
5212
+ this.cancelReplicationInfoRequests(peerHash);
5213
+ this.removePeerFromGidPeerHistory(peerHash);
5214
+
5215
+ for (const [k, v] of this._requestIPruneSent) {
5216
+ v.delete(peerHash);
5217
+ if (v.size === 0) {
5218
+ this._requestIPruneSent.delete(k);
5219
+ }
5220
+ }
5221
+
5222
+ for (const [k, v] of this._requestIPruneResponseReplicatorSet) {
5223
+ v.delete(peerHash);
5224
+ if (v.size === 0) {
5225
+ this._requestIPruneResponseReplicatorSet.delete(k);
5226
+ }
5227
+ }
5228
+
5229
+ this.syncronizer.onPeerDisconnected(publicKey);
5230
+
5231
+ stoppedTransition &&
5232
+ this.events.dispatchEvent(
5233
+ new CustomEvent<ReplicatorLeaveEvent>("replicator:leave", {
5234
+ detail: { publicKey },
3961
5235
  }),
3962
- {
3963
- mode: new SeekDelivery({ redundancy: 1, to: [publicKey] }),
3964
- },
3965
- )
3966
- .catch((e) => logger.error(e.toString()));
5236
+ );
5237
+ }
3967
5238
 
3968
- if (this.v8Behaviour) {
3969
- // for backwards compatibility
5239
+ if (subscribed) {
5240
+ const replicationSegments = await this.getMyReplicationSegments();
5241
+ if (replicationSegments.length > 0) {
3970
5242
  this.rpc
3971
- .send(new ResponseRoleMessage({ role: await this.getRole() }), {
3972
- mode: new SeekDelivery({ redundancy: 1, to: [publicKey] }),
3973
- })
5243
+ .send(
5244
+ new AllReplicatingSegmentsMessage({
5245
+ segments: replicationSegments.map((x) => x.toReplicationRange()),
5246
+ }),
5247
+ {
5248
+ mode: new AcknowledgeDelivery({ redundancy: 1, to: [publicKey] }),
5249
+ },
5250
+ )
3974
5251
  .catch((e) => logger.error(e.toString()));
5252
+
5253
+ if (this.v8Behaviour) {
5254
+ // for backwards compatibility
5255
+ this.rpc
5256
+ .send(new ResponseRoleMessage({ role: await this.getRole() }), {
5257
+ mode: new AcknowledgeDelivery({ redundancy: 1, to: [publicKey] }),
5258
+ })
5259
+ .catch((e) => logger.error(e.toString()));
5260
+ }
3975
5261
  }
3976
- }
3977
5262
 
3978
- // Request the remote peer's replication info. This makes joins resilient to
3979
- // timing-sensitive delivery/order issues where we may miss their initial
5263
+ // Request the remote peer's replication info. This makes joins resilient to
5264
+ // timing-sensitive delivery/order issues where we may miss their initial
3980
5265
  // replication announcement.
3981
- this.rpc
3982
- .send(new RequestReplicationInfoMessage(), {
3983
- mode: new SeekDelivery({ redundancy: 1, to: [publicKey] }),
3984
- })
3985
- .catch((e) => logger.error(e.toString()));
5266
+ this.scheduleReplicationInfoRequests(publicKey);
3986
5267
  } else {
3987
5268
  await this.removeReplicator(publicKey);
3988
5269
  }
@@ -4025,8 +5306,8 @@ export class SharedLog<
4025
5306
  leaders: Map<string, unknown> | Set<string>;
4026
5307
  }
4027
5308
  >,
4028
- options?: { timeout?: number; unchecked?: boolean },
4029
- ): Promise<any>[] {
5309
+ options?: { timeout?: number; unchecked?: boolean },
5310
+ ): Promise<any>[] {
4030
5311
  if (options?.unchecked) {
4031
5312
  return [...entries.values()].map((x) => {
4032
5313
  this._gidPeersHistory.delete(x.entry.meta.gid);
@@ -4051,30 +5332,57 @@ export class SharedLog<
4051
5332
  // - An entry is joined, where min replicas is lower than before (for all heads for this particular gid) and therefore we are not replicating anymore for this particular gid
4052
5333
  // - Peers join and leave, which means we might not be a replicator anymore
4053
5334
 
4054
- const promises: Promise<any>[] = [];
5335
+ const promises: Promise<any>[] = [];
4055
5336
 
4056
- let peerToEntries: Map<string, string[]> = new Map();
4057
- let cleanupTimer: ReturnType<typeof setTimeout>[] = [];
5337
+ let peerToEntries: Map<string, string[]> = new Map();
5338
+ let cleanupTimer: ReturnType<typeof setTimeout>[] = [];
5339
+ const explicitTimeout = options?.timeout != null;
4058
5340
 
4059
- for (const { entry, leaders } of entries.values()) {
4060
- for (const leader of leaders.keys()) {
4061
- let set = peerToEntries.get(leader);
4062
- if (!set) {
4063
- set = [];
4064
- peerToEntries.set(leader, set);
4065
- }
5341
+ for (const { entry, leaders } of entries.values()) {
5342
+ for (const leader of leaders.keys()) {
5343
+ let set = peerToEntries.get(leader);
5344
+ if (!set) {
5345
+ set = [];
5346
+ peerToEntries.set(leader, set);
5347
+ }
4066
5348
 
4067
- set.push(entry.hash);
4068
- }
5349
+ set.push(entry.hash);
5350
+ }
4069
5351
 
4070
- const pendingPrev = this._pendingDeletes.get(entry.hash);
4071
- if (pendingPrev) {
4072
- promises.push(pendingPrev.promise.promise);
4073
- continue;
4074
- }
5352
+ const pendingPrev = this._pendingDeletes.get(entry.hash);
5353
+ if (pendingPrev) {
5354
+ // If a background prune is already in-flight, an explicit prune request should
5355
+ // still respect the caller's timeout. Otherwise, tests (and user calls) can
5356
+ // block on the longer "checked prune" timeout derived from
5357
+ // `_respondToIHaveTimeout + waitForReplicatorTimeout`, which is intentionally
5358
+ // large for resiliency.
5359
+ if (explicitTimeout) {
5360
+ const timeoutMs = Math.max(0, Math.floor(options?.timeout ?? 0));
5361
+ promises.push(
5362
+ new Promise((resolve, reject) => {
5363
+ // Mirror the checked-prune error prefix so existing callers/tests can
5364
+ // match on the message substring.
5365
+ const timer = setTimeout(() => {
5366
+ reject(
5367
+ new Error(
5368
+ `Timeout for checked pruning after ${timeoutMs}ms (pending=true closed=${this.closed})`,
5369
+ ),
5370
+ );
5371
+ }, timeoutMs);
5372
+ timer.unref?.();
5373
+ pendingPrev.promise.promise
5374
+ .then(resolve, reject)
5375
+ .finally(() => clearTimeout(timer));
5376
+ }),
5377
+ );
5378
+ } else {
5379
+ promises.push(pendingPrev.promise.promise);
5380
+ }
5381
+ continue;
5382
+ }
4075
5383
 
4076
- const minReplicas = decodeReplicas(entry);
4077
- const deferredPromise: DeferredPromise<void> = pDefer();
5384
+ const minReplicas = decodeReplicas(entry);
5385
+ const deferredPromise: DeferredPromise<void> = pDefer();
4078
5386
 
4079
5387
  const clear = () => {
4080
5388
  const pending = this._pendingDeletes.get(entry.hash);
@@ -4084,12 +5392,13 @@ export class SharedLog<
4084
5392
  clearTimeout(timeout);
4085
5393
  };
4086
5394
 
4087
- const resolve = () => {
4088
- clear();
4089
- cleanupTimer.push(
4090
- setTimeout(async () => {
4091
- this._gidPeersHistory.delete(entry.meta.gid);
4092
- this.removePruneRequestSent(entry.hash);
5395
+ const resolve = () => {
5396
+ clear();
5397
+ this.clearCheckedPruneRetry(entry.hash);
5398
+ cleanupTimer.push(
5399
+ setTimeout(async () => {
5400
+ this._gidPeersHistory.delete(entry.meta.gid);
5401
+ this.removePruneRequestSent(entry.hash);
4093
5402
  this._requestIPruneResponseReplicatorSet.delete(entry.hash);
4094
5403
 
4095
5404
  if (
@@ -4133,12 +5442,19 @@ export class SharedLog<
4133
5442
  );
4134
5443
  };
4135
5444
 
4136
- const reject = (e: any) => {
4137
- clear();
4138
- this.removePruneRequestSent(entry.hash);
4139
- this._requestIPruneResponseReplicatorSet.delete(entry.hash);
4140
- deferredPromise.reject(e);
4141
- };
5445
+ const reject = (e: any) => {
5446
+ clear();
5447
+ const isCheckedPruneTimeout =
5448
+ e instanceof Error &&
5449
+ typeof e.message === "string" &&
5450
+ e.message.startsWith("Timeout for checked pruning");
5451
+ if (explicitTimeout || !isCheckedPruneTimeout) {
5452
+ this.clearCheckedPruneRetry(entry.hash);
5453
+ }
5454
+ this.removePruneRequestSent(entry.hash);
5455
+ this._requestIPruneResponseReplicatorSet.delete(entry.hash);
5456
+ deferredPromise.reject(e);
5457
+ };
4142
5458
 
4143
5459
  let cursor: NumberFromType<R>[] | undefined = undefined;
4144
5460
 
@@ -4156,14 +5472,20 @@ export class SharedLog<
4156
5472
  PRUNE_DEBOUNCE_INTERVAL * 2,
4157
5473
  );
4158
5474
 
4159
- const timeout = setTimeout(() => {
4160
- reject(
4161
- new Error(
4162
- `Timeout for checked pruning after ${checkedPruneTimeoutMs}ms (closed=${this.closed})`,
4163
- ),
4164
- );
4165
- }, checkedPruneTimeoutMs);
4166
- timeout.unref?.();
5475
+ const timeout = setTimeout(() => {
5476
+ // For internal/background prune flows (no explicit timeout), retry a few times
5477
+ // to avoid "permanently prunable" entries when `_pendingIHave` expires under
5478
+ // heavy load.
5479
+ if (!explicitTimeout) {
5480
+ this.scheduleCheckedPruneRetry({ entry, leaders });
5481
+ }
5482
+ reject(
5483
+ new Error(
5484
+ `Timeout for checked pruning after ${checkedPruneTimeoutMs}ms (closed=${this.closed})`,
5485
+ ),
5486
+ );
5487
+ }, checkedPruneTimeoutMs);
5488
+ timeout.unref?.();
4167
5489
 
4168
5490
  this._pendingDeletes.set(entry.hash, {
4169
5491
  promise: deferredPromise,
@@ -4200,20 +5522,22 @@ export class SharedLog<
4200
5522
  let existCounter = this._requestIPruneResponseReplicatorSet.get(
4201
5523
  entry.hash,
4202
5524
  );
4203
- if (!existCounter) {
4204
- existCounter = new Set();
4205
- this._requestIPruneResponseReplicatorSet.set(
4206
- entry.hash,
4207
- existCounter,
4208
- );
4209
- }
4210
- existCounter.add(publicKeyHash);
5525
+ if (!existCounter) {
5526
+ existCounter = new Set();
5527
+ this._requestIPruneResponseReplicatorSet.set(
5528
+ entry.hash,
5529
+ existCounter,
5530
+ );
5531
+ }
5532
+ existCounter.add(publicKeyHash);
5533
+ // Seed provider hints so future remote reads can avoid extra round-trips.
5534
+ this.remoteBlocks.hintProviders(entry.hash, [publicKeyHash]);
4211
5535
 
4212
- if (minReplicasValue <= existCounter.size) {
4213
- resolve();
4214
- }
4215
- },
4216
- });
5536
+ if (minReplicasValue <= existCounter.size) {
5537
+ resolve();
5538
+ }
5539
+ },
5540
+ });
4217
5541
 
4218
5542
  promises.push(deferredPromise.promise);
4219
5543
  }
@@ -4249,16 +5573,58 @@ export class SharedLog<
4249
5573
  }
4250
5574
  };
4251
5575
 
4252
- for (const [k, v] of peerToEntries) {
4253
- emitMessages(v, k);
4254
- }
5576
+ for (const [k, v] of peerToEntries) {
5577
+ emitMessages(v, k);
5578
+ }
4255
5579
 
4256
- let cleanup = () => {
4257
- for (const timer of cleanupTimer) {
4258
- clearTimeout(timer);
5580
+ // Keep remote `_pendingIHave` alive in the common "leader doesn't have entry yet"
5581
+ // case. This is intentionally disabled when an explicit timeout is provided to
5582
+ // preserve unit tests that assert remote `_pendingIHave` clears promptly.
5583
+ if (!explicitTimeout && peerToEntries.size > 0) {
5584
+ const respondToIHaveTimeout = Number(this._respondToIHaveTimeout ?? 0);
5585
+ const resendIntervalMs = Math.min(
5586
+ CHECKED_PRUNE_RESEND_INTERVAL_MAX_MS,
5587
+ Math.max(
5588
+ CHECKED_PRUNE_RESEND_INTERVAL_MIN_MS,
5589
+ Math.floor(respondToIHaveTimeout / 2) || 1_000,
5590
+ ),
5591
+ );
5592
+ let inFlight = false;
5593
+ const timer = setInterval(() => {
5594
+ if (inFlight) return;
5595
+ if (this.closed) return;
5596
+
5597
+ const pendingByPeer: [string, string[]][] = [];
5598
+ for (const [peer, hashes] of peerToEntries) {
5599
+ const pending = hashes.filter((h) => this._pendingDeletes.has(h));
5600
+ if (pending.length > 0) {
5601
+ pendingByPeer.push([peer, pending]);
5602
+ }
5603
+ }
5604
+ if (pendingByPeer.length === 0) {
5605
+ clearInterval(timer);
5606
+ return;
5607
+ }
5608
+
5609
+ inFlight = true;
5610
+ Promise.allSettled(
5611
+ pendingByPeer.map(([peer, hashes]) =>
5612
+ emitMessages(hashes, peer).catch(() => {}),
5613
+ ),
5614
+ ).finally(() => {
5615
+ inFlight = false;
5616
+ });
5617
+ }, resendIntervalMs);
5618
+ timer.unref?.();
5619
+ cleanupTimer.push(timer as any);
4259
5620
  }
4260
- this._closeController.signal.removeEventListener("abort", cleanup);
4261
- };
5621
+
5622
+ let cleanup = () => {
5623
+ for (const timer of cleanupTimer) {
5624
+ clearTimeout(timer);
5625
+ }
5626
+ this._closeController.signal.removeEventListener("abort", cleanup);
5627
+ };
4262
5628
 
4263
5629
  Promise.allSettled(promises).finally(cleanup);
4264
5630
  this._closeController.signal.addEventListener("abort", cleanup);
@@ -4336,24 +5702,113 @@ export class SharedLog<
4336
5702
 
4337
5703
  await this.log.trim();
4338
5704
 
5705
+ const batchedChanges = Array.isArray(changeOrChanges[0])
5706
+ ? (changeOrChanges as ReplicationChanges<ReplicationRangeIndexable<R>>[])
5707
+ : [changeOrChanges as ReplicationChanges<ReplicationRangeIndexable<R>>];
5708
+ const changes = batchedChanges.flat();
5709
+ const selfHash = this.node.identity.publicKey.hashcode();
5710
+ // On removed ranges (peer leaves / shrink), gid-level history can hide
5711
+ // per-entry gaps. Force a fresh delivery pass for reassigned entries.
5712
+ const forceFreshDelivery = changes.some(
5713
+ (change) => change.type === "removed" && change.range.hash !== selfHash,
5714
+ );
5715
+ const gidPeersHistorySnapshot = new Map<string, Set<string> | undefined>();
5716
+ const dedupeCutoff = Date.now() - RECENT_REPAIR_DISPATCH_TTL_MS;
5717
+ for (const [target, hashes] of this._recentRepairDispatch) {
5718
+ for (const [hash, ts] of hashes) {
5719
+ if (ts <= dedupeCutoff) {
5720
+ hashes.delete(hash);
5721
+ }
5722
+ }
5723
+ if (hashes.size === 0) {
5724
+ this._recentRepairDispatch.delete(target);
5725
+ }
5726
+ }
5727
+
4339
5728
  const changed = false;
5729
+ const replacedPeers = new Set<string>();
5730
+ for (const change of changes) {
5731
+ if (change.type === "replaced" && change.range.hash !== selfHash) {
5732
+ replacedPeers.add(change.range.hash);
5733
+ }
5734
+ }
5735
+ const addedPeers = new Set<string>();
5736
+ for (const change of changes) {
5737
+ if (change.type === "added" || change.type === "replaced") {
5738
+ const hash = change.range.hash;
5739
+ if (hash !== selfHash) {
5740
+ // Range updates can reassign entries to an existing peer shortly after it
5741
+ // already received a subset. Avoid suppressing legitimate follow-up repair.
5742
+ this._recentRepairDispatch.delete(hash);
5743
+ }
5744
+ }
5745
+ if (change.type === "added") {
5746
+ const hash = change.range.hash;
5747
+ if (hash !== selfHash && !replacedPeers.has(hash)) {
5748
+ addedPeers.add(hash);
5749
+ }
5750
+ }
5751
+ }
4340
5752
 
4341
5753
  try {
4342
5754
  const uncheckedDeliver: Map<
4343
5755
  string,
4344
5756
  Map<string, EntryReplicated<any>>
4345
5757
  > = new Map();
5758
+ const flushUncheckedDeliverTarget = (target: string) => {
5759
+ const entries = uncheckedDeliver.get(target);
5760
+ if (!entries || entries.size === 0) {
5761
+ return;
5762
+ }
5763
+ const isJoinWarmupTarget = addedPeers.has(target);
5764
+ const bypassRecentDedupe = isJoinWarmupTarget || forceFreshDelivery;
5765
+ this.dispatchMaybeMissingEntries(target, entries, {
5766
+ bypassRecentDedupe,
5767
+ retryScheduleMs: isJoinWarmupTarget
5768
+ ? JOIN_WARMUP_RETRY_SCHEDULE_MS
5769
+ : undefined,
5770
+ forceFreshDelivery,
5771
+ });
5772
+ uncheckedDeliver.delete(target);
5773
+ };
5774
+ const queueUncheckedDeliver = (
5775
+ target: string,
5776
+ entry: EntryReplicated<any>,
5777
+ ) => {
5778
+ let set = uncheckedDeliver.get(target);
5779
+ if (!set) {
5780
+ set = new Map();
5781
+ uncheckedDeliver.set(target, set);
5782
+ }
5783
+ if (set.has(entry.hash)) {
5784
+ return;
5785
+ }
5786
+ set.set(entry.hash, entry);
5787
+ if (set.size >= this.repairSweepTargetBufferSize) {
5788
+ flushUncheckedDeliverTarget(target);
5789
+ }
5790
+ };
4346
5791
 
4347
5792
  for await (const entryReplicated of toRebalance<R>(
4348
- changeOrChanges,
5793
+ changes,
4349
5794
  this.entryCoordinatesIndex,
4350
5795
  this.recentlyRebalanced,
5796
+ { forceFresh: forceFreshDelivery },
4351
5797
  )) {
4352
5798
  if (this.closed) {
4353
5799
  break;
4354
5800
  }
4355
5801
 
4356
- let oldPeersSet = this._gidPeersHistory.get(entryReplicated.gid);
5802
+ let oldPeersSet: Set<string> | undefined;
5803
+ if (!forceFreshDelivery) {
5804
+ const gid = entryReplicated.gid;
5805
+ oldPeersSet = gidPeersHistorySnapshot.get(gid);
5806
+ if (!gidPeersHistorySnapshot.has(gid)) {
5807
+ const existing = this._gidPeersHistory.get(gid);
5808
+ oldPeersSet = existing ? new Set(existing) : undefined;
5809
+ gidPeersHistorySnapshot.set(gid, oldPeersSet);
5810
+ }
5811
+ }
4357
5812
  let isLeader = false;
4358
5813
 
4359
5814
  let currentPeers = await this.findLeaders(
@@ -4366,24 +5821,16 @@ export class SharedLog<
4366
5821
  },
4367
5822
  );
4368
5823
 
4369
- for (const [currentPeer] of currentPeers) {
4370
- if (currentPeer === this.node.identity.publicKey.hashcode()) {
4371
- isLeader = true;
4372
- continue;
4373
- }
4374
-
4375
- if (!oldPeersSet?.has(currentPeer)) {
4376
- let set = uncheckedDeliver.get(currentPeer);
4377
- if (!set) {
4378
- set = new Map();
4379
- uncheckedDeliver.set(currentPeer, set);
5824
+ for (const [currentPeer] of currentPeers) {
5825
+ if (currentPeer === this.node.identity.publicKey.hashcode()) {
5826
+ isLeader = true;
5827
+ continue;
4380
5828
  }
4381
5829
 
4382
- if (!set.has(entryReplicated.hash)) {
4383
- set.set(entryReplicated.hash, entryReplicated);
5830
+ if (!oldPeersSet?.has(currentPeer)) {
5831
+ queueUncheckedDeliver(currentPeer, entryReplicated);
4384
5832
  }
4385
5833
  }
4386
- }
4387
5834
 
4388
5835
  if (oldPeersSet) {
4389
5836
  for (const oldPeer of oldPeersSet) {
@@ -4414,11 +5861,15 @@ export class SharedLog<
4414
5861
  this.removePruneRequestSent(entryReplicated.hash);
4415
5862
  }
4416
5863
  }
4417
- for (const [target, entries] of uncheckedDeliver) {
4418
- this.syncronizer.onMaybeMissingEntries({
4419
- entries,
4420
- targets: [target],
4421
- });
5864
+
5865
+ if (forceFreshDelivery || addedPeers.size > 0) {
5866
+ // Schedule a coalesced background sweep for churn/join windows instead of
5867
+ // scanning the whole index synchronously on each replication change.
5868
+ this.scheduleRepairSweep({ forceFreshDelivery, addedPeers });
5869
+ }
5870
+
5871
+ for (const target of [...uncheckedDeliver.keys()]) {
5872
+ flushUncheckedDeliverTarget(target);
4422
5873
  }
4423
5874
 
4424
5875
  return changed;
@@ -4438,7 +5889,22 @@ export class SharedLog<
4438
5889
  evt.detail.topics.map((x) => x),
4439
5890
  )} '`,
4440
5891
  );
4441
- this.latestReplicationInfoMessage.delete(evt.detail.from.hashcode());
5892
+ if (!evt.detail.topics.includes(this.topic)) {
5893
+ return;
5894
+ }
5895
+
5896
+ const fromHash = evt.detail.from.hashcode();
5897
+ this._replicationInfoBlockedPeers.add(fromHash);
5898
+ this._recentRepairDispatch.delete(fromHash);
5899
+
5900
+ // Keep a per-peer timestamp watermark when we observe an unsubscribe. This
5901
+ // prevents late/out-of-order replication-info messages from re-introducing
5902
+ // stale segments for a peer that has already left the topic.
5903
+ const now = BigInt(+new Date());
5904
+ const prev = this.latestReplicationInfoMessage.get(fromHash);
5905
+ if (!prev || prev < now) {
5906
+ this.latestReplicationInfoMessage.set(fromHash, now);
5907
+ }
4442
5908
 
4443
5909
  return this.handleSubscriptionChange(
4444
5910
  evt.detail.from,
@@ -4453,9 +5919,14 @@ export class SharedLog<
4453
5919
  evt.detail.topics.map((x) => x),
4454
5920
  )}'`,
4455
5921
  );
5922
+ if (!evt.detail.topics.includes(this.topic)) {
5923
+ return;
5924
+ }
5925
+
4456
5926
  this.remoteBlocks.onReachable(evt.detail.from);
5927
+ this._replicationInfoBlockedPeers.delete(evt.detail.from.hashcode());
4457
5928
 
4458
- return this.handleSubscriptionChange(
5929
+ await this.handleSubscriptionChange(
4459
5930
  evt.detail.from,
4460
5931
  evt.detail.topics,
4461
5932
  true,