@peerbit/shared-log 12.3.4 → 12.3.5-3f16953

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/src/index.js CHANGED
@@ -32,19 +32,19 @@ var __runInitializers = (this && this.__runInitializers) || function (thisArg, i
32
32
  }
33
33
  return useValue ? value : void 0;
34
34
  };
35
- import { BorshError, field, variant } from "@dao-xyz/borsh";
35
+ import { BorshError, deserialize, field, serialize, variant } from "@dao-xyz/borsh";
36
36
  import { AnyBlockStore, RemoteBlocks } from "@peerbit/blocks";
37
37
  import { cidifyString } from "@peerbit/blocks-interface";
38
38
  import { Cache } from "@peerbit/cache";
39
- import { AccessError, PublicSignKey, sha256Base64Sync, sha256Sync, } from "@peerbit/crypto";
39
+ import { AccessError, PublicSignKey, getPublicKeyFromPeerId, sha256Base64Sync, sha256Sync, } from "@peerbit/crypto";
40
40
  import { And, ByteMatchQuery, NotStartedError as IndexNotStartedError, Or, Sort, StringMatch, toId, } from "@peerbit/indexer-interface";
41
41
  import { Entry, Log, Meta, ShallowEntry, } from "@peerbit/log";
42
42
  import { logger as loggerFn } from "@peerbit/logger";
43
43
  import { ClosedError, Program } from "@peerbit/program";
44
- import { waitForSubscribers } from "@peerbit/pubsub";
44
+ import { FanoutChannel, waitForSubscribers, } from "@peerbit/pubsub";
45
45
  import { SubscriptionEvent, UnsubcriptionEvent, } from "@peerbit/pubsub-interface";
46
46
  import { RPC } from "@peerbit/rpc";
47
- import { AcknowledgeDelivery, AnyWhere, NotStartedError, SeekDelivery, SilentDelivery, } from "@peerbit/stream-interface";
47
+ import { AcknowledgeDelivery, AnyWhere, DataMessage, MessageHeader, NotStartedError, SilentDelivery, } from "@peerbit/stream-interface";
48
48
  import { AbortError, TimeoutError, debounceAccumulator, debounceFixedInterval, waitFor, } from "@peerbit/time";
49
49
  import pDefer, {} from "p-defer";
50
50
  import PQueue from "p-queue";
@@ -54,6 +54,7 @@ import { CPUUsageIntervalLag } from "./cpu.js";
54
54
  import { debouncedAccumulatorMap, } from "./debounce.js";
55
55
  import { NoPeersError } from "./errors.js";
56
56
  import { EntryWithRefs, ExchangeHeadsMessage, RequestIPrune, ResponseIPrune, createExchangeHeadsMessages, } from "./exchange-heads.js";
57
+ import { FanoutEnvelope } from "./fanout-envelope.js";
57
58
  import { MAX_U32, MAX_U64, bytesToNumber, createNumbers, denormalizer, } from "./integers.js";
58
59
  import { TransportMessage } from "./message.js";
59
60
  import { PIDReplicationController } from "./pid.js";
@@ -84,6 +85,34 @@ const getLatestEntry = (entries) => {
84
85
  }
85
86
  return latest;
86
87
  };
88
/**
 * Derive a deterministic unsigned 32-bit seed from a string.
 *
 * Applies the FNV-1a 32-bit mixing steps (xor, then multiply by the FNV prime)
 * once per UTF-16 code unit of `str`, as returned by `charCodeAt`.
 *
 * @param {string} str Text to hash.
 * @returns {number} Integer in the range [0, 2^32).
 */
const hashToSeed32 = (str) => {
    const FNV_OFFSET_BASIS = 0x811c9dc5;
    const FNV_PRIME = 0x01000193;
    let acc = FNV_OFFSET_BASIS;
    for (let index = 0; index < str.length; index += 1) {
        // Math.imul keeps the multiplication in 32-bit integer arithmetic.
        acc = Math.imul(acc ^ str.charCodeAt(index), FNV_PRIME);
    }
    // Reinterpret the signed 32-bit result as unsigned.
    return acc >>> 0;
};
97
/**
 * Pick at most `max` distinct entries from `peers`, in an order that depends
 * only on `seed` (xorshift32 pseudo-random sequence).
 *
 * When `peers` already has `max` or fewer entries it is returned as-is (same
 * array instance, not copied).
 *
 * @param {Array} peers Candidate list.
 * @param {number} seed Seed for the generator; coerced to an unsigned 32-bit
 *   integer, with 0 replaced by 1 because xorshift cannot leave the zero state.
 * @param {number} max Upper bound on the number of entries returned.
 * @returns {Array} `peers` itself, or a new array of distinct entries.
 */
const pickDeterministicSubset = (peers, seed, max) => {
    if (peers.length <= max)
        return peers;
    // Never ask for more distinct entries than exist: with duplicates in
    // `peers` the selection loop below could otherwise spin forever.
    const target = Math.min(max, new Set(peers).size);
    const subset = [];
    const used = new Set();
    // Normalise to uint32 first. A seed such as 2**32 or 0.5 is truthy but
    // becomes 0 once the bitwise operators coerce it, which would pin the
    // generator at 0 and never advance.
    let x = (seed >>> 0) || 1;
    while (subset.length < target) {
        // xorshift32
        x ^= x << 13;
        x ^= x >>> 17;
        x ^= x << 5;
        const peer = peers[(x >>> 0) % peers.length];
        if (!used.has(peer)) {
            used.add(peer);
            subset.push(peer);
        }
    }
    return subset;
};
87
116
  export { BlocksMessage };
88
117
  const isAdaptiveReplicatorOption = (options) => {
89
118
  if (typeof options === "number") {
@@ -176,6 +205,10 @@ export const WAIT_FOR_REPLICATOR_REQUEST_MIN_ATTEMPTS = 3;
176
205
  // Prefer making pruning robust without timing-based heuristics.
177
206
  export const WAIT_FOR_PRUNE_DELAY = 0;
178
207
  const PRUNE_DEBOUNCE_INTERVAL = 500;
208
// Lower/upper bounds (milliseconds) for a checked-prune resend interval.
// NOTE(review): the code that consumes these two values is not visible in this
// part of the file — confirm the exact usage before changing them.
const CHECKED_PRUNE_RESEND_INTERVAL_MIN_MS = 250;
const CHECKED_PRUNE_RESEND_INTERVAL_MAX_MS = 5_000;
// scheduleCheckedPruneRetry stops scheduling once an entry hash has reached
// this many attempts.
const CHECKED_PRUNE_RETRY_MAX_ATTEMPTS = 3;
// Ceiling (milliseconds) applied to the exponential back-off delay computed in
// scheduleCheckedPruneRetry.
const CHECKED_PRUNE_RETRY_MAX_DELAY_MS = 30_000;
179
212
  // DONT SET THIS ANY LOWER, because it will make the pid controller unstable as the system responses are not fast enough to updates from the pid controller
180
213
  const RECALCULATE_PARTICIPATION_DEBOUNCE_INTERVAL = 1000;
181
214
  const RECALCULATE_PARTICIPATION_MIN_RELATIVE_CHANGE = 0.01;
@@ -183,6 +216,13 @@ const RECALCULATE_PARTICIPATION_MIN_RELATIVE_CHANGE_WITH_CPU_LIMIT = 0.005;
183
216
  const RECALCULATE_PARTICIPATION_MIN_RELATIVE_CHANGE_WITH_MEMORY_LIMIT = 0.001;
184
217
  const RECALCULATE_PARTICIPATION_RELATIVE_DENOMINATOR_FLOOR = 1e-3;
185
218
  const DEFAULT_DISTRIBUTION_DEBOUNCE_TIME = 500;
219
// Baseline fanout channel settings. getFanoutChannelOptions spreads these
// first and then any caller-supplied `channel` overrides on top, so every key
// here can be replaced per log.
// NOTE(review): the units and exact meaning of each field are defined by the
// fanout channel implementation in @peerbit/pubsub — confirm there.
const DEFAULT_SHARED_LOG_FANOUT_CHANNEL_OPTIONS = {
    msgRate: 30,
    msgSize: 1024,
    uploadLimitBps: 5_000_000,
    maxChildren: 24,
    repair: true,
};
186
226
  const getIdForDynamicRange = (publicKey) => {
187
227
  return sha256Sync(concat([publicKey.bytes, new TextEncoder().encode("dynamic")]));
188
228
  };
@@ -226,12 +266,17 @@ let SharedLog = (() => {
226
266
  coordinateToHash;
227
267
  recentlyRebalanced;
228
268
  uniqueReplicators;
269
+ _replicatorJoinEmitted;
229
270
  _replicatorsReconciled;
230
271
  /* private _totalParticipation!: number; */
231
272
  // gid -> coordinate -> publicKeyHash list (of owners)
232
273
  _gidPeersHistory;
233
274
  _onSubscriptionFn;
234
275
  _onUnsubscriptionFn;
276
+ _onFanoutDataFn;
277
+ _onFanoutUnicastFn;
278
+ _fanoutChannel;
279
+ _providerHandle;
235
280
  _isTrustedReplicator;
236
281
  _logProperties;
237
282
  _closeController;
@@ -241,6 +286,12 @@ let SharedLog = (() => {
241
286
  // public key hash to range id to range
242
287
  pendingMaturity; // map of peerId to timeout
243
288
  latestReplicationInfoMessage;
289
+ // Peers that have unsubscribed from this log's topic. We ignore replication-info
290
+ // messages from them until we see a new subscription, to avoid re-introducing
291
+ // stale membership state during close/unsubscribe races.
292
+ _replicationInfoBlockedPeers;
293
+ _replicationInfoRequestByPeer;
294
+ _replicationInfoApplyQueueByPeer;
244
295
  remoteBlocks;
245
296
  openTime;
246
297
  oldestOpenTime;
@@ -252,6 +303,7 @@ let SharedLog = (() => {
252
303
  responseToPruneDebouncedFn;
253
304
  _requestIPruneSent; // tracks entry hash to peer hash for requesting I prune messages
254
305
  _requestIPruneResponseReplicatorSet; // tracks entry hash to peer hash
306
+ _checkedPruneRetries;
255
307
  replicationChangeDebounceFn;
256
308
  // regular distribution checks
257
309
  distributeQueue;
@@ -283,6 +335,492 @@ let SharedLog = (() => {
283
335
  get v8Behaviour() {
284
336
  return (this.compatibility ?? Number.MAX_VALUE) < 9;
285
337
  }
338
+ getFanoutChannelOptions(options) {
339
+ return {
340
+ ...DEFAULT_SHARED_LOG_FANOUT_CHANNEL_OPTIONS,
341
+ ...(options?.channel ?? {}),
342
+ };
343
+ }
344
+ async _openFanoutChannel(options) {
345
+ this._closeFanoutChannel();
346
+ if (!options) {
347
+ return;
348
+ }
349
+ const fanoutService = this.node.services.fanout;
350
+ if (!fanoutService) {
351
+ throw new Error(`Fanout is configured for shared-log topic ${this.topic}, but no fanout service is available on this client`);
352
+ }
353
+ const resolvedRoot = options.root ??
354
+ (await fanoutService?.topicRootControlPlane?.resolveTopicRoot?.(this.topic));
355
+ if (!resolvedRoot) {
356
+ throw new Error(`Fanout is configured for shared-log topic ${this.topic}, but no fanout root was provided and none could be resolved`);
357
+ }
358
+ const channel = new FanoutChannel(fanoutService, {
359
+ topic: this.topic,
360
+ root: resolvedRoot,
361
+ });
362
+ this._fanoutChannel = channel;
363
+ this._onFanoutDataFn =
364
+ this._onFanoutDataFn ||
365
+ ((evt) => {
366
+ const detail = evt?.detail;
367
+ if (!detail) {
368
+ return;
369
+ }
370
+ void this._onFanoutData(detail).catch((error) => logger.error(error));
371
+ });
372
+ channel.addEventListener("data", this._onFanoutDataFn);
373
+ this._onFanoutUnicastFn =
374
+ this._onFanoutUnicastFn ||
375
+ ((evt) => {
376
+ const detail = evt?.detail;
377
+ if (!detail) {
378
+ return;
379
+ }
380
+ void this._onFanoutUnicast(detail).catch((error) => logger.error(error));
381
+ });
382
+ channel.addEventListener("unicast", this._onFanoutUnicastFn);
383
+ try {
384
+ const channelOptions = this.getFanoutChannelOptions(options);
385
+ if (resolvedRoot === fanoutService.publicKeyHash) {
386
+ await channel.openAsRoot(channelOptions);
387
+ return;
388
+ }
389
+ await channel.join(channelOptions, options.join);
390
+ }
391
+ catch (error) {
392
+ this._closeFanoutChannel();
393
+ throw error;
394
+ }
395
+ }
396
+ _closeFanoutChannel() {
397
+ if (this._fanoutChannel) {
398
+ if (this._onFanoutDataFn) {
399
+ this._fanoutChannel.removeEventListener("data", this._onFanoutDataFn);
400
+ }
401
+ if (this._onFanoutUnicastFn) {
402
+ this._fanoutChannel.removeEventListener("unicast", this._onFanoutUnicastFn);
403
+ }
404
+ this._fanoutChannel.close();
405
+ }
406
+ this._fanoutChannel = undefined;
407
+ }
408
+ async _onFanoutData(detail) {
409
+ let envelope;
410
+ try {
411
+ envelope = deserialize(detail.payload, FanoutEnvelope);
412
+ }
413
+ catch (error) {
414
+ if (error instanceof BorshError) {
415
+ return;
416
+ }
417
+ throw error;
418
+ }
419
+ let message;
420
+ try {
421
+ message = deserialize(envelope.payload, TransportMessage);
422
+ }
423
+ catch (error) {
424
+ if (error instanceof BorshError) {
425
+ return;
426
+ }
427
+ throw error;
428
+ }
429
+ if (!(message instanceof ExchangeHeadsMessage)) {
430
+ return;
431
+ }
432
+ const from = (await this._resolvePublicKeyFromHash(envelope.from)) ??
433
+ { hashcode: () => envelope.from };
434
+ const contextMessage = new DataMessage({
435
+ header: new MessageHeader({
436
+ session: 0,
437
+ mode: new AnyWhere(),
438
+ priority: 0,
439
+ }),
440
+ });
441
+ contextMessage.header.timestamp = envelope.timestamp;
442
+ await this.onMessage(message, {
443
+ from,
444
+ message: contextMessage,
445
+ });
446
+ }
447
+ async _onFanoutUnicast(detail) {
448
+ let message;
449
+ try {
450
+ message = deserialize(detail.payload, TransportMessage);
451
+ }
452
+ catch (error) {
453
+ if (error instanceof BorshError) {
454
+ return;
455
+ }
456
+ throw error;
457
+ }
458
+ const fromHash = detail.origin || detail.from;
459
+ const from = (await this._resolvePublicKeyFromHash(fromHash)) ??
460
+ { hashcode: () => fromHash };
461
+ const contextMessage = new DataMessage({
462
+ header: new MessageHeader({
463
+ session: 0,
464
+ mode: new AnyWhere(),
465
+ priority: 0,
466
+ }),
467
+ });
468
+ contextMessage.header.timestamp = detail.timestamp;
469
+ await this.onMessage(message, {
470
+ from,
471
+ message: contextMessage,
472
+ });
473
+ }
474
+ async _publishExchangeHeadsViaFanout(message) {
475
+ if (!this._fanoutChannel) {
476
+ throw new Error(`No fanout channel configured for shared-log topic ${this.topic}`);
477
+ }
478
+ const envelope = new FanoutEnvelope({
479
+ from: this.node.identity.publicKey.hashcode(),
480
+ timestamp: BigInt(Date.now()),
481
+ payload: serialize(message),
482
+ });
483
+ await this._fanoutChannel.publish(serialize(envelope));
484
+ }
485
+ _parseDeliveryOptions(deliveryArg) {
486
+ const delivery = deliveryArg === undefined || deliveryArg === false
487
+ ? undefined
488
+ : deliveryArg === true
489
+ ? {}
490
+ : deliveryArg;
491
+ if (!delivery) {
492
+ return {
493
+ delivery: undefined,
494
+ requireRecipients: false,
495
+ settleMin: undefined,
496
+ wrap: undefined,
497
+ };
498
+ }
499
+ const deliverySettle = delivery.settle ?? true;
500
+ const deliveryTimeout = delivery.timeout;
501
+ const deliverySignal = delivery.signal;
502
+ const requireRecipients = delivery.requireRecipients === true;
503
+ const settleMin = typeof deliverySettle === "object" && Number.isFinite(deliverySettle.min)
504
+ ? Math.max(0, Math.floor(deliverySettle.min))
505
+ : undefined;
506
+ const wrap = deliveryTimeout == null && deliverySignal == null
507
+ ? undefined
508
+ : (promise) => new Promise((resolve, reject) => {
509
+ let settled = false;
510
+ let timer = undefined;
511
+ const onAbort = () => {
512
+ if (settled) {
513
+ return;
514
+ }
515
+ settled = true;
516
+ promise.catch(() => { });
517
+ cleanup();
518
+ reject(new AbortError());
519
+ };
520
+ const cleanup = () => {
521
+ if (timer != null) {
522
+ clearTimeout(timer);
523
+ timer = undefined;
524
+ }
525
+ deliverySignal?.removeEventListener("abort", onAbort);
526
+ };
527
+ if (deliverySignal) {
528
+ if (deliverySignal.aborted) {
529
+ onAbort();
530
+ return;
531
+ }
532
+ deliverySignal.addEventListener("abort", onAbort);
533
+ }
534
+ if (deliveryTimeout != null) {
535
+ timer = setTimeout(() => {
536
+ if (settled) {
537
+ return;
538
+ }
539
+ settled = true;
540
+ promise.catch(() => { });
541
+ cleanup();
542
+ reject(new TimeoutError(`Timeout waiting for delivery`));
543
+ }, deliveryTimeout);
544
+ }
545
+ promise
546
+ .then(() => {
547
+ if (settled) {
548
+ return;
549
+ }
550
+ settled = true;
551
+ cleanup();
552
+ resolve();
553
+ })
554
+ .catch((error) => {
555
+ if (settled) {
556
+ return;
557
+ }
558
+ settled = true;
559
+ cleanup();
560
+ reject(error);
561
+ });
562
+ });
563
+ return {
564
+ delivery,
565
+ requireRecipients,
566
+ settleMin,
567
+ wrap,
568
+ };
569
+ }
570
+ async _appendDeliverToReplicators(entry, minReplicasValue, leaders, selfHash, isLeader, deliveryArg) {
571
+ const { delivery, requireRecipients, settleMin, wrap } = this._parseDeliveryOptions(deliveryArg);
572
+ const pending = [];
573
+ const track = (promise) => {
574
+ pending.push(wrap ? wrap(promise) : promise);
575
+ };
576
+ const fanoutUnicastOptions = delivery?.timeout != null || delivery?.signal != null
577
+ ? { timeoutMs: delivery.timeout, signal: delivery.signal }
578
+ : undefined;
579
+ for await (const message of createExchangeHeadsMessages(this.log, [entry])) {
580
+ await this._mergeLeadersFromGidReferences(message, minReplicasValue, leaders);
581
+ const leadersForDelivery = delivery ? new Set(leaders.keys()) : undefined;
582
+ const set = this.addPeersToGidPeerHistory(entry.meta.gid, leaders.keys());
583
+ const hasRemotePeers = set.has(selfHash) ? set.size > 1 : set.size > 0;
584
+ if (!hasRemotePeers) {
585
+ if (requireRecipients) {
586
+ throw new NoPeersError(this.rpc.topic);
587
+ }
588
+ continue;
589
+ }
590
+ if (!delivery) {
591
+ this.rpc
592
+ .send(message, {
593
+ mode: isLeader
594
+ ? new SilentDelivery({ redundancy: 1, to: set })
595
+ : new AcknowledgeDelivery({ redundancy: 1, to: set }),
596
+ })
597
+ .catch((error) => logger.error(error));
598
+ continue;
599
+ }
600
+ const orderedRemoteRecipients = [];
601
+ for (const peer of leadersForDelivery) {
602
+ if (peer === selfHash) {
603
+ continue;
604
+ }
605
+ orderedRemoteRecipients.push(peer);
606
+ }
607
+ for (const peer of set) {
608
+ if (peer === selfHash) {
609
+ continue;
610
+ }
611
+ if (leadersForDelivery.has(peer)) {
612
+ continue;
613
+ }
614
+ orderedRemoteRecipients.push(peer);
615
+ }
616
+ const ackTo = [];
617
+ let silentTo;
618
+ // Default delivery semantics: require enough remote ACKs to reach the requested
619
+ // replication degree (local append counts as 1).
620
+ const ackLimit = settleMin == null ? Math.max(0, minReplicasValue - 1) : settleMin;
621
+ for (const peer of orderedRemoteRecipients) {
622
+ if (ackTo.length < ackLimit) {
623
+ ackTo.push(peer);
624
+ }
625
+ else {
626
+ silentTo ||= [];
627
+ silentTo.push(peer);
628
+ }
629
+ }
630
+ if (requireRecipients && orderedRemoteRecipients.length === 0) {
631
+ throw new NoPeersError(this.rpc.topic);
632
+ }
633
+ if (requireRecipients && ackTo.length + (silentTo?.length || 0) === 0) {
634
+ throw new NoPeersError(this.rpc.topic);
635
+ }
636
+ if (ackTo.length > 0) {
637
+ const payload = serialize(message);
638
+ for (const peer of ackTo) {
639
+ track((async () => {
640
+ // Unified decision point:
641
+ // - If we can prove a cheap direct path (connected or routed), use it.
642
+ // - Otherwise, fall back to the fanout unicast ACK path (bounded overlay routing).
643
+ // - If that fails, fall back to pubsub/RPC routing which may flood to discover routes.
644
+ const pubsub = this.node.services.pubsub;
645
+ const canDirectFast = Boolean(pubsub?.peers?.get?.(peer)?.isWritable) ||
646
+ Boolean(pubsub?.routes?.isReachable?.(pubsub?.publicKeyHash, peer, 0));
647
+ if (canDirectFast) {
648
+ await this.rpc.send(message, {
649
+ mode: new AcknowledgeDelivery({
650
+ redundancy: 1,
651
+ to: [peer],
652
+ }),
653
+ });
654
+ return;
655
+ }
656
+ if (this._fanoutChannel) {
657
+ try {
658
+ await this._fanoutChannel.unicastToAck(peer, payload, fanoutUnicastOptions);
659
+ return;
660
+ }
661
+ catch {
662
+ // fall back below
663
+ }
664
+ }
665
+ await this.rpc.send(message, {
666
+ mode: new AcknowledgeDelivery({
667
+ redundancy: 1,
668
+ to: [peer],
669
+ }),
670
+ });
671
+ })());
672
+ }
673
+ }
674
+ if (silentTo?.length) {
675
+ this.rpc
676
+ .send(message, {
677
+ mode: new SilentDelivery({ redundancy: 1, to: silentTo }),
678
+ })
679
+ .catch((error) => logger.error(error));
680
+ }
681
+ }
682
+ if (pending.length > 0) {
683
+ await Promise.all(pending);
684
+ }
685
+ }
686
+ async _mergeLeadersFromGidReferences(message, minReplicasValue, leaders) {
687
+ const gidReferences = message.heads[0]?.gidRefrences;
688
+ if (!gidReferences || gidReferences.length === 0) {
689
+ return;
690
+ }
691
+ for (const gidReference of gidReferences) {
692
+ const entryFromGid = this.log.entryIndex.getHeads(gidReference, false);
693
+ for (const gidEntry of await entryFromGid.all()) {
694
+ let coordinates = await this.getCoordinates(gidEntry);
695
+ if (coordinates == null) {
696
+ coordinates = await this.createCoordinates(gidEntry, minReplicasValue);
697
+ }
698
+ const found = await this._findLeaders(coordinates);
699
+ for (const [key, value] of found) {
700
+ leaders.set(key, value);
701
+ }
702
+ }
703
+ }
704
+ }
705
+ async _appendDeliverToAllFanout(entry) {
706
+ for await (const message of createExchangeHeadsMessages(this.log, [entry])) {
707
+ await this._publishExchangeHeadsViaFanout(message);
708
+ }
709
+ }
710
+ async _resolvePublicKeyFromHash(hash) {
711
+ const fanoutService = this.node.services.fanout;
712
+ return (fanoutService?.getPublicKey?.(hash) ??
713
+ this.node.services.pubsub.getPublicKey(hash));
714
+ }
715
+ async _getTopicSubscribers(topic) {
716
+ const maxPeers = 64;
717
+ // Prefer the bounded peer set we already know from the fanout overlay.
718
+ if (this._fanoutChannel && (topic === this.topic || topic === this.rpc.topic)) {
719
+ const hashes = this._fanoutChannel
720
+ .getPeerHashes({ includeSelf: false })
721
+ .slice(0, maxPeers);
722
+ if (hashes.length === 0)
723
+ return [];
724
+ const keys = await Promise.all(hashes.map((hash) => this._resolvePublicKeyFromHash(hash)));
725
+ const uniqueKeys = [];
726
+ const seen = new Set();
727
+ const selfHash = this.node.identity.publicKey.hashcode();
728
+ for (const key of keys) {
729
+ if (!key)
730
+ continue;
731
+ const hash = key.hashcode();
732
+ if (hash === selfHash)
733
+ continue;
734
+ if (seen.has(hash))
735
+ continue;
736
+ seen.add(hash);
737
+ uniqueKeys.push(key);
738
+ }
739
+ return uniqueKeys;
740
+ }
741
+ const selfHash = this.node.identity.publicKey.hashcode();
742
+ const hashes = [];
743
+ // Best-effort provider discovery (bounded). This requires bootstrap trackers.
744
+ try {
745
+ const fanoutService = this.node.services.fanout;
746
+ if (fanoutService?.queryProviders) {
747
+ const ns = `shared-log|${this.topic}`;
748
+ const seed = hashToSeed32(topic);
749
+ const providers = await fanoutService.queryProviders(ns, {
750
+ want: maxPeers,
751
+ seed,
752
+ });
753
+ for (const h of providers ?? []) {
754
+ if (!h || h === selfHash)
755
+ continue;
756
+ hashes.push(h);
757
+ if (hashes.length >= maxPeers)
758
+ break;
759
+ }
760
+ }
761
+ }
762
+ catch {
763
+ // Best-effort only.
764
+ }
765
+ // Next, use already-connected peer streams (bounded and cheap).
766
+ const peerMap = this.node.services.pubsub
767
+ ?.peers;
768
+ if (peerMap?.keys) {
769
+ for (const h of peerMap.keys()) {
770
+ if (!h || h === selfHash)
771
+ continue;
772
+ hashes.push(h);
773
+ if (hashes.length >= maxPeers)
774
+ break;
775
+ }
776
+ }
777
+ // Finally, fall back to libp2p connections (e.g. bootstrap peers) without requiring
778
+ // any global topic membership view.
779
+ if (hashes.length < maxPeers) {
780
+ const connectionManager = this.node.services.pubsub?.components
781
+ ?.connectionManager;
782
+ const connections = connectionManager?.getConnections?.() ?? [];
783
+ for (const conn of connections) {
784
+ const peerId = conn?.remotePeer;
785
+ if (!peerId)
786
+ continue;
787
+ try {
788
+ const h = getPublicKeyFromPeerId(peerId).hashcode();
789
+ if (!h || h === selfHash)
790
+ continue;
791
+ hashes.push(h);
792
+ if (hashes.length >= maxPeers)
793
+ break;
794
+ }
795
+ catch {
796
+ // Best-effort only.
797
+ }
798
+ }
799
+ }
800
+ if (hashes.length === 0)
801
+ return [];
802
+ const uniqueHashes = [];
803
+ const seen = new Set();
804
+ for (const h of hashes) {
805
+ if (seen.has(h))
806
+ continue;
807
+ seen.add(h);
808
+ uniqueHashes.push(h);
809
+ if (uniqueHashes.length >= maxPeers)
810
+ break;
811
+ }
812
+ const keys = await Promise.all(uniqueHashes.map((hash) => this._resolvePublicKeyFromHash(hash)));
813
+ const uniqueKeys = [];
814
+ for (const key of keys) {
815
+ if (!key)
816
+ continue;
817
+ const hash = key.hashcode();
818
+ if (hash === selfHash)
819
+ continue;
820
+ uniqueKeys.push(key);
821
+ }
822
+ return uniqueKeys;
823
+ }
286
824
  // @deprecated
287
825
  async getRole() {
288
826
  const segments = await this.getMyReplicationSegments();
@@ -586,6 +1124,7 @@ let SharedLog = (() => {
586
1124
  })
587
1125
  .all();
588
1126
  this.uniqueReplicators.delete(keyHash);
1127
+ this._replicatorJoinEmitted.delete(keyHash);
589
1128
  await this.replicationIndex.del({ query: { hash: keyHash } });
590
1129
  await this.updateOldestTimestampFromIndex();
591
1130
  const isMe = this.node.identity.publicKey.hashcode() === keyHash;
@@ -668,6 +1207,7 @@ let SharedLog = (() => {
668
1207
  const otherSegmentsIterator = this.replicationIndex.iterate({ query: { hash: from.hashcode() } }, { shape: { id: true } });
669
1208
  if ((await otherSegmentsIterator.next(1)).length === 0) {
670
1209
  this.uniqueReplicators.delete(from.hashcode());
1210
+ this._replicatorJoinEmitted.delete(from.hashcode());
671
1211
  }
672
1212
  await otherSegmentsIterator.close();
673
1213
  await this.updateOldestTimestampFromIndex();
@@ -687,6 +1227,7 @@ let SharedLog = (() => {
687
1227
  rebalance = rebalance == null ? true : rebalance;
688
1228
  let diffs;
689
1229
  let deleted = undefined;
1230
+ let isStoppedReplicating = false;
690
1231
  if (reset) {
691
1232
  deleted = (await this.replicationIndex
692
1233
  .iterate({
@@ -718,6 +1259,7 @@ let SharedLog = (() => {
718
1259
  ];
719
1260
  }
720
1261
  isNewReplicator = prevCount === 0 && ranges.length > 0;
1262
+ isStoppedReplicating = prevCount > 0 && ranges.length === 0;
721
1263
  }
722
1264
  else {
723
1265
  let batchSize = 100;
@@ -791,7 +1333,16 @@ let SharedLog = (() => {
791
1333
  .flat();
792
1334
  diffs = changes;
793
1335
  }
794
- this.uniqueReplicators.add(from.hashcode());
1336
+ const fromHash = from.hashcode();
1337
+ // Track replicator membership transitions synchronously so join/leave events are
1338
+ // idempotent even if we process concurrent reset messages/unsubscribes.
1339
+ const stoppedTransition = ranges.length === 0 ? this.uniqueReplicators.delete(fromHash) : false;
1340
+ if (ranges.length === 0) {
1341
+ this._replicatorJoinEmitted.delete(fromHash);
1342
+ }
1343
+ else {
1344
+ this.uniqueReplicators.add(fromHash);
1345
+ }
795
1346
  let now = +new Date();
796
1347
  let minRoleAge = await this.getDefaultMinRoleAge();
797
1348
  let isAllMature = true;
@@ -867,15 +1418,23 @@ let SharedLog = (() => {
867
1418
  detail: { publicKey: from },
868
1419
  }));
869
1420
  if (isNewReplicator) {
870
- this.events.dispatchEvent(new CustomEvent("replicator:join", {
871
- detail: { publicKey: from },
872
- }));
1421
+ if (!this._replicatorJoinEmitted.has(fromHash)) {
1422
+ this._replicatorJoinEmitted.add(fromHash);
1423
+ this.events.dispatchEvent(new CustomEvent("replicator:join", {
1424
+ detail: { publicKey: from },
1425
+ }));
1426
+ }
873
1427
  if (isAllMature) {
874
1428
  this.events.dispatchEvent(new CustomEvent("replicator:mature", {
875
1429
  detail: { publicKey: from },
876
1430
  }));
877
1431
  }
878
1432
  }
1433
+ if (isStoppedReplicating && stoppedTransition) {
1434
+ this.events.dispatchEvent(new CustomEvent("replicator:leave", {
1435
+ detail: { publicKey: from },
1436
+ }));
1437
+ }
879
1438
  if (rebalance) {
880
1439
  for (const diff of diffs) {
881
1440
  this.replicationChangeDebounceFn.add(diff);
@@ -895,6 +1454,20 @@ let SharedLog = (() => {
895
1454
  if (change) {
896
1455
  let addedOrReplaced = change.filter((x) => x.type !== "removed");
897
1456
  if (addedOrReplaced.length > 0) {
1457
+ // Provider discovery keep-alive (best-effort). This enables bounded targeted fetches
1458
+ // without relying on any global subscriber list.
1459
+ try {
1460
+ const fanoutService = this.node.services.fanout;
1461
+ if (fanoutService?.provide && !this._providerHandle) {
1462
+ this._providerHandle = fanoutService.provide(`shared-log|${this.topic}`, {
1463
+ ttlMs: 120_000,
1464
+ announceIntervalMs: 60_000,
1465
+ });
1466
+ }
1467
+ }
1468
+ catch {
1469
+ // Best-effort only.
1470
+ }
898
1471
  let message = undefined;
899
1472
  if (options.reset) {
900
1473
  message = new AllReplicatingSegmentsMessage({
@@ -954,6 +1527,75 @@ let SharedLog = (() => {
954
1527
  return this.pruneDebouncedFn.add(args);
955
1528
  }
956
1529
  }
1530
+ clearCheckedPruneRetry(hash) {
1531
+ const state = this._checkedPruneRetries.get(hash);
1532
+ if (state?.timer) {
1533
+ clearTimeout(state.timer);
1534
+ }
1535
+ this._checkedPruneRetries.delete(hash);
1536
+ }
1537
+ scheduleCheckedPruneRetry(args) {
1538
+ if (this.closed)
1539
+ return;
1540
+ if (this._pendingDeletes.has(args.entry.hash))
1541
+ return;
1542
+ const hash = args.entry.hash;
1543
+ const state = this._checkedPruneRetries.get(hash) ?? { attempts: 0 };
1544
+ if (state.timer)
1545
+ return;
1546
+ if (state.attempts >= CHECKED_PRUNE_RETRY_MAX_ATTEMPTS) {
1547
+ // Avoid unbounded background retries; a new replication-change event can
1548
+ // always re-enqueue pruning with fresh leader info.
1549
+ return;
1550
+ }
1551
+ const attempt = state.attempts + 1;
1552
+ const jitterMs = Math.floor(Math.random() * 250);
1553
+ const delayMs = Math.min(CHECKED_PRUNE_RETRY_MAX_DELAY_MS, 1_000 * 2 ** (attempt - 1) + jitterMs);
1554
+ state.attempts = attempt;
1555
+ state.timer = setTimeout(async () => {
1556
+ const st = this._checkedPruneRetries.get(hash);
1557
+ if (st)
1558
+ st.timer = undefined;
1559
+ if (this.closed)
1560
+ return;
1561
+ if (this._pendingDeletes.has(hash))
1562
+ return;
1563
+ let leadersMap;
1564
+ try {
1565
+ const replicas = decodeReplicas(args.entry).getValue(this);
1566
+ leadersMap = await this.findLeadersFromEntry(args.entry, replicas, {
1567
+ roleAge: 0,
1568
+ });
1569
+ }
1570
+ catch {
1571
+ // Best-effort only.
1572
+ }
1573
+ if (!leadersMap || leadersMap.size === 0) {
1574
+ if (args.leaders instanceof Map) {
1575
+ leadersMap = args.leaders;
1576
+ }
1577
+ else {
1578
+ leadersMap = new Map();
1579
+ for (const k of args.leaders) {
1580
+ leadersMap.set(k, { intersecting: true });
1581
+ }
1582
+ }
1583
+ }
1584
+ try {
1585
+ const leadersForRetry = leadersMap ?? new Map();
1586
+ await this.pruneDebouncedFnAddIfNotKeeping({
1587
+ key: hash,
1588
+ // TODO types
1589
+ value: { entry: args.entry, leaders: leadersForRetry },
1590
+ });
1591
+ }
1592
+ catch {
1593
+ // Best-effort only; pruning will be re-attempted on future changes.
1594
+ }
1595
+ }, delayMs);
1596
+ state.timer.unref?.();
1597
+ this._checkedPruneRetries.set(hash, state);
1598
+ }
957
1599
  async append(data, options) {
958
1600
  const appendOptions = { ...options };
959
1601
  const minReplicas = this.getClampedReplicas(options?.replicas
@@ -1002,239 +1644,18 @@ let SharedLog = (() => {
1002
1644
  if (options?.target !== "none") {
1003
1645
  const target = options?.target;
1004
1646
  const deliveryArg = options?.delivery;
1005
- const delivery = deliveryArg === undefined || deliveryArg === false
1006
- ? undefined
1007
- : deliveryArg === true
1008
- ? {}
1009
- : deliveryArg;
1010
- let requireRecipients = false;
1011
- let settleMin;
1012
- let guardDelivery = undefined;
1013
- let firstDeliveryPromise;
1014
- let deliveryPromises;
1015
- let addDeliveryPromise;
1016
- const leadersForDelivery = delivery && (target === "replicators" || !target)
1017
- ? new Set(leaders.keys())
1018
- : undefined;
1019
- if (delivery) {
1020
- const deliverySettle = delivery.settle ?? true;
1021
- const deliveryTimeout = delivery.timeout;
1022
- const deliverySignal = delivery.signal;
1023
- requireRecipients = delivery.requireRecipients === true;
1024
- settleMin =
1025
- typeof deliverySettle === "object" &&
1026
- Number.isFinite(deliverySettle.min)
1027
- ? Math.max(0, Math.floor(deliverySettle.min))
1028
- : undefined;
1029
- guardDelivery =
1030
- deliveryTimeout == null && deliverySignal == null
1031
- ? undefined
1032
- : (promise) => new Promise((resolve, reject) => {
1033
- let settled = false;
1034
- let timer = undefined;
1035
- const onAbort = () => {
1036
- if (settled) {
1037
- return;
1038
- }
1039
- settled = true;
1040
- promise.catch(() => { });
1041
- cleanup();
1042
- reject(new AbortError());
1043
- };
1044
- const cleanup = () => {
1045
- if (timer != null) {
1046
- clearTimeout(timer);
1047
- timer = undefined;
1048
- }
1049
- deliverySignal?.removeEventListener("abort", onAbort);
1050
- };
1051
- if (deliverySignal) {
1052
- if (deliverySignal.aborted) {
1053
- onAbort();
1054
- return;
1055
- }
1056
- deliverySignal.addEventListener("abort", onAbort);
1057
- }
1058
- if (deliveryTimeout != null) {
1059
- timer = setTimeout(() => {
1060
- if (settled) {
1061
- return;
1062
- }
1063
- settled = true;
1064
- promise.catch(() => { });
1065
- cleanup();
1066
- reject(new TimeoutError(`Timeout waiting for delivery`));
1067
- }, deliveryTimeout);
1068
- }
1069
- promise
1070
- .then(() => {
1071
- if (settled) {
1072
- return;
1073
- }
1074
- settled = true;
1075
- cleanup();
1076
- resolve();
1077
- })
1078
- .catch((e) => {
1079
- if (settled) {
1080
- return;
1081
- }
1082
- settled = true;
1083
- cleanup();
1084
- reject(e);
1085
- });
1086
- });
1087
- addDeliveryPromise = (promise) => {
1088
- if (!firstDeliveryPromise) {
1089
- firstDeliveryPromise = promise;
1090
- return;
1091
- }
1092
- if (!deliveryPromises) {
1093
- deliveryPromises = [firstDeliveryPromise, promise];
1094
- firstDeliveryPromise = undefined;
1095
- return;
1096
- }
1097
- deliveryPromises.push(promise);
1098
- };
1647
+ const hasDelivery = !(deliveryArg === undefined || deliveryArg === false);
1648
+ if (target === "all" && hasDelivery) {
1649
+ throw new Error(`delivery options are not supported with target="all"; fanout broadcast is fire-and-forward`);
1099
1650
  }
1100
- for await (const message of createExchangeHeadsMessages(this.log, [
1101
- result.entry,
1102
- ])) {
1103
- if (target === "replicators" || !target) {
1104
- if (message.heads[0].gidRefrences.length > 0) {
1105
- for (const ref of message.heads[0].gidRefrences) {
1106
- const entryFromGid = this.log.entryIndex.getHeads(ref, false);
1107
- for (const entry of await entryFromGid.all()) {
1108
- let coordinates = await this.getCoordinates(entry);
1109
- if (coordinates == null) {
1110
- coordinates = await this.createCoordinates(entry, minReplicasValue);
1111
- // TODO are we every to come here?
1112
- }
1113
- const result = await this._findLeaders(coordinates);
1114
- for (const [k, v] of result) {
1115
- leaders.set(k, v);
1116
- }
1117
- }
1118
- }
1119
- }
1120
- const set = this.addPeersToGidPeerHistory(result.entry.meta.gid, leaders.keys());
1121
- let hasRemotePeers = set.has(selfHash) ? set.size > 1 : set.size > 0;
1122
- if (!hasRemotePeers) {
1123
- if (requireRecipients) {
1124
- throw new NoPeersError(this.rpc.topic);
1125
- }
1126
- continue;
1127
- }
1128
- if (!delivery) {
1129
- this.rpc
1130
- .send(message, {
1131
- mode: isLeader
1132
- ? new SilentDelivery({ redundancy: 1, to: set })
1133
- : new AcknowledgeDelivery({ redundancy: 1, to: set }),
1134
- })
1135
- .catch((e) => logger.error(e));
1136
- continue;
1137
- }
1138
- let expectedRemoteRecipientsCount = 0;
1139
- const ackTo = [];
1140
- let silentTo;
1141
- const ackLimit = settleMin == null ? Number.POSITIVE_INFINITY : settleMin;
1142
- // Always settle towards the current expected replicators for this entry,
1143
- // not the entire gid peer history.
1144
- for (const peer of leadersForDelivery) {
1145
- if (peer === selfHash) {
1146
- continue;
1147
- }
1148
- expectedRemoteRecipientsCount++;
1149
- if (ackTo.length < ackLimit) {
1150
- ackTo.push(peer);
1151
- }
1152
- else {
1153
- silentTo ||= [];
1154
- silentTo.push(peer);
1155
- }
1156
- }
1157
- // Still deliver to known peers for the gid (best-effort), but don't let them
1158
- // satisfy the settle requirement.
1159
- for (const peer of set) {
1160
- if (peer === selfHash) {
1161
- continue;
1162
- }
1163
- if (leadersForDelivery.has(peer)) {
1164
- continue;
1165
- }
1166
- silentTo ||= [];
1167
- silentTo.push(peer);
1168
- }
1169
- if (requireRecipients && expectedRemoteRecipientsCount === 0) {
1170
- throw new NoPeersError(this.rpc.topic);
1171
- }
1172
- if (requireRecipients &&
1173
- ackTo.length + (silentTo?.length || 0) === 0) {
1174
- throw new NoPeersError(this.rpc.topic);
1175
- }
1176
- if (ackTo.length > 0) {
1177
- const promise = this.rpc.send(message, {
1178
- mode: new AcknowledgeDelivery({
1179
- redundancy: 1,
1180
- to: ackTo,
1181
- }),
1182
- });
1183
- addDeliveryPromise(guardDelivery ? guardDelivery(promise) : promise);
1184
- }
1185
- if (silentTo?.length) {
1186
- this.rpc
1187
- .send(message, {
1188
- mode: new SilentDelivery({ redundancy: 1, to: silentTo }),
1189
- })
1190
- .catch((e) => logger.error(e));
1191
- }
1192
- }
1193
- else {
1194
- if (!delivery) {
1195
- this.rpc.send(message).catch((e) => logger.error(e));
1196
- continue;
1197
- }
1198
- const subscribers = await this.node.services.pubsub.getSubscribers(this.rpc.topic);
1199
- const ackTo = [];
1200
- let silentTo;
1201
- const ackLimit = settleMin == null ? Number.POSITIVE_INFINITY : settleMin;
1202
- for (const subscriber of subscribers || []) {
1203
- if (subscriber.hashcode() === selfHash) {
1204
- continue;
1205
- }
1206
- if (ackTo.length < ackLimit) {
1207
- ackTo.push(subscriber);
1208
- }
1209
- else {
1210
- silentTo ||= [];
1211
- silentTo.push(subscriber);
1212
- }
1213
- }
1214
- if (requireRecipients &&
1215
- ackTo.length + (silentTo?.length || 0) === 0) {
1216
- throw new NoPeersError(this.rpc.topic);
1217
- }
1218
- if (ackTo.length > 0) {
1219
- const promise = this.rpc.send(message, {
1220
- mode: new AcknowledgeDelivery({ redundancy: 1, to: ackTo }),
1221
- });
1222
- addDeliveryPromise(guardDelivery ? guardDelivery(promise) : promise);
1223
- }
1224
- if (silentTo?.length) {
1225
- this.rpc
1226
- .send(message, {
1227
- mode: new SilentDelivery({ redundancy: 1, to: silentTo }),
1228
- })
1229
- .catch((e) => logger.error(e));
1230
- }
1231
- }
1651
+ if (target === "all" && !this._fanoutChannel) {
1652
+ throw new Error(`No fanout channel configured for shared-log topic ${this.topic}`);
1232
1653
  }
1233
- if (deliveryPromises) {
1234
- await Promise.all(deliveryPromises);
1654
+ if (target === "all") {
1655
+ await this._appendDeliverToAllFanout(result.entry);
1235
1656
  }
1236
- else if (firstDeliveryPromise) {
1237
- await firstDeliveryPromise;
1657
+ else {
1658
+ await this._appendDeliverToReplicators(result.entry, minReplicasValue, leaders, selfHash, isLeader, deliveryArg);
1238
1659
  }
1239
1660
  }
1240
1661
  if (!isLeader) {
@@ -1269,9 +1690,13 @@ let SharedLog = (() => {
1269
1690
  this._pendingDeletes = new Map();
1270
1691
  this._pendingIHave = new Map();
1271
1692
  this.latestReplicationInfoMessage = new Map();
1693
+ this._replicationInfoBlockedPeers = new Set();
1694
+ this._replicationInfoRequestByPeer = new Map();
1695
+ this._replicationInfoApplyQueueByPeer = new Map();
1272
1696
  this.coordinateToHash = new Cache({ max: 1e6, ttl: 1e4 });
1273
1697
  this.recentlyRebalanced = new Cache({ max: 1e4, ttl: 1e5 });
1274
1698
  this.uniqueReplicators = new Set();
1699
+ this._replicatorJoinEmitted = new Set();
1275
1700
  this._replicatorsReconciled = false;
1276
1701
  this.openTime = +new Date();
1277
1702
  this.oldestOpenTime = this.openTime;
@@ -1298,18 +1723,70 @@ let SharedLog = (() => {
1298
1723
  throw new Error("waitForReplicatorRequestMaxAttempts must be a positive number");
1299
1724
  }
1300
1725
  this._closeController = new AbortController();
1726
+ this._closeController.signal.addEventListener("abort", () => {
1727
+ for (const [_peer, state] of this._replicationInfoRequestByPeer) {
1728
+ if (state.timer)
1729
+ clearTimeout(state.timer);
1730
+ }
1731
+ this._replicationInfoRequestByPeer.clear();
1732
+ });
1301
1733
  this._isTrustedReplicator = options?.canReplicate;
1302
1734
  this.keep = options?.keep;
1303
1735
  this.pendingMaturity = new Map();
1304
1736
  const id = sha256Base64Sync(this.log.id);
1305
1737
  const storage = await this.node.storage.sublevel(id);
1306
1738
  const localBlocks = await new AnyBlockStore(await storage.sublevel("blocks"));
1739
+ const fanoutService = this.node.services.fanout;
1740
+ const blockProviderNamespace = (cid) => `cid:${cid}`;
1307
1741
  this.remoteBlocks = new RemoteBlocks({
1308
1742
  local: localBlocks,
1309
- publish: (message, options) => this.rpc.send(new BlocksMessage(message), options.mode instanceof AnyWhere ? undefined : options),
1743
+ publish: (message, options) => this.rpc.send(new BlocksMessage(message), options),
1310
1744
  waitFor: this.rpc.waitFor.bind(this.rpc),
1311
1745
  publicKey: this.node.identity.publicKey,
1312
1746
  eagerBlocks: options?.eagerBlocks ?? true,
1747
+ resolveProviders: async (cid, opts) => {
1748
+ // 1) tracker-backed provider directory (best-effort, bounded)
1749
+ try {
1750
+ const providers = await fanoutService?.queryProviders(blockProviderNamespace(cid), {
1751
+ want: 8,
1752
+ timeoutMs: 2_000,
1753
+ queryTimeoutMs: 500,
1754
+ bootstrapMaxPeers: 2,
1755
+ signal: opts?.signal,
1756
+ });
1757
+ if (providers && providers.length > 0)
1758
+ return providers;
1759
+ }
1760
+ catch {
1761
+ // ignore discovery failures
1762
+ }
1763
+ // 2) fallback to currently connected RPC peers
1764
+ const self = this.node.identity.publicKey.hashcode();
1765
+ const out = [];
1766
+ const peers = this.rpc?.peers;
1767
+ for (const h of peers?.keys?.() ?? []) {
1768
+ if (h === self)
1769
+ continue;
1770
+ if (out.includes(h))
1771
+ continue;
1772
+ out.push(h);
1773
+ if (out.length >= 32)
1774
+ break;
1775
+ }
1776
+ return out;
1777
+ },
1778
+ onPut: async (cid) => {
1779
+ // Best-effort directory announce for "get without remote.from" workflows.
1780
+ try {
1781
+ await fanoutService?.announceProvider(blockProviderNamespace(cid), {
1782
+ ttlMs: 120_000,
1783
+ bootstrapMaxPeers: 2,
1784
+ });
1785
+ }
1786
+ catch {
1787
+ // ignore announce failures
1788
+ }
1789
+ },
1313
1790
  });
1314
1791
  await this.remoteBlocks.start();
1315
1792
  const logScope = await this.node.indexer.scope(id);
@@ -1332,6 +1809,7 @@ let SharedLog = (() => {
1332
1809
  this._gidPeersHistory = new Map();
1333
1810
  this._requestIPruneSent = new Map();
1334
1811
  this._requestIPruneResponseReplicatorSet = new Map();
1812
+ this._checkedPruneRetries = new Map();
1335
1813
  this.replicationChangeDebounceFn = debounceAggregationChanges((change) => this.onReplicationChange(change).then(() => this.rebalanceParticipationDebounced?.call()), this.distributionDebounceTime);
1336
1814
  this.pruneDebouncedFn = debouncedAccumulatorMap((map) => {
1337
1815
  this.prune(map);
@@ -1389,6 +1867,83 @@ let SharedLog = (() => {
1389
1867
  }, PRUNE_DEBOUNCE_INTERVAL);
1390
1868
  await this.log.open(this.remoteBlocks, this.node.identity, {
1391
1869
  keychain: this.node.services.keychain,
1870
+ resolveRemotePeers: async (hash, options) => {
1871
+ if (options?.signal?.aborted)
1872
+ return undefined;
1873
+ const maxPeers = 8;
1874
+ const self = this.node.identity.publicKey.hashcode();
1875
+ const seed = hashToSeed32(hash);
1876
+ // Best hint: peers that have recently confirmed having this entry hash.
1877
+ const hinted = this._requestIPruneResponseReplicatorSet.get(hash);
1878
+ if (hinted && hinted.size > 0) {
1879
+ const peers = [...hinted].filter((p) => p !== self);
1880
+ return peers.length > 0
1881
+ ? pickDeterministicSubset(peers, seed, maxPeers)
1882
+ : undefined;
1883
+ }
1884
+ // Next: peers we already contacted about this hash (may still have it).
1885
+ const contacted = this._requestIPruneSent.get(hash);
1886
+ if (contacted && contacted.size > 0) {
1887
+ const peers = [...contacted].filter((p) => p !== self);
1888
+ return peers.length > 0
1889
+ ? pickDeterministicSubset(peers, seed, maxPeers)
1890
+ : undefined;
1891
+ }
1892
+ let candidates;
1893
+ // Prefer the replicator cache; fall back to subscribers if we have no other signal.
1894
+ const replicatorCandidates = [...this.uniqueReplicators].filter((p) => p !== self);
1895
+ if (replicatorCandidates.length > 0) {
1896
+ candidates = replicatorCandidates;
1897
+ }
1898
+ else {
1899
+ try {
1900
+ const subscribers = await this._getTopicSubscribers(this.topic);
1901
+ const subscriberCandidates = subscribers?.map((k) => k.hashcode()).filter((p) => p !== self) ??
1902
+ [];
1903
+ candidates =
1904
+ subscriberCandidates.length > 0 ? subscriberCandidates : undefined;
1905
+ }
1906
+ catch {
1907
+ // Best-effort only.
1908
+ }
1909
+ if (!candidates || candidates.length === 0) {
1910
+ // Last resort: peers we are already directly connected to. This avoids
1911
+ // depending on global membership knowledge in early-join scenarios.
1912
+ const peerMap = this.node.services.pubsub?.peers;
1913
+ if (peerMap?.keys) {
1914
+ candidates = [...peerMap.keys()];
1915
+ }
1916
+ }
1917
+ if (!candidates || candidates.length === 0) {
1918
+ // Even if the pubsub stream has no established peer streams yet, we may
1919
+ // still have a libp2p connection to one or more peers (e.g. bootstrap).
1920
+ const connectionManager = this.node.services.pubsub?.components
1921
+ ?.connectionManager;
1922
+ const connections = connectionManager?.getConnections?.() ?? [];
1923
+ const connectionHashes = [];
1924
+ for (const conn of connections) {
1925
+ const peerId = conn?.remotePeer;
1926
+ if (!peerId)
1927
+ continue;
1928
+ try {
1929
+ connectionHashes.push(getPublicKeyFromPeerId(peerId).hashcode());
1930
+ }
1931
+ catch {
1932
+ // Best-effort only.
1933
+ }
1934
+ }
1935
+ if (connectionHashes.length > 0) {
1936
+ candidates = connectionHashes;
1937
+ }
1938
+ }
1939
+ }
1940
+ if (!candidates || candidates.length === 0)
1941
+ return undefined;
1942
+ const peers = candidates.filter((p) => p !== self);
1943
+ if (peers.length === 0)
1944
+ return undefined;
1945
+ return pickDeterministicSubset(peers, seed, maxPeers);
1946
+ },
1392
1947
  ...this._logProperties,
1393
1948
  onChange: async (change) => {
1394
1949
  await this.onChange(change);
@@ -1456,6 +2011,7 @@ let SharedLog = (() => {
1456
2011
  this._onUnsubscriptionFn || this._onUnsubscription.bind(this);
1457
2012
  await this.node.services.pubsub.addEventListener("unsubscribe", this._onUnsubscriptionFn);
1458
2013
  await this.rpc.subscribe();
2014
+ await this._openFanoutChannel(options?.fanout);
1459
2015
  // mark all our replication ranges as "new", this would allow other peers to understand that we recently reopened our database and might need some sync and warmup
1460
2016
  await this.updateTimestampOfOwnedReplicationRanges(); // TODO do we need to do this before subscribing?
1461
2017
  // if we had a previous session with replication info, and new replication info dictates that we unreplicate
@@ -1518,7 +2074,7 @@ let SharedLog = (() => {
1518
2074
  });
1519
2075
  await this.rebalanceParticipation();
1520
2076
  // Take into account existing subscription
1521
- (await this.node.services.pubsub.getSubscribers(this.topic))?.forEach((v, k) => {
2077
+ (await this._getTopicSubscribers(this.topic))?.forEach((v) => {
1522
2078
  if (v.equals(this.node.identity.publicKey)) {
1523
2079
  return;
1524
2080
  }
@@ -1551,18 +2107,22 @@ let SharedLog = (() => {
1551
2107
  })
1552
2108
  .then(async () => {
1553
2109
  // is reachable, announce change events
1554
- const key = await this.node.services.pubsub.getPublicKey(segment.value.hash);
2110
+ const key = await this._resolvePublicKeyFromHash(segment.value.hash);
1555
2111
  if (!key) {
1556
2112
  throw new Error("Failed to resolve public key from hash: " +
1557
2113
  segment.value.hash);
1558
2114
  }
1559
- this.uniqueReplicators.add(key.hashcode());
1560
- this.events.dispatchEvent(new CustomEvent("replicator:join", {
1561
- detail: { publicKey: key },
1562
- }));
1563
- this.events.dispatchEvent(new CustomEvent("replication:change", {
1564
- detail: { publicKey: key },
1565
- }));
2115
+ const keyHash = key.hashcode();
2116
+ this.uniqueReplicators.add(keyHash);
2117
+ if (!this._replicatorJoinEmitted.has(keyHash)) {
2118
+ this._replicatorJoinEmitted.add(keyHash);
2119
+ this.events.dispatchEvent(new CustomEvent("replicator:join", {
2120
+ detail: { publicKey: key },
2121
+ }));
2122
+ this.events.dispatchEvent(new CustomEvent("replication:change", {
2123
+ detail: { publicKey: key },
2124
+ }));
2125
+ }
1566
2126
  })
1567
2127
  .catch(async (e) => {
1568
2128
  if (isNotStartedError(e)) {
@@ -1672,26 +2232,33 @@ let SharedLog = (() => {
1672
2232
  for (const [key, _] of this.syncronizer.syncInFlight) {
1673
2233
  set.add(key);
1674
2234
  }
2235
+ const selfHash = this.node.identity.publicKey.hashcode();
1675
2236
  if (options?.reachableOnly) {
1676
- // Prefer the live pubsub subscriber set when filtering reachability.
1677
- // `uniqueReplicators` is primarily driven by replication messages and can lag during
1678
- // joins/restarts; using subscribers prevents excluding peers that are reachable but
1679
- // whose replication ranges were loaded from disk or haven't been processed yet.
1680
- const subscribers = (await this.node.services.pubsub.getSubscribers(this.topic)) ??
1681
- undefined;
2237
+ const directPeers = this.node.services
2238
+ .pubsub?.peers;
2239
+ // Prefer the live pubsub subscriber set when filtering reachability. In some
2240
+ // flows peers can be reachable/active even before (or without) subscriber
2241
+ // state converging, so also consider direct pubsub peers.
2242
+ const subscribers = (await this._getTopicSubscribers(this.topic)) ?? undefined;
1682
2243
  const subscriberHashcodes = subscribers
1683
2244
  ? new Set(subscribers.map((key) => key.hashcode()))
1684
2245
  : undefined;
2246
+ // If reachability is requested but we have no basis for filtering yet
2247
+ // (subscriber snapshot hasn't converged), return the full cover set.
2248
+ // Otherwise, only keep peers we can currently reach.
2249
+ const canFilter = directPeers != null ||
2250
+ (subscriberHashcodes && subscriberHashcodes.size > 0);
2251
+ if (!canFilter) {
2252
+ return [...set];
2253
+ }
1685
2254
  const reachable = [];
1686
- const selfHash = this.node.identity.publicKey.hashcode();
1687
2255
  for (const peer of set) {
1688
2256
  if (peer === selfHash) {
1689
2257
  reachable.push(peer);
1690
2258
  continue;
1691
2259
  }
1692
- if (subscriberHashcodes
1693
- ? subscriberHashcodes.has(peer)
1694
- : this.uniqueReplicators.has(peer)) {
2260
+ if ((subscriberHashcodes && subscriberHashcodes.has(peer)) ||
2261
+ (directPeers && directPeers.has(peer))) {
1695
2262
  reachable.push(peer);
1696
2263
  }
1697
2264
  }
@@ -1716,6 +2283,14 @@ let SharedLog = (() => {
1716
2283
  }
1717
2284
  this.pendingMaturity.clear();
1718
2285
  this.distributeQueue?.clear();
2286
+ this._closeFanoutChannel();
2287
+ try {
2288
+ this._providerHandle?.close();
2289
+ }
2290
+ catch {
2291
+ // ignore
2292
+ }
2293
+ this._providerHandle = undefined;
1719
2294
  this.coordinateToHash.clear();
1720
2295
  this.recentlyRebalanced.clear();
1721
2296
  this.uniqueReplicators.clear();
@@ -1730,13 +2305,24 @@ let SharedLog = (() => {
1730
2305
  for (const [_k, v] of this._pendingIHave) {
1731
2306
  v.clear();
1732
2307
  }
2308
+ for (const [_k, v] of this._checkedPruneRetries) {
2309
+ if (v.timer)
2310
+ clearTimeout(v.timer);
2311
+ }
1733
2312
  await this.remoteBlocks.stop();
1734
2313
  this._pendingDeletes.clear();
1735
2314
  this._pendingIHave.clear();
2315
+ this._checkedPruneRetries.clear();
1736
2316
  this.latestReplicationInfoMessage.clear();
1737
2317
  this._gidPeersHistory.clear();
1738
2318
  this._requestIPruneSent.clear();
1739
2319
  this._requestIPruneResponseReplicatorSet.clear();
2320
+ // Cancel any pending debounced timers so they can't fire after we've torn down
2321
+ // indexes/RPC state.
2322
+ this.rebalanceParticipationDebounced?.close();
2323
+ this.replicationChangeDebounceFn?.close?.();
2324
+ this.pruneDebouncedFn?.close?.();
2325
+ this.responseToPruneDebouncedFn?.close?.();
1740
2326
  this.pruneDebouncedFn = undefined;
1741
2327
  this.rebalanceParticipationDebounced = undefined;
1742
2328
  this._replicationRangeIndex.stop();
@@ -1747,6 +2333,53 @@ let SharedLog = (() => {
1747
2333
  /* this._totalParticipation = 0; */
1748
2334
  }
1749
2335
  async close(from) {
2336
+ // Best-effort: announce that we are going offline before tearing down
2337
+ // RPC/subscription state.
2338
+ //
2339
+ // Important: do not delete our local replication ranges here. Keeping them
2340
+ // allows `replicate: { type: "resume" }` to restore the previous role on
2341
+ // restart. Explicit `unreplicate()` still clears local state.
2342
+ try {
2343
+ if (!this.closed) {
2344
+ // Prevent any late debounced timers (rebalance/prune) from publishing
2345
+ // replication info after we announce "segments: []". These races can leave
2346
+ // stale segments on remotes after rapid open/close cycles.
2347
+ this._isReplicating = false;
2348
+ this._isAdaptiveReplicating = false;
2349
+ this.rebalanceParticipationDebounced?.close();
2350
+ this.replicationChangeDebounceFn?.close?.();
2351
+ this.pruneDebouncedFn?.close?.();
2352
+ this.responseToPruneDebouncedFn?.close?.();
2353
+ // Ensure the "I'm leaving" replication reset is actually published before
2354
+ // the RPC child program closes and unsubscribes from its topic. If we fire
2355
+ // and forget here, the publish can race with `super.close()` and get dropped,
2356
+ // leaving stale replication segments on remotes (flaky join/leave tests).
2357
+ // Also ensure close is bounded even when shard overlays are mid-reconcile.
2358
+ const abort = new AbortController();
2359
+ const abortTimer = setTimeout(() => {
2360
+ try {
2361
+ abort.abort(new TimeoutError("shared-log close replication reset timed out"));
2362
+ }
2363
+ catch {
2364
+ abort.abort();
2365
+ }
2366
+ }, 2_000);
2367
+ try {
2368
+ await this.rpc
2369
+ .send(new AllReplicatingSegmentsMessage({ segments: [] }), {
2370
+ priority: 1,
2371
+ signal: abort.signal,
2372
+ })
2373
+ .catch(() => { });
2374
+ }
2375
+ finally {
2376
+ clearTimeout(abortTimer);
2377
+ }
2378
+ }
2379
+ }
2380
+ catch {
2381
+ // ignore: close should be resilient even if we were never fully started
2382
+ }
1750
2383
  const superClosed = await super.close(from);
1751
2384
  if (!superClosed) {
1752
2385
  return superClosed;
@@ -1756,6 +2389,41 @@ let SharedLog = (() => {
1756
2389
  return true;
1757
2390
  }
1758
2391
  async drop(from) {
2392
+ // Best-effort: announce that we are going offline before tearing down
2393
+ // RPC/subscription state (same reasoning as in `close()`).
2394
+ try {
2395
+ if (!this.closed) {
2396
+ this._isReplicating = false;
2397
+ this._isAdaptiveReplicating = false;
2398
+ this.rebalanceParticipationDebounced?.close();
2399
+ this.replicationChangeDebounceFn?.close?.();
2400
+ this.pruneDebouncedFn?.close?.();
2401
+ this.responseToPruneDebouncedFn?.close?.();
2402
+ const abort = new AbortController();
2403
+ const abortTimer = setTimeout(() => {
2404
+ try {
2405
+ abort.abort(new TimeoutError("shared-log drop replication reset timed out"));
2406
+ }
2407
+ catch {
2408
+ abort.abort();
2409
+ }
2410
+ }, 2_000);
2411
+ try {
2412
+ await this.rpc
2413
+ .send(new AllReplicatingSegmentsMessage({ segments: [] }), {
2414
+ priority: 1,
2415
+ signal: abort.signal,
2416
+ })
2417
+ .catch(() => { });
2418
+ }
2419
+ finally {
2420
+ clearTimeout(abortTimer);
2421
+ }
2422
+ }
2423
+ }
2424
+ catch {
2425
+ // ignore: drop should be resilient even if we were never fully started
2426
+ }
1759
2427
  const superDropped = await super.drop(from);
1760
2428
  if (!superDropped) {
1761
2429
  return superDropped;
@@ -2041,7 +2709,7 @@ let SharedLog = (() => {
2041
2709
  const segments = (await this.getMyReplicationSegments()).map((x) => x.toReplicationRange());
2042
2710
  this.rpc
2043
2711
  .send(new AllReplicatingSegmentsMessage({ segments }), {
2044
- mode: new SeekDelivery({ to: [context.from], redundancy: 1 }),
2712
+ mode: new AcknowledgeDelivery({ to: [context.from], redundancy: 1 }),
2045
2713
  })
2046
2714
  .catch((e) => logger.error(e.toString()));
2047
2715
  // for backwards compatibility (v8) remove this when we are sure that all nodes are v9+
@@ -2072,33 +2740,55 @@ let SharedLog = (() => {
2072
2740
  // `Program.waitFor()`. Dropping these messages can lead to missing replicator info
2073
2741
  // (and downstream `waitForReplicator()` timeouts) under timing-sensitive joins.
2074
2742
  const from = context.from;
2743
+ const fromHash = from.hashcode();
2744
+ if (this._replicationInfoBlockedPeers.has(fromHash)) {
2745
+ return;
2746
+ }
2075
2747
  const messageTimestamp = context.message.header.timestamp;
2076
- (async () => {
2077
- const prev = this.latestReplicationInfoMessage.get(from.hashcode());
2078
- if (prev && prev > messageTimestamp) {
2079
- return;
2080
- }
2081
- this.latestReplicationInfoMessage.set(from.hashcode(), messageTimestamp);
2082
- if (this.closed) {
2083
- return;
2748
+ await this.withReplicationInfoApplyQueue(fromHash, async () => {
2749
+ try {
2750
+ // The peer may have unsubscribed after this message was queued.
2751
+ if (this._replicationInfoBlockedPeers.has(fromHash)) {
2752
+ return;
2753
+ }
2754
+ // Process in-order to avoid races where repeated reset messages arrive
2755
+ // concurrently and trigger spurious "added" diffs / rebalancing.
2756
+ const prev = this.latestReplicationInfoMessage.get(fromHash);
2757
+ if (prev && prev > messageTimestamp) {
2758
+ return;
2759
+ }
2760
+ this.latestReplicationInfoMessage.set(fromHash, messageTimestamp);
2761
+ if (this.closed) {
2762
+ return;
2763
+ }
2764
+ const reset = msg instanceof AllReplicatingSegmentsMessage;
2765
+ await this.addReplicationRange(replicationInfoMessage.segments.map((x) => x.toReplicationRangeIndexable(from)), from, {
2766
+ reset,
2767
+ checkDuplicates: true,
2768
+ timestamp: Number(messageTimestamp),
2769
+ });
2770
+ // If the peer reports any replication segments, stop re-requesting.
2771
+ // (Empty reports can be transient during startup.)
2772
+ if (replicationInfoMessage.segments.length > 0) {
2773
+ this.cancelReplicationInfoRequests(fromHash);
2774
+ }
2084
2775
  }
2085
- const reset = msg instanceof AllReplicatingSegmentsMessage;
2086
- await this.addReplicationRange(replicationInfoMessage.segments.map((x) => x.toReplicationRangeIndexable(from)), from, {
2087
- reset,
2088
- checkDuplicates: true,
2089
- timestamp: Number(messageTimestamp),
2090
- });
2091
- })().catch((e) => {
2092
- if (isNotStartedError(e)) {
2093
- return;
2776
+ catch (e) {
2777
+ if (isNotStartedError(e)) {
2778
+ return;
2779
+ }
2780
+ logger.error(`Failed to apply replication settings from '${fromHash}': ${e?.message ?? e}`);
2094
2781
  }
2095
- logger.error(`Failed to apply replication settings from '${from.hashcode()}': ${e?.message ?? e}`);
2096
2782
  });
2097
2783
  }
2098
2784
  else if (msg instanceof StoppedReplicating) {
2099
2785
  if (context.from.equals(this.node.identity.publicKey)) {
2100
2786
  return;
2101
2787
  }
2788
+ const fromHash = context.from.hashcode();
2789
+ if (this._replicationInfoBlockedPeers.has(fromHash)) {
2790
+ return;
2791
+ }
2102
2792
  const rangesToRemove = await this.resolveReplicationRangesFromIdsAndKey(msg.segmentIds, context.from);
2103
2793
  await this.removeReplicationRanges(rangesToRemove, context.from);
2104
2794
  const timestamp = BigInt(+new Date());
@@ -2363,12 +3053,17 @@ let SharedLog = (() => {
2363
3053
  requestTimer = undefined;
2364
3054
  }
2365
3055
  };
2366
- const resolve = () => {
3056
+ const resolve = async () => {
2367
3057
  if (settled) {
2368
3058
  return;
2369
3059
  }
2370
3060
  settled = true;
2371
3061
  clear();
3062
+ // `waitForReplicator()` is typically used as a precondition before join/replicate
3063
+ // flows. A replicator can become mature and enqueue a debounced rebalance
3064
+ // (`replicationChangeDebounceFn`) slightly later. Flush here so callers don't
3065
+ // observe a "late" rebalance after the wait resolves.
3066
+ await this.replicationChangeDebounceFn?.flush?.();
2372
3067
  deferred.resolve();
2373
3068
  };
2374
3069
  const reject = (error) => {
@@ -2400,13 +3095,14 @@ let SharedLog = (() => {
2400
3095
  requestAttempts++;
2401
3096
  this.rpc
2402
3097
  .send(new RequestReplicationInfoMessage(), {
2403
- mode: new SeekDelivery({ redundancy: 1, to: [key] }),
3098
+ mode: new AcknowledgeDelivery({ redundancy: 1, to: [key] }),
2404
3099
  })
2405
3100
  .catch((e) => {
2406
3101
  // Best-effort: missing peers / unopened RPC should not fail the wait logic.
2407
3102
  if (isNotStartedError(e)) {
2408
3103
  return;
2409
3104
  }
3105
+ logger.error(e?.toString?.() ?? String(e));
2410
3106
  });
2411
3107
  if (requestAttempts < maxRequestAttempts) {
2412
3108
  requestTimer = setTimeout(requestReplicationInfo, requestIntervalMs);
@@ -2425,7 +3121,7 @@ let SharedLog = (() => {
2425
3121
  return;
2426
3122
  }
2427
3123
  }
2428
- resolve();
3124
+ await resolve();
2429
3125
  }
2430
3126
  catch (error) {
2431
3127
  reject(error instanceof Error ? error : new Error(String(error)));
@@ -2441,48 +3137,68 @@ let SharedLog = (() => {
2441
3137
  return deferred.promise.finally(clear);
2442
3138
  }
2443
3139
  async waitForReplicators(options) {
2444
- // if no remotes, just return
2445
- const subscribers = await this.node.services.pubsub.getSubscribers(this.rpc.topic);
2446
- let waitForNewPeers = options?.waitForNewPeers;
2447
- if (!waitForNewPeers && (subscribers?.length ?? 0) === 0) {
2448
- throw new NoPeersError(this.rpc.topic);
2449
- }
2450
3140
  let coverageThreshold = options?.coverageThreshold ?? 1;
2451
3141
  let deferred = pDefer();
3142
+ let settled = false;
2452
3143
  const roleAge = options?.roleAge ?? (await this.getDefaultMinRoleAge());
2453
3144
  const providedCustomRoleAge = options?.roleAge != null;
2454
- let checkCoverage = async () => {
3145
+ const resolve = () => {
3146
+ if (settled)
3147
+ return;
3148
+ settled = true;
3149
+ deferred.resolve();
3150
+ };
3151
+ const reject = (error) => {
3152
+ if (settled)
3153
+ return;
3154
+ settled = true;
3155
+ deferred.reject(error);
3156
+ };
3157
+ let checkInFlight;
3158
+ const checkCoverage = async () => {
2455
3159
  const coverage = await this.calculateCoverage({
2456
3160
  roleAge,
2457
3161
  });
2458
3162
  if (coverage >= coverageThreshold) {
2459
- deferred.resolve();
3163
+ resolve();
2460
3164
  return true;
2461
3165
  }
2462
3166
  return false;
2463
3167
  };
3168
+ const scheduleCheckCoverage = () => {
3169
+ if (settled || checkInFlight) {
3170
+ return;
3171
+ }
3172
+ checkInFlight = checkCoverage()
3173
+ .then(() => { })
3174
+ .catch(reject)
3175
+ .finally(() => {
3176
+ checkInFlight = undefined;
3177
+ });
3178
+ };
2464
3179
  const onReplicatorMature = () => {
2465
- checkCoverage();
3180
+ scheduleCheckCoverage();
2466
3181
  };
2467
3182
  const onReplicationChange = () => {
2468
- checkCoverage();
3183
+ scheduleCheckCoverage();
2469
3184
  };
2470
3185
  this.events.addEventListener("replicator:mature", onReplicatorMature);
2471
3186
  this.events.addEventListener("replication:change", onReplicationChange);
2472
- await checkCoverage();
2473
- let interval = providedCustomRoleAge
3187
+ await checkCoverage().catch(reject);
3188
+ let intervalMs = providedCustomRoleAge ? 100 : 250;
3189
+ let interval = roleAge > 0
2474
3190
  ? setInterval(() => {
2475
- checkCoverage();
2476
- }, 100)
3191
+ scheduleCheckCoverage();
3192
+ }, intervalMs)
2477
3193
  : undefined;
2478
3194
  let timeout = options?.timeout ?? this.waitForReplicatorTimeout;
2479
3195
  const timer = setTimeout(() => {
2480
3196
  clear();
2481
- deferred.reject(new TimeoutError(`Timeout waiting for mature replicators`));
3197
+ reject(new TimeoutError(`Timeout waiting for mature replicators`));
2482
3198
  }, timeout);
2483
3199
  const abortListener = () => {
2484
3200
  clear();
2485
- deferred.reject(new AbortError());
3201
+ reject(new AbortError());
2486
3202
  };
2487
3203
  if (options?.signal) {
2488
3204
  options.signal.addEventListener("abort", abortListener);
@@ -2602,9 +3318,7 @@ let SharedLog = (() => {
2602
3318
  let subscribers = 1;
2603
3319
  if (!this.rpc.closed) {
2604
3320
  try {
2605
- subscribers =
2606
- (await this.node.services.pubsub.getSubscribers(this.rpc.topic))
2607
- ?.length ?? 1;
3321
+ subscribers = (await this._getTopicSubscribers(this.rpc.topic))?.length ?? 1;
2608
3322
  }
2609
3323
  catch {
2610
3324
  // Best-effort only; fall back to 1.
@@ -2671,22 +3385,31 @@ let SharedLog = (() => {
2671
3385
  async _findLeaders(cursors, options) {
2672
3386
  const roleAge = options?.roleAge ?? (await this.getDefaultMinRoleAge()); // TODO -500 as is added so that i f someone else is just as new as us, then we treat them as mature as us. without -500 we might be slower syncing if two nodes starts almost at the same time
2673
3387
  const selfHash = this.node.identity.publicKey.hashcode();
2674
- // Use `uniqueReplicators` (replicator cache) once we've reconciled it against the
2675
- // persisted replication index. Until then, fall back to live pubsub subscribers
2676
- // and avoid relying on `uniqueReplicators` being complete.
3388
+ // Prefer `uniqueReplicators` (replicator cache) as soon as it has any data.
3389
+ // Falling back to live pubsub subscribers can include non-replicators and can
3390
+ // break delivery/availability when writers are not directly connected.
2677
3391
  let peerFilter = undefined;
2678
- if (this._replicatorsReconciled && this.uniqueReplicators.size > 0) {
2679
- peerFilter = this.uniqueReplicators.has(selfHash)
2680
- ? this.uniqueReplicators
2681
- : new Set([...this.uniqueReplicators, selfHash]);
3392
+ const selfReplicating = await this.isReplicating();
3393
+ if (this.uniqueReplicators.size > 0) {
3394
+ peerFilter = new Set(this.uniqueReplicators);
3395
+ if (selfReplicating) {
3396
+ peerFilter.add(selfHash);
3397
+ }
3398
+ else {
3399
+ peerFilter.delete(selfHash);
3400
+ }
2682
3401
  }
2683
3402
  else {
2684
3403
  try {
2685
- const subscribers = (await this.node.services.pubsub.getSubscribers(this.topic)) ??
2686
- undefined;
3404
+ const subscribers = (await this._getTopicSubscribers(this.topic)) ?? undefined;
2687
3405
  if (subscribers && subscribers.length > 0) {
2688
3406
  peerFilter = new Set(subscribers.map((key) => key.hashcode()));
2689
- peerFilter.add(selfHash);
3407
+ if (selfReplicating) {
3408
+ peerFilter.add(selfHash);
3409
+ }
3410
+ else {
3411
+ peerFilter.delete(selfHash);
3412
+ }
2690
3413
  }
2691
3414
  }
2692
3415
  catch {
@@ -2709,28 +3432,99 @@ let SharedLog = (() => {
2709
3432
  replicas: maxReplicas(this, [entry]),
2710
3433
  }, options);
2711
3434
  }
3435
+ withReplicationInfoApplyQueue(peerHash, fn) {
3436
+ const prev = this._replicationInfoApplyQueueByPeer.get(peerHash);
3437
+ const next = (prev ?? Promise.resolve())
3438
+ .catch(() => {
3439
+ // Avoid stuck queues if a previous apply failed.
3440
+ })
3441
+ .then(fn);
3442
+ this._replicationInfoApplyQueueByPeer.set(peerHash, next);
3443
+ return next.finally(() => {
3444
+ if (this._replicationInfoApplyQueueByPeer.get(peerHash) === next) {
3445
+ this._replicationInfoApplyQueueByPeer.delete(peerHash);
3446
+ }
3447
+ });
3448
+ }
3449
+ cancelReplicationInfoRequests(peerHash) {
3450
+ const state = this._replicationInfoRequestByPeer.get(peerHash);
3451
+ if (!state)
3452
+ return;
3453
+ if (state.timer) {
3454
+ clearTimeout(state.timer);
3455
+ }
3456
+ this._replicationInfoRequestByPeer.delete(peerHash);
3457
+ }
3458
+ scheduleReplicationInfoRequests(peer) {
3459
+ const peerHash = peer.hashcode();
3460
+ if (this._replicationInfoRequestByPeer.has(peerHash)) {
3461
+ return;
3462
+ }
3463
+ const state = {
3464
+ attempts: 0,
3465
+ };
3466
+ this._replicationInfoRequestByPeer.set(peerHash, state);
3467
+ const intervalMs = Math.max(50, this.waitForReplicatorRequestIntervalMs);
3468
+ const maxAttempts = Math.min(5, this.waitForReplicatorRequestMaxAttempts ??
3469
+ WAIT_FOR_REPLICATOR_REQUEST_MIN_ATTEMPTS);
3470
+ const tick = () => {
3471
+ if (this.closed || this._closeController.signal.aborted) {
3472
+ this.cancelReplicationInfoRequests(peerHash);
3473
+ return;
3474
+ }
3475
+ state.attempts++;
3476
+ this.rpc
3477
+ .send(new RequestReplicationInfoMessage(), {
3478
+ mode: new AcknowledgeDelivery({ redundancy: 1, to: [peer] }),
3479
+ })
3480
+ .catch((e) => {
3481
+ // Best-effort: missing peers / unopened RPC should not fail join flows.
3482
+ if (isNotStartedError(e)) {
3483
+ return;
3484
+ }
3485
+ logger.error(e?.toString?.() ?? String(e));
3486
+ });
3487
+ if (state.attempts >= maxAttempts) {
3488
+ this.cancelReplicationInfoRequests(peerHash);
3489
+ return;
3490
+ }
3491
+ state.timer = setTimeout(tick, intervalMs);
3492
+ state.timer.unref?.();
3493
+ };
3494
+ tick();
3495
+ }
2712
3496
  async handleSubscriptionChange(publicKey, topics, subscribed) {
2713
3497
  if (!topics.includes(this.topic)) {
2714
3498
  return;
2715
3499
  }
3500
+ const peerHash = publicKey.hashcode();
3501
+ if (subscribed) {
3502
+ this._replicationInfoBlockedPeers.delete(peerHash);
3503
+ }
3504
+ else {
3505
+ this._replicationInfoBlockedPeers.add(peerHash);
3506
+ }
2716
3507
  if (!subscribed) {
2717
- this.removePeerFromGidPeerHistory(publicKey.hashcode());
3508
+ // Emit replicator:leave at most once per (join -> leave) transition, even if we
3509
+ // concurrently process unsubscribe + replication reset messages for the same peer.
3510
+ const stoppedTransition = this.uniqueReplicators.delete(peerHash);
3511
+ this._replicatorJoinEmitted.delete(peerHash);
3512
+ this.cancelReplicationInfoRequests(peerHash);
3513
+ this.removePeerFromGidPeerHistory(peerHash);
2718
3514
  for (const [k, v] of this._requestIPruneSent) {
2719
- v.delete(publicKey.hashcode());
3515
+ v.delete(peerHash);
2720
3516
  if (v.size === 0) {
2721
3517
  this._requestIPruneSent.delete(k);
2722
3518
  }
2723
3519
  }
2724
3520
  for (const [k, v] of this._requestIPruneResponseReplicatorSet) {
2725
- v.delete(publicKey.hashcode());
3521
+ v.delete(peerHash);
2726
3522
  if (v.size === 0) {
2727
3523
  this._requestIPruneResponseReplicatorSet.delete(k);
2728
3524
  }
2729
3525
  }
2730
3526
  this.syncronizer.onPeerDisconnected(publicKey);
2731
- (await this.replicationIndex.count({
2732
- query: { hash: publicKey.hashcode() },
2733
- })) > 0 &&
3527
+ stoppedTransition &&
2734
3528
  this.events.dispatchEvent(new CustomEvent("replicator:leave", {
2735
3529
  detail: { publicKey },
2736
3530
  }));
@@ -2742,14 +3536,14 @@ let SharedLog = (() => {
2742
3536
  .send(new AllReplicatingSegmentsMessage({
2743
3537
  segments: replicationSegments.map((x) => x.toReplicationRange()),
2744
3538
  }), {
2745
- mode: new SeekDelivery({ redundancy: 1, to: [publicKey] }),
3539
+ mode: new AcknowledgeDelivery({ redundancy: 1, to: [publicKey] }),
2746
3540
  })
2747
3541
  .catch((e) => logger.error(e.toString()));
2748
3542
  if (this.v8Behaviour) {
2749
3543
  // for backwards compatibility
2750
3544
  this.rpc
2751
3545
  .send(new ResponseRoleMessage({ role: await this.getRole() }), {
2752
- mode: new SeekDelivery({ redundancy: 1, to: [publicKey] }),
3546
+ mode: new AcknowledgeDelivery({ redundancy: 1, to: [publicKey] }),
2753
3547
  })
2754
3548
  .catch((e) => logger.error(e.toString()));
2755
3549
  }
@@ -2757,11 +3551,7 @@ let SharedLog = (() => {
2757
3551
  // Request the remote peer's replication info. This makes joins resilient to
2758
3552
  // timing-sensitive delivery/order issues where we may miss their initial
2759
3553
  // replication announcement.
2760
- this.rpc
2761
- .send(new RequestReplicationInfoMessage(), {
2762
- mode: new SeekDelivery({ redundancy: 1, to: [publicKey] }),
2763
- })
2764
- .catch((e) => logger.error(e.toString()));
3554
+ this.scheduleReplicationInfoRequests(publicKey);
2765
3555
  }
2766
3556
  else {
2767
3557
  await this.removeReplicator(publicKey);
@@ -2815,6 +3605,7 @@ let SharedLog = (() => {
2815
3605
  const promises = [];
2816
3606
  let peerToEntries = new Map();
2817
3607
  let cleanupTimer = [];
3608
+ const explicitTimeout = options?.timeout != null;
2818
3609
  for (const { entry, leaders } of entries.values()) {
2819
3610
  for (const leader of leaders.keys()) {
2820
3611
  let set = peerToEntries.get(leader);
@@ -2826,7 +3617,28 @@ let SharedLog = (() => {
2826
3617
  }
2827
3618
  const pendingPrev = this._pendingDeletes.get(entry.hash);
2828
3619
  if (pendingPrev) {
2829
- promises.push(pendingPrev.promise.promise);
3620
+ // If a background prune is already in-flight, an explicit prune request should
3621
+ // still respect the caller's timeout. Otherwise, tests (and user calls) can
3622
+ // block on the longer "checked prune" timeout derived from
3623
+ // `_respondToIHaveTimeout + waitForReplicatorTimeout`, which is intentionally
3624
+ // large for resiliency.
3625
+ if (explicitTimeout) {
3626
+ const timeoutMs = Math.max(0, Math.floor(options?.timeout ?? 0));
3627
+ promises.push(new Promise((resolve, reject) => {
3628
+ // Mirror the checked-prune error prefix so existing callers/tests can
3629
+ // match on the message substring.
3630
+ const timer = setTimeout(() => {
3631
+ reject(new Error(`Timeout for checked pruning after ${timeoutMs}ms (pending=true closed=${this.closed})`));
3632
+ }, timeoutMs);
3633
+ timer.unref?.();
3634
+ pendingPrev.promise.promise
3635
+ .then(resolve, reject)
3636
+ .finally(() => clearTimeout(timer));
3637
+ }));
3638
+ }
3639
+ else {
3640
+ promises.push(pendingPrev.promise.promise);
3641
+ }
2830
3642
  continue;
2831
3643
  }
2832
3644
  const minReplicas = decodeReplicas(entry);
@@ -2840,6 +3652,7 @@ let SharedLog = (() => {
2840
3652
  };
2841
3653
  const resolve = () => {
2842
3654
  clear();
3655
+ this.clearCheckedPruneRetry(entry.hash);
2843
3656
  cleanupTimer.push(setTimeout(async () => {
2844
3657
  this._gidPeersHistory.delete(entry.meta.gid);
2845
3658
  this.removePruneRequestSent(entry.hash);
@@ -2877,6 +3690,12 @@ let SharedLog = (() => {
2877
3690
  };
2878
3691
  const reject = (e) => {
2879
3692
  clear();
3693
+ const isCheckedPruneTimeout = e instanceof Error &&
3694
+ typeof e.message === "string" &&
3695
+ e.message.startsWith("Timeout for checked pruning");
3696
+ if (explicitTimeout || !isCheckedPruneTimeout) {
3697
+ this.clearCheckedPruneRetry(entry.hash);
3698
+ }
2880
3699
  this.removePruneRequestSent(entry.hash);
2881
3700
  this._requestIPruneResponseReplicatorSet.delete(entry.hash);
2882
3701
  deferredPromise.reject(e);
@@ -2892,6 +3711,12 @@ let SharedLog = (() => {
2892
3711
  this.waitForReplicatorTimeout +
2893
3712
  PRUNE_DEBOUNCE_INTERVAL * 2);
2894
3713
  const timeout = setTimeout(() => {
3714
+ // For internal/background prune flows (no explicit timeout), retry a few times
3715
+ // to avoid "permanently prunable" entries when `_pendingIHave` expires under
3716
+ // heavy load.
3717
+ if (!explicitTimeout) {
3718
+ this.scheduleCheckedPruneRetry({ entry, leaders });
3719
+ }
2895
3720
  reject(new Error(`Timeout for checked pruning after ${checkedPruneTimeoutMs}ms (closed=${this.closed})`));
2896
3721
  }, checkedPruneTimeoutMs);
2897
3722
  timeout.unref?.();
@@ -2921,6 +3746,8 @@ let SharedLog = (() => {
2921
3746
  this._requestIPruneResponseReplicatorSet.set(entry.hash, existCounter);
2922
3747
  }
2923
3748
  existCounter.add(publicKeyHash);
3749
+ // Seed provider hints so future remote reads can avoid extra round-trips.
3750
+ this.remoteBlocks.hintProviders(entry.hash, [publicKeyHash]);
2924
3751
  if (minReplicasValue <= existCounter.size) {
2925
3752
  resolve();
2926
3753
  }
@@ -2958,6 +3785,37 @@ let SharedLog = (() => {
2958
3785
  for (const [k, v] of peerToEntries) {
2959
3786
  emitMessages(v, k);
2960
3787
  }
3788
+ // Keep remote `_pendingIHave` alive in the common "leader doesn't have entry yet"
3789
+ // case. This is intentionally disabled when an explicit timeout is provided to
3790
+ // preserve unit tests that assert remote `_pendingIHave` clears promptly.
3791
+ if (!explicitTimeout && peerToEntries.size > 0) {
3792
+ const respondToIHaveTimeout = Number(this._respondToIHaveTimeout ?? 0);
3793
+ const resendIntervalMs = Math.min(CHECKED_PRUNE_RESEND_INTERVAL_MAX_MS, Math.max(CHECKED_PRUNE_RESEND_INTERVAL_MIN_MS, Math.floor(respondToIHaveTimeout / 2) || 1_000));
3794
+ let inFlight = false;
3795
+ const timer = setInterval(() => {
3796
+ if (inFlight)
3797
+ return;
3798
+ if (this.closed)
3799
+ return;
3800
+ const pendingByPeer = [];
3801
+ for (const [peer, hashes] of peerToEntries) {
3802
+ const pending = hashes.filter((h) => this._pendingDeletes.has(h));
3803
+ if (pending.length > 0) {
3804
+ pendingByPeer.push([peer, pending]);
3805
+ }
3806
+ }
3807
+ if (pendingByPeer.length === 0) {
3808
+ clearInterval(timer);
3809
+ return;
3810
+ }
3811
+ inFlight = true;
3812
+ Promise.allSettled(pendingByPeer.map(([peer, hashes]) => emitMessages(hashes, peer).catch(() => { }))).finally(() => {
3813
+ inFlight = false;
3814
+ });
3815
+ }, resendIntervalMs);
3816
+ timer.unref?.();
3817
+ cleanupTimer.push(timer);
3818
+ }
2961
3819
  let cleanup = () => {
2962
3820
  for (const timer of cleanupTimer) {
2963
3821
  clearTimeout(timer);
@@ -3014,14 +3872,31 @@ let SharedLog = (() => {
3014
3872
  return;
3015
3873
  }
3016
3874
  await this.log.trim();
3875
+ const batchedChanges = Array.isArray(changeOrChanges[0])
3876
+ ? changeOrChanges
3877
+ : [changeOrChanges];
3878
+ const changes = batchedChanges.flat();
3879
+ // On removed ranges (peer leaves / shrink), gid-level history can hide
3880
+ // per-entry gaps. Force a fresh delivery pass for reassigned entries.
3881
+ const forceFreshDelivery = changes.some((change) => change.type === "removed");
3882
+ const gidPeersHistorySnapshot = new Map();
3017
3883
  const changed = false;
3018
3884
  try {
3019
3885
  const uncheckedDeliver = new Map();
3020
- for await (const entryReplicated of toRebalance(changeOrChanges, this.entryCoordinatesIndex, this.recentlyRebalanced)) {
3886
+ for await (const entryReplicated of toRebalance(changes, this.entryCoordinatesIndex, this.recentlyRebalanced)) {
3021
3887
  if (this.closed) {
3022
3888
  break;
3023
3889
  }
3024
- let oldPeersSet = this._gidPeersHistory.get(entryReplicated.gid);
3890
+ let oldPeersSet;
3891
+ if (!forceFreshDelivery) {
3892
+ const gid = entryReplicated.gid;
3893
+ oldPeersSet = gidPeersHistorySnapshot.get(gid);
3894
+ if (!gidPeersHistorySnapshot.has(gid)) {
3895
+ const existing = this._gidPeersHistory.get(gid);
3896
+ oldPeersSet = existing ? new Set(existing) : undefined;
3897
+ gidPeersHistorySnapshot.set(gid, oldPeersSet);
3898
+ }
3899
+ }
3025
3900
  let isLeader = false;
3026
3901
  let currentPeers = await this.findLeaders(entryReplicated.coordinates, entryReplicated, {
3027
3902
  // we do this to make sure new replicators get data even though they are not mature so they can figure out if they want to replicate more or less
@@ -3085,12 +3960,28 @@ let SharedLog = (() => {
3085
3960
  }
3086
3961
  async _onUnsubscription(evt) {
3087
3962
  logger.trace(`Peer disconnected '${evt.detail.from.hashcode()}' from '${JSON.stringify(evt.detail.topics.map((x) => x))} '`);
3088
- this.latestReplicationInfoMessage.delete(evt.detail.from.hashcode());
3963
+ if (!evt.detail.topics.includes(this.topic)) {
3964
+ return;
3965
+ }
3966
+ const fromHash = evt.detail.from.hashcode();
3967
+ this._replicationInfoBlockedPeers.add(fromHash);
3968
+ // Keep a per-peer timestamp watermark when we observe an unsubscribe. This
3969
+ // prevents late/out-of-order replication-info messages from re-introducing
3970
+ // stale segments for a peer that has already left the topic.
3971
+ const now = BigInt(+new Date());
3972
+ const prev = this.latestReplicationInfoMessage.get(fromHash);
3973
+ if (!prev || prev < now) {
3974
+ this.latestReplicationInfoMessage.set(fromHash, now);
3975
+ }
3089
3976
  return this.handleSubscriptionChange(evt.detail.from, evt.detail.topics, false);
3090
3977
  }
3091
3978
  async _onSubscription(evt) {
3092
3979
  logger.trace(`New peer '${evt.detail.from.hashcode()}' connected to '${JSON.stringify(evt.detail.topics.map((x) => x))}'`);
3980
+ if (!evt.detail.topics.includes(this.topic)) {
3981
+ return;
3982
+ }
3093
3983
  this.remoteBlocks.onReachable(evt.detail.from);
3984
+ this._replicationInfoBlockedPeers.delete(evt.detail.from.hashcode());
3094
3985
  return this.handleSubscriptionChange(evt.detail.from, evt.detail.topics, true);
3095
3986
  }
3096
3987
  async rebalanceParticipation() {