@peerbit/shared-log 12.3.5-3f16953 → 12.3.5-42e98ce

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/src/index.js CHANGED
@@ -216,6 +216,20 @@ const RECALCULATE_PARTICIPATION_MIN_RELATIVE_CHANGE_WITH_CPU_LIMIT = 0.005;
216
216
  const RECALCULATE_PARTICIPATION_MIN_RELATIVE_CHANGE_WITH_MEMORY_LIMIT = 0.001;
217
217
  const RECALCULATE_PARTICIPATION_RELATIVE_DENOMINATOR_FLOOR = 1e-3;
218
218
  const DEFAULT_DISTRIBUTION_DEBOUNCE_TIME = 500;
219
/**
 * Dedupe window in milliseconds: an entry hash dispatched to a target within
 * this window is treated as recently sent when repair entries are filtered.
 */
const RECENT_REPAIR_DISPATCH_TTL_MS = 5_000;
/** Number of index entries requested per `iterator.next(...)` call during a repair sweep. */
const REPAIR_SWEEP_ENTRY_BATCH_SIZE = 1_000;
/** Default for `repairSweepTargetBufferSize` when `sync.repairSweepTargetBufferSize` is not provided. */
const REPAIR_SWEEP_TARGET_BUFFER_SIZE = 1024;
/**
 * Dispatch delays in milliseconds used when a forced fresh delivery is requested
 * and no explicit retry schedule is supplied. A delay of 0 means "run immediately".
 * Frozen because the same array instance is shared by reference across all dispatches.
 */
const FORCE_FRESH_RETRY_SCHEDULE_MS = Object.freeze([0, 1_000, 3_000, 7_000]);
/**
 * Dispatch delays in milliseconds used for targets that were just added (join warmup).
 * Frozen for the same reason as FORCE_FRESH_RETRY_SCHEDULE_MS.
 */
const JOIN_WARMUP_RETRY_SCHEDULE_MS = Object.freeze([0, 1_000, 3_000]);
224
/**
 * Normalize an optional numeric setting to a whole number of at least 1.
 *
 * @param {number|null|undefined} value - Caller-supplied setting.
 * @param {*} fallback - Returned unchanged when `value` is null or undefined.
 * @param {string} label - Setting name used as the prefix of the error message.
 * @returns {*} `fallback`, or `value` rounded down and clamped to a minimum of 1.
 * @throws {Error} When `value` is present but not a finite number greater than 0.
 */
const toPositiveInteger = (value, fallback, label) => {
    if (value === null || value === undefined) {
        return fallback;
    }
    const isUsable = Number.isFinite(value) && value > 0;
    if (!isUsable) {
        throw new Error(`${label} must be a positive number`);
    }
    // Fractions in (0, 1) floor to 0, so clamp back up to 1.
    const whole = Math.floor(value);
    return whole < 1 ? 1 : whole;
};
219
233
  const DEFAULT_SHARED_LOG_FANOUT_CHANNEL_OPTIONS = {
220
234
  msgRate: 30,
221
235
  msgSize: 1024,
@@ -305,6 +319,11 @@ let SharedLog = (() => {
305
319
  _requestIPruneResponseReplicatorSet; // tracks entry hash to peer hash
306
320
  _checkedPruneRetries;
307
321
  replicationChangeDebounceFn;
322
+ _repairRetryTimers;
323
+ _recentRepairDispatch;
324
+ _repairSweepRunning;
325
+ _repairSweepForceFreshPending;
326
+ _repairSweepAddedPeersPending;
308
327
  // regular distribution checks
309
328
  distributeQueue;
310
329
  syncronizer;
@@ -316,6 +335,7 @@ let SharedLog = (() => {
316
335
  waitForReplicatorRequestMaxAttempts;
317
336
  waitForPruneDelay;
318
337
  distributionDebounceTime;
338
+ repairSweepTargetBufferSize;
319
339
  replicationController;
320
340
  history;
321
341
  domain;
@@ -486,22 +506,23 @@ let SharedLog = (() => {
486
506
  const delivery = deliveryArg === undefined || deliveryArg === false
487
507
  ? undefined
488
508
  : deliveryArg === true
489
- ? {}
509
+ ? { reliability: "ack" }
490
510
  : deliveryArg;
491
511
  if (!delivery) {
492
512
  return {
493
513
  delivery: undefined,
514
+ reliability: "best-effort",
494
515
  requireRecipients: false,
495
- settleMin: undefined,
516
+ minAcks: undefined,
496
517
  wrap: undefined,
497
518
  };
498
519
  }
499
- const deliverySettle = delivery.settle ?? true;
520
+ const reliability = delivery.reliability ?? "ack";
500
521
  const deliveryTimeout = delivery.timeout;
501
522
  const deliverySignal = delivery.signal;
502
523
  const requireRecipients = delivery.requireRecipients === true;
503
- const settleMin = typeof deliverySettle === "object" && Number.isFinite(deliverySettle.min)
504
- ? Math.max(0, Math.floor(deliverySettle.min))
524
+ const minAcks = delivery.minAcks != null && Number.isFinite(delivery.minAcks)
525
+ ? Math.max(0, Math.floor(delivery.minAcks))
505
526
  : undefined;
506
527
  const wrap = deliveryTimeout == null && deliverySignal == null
507
528
  ? undefined
@@ -562,13 +583,83 @@ let SharedLog = (() => {
562
583
  });
563
584
  return {
564
585
  delivery,
586
+ reliability,
565
587
  requireRecipients,
566
- settleMin,
588
+ minAcks,
567
589
  wrap,
568
590
  };
569
591
  }
592
+ async _getSortedRouteHints(targetHash) {
593
+ const pubsub = this.node.services.pubsub;
594
+ const maybeHints = await pubsub?.getUnifiedRouteHints?.(this.topic, targetHash);
595
+ const hints = Array.isArray(maybeHints) ? maybeHints : [];
596
+ const now = Date.now();
597
+ return hints
598
+ .filter((hint) => hint.expiresAt == null || hint.expiresAt > now)
599
+ .sort((a, b) => {
600
+ const rankA = a.kind === "directstream-ack" ? 0 : 1;
601
+ const rankB = b.kind === "directstream-ack" ? 0 : 1;
602
+ if (rankA !== rankB) {
603
+ return rankA - rankB;
604
+ }
605
+ const costA = a.kind === "directstream-ack"
606
+ ? a.distance
607
+ : Math.max(0, (a.route?.length ?? 1) - 1);
608
+ const costB = b.kind === "directstream-ack"
609
+ ? b.distance
610
+ : Math.max(0, (b.route?.length ?? 1) - 1);
611
+ if (costA !== costB) {
612
+ return costA - costB;
613
+ }
614
+ return (b.updatedAt ?? 0) - (a.updatedAt ?? 0);
615
+ });
616
+ }
617
+ async _sendAckWithUnifiedHints(properties) {
618
+ const { peer, message, payload, fanoutUnicastOptions } = properties;
619
+ const hints = await this._getSortedRouteHints(peer);
620
+ const hasDirectHint = hints.some((hint) => hint.kind === "directstream-ack");
621
+ const fanoutHint = hints.find((hint) => hint.kind === "fanout-token");
622
+ if (hasDirectHint) {
623
+ try {
624
+ await this.rpc.send(message, {
625
+ mode: new AcknowledgeDelivery({
626
+ redundancy: 1,
627
+ to: [peer],
628
+ }),
629
+ });
630
+ return;
631
+ }
632
+ catch {
633
+ // Fall back to fanout token/direct fanout unicast below.
634
+ }
635
+ }
636
+ if (fanoutHint && this._fanoutChannel) {
637
+ try {
638
+ await this._fanoutChannel.unicastAck(fanoutHint.route, payload, fanoutUnicastOptions);
639
+ return;
640
+ }
641
+ catch {
642
+ // Fall back below.
643
+ }
644
+ }
645
+ if (this._fanoutChannel) {
646
+ try {
647
+ await this._fanoutChannel.unicastToAck(peer, payload, fanoutUnicastOptions);
648
+ return;
649
+ }
650
+ catch {
651
+ // Fall back below.
652
+ }
653
+ }
654
+ await this.rpc.send(message, {
655
+ mode: new AcknowledgeDelivery({
656
+ redundancy: 1,
657
+ to: [peer],
658
+ }),
659
+ });
660
+ }
570
661
  async _appendDeliverToReplicators(entry, minReplicasValue, leaders, selfHash, isLeader, deliveryArg) {
571
- const { delivery, requireRecipients, settleMin, wrap } = this._parseDeliveryOptions(deliveryArg);
662
+ const { delivery, reliability, requireRecipients, minAcks, wrap } = this._parseDeliveryOptions(deliveryArg);
572
663
  const pending = [];
573
664
  const track = (promise) => {
574
665
  pending.push(wrap ? wrap(promise) : promise);
@@ -580,7 +671,28 @@ let SharedLog = (() => {
580
671
  await this._mergeLeadersFromGidReferences(message, minReplicasValue, leaders);
581
672
  const leadersForDelivery = delivery ? new Set(leaders.keys()) : undefined;
582
673
  const set = this.addPeersToGidPeerHistory(entry.meta.gid, leaders.keys());
583
- const hasRemotePeers = set.has(selfHash) ? set.size > 1 : set.size > 0;
674
+ let hasRemotePeers = set.has(selfHash) ? set.size > 1 : set.size > 0;
675
+ const allowSubscriberFallback = this.syncronizer instanceof SimpleSyncronizer ||
676
+ (this.compatibility ?? Number.MAX_VALUE) < 10;
677
+ if (!hasRemotePeers && allowSubscriberFallback) {
678
+ try {
679
+ const subscribers = await this._getTopicSubscribers(this.topic);
680
+ if (subscribers && subscribers.length > 0) {
681
+ for (const subscriber of subscribers) {
682
+ const hash = subscriber.hashcode();
683
+ if (hash === selfHash) {
684
+ continue;
685
+ }
686
+ set.add(hash);
687
+ leadersForDelivery?.add(hash);
688
+ }
689
+ hasRemotePeers = set.has(selfHash) ? set.size > 1 : set.size > 0;
690
+ }
691
+ }
692
+ catch {
693
+ // Best-effort only; keep discovered recipients as-is.
694
+ }
695
+ }
584
696
  if (!hasRemotePeers) {
585
697
  if (requireRecipients) {
586
698
  throw new NoPeersError(this.rpc.topic);
@@ -617,7 +729,9 @@ let SharedLog = (() => {
617
729
  let silentTo;
618
730
  // Default delivery semantics: require enough remote ACKs to reach the requested
619
731
  // replication degree (local append counts as 1).
620
- const ackLimit = settleMin == null ? Math.max(0, minReplicasValue - 1) : settleMin;
732
+ const defaultMinAcks = Math.max(0, minReplicasValue - 1);
733
+ const ackLimitRaw = reliability === "ack" ? (minAcks ?? defaultMinAcks) : 0;
734
+ const ackLimit = Math.max(0, Math.min(Math.floor(ackLimitRaw), orderedRemoteRecipients.length));
621
735
  for (const peer of orderedRemoteRecipients) {
622
736
  if (ackTo.length < ackLimit) {
623
737
  ackTo.push(peer);
@@ -637,36 +751,11 @@ let SharedLog = (() => {
637
751
  const payload = serialize(message);
638
752
  for (const peer of ackTo) {
639
753
  track((async () => {
640
- // Unified decision point:
641
- // - If we can prove a cheap direct path (connected or routed), use it.
642
- // - Otherwise, fall back to the fanout unicast ACK path (bounded overlay routing).
643
- // - If that fails, fall back to pubsub/RPC routing which may flood to discover routes.
644
- const pubsub = this.node.services.pubsub;
645
- const canDirectFast = Boolean(pubsub?.peers?.get?.(peer)?.isWritable) ||
646
- Boolean(pubsub?.routes?.isReachable?.(pubsub?.publicKeyHash, peer, 0));
647
- if (canDirectFast) {
648
- await this.rpc.send(message, {
649
- mode: new AcknowledgeDelivery({
650
- redundancy: 1,
651
- to: [peer],
652
- }),
653
- });
654
- return;
655
- }
656
- if (this._fanoutChannel) {
657
- try {
658
- await this._fanoutChannel.unicastToAck(peer, payload, fanoutUnicastOptions);
659
- return;
660
- }
661
- catch {
662
- // fall back below
663
- }
664
- }
665
- await this.rpc.send(message, {
666
- mode: new AcknowledgeDelivery({
667
- redundancy: 1,
668
- to: [peer],
669
- }),
754
+ await this._sendAckWithUnifiedHints({
755
+ peer,
756
+ message,
757
+ payload,
758
+ fanoutUnicastOptions,
670
759
  });
671
760
  })());
672
761
  }
@@ -1159,6 +1248,13 @@ let SharedLog = (() => {
1159
1248
  }
1160
1249
  this.pendingMaturity.delete(keyHash);
1161
1250
  }
1251
+ // Keep local sync/prune state consistent even when a peer disappears
1252
+ // through replication-info updates without a topic unsubscribe event.
1253
+ this.removePeerFromGidPeerHistory(keyHash);
1254
+ this._recentRepairDispatch.delete(keyHash);
1255
+ if (!isMe) {
1256
+ this.syncronizer.onPeerDisconnected(keyHash);
1257
+ }
1162
1258
  if (!isMe) {
1163
1259
  this.rebalanceParticipationDebounced?.call();
1164
1260
  }
@@ -1522,6 +1618,168 @@ let SharedLog = (() => {
1522
1618
  }
1523
1619
  return set;
1524
1620
  }
1621
+ dispatchMaybeMissingEntries(target, entries, options) {
1622
+ if (entries.size === 0) {
1623
+ return;
1624
+ }
1625
+ const now = Date.now();
1626
+ let recentlyDispatchedByHash = this._recentRepairDispatch.get(target);
1627
+ if (!recentlyDispatchedByHash) {
1628
+ recentlyDispatchedByHash = new Map();
1629
+ this._recentRepairDispatch.set(target, recentlyDispatchedByHash);
1630
+ }
1631
+ for (const [hash, ts] of recentlyDispatchedByHash) {
1632
+ if (now - ts > RECENT_REPAIR_DISPATCH_TTL_MS) {
1633
+ recentlyDispatchedByHash.delete(hash);
1634
+ }
1635
+ }
1636
+ const filteredEntries = options?.bypassRecentDedupe === true
1637
+ ? new Map(entries)
1638
+ : new Map();
1639
+ if (options?.bypassRecentDedupe !== true) {
1640
+ for (const [hash, entry] of entries) {
1641
+ const prev = recentlyDispatchedByHash.get(hash);
1642
+ if (prev != null && now - prev <= RECENT_REPAIR_DISPATCH_TTL_MS) {
1643
+ continue;
1644
+ }
1645
+ recentlyDispatchedByHash.set(hash, now);
1646
+ filteredEntries.set(hash, entry);
1647
+ }
1648
+ }
1649
+ else {
1650
+ for (const hash of entries.keys()) {
1651
+ recentlyDispatchedByHash.set(hash, now);
1652
+ }
1653
+ }
1654
+ if (filteredEntries.size === 0) {
1655
+ return;
1656
+ }
1657
+ const run = () => Promise.resolve(this.syncronizer.onMaybeMissingEntries({
1658
+ entries: filteredEntries,
1659
+ targets: [target],
1660
+ })).catch((error) => logger.error(error));
1661
+ const retrySchedule = options?.retryScheduleMs && options.retryScheduleMs.length > 0
1662
+ ? options.retryScheduleMs
1663
+ : options?.forceFreshDelivery
1664
+ ? FORCE_FRESH_RETRY_SCHEDULE_MS
1665
+ : [0];
1666
+ for (const delayMs of retrySchedule) {
1667
+ if (delayMs === 0) {
1668
+ void run();
1669
+ continue;
1670
+ }
1671
+ const timer = setTimeout(() => {
1672
+ this._repairRetryTimers.delete(timer);
1673
+ if (this.closed) {
1674
+ return;
1675
+ }
1676
+ void run();
1677
+ }, delayMs);
1678
+ timer.unref?.();
1679
+ this._repairRetryTimers.add(timer);
1680
+ }
1681
+ }
1682
+ scheduleRepairSweep(options) {
1683
+ if (options.forceFreshDelivery) {
1684
+ this._repairSweepForceFreshPending = true;
1685
+ }
1686
+ for (const peer of options.addedPeers) {
1687
+ this._repairSweepAddedPeersPending.add(peer);
1688
+ }
1689
+ if (!this._repairSweepRunning && !this.closed) {
1690
+ this._repairSweepRunning = true;
1691
+ void this.runRepairSweep();
1692
+ }
1693
+ }
1694
+ async runRepairSweep() {
1695
+ try {
1696
+ while (!this.closed) {
1697
+ const forceFreshDelivery = this._repairSweepForceFreshPending;
1698
+ const addedPeers = new Set(this._repairSweepAddedPeersPending);
1699
+ this._repairSweepForceFreshPending = false;
1700
+ this._repairSweepAddedPeersPending.clear();
1701
+ if (!forceFreshDelivery && addedPeers.size === 0) {
1702
+ return;
1703
+ }
1704
+ const pendingByTarget = new Map();
1705
+ const flushTarget = (target) => {
1706
+ const entries = pendingByTarget.get(target);
1707
+ if (!entries || entries.size === 0) {
1708
+ return;
1709
+ }
1710
+ const isJoinWarmupTarget = addedPeers.has(target);
1711
+ const bypassRecentDedupe = isJoinWarmupTarget || forceFreshDelivery;
1712
+ this.dispatchMaybeMissingEntries(target, entries, {
1713
+ bypassRecentDedupe,
1714
+ retryScheduleMs: isJoinWarmupTarget
1715
+ ? JOIN_WARMUP_RETRY_SCHEDULE_MS
1716
+ : undefined,
1717
+ forceFreshDelivery,
1718
+ });
1719
+ pendingByTarget.delete(target);
1720
+ };
1721
+ const queueEntryForTarget = (target, entry) => {
1722
+ let set = pendingByTarget.get(target);
1723
+ if (!set) {
1724
+ set = new Map();
1725
+ pendingByTarget.set(target, set);
1726
+ }
1727
+ if (set.has(entry.hash)) {
1728
+ return;
1729
+ }
1730
+ set.set(entry.hash, entry);
1731
+ if (set.size >= this.repairSweepTargetBufferSize) {
1732
+ flushTarget(target);
1733
+ }
1734
+ };
1735
+ const iterator = this.entryCoordinatesIndex.iterate({});
1736
+ try {
1737
+ while (!this.closed && !iterator.done()) {
1738
+ const entries = await iterator.next(REPAIR_SWEEP_ENTRY_BATCH_SIZE);
1739
+ for (const entry of entries) {
1740
+ const entryReplicated = entry.value;
1741
+ const currentPeers = await this.findLeaders(entryReplicated.coordinates, entryReplicated, { roleAge: 0 });
1742
+ if (forceFreshDelivery) {
1743
+ for (const [currentPeer] of currentPeers) {
1744
+ if (currentPeer === this.node.identity.publicKey.hashcode()) {
1745
+ continue;
1746
+ }
1747
+ queueEntryForTarget(currentPeer, entryReplicated);
1748
+ }
1749
+ }
1750
+ if (addedPeers.size > 0) {
1751
+ for (const peer of addedPeers) {
1752
+ if (currentPeers.has(peer)) {
1753
+ queueEntryForTarget(peer, entryReplicated);
1754
+ }
1755
+ }
1756
+ }
1757
+ }
1758
+ }
1759
+ }
1760
+ finally {
1761
+ await iterator.close();
1762
+ }
1763
+ for (const target of [...pendingByTarget.keys()]) {
1764
+ flushTarget(target);
1765
+ }
1766
+ }
1767
+ }
1768
+ catch (error) {
1769
+ if (!isNotStartedError(error)) {
1770
+ logger.error(`Repair sweep failed: ${error?.message ?? error}`);
1771
+ }
1772
+ }
1773
+ finally {
1774
+ this._repairSweepRunning = false;
1775
+ if (!this.closed &&
1776
+ (this._repairSweepForceFreshPending ||
1777
+ this._repairSweepAddedPeersPending.size > 0)) {
1778
+ this._repairSweepRunning = true;
1779
+ void this.runRepairSweep();
1780
+ }
1781
+ }
1782
+ }
1525
1783
  async pruneDebouncedFnAddIfNotKeeping(args) {
1526
1784
  if (!this.keep || !(await this.keep(args.value.entry))) {
1527
1785
  return this.pruneDebouncedFn.add(args);
@@ -1693,6 +1951,11 @@ let SharedLog = (() => {
1693
1951
  this._replicationInfoBlockedPeers = new Set();
1694
1952
  this._replicationInfoRequestByPeer = new Map();
1695
1953
  this._replicationInfoApplyQueueByPeer = new Map();
1954
+ this._repairRetryTimers = new Set();
1955
+ this._recentRepairDispatch = new Map();
1956
+ this._repairSweepRunning = false;
1957
+ this._repairSweepForceFreshPending = false;
1958
+ this._repairSweepAddedPeersPending = new Set();
1696
1959
  this.coordinateToHash = new Cache({ max: 1e6, ttl: 1e4 });
1697
1960
  this.recentlyRebalanced = new Cache({ max: 1e4, ttl: 1e5 });
1698
1961
  this.uniqueReplicators = new Set();
@@ -1702,6 +1965,7 @@ let SharedLog = (() => {
1702
1965
  this.oldestOpenTime = this.openTime;
1703
1966
  this.distributionDebounceTime =
1704
1967
  options?.distributionDebounceTime || DEFAULT_DISTRIBUTION_DEBOUNCE_TIME; // expect > 0
1968
+ this.repairSweepTargetBufferSize = toPositiveInteger(options?.sync?.repairSweepTargetBufferSize, REPAIR_SWEEP_TARGET_BUFFER_SIZE, "sync.repairSweepTargetBufferSize");
1705
1969
  this.timeUntilRoleMaturity =
1706
1970
  options?.timeUntilRoleMaturity ?? WAIT_FOR_ROLE_MATURITY;
1707
1971
  this.waitForReplicatorTimeout =
@@ -2298,6 +2562,14 @@ let SharedLog = (() => {
2298
2562
  clearInterval(this.interval);
2299
2563
  this.node.services.pubsub.removeEventListener("subscribe", this._onSubscriptionFn);
2300
2564
  this.node.services.pubsub.removeEventListener("unsubscribe", this._onUnsubscriptionFn);
2565
+ for (const timer of this._repairRetryTimers) {
2566
+ clearTimeout(timer);
2567
+ }
2568
+ this._repairRetryTimers.clear();
2569
+ this._recentRepairDispatch.clear();
2570
+ this._repairSweepRunning = false;
2571
+ this._repairSweepForceFreshPending = false;
2572
+ this._repairSweepAddedPeersPending.clear();
2301
2573
  for (const [_k, v] of this._pendingDeletes) {
2302
2574
  v.clear();
2303
2575
  v.promise.resolve(); // TODO or reject?
@@ -3219,19 +3491,33 @@ let SharedLog = (() => {
3219
3491
  async _waitForReplicators(cursors, entry, waitFor, options = { timeout: this.waitForReplicatorTimeout }) {
3220
3492
  const timeout = options.timeout ?? this.waitForReplicatorTimeout;
3221
3493
  return new Promise((resolve, reject) => {
3494
+ let settled = false;
3222
3495
  const removeListeners = () => {
3223
3496
  this.events.removeEventListener("replication:change", roleListener);
3224
3497
  this.events.removeEventListener("replicator:mature", roleListener); // TODO replication:change event ?
3225
3498
  this._closeController.signal.removeEventListener("abort", abortListener);
3226
3499
  };
3227
- const abortListener = () => {
3500
+ const settleResolve = (value) => {
3501
+ if (settled)
3502
+ return;
3503
+ settled = true;
3228
3504
  removeListeners();
3229
3505
  clearTimeout(timer);
3230
- resolve(false);
3506
+ resolve(value);
3231
3507
  };
3232
- const timer = setTimeout(async () => {
3508
+ const settleReject = (error) => {
3509
+ if (settled)
3510
+ return;
3511
+ settled = true;
3233
3512
  removeListeners();
3234
- resolve(false);
3513
+ clearTimeout(timer);
3514
+ reject(error);
3515
+ };
3516
+ const abortListener = () => {
3517
+ settleResolve(false);
3518
+ };
3519
+ const timer = setTimeout(async () => {
3520
+ settleResolve(false);
3235
3521
  }, timeout);
3236
3522
  const check = async () => {
3237
3523
  let leaderKeys = new Set();
@@ -3251,17 +3537,20 @@ let SharedLog = (() => {
3251
3537
  }
3252
3538
  }
3253
3539
  options?.onLeader && leaderKeys.forEach(options.onLeader);
3254
- removeListeners();
3255
- clearTimeout(timer);
3256
- resolve(leaders);
3540
+ settleResolve(leaders);
3541
+ };
3542
+ const runCheck = () => {
3543
+ void check().catch((error) => {
3544
+ settleReject(error);
3545
+ });
3257
3546
  };
3258
3547
  const roleListener = () => {
3259
- check();
3548
+ runCheck();
3260
3549
  };
3261
3550
  this.events.addEventListener("replication:change", roleListener); // TODO replication:change event ?
3262
3551
  this.events.addEventListener("replicator:mature", roleListener); // TODO replication:change event ?
3263
3552
  this._closeController.signal.addEventListener("abort", abortListener);
3264
- check();
3553
+ runCheck();
3265
3554
  });
3266
3555
  }
3267
3556
  async createCoordinates(entry, minReplicas) {
@@ -3386,8 +3675,8 @@ let SharedLog = (() => {
3386
3675
  const roleAge = options?.roleAge ?? (await this.getDefaultMinRoleAge()); // TODO -500 as is added so that i f someone else is just as new as us, then we treat them as mature as us. without -500 we might be slower syncing if two nodes starts almost at the same time
3387
3676
  const selfHash = this.node.identity.publicKey.hashcode();
3388
3677
  // Prefer `uniqueReplicators` (replicator cache) as soon as it has any data.
3389
- // Falling back to live pubsub subscribers can include non-replicators and can
3390
- // break delivery/availability when writers are not directly connected.
3678
+ // If it is still warming up (for example, only contains self), supplement with
3679
+ // current subscribers until we have enough candidates for this decision.
3391
3680
  let peerFilter = undefined;
3392
3681
  const selfReplicating = await this.isReplicating();
3393
3682
  if (this.uniqueReplicators.size > 0) {
@@ -3398,6 +3687,23 @@ let SharedLog = (() => {
3398
3687
  else {
3399
3688
  peerFilter.delete(selfHash);
3400
3689
  }
3690
+ try {
3691
+ const subscribers = await this._getTopicSubscribers(this.topic);
3692
+ if (subscribers && subscribers.length > 0) {
3693
+ for (const subscriber of subscribers) {
3694
+ peerFilter.add(subscriber.hashcode());
3695
+ }
3696
+ if (selfReplicating) {
3697
+ peerFilter.add(selfHash);
3698
+ }
3699
+ else {
3700
+ peerFilter.delete(selfHash);
3701
+ }
3702
+ }
3703
+ }
3704
+ catch {
3705
+ // Best-effort only; keep current peerFilter.
3706
+ }
3401
3707
  }
3402
3708
  else {
3403
3709
  try {
@@ -3505,9 +3811,20 @@ let SharedLog = (() => {
3505
3811
  this._replicationInfoBlockedPeers.add(peerHash);
3506
3812
  }
3507
3813
  if (!subscribed) {
3814
+ const wasReplicator = this.uniqueReplicators.has(peerHash);
3815
+ try {
3816
+ // Unsubscribe can race with the peer's final replication reset message.
3817
+ // Proactively evict its ranges so leader selection doesn't keep stale owners.
3818
+ await this.removeReplicator(publicKey, { noEvent: true });
3819
+ }
3820
+ catch (error) {
3821
+ if (!isNotStartedError(error)) {
3822
+ throw error;
3823
+ }
3824
+ }
3508
3825
  // Emit replicator:leave at most once per (join -> leave) transition, even if we
3509
3826
  // concurrently process unsubscribe + replication reset messages for the same peer.
3510
- const stoppedTransition = this.uniqueReplicators.delete(peerHash);
3827
+ const stoppedTransition = wasReplicator;
3511
3828
  this._replicatorJoinEmitted.delete(peerHash);
3512
3829
  this.cancelReplicationInfoRequests(peerHash);
3513
3830
  this.removePeerFromGidPeerHistory(peerHash);
@@ -3876,14 +4193,79 @@ let SharedLog = (() => {
3876
4193
  ? changeOrChanges
3877
4194
  : [changeOrChanges];
3878
4195
  const changes = batchedChanges.flat();
4196
+ const selfHash = this.node.identity.publicKey.hashcode();
3879
4197
  // On removed ranges (peer leaves / shrink), gid-level history can hide
3880
4198
  // per-entry gaps. Force a fresh delivery pass for reassigned entries.
3881
- const forceFreshDelivery = changes.some((change) => change.type === "removed");
4199
+ const forceFreshDelivery = changes.some((change) => change.type === "removed" && change.range.hash !== selfHash);
3882
4200
  const gidPeersHistorySnapshot = new Map();
4201
+ const dedupeCutoff = Date.now() - RECENT_REPAIR_DISPATCH_TTL_MS;
4202
+ for (const [target, hashes] of this._recentRepairDispatch) {
4203
+ for (const [hash, ts] of hashes) {
4204
+ if (ts <= dedupeCutoff) {
4205
+ hashes.delete(hash);
4206
+ }
4207
+ }
4208
+ if (hashes.size === 0) {
4209
+ this._recentRepairDispatch.delete(target);
4210
+ }
4211
+ }
3883
4212
  const changed = false;
4213
+ const replacedPeers = new Set();
4214
+ for (const change of changes) {
4215
+ if (change.type === "replaced" && change.range.hash !== selfHash) {
4216
+ replacedPeers.add(change.range.hash);
4217
+ }
4218
+ }
4219
+ const addedPeers = new Set();
4220
+ for (const change of changes) {
4221
+ if (change.type === "added" || change.type === "replaced") {
4222
+ const hash = change.range.hash;
4223
+ if (hash !== selfHash) {
4224
+ // Range updates can reassign entries to an existing peer shortly after it
4225
+ // already received a subset. Avoid suppressing legitimate follow-up repair.
4226
+ this._recentRepairDispatch.delete(hash);
4227
+ }
4228
+ }
4229
+ if (change.type === "added") {
4230
+ const hash = change.range.hash;
4231
+ if (hash !== selfHash && !replacedPeers.has(hash)) {
4232
+ addedPeers.add(hash);
4233
+ }
4234
+ }
4235
+ }
3884
4236
  try {
3885
4237
  const uncheckedDeliver = new Map();
3886
- for await (const entryReplicated of toRebalance(changes, this.entryCoordinatesIndex, this.recentlyRebalanced)) {
4238
+ const flushUncheckedDeliverTarget = (target) => {
4239
+ const entries = uncheckedDeliver.get(target);
4240
+ if (!entries || entries.size === 0) {
4241
+ return;
4242
+ }
4243
+ const isJoinWarmupTarget = addedPeers.has(target);
4244
+ const bypassRecentDedupe = isJoinWarmupTarget || forceFreshDelivery;
4245
+ this.dispatchMaybeMissingEntries(target, entries, {
4246
+ bypassRecentDedupe,
4247
+ retryScheduleMs: isJoinWarmupTarget
4248
+ ? JOIN_WARMUP_RETRY_SCHEDULE_MS
4249
+ : undefined,
4250
+ forceFreshDelivery,
4251
+ });
4252
+ uncheckedDeliver.delete(target);
4253
+ };
4254
+ const queueUncheckedDeliver = (target, entry) => {
4255
+ let set = uncheckedDeliver.get(target);
4256
+ if (!set) {
4257
+ set = new Map();
4258
+ uncheckedDeliver.set(target, set);
4259
+ }
4260
+ if (set.has(entry.hash)) {
4261
+ return;
4262
+ }
4263
+ set.set(entry.hash, entry);
4264
+ if (set.size >= this.repairSweepTargetBufferSize) {
4265
+ flushUncheckedDeliverTarget(target);
4266
+ }
4267
+ };
4268
+ for await (const entryReplicated of toRebalance(changes, this.entryCoordinatesIndex, this.recentlyRebalanced, { forceFresh: forceFreshDelivery })) {
3887
4269
  if (this.closed) {
3888
4270
  break;
3889
4271
  }
@@ -3909,14 +4291,7 @@ let SharedLog = (() => {
3909
4291
  continue;
3910
4292
  }
3911
4293
  if (!oldPeersSet?.has(currentPeer)) {
3912
- let set = uncheckedDeliver.get(currentPeer);
3913
- if (!set) {
3914
- set = new Map();
3915
- uncheckedDeliver.set(currentPeer, set);
3916
- }
3917
- if (!set.has(entryReplicated.hash)) {
3918
- set.set(entryReplicated.hash, entryReplicated);
3919
- }
4294
+ queueUncheckedDeliver(currentPeer, entryReplicated);
3920
4295
  }
3921
4296
  }
3922
4297
  if (oldPeersSet) {
@@ -3942,11 +4317,13 @@ let SharedLog = (() => {
3942
4317
  this.removePruneRequestSent(entryReplicated.hash);
3943
4318
  }
3944
4319
  }
3945
- for (const [target, entries] of uncheckedDeliver) {
3946
- this.syncronizer.onMaybeMissingEntries({
3947
- entries,
3948
- targets: [target],
3949
- });
4320
+ if (forceFreshDelivery || addedPeers.size > 0) {
4321
+ // Schedule a coalesced background sweep for churn/join windows instead of
4322
+ // scanning the whole index synchronously on each replication change.
4323
+ this.scheduleRepairSweep({ forceFreshDelivery, addedPeers });
4324
+ }
4325
+ for (const target of [...uncheckedDeliver.keys()]) {
4326
+ flushUncheckedDeliverTarget(target);
3950
4327
  }
3951
4328
  return changed;
3952
4329
  }
@@ -3965,6 +4342,7 @@ let SharedLog = (() => {
3965
4342
  }
3966
4343
  const fromHash = evt.detail.from.hashcode();
3967
4344
  this._replicationInfoBlockedPeers.add(fromHash);
4345
+ this._recentRepairDispatch.delete(fromHash);
3968
4346
  // Keep a per-peer timestamp watermark when we observe an unsubscribe. This
3969
4347
  // prevents late/out-of-order replication-info messages from re-introducing
3970
4348
  // stale segments for a peer that has already left the topic.
@@ -3982,7 +4360,7 @@ let SharedLog = (() => {
3982
4360
  }
3983
4361
  this.remoteBlocks.onReachable(evt.detail.from);
3984
4362
  this._replicationInfoBlockedPeers.delete(evt.detail.from.hashcode());
3985
- return this.handleSubscriptionChange(evt.detail.from, evt.detail.topics, true);
4363
+ await this.handleSubscriptionChange(evt.detail.from, evt.detail.topics, true);
3986
4364
  }
3987
4365
  async rebalanceParticipation() {
3988
4366
  // update more participation rate to converge to the average expected rate or bounded by