@peerbit/shared-log 13.1.0 → 13.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/src/index.js CHANGED
@@ -66,7 +66,7 @@ import {} from "./replication-domain.js";
66
66
  import { AbsoluteReplicas, AddedReplicationSegmentMessage, AllReplicatingSegmentsMessage, MinReplicas, ReplicationPingMessage, ReplicationError, RequestReplicationInfoMessage, ResponseRoleMessage, StoppedReplicating, decodeReplicas, encodeReplicas, maxReplicas, } from "./replication.js";
67
67
  import { Observer, Replicator } from "./role.js";
68
68
  import { RatelessIBLTSynchronizer } from "./sync/rateless-iblt.js";
69
- import { SimpleSyncronizer } from "./sync/simple.js";
69
+ import { ConfirmEntriesMessage, SimpleSyncronizer } from "./sync/simple.js";
70
70
  import { groupByGid } from "./utils.js";
71
71
  const toLocalPublicSignKey = (key) => {
72
72
  if (typeof key === "string") {
@@ -263,10 +263,95 @@ const REPLICATOR_LIVENESS_PROBE_FAILURES_TO_EVICT = 2;
263
263
  // Churn/join repair can race with pruning and transient missed sync requests under
264
264
  // heavy event-loop load. Keep retries alive with a longer tail so reassigned
265
265
  // entries are retried after short bursts and slower recovery windows.
266
- const FORCE_FRESH_RETRY_SCHEDULE_MS = [
266
+ const CHURN_REPAIR_RETRY_SCHEDULE_MS = [
267
267
  0, 1_000, 3_000, 7_000, 15_000, 30_000, 45_000,
268
268
  ];
269
- const JOIN_WARMUP_RETRY_SCHEDULE_MS = [0, 1_000, 3_000, 7_000, 15_000];
269
+ const JOIN_WARMUP_RETRY_SCHEDULE_MS = [
270
+ 0,
271
+ 1_000,
272
+ 3_000,
273
+ 7_000,
274
+ 15_000,
275
+ 30_000,
276
+ 60_000,
277
+ ];
278
+ const JOIN_AUTHORITATIVE_RETRY_SCHEDULE_MS = [
279
+ 0,
280
+ 1_000,
281
+ 3_000,
282
+ 7_000,
283
+ 15_000,
284
+ 30_000,
285
+ 60_000,
286
+ ];
287
+ const APPEND_BACKFILL_RETRY_SCHEDULE_MS = [0, 1_000, 3_000, 7_000];
288
+ const JOIN_AUTHORITATIVE_REPAIR_DELAY_MS = 2_000;
289
+ const JOIN_AUTHORITATIVE_REPAIR_SWEEP_DELAYS_MS = [
290
+ JOIN_AUTHORITATIVE_REPAIR_DELAY_MS,
291
+ 7_000,
292
+ 15_000,
293
+ 30_000,
294
+ ];
295
+ const APPEND_BACKFILL_DELAY_MS = 500;
296
+ const ASSUME_SYNCED_REPAIR_SUPPRESSION_MS = 5_000;
297
+ const REPAIR_CONFIRMATION_HASH_BATCH_SIZE = 1_024;
298
+ const REPAIR_DISPATCH_MODES = [
299
+ "join-warmup",
300
+ "join-authoritative",
301
+ "append-backfill",
302
+ "churn",
303
+ ];
304
+ const createRepairMetricBucket = () => ({
305
+ dispatches: 0,
306
+ entries: 0,
307
+ ratelessFirstPasses: 0,
308
+ simpleFallbackPasses: 0,
309
+ });
310
+ const createRepairMetrics = () => ({
311
+ "join-warmup": createRepairMetricBucket(),
312
+ "join-authoritative": createRepairMetricBucket(),
313
+ "append-backfill": createRepairMetricBucket(),
314
+ churn: createRepairMetricBucket(),
315
+ });
316
+ const createRepairPendingPeersByMode = () => new Map(REPAIR_DISPATCH_MODES.map((mode) => [mode, new Set()]));
317
+ const cloneRepairPendingPeersByMode = (pending) => new Map(REPAIR_DISPATCH_MODES.map((mode) => [mode, new Set(pending.get(mode) ?? [])]));
318
+ const createRepairFrontierByMode = () => new Map(REPAIR_DISPATCH_MODES.map((mode) => [mode, new Map()]));
319
+ const createRepairActiveTargetsByMode = () => new Map(REPAIR_DISPATCH_MODES.map((mode) => [mode, new Set()]));
320
+ const getRepairRetrySchedule = (mode) => {
321
+ switch (mode) {
322
+ case "join-warmup":
323
+ return JOIN_WARMUP_RETRY_SCHEDULE_MS;
324
+ case "join-authoritative":
325
+ return JOIN_AUTHORITATIVE_RETRY_SCHEDULE_MS;
326
+ case "append-backfill":
327
+ return APPEND_BACKFILL_RETRY_SCHEDULE_MS;
328
+ case "churn":
329
+ return CHURN_REPAIR_RETRY_SCHEDULE_MS;
330
+ }
331
+ };
332
+ const resolveRepairRetrySchedule = (mode, override, trackedFrontier = false) => {
333
+ const fallback = getRepairRetrySchedule(mode);
334
+ if (!override || override.length === 0) {
335
+ return fallback;
336
+ }
337
+ if (trackedFrontier &&
338
+ override.length === 1 &&
339
+ override[0] === 0 &&
340
+ fallback.length > 1) {
341
+ // A tracked frontier with only an immediate retry would otherwise stay on
342
+ // attempt 0 forever, which means rateless-only retries and no sparse-tail
343
+ // simple fallback. Keep the immediate seed, then continue with the normal
344
+ // tracked repair schedule.
345
+ return [0, ...fallback.slice(1)];
346
+ }
347
+ return override;
348
+ };
349
+ const getRepairTransportForAttempt = (mode, attemptIndex) => {
350
+ if (mode === "churn") {
351
+ return "simple";
352
+ }
353
+ return attemptIndex === 0 ? "rateless" : "simple";
354
+ };
270
355
  const toPositiveInteger = (value, fallback, label) => {
271
356
  if (value == null) {
272
357
  return fallback;
@@ -375,8 +460,18 @@ let SharedLog = (() => {
375
460
  _repairRetryTimers;
376
461
  _recentRepairDispatch;
377
462
  _repairSweepRunning;
378
- _repairSweepForceFreshPending;
379
- _repairSweepAddedPeersPending;
463
+ _repairSweepPendingModes;
464
+ _repairSweepPendingPeersByMode;
465
+ _repairFrontierByMode;
466
+ _repairFrontierActiveTargetsByMode;
467
+ _repairSweepOptimisticGidPeersPending;
468
+ _entryKnownPeers;
469
+ _joinAuthoritativeRepairTimersByDelay;
470
+ _joinAuthoritativeRepairPeersByDelay;
471
+ _assumeSyncedRepairSuppressedUntil;
472
+ _appendBackfillTimer;
473
+ _appendBackfillPendingByTarget;
474
+ _repairMetrics;
380
475
  _topicSubscribersCache;
381
476
  // regular distribution checks
382
477
  distributeQueue;
@@ -716,7 +811,7 @@ let SharedLog = (() => {
716
811
  }),
717
812
  });
718
813
  }
719
- async _appendDeliverToReplicators(entry, minReplicasValue, leaders, selfHash, isLeader, deliveryArg) {
814
+ async _appendDeliverToReplicators(entry, coordinates, minReplicasValue, leaders, selfHash, isLeader, deliveryArg) {
720
815
  const { delivery, reliability, requireRecipients, minAcks, wrap } = this._parseDeliveryOptions(deliveryArg);
721
816
  const pending = [];
722
817
  const track = (promise) => {
@@ -725,10 +820,32 @@ let SharedLog = (() => {
725
820
  const fanoutUnicastOptions = delivery?.timeout != null || delivery?.signal != null
726
821
  ? { timeoutMs: delivery.timeout, signal: delivery.signal }
727
822
  : undefined;
823
+ const fullReplicaDeliveryCandidates = await this.getFullReplicaRepairCandidates(undefined, {
824
+ includeSubscribers: false,
825
+ });
826
+ if (minReplicasValue >= Math.max(1, fullReplicaDeliveryCandidates.size)) {
827
+ for (const peer of fullReplicaDeliveryCandidates) {
828
+ if (!leaders.has(peer)) {
829
+ leaders.set(peer, { intersecting: true });
830
+ }
831
+ }
832
+ }
833
+ const entryReplicatedForRepair = this.createEntryReplicatedForRepair({
834
+ entry,
835
+ coordinates,
836
+ leaders: leaders,
837
+ replicas: minReplicasValue,
838
+ });
728
839
  for await (const message of createExchangeHeadsMessages(this.log, [entry])) {
729
840
  await this._mergeLeadersFromGidReferences(message, minReplicasValue, leaders);
730
- const leadersForDelivery = delivery ? new Set(leaders.keys()) : undefined;
731
- const set = this.addPeersToGidPeerHistory(entry.meta.gid, leaders.keys());
841
+ const authoritativeRecipients = new Set(leaders.keys());
842
+ const leadersForDelivery = delivery
843
+ ? new Set(authoritativeRecipients)
844
+ : undefined;
845
+ // Outbound append delivery only tells us who we intend to send to, not who has
846
+ // actually stored the entry. Keep this recipient set local so later repair
847
+ // sweeps can still backfill peers that missed the initial delivery.
848
+ const set = new Set(leaders.keys());
732
849
  let hasRemotePeers = set.has(selfHash) ? set.size > 1 : set.size > 0;
733
850
  const allowSubscriberFallback = this.syncronizer instanceof SimpleSyncronizer ||
734
851
  (this.compatibility ?? Number.MAX_VALUE) < 10;
@@ -758,6 +875,17 @@ let SharedLog = (() => {
758
875
  continue;
759
876
  }
760
877
  if (!delivery) {
878
+ for (const peer of authoritativeRecipients) {
879
+ if (peer === selfHash) {
880
+ continue;
881
+ }
882
+ // Default live append delivery is still optimistic. If one remote misses
883
+ // the initial heads exchange and the caller did not opt into explicit
884
+ // delivery acks, we still need a targeted backfill source of truth for the
885
+ // authoritative recipients or one entry can get stuck at 2/3 replicas
886
+ // forever. Best-effort fallback subscribers are not repair-worthy.
887
+ this.queueAppendBackfill(peer, entryReplicatedForRepair);
888
+ }
761
889
  this.rpc
762
890
  .send(message, {
763
891
  mode: isLeader
@@ -785,12 +913,16 @@ let SharedLog = (() => {
785
913
  }
786
914
  const ackTo = [];
787
915
  let silentTo;
916
+ const repairTargets = new Set();
788
917
  // Default delivery semantics: require enough remote ACKs to reach the requested
789
918
  // replication degree (local append counts as 1).
790
919
  const defaultMinAcks = Math.max(0, minReplicasValue - 1);
791
920
  const ackLimitRaw = reliability === "ack" ? (minAcks ?? defaultMinAcks) : 0;
792
921
  const ackLimit = Math.max(0, Math.min(Math.floor(ackLimitRaw), orderedRemoteRecipients.length));
793
922
  for (const peer of orderedRemoteRecipients) {
923
+ if (authoritativeRecipients.has(peer)) {
924
+ repairTargets.add(peer);
925
+ }
794
926
  if (ackTo.length < ackLimit) {
795
927
  ackTo.push(peer);
796
928
  }
@@ -825,6 +957,12 @@ let SharedLog = (() => {
825
957
  })
826
958
  .catch((error) => logger.error(error));
827
959
  }
960
+ for (const peer of repairTargets) {
961
+ // Direct append delivery is intentionally optimistic. Queue one delayed,
962
+ // batched maybe-sync pass for the intended recipients so stable 3-peer
963
+ // append workloads do not depend on perfect first-try delivery ordering.
964
+ this.queueAppendBackfill(peer, entryReplicatedForRepair);
965
+ }
828
966
  }
829
967
  if (pending.length > 0) {
830
968
  await Promise.all(pending);
@@ -1376,6 +1514,7 @@ let SharedLog = (() => {
1376
1514
  // Keep local sync/prune state consistent even when a peer disappears
1377
1515
  // through replication-info updates without a topic unsubscribe event.
1378
1516
  this.removePeerFromGidPeerHistory(keyHash);
1517
+ this.removeRepairFrontierTarget(keyHash);
1379
1518
  this._recentRepairDispatch.delete(keyHash);
1380
1519
  if (!isMe) {
1381
1520
  this.syncronizer.onPeerDisconnected(keyHash);
@@ -1726,6 +1865,7 @@ let SharedLog = (() => {
1726
1865
  for (const key of this._gidPeersHistory.keys()) {
1727
1866
  this.removePeerFromGidPeerHistory(publicKeyHash, key);
1728
1867
  }
1868
+ this.removePeerFromEntryKnownPeers(publicKeyHash);
1729
1869
  }
1730
1870
  }
1731
1871
  addPeersToGidPeerHistory(gid, publicKeys, reset) {
@@ -1744,10 +1884,343 @@ let SharedLog = (() => {
1744
1884
  }
1745
1885
  return set;
1746
1886
  }
1887
+ markEntriesKnownByPeer(hashes, peer) {
1888
+ for (const hash of hashes) {
1889
+ let peers = this._entryKnownPeers.get(hash);
1890
+ if (!peers) {
1891
+ peers = new Set();
1892
+ this._entryKnownPeers.set(hash, peers);
1893
+ }
1894
+ peers.add(peer);
1895
+ }
1896
+ }
1897
+ removeEntriesKnownByPeer(hashes, peer) {
1898
+ for (const hash of hashes) {
1899
+ const peers = this._entryKnownPeers.get(hash);
1900
+ if (!peers) {
1901
+ continue;
1902
+ }
1903
+ peers.delete(peer);
1904
+ if (peers.size === 0) {
1905
+ this._entryKnownPeers.delete(hash);
1906
+ }
1907
+ }
1908
+ }
1909
+ removePeerFromEntryKnownPeers(peer) {
1910
+ for (const [hash, peers] of this._entryKnownPeers) {
1911
+ peers.delete(peer);
1912
+ if (peers.size === 0) {
1913
+ this._entryKnownPeers.delete(hash);
1914
+ }
1915
+ }
1916
+ }
1917
+ isEntryKnownByPeer(hash, peer) {
1918
+ return this._entryKnownPeers.get(hash)?.has(peer) === true;
1919
+ }
1920
+ markRepairSweepOptimisticPeer(gid, peer) {
1921
+ let peers = this._repairSweepOptimisticGidPeersPending.get(gid);
1922
+ if (!peers) {
1923
+ peers = new Map();
1924
+ this._repairSweepOptimisticGidPeersPending.set(gid, peers);
1925
+ }
1926
+ peers.set(peer, (peers.get(peer) || 0) + 1);
1927
+ }
1928
+ hasPendingRepairSweepOptimisticPeer(gid, peer) {
1929
+ return (this._repairSweepOptimisticGidPeersPending.get(gid)?.get(peer) || 0) > 0;
1930
+ }
1931
+ createEntryReplicatedForRepair(properties) {
1932
+ const assignedToRangeBoundary = shouldAssignToRangeBoundary(properties.leaders, properties.replicas);
1933
+ const cidObject = cidifyString(properties.entry.hash);
1934
+ const hashNumber = this.indexableDomain.numbers.bytesToNumber(cidObject.multihash.digest);
1935
+ return new this.indexableDomain.constructorEntry({
1936
+ assignedToRangeBoundary,
1937
+ coordinates: properties.coordinates,
1938
+ meta: properties.entry.meta,
1939
+ hash: properties.entry.hash,
1940
+ hashNumber,
1941
+ });
1942
+ }
1943
+ isAssumeSyncedRepairSuppressed() {
1944
+ return this._assumeSyncedRepairSuppressedUntil > Date.now();
1945
+ }
1946
+ isFrontierTrackedRepairMode(mode) {
1947
+ return mode !== "join-warmup";
1948
+ }
1949
+ async sleepTracked(delayMs) {
1950
+ if (delayMs <= 0) {
1951
+ return;
1952
+ }
1953
+ await new Promise((resolve) => {
1954
+ const timer = setTimeout(() => {
1955
+ this._repairRetryTimers.delete(timer);
1956
+ resolve();
1957
+ }, delayMs);
1958
+ timer.unref?.();
1959
+ this._repairRetryTimers.add(timer);
1960
+ });
1961
+ }
1962
+ queueRepairFrontierEntries(mode, target, entries) {
1963
+ let targets = this._repairFrontierByMode.get(mode);
1964
+ if (!targets) {
1965
+ targets = new Map();
1966
+ this._repairFrontierByMode.set(mode, targets);
1967
+ }
1968
+ let pending = targets.get(target);
1969
+ if (!pending) {
1970
+ pending = new Map();
1971
+ targets.set(target, pending);
1972
+ }
1973
+ for (const [hash, entry] of entries) {
1974
+ pending.set(hash, entry);
1975
+ }
1976
+ }
1977
+ clearRepairFrontierHashes(target, hashes) {
1978
+ const hashList = [...hashes];
1979
+ if (hashList.length === 0) {
1980
+ return;
1981
+ }
1982
+ for (const mode of REPAIR_DISPATCH_MODES) {
1983
+ const pending = this._repairFrontierByMode.get(mode)?.get(target);
1984
+ if (!pending) {
1985
+ continue;
1986
+ }
1987
+ for (const hash of hashList) {
1988
+ pending.delete(hash);
1989
+ }
1990
+ if (pending.size === 0) {
1991
+ this._repairFrontierByMode.get(mode)?.delete(target);
1992
+ }
1993
+ }
1994
+ }
1995
+ async getFullReplicaRepairCandidates(extraPeers, options) {
1996
+ const candidates = new Set([
1997
+ this.node.identity.publicKey.hashcode(),
1998
+ ]);
1999
+ try {
2000
+ for (const peer of await this.getReplicators()) {
2001
+ candidates.add(peer);
2002
+ }
2003
+ }
2004
+ catch {
2005
+ for (const peer of this.uniqueReplicators) {
2006
+ candidates.add(peer);
2007
+ }
2008
+ }
2009
+ for (const peer of extraPeers ?? []) {
2010
+ candidates.add(peer);
2011
+ }
2012
+ if (options?.includeSubscribers !== false) {
2013
+ try {
2014
+ for (const subscriber of (await this._getTopicSubscribers(this.topic)) ?? []) {
2015
+ candidates.add(subscriber.hashcode());
2016
+ }
2017
+ }
2018
+ catch {
2019
+ // Best-effort only; explicit repair peers still keep the path safe.
2020
+ }
2021
+ }
2022
+ return candidates;
2023
+ }
2024
+ removeRepairFrontierTarget(target) {
2025
+ for (const mode of REPAIR_DISPATCH_MODES) {
2026
+ this._repairFrontierByMode.get(mode)?.delete(target);
2027
+ this._repairFrontierActiveTargetsByMode.get(mode)?.delete(target);
2028
+ }
2029
+ }
2030
+ async sendRepairConfirmation(target, hashes) {
2031
+ const uniqueHashes = [...new Set(hashes)];
2032
+ for (let i = 0; i < uniqueHashes.length; i += REPAIR_CONFIRMATION_HASH_BATCH_SIZE) {
2033
+ const chunk = uniqueHashes.slice(i, i + REPAIR_CONFIRMATION_HASH_BATCH_SIZE);
2034
+ await this.rpc.send(new ConfirmEntriesMessage({ hashes: chunk }), {
2035
+ priority: 1,
2036
+ mode: new SilentDelivery({ to: [target], redundancy: 1 }),
2037
+ });
2038
+ }
2039
+ }
2040
+ async pushRepairEntries(target, entries) {
2041
+ for await (const message of createExchangeHeadsMessages(this.log, [...entries.keys()])) {
2042
+ await this.rpc.send(message, {
2043
+ priority: 1,
2044
+ mode: new SilentDelivery({ to: [target], redundancy: 1 }),
2045
+ });
2046
+ }
2047
+ }
2048
+ async sendRepairEntriesWithTransport(target, entries, transport, options) {
2049
+ const unknownEntries = new Map();
2050
+ const knownHashes = [];
2051
+ for (const [hash, entry] of entries) {
2052
+ if (options?.bypassKnownPeers || !this.isEntryKnownByPeer(hash, target)) {
2053
+ unknownEntries.set(hash, entry);
2054
+ }
2055
+ else {
2056
+ knownHashes.push(hash);
2057
+ }
2058
+ }
2059
+ this.clearRepairFrontierHashes(target, knownHashes);
2060
+ if (unknownEntries.size === 0) {
2061
+ return;
2062
+ }
2063
+ if (transport === "simple") {
2064
+ // Fallback repair should not depend on the target completing the
2065
+ // RequestMaybeSync -> ResponseMaybeSync round trip.
2066
+ await this.pushRepairEntries(target, unknownEntries);
2067
+ return;
2068
+ }
2069
+ await this.syncronizer.onMaybeMissingEntries({
2070
+ entries: unknownEntries,
2071
+ targets: [target],
2072
+ });
2073
+ }
2074
+ async sendMaybeMissingEntriesNow(target, entries, options) {
2075
+ if (entries.size === 0) {
2076
+ return;
2077
+ }
2078
+ const now = Date.now();
2079
+ let recentlyDispatchedByHash = this._recentRepairDispatch.get(target);
2080
+ if (!recentlyDispatchedByHash) {
2081
+ recentlyDispatchedByHash = new Map();
2082
+ this._recentRepairDispatch.set(target, recentlyDispatchedByHash);
2083
+ }
2084
+ for (const [hash, ts] of recentlyDispatchedByHash) {
2085
+ if (now - ts > RECENT_REPAIR_DISPATCH_TTL_MS) {
2086
+ recentlyDispatchedByHash.delete(hash);
2087
+ }
2088
+ }
2089
+ const filteredEntries = options.bypassRecentDedupe === true
2090
+ ? new Map(entries)
2091
+ : new Map();
2092
+ if (options.bypassRecentDedupe !== true) {
2093
+ for (const [hash, entry] of entries) {
2094
+ const prev = recentlyDispatchedByHash.get(hash);
2095
+ if (prev != null && now - prev <= RECENT_REPAIR_DISPATCH_TTL_MS) {
2096
+ continue;
2097
+ }
2098
+ recentlyDispatchedByHash.set(hash, now);
2099
+ filteredEntries.set(hash, entry);
2100
+ }
2101
+ }
2102
+ else {
2103
+ for (const hash of entries.keys()) {
2104
+ recentlyDispatchedByHash.set(hash, now);
2105
+ }
2106
+ }
2107
+ if (filteredEntries.size === 0) {
2108
+ return;
2109
+ }
2110
+ const bucket = this._repairMetrics[options.mode];
2111
+ bucket.dispatches += 1;
2112
+ bucket.entries += filteredEntries.size;
2113
+ if (options.transport === "simple") {
2114
+ bucket.simpleFallbackPasses += 1;
2115
+ }
2116
+ else {
2117
+ bucket.ratelessFirstPasses += 1;
2118
+ }
2119
+ await Promise.resolve(this.sendRepairEntriesWithTransport(target, filteredEntries, options.transport, { bypassKnownPeers: options.mode === "churn" })).catch((error) => logger.error(error));
2120
+ }
2121
+ ensureRepairFrontierRunner(mode, target, retryScheduleMs) {
2122
+ const activeTargets = this._repairFrontierActiveTargetsByMode.get(mode);
2123
+ if (!activeTargets || activeTargets.has(target) || this.closed) {
2124
+ return;
2125
+ }
2126
+ activeTargets.add(target);
2127
+ const retrySchedule = resolveRepairRetrySchedule(mode, retryScheduleMs, this.isFrontierTrackedRepairMode(mode));
2128
+ const steadyStateDelay = retrySchedule.length > 1
2129
+ ? Math.max(1, retrySchedule[retrySchedule.length - 1] - retrySchedule[retrySchedule.length - 2])
2130
+ : Math.max(retrySchedule[0] || 1_000, 1_000);
2131
+ void (async () => {
2132
+ let attemptIndex = 0;
2133
+ try {
2134
+ for (;;) {
2135
+ if (this.closed) {
2136
+ return;
2137
+ }
2138
+ const pending = this._repairFrontierByMode.get(mode)?.get(target);
2139
+ if (!pending || pending.size === 0) {
2140
+ return;
2141
+ }
2142
+ if ((mode === "join-warmup" || mode === "join-authoritative") &&
2143
+ this.isAssumeSyncedRepairSuppressed()) {
2144
+ await this.sleepTracked(Math.max(250, this._assumeSyncedRepairSuppressedUntil - Date.now()));
2145
+ continue;
2146
+ }
2147
+ await this.sendMaybeMissingEntriesNow(target, pending, {
2148
+ mode,
2149
+ transport: getRepairTransportForAttempt(mode, attemptIndex),
2150
+ bypassRecentDedupe: true,
2151
+ });
2152
+ const remaining = this._repairFrontierByMode.get(mode)?.get(target);
2153
+ if (!remaining || remaining.size === 0) {
2154
+ return;
2155
+ }
2156
+ const waitMs = attemptIndex + 1 < retrySchedule.length
2157
+ ? Math.max(0, retrySchedule[attemptIndex + 1] - retrySchedule[attemptIndex])
2158
+ : steadyStateDelay;
2159
+ attemptIndex = Math.min(attemptIndex + 1, retrySchedule.length - 1);
2160
+ await this.sleepTracked(waitMs);
2161
+ }
2162
+ }
2163
+ finally {
2164
+ activeTargets.delete(target);
2165
+ if (!this.closed &&
2166
+ (this._repairFrontierByMode.get(mode)?.get(target)?.size || 0) > 0) {
2167
+ this.ensureRepairFrontierRunner(mode, target, retryScheduleMs);
2168
+ }
2169
+ }
2170
+ })().catch((error) => {
2171
+ activeTargets.delete(target);
2172
+ logger.error(error);
2173
+ });
2174
+ }
2175
+ flushAppendBackfill() {
2176
+ if (this._appendBackfillPendingByTarget.size === 0) {
2177
+ return;
2178
+ }
2179
+ const pending = this._appendBackfillPendingByTarget;
2180
+ this._appendBackfillPendingByTarget = new Map();
2181
+ for (const [target, entries] of pending) {
2182
+ this.dispatchMaybeMissingEntries(target, entries, {
2183
+ mode: "append-backfill",
2184
+ });
2185
+ }
2186
+ }
2187
+ queueAppendBackfill(target, entry) {
2188
+ let entries = this._appendBackfillPendingByTarget.get(target);
2189
+ if (!entries) {
2190
+ entries = new Map();
2191
+ this._appendBackfillPendingByTarget.set(target, entries);
2192
+ }
2193
+ entries.set(entry.hash, entry);
2194
+ if (entries.size >= this.repairSweepTargetBufferSize) {
2195
+ this.flushAppendBackfill();
2196
+ return;
2197
+ }
2198
+ if (this._appendBackfillTimer || this.closed) {
2199
+ return;
2200
+ }
2201
+ const timer = setTimeout(() => {
2202
+ this._repairRetryTimers.delete(timer);
2203
+ if (this._appendBackfillTimer === timer) {
2204
+ this._appendBackfillTimer = undefined;
2205
+ }
2206
+ if (this.closed) {
2207
+ return;
2208
+ }
2209
+ this.flushAppendBackfill();
2210
+ }, APPEND_BACKFILL_DELAY_MS);
2211
+ timer.unref?.();
2212
+ this._repairRetryTimers.add(timer);
2213
+ this._appendBackfillTimer = timer;
2214
+ }
1747
2215
  dispatchMaybeMissingEntries(target, entries, options) {
1748
2216
  if (entries.size === 0) {
1749
2217
  return;
1750
2218
  }
2219
+ if (this.isFrontierTrackedRepairMode(options.mode)) {
2220
+ this.queueRepairFrontierEntries(options.mode, target, entries);
2221
+ this.ensureRepairFrontierRunner(options.mode, target, options.retryScheduleMs);
2222
+ return;
2223
+ }
1751
2224
  const now = Date.now();
1752
2225
  let recentlyDispatchedByHash = this._recentRepairDispatch.get(target);
1753
2226
  if (!recentlyDispatchedByHash) {
@@ -1759,10 +2232,10 @@ let SharedLog = (() => {
1759
2232
  recentlyDispatchedByHash.delete(hash);
1760
2233
  }
1761
2234
  }
1762
- const filteredEntries = options?.bypassRecentDedupe === true
2235
+ const filteredEntries = options.bypassRecentDedupe === true
1763
2236
  ? new Map(entries)
1764
2237
  : new Map();
1765
- if (options?.bypassRecentDedupe !== true) {
2238
+ if (options.bypassRecentDedupe !== true) {
1766
2239
  for (const [hash, entry] of entries) {
1767
2240
  const prev = recentlyDispatchedByHash.get(hash);
1768
2241
  if (prev != null && now - prev <= RECENT_REPAIR_DISPATCH_TTL_MS) {
@@ -1780,95 +2253,186 @@ let SharedLog = (() => {
1780
2253
  if (filteredEntries.size === 0) {
1781
2254
  return;
1782
2255
  }
1783
- const retrySchedule = options?.retryScheduleMs && options.retryScheduleMs.length > 0
1784
- ? options.retryScheduleMs
1785
- : options?.forceFreshDelivery
1786
- ? FORCE_FRESH_RETRY_SCHEDULE_MS
1787
- : [0];
1788
- const run = () => {
1789
- // For force-fresh churn repair we intentionally bypass rateless IBLT and
1790
- // use simple hash-based sync. This path is a directed "push these hashes
1791
- // to that peer" recovery flow; using simple sync here avoids occasional
1792
- // single-hash gaps seen with IBLT-oriented maybe-sync batches under churn.
1793
- if (options?.forceFreshDelivery &&
1794
- this.syncronizer instanceof RatelessIBLTSynchronizer) {
1795
- return Promise.resolve(this.syncronizer.simple.onMaybeMissingEntries({
1796
- entries: filteredEntries,
1797
- targets: [target],
1798
- })).catch((error) => logger.error(error));
1799
- }
1800
- return Promise.resolve(this.syncronizer.onMaybeMissingEntries({
1801
- entries: filteredEntries,
1802
- targets: [target],
1803
- })).catch((error) => logger.error(error));
2256
+ if ((options.mode === "join-warmup" ||
2257
+ options.mode === "join-authoritative") &&
2258
+ this.isAssumeSyncedRepairSuppressed()) {
2259
+ return;
2260
+ }
2261
+ const retrySchedule = resolveRepairRetrySchedule(options.mode, options.retryScheduleMs, this.isFrontierTrackedRepairMode(options.mode));
2262
+ const bucket = this._repairMetrics[options.mode];
2263
+ bucket.dispatches += 1;
2264
+ bucket.entries += filteredEntries.size;
2265
+ const run = (transport) => {
2266
+ if (transport === "simple") {
2267
+ bucket.simpleFallbackPasses += 1;
2268
+ }
2269
+ else {
2270
+ bucket.ratelessFirstPasses += 1;
2271
+ }
2272
+ return Promise.resolve(this.sendRepairEntriesWithTransport(target, filteredEntries, transport, { bypassKnownPeers: options.mode === "churn" })).catch((error) => logger.error(error));
1804
2273
  };
1805
- for (const delayMs of retrySchedule) {
2274
+ retrySchedule.forEach((delayMs, index) => {
2275
+ const transport = getRepairTransportForAttempt(options.mode, index);
1806
2276
  if (delayMs === 0) {
1807
- void run();
1808
- continue;
2277
+ void run(transport);
2278
+ return;
1809
2279
  }
1810
2280
  const timer = setTimeout(() => {
1811
2281
  this._repairRetryTimers.delete(timer);
1812
2282
  if (this.closed) {
1813
2283
  return;
1814
2284
  }
1815
- void run();
2285
+ void run(transport);
1816
2286
  }, delayMs);
1817
2287
  timer.unref?.();
1818
2288
  this._repairRetryTimers.add(timer);
1819
- }
2289
+ });
1820
2290
  }
1821
2291
  scheduleRepairSweep(options) {
1822
- if (options.forceFreshDelivery) {
1823
- this._repairSweepForceFreshPending = true;
1824
- }
1825
- for (const peer of options.addedPeers) {
1826
- this._repairSweepAddedPeersPending.add(peer);
2292
+ this._repairSweepPendingModes.add(options.mode);
2293
+ const pendingPeers = this._repairSweepPendingPeersByMode.get(options.mode);
2294
+ if (pendingPeers) {
2295
+ for (const peer of options.peers ?? []) {
2296
+ pendingPeers.add(peer);
2297
+ }
1827
2298
  }
1828
2299
  if (!this._repairSweepRunning && !this.closed) {
1829
2300
  this._repairSweepRunning = true;
1830
2301
  void this.runRepairSweep();
1831
2302
  }
1832
2303
  }
2304
+ scheduleJoinAuthoritativeRepair(peers) {
2305
+ if (this.closed || peers.size === 0) {
2306
+ return;
2307
+ }
2308
+ for (const delayMs of JOIN_AUTHORITATIVE_REPAIR_SWEEP_DELAYS_MS) {
2309
+ let pendingPeers = this._joinAuthoritativeRepairPeersByDelay.get(delayMs);
2310
+ if (!pendingPeers) {
2311
+ pendingPeers = new Set();
2312
+ this._joinAuthoritativeRepairPeersByDelay.set(delayMs, pendingPeers);
2313
+ }
2314
+ for (const peer of peers) {
2315
+ pendingPeers.add(peer);
2316
+ }
2317
+ if (this._joinAuthoritativeRepairTimersByDelay.has(delayMs)) {
2318
+ continue;
2319
+ }
2320
+ const timer = setTimeout(() => {
2321
+ this._repairRetryTimers.delete(timer);
2322
+ this._joinAuthoritativeRepairTimersByDelay.delete(delayMs);
2323
+ if (this.closed) {
2324
+ return;
2325
+ }
2326
+ const peersForSweep = new Set(this._joinAuthoritativeRepairPeersByDelay.get(delayMs) ?? []);
2327
+ this._joinAuthoritativeRepairPeersByDelay.delete(delayMs);
2328
+ if (peersForSweep.size === 0) {
2329
+ return;
2330
+ }
2331
+ // A joiner's leader view can still be partial on the first delayed pass
2332
+ // under pubsub jitter. Bounded per-peer rescans widen the authoritative
2333
+ // frontier without adding per-append sweeps.
2334
+ this.scheduleRepairSweep({
2335
+ mode: "join-authoritative",
2336
+ peers: peersForSweep,
2337
+ });
2338
+ }, delayMs);
2339
+ timer.unref?.();
2340
+ this._repairRetryTimers.add(timer);
2341
+ this._joinAuthoritativeRepairTimersByDelay.set(delayMs, timer);
2342
+ }
2343
+ }
1833
2344
  async runRepairSweep() {
1834
2345
  try {
1835
2346
  while (!this.closed) {
1836
- const forceFreshDelivery = this._repairSweepForceFreshPending;
1837
- const addedPeers = new Set(this._repairSweepAddedPeersPending);
1838
- this._repairSweepForceFreshPending = false;
1839
- this._repairSweepAddedPeersPending.clear();
1840
- if (!forceFreshDelivery && addedPeers.size === 0) {
2347
+ const pendingModes = new Set(this._repairSweepPendingModes);
2348
+ const pendingPeersByMode = cloneRepairPendingPeersByMode(this._repairSweepPendingPeersByMode);
2349
+ this._repairSweepPendingModes.clear();
2350
+ for (const peers of this._repairSweepPendingPeersByMode.values()) {
2351
+ peers.clear();
2352
+ }
2353
+ if (pendingModes.size === 0) {
1841
2354
  return;
1842
2355
  }
1843
- const pendingByTarget = new Map();
1844
- const flushTarget = (target) => {
1845
- const entries = pendingByTarget.get(target);
2356
+ const optimisticGidPeersByMode = new Map();
2357
+ const optimisticGidPeersConsumedByMode = new Map();
2358
+ for (const mode of pendingModes) {
2359
+ const modePeers = pendingPeersByMode.get(mode);
2360
+ if (!modePeers || modePeers.size === 0) {
2361
+ continue;
2362
+ }
2363
+ const optimisticGidPeers = new Map();
2364
+ const optimisticGidPeersConsumed = new Map();
2365
+ for (const [gid, peerCounts] of this._repairSweepOptimisticGidPeersPending) {
2366
+ let matchedPeers;
2367
+ let matchedCounts;
2368
+ for (const [peer, count] of peerCounts) {
2369
+ if (!modePeers.has(peer)) {
2370
+ continue;
2371
+ }
2372
+ matchedPeers ||= new Set();
2373
+ matchedCounts ||= new Map();
2374
+ matchedPeers.add(peer);
2375
+ matchedCounts.set(peer, count);
2376
+ }
2377
+ if (matchedPeers && matchedCounts) {
2378
+ optimisticGidPeers.set(gid, matchedPeers);
2379
+ optimisticGidPeersConsumed.set(gid, matchedCounts);
2380
+ }
2381
+ }
2382
+ if (optimisticGidPeers.size > 0) {
2383
+ optimisticGidPeersByMode.set(mode, optimisticGidPeers);
2384
+ optimisticGidPeersConsumedByMode.set(mode, optimisticGidPeersConsumed);
2385
+ }
2386
+ }
2387
+ const pendingByMode = new Map(REPAIR_DISPATCH_MODES.map((mode) => [mode, new Map()]));
2388
+ const pendingRepairPeers = new Set();
2389
+ for (const peers of pendingPeersByMode.values()) {
2390
+ for (const peer of peers) {
2391
+ pendingRepairPeers.add(peer);
2392
+ }
2393
+ }
2394
+ const fullReplicaRepairCandidates = await this.getFullReplicaRepairCandidates(pendingRepairPeers, {
2395
+ includeSubscribers: false,
2396
+ });
2397
+ const fullReplicaRepairCandidateCount = Math.max(1, fullReplicaRepairCandidates.size);
2398
+ const nextFrontierByMode = new Map([
2399
+ ["join-authoritative", new Map()],
2400
+ ["churn", new Map()],
2401
+ ]);
2402
+ const flushTarget = (mode, target) => {
2403
+ const targets = pendingByMode.get(mode);
2404
+ const entries = targets?.get(target);
1846
2405
  if (!entries || entries.size === 0) {
1847
2406
  return;
1848
2407
  }
1849
- const isJoinWarmupTarget = addedPeers.has(target);
1850
- const bypassRecentDedupe = isJoinWarmupTarget || forceFreshDelivery;
1851
2408
  this.dispatchMaybeMissingEntries(target, entries, {
1852
- bypassRecentDedupe,
1853
- retryScheduleMs: isJoinWarmupTarget
1854
- ? JOIN_WARMUP_RETRY_SCHEDULE_MS
1855
- : undefined,
1856
- forceFreshDelivery,
2409
+ bypassRecentDedupe: true,
2410
+ mode,
1857
2411
  });
1858
- pendingByTarget.delete(target);
2412
+ targets?.delete(target);
1859
2413
  };
1860
- const queueEntryForTarget = (target, entry) => {
1861
- let set = pendingByTarget.get(target);
2414
+ const queueEntryForTarget = (mode, target, entry) => {
2415
+ const sweepTargets = nextFrontierByMode.get(mode);
2416
+ if (sweepTargets) {
2417
+ let sweepSet = sweepTargets.get(target);
2418
+ if (!sweepSet) {
2419
+ sweepSet = new Map();
2420
+ sweepTargets.set(target, sweepSet);
2421
+ }
2422
+ sweepSet.set(entry.hash, entry);
2423
+ }
2424
+ const targets = pendingByMode.get(mode);
2425
+ let set = targets.get(target);
1862
2426
  if (!set) {
1863
2427
  set = new Map();
1864
- pendingByTarget.set(target, set);
2428
+ targets.set(target, set);
1865
2429
  }
1866
2430
  if (set.has(entry.hash)) {
1867
2431
  return;
1868
2432
  }
1869
2433
  set.set(entry.hash, entry);
1870
2434
  if (set.size >= this.repairSweepTargetBufferSize) {
1871
- flushTarget(target);
2435
+ flushTarget(mode, target);
1872
2436
  }
1873
2437
  };
1874
2438
  const iterator = this.entryCoordinatesIndex.iterate({});
@@ -1877,20 +2441,42 @@ let SharedLog = (() => {
1877
2441
  const entries = await iterator.next(REPAIR_SWEEP_ENTRY_BATCH_SIZE);
1878
2442
  for (const entry of entries) {
1879
2443
  const entryReplicated = entry.value;
1880
- const knownPeers = this._gidPeersHistory.get(entryReplicated.gid);
2444
+ const gid = entryReplicated.gid;
2445
+ const knownPeers = this._gidPeersHistory.get(gid);
2446
+ const requestedReplicas = decodeReplicas(entryReplicated).getValue(this);
1881
2447
  const currentPeers = await this.findLeaders(entryReplicated.coordinates, entryReplicated, { roleAge: 0 });
1882
- if (forceFreshDelivery) {
2448
+ if (pendingModes.has("churn")) {
1883
2449
  for (const [currentPeer] of currentPeers) {
1884
2450
  if (currentPeer === this.node.identity.publicKey.hashcode()) {
1885
2451
  continue;
1886
2452
  }
1887
- queueEntryForTarget(currentPeer, entryReplicated);
2453
+ queueEntryForTarget("churn", currentPeer, entryReplicated);
1888
2454
  }
1889
2455
  }
1890
- if (addedPeers.size > 0) {
1891
- for (const peer of addedPeers) {
1892
- if (currentPeers.has(peer) && !knownPeers?.has(peer)) {
1893
- queueEntryForTarget(peer, entryReplicated);
2456
+ for (const mode of pendingModes) {
2457
+ const modePeers = pendingPeersByMode.get(mode);
2458
+ if (!modePeers || modePeers.size === 0) {
2459
+ continue;
2460
+ }
2461
+ const optimisticPeers = optimisticGidPeersByMode.get(mode)?.get(gid);
2462
+ for (const peer of modePeers) {
2463
+ if (this.isEntryKnownByPeer(entryReplicated.hash, peer)) {
2464
+ continue;
2465
+ }
2466
+ const wasOptimisticallyAssigned = optimisticPeers?.has(peer) === true;
2467
+ const isCoveredByFullReplicaRepair = mode === "join-authoritative" &&
2468
+ fullReplicaRepairCandidates.has(peer) &&
2469
+ requestedReplicas >= fullReplicaRepairCandidateCount;
2470
+ const shouldQueue = mode === "join-authoritative"
2471
+ ? currentPeers.has(peer) || isCoveredByFullReplicaRepair
2472
+ : wasOptimisticallyAssigned ||
2473
+ (currentPeers.has(peer) && !knownPeers?.has(peer));
2474
+ if (shouldQueue) {
2475
+ // Authoritative join repair must not trust partial gid peer history,
2476
+ // otherwise a late joiner can get stuck with a partial historical
2477
+ // backfill forever. Once we enter the authoritative pass, queue every
2478
+ // entry whose current leader set still includes the added peer.
2479
+ queueEntryForTarget(mode, peer, entryReplicated);
1894
2480
  }
1895
2481
  }
1896
2482
  }
@@ -1900,8 +2486,64 @@ let SharedLog = (() => {
1900
2486
  finally {
1901
2487
  await iterator.close();
1902
2488
  }
1903
- for (const target of [...pendingByTarget.keys()]) {
1904
- flushTarget(target);
2489
+ for (const [, optimisticGidPeersConsumed] of optimisticGidPeersConsumedByMode) {
2490
+ for (const [gid, peerCounts] of optimisticGidPeersConsumed) {
2491
+ const pendingPeerCounts = this._repairSweepOptimisticGidPeersPending.get(gid);
2492
+ if (!pendingPeerCounts) {
2493
+ continue;
2494
+ }
2495
+ for (const [peer, count] of peerCounts) {
2496
+ const current = pendingPeerCounts.get(peer) || 0;
2497
+ const next = current - count;
2498
+ if (next > 0) {
2499
+ pendingPeerCounts.set(peer, next);
2500
+ }
2501
+ else {
2502
+ pendingPeerCounts.delete(peer);
2503
+ }
2504
+ }
2505
+ if (pendingPeerCounts.size === 0) {
2506
+ this._repairSweepOptimisticGidPeersPending.delete(gid);
2507
+ }
2508
+ }
2509
+ }
2510
+ for (const mode of pendingModes) {
2511
+ if (mode !== "join-authoritative" && mode !== "churn") {
2512
+ continue;
2513
+ }
2514
+ const nextTargets = nextFrontierByMode.get(mode) ?? new Map();
2515
+ const frontierTargets = this._repairFrontierByMode.get(mode);
2516
+ for (const target of pendingPeersByMode.get(mode) ?? []) {
2517
+ const replacement = nextTargets.get(target);
2518
+ if (mode === "join-authoritative") {
2519
+ // Authoritative join repair is receipt-driven: a later sweep can have a
2520
+ // narrower transient leader view, but it must not forget unconfirmed
2521
+ // hashes that were already queued for this joiner.
2522
+ if (replacement && replacement.size > 0) {
2523
+ const existing = frontierTargets?.get(target);
2524
+ if (existing && existing.size > 0) {
2525
+ for (const [hash, entry] of replacement) {
2526
+ existing.set(hash, entry);
2527
+ }
2528
+ }
2529
+ else {
2530
+ frontierTargets?.set(target, replacement);
2531
+ }
2532
+ }
2533
+ continue;
2534
+ }
2535
+ if (replacement && replacement.size > 0) {
2536
+ frontierTargets?.set(target, replacement);
2537
+ }
2538
+ else {
2539
+ frontierTargets?.delete(target);
2540
+ }
2541
+ }
2542
+ }
2543
+ for (const [mode, targets] of pendingByMode) {
2544
+ for (const target of [...targets.keys()]) {
2545
+ flushTarget(mode, target);
2546
+ }
1905
2547
  }
1906
2548
  }
1907
2549
  }
@@ -1912,17 +2554,78 @@ let SharedLog = (() => {
1912
2554
  }
1913
2555
  finally {
1914
2556
  this._repairSweepRunning = false;
1915
- if (!this.closed &&
1916
- (this._repairSweepForceFreshPending ||
1917
- this._repairSweepAddedPeersPending.size > 0)) {
2557
+ if (!this.closed && this._repairSweepPendingModes.size > 0) {
1918
2558
  this._repairSweepRunning = true;
1919
2559
  void this.runRepairSweep();
1920
2560
  }
1921
2561
  }
1922
2562
  }
1923
2563
  async pruneDebouncedFnAddIfNotKeeping(args) {
1924
- if (!this.keep || !(await this.keep(args.value.entry))) {
1925
- return this.pruneDebouncedFn.add(args);
2564
+ if (this.keep && (await this.keep(args.value.entry))) {
2565
+ return false;
2566
+ }
2567
+ void this.pruneDebouncedFn.add(args);
2568
+ return true;
2569
+ }
2570
+ async pruneJoinedEntriesNoLongerLed(entries) {
2571
+ const selfHash = this.node.identity.publicKey.hashcode();
2572
+ for (const entry of entries) {
2573
+ if (this.closed || this._pendingDeletes.has(entry.hash)) {
2574
+ continue;
2575
+ }
2576
+ const leaders = await this.findLeadersFromEntry(entry, decodeReplicas(entry).getValue(this), { roleAge: 0 });
2577
+ if (leaders.has(selfHash)) {
2578
+ this.pruneDebouncedFn.delete(entry.hash);
2579
+ continue;
2580
+ }
2581
+ if (leaders.size === 0) {
2582
+ continue;
2583
+ }
2584
+ await this.pruneDebouncedFnAddIfNotKeeping({
2585
+ key: entry.hash,
2586
+ value: { entry, leaders },
2587
+ });
2588
+ this.responseToPruneDebouncedFn.delete(entry.hash);
2589
+ }
2590
+ }
2591
+ async pruneIndexedEntriesNoLongerLed() {
2592
+ const selfHash = this.node.identity.publicKey.hashcode();
2593
+ const iterator = this.entryCoordinatesIndex.iterate({});
2594
+ let enqueuedPrune = false;
2595
+ try {
2596
+ while (!this.closed && !iterator.done()) {
2597
+ const entries = await iterator.next(REPAIR_SWEEP_ENTRY_BATCH_SIZE);
2598
+ for (const entry of entries) {
2599
+ const entryReplicated = entry.value;
2600
+ if (this.closed || this._pendingDeletes.has(entryReplicated.hash)) {
2601
+ continue;
2602
+ }
2603
+ const leaders = await this.findLeaders(entryReplicated.coordinates, entryReplicated, { roleAge: 0 });
2604
+ if (leaders.has(selfHash)) {
2605
+ this.pruneDebouncedFn.delete(entryReplicated.hash);
2606
+ await this._pendingDeletes
2607
+ .get(entryReplicated.hash)
2608
+ ?.reject(new Error("Failed to delete, is leader again"));
2609
+ this.removePruneRequestSent(entryReplicated.hash);
2610
+ continue;
2611
+ }
2612
+ if (leaders.size === 0) {
2613
+ continue;
2614
+ }
2615
+ enqueuedPrune =
2616
+ (await this.pruneDebouncedFnAddIfNotKeeping({
2617
+ key: entryReplicated.hash,
2618
+ value: { entry: entryReplicated, leaders },
2619
+ })) || enqueuedPrune;
2620
+ this.responseToPruneDebouncedFn.delete(entryReplicated.hash);
2621
+ }
2622
+ }
2623
+ }
2624
+ finally {
2625
+ await iterator.close();
2626
+ }
2627
+ if (enqueuedPrune && !this.closed) {
2628
+ await this.pruneDebouncedFn.flush();
1926
2629
  }
1927
2630
  }
1928
2631
  clearCheckedPruneRetry(hash) {
@@ -2065,16 +2768,17 @@ let SharedLog = (() => {
2065
2768
  await this._appendDeliverToAllFanout(result.entry);
2066
2769
  }
2067
2770
  else {
2068
- await this._appendDeliverToReplicators(result.entry, minReplicasValue, leaders, selfHash, isLeader, deliveryArg);
2771
+ await this._appendDeliverToReplicators(result.entry, coordinates, minReplicasValue, leaders, selfHash, isLeader, deliveryArg);
2069
2772
  }
2070
2773
  }
2071
- if (!isLeader && !this.shouldDelayAdaptiveRebalance()) {
2774
+ const delayAdaptiveRebalance = this.shouldDelayAdaptiveRebalance();
2775
+ if (!isLeader && !delayAdaptiveRebalance) {
2072
2776
  this.pruneDebouncedFnAddIfNotKeeping({
2073
2777
  key: result.entry.hash,
2074
2778
  value: { entry: result.entry, leaders },
2075
2779
  });
2076
2780
  }
2077
- if (!this._isAdaptiveReplicating) {
2781
+ if (!delayAdaptiveRebalance) {
2078
2782
  this.rebalanceParticipationDebounced?.call();
2079
2783
  }
2080
2784
  return result;
@@ -2108,8 +2812,18 @@ let SharedLog = (() => {
2108
2812
  this._repairRetryTimers = new Set();
2109
2813
  this._recentRepairDispatch = new Map();
2110
2814
  this._repairSweepRunning = false;
2111
- this._repairSweepForceFreshPending = false;
2112
- this._repairSweepAddedPeersPending = new Set();
2815
+ this._repairSweepPendingModes = new Set();
2816
+ this._repairSweepPendingPeersByMode = createRepairPendingPeersByMode();
2817
+ this._repairFrontierByMode = createRepairFrontierByMode();
2818
+ this._repairFrontierActiveTargetsByMode = createRepairActiveTargetsByMode();
2819
+ this._repairSweepOptimisticGidPeersPending = new Map();
2820
+ this._entryKnownPeers = new Map();
2821
+ this._joinAuthoritativeRepairTimersByDelay = new Map();
2822
+ this._joinAuthoritativeRepairPeersByDelay = new Map();
2823
+ this._assumeSyncedRepairSuppressedUntil = 0;
2824
+ this._appendBackfillTimer = undefined;
2825
+ this._appendBackfillPendingByTarget = new Map();
2826
+ this._repairMetrics = createRepairMetrics();
2113
2827
  this._topicSubscribersCache = new Map();
2114
2828
  this.coordinateToHash = new Cache({ max: 1e6, ttl: 1e4 });
2115
2829
  this.recentlyRebalanced = new Cache({ max: 1e4, ttl: 1e5 });
@@ -2167,7 +2881,10 @@ let SharedLog = (() => {
2167
2881
  this.keep = options?.keep;
2168
2882
  this.pendingMaturity = new Map();
2169
2883
  const id = sha256Base64Sync(this.log.id);
2170
- const storage = await this.node.storage.sublevel(id);
2884
+ const [storage, logScope] = await Promise.all([
2885
+ this.node.storage.sublevel(id),
2886
+ this.node.indexer.scope(id),
2887
+ ]);
2171
2888
  const localBlocks = await new AnyBlockStore(await storage.sublevel("blocks"));
2172
2889
  const fanoutService = getSharedLogFanoutService(this.node.services);
2173
2890
  const blockProviderNamespace = (cid) => `cid:${cid}`;
@@ -2223,16 +2940,18 @@ let SharedLog = (() => {
2223
2940
  }
2224
2941
  },
2225
2942
  });
2226
- await this.remoteBlocks.start();
2227
- const logScope = await this.node.indexer.scope(id);
2228
- const replicationIndex = await logScope.scope("replication");
2943
+ const remoteBlocksStartPromise = this.remoteBlocks.start();
2944
+ const [replicationIndex, logIndex] = await Promise.all([
2945
+ logScope.scope("replication"),
2946
+ logScope.scope("log"),
2947
+ ]);
2229
2948
  this._replicationRangeIndex = await replicationIndex.init({
2230
2949
  schema: this.indexableDomain.constructorRange,
2231
2950
  });
2232
2951
  this._entryCoordinatesIndex = await replicationIndex.init({
2233
2952
  schema: this.indexableDomain.constructorEntry,
2234
2953
  });
2235
- const logIndex = await logScope.scope("log");
2954
+ await remoteBlocksStartPromise;
2236
2955
  const hasIndexedReplicationInfo = (await this.replicationIndex.count({
2237
2956
  query: [
2238
2957
  new StringMatch({
@@ -2360,27 +3079,33 @@ let SharedLog = (() => {
2360
3079
  }
2361
3080
  }
2362
3081
  // Open for communcation
2363
- await this.rpc.open({
2364
- queryType: TransportMessage,
2365
- responseType: TransportMessage,
2366
- responseHandler: (query, context) => this.onMessage(query, context),
2367
- topic: this.topic,
2368
- });
2369
3082
  this._onSubscriptionFn =
2370
3083
  this._onSubscriptionFn || this._onSubscription.bind(this);
2371
- await this.node.services.pubsub.addEventListener("subscribe", this._onSubscriptionFn);
2372
3084
  this._onUnsubscriptionFn =
2373
3085
  this._onUnsubscriptionFn || this._onUnsubscription.bind(this);
2374
- await this.node.services.pubsub.addEventListener("unsubscribe", this._onUnsubscriptionFn);
2375
- await this.rpc.subscribe();
2376
- await this._openFanoutChannel(options?.fanout);
2377
- // mark all our replicaiton ranges as "new", this would allow other peers to understand that we recently reopend our database and might need some sync and warmup
2378
- await this.updateTimestampOfOwnedReplicationRanges(); // TODO do we need to do this before subscribing?
3086
+ await Promise.all([
3087
+ this.rpc.open({
3088
+ queryType: TransportMessage,
3089
+ responseType: TransportMessage,
3090
+ responseHandler: (query, context) => this.onMessage(query, context),
3091
+ topic: this.topic,
3092
+ }),
3093
+ this.node.services.pubsub.addEventListener("subscribe", this._onSubscriptionFn),
3094
+ this.node.services.pubsub.addEventListener("unsubscribe", this._onUnsubscriptionFn),
3095
+ ]);
3096
+ const fanoutOpenPromise = this._openFanoutChannel(options?.fanout);
3097
+ // Mark previously-owned replication ranges as "new" only when they already exist.
3098
+ // Fresh opens have nothing to touch here, so skip the extra scan/write entirely.
3099
+ const updateOwnedReplicationPromise = hasIndexedReplicationInfo
3100
+ ? this.updateTimestampOfOwnedReplicationRanges()
3101
+ : Promise.resolve();
3102
+ await Promise.all([fanoutOpenPromise, updateOwnedReplicationPromise]);
2379
3103
  // if we had a previous session with replication info, and new replication info dictates that we unreplicate
2380
3104
  // we should do that. Otherwise if options is a unreplication we dont need to do anything because
2381
3105
  // we are already unreplicated (as we are just opening)
2382
- let isUnreplicationOptionsDefined = isUnreplicationOptions(options?.replicate);
2383
- const canResumeReplication = (await isReplicationOptionsDependentOnPreviousState(options?.replicate, this.replicationIndex, this.node.identity.publicKey)) && hasIndexedReplicationInfo;
3106
+ const isUnreplicationOptionsDefined = isUnreplicationOptions(options?.replicate);
3107
+ const canResumeReplication = hasIndexedReplicationInfo &&
3108
+ (await isReplicationOptionsDependentOnPreviousState(options?.replicate, this.replicationIndex, this.node.identity.publicKey));
2384
3109
  if (hasIndexedReplicationInfo && isUnreplicationOptionsDefined) {
2385
3110
  await this.replicate(options?.replicate, { checkDuplicates: true });
2386
3111
  }
@@ -2423,6 +3148,7 @@ let SharedLog = (() => {
2423
3148
  }
2424
3149
  async afterOpen() {
2425
3150
  await super.afterOpen();
3151
+ const existingSubscribersPromise = this._getTopicSubscribers(this.topic);
2426
3152
  // We do this here, because these calls requires this.closed == false
2427
3153
  void this.pruneOfflineReplicators()
2428
3154
  .then(() => {
@@ -2437,7 +3163,7 @@ let SharedLog = (() => {
2437
3163
  this.startReplicatorLivenessSweep();
2438
3164
  await this.rebalanceParticipation();
2439
3165
  // Take into account existing subscription
2440
- (await this._getTopicSubscribers(this.topic))?.forEach((v) => {
3166
+ (await existingSubscribersPromise)?.forEach((v) => {
2441
3167
  if (v.equals(this.node.identity.publicKey)) {
2442
3168
  return;
2443
3169
  }
@@ -2952,8 +3678,28 @@ let SharedLog = (() => {
2952
3678
  this._repairRetryTimers.clear();
2953
3679
  this._recentRepairDispatch.clear();
2954
3680
  this._repairSweepRunning = false;
2955
- this._repairSweepForceFreshPending = false;
2956
- this._repairSweepAddedPeersPending.clear();
3681
+ this._repairSweepPendingModes.clear();
3682
+ for (const peers of this._repairSweepPendingPeersByMode.values()) {
3683
+ peers.clear();
3684
+ }
3685
+ this._repairSweepOptimisticGidPeersPending.clear();
3686
+ this._entryKnownPeers.clear();
3687
+ for (const timer of this._joinAuthoritativeRepairTimersByDelay.values()) {
3688
+ clearTimeout(timer);
3689
+ }
3690
+ this._joinAuthoritativeRepairTimersByDelay.clear();
3691
+ this._joinAuthoritativeRepairPeersByDelay.clear();
3692
+ for (const targets of this._repairFrontierByMode.values()) {
3693
+ targets.clear();
3694
+ }
3695
+ for (const targets of this._repairFrontierActiveTargetsByMode.values()) {
3696
+ targets.clear();
3697
+ }
3698
+ if (this._appendBackfillTimer) {
3699
+ clearTimeout(this._appendBackfillTimer);
3700
+ this._appendBackfillTimer = undefined;
3701
+ }
3702
+ this._appendBackfillPendingByTarget.clear();
2957
3703
  for (const [_k, v] of this._pendingDeletes) {
2958
3704
  v.clear();
2959
3705
  v.promise.resolve(); // TODO or reject?
@@ -3114,6 +3860,7 @@ let SharedLog = (() => {
3114
3860
  logger.trace(`${this.node.identity.publicKey.hashcode()}: Recieved heads: ${heads.length === 1 ? heads[0].entry.hash : "#" + heads.length}, logId: ${this.log.idString}`);
3115
3861
  if (heads) {
3116
3862
  const filteredHeads = [];
3863
+ const confirmedHashes = new Set();
3117
3864
  for (const head of heads) {
3118
3865
  if (!(await this.log.has(head.entry.hash))) {
3119
3866
  head.entry.init({
@@ -3123,8 +3870,18 @@ let SharedLog = (() => {
3123
3870
  });
3124
3871
  filteredHeads.push(head);
3125
3872
  }
3873
+ else {
3874
+ confirmedHashes.add(head.entry.hash);
3875
+ }
3876
+ }
3877
+ const fromIsSelf = context.from.equals(this.node.identity.publicKey);
3878
+ if (!fromIsSelf) {
3879
+ this.markEntriesKnownByPeer(heads.map((head) => head.entry.hash), context.from.hashcode());
3126
3880
  }
3127
3881
  if (filteredHeads.length === 0) {
3882
+ if (confirmedHashes.size > 0 && !fromIsSelf) {
3883
+ await this.sendRepairConfirmation(context.from, confirmedHashes);
3884
+ }
3128
3885
  return;
3129
3886
  }
3130
3887
  const groupedByGid = await groupByGid(filteredHeads);
@@ -3222,7 +3979,12 @@ let SharedLog = (() => {
3222
3979
  return;
3223
3980
  }
3224
3981
  if (toMerge.length > 0) {
3982
+ this.markEntriesKnownByPeer(toMerge.map((entry) => entry.hash), context.from.hashcode());
3225
3983
  await this.log.join(toMerge);
3984
+ for (const merged of toMerge) {
3985
+ confirmedHashes.add(merged.hash);
3986
+ }
3987
+ await this.pruneJoinedEntriesNoLongerLed(toMerge);
3226
3988
  toDelete?.map((x) =>
3227
3989
  // TODO types
3228
3990
  this.pruneDebouncedFnAddIfNotKeeping({
@@ -3261,6 +4023,10 @@ let SharedLog = (() => {
3261
4023
  promises.push(fn()); // we do this concurrently since waitForIsLeader might be a blocking operation for some entries
3262
4024
  }
3263
4025
  await Promise.all(promises);
4026
+ if (confirmedHashes.size > 0 && !context.from.equals(this.node.identity.publicKey)) {
4027
+ this.markEntriesKnownByPeer(confirmedHashes, context.from.hashcode());
4028
+ await this.sendRepairConfirmation(context.from, confirmedHashes);
4029
+ }
3264
4030
  }
3265
4031
  }
3266
4032
  else if (msg instanceof RequestIPrune) {
@@ -3268,6 +4034,7 @@ let SharedLog = (() => {
3268
4034
  const from = context.from.hashcode();
3269
4035
  for (const hash of msg.hashes) {
3270
4036
  this.removePruneRequestSent(hash, from);
4037
+ this.removeEntriesKnownByPeer([hash], from);
3271
4038
  // if we expect the remote to be owner of this entry because we are to prune ourselves, then we need to remove the remote
3272
4039
  // this is due to that the remote has previously indicated to be a replicator to help us prune but now has changed their mind
3273
4040
  const outGoingPrunes = this._requestIPruneResponseReplicatorSet.get(hash);
@@ -3355,6 +4122,11 @@ let SharedLog = (() => {
3355
4122
  this._pendingDeletes.get(hash)?.resolve(context.from.hashcode());
3356
4123
  }
3357
4124
  }
4125
+ else if (msg instanceof ConfirmEntriesMessage) {
4126
+ this.markEntriesKnownByPeer(msg.hashes, context.from.hashcode());
4127
+ this.clearRepairFrontierHashes(context.from.hashcode(), msg.hashes);
4128
+ return;
4129
+ }
3358
4130
  else if (await this.syncronizer.onMessage(msg, context)) {
3359
4131
  return; // the syncronizer has handled the message
3360
4132
  }
@@ -3679,6 +4451,11 @@ let SharedLog = (() => {
3679
4451
  if (options?.replicate) {
3680
4452
  let messageToSend = undefined;
3681
4453
  if (assumeSynced) {
4454
+ // `assumeSynced` is an explicit contract that this join should trust the
4455
+ // supplied history and avoid initiating outbound repair while the local
4456
+ // replication ranges settle.
4457
+ this._assumeSyncedRepairSuppressedUntil =
4458
+ Date.now() + ASSUME_SYNCED_REPAIR_SUPPRESSION_MS;
3682
4459
  for (const entry of entriesToReplicate) {
3683
4460
  await seedAssumeSyncedPeerHistory(entry);
3684
4461
  }
@@ -3747,9 +4524,14 @@ let SharedLog = (() => {
3747
4524
  clear();
3748
4525
  // `waitForReplicator()` is typically used as a precondition before join/replicate
3749
4526
  // flows. A replicator can become mature and enqueue a debounced rebalance
3750
- // (`replicationChangeDebounceFn`) slightly later. Flush here so callers don't
3751
- // observe a "late" rebalance after the wait resolves.
3752
- await this.replicationChangeDebounceFn?.flush?.();
4527
+ // (`replicationChangeDebounceFn`) slightly later. Kick the flush, but do not
4528
+ // make membership waits depend on all rebalance work finishing; callers that
4529
+ // need settled distribution already wait for that explicitly.
4530
+ this.replicationChangeDebounceFn?.flush?.().catch((error) => {
4531
+ if (!isNotStartedError(error)) {
4532
+ logger.error(error?.toString?.() ?? String(error));
4533
+ }
4534
+ });
3753
4535
  deferred.resolve();
3754
4536
  };
3755
4537
  const reject = (error) => {
@@ -4141,11 +4923,51 @@ let SharedLog = (() => {
4141
4923
  }
4142
4924
  }
4143
4925
  }
4926
+ if (!options?.candidates) {
4927
+ const fullReplicaLeaders = await this.findFullReplicaLeaders(cursors.length, roleAge, peerFilter);
4928
+ if (fullReplicaLeaders) {
4929
+ return fullReplicaLeaders;
4930
+ }
4931
+ }
4144
4932
  return getSamples(cursors, this.replicationIndex, roleAge, this.indexableDomain.numbers, {
4145
4933
  peerFilter,
4146
4934
  uniqueReplicators: peerFilter,
4147
4935
  });
4148
4936
  }
4937
+ async findFullReplicaLeaders(replicas, roleAge, peerFilter) {
4938
+ const now = Date.now();
4939
+ const leaders = new Map();
4940
+ const includeStrict = this._logProperties?.strictFullReplicaFallback !== false;
4941
+ const iterator = this.replicationIndex.iterate({}, { shape: { hash: true, timestamp: true, mode: true } });
4942
+ try {
4943
+ for (;;) {
4944
+ const batch = await iterator.next(64);
4945
+ if (batch.length === 0) {
4946
+ break;
4947
+ }
4948
+ for (const result of batch) {
4949
+ const range = result.value;
4950
+ if (peerFilter && !peerFilter.has(range.hash)) {
4951
+ continue;
4952
+ }
4953
+ if (!isMatured(range, now, roleAge)) {
4954
+ continue;
4955
+ }
4956
+ if (range.mode === ReplicationIntent.Strict && !includeStrict) {
4957
+ continue;
4958
+ }
4959
+ leaders.set(range.hash, { intersecting: true });
4960
+ if (leaders.size > replicas) {
4961
+ return undefined;
4962
+ }
4963
+ }
4964
+ }
4965
+ }
4966
+ finally {
4967
+ await iterator.close();
4968
+ }
4969
+ return leaders.size > 0 ? leaders : undefined;
4970
+ }
4149
4971
  async findLeadersFromEntry(entry, replicas, options) {
4150
4972
  const coordinates = await this.createCoordinates(entry, replicas);
4151
4973
  const result = await this._findLeaders(coordinates, options);
@@ -4613,13 +5435,25 @@ let SharedLog = (() => {
4613
5435
  }
4614
5436
  const changed = false;
4615
5437
  const addedPeers = new Set();
5438
+ const authoritativeRepairPeers = new Set();
4616
5439
  const warmupPeers = new Set();
5440
+ const churnRepairPeers = new Set();
4617
5441
  const hasSelfWarmupChange = changes.some((change) => change.range.hash === selfHash &&
4618
5442
  (change.type === "added" || change.type === "replaced"));
5443
+ const hasSelfRangeRemoval = changes.some((change) => change.range.hash === selfHash &&
5444
+ (change.type === "removed" || change.type === "replaced"));
4619
5445
  for (const change of changes) {
5446
+ if (change.range.hash !== selfHash &&
5447
+ (change.type === "removed" || change.type === "replaced")) {
5448
+ this.removePeerFromEntryKnownPeers(change.range.hash);
5449
+ }
4620
5450
  if (change.type === "added" || change.type === "replaced") {
4621
5451
  const hash = change.range.hash;
4622
5452
  if (hash !== selfHash) {
5453
+ // Existing peers can widen/shift ranges after the initial join. If we
5454
+ // only rescan on first-seen "added", late authoritative range updates can
5455
+ // leave historical backfill permanently partial under load.
5456
+ authoritativeRepairPeers.add(hash);
4623
5457
  // Range updates can reassign entries to an existing peer shortly after it
4624
5458
  // already received a subset. Avoid suppressing legitimate follow-up repair.
4625
5459
  this._recentRepairDispatch.delete(hash);
@@ -4651,17 +5485,24 @@ let SharedLog = (() => {
4651
5485
  return;
4652
5486
  }
4653
5487
  const isWarmupTarget = warmupPeers.has(target);
4654
- const bypassRecentDedupe = isWarmupTarget || forceFreshDelivery;
5488
+ const mode = forceFreshDelivery
5489
+ ? "churn"
5490
+ : isWarmupTarget
5491
+ ? "join-warmup"
5492
+ : "join-authoritative";
4655
5493
  this.dispatchMaybeMissingEntries(target, entries, {
4656
- bypassRecentDedupe,
4657
- retryScheduleMs: isWarmupTarget
5494
+ bypassRecentDedupe: isWarmupTarget || forceFreshDelivery,
5495
+ mode,
5496
+ retryScheduleMs: mode === "join-warmup"
4658
5497
  ? JOIN_WARMUP_RETRY_SCHEDULE_MS
4659
- : undefined,
4660
- forceFreshDelivery,
5498
+ : mode === "join-authoritative"
5499
+ ? [0]
5500
+ : undefined,
4661
5501
  });
4662
5502
  uncheckedDeliver.delete(target);
4663
5503
  };
4664
5504
  const queueUncheckedDeliver = (target, entry) => {
5505
+ churnRepairPeers.add(target);
4665
5506
  let set = uncheckedDeliver.get(target);
4666
5507
  if (!set) {
4667
5508
  set = new Map();
@@ -4715,7 +5556,14 @@ let SharedLog = (() => {
4715
5556
  }
4716
5557
  }
4717
5558
  }
4718
- this.addPeersToGidPeerHistory(entryReplicated.gid, currentPeers.keys(), true);
5559
+ for (const [peer] of currentPeers) {
5560
+ if (warmupPeers.has(peer)) {
5561
+ this.markRepairSweepOptimisticPeer(entryReplicated.gid, peer);
5562
+ }
5563
+ }
5564
+ const authoritativePeers = [...currentPeers.keys()].filter((peer) => !warmupPeers.has(peer) &&
5565
+ !this.hasPendingRepairSweepOptimisticPeer(entryReplicated.gid, peer));
5566
+ this.addPeersToGidPeerHistory(entryReplicated.gid, authoritativePeers, true);
4719
5567
  if (!currentPeers.has(selfHash)) {
4720
5568
  this.pruneDebouncedFnAddIfNotKeeping({
4721
5569
  key: entryReplicated.hash,
@@ -4763,7 +5611,14 @@ let SharedLog = (() => {
4763
5611
  }
4764
5612
  }
4765
5613
  }
4766
- this.addPeersToGidPeerHistory(entryReplicated.gid, currentPeers.keys(), true);
5614
+ for (const [peer] of currentPeers) {
5615
+ if (addedPeers.has(peer)) {
5616
+ this.markRepairSweepOptimisticPeer(entryReplicated.gid, peer);
5617
+ }
5618
+ }
5619
+ const authoritativePeers = [...currentPeers.keys()].filter((peer) => !addedPeers.has(peer) &&
5620
+ !this.hasPendingRepairSweepOptimisticPeer(entryReplicated.gid, peer));
5621
+ this.addPeersToGidPeerHistory(entryReplicated.gid, authoritativePeers, true);
4767
5622
  if (!isLeader) {
4768
5623
  this.pruneDebouncedFnAddIfNotKeeping({
4769
5624
  key: entryReplicated.hash,
@@ -4780,9 +5635,17 @@ let SharedLog = (() => {
4780
5635
  }
4781
5636
  }
4782
5637
  }
5638
+ if (this._isAdaptiveReplicating && hasSelfRangeRemoval) {
5639
+ await this.pruneIndexedEntriesNoLongerLed();
5640
+ }
4783
5641
  if (forceFreshDelivery) {
4784
- // Removed/shrunk ranges still need the authoritative background pass.
4785
- this.scheduleRepairSweep({ forceFreshDelivery, addedPeers });
5642
+ // Pure leave/shrink churn can have zero `addedPeers`, but the peers that
5643
+ // received redistributed entries still need a follow-up repair pass if the
5644
+ // immediate maybe-sync misses one entry.
5645
+ this.scheduleRepairSweep({
5646
+ mode: "churn",
5647
+ peers: churnRepairPeers,
5648
+ });
4786
5649
  }
4787
5650
  else if (useJoinWarmupFastPath) {
4788
5651
  // Pure join warmup uses the cheap immediate maybe-missing dispatch above,
@@ -4795,19 +5658,22 @@ let SharedLog = (() => {
4795
5658
  return;
4796
5659
  }
4797
5660
  this.scheduleRepairSweep({
4798
- forceFreshDelivery: false,
4799
- addedPeers: peers,
5661
+ mode: "join-warmup",
5662
+ peers,
4800
5663
  });
4801
5664
  }, 250);
4802
5665
  timer.unref?.();
4803
5666
  this._repairRetryTimers.add(timer);
4804
5667
  }
4805
- else if (addedPeers.size > 0) {
5668
+ else if (authoritativeRepairPeers.size > 0) {
4806
5669
  this.scheduleRepairSweep({
4807
- forceFreshDelivery: false,
4808
- addedPeers,
5670
+ mode: "join-authoritative",
5671
+ peers: authoritativeRepairPeers,
4809
5672
  });
4810
5673
  }
5674
+ if (!forceFreshDelivery && authoritativeRepairPeers.size > 0) {
5675
+ this.scheduleJoinAuthoritativeRepair(authoritativeRepairPeers);
5676
+ }
4811
5677
  for (const target of [...uncheckedDeliver.keys()]) {
4812
5678
  flushUncheckedDeliverTarget(target);
4813
5679
  }
@@ -4879,6 +5745,10 @@ let SharedLog = (() => {
4879
5745
  if (!dynamicRange) {
4880
5746
  return; // not allowed to replicate
4881
5747
  }
5748
+ if (this.replicationController.maxMemoryLimit != null &&
5749
+ usedMemory > this.replicationController.maxMemoryLimit) {
5750
+ await this.pruneIndexedEntriesNoLongerLed();
5751
+ }
4882
5752
  const peersSize = (await peers.getSize()) || 1;
4883
5753
  const totalParticipation = await this.calculateTotalParticipation();
4884
5754
  const newFactor = this.replicationController.step({