@peerbit/shared-log 13.0.24 → 13.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/src/index.js CHANGED
@@ -66,7 +66,7 @@ import {} from "./replication-domain.js";
66
66
  import { AbsoluteReplicas, AddedReplicationSegmentMessage, AllReplicatingSegmentsMessage, MinReplicas, ReplicationPingMessage, ReplicationError, RequestReplicationInfoMessage, ResponseRoleMessage, StoppedReplicating, decodeReplicas, encodeReplicas, maxReplicas, } from "./replication.js";
67
67
  import { Observer, Replicator } from "./role.js";
68
68
  import { RatelessIBLTSynchronizer } from "./sync/rateless-iblt.js";
69
- import { SimpleSyncronizer } from "./sync/simple.js";
69
+ import { ConfirmEntriesMessage, SimpleSyncronizer } from "./sync/simple.js";
70
70
  import { groupByGid } from "./utils.js";
71
71
  const toLocalPublicSignKey = (key) => {
72
72
  if (typeof key === "string") {
@@ -263,10 +263,95 @@ const REPLICATOR_LIVENESS_PROBE_FAILURES_TO_EVICT = 2;
263
263
  // Churn/join repair can race with pruning and transient missed sync requests under
264
264
  // heavy event-loop load. Keep retries alive with a longer tail so reassigned
265
265
  // entries are retried after short bursts and slower recovery windows.
266
- const FORCE_FRESH_RETRY_SCHEDULE_MS = [
266
+ const CHURN_REPAIR_RETRY_SCHEDULE_MS = [ // schedule getRepairRetrySchedule returns for mode "churn"
267
267
  0, 1_000, 3_000, 7_000, 15_000, 30_000, 45_000,
268
268
  ];
269
- const JOIN_WARMUP_RETRY_SCHEDULE_MS = [0, 1_000, 3_000, 7_000, 15_000];
269
+ const JOIN_WARMUP_RETRY_SCHEDULE_MS = [ // "join-warmup" schedule; the new version appends 30_000 and 60_000 to the tail
270
+ 0,
271
+ 1_000,
272
+ 3_000,
273
+ 7_000,
274
+ 15_000,
275
+ 30_000,
276
+ 60_000,
277
+ ];
278
+ const JOIN_AUTHORITATIVE_RETRY_SCHEDULE_MS = [ // "join-authoritative" schedule; same values as the join-warmup schedule
279
+ 0,
280
+ 1_000,
281
+ 3_000,
282
+ 7_000,
283
+ 15_000,
284
+ 30_000,
285
+ 60_000,
286
+ ];
287
+ const APPEND_BACKFILL_RETRY_SCHEDULE_MS = [0, 1_000, 3_000, 7_000]; // "append-backfill" schedule (shortest tail of the four)
288
+ const JOIN_AUTHORITATIVE_REPAIR_DELAY_MS = 2_000; // only visible use: first element of the sweep-delay list below
289
+ const JOIN_AUTHORITATIVE_REPAIR_SWEEP_DELAYS_MS = [ // scheduleJoinAuthoritativeRepair arms one timer per delay listed here
290
+ JOIN_AUTHORITATIVE_REPAIR_DELAY_MS,
291
+ 7_000,
292
+ 15_000,
293
+ 30_000,
294
+ ];
295
+ const APPEND_BACKFILL_DELAY_MS = 500; // timer delay queueAppendBackfill uses before flushing buffered entries
296
+ const ASSUME_SYNCED_REPAIR_SUPPRESSION_MS = 5_000; // NOTE(review): not referenced in the visible hunks; presumably feeds _assumeSyncedRepairSuppressedUntil - confirm
297
+ const REPAIR_CONFIRMATION_HASH_BATCH_SIZE = 1_024; // max hashes per ConfirmEntriesMessage in sendRepairConfirmation
298
+ const REPAIR_DISPATCH_MODES = [ // mode names used as keys by the per-mode Map factories and by the frontier cleanup loops
299
+ "join-warmup",
300
+ "join-authoritative",
301
+ "append-backfill",
302
+ "churn",
303
+ ];
// Builds a fresh counter object with every repair metric set to zero.
// Returns a new object on each call, so buckets are never shared.
304
+ const createRepairMetricBucket = () => ({
305
+ dispatches: 0, // bumped once per dispatch that still has entries after filtering
306
+ entries: 0, // running sum of the entry counts of those dispatches
307
+ ratelessFirstPasses: 0, // bumped when a pass uses any transport other than "simple"
308
+ simpleFallbackPasses: 0, // bumped when a pass uses the "simple" transport
309
+ });
// Builds the per-mode metrics object: one independent zeroed bucket for each
// of the four repair dispatch modes, keyed by the mode name.
// NOTE(review): the keys are spelled out here instead of being derived from
// REPAIR_DISPATCH_MODES, so the two lists have to be kept in sync by hand.
310
+ const createRepairMetrics = () => ({
311
+ "join-warmup": createRepairMetricBucket(),
312
+ "join-authoritative": createRepairMetricBucket(),
313
+ "append-backfill": createRepairMetricBucket(),
314
+ churn: createRepairMetricBucket(),
315
+ });
// Returns a new Map with one entry per repair dispatch mode, each holding its own empty Set.
316
+ const createRepairPendingPeersByMode = () => new Map(REPAIR_DISPATCH_MODES.map((mode) => [mode, new Set()]));
// Copies a mode -> Set map: the result has one fresh Set per known dispatch mode,
// filled from `pending.get(mode)` (an empty Set when that mode is missing).
// Keys of `pending` that are not in REPAIR_DISPATCH_MODES are not carried over.
317
+ const cloneRepairPendingPeersByMode = (pending) => new Map(REPAIR_DISPATCH_MODES.map((mode) => [mode, new Set(pending.get(mode) ?? [])]));
// Returns a new Map with one entry per repair dispatch mode, each holding its own empty Map.
318
+ const createRepairFrontierByMode = () => new Map(REPAIR_DISPATCH_MODES.map((mode) => [mode, new Map()]));
// Returns a new Map with one entry per repair dispatch mode, each holding its own empty Set.
319
+ const createRepairActiveTargetsByMode = () => new Map(REPAIR_DISPATCH_MODES.map((mode) => [mode, new Set()]));
// Maps a repair dispatch mode name to its built-in retry schedule constant.
// The constant array itself is returned, not a copy.
// NOTE(review): the switch has no default branch, so any mode outside the four
// listed names makes this function return undefined.
320
+ const getRepairRetrySchedule = (mode) => {
321
+ switch (mode) {
322
+ case "join-warmup":
323
+ return JOIN_WARMUP_RETRY_SCHEDULE_MS;
324
+ case "join-authoritative":
325
+ return JOIN_AUTHORITATIVE_RETRY_SCHEDULE_MS;
326
+ case "append-backfill":
327
+ return APPEND_BACKFILL_RETRY_SCHEDULE_MS;
328
+ case "churn":
329
+ return CHURN_REPAIR_RETRY_SCHEDULE_MS;
330
+ }
331
+ };
// Chooses the retry schedule for a dispatch.
//   mode            - repair dispatch mode; selects the built-in fallback schedule
//   override        - optional caller-supplied schedule array
//   trackedFrontier - true when the caller retries through a frontier runner (defaults to false)
// Returns the built-in schedule when `override` is missing or empty, a merged
// schedule for the single-immediate-attempt case described below, and
// `override` unchanged in every other case.
332
+ const resolveRepairRetrySchedule = (mode, override, trackedFrontier = false) => {
333
+ const fallback = getRepairRetrySchedule(mode);
334
+ if (!override || override.length === 0) {
335
+ return fallback;
336
+ }
337
+ if (trackedFrontier &&
338
+ override.length === 1 &&
339
+ override[0] === 0 &&
340
+ fallback.length > 1) {
341
+ // A tracked frontier with only an immediate retry would otherwise stay on
342
+ // attempt 0 forever, which means rateless-only retries and no sparse-tail
343
+ // simple fallback. Keep the immediate seed, then continue with the normal
344
+ // tracked repair schedule.
345
+ return [0, ...fallback.slice(1)]; // new array; the built-in schedule is not mutated
346
+ }
347
+ return override;
348
+ };
// Picks the transport name for one repair attempt.
//   mode         - repair dispatch mode
//   attemptIndex - zero-based index of the attempt within the retry schedule
// Returns "simple" for every "churn" attempt; for the other modes returns
// "rateless" on attempt 0 and "simple" on every later attempt.
349
+ const getRepairTransportForAttempt = (mode, attemptIndex) => {
350
+ if (mode === "churn") {
351
+ return "simple";
352
+ }
353
+ return attemptIndex === 0 ? "rateless" : "simple";
354
+ };
270
355
  const toPositiveInteger = (value, fallback, label) => {
271
356
  if (value == null) {
272
357
  return fallback;
@@ -375,8 +460,18 @@ let SharedLog = (() => {
375
460
  _repairRetryTimers;
376
461
  _recentRepairDispatch;
377
462
  _repairSweepRunning;
378
- _repairSweepForceFreshPending;
379
- _repairSweepAddedPeersPending;
// Repair bookkeeping fields added in this version. Shapes below are read off
// how the methods in this file use them; their initialization is not visible here.
463
+ _repairSweepPendingModes; // modes requested via scheduleRepairSweep (uses .add / .clear)
464
+ _repairSweepPendingPeersByMode; // Map: mode -> Set of peers queued for the next sweep
465
+ _repairFrontierByMode; // Map: mode -> Map(target -> Map(hash -> entry)) still awaiting repair
466
+ _repairFrontierActiveTargetsByMode; // Map: mode -> Set of targets that currently have a frontier runner
467
+ _repairSweepOptimisticGidPeersPending; // Map: gid -> Map(peer -> count), counted up by markRepairSweepOptimisticPeer
468
+ _entryKnownPeers; // Map: entry hash -> Set of peers recorded as knowing that entry
469
+ _joinAuthoritativeRepairTimersByDelay; // Map: delayMs -> pending timer handle
470
+ _joinAuthoritativeRepairPeersByDelay; // Map: delayMs -> Set of peers batched for that timer
471
+ _assumeSyncedRepairSuppressedUntil; // value compared against Date.now() by isAssumeSyncedRepairSuppressed
472
+ _appendBackfillTimer; // handle of the single pending append-backfill flush timer, or undefined
473
+ _appendBackfillPendingByTarget; // Map: target -> Map(hash -> entry) buffered until the next flush
474
+ _repairMetrics; // object indexed by mode name; each value is a counter bucket
380
475
  _topicSubscribersCache;
381
476
  // regular distribution checks
382
477
  distributeQueue;
@@ -716,7 +811,7 @@ let SharedLog = (() => {
716
811
  }),
717
812
  });
718
813
  }
719
- async _appendDeliverToReplicators(entry, minReplicasValue, leaders, selfHash, isLeader, deliveryArg) {
814
+ async _appendDeliverToReplicators(entry, coordinates, minReplicasValue, leaders, selfHash, isLeader, deliveryArg) {
720
815
  const { delivery, reliability, requireRecipients, minAcks, wrap } = this._parseDeliveryOptions(deliveryArg);
721
816
  const pending = [];
722
817
  const track = (promise) => {
@@ -725,10 +820,32 @@ let SharedLog = (() => {
725
820
  const fanoutUnicastOptions = delivery?.timeout != null || delivery?.signal != null
726
821
  ? { timeoutMs: delivery.timeout, signal: delivery.signal }
727
822
  : undefined;
823
+ const fullReplicaDeliveryCandidates = await this.getFullReplicaRepairCandidates(undefined, {
824
+ includeSubscribers: false,
825
+ });
826
+ if (minReplicasValue >= Math.max(1, fullReplicaDeliveryCandidates.size)) {
827
+ for (const peer of fullReplicaDeliveryCandidates) {
828
+ if (!leaders.has(peer)) {
829
+ leaders.set(peer, { intersecting: true });
830
+ }
831
+ }
832
+ }
833
+ const entryReplicatedForRepair = this.createEntryReplicatedForRepair({
834
+ entry,
835
+ coordinates,
836
+ leaders: leaders,
837
+ replicas: minReplicasValue,
838
+ });
728
839
  for await (const message of createExchangeHeadsMessages(this.log, [entry])) {
729
840
  await this._mergeLeadersFromGidReferences(message, minReplicasValue, leaders);
730
- const leadersForDelivery = delivery ? new Set(leaders.keys()) : undefined;
731
- const set = this.addPeersToGidPeerHistory(entry.meta.gid, leaders.keys());
841
+ const authoritativeRecipients = new Set(leaders.keys());
842
+ const leadersForDelivery = delivery
843
+ ? new Set(authoritativeRecipients)
844
+ : undefined;
845
+ // Outbound append delivery only tells us who we intend to send to, not who has
846
+ // actually stored the entry. Keep this recipient set local so later repair
847
+ // sweeps can still backfill peers that missed the initial delivery.
848
+ const set = new Set(leaders.keys());
732
849
  let hasRemotePeers = set.has(selfHash) ? set.size > 1 : set.size > 0;
733
850
  const allowSubscriberFallback = this.syncronizer instanceof SimpleSyncronizer ||
734
851
  (this.compatibility ?? Number.MAX_VALUE) < 10;
@@ -758,6 +875,17 @@ let SharedLog = (() => {
758
875
  continue;
759
876
  }
760
877
  if (!delivery) {
878
+ for (const peer of authoritativeRecipients) {
879
+ if (peer === selfHash) {
880
+ continue;
881
+ }
882
+ // Default live append delivery is still optimistic. If one remote misses
883
+ // the initial heads exchange and the caller did not opt into explicit
884
+ // delivery acks, we still need a targeted backfill source of truth for the
885
+ // authoritative recipients or one entry can get stuck at 2/3 replicas
886
+ // forever. Best-effort fallback subscribers are not repair-worthy.
887
+ this.queueAppendBackfill(peer, entryReplicatedForRepair);
888
+ }
761
889
  this.rpc
762
890
  .send(message, {
763
891
  mode: isLeader
@@ -785,12 +913,16 @@ let SharedLog = (() => {
785
913
  }
786
914
  const ackTo = [];
787
915
  let silentTo;
916
+ const repairTargets = new Set();
788
917
  // Default delivery semantics: require enough remote ACKs to reach the requested
789
918
  // replication degree (local append counts as 1).
790
919
  const defaultMinAcks = Math.max(0, minReplicasValue - 1);
791
920
  const ackLimitRaw = reliability === "ack" ? (minAcks ?? defaultMinAcks) : 0;
792
921
  const ackLimit = Math.max(0, Math.min(Math.floor(ackLimitRaw), orderedRemoteRecipients.length));
793
922
  for (const peer of orderedRemoteRecipients) {
923
+ if (authoritativeRecipients.has(peer)) {
924
+ repairTargets.add(peer);
925
+ }
794
926
  if (ackTo.length < ackLimit) {
795
927
  ackTo.push(peer);
796
928
  }
@@ -825,6 +957,12 @@ let SharedLog = (() => {
825
957
  })
826
958
  .catch((error) => logger.error(error));
827
959
  }
960
+ for (const peer of repairTargets) {
961
+ // Direct append delivery is intentionally optimistic. Queue one delayed,
962
+ // batched maybe-sync pass for the intended recipients so stable 3-peer
963
+ // append workloads do not depend on perfect first-try delivery ordering.
964
+ this.queueAppendBackfill(peer, entryReplicatedForRepair);
965
+ }
828
966
  }
829
967
  if (pending.length > 0) {
830
968
  await Promise.all(pending);
@@ -1376,6 +1514,7 @@ let SharedLog = (() => {
1376
1514
  // Keep local sync/prune state consistent even when a peer disappears
1377
1515
  // through replication-info updates without a topic unsubscribe event.
1378
1516
  this.removePeerFromGidPeerHistory(keyHash);
1517
+ this.removeRepairFrontierTarget(keyHash);
1379
1518
  this._recentRepairDispatch.delete(keyHash);
1380
1519
  if (!isMe) {
1381
1520
  this.syncronizer.onPeerDisconnected(keyHash);
@@ -1726,6 +1865,7 @@ let SharedLog = (() => {
1726
1865
  for (const key of this._gidPeersHistory.keys()) {
1727
1866
  this.removePeerFromGidPeerHistory(publicKeyHash, key);
1728
1867
  }
1868
+ this.removePeerFromEntryKnownPeers(publicKeyHash);
1729
1869
  }
1730
1870
  }
1731
1871
  addPeersToGidPeerHistory(gid, publicKeys, reset) {
@@ -1744,10 +1884,343 @@ let SharedLog = (() => {
1744
1884
  }
1745
1885
  return set;
1746
1886
  }
// Records `peer` as knowing every hash in `hashes`.
//   hashes - iterable of entry hashes
//   peer   - value stored in the per-hash Set (other call sites pass peer hash strings)
1887
+ markEntriesKnownByPeer(hashes, peer) {
1888
+ for (const hash of hashes) {
1889
+ let peers = this._entryKnownPeers.get(hash);
1890
+ if (!peers) {
1891
+ peers = new Set(); // first peer recorded for this hash
1892
+ this._entryKnownPeers.set(hash, peers);
1893
+ }
1894
+ peers.add(peer);
1895
+ }
1896
+ }
// Removes `peer` from the known-peer Set of every hash in `hashes`.
// Hashes with no recorded peers are skipped, and a hash whose Set becomes
// empty is dropped from the map entirely.
1897
+ removeEntriesKnownByPeer(hashes, peer) {
1898
+ for (const hash of hashes) {
1899
+ const peers = this._entryKnownPeers.get(hash);
1900
+ if (!peers) {
1901
+ continue;
1902
+ }
1903
+ peers.delete(peer);
1904
+ if (peers.size === 0) {
1905
+ this._entryKnownPeers.delete(hash); // keep the map free of empty Sets
1906
+ }
1907
+ }
1908
+ }
// Removes `peer` from the known-peer Set of every tracked hash, walking the
// whole map, and drops any hash whose Set becomes empty.
1909
+ removePeerFromEntryKnownPeers(peer) {
1910
+ for (const [hash, peers] of this._entryKnownPeers) {
1911
+ peers.delete(peer);
1912
+ if (peers.size === 0) {
1913
+ this._entryKnownPeers.delete(hash); // deleting the current key while iterating a Map is allowed
1914
+ }
1915
+ }
1916
+ }
// Returns true only when `peer` is recorded in the known-peer Set for `hash`;
// an untracked hash yields false rather than undefined.
1917
+ isEntryKnownByPeer(hash, peer) {
1918
+ return this._entryKnownPeers.get(hash)?.has(peer) === true;
1919
+ }
// Increments the pending counter kept for the (gid, peer) pair, creating the
// per-gid Map on first use.
1920
+ markRepairSweepOptimisticPeer(gid, peer) {
1921
+ let peers = this._repairSweepOptimisticGidPeersPending.get(gid);
1922
+ if (!peers) {
1923
+ peers = new Map();
1924
+ this._repairSweepOptimisticGidPeersPending.set(gid, peers);
1925
+ }
1926
+ peers.set(peer, (peers.get(peer) || 0) + 1); // a missing counter starts from 0
1927
+ }
// Returns true when the pending counter for the (gid, peer) pair is above zero;
// an unknown gid or peer counts as zero.
1928
+ hasPendingRepairSweepOptimisticPeer(gid, peer) {
1929
+ return (this._repairSweepOptimisticGidPeersPending.get(gid)?.get(peer) || 0) > 0;
1930
+ }
// Builds an index-entry object for a log entry so it can be handed to the repair queues.
//   properties.entry       - log entry; its `hash` and `meta` are copied onto the result
//   properties.coordinates - passed through unchanged
//   properties.leaders     - forwarded to shouldAssignToRangeBoundary
//   properties.replicas    - forwarded to shouldAssignToRangeBoundary
// Returns a new instance of `this.indexableDomain.constructorEntry`.
1931
+ createEntryReplicatedForRepair(properties) {
1932
+ const assignedToRangeBoundary = shouldAssignToRangeBoundary(properties.leaders, properties.replicas);
1933
+ const cidObject = cidifyString(properties.entry.hash);
1934
+ const hashNumber = this.indexableDomain.numbers.bytesToNumber(cidObject.multihash.digest); // numeric form of the hash's multihash digest
1935
+ return new this.indexableDomain.constructorEntry({
1936
+ assignedToRangeBoundary,
1937
+ coordinates: properties.coordinates,
1938
+ meta: properties.entry.meta,
1939
+ hash: properties.entry.hash,
1940
+ hashNumber,
1941
+ });
1942
+ }
// Returns true while the current time is still before `_assumeSyncedRepairSuppressedUntil`.
// Callers in this file use it to hold back join-warmup and join-authoritative repair.
1943
+ isAssumeSyncedRepairSuppressed() {
1944
+ return this._assumeSyncedRepairSuppressedUntil > Date.now();
1945
+ }
// Returns true for every mode except "join-warmup". dispatchMaybeMissingEntries
// routes the modes that return true through the frontier queue and its runner.
1946
+ isFrontierTrackedRepairMode(mode) {
1947
+ return mode !== "join-warmup";
1948
+ }
// Waits `delayMs` milliseconds using a timer registered in `_repairRetryTimers`.
// Returns immediately, without creating a timer, when `delayMs` is zero or negative.
// NOTE(review): the promise settles only from the timer callback. If something
// clears the timers in `_repairRetryTimers` (e.g. on close - not visible here),
// an awaiting caller would never resume - confirm this is intended.
1949
+ async sleepTracked(delayMs) {
1950
+ if (delayMs <= 0) {
1951
+ return;
1952
+ }
1953
+ await new Promise((resolve) => {
1954
+ const timer = setTimeout(() => {
1955
+ this._repairRetryTimers.delete(timer); // unregister before resuming the caller
1956
+ resolve();
1957
+ }, delayMs);
1958
+ timer.unref?.(); // where unref exists, the timer does not keep the process alive
1959
+ this._repairRetryTimers.add(timer);
1960
+ });
1961
+ }
// Adds entries to the repair frontier kept for (mode, target), creating the
// nested maps on demand.
//   mode    - repair dispatch mode
//   target  - peer the entries should be repaired towards
//   entries - iterable of [hash, entry] pairs (callers in this file pass a Map)
// An entry already queued under the same hash is overwritten.
1962
+ queueRepairFrontierEntries(mode, target, entries) {
1963
+ let targets = this._repairFrontierByMode.get(mode);
1964
+ if (!targets) {
1965
+ targets = new Map();
1966
+ this._repairFrontierByMode.set(mode, targets);
1967
+ }
1968
+ let pending = targets.get(target);
1969
+ if (!pending) {
1970
+ pending = new Map();
1971
+ targets.set(target, pending);
1972
+ }
1973
+ for (const [hash, entry] of entries) {
1974
+ pending.set(hash, entry);
1975
+ }
1976
+ }
// Removes the given hashes from the target's repair frontier in every dispatch mode.
//   target - peer whose frontier is trimmed
//   hashes - iterable of entry hashes
// A (mode, target) frontier that becomes empty is deleted.
1977
+ clearRepairFrontierHashes(target, hashes) {
1978
+ const hashList = [...hashes]; // materialize once; the list is walked once per mode below
1979
+ if (hashList.length === 0) {
1980
+ return;
1981
+ }
1982
+ for (const mode of REPAIR_DISPATCH_MODES) {
1983
+ const pending = this._repairFrontierByMode.get(mode)?.get(target);
1984
+ if (!pending) {
1985
+ continue;
1986
+ }
1987
+ for (const hash of hashList) {
1988
+ pending.delete(hash);
1989
+ }
1990
+ if (pending.size === 0) {
1991
+ this._repairFrontierByMode.get(mode)?.delete(target); // an empty frontier is what lets its runner exit
1992
+ }
1993
+ }
1994
+ }
// Collects the peer hashes considered candidates for full-replica repair.
//   extraPeers                 - optional iterable of peer hashes that are always included
//   options.includeSubscribers - topic subscribers are added unless this is exactly false
// Returns a Set that always contains this node's own public key hash.
1995
+ async getFullReplicaRepairCandidates(extraPeers, options) {
1996
+ const candidates = new Set([
1997
+ this.node.identity.publicKey.hashcode(),
1998
+ ]);
1999
+ try {
2000
+ for (const peer of await this.getReplicators()) {
2001
+ candidates.add(peer);
2002
+ }
2003
+ }
2004
+ catch { // getReplicators failed: fall back to the uniqueReplicators collection
2005
+ for (const peer of this.uniqueReplicators) {
2006
+ candidates.add(peer);
2007
+ }
2008
+ }
2009
+ for (const peer of extraPeers ?? []) {
2010
+ candidates.add(peer);
2011
+ }
2012
+ if (options?.includeSubscribers !== false) { // calling without options also includes subscribers
2013
+ try {
2014
+ for (const subscriber of (await this._getTopicSubscribers(this.topic)) ?? []) {
2015
+ candidates.add(subscriber.hashcode());
2016
+ }
2017
+ }
2018
+ catch {
2019
+ // Best-effort only; explicit repair peers still keep the path safe.
2020
+ }
2021
+ }
2022
+ return candidates;
2023
+ }
// Forgets a target in every dispatch mode: drops its queued frontier entries
// and its active-runner marker.
// NOTE(review): a runner for this target that is asleep at this point is not
// cancelled. It exits on its next wake-up if the frontier is still empty, and
// its finally block then clears the active marker again - confirm that is
// harmless if a new runner was started for the same target in the meantime.
2024
+ removeRepairFrontierTarget(target) {
2025
+ for (const mode of REPAIR_DISPATCH_MODES) {
2026
+ this._repairFrontierByMode.get(mode)?.delete(target);
2027
+ this._repairFrontierActiveTargetsByMode.get(mode)?.delete(target);
2028
+ }
2029
+ }
// Sends the given hashes to `target` as one or more ConfirmEntriesMessage.
//   target - recipient peer
//   hashes - iterable of entry hashes; duplicates are removed before sending
// Messages go out one after another, each carrying at most
// REPAIR_CONFIRMATION_HASH_BATCH_SIZE hashes. A failing send rejects this call.
2030
+ async sendRepairConfirmation(target, hashes) {
2031
+ const uniqueHashes = [...new Set(hashes)];
2032
+ for (let i = 0; i < uniqueHashes.length; i += REPAIR_CONFIRMATION_HASH_BATCH_SIZE) {
2033
+ const chunk = uniqueHashes.slice(i, i + REPAIR_CONFIRMATION_HASH_BATCH_SIZE);
2034
+ await this.rpc.send(new ConfirmEntriesMessage({ hashes: chunk }), {
2035
+ priority: 1,
2036
+ mode: new SilentDelivery({ to: [target], redundancy: 1 }),
2037
+ });
2038
+ }
2039
+ }
// Pushes entries straight to `target` as exchange-heads messages, sent one
// after another with SilentDelivery.
//   target  - recipient peer
//   entries - Map keyed by entry hash; only the keys are used
// NOTE(review): this passes hash strings to createExchangeHeadsMessages, while
// the append path in this file passes entry objects; presumably the helper
// accepts both - confirm.
2040
+ async pushRepairEntries(target, entries) {
2041
+ for await (const message of createExchangeHeadsMessages(this.log, [...entries.keys()])) {
2042
+ await this.rpc.send(message, {
2043
+ priority: 1,
2044
+ mode: new SilentDelivery({ to: [target], redundancy: 1 }),
2045
+ });
2046
+ }
2047
+ }
// Sends repair entries to one target over the chosen transport, skipping
// entries the target is already recorded as knowing.
//   target    - recipient peer
//   entries   - iterable of [hash, entry] pairs
//   transport - "simple" pushes the entries directly; any other value goes
//               through `this.syncronizer.onMaybeMissingEntries`
//   options.bypassKnownPeers - when truthy, the known-peer check is skipped
// Hashes found to be already known are cleared from the target's repair frontier.
2048
+ async sendRepairEntriesWithTransport(target, entries, transport, options) {
2049
+ const unknownEntries = new Map();
2050
+ const knownHashes = [];
2051
+ for (const [hash, entry] of entries) {
2052
+ if (options?.bypassKnownPeers || !this.isEntryKnownByPeer(hash, target)) {
2053
+ unknownEntries.set(hash, entry);
2054
+ }
2055
+ else {
2056
+ knownHashes.push(hash);
2057
+ }
2058
+ }
2059
+ this.clearRepairFrontierHashes(target, knownHashes); // no-op when the list is empty
2060
+ if (unknownEntries.size === 0) {
2061
+ return;
2062
+ }
2063
+ if (transport === "simple") {
2064
+ // Fallback repair should not depend on the target completing the
2065
+ // RequestMaybeSync -> ResponseMaybeSync round trip.
2066
+ await this.pushRepairEntries(target, unknownEntries);
2067
+ return;
2068
+ }
2069
+ await this.syncronizer.onMaybeMissingEntries({
2070
+ entries: unknownEntries,
2071
+ targets: [target],
2072
+ });
2073
+ }
// Performs one repair pass towards `target` right away: applies the
// recent-dispatch dedupe, updates the metrics for the mode, and sends.
//   target  - recipient peer
//   entries - Map of hash -> entry (`size`, `keys()` and iteration are used)
//   options - required; read without optional chaining
//     options.mode               - repair dispatch mode; selects the metrics bucket
//     options.transport          - transport name passed on to the sender
//     options.bypassRecentDedupe - when exactly true, recently sent hashes are sent again
// Send errors are logged through `logger.error` and do not reject this call.
2074
+ async sendMaybeMissingEntriesNow(target, entries, options) {
2075
+ if (entries.size === 0) {
2076
+ return;
2077
+ }
2078
+ const now = Date.now();
2079
+ let recentlyDispatchedByHash = this._recentRepairDispatch.get(target);
2080
+ if (!recentlyDispatchedByHash) {
2081
+ recentlyDispatchedByHash = new Map();
2082
+ this._recentRepairDispatch.set(target, recentlyDispatchedByHash);
2083
+ }
2084
+ for (const [hash, ts] of recentlyDispatchedByHash) { // prune records older than the TTL
2085
+ if (now - ts > RECENT_REPAIR_DISPATCH_TTL_MS) {
2086
+ recentlyDispatchedByHash.delete(hash);
2087
+ }
2088
+ }
2089
+ const filteredEntries = options.bypassRecentDedupe === true
2090
+ ? new Map(entries) // snapshot, so later changes to `entries` do not affect this pass
2091
+ : new Map();
2092
+ if (options.bypassRecentDedupe !== true) {
2093
+ for (const [hash, entry] of entries) {
2094
+ const prev = recentlyDispatchedByHash.get(hash);
2095
+ if (prev != null && now - prev <= RECENT_REPAIR_DISPATCH_TTL_MS) {
2096
+ continue; // sent to this target within the TTL; skip
2097
+ }
2098
+ recentlyDispatchedByHash.set(hash, now);
2099
+ filteredEntries.set(hash, entry);
2100
+ }
2101
+ }
2102
+ else {
2103
+ for (const hash of entries.keys()) {
2104
+ recentlyDispatchedByHash.set(hash, now); // bypassing still refreshes the timestamp
2105
+ }
2106
+ }
2107
+ if (filteredEntries.size === 0) {
2108
+ return;
2109
+ }
2110
+ const bucket = this._repairMetrics[options.mode];
2111
+ bucket.dispatches += 1;
2112
+ bucket.entries += filteredEntries.size;
2113
+ if (options.transport === "simple") {
2114
+ bucket.simpleFallbackPasses += 1;
2115
+ }
2116
+ else {
2117
+ bucket.ratelessFirstPasses += 1;
2118
+ }
2119
+ await Promise.resolve(this.sendRepairEntriesWithTransport(target, filteredEntries, options.transport, { bypassKnownPeers: options.mode === "churn" })).catch((error) => logger.error(error));
2120
+ }
// Starts a background retry loop for (mode, target) unless one is already
// running, the mode has no active-target Set, or the instance is closed.
//   mode            - repair dispatch mode
//   target          - peer whose frontier is being drained
//   retryScheduleMs - optional schedule override, resolved via resolveRepairRetrySchedule
// The loop re-sends whatever is still queued in the frontier and sleeps between
// attempts. It ends only when the frontier for the target is empty or the
// instance is closed; there is no cap on the number of attempts.
// Returns nothing; the loop runs detached.
2121
+ ensureRepairFrontierRunner(mode, target, retryScheduleMs) {
2122
+ const activeTargets = this._repairFrontierActiveTargetsByMode.get(mode);
2123
+ if (!activeTargets || activeTargets.has(target) || this.closed) {
2124
+ return;
2125
+ }
2126
+ activeTargets.add(target);
2127
+ const retrySchedule = resolveRepairRetrySchedule(mode, retryScheduleMs, this.isFrontierTrackedRepairMode(mode));
2128
+ const steadyStateDelay = retrySchedule.length > 1 // wait used once the schedule has run out
2129
+ ? Math.max(1, retrySchedule[retrySchedule.length - 1] - retrySchedule[retrySchedule.length - 2])
2130
+ : Math.max(retrySchedule[0] || 1_000, 1_000);
2131
+ void (async () => {
2132
+ let attemptIndex = 0;
2133
+ try {
2134
+ for (;;) {
2135
+ if (this.closed) {
2136
+ return;
2137
+ }
2138
+ const pending = this._repairFrontierByMode.get(mode)?.get(target);
2139
+ if (!pending || pending.size === 0) {
2140
+ return;
2141
+ }
2142
+ if ((mode === "join-warmup" || mode === "join-authoritative") &&
2143
+ this.isAssumeSyncedRepairSuppressed()) {
2144
+ await this.sleepTracked(Math.max(250, this._assumeSyncedRepairSuppressedUntil - Date.now())); // wait out the suppression window, at least 250 ms
2145
+ continue; // re-check without consuming an attempt
2146
+ }
2147
+ await this.sendMaybeMissingEntriesNow(target, pending, {
2148
+ mode,
2149
+ transport: getRepairTransportForAttempt(mode, attemptIndex),
2150
+ bypassRecentDedupe: true,
2151
+ });
2152
+ const remaining = this._repairFrontierByMode.get(mode)?.get(target); // re-read: the frontier may have changed during the send
2153
+ if (!remaining || remaining.size === 0) {
2154
+ return;
2155
+ }
2156
+ const waitMs = attemptIndex + 1 < retrySchedule.length // schedule values are offsets, so wait for the gap to the next one
2157
+ ? Math.max(0, retrySchedule[attemptIndex + 1] - retrySchedule[attemptIndex])
2158
+ : steadyStateDelay;
2159
+ attemptIndex = Math.min(attemptIndex + 1, retrySchedule.length - 1); // stays on the last schedule index once reached
2160
+ await this.sleepTracked(waitMs);
2161
+ }
2162
+ }
2163
+ finally {
2164
+ activeTargets.delete(target);
2165
+ if (!this.closed &&
2166
+ (this._repairFrontierByMode.get(mode)?.get(target)?.size || 0) > 0) {
2167
+ this.ensureRepairFrontierRunner(mode, target, retryScheduleMs); // entries remain on exit: start a fresh runner
2168
+ }
2169
+ }
2170
+ })().catch((error) => {
2171
+ activeTargets.delete(target);
2172
+ logger.error(error);
2173
+ });
2174
+ }
// Dispatches everything buffered by queueAppendBackfill, for all targets, in
// mode "append-backfill". Does nothing when the buffer is empty.
2175
+ flushAppendBackfill() {
2176
+ if (this._appendBackfillPendingByTarget.size === 0) {
2177
+ return;
2178
+ }
2179
+ const pending = this._appendBackfillPendingByTarget;
2180
+ this._appendBackfillPendingByTarget = new Map(); // swap in a fresh buffer before dispatching
2181
+ for (const [target, entries] of pending) {
2182
+ this.dispatchMaybeMissingEntries(target, entries, {
2183
+ mode: "append-backfill",
2184
+ });
2185
+ }
2186
+ }
// Buffers one entry for a later append-backfill dispatch towards `target`.
//   target - peer that should receive the entry
//   entry  - object with a `hash` property, used as the dedupe key
// The buffer is flushed immediately once the target's buffer reaches
// `repairSweepTargetBufferSize`; otherwise a single shared timer flushes it
// after APPEND_BACKFILL_DELAY_MS.
// NOTE(review): the entry is buffered before `this.closed` is checked, so
// entries queued after close stay in the buffer with no timer - confirm that
// close clears this buffer.
2187
+ queueAppendBackfill(target, entry) {
2188
+ let entries = this._appendBackfillPendingByTarget.get(target);
2189
+ if (!entries) {
2190
+ entries = new Map();
2191
+ this._appendBackfillPendingByTarget.set(target, entries);
2192
+ }
2193
+ entries.set(entry.hash, entry);
2194
+ if (entries.size >= this.repairSweepTargetBufferSize) {
2195
+ this.flushAppendBackfill(); // flushes every target's buffer, not just this one
2196
+ return;
2197
+ }
2198
+ if (this._appendBackfillTimer || this.closed) {
2199
+ return; // a flush is already scheduled, or the instance is closed
2200
+ }
2201
+ const timer = setTimeout(() => {
2202
+ this._repairRetryTimers.delete(timer);
2203
+ if (this._appendBackfillTimer === timer) {
2204
+ this._appendBackfillTimer = undefined;
2205
+ }
2206
+ if (this.closed) {
2207
+ return;
2208
+ }
2209
+ this.flushAppendBackfill();
2210
+ }, APPEND_BACKFILL_DELAY_MS);
2211
+ timer.unref?.();
2212
+ this._repairRetryTimers.add(timer);
2213
+ this._appendBackfillTimer = timer;
2214
+ }
1747
2215
  dispatchMaybeMissingEntries(target, entries, options) {
1748
2216
  if (entries.size === 0) {
1749
2217
  return;
1750
2218
  }
2219
+ if (this.isFrontierTrackedRepairMode(options.mode)) {
2220
+ this.queueRepairFrontierEntries(options.mode, target, entries);
2221
+ this.ensureRepairFrontierRunner(options.mode, target, options.retryScheduleMs);
2222
+ return;
2223
+ }
1751
2224
  const now = Date.now();
1752
2225
  let recentlyDispatchedByHash = this._recentRepairDispatch.get(target);
1753
2226
  if (!recentlyDispatchedByHash) {
@@ -1759,10 +2232,10 @@ let SharedLog = (() => {
1759
2232
  recentlyDispatchedByHash.delete(hash);
1760
2233
  }
1761
2234
  }
1762
- const filteredEntries = options?.bypassRecentDedupe === true
2235
+ const filteredEntries = options.bypassRecentDedupe === true
1763
2236
  ? new Map(entries)
1764
2237
  : new Map();
1765
- if (options?.bypassRecentDedupe !== true) {
2238
+ if (options.bypassRecentDedupe !== true) {
1766
2239
  for (const [hash, entry] of entries) {
1767
2240
  const prev = recentlyDispatchedByHash.get(hash);
1768
2241
  if (prev != null && now - prev <= RECENT_REPAIR_DISPATCH_TTL_MS) {
@@ -1780,95 +2253,186 @@ let SharedLog = (() => {
1780
2253
  if (filteredEntries.size === 0) {
1781
2254
  return;
1782
2255
  }
1783
- const retrySchedule = options?.retryScheduleMs && options.retryScheduleMs.length > 0
1784
- ? options.retryScheduleMs
1785
- : options?.forceFreshDelivery
1786
- ? FORCE_FRESH_RETRY_SCHEDULE_MS
1787
- : [0];
1788
- const run = () => {
1789
- // For force-fresh churn repair we intentionally bypass rateless IBLT and
1790
- // use simple hash-based sync. This path is a directed "push these hashes
1791
- // to that peer" recovery flow; using simple sync here avoids occasional
1792
- // single-hash gaps seen with IBLT-oriented maybe-sync batches under churn.
1793
- if (options?.forceFreshDelivery &&
1794
- this.syncronizer instanceof RatelessIBLTSynchronizer) {
1795
- return Promise.resolve(this.syncronizer.simple.onMaybeMissingEntries({
1796
- entries: filteredEntries,
1797
- targets: [target],
1798
- })).catch((error) => logger.error(error));
1799
- }
1800
- return Promise.resolve(this.syncronizer.onMaybeMissingEntries({
1801
- entries: filteredEntries,
1802
- targets: [target],
1803
- })).catch((error) => logger.error(error));
2256
+ if ((options.mode === "join-warmup" ||
2257
+ options.mode === "join-authoritative") &&
2258
+ this.isAssumeSyncedRepairSuppressed()) {
2259
+ return;
2260
+ }
2261
+ const retrySchedule = resolveRepairRetrySchedule(options.mode, options.retryScheduleMs, this.isFrontierTrackedRepairMode(options.mode));
2262
+ const bucket = this._repairMetrics[options.mode];
2263
+ bucket.dispatches += 1;
2264
+ bucket.entries += filteredEntries.size;
2265
+ const run = (transport) => {
2266
+ if (transport === "simple") {
2267
+ bucket.simpleFallbackPasses += 1;
2268
+ }
2269
+ else {
2270
+ bucket.ratelessFirstPasses += 1;
2271
+ }
2272
+ return Promise.resolve(this.sendRepairEntriesWithTransport(target, filteredEntries, transport, { bypassKnownPeers: options.mode === "churn" })).catch((error) => logger.error(error));
1804
2273
  };
1805
- for (const delayMs of retrySchedule) {
2274
+ retrySchedule.forEach((delayMs, index) => {
2275
+ const transport = getRepairTransportForAttempt(options.mode, index);
1806
2276
  if (delayMs === 0) {
1807
- void run();
1808
- continue;
2277
+ void run(transport);
2278
+ return;
1809
2279
  }
1810
2280
  const timer = setTimeout(() => {
1811
2281
  this._repairRetryTimers.delete(timer);
1812
2282
  if (this.closed) {
1813
2283
  return;
1814
2284
  }
1815
- void run();
2285
+ void run(transport);
1816
2286
  }, delayMs);
1817
2287
  timer.unref?.();
1818
2288
  this._repairRetryTimers.add(timer);
1819
- }
2289
+ });
1820
2290
  }
// Requests a repair sweep for one dispatch mode (new version of the method).
//   options.mode  - repair dispatch mode to include in the next sweep
//   options.peers - optional iterable of peers to associate with that mode
// Starts runRepairSweep unless a sweep is already running or the instance is
// closed; a running sweep picks the request up from the pending sets.
1821
2291
  scheduleRepairSweep(options) {
1822
- if (options.forceFreshDelivery) {
1823
- this._repairSweepForceFreshPending = true;
1824
- }
1825
- for (const peer of options.addedPeers) {
1826
- this._repairSweepAddedPeersPending.add(peer);
2292
+ this._repairSweepPendingModes.add(options.mode);
2293
+ const pendingPeers = this._repairSweepPendingPeersByMode.get(options.mode);
2294
+ if (pendingPeers) { // a mode with no peer Set still gets queued above, just without peers
2295
+ for (const peer of options.peers ?? []) {
2296
+ pendingPeers.add(peer);
2297
+ }
1827
2298
  }
1828
2299
  if (!this._repairSweepRunning && !this.closed) {
1829
2300
  this._repairSweepRunning = true;
1830
2301
  void this.runRepairSweep();
1831
2302
  }
1832
2303
  }
// Schedules delayed "join-authoritative" repair sweeps for the given peers,
// one per entry in JOIN_AUTHORITATIVE_REPAIR_SWEEP_DELAYS_MS.
//   peers - collection with `size` that is iterable (e.g. a Set of peer hashes)
// Does nothing when the instance is closed or `peers` is empty. There is at
// most one timer per delay value: peers added while that timer is pending join
// its batch, so they may wait less than the nominal delay.
2304
+ scheduleJoinAuthoritativeRepair(peers) {
2305
+ if (this.closed || peers.size === 0) {
2306
+ return;
2307
+ }
2308
+ for (const delayMs of JOIN_AUTHORITATIVE_REPAIR_SWEEP_DELAYS_MS) {
2309
+ let pendingPeers = this._joinAuthoritativeRepairPeersByDelay.get(delayMs);
2310
+ if (!pendingPeers) {
2311
+ pendingPeers = new Set();
2312
+ this._joinAuthoritativeRepairPeersByDelay.set(delayMs, pendingPeers);
2313
+ }
2314
+ for (const peer of peers) {
2315
+ pendingPeers.add(peer);
2316
+ }
2317
+ if (this._joinAuthoritativeRepairTimersByDelay.has(delayMs)) {
2318
+ continue; // a timer for this delay is already pending; the peers ride along with it
2319
+ }
2320
+ const timer = setTimeout(() => {
2321
+ this._repairRetryTimers.delete(timer);
2322
+ this._joinAuthoritativeRepairTimersByDelay.delete(delayMs);
2323
+ if (this.closed) {
2324
+ return;
2325
+ }
2326
+ const peersForSweep = new Set(this._joinAuthoritativeRepairPeersByDelay.get(delayMs) ?? []); // copy the batch before it is removed
2327
+ this._joinAuthoritativeRepairPeersByDelay.delete(delayMs);
2328
+ if (peersForSweep.size === 0) {
2329
+ return;
2330
+ }
2331
+ // A joiner's leader view can still be partial on the first delayed pass
2332
+ // under pubsub jitter. Bounded per-peer rescans widen the authoritative
2333
+ // frontier without adding per-append sweeps.
2334
+ this.scheduleRepairSweep({
2335
+ mode: "join-authoritative",
2336
+ peers: peersForSweep,
2337
+ });
2338
+ }, delayMs);
2339
+ timer.unref?.();
2340
+ this._repairRetryTimers.add(timer);
2341
+ this._joinAuthoritativeRepairTimersByDelay.set(delayMs, timer);
2342
+ }
2343
+ }
1833
2344
  async runRepairSweep() {
1834
2345
  try {
1835
2346
  while (!this.closed) {
1836
- const forceFreshDelivery = this._repairSweepForceFreshPending;
1837
- const addedPeers = new Set(this._repairSweepAddedPeersPending);
1838
- this._repairSweepForceFreshPending = false;
1839
- this._repairSweepAddedPeersPending.clear();
1840
- if (!forceFreshDelivery && addedPeers.size === 0) {
2347
+ const pendingModes = new Set(this._repairSweepPendingModes);
2348
+ const pendingPeersByMode = cloneRepairPendingPeersByMode(this._repairSweepPendingPeersByMode);
2349
+ this._repairSweepPendingModes.clear();
2350
+ for (const peers of this._repairSweepPendingPeersByMode.values()) {
2351
+ peers.clear();
2352
+ }
2353
+ if (pendingModes.size === 0) {
1841
2354
  return;
1842
2355
  }
1843
- const pendingByTarget = new Map();
1844
- const flushTarget = (target) => {
1845
- const entries = pendingByTarget.get(target);
2356
+ const optimisticGidPeersByMode = new Map();
2357
+ const optimisticGidPeersConsumedByMode = new Map();
2358
+ for (const mode of pendingModes) {
2359
+ const modePeers = pendingPeersByMode.get(mode);
2360
+ if (!modePeers || modePeers.size === 0) {
2361
+ continue;
2362
+ }
2363
+ const optimisticGidPeers = new Map();
2364
+ const optimisticGidPeersConsumed = new Map();
2365
+ for (const [gid, peerCounts] of this._repairSweepOptimisticGidPeersPending) {
2366
+ let matchedPeers;
2367
+ let matchedCounts;
2368
+ for (const [peer, count] of peerCounts) {
2369
+ if (!modePeers.has(peer)) {
2370
+ continue;
2371
+ }
2372
+ matchedPeers ||= new Set();
2373
+ matchedCounts ||= new Map();
2374
+ matchedPeers.add(peer);
2375
+ matchedCounts.set(peer, count);
2376
+ }
2377
+ if (matchedPeers && matchedCounts) {
2378
+ optimisticGidPeers.set(gid, matchedPeers);
2379
+ optimisticGidPeersConsumed.set(gid, matchedCounts);
2380
+ }
2381
+ }
2382
+ if (optimisticGidPeers.size > 0) {
2383
+ optimisticGidPeersByMode.set(mode, optimisticGidPeers);
2384
+ optimisticGidPeersConsumedByMode.set(mode, optimisticGidPeersConsumed);
2385
+ }
2386
+ }
2387
+ const pendingByMode = new Map(REPAIR_DISPATCH_MODES.map((mode) => [mode, new Map()]));
2388
+ const pendingRepairPeers = new Set();
2389
+ for (const peers of pendingPeersByMode.values()) {
2390
+ for (const peer of peers) {
2391
+ pendingRepairPeers.add(peer);
2392
+ }
2393
+ }
2394
+ const fullReplicaRepairCandidates = await this.getFullReplicaRepairCandidates(pendingRepairPeers, {
2395
+ includeSubscribers: false,
2396
+ });
2397
+ const fullReplicaRepairCandidateCount = Math.max(1, fullReplicaRepairCandidates.size);
2398
+ const nextFrontierByMode = new Map([
2399
+ ["join-authoritative", new Map()],
2400
+ ["churn", new Map()],
2401
+ ]);
2402
+ const flushTarget = (mode, target) => {
2403
+ const targets = pendingByMode.get(mode);
2404
+ const entries = targets?.get(target);
1846
2405
  if (!entries || entries.size === 0) {
1847
2406
  return;
1848
2407
  }
1849
- const isJoinWarmupTarget = addedPeers.has(target);
1850
- const bypassRecentDedupe = isJoinWarmupTarget || forceFreshDelivery;
1851
2408
  this.dispatchMaybeMissingEntries(target, entries, {
1852
- bypassRecentDedupe,
1853
- retryScheduleMs: isJoinWarmupTarget
1854
- ? JOIN_WARMUP_RETRY_SCHEDULE_MS
1855
- : undefined,
1856
- forceFreshDelivery,
2409
+ bypassRecentDedupe: true,
2410
+ mode,
1857
2411
  });
1858
- pendingByTarget.delete(target);
2412
+ targets?.delete(target);
1859
2413
  };
1860
- const queueEntryForTarget = (target, entry) => {
1861
- let set = pendingByTarget.get(target);
2414
+ const queueEntryForTarget = (mode, target, entry) => {
2415
+ const sweepTargets = nextFrontierByMode.get(mode);
2416
+ if (sweepTargets) {
2417
+ let sweepSet = sweepTargets.get(target);
2418
+ if (!sweepSet) {
2419
+ sweepSet = new Map();
2420
+ sweepTargets.set(target, sweepSet);
2421
+ }
2422
+ sweepSet.set(entry.hash, entry);
2423
+ }
2424
+ const targets = pendingByMode.get(mode);
2425
+ let set = targets.get(target);
1862
2426
  if (!set) {
1863
2427
  set = new Map();
1864
- pendingByTarget.set(target, set);
2428
+ targets.set(target, set);
1865
2429
  }
1866
2430
  if (set.has(entry.hash)) {
1867
2431
  return;
1868
2432
  }
1869
2433
  set.set(entry.hash, entry);
1870
2434
  if (set.size >= this.repairSweepTargetBufferSize) {
1871
- flushTarget(target);
2435
+ flushTarget(mode, target);
1872
2436
  }
1873
2437
  };
1874
2438
  const iterator = this.entryCoordinatesIndex.iterate({});
@@ -1877,19 +2441,42 @@ let SharedLog = (() => {
1877
2441
  const entries = await iterator.next(REPAIR_SWEEP_ENTRY_BATCH_SIZE);
1878
2442
  for (const entry of entries) {
1879
2443
  const entryReplicated = entry.value;
2444
+ const gid = entryReplicated.gid;
2445
+ const knownPeers = this._gidPeersHistory.get(gid);
2446
+ const requestedReplicas = decodeReplicas(entryReplicated).getValue(this);
1880
2447
  const currentPeers = await this.findLeaders(entryReplicated.coordinates, entryReplicated, { roleAge: 0 });
1881
- if (forceFreshDelivery) {
2448
+ if (pendingModes.has("churn")) {
1882
2449
  for (const [currentPeer] of currentPeers) {
1883
2450
  if (currentPeer === this.node.identity.publicKey.hashcode()) {
1884
2451
  continue;
1885
2452
  }
1886
- queueEntryForTarget(currentPeer, entryReplicated);
2453
+ queueEntryForTarget("churn", currentPeer, entryReplicated);
1887
2454
  }
1888
2455
  }
1889
- if (addedPeers.size > 0) {
1890
- for (const peer of addedPeers) {
1891
- if (currentPeers.has(peer)) {
1892
- queueEntryForTarget(peer, entryReplicated);
2456
+ for (const mode of pendingModes) {
2457
+ const modePeers = pendingPeersByMode.get(mode);
2458
+ if (!modePeers || modePeers.size === 0) {
2459
+ continue;
2460
+ }
2461
+ const optimisticPeers = optimisticGidPeersByMode.get(mode)?.get(gid);
2462
+ for (const peer of modePeers) {
2463
+ if (this.isEntryKnownByPeer(entryReplicated.hash, peer)) {
2464
+ continue;
2465
+ }
2466
+ const wasOptimisticallyAssigned = optimisticPeers?.has(peer) === true;
2467
+ const isCoveredByFullReplicaRepair = mode === "join-authoritative" &&
2468
+ fullReplicaRepairCandidates.has(peer) &&
2469
+ requestedReplicas >= fullReplicaRepairCandidateCount;
2470
+ const shouldQueue = mode === "join-authoritative"
2471
+ ? currentPeers.has(peer) || isCoveredByFullReplicaRepair
2472
+ : wasOptimisticallyAssigned ||
2473
+ (currentPeers.has(peer) && !knownPeers?.has(peer));
2474
+ if (shouldQueue) {
2475
+ // Authoritative join repair must not trust partial gid peer history,
2476
+ // otherwise a late joiner can get stuck with a partial historical
2477
+ // backfill forever. Once we enter the authoritative pass, queue every
2478
+ // entry whose current leader set still includes the added peer.
2479
+ queueEntryForTarget(mode, peer, entryReplicated);
1893
2480
  }
1894
2481
  }
1895
2482
  }
@@ -1899,8 +2486,64 @@ let SharedLog = (() => {
1899
2486
  finally {
1900
2487
  await iterator.close();
1901
2488
  }
1902
- for (const target of [...pendingByTarget.keys()]) {
1903
- flushTarget(target);
2489
+ for (const [, optimisticGidPeersConsumed] of optimisticGidPeersConsumedByMode) {
2490
+ for (const [gid, peerCounts] of optimisticGidPeersConsumed) {
2491
+ const pendingPeerCounts = this._repairSweepOptimisticGidPeersPending.get(gid);
2492
+ if (!pendingPeerCounts) {
2493
+ continue;
2494
+ }
2495
+ for (const [peer, count] of peerCounts) {
2496
+ const current = pendingPeerCounts.get(peer) || 0;
2497
+ const next = current - count;
2498
+ if (next > 0) {
2499
+ pendingPeerCounts.set(peer, next);
2500
+ }
2501
+ else {
2502
+ pendingPeerCounts.delete(peer);
2503
+ }
2504
+ }
2505
+ if (pendingPeerCounts.size === 0) {
2506
+ this._repairSweepOptimisticGidPeersPending.delete(gid);
2507
+ }
2508
+ }
2509
+ }
2510
+ for (const mode of pendingModes) {
2511
+ if (mode !== "join-authoritative" && mode !== "churn") {
2512
+ continue;
2513
+ }
2514
+ const nextTargets = nextFrontierByMode.get(mode) ?? new Map();
2515
+ const frontierTargets = this._repairFrontierByMode.get(mode);
2516
+ for (const target of pendingPeersByMode.get(mode) ?? []) {
2517
+ const replacement = nextTargets.get(target);
2518
+ if (mode === "join-authoritative") {
2519
+ // Authoritative join repair is receipt-driven: a later sweep can have a
2520
+ // narrower transient leader view, but it must not forget unconfirmed
2521
+ // hashes that were already queued for this joiner.
2522
+ if (replacement && replacement.size > 0) {
2523
+ const existing = frontierTargets?.get(target);
2524
+ if (existing && existing.size > 0) {
2525
+ for (const [hash, entry] of replacement) {
2526
+ existing.set(hash, entry);
2527
+ }
2528
+ }
2529
+ else {
2530
+ frontierTargets?.set(target, replacement);
2531
+ }
2532
+ }
2533
+ continue;
2534
+ }
2535
+ if (replacement && replacement.size > 0) {
2536
+ frontierTargets?.set(target, replacement);
2537
+ }
2538
+ else {
2539
+ frontierTargets?.delete(target);
2540
+ }
2541
+ }
2542
+ }
2543
+ for (const [mode, targets] of pendingByMode) {
2544
+ for (const target of [...targets.keys()]) {
2545
+ flushTarget(mode, target);
2546
+ }
1904
2547
  }
1905
2548
  }
1906
2549
  }
@@ -1911,17 +2554,78 @@ let SharedLog = (() => {
1911
2554
  }
1912
2555
  finally {
1913
2556
  this._repairSweepRunning = false;
1914
- if (!this.closed &&
1915
- (this._repairSweepForceFreshPending ||
1916
- this._repairSweepAddedPeersPending.size > 0)) {
2557
+ if (!this.closed && this._repairSweepPendingModes.size > 0) {
1917
2558
  this._repairSweepRunning = true;
1918
2559
  void this.runRepairSweep();
1919
2560
  }
1920
2561
  }
1921
2562
  }
1922
2563
  async pruneDebouncedFnAddIfNotKeeping(args) {
1923
- if (!this.keep || !(await this.keep(args.value.entry))) {
1924
- return this.pruneDebouncedFn.add(args);
2564
+ if (this.keep && (await this.keep(args.value.entry))) {
2565
+ return false;
2566
+ }
2567
+ void this.pruneDebouncedFn.add(args);
2568
+ return true;
2569
+ }
2570
+ async pruneJoinedEntriesNoLongerLed(entries) {
2571
+ const selfHash = this.node.identity.publicKey.hashcode();
2572
+ for (const entry of entries) {
2573
+ if (this.closed || this._pendingDeletes.has(entry.hash)) {
2574
+ continue;
2575
+ }
2576
+ const leaders = await this.findLeadersFromEntry(entry, decodeReplicas(entry).getValue(this), { roleAge: 0 });
2577
+ if (leaders.has(selfHash)) {
2578
+ this.pruneDebouncedFn.delete(entry.hash);
2579
+ continue;
2580
+ }
2581
+ if (leaders.size === 0) {
2582
+ continue;
2583
+ }
2584
+ await this.pruneDebouncedFnAddIfNotKeeping({
2585
+ key: entry.hash,
2586
+ value: { entry, leaders },
2587
+ });
2588
+ this.responseToPruneDebouncedFn.delete(entry.hash);
2589
+ }
2590
+ }
2591
+ async pruneIndexedEntriesNoLongerLed() {
2592
+ const selfHash = this.node.identity.publicKey.hashcode();
2593
+ const iterator = this.entryCoordinatesIndex.iterate({});
2594
+ let enqueuedPrune = false;
2595
+ try {
2596
+ while (!this.closed && !iterator.done()) {
2597
+ const entries = await iterator.next(REPAIR_SWEEP_ENTRY_BATCH_SIZE);
2598
+ for (const entry of entries) {
2599
+ const entryReplicated = entry.value;
2600
+ if (this.closed || this._pendingDeletes.has(entryReplicated.hash)) {
2601
+ continue;
2602
+ }
2603
+ const leaders = await this.findLeaders(entryReplicated.coordinates, entryReplicated, { roleAge: 0 });
2604
+ if (leaders.has(selfHash)) {
2605
+ this.pruneDebouncedFn.delete(entryReplicated.hash);
2606
+ await this._pendingDeletes
2607
+ .get(entryReplicated.hash)
2608
+ ?.reject(new Error("Failed to delete, is leader again"));
2609
+ this.removePruneRequestSent(entryReplicated.hash);
2610
+ continue;
2611
+ }
2612
+ if (leaders.size === 0) {
2613
+ continue;
2614
+ }
2615
+ enqueuedPrune =
2616
+ (await this.pruneDebouncedFnAddIfNotKeeping({
2617
+ key: entryReplicated.hash,
2618
+ value: { entry: entryReplicated, leaders },
2619
+ })) || enqueuedPrune;
2620
+ this.responseToPruneDebouncedFn.delete(entryReplicated.hash);
2621
+ }
2622
+ }
2623
+ }
2624
+ finally {
2625
+ await iterator.close();
2626
+ }
2627
+ if (enqueuedPrune && !this.closed) {
2628
+ await this.pruneDebouncedFn.flush();
1925
2629
  }
1926
2630
  }
1927
2631
  clearCheckedPruneRetry(hash) {
@@ -2064,16 +2768,17 @@ let SharedLog = (() => {
2064
2768
  await this._appendDeliverToAllFanout(result.entry);
2065
2769
  }
2066
2770
  else {
2067
- await this._appendDeliverToReplicators(result.entry, minReplicasValue, leaders, selfHash, isLeader, deliveryArg);
2771
+ await this._appendDeliverToReplicators(result.entry, coordinates, minReplicasValue, leaders, selfHash, isLeader, deliveryArg);
2068
2772
  }
2069
2773
  }
2070
- if (!isLeader && !this.shouldDelayAdaptiveRebalance()) {
2774
+ const delayAdaptiveRebalance = this.shouldDelayAdaptiveRebalance();
2775
+ if (!isLeader && !delayAdaptiveRebalance) {
2071
2776
  this.pruneDebouncedFnAddIfNotKeeping({
2072
2777
  key: result.entry.hash,
2073
2778
  value: { entry: result.entry, leaders },
2074
2779
  });
2075
2780
  }
2076
- if (!this._isAdaptiveReplicating) {
2781
+ if (!delayAdaptiveRebalance) {
2077
2782
  this.rebalanceParticipationDebounced?.call();
2078
2783
  }
2079
2784
  return result;
@@ -2107,8 +2812,18 @@ let SharedLog = (() => {
2107
2812
  this._repairRetryTimers = new Set();
2108
2813
  this._recentRepairDispatch = new Map();
2109
2814
  this._repairSweepRunning = false;
2110
- this._repairSweepForceFreshPending = false;
2111
- this._repairSweepAddedPeersPending = new Set();
2815
+ this._repairSweepPendingModes = new Set();
2816
+ this._repairSweepPendingPeersByMode = createRepairPendingPeersByMode();
2817
+ this._repairFrontierByMode = createRepairFrontierByMode();
2818
+ this._repairFrontierActiveTargetsByMode = createRepairActiveTargetsByMode();
2819
+ this._repairSweepOptimisticGidPeersPending = new Map();
2820
+ this._entryKnownPeers = new Map();
2821
+ this._joinAuthoritativeRepairTimersByDelay = new Map();
2822
+ this._joinAuthoritativeRepairPeersByDelay = new Map();
2823
+ this._assumeSyncedRepairSuppressedUntil = 0;
2824
+ this._appendBackfillTimer = undefined;
2825
+ this._appendBackfillPendingByTarget = new Map();
2826
+ this._repairMetrics = createRepairMetrics();
2112
2827
  this._topicSubscribersCache = new Map();
2113
2828
  this.coordinateToHash = new Cache({ max: 1e6, ttl: 1e4 });
2114
2829
  this.recentlyRebalanced = new Cache({ max: 1e4, ttl: 1e5 });
@@ -2166,7 +2881,10 @@ let SharedLog = (() => {
2166
2881
  this.keep = options?.keep;
2167
2882
  this.pendingMaturity = new Map();
2168
2883
  const id = sha256Base64Sync(this.log.id);
2169
- const storage = await this.node.storage.sublevel(id);
2884
+ const [storage, logScope] = await Promise.all([
2885
+ this.node.storage.sublevel(id),
2886
+ this.node.indexer.scope(id),
2887
+ ]);
2170
2888
  const localBlocks = await new AnyBlockStore(await storage.sublevel("blocks"));
2171
2889
  const fanoutService = getSharedLogFanoutService(this.node.services);
2172
2890
  const blockProviderNamespace = (cid) => `cid:${cid}`;
@@ -2199,6 +2917,16 @@ let SharedLog = (() => {
2199
2917
  maxPeers: 8,
2200
2918
  })) ?? []);
2201
2919
  },
2920
+ watchProviders: fanoutService
2921
+ ? (cid, opts) => fanoutService.watchProviders(blockProviderNamespace(cid), {
2922
+ signal: opts.signal,
2923
+ want: 8,
2924
+ ttlMs: 10_000,
2925
+ renewIntervalMs: 5_000,
2926
+ bootstrapMaxPeers: 2,
2927
+ onProviders: (providers) => opts.onProviders(providers.map((provider) => provider.hash)),
2928
+ })
2929
+ : undefined,
2202
2930
  onPut: async (cid) => {
2203
2931
  // Best-effort directory announce for "get without remote.from" workflows.
2204
2932
  try {
@@ -2212,16 +2940,18 @@ let SharedLog = (() => {
2212
2940
  }
2213
2941
  },
2214
2942
  });
2215
- await this.remoteBlocks.start();
2216
- const logScope = await this.node.indexer.scope(id);
2217
- const replicationIndex = await logScope.scope("replication");
2943
+ const remoteBlocksStartPromise = this.remoteBlocks.start();
2944
+ const [replicationIndex, logIndex] = await Promise.all([
2945
+ logScope.scope("replication"),
2946
+ logScope.scope("log"),
2947
+ ]);
2218
2948
  this._replicationRangeIndex = await replicationIndex.init({
2219
2949
  schema: this.indexableDomain.constructorRange,
2220
2950
  });
2221
2951
  this._entryCoordinatesIndex = await replicationIndex.init({
2222
2952
  schema: this.indexableDomain.constructorEntry,
2223
2953
  });
2224
- const logIndex = await logScope.scope("log");
2954
+ await remoteBlocksStartPromise;
2225
2955
  const hasIndexedReplicationInfo = (await this.replicationIndex.count({
2226
2956
  query: [
2227
2957
  new StringMatch({
@@ -2349,27 +3079,33 @@ let SharedLog = (() => {
2349
3079
  }
2350
3080
  }
2351
3081
  // Open for communcation
2352
- await this.rpc.open({
2353
- queryType: TransportMessage,
2354
- responseType: TransportMessage,
2355
- responseHandler: (query, context) => this.onMessage(query, context),
2356
- topic: this.topic,
2357
- });
2358
3082
  this._onSubscriptionFn =
2359
3083
  this._onSubscriptionFn || this._onSubscription.bind(this);
2360
- await this.node.services.pubsub.addEventListener("subscribe", this._onSubscriptionFn);
2361
3084
  this._onUnsubscriptionFn =
2362
3085
  this._onUnsubscriptionFn || this._onUnsubscription.bind(this);
2363
- await this.node.services.pubsub.addEventListener("unsubscribe", this._onUnsubscriptionFn);
2364
- await this.rpc.subscribe();
2365
- await this._openFanoutChannel(options?.fanout);
2366
- // mark all our replicaiton ranges as "new", this would allow other peers to understand that we recently reopend our database and might need some sync and warmup
2367
- await this.updateTimestampOfOwnedReplicationRanges(); // TODO do we need to do this before subscribing?
3086
+ await Promise.all([
3087
+ this.rpc.open({
3088
+ queryType: TransportMessage,
3089
+ responseType: TransportMessage,
3090
+ responseHandler: (query, context) => this.onMessage(query, context),
3091
+ topic: this.topic,
3092
+ }),
3093
+ this.node.services.pubsub.addEventListener("subscribe", this._onSubscriptionFn),
3094
+ this.node.services.pubsub.addEventListener("unsubscribe", this._onUnsubscriptionFn),
3095
+ ]);
3096
+ const fanoutOpenPromise = this._openFanoutChannel(options?.fanout);
3097
+ // Mark previously-owned replication ranges as "new" only when they already exist.
3098
+ // Fresh opens have nothing to touch here, so skip the extra scan/write entirely.
3099
+ const updateOwnedReplicationPromise = hasIndexedReplicationInfo
3100
+ ? this.updateTimestampOfOwnedReplicationRanges()
3101
+ : Promise.resolve();
3102
+ await Promise.all([fanoutOpenPromise, updateOwnedReplicationPromise]);
2368
3103
  // if we had a previous session with replication info, and new replication info dictates that we unreplicate
2369
3104
  // we should do that. Otherwise if options is a unreplication we dont need to do anything because
2370
3105
  // we are already unreplicated (as we are just opening)
2371
- let isUnreplicationOptionsDefined = isUnreplicationOptions(options?.replicate);
2372
- const canResumeReplication = (await isReplicationOptionsDependentOnPreviousState(options?.replicate, this.replicationIndex, this.node.identity.publicKey)) && hasIndexedReplicationInfo;
3106
+ const isUnreplicationOptionsDefined = isUnreplicationOptions(options?.replicate);
3107
+ const canResumeReplication = hasIndexedReplicationInfo &&
3108
+ (await isReplicationOptionsDependentOnPreviousState(options?.replicate, this.replicationIndex, this.node.identity.publicKey));
2373
3109
  if (hasIndexedReplicationInfo && isUnreplicationOptionsDefined) {
2374
3110
  await this.replicate(options?.replicate, { checkDuplicates: true });
2375
3111
  }
@@ -2412,6 +3148,7 @@ let SharedLog = (() => {
2412
3148
  }
2413
3149
  async afterOpen() {
2414
3150
  await super.afterOpen();
3151
+ const existingSubscribersPromise = this._getTopicSubscribers(this.topic);
2415
3152
  // We do this here, because these calls requires this.closed == false
2416
3153
  void this.pruneOfflineReplicators()
2417
3154
  .then(() => {
@@ -2426,7 +3163,7 @@ let SharedLog = (() => {
2426
3163
  this.startReplicatorLivenessSweep();
2427
3164
  await this.rebalanceParticipation();
2428
3165
  // Take into account existing subscription
2429
- (await this._getTopicSubscribers(this.topic))?.forEach((v) => {
3166
+ (await existingSubscribersPromise)?.forEach((v) => {
2430
3167
  if (v.equals(this.node.identity.publicKey)) {
2431
3168
  return;
2432
3169
  }
@@ -2941,8 +3678,28 @@ let SharedLog = (() => {
2941
3678
  this._repairRetryTimers.clear();
2942
3679
  this._recentRepairDispatch.clear();
2943
3680
  this._repairSweepRunning = false;
2944
- this._repairSweepForceFreshPending = false;
2945
- this._repairSweepAddedPeersPending.clear();
3681
+ this._repairSweepPendingModes.clear();
3682
+ for (const peers of this._repairSweepPendingPeersByMode.values()) {
3683
+ peers.clear();
3684
+ }
3685
+ this._repairSweepOptimisticGidPeersPending.clear();
3686
+ this._entryKnownPeers.clear();
3687
+ for (const timer of this._joinAuthoritativeRepairTimersByDelay.values()) {
3688
+ clearTimeout(timer);
3689
+ }
3690
+ this._joinAuthoritativeRepairTimersByDelay.clear();
3691
+ this._joinAuthoritativeRepairPeersByDelay.clear();
3692
+ for (const targets of this._repairFrontierByMode.values()) {
3693
+ targets.clear();
3694
+ }
3695
+ for (const targets of this._repairFrontierActiveTargetsByMode.values()) {
3696
+ targets.clear();
3697
+ }
3698
+ if (this._appendBackfillTimer) {
3699
+ clearTimeout(this._appendBackfillTimer);
3700
+ this._appendBackfillTimer = undefined;
3701
+ }
3702
+ this._appendBackfillPendingByTarget.clear();
2946
3703
  for (const [_k, v] of this._pendingDeletes) {
2947
3704
  v.clear();
2948
3705
  v.promise.resolve(); // TODO or reject?
@@ -3103,6 +3860,7 @@ let SharedLog = (() => {
3103
3860
  logger.trace(`${this.node.identity.publicKey.hashcode()}: Recieved heads: ${heads.length === 1 ? heads[0].entry.hash : "#" + heads.length}, logId: ${this.log.idString}`);
3104
3861
  if (heads) {
3105
3862
  const filteredHeads = [];
3863
+ const confirmedHashes = new Set();
3106
3864
  for (const head of heads) {
3107
3865
  if (!(await this.log.has(head.entry.hash))) {
3108
3866
  head.entry.init({
@@ -3112,8 +3870,18 @@ let SharedLog = (() => {
3112
3870
  });
3113
3871
  filteredHeads.push(head);
3114
3872
  }
3873
+ else {
3874
+ confirmedHashes.add(head.entry.hash);
3875
+ }
3876
+ }
3877
+ const fromIsSelf = context.from.equals(this.node.identity.publicKey);
3878
+ if (!fromIsSelf) {
3879
+ this.markEntriesKnownByPeer(heads.map((head) => head.entry.hash), context.from.hashcode());
3115
3880
  }
3116
3881
  if (filteredHeads.length === 0) {
3882
+ if (confirmedHashes.size > 0 && !fromIsSelf) {
3883
+ await this.sendRepairConfirmation(context.from, confirmedHashes);
3884
+ }
3117
3885
  return;
3118
3886
  }
3119
3887
  const groupedByGid = await groupByGid(filteredHeads);
@@ -3211,7 +3979,12 @@ let SharedLog = (() => {
3211
3979
  return;
3212
3980
  }
3213
3981
  if (toMerge.length > 0) {
3982
+ this.markEntriesKnownByPeer(toMerge.map((entry) => entry.hash), context.from.hashcode());
3214
3983
  await this.log.join(toMerge);
3984
+ for (const merged of toMerge) {
3985
+ confirmedHashes.add(merged.hash);
3986
+ }
3987
+ await this.pruneJoinedEntriesNoLongerLed(toMerge);
3215
3988
  toDelete?.map((x) =>
3216
3989
  // TODO types
3217
3990
  this.pruneDebouncedFnAddIfNotKeeping({
@@ -3250,6 +4023,10 @@ let SharedLog = (() => {
3250
4023
  promises.push(fn()); // we do this concurrently since waitForIsLeader might be a blocking operation for some entries
3251
4024
  }
3252
4025
  await Promise.all(promises);
4026
+ if (confirmedHashes.size > 0 && !context.from.equals(this.node.identity.publicKey)) {
4027
+ this.markEntriesKnownByPeer(confirmedHashes, context.from.hashcode());
4028
+ await this.sendRepairConfirmation(context.from, confirmedHashes);
4029
+ }
3253
4030
  }
3254
4031
  }
3255
4032
  else if (msg instanceof RequestIPrune) {
@@ -3257,6 +4034,7 @@ let SharedLog = (() => {
3257
4034
  const from = context.from.hashcode();
3258
4035
  for (const hash of msg.hashes) {
3259
4036
  this.removePruneRequestSent(hash, from);
4037
+ this.removeEntriesKnownByPeer([hash], from);
3260
4038
  // if we expect the remote to be owner of this entry because we are to prune ourselves, then we need to remove the remote
3261
4039
  // this is due to that the remote has previously indicated to be a replicator to help us prune but now has changed their mind
3262
4040
  const outGoingPrunes = this._requestIPruneResponseReplicatorSet.get(hash);
@@ -3344,6 +4122,11 @@ let SharedLog = (() => {
3344
4122
  this._pendingDeletes.get(hash)?.resolve(context.from.hashcode());
3345
4123
  }
3346
4124
  }
4125
+ else if (msg instanceof ConfirmEntriesMessage) {
4126
+ this.markEntriesKnownByPeer(msg.hashes, context.from.hashcode());
4127
+ this.clearRepairFrontierHashes(context.from.hashcode(), msg.hashes);
4128
+ return;
4129
+ }
3347
4130
  else if (await this.syncronizer.onMessage(msg, context)) {
3348
4131
  return; // the syncronizer has handled the message
3349
4132
  }
@@ -3624,6 +4407,17 @@ let SharedLog = (() => {
3624
4407
  let assumeSynced = options?.replicate &&
3625
4408
  typeof options.replicate !== "boolean" &&
3626
4409
  options.replicate.assumeSynced;
4410
+ const seedAssumeSyncedPeerHistory = async (entry) => {
4411
+ if (!assumeSynced) {
4412
+ return;
4413
+ }
4414
+ const minReplicas = decodeReplicas(entry).getValue(this);
4415
+ const leaders = await this.findLeaders(await this.createCoordinates(entry, minReplicas), entry, {
4416
+ roleAge: 0,
4417
+ persist: false,
4418
+ });
4419
+ this.addPeersToGidPeerHistory(entry.meta.gid, leaders.keys());
4420
+ };
3627
4421
  const persistCoordinate = async (entry) => {
3628
4422
  const minReplicas = decodeReplicas(entry).getValue(this);
3629
4423
  const leaders = await this.findLeaders(await this.createCoordinates(entry, minReplicas), entry, { persist: {} });
@@ -3656,6 +4450,16 @@ let SharedLog = (() => {
3656
4450
  await this.log.join(entries, joinOptions);
3657
4451
  if (options?.replicate) {
3658
4452
  let messageToSend = undefined;
4453
+ if (assumeSynced) {
4454
+ // `assumeSynced` is an explicit contract that this join should trust the
4455
+ // supplied history and avoid initiating outbound repair while the local
4456
+ // replication ranges settle.
4457
+ this._assumeSyncedRepairSuppressedUntil =
4458
+ Date.now() + ASSUME_SYNCED_REPAIR_SUPPRESSION_MS;
4459
+ for (const entry of entriesToReplicate) {
4460
+ await seedAssumeSyncedPeerHistory(entry);
4461
+ }
4462
+ }
3659
4463
  await this.replicate(entriesToReplicate, {
3660
4464
  rebalance: assumeSynced ? false : true,
3661
4465
  checkDuplicates: true,
@@ -3720,9 +4524,14 @@ let SharedLog = (() => {
3720
4524
  clear();
3721
4525
  // `waitForReplicator()` is typically used as a precondition before join/replicate
3722
4526
  // flows. A replicator can become mature and enqueue a debounced rebalance
3723
- // (`replicationChangeDebounceFn`) slightly later. Flush here so callers don't
3724
- // observe a "late" rebalance after the wait resolves.
3725
- await this.replicationChangeDebounceFn?.flush?.();
4527
+ // (`replicationChangeDebounceFn`) slightly later. Kick the flush, but do not
4528
+ // make membership waits depend on all rebalance work finishing; callers that
4529
+ // need settled distribution already wait for that explicitly.
4530
+ this.replicationChangeDebounceFn?.flush?.().catch((error) => {
4531
+ if (!isNotStartedError(error)) {
4532
+ logger.error(error?.toString?.() ?? String(error));
4533
+ }
4534
+ });
3726
4535
  deferred.resolve();
3727
4536
  };
3728
4537
  const reject = (error) => {
@@ -4114,11 +4923,51 @@ let SharedLog = (() => {
4114
4923
  }
4115
4924
  }
4116
4925
  }
4926
+ if (!options?.candidates) {
4927
+ const fullReplicaLeaders = await this.findFullReplicaLeaders(cursors.length, roleAge, peerFilter);
4928
+ if (fullReplicaLeaders) {
4929
+ return fullReplicaLeaders;
4930
+ }
4931
+ }
4117
4932
  return getSamples(cursors, this.replicationIndex, roleAge, this.indexableDomain.numbers, {
4118
4933
  peerFilter,
4119
4934
  uniqueReplicators: peerFilter,
4120
4935
  });
4121
4936
  }
4937
+ async findFullReplicaLeaders(replicas, roleAge, peerFilter) {
4938
+ const now = Date.now();
4939
+ const leaders = new Map();
4940
+ const includeStrict = this._logProperties?.strictFullReplicaFallback !== false;
4941
+ const iterator = this.replicationIndex.iterate({}, { shape: { hash: true, timestamp: true, mode: true } });
4942
+ try {
4943
+ for (;;) {
4944
+ const batch = await iterator.next(64);
4945
+ if (batch.length === 0) {
4946
+ break;
4947
+ }
4948
+ for (const result of batch) {
4949
+ const range = result.value;
4950
+ if (peerFilter && !peerFilter.has(range.hash)) {
4951
+ continue;
4952
+ }
4953
+ if (!isMatured(range, now, roleAge)) {
4954
+ continue;
4955
+ }
4956
+ if (range.mode === ReplicationIntent.Strict && !includeStrict) {
4957
+ continue;
4958
+ }
4959
+ leaders.set(range.hash, { intersecting: true });
4960
+ if (leaders.size > replicas) {
4961
+ return undefined;
4962
+ }
4963
+ }
4964
+ }
4965
+ }
4966
+ finally {
4967
+ await iterator.close();
4968
+ }
4969
+ return leaders.size > 0 ? leaders : undefined;
4970
+ }
4122
4971
  async findLeadersFromEntry(entry, replicas, options) {
4123
4972
  const coordinates = await this.createCoordinates(entry, replicas);
4124
4973
  const result = await this._findLeaders(coordinates, options);
@@ -4586,13 +5435,25 @@ let SharedLog = (() => {
4586
5435
  }
4587
5436
  const changed = false;
4588
5437
  const addedPeers = new Set();
5438
+ const authoritativeRepairPeers = new Set();
4589
5439
  const warmupPeers = new Set();
5440
+ const churnRepairPeers = new Set();
4590
5441
  const hasSelfWarmupChange = changes.some((change) => change.range.hash === selfHash &&
4591
5442
  (change.type === "added" || change.type === "replaced"));
5443
+ const hasSelfRangeRemoval = changes.some((change) => change.range.hash === selfHash &&
5444
+ (change.type === "removed" || change.type === "replaced"));
4592
5445
  for (const change of changes) {
5446
+ if (change.range.hash !== selfHash &&
5447
+ (change.type === "removed" || change.type === "replaced")) {
5448
+ this.removePeerFromEntryKnownPeers(change.range.hash);
5449
+ }
4593
5450
  if (change.type === "added" || change.type === "replaced") {
4594
5451
  const hash = change.range.hash;
4595
5452
  if (hash !== selfHash) {
5453
+ // Existing peers can widen/shift ranges after the initial join. If we
5454
+ // only rescan on first-seen "added", late authoritative range updates can
5455
+ // leave historical backfill permanently partial under load.
5456
+ authoritativeRepairPeers.add(hash);
4596
5457
  // Range updates can reassign entries to an existing peer shortly after it
4597
5458
  // already received a subset. Avoid suppressing legitimate follow-up repair.
4598
5459
  this._recentRepairDispatch.delete(hash);
@@ -4624,17 +5485,24 @@ let SharedLog = (() => {
4624
5485
  return;
4625
5486
  }
4626
5487
  const isWarmupTarget = warmupPeers.has(target);
4627
- const bypassRecentDedupe = isWarmupTarget || forceFreshDelivery;
5488
+ const mode = forceFreshDelivery
5489
+ ? "churn"
5490
+ : isWarmupTarget
5491
+ ? "join-warmup"
5492
+ : "join-authoritative";
4628
5493
  this.dispatchMaybeMissingEntries(target, entries, {
4629
- bypassRecentDedupe,
4630
- retryScheduleMs: isWarmupTarget
5494
+ bypassRecentDedupe: isWarmupTarget || forceFreshDelivery,
5495
+ mode,
5496
+ retryScheduleMs: mode === "join-warmup"
4631
5497
  ? JOIN_WARMUP_RETRY_SCHEDULE_MS
4632
- : undefined,
4633
- forceFreshDelivery,
5498
+ : mode === "join-authoritative"
5499
+ ? [0]
5500
+ : undefined,
4634
5501
  });
4635
5502
  uncheckedDeliver.delete(target);
4636
5503
  };
4637
5504
  const queueUncheckedDeliver = (target, entry) => {
5505
+ churnRepairPeers.add(target);
4638
5506
  let set = uncheckedDeliver.get(target);
4639
5507
  if (!set) {
4640
5508
  set = new Map();
@@ -4688,7 +5556,14 @@ let SharedLog = (() => {
4688
5556
  }
4689
5557
  }
4690
5558
  }
4691
- this.addPeersToGidPeerHistory(entryReplicated.gid, currentPeers.keys(), true);
5559
+ for (const [peer] of currentPeers) {
5560
+ if (warmupPeers.has(peer)) {
5561
+ this.markRepairSweepOptimisticPeer(entryReplicated.gid, peer);
5562
+ }
5563
+ }
5564
+ const authoritativePeers = [...currentPeers.keys()].filter((peer) => !warmupPeers.has(peer) &&
5565
+ !this.hasPendingRepairSweepOptimisticPeer(entryReplicated.gid, peer));
5566
+ this.addPeersToGidPeerHistory(entryReplicated.gid, authoritativePeers, true);
4692
5567
  if (!currentPeers.has(selfHash)) {
4693
5568
  this.pruneDebouncedFnAddIfNotKeeping({
4694
5569
  key: entryReplicated.hash,
@@ -4736,7 +5611,14 @@ let SharedLog = (() => {
4736
5611
  }
4737
5612
  }
4738
5613
  }
4739
- this.addPeersToGidPeerHistory(entryReplicated.gid, currentPeers.keys(), true);
5614
+ for (const [peer] of currentPeers) {
5615
+ if (addedPeers.has(peer)) {
5616
+ this.markRepairSweepOptimisticPeer(entryReplicated.gid, peer);
5617
+ }
5618
+ }
5619
+ const authoritativePeers = [...currentPeers.keys()].filter((peer) => !addedPeers.has(peer) &&
5620
+ !this.hasPendingRepairSweepOptimisticPeer(entryReplicated.gid, peer));
5621
+ this.addPeersToGidPeerHistory(entryReplicated.gid, authoritativePeers, true);
4740
5622
  if (!isLeader) {
4741
5623
  this.pruneDebouncedFnAddIfNotKeeping({
4742
5624
  key: entryReplicated.hash,
@@ -4753,9 +5635,17 @@ let SharedLog = (() => {
4753
5635
  }
4754
5636
  }
4755
5637
  }
5638
+ if (this._isAdaptiveReplicating && hasSelfRangeRemoval) {
5639
+ await this.pruneIndexedEntriesNoLongerLed();
5640
+ }
4756
5641
  if (forceFreshDelivery) {
4757
- // Removed/shrunk ranges still need the authoritative background pass.
4758
- this.scheduleRepairSweep({ forceFreshDelivery, addedPeers });
5642
+ // Pure leave/shrink churn can have zero `addedPeers`, but the peers that
5643
+ // received redistributed entries still need a follow-up repair pass if the
5644
+ // immediate maybe-sync misses one entry.
5645
+ this.scheduleRepairSweep({
5646
+ mode: "churn",
5647
+ peers: churnRepairPeers,
5648
+ });
4759
5649
  }
4760
5650
  else if (useJoinWarmupFastPath) {
4761
5651
  // Pure join warmup uses the cheap immediate maybe-missing dispatch above,
@@ -4768,19 +5658,22 @@ let SharedLog = (() => {
4768
5658
  return;
4769
5659
  }
4770
5660
  this.scheduleRepairSweep({
4771
- forceFreshDelivery: false,
4772
- addedPeers: peers,
5661
+ mode: "join-warmup",
5662
+ peers,
4773
5663
  });
4774
5664
  }, 250);
4775
5665
  timer.unref?.();
4776
5666
  this._repairRetryTimers.add(timer);
4777
5667
  }
4778
- else if (addedPeers.size > 0) {
5668
+ else if (authoritativeRepairPeers.size > 0) {
4779
5669
  this.scheduleRepairSweep({
4780
- forceFreshDelivery: false,
4781
- addedPeers,
5670
+ mode: "join-authoritative",
5671
+ peers: authoritativeRepairPeers,
4782
5672
  });
4783
5673
  }
5674
+ if (!forceFreshDelivery && authoritativeRepairPeers.size > 0) {
5675
+ this.scheduleJoinAuthoritativeRepair(authoritativeRepairPeers);
5676
+ }
4784
5677
  for (const target of [...uncheckedDeliver.keys()]) {
4785
5678
  flushUncheckedDeliverTarget(target);
4786
5679
  }
@@ -4852,6 +5745,10 @@ let SharedLog = (() => {
4852
5745
  if (!dynamicRange) {
4853
5746
  return; // not allowed to replicate
4854
5747
  }
5748
+ if (this.replicationController.maxMemoryLimit != null &&
5749
+ usedMemory > this.replicationController.maxMemoryLimit) {
5750
+ await this.pruneIndexedEntriesNoLongerLed();
5751
+ }
4855
5752
  const peersSize = (await peers.getSize()) || 1;
4856
5753
  const totalParticipation = await this.calculateTotalParticipation();
4857
5754
  const newFactor = this.replicationController.step({