@peerbit/shared-log 13.1.0 → 13.1.2

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the package versions as they appear in their respective public registries.
package/dist/src/index.js CHANGED
@@ -54,7 +54,7 @@ import { CPUUsageIntervalLag } from "./cpu.js";
54
54
  import { debouncedAccumulatorMap, } from "./debounce.js";
55
55
  import { NoPeersError } from "./errors.js";
56
56
  const getSharedLogFanoutService = (services) => services.fanout;
57
- import { EntryWithRefs, ExchangeHeadsMessage, RequestIPrune, ResponseIPrune, createExchangeHeadsMessages, } from "./exchange-heads.js";
57
+ import { EXCHANGE_HEADS_REPAIR_HINT, EntryWithRefs, ExchangeHeadsMessage, RequestIPrune, ResponseIPrune, createExchangeHeadsMessages, } from "./exchange-heads.js";
58
58
  import { FanoutEnvelope } from "./fanout-envelope.js";
59
59
  import { MAX_U32, MAX_U64, bytesToNumber, createNumbers, denormalizer, } from "./integers.js";
60
60
  import { TransportMessage } from "./message.js";
@@ -66,7 +66,7 @@ import {} from "./replication-domain.js";
66
66
  import { AbsoluteReplicas, AddedReplicationSegmentMessage, AllReplicatingSegmentsMessage, MinReplicas, ReplicationPingMessage, ReplicationError, RequestReplicationInfoMessage, ResponseRoleMessage, StoppedReplicating, decodeReplicas, encodeReplicas, maxReplicas, } from "./replication.js";
67
67
  import { Observer, Replicator } from "./role.js";
68
68
  import { RatelessIBLTSynchronizer } from "./sync/rateless-iblt.js";
69
- import { SimpleSyncronizer } from "./sync/simple.js";
69
+ import { ConfirmEntriesMessage, SimpleSyncronizer } from "./sync/simple.js";
70
70
  import { groupByGid } from "./utils.js";
71
71
  const toLocalPublicSignKey = (key) => {
72
72
  if (typeof key === "string") {
@@ -263,10 +263,95 @@ const REPLICATOR_LIVENESS_PROBE_FAILURES_TO_EVICT = 2;
263
263
  // Churn/join repair can race with pruning and transient missed sync requests under
264
264
  // heavy event-loop load. Keep retries alive with a longer tail so reassigned
265
265
  // entries are retried after short bursts and slower recovery windows.
266
- const FORCE_FRESH_RETRY_SCHEDULE_MS = [
266
+ const CHURN_REPAIR_RETRY_SCHEDULE_MS = [
267
267
  0, 1_000, 3_000, 7_000, 15_000, 30_000, 45_000,
268
268
  ];
269
- const JOIN_WARMUP_RETRY_SCHEDULE_MS = [0, 1_000, 3_000, 7_000, 15_000];
269
+ const JOIN_WARMUP_RETRY_SCHEDULE_MS = [
270
+ 0,
271
+ 1_000,
272
+ 3_000,
273
+ 7_000,
274
+ 15_000,
275
+ 30_000,
276
+ 60_000,
277
+ ];
278
+ const JOIN_AUTHORITATIVE_RETRY_SCHEDULE_MS = [
279
+ 0,
280
+ 1_000,
281
+ 3_000,
282
+ 7_000,
283
+ 15_000,
284
+ 30_000,
285
+ 60_000,
286
+ ];
287
+ const APPEND_BACKFILL_RETRY_SCHEDULE_MS = [0, 1_000, 3_000, 7_000];
288
+ const JOIN_AUTHORITATIVE_REPAIR_DELAY_MS = 2_000;
289
+ const JOIN_AUTHORITATIVE_REPAIR_SWEEP_DELAYS_MS = [
290
+ JOIN_AUTHORITATIVE_REPAIR_DELAY_MS,
291
+ 7_000,
292
+ 15_000,
293
+ 30_000,
294
+ ];
295
+ const APPEND_BACKFILL_DELAY_MS = 500;
296
+ const ASSUME_SYNCED_REPAIR_SUPPRESSION_MS = 5_000;
297
+ const REPAIR_CONFIRMATION_HASH_BATCH_SIZE = 1_024;
298
+ const REPAIR_DISPATCH_MODES = [
299
+ "join-warmup",
300
+ "join-authoritative",
301
+ "append-backfill",
302
+ "churn",
303
+ ];
304
+ const createRepairMetricBucket = () => ({
305
+ dispatches: 0,
306
+ entries: 0,
307
+ ratelessFirstPasses: 0,
308
+ simpleFallbackPasses: 0,
309
+ });
310
+ const createRepairMetrics = () => ({
311
+ "join-warmup": createRepairMetricBucket(),
312
+ "join-authoritative": createRepairMetricBucket(),
313
+ "append-backfill": createRepairMetricBucket(),
314
+ churn: createRepairMetricBucket(),
315
+ });
316
+ const createRepairPendingPeersByMode = () => new Map(REPAIR_DISPATCH_MODES.map((mode) => [mode, new Set()]));
317
+ const cloneRepairPendingPeersByMode = (pending) => new Map(REPAIR_DISPATCH_MODES.map((mode) => [mode, new Set(pending.get(mode) ?? [])]));
318
+ const createRepairFrontierByMode = () => new Map(REPAIR_DISPATCH_MODES.map((mode) => [mode, new Map()]));
319
+ const createRepairActiveTargetsByMode = () => new Map(REPAIR_DISPATCH_MODES.map((mode) => [mode, new Set()]));
320
+ const getRepairRetrySchedule = (mode) => {
321
+ switch (mode) {
322
+ case "join-warmup":
323
+ return JOIN_WARMUP_RETRY_SCHEDULE_MS;
324
+ case "join-authoritative":
325
+ return JOIN_AUTHORITATIVE_RETRY_SCHEDULE_MS;
326
+ case "append-backfill":
327
+ return APPEND_BACKFILL_RETRY_SCHEDULE_MS;
328
+ case "churn":
329
+ return CHURN_REPAIR_RETRY_SCHEDULE_MS;
330
+ }
331
+ };
332
+ const resolveRepairRetrySchedule = (mode, override, trackedFrontier = false) => {
333
+ const fallback = getRepairRetrySchedule(mode);
334
+ if (!override || override.length === 0) {
335
+ return fallback;
336
+ }
337
+ if (trackedFrontier &&
338
+ override.length === 1 &&
339
+ override[0] === 0 &&
340
+ fallback.length > 1) {
341
+ // A tracked frontier with only an immediate retry would otherwise stay on
342
+ // attempt 0 forever, which means rateless-only retries and no sparse-tail
343
+ // simple fallback. Keep the immediate seed, then continue with the normal
344
+ // tracked repair schedule.
345
+ return [0, ...fallback.slice(1)];
346
+ }
347
+ return override;
348
+ };
349
+ const getRepairTransportForAttempt = (mode, attemptIndex) => {
350
+ if (mode === "churn") {
351
+ return "simple";
352
+ }
353
+ return attemptIndex === 0 ? "rateless" : "simple";
354
+ };
270
355
  const toPositiveInteger = (value, fallback, label) => {
271
356
  if (value == null) {
272
357
  return fallback;
@@ -375,8 +460,18 @@ let SharedLog = (() => {
375
460
  _repairRetryTimers;
376
461
  _recentRepairDispatch;
377
462
  _repairSweepRunning;
378
- _repairSweepForceFreshPending;
379
- _repairSweepAddedPeersPending;
463
+ _repairSweepPendingModes;
464
+ _repairSweepPendingPeersByMode;
465
+ _repairFrontierByMode;
466
+ _repairFrontierActiveTargetsByMode;
467
+ _repairSweepOptimisticGidPeersPending;
468
+ _entryKnownPeers;
469
+ _joinAuthoritativeRepairTimersByDelay;
470
+ _joinAuthoritativeRepairPeersByDelay;
471
+ _assumeSyncedRepairSuppressedUntil;
472
+ _appendBackfillTimer;
473
+ _appendBackfillPendingByTarget;
474
+ _repairMetrics;
380
475
  _topicSubscribersCache;
381
476
  // regular distribution checks
382
477
  distributeQueue;
@@ -716,7 +811,7 @@ let SharedLog = (() => {
716
811
  }),
717
812
  });
718
813
  }
719
- async _appendDeliverToReplicators(entry, minReplicasValue, leaders, selfHash, isLeader, deliveryArg) {
814
+ async _appendDeliverToReplicators(entry, coordinates, minReplicasValue, leaders, selfHash, isLeader, deliveryArg) {
720
815
  const { delivery, reliability, requireRecipients, minAcks, wrap } = this._parseDeliveryOptions(deliveryArg);
721
816
  const pending = [];
722
817
  const track = (promise) => {
@@ -725,10 +820,32 @@ let SharedLog = (() => {
725
820
  const fanoutUnicastOptions = delivery?.timeout != null || delivery?.signal != null
726
821
  ? { timeoutMs: delivery.timeout, signal: delivery.signal }
727
822
  : undefined;
823
+ const fullReplicaDeliveryCandidates = await this.getFullReplicaRepairCandidates(undefined, {
824
+ includeSubscribers: false,
825
+ });
826
+ if (minReplicasValue >= Math.max(1, fullReplicaDeliveryCandidates.size)) {
827
+ for (const peer of fullReplicaDeliveryCandidates) {
828
+ if (!leaders.has(peer)) {
829
+ leaders.set(peer, { intersecting: true });
830
+ }
831
+ }
832
+ }
833
+ const entryReplicatedForRepair = this.createEntryReplicatedForRepair({
834
+ entry,
835
+ coordinates,
836
+ leaders: leaders,
837
+ replicas: minReplicasValue,
838
+ });
728
839
  for await (const message of createExchangeHeadsMessages(this.log, [entry])) {
729
840
  await this._mergeLeadersFromGidReferences(message, minReplicasValue, leaders);
730
- const leadersForDelivery = delivery ? new Set(leaders.keys()) : undefined;
731
- const set = this.addPeersToGidPeerHistory(entry.meta.gid, leaders.keys());
841
+ const authoritativeRecipients = new Set(leaders.keys());
842
+ const leadersForDelivery = delivery
843
+ ? new Set(authoritativeRecipients)
844
+ : undefined;
845
+ // Outbound append delivery only tells us who we intend to send to, not who has
846
+ // actually stored the entry. Keep this recipient set local so later repair
847
+ // sweeps can still backfill peers that missed the initial delivery.
848
+ const set = new Set(leaders.keys());
732
849
  let hasRemotePeers = set.has(selfHash) ? set.size > 1 : set.size > 0;
733
850
  const allowSubscriberFallback = this.syncronizer instanceof SimpleSyncronizer ||
734
851
  (this.compatibility ?? Number.MAX_VALUE) < 10;
@@ -758,6 +875,17 @@ let SharedLog = (() => {
758
875
  continue;
759
876
  }
760
877
  if (!delivery) {
878
+ for (const peer of authoritativeRecipients) {
879
+ if (peer === selfHash) {
880
+ continue;
881
+ }
882
+ // Default live append delivery is still optimistic. If one remote misses
883
+ // the initial heads exchange and the caller did not opt into explicit
884
+ // delivery acks, we still need a targeted backfill source of truth for the
885
+ // authoritative recipients or one entry can get stuck at 2/3 replicas
886
+ // forever. Best-effort fallback subscribers are not repair-worthy.
887
+ this.queueAppendBackfill(peer, entryReplicatedForRepair);
888
+ }
761
889
  this.rpc
762
890
  .send(message, {
763
891
  mode: isLeader
@@ -785,12 +913,16 @@ let SharedLog = (() => {
785
913
  }
786
914
  const ackTo = [];
787
915
  let silentTo;
916
+ const repairTargets = new Set();
788
917
  // Default delivery semantics: require enough remote ACKs to reach the requested
789
918
  // replication degree (local append counts as 1).
790
919
  const defaultMinAcks = Math.max(0, minReplicasValue - 1);
791
920
  const ackLimitRaw = reliability === "ack" ? (minAcks ?? defaultMinAcks) : 0;
792
921
  const ackLimit = Math.max(0, Math.min(Math.floor(ackLimitRaw), orderedRemoteRecipients.length));
793
922
  for (const peer of orderedRemoteRecipients) {
923
+ if (authoritativeRecipients.has(peer)) {
924
+ repairTargets.add(peer);
925
+ }
794
926
  if (ackTo.length < ackLimit) {
795
927
  ackTo.push(peer);
796
928
  }
@@ -825,6 +957,12 @@ let SharedLog = (() => {
825
957
  })
826
958
  .catch((error) => logger.error(error));
827
959
  }
960
+ for (const peer of repairTargets) {
961
+ // Direct append delivery is intentionally optimistic. Queue one delayed,
962
+ // batched maybe-sync pass for the intended recipients so stable 3-peer
963
+ // append workloads do not depend on perfect first-try delivery ordering.
964
+ this.queueAppendBackfill(peer, entryReplicatedForRepair);
965
+ }
828
966
  }
829
967
  if (pending.length > 0) {
830
968
  await Promise.all(pending);
@@ -1376,6 +1514,7 @@ let SharedLog = (() => {
1376
1514
  // Keep local sync/prune state consistent even when a peer disappears
1377
1515
  // through replication-info updates without a topic unsubscribe event.
1378
1516
  this.removePeerFromGidPeerHistory(keyHash);
1517
+ this.removeRepairFrontierTarget(keyHash);
1379
1518
  this._recentRepairDispatch.delete(keyHash);
1380
1519
  if (!isMe) {
1381
1520
  this.syncronizer.onPeerDisconnected(keyHash);
@@ -1726,6 +1865,7 @@ let SharedLog = (() => {
1726
1865
  for (const key of this._gidPeersHistory.keys()) {
1727
1866
  this.removePeerFromGidPeerHistory(publicKeyHash, key);
1728
1867
  }
1868
+ this.removePeerFromEntryKnownPeers(publicKeyHash);
1729
1869
  }
1730
1870
  }
1731
1871
  addPeersToGidPeerHistory(gid, publicKeys, reset) {
@@ -1744,10 +1884,344 @@ let SharedLog = (() => {
1744
1884
  }
1745
1885
  return set;
1746
1886
  }
1887
+ markEntriesKnownByPeer(hashes, peer) {
1888
+ for (const hash of hashes) {
1889
+ let peers = this._entryKnownPeers.get(hash);
1890
+ if (!peers) {
1891
+ peers = new Set();
1892
+ this._entryKnownPeers.set(hash, peers);
1893
+ }
1894
+ peers.add(peer);
1895
+ }
1896
+ }
1897
+ removeEntriesKnownByPeer(hashes, peer) {
1898
+ for (const hash of hashes) {
1899
+ const peers = this._entryKnownPeers.get(hash);
1900
+ if (!peers) {
1901
+ continue;
1902
+ }
1903
+ peers.delete(peer);
1904
+ if (peers.size === 0) {
1905
+ this._entryKnownPeers.delete(hash);
1906
+ }
1907
+ }
1908
+ }
1909
+ removePeerFromEntryKnownPeers(peer) {
1910
+ for (const [hash, peers] of this._entryKnownPeers) {
1911
+ peers.delete(peer);
1912
+ if (peers.size === 0) {
1913
+ this._entryKnownPeers.delete(hash);
1914
+ }
1915
+ }
1916
+ }
1917
+ isEntryKnownByPeer(hash, peer) {
1918
+ return this._entryKnownPeers.get(hash)?.has(peer) === true;
1919
+ }
1920
+ markRepairSweepOptimisticPeer(gid, peer) {
1921
+ let peers = this._repairSweepOptimisticGidPeersPending.get(gid);
1922
+ if (!peers) {
1923
+ peers = new Map();
1924
+ this._repairSweepOptimisticGidPeersPending.set(gid, peers);
1925
+ }
1926
+ peers.set(peer, (peers.get(peer) || 0) + 1);
1927
+ }
1928
+ hasPendingRepairSweepOptimisticPeer(gid, peer) {
1929
+ return (this._repairSweepOptimisticGidPeersPending.get(gid)?.get(peer) || 0) > 0;
1930
+ }
1931
+ createEntryReplicatedForRepair(properties) {
1932
+ const assignedToRangeBoundary = shouldAssignToRangeBoundary(properties.leaders, properties.replicas);
1933
+ const cidObject = cidifyString(properties.entry.hash);
1934
+ const hashNumber = this.indexableDomain.numbers.bytesToNumber(cidObject.multihash.digest);
1935
+ return new this.indexableDomain.constructorEntry({
1936
+ assignedToRangeBoundary,
1937
+ coordinates: properties.coordinates,
1938
+ meta: properties.entry.meta,
1939
+ hash: properties.entry.hash,
1940
+ hashNumber,
1941
+ });
1942
+ }
1943
+ isAssumeSyncedRepairSuppressed() {
1944
+ return this._assumeSyncedRepairSuppressedUntil > Date.now();
1945
+ }
1946
+ isFrontierTrackedRepairMode(mode) {
1947
+ return mode !== "join-warmup";
1948
+ }
1949
+ async sleepTracked(delayMs) {
1950
+ if (delayMs <= 0) {
1951
+ return;
1952
+ }
1953
+ await new Promise((resolve) => {
1954
+ const timer = setTimeout(() => {
1955
+ this._repairRetryTimers.delete(timer);
1956
+ resolve();
1957
+ }, delayMs);
1958
+ timer.unref?.();
1959
+ this._repairRetryTimers.add(timer);
1960
+ });
1961
+ }
1962
+ queueRepairFrontierEntries(mode, target, entries) {
1963
+ let targets = this._repairFrontierByMode.get(mode);
1964
+ if (!targets) {
1965
+ targets = new Map();
1966
+ this._repairFrontierByMode.set(mode, targets);
1967
+ }
1968
+ let pending = targets.get(target);
1969
+ if (!pending) {
1970
+ pending = new Map();
1971
+ targets.set(target, pending);
1972
+ }
1973
+ for (const [hash, entry] of entries) {
1974
+ pending.set(hash, entry);
1975
+ }
1976
+ }
1977
+ clearRepairFrontierHashes(target, hashes) {
1978
+ const hashList = [...hashes];
1979
+ if (hashList.length === 0) {
1980
+ return;
1981
+ }
1982
+ for (const mode of REPAIR_DISPATCH_MODES) {
1983
+ const pending = this._repairFrontierByMode.get(mode)?.get(target);
1984
+ if (!pending) {
1985
+ continue;
1986
+ }
1987
+ for (const hash of hashList) {
1988
+ pending.delete(hash);
1989
+ }
1990
+ if (pending.size === 0) {
1991
+ this._repairFrontierByMode.get(mode)?.delete(target);
1992
+ }
1993
+ }
1994
+ }
1995
+ async getFullReplicaRepairCandidates(extraPeers, options) {
1996
+ const candidates = new Set([
1997
+ this.node.identity.publicKey.hashcode(),
1998
+ ]);
1999
+ try {
2000
+ for (const peer of await this.getReplicators()) {
2001
+ candidates.add(peer);
2002
+ }
2003
+ }
2004
+ catch {
2005
+ for (const peer of this.uniqueReplicators) {
2006
+ candidates.add(peer);
2007
+ }
2008
+ }
2009
+ for (const peer of extraPeers ?? []) {
2010
+ candidates.add(peer);
2011
+ }
2012
+ if (options?.includeSubscribers !== false) {
2013
+ try {
2014
+ for (const subscriber of (await this._getTopicSubscribers(this.topic)) ?? []) {
2015
+ candidates.add(subscriber.hashcode());
2016
+ }
2017
+ }
2018
+ catch {
2019
+ // Best-effort only; explicit repair peers still keep the path safe.
2020
+ }
2021
+ }
2022
+ return candidates;
2023
+ }
2024
+ removeRepairFrontierTarget(target) {
2025
+ for (const mode of REPAIR_DISPATCH_MODES) {
2026
+ this._repairFrontierByMode.get(mode)?.delete(target);
2027
+ this._repairFrontierActiveTargetsByMode.get(mode)?.delete(target);
2028
+ }
2029
+ }
2030
+ async sendRepairConfirmation(target, hashes) {
2031
+ const uniqueHashes = [...new Set(hashes)];
2032
+ for (let i = 0; i < uniqueHashes.length; i += REPAIR_CONFIRMATION_HASH_BATCH_SIZE) {
2033
+ const chunk = uniqueHashes.slice(i, i + REPAIR_CONFIRMATION_HASH_BATCH_SIZE);
2034
+ await this.rpc.send(new ConfirmEntriesMessage({ hashes: chunk }), {
2035
+ priority: 1,
2036
+ mode: new SilentDelivery({ to: [target], redundancy: 1 }),
2037
+ });
2038
+ }
2039
+ }
2040
+ async pushRepairEntries(target, entries) {
2041
+ for await (const message of createExchangeHeadsMessages(this.log, [...entries.keys()])) {
2042
+ message.reserved[0] |= EXCHANGE_HEADS_REPAIR_HINT;
2043
+ await this.rpc.send(message, {
2044
+ priority: 1,
2045
+ mode: new SilentDelivery({ to: [target], redundancy: 1 }),
2046
+ });
2047
+ }
2048
+ }
2049
+ async sendRepairEntriesWithTransport(target, entries, transport, options) {
2050
+ const unknownEntries = new Map();
2051
+ const knownHashes = [];
2052
+ for (const [hash, entry] of entries) {
2053
+ if (options?.bypassKnownPeers || !this.isEntryKnownByPeer(hash, target)) {
2054
+ unknownEntries.set(hash, entry);
2055
+ }
2056
+ else {
2057
+ knownHashes.push(hash);
2058
+ }
2059
+ }
2060
+ this.clearRepairFrontierHashes(target, knownHashes);
2061
+ if (unknownEntries.size === 0) {
2062
+ return;
2063
+ }
2064
+ if (transport === "simple") {
2065
+ // Fallback repair should not depend on the target completing the
2066
+ // RequestMaybeSync -> ResponseMaybeSync round trip.
2067
+ await this.pushRepairEntries(target, unknownEntries);
2068
+ return;
2069
+ }
2070
+ await this.syncronizer.onMaybeMissingEntries({
2071
+ entries: unknownEntries,
2072
+ targets: [target],
2073
+ });
2074
+ }
2075
+ async sendMaybeMissingEntriesNow(target, entries, options) {
2076
+ if (entries.size === 0) {
2077
+ return;
2078
+ }
2079
+ const now = Date.now();
2080
+ let recentlyDispatchedByHash = this._recentRepairDispatch.get(target);
2081
+ if (!recentlyDispatchedByHash) {
2082
+ recentlyDispatchedByHash = new Map();
2083
+ this._recentRepairDispatch.set(target, recentlyDispatchedByHash);
2084
+ }
2085
+ for (const [hash, ts] of recentlyDispatchedByHash) {
2086
+ if (now - ts > RECENT_REPAIR_DISPATCH_TTL_MS) {
2087
+ recentlyDispatchedByHash.delete(hash);
2088
+ }
2089
+ }
2090
+ const filteredEntries = options.bypassRecentDedupe === true
2091
+ ? new Map(entries)
2092
+ : new Map();
2093
+ if (options.bypassRecentDedupe !== true) {
2094
+ for (const [hash, entry] of entries) {
2095
+ const prev = recentlyDispatchedByHash.get(hash);
2096
+ if (prev != null && now - prev <= RECENT_REPAIR_DISPATCH_TTL_MS) {
2097
+ continue;
2098
+ }
2099
+ recentlyDispatchedByHash.set(hash, now);
2100
+ filteredEntries.set(hash, entry);
2101
+ }
2102
+ }
2103
+ else {
2104
+ for (const hash of entries.keys()) {
2105
+ recentlyDispatchedByHash.set(hash, now);
2106
+ }
2107
+ }
2108
+ if (filteredEntries.size === 0) {
2109
+ return;
2110
+ }
2111
+ const bucket = this._repairMetrics[options.mode];
2112
+ bucket.dispatches += 1;
2113
+ bucket.entries += filteredEntries.size;
2114
+ if (options.transport === "simple") {
2115
+ bucket.simpleFallbackPasses += 1;
2116
+ }
2117
+ else {
2118
+ bucket.ratelessFirstPasses += 1;
2119
+ }
2120
+ await Promise.resolve(this.sendRepairEntriesWithTransport(target, filteredEntries, options.transport, { bypassKnownPeers: options.mode === "churn" })).catch((error) => logger.error(error));
2121
+ }
2122
+ ensureRepairFrontierRunner(mode, target, retryScheduleMs) {
2123
+ const activeTargets = this._repairFrontierActiveTargetsByMode.get(mode);
2124
+ if (!activeTargets || activeTargets.has(target) || this.closed) {
2125
+ return;
2126
+ }
2127
+ activeTargets.add(target);
2128
+ const retrySchedule = resolveRepairRetrySchedule(mode, retryScheduleMs, this.isFrontierTrackedRepairMode(mode));
2129
+ const steadyStateDelay = retrySchedule.length > 1
2130
+ ? Math.max(1, retrySchedule[retrySchedule.length - 1] - retrySchedule[retrySchedule.length - 2])
2131
+ : Math.max(retrySchedule[0] || 1_000, 1_000);
2132
+ void (async () => {
2133
+ let attemptIndex = 0;
2134
+ try {
2135
+ for (;;) {
2136
+ if (this.closed) {
2137
+ return;
2138
+ }
2139
+ const pending = this._repairFrontierByMode.get(mode)?.get(target);
2140
+ if (!pending || pending.size === 0) {
2141
+ return;
2142
+ }
2143
+ if ((mode === "join-warmup" || mode === "join-authoritative") &&
2144
+ this.isAssumeSyncedRepairSuppressed()) {
2145
+ await this.sleepTracked(Math.max(250, this._assumeSyncedRepairSuppressedUntil - Date.now()));
2146
+ continue;
2147
+ }
2148
+ await this.sendMaybeMissingEntriesNow(target, pending, {
2149
+ mode,
2150
+ transport: getRepairTransportForAttempt(mode, attemptIndex),
2151
+ bypassRecentDedupe: true,
2152
+ });
2153
+ const remaining = this._repairFrontierByMode.get(mode)?.get(target);
2154
+ if (!remaining || remaining.size === 0) {
2155
+ return;
2156
+ }
2157
+ const waitMs = attemptIndex + 1 < retrySchedule.length
2158
+ ? Math.max(0, retrySchedule[attemptIndex + 1] - retrySchedule[attemptIndex])
2159
+ : steadyStateDelay;
2160
+ attemptIndex = Math.min(attemptIndex + 1, retrySchedule.length - 1);
2161
+ await this.sleepTracked(waitMs);
2162
+ }
2163
+ }
2164
+ finally {
2165
+ activeTargets.delete(target);
2166
+ if (!this.closed &&
2167
+ (this._repairFrontierByMode.get(mode)?.get(target)?.size || 0) > 0) {
2168
+ this.ensureRepairFrontierRunner(mode, target, retryScheduleMs);
2169
+ }
2170
+ }
2171
+ })().catch((error) => {
2172
+ activeTargets.delete(target);
2173
+ logger.error(error);
2174
+ });
2175
+ }
2176
+ flushAppendBackfill() {
2177
+ if (this._appendBackfillPendingByTarget.size === 0) {
2178
+ return;
2179
+ }
2180
+ const pending = this._appendBackfillPendingByTarget;
2181
+ this._appendBackfillPendingByTarget = new Map();
2182
+ for (const [target, entries] of pending) {
2183
+ this.dispatchMaybeMissingEntries(target, entries, {
2184
+ mode: "append-backfill",
2185
+ });
2186
+ }
2187
+ }
2188
+ queueAppendBackfill(target, entry) {
2189
+ let entries = this._appendBackfillPendingByTarget.get(target);
2190
+ if (!entries) {
2191
+ entries = new Map();
2192
+ this._appendBackfillPendingByTarget.set(target, entries);
2193
+ }
2194
+ entries.set(entry.hash, entry);
2195
+ if (entries.size >= this.repairSweepTargetBufferSize) {
2196
+ this.flushAppendBackfill();
2197
+ return;
2198
+ }
2199
+ if (this._appendBackfillTimer || this.closed) {
2200
+ return;
2201
+ }
2202
+ const timer = setTimeout(() => {
2203
+ this._repairRetryTimers.delete(timer);
2204
+ if (this._appendBackfillTimer === timer) {
2205
+ this._appendBackfillTimer = undefined;
2206
+ }
2207
+ if (this.closed) {
2208
+ return;
2209
+ }
2210
+ this.flushAppendBackfill();
2211
+ }, APPEND_BACKFILL_DELAY_MS);
2212
+ timer.unref?.();
2213
+ this._repairRetryTimers.add(timer);
2214
+ this._appendBackfillTimer = timer;
2215
+ }
1747
2216
  dispatchMaybeMissingEntries(target, entries, options) {
1748
2217
  if (entries.size === 0) {
1749
2218
  return;
1750
2219
  }
2220
+ if (this.isFrontierTrackedRepairMode(options.mode)) {
2221
+ this.queueRepairFrontierEntries(options.mode, target, entries);
2222
+ this.ensureRepairFrontierRunner(options.mode, target, options.retryScheduleMs);
2223
+ return;
2224
+ }
1751
2225
  const now = Date.now();
1752
2226
  let recentlyDispatchedByHash = this._recentRepairDispatch.get(target);
1753
2227
  if (!recentlyDispatchedByHash) {
@@ -1759,10 +2233,10 @@ let SharedLog = (() => {
1759
2233
  recentlyDispatchedByHash.delete(hash);
1760
2234
  }
1761
2235
  }
1762
- const filteredEntries = options?.bypassRecentDedupe === true
2236
+ const filteredEntries = options.bypassRecentDedupe === true
1763
2237
  ? new Map(entries)
1764
2238
  : new Map();
1765
- if (options?.bypassRecentDedupe !== true) {
2239
+ if (options.bypassRecentDedupe !== true) {
1766
2240
  for (const [hash, entry] of entries) {
1767
2241
  const prev = recentlyDispatchedByHash.get(hash);
1768
2242
  if (prev != null && now - prev <= RECENT_REPAIR_DISPATCH_TTL_MS) {
@@ -1780,95 +2254,186 @@ let SharedLog = (() => {
1780
2254
  if (filteredEntries.size === 0) {
1781
2255
  return;
1782
2256
  }
1783
- const retrySchedule = options?.retryScheduleMs && options.retryScheduleMs.length > 0
1784
- ? options.retryScheduleMs
1785
- : options?.forceFreshDelivery
1786
- ? FORCE_FRESH_RETRY_SCHEDULE_MS
1787
- : [0];
1788
- const run = () => {
1789
- // For force-fresh churn repair we intentionally bypass rateless IBLT and
1790
- // use simple hash-based sync. This path is a directed "push these hashes
1791
- // to that peer" recovery flow; using simple sync here avoids occasional
1792
- // single-hash gaps seen with IBLT-oriented maybe-sync batches under churn.
1793
- if (options?.forceFreshDelivery &&
1794
- this.syncronizer instanceof RatelessIBLTSynchronizer) {
1795
- return Promise.resolve(this.syncronizer.simple.onMaybeMissingEntries({
1796
- entries: filteredEntries,
1797
- targets: [target],
1798
- })).catch((error) => logger.error(error));
1799
- }
1800
- return Promise.resolve(this.syncronizer.onMaybeMissingEntries({
1801
- entries: filteredEntries,
1802
- targets: [target],
1803
- })).catch((error) => logger.error(error));
2257
+ if ((options.mode === "join-warmup" ||
2258
+ options.mode === "join-authoritative") &&
2259
+ this.isAssumeSyncedRepairSuppressed()) {
2260
+ return;
2261
+ }
2262
+ const retrySchedule = resolveRepairRetrySchedule(options.mode, options.retryScheduleMs, this.isFrontierTrackedRepairMode(options.mode));
2263
+ const bucket = this._repairMetrics[options.mode];
2264
+ bucket.dispatches += 1;
2265
+ bucket.entries += filteredEntries.size;
2266
+ const run = (transport) => {
2267
+ if (transport === "simple") {
2268
+ bucket.simpleFallbackPasses += 1;
2269
+ }
2270
+ else {
2271
+ bucket.ratelessFirstPasses += 1;
2272
+ }
2273
+ return Promise.resolve(this.sendRepairEntriesWithTransport(target, filteredEntries, transport, { bypassKnownPeers: options.mode === "churn" })).catch((error) => logger.error(error));
1804
2274
  };
1805
- for (const delayMs of retrySchedule) {
2275
+ retrySchedule.forEach((delayMs, index) => {
2276
+ const transport = getRepairTransportForAttempt(options.mode, index);
1806
2277
  if (delayMs === 0) {
1807
- void run();
1808
- continue;
2278
+ void run(transport);
2279
+ return;
1809
2280
  }
1810
2281
  const timer = setTimeout(() => {
1811
2282
  this._repairRetryTimers.delete(timer);
1812
2283
  if (this.closed) {
1813
2284
  return;
1814
2285
  }
1815
- void run();
2286
+ void run(transport);
1816
2287
  }, delayMs);
1817
2288
  timer.unref?.();
1818
2289
  this._repairRetryTimers.add(timer);
1819
- }
2290
+ });
1820
2291
  }
1821
2292
  scheduleRepairSweep(options) {
1822
- if (options.forceFreshDelivery) {
1823
- this._repairSweepForceFreshPending = true;
1824
- }
1825
- for (const peer of options.addedPeers) {
1826
- this._repairSweepAddedPeersPending.add(peer);
2293
+ this._repairSweepPendingModes.add(options.mode);
2294
+ const pendingPeers = this._repairSweepPendingPeersByMode.get(options.mode);
2295
+ if (pendingPeers) {
2296
+ for (const peer of options.peers ?? []) {
2297
+ pendingPeers.add(peer);
2298
+ }
1827
2299
  }
1828
2300
  if (!this._repairSweepRunning && !this.closed) {
1829
2301
  this._repairSweepRunning = true;
1830
2302
  void this.runRepairSweep();
1831
2303
  }
1832
2304
  }
2305
+ scheduleJoinAuthoritativeRepair(peers) {
2306
+ if (this.closed || peers.size === 0) {
2307
+ return;
2308
+ }
2309
+ for (const delayMs of JOIN_AUTHORITATIVE_REPAIR_SWEEP_DELAYS_MS) {
2310
+ let pendingPeers = this._joinAuthoritativeRepairPeersByDelay.get(delayMs);
2311
+ if (!pendingPeers) {
2312
+ pendingPeers = new Set();
2313
+ this._joinAuthoritativeRepairPeersByDelay.set(delayMs, pendingPeers);
2314
+ }
2315
+ for (const peer of peers) {
2316
+ pendingPeers.add(peer);
2317
+ }
2318
+ if (this._joinAuthoritativeRepairTimersByDelay.has(delayMs)) {
2319
+ continue;
2320
+ }
2321
+ const timer = setTimeout(() => {
2322
+ this._repairRetryTimers.delete(timer);
2323
+ this._joinAuthoritativeRepairTimersByDelay.delete(delayMs);
2324
+ if (this.closed) {
2325
+ return;
2326
+ }
2327
+ const peersForSweep = new Set(this._joinAuthoritativeRepairPeersByDelay.get(delayMs) ?? []);
2328
+ this._joinAuthoritativeRepairPeersByDelay.delete(delayMs);
2329
+ if (peersForSweep.size === 0) {
2330
+ return;
2331
+ }
2332
+ // A joiner's leader view can still be partial on the first delayed pass
2333
+ // under pubsub jitter. Bounded per-peer rescans widen the authoritative
2334
+ // frontier without adding per-append sweeps.
2335
+ this.scheduleRepairSweep({
2336
+ mode: "join-authoritative",
2337
+ peers: peersForSweep,
2338
+ });
2339
+ }, delayMs);
2340
+ timer.unref?.();
2341
+ this._repairRetryTimers.add(timer);
2342
+ this._joinAuthoritativeRepairTimersByDelay.set(delayMs, timer);
2343
+ }
2344
+ }
1833
2345
  async runRepairSweep() {
1834
2346
  try {
1835
2347
  while (!this.closed) {
1836
- const forceFreshDelivery = this._repairSweepForceFreshPending;
1837
- const addedPeers = new Set(this._repairSweepAddedPeersPending);
1838
- this._repairSweepForceFreshPending = false;
1839
- this._repairSweepAddedPeersPending.clear();
1840
- if (!forceFreshDelivery && addedPeers.size === 0) {
2348
+ const pendingModes = new Set(this._repairSweepPendingModes);
2349
+ const pendingPeersByMode = cloneRepairPendingPeersByMode(this._repairSweepPendingPeersByMode);
2350
+ this._repairSweepPendingModes.clear();
2351
+ for (const peers of this._repairSweepPendingPeersByMode.values()) {
2352
+ peers.clear();
2353
+ }
2354
+ if (pendingModes.size === 0) {
1841
2355
  return;
1842
2356
  }
1843
- const pendingByTarget = new Map();
1844
- const flushTarget = (target) => {
1845
- const entries = pendingByTarget.get(target);
2357
+ const optimisticGidPeersByMode = new Map();
2358
+ const optimisticGidPeersConsumedByMode = new Map();
2359
+ for (const mode of pendingModes) {
2360
+ const modePeers = pendingPeersByMode.get(mode);
2361
+ if (!modePeers || modePeers.size === 0) {
2362
+ continue;
2363
+ }
2364
+ const optimisticGidPeers = new Map();
2365
+ const optimisticGidPeersConsumed = new Map();
2366
+ for (const [gid, peerCounts] of this._repairSweepOptimisticGidPeersPending) {
2367
+ let matchedPeers;
2368
+ let matchedCounts;
2369
+ for (const [peer, count] of peerCounts) {
2370
+ if (!modePeers.has(peer)) {
2371
+ continue;
2372
+ }
2373
+ matchedPeers ||= new Set();
2374
+ matchedCounts ||= new Map();
2375
+ matchedPeers.add(peer);
2376
+ matchedCounts.set(peer, count);
2377
+ }
2378
+ if (matchedPeers && matchedCounts) {
2379
+ optimisticGidPeers.set(gid, matchedPeers);
2380
+ optimisticGidPeersConsumed.set(gid, matchedCounts);
2381
+ }
2382
+ }
2383
+ if (optimisticGidPeers.size > 0) {
2384
+ optimisticGidPeersByMode.set(mode, optimisticGidPeers);
2385
+ optimisticGidPeersConsumedByMode.set(mode, optimisticGidPeersConsumed);
2386
+ }
2387
+ }
2388
+ const pendingByMode = new Map(REPAIR_DISPATCH_MODES.map((mode) => [mode, new Map()]));
2389
+ const pendingRepairPeers = new Set();
2390
+ for (const peers of pendingPeersByMode.values()) {
2391
+ for (const peer of peers) {
2392
+ pendingRepairPeers.add(peer);
2393
+ }
2394
+ }
2395
+ const fullReplicaRepairCandidates = await this.getFullReplicaRepairCandidates(pendingRepairPeers, {
2396
+ includeSubscribers: false,
2397
+ });
2398
+ const fullReplicaRepairCandidateCount = Math.max(1, fullReplicaRepairCandidates.size);
2399
+ const nextFrontierByMode = new Map([
2400
+ ["join-authoritative", new Map()],
2401
+ ["churn", new Map()],
2402
+ ]);
2403
+ const flushTarget = (mode, target) => {
2404
+ const targets = pendingByMode.get(mode);
2405
+ const entries = targets?.get(target);
1846
2406
  if (!entries || entries.size === 0) {
1847
2407
  return;
1848
2408
  }
1849
- const isJoinWarmupTarget = addedPeers.has(target);
1850
- const bypassRecentDedupe = isJoinWarmupTarget || forceFreshDelivery;
1851
2409
  this.dispatchMaybeMissingEntries(target, entries, {
1852
- bypassRecentDedupe,
1853
- retryScheduleMs: isJoinWarmupTarget
1854
- ? JOIN_WARMUP_RETRY_SCHEDULE_MS
1855
- : undefined,
1856
- forceFreshDelivery,
2410
+ bypassRecentDedupe: true,
2411
+ mode,
1857
2412
  });
1858
- pendingByTarget.delete(target);
2413
+ targets?.delete(target);
1859
2414
  };
1860
- const queueEntryForTarget = (target, entry) => {
1861
- let set = pendingByTarget.get(target);
2415
+ const queueEntryForTarget = (mode, target, entry) => {
2416
+ const sweepTargets = nextFrontierByMode.get(mode);
2417
+ if (sweepTargets) {
2418
+ let sweepSet = sweepTargets.get(target);
2419
+ if (!sweepSet) {
2420
+ sweepSet = new Map();
2421
+ sweepTargets.set(target, sweepSet);
2422
+ }
2423
+ sweepSet.set(entry.hash, entry);
2424
+ }
2425
+ const targets = pendingByMode.get(mode);
2426
+ let set = targets.get(target);
1862
2427
  if (!set) {
1863
2428
  set = new Map();
1864
- pendingByTarget.set(target, set);
2429
+ targets.set(target, set);
1865
2430
  }
1866
2431
  if (set.has(entry.hash)) {
1867
2432
  return;
1868
2433
  }
1869
2434
  set.set(entry.hash, entry);
1870
2435
  if (set.size >= this.repairSweepTargetBufferSize) {
1871
- flushTarget(target);
2436
+ flushTarget(mode, target);
1872
2437
  }
1873
2438
  };
1874
2439
  const iterator = this.entryCoordinatesIndex.iterate({});
@@ -1877,20 +2442,42 @@ let SharedLog = (() => {
1877
2442
  const entries = await iterator.next(REPAIR_SWEEP_ENTRY_BATCH_SIZE);
1878
2443
  for (const entry of entries) {
1879
2444
  const entryReplicated = entry.value;
1880
- const knownPeers = this._gidPeersHistory.get(entryReplicated.gid);
2445
+ const gid = entryReplicated.gid;
2446
+ const knownPeers = this._gidPeersHistory.get(gid);
2447
+ const requestedReplicas = decodeReplicas(entryReplicated).getValue(this);
1881
2448
  const currentPeers = await this.findLeaders(entryReplicated.coordinates, entryReplicated, { roleAge: 0 });
1882
- if (forceFreshDelivery) {
2449
+ if (pendingModes.has("churn")) {
1883
2450
  for (const [currentPeer] of currentPeers) {
1884
2451
  if (currentPeer === this.node.identity.publicKey.hashcode()) {
1885
2452
  continue;
1886
2453
  }
1887
- queueEntryForTarget(currentPeer, entryReplicated);
2454
+ queueEntryForTarget("churn", currentPeer, entryReplicated);
1888
2455
  }
1889
2456
  }
1890
- if (addedPeers.size > 0) {
1891
- for (const peer of addedPeers) {
1892
- if (currentPeers.has(peer) && !knownPeers?.has(peer)) {
1893
- queueEntryForTarget(peer, entryReplicated);
2457
+ for (const mode of pendingModes) {
2458
+ const modePeers = pendingPeersByMode.get(mode);
2459
+ if (!modePeers || modePeers.size === 0) {
2460
+ continue;
2461
+ }
2462
+ const optimisticPeers = optimisticGidPeersByMode.get(mode)?.get(gid);
2463
+ for (const peer of modePeers) {
2464
+ if (this.isEntryKnownByPeer(entryReplicated.hash, peer)) {
2465
+ continue;
2466
+ }
2467
+ const wasOptimisticallyAssigned = optimisticPeers?.has(peer) === true;
2468
+ const isCoveredByFullReplicaRepair = mode === "join-authoritative" &&
2469
+ fullReplicaRepairCandidates.has(peer) &&
2470
+ requestedReplicas >= fullReplicaRepairCandidateCount;
2471
+ const shouldQueue = mode === "join-authoritative"
2472
+ ? currentPeers.has(peer) || isCoveredByFullReplicaRepair
2473
+ : wasOptimisticallyAssigned ||
2474
+ (currentPeers.has(peer) && !knownPeers?.has(peer));
2475
+ if (shouldQueue) {
2476
+ // Authoritative join repair must not trust partial gid peer history,
2477
+ // otherwise a late joiner can get stuck with a partial historical
2478
+ // backfill forever. Once we enter the authoritative pass, queue every
2479
+ // entry whose current leader set still includes the added peer.
2480
+ queueEntryForTarget(mode, peer, entryReplicated);
1894
2481
  }
1895
2482
  }
1896
2483
  }
@@ -1900,8 +2487,64 @@ let SharedLog = (() => {
1900
2487
  finally {
1901
2488
  await iterator.close();
1902
2489
  }
1903
- for (const target of [...pendingByTarget.keys()]) {
1904
- flushTarget(target);
2490
+ for (const [, optimisticGidPeersConsumed] of optimisticGidPeersConsumedByMode) {
2491
+ for (const [gid, peerCounts] of optimisticGidPeersConsumed) {
2492
+ const pendingPeerCounts = this._repairSweepOptimisticGidPeersPending.get(gid);
2493
+ if (!pendingPeerCounts) {
2494
+ continue;
2495
+ }
2496
+ for (const [peer, count] of peerCounts) {
2497
+ const current = pendingPeerCounts.get(peer) || 0;
2498
+ const next = current - count;
2499
+ if (next > 0) {
2500
+ pendingPeerCounts.set(peer, next);
2501
+ }
2502
+ else {
2503
+ pendingPeerCounts.delete(peer);
2504
+ }
2505
+ }
2506
+ if (pendingPeerCounts.size === 0) {
2507
+ this._repairSweepOptimisticGidPeersPending.delete(gid);
2508
+ }
2509
+ }
2510
+ }
2511
+ for (const mode of pendingModes) {
2512
+ if (mode !== "join-authoritative" && mode !== "churn") {
2513
+ continue;
2514
+ }
2515
+ const nextTargets = nextFrontierByMode.get(mode) ?? new Map();
2516
+ const frontierTargets = this._repairFrontierByMode.get(mode);
2517
+ for (const target of pendingPeersByMode.get(mode) ?? []) {
2518
+ const replacement = nextTargets.get(target);
2519
+ if (mode === "join-authoritative") {
2520
+ // Authoritative join repair is receipt-driven: a later sweep can have a
2521
+ // narrower transient leader view, but it must not forget unconfirmed
2522
+ // hashes that were already queued for this joiner.
2523
+ if (replacement && replacement.size > 0) {
2524
+ const existing = frontierTargets?.get(target);
2525
+ if (existing && existing.size > 0) {
2526
+ for (const [hash, entry] of replacement) {
2527
+ existing.set(hash, entry);
2528
+ }
2529
+ }
2530
+ else {
2531
+ frontierTargets?.set(target, replacement);
2532
+ }
2533
+ }
2534
+ continue;
2535
+ }
2536
+ if (replacement && replacement.size > 0) {
2537
+ frontierTargets?.set(target, replacement);
2538
+ }
2539
+ else {
2540
+ frontierTargets?.delete(target);
2541
+ }
2542
+ }
2543
+ }
2544
+ for (const [mode, targets] of pendingByMode) {
2545
+ for (const target of [...targets.keys()]) {
2546
+ flushTarget(mode, target);
2547
+ }
1905
2548
  }
1906
2549
  }
1907
2550
  }
@@ -1912,17 +2555,78 @@ let SharedLog = (() => {
1912
2555
  }
1913
2556
  finally {
1914
2557
  this._repairSweepRunning = false;
1915
- if (!this.closed &&
1916
- (this._repairSweepForceFreshPending ||
1917
- this._repairSweepAddedPeersPending.size > 0)) {
2558
+ if (!this.closed && this._repairSweepPendingModes.size > 0) {
1918
2559
  this._repairSweepRunning = true;
1919
2560
  void this.runRepairSweep();
1920
2561
  }
1921
2562
  }
1922
2563
  }
1923
2564
  async pruneDebouncedFnAddIfNotKeeping(args) {
1924
- if (!this.keep || !(await this.keep(args.value.entry))) {
1925
- return this.pruneDebouncedFn.add(args);
2565
+ if (this.keep && (await this.keep(args.value.entry))) {
2566
+ return false;
2567
+ }
2568
+ void this.pruneDebouncedFn.add(args);
2569
+ return true;
2570
+ }
2571
+ async pruneJoinedEntriesNoLongerLed(entries) {
2572
+ const selfHash = this.node.identity.publicKey.hashcode();
2573
+ for (const entry of entries) {
2574
+ if (this.closed || this._pendingDeletes.has(entry.hash)) {
2575
+ continue;
2576
+ }
2577
+ const leaders = await this.findLeadersFromEntry(entry, decodeReplicas(entry).getValue(this), { roleAge: 0 });
2578
+ if (leaders.has(selfHash)) {
2579
+ this.pruneDebouncedFn.delete(entry.hash);
2580
+ continue;
2581
+ }
2582
+ if (leaders.size === 0) {
2583
+ continue;
2584
+ }
2585
+ await this.pruneDebouncedFnAddIfNotKeeping({
2586
+ key: entry.hash,
2587
+ value: { entry, leaders },
2588
+ });
2589
+ this.responseToPruneDebouncedFn.delete(entry.hash);
2590
+ }
2591
+ }
2592
+ async pruneIndexedEntriesNoLongerLed() {
2593
+ const selfHash = this.node.identity.publicKey.hashcode();
2594
+ const iterator = this.entryCoordinatesIndex.iterate({});
2595
+ let enqueuedPrune = false;
2596
+ try {
2597
+ while (!this.closed && !iterator.done()) {
2598
+ const entries = await iterator.next(REPAIR_SWEEP_ENTRY_BATCH_SIZE);
2599
+ for (const entry of entries) {
2600
+ const entryReplicated = entry.value;
2601
+ if (this.closed || this._pendingDeletes.has(entryReplicated.hash)) {
2602
+ continue;
2603
+ }
2604
+ const leaders = await this.findLeaders(entryReplicated.coordinates, entryReplicated, { roleAge: 0 });
2605
+ if (leaders.has(selfHash)) {
2606
+ this.pruneDebouncedFn.delete(entryReplicated.hash);
2607
+ await this._pendingDeletes
2608
+ .get(entryReplicated.hash)
2609
+ ?.reject(new Error("Failed to delete, is leader again"));
2610
+ this.removePruneRequestSent(entryReplicated.hash);
2611
+ continue;
2612
+ }
2613
+ if (leaders.size === 0) {
2614
+ continue;
2615
+ }
2616
+ enqueuedPrune =
2617
+ (await this.pruneDebouncedFnAddIfNotKeeping({
2618
+ key: entryReplicated.hash,
2619
+ value: { entry: entryReplicated, leaders },
2620
+ })) || enqueuedPrune;
2621
+ this.responseToPruneDebouncedFn.delete(entryReplicated.hash);
2622
+ }
2623
+ }
2624
+ }
2625
+ finally {
2626
+ await iterator.close();
2627
+ }
2628
+ if (enqueuedPrune && !this.closed) {
2629
+ await this.pruneDebouncedFn.flush();
1926
2630
  }
1927
2631
  }
1928
2632
  clearCheckedPruneRetry(hash) {
@@ -2065,16 +2769,17 @@ let SharedLog = (() => {
2065
2769
  await this._appendDeliverToAllFanout(result.entry);
2066
2770
  }
2067
2771
  else {
2068
- await this._appendDeliverToReplicators(result.entry, minReplicasValue, leaders, selfHash, isLeader, deliveryArg);
2772
+ await this._appendDeliverToReplicators(result.entry, coordinates, minReplicasValue, leaders, selfHash, isLeader, deliveryArg);
2069
2773
  }
2070
2774
  }
2071
- if (!isLeader && !this.shouldDelayAdaptiveRebalance()) {
2775
+ const delayAdaptiveRebalance = this.shouldDelayAdaptiveRebalance();
2776
+ if (!isLeader && !delayAdaptiveRebalance) {
2072
2777
  this.pruneDebouncedFnAddIfNotKeeping({
2073
2778
  key: result.entry.hash,
2074
2779
  value: { entry: result.entry, leaders },
2075
2780
  });
2076
2781
  }
2077
- if (!this._isAdaptiveReplicating) {
2782
+ if (!delayAdaptiveRebalance) {
2078
2783
  this.rebalanceParticipationDebounced?.call();
2079
2784
  }
2080
2785
  return result;
@@ -2108,8 +2813,18 @@ let SharedLog = (() => {
2108
2813
  this._repairRetryTimers = new Set();
2109
2814
  this._recentRepairDispatch = new Map();
2110
2815
  this._repairSweepRunning = false;
2111
- this._repairSweepForceFreshPending = false;
2112
- this._repairSweepAddedPeersPending = new Set();
2816
+ this._repairSweepPendingModes = new Set();
2817
+ this._repairSweepPendingPeersByMode = createRepairPendingPeersByMode();
2818
+ this._repairFrontierByMode = createRepairFrontierByMode();
2819
+ this._repairFrontierActiveTargetsByMode = createRepairActiveTargetsByMode();
2820
+ this._repairSweepOptimisticGidPeersPending = new Map();
2821
+ this._entryKnownPeers = new Map();
2822
+ this._joinAuthoritativeRepairTimersByDelay = new Map();
2823
+ this._joinAuthoritativeRepairPeersByDelay = new Map();
2824
+ this._assumeSyncedRepairSuppressedUntil = 0;
2825
+ this._appendBackfillTimer = undefined;
2826
+ this._appendBackfillPendingByTarget = new Map();
2827
+ this._repairMetrics = createRepairMetrics();
2113
2828
  this._topicSubscribersCache = new Map();
2114
2829
  this.coordinateToHash = new Cache({ max: 1e6, ttl: 1e4 });
2115
2830
  this.recentlyRebalanced = new Cache({ max: 1e4, ttl: 1e5 });
@@ -2167,7 +2882,10 @@ let SharedLog = (() => {
2167
2882
  this.keep = options?.keep;
2168
2883
  this.pendingMaturity = new Map();
2169
2884
  const id = sha256Base64Sync(this.log.id);
2170
- const storage = await this.node.storage.sublevel(id);
2885
+ const [storage, logScope] = await Promise.all([
2886
+ this.node.storage.sublevel(id),
2887
+ this.node.indexer.scope(id),
2888
+ ]);
2171
2889
  const localBlocks = await new AnyBlockStore(await storage.sublevel("blocks"));
2172
2890
  const fanoutService = getSharedLogFanoutService(this.node.services);
2173
2891
  const blockProviderNamespace = (cid) => `cid:${cid}`;
@@ -2223,16 +2941,18 @@ let SharedLog = (() => {
2223
2941
  }
2224
2942
  },
2225
2943
  });
2226
- await this.remoteBlocks.start();
2227
- const logScope = await this.node.indexer.scope(id);
2228
- const replicationIndex = await logScope.scope("replication");
2944
+ const remoteBlocksStartPromise = this.remoteBlocks.start();
2945
+ const [replicationIndex, logIndex] = await Promise.all([
2946
+ logScope.scope("replication"),
2947
+ logScope.scope("log"),
2948
+ ]);
2229
2949
  this._replicationRangeIndex = await replicationIndex.init({
2230
2950
  schema: this.indexableDomain.constructorRange,
2231
2951
  });
2232
2952
  this._entryCoordinatesIndex = await replicationIndex.init({
2233
2953
  schema: this.indexableDomain.constructorEntry,
2234
2954
  });
2235
- const logIndex = await logScope.scope("log");
2955
+ await remoteBlocksStartPromise;
2236
2956
  const hasIndexedReplicationInfo = (await this.replicationIndex.count({
2237
2957
  query: [
2238
2958
  new StringMatch({
@@ -2360,27 +3080,33 @@ let SharedLog = (() => {
2360
3080
  }
2361
3081
  }
2362
3082
  // Open for communcation
2363
- await this.rpc.open({
2364
- queryType: TransportMessage,
2365
- responseType: TransportMessage,
2366
- responseHandler: (query, context) => this.onMessage(query, context),
2367
- topic: this.topic,
2368
- });
2369
3083
  this._onSubscriptionFn =
2370
3084
  this._onSubscriptionFn || this._onSubscription.bind(this);
2371
- await this.node.services.pubsub.addEventListener("subscribe", this._onSubscriptionFn);
2372
3085
  this._onUnsubscriptionFn =
2373
3086
  this._onUnsubscriptionFn || this._onUnsubscription.bind(this);
2374
- await this.node.services.pubsub.addEventListener("unsubscribe", this._onUnsubscriptionFn);
2375
- await this.rpc.subscribe();
2376
- await this._openFanoutChannel(options?.fanout);
2377
- // mark all our replicaiton ranges as "new", this would allow other peers to understand that we recently reopend our database and might need some sync and warmup
2378
- await this.updateTimestampOfOwnedReplicationRanges(); // TODO do we need to do this before subscribing?
3087
+ await Promise.all([
3088
+ this.rpc.open({
3089
+ queryType: TransportMessage,
3090
+ responseType: TransportMessage,
3091
+ responseHandler: (query, context) => this.onMessage(query, context),
3092
+ topic: this.topic,
3093
+ }),
3094
+ this.node.services.pubsub.addEventListener("subscribe", this._onSubscriptionFn),
3095
+ this.node.services.pubsub.addEventListener("unsubscribe", this._onUnsubscriptionFn),
3096
+ ]);
3097
+ const fanoutOpenPromise = this._openFanoutChannel(options?.fanout);
3098
+ // Mark previously-owned replication ranges as "new" only when they already exist.
3099
+ // Fresh opens have nothing to touch here, so skip the extra scan/write entirely.
3100
+ const updateOwnedReplicationPromise = hasIndexedReplicationInfo
3101
+ ? this.updateTimestampOfOwnedReplicationRanges()
3102
+ : Promise.resolve();
3103
+ await Promise.all([fanoutOpenPromise, updateOwnedReplicationPromise]);
2379
3104
  // if we had a previous session with replication info, and new replication info dictates that we unreplicate
2380
3105
  // we should do that. Otherwise if options is a unreplication we dont need to do anything because
2381
3106
  // we are already unreplicated (as we are just opening)
2382
- let isUnreplicationOptionsDefined = isUnreplicationOptions(options?.replicate);
2383
- const canResumeReplication = (await isReplicationOptionsDependentOnPreviousState(options?.replicate, this.replicationIndex, this.node.identity.publicKey)) && hasIndexedReplicationInfo;
3107
+ const isUnreplicationOptionsDefined = isUnreplicationOptions(options?.replicate);
3108
+ const canResumeReplication = hasIndexedReplicationInfo &&
3109
+ (await isReplicationOptionsDependentOnPreviousState(options?.replicate, this.replicationIndex, this.node.identity.publicKey));
2384
3110
  if (hasIndexedReplicationInfo && isUnreplicationOptionsDefined) {
2385
3111
  await this.replicate(options?.replicate, { checkDuplicates: true });
2386
3112
  }
@@ -2423,6 +3149,7 @@ let SharedLog = (() => {
2423
3149
  }
2424
3150
  async afterOpen() {
2425
3151
  await super.afterOpen();
3152
+ const existingSubscribersPromise = this._getTopicSubscribers(this.topic);
2426
3153
  // We do this here, because these calls requires this.closed == false
2427
3154
  void this.pruneOfflineReplicators()
2428
3155
  .then(() => {
@@ -2437,7 +3164,7 @@ let SharedLog = (() => {
2437
3164
  this.startReplicatorLivenessSweep();
2438
3165
  await this.rebalanceParticipation();
2439
3166
  // Take into account existing subscription
2440
- (await this._getTopicSubscribers(this.topic))?.forEach((v) => {
3167
+ (await existingSubscribersPromise)?.forEach((v) => {
2441
3168
  if (v.equals(this.node.identity.publicKey)) {
2442
3169
  return;
2443
3170
  }
@@ -2952,8 +3679,28 @@ let SharedLog = (() => {
2952
3679
  this._repairRetryTimers.clear();
2953
3680
  this._recentRepairDispatch.clear();
2954
3681
  this._repairSweepRunning = false;
2955
- this._repairSweepForceFreshPending = false;
2956
- this._repairSweepAddedPeersPending.clear();
3682
+ this._repairSweepPendingModes.clear();
3683
+ for (const peers of this._repairSweepPendingPeersByMode.values()) {
3684
+ peers.clear();
3685
+ }
3686
+ this._repairSweepOptimisticGidPeersPending.clear();
3687
+ this._entryKnownPeers.clear();
3688
+ for (const timer of this._joinAuthoritativeRepairTimersByDelay.values()) {
3689
+ clearTimeout(timer);
3690
+ }
3691
+ this._joinAuthoritativeRepairTimersByDelay.clear();
3692
+ this._joinAuthoritativeRepairPeersByDelay.clear();
3693
+ for (const targets of this._repairFrontierByMode.values()) {
3694
+ targets.clear();
3695
+ }
3696
+ for (const targets of this._repairFrontierActiveTargetsByMode.values()) {
3697
+ targets.clear();
3698
+ }
3699
+ if (this._appendBackfillTimer) {
3700
+ clearTimeout(this._appendBackfillTimer);
3701
+ this._appendBackfillTimer = undefined;
3702
+ }
3703
+ this._appendBackfillPendingByTarget.clear();
2957
3704
  for (const [_k, v] of this._pendingDeletes) {
2958
3705
  v.clear();
2959
3706
  v.promise.resolve(); // TODO or reject?
@@ -3111,9 +3858,11 @@ let SharedLog = (() => {
3111
3858
  * I can use them to load associated logs and join/sync them with the data stores I own
3112
3859
  */
3113
3860
  const { heads } = msg;
3861
+ const isRepairHint = (msg.reserved[0] & EXCHANGE_HEADS_REPAIR_HINT) !== 0;
3114
3862
  logger.trace(`${this.node.identity.publicKey.hashcode()}: Recieved heads: ${heads.length === 1 ? heads[0].entry.hash : "#" + heads.length}, logId: ${this.log.idString}`);
3115
3863
  if (heads) {
3116
3864
  const filteredHeads = [];
3865
+ const confirmedHashes = new Set();
3117
3866
  for (const head of heads) {
3118
3867
  if (!(await this.log.has(head.entry.hash))) {
3119
3868
  head.entry.init({
@@ -3123,8 +3872,18 @@ let SharedLog = (() => {
3123
3872
  });
3124
3873
  filteredHeads.push(head);
3125
3874
  }
3875
+ else {
3876
+ confirmedHashes.add(head.entry.hash);
3877
+ }
3878
+ }
3879
+ const fromIsSelf = context.from.equals(this.node.identity.publicKey);
3880
+ if (!fromIsSelf) {
3881
+ this.markEntriesKnownByPeer(heads.map((head) => head.entry.hash), context.from.hashcode());
3126
3882
  }
3127
3883
  if (filteredHeads.length === 0) {
3884
+ if (confirmedHashes.size > 0 && !fromIsSelf) {
3885
+ await this.sendRepairConfirmation(context.from, confirmedHashes);
3886
+ }
3128
3887
  return;
3129
3888
  }
3130
3889
  const groupedByGid = await groupByGid(filteredHeads);
@@ -3186,8 +3945,15 @@ let SharedLog = (() => {
3186
3945
  }
3187
3946
  let maybeDelete;
3188
3947
  let toMerge = [];
3948
+ let toPersist = [];
3189
3949
  let toDelete;
3190
- if (isLeader) {
3950
+ // Targeted repair is sent only to peers the sender currently believes
3951
+ // should store the entry. Accept it while local membership catches up;
3952
+ // the normal checked-prune path below can still remove it if this peer
3953
+ // truly no longer owns the entry.
3954
+ const acceptsTargetedRepair = isRepairHint && fromIsLeader;
3955
+ const keepAsLeader = isLeader || acceptsTargetedRepair;
3956
+ if (keepAsLeader) {
3191
3957
  for (const entry of entries) {
3192
3958
  this.pruneDebouncedFn.delete(entry.entry.hash);
3193
3959
  this.removePruneRequestSent(entry.entry.hash);
@@ -3203,8 +3969,9 @@ let SharedLog = (() => {
3203
3969
  }
3204
3970
  }
3205
3971
  outer: for (const entry of entries) {
3206
- if (isLeader || (await this.keep?.(entry.entry))) {
3972
+ if (keepAsLeader || (await this.keep?.(entry.entry))) {
3207
3973
  toMerge.push(entry.entry);
3974
+ toPersist.push(entry.entry);
3208
3975
  }
3209
3976
  else {
3210
3977
  for (const ref of entry.gidRefrences) {
@@ -3222,7 +3989,18 @@ let SharedLog = (() => {
3222
3989
  return;
3223
3990
  }
3224
3991
  if (toMerge.length > 0) {
3992
+ this.markEntriesKnownByPeer(toMerge.map((entry) => entry.hash), context.from.hashcode());
3225
3993
  await this.log.join(toMerge);
3994
+ // Network joins bypass SharedLog.join(), but churn repair scans
3995
+ // the coordinate index to redistribute entries after membership changes.
3996
+ for (const entry of toPersist) {
3997
+ const replicas = decodeReplicas(entry).getValue(this);
3998
+ await this.findLeaders(await this.createCoordinates(entry, replicas), entry, { roleAge: 0, persist: {} });
3999
+ }
4000
+ for (const merged of toMerge) {
4001
+ confirmedHashes.add(merged.hash);
4002
+ }
4003
+ await this.pruneJoinedEntriesNoLongerLed(toMerge);
3226
4004
  toDelete?.map((x) =>
3227
4005
  // TODO types
3228
4006
  this.pruneDebouncedFnAddIfNotKeeping({
@@ -3261,6 +4039,10 @@ let SharedLog = (() => {
3261
4039
  promises.push(fn()); // we do this concurrently since waitForIsLeader might be a blocking operation for some entries
3262
4040
  }
3263
4041
  await Promise.all(promises);
4042
+ if (confirmedHashes.size > 0 && !context.from.equals(this.node.identity.publicKey)) {
4043
+ this.markEntriesKnownByPeer(confirmedHashes, context.from.hashcode());
4044
+ await this.sendRepairConfirmation(context.from, confirmedHashes);
4045
+ }
3264
4046
  }
3265
4047
  }
3266
4048
  else if (msg instanceof RequestIPrune) {
@@ -3268,6 +4050,7 @@ let SharedLog = (() => {
3268
4050
  const from = context.from.hashcode();
3269
4051
  for (const hash of msg.hashes) {
3270
4052
  this.removePruneRequestSent(hash, from);
4053
+ this.removeEntriesKnownByPeer([hash], from);
3271
4054
  // if we expect the remote to be owner of this entry because we are to prune ourselves, then we need to remove the remote
3272
4055
  // this is due to that the remote has previously indicated to be a replicator to help us prune but now has changed their mind
3273
4056
  const outGoingPrunes = this._requestIPruneResponseReplicatorSet.get(hash);
@@ -3276,7 +4059,9 @@ let SharedLog = (() => {
3276
4059
  }
3277
4060
  const indexedEntry = await this.log.entryIndex.getShallow(hash);
3278
4061
  let isLeader = false;
3279
- if (indexedEntry) {
4062
+ if (indexedEntry &&
4063
+ !this._pendingDeletes.has(hash) &&
4064
+ (await this.log.blocks.has(hash))) {
3280
4065
  this.removePeerFromGidPeerHistory(context.from.hashcode(), indexedEntry.value.meta.gid);
3281
4066
  await this._waitForReplicators(await this.createCoordinates(indexedEntry.value, decodeReplicas(indexedEntry.value).getValue(this)), indexedEntry.value, [
3282
4067
  {
@@ -3355,6 +4140,11 @@ let SharedLog = (() => {
3355
4140
  this._pendingDeletes.get(hash)?.resolve(context.from.hashcode());
3356
4141
  }
3357
4142
  }
4143
+ else if (msg instanceof ConfirmEntriesMessage) {
4144
+ this.markEntriesKnownByPeer(msg.hashes, context.from.hashcode());
4145
+ this.clearRepairFrontierHashes(context.from.hashcode(), msg.hashes);
4146
+ return;
4147
+ }
3358
4148
  else if (await this.syncronizer.onMessage(msg, context)) {
3359
4149
  return; // the syncronizer has handled the message
3360
4150
  }
@@ -3679,6 +4469,11 @@ let SharedLog = (() => {
3679
4469
  if (options?.replicate) {
3680
4470
  let messageToSend = undefined;
3681
4471
  if (assumeSynced) {
4472
+ // `assumeSynced` is an explicit contract that this join should trust the
4473
+ // supplied history and avoid initiating outbound repair while the local
4474
+ // replication ranges settle.
4475
+ this._assumeSyncedRepairSuppressedUntil =
4476
+ Date.now() + ASSUME_SYNCED_REPAIR_SUPPRESSION_MS;
3682
4477
  for (const entry of entriesToReplicate) {
3683
4478
  await seedAssumeSyncedPeerHistory(entry);
3684
4479
  }
@@ -3747,9 +4542,14 @@ let SharedLog = (() => {
3747
4542
  clear();
3748
4543
  // `waitForReplicator()` is typically used as a precondition before join/replicate
3749
4544
  // flows. A replicator can become mature and enqueue a debounced rebalance
3750
- // (`replicationChangeDebounceFn`) slightly later. Flush here so callers don't
3751
- // observe a "late" rebalance after the wait resolves.
3752
- await this.replicationChangeDebounceFn?.flush?.();
4545
+ // (`replicationChangeDebounceFn`) slightly later. Kick the flush, but do not
4546
+ // make membership waits depend on all rebalance work finishing; callers that
4547
+ // need settled distribution already wait for that explicitly.
4548
+ this.replicationChangeDebounceFn?.flush?.().catch((error) => {
4549
+ if (!isNotStartedError(error)) {
4550
+ logger.error(error?.toString?.() ?? String(error));
4551
+ }
4552
+ });
3753
4553
  deferred.resolve();
3754
4554
  };
3755
4555
  const reject = (error) => {
@@ -4141,11 +4941,51 @@ let SharedLog = (() => {
4141
4941
  }
4142
4942
  }
4143
4943
  }
4944
+ if (!options?.candidates) {
4945
+ const fullReplicaLeaders = await this.findFullReplicaLeaders(cursors.length, roleAge, peerFilter);
4946
+ if (fullReplicaLeaders) {
4947
+ return fullReplicaLeaders;
4948
+ }
4949
+ }
4144
4950
  return getSamples(cursors, this.replicationIndex, roleAge, this.indexableDomain.numbers, {
4145
4951
  peerFilter,
4146
4952
  uniqueReplicators: peerFilter,
4147
4953
  });
4148
4954
  }
4955
+ async findFullReplicaLeaders(replicas, roleAge, peerFilter) {
4956
+ const now = Date.now();
4957
+ const leaders = new Map();
4958
+ const includeStrict = this._logProperties?.strictFullReplicaFallback !== false;
4959
+ const iterator = this.replicationIndex.iterate({}, { shape: { hash: true, timestamp: true, mode: true } });
4960
+ try {
4961
+ for (;;) {
4962
+ const batch = await iterator.next(64);
4963
+ if (batch.length === 0) {
4964
+ break;
4965
+ }
4966
+ for (const result of batch) {
4967
+ const range = result.value;
4968
+ if (peerFilter && !peerFilter.has(range.hash)) {
4969
+ continue;
4970
+ }
4971
+ if (!isMatured(range, now, roleAge)) {
4972
+ continue;
4973
+ }
4974
+ if (range.mode === ReplicationIntent.Strict && !includeStrict) {
4975
+ continue;
4976
+ }
4977
+ leaders.set(range.hash, { intersecting: true });
4978
+ if (leaders.size > replicas) {
4979
+ return undefined;
4980
+ }
4981
+ }
4982
+ }
4983
+ }
4984
+ finally {
4985
+ await iterator.close();
4986
+ }
4987
+ return leaders.size > 0 ? leaders : undefined;
4988
+ }
4149
4989
  async findLeadersFromEntry(entry, replicas, options) {
4150
4990
  const coordinates = await this.createCoordinates(entry, replicas);
4151
4991
  const result = await this._findLeaders(coordinates, options);
@@ -4613,13 +5453,25 @@ let SharedLog = (() => {
4613
5453
  }
4614
5454
  const changed = false;
4615
5455
  const addedPeers = new Set();
5456
+ const authoritativeRepairPeers = new Set();
4616
5457
  const warmupPeers = new Set();
5458
+ const churnRepairPeers = new Set();
4617
5459
  const hasSelfWarmupChange = changes.some((change) => change.range.hash === selfHash &&
4618
5460
  (change.type === "added" || change.type === "replaced"));
5461
+ const hasSelfRangeRemoval = changes.some((change) => change.range.hash === selfHash &&
5462
+ (change.type === "removed" || change.type === "replaced"));
4619
5463
  for (const change of changes) {
5464
+ if (change.range.hash !== selfHash &&
5465
+ (change.type === "removed" || change.type === "replaced")) {
5466
+ this.removePeerFromEntryKnownPeers(change.range.hash);
5467
+ }
4620
5468
  if (change.type === "added" || change.type === "replaced") {
4621
5469
  const hash = change.range.hash;
4622
5470
  if (hash !== selfHash) {
5471
+ // Existing peers can widen/shift ranges after the initial join. If we
5472
+ // only rescan on first-seen "added", late authoritative range updates can
5473
+ // leave historical backfill permanently partial under load.
5474
+ authoritativeRepairPeers.add(hash);
4623
5475
  // Range updates can reassign entries to an existing peer shortly after it
4624
5476
  // already received a subset. Avoid suppressing legitimate follow-up repair.
4625
5477
  this._recentRepairDispatch.delete(hash);
@@ -4651,17 +5503,24 @@ let SharedLog = (() => {
4651
5503
  return;
4652
5504
  }
4653
5505
  const isWarmupTarget = warmupPeers.has(target);
4654
- const bypassRecentDedupe = isWarmupTarget || forceFreshDelivery;
5506
+ const mode = forceFreshDelivery
5507
+ ? "churn"
5508
+ : isWarmupTarget
5509
+ ? "join-warmup"
5510
+ : "join-authoritative";
4655
5511
  this.dispatchMaybeMissingEntries(target, entries, {
4656
- bypassRecentDedupe,
4657
- retryScheduleMs: isWarmupTarget
5512
+ bypassRecentDedupe: isWarmupTarget || forceFreshDelivery,
5513
+ mode,
5514
+ retryScheduleMs: mode === "join-warmup"
4658
5515
  ? JOIN_WARMUP_RETRY_SCHEDULE_MS
4659
- : undefined,
4660
- forceFreshDelivery,
5516
+ : mode === "join-authoritative"
5517
+ ? [0]
5518
+ : undefined,
4661
5519
  });
4662
5520
  uncheckedDeliver.delete(target);
4663
5521
  };
4664
5522
  const queueUncheckedDeliver = (target, entry) => {
5523
+ churnRepairPeers.add(target);
4665
5524
  let set = uncheckedDeliver.get(target);
4666
5525
  if (!set) {
4667
5526
  set = new Map();
@@ -4715,7 +5574,14 @@ let SharedLog = (() => {
4715
5574
  }
4716
5575
  }
4717
5576
  }
4718
- this.addPeersToGidPeerHistory(entryReplicated.gid, currentPeers.keys(), true);
5577
+ for (const [peer] of currentPeers) {
5578
+ if (warmupPeers.has(peer)) {
5579
+ this.markRepairSweepOptimisticPeer(entryReplicated.gid, peer);
5580
+ }
5581
+ }
5582
+ const authoritativePeers = [...currentPeers.keys()].filter((peer) => !warmupPeers.has(peer) &&
5583
+ !this.hasPendingRepairSweepOptimisticPeer(entryReplicated.gid, peer));
5584
+ this.addPeersToGidPeerHistory(entryReplicated.gid, authoritativePeers, true);
4719
5585
  if (!currentPeers.has(selfHash)) {
4720
5586
  this.pruneDebouncedFnAddIfNotKeeping({
4721
5587
  key: entryReplicated.hash,
@@ -4763,7 +5629,14 @@ let SharedLog = (() => {
4763
5629
  }
4764
5630
  }
4765
5631
  }
4766
- this.addPeersToGidPeerHistory(entryReplicated.gid, currentPeers.keys(), true);
5632
+ for (const [peer] of currentPeers) {
5633
+ if (addedPeers.has(peer)) {
5634
+ this.markRepairSweepOptimisticPeer(entryReplicated.gid, peer);
5635
+ }
5636
+ }
5637
+ const authoritativePeers = [...currentPeers.keys()].filter((peer) => !addedPeers.has(peer) &&
5638
+ !this.hasPendingRepairSweepOptimisticPeer(entryReplicated.gid, peer));
5639
+ this.addPeersToGidPeerHistory(entryReplicated.gid, authoritativePeers, true);
4767
5640
  if (!isLeader) {
4768
5641
  this.pruneDebouncedFnAddIfNotKeeping({
4769
5642
  key: entryReplicated.hash,
@@ -4780,9 +5653,17 @@ let SharedLog = (() => {
4780
5653
  }
4781
5654
  }
4782
5655
  }
5656
+ if (this._isAdaptiveReplicating && hasSelfRangeRemoval) {
5657
+ await this.pruneIndexedEntriesNoLongerLed();
5658
+ }
4783
5659
  if (forceFreshDelivery) {
4784
- // Removed/shrunk ranges still need the authoritative background pass.
4785
- this.scheduleRepairSweep({ forceFreshDelivery, addedPeers });
5660
+ // Pure leave/shrink churn can have zero `addedPeers`, but the peers that
5661
+ // received redistributed entries still need a follow-up repair pass if the
5662
+ // immediate maybe-sync misses one entry.
5663
+ this.scheduleRepairSweep({
5664
+ mode: "churn",
5665
+ peers: churnRepairPeers,
5666
+ });
4786
5667
  }
4787
5668
  else if (useJoinWarmupFastPath) {
4788
5669
  // Pure join warmup uses the cheap immediate maybe-missing dispatch above,
@@ -4795,19 +5676,22 @@ let SharedLog = (() => {
4795
5676
  return;
4796
5677
  }
4797
5678
  this.scheduleRepairSweep({
4798
- forceFreshDelivery: false,
4799
- addedPeers: peers,
5679
+ mode: "join-warmup",
5680
+ peers,
4800
5681
  });
4801
5682
  }, 250);
4802
5683
  timer.unref?.();
4803
5684
  this._repairRetryTimers.add(timer);
4804
5685
  }
4805
- else if (addedPeers.size > 0) {
5686
+ else if (authoritativeRepairPeers.size > 0) {
4806
5687
  this.scheduleRepairSweep({
4807
- forceFreshDelivery: false,
4808
- addedPeers,
5688
+ mode: "join-authoritative",
5689
+ peers: authoritativeRepairPeers,
4809
5690
  });
4810
5691
  }
5692
+ if (!forceFreshDelivery && authoritativeRepairPeers.size > 0) {
5693
+ this.scheduleJoinAuthoritativeRepair(authoritativeRepairPeers);
5694
+ }
4811
5695
  for (const target of [...uncheckedDeliver.keys()]) {
4812
5696
  flushUncheckedDeliverTarget(target);
4813
5697
  }
@@ -4879,6 +5763,10 @@ let SharedLog = (() => {
4879
5763
  if (!dynamicRange) {
4880
5764
  return; // not allowed to replicate
4881
5765
  }
5766
+ if (this.replicationController.maxMemoryLimit != null &&
5767
+ usedMemory > this.replicationController.maxMemoryLimit) {
5768
+ await this.pruneIndexedEntriesNoLongerLed();
5769
+ }
4882
5770
  const peersSize = (await peers.getSize()) || 1;
4883
5771
  const totalParticipation = await this.calculateTotalParticipation();
4884
5772
  const newFactor = this.replicationController.step({