@peerbit/shared-log 12.2.0-62829ef → 12.2.0-6aaa5dd

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/src/index.js CHANGED
@@ -169,10 +169,17 @@ const createIndexableDomainFromResolution = (resolution) => {
169
169
  export const DEFAULT_MIN_REPLICAS = 2;
170
170
  export const WAIT_FOR_REPLICATOR_TIMEOUT = 9000;
171
171
  export const WAIT_FOR_ROLE_MATURITY = 5000;
172
- export const WAIT_FOR_PRUNE_DELAY = 5000;
172
+ // TODO(prune): Investigate if/when a non-zero prune delay is required for correctness
173
+ // (e.g. responsibility/replication-info message reordering in multi-peer scenarios).
174
+ // Prefer making pruning robust without timing-based heuristics.
175
+ export const WAIT_FOR_PRUNE_DELAY = 0;
173
176
  const PRUNE_DEBOUNCE_INTERVAL = 500;
174
177
  // DON'T SET THIS ANY LOWER: the PID controller becomes unstable when the system cannot respond fast enough to the controller's updates
175
178
  const RECALCULATE_PARTICIPATION_DEBOUNCE_INTERVAL = 1000;
179
+ const RECALCULATE_PARTICIPATION_MIN_RELATIVE_CHANGE = 0.01;
180
+ const RECALCULATE_PARTICIPATION_MIN_RELATIVE_CHANGE_WITH_CPU_LIMIT = 0.005;
181
+ const RECALCULATE_PARTICIPATION_MIN_RELATIVE_CHANGE_WITH_MEMORY_LIMIT = 0.001;
182
+ const RECALCULATE_PARTICIPATION_RELATIVE_DENOMINATOR_FLOOR = 1e-3;
176
183
  const DEFAULT_DISTRIBUTION_DEBOUNCE_TIME = 500;
177
184
  const getIdForDynamicRange = (publicKey) => {
178
185
  return sha256Sync(concat([publicKey.bytes, new TextEncoder().encode("dynamic")]));
@@ -217,6 +224,7 @@ let SharedLog = (() => {
217
224
  coordinateToHash;
218
225
  recentlyRebalanced;
219
226
  uniqueReplicators;
227
+ _replicatorsReconciled;
220
228
  /* private _totalParticipation!: number; */
221
229
  // gid -> coordinate -> publicKeyHash list (of owners)
222
230
  _gidPeersHistory;
@@ -296,14 +304,6 @@ let SharedLog = (() => {
296
304
  }
297
305
  setupRebalanceDebounceFunction(interval = RECALCULATE_PARTICIPATION_DEBOUNCE_INTERVAL) {
298
306
  this.rebalanceParticipationDebounced = undefined;
299
- // make the rebalancing to respect warmup time
300
- let intervalTime = interval * 2;
301
- let timeout = setTimeout(() => {
302
- intervalTime = interval;
303
- }, this.timeUntilRoleMaturity);
304
- this._closeController.signal.addEventListener("abort", () => {
305
- clearTimeout(timeout);
306
- });
307
307
  this.rebalanceParticipationDebounced = debounceFixedInterval(() => this.rebalanceParticipation(),
308
308
  /* Math.max(
309
309
  REBALANCE_DEBOUNCE_INTERVAL,
@@ -312,7 +312,7 @@ let SharedLog = (() => {
312
312
  REBALANCE_DEBOUNCE_INTERVAL
313
313
  )
314
314
  ) */
315
- () => intervalTime);
315
+ interval);
316
316
  }
317
317
  async _replicate(options, { reset, checkDuplicates, announce, mergeSegments, rebalance, } = {}) {
318
318
  let offsetWasProvided = false;
@@ -690,15 +690,29 @@ let SharedLog = (() => {
690
690
  })
691
691
  .all()).map((x) => x.value);
692
692
  let prevCount = deleted.length;
693
- await this.replicationIndex.del({ query: { hash: from.hashcode() } });
694
- diffs = [
695
- ...deleted.map((x) => {
696
- return { range: x, type: "removed", timestamp };
697
- }),
698
- ...ranges.map((x) => {
699
- return { range: x, type: "added", timestamp };
700
- }),
701
- ];
693
+ const existingById = new Map(deleted.map((x) => [x.idString, x]));
694
+ const hasSameRanges = deleted.length === ranges.length &&
695
+ ranges.every((range) => {
696
+ const existing = existingById.get(range.idString);
697
+ return existing != null && existing.equalRange(range);
698
+ });
699
+ // Avoid churn on repeated full-state announcements that don't change any
700
+ // replication ranges. This prevents unnecessary `replication:change`
701
+ // events and rebalancing cascades.
702
+ if (hasSameRanges) {
703
+ diffs = [];
704
+ }
705
+ else {
706
+ await this.replicationIndex.del({ query: { hash: from.hashcode() } });
707
+ diffs = [
708
+ ...deleted.map((x) => {
709
+ return { range: x, type: "removed", timestamp };
710
+ }),
711
+ ...ranges.map((x) => {
712
+ return { range: x, type: "added", timestamp };
713
+ }),
714
+ ];
715
+ }
702
716
  isNewReplicator = prevCount === 0 && ranges.length > 0;
703
717
  }
704
718
  else {
@@ -716,16 +730,18 @@ let SharedLog = (() => {
716
730
  existing.push(result.value);
717
731
  }
718
732
  }
733
+ let prevCountForOwner = undefined;
719
734
  if (existing.length === 0) {
720
- let prevCount = await this.replicationIndex.count({
735
+ prevCountForOwner = await this.replicationIndex.count({
721
736
  query: new StringMatch({ key: "hash", value: from.hashcode() }),
722
737
  });
723
- isNewReplicator = prevCount === 0;
738
+ isNewReplicator = prevCountForOwner === 0;
724
739
  }
725
740
  else {
726
741
  isNewReplicator = false;
727
742
  }
728
- if (checkDuplicates) {
743
+ if (checkDuplicates &&
744
+ (existing.length > 0 || (prevCountForOwner ?? 0) > 0)) {
729
745
  let deduplicated = [];
730
746
  // TODO also deduplicate/de-overlap among the ranges that ought to be inserted?
731
747
  for (const range of ranges) {
@@ -1252,6 +1268,7 @@ let SharedLog = (() => {
1252
1268
  this.coordinateToHash = new Cache({ max: 1e6, ttl: 1e4 });
1253
1269
  this.recentlyRebalanced = new Cache({ max: 1e4, ttl: 1e5 });
1254
1270
  this.uniqueReplicators = new Set();
1271
+ this._replicatorsReconciled = false;
1255
1272
  this.openTime = +new Date();
1256
1273
  this.oldestOpenTime = this.openTime;
1257
1274
  this.distributionDebounceTime =
@@ -1259,8 +1276,8 @@ let SharedLog = (() => {
1259
1276
  this.timeUntilRoleMaturity =
1260
1277
  options?.timeUntilRoleMaturity ?? WAIT_FOR_ROLE_MATURITY;
1261
1278
  this.waitForReplicatorTimeout =
1262
- options?.waitForReplicatorTimeout || WAIT_FOR_REPLICATOR_TIMEOUT;
1263
- this.waitForPruneDelay = options?.waitForPruneDelay || WAIT_FOR_PRUNE_DELAY;
1279
+ options?.waitForReplicatorTimeout ?? WAIT_FOR_REPLICATOR_TIMEOUT;
1280
+ this.waitForPruneDelay = options?.waitForPruneDelay ?? WAIT_FOR_PRUNE_DELAY;
1264
1281
  if (this.waitForReplicatorTimeout < this.timeUntilRoleMaturity) {
1265
1282
  this.waitForReplicatorTimeout = this.timeUntilRoleMaturity; // does not makes sense to expect a replicator to mature faster than it is reachable
1266
1283
  }
@@ -1473,7 +1490,16 @@ let SharedLog = (() => {
1473
1490
  async afterOpen() {
1474
1491
  await super.afterOpen();
1475
1492
  // We do this here because these calls require this.closed == false
1476
- this.pruneOfflineReplicators();
1493
+ void this.pruneOfflineReplicators()
1494
+ .then(() => {
1495
+ this._replicatorsReconciled = true;
1496
+ })
1497
+ .catch((error) => {
1498
+ if (isNotStartedError(error)) {
1499
+ return;
1500
+ }
1501
+ logger.error(error);
1502
+ });
1477
1503
  await this.rebalanceParticipation();
1478
1504
  // Take into account existing subscription
1479
1505
  (await this.node.services.pubsub.getSubscribers(this.topic))?.forEach((v, k) => {
@@ -1631,13 +1657,29 @@ let SharedLog = (() => {
1631
1657
  set.add(key);
1632
1658
  }
1633
1659
  if (options?.reachableOnly) {
1634
- let reachableSet = [];
1660
+ // Prefer the live pubsub subscriber set when filtering reachability.
1661
+ // `uniqueReplicators` is primarily driven by replication messages and can lag during
1662
+ // joins/restarts; using subscribers prevents excluding peers that are reachable but
1663
+ // whose replication ranges were loaded from disk or haven't been processed yet.
1664
+ const subscribers = (await this.node.services.pubsub.getSubscribers(this.topic)) ??
1665
+ undefined;
1666
+ const subscriberHashcodes = subscribers
1667
+ ? new Set(subscribers.map((key) => key.hashcode()))
1668
+ : undefined;
1669
+ const reachable = [];
1670
+ const selfHash = this.node.identity.publicKey.hashcode();
1635
1671
  for (const peer of set) {
1636
- if (this.uniqueReplicators.has(peer)) {
1637
- reachableSet.push(peer);
1672
+ if (peer === selfHash) {
1673
+ reachable.push(peer);
1674
+ continue;
1675
+ }
1676
+ if (subscriberHashcodes
1677
+ ? subscriberHashcodes.has(peer)
1678
+ : this.uniqueReplicators.has(peer)) {
1679
+ reachable.push(peer);
1638
1680
  }
1639
1681
  }
1640
- return reachableSet;
1682
+ return reachable;
1641
1683
  }
1642
1684
  return [...set];
1643
1685
  }
@@ -1977,15 +2019,15 @@ let SharedLog = (() => {
1977
2019
  await this.remoteBlocks.onMessage(msg.message, context.from.hashcode());
1978
2020
  }
1979
2021
  else if (msg instanceof RequestReplicationInfoMessage) {
1980
- // TODO this message type is never used, should we remove it?
1981
2022
  if (context.from.equals(this.node.identity.publicKey)) {
1982
2023
  return;
1983
2024
  }
1984
- await this.rpc.send(new AllReplicatingSegmentsMessage({
1985
- segments: (await this.getMyReplicationSegments()).map((x) => x.toReplicationRange()),
1986
- }), {
1987
- mode: new SilentDelivery({ to: [context.from], redundancy: 1 }),
1988
- });
2025
+ const segments = (await this.getMyReplicationSegments()).map((x) => x.toReplicationRange());
2026
+ this.rpc
2027
+ .send(new AllReplicatingSegmentsMessage({ segments }), {
2028
+ mode: new SeekDelivery({ to: [context.from], redundancy: 1 }),
2029
+ })
2030
+ .catch((e) => logger.error(e.toString()));
1989
2031
  // for backwards compatibility (v8) remove this when we are sure that all nodes are v9+
1990
2032
  if (this.v8Behaviour) {
1991
2033
  const role = this.getRole();
@@ -2009,38 +2051,32 @@ let SharedLog = (() => {
2009
2051
  if (context.from.equals(this.node.identity.publicKey)) {
2010
2052
  return;
2011
2053
  }
2012
- let replicationInfoMessage = msg;
2013
- // we have this statement because peers might have changed/announced their role,
2014
- // but we don't know them as "subscribers" yet. i.e. they are not online
2015
- this.waitFor(context.from, {
2016
- signal: this._closeController.signal,
2017
- timeout: this.waitForReplicatorTimeout,
2018
- })
2019
- .then(async () => {
2020
- // do use an operation log here, because we want to make sure that we don't miss any updates
2021
- // and do them in the right order
2022
- const prev = this.latestReplicationInfoMessage.get(context.from.hashcode());
2023
- if (prev && prev > context.message.header.timestamp) {
2054
+ const replicationInfoMessage = msg;
2055
+ // Process replication updates even if the sender isn't yet considered "ready" by
2056
+ // `Program.waitFor()`. Dropping these messages can lead to missing replicator info
2057
+ // (and downstream `waitForReplicator()` timeouts) under timing-sensitive joins.
2058
+ const from = context.from;
2059
+ const messageTimestamp = context.message.header.timestamp;
2060
+ (async () => {
2061
+ const prev = this.latestReplicationInfoMessage.get(from.hashcode());
2062
+ if (prev && prev > messageTimestamp) {
2024
2063
  return;
2025
2064
  }
2026
- this.latestReplicationInfoMessage.set(context.from.hashcode(), context.message.header.timestamp);
2027
- let reset = msg instanceof AllReplicatingSegmentsMessage;
2065
+ this.latestReplicationInfoMessage.set(from.hashcode(), messageTimestamp);
2028
2066
  if (this.closed) {
2029
2067
  return;
2030
2068
  }
2031
- await this.addReplicationRange(replicationInfoMessage.segments.map((x) => x.toReplicationRangeIndexable(context.from)), context.from, {
2069
+ const reset = msg instanceof AllReplicatingSegmentsMessage;
2070
+ await this.addReplicationRange(replicationInfoMessage.segments.map((x) => x.toReplicationRangeIndexable(from)), from, {
2032
2071
  reset,
2033
2072
  checkDuplicates: true,
2034
- timestamp: Number(context.message.header.timestamp),
2073
+ timestamp: Number(messageTimestamp),
2035
2074
  });
2036
- /* await this._modifyReplicators(msg.role, context.from!); */
2037
- })
2038
- .catch((e) => {
2075
+ })().catch((e) => {
2039
2076
  if (isNotStartedError(e)) {
2040
2077
  return;
2041
2078
  }
2042
- logger.error("Failed to find peer who updated replication settings: " +
2043
- e?.message);
2079
+ logger.error(`Failed to apply replication settings from '${from.hashcode()}': ${e?.message ?? e}`);
2044
2080
  });
2045
2081
  }
2046
2082
  else if (msg instanceof StoppedReplicating) {
@@ -2181,15 +2217,17 @@ let SharedLog = (() => {
2181
2217
  }
2182
2218
  async join(entries, options) {
2183
2219
  let entriesToReplicate = [];
2184
- if (options?.replicate) {
2220
+ if (options?.replicate && this.log.length > 0) {
2185
2221
  // TODO this block should perhaps be called from a callback on the this.log.join method for every element that is ignored because it is already joined, like "onAlreadyJoined"
2186
2222
  // check which entries we already have but are not replicating, and replicate them
2187
2223
  // we can not just do the 'join' call because it will ignore the already joined entries
2188
2224
  for (const element of entries) {
2189
2225
  if (typeof element === "string") {
2190
- const entry = await this.log.get(element);
2191
- if (entry) {
2192
- entriesToReplicate.push(entry);
2226
+ if (await this.log.has(element)) {
2227
+ const entry = await this.log.get(element);
2228
+ if (entry) {
2229
+ entriesToReplicate.push(entry);
2230
+ }
2193
2231
  }
2194
2232
  }
2195
2233
  else if (element instanceof Entry) {
@@ -2198,9 +2236,11 @@ let SharedLog = (() => {
2198
2236
  }
2199
2237
  }
2200
2238
  else {
2201
- const entry = await this.log.get(element.hash);
2202
- if (entry) {
2203
- entriesToReplicate.push(entry);
2239
+ if (await this.log.has(element.hash)) {
2240
+ const entry = await this.log.get(element.hash);
2241
+ if (entry) {
2242
+ entriesToReplicate.push(entry);
2243
+ }
2204
2244
  }
2205
2245
  }
2206
2246
  }
@@ -2293,6 +2333,7 @@ let SharedLog = (() => {
2293
2333
  : (options?.roleAge ?? (await this.getDefaultMinRoleAge()));
2294
2334
  let settled = false;
2295
2335
  let timer;
2336
+ let requestTimer;
2296
2337
  const clear = () => {
2297
2338
  this.events.removeEventListener("replicator:mature", check);
2298
2339
  this.events.removeEventListener("replication:change", check);
@@ -2301,6 +2342,10 @@ let SharedLog = (() => {
2301
2342
  clearTimeout(timer);
2302
2343
  timer = undefined;
2303
2344
  }
2345
+ if (requestTimer != null) {
2346
+ clearTimeout(requestTimer);
2347
+ requestTimer = undefined;
2348
+ }
2304
2349
  };
2305
2350
  const resolve = () => {
2306
2351
  if (settled) {
@@ -2325,6 +2370,31 @@ let SharedLog = (() => {
2325
2370
  timer = setTimeout(() => {
2326
2371
  reject(new TimeoutError(`Timeout waiting for replicator ${key.hashcode()}`));
2327
2372
  }, timeoutMs);
2373
+ let requestAttempts = 0;
2374
+ const requestIntervalMs = 1000;
2375
+ const maxRequestAttempts = Math.max(3, Math.ceil(timeoutMs / requestIntervalMs));
2376
+ const requestReplicationInfo = () => {
2377
+ if (settled || this.closed) {
2378
+ return;
2379
+ }
2380
+ if (requestAttempts >= maxRequestAttempts) {
2381
+ return;
2382
+ }
2383
+ requestAttempts++;
2384
+ this.rpc
2385
+ .send(new RequestReplicationInfoMessage(), {
2386
+ mode: new SeekDelivery({ redundancy: 1, to: [key] }),
2387
+ })
2388
+ .catch((e) => {
2389
+ // Best-effort: missing peers / unopened RPC should not fail the wait logic.
2390
+ if (isNotStartedError(e)) {
2391
+ return;
2392
+ }
2393
+ });
2394
+ if (requestAttempts < maxRequestAttempts) {
2395
+ requestTimer = setTimeout(requestReplicationInfo, requestIntervalMs);
2396
+ }
2397
+ };
2328
2398
  const check = async () => {
2329
2399
  const iterator = this.replicationIndex?.iterate({ query: new StringMatch({ key: "hash", value: key.hashcode() }) }, { reference: true });
2330
2400
  try {
@@ -2347,6 +2417,7 @@ let SharedLog = (() => {
2347
2417
  await iterator?.close();
2348
2418
  }
2349
2419
  };
2420
+ requestReplicationInfo();
2350
2421
  check();
2351
2422
  this.events.addEventListener("replicator:mature", check);
2352
2423
  this.events.addEventListener("replication:change", check);
@@ -2504,15 +2575,49 @@ let SharedLog = (() => {
2504
2575
  if (this._isReplicating === false) {
2505
2576
  return 0;
2506
2577
  }
2507
- const now = +new Date();
2508
- const subscribers = this.rpc.closed
2509
- ? 1
2510
- : ((await this.node.services.pubsub.getSubscribers(this.rpc.topic))
2511
- ?.length ?? 1);
2512
- const diffToOldest = subscribers > 1 ? now - this.oldestOpenTime - 1 : Number.MAX_SAFE_INTEGER;
2513
- const result = Math.min(this.timeUntilRoleMaturity, Math.max(diffToOldest, this.timeUntilRoleMaturity), Math.max(Math.round((this.timeUntilRoleMaturity * Math.log(subscribers + 1)) / 3), this.timeUntilRoleMaturity)); // / 3 so that if 2 replicators and timeUntilRoleMaturity = 1e4 the result will be 1
2514
- return result;
2515
- /* return Math.min(1e3, this.timeUntilRoleMaturity); */
2578
+ // Explicitly disable maturity gating (used by many tests).
2579
+ if (this.timeUntilRoleMaturity <= 0) {
2580
+ return 0;
2581
+ }
2582
+ // If we're alone (or pubsub isn't ready), a fixed maturity time is sufficient.
2583
+ // When there are multiple replicators we want a stable threshold that doesn't
2584
+ // depend on "now" (otherwise it can drift and turn into a flake).
2585
+ let subscribers = 1;
2586
+ if (!this.rpc.closed) {
2587
+ try {
2588
+ subscribers =
2589
+ (await this.node.services.pubsub.getSubscribers(this.rpc.topic))
2590
+ ?.length ?? 1;
2591
+ }
2592
+ catch {
2593
+ // Best-effort only; fall back to 1.
2594
+ }
2595
+ }
2596
+ if (subscribers <= 1) {
2597
+ return this.timeUntilRoleMaturity;
2598
+ }
2599
+ // Use replication range timestamps to compute a stable "age gap" between the
2600
+ // newest and oldest known roles. This keeps the oldest role mature while
2601
+ // preventing newer roles from being treated as mature purely because time
2602
+ // passes between test steps / network events.
2603
+ let newestOpenTime = this.openTime;
2604
+ try {
2605
+ const newestIterator = await this.replicationIndex.iterate({
2606
+ sort: [new Sort({ key: "timestamp", direction: "desc" })],
2607
+ }, { shape: { timestamp: true }, reference: true });
2608
+ const newestTimestampFromDB = (await newestIterator.next(1))[0]?.value
2609
+ .timestamp;
2610
+ await newestIterator.close();
2611
+ if (newestTimestampFromDB != null) {
2612
+ newestOpenTime = Number(newestTimestampFromDB);
2613
+ }
2614
+ }
2615
+ catch {
2616
+ // Best-effort only; fall back to local open time.
2617
+ }
2618
+ const ageGapToOldest = newestOpenTime - this.oldestOpenTime;
2619
+ const roleAge = Math.max(this.timeUntilRoleMaturity, ageGapToOldest);
2620
+ return roleAge < 0 ? 0 : roleAge;
2516
2621
  }
2517
2622
  async findLeaders(cursors, entry, options) {
2518
2623
  // we consume a list of coordinates in this method since if we are leader of one coordinate we want to persist all of them
@@ -2548,8 +2653,32 @@ let SharedLog = (() => {
2548
2653
  }
2549
2654
  async _findLeaders(cursors, options) {
2550
2655
  const roleAge = options?.roleAge ?? (await this.getDefaultMinRoleAge()); // TODO -500 as is added so that i f someone else is just as new as us, then we treat them as mature as us. without -500 we might be slower syncing if two nodes starts almost at the same time
2656
+ const selfHash = this.node.identity.publicKey.hashcode();
2657
+ // Use `uniqueReplicators` (replicator cache) once we've reconciled it against the
2658
+ // persisted replication index. Until then, fall back to live pubsub subscribers
2659
+ // and avoid relying on `uniqueReplicators` being complete.
2660
+ let peerFilter = undefined;
2661
+ if (this._replicatorsReconciled && this.uniqueReplicators.size > 0) {
2662
+ peerFilter = this.uniqueReplicators.has(selfHash)
2663
+ ? this.uniqueReplicators
2664
+ : new Set([...this.uniqueReplicators, selfHash]);
2665
+ }
2666
+ else {
2667
+ try {
2668
+ const subscribers = (await this.node.services.pubsub.getSubscribers(this.topic)) ??
2669
+ undefined;
2670
+ if (subscribers && subscribers.length > 0) {
2671
+ peerFilter = new Set(subscribers.map((key) => key.hashcode()));
2672
+ peerFilter.add(selfHash);
2673
+ }
2674
+ }
2675
+ catch {
2676
+ // Best-effort only; if pubsub isn't ready, do a full scan.
2677
+ }
2678
+ }
2551
2679
  return getSamples(cursors, this.replicationIndex, roleAge, this.indexableDomain.numbers, {
2552
- uniqueReplicators: this.uniqueReplicators,
2680
+ peerFilter,
2681
+ uniqueReplicators: peerFilter,
2553
2682
  });
2554
2683
  }
2555
2684
  async findLeadersFromEntry(entry, replicas, options) {
@@ -2578,7 +2707,7 @@ let SharedLog = (() => {
2578
2707
  for (const [k, v] of this._requestIPruneResponseReplicatorSet) {
2579
2708
  v.delete(publicKey.hashcode());
2580
2709
  if (v.size === 0) {
2581
- this._requestIPruneSent.delete(k);
2710
+ this._requestIPruneResponseReplicatorSet.delete(k);
2582
2711
  }
2583
2712
  }
2584
2713
  this.syncronizer.onPeerDisconnected(publicKey);
@@ -2608,6 +2737,14 @@ let SharedLog = (() => {
2608
2737
  .catch((e) => logger.error(e.toString()));
2609
2738
  }
2610
2739
  }
2740
+ // Request the remote peer's replication info. This makes joins resilient to
2741
+ // timing-sensitive delivery/order issues where we may miss their initial
2742
+ // replication announcement.
2743
+ this.rpc
2744
+ .send(new RequestReplicationInfoMessage(), {
2745
+ mode: new SeekDelivery({ redundancy: 1, to: [publicKey] }),
2746
+ })
2747
+ .catch((e) => logger.error(e.toString()));
2611
2748
  }
2612
2749
  else {
2613
2750
  await this.removeReplicator(publicKey);
@@ -2728,9 +2865,19 @@ let SharedLog = (() => {
2728
2865
  deferredPromise.reject(e);
2729
2866
  };
2730
2867
  let cursor = undefined;
2731
- const timeout = setTimeout(async () => {
2732
- reject(new Error("Timeout for checked pruning: Closed: " + this.closed));
2733
- }, options?.timeout ?? 1e4);
2868
+ // Checked prune requests can legitimately take longer than a fixed 10s:
2869
+ // - The remote may not have the entry yet and will wait up to `_respondToIHaveTimeout`
2870
+ // - Leadership/replicator information may take up to `waitForReplicatorTimeout` to settle
2871
+ // If we time out too early we can end up with permanently prunable heads that never
2872
+ // get retried (a common CI flake in "prune before join" tests).
2873
+ const checkedPruneTimeoutMs = options?.timeout ??
2874
+ Math.max(10_000, Number(this._respondToIHaveTimeout ?? 0) +
2875
+ this.waitForReplicatorTimeout +
2876
+ PRUNE_DEBOUNCE_INTERVAL * 2);
2877
+ const timeout = setTimeout(() => {
2878
+ reject(new Error(`Timeout for checked pruning after ${checkedPruneTimeoutMs}ms (closed=${this.closed})`));
2879
+ }, checkedPruneTimeoutMs);
2880
+ timeout.unref?.();
2734
2881
  this._pendingDeletes.set(entry.hash, {
2735
2882
  promise: deferredPromise,
2736
2883
  clear,
@@ -2838,8 +2985,8 @@ let SharedLog = (() => {
2838
2985
  return { range: x, type: "added", timestamp };
2839
2986
  }));
2840
2987
  }
2841
- async waitForPruned() {
2842
- await waitFor(() => this._pendingDeletes.size === 0);
2988
+ async waitForPruned(options) {
2989
+ await waitFor(() => this._pendingDeletes.size === 0, options);
2843
2990
  }
2844
2991
  async onReplicationChange(changeOrChanges) {
2845
2992
  /**
@@ -2956,9 +3103,19 @@ let SharedLog = (() => {
2956
3103
  peerCount: peersSize,
2957
3104
  cpuUsage: this.cpuUsage?.value(),
2958
3105
  });
2959
- const relativeDifference = Math.abs(dynamicRange.widthNormalized - newFactor) /
2960
- dynamicRange.widthNormalized;
2961
- if (relativeDifference > 0.0001) {
3106
+ const absoluteDifference = Math.abs(dynamicRange.widthNormalized - newFactor);
3107
+ const relativeDifference = absoluteDifference /
3108
+ Math.max(dynamicRange.widthNormalized, RECALCULATE_PARTICIPATION_RELATIVE_DENOMINATOR_FLOOR);
3109
+ let minRelativeChange = RECALCULATE_PARTICIPATION_MIN_RELATIVE_CHANGE;
3110
+ if (this.replicationController.maxMemoryLimit != null) {
3111
+ minRelativeChange =
3112
+ RECALCULATE_PARTICIPATION_MIN_RELATIVE_CHANGE_WITH_MEMORY_LIMIT;
3113
+ }
3114
+ else if (this.replicationController.maxCPUUsage != null) {
3115
+ minRelativeChange =
3116
+ RECALCULATE_PARTICIPATION_MIN_RELATIVE_CHANGE_WITH_CPU_LIMIT;
3117
+ }
3118
+ if (relativeDifference > minRelativeChange) {
2962
3119
  // TODO can not reuse old range, since it will (potentially) affect the index because of sideeffects
2963
3120
  dynamicRange = new this.indexableDomain.constructorRange({
2964
3121
  offset: dynamicRange.start1,